dccd 2.3.1__tar.gz → 2.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. {dccd-2.3.1 → dccd-2.3.2}/CHANGELOG.md +19 -1
  2. {dccd-2.3.1 → dccd-2.3.2}/PKG-INFO +21 -20
  3. {dccd-2.3.1 → dccd-2.3.2}/README.rst +17 -16
  4. {dccd-2.3.1 → dccd-2.3.2}/dccd/continuous_dl/bitfinex.py +1 -1
  5. {dccd-2.3.1 → dccd-2.3.2}/dccd/daemon/backfill.py +65 -31
  6. {dccd-2.3.1 → dccd-2.3.2}/dccd/daemon/cli.py +211 -27
  7. {dccd-2.3.1 → dccd-2.3.2}/dccd/daemon/config.py +47 -0
  8. {dccd-2.3.1 → dccd-2.3.2}/dccd/daemon/stream_manager.py +3 -4
  9. {dccd-2.3.1 → dccd-2.3.2}/dccd/histo_dl/binance.py +1 -1
  10. {dccd-2.3.1 → dccd-2.3.2}/dccd/histo_dl/bybit.py +1 -1
  11. {dccd-2.3.1 → dccd-2.3.2}/dccd/histo_dl/coinbase.py +1 -1
  12. {dccd-2.3.1 → dccd-2.3.2}/dccd/histo_dl/exchange.py +43 -49
  13. {dccd-2.3.1 → dccd-2.3.2}/dccd/histo_dl/okx.py +1 -1
  14. dccd-2.3.2/dccd/process_data.py +139 -0
  15. {dccd-2.3.1 → dccd-2.3.2}/dccd/storage.py +33 -36
  16. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_backfill.py +48 -0
  17. dccd-2.3.2/dccd/tests/test_daemon_cli.py +358 -0
  18. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_daemon_config.py +48 -0
  19. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_histo_dl.py +13 -5
  20. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_io.py +26 -26
  21. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_process_data.py +19 -17
  22. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_storage.py +12 -11
  23. dccd-2.3.2/dccd/tools/io.py +240 -0
  24. {dccd-2.3.1 → dccd-2.3.2}/dccd.egg-info/PKG-INFO +21 -20
  25. {dccd-2.3.1 → dccd-2.3.2}/dccd.egg-info/requires.txt +3 -3
  26. {dccd-2.3.1 → dccd-2.3.2}/pyproject.toml +5 -4
  27. dccd-2.3.1/dccd/process_data.py +0 -166
  28. dccd-2.3.1/dccd/tests/test_daemon_cli.py +0 -108
  29. dccd-2.3.1/dccd/tools/io.py +0 -495
  30. {dccd-2.3.1 → dccd-2.3.2}/CONTRIBUTING.md +0 -0
  31. {dccd-2.3.1 → dccd-2.3.2}/LICENSE.txt +0 -0
  32. {dccd-2.3.1 → dccd-2.3.2}/MANIFEST.in +0 -0
  33. {dccd-2.3.1 → dccd-2.3.2}/dccd/__init__.py +0 -0
  34. {dccd-2.3.1 → dccd-2.3.2}/dccd/continuous_dl/__init__.py +0 -0
  35. {dccd-2.3.1 → dccd-2.3.2}/dccd/continuous_dl/binance.py +0 -0
  36. {dccd-2.3.1 → dccd-2.3.2}/dccd/continuous_dl/bitmex.py +0 -0
  37. {dccd-2.3.1 → dccd-2.3.2}/dccd/continuous_dl/bybit.py +0 -0
  38. {dccd-2.3.1 → dccd-2.3.2}/dccd/continuous_dl/exchange.py +0 -0
  39. {dccd-2.3.1 → dccd-2.3.2}/dccd/continuous_dl/kraken.py +0 -0
  40. {dccd-2.3.1 → dccd-2.3.2}/dccd/continuous_dl/okx.py +0 -0
  41. {dccd-2.3.1 → dccd-2.3.2}/dccd/daemon/__init__.py +0 -0
  42. {dccd-2.3.1 → dccd-2.3.2}/dccd/daemon/health.py +0 -0
  43. {dccd-2.3.1 → dccd-2.3.2}/dccd/daemon/scheduler.py +0 -0
  44. {dccd-2.3.1 → dccd-2.3.2}/dccd/daemon/storage.py +0 -0
  45. {dccd-2.3.1 → dccd-2.3.2}/dccd/histo_dl/__init__.py +0 -0
  46. {dccd-2.3.1 → dccd-2.3.2}/dccd/histo_dl/kraken.py +0 -0
  47. {dccd-2.3.1 → dccd-2.3.2}/dccd/models.py +0 -0
  48. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/__init__.py +0 -0
  49. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/conftest.py +0 -0
  50. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_binance.py +0 -0
  51. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_binance_ws.py +0 -0
  52. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_bitfinex.py +0 -0
  53. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_bitmex.py +0 -0
  54. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_bybit.py +0 -0
  55. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_bybit_ws.py +0 -0
  56. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_coinbase.py +0 -0
  57. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_daemon_health.py +0 -0
  58. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_daemon_scheduler.py +0 -0
  59. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_daemon_storage.py +0 -0
  60. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_daemon_stream_manager.py +0 -0
  61. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_date_time.py +0 -0
  62. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_kraken.py +0 -0
  63. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_kraken_ws.py +0 -0
  64. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_models.py +0 -0
  65. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_okx.py +0 -0
  66. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_okx_ws.py +0 -0
  67. {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_websocket.py +0 -0
  68. {dccd-2.3.1 → dccd-2.3.2}/dccd/tools/__init__.py +0 -0
  69. {dccd-2.3.1 → dccd-2.3.2}/dccd/tools/date_time.py +0 -0
  70. {dccd-2.3.1 → dccd-2.3.2}/dccd/tools/websocket.py +0 -0
  71. {dccd-2.3.1 → dccd-2.3.2}/dccd.egg-info/SOURCES.txt +0 -0
  72. {dccd-2.3.1 → dccd-2.3.2}/dccd.egg-info/dependency_links.txt +0 -0
  73. {dccd-2.3.1 → dccd-2.3.2}/dccd.egg-info/entry_points.txt +0 -0
  74. {dccd-2.3.1 → dccd-2.3.2}/dccd.egg-info/top_level.txt +0 -0
  75. {dccd-2.3.1 → dccd-2.3.2}/setup.cfg +0 -0
@@ -6,10 +6,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
- ## [2.3.1] - 2026-05-24
9
+ ## [2.3.2] - 2026-05-25
10
+
11
+ ### Added
12
+
13
+ - `dccd/daemon/cli.py` — `dccd status --json`: emit raw metrics as a JSON object on stdout, suitable for piping into Grafana / jq (#53)
14
+ - `dccd/daemon/config.py` — `HistoJob.max_retries` (int, 1–10, default 3) and `HistoJob.retry_delay` (float ≥ 0, default 2.0): per-job retry configuration for transient network errors; delay is exponential (`retry_delay * 2^(attempt-1)`) (#53)
15
+ - `dccd/daemon/config.py` — `resolve_config_path()` and `DEFAULT_CONFIG_PATH`: CLI commands now fall back to `$XDG_CONFIG_HOME/dccd/config.yml` (default `~/.config/dccd/config.yml`) when no `--config` option is provided and `./config.yml` does not exist (#49)
16
+ - `dccd/daemon/cli.py` — `dccd inventory`: scans `data_path` and prints a table of all stored OHLC, trades, and orderbook data with date range, row count, and gap count per series; uses Polars for fast columnar reads (#50)
17
+ - `dccd/daemon/cli.py` — `dccd remove --exchange X --pair Y --span N`: removes a pair from a histo_job (or the whole job if it was the last pair) and re-validates the config before writing (#50)
18
+
19
+ ### Changed
20
+
21
+ - `dccd/storage.py`, `dccd/histo_dl/exchange.py`, `dccd/daemon/backfill.py`, `dccd/process_data.py`, `dccd/daemon/stream_manager.py` — replace pandas with polars throughout; `DataStore.save/load` accept/return `pl.DataFrame`; `get_data()` defaults to `format='polars'`; `set_marketdepth` returns a flat long-format `pl.DataFrame`; stream savers write parquet via `DataStore`; `pandas` removed from core dependencies (#52)
22
+ - `dccd/daemon/backfill.py` — backfill progress bar now shows the current window date (`YYYY-MM-DD → YYYY-MM-DD`) instead of a raw window count (`n win`); makes it easy to see which period is being downloaded at a glance (#48)
10
23
 
11
24
  ### Fixed
12
25
 
26
+ - `dccd/histo_dl/exchange.py` — `_sort_data` no longer raises `ColumnNotFoundError: "date"` when the exchange API returns an empty candle list; the polars migration (PR #52) had re-introduced a variant of the empty-data crash from v2.3.1; now returns early with an empty `self.df` (#54)
27
+
28
+ ## [2.3.1] - 2026-05-24
29
+
30
+ ### Fixed
13
31
  - `dccd/storage.py` — `DataStore.missing_intervals` now detects the gap **before** the first saved row when the requested `start` predates `file_min`; previously only the trailing gap (after `file_max`) was returned, causing `dccd backfill --start <early-date>` to silently skip all historical data before the first existing candle (#46)
14
32
  - `dccd/histo_dl/coinbase.py` — raise `RuntimeError` when Coinbase returns HTTP 200 with a JSON dict (e.g. `{"message": "..."}` for near-future windows) instead of silently iterating dict keys and crashing with `ValueError` (#45)
15
33
  - `dccd/histo_dl/coinbase.py` — additional guard: raise `RuntimeError` when Coinbase returns a JSON list whose first element is not itself a list/tuple (e.g. `["message"]`); previously caused `float("m")` `ValueError` (#45)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dccd
3
- Version: 2.3.1
3
+ Version: 2.3.2
4
4
  Summary: Download Crypto Currency Data from different exchanges.
5
5
  Author-email: Arthur Bernard <arthur.bernard.92@gmail.com>
6
6
  License: MIT
@@ -22,7 +22,8 @@ Requires-Python: >=3.10
22
22
  Description-Content-Type: text/x-rst
23
23
  License-File: LICENSE.txt
24
24
  Requires-Dist: numpy>=1.26
25
- Requires-Dist: pandas>=2.0
25
+ Requires-Dist: polars>=0.20
26
+ Requires-Dist: pyarrow>=13
26
27
  Requires-Dist: requests>=2.28
27
28
  Requires-Dist: openpyxl>=3.1
28
29
  Requires-Dist: websockets>=12.0
@@ -31,8 +32,8 @@ Requires-Dist: SQLAlchemy>=2.0
31
32
  Requires-Dist: tenacity>=8.0
32
33
  Requires-Dist: pydantic>=2.0
33
34
  Provides-Extra: io
34
- Requires-Dist: pyarrow>=13; extra == "io"
35
35
  Requires-Dist: polars>=0.20; extra == "io"
36
+ Requires-Dist: pyarrow>=13; extra == "io"
36
37
  Provides-Extra: daemon
37
38
  Requires-Dist: pyyaml>=6.0; extra == "daemon"
38
39
  Requires-Dist: apscheduler<4,>=3.10; extra == "daemon"
@@ -45,7 +46,6 @@ Requires-Dist: pytest-cov>=4.1; extra == "dev"
45
46
  Requires-Dist: ruff>=0.4; extra == "dev"
46
47
  Requires-Dist: interrogate>=1.5; extra == "dev"
47
48
  Requires-Dist: mypy>=1.0; extra == "dev"
48
- Requires-Dist: pandas-stubs>=2.0; extra == "dev"
49
49
  Requires-Dist: pyyaml>=6.0; extra == "dev"
50
50
  Requires-Dist: apscheduler<4,>=3.10; extra == "dev"
51
51
  Requires-Dist: typer>=0.12; extra == "dev"
@@ -110,10 +110,6 @@ From pip::
110
110
 
111
111
  $ pip install dccd
112
112
 
113
- With optional Parquet / Polars support::
114
-
115
- $ pip install "dccd[io]"
116
-
117
113
  With autonomous daemon support (APScheduler + PyYAML)::
118
114
 
119
115
  $ pip install "dccd[daemon]"
@@ -173,14 +169,14 @@ Output formats
173
169
  --------------
174
170
 
175
171
  Historical data can be saved as **CSV**, **Excel** (``.xlsx``), **SQLite**,
176
- **PostgreSQL** (via SQLAlchemy), or **Parquet** (requires ``dccd[io]``).
177
- Parquet files can be read back as either a ``pandas.DataFrame`` or a
178
- ``polars.DataFrame``.
172
+ **PostgreSQL** (via SQLAlchemy), or **Parquet**.
173
+ All DataFrames are native ``polars.DataFrame``. A ``pandas.DataFrame`` can be
174
+ obtained via ``get_data(format='pandas')``.
179
175
 
180
176
  Quick start
181
177
  ===========
182
178
 
183
- Historical data (pandas):
179
+ Historical data:
184
180
 
185
181
  .. code-block:: python
186
182
 
@@ -189,13 +185,8 @@ Historical data (pandas):
189
185
  obj = FromBinance('/path/to/data/', 'BTC', 3600, fiat='USDT')
190
186
  obj.import_data(start='2024-01-01 00:00:00', end='2024-12-31 00:00:00')
191
187
  obj.save(form='parquet')
192
- df = obj.get_data() # pandas DataFrame
193
-
194
- Polars output:
195
-
196
- .. code-block:: python
197
-
198
- df_pl = obj.get_data(format='polars')
188
+ df = obj.get_data() # polars DataFrame (default)
189
+ df_pd = obj.get_data(format='pandas') # pandas DataFrame (optional)
199
190
 
200
191
  Incremental update (resume from last saved point):
201
192
 
@@ -222,7 +213,7 @@ Trades (historical or recent):
222
213
  obj = FromBinance('/path/', 'BTC', 3600, fiat='USDT')
223
214
  obj.import_trades(start='2024-01-01 00:00:00', end='2024-01-02 00:00:00')
224
215
  obj.save_trades(form='csv')
225
- df = obj.trades_df # pandas DataFrame — columns: timestamp, price, amount, type, tid
216
+ df = obj.trades_df # polars DataFrame — columns: TS, price, amount, type, tid
226
217
 
227
218
  # Kraken also supports full history; Bybit/Coinbase return recent-only snapshots
228
219
  FromKraken('/path/', 'BTC', 3600).import_trades(start='2024-01-01', end='2024-01-02').save_trades()
@@ -286,6 +277,16 @@ CLI quick start:
286
277
  # Continuous daemon (Ctrl-C to stop)
287
278
  dccd start --config config.yml
288
279
 
280
+ # Add / remove a histo job in-place
281
+ dccd add --exchange kraken --pair ETH/USD --span 86400 --config config.yml
282
+ dccd remove --exchange kraken --pair ETH/USD --span 86400 --config config.yml
283
+
284
+ # Inspect all data on disk (OHLC, trades, orderbook)
285
+ dccd inventory --config config.yml
286
+
287
+ Note: ``--config`` is optional — dccd searches ``./config.yml`` then
288
+ ``~/.config/dccd/config.yml`` when omitted.
289
+
289
290
  Python API:
290
291
 
291
292
  .. code-block:: python
@@ -48,10 +48,6 @@ From pip::
48
48
 
49
49
  $ pip install dccd
50
50
 
51
- With optional Parquet / Polars support::
52
-
53
- $ pip install "dccd[io]"
54
-
55
51
  With autonomous daemon support (APScheduler + PyYAML)::
56
52
 
57
53
  $ pip install "dccd[daemon]"
@@ -111,14 +107,14 @@ Output formats
111
107
  --------------
112
108
 
113
109
  Historical data can be saved as **CSV**, **Excel** (``.xlsx``), **SQLite**,
114
- **PostgreSQL** (via SQLAlchemy), or **Parquet** (requires ``dccd[io]``).
115
- Parquet files can be read back as either a ``pandas.DataFrame`` or a
116
- ``polars.DataFrame``.
110
+ **PostgreSQL** (via SQLAlchemy), or **Parquet**.
111
+ All DataFrames are native ``polars.DataFrame``. A ``pandas.DataFrame`` can be
112
+ obtained via ``get_data(format='pandas')``.
117
113
 
118
114
  Quick start
119
115
  ===========
120
116
 
121
- Historical data (pandas):
117
+ Historical data:
122
118
 
123
119
  .. code-block:: python
124
120
 
@@ -127,13 +123,8 @@ Historical data (pandas):
127
123
  obj = FromBinance('/path/to/data/', 'BTC', 3600, fiat='USDT')
128
124
  obj.import_data(start='2024-01-01 00:00:00', end='2024-12-31 00:00:00')
129
125
  obj.save(form='parquet')
130
- df = obj.get_data() # pandas DataFrame
131
-
132
- Polars output:
133
-
134
- .. code-block:: python
135
-
136
- df_pl = obj.get_data(format='polars')
126
+ df = obj.get_data() # polars DataFrame (default)
127
+ df_pd = obj.get_data(format='pandas') # pandas DataFrame (optional)
137
128
 
138
129
  Incremental update (resume from last saved point):
139
130
 
@@ -160,7 +151,7 @@ Trades (historical or recent):
160
151
  obj = FromBinance('/path/', 'BTC', 3600, fiat='USDT')
161
152
  obj.import_trades(start='2024-01-01 00:00:00', end='2024-01-02 00:00:00')
162
153
  obj.save_trades(form='csv')
163
- df = obj.trades_df # pandas DataFrame — columns: timestamp, price, amount, type, tid
154
+ df = obj.trades_df # polars DataFrame — columns: TS, price, amount, type, tid
164
155
 
165
156
  # Kraken also supports full history; Bybit/Coinbase return recent-only snapshots
166
157
  FromKraken('/path/', 'BTC', 3600).import_trades(start='2024-01-01', end='2024-01-02').save_trades()
@@ -224,6 +215,16 @@ CLI quick start:
224
215
  # Continuous daemon (Ctrl-C to stop)
225
216
  dccd start --config config.yml
226
217
 
218
+ # Add / remove a histo job in-place
219
+ dccd add --exchange kraken --pair ETH/USD --span 86400 --config config.yml
220
+ dccd remove --exchange kraken --pair ETH/USD --span 86400 --config config.yml
221
+
222
+ # Inspect all data on disk (OHLC, trades, orderbook)
223
+ dccd inventory --config config.yml
224
+
225
+ Note: ``--config`` is optional — dccd searches ``./config.yml`` then
226
+ ``~/.config/dccd/config.yml`` when omitted.
227
+
227
228
  Python API:
228
229
 
229
230
  .. code-block:: python
@@ -286,7 +286,7 @@ def get_data_bitfinex(channel: str, process_func: Any, process_params: dict[str,
286
286
  'Oracle', 'MSSQL', 'MySQL'},
287
287
  It will create an IODataBase object to save/update the database in the
288
288
  specified format `save_method`, default is 'DataFrame' it save as
289
- binary pd.DataFrame object. More informations are available into
289
+ binary pl.DataFrame object. More informations are available into
290
290
  :mod:`dccd.tools.io`.
291
291
  io_params : dict, optional
292
292
  Dictionary of the keyword arguments available to the
@@ -37,9 +37,10 @@ from __future__ import annotations
37
37
 
38
38
  import time as time_mod
39
39
  from abc import ABC, abstractmethod
40
+ from datetime import datetime, timezone
40
41
  from typing import TYPE_CHECKING
41
42
 
42
- import pandas as pd
43
+ import polars as pl
43
44
  from tqdm import tqdm
44
45
 
45
46
  from dccd.tools.date_time import date_to_TS
@@ -92,6 +93,12 @@ class _BackfillBase(ABC):
92
93
  form : str
93
94
  Output format (accepted for backward compatibility; storage is
94
95
  always Parquet via :class:`~dccd.storage.DataStore`).
96
+ max_retries : int, optional
97
+ Maximum fetch attempts per window before skipping it. Default 3.
98
+ retry_delay : float, optional
99
+ Base delay in seconds between retries. Actual delay is
100
+ ``retry_delay * 2 ** (attempt - 1)`` (exponential back-off).
101
+ Default 2.0.
95
102
 
96
103
  """
97
104
 
@@ -100,10 +107,14 @@ class _BackfillBase(ABC):
100
107
  obj: ImportDataCryptoCurrencies,
101
108
  sleep: float,
102
109
  form: str,
110
+ max_retries: int = 3,
111
+ retry_delay: float = 2.0,
103
112
  ) -> None:
104
113
  self.obj = obj
105
114
  self.sleep = sleep
106
115
  self.form = form
116
+ self.max_retries = max_retries
117
+ self.retry_delay = retry_delay
107
118
  cls_name = type(obj).__name__[4:] # strip leading 'From'
108
119
  self.label = f'{cls_name:8s} {obj.crypto}/{obj.fiat}'
109
120
 
@@ -184,9 +195,15 @@ class _BackfillBase(ABC):
184
195
  tqdm.write(f'[{self.label}] already up to date')
185
196
  return
186
197
 
187
- total = sum(max(1, (e - s) // self.window_size + 1) for s, e in intervals)
198
+ def _iso(ts: int) -> str:
199
+ return datetime.fromtimestamp(ts, tz=timezone.utc).strftime('%Y-%m-%d')
200
+
201
+ total = sum(max(1, (e - s) // self.window_size + 1) for s, e in intervals)
202
+ end_date_str = _iso(intervals[-1][1])
188
203
  bar = tqdm(
189
- total=total, desc=self.label, unit='win',
204
+ total=total,
205
+ desc=f'{self.label} {_iso(intervals[0][0])} → {end_date_str}',
206
+ unit='',
190
207
  position=position, leave=True, dynamic_ncols=True,
191
208
  )
192
209
 
@@ -200,22 +217,23 @@ class _BackfillBase(ABC):
200
217
 
201
218
  last_exc: Exception | None = None
202
219
  n = 0
203
- for attempt in range(1, _MAX_RETRIES + 1):
220
+ for attempt in range(1, self.max_retries + 1):
204
221
  try:
205
222
  n = self._fetch_window(current, end)
206
223
  break
207
224
  except Exception as exc:
208
225
  last_exc = exc
209
- if attempt < _MAX_RETRIES:
226
+ if attempt < self.max_retries:
227
+ delay = self.retry_delay * (2 ** (attempt - 1))
210
228
  tqdm.write(
211
- f'[{self.label}] attempt {attempt}/{_MAX_RETRIES} '
212
- f'failed: {exc} — retrying in 2s'
229
+ f'[{self.label}] attempt {attempt}/{self.max_retries} '
230
+ f'failed: {exc} — retrying in {delay:.1f}s'
213
231
  )
214
- time_mod.sleep(2)
232
+ time_mod.sleep(delay)
215
233
  else:
216
234
  tqdm.write(
217
235
  f'[{self.label}] window {current} failed after '
218
- f'{_MAX_RETRIES} attempts: {last_exc} — skipping'
236
+ f'{self.max_retries} attempts: {last_exc} — skipping'
219
237
  )
220
238
  skipped += 1
221
239
  current += self.window_size
@@ -227,8 +245,10 @@ class _BackfillBase(ABC):
227
245
  self.obj.save()
228
246
  n_candles += n
229
247
 
248
+ current_date = _iso(current)
230
249
  current = self._advance(current, end)
231
250
  bar.update(1)
251
+ bar.set_description(f'{self.label} {current_date} → {end_date_str}')
232
252
  bar.set_postfix(candles=n_candles, skipped=skipped)
233
253
  time_mod.sleep(self.sleep)
234
254
 
@@ -256,6 +276,10 @@ class OHLCBackfill(_BackfillBase):
256
276
  Seconds to wait between requests.
257
277
  form : str
258
278
  Output format (accepted for backward compatibility).
279
+ max_retries : int, optional
280
+ See :class:`_BackfillBase`. Default 3.
281
+ retry_delay : float, optional
282
+ See :class:`_BackfillBase`. Default 2.0.
259
283
 
260
284
  """
261
285
 
@@ -265,8 +289,10 @@ class OHLCBackfill(_BackfillBase):
265
289
  max_candles: int,
266
290
  sleep: float,
267
291
  form: str,
292
+ max_retries: int = 3,
293
+ retry_delay: float = 2.0,
268
294
  ) -> None:
269
- super().__init__(obj, sleep, form)
295
+ super().__init__(obj, sleep, form, max_retries=max_retries, retry_delay=retry_delay)
270
296
  self.max_candles = max_candles
271
297
 
272
298
  @property
@@ -275,7 +301,7 @@ class OHLCBackfill(_BackfillBase):
275
301
 
276
302
  def _fetch_window(self, current: int, end: int) -> int:
277
303
  self.obj.import_data(start=current, end=end)
278
- if self.obj.df is None or self.obj.df.empty:
304
+ if self.obj.df is None or len(self.obj.df) == 0:
279
305
  return 0
280
306
  self._warn_if_suspicious(current, end)
281
307
  return len(self.obj.df)
@@ -439,28 +465,29 @@ class KrakenBackfill(_BackfillBase):
439
465
  return []
440
466
 
441
467
  span = self.obj.span
442
- df = pd.DataFrame(trades)
443
- df['ts'] = pd.to_datetime(df['timestamp'], unit='s', utc=True)
444
- df = df.set_index('ts').sort_index()
445
-
446
- freq = f'{span}s'
447
- origin = pd.Timestamp(start_ts, unit='s', tz='UTC')
448
- resample_kw = {'closed': 'left', 'label': 'left', 'origin': origin}
449
- ohlc = df['price'].resample(freq, **resample_kw).ohlc()
450
- vol = df['amount'].resample(freq, **resample_kw).sum()
451
- qvol = (df['price'] * df['amount']).resample(freq, **resample_kw).sum()
452
-
468
+ df = (
469
+ pl.DataFrame(trades)
470
+ .with_columns(pl.from_epoch('timestamp', time_unit='s').alias('ts'))
471
+ .sort('ts')
472
+ )
453
473
  result = (
454
- ohlc.join(vol.rename('volume')).join(qvol.rename('quoteVolume'))
455
- .loc[
456
- pd.Timestamp(start_ts, unit='s', tz='UTC'):
457
- pd.Timestamp(end_ts - 1, unit='s', tz='UTC'),
458
- ]
459
- .dropna(subset=['open'])
474
+ df.group_by_dynamic('ts', every=f'{span}s', closed='left', start_by='datapoint')
475
+ .agg(
476
+ pl.col('price').first().alias('open'),
477
+ pl.col('price').max().alias('high'),
478
+ pl.col('price').min().alias('low'),
479
+ pl.col('price').last().alias('close'),
480
+ pl.col('amount').sum().alias('volume'),
481
+ (pl.col('price') * pl.col('amount')).sum().alias('quoteVolume'),
482
+ )
483
+ .filter(
484
+ (pl.col('ts').dt.epoch(time_unit='s') >= start_ts)
485
+ & (pl.col('ts').dt.epoch(time_unit='s') < end_ts)
486
+ )
460
487
  )
461
488
 
462
489
  return [{
463
- 'date': round(row_ts.timestamp()),
490
+ 'date': int(row['ts'].timestamp()),
464
491
  'open': float(row['open']),
465
492
  'high': float(row['high']),
466
493
  'low': float(row['low']),
@@ -471,7 +498,7 @@ class KrakenBackfill(_BackfillBase):
471
498
  float(row['quoteVolume'] / row['volume'])
472
499
  if row['volume'] > 0 else float(row['close'])
473
500
  ),
474
- } for row_ts, row in result.iterrows()]
501
+ } for row in result.iter_rows(named=True)]
475
502
 
476
503
 
477
504
  # ---------------------------------------------------------------------------
@@ -486,6 +513,8 @@ def make_job(
486
513
  path: str,
487
514
  tz: str,
488
515
  form: str,
516
+ max_retries: int = 3,
517
+ retry_delay: float = 2.0,
489
518
  ) -> _BackfillBase:
490
519
  """Build the appropriate backfill strategy for an (exchange, pair).
491
520
 
@@ -532,13 +561,16 @@ def make_job(
532
561
  obj = cls(path, crypto, span, fiat, form=form, tz=tz)
533
562
 
534
563
  if exchange == _KRAKEN_EXCHANGE:
535
- return KrakenBackfill(obj, sleep=sleep, form=form)
564
+ return KrakenBackfill(obj, sleep=sleep, form=form,
565
+ max_retries=max_retries, retry_delay=retry_delay)
536
566
 
537
567
  return OHLCBackfill(
538
568
  obj,
539
569
  max_candles=defaults['max_candles'],
540
570
  sleep=sleep,
541
571
  form=form,
572
+ max_retries=max_retries,
573
+ retry_delay=retry_delay,
542
574
  )
543
575
 
544
576
 
@@ -587,6 +619,8 @@ def run_backfill(
587
619
  job = make_job(
588
620
  histo_job.exchange, crypto, fiat, histo_job.span,
589
621
  path, tz, histo_job.format,
622
+ max_retries=histo_job.max_retries,
623
+ retry_delay=histo_job.retry_delay,
590
624
  )
591
625
  if job.obj.full_path in seen_paths:
592
626
  tqdm.write(