dccd 2.3.1__tar.gz → 2.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dccd-2.3.1 → dccd-2.3.2}/CHANGELOG.md +19 -1
- {dccd-2.3.1 → dccd-2.3.2}/PKG-INFO +21 -20
- {dccd-2.3.1 → dccd-2.3.2}/README.rst +17 -16
- {dccd-2.3.1 → dccd-2.3.2}/dccd/continuous_dl/bitfinex.py +1 -1
- {dccd-2.3.1 → dccd-2.3.2}/dccd/daemon/backfill.py +65 -31
- {dccd-2.3.1 → dccd-2.3.2}/dccd/daemon/cli.py +211 -27
- {dccd-2.3.1 → dccd-2.3.2}/dccd/daemon/config.py +47 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/daemon/stream_manager.py +3 -4
- {dccd-2.3.1 → dccd-2.3.2}/dccd/histo_dl/binance.py +1 -1
- {dccd-2.3.1 → dccd-2.3.2}/dccd/histo_dl/bybit.py +1 -1
- {dccd-2.3.1 → dccd-2.3.2}/dccd/histo_dl/coinbase.py +1 -1
- {dccd-2.3.1 → dccd-2.3.2}/dccd/histo_dl/exchange.py +43 -49
- {dccd-2.3.1 → dccd-2.3.2}/dccd/histo_dl/okx.py +1 -1
- dccd-2.3.2/dccd/process_data.py +139 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/storage.py +33 -36
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_backfill.py +48 -0
- dccd-2.3.2/dccd/tests/test_daemon_cli.py +358 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_daemon_config.py +48 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_histo_dl.py +13 -5
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_io.py +26 -26
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_process_data.py +19 -17
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_storage.py +12 -11
- dccd-2.3.2/dccd/tools/io.py +240 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd.egg-info/PKG-INFO +21 -20
- {dccd-2.3.1 → dccd-2.3.2}/dccd.egg-info/requires.txt +3 -3
- {dccd-2.3.1 → dccd-2.3.2}/pyproject.toml +5 -4
- dccd-2.3.1/dccd/process_data.py +0 -166
- dccd-2.3.1/dccd/tests/test_daemon_cli.py +0 -108
- dccd-2.3.1/dccd/tools/io.py +0 -495
- {dccd-2.3.1 → dccd-2.3.2}/CONTRIBUTING.md +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/LICENSE.txt +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/MANIFEST.in +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/__init__.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/continuous_dl/__init__.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/continuous_dl/binance.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/continuous_dl/bitmex.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/continuous_dl/bybit.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/continuous_dl/exchange.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/continuous_dl/kraken.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/continuous_dl/okx.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/daemon/__init__.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/daemon/health.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/daemon/scheduler.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/daemon/storage.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/histo_dl/__init__.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/histo_dl/kraken.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/models.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/__init__.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/conftest.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_binance.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_binance_ws.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_bitfinex.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_bitmex.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_bybit.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_bybit_ws.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_coinbase.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_daemon_health.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_daemon_scheduler.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_daemon_storage.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_daemon_stream_manager.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_date_time.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_kraken.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_kraken_ws.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_models.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_okx.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_okx_ws.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tests/test_websocket.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tools/__init__.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tools/date_time.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd/tools/websocket.py +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd.egg-info/SOURCES.txt +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd.egg-info/dependency_links.txt +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd.egg-info/entry_points.txt +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/dccd.egg-info/top_level.txt +0 -0
- {dccd-2.3.1 → dccd-2.3.2}/setup.cfg +0 -0
|
@@ -6,10 +6,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
6
6
|
|
|
7
7
|
## [Unreleased]
|
|
8
8
|
|
|
9
|
-
## [2.3.1] - 2026-05-24
|
|
9
|
+
## [2.3.2] - 2026-05-25
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
|
|
13
|
+
- `dccd/daemon/cli.py` — `dccd status --json`: emit raw metrics as a JSON object on stdout, suitable for piping into Grafana / jq (#53)
|
|
14
|
+
- `dccd/daemon/config.py` — `HistoJob.max_retries` (int, 1–10, default 3) and `HistoJob.retry_delay` (float ≥ 0, default 2.0): per-job retry configuration for transient network errors; delay is exponential (`retry_delay * 2^(attempt-1)`) (#53)
|
|
15
|
+
- `dccd/daemon/config.py` — `resolve_config_path()` and `DEFAULT_CONFIG_PATH`: CLI commands now fall back to `$XDG_CONFIG_HOME/dccd/config.yml` (default `~/.config/dccd/config.yml`) when no `--config` option is provided and `./config.yml` does not exist (#49)
|
|
16
|
+
- `dccd/daemon/cli.py` — `dccd inventory`: scans `data_path` and prints a table of all stored OHLC, trades, and orderbook data with date range, row count, and gap count per series; uses Polars for fast columnar reads (#50)
|
|
17
|
+
- `dccd/daemon/cli.py` — `dccd remove --exchange X --pair Y --span N`: removes a pair from a histo_job (or the whole job if it was the last pair) and re-validates the config before writing (#50)
|
|
18
|
+
|
|
19
|
+
### Changed
|
|
20
|
+
|
|
21
|
+
- `dccd/storage.py`, `dccd/histo_dl/exchange.py`, `dccd/daemon/backfill.py`, `dccd/process_data.py`, `dccd/daemon/stream_manager.py` — replace pandas with polars throughout; `DataStore.save/load` accept/return `pl.DataFrame`; `get_data()` defaults to `format='polars'`; `set_marketdepth` returns a flat long-format `pl.DataFrame`; stream savers write parquet via `DataStore`; `pandas` removed from core dependencies (#52)
|
|
22
|
+
- `dccd/daemon/backfill.py` — backfill progress bar now shows the current window date (`YYYY-MM-DD → YYYY-MM-DD`) instead of a raw window count (`n win`); makes it easy to see which period is being downloaded at a glance (#48)
|
|
10
23
|
|
|
11
24
|
### Fixed
|
|
12
25
|
|
|
26
|
+
- `dccd/histo_dl/exchange.py` — `_sort_data` no longer raises `ColumnNotFoundError: "date"` when the exchange API returns an empty candle list; the polars migration (PR #52) had re-introduced a variant of the empty-data crash from v2.3.1; now returns early with an empty `self.df` (#54)
|
|
27
|
+
|
|
28
|
+
## [2.3.1] - 2026-05-24
|
|
29
|
+
|
|
30
|
+
### Fixed
|
|
13
31
|
- `dccd/storage.py` — `DataStore.missing_intervals` now detects the gap **before** the first saved row when the requested `start` predates `file_min`; previously only the trailing gap (after `file_max`) was returned, causing `dccd backfill --start <early-date>` to silently skip all historical data before the first existing candle (#46)
|
|
14
32
|
- `dccd/histo_dl/coinbase.py` — raise `RuntimeError` when Coinbase returns HTTP 200 with a JSON dict (e.g. `{"message": "..."}` for near-future windows) instead of silently iterating dict keys and crashing with `ValueError` (#45)
|
|
15
33
|
- `dccd/histo_dl/coinbase.py` — additional guard: raise `RuntimeError` when Coinbase returns a JSON list whose first element is not itself a list/tuple (e.g. `["message"]`); previously caused `float("m")` `ValueError` (#45)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dccd
|
|
3
|
-
Version: 2.3.1
|
|
3
|
+
Version: 2.3.2
|
|
4
4
|
Summary: Download Crypto Currency Data from different exchanges.
|
|
5
5
|
Author-email: Arthur Bernard <arthur.bernard.92@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -22,7 +22,8 @@ Requires-Python: >=3.10
|
|
|
22
22
|
Description-Content-Type: text/x-rst
|
|
23
23
|
License-File: LICENSE.txt
|
|
24
24
|
Requires-Dist: numpy>=1.26
|
|
25
|
-
Requires-Dist:
|
|
25
|
+
Requires-Dist: polars>=0.20
|
|
26
|
+
Requires-Dist: pyarrow>=13
|
|
26
27
|
Requires-Dist: requests>=2.28
|
|
27
28
|
Requires-Dist: openpyxl>=3.1
|
|
28
29
|
Requires-Dist: websockets>=12.0
|
|
@@ -31,8 +32,8 @@ Requires-Dist: SQLAlchemy>=2.0
|
|
|
31
32
|
Requires-Dist: tenacity>=8.0
|
|
32
33
|
Requires-Dist: pydantic>=2.0
|
|
33
34
|
Provides-Extra: io
|
|
34
|
-
Requires-Dist: pyarrow>=13; extra == "io"
|
|
35
35
|
Requires-Dist: polars>=0.20; extra == "io"
|
|
36
|
+
Requires-Dist: pyarrow>=13; extra == "io"
|
|
36
37
|
Provides-Extra: daemon
|
|
37
38
|
Requires-Dist: pyyaml>=6.0; extra == "daemon"
|
|
38
39
|
Requires-Dist: apscheduler<4,>=3.10; extra == "daemon"
|
|
@@ -45,7 +46,6 @@ Requires-Dist: pytest-cov>=4.1; extra == "dev"
|
|
|
45
46
|
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
46
47
|
Requires-Dist: interrogate>=1.5; extra == "dev"
|
|
47
48
|
Requires-Dist: mypy>=1.0; extra == "dev"
|
|
48
|
-
Requires-Dist: pandas-stubs>=2.0; extra == "dev"
|
|
49
49
|
Requires-Dist: pyyaml>=6.0; extra == "dev"
|
|
50
50
|
Requires-Dist: apscheduler<4,>=3.10; extra == "dev"
|
|
51
51
|
Requires-Dist: typer>=0.12; extra == "dev"
|
|
@@ -110,10 +110,6 @@ From pip::
|
|
|
110
110
|
|
|
111
111
|
$ pip install dccd
|
|
112
112
|
|
|
113
|
-
With optional Parquet / Polars support::
|
|
114
|
-
|
|
115
|
-
$ pip install "dccd[io]"
|
|
116
|
-
|
|
117
113
|
With autonomous daemon support (APScheduler + PyYAML)::
|
|
118
114
|
|
|
119
115
|
$ pip install "dccd[daemon]"
|
|
@@ -173,14 +169,14 @@ Output formats
|
|
|
173
169
|
--------------
|
|
174
170
|
|
|
175
171
|
Historical data can be saved as **CSV**, **Excel** (``.xlsx``), **SQLite**,
|
|
176
|
-
**PostgreSQL** (via SQLAlchemy), or **Parquet
|
|
177
|
-
|
|
178
|
-
``
|
|
172
|
+
**PostgreSQL** (via SQLAlchemy), or **Parquet**.
|
|
173
|
+
All DataFrames are native ``polars.DataFrame``. A ``pandas.DataFrame`` can be
|
|
174
|
+
obtained via ``get_data(format='pandas')``.
|
|
179
175
|
|
|
180
176
|
Quick start
|
|
181
177
|
===========
|
|
182
178
|
|
|
183
|
-
Historical data (pandas):
|
|
179
|
+
Historical data:
|
|
184
180
|
|
|
185
181
|
.. code-block:: python
|
|
186
182
|
|
|
@@ -189,13 +185,8 @@ Historical data (pandas):
|
|
|
189
185
|
obj = FromBinance('/path/to/data/', 'BTC', 3600, fiat='USDT')
|
|
190
186
|
obj.import_data(start='2024-01-01 00:00:00', end='2024-12-31 00:00:00')
|
|
191
187
|
obj.save(form='parquet')
|
|
192
|
-
df = obj.get_data()
|
|
193
|
-
|
|
194
|
-
Polars output:
|
|
195
|
-
|
|
196
|
-
.. code-block:: python
|
|
197
|
-
|
|
198
|
-
df_pl = obj.get_data(format='polars')
|
|
188
|
+
df = obj.get_data() # polars DataFrame (default)
|
|
189
|
+
df_pd = obj.get_data(format='pandas') # pandas DataFrame (optional)
|
|
199
190
|
|
|
200
191
|
Incremental update (resume from last saved point):
|
|
201
192
|
|
|
@@ -222,7 +213,7 @@ Trades (historical or recent):
|
|
|
222
213
|
obj = FromBinance('/path/', 'BTC', 3600, fiat='USDT')
|
|
223
214
|
obj.import_trades(start='2024-01-01 00:00:00', end='2024-01-02 00:00:00')
|
|
224
215
|
obj.save_trades(form='csv')
|
|
225
|
-
df = obj.trades_df #
|
|
216
|
+
df = obj.trades_df # polars DataFrame — columns: TS, price, amount, type, tid
|
|
226
217
|
|
|
227
218
|
# Kraken also supports full history; Bybit/Coinbase return recent-only snapshots
|
|
228
219
|
FromKraken('/path/', 'BTC', 3600).import_trades(start='2024-01-01', end='2024-01-02').save_trades()
|
|
@@ -286,6 +277,16 @@ CLI quick start:
|
|
|
286
277
|
# Continuous daemon (Ctrl-C to stop)
|
|
287
278
|
dccd start --config config.yml
|
|
288
279
|
|
|
280
|
+
# Add / remove a histo job in-place
|
|
281
|
+
dccd add --exchange kraken --pair ETH/USD --span 86400 --config config.yml
|
|
282
|
+
dccd remove --exchange kraken --pair ETH/USD --span 86400 --config config.yml
|
|
283
|
+
|
|
284
|
+
# Inspect all data on disk (OHLC, trades, orderbook)
|
|
285
|
+
dccd inventory --config config.yml
|
|
286
|
+
|
|
287
|
+
Note: ``--config`` is optional — dccd searches ``./config.yml`` then
|
|
288
|
+
``~/.config/dccd/config.yml`` when omitted.
|
|
289
|
+
|
|
289
290
|
Python API:
|
|
290
291
|
|
|
291
292
|
.. code-block:: python
|
|
@@ -48,10 +48,6 @@ From pip::
|
|
|
48
48
|
|
|
49
49
|
$ pip install dccd
|
|
50
50
|
|
|
51
|
-
With optional Parquet / Polars support::
|
|
52
|
-
|
|
53
|
-
$ pip install "dccd[io]"
|
|
54
|
-
|
|
55
51
|
With autonomous daemon support (APScheduler + PyYAML)::
|
|
56
52
|
|
|
57
53
|
$ pip install "dccd[daemon]"
|
|
@@ -111,14 +107,14 @@ Output formats
|
|
|
111
107
|
--------------
|
|
112
108
|
|
|
113
109
|
Historical data can be saved as **CSV**, **Excel** (``.xlsx``), **SQLite**,
|
|
114
|
-
**PostgreSQL** (via SQLAlchemy), or **Parquet
|
|
115
|
-
|
|
116
|
-
``
|
|
110
|
+
**PostgreSQL** (via SQLAlchemy), or **Parquet**.
|
|
111
|
+
All DataFrames are native ``polars.DataFrame``. A ``pandas.DataFrame`` can be
|
|
112
|
+
obtained via ``get_data(format='pandas')``.
|
|
117
113
|
|
|
118
114
|
Quick start
|
|
119
115
|
===========
|
|
120
116
|
|
|
121
|
-
Historical data (pandas):
|
|
117
|
+
Historical data:
|
|
122
118
|
|
|
123
119
|
.. code-block:: python
|
|
124
120
|
|
|
@@ -127,13 +123,8 @@ Historical data (pandas):
|
|
|
127
123
|
obj = FromBinance('/path/to/data/', 'BTC', 3600, fiat='USDT')
|
|
128
124
|
obj.import_data(start='2024-01-01 00:00:00', end='2024-12-31 00:00:00')
|
|
129
125
|
obj.save(form='parquet')
|
|
130
|
-
df = obj.get_data()
|
|
131
|
-
|
|
132
|
-
Polars output:
|
|
133
|
-
|
|
134
|
-
.. code-block:: python
|
|
135
|
-
|
|
136
|
-
df_pl = obj.get_data(format='polars')
|
|
126
|
+
df = obj.get_data() # polars DataFrame (default)
|
|
127
|
+
df_pd = obj.get_data(format='pandas') # pandas DataFrame (optional)
|
|
137
128
|
|
|
138
129
|
Incremental update (resume from last saved point):
|
|
139
130
|
|
|
@@ -160,7 +151,7 @@ Trades (historical or recent):
|
|
|
160
151
|
obj = FromBinance('/path/', 'BTC', 3600, fiat='USDT')
|
|
161
152
|
obj.import_trades(start='2024-01-01 00:00:00', end='2024-01-02 00:00:00')
|
|
162
153
|
obj.save_trades(form='csv')
|
|
163
|
-
df = obj.trades_df #
|
|
154
|
+
df = obj.trades_df # polars DataFrame — columns: TS, price, amount, type, tid
|
|
164
155
|
|
|
165
156
|
# Kraken also supports full history; Bybit/Coinbase return recent-only snapshots
|
|
166
157
|
FromKraken('/path/', 'BTC', 3600).import_trades(start='2024-01-01', end='2024-01-02').save_trades()
|
|
@@ -224,6 +215,16 @@ CLI quick start:
|
|
|
224
215
|
# Continuous daemon (Ctrl-C to stop)
|
|
225
216
|
dccd start --config config.yml
|
|
226
217
|
|
|
218
|
+
# Add / remove a histo job in-place
|
|
219
|
+
dccd add --exchange kraken --pair ETH/USD --span 86400 --config config.yml
|
|
220
|
+
dccd remove --exchange kraken --pair ETH/USD --span 86400 --config config.yml
|
|
221
|
+
|
|
222
|
+
# Inspect all data on disk (OHLC, trades, orderbook)
|
|
223
|
+
dccd inventory --config config.yml
|
|
224
|
+
|
|
225
|
+
Note: ``--config`` is optional — dccd searches ``./config.yml`` then
|
|
226
|
+
``~/.config/dccd/config.yml`` when omitted.
|
|
227
|
+
|
|
227
228
|
Python API:
|
|
228
229
|
|
|
229
230
|
.. code-block:: python
|
|
@@ -286,7 +286,7 @@ def get_data_bitfinex(channel: str, process_func: Any, process_params: dict[str,
|
|
|
286
286
|
'Oracle', 'MSSQL', 'MySQL'},
|
|
287
287
|
It will create an IODataBase object to save/update the database in the
|
|
288
288
|
specified format `save_method`, default is 'DataFrame' it save as
|
|
289
|
-
binary
|
|
289
|
+
binary pl.DataFrame object. More informations are available into
|
|
290
290
|
:mod:`dccd.tools.io`.
|
|
291
291
|
io_params : dict, optional
|
|
292
292
|
Dictionary of the keyword arguments available to the
|
|
@@ -37,9 +37,10 @@ from __future__ import annotations
|
|
|
37
37
|
|
|
38
38
|
import time as time_mod
|
|
39
39
|
from abc import ABC, abstractmethod
|
|
40
|
+
from datetime import datetime, timezone
|
|
40
41
|
from typing import TYPE_CHECKING
|
|
41
42
|
|
|
42
|
-
import pandas as pd
|
|
43
|
+
import polars as pl
|
|
43
44
|
from tqdm import tqdm
|
|
44
45
|
|
|
45
46
|
from dccd.tools.date_time import date_to_TS
|
|
@@ -92,6 +93,12 @@ class _BackfillBase(ABC):
|
|
|
92
93
|
form : str
|
|
93
94
|
Output format (accepted for backward compatibility; storage is
|
|
94
95
|
always Parquet via :class:`~dccd.storage.DataStore`).
|
|
96
|
+
max_retries : int, optional
|
|
97
|
+
Maximum fetch attempts per window before skipping it. Default 3.
|
|
98
|
+
retry_delay : float, optional
|
|
99
|
+
Base delay in seconds between retries. Actual delay is
|
|
100
|
+
``retry_delay * 2 ** (attempt - 1)`` (exponential back-off).
|
|
101
|
+
Default 2.0.
|
|
95
102
|
|
|
96
103
|
"""
|
|
97
104
|
|
|
@@ -100,10 +107,14 @@ class _BackfillBase(ABC):
|
|
|
100
107
|
obj: ImportDataCryptoCurrencies,
|
|
101
108
|
sleep: float,
|
|
102
109
|
form: str,
|
|
110
|
+
max_retries: int = 3,
|
|
111
|
+
retry_delay: float = 2.0,
|
|
103
112
|
) -> None:
|
|
104
113
|
self.obj = obj
|
|
105
114
|
self.sleep = sleep
|
|
106
115
|
self.form = form
|
|
116
|
+
self.max_retries = max_retries
|
|
117
|
+
self.retry_delay = retry_delay
|
|
107
118
|
cls_name = type(obj).__name__[4:] # strip leading 'From'
|
|
108
119
|
self.label = f'{cls_name:8s} {obj.crypto}/{obj.fiat}'
|
|
109
120
|
|
|
@@ -184,9 +195,15 @@ class _BackfillBase(ABC):
|
|
|
184
195
|
tqdm.write(f'[{self.label}] already up to date')
|
|
185
196
|
return
|
|
186
197
|
|
|
187
|
-
|
|
198
|
+
def _iso(ts: int) -> str:
|
|
199
|
+
return datetime.fromtimestamp(ts, tz=timezone.utc).strftime('%Y-%m-%d')
|
|
200
|
+
|
|
201
|
+
total = sum(max(1, (e - s) // self.window_size + 1) for s, e in intervals)
|
|
202
|
+
end_date_str = _iso(intervals[-1][1])
|
|
188
203
|
bar = tqdm(
|
|
189
|
-
total=total,
|
|
204
|
+
total=total,
|
|
205
|
+
desc=f'{self.label} {_iso(intervals[0][0])} → {end_date_str}',
|
|
206
|
+
unit='',
|
|
190
207
|
position=position, leave=True, dynamic_ncols=True,
|
|
191
208
|
)
|
|
192
209
|
|
|
@@ -200,22 +217,23 @@ class _BackfillBase(ABC):
|
|
|
200
217
|
|
|
201
218
|
last_exc: Exception | None = None
|
|
202
219
|
n = 0
|
|
203
|
-
for attempt in range(1,
|
|
220
|
+
for attempt in range(1, self.max_retries + 1):
|
|
204
221
|
try:
|
|
205
222
|
n = self._fetch_window(current, end)
|
|
206
223
|
break
|
|
207
224
|
except Exception as exc:
|
|
208
225
|
last_exc = exc
|
|
209
|
-
if attempt <
|
|
226
|
+
if attempt < self.max_retries:
|
|
227
|
+
delay = self.retry_delay * (2 ** (attempt - 1))
|
|
210
228
|
tqdm.write(
|
|
211
|
-
f'[{self.label}] attempt {attempt}/{
|
|
212
|
-
f'failed: {exc} — retrying in
|
|
229
|
+
f'[{self.label}] attempt {attempt}/{self.max_retries} '
|
|
230
|
+
f'failed: {exc} — retrying in {delay:.1f}s'
|
|
213
231
|
)
|
|
214
|
-
time_mod.sleep(
|
|
232
|
+
time_mod.sleep(delay)
|
|
215
233
|
else:
|
|
216
234
|
tqdm.write(
|
|
217
235
|
f'[{self.label}] window {current} failed after '
|
|
218
|
-
f'{
|
|
236
|
+
f'{self.max_retries} attempts: {last_exc} — skipping'
|
|
219
237
|
)
|
|
220
238
|
skipped += 1
|
|
221
239
|
current += self.window_size
|
|
@@ -227,8 +245,10 @@ class _BackfillBase(ABC):
|
|
|
227
245
|
self.obj.save()
|
|
228
246
|
n_candles += n
|
|
229
247
|
|
|
248
|
+
current_date = _iso(current)
|
|
230
249
|
current = self._advance(current, end)
|
|
231
250
|
bar.update(1)
|
|
251
|
+
bar.set_description(f'{self.label} {current_date} → {end_date_str}')
|
|
232
252
|
bar.set_postfix(candles=n_candles, skipped=skipped)
|
|
233
253
|
time_mod.sleep(self.sleep)
|
|
234
254
|
|
|
@@ -256,6 +276,10 @@ class OHLCBackfill(_BackfillBase):
|
|
|
256
276
|
Seconds to wait between requests.
|
|
257
277
|
form : str
|
|
258
278
|
Output format (accepted for backward compatibility).
|
|
279
|
+
max_retries : int, optional
|
|
280
|
+
See :class:`_BackfillBase`. Default 3.
|
|
281
|
+
retry_delay : float, optional
|
|
282
|
+
See :class:`_BackfillBase`. Default 2.0.
|
|
259
283
|
|
|
260
284
|
"""
|
|
261
285
|
|
|
@@ -265,8 +289,10 @@ class OHLCBackfill(_BackfillBase):
|
|
|
265
289
|
max_candles: int,
|
|
266
290
|
sleep: float,
|
|
267
291
|
form: str,
|
|
292
|
+
max_retries: int = 3,
|
|
293
|
+
retry_delay: float = 2.0,
|
|
268
294
|
) -> None:
|
|
269
|
-
super().__init__(obj, sleep, form)
|
|
295
|
+
super().__init__(obj, sleep, form, max_retries=max_retries, retry_delay=retry_delay)
|
|
270
296
|
self.max_candles = max_candles
|
|
271
297
|
|
|
272
298
|
@property
|
|
@@ -275,7 +301,7 @@ class OHLCBackfill(_BackfillBase):
|
|
|
275
301
|
|
|
276
302
|
def _fetch_window(self, current: int, end: int) -> int:
|
|
277
303
|
self.obj.import_data(start=current, end=end)
|
|
278
|
-
if self.obj.df is None or self.obj.df
|
|
304
|
+
if self.obj.df is None or len(self.obj.df) == 0:
|
|
279
305
|
return 0
|
|
280
306
|
self._warn_if_suspicious(current, end)
|
|
281
307
|
return len(self.obj.df)
|
|
@@ -439,28 +465,29 @@ class KrakenBackfill(_BackfillBase):
|
|
|
439
465
|
return []
|
|
440
466
|
|
|
441
467
|
span = self.obj.span
|
|
442
|
-
df =
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
origin = pd.Timestamp(start_ts, unit='s', tz='UTC')
|
|
448
|
-
resample_kw = {'closed': 'left', 'label': 'left', 'origin': origin}
|
|
449
|
-
ohlc = df['price'].resample(freq, **resample_kw).ohlc()
|
|
450
|
-
vol = df['amount'].resample(freq, **resample_kw).sum()
|
|
451
|
-
qvol = (df['price'] * df['amount']).resample(freq, **resample_kw).sum()
|
|
452
|
-
|
|
468
|
+
df = (
|
|
469
|
+
pl.DataFrame(trades)
|
|
470
|
+
.with_columns(pl.from_epoch('timestamp', time_unit='s').alias('ts'))
|
|
471
|
+
.sort('ts')
|
|
472
|
+
)
|
|
453
473
|
result = (
|
|
454
|
-
|
|
455
|
-
.
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
474
|
+
df.group_by_dynamic('ts', every=f'{span}s', closed='left', start_by='datapoint')
|
|
475
|
+
.agg(
|
|
476
|
+
pl.col('price').first().alias('open'),
|
|
477
|
+
pl.col('price').max().alias('high'),
|
|
478
|
+
pl.col('price').min().alias('low'),
|
|
479
|
+
pl.col('price').last().alias('close'),
|
|
480
|
+
pl.col('amount').sum().alias('volume'),
|
|
481
|
+
(pl.col('price') * pl.col('amount')).sum().alias('quoteVolume'),
|
|
482
|
+
)
|
|
483
|
+
.filter(
|
|
484
|
+
(pl.col('ts').dt.epoch(time_unit='s') >= start_ts)
|
|
485
|
+
& (pl.col('ts').dt.epoch(time_unit='s') < end_ts)
|
|
486
|
+
)
|
|
460
487
|
)
|
|
461
488
|
|
|
462
489
|
return [{
|
|
463
|
-
'date':
|
|
490
|
+
'date': int(row['ts'].timestamp()),
|
|
464
491
|
'open': float(row['open']),
|
|
465
492
|
'high': float(row['high']),
|
|
466
493
|
'low': float(row['low']),
|
|
@@ -471,7 +498,7 @@ class KrakenBackfill(_BackfillBase):
|
|
|
471
498
|
float(row['quoteVolume'] / row['volume'])
|
|
472
499
|
if row['volume'] > 0 else float(row['close'])
|
|
473
500
|
),
|
|
474
|
-
} for
|
|
501
|
+
} for row in result.iter_rows(named=True)]
|
|
475
502
|
|
|
476
503
|
|
|
477
504
|
# ---------------------------------------------------------------------------
|
|
@@ -486,6 +513,8 @@ def make_job(
|
|
|
486
513
|
path: str,
|
|
487
514
|
tz: str,
|
|
488
515
|
form: str,
|
|
516
|
+
max_retries: int = 3,
|
|
517
|
+
retry_delay: float = 2.0,
|
|
489
518
|
) -> _BackfillBase:
|
|
490
519
|
"""Build the appropriate backfill strategy for an (exchange, pair).
|
|
491
520
|
|
|
@@ -532,13 +561,16 @@ def make_job(
|
|
|
532
561
|
obj = cls(path, crypto, span, fiat, form=form, tz=tz)
|
|
533
562
|
|
|
534
563
|
if exchange == _KRAKEN_EXCHANGE:
|
|
535
|
-
return KrakenBackfill(obj, sleep=sleep, form=form
|
|
564
|
+
return KrakenBackfill(obj, sleep=sleep, form=form,
|
|
565
|
+
max_retries=max_retries, retry_delay=retry_delay)
|
|
536
566
|
|
|
537
567
|
return OHLCBackfill(
|
|
538
568
|
obj,
|
|
539
569
|
max_candles=defaults['max_candles'],
|
|
540
570
|
sleep=sleep,
|
|
541
571
|
form=form,
|
|
572
|
+
max_retries=max_retries,
|
|
573
|
+
retry_delay=retry_delay,
|
|
542
574
|
)
|
|
543
575
|
|
|
544
576
|
|
|
@@ -587,6 +619,8 @@ def run_backfill(
|
|
|
587
619
|
job = make_job(
|
|
588
620
|
histo_job.exchange, crypto, fiat, histo_job.span,
|
|
589
621
|
path, tz, histo_job.format,
|
|
622
|
+
max_retries=histo_job.max_retries,
|
|
623
|
+
retry_delay=histo_job.retry_delay,
|
|
590
624
|
)
|
|
591
625
|
if job.obj.full_path in seen_paths:
|
|
592
626
|
tqdm.write(
|