cryptodatapy 0.2.9__tar.gz → 0.2.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/PKG-INFO +5 -5
  2. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/pyproject.toml +3 -3
  3. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/getdata.py +68 -0
  4. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/libraries/ccxt_api.py +117 -56
  5. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/transform/convertparams.py +124 -7
  6. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/transform/filter.py +7 -8
  7. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/transform/impute.py +2 -1
  8. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/transform/wrangle.py +6 -11
  9. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/LICENSE +0 -0
  10. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/README.md +0 -0
  11. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/__init__.py +0 -0
  12. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/conf/__init__.py +0 -0
  13. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/conf/fields.csv +0 -0
  14. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/conf/tickers.csv +0 -0
  15. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/datasets/__init__.py +0 -0
  16. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/datasets/br_econ_calendar.csv +0 -0
  17. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/datasets/ca_econ_calendar.csv +0 -0
  18. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/datasets/cn_econ_calendar.csv +0 -0
  19. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/datasets/de_econ_calendar.csv +0 -0
  20. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/datasets/ez_econ_calendar.csv +0 -0
  21. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/datasets/fr_econ_calendar.csv +0 -0
  22. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/datasets/gb_econ_calendar.csv +0 -0
  23. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/datasets/get_econ_calendars.py +0 -0
  24. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/datasets/id_econ_calendar.csv +0 -0
  25. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/datasets/in_econ_calendar.csv +0 -0
  26. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/datasets/it_econ_calendar.csv +0 -0
  27. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/datasets/jp_econ_calendar.csv +0 -0
  28. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/datasets/kr_econ_calendar.csv +0 -0
  29. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/datasets/mx_econ_calendar.csv +0 -0
  30. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/datasets/ru_econ_calendar.csv +0 -0
  31. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/datasets/tr_econ_calendar.csv +0 -0
  32. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/datasets/us_econ_calendar.csv +0 -0
  33. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/__init__.py +0 -0
  34. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/data_vendors/.ipynb_checkpoints/CCXT-checkpoint.ipynb +0 -0
  35. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/data_vendors/.ipynb_checkpoints/DBNomics-checkpoint.ipynb +0 -0
  36. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/data_vendors/.ipynb_checkpoints/InvestPy-checkpoint.ipynb +0 -0
  37. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/data_vendors/.ipynb_checkpoints/NasdaqDataLink-checkpoint.ipynb +0 -0
  38. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/data_vendors/.ipynb_checkpoints/PandasDataReader-checkpoint.ipynb +0 -0
  39. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/data_vendors/__init__.py +0 -0
  40. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/data_vendors/coinmetrics_api.py +0 -0
  41. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/data_vendors/cryptocompare_api.py +0 -0
  42. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/data_vendors/datavendor.py +0 -0
  43. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/data_vendors/glassnode_api.py +0 -0
  44. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/data_vendors/tiingo_api.py +0 -0
  45. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/datarequest.py +0 -0
  46. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/exchanges/__init__.py +0 -0
  47. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/exchanges/dydx.py +0 -0
  48. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/exchanges/exchange.py +0 -0
  49. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/libraries/__init__.py +0 -0
  50. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/libraries/dbnomics_api.py +0 -0
  51. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/libraries/investpy_api.py +0 -0
  52. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/libraries/library.py +0 -0
  53. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/libraries/pandasdr_api.py +0 -0
  54. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/web/__init__.py +0 -0
  55. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/web/aqr.py +0 -0
  56. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/extract/web/web.py +0 -0
  57. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/transform/__init__.py +0 -0
  58. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/transform/clean.py +0 -0
  59. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/transform/od.py +0 -0
  60. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/util/__init__.py +0 -0
  61. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/util/datacatalog.py +0 -0
  62. {cryptodatapy-0.2.9 → cryptodatapy-0.2.11}/src/cryptodatapy/util/datacredentials.py +0 -0
@@ -1,17 +1,17 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: cryptodatapy
3
- Version: 0.2.9
3
+ Version: 0.2.11
4
4
  Summary: Cryptoasset data library
5
5
  License: Apache-2.0
6
6
  Author: Systamental
7
- Requires-Python: >=3.8,<4.0
7
+ Requires-Python: >=3.9,<4.0
8
8
  Classifier: License :: OSI Approved :: Apache Software License
9
9
  Classifier: Programming Language :: Python :: 3
10
- Classifier: Programming Language :: Python :: 3.8
11
10
  Classifier: Programming Language :: Python :: 3.9
12
11
  Classifier: Programming Language :: Python :: 3.10
13
12
  Classifier: Programming Language :: Python :: 3.11
14
13
  Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
15
  Requires-Dist: DBnomics (>=1.2.3)
16
16
  Requires-Dist: ccxt (>=1.91.52)
17
17
  Requires-Dist: coinmetrics-api-client (>=2022.6.17) ; python_version >= "3.7"
@@ -20,7 +20,7 @@ Requires-Dist: investpy (>=1.0.8)
20
20
  Requires-Dist: matplotlib (>=3.5.2)
21
21
  Requires-Dist: numpy (>=1.23.2)
22
22
  Requires-Dist: openpyxl (>=3.1.2)
23
- Requires-Dist: pandas (>=1.4.4)
23
+ Requires-Dist: pandas (>=2.2.3)
24
24
  Requires-Dist: pandas-datareader (>=0.10.0)
25
25
  Requires-Dist: prophet (>=1.1) ; python_version >= "3.7"
26
26
  Requires-Dist: pyarrow (>=17.0.0)
@@ -1,13 +1,13 @@
1
1
  [tool.poetry]
2
2
  name = "cryptodatapy"
3
- version = "0.2.9"
3
+ version = "0.2.11"
4
4
  description = "Cryptoasset data library"
5
5
  authors = ["Systamental"]
6
6
  license = "Apache License 2.0"
7
7
  readme = "README.md"
8
8
 
9
9
  [tool.poetry.dependencies]
10
- python = ">=3.8,<4.0"
10
+ python = ">=3.9,<4.0"
11
11
  matplotlib = ">=3.5.2"
12
12
  requests = {version = ">=2.28.0", python = ">=3.7"}
13
13
  coinmetrics-api-client = {version = ">=2022.6.17", python = ">=3.7"}
@@ -19,7 +19,7 @@ statsmodels = ">=0.13.2"
19
19
  prophet = {version = ">=1.1", python = ">=3.7"}
20
20
  selenium = ">=4.4.3"
21
21
  numpy = ">=1.23.2"
22
- pandas = ">=1.4.4"
22
+ pandas = ">=2.2.3"
23
23
  pandas-datareader = ">=0.10.0"
24
24
  responses = ">=0.21.0"
25
25
  yfinance = ">=0.2.14"
@@ -220,3 +220,71 @@ class GetData:
220
220
  df = getattr(ds, method)(self.data_req)
221
221
 
222
222
  return df
223
+
224
+ async def get_series_async(self, method: str = "get_data_async") -> pd.DataFrame:
225
+ """
226
+ Get requested data.
227
+
228
+ Parameters
229
+ ----------
230
+ method: str, default 'get_data'
231
+ Gets the specified method from the data source object.
232
+
233
+ Returns
234
+ -------
235
+ df: pd.DataFrame - MultiIndex
236
+ DataFrame with DatetimeIndex (level 0), ticker (level 1), and field (cols) values.
237
+
238
+ Examples
239
+ --------
240
+ >>> data_req = DataRequest(source='ccxt', tickers=['btc', 'eth'], fields=['open', 'high', 'low', 'close',
241
+ 'volume'], freq='d', exch='ftx', start_date='2017-01-01')
242
+ >>> GetData(data_req).get_series()
243
+ open high low close volume
244
+ date ticker
245
+ 2020-03-28 BTC 6243.25 6298.5 6028.0 6237.5 3888.9424
246
+ ETH 128.995 133.0 125.11 131.04 1751.65972
247
+ 2020-03-29 BTC 6233.5 6262.5 5869.5 5876.5 114076.5831
248
+ ETH 130.98 131.84 123.81 124.33 138449.60906
249
+ 2020-03-30 BTC 5876.0 6609.0 5856.0 6396.5 224231.1718
250
+
251
+
252
+ >>> data_req = DataRequest(source='glassnode', tickers=['btc', 'eth'],
253
+ fields=['add_act', 'tx_count', 'issuance'], freq='d', start_date='2016-01-01')
254
+ >>> GetData(data_req).get_series()
255
+ add_act tx_count issuance
256
+ date ticker
257
+ 2016-01-01 BTC 316489 123957 0.085386
258
+ ETH 2350 8232 0.133048
259
+ 2016-01-02 BTC 419389 148893 0.09197
260
+ ETH 2410 9164 0.140147
261
+ 2016-01-03 BTC 394047 142463 0.091947
262
+ """
263
+ # data source objects
264
+ data_source_dict = {
265
+ "cryptocompare": CryptoCompare,
266
+ "coinmetrics": CoinMetrics,
267
+ "ccxt": CCXT,
268
+ "glassnode": Glassnode,
269
+ "tiingo": Tiingo,
270
+ "investpy": InvestPy,
271
+ "dbnomics": DBnomics,
272
+ "yahoo": PandasDataReader,
273
+ "fred": PandasDataReader,
274
+ "av-daily": PandasDataReader,
275
+ "av-forex-daily": PandasDataReader,
276
+ "famafrench": PandasDataReader,
277
+ "aqr": AQR
278
+ }
279
+
280
+ # data source
281
+ ds = data_source_dict[self.data_req.source]
282
+ # instantiate ds obj
283
+ if self.api_key is not None:
284
+ ds = ds(api_key=self.api_key)
285
+ else:
286
+ ds = ds()
287
+ # get data
288
+ df = await getattr(ds, method)(self.data_req)
289
+
290
+ return df
@@ -2,6 +2,7 @@ import logging
2
2
  from typing import Any, Dict, List, Optional, Union
3
3
  import pandas as pd
4
4
  import asyncio
5
+ import random
5
6
  from time import sleep
6
7
  import ccxt
7
8
  import ccxt.async_support as ccxt_async
@@ -76,7 +77,7 @@ class CCXT(Library):
76
77
  fields, frequencies, base_url, api_key, max_obs_per_call, rate_limit
77
78
  )
78
79
  self.exchange = None
79
- # self.exchange_async = None
80
+ self.exchange_async = None
80
81
  self.data_req = None
81
82
  self.data = pd.DataFrame()
82
83
 
@@ -311,13 +312,26 @@ class CCXT(Library):
311
312
  if self.rate_limit is None:
312
313
  self.rate_limit = self.exchange.rateLimit
313
314
 
315
+ @staticmethod
316
+ def exponential_backoff_with_jitter(base_delay: float, max_delay: int, attempts: int) -> None:
317
+ delay = min(max_delay, base_delay * (2 ** attempts))
318
+ delay_with_jitter = delay + random.uniform(0, delay * 0.5)
319
+ sleep(delay_with_jitter)
320
+
321
+ @staticmethod
322
+ async def exponential_backoff_with_jitter_async(base_delay: float, max_delay: int, attempts: int) -> None:
323
+ delay = min(max_delay, base_delay * (2 ** attempts))
324
+ delay_with_jitter = delay + random.uniform(0, delay * 0.5)
325
+ await asyncio.sleep(delay_with_jitter)
326
+
314
327
  async def _fetch_ohlcv_async(self,
315
328
  ticker: str,
316
329
  freq: str,
317
330
  start_date: str,
318
331
  end_date: str,
319
332
  exch: str,
320
- trials: int = 3
333
+ trials: int = 3,
334
+ pause: int = 1
321
335
  ) -> List:
322
336
  """
323
337
  Fetches OHLCV data for a specific ticker.
@@ -336,6 +350,8 @@ class CCXT(Library):
336
350
  Name of exchange.
337
351
  trials: int, default 3
338
352
  Number of attempts to fetch data.
353
+ pause: int, default 60
354
+ Pause in seconds to respect the rate limit.
339
355
 
340
356
  Returns
341
357
  -------
@@ -346,16 +362,17 @@ class CCXT(Library):
346
362
  data = []
347
363
 
348
364
  # inst exch
349
- self.exchange = getattr(ccxt_async, exch)()
365
+ if self.exchange_async is None:
366
+ self.exchange_async = getattr(ccxt_async, exch)()
350
367
 
351
368
  # fetch data
352
- if self.exchange.has['fetchOHLCV']:
369
+ if self.exchange_async.has['fetchOHLCV']:
353
370
 
354
371
  # while loop to fetch all data
355
372
  while start_date < end_date and attempts < trials:
356
373
 
357
374
  try:
358
- data_resp = await self.exchange.fetch_ohlcv(
375
+ data_resp = await self.exchange_async.fetch_ohlcv(
359
376
  ticker,
360
377
  freq,
361
378
  since=start_date,
@@ -368,29 +385,33 @@ class CCXT(Library):
368
385
  start_date = data_resp[-1][0] + 1
369
386
  data.extend(data_resp)
370
387
  else:
388
+ if not data:
389
+ logging.warning(f"No OHLCV data available for {ticker}.")
371
390
  break
372
391
 
373
392
  except Exception as e:
374
393
  logging.warning(
375
- f"Failed to get OHLCV data from {self.exchange.id} for {ticker} "
394
+ f"Failed to get OHLCV data from {self.exchange_async.id} for {ticker} "
376
395
  f"on attempt #{attempts + 1}: {e}."
377
396
  )
378
397
  attempts += 1
379
398
  if attempts >= trials:
380
399
  logging.warning(
381
- f"Failed to get OHLCV data from {self.exchange.id} "
400
+ f"Failed to get OHLCV data from {self.exchange_async.id} "
382
401
  f"for {ticker} after {trials} attempts."
383
402
  )
384
403
  break
385
404
 
386
405
  finally:
387
- await asyncio.sleep(self.exchange.rateLimit / 1000)
406
+ await self.exponential_backoff_with_jitter_async(self.exchange_async.rateLimit / 1000,
407
+ pause,
408
+ attempts)
388
409
 
389
- await self.exchange.close()
410
+ await self.exchange_async.close()
390
411
  return data
391
412
 
392
413
  else:
393
- logging.warning(f"OHLCV data is not available for {self.exchange.id}.")
414
+ logging.warning(f"OHLCV data is not available for {self.exchange_async.id}.")
394
415
  return None
395
416
 
396
417
  def _fetch_ohlcv(self,
@@ -399,7 +420,8 @@ class CCXT(Library):
399
420
  start_date: str,
400
421
  end_date: str,
401
422
  exch: str,
402
- trials: int = 3
423
+ trials: int = 3,
424
+ pause: int = 1
403
425
  ) -> List:
404
426
  """
405
427
  Fetches OHLCV data for a specific ticker.
@@ -418,6 +440,8 @@ class CCXT(Library):
418
440
  Name of exchange.
419
441
  trials: int, default 3
420
442
  Number of attempts to fetch data.
443
+ pause: int, default 60
444
+ Pause in seconds to respect the rate limit.
421
445
 
422
446
  Returns
423
447
  -------
@@ -428,7 +452,8 @@ class CCXT(Library):
428
452
  data = []
429
453
 
430
454
  # inst exch
431
- self.exchange = getattr(ccxt, exch)()
455
+ if self.exchange is None:
456
+ self.exchange = getattr(ccxt, exch)()
432
457
 
433
458
  # fetch data
434
459
  if self.exchange.has['fetchOHLCV']:
@@ -442,7 +467,10 @@ class CCXT(Library):
442
467
  freq,
443
468
  since=start_date,
444
469
  limit=self.max_obs_per_call,
445
- params={'until': end_date}
470
+ params={
471
+ 'until': end_date,
472
+ 'paginate': True
473
+ }
446
474
  )
447
475
 
448
476
  # add data to list
@@ -450,6 +478,8 @@ class CCXT(Library):
450
478
  start_date = data_resp[-1][0] + 1
451
479
  data.extend(data_resp)
452
480
  else:
481
+ if not data:
482
+ logging.warning(f"No OHLCV data available for {ticker}.")
453
483
  break
454
484
 
455
485
  except Exception as e:
@@ -466,7 +496,7 @@ class CCXT(Library):
466
496
  break
467
497
 
468
498
  finally:
469
- sleep(self.exchange.rateLimit / 1000)
499
+ self.exponential_backoff_with_jitter(self.exchange.rateLimit / 1000, pause, attempts)
470
500
 
471
501
  return data
472
502
 
@@ -481,7 +511,7 @@ class CCXT(Library):
481
511
  end_date: str,
482
512
  exch: str,
483
513
  trials: int = 3,
484
- pause: int = 0.5
514
+ pause: int = 1
485
515
  ):
486
516
  """
487
517
  Fetches OHLCV data for a list of tickers.
@@ -509,7 +539,8 @@ class CCXT(Library):
509
539
  List of lists of timestamps and OHLCV data for each ticker.
510
540
  """
511
541
  # inst exch
512
- self.exchange = getattr(ccxt_async, exch)()
542
+ if self.exchange_async is None:
543
+ self.exchange_async = getattr(ccxt_async, exch)()
513
544
 
514
545
  data = []
515
546
 
@@ -523,7 +554,7 @@ class CCXT(Library):
523
554
  data.append(data_resp)
524
555
  pbar.update(1)
525
556
 
526
- await self.exchange.close()
557
+ await self.exchange_async.close()
527
558
 
528
559
  return data
529
560
 
@@ -534,7 +565,7 @@ class CCXT(Library):
534
565
  end_date: str,
535
566
  exch: str,
536
567
  trials: int = 3,
537
- pause: int = 0.5
568
+ pause: int = 1
538
569
  ):
539
570
  """
540
571
  Fetches OHLCV data for a list of tickers.
@@ -557,7 +588,8 @@ class CCXT(Library):
557
588
  Pause in seconds to respect the rate limit.
558
589
  """
559
590
  # inst exch
560
- self.exchange = getattr(ccxt, exch)()
591
+ if self.exchange is None:
592
+ self.exchange = getattr(ccxt, exch)()
561
593
 
562
594
  data = []
563
595
 
@@ -578,7 +610,8 @@ class CCXT(Library):
578
610
  start_date: str,
579
611
  end_date: str,
580
612
  exch: str,
581
- trials: int = 3
613
+ trials: int = 3,
614
+ pause: int = 1
582
615
  ) -> List:
583
616
  """
584
617
  Fetches funding rates data for a specific ticker.
@@ -593,6 +626,8 @@ class CCXT(Library):
593
626
  End date in integers in milliseconds since Unix epoch.
594
627
  trials: int, default 3
595
628
  Number of attempts to fetch data.
629
+ pause: int, default 1
630
+ Pause in seconds to respect the rate limit.
596
631
 
597
632
  Returns
598
633
  -------
@@ -603,16 +638,17 @@ class CCXT(Library):
603
638
  data = []
604
639
 
605
640
  # inst exch
606
- self.exchange = getattr(ccxt_async, exch)()
641
+ if self.exchange_async is None:
642
+ self.exchange_async = getattr(ccxt_async, exch)()
607
643
 
608
644
  # fetch data
609
- if self.exchange.has['fetchFundingRateHistory']:
645
+ if self.exchange_async.has['fetchFundingRateHistory']:
610
646
 
611
647
  # while loop to get all data
612
648
  while start_date < end_date and attempts < trials:
613
649
 
614
650
  try:
615
- data_resp = await self.exchange.fetch_funding_rate_history(
651
+ data_resp = await self.exchange_async.fetch_funding_rate_history(
616
652
  ticker,
617
653
  since=start_date,
618
654
  limit=self.max_obs_per_call,
@@ -624,29 +660,33 @@ class CCXT(Library):
624
660
  start_date = data_resp[-1]['timestamp'] + 1
625
661
  data.extend(data_resp)
626
662
  else:
663
+ if not data:
664
+ logging.warning(f"No funding rates data available for {ticker}.")
627
665
  break
628
666
 
629
667
  except Exception as e:
630
668
  logging.warning(
631
- f"Failed to get funding rates from {self.exchange.id} for {ticker} "
669
+ f"Failed to get funding rates from {self.exchange_async.id} for {ticker} "
632
670
  f"on attempt #{attempts + 1}: {e}."
633
671
  )
634
672
  attempts += 1
635
673
  if attempts >= trials:
636
674
  logging.warning(
637
- f"Failed to get funding rates from {self.exchange.id} "
675
+ f"Failed to get funding rates from {self.exchange_async.id} "
638
676
  f"for {ticker} after {trials} attempts."
639
677
  )
640
678
  break
641
679
 
642
680
  finally:
643
- await asyncio.sleep(self.exchange.rateLimit / 1000)
681
+ await self.exponential_backoff_with_jitter_async(self.exchange_async.rateLimit / 1000,
682
+ pause,
683
+ attempts)
644
684
 
645
- await self.exchange.close()
685
+ await self.exchange_async.close()
646
686
  return data
647
687
 
648
688
  else:
649
- logging.warning(f"Funding rates are not available for {self.exchange.id}.")
689
+ logging.warning(f"Funding rates are not available for {self.exchange_async.id}.")
650
690
  return None
651
691
 
652
692
  def _fetch_funding_rates(self,
@@ -654,7 +694,8 @@ class CCXT(Library):
654
694
  start_date: str,
655
695
  end_date: str,
656
696
  exch: str,
657
- trials: int = 3
697
+ trials: int = 3,
698
+ pause: int = 1
658
699
  ) -> List:
659
700
  """
660
701
  Fetches funding rates data for a specific ticker.
@@ -669,6 +710,8 @@ class CCXT(Library):
669
710
  End date in integers in milliseconds since Unix epoch.
670
711
  trials: int, default 3
671
712
  Number of attempts to fetch data.
713
+ pause: int, default 1
714
+ Pause in seconds to respect the rate limit.
672
715
 
673
716
  Returns
674
717
  -------
@@ -679,7 +722,8 @@ class CCXT(Library):
679
722
  data = []
680
723
 
681
724
  # inst exch
682
- self.exchange = getattr(ccxt, exch)()
725
+ if self.exchange is None:
726
+ self.exchange = getattr(ccxt, exch)()
683
727
 
684
728
  # fetch data
685
729
  if self.exchange.has['fetchFundingRateHistory']:
@@ -700,6 +744,8 @@ class CCXT(Library):
700
744
  start_date = data_resp[-1]['timestamp'] + 1
701
745
  data.extend(data_resp)
702
746
  else:
747
+ if not data:
748
+ logging.warning(f"No funding rates data available for {ticker}.")
703
749
  break
704
750
 
705
751
  except Exception as e:
@@ -716,7 +762,7 @@ class CCXT(Library):
716
762
  break
717
763
 
718
764
  finally:
719
- sleep(self.exchange.rateLimit / 1000)
765
+ self.exponential_backoff_with_jitter(self.exchange.rateLimit / 1000, pause, attempts)
720
766
 
721
767
  return data
722
768
 
@@ -730,7 +776,7 @@ class CCXT(Library):
730
776
  end_date: str,
731
777
  exch: str,
732
778
  trials: int = 3,
733
- pause: int = 0.5
779
+ pause: int = 1
734
780
  ):
735
781
  """
736
782
  Fetches funding rates data for a list of tickers.
@@ -756,7 +802,8 @@ class CCXT(Library):
756
802
  List of lists of dictionaries with timestamps and funding rates data for each ticker.
757
803
  """
758
804
  # inst exch
759
- self.exchange = getattr(ccxt_async, exch)()
805
+ if self.exchange_async is None:
806
+ self.exchange_async = getattr(ccxt_async, exch)()
760
807
 
761
808
  data = []
762
809
 
@@ -770,7 +817,7 @@ class CCXT(Library):
770
817
  pbar.update(1)
771
818
  await asyncio.sleep(pause)
772
819
 
773
- await self.exchange.close()
820
+ await self.exchange_async.close()
774
821
 
775
822
  return data
776
823
 
@@ -780,7 +827,7 @@ class CCXT(Library):
780
827
  end_date: str,
781
828
  exch: str,
782
829
  trials: int = 3,
783
- pause: int = 0.5
830
+ pause: int = 1
784
831
  ):
785
832
  """
786
833
  Fetches funding rates data for a list of tickers.
@@ -807,7 +854,8 @@ class CCXT(Library):
807
854
  """
808
855
 
809
856
  # inst exch
810
- self.exchange = getattr(ccxt, exch)()
857
+ if self.exchange is None:
858
+ self.exchange = getattr(ccxt, exch)()
811
859
 
812
860
  data = []
813
861
 
@@ -829,7 +877,8 @@ class CCXT(Library):
829
877
  start_date: str,
830
878
  end_date: str,
831
879
  exch: str,
832
- trials: int = 3
880
+ trials: int = 3,
881
+ pause: int = 1
833
882
  ) -> List:
834
883
  """
835
884
  Fetches open interest data for a specific ticker.
@@ -848,6 +897,8 @@ class CCXT(Library):
848
897
  Name of exchange.
849
898
  trials: int, default 3
850
899
  Number of attempts to fetch data.
900
+ pause: int, default 1
901
+ Pause in seconds to respect the rate limit.
851
902
 
852
903
  Returns
853
904
  -------
@@ -859,16 +910,17 @@ class CCXT(Library):
859
910
  data = []
860
911
 
861
912
  # inst exch
862
- self.exchange = getattr(ccxt_async, exch)()
913
+ if self.exchange_async is None:
914
+ self.exchange_async = getattr(ccxt_async, exch)()
863
915
 
864
916
  # fetch data
865
- if self.exchange.has['fetchOpenInterestHistory']:
917
+ if self.exchange_async.has['fetchOpenInterestHistory']:
866
918
 
867
919
  # while loop to get all data
868
920
  while start_date < end_date and attempts < trials:
869
921
 
870
922
  try:
871
- data_resp = await self.exchange.fetch_open_interest_history(
923
+ data_resp = await self.exchange_async.fetch_open_interest_history(
872
924
  ticker,
873
925
  freq,
874
926
  since=start_date,
@@ -881,29 +933,34 @@ class CCXT(Library):
881
933
  start_date = data_resp[-1]['timestamp'] + 1
882
934
  data.extend(data_resp)
883
935
  else:
936
+ if not data:
937
+ logging.warning(f"No open interest data available for {ticker}.")
884
938
  break
885
939
 
886
940
  except Exception as e:
887
941
  logging.warning(
888
- f"Failed to get open interest from {self.exchange.id} for {ticker} "
942
+ f"Failed to get open interest from {self.exchange_async.id} for {ticker} "
889
943
  f"on attempt #{attempts + 1}: {e}."
890
944
  )
891
945
  attempts += 1
892
946
  if attempts >= trials:
893
947
  logging.warning(
894
- f"Failed to get open interest from {self.exchange.id} "
948
+ f"Failed to get open interest from {self.exchange_async.id} "
895
949
  f"for {ticker} after {trials} attempts."
896
950
  )
897
951
  break
898
952
 
899
953
  finally:
900
- await asyncio.sleep(self.exchange.rateLimit / 1000)
954
+ await self.exponential_backoff_with_jitter_async(self.exchange_async.rateLimit / 1000,
955
+ pause,
956
+ attempts)
957
+ # await asyncio.sleep(self.exchange_async.rateLimit / 1000)
901
958
 
902
- await self.exchange.close()
959
+ await self.exchange_async.close()
903
960
  return data
904
961
 
905
962
  else:
906
- logging.warning(f"Open interest is not available for {self.exchange.id}.")
963
+ logging.warning(f"Open interest is not available for {self.exchange_async.id}.")
907
964
  return None
908
965
 
909
966
  def _fetch_open_interest(self,
@@ -912,7 +969,8 @@ class CCXT(Library):
912
969
  start_date: str,
913
970
  end_date: str,
914
971
  exch: str,
915
- trials: int = 3
972
+ trials: int = 3,
973
+ pause: int = 1
916
974
  ) -> List:
917
975
  """
918
976
  Fetches open interest data for a specific ticker.
@@ -931,6 +989,8 @@ class CCXT(Library):
931
989
  Name of exchange.
932
990
  trials: int, default 3
933
991
  Number of attempts to fetch data.
992
+ pause: int, default 1
993
+ Pause in seconds to respect the rate limit.
934
994
 
935
995
  Returns
936
996
  -------
@@ -964,6 +1024,8 @@ class CCXT(Library):
964
1024
  start_date = data_resp[-1]['timestamp'] + 1
965
1025
  data.extend(data_resp)
966
1026
  else:
1027
+ if not data:
1028
+ logging.warning(f"No open interest data available for {ticker}.")
967
1029
  break
968
1030
 
969
1031
  except Exception as e:
@@ -980,7 +1042,8 @@ class CCXT(Library):
980
1042
  break
981
1043
 
982
1044
  finally:
983
- sleep(self.exchange.rateLimit / 1000)
1045
+ self.exponential_backoff_with_jitter(self.exchange.rateLimit / 1000, pause, attempts)
1046
+ # sleep(self.exchange.rateLimit / 1000)
984
1047
 
985
1048
  return data
986
1049
 
@@ -995,7 +1058,7 @@ class CCXT(Library):
995
1058
  end_date: str,
996
1059
  exch: str,
997
1060
  trials: int = 3,
998
- pause: int = 0.5
1061
+ pause: int = 1
999
1062
  ):
1000
1063
  """
1001
1064
  Fetches open interest data for a list of tickers.
@@ -1023,7 +1086,8 @@ class CCXT(Library):
1023
1086
  List of lists of dictionaries with timestamps and open interest data for each ticker.
1024
1087
  """
1025
1088
  # inst exch
1026
- self.exchange = getattr(ccxt_async, exch)()
1089
+ if self.exchange_async is None:
1090
+ self.exchange_async = getattr(ccxt_async, exch)()
1027
1091
 
1028
1092
  data = []
1029
1093
 
@@ -1038,7 +1102,7 @@ class CCXT(Library):
1038
1102
  pbar.update(1)
1039
1103
  await asyncio.sleep(pause)
1040
1104
 
1041
- await self.exchange.close()
1105
+ await self.exchange_async.close()
1042
1106
 
1043
1107
  return data
1044
1108
 
@@ -1049,7 +1113,7 @@ class CCXT(Library):
1049
1113
  end_date: str,
1050
1114
  exch: str,
1051
1115
  trials: int = 3,
1052
- pause: int = 0.5
1116
+ pause: int = 1
1053
1117
  ):
1054
1118
  """
1055
1119
  Fetches open interest data for a list of tickers.
@@ -1077,7 +1141,8 @@ class CCXT(Library):
1077
1141
  List of lists of dictionaries with timestamps and open interest data for each ticker.
1078
1142
  """
1079
1143
  # inst exch
1080
- self.exchange = getattr(ccxt, exch)()
1144
+ if self.exchange is None:
1145
+ self.exchange = getattr(ccxt, exch)()
1081
1146
 
1082
1147
  data = []
1083
1148
 
@@ -1112,10 +1177,6 @@ class CCXT(Library):
1112
1177
  # get metadata
1113
1178
  self.get_metadata(self.data_req.exch)
1114
1179
 
1115
- print(self.data_req.exch)
1116
-
1117
- print(self.exchange)
1118
-
1119
1180
  # check markets
1120
1181
  if not any([market in self.markets for market in self.data_req.source_markets]):
1121
1182
  raise ValueError(
@@ -57,17 +57,17 @@ class ConvertParams:
57
57
  exch = self.data_req.exch
58
58
  # convert start date
59
59
  if self.data_req.freq[-3:] == "min": # limit to higher frequency data responses
60
- start_date = round((datetime.now() - timedelta(days=7)).timestamp())
60
+ start_date = int((datetime.now() - timedelta(days=7)).timestamp())
61
61
  # no start date
62
62
  elif self.data_req.start_date is None:
63
- start_date = round(pd.Timestamp("2009-01-03 00:00:00").timestamp())
63
+ start_date = int(pd.Timestamp("2009-01-03 00:00:00").timestamp())
64
64
  else:
65
- start_date = round(pd.Timestamp(self.data_req.start_date).timestamp())
65
+ start_date = int(pd.Timestamp(self.data_req.start_date).timestamp())
66
66
  # convert end date
67
67
  if self.data_req.end_date is None:
68
- end_date = round(pd.Timestamp.utcnow()).timestamp()
68
+ end_date = int(pd.Timestamp.utcnow().timestamp())
69
69
  else:
70
- end_date = round(pd.Timestamp(self.data_req.end_date).timestamp())
70
+ end_date = int(pd.Timestamp(self.data_req.end_date).timestamp())
71
71
  # fields
72
72
  if self.data_req.source_fields is not None:
73
73
  fields = self.data_req.source_fields
@@ -986,7 +986,6 @@ class ConvertParams:
986
986
  -------
987
987
  quote_ccy: str
988
988
  Quote currency.
989
-
990
989
  """
991
990
  mkts = [] # fx pairs list
992
991
  # fx groups
@@ -1021,7 +1020,7 @@ class ConvertParams:
1021
1020
  List of fields in data source format.
1022
1021
 
1023
1022
  """
1024
- # get fields
1023
+ # x fields
1025
1024
  with resources.path("cryptodatapy.conf", "fields.csv") as f:
1026
1025
  fields_dict_path = f
1027
1026
  fields_df, fields_list = (
@@ -1046,3 +1045,121 @@ class ConvertParams:
1046
1045
  )
1047
1046
 
1048
1047
  return fields_list
1048
+
1049
def to_dydx_dict(self) -> Dict[str, Union[list, str, int, float, None]]:
    """
    Convert parameters from CryptoDataPy to dYdX format.

    Source-format values already set on the data request (``source_tickers``,
    ``source_freq``, ``source_fields``) take precedence and are copied back onto
    ``tickers``, ``freq`` and ``fields``. Otherwise the dYdX values are derived
    from the CryptoDataPy parameters.

    Returns
    -------
    params: dict
        Dictionary with the keys tickers, freq, quote_ccy, exch, mkt_type, mkts,
        start_date, end_date, fields, tz, cat, trials, pause, source_tickers,
        source_freq and source_fields.
    """
    data_req = self.data_req

    # CryptoDataPy freq -> dYdX candle resolution
    freq_map = {
        "1min": "1MIN",
        "5min": "5MINS",
        "15min": "15MINS",
        "30min": "30MINS",
        "1h": "1HOUR",
        "4h": "4HOURS",
        "1d": "1DAY",
        "d": "1DAY",
    }
    # CryptoDataPy field -> dYdX field
    field_map = {
        "open": "open",
        "high": "high",
        "low": "low",
        "close": "close",
        "volume": "baseTokenVolume",
        "funding_rate": "nextFundingRate",
        "oi": "openInterest",
    }

    # convert tickers
    if data_req.source_tickers is not None:
        tickers = data_req.source_tickers
        data_req.tickers = data_req.source_tickers
    else:
        tickers = [ticker.upper() for ticker in data_req.tickers]

    # convert markets, every market is built as <TICKER>-USD
    markets = [f"{ticker}-USD" for ticker in tickers]

    # convert freq
    if data_req.source_freq is not None:
        freq = data_req.source_freq
        data_req.freq = data_req.source_freq
    else:
        # missing (None) or unsupported frequencies default to daily
        freq = freq_map.get(data_req.freq, "1DAY")

    # convert fields
    if data_req.source_fields is not None:
        fields = data_req.source_fields
        data_req.fields = data_req.source_fields
    else:
        fields = []
        # `or []` keeps a request without fields from raising TypeError
        for field in data_req.fields or []:
            if field in field_map:
                fields.append(field_map[field])
            else:
                logging.warning("Field %s not available in dYdX API", field)

    return {
        "tickers": tickers,  # List of market tickers
        "freq": freq,  # Converted frequency
        "quote_ccy": data_req.quote_ccy,
        "exch": "dydx",
        "mkt_type": data_req.mkt_type,
        "mkts": markets,  # Market identifiers
        "start_date": data_req.start_date,
        "end_date": data_req.end_date,
        "fields": fields,  # Converted field names
        "tz": data_req.tz or "UTC",
        "cat": "crypto",
        "trials": data_req.trials,
        "pause": data_req.pause,
        "source_tickers": data_req.source_tickers,
        "source_freq": data_req.source_freq,
        "source_fields": data_req.source_fields,
    }
1126
+
1127
def to_dydx(self) -> "DataRequest":
    """
    Convert tickers, markets, frequency and fields from CryptoDataPy to dYdX format.

    Only source attributes that are still None are filled in, so values
    supplied by the user in source format are left untouched. The data
    request is updated in place.

    Returns
    -------
    data_req: DataRequest
        The data request held by this object, with source_tickers,
        source_markets, source_freq and source_fields populated.
    """
    data_req = self.data_req

    # CryptoDataPy freq -> dYdX candle resolution
    freq_map = {
        "1min": "1MIN",
        "5min": "5MINS",
        "15min": "15MINS",
        "30min": "30MINS",
        "1h": "1HOUR",
        "4h": "4HOURS",
        "1d": "1DAY",
        "d": "1DAY",
    }
    # CryptoDataPy field -> dYdX field
    field_map = {
        "open": "open",
        "high": "high",
        "low": "low",
        "close": "close",
        "volume": "baseTokenVolume",
        "funding_rate": "nextFundingRate",
        "oi": "openInterest",
    }

    # tickers
    if data_req.source_tickers is None:
        data_req.source_tickers = [ticker.upper() for ticker in data_req.tickers]

    # markets, every market is built as <TICKER>-USD
    if data_req.source_markets is None:
        data_req.source_markets = [f"{ticker}-USD" for ticker in data_req.source_tickers]

    # freq: missing (None) or unsupported frequencies default to daily so that
    # source_freq is never left unset
    if data_req.source_freq is None:
        data_req.source_freq = freq_map.get(data_req.freq, "1DAY")

    # fields: previously the mapping was built and then discarded
    if data_req.source_fields is None and data_req.fields is not None:
        source_fields = []
        for field in data_req.fields:
            if field in field_map:
                source_fields.append(field_map[field])
            else:
                logging.warning("Field %s not available in dYdX API", field)
        data_req.source_fields = source_fields

    return data_req
@@ -159,12 +159,12 @@ class Filter:
159
159
  and fields (cols).
160
160
  """
161
161
  # drop tickers with nobs < ts_obs
162
- obs = self.df.groupby(level=1).count().min(axis=1)
162
+ obs = self.df.groupby(level=1).count().median(axis=1)
163
163
  drop_tickers_list = obs[obs < ts_obs].index.to_list()
164
164
  self.filtered_df = self.df.drop(drop_tickers_list, level=1, axis=0)
165
165
 
166
166
  # drop tickers with nobs < cs_obs
167
- obs = self.filtered_df.groupby(level=0).count().min(axis=1)
167
+ obs = self.filtered_df.groupby(level=0).count().median(axis=1)
168
168
  idx_start = obs[obs > cs_obs].index[0]
169
169
  self.filtered_df = self.filtered_df.loc[idx_start:]
170
170
 
@@ -185,16 +185,15 @@ class Filter:
185
185
  Filtered dataFrame with DatetimeIndex (level 0), tickers (level 1) and fields (cols).
186
186
  """
187
187
  # unchanged rows
188
- unch_rows = (self.df.subtract(self.df.iloc[:, :4].mean(axis=1), axis=0) == 0).any(axis=1)
188
+ unch_rows: object = (self.df.subtract(self.df.iloc[:, :4].mean(axis=1), axis=0) == 0).any(axis=1)
189
189
 
190
- # delisted tickers
191
- delisted_tickers = unch_rows.unstack().iloc[-1][unch_rows.unstack().iloc[-1]].index.to_list()
190
+ # replace delisted with NaNs
191
+ self.filtered_df = self.df.loc[~unch_rows].reindex(self.df.index)
192
192
 
193
193
  # repair
194
194
  if method == 'remove':
195
- self.filtered_df = self.df.drop(delisted_tickers, level=1)
196
- else:
197
- self.filtered_df = self.df.loc[~unch_rows].reindex(self.df.index)
195
+ self.filtered_df = list(self.filtered_df.unstack().columns[self.filtered_df.unstack().iloc[-1].isna()].
196
+ droplevel(0).unique())
198
197
 
199
198
  return self.filtered_df
200
199
 
@@ -121,7 +121,8 @@ class Impute:
121
121
  self.imputed_df = pd.DataFrame(imp_yhat, index=self.filtered_df.index, columns=self.filtered_df.columns)
122
122
 
123
123
  # type conversion
124
- self.imputed_df = self.imputed_df.apply(pd.to_numeric, errors="ignore").convert_dtypes()
124
+ self.imputed_df = self.imputed_df.convert_dtypes()
125
+
125
126
 
126
127
  # plot
127
128
  if self.plot:
@@ -854,7 +854,8 @@ class WrangleData:
854
854
  self.tidy_data = self.tidy_data.apply(pd.to_numeric, errors='coerce').convert_dtypes()
855
855
 
856
856
  # remove bad data
857
- self.tidy_data = self.tidy_data[self.tidy_data != 0] # 0 values
857
+ if data_type != 'funding_rates':
858
+ self.tidy_data = self.tidy_data[self.tidy_data != 0] # 0 values
858
859
  self.tidy_data = self.tidy_data[~self.tidy_data.index.duplicated()] # duplicate rows
859
860
  self.tidy_data = self.tidy_data.dropna(how='all').dropna(how='all', axis=1) # entire row or col NaNs
860
861
 
@@ -910,13 +911,10 @@ class WrangleData:
910
911
  # tickers
911
912
  tickers_dict = {source_ticker: ticker for source_ticker, ticker in zip(self.data_req.source_tickers,
912
913
  self.data_req.tickers)}
913
- if len(self.data_req.tickers) == 1:
914
- self.data_resp['Ticker'] = self.data_req.tickers[0]
915
- else:
916
- self.data_resp = self.data_resp.stack()
917
- self.data_resp.index.names = ['Date', 'Ticker']
918
- self.data_resp.index = self.data_resp.index.set_levels(self.data_resp.index.levels[1].map(tickers_dict),
919
- level=1)
914
+ self.data_resp = self.data_resp.stack(future_stack=True)
915
+ self.data_resp.columns.name = None
916
+ self.data_resp.index = self.data_resp.index.set_levels(self.data_resp.index.levels[1].map(tickers_dict),
917
+ level=1)
920
918
  self.data_resp.reset_index(inplace=True)
921
919
 
922
920
  # fields
@@ -931,9 +929,6 @@ class WrangleData:
931
929
  resample(self.data_req.freq, level='date').\
932
930
  last().swaplevel('ticker', 'date').sort_index()
933
931
 
934
- # re-order cols
935
- self.data_resp = self.data_resp.loc[:, ['open', 'high', 'low', 'close', 'close_adj', 'volume']]
936
-
937
932
  # type conversion
938
933
  self.data_resp = self.data_resp.apply(pd.to_numeric, errors='coerce').convert_dtypes()
939
934
 
File without changes
File without changes