cryptodatapy 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,11 @@
1
1
  import logging
2
- from time import sleep
3
2
  from typing import Any, Dict, List, Optional, Union
4
3
 
5
- import ccxt
6
4
  import pandas as pd
5
+ import asyncio
6
+ import ccxt
7
+ import ccxt.async_support as ccxt_async
8
+ from tqdm.asyncio import tqdm # Progress bar for async
7
9
 
8
10
  from cryptodatapy.extract.datarequest import DataRequest
9
11
  from cryptodatapy.extract.libraries.library import Library
@@ -27,11 +29,11 @@ class CCXT(Library):
27
29
  assets: Optional[Dict[str, List[str]]] = None,
28
30
  markets: Optional[Dict[str, List[str]]] = None,
29
31
  market_types: List[str] = ["spot", "future", "perpetual_future", "option"],
30
- fields: Optional[List[str]] = None,
31
- frequencies: Optional[Dict[str, List[str]]] = None,
32
+ fields: Optional[List[str]] = ["open", "high", "low", "close", "volume", "funding_rate", 'oi'],
33
+ frequencies: Optional[Dict[str, Union[str, int]]] = None,
32
34
  base_url: Optional[str] = None,
33
35
  api_key: Optional[str] = None,
34
- max_obs_per_call: Optional[int] = 10000,
36
+ max_obs_per_call: Optional[int] = 1000,
35
37
  rate_limit: Optional[Any] = None,
36
38
  ):
37
39
  """
@@ -84,6 +86,11 @@ class CCXT(Library):
84
86
  rate_limit,
85
87
  )
86
88
 
89
+ self.exchange = None
90
+ self.exchange_async = None
91
+ self.data_req = None
92
+ self.data = pd.DataFrame()
93
+
87
94
  def get_exchanges_info(self) -> List[str]:
88
95
  """
89
96
  Get exchanges info.
@@ -93,7 +100,8 @@ class CCXT(Library):
93
100
  exch: list or pd.DataFrame
94
101
  List or dataframe with info on supported exchanges.
95
102
  """
96
- self.exchanges = ccxt.exchanges
103
+ if self.exchanges is None:
104
+ self.exchanges = ccxt.exchanges
97
105
 
98
106
  return self.exchanges
99
107
 
@@ -103,17 +111,13 @@ class CCXT(Library):
103
111
  """
104
112
  return None
105
113
 
106
- def get_assets_info(
107
- self,
108
- exch: str = "binance",
109
- as_list: bool = False
110
- ) -> Union[pd.DataFrame, List[str]]:
114
+ def get_assets_info(self, exch: str, as_list: bool = False) -> Union[pd.DataFrame, List[str]]:
111
115
  """
112
116
  Get assets info.
113
117
 
114
118
  Parameters
115
119
  ----------
116
- exch: str, default 'binance'
120
+ exch: str
117
121
  Name of exchange.
118
122
  as_list: bool, default False
119
123
  Returns assets info for selected exchanges as list.
@@ -123,23 +127,31 @@ class CCXT(Library):
123
127
  assets: list or pd.DataFrame
124
128
  Dataframe with info on available assets or list of assets.
125
129
  """
126
- # inst exch
127
- exchange = getattr(ccxt, exch)()
130
+ if self.assets is None:
131
+
132
+ # inst exchange
133
+ if exch not in ccxt.exchanges:
134
+ raise ValueError(
135
+ f"{exch} is not a supported exchange. "
136
+ f"Use get_exchanges_info() to get a list of supported exchanges.")
137
+ else:
138
+ if self.exchange is None:
139
+ self.exchange = getattr(ccxt, exch)()
128
140
 
129
- # get assets on exchange and create df
130
- exchange.load_markets()
131
- self.assets = pd.DataFrame(exchange.currencies).T
132
- self.assets.index.name = "ticker"
141
+ # get assets on exchange and create df
142
+ self.exchange.load_markets()
143
+ self.assets = pd.DataFrame(self.exchange.currencies).T
144
+ self.assets.index.name = "ticker"
133
145
 
134
- # as list of assets
135
- if as_list:
136
- self.assets = self.assets.index.to_list()
146
+ # as list of assets
147
+ if as_list:
148
+ self.assets = self.assets.index.to_list()
137
149
 
138
150
  return self.assets
139
151
 
140
152
  def get_markets_info(
141
153
  self,
142
- exch: str = "binance",
154
+ exch: str,
143
155
  quote_ccy: Optional[str] = None,
144
156
  mkt_type: Optional[str] = None,
145
157
  as_list: bool = False,
@@ -149,7 +161,7 @@ class CCXT(Library):
149
161
 
150
162
  Parameters
151
163
  ----------
152
- exch: str, default 'binance'
164
+ exch: str
153
165
  Name of exchange.
154
166
  quote_ccy: str, optional, default None
155
167
  Quote currency.
@@ -163,29 +175,37 @@ class CCXT(Library):
163
175
  markets: list or pd.DataFrame
164
176
  List or dataframe with info on available markets, by exchange.
165
177
  """
166
- # inst exch
167
- exchange = getattr(ccxt, exch)()
178
+ if self.markets is None:
179
+
180
+ # inst exchange
181
+ if exch not in ccxt.exchanges:
182
+ raise ValueError(
183
+ f"{exch} is not a supported exchange. "
184
+ f"Use get_exchanges_info() to get a list of supported exchanges.")
185
+ else:
186
+ if self.exchange is None:
187
+ self.exchange = getattr(ccxt, exch)()
168
188
 
169
- # get assets on exchange
170
- self.markets = pd.DataFrame(exchange.load_markets()).T
171
- self.markets.index.name = "ticker"
189
+ # get assets on exchange
190
+ self.markets = pd.DataFrame(self.exchange.load_markets()).T
191
+ self.markets.index.name = "ticker"
172
192
 
173
- # quote ccy
174
- if quote_ccy is not None:
175
- self.markets = self.markets[self.markets.quote == quote_ccy.upper()]
193
+ # quote ccy
194
+ if quote_ccy is not None:
195
+ self.markets = self.markets[self.markets.quote == quote_ccy.upper()]
176
196
 
177
- # mkt type
178
- if mkt_type == "perpetual_future":
179
- if self.markets[self.markets.type == "swap"].empty:
180
- self.markets = self.markets[self.markets.type == "future"]
181
- else:
182
- self.markets = self.markets[self.markets.type == "swap"]
183
- elif mkt_type == "spot" or mkt_type == "future" or mkt_type == "option":
184
- self.markets = self.markets[self.markets.type == mkt_type]
197
+ # mkt type
198
+ if mkt_type == "perpetual_future":
199
+ if self.markets[self.markets.type == "swap"].empty:
200
+ self.markets = self.markets[self.markets.type == "future"]
201
+ else:
202
+ self.markets = self.markets[self.markets.type == "swap"]
203
+ elif mkt_type == "spot" or mkt_type == "future" or mkt_type == "option":
204
+ self.markets = self.markets[self.markets.type == mkt_type]
185
205
 
186
- # dict of assets
187
- if as_list:
188
- self.markets = self.markets.index.to_list()
206
+ # dict of assets
207
+ if as_list:
208
+ self.markets = self.markets.index.to_list()
189
209
 
190
210
  return self.markets
191
211
 
@@ -198,18 +218,18 @@ class CCXT(Library):
198
218
  fields: list
199
219
  List of available fields.
200
220
  """
201
- # list of fields
202
- self.fields = ["open", "high", "low", "close", "volume", "funding_rate"]
221
+ if self.fields is None:
222
+ self.fields = ["open", "high", "low", "close", "volume", "funding_rate", 'oi']
203
223
 
204
224
  return self.fields
205
225
 
206
- def get_frequencies_info(self, exch: str = "binance") -> Dict[str, List[str]]:
226
+ def get_frequencies_info(self, exch: str) -> Dict[str, Union[str, int]]:
207
227
  """
208
228
  Get frequencies info.
209
229
 
210
230
  Parameters
211
231
  ----------
212
- exch: str, default 'binance'
232
+ exch: str
213
233
  Name of exchange for which to get available frequencies.
214
234
 
215
235
  Returns
@@ -217,437 +237,659 @@ class CCXT(Library):
217
237
  freq: dictionary
218
238
  Dictionary with info on available frequencies.
219
239
  """
220
- # inst exch and load mkts
221
- exchange = getattr(ccxt, exch)()
222
- exchange.load_markets()
240
+ if self.frequencies is None:
241
+
242
+ # inst exchange
243
+ if exch not in ccxt.exchanges:
244
+ raise ValueError(
245
+ f"{exch} is not a supported exchange. "
246
+ f"Use get_exchanges_info() to get a list of supported exchanges.")
247
+ else:
248
+ if self.exchange is None:
249
+ self.exchange = getattr(ccxt, exch)()
223
250
 
224
- # freq dict
225
- self.frequencies = exchange.timeframes
251
+ # freq dict
252
+ self.frequencies = self.exchange.timeframes
226
253
 
227
254
  return self.frequencies
228
255
 
229
- def get_rate_limit_info(self, exch: str = "binance") -> Dict[str, Union[str, int]]:
256
+ def get_rate_limit_info(self, exch: str) -> Dict[str, Union[str, int]]:
230
257
  """
231
258
  Get rate limit info.
232
259
 
233
260
  Parameters
234
261
  ----------
235
- exch: str, default 'binance'
262
+ exch: str
236
263
  Name of exchange.
237
264
 
238
265
  Returns
239
266
  -------
240
267
  rate_limit: dictionary
241
268
  Dictionary mapping the exchange name to the minimum required delay, in milliseconds, between consecutive HTTP requests to that exchange.
269
+
242
270
  """
243
- # inst exch
244
- exchange = getattr(ccxt, exch)()
271
+ if self.rate_limit is None:
272
+
273
+ # inst exchange
274
+ if exch not in ccxt.exchanges:
275
+ raise ValueError(
276
+ f"{exch} is not a supported exchange. "
277
+ f"Use get_exchanges_info() to get a list of supported exchanges.")
278
+ else:
279
+ if self.exchange is None:
280
+ self.exchange = getattr(ccxt, exch)()
281
+
282
+ self.rate_limit = {
283
+ "exchange rate limit":
284
+ "delay in milliseconds between two consequent HTTP requests to the same exchange",
285
+ exch: self.exchange.rateLimit
286
+ }
245
287
 
246
- self.rate_limit = {
247
- "exchange rate limit": "delay in milliseconds between two consequent HTTP requests to the same exchange",
248
- exch: exchange.rateLimit
249
- }
250
288
  return self.rate_limit
251
289
 
252
- def get_metadata(self) -> None:
290
+ def get_metadata(self, exch: str) -> None:
253
291
  """
254
292
  Get CCXT metadata.
293
+
294
+ Parameters
295
+ ----------
296
+ exch: str
297
+ Name of exchange.
255
298
  """
299
+ # inst exchange
300
+ if exch not in ccxt.exchanges:
301
+ raise ValueError(
302
+ f"{exch} is not a supported exchange. Use get_exchanges_info() to get a list of supported exchanges.")
303
+ else:
304
+ if self.exchange is None:
305
+ self.exchange = getattr(ccxt, exch)()
306
+
307
+ # load markets
308
+ self.exchange.load_markets()
309
+
256
310
  if self.exchanges is None:
257
311
  self.exchanges = self.get_exchanges_info()
258
312
  if self.market_types is None:
259
313
  self.market_types = ["spot", "future", "perpetual_future", "option"]
260
314
  if self.assets is None:
261
- self.assets = self.get_assets_info(as_list=True)
315
+ self.assets = list(self.exchange.currencies.keys())
262
316
  if self.markets is None:
263
- self.markets = self.get_markets_info(as_list=True)
317
+ self.markets = list(self.exchange.markets.keys())
264
318
  if self.fields is None:
265
- self.fields = self.get_fields_info()
319
+ self.fields = ["open", "high", "low", "close", "volume", "funding_rate", 'oi']
266
320
  if self.frequencies is None:
267
- self.frequencies = self.get_frequencies_info()
321
+ self.frequencies = list(self.exchange.timeframes.keys())
268
322
  if self.rate_limit is None:
269
- self.rate_limit = self.get_rate_limit_info()
323
+ self.rate_limit = self.exchange.rateLimit
270
324
 
271
- def req_data(self,
272
- data_req: DataRequest,
273
- data_type: str,
274
- ticker: str,
275
- start_date: str = None,
276
- end_date: str = None,
277
- ) -> pd.DataFrame:
325
+ async def _fetch_ohlcv(self,
326
+ ticker: str,
327
+ freq: str,
328
+ start_date: str,
329
+ end_date: str,
330
+ exch: str,
331
+ trials: int = 3
332
+ ) -> List:
278
333
  """
279
- Sends data request to Python client.
334
+ Fetches OHLCV data for a specific ticker.
280
335
 
281
336
  Parameters
282
337
  ----------
283
- data_req: DataRequest
284
- Parameters of data request in CryptoDataPy format.
285
- data_type: str, {'ohlcv', 'funding_rates'},
286
- Data type to retrieve.
287
338
  ticker: str
288
- Ticker symbol to request data for.
339
+ Ticker symbol.
340
+ freq: str
341
+ Frequency of data, e.g. '1m', '5m', '1h', '1d'.
289
342
  start_date: str
290
- Start date in 'YYYY-MM-DD' format.
343
+ Start date as an integer timestamp in milliseconds since the Unix epoch.
291
344
  end_date: str
292
- End date in 'YYYY-MM-DD' format.
293
-
345
+ End date as an integer timestamp in milliseconds since the Unix epoch.
346
+ exch: str
347
+ Name of exchange.
348
+ trials: int, default 3
349
+ Number of attempts to fetch data.
294
350
 
295
351
  Returns
296
352
  -------
297
- df: pd.DataFrame
298
- Dataframe with datetime, ticker/identifier, and field/col values.
353
+ data: list
354
+ List of timestamps with OHLCV data.
299
355
  """
300
- # convert data request parameters to CCXT format
301
- cx_data_req = ConvertParams(data_req).to_ccxt()
302
- if start_date is None:
303
- start_date = cx_data_req['start_date']
304
- if end_date is None:
305
- end_date = cx_data_req['end_date']
306
-
307
- # data types
308
- data_types = {'ohlcv': 'fetchOHLCV', 'funding_rates': 'fetchFundingRateHistory'}
356
+ attempts = 0
357
+ data = []
309
358
 
310
359
  # inst exch
311
- exch = getattr(ccxt, cx_data_req['exch'])()
312
- data_resp = []
313
-
314
- try:
315
- if data_type == 'ohlcv':
316
- data_resp = getattr(exch, data_types[data_type])(
317
- ticker,
318
- cx_data_req["freq"],
319
- since=start_date,
320
- limit=self.max_obs_per_call,
321
- params={'until': end_date}
322
- )
323
- elif data_type == 'funding_rates':
324
- data_resp = getattr(exch, data_types[data_type])(
325
- ticker,
326
- since=start_date,
327
- limit=1000,
328
- params={'until': end_date}
329
- )
360
+ if self.exchange_async is None:
361
+ self.exchange_async = getattr(ccxt_async, exch)()
362
+
363
+ # fetch data
364
+ if self.exchange_async.has['fetchOHLCV']:
365
+
366
+ # while loop to fetch all data
367
+ while start_date < end_date and attempts < trials:
368
+
369
+ try:
370
+ data_resp = await getattr(self.exchange_async, 'fetchOHLCV')(
371
+ ticker,
372
+ freq,
373
+ since=start_date,
374
+ limit=self.max_obs_per_call,
375
+ params={'until': end_date}
376
+ )
330
377
 
331
- return data_resp
378
+ except Exception as e:
379
+ logging.warning(
380
+ f"Failed to get OHLCV data from {self.exchange_async.id} for {ticker} on attempt #{attempts+1}."
381
+ )
382
+ logging.warning(e)
383
+ attempts += 1
384
+ if attempts == trials:
385
+ logging.warning(
386
+ f"Failed to get OHLCV data from {self.exchange_async.id} "
387
+ f"for {ticker} after {trials} attempts."
388
+ )
389
+ return data
390
+
391
+ await asyncio.sleep(self.exchange_async.rateLimit / 1000)
392
+ continue
332
393
 
333
- except Exception as e:
334
- logging.warning(f"Failed to get {data_type} data for {ticker}.")
335
- logging.warning(e)
394
+ else:
395
+ # check if data resp is empty
396
+ if len(data_resp):
397
+ # next start date
398
+ start_date = data_resp[-1][0] + 1
399
+ data.extend(data_resp)
400
+ await asyncio.sleep(self.exchange_async.rateLimit / 1000)
401
+
402
+ else:
403
+ break
404
+
405
+ return data
336
406
 
407
+ else:
408
+ logging.warning(f"OHLCV data is not available for {self.exchange_async.id}.")
337
409
  return None
338
410
 
339
- def fetch_all_ohlcv_hist(self, data_req: DataRequest, ticker: str) -> pd.DataFrame:
411
+ async def fetch_all_ohlcv(self,
412
+ tickers,
413
+ freq: str,
414
+ start_date: str,
415
+ end_date: str,
416
+ exch: str,
417
+ trials: int = 3,
418
+ pause: int = 0.5
419
+ ):
340
420
  """
341
- Submits get requests to API until entire OHLCV history has been collected. Only necessary when
342
- number of observations is larger than the maximum number of observations per call.
421
+ Fetches OHLCV data for a list of tickers.
422
+
423
+ Parameters
424
+ ----------
425
+ tickers: list
426
+ List of ticker symbols.
427
+ freq: str
428
+ Frequency of data, e.g. '1m', '5m', '1h', '1d'.
429
+ start_date: str
430
+ Start date as an integer timestamp in milliseconds since the Unix epoch.
431
+ end_date: str
432
+ End date as an integer timestamp in milliseconds since the Unix epoch.
433
+ exch: str
434
+ Name of exchange.
435
+ trials: int, default 3
436
+ Number of attempts to fetch data.
437
+ pause: float, default 0.5
438
+ Pause in seconds to respect the rate limit.
439
+
440
+ Returns
441
+ -------
442
+ data: list
443
+ List of lists of timestamps and OHLCV data for each ticker.
444
+ """
445
+ # inst exch
446
+ if self.exchange_async is None:
447
+ self.exchange_async = getattr(ccxt_async, exch)()
448
+
449
+ data = []
450
+
451
+ # create progress bar
452
+ pbar = tqdm(total=len(tickers), desc="Fetching OHLCV data", unit="ticker")
453
+
454
+ # loop through tickers
455
+ for ticker in tickers:
456
+ data_resp = await self._fetch_ohlcv(ticker, freq, start_date, end_date, trials=trials, exch=exch)
457
+ data.append(data_resp)
458
+ pbar.update(1)
459
+ await asyncio.sleep(pause) # pause between ticker requests to respect the rate limit
460
+
461
+ await self.exchange_async.close()
462
+
463
+ return data
464
+
465
+ async def _fetch_funding_rates(self,
466
+ ticker: str,
467
+ start_date: str,
468
+ end_date: str,
469
+ exch: str,
470
+ trials: int = 3
471
+ ) -> List:
472
+ """
473
+ Fetches funding rates data for a specific ticker.
343
474
 
344
475
  Parameters
345
476
  ----------
346
- data_req: DataRequest
347
- Parameters of data request in CryptoDataPy format.
348
477
  ticker: str
349
478
  Ticker symbol.
479
+ start_date: str
480
+ Start date as an integer timestamp in milliseconds since the Unix epoch.
481
+ end_date: str
482
+ End date as an integer timestamp in milliseconds since the Unix epoch.
483
+ trials: int, default 3
484
+ Number of attempts to fetch data.
350
485
 
351
486
  Returns
352
487
  -------
353
- df: pd.DataFrame
354
- Dataframe with entire data history retrieved.
488
+ data: list
489
+ List of dictionaries with timestamps and funding rates data.
355
490
  """
356
- # convert data request parameters to CCXT format and set start date
357
- cx_data_req = ConvertParams(data_req).to_ccxt()
358
- start_date = cx_data_req['start_date']
359
- end_date = cx_data_req['end_date']
491
+ attempts = 0
492
+ data = []
360
493
 
361
- # create empty df
362
- df = pd.DataFrame()
494
+ # inst exch
495
+ if self.exchange_async is None:
496
+ self.exchange_async = getattr(ccxt_async, exch)()
363
497
 
364
- # while loop condition
365
- missing_vals, attempts = True, 0
498
+ # fetch data
499
+ if self.exchange_async.has['fetchFundingRateHistory']:
366
500
 
367
- # run a while loop until all data collected
368
- while missing_vals and attempts < cx_data_req['trials']:
501
+ # while loop to get all data
502
+ while start_date < end_date and attempts < trials:
369
503
 
370
- data_resp = self.req_data(data_req=data_req,
371
- data_type='ohlcv',
372
- ticker=ticker,
373
- start_date=start_date,
374
- end_date=end_date)
504
+ try:
505
+ data_resp = await getattr(self.exchange_async, 'fetchFundingRateHistory')(
506
+ ticker,
507
+ since=start_date,
508
+ limit=self.max_obs_per_call,
509
+ params={'until': end_date}
510
+ )
375
511
 
376
- if data_resp is None:
377
- attempts += 1
378
- sleep(self.get_rate_limit_info(exch=cx_data_req['exch'])[cx_data_req['exch']] / 1000)
379
- logging.warning(
380
- f"Failed to pull data on attempt #{attempts}."
381
- )
382
- if attempts == cx_data_req["trials"]:
512
+ except Exception as e:
383
513
  logging.warning(
384
- f"Failed to get OHLCV data from {cx_data_req['exch']} for {ticker} after many attempts."
514
+ f"Failed to get funding rates from {self.exchange_async.id} "
515
+ f"for {ticker} on attempt #{attempts+1}."
385
516
  )
386
- return None
517
+ logging.warning(e)
518
+ attempts += 1
519
+ if attempts == trials:
520
+ logging.warning(
521
+ f"Failed to get funding rates from {self.exchange_async.id} "
522
+ f"for {ticker} after {trials} attempts."
523
+ )
524
+ return data
525
+
526
+ await asyncio.sleep(self.exchange_async.rateLimit / 1000)
527
+ continue
387
528
 
388
- else:
389
- # name cols and create df
390
- header = ["datetime", "open", "high", "low", "close", "volume"]
391
- data = pd.DataFrame(data_resp, columns=header)
392
- df = pd.concat([df, data])
393
-
394
- # check if all data has been extracted
395
- time_diff = cx_data_req["end_date"] - df.datetime.iloc[-1]
396
- if pd.Timedelta(milliseconds=time_diff) < pd.Timedelta(cx_data_req["freq"]):
397
- missing_vals = False
398
- # missing data, infinite loop
399
- elif df.datetime.iloc[-1] == df.datetime.iloc[-2]:
400
- missing_vals = False
401
- logging.warning(f"Missing recent OHLCV data for {ticker}.")
402
- # reset end date and pause before calling API
403
529
  else:
404
- # change end date
405
- start_date = df.datetime.iloc[-1]
406
-
407
- # rate limit
408
- sleep(self.get_rate_limit_info(exch=cx_data_req['exch'])[cx_data_req['exch']] / 1000)
409
-
410
- return df
530
+ # check if data resp is empty
531
+ if len(data_resp):
532
+ # next start date
533
+ start_date = data_resp[-1]['timestamp'] + 1
534
+ data.extend(data_resp)
535
+ await asyncio.sleep(self.exchange_async.rateLimit / 1000)
536
+ else:
537
+ break
538
+
539
+ return data
540
+
541
+ else:
542
+ logging.warning(f"Funding rates are not available for {self.exchange_async.id}.")
543
+ return None
411
544
 
412
- def fetch_all_funding_hist(self, data_req: DataRequest, ticker: str) -> pd.DataFrame:
545
+ async def fetch_all_funding_rates(self,
546
+ tickers,
547
+ start_date: str,
548
+ end_date: str,
549
+ exch: str,
550
+ trials: int = 3,
551
+ pause: int = 0.5
552
+ ):
413
553
  """
414
- Submits get requests to API until entire funding rate history has been collected. Only necessary when
415
- number of observations is larger than the maximum number of observations per call.
554
+ Fetches funding rates data for a list of tickers.
416
555
 
417
556
  Parameters
418
557
  ----------
419
- data_req: DataRequest
420
- Parameters of data request in CryptoDataPy format.
421
- ticker: str
422
- Ticker symbol.|
558
+ tickers: list
559
+ List of ticker symbols.
560
+ start_date: str
561
+ Start date as an integer timestamp in milliseconds since the Unix epoch.
562
+ end_date: str
563
+ End date as an integer timestamp in milliseconds since the Unix epoch.
564
+ exch: str
565
+ Name of exchange.
566
+ trials: int, default 3
567
+ Number of attempts to fetch data.
568
+ pause: float, default 0.5
569
+ Pause in seconds to respect the rate limit.
423
570
 
424
571
  Returns
425
572
  -------
426
- df: pd.DataFrame
427
- Dataframe with entire data history retrieved.
428
- """
429
- # convert data request parameters to CCXT format and set start date
430
- cx_data_req = ConvertParams(data_req).to_ccxt()
431
- start_date = cx_data_req['start_date']
432
- end_date = cx_data_req['end_date']
433
-
434
- # create empty df
435
- df = pd.DataFrame()
436
- # while loop condition
437
- missing_vals, attempts = True, 0
438
-
439
- # run a while loop until all data collected
440
- while missing_vals and attempts < cx_data_req['trials']:
441
-
442
- # data req
443
- data_resp = self.req_data(data_req=data_req,
444
- data_type='funding_rates',
445
- ticker=ticker,
446
- start_date=start_date,
447
- end_date=end_date)
448
-
449
- if data_resp is None:
450
- attempts += 1
451
- sleep(self.get_rate_limit_info(exch=cx_data_req['exch'])[cx_data_req['exch']] / 1000)
452
- logging.warning(
453
- f"Failed to pull data on attempt #{attempts}."
454
- )
455
- if attempts == cx_data_req["trials"]:
456
- logging.warning(
457
- f"Failed to get funding_rates from {cx_data_req['exch']} for {ticker} after many attempts."
458
- )
459
- return None
573
+ data: list
574
+ List of lists of dictionaries with timestamps and funding rates data for each ticker.
575
+ """
576
+ # inst exch
577
+ if self.exchange_async is None:
578
+ self.exchange_async = getattr(ccxt_async, exch)()
460
579
 
461
- else:
462
- # add to df
463
- data = pd.DataFrame(data_resp)
464
- df = pd.concat([df, data])
465
- # check if all data has been extracted
466
- time_diff = pd.to_datetime(
467
- cx_data_req["end_date"], unit="ms"
468
- ) - pd.to_datetime(data.datetime.iloc[-1]).tz_localize(None)
469
- if time_diff < pd.Timedelta("8h"):
470
- missing_vals = False
471
- # missing data, infinite loop
472
- elif df.datetime.iloc[-1] == df.datetime.iloc[-2]:
473
- missing_vals = False
474
- logging.warning(f"Missing recent funding rate data for {ticker}.")
475
- # reset end date and pause before calling API
476
- else:
477
- # change end date
478
- start_date = data.timestamp.iloc[-1]
580
+ data = []
479
581
 
480
- # rate limit
481
- sleep(self.get_rate_limit_info(exch=cx_data_req['exch'])[cx_data_req['exch']] / 1000)
582
+ # create progress bar
583
+ pbar = tqdm(total=len(tickers), desc="Fetching funding rates", unit="ticker")
482
584
 
483
- return df
585
+ # loop through tickers
586
+ for ticker in tickers:
587
+ data_resp = await self._fetch_funding_rates(ticker, start_date, end_date, trials=trials, exch=exch)
588
+ data.append(data_resp)
589
+ pbar.update(1)
590
+ await asyncio.sleep(pause) # pause between ticker requests to respect the rate limit
484
591
 
485
- @staticmethod
486
- def wrangle_data_resp(data_req: DataRequest, data_resp: pd.DataFrame) -> pd.DataFrame:
592
+ await self.exchange_async.close()
593
+
594
+ return data
595
+
596
+ async def _fetch_open_interest(self,
597
+ ticker: str,
598
+ freq: str,
599
+ start_date: str,
600
+ end_date: str,
601
+ exch: str,
602
+ trials: int = 3
603
+ ) -> List:
487
604
  """
488
- Wrangle data response.
605
+ Fetches open interest data for a specific ticker.
489
606
 
490
607
  Parameters
491
608
  ----------
492
- data_req: DataRequest
493
- Parameters of data request in CryptoDataPy format.
494
- data_resp: pd.DataFrame
495
- Data response from GET request.
609
+ ticker: str
610
+ Ticker symbol.
611
+ freq: str
612
+ Frequency of data, e.g. '1m', '5m', '1h', '1d'.
613
+ start_date: str
614
+ Start date as an integer timestamp in milliseconds since the Unix epoch.
615
+ end_date: str
616
+ End date as an integer timestamp in milliseconds since the Unix epoch.
617
+ exch: str
618
+ Name of exchange.
619
+ trials: int, default 3
620
+ Number of attempts to fetch data.
496
621
 
497
622
  Returns
498
623
  -------
499
- df: pd.DataFrame
500
- Wrangled dataframe with DatetimeIndex and values in tidy format.
624
+ data: list
625
+ List of dictionaries with timestamps and open interest data.
501
626
  """
627
+ # number of attempts
628
+ attempts = 0
629
+ data = []
630
+
631
+ # inst exch
632
+ if self.exchange_async is None:
633
+ self.exchange_async = getattr(ccxt_async, exch)()
634
+
635
+ # fetch data
636
+ if self.exchange_async.has['fetchOpenInterestHistory']:
637
+
638
+ # while loop to get all data
639
+ while start_date < end_date and attempts < trials:
640
+
641
+ try:
642
+ data_resp = await getattr(self.exchange_async, 'fetchOpenInterestHistory')(
643
+ ticker,
644
+ freq,
645
+ since=start_date,
646
+ limit=500,
647
+ params={'until': end_date}
648
+ )
649
+
650
+ except Exception as e:
651
+ logging.warning(
652
+ f"Failed to get open interest from {self.exchange_async.id} "
653
+ f"for {ticker} on attempt #{attempts + 1}."
654
+ )
655
+ logging.warning(e)
656
+ attempts += 1
657
+ if attempts == trials:
658
+ logging.warning(
659
+ f"Failed to get open interest from {self.exchange_async.id} "
660
+ f"for {ticker} after {trials} attempts."
661
+ )
662
+ return data
663
+
664
+ await asyncio.sleep(self.exchange_async.rateLimit / 1000)
665
+ continue
666
+
667
+ else:
668
+ # check if data resp is empty
669
+ if len(data_resp):
670
+ # next start date
671
+ start_date = data_resp[-1]['timestamp'] + 1
672
+ data.extend(data_resp)
673
+ await asyncio.sleep(self.exchange_async.rateLimit / 1000)
674
+ else:
675
+ break
676
+
677
+ return data
678
+
679
+ else:
680
+ logging.warning(f"Open interest is not available for {self.exchange_async.id}.")
681
+ return None
502
682
 
503
- return WrangleData(data_req, data_resp).ccxt()
683
+ async def fetch_all_open_interest(self,
684
+ tickers,
685
+ freq: str,
686
+ start_date: str,
687
+ end_date: str,
688
+ exch: str,
689
+ trials: int = 3,
690
+ pause: int = 0.5
691
+ ):
504
692
 
505
- def fetch_tidy_ohlcv(self, data_req: DataRequest, ticker: str) -> pd.DataFrame:
506
693
  """
507
- Gets entire OHLCV history and wrangles the data response into tidy data format.
694
+ Fetches open interest data for a list of tickers.
508
695
 
509
696
  Parameters
510
697
  ----------
511
- data_req: DataRequest
512
- Parameters of data request in CryptoDataPy format.
513
- ticker: str
514
- Ticker symbol.
698
+ tickers: list
699
+ List of ticker symbols.
700
+ freq: str
701
+ Frequency of data, e.g. '1m', '5m', '1h', '1d'.
702
+ start_date: str
703
+ Start date as an integer timestamp in milliseconds since the Unix epoch.
704
+ end_date: str
705
+ End date as an integer timestamp in milliseconds since the Unix epoch.
706
+ exch: str
707
+ Name of exchange.
708
+ trials: int, default 3
709
+ Number of attempts to fetch data.
710
+ pause: float, default 0.5
711
+ Pause in seconds to respect the rate limit.
515
712
 
516
713
  Returns
517
714
  -------
518
- df: pd.DataFrame
519
- Dataframe with entire data history retrieved and wrangled into tidy data format.
715
+ data: list
716
+ List of lists of dictionaries with timestamps and open interest data for each ticker.
520
717
  """
521
- # get entire data history
522
- df = self.fetch_all_ohlcv_hist(data_req, ticker)
718
+ # inst exch
719
+ if self.exchange_async is None:
720
+ self.exchange_async = getattr(ccxt_async, exch)()
523
721
 
524
- # wrangle df
525
- if df is not None:
526
- df = self.wrangle_data_resp(data_req, df)
722
+ data = []
723
+
724
+ # create progress bar
725
+ pbar = tqdm(total=len(tickers), desc="Fetching open interest", unit="ticker")
726
+
727
+ # loop through tickers
728
+ for ticker in tickers:
729
+ data_resp = await self._fetch_open_interest(ticker, freq, start_date, end_date, trials=trials, exch=exch)
730
+ data.append(data_resp)
731
+ pbar.update(1)
732
+ await asyncio.sleep(pause) # pause between ticker requests to respect the rate limit
527
733
 
528
- return df
734
+ await self.exchange_async.close()
529
735
 
530
- def fetch_tidy_funding_rates(self, data_req: DataRequest, ticker: str) -> pd.DataFrame:
736
+ return data
737
+
738
+ def convert_params(self, data_req: DataRequest) -> DataRequest:
531
739
  """
532
- Gets entire funding rates history and wrangles the data response into tidy data format.
740
+ Converts data request parameters to CCXT format.
533
741
 
534
742
  Parameters
535
743
  ----------
536
744
  data_req: DataRequest
537
745
  Parameters of data request in CryptoDataPy format.
538
- ticker: str
539
- Ticker symbol.
540
746
 
541
747
  Returns
542
748
  -------
543
- df: pd.DataFrame
544
- Dataframe with entire data history retrieved and wrangled into tidy data format.
749
+ data_req: DataRequest
750
+ Parameters of data request in CCXT format.
545
751
  """
546
- # get entire data history
547
- df = self.fetch_all_funding_hist(data_req, ticker)
752
+ self.data_req = ConvertParams(data_req).to_ccxt()
548
753
 
549
- # wrangle df
550
- if df is not None:
551
- df = self.wrangle_data_resp(data_req, df)
754
+ # get metadata
755
+ self.get_metadata(self.data_req.exch)
552
756
 
553
- return df
757
+ # check markets
758
+ if not any([market in self.markets for market in self.data_req.source_markets]):
759
+ raise ValueError(
760
+ f"Selected markets are not available. Use the '.markets' attribute to check supported markets."
761
+ )
554
762
 
555
- def check_params(self, data_req) -> None:
556
- """
557
- Checks if data request parameters are valid.
763
+ # check freq
764
+ if self.data_req.source_freq not in self.frequencies:
765
+ raise ValueError(
766
+ f"{self.data_req.source_freq} frequency is not available. "
767
+ f"Use the '.frequencies' attribute to check available frequencies."
768
+ )
558
769
 
559
- """
560
- # convert data request parameters to CCXT format
561
- cx_data_req = ConvertParams(data_req).to_ccxt()
770
+ # check quote ccy
771
+ if self.data_req.quote_ccy is not None:
772
+ if self.data_req.quote_ccy not in self.assets:
773
+ raise ValueError(
774
+ f"{self.data_req.quote_ccy} is not supported. "
775
+ f"Use the '.assets' attribute to check supported currencies."
776
+ )
562
777
 
563
- # inst exch
564
- exch = getattr(ccxt, cx_data_req['exch'])()
778
+ # check mkt type
779
+ if self.data_req.mkt_type not in self.market_types:
780
+ raise ValueError(
781
+ f"{self.data_req.mkt_type} is not available for {self.data_req.exch}."
782
+ )
783
+
784
+ # check start date
785
+ if not isinstance(self.data_req.source_start_date, int):
786
+ raise ValueError(
787
+ f"Start date must be in integers in milliseconds since Unix epoch."
788
+ )
565
789
 
566
- # check tickers
567
- tickers = self.get_assets_info(exch=cx_data_req["exch"], as_list=True)
568
- if not any([ticker.upper() in tickers for ticker in cx_data_req["tickers"]]):
790
+ # check end date
791
+ if not isinstance(self.data_req.source_end_date, int):
569
792
  raise ValueError(
570
- f"Assets are not available. Use assets attribute to check available assets for {cx_data_req['exch']}")
793
+ f"End date must be in integers in milliseconds since Unix epoch."
794
+ )
571
795
 
572
- # check tickers
573
- fields = self.get_fields_info()
574
- if not any([field in fields for field in data_req.fields]):
796
+ # check fields
797
+ if not any([field in self.fields for field in self.data_req.fields]):
575
798
  raise ValueError(
576
- f"Fields are not available. Use fields attribute to check available fields."
799
+ f"Selected fields are not available for {self.data_req.exch}. "
800
+ f"Use fields attribute to check available fields."
577
801
  )
578
802
 
579
- # check freq
580
- if cx_data_req["freq"] not in exch.timeframes:
803
+ # check ohlcv
804
+ if any([field in ['open', 'high', 'low', 'close', 'volume'] for field in self.data_req.fields]) and \
805
+ not self.exchange.has["fetchOHLCV"]:
581
806
  raise ValueError(
582
- f"{data_req.freq} is not available for {cx_data_req['exch']}."
807
+ f"OHLCV data is not available for {self.data_req.exch}."
808
+ f" Try another exchange or data request."
583
809
  )
584
810
 
585
- # check if ohlcv avail on exch
586
- if any([field in self.fields[:-1] for field in data_req.fields]) and \
587
- not exch.has["fetchOHLCV"]:
811
+ # check funding rates
812
+ if any([field == 'funding_rate' for field in self.data_req.fields]) and \
813
+ not self.exchange.has["fetchFundingRateHistory"]:
588
814
  raise ValueError(
589
- f"OHLCV data is not available for {cx_data_req['exch']}."
815
+ f"Funding rates are not available for {self.data_req.exch}."
590
816
  f" Try another exchange or data request."
591
817
  )
592
818
 
593
- # check if funding avail on exch
594
- if any([field == 'funding_rate' for field in data_req.fields]) and \
595
- not exch.has["fetchFundingRateHistory"]:
819
+ # check open interest
820
+ if any([field == 'oi' for field in self.data_req.fields]) and \
821
+ not self.exchange.has["fetchOpenInterestHistory"]:
596
822
  raise ValueError(
597
- f"Funding rates are not available for {cx_data_req['exch']}."
823
+ f"Open interest is not available for {self.data_req.exch}."
598
824
  f" Try another exchange or data request."
599
825
  )
600
826
 
601
- # check if perp future
602
- if any([field == 'funding_rate' for field in data_req.fields]) and \
603
- data_req.mkt_type == "spot":
827
+ # check perp future
828
+ if any([(field == 'funding_rate' or field == 'open_interest') for field in self.data_req.fields]) and \
829
+ self.data_req.mkt_type not in ['perpetual_future', 'future']:
604
830
  raise ValueError(
605
- f"Funding rates are not available for spot markets."
606
- f" Market type must be perpetual futures."
831
+ f"You have requested fields only available for futures markets."
832
+ f" Change mkt_type to 'perpetual_future' or 'future'."
607
833
  )
608
834
 
609
- def fetch_ohlcv(self, data_req: DataRequest) -> pd.DataFrame:
835
+ return self.data_req
836
+
837
+ def wrangle_data_resp(self, data_resp: pd.DataFrame, data_type: str) -> pd.DataFrame:
610
838
  """
611
- Loops list of tickers, retrieves OHLCV data for each ticker in tidy format and stores it in a
612
- multiindex dataframe.
839
+ Wrangle data response.
613
840
 
614
841
  Parameters
615
842
  ----------
616
- data_req: DataRequest
617
- Parameters of data request in CryptoDataPy format.
843
+ data_resp: pd.DataFrame
844
+ Data response from GET request.
845
+ data_type: str
846
+ Type of data, e.g. 'ohlcv', 'funding_rate', 'open_interest'.
618
847
 
619
848
  Returns
620
849
  -------
621
- df: pd.DataFrame - MultiIndex
622
- Dataframe with DatetimeIndex (level 0), ticker (level 1) and OHLCV values (cols), in tidy data format.
850
+ df: pd.DataFrame
851
+ Wrangled dataframe with DatetimeIndex and values in tidy format.
623
852
  """
624
- # convert data request parameters to CCXT format
625
- cx_data_req = ConvertParams(data_req).to_ccxt()
626
853
 
627
- # check params
628
- self.check_params(data_req)
854
+ return WrangleData(self.data_req, data_resp).ccxt(data_type=data_type)
629
855
 
630
- # empty df to add data
631
- df = pd.DataFrame()
856
+ async def fetch_tidy_ohlcv(self, data_req: DataRequest) -> pd.DataFrame:
857
+ """
858
+ Gets entire OHLCV history and wrangles the data response into tidy data format.
632
859
 
633
- # loop through tickers
634
- for mkt, ticker in zip(cx_data_req['mkts'], data_req.tickers):
860
+ Parameters
861
+ ----------
862
+ data_req: DataRequest
863
+ Parameters of data request in CryptoDataPy format.
635
864
 
636
- df0 = self.fetch_tidy_ohlcv(data_req, mkt)
865
+ Returns
866
+ -------
867
+ df: pd.DataFrame
868
+ Dataframe with entire OHLCV data history retrieved and wrangled into tidy data format.
869
+ """
870
+ # convert data request parameters to CCXT format
871
+ if self.data_req is None:
872
+ self.convert_params(data_req)
637
873
 
638
- if df0 is not None:
639
- # add ticker to index
640
- df0['ticker'] = ticker.upper()
641
- df0.set_index(['ticker'], append=True, inplace=True)
642
- # concat df and df1
643
- df = pd.concat([df, df0])
874
+ # get entire data history
875
+ data_resp = await self.fetch_all_ohlcv(self.data_req.source_markets,
876
+ self.data_req.source_freq,
877
+ self.data_req.source_start_date,
878
+ self.data_req.source_end_date,
879
+ self.data_req.exch,
880
+ trials=self.data_req.trials,
881
+ pause=self.data_req.pause)
644
882
 
645
- return df
883
+ # wrangle df
884
+ if any(data_resp):
885
+ df = self.wrangle_data_resp(data_resp, data_type='ohlcv')
886
+ return df
887
+ else:
888
+ logging.warning("Failed to get requested OHLCV data.")
646
889
 
647
- def fetch_funding_rates(self, data_req: DataRequest) -> pd.DataFrame:
890
+ async def fetch_tidy_funding_rates(self, data_req: DataRequest) -> pd.DataFrame:
648
891
  """
649
- Loops list of tickers, retrieves funding rates data for each ticker in tidy format and stores it in a
650
- multiindex dataframe.
892
+ Gets entire funding rates history and wrangles the data response into tidy data format.
651
893
 
652
894
  Parameters
653
895
  ----------
@@ -656,33 +898,63 @@ class CCXT(Library):
656
898
 
657
899
  Returns
658
900
  -------
659
- df: pd.DataFrame - MultiIndex
660
- Dataframe with DatetimeIndex (level 0), ticker (level 1) and OHLCV values (cols), in tidy data format.
901
+ df: pd.DataFrame
902
+ Dataframe with entire data history retrieved and wrangled into tidy data format.
661
903
  """
662
904
  # convert data request parameters to CCXT format
663
- cx_data_req = ConvertParams(data_req).to_ccxt()
905
+ if self.data_req is None:
906
+ self.convert_params(data_req)
664
907
 
665
- # check params
666
- self.check_params(data_req)
908
+ # get entire data history
909
+ data_resp = await self.fetch_all_funding_rates(self.data_req.source_markets,
910
+ self.data_req.source_start_date,
911
+ self.data_req.source_end_date,
912
+ self.data_req.exch,
913
+ trials=self.data_req.trials,
914
+ pause=self.data_req.pause)
667
915
 
668
- # empty df to add data
669
- df = pd.DataFrame()
916
+ # wrangle df
917
+ if any(data_resp):
918
+ df = self.wrangle_data_resp(data_resp, data_type='funding_rates')
919
+ return df
920
+ else:
921
+ logging.warning("Failed to get requested funding rates.")
670
922
 
671
- # loop through tickers
672
- for mkt, ticker in zip(cx_data_req['mkts'], data_req.tickers):
923
+ async def fetch_tidy_open_interest(self, data_req: DataRequest) -> pd.DataFrame:
924
+ """
925
+ Gets entire open interest history and wrangles the data response into tidy data format.
926
+
927
+ Parameters
928
+ ----------
929
+ data_req: DataRequest
930
+ Parameters of data request in CryptoDataPy format.
673
931
 
674
- df0 = self.fetch_tidy_funding_rates(data_req, mkt)
932
+ Returns
933
+ -------
934
+ df: pd.DataFrame
935
+ Dataframe with entire data history retrieved and wrangled into tidy data format.
936
+ """
937
+ # convert data request parameters to CCXT format
938
+ if self.data_req is None:
939
+ self.convert_params(data_req)
675
940
 
676
- if df0 is not None:
677
- # add ticker to index
678
- df0['ticker'] = ticker.upper()
679
- df0.set_index(['ticker'], append=True, inplace=True)
680
- # concat df and df1
681
- df = pd.concat([df, df0])
941
+ # get entire data history
942
+ data_resp = await self.fetch_all_open_interest(self.data_req.source_markets,
943
+ self.data_req.source_freq,
944
+ self.data_req.source_start_date,
945
+ self.data_req.source_end_date,
946
+ self.data_req.exch,
947
+ trials=self.data_req.trials,
948
+ pause=self.data_req.pause)
682
949
 
683
- return df
950
+ # wrangle df
951
+ if any(data_resp):
952
+ df = self.wrangle_data_resp(data_resp, data_type='open_interest')
953
+ return df
954
+ else:
955
+ logging.warning("Failed to get requested open interest.")
684
956
 
685
- def get_data(self, data_req: DataRequest) -> pd.DataFrame:
957
+ async def get_data(self, data_req: DataRequest) -> pd.DataFrame:
686
958
  """
687
959
  Get data specified by data request.
688
960
 
@@ -695,28 +967,29 @@ class CCXT(Library):
695
967
  df: pd.DataFrame - MultiIndex
696
968
  DataFrame with DatetimeIndex (level 0), ticker (level 1), and values for selected fields (cols).
697
969
  """
698
- # empty df
699
- df = pd.DataFrame()
700
-
701
- # get OHLCV data
702
- ohlcv_list = ["open", "high", "low", "close", "volume"]
703
- if any([field in ohlcv_list for field in data_req.fields]):
704
- df0 = self.fetch_ohlcv(data_req)
705
- df = pd.concat([df, df0])
970
+ # get OHLCV
971
+ if any([field in ["open", "high", "low", "close", "volume"] for field in data_req.fields]):
972
+ df = await self.fetch_tidy_ohlcv(data_req)
973
+ self.data = pd.concat([self.data, df])
706
974
 
707
- # get funding rate data
975
+ # get funding rates
708
976
  if any([field == "funding_rate" for field in data_req.fields]):
709
- df1 = self.fetch_funding_rates(data_req)
710
- df = pd.concat([df, df1], axis=1)
977
+ df = await self.fetch_tidy_funding_rates(data_req)
978
+ self.data = pd.concat([self.data, df], axis=1)
979
+
980
+ # get open interest
981
+ if any([field == "oi" for field in data_req.fields]):
982
+ df = await self.fetch_tidy_open_interest(data_req)
983
+ self.data = pd.concat([self.data, df], axis=1)
711
984
 
712
- # check if df empty
713
- if df.empty:
985
+ # check df
986
+ if self.data.empty:
714
987
  raise Exception(
715
988
  "No data returned. Check data request parameters and try again."
716
989
  )
717
990
 
718
991
  # filter df for desired fields and typecast
719
- fields = [field for field in data_req.fields if field in df.columns]
720
- df = df.loc[:, fields]
992
+ fields = [field for field in data_req.fields if field in self.data.columns]
993
+ self.data = self.data.loc[:, fields]
721
994
 
722
- return df.sort_index()
995
+ return self.data.sort_index()