cryptodatapy 0.2.24__py3-none-any.whl → 0.2.26__py3-none-any.whl
This diff shows the changes between publicly available package versions as released to their respective public registries. It is provided for informational purposes only.
- cryptodatapy/conf/fields.csv +126 -126
- cryptodatapy/conf/tickers.csv +2020 -2020
- cryptodatapy/extract/data_vendors/coinmetrics_api.py +1 -1
- cryptodatapy/extract/data_vendors/polygon_api.py +388 -0
- cryptodatapy/extract/data_vendors/tiingo_api.py +0 -2
- cryptodatapy/extract/datarequest.py +3 -0
- cryptodatapy/extract/exchanges/dydx.py +627 -42
- cryptodatapy/extract/getdata.py +14 -9
- cryptodatapy/extract/libraries/pandasdr_api.py +20 -3
- cryptodatapy/transform/clean.py +0 -41
- cryptodatapy/transform/convertparams.py +222 -75
- cryptodatapy/transform/wrangle.py +71 -1
- cryptodatapy/util/datacredentials.py +11 -0
- cryptodatapy/util/utils.py +82 -0
- {cryptodatapy-0.2.24.dist-info → cryptodatapy-0.2.26.dist-info}/METADATA +3 -2
- {cryptodatapy-0.2.24.dist-info → cryptodatapy-0.2.26.dist-info}/RECORD +18 -16
- {cryptodatapy-0.2.24.dist-info → cryptodatapy-0.2.26.dist-info}/WHEEL +1 -1
- {cryptodatapy-0.2.24.dist-info → cryptodatapy-0.2.26.dist-info}/LICENSE +0 -0
```diff
@@ -1,17 +1,16 @@
 import logging
 from typing import Any, Dict, List, Optional, Union
+from datetime import datetime, timedelta
+import time
 
 import pandas as pd
-import
+import requests
+import pytz
 
 from cryptodatapy.extract.datarequest import DataRequest
 from cryptodatapy.extract.exchanges.exchange import Exchange
 from cryptodatapy.transform.convertparams import ConvertParams
 from cryptodatapy.transform.wrangle import WrangleData
-from cryptodatapy.util.datacredentials import DataCredentials
-
-# data credentials
-data_cred = DataCredentials()
 
 
 class Dydx(Exchange):
```
```diff
@@ -26,15 +25,20 @@ class Dydx(Exchange):
             categories: Union[str, List[str]] = "crypto",
             assets: Optional[Dict[str, List[str]]] = None,
             markets: Optional[Dict[str, List[str]]] = None,
-            market_types: List[str] = ["
-            fields: Optional[List[str]] = ["open", "high", "low", "close", "volume", "funding_rate"
-            frequencies: Optional[Dict[str, Union[str, int]]] =
-
-
-
-
+            market_types: List[str] = ["perpetual_future"],
+            fields: Optional[List[str]] = ["open", "high", "low", "close", "volume", "funding_rate"],
+            frequencies: Optional[Dict[str, Union[str, int]]] = {
+                "1m": "1MIN",
+                "5m": "5MINS",
+                "15m": "15MINS",
+                "1h": "1HOUR",
+                "4h": "4HOURS",
+                "1d": "1DAY"
+            },
+            fees: Optional[Dict[str, float]] = {'perpetual_future': {'maker': 0.0, 'taker': 0.0}},
+            base_url: Optional[str] = "https://indexer.dydx.trade/v4",
             api_key: Optional[str] = None,
-            max_obs_per_call: Optional[int] =
+            max_obs_per_call: Optional[int] = 1000,
             rate_limit: Optional[Any] = None
     ):
         """
```
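For orientation, a minimal instantiation sketch using the new defaults in this hunk. It assumes the `Exchange` base class stores these parameters as same-named attributes, which the method bodies below (`self.base_url`, `self.frequencies`) suggest but the diff does not show:

```python
from cryptodatapy.extract.exchanges.dydx import Dydx

# Defaults from the hunk above: perpetual futures only, OHLCV plus
# funding_rate fields, dYdX v4 indexer URL, 1000 observations per call.
dydx = Dydx()

print(dydx.base_url)           # https://indexer.dydx.trade/v4
print(dydx.frequencies["1h"])  # 1HOUR
print(dydx.max_obs_per_call)   # 1000
```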
```diff
@@ -90,5 +94,33 @@ class Dydx(Exchange):
         self.data_req = None
         self.data = pd.DataFrame()
 
-    def get_assets_info(self):
-
+    def get_assets_info(self) -> pd.DataFrame:
+        """
+        Gets info for available assets from dYdX.
+
+        Returns
+        -------
+        pd.DataFrame
+            DataFrame with asset information.
+        """
+        url = f"{self.base_url}/perpetualMarkets"
+        response = requests.get(url)
+        response.raise_for_status()
+
+        markets_data = response.json()['markets']
+        assets_info = []
+        seen_assets = set()
+
+        for ticker, market in markets_data.items():
+            base_currency = ticker.split('-')[0]
+            if base_currency not in seen_assets and market['status'] == 'ACTIVE':
+                assets_info.append({
+                    'asset_id': base_currency,
+                    'symbol': base_currency,
+                    'decimals': abs(int(market['atomicResolution'])),
+                    'status': market['status']
+                })
+                seen_assets.add(base_currency)
+
+        return pd.DataFrame(assets_info)
+
```
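`get_assets_info` is a thin wrapper over a single public indexer endpoint. A standalone sketch of the same call with plain `requests`, with the endpoint path and response shape taken from the hunk above:

```python
import requests

# Public dYdX v4 indexer endpoint used by get_assets_info.
resp = requests.get("https://indexer.dydx.trade/v4/perpetualMarkets", timeout=30)
resp.raise_for_status()

markets = resp.json()["markets"]  # dict keyed by ticker, e.g. "BTC-USD"

# Base assets of all ACTIVE markets, deduplicated as in the method above.
assets = sorted({t.split("-")[0] for t, m in markets.items() if m["status"] == "ACTIVE"})
print(assets[:10])
```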
```diff
@@ -94,0 +127,63 @@ class Dydx(Exchange):
+    def get_markets_info(self, quote_ccy: Optional[str] = None, mkt_type: Optional[str] = None, as_list: bool = False) -> Union[pd.DataFrame, List[str]]:
+        """
+        Gets info for available markets from dYdX.
+
+        Parameters
+        ----------
+        quote_ccy: str, optional
+            Quote currency to filter by (e.g., 'USD', 'USDC'). For dYdX, this is typically 'USD'.
+        mkt_type: str, optional
+            Market type to filter by. For dYdX, this is typically 'perpetual_future'.
+        as_list: bool, default False
+            If True, returns a list of ticker symbols instead of a DataFrame.
+
+        Returns
+        -------
+        pd.DataFrame or List[str]
+            DataFrame with market information or list of ticker symbols.
+        """
+        url = f"{self.base_url}/perpetualMarkets"
+        response = requests.get(url)
+        response.raise_for_status()
+
+        markets_data = response.json()['markets']
+        markets_info = []
+
+        for ticker, market in markets_data.items():
+            if market['status'] == 'ACTIVE':
+                base_currency = ticker.split('-')[0]
+                quote_currency = ticker.split('-')[1]
+
+                # Apply quote currency filter if specified
+                if quote_ccy is not None and quote_currency.upper() != quote_ccy.upper():
+                    continue
+
+                # Apply market type filter if specified
+                # dYdX only has perpetual futures, so we only include if mkt_type is None or 'perpetual_future'
+                if mkt_type is not None and mkt_type != 'perpetual_future':
+                    continue
+
+                markets_info.append({
+                    'ticker': ticker,
+                    'base_currency': base_currency,
+                    'quote_currency': quote_currency,
+                    'min_trade_amount': float(market['stepSize']),
+                    'price_precision': abs(int(market['atomicResolution'])),
+                    'min_price': float(market['tickSize']),
+                    'status': market['status'],
+                    'type': 'perpetual_future'  # dYdX only has perpetual futures
+                })
+
+        if not markets_info:
+            if as_list:
+                return []
+            else:
+                return pd.DataFrame()
+
+        df = pd.DataFrame(markets_info)
+
+        if as_list:
+            return df['ticker'].tolist()
+        else:
+            return df
+
```
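A usage sketch for the market listing; the column names follow the dict keys built in the hunk above, while the actual rows depend on the live indexer:

```python
from cryptodatapy.extract.exchanges.dydx import Dydx

dydx = Dydx()

# Active USD-quoted perpetual markets as a DataFrame.
mkts = dydx.get_markets_info(quote_ccy="USD")
print(mkts[["ticker", "min_trade_amount", "min_price", "price_precision"]].head())

# Or just the ticker symbols.
tickers = dydx.get_markets_info(as_list=True)  # e.g. ["BTC-USD", "ETH-USD", ...]
```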
```diff
@@ -94,0 +190,70 @@ class Dydx(Exchange):
+    def get_fields_info(self, data_type: Optional[str] = None) -> pd.DataFrame:
+        """
+        Gets info for available fields from dYdX.
+
+        Parameters
+        ----------
+        data_type: str, optional
+            Type of data for which to return field information.
+
+        Returns
+        -------
+        pd.DataFrame
+            DataFrame with field information.
+        """
+        fields = [
+            {'field': 'open', 'description': 'Opening price'},
+            {'field': 'high', 'description': 'Highest price'},
+            {'field': 'low', 'description': 'Lowest price'},
+            {'field': 'close', 'description': 'Closing price'},
+            {'field': 'volume', 'description': 'Trading volume'},
+            {'field': 'funding_rate', 'description': 'Hourly funding rate (dYdX charges funding every hour)'}
+        ]
+        return pd.DataFrame(fields)
+
+    def get_frequencies_info(self) -> pd.DataFrame:
+        """
+        Gets info for available frequencies from dYdX.
+
+        Returns
+        -------
+        pd.DataFrame
+            DataFrame with frequency information.
+        """
+        return pd.DataFrame({
+            'frequency': list(self.frequencies.keys()),
+            'description': list(self.frequencies.values())
+        })
+
+    def get_rate_limit_info(self) -> Dict[str, Any]:
+        """
+        Gets rate limit information from dYdX.
+
+        Returns
+        -------
+        Dict[str, Any]
+            Dictionary with rate limit information.
+        """
+        return {
+            'requests_per_second': 10,
+            'requests_per_minute': 300
+        }
+
+    def get_metadata(self) -> Dict[str, Any]:
+        """
+        Gets metadata about the exchange.
+
+        Returns
+        -------
+        Dict[str, Any]
+            Dictionary with exchange metadata.
+        """
+        return {
+            'name': self.name,
+            'type': self.exch_type,
+            'status': 'active' if self.is_active else 'inactive',
+            'categories': self.categories,
+            'market_types': self.market_types,
+            'base_url': self.base_url
+        }
+
```
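The `frequencies` dict doubles as the request-to-API resolution map, which `get_frequencies_info` simply tabulates. A small sketch (the mapped value is presumably what ends up in `data_req.source_freq` after `ConvertParams.to_dydx`, and is later sent as the candles `resolution` parameter in `_fetch_ohlcv` below):

```python
from cryptodatapy.extract.exchanges.dydx import Dydx

dydx = Dydx()

print(dydx.get_frequencies_info())
# Two columns: 'frequency' ('1m' ... '1d') and 'description' ('1MIN' ... '1DAY').

print(dydx.frequencies["1h"])  # 1HOUR
```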
```diff
@@ -95,13 +260,127 @@ class Dydx(Exchange):
+    def _fetch_ohlcv(self) -> pd.DataFrame:
+        """
+        Fetches OHLCV data from dYdX for multiple markets with pagination support.
+
+        The dYdX candles API has a limit (typically 1000 records) and returns data in
+        reverse chronological order (newest first). For large date ranges, we need to
+        implement pagination to retrieve all historical data.
+
+        Returns
+        -------
+        pd.DataFrame
+            DataFrame with OHLCV data for all requested markets.
+        """
+        if not self.data_req:
+            raise ValueError("Data request not set")
 
-
-
+        # Parse date range
+        try:
+            # source dates are guaranteed to be set by parameter conversion
+            start_dt = pd.to_datetime(self.data_req.source_start_date)
+            if start_dt.tz is None:
+                start_dt = start_dt.tz_localize('UTC')
+
+            end_dt = pd.to_datetime(self.data_req.source_end_date)
+            if end_dt.tz is None:
+                end_dt = end_dt.tz_localize('UTC')
+
+            # Add buffer to end date to ensure we get data up to the requested time
+            buffered_end_dt = end_dt + pd.Timedelta(hours=1)
+
+        except Exception as e:
+            logging.error(f"Could not parse date range: {e}")
+            return pd.DataFrame()
 
-
-
+        all_records = []
+
+        for ticker in self.data_req.source_tickers:
+            market_symbol = f"{ticker}-USD"
+
+            # Initialize pagination variables
+            current_end_date = buffered_end_dt
+            ticker_records = []
+            page_count = 0
+            max_pages = 100  # Safety limit for longer date ranges
+
+            while page_count < max_pages:
+                page_count += 1
+                url = f"{self.base_url}/candles/perpetualMarkets/{market_symbol}"
+
+                params = {
+                    'resolution': self.data_req.source_freq,
+                    'fromISO': self.data_req.source_start_date,
+                    'toISO': current_end_date.isoformat(),
+                    'limit': 1000  # Maximum allowed by dYdX API
+                }
+
+                try:
+                    response = requests.get(url, params=params, timeout=30)
+                    response.raise_for_status()
+                    data = response.json()
+
+                    # Validate API response
+                    if 'candles' not in data or not data['candles']:
+                        break
+
+                    page_records = data['candles']
+
+                    # Convert timestamps efficiently
+                    page_df = pd.DataFrame(page_records)
+                    page_df['startedAt'] = pd.to_datetime(page_df['startedAt'])
+
+                    # Ensure timezone consistency
+                    if page_df['startedAt'].dt.tz is None:
+                        page_df['startedAt'] = page_df['startedAt'].dt.tz_localize('UTC')
+
+                    # Early termination check - if oldest record is before start date
+                    oldest_timestamp = page_df['startedAt'].min()
+                    if oldest_timestamp < start_dt:
+                        # Filter only records within date range before adding
+                        mask = (page_df['startedAt'] >= start_dt) & (page_df['startedAt'] <= end_dt)
+                        filtered_records = page_df[mask]
+
+                        if not filtered_records.empty:
+                            ticker_records.extend(filtered_records.to_dict('records'))
+
+                        break
+                    else:
+                        # All records on this page are within or after the date range
+                        mask = page_df['startedAt'] <= end_dt
+                        filtered_records = page_df[mask]
+
+                        if not filtered_records.empty:
+                            ticker_records.extend(filtered_records.to_dict('records'))
+
+                    # Check if we got fewer records than requested (end of data)
+                    if len(page_records) < 1000:
+                        break
+
+                    # Set next pagination point (oldest timestamp from current page minus 1 second)
+                    current_end_date = oldest_timestamp - pd.Timedelta(seconds=1)
+
+                    # Rate limiting
+                    time.sleep(1.0)
+
+                except requests.exceptions.Timeout:
+                    logging.warning(f"Timeout fetching OHLCV data for {market_symbol} on page {page_count}, retrying...")
+                    time.sleep(2.0)
+                    continue
+                except requests.exceptions.RequestException as e:
+                    logging.error(f"Failed to fetch OHLCV data for {market_symbol} on page {page_count}: {str(e)}")
+                    break
+                except Exception as e:
+                    logging.error(f"Error processing OHLCV data for {market_symbol} on page {page_count}: {str(e)}")
+                    break
+
+            if ticker_records:
+                all_records.extend(ticker_records)
 
-
-
+        if not all_records:
+            return pd.DataFrame()
 
-
-
+        # Create final DataFrame
+        final_df = pd.DataFrame(all_records)
+        final_df = final_df.sort_values(['ticker', 'startedAt']).reset_index(drop=True)
+
+        return final_df
 
```
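The pagination pattern above — newest-first pages, walking `toISO` backwards past each page's oldest candle — can be exercised outside the class. A self-contained sketch under the same assumptions the hunk makes (1000-row page cap; `fromISO`/`toISO`/`resolution` query params):

```python
import time
import requests
import pandas as pd

BASE = "https://indexer.dydx.trade/v4"

def fetch_candles(market="BTC-USD", resolution="1HOUR",
                  start="2024-01-01T00:00:00Z", end="2024-02-01T00:00:00Z"):
    """Walk the reverse-chronological candles endpoint back in time."""
    start_dt = pd.to_datetime(start)
    current_end = pd.to_datetime(end)
    frames = []
    for _ in range(100):  # safety cap, as in the diff
        params = {"resolution": resolution, "fromISO": start,
                  "toISO": current_end.isoformat(), "limit": 1000}
        r = requests.get(f"{BASE}/candles/perpetualMarkets/{market}",
                         params=params, timeout=30)
        r.raise_for_status()
        candles = r.json().get("candles", [])
        if not candles:
            break
        df = pd.DataFrame(candles)
        df["startedAt"] = pd.to_datetime(df["startedAt"])
        frames.append(df)
        oldest = df["startedAt"].min()
        if oldest <= start_dt or len(candles) < 1000:
            break  # reached the requested start, or ran out of history
        current_end = oldest - pd.Timedelta(seconds=1)
        time.sleep(1.0)  # stay well under the indexer rate limit
    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
```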
```diff
@@ -108,14 +387,123 @@ class Dydx(Exchange):
-    def
-
+    def _fetch_funding_rates(self) -> pd.DataFrame:
+        """
+        Fetches funding rate data from dYdX for multiple markets with pagination support.
+
+        Note: dYdX charges funding every hour, unlike other exchanges that typically
+        use 8-hour funding cycles. This method retrieves the complete historical
+        funding rate data for any requested date range, making multiple API calls as needed.
 
-
-
+        Returns
+        -------
+        pd.DataFrame
+            DataFrame with hourly funding rate data for all requested markets.
+        """
+        if not self.data_req:
+            raise ValueError("Data request not set")
 
-
-
+        # Parse date range
+        try:
+            # source dates are guaranteed to be set by parameter conversion
+            start_dt = pd.to_datetime(self.data_req.source_start_date)
+            if start_dt.tz is None:
+                start_dt = start_dt.tz_localize('UTC')
+
+            end_dt = pd.to_datetime(self.data_req.source_end_date)
+            if end_dt.tz is None:
+                end_dt = end_dt.tz_localize('UTC')
+
+            buffered_end_dt = end_dt + pd.Timedelta(hours=1)
+
+        except Exception as e:
+            logging.error(f"Could not parse date range: {e}")
+            return pd.DataFrame()
 
-
-
+        all_records = []
+
+        for ticker in self.data_req.source_tickers:
+            market_symbol = f"{ticker}-USD"
+
+            # Initialize pagination variables
+            current_end_date = buffered_end_dt
+            ticker_records = []
+            page_count = 0
+            max_pages = 100
+
+            while page_count < max_pages:
+                page_count += 1
+                url = f"{self.base_url}/historicalFunding/{market_symbol}"
+
+                params = {
+                    'effectiveBeforeOrAt': current_end_date.isoformat(),
+                    'limit': 1000
+                }
+
+                try:
+                    response = requests.get(url, params=params, timeout=30)
+                    response.raise_for_status()
+                    data = response.json()
+
+                    if 'historicalFunding' not in data or not data['historicalFunding']:
+                        break
+
+                    page_records = data['historicalFunding']
+
+                    # Convert timestamps
+                    page_df = pd.DataFrame(page_records)
+                    page_df['effectiveAt'] = pd.to_datetime(page_df['effectiveAt'])
+
+                    if page_df['effectiveAt'].dt.tz is None:
+                        page_df['effectiveAt'] = page_df['effectiveAt'].dt.tz_localize('UTC')
+
+                    # Filter records within date range
+                    oldest_timestamp = page_df['effectiveAt'].min()
+                    if oldest_timestamp < start_dt:
+                        mask = (page_df['effectiveAt'] >= start_dt) & (page_df['effectiveAt'] <= buffered_end_dt)
+                        filtered_records = page_df[mask]
+
+                        if not filtered_records.empty:
+                            filtered_records = filtered_records.copy()
+                            filtered_records['rate'] = pd.to_numeric(filtered_records['rate'], errors='coerce')
+                            ticker_records.extend(filtered_records.to_dict('records'))
+
+                        break
+                    else:
+                        mask = page_df['effectiveAt'] <= buffered_end_dt
+                        filtered_records = page_df[mask]
+
+                        if not filtered_records.empty:
+                            filtered_records = filtered_records.copy()
+                            filtered_records['rate'] = pd.to_numeric(filtered_records['rate'], errors='coerce')
+                            ticker_records.extend(filtered_records.to_dict('records'))
+
+                    # Check if we got fewer records than requested (end of data)
+                    if len(page_records) < 1000:
+                        break
+
+                    # Set next pagination point
+                    current_end_date = oldest_timestamp - pd.Timedelta(microseconds=1)
+
+                    # Rate limiting
+                    time.sleep(1.0)
+
+                except requests.exceptions.Timeout:
+                    logging.warning(f"Timeout fetching funding rate data for {market_symbol}, retrying...")
+                    time.sleep(2.0)
+                    continue
+                except requests.exceptions.RequestException as e:
+                    logging.error(f"Failed to fetch funding rate data for {market_symbol}: {str(e)}")
+                    break
+                except Exception as e:
+                    logging.error(f"Error processing funding rate data for {market_symbol}: {str(e)}")
+                    break
+
+            if ticker_records:
+                all_records.extend(ticker_records)
 
-
-
+        if not all_records:
+            return pd.DataFrame()
+
+        # Create final DataFrame
+        final_df = pd.DataFrame(all_records)
+        final_df = final_df.sort_values(['ticker', 'effectiveAt']).reset_index(drop=True)
+
+        return final_df
```
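Because dYdX accrues funding hourly, 24 × 365 observations make up a year. A quick sketch pulling recent funding for one market from the endpoint above and annualizing the mean hourly rate (illustrative arithmetic, not part of the library):

```python
import requests
import pandas as pd

# historicalFunding returns newest-first rows with string `rate` values.
r = requests.get("https://indexer.dydx.trade/v4/historicalFunding/BTC-USD",
                 params={"limit": 100}, timeout=30)
r.raise_for_status()

df = pd.DataFrame(r.json()["historicalFunding"])
df["rate"] = pd.to_numeric(df["rate"], errors="coerce")

# Rough annualization of the mean hourly funding rate.
print(f"annualized funding ~ {df['rate'].mean() * 24 * 365:.4%}")
```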
```diff
@@ -121,0 +510,65 @@ class Dydx(Exchange):
+
+    def _fetch_open_interest(self) -> pd.DataFrame:
+        """
+        Fetches current open interest from dYdX.
+        Note: This implementation only provides current open interest values, not historical data.
+        Historical open interest data is not available through the dYdX API.
+
+        Returns
+        -------
+        pd.DataFrame
+            DataFrame with current open interest values.
+            The DataFrame has a MultiIndex with 'date' and 'ticker' levels.
+            The 'date' index will be the current timestamp for all entries.
+        """
+        if not self.data_req:
+            raise ValueError("Data request not set")
+
+        # Get current timestamp for all entries
+        current_time = pd.Timestamp.utcnow()
+
+        all_dfs = []
+        for ticker in self.data_req.source_tickers:
+            market_symbol = f"{ticker}-USD"
+            url = f"{self.base_url}/perpetualMarkets/{market_symbol}"
+
+            try:
+                response = requests.get(url)
+                response.raise_for_status()
+                data = response.json()
+
+                if 'market' not in data:
+                    logging.warning(f"No market data found for {market_symbol}")
+                    continue
+
+                market_data = data['market']
+                if 'openInterest' not in market_data:
+                    logging.warning(f"No open interest data found for {market_symbol}")
+                    continue
+
+                # Create DataFrame with current open interest data
+                df = pd.DataFrame({
+                    'oi': [float(market_data['openInterest'])],
+                    'date': [current_time],
+                    'ticker': [ticker]
+                })
+                all_dfs.append(df)
+            except requests.exceptions.RequestException as e:
+                logging.warning(f"Failed to fetch open interest for {market_symbol}: {str(e)}")
+                continue
+
+        if not all_dfs:
+            return pd.DataFrame()
+
+        # Combine all DataFrames
+        return pd.concat(all_dfs, ignore_index=True)
+
+    def _convert_params(self) -> None:
+        """
+        Converts parameters for the data request using ConvertParams class.
+        """
+        if not self.data_req:
+            raise ValueError("Data request not set")
+
+        # Convert parameters to dYdX format using ConvertParams class
+        self.data_req = ConvertParams(self.data_req).to_dydx()
```
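Since the indexer only exposes a point-in-time `openInterest` figure (hence the snapshot design documented above), the equivalent raw call for a single market is just:

```python
import requests

# Single-market endpoint used by _fetch_open_interest above.
r = requests.get("https://indexer.dydx.trade/v4/perpetualMarkets/ETH-USD", timeout=30)
r.raise_for_status()

market = r.json()["market"]
print(float(market["openInterest"]))  # current open-interest snapshot
```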
```diff
@@ -122,11 +575,48 @@ class Dydx(Exchange):
 
     @staticmethod
     def _wrangle_data_resp(data_req: DataRequest, data_resp: Union[Dict[str, Any], pd.DataFrame]) -> pd.DataFrame:
-
+        """
+        Wrangles data response from dYdX using WrangleData class.
 
-
-
+        Parameters
+        ----------
+        data_req: DataRequest
+            Parameters of data request.
+        data_resp: Union[Dict[str, Any], pd.DataFrame]
+            Data response from dYdX.
 
-
-
+        Returns
+        -------
+        pd.DataFrame
+            Wrangled DataFrame.
+        """
+        # Determine data type based on the actual data structure
+        if isinstance(data_resp, pd.DataFrame) and not data_resp.empty:
+            # Check columns to determine data type
+            if 'effectiveAt' in data_resp.columns or 'rate' in data_resp.columns:
+                data_type = 'funding_rates'
+            elif 'oi' in data_resp.columns:
+                data_type = 'open_interest'
+            elif 'startedAt' in data_resp.columns or any(col in data_resp.columns for col in ['open', 'high', 'low', 'close']):
+                data_type = 'ohlcv'
+            else:
+                # Fallback to field-based detection
+                if 'funding_rate' in data_req.fields:
+                    data_type = 'funding_rates'
+                elif 'oi' in data_req.fields:
+                    data_type = 'open_interest'
+                else:
+                    data_type = 'ohlcv'
+        else:
+            # Empty DataFrame or other format - use field-based detection
+            if 'funding_rate' in data_req.fields:
+                data_type = 'funding_rates'
+            elif 'oi' in data_req.fields:
+                data_type = 'open_interest'
+            else:
+                data_type = 'ohlcv'
+
+        # Use dYdX-specific wrangling method
+        wrangler = WrangleData(data_req, data_resp)
+        return wrangler.dydx(data_type)
 
```
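The column-based dispatch above can be illustrated with one-row stand-ins (hypothetical values; frames are non-empty so the column branch, not the field-based fallback, does the routing):

```python
import pandas as pd

ohlcv_resp = pd.DataFrame([{"startedAt": "2024-01-01T00:00:00Z", "ticker": "BTC-USD",
                            "open": "42000", "high": "42100",
                            "low": "41900", "close": "42050"}])
funding_resp = pd.DataFrame([{"effectiveAt": "2024-01-01T00:00:00Z",
                              "ticker": "BTC-USD", "rate": 0.00001}])
oi_resp = pd.DataFrame([{"date": "2024-01-01", "ticker": "BTC", "oi": 12345.0}])

# _wrangle_data_resp would route these to the 'ohlcv', 'funding_rates' and
# 'open_interest' branches of WrangleData.dydx, respectively.
```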
```diff
@@ -133,5 +623,100 @@ class Dydx(Exchange):
-    def
-
+    def _fetch_tidy_ohlcv(self) -> pd.DataFrame:
+        """
+        Fetches and tidies OHLCV data.
+
+        Returns
+        -------
+        pd.DataFrame
+            Tidy DataFrame with OHLCV data.
+        """
+        df = self._fetch_ohlcv()
+        return self._wrangle_data_resp(self.data_req, df)
+
+    def _fetch_tidy_funding_rates(self) -> pd.DataFrame:
+        """
+        Fetches and tidies funding rates.
+
+        Returns
+        -------
+        pd.DataFrame
+            Tidy DataFrame with funding rates.
+        """
+        df = self._fetch_funding_rates()
+        return self._wrangle_data_resp(self.data_req, df)
 
-    def
-
+    def _fetch_tidy_open_interest(self) -> pd.DataFrame:
+        """
+        Fetches and tidies open interest.
+
+        Returns
+        -------
+        pd.DataFrame
+            Tidy DataFrame with open interest.
+        """
+        df = self._fetch_open_interest()
+        return self._wrangle_data_resp(self.data_req, df)
+
+    def get_data(self, data_req: DataRequest) -> pd.DataFrame:
+        """
+        Gets market data from dYdX.
+
+        Parameters
+        ----------
+        data_req: DataRequest
+            Parameters of data request.
+
+        Returns
+        -------
+        pd.DataFrame
+            DataFrame with market data.
+        """
+        self.data_req = data_req
+        self._convert_params()
+
+        # Determine what types of data to fetch based on requested fields
+        ohlcv_fields = {'open', 'high', 'low', 'close', 'volume'}
+        requested_fields = set(data_req.fields)
+
+        needs_ohlcv = bool(ohlcv_fields.intersection(requested_fields))
+        needs_funding = 'funding_rate' in requested_fields
+        needs_oi = 'oi' in requested_fields
+
+        dfs_to_combine = []
+
+        # Fetch OHLCV data if needed
+        if needs_ohlcv:
+            ohlcv_df = self._fetch_tidy_ohlcv()
+            if not ohlcv_df.empty:
+                dfs_to_combine.append(ohlcv_df)
+
+        # Fetch funding rates if needed
+        if needs_funding:
+            funding_df = self._fetch_tidy_funding_rates()
+            if not funding_df.empty:
+                dfs_to_combine.append(funding_df)
+
+        # Fetch open interest if needed
+        if needs_oi:
+            oi_df = self._fetch_tidy_open_interest()
+            if not oi_df.empty:
+                dfs_to_combine.append(oi_df)
+
+        # Combine all DataFrames
+        if not dfs_to_combine:
+            return pd.DataFrame()
+        elif len(dfs_to_combine) == 1:
+            return dfs_to_combine[0]
+        else:
+            # Combine multiple DataFrames on their common index (date, ticker)
+            # Use proper merge strategy for different data frequencies
+            combined_df = dfs_to_combine[0]
+            for df in dfs_to_combine[1:]:
+                # Use merge instead of concat to handle different frequencies better
+                combined_df = combined_df.merge(df, left_index=True, right_index=True, how='outer')
+
+            # Filter to only requested fields
+            available_cols = [col for col in data_req.fields if col in combined_df.columns]
+            if available_cols:
+                combined_df = combined_df[available_cols]
+
+            return combined_df
```
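Putting it together, an end-to-end sketch of the new public entry point. The `DataRequest` keyword names follow cryptodatapy's usual request interface and should be checked against the installed version:

```python
from cryptodatapy.extract.datarequest import DataRequest
from cryptodatapy.extract.exchanges.dydx import Dydx

# Daily closes plus funding rates for two perpetuals since the start of 2024.
data_req = DataRequest(
    tickers=["btc", "eth"],
    fields=["close", "funding_rate"],
    freq="d",
    start_date="2024-01-01",
)

df = Dydx().get_data(data_req)  # indexed by (date, ticker), one column per field
print(df.head())
```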