cryptodatapy 0.2.5__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cryptodatapy/conf/tickers.csv +0 -1
- cryptodatapy/extract/data_vendors/CoinMetrics.ipynb +747 -0
- cryptodatapy/extract/data_vendors/coinmetrics_api.py +279 -209
- cryptodatapy/extract/data_vendors/cryptocompare_api.py +3 -5
- cryptodatapy/extract/data_vendors/datavendor.py +32 -12
- cryptodatapy/extract/data_vendors/glassnode_api.py +3 -2
- cryptodatapy/extract/data_vendors/tiingo_api.py +3 -2
- cryptodatapy/extract/datarequest.py +55 -9
- cryptodatapy/extract/libraries/ccxt_api.py +13 -2
- cryptodatapy/transform/cc_onchain_data.csv +118423 -0
- cryptodatapy/transform/clean.py +17 -15
- cryptodatapy/transform/clean_onchain_data.ipynb +4750 -0
- cryptodatapy/transform/clean_perp_futures_ohlcv.ipynb +1597 -1178
- cryptodatapy/transform/convertparams.py +28 -18
- cryptodatapy/transform/credit_data.ipynb +291 -0
- cryptodatapy/transform/eqty_data.ipynb +809 -0
- cryptodatapy/transform/filter.py +13 -10
- cryptodatapy/transform/global_credit_data_daily.parquet +0 -0
- cryptodatapy/transform/od.py +1 -0
- cryptodatapy/transform/rates_data.ipynb +465 -0
- cryptodatapy/transform/us_rates_daily.csv +227752 -0
- cryptodatapy/util/datacredentials.py +28 -7
- {cryptodatapy-0.2.5.dist-info → cryptodatapy-0.2.6.dist-info}/METADATA +2 -2
- {cryptodatapy-0.2.5.dist-info → cryptodatapy-0.2.6.dist-info}/RECORD +26 -28
- cryptodatapy/.DS_Store +0 -0
- cryptodatapy/.idea/.gitignore +0 -3
- cryptodatapy/.idea/cryptodatapy.iml +0 -12
- cryptodatapy/.idea/csv-plugin.xml +0 -16
- cryptodatapy/.idea/inspectionProfiles/Project_Default.xml +0 -6
- cryptodatapy/.idea/inspectionProfiles/profiles_settings.xml +0 -6
- cryptodatapy/.idea/misc.xml +0 -4
- cryptodatapy/.idea/modules.xml +0 -8
- cryptodatapy/.idea/vcs.xml +0 -6
- cryptodatapy/extract/libraries/ccxt.ipynb +0 -873
- {cryptodatapy-0.2.5.dist-info → cryptodatapy-0.2.6.dist-info}/LICENSE +0 -0
- {cryptodatapy-0.2.5.dist-info → cryptodatapy-0.2.6.dist-info}/WHEEL +0 -0
cryptodatapy/transform/clean.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
from typing import Optional, Union
|
3
3
|
import pandas as pd
|
4
|
-
|
5
4
|
from cryptodatapy.transform.od import OutlierDetection
|
6
5
|
from cryptodatapy.transform.impute import Impute
|
7
6
|
from cryptodatapy.transform.filter import Filter
|
@@ -66,9 +65,9 @@ class CleanData:
|
|
66
65
|
self.excluded_cols = None
|
67
66
|
self.outliers = None
|
68
67
|
self.yhat = None
|
68
|
+
self.repaired_df = None
|
69
69
|
self.filtered_df = None
|
70
70
|
self.filtered_tickers = None
|
71
|
-
self.repaired_df = None
|
72
71
|
self.summary = pd.DataFrame()
|
73
72
|
self.initialize_summary()
|
74
73
|
self.check_types()
|
@@ -127,7 +126,7 @@ class CleanData:
|
|
127
126
|
|
128
127
|
# add to summary
|
129
128
|
self.summary.loc["%_outliers", self.outliers.unstack().columns] = (
|
130
|
-
self.outliers.unstack().notna().sum() /
|
129
|
+
self.outliers.unstack().notna().sum() / od.df.unstack().notna().sum()
|
131
130
|
).values * 100
|
132
131
|
|
133
132
|
# filtered df
|
@@ -157,7 +156,7 @@ class CleanData:
|
|
157
156
|
|
158
157
|
# add repaired % to summary
|
159
158
|
rep_vals = self.repaired_df.unstack().notna().sum() - self.df.unstack().notna().sum()
|
160
|
-
self.summary.loc["%_imputed", self.df.unstack().columns] = rep_vals / self.df.unstack().
|
159
|
+
self.summary.loc["%_imputed", self.df.unstack().columns] = rep_vals / self.df.unstack().notna().sum() * 100
|
161
160
|
|
162
161
|
# repaired df
|
163
162
|
if self.excluded_cols is not None:
|
@@ -192,9 +191,8 @@ class CleanData:
|
|
192
191
|
|
193
192
|
# add to summary
|
194
193
|
filtered_vals = self.df.unstack().notna().sum() - self.filtered_df.unstack().notna().sum()
|
195
|
-
self.summary.loc["%_below_avg_trading_val", self.df.unstack().columns] =
|
196
|
-
filtered_vals / self.df.unstack().
|
197
|
-
).values * 100
|
194
|
+
self.summary.loc["%_below_avg_trading_val", self.df.unstack().columns] = \
|
195
|
+
(filtered_vals / self.df.unstack().notna().sum()).values * 100
|
198
196
|
|
199
197
|
# filtered df
|
200
198
|
self.df = self.filtered_df.sort_index()
|
@@ -223,7 +221,7 @@ class CleanData:
|
|
223
221
|
self.df.unstack().notna().sum() - self.filtered_df.unstack().notna().sum()
|
224
222
|
)
|
225
223
|
self.summary.loc["%_missing_vals_gaps", self.df.unstack().columns] = (
|
226
|
-
missing_vals_gap / self.df.unstack().
|
224
|
+
missing_vals_gap / self.df.unstack().notna().sum()
|
227
225
|
).values * 100
|
228
226
|
|
229
227
|
# filtered df
|
@@ -258,23 +256,21 @@ class CleanData:
|
|
258
256
|
)
|
259
257
|
|
260
258
|
# add to summary
|
261
|
-
self.summary.loc["
|
259
|
+
self.summary.loc["n_filtered_tickers", self.df.unstack().columns] = len(self.filtered_tickers)
|
262
260
|
|
263
261
|
# filtered df
|
264
262
|
self.df = self.filtered_df.sort_index()
|
265
263
|
|
266
264
|
return self
|
267
265
|
|
268
|
-
def filter_delisted_tickers(self,
|
266
|
+
def filter_delisted_tickers(self, method: str = 'replace') -> CleanData:
|
269
267
|
"""
|
270
268
|
Removes delisted tickers from dataframe.
|
271
269
|
|
272
270
|
Parameters
|
273
271
|
----------
|
274
|
-
|
275
|
-
|
276
|
-
n_unch_vals: int, default 30
|
277
|
-
Number of consecutive unchanged values to consider a ticker as delisted.
|
272
|
+
method: str, {'replace', 'remove'}, default 'replace'
|
273
|
+
Method to use for handling delisted tickers.
|
278
274
|
|
279
275
|
Returns
|
280
276
|
-------
|
@@ -282,7 +278,7 @@ class CleanData:
|
|
282
278
|
CleanData object
|
283
279
|
"""
|
284
280
|
# filter tickers
|
285
|
-
self.filtered_df = Filter(self.df).
|
281
|
+
self.filtered_df = Filter(self.df).delisted_tickers(method=method)
|
286
282
|
|
287
283
|
# tickers < min obs
|
288
284
|
self.filtered_tickers = list(
|
@@ -292,6 +288,12 @@ class CleanData:
|
|
292
288
|
)
|
293
289
|
|
294
290
|
# add to summary
|
291
|
+
filtered_vals = (
|
292
|
+
self.df.unstack().notna().sum() - self.filtered_df.unstack().notna().sum()
|
293
|
+
)
|
294
|
+
self.summary.loc["%_delisted_ticker_vals", self.df.unstack().columns] = (
|
295
|
+
filtered_vals / self.df.unstack().notna().sum()
|
296
|
+
).values * 100
|
295
297
|
self.summary.loc["n_filtered_tickers", self.df.unstack().columns] = len(self.filtered_tickers)
|
296
298
|
|
297
299
|
# filtered df
|