cryptodatapy 0.2.5__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. cryptodatapy/conf/tickers.csv +0 -1
  2. cryptodatapy/extract/data_vendors/CoinMetrics.ipynb +747 -0
  3. cryptodatapy/extract/data_vendors/coinmetrics_api.py +279 -209
  4. cryptodatapy/extract/data_vendors/cryptocompare_api.py +3 -5
  5. cryptodatapy/extract/data_vendors/datavendor.py +32 -12
  6. cryptodatapy/extract/data_vendors/glassnode_api.py +3 -2
  7. cryptodatapy/extract/data_vendors/tiingo_api.py +3 -2
  8. cryptodatapy/extract/datarequest.py +55 -9
  9. cryptodatapy/extract/libraries/ccxt_api.py +13 -2
  10. cryptodatapy/transform/cc_onchain_data.csv +118423 -0
  11. cryptodatapy/transform/clean.py +17 -15
  12. cryptodatapy/transform/clean_onchain_data.ipynb +4750 -0
  13. cryptodatapy/transform/clean_perp_futures_ohlcv.ipynb +1597 -1178
  14. cryptodatapy/transform/convertparams.py +28 -18
  15. cryptodatapy/transform/credit_data.ipynb +291 -0
  16. cryptodatapy/transform/eqty_data.ipynb +809 -0
  17. cryptodatapy/transform/filter.py +13 -10
  18. cryptodatapy/transform/global_credit_data_daily.parquet +0 -0
  19. cryptodatapy/transform/od.py +1 -0
  20. cryptodatapy/transform/rates_data.ipynb +465 -0
  21. cryptodatapy/transform/us_rates_daily.csv +227752 -0
  22. cryptodatapy/util/datacredentials.py +28 -7
  23. {cryptodatapy-0.2.5.dist-info → cryptodatapy-0.2.6.dist-info}/METADATA +2 -2
  24. {cryptodatapy-0.2.5.dist-info → cryptodatapy-0.2.6.dist-info}/RECORD +26 -28
  25. cryptodatapy/.DS_Store +0 -0
  26. cryptodatapy/.idea/.gitignore +0 -3
  27. cryptodatapy/.idea/cryptodatapy.iml +0 -12
  28. cryptodatapy/.idea/csv-plugin.xml +0 -16
  29. cryptodatapy/.idea/inspectionProfiles/Project_Default.xml +0 -6
  30. cryptodatapy/.idea/inspectionProfiles/profiles_settings.xml +0 -6
  31. cryptodatapy/.idea/misc.xml +0 -4
  32. cryptodatapy/.idea/modules.xml +0 -8
  33. cryptodatapy/.idea/vcs.xml +0 -6
  34. cryptodatapy/extract/libraries/ccxt.ipynb +0 -873
  35. {cryptodatapy-0.2.5.dist-info → cryptodatapy-0.2.6.dist-info}/LICENSE +0 -0
  36. {cryptodatapy-0.2.5.dist-info → cryptodatapy-0.2.6.dist-info}/WHEEL +0 -0
@@ -1,7 +1,6 @@
1
1
  from __future__ import annotations
2
2
  from typing import Optional, Union
3
3
  import pandas as pd
4
-
5
4
  from cryptodatapy.transform.od import OutlierDetection
6
5
  from cryptodatapy.transform.impute import Impute
7
6
  from cryptodatapy.transform.filter import Filter
@@ -66,9 +65,9 @@ class CleanData:
66
65
  self.excluded_cols = None
67
66
  self.outliers = None
68
67
  self.yhat = None
68
+ self.repaired_df = None
69
69
  self.filtered_df = None
70
70
  self.filtered_tickers = None
71
- self.repaired_df = None
72
71
  self.summary = pd.DataFrame()
73
72
  self.initialize_summary()
74
73
  self.check_types()
@@ -127,7 +126,7 @@ class CleanData:
127
126
 
128
127
  # add to summary
129
128
  self.summary.loc["%_outliers", self.outliers.unstack().columns] = (
130
- self.outliers.unstack().notna().sum() / self.df.unstack().shape[0]
129
+ self.outliers.unstack().notna().sum() / od.df.unstack().notna().sum()
131
130
  ).values * 100
132
131
 
133
132
  # filtered df
@@ -157,7 +156,7 @@ class CleanData:
157
156
 
158
157
  # add repaired % to summary
159
158
  rep_vals = self.repaired_df.unstack().notna().sum() - self.df.unstack().notna().sum()
160
- self.summary.loc["%_imputed", self.df.unstack().columns] = rep_vals / self.df.unstack().shape[0] * 100
159
+ self.summary.loc["%_imputed", self.df.unstack().columns] = rep_vals / self.df.unstack().notna().sum() * 100
161
160
 
162
161
  # repaired df
163
162
  if self.excluded_cols is not None:
@@ -192,9 +191,8 @@ class CleanData:
192
191
 
193
192
  # add to summary
194
193
  filtered_vals = self.df.unstack().notna().sum() - self.filtered_df.unstack().notna().sum()
195
- self.summary.loc["%_below_avg_trading_val", self.df.unstack().columns] = (
196
- filtered_vals / self.df.unstack().shape[0]
197
- ).values * 100
194
+ self.summary.loc["%_below_avg_trading_val", self.df.unstack().columns] = \
195
+ (filtered_vals / self.df.unstack().notna().sum()).values * 100
198
196
 
199
197
  # filtered df
200
198
  self.df = self.filtered_df.sort_index()
@@ -223,7 +221,7 @@ class CleanData:
223
221
  self.df.unstack().notna().sum() - self.filtered_df.unstack().notna().sum()
224
222
  )
225
223
  self.summary.loc["%_missing_vals_gaps", self.df.unstack().columns] = (
226
- missing_vals_gap / self.df.unstack().shape[0]
224
+ missing_vals_gap / self.df.unstack().notna().sum()
227
225
  ).values * 100
228
226
 
229
227
  # filtered df
@@ -258,23 +256,21 @@ class CleanData:
258
256
  )
259
257
 
260
258
  # add to summary
261
- self.summary.loc["n_tickers_below_min_obs", self.df.unstack().columns] = len(self.filtered_tickers)
259
+ self.summary.loc["n_filtered_tickers", self.df.unstack().columns] = len(self.filtered_tickers)
262
260
 
263
261
  # filtered df
264
262
  self.df = self.filtered_df.sort_index()
265
263
 
266
264
  return self
267
265
 
268
- def filter_delisted_tickers(self, field: str = 'close', n_unch_vals: int = 30) -> CleanData:
266
+ def filter_delisted_tickers(self, method: str = 'replace') -> CleanData:
269
267
  """
270
268
  Removes delisted tickers from dataframe.
271
269
 
272
270
  Parameters
273
271
  ----------
274
- field: str, default 'close'
275
- Field/column to use for detecting delisted tickers.
276
- n_unch_vals: int, default 30
277
- Number of consecutive unchanged values to consider a ticker as delisted.
272
+ method: str, {'replace', 'remove'}, default 'replace'
273
+ Method to use for handling delisted tickers.
278
274
 
279
275
  Returns
280
276
  -------
@@ -282,7 +278,7 @@ class CleanData:
282
278
  CleanData object
283
279
  """
284
280
  # filter tickers
285
- self.filtered_df = Filter(self.df).remove_delisted(field=field, n_unch_vals=n_unch_vals)
281
+ self.filtered_df = Filter(self.df).delisted_tickers(method=method)
286
282
 
287
283
  # tickers < min obs
288
284
  self.filtered_tickers = list(
@@ -292,6 +288,12 @@ class CleanData:
292
288
  )
293
289
 
294
290
  # add to summary
291
+ filtered_vals = (
292
+ self.df.unstack().notna().sum() - self.filtered_df.unstack().notna().sum()
293
+ )
294
+ self.summary.loc["%_delisted_ticker_vals", self.df.unstack().columns] = (
295
+ filtered_vals / self.df.unstack().notna().sum()
296
+ ).values * 100
295
297
  self.summary.loc["n_filtered_tickers", self.df.unstack().columns] = len(self.filtered_tickers)
296
298
 
297
299
  # filtered df