cryptodatapy 0.2.3__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/PKG-INFO +4 -1
  2. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/pyproject.toml +4 -1
  3. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/setup.py +4 -1
  4. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/transform/clean.py +43 -7
  5. cryptodatapy-0.2.4/src/cryptodatapy/transform/clean_perp_futures_ohlcv.ipynb +1025 -0
  6. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/transform/filter.py +32 -4
  7. cryptodatapy-0.2.3/src/cryptodatapy/transform/clean_perp_futures_ohlcv.ipynb +0 -1639
  8. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/LICENSE +0 -0
  9. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/README.md +0 -0
  10. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/.DS_Store +0 -0
  11. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/.idea/.gitignore +0 -0
  12. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/.idea/cryptodatapy.iml +0 -0
  13. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/.idea/csv-plugin.xml +0 -0
  14. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/.idea/inspectionProfiles/Project_Default.xml +0 -0
  15. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/.idea/inspectionProfiles/profiles_settings.xml +0 -0
  16. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/.idea/misc.xml +0 -0
  17. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/.idea/modules.xml +0 -0
  18. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/.idea/vcs.xml +0 -0
  19. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/__init__.py +0 -0
  20. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/conf/__init__.py +0 -0
  21. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/conf/fields.csv +0 -0
  22. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/conf/fx_tickers.csv +0 -0
  23. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/conf/tickers.csv +0 -0
  24. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/datasets/__init__.py +0 -0
  25. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/datasets/br_econ_calendar.csv +0 -0
  26. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/datasets/ca_econ_calendar.csv +0 -0
  27. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/datasets/cn_econ_calendar.csv +0 -0
  28. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/datasets/de_econ_calendar.csv +0 -0
  29. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/datasets/ez_econ_calendar.csv +0 -0
  30. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/datasets/fr_econ_calendar.csv +0 -0
  31. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/datasets/gb_econ_calendar.csv +0 -0
  32. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/datasets/get_econ_calendars.py +0 -0
  33. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/datasets/id_econ_calendar.csv +0 -0
  34. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/datasets/in_econ_calendar.csv +0 -0
  35. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/datasets/it_econ_calendar.csv +0 -0
  36. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/datasets/jp_econ_calendar.csv +0 -0
  37. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/datasets/kr_econ_calendar.csv +0 -0
  38. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/datasets/mx_econ_calendar.csv +0 -0
  39. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/datasets/ru_econ_calendar.csv +0 -0
  40. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/datasets/tr_econ_calendar.csv +0 -0
  41. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/datasets/us_econ_calendar.csv +0 -0
  42. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/extract/__init__.py +0 -0
  43. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/extract/data_vendors/.ipynb_checkpoints/CCXT-checkpoint.ipynb +0 -0
  44. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/extract/data_vendors/.ipynb_checkpoints/DBNomics-checkpoint.ipynb +0 -0
  45. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/extract/data_vendors/.ipynb_checkpoints/InvestPy-checkpoint.ipynb +0 -0
  46. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/extract/data_vendors/.ipynb_checkpoints/NasdaqDataLink-checkpoint.ipynb +0 -0
  47. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/extract/data_vendors/.ipynb_checkpoints/PandasDataReader-checkpoint.ipynb +0 -0
  48. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/extract/data_vendors/__init__.py +0 -0
  49. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/extract/data_vendors/coinmetrics_api.py +0 -0
  50. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/extract/data_vendors/cryptocompare_api.py +0 -0
  51. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/extract/data_vendors/datavendor.py +0 -0
  52. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/extract/data_vendors/glassnode_api.py +0 -0
  53. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/extract/data_vendors/tiingo_api.py +0 -0
  54. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/extract/datarequest.py +0 -0
  55. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/extract/getdata.py +0 -0
  56. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/extract/libraries/__init__.py +0 -0
  57. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/extract/libraries/ccxt_api.py +0 -0
  58. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/extract/libraries/dbnomics_api.py +0 -0
  59. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/extract/libraries/investpy_api.py +0 -0
  60. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/extract/libraries/library.py +0 -0
  61. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/extract/libraries/pandasdr_api.py +0 -0
  62. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/extract/web/__init__.py +0 -0
  63. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/extract/web/aqr.py +0 -0
  64. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/extract/web/web.py +0 -0
  65. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/transform/__init__.py +0 -0
  66. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/transform/convertparams.py +0 -0
  67. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/transform/impute.py +0 -0
  68. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/transform/od.py +0 -0
  69. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/transform/wrangle.py +0 -0
  70. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/util/__init__.py +0 -0
  71. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/util/datacatalog.py +0 -0
  72. {cryptodatapy-0.2.3 → cryptodatapy-0.2.4}/src/cryptodatapy/util/datacredentials.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cryptodatapy
3
- Version: 0.2.3
3
+ Version: 0.2.4
4
4
  Summary: Cryptoasset data library
5
5
  License: Apache-2.0
6
6
  Author: Systamental
@@ -13,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.9
13
13
  Requires-Dist: DBnomics (>=1.2.3)
14
14
  Requires-Dist: ccxt (>=1.91.52)
15
15
  Requires-Dist: coinmetrics-api-client (>=2022.6.17); python_version >= "3.7"
16
+ Requires-Dist: fsspec (>=2024.6.1)
16
17
  Requires-Dist: investpy (>=1.0.8)
17
18
  Requires-Dist: matplotlib (>=3.5.2)
18
19
  Requires-Dist: numpy (>=1.23.2)
@@ -20,8 +21,10 @@ Requires-Dist: openpyxl (>=3.1.2)
20
21
  Requires-Dist: pandas (>=1.4.4)
21
22
  Requires-Dist: pandas-datareader (>=0.10.0)
22
23
  Requires-Dist: prophet (>=1.1); python_version >= "3.7"
24
+ Requires-Dist: pyarrow (>=17.0.0)
23
25
  Requires-Dist: requests (>=2.28.0); python_version >= "3.7"
24
26
  Requires-Dist: responses (>=0.21.0)
27
+ Requires-Dist: s3fs (>=2024.6.1,<2025.0.0)
25
28
  Requires-Dist: selenium (>=4.4.3)
26
29
  Requires-Dist: statsmodels (>=0.13.2)
27
30
  Requires-Dist: webdriver-manager (>=3.8.3)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "cryptodatapy"
3
- version = "0.2.3"
3
+ version = "0.2.4"
4
4
  description = "Cryptoasset data library"
5
5
  authors = ["Systamental"]
6
6
  license = "Apache License 2.0"
@@ -25,6 +25,9 @@ responses = ">=0.21.0"
25
25
  yfinance = ">=0.2.14"
26
26
  openpyxl = ">=3.1.2"
27
27
  xlrd = ">=2.0.1"
28
+ fsspec = ">=2024.6.1"
29
+ pyarrow = ">=17.0.0"
30
+ s3fs = "^2024.6.1"
28
31
 
29
32
  [tool.poetry.dev-dependencies]
30
33
  pytest = ">=7.1.2"
@@ -83,13 +83,16 @@ package_data = \
83
83
  install_requires = \
84
84
  ['DBnomics>=1.2.3',
85
85
  'ccxt>=1.91.52',
86
+ 'fsspec>=2024.6.1',
86
87
  'investpy>=1.0.8',
87
88
  'matplotlib>=3.5.2',
88
89
  'numpy>=1.23.2',
89
90
  'openpyxl>=3.1.2',
90
91
  'pandas-datareader>=0.10.0',
91
92
  'pandas>=1.4.4',
93
+ 'pyarrow>=17.0.0',
92
94
  'responses>=0.21.0',
95
+ 's3fs>=2024.6.1,<2025.0.0',
93
96
  'selenium>=4.4.3',
94
97
  'statsmodels>=0.13.2',
95
98
  'webdriver-manager>=3.8.3',
@@ -103,7 +106,7 @@ extras_require = \
103
106
 
104
107
  setup_kwargs = {
105
108
  'name': 'cryptodatapy',
106
- 'version': '0.2.3',
109
+ 'version': '0.2.4',
107
110
  'description': 'Cryptoasset data library',
108
111
  'long_description': "![](cryptodatapy_logo.jpeg)\n\n# CryptoDataPy\n### _Better data beats fancier algorithms_\n<br/>\n\n**CryptoDataPy** is a python library which makes it easy to build high quality data pipelines \nfor the analysis of digital assets. By providing easy access to over 100,000 time series for thousands of assets, \nit facilitates the pre-processing of a wide range of data from different sources.\n\nCryptoassets generate a huge amount of market, on-chain and off-chain data. \nBut unlike legacy financial markets, this data is often fragmented, \nunstructured and dirty. By extracting data from various sources, \npre-processing it into a user-friendly (tidy) format, detecting and repairing 'bad' data,\nand allowing for easy storage and retrieval, CryptoDataPy allows you to spend less time gathering \nand cleaning data, and more time analyzing it.\n\nOur data includes:\n\n- **Market:** market prices of varying granularity (e.g. tick, trade and bar data, aka OHLC),\nfor spot, futures and options markets, as well as funding rates for the analysis of \ncryptoasset returns.\n- **On-chain:** network health and usage data, circulating supply, asset holder positions and \ncost-basis, for the analysis of underlying crypto network fundamentals.\n- **Off-chain:** news, social media, developer activity, web traffic and search for project interest and \nsentiment, as well as traditional financial market and macroeconomic data for broader financial and \neconomic conditions.\n\nThe library's intuitive interface facilitates each step of the ETL (extract-transform-load) process:\n\n- **Extract**: Extracting data from a wide range of data sources and file formats.\n- **Transform**: \n - Wrangling data into a pandas DataFrame in a structured and user-friendly format, \n a.k.a [tidy data](https://www.jstatsoft.org/article/view/v059i10). \n - Detecting, scrubbing and repairing 'bad' data (e.g. outliers, missing values, 0s, etc.) 
to improve the accuracy and reliability\nof machine learning/predictive models.\n- **Load**: Storing clean and ready-for-analysis data and metadata for easy access.\n\n## Installation\n\n```bash\n$ pip install cryptodatapy\n```\n\n## Usage\n\n**CryptoDataPy** allows you to pull ready-to-analyze data from a variety of sources \nwith only a few lines of code.\n\nFirst specify which data you want with a `DataRequest`:\n\n```python\n# import DataRequest\nfrom cryptodatapy.extract.datarequest import DataRequest\n# specify parameters for data request: tickers, fields, start date, end_date, etc.\ndata_req = DataRequest(\n source='glassnode', # name of data source\n tickers=['btc', 'eth'], # list of asset tickers, in CryptoDataPy format, defaults to 'btc'\n fields=['close', 'add_act', 'hashrate'], # list of fields, in CryptoDataPy, defaults to 'close'\n freq=None, # data frequency, defaults to daily \n quote_ccy=None, # defaults to USD/USDT\n exch=None, # defaults to exchange weighted average or Binance\n mkt_type= 'spot', # defaults to spot\n start_date=None, # defaults to start date for longest series\n end_date=None, # defaults to most recent \n tz=None, # defaults to UTC time\n cat=None, # optional, should be specified when asset class is not crypto, eg. 
'fx', 'rates', 'macro', etc.\n)\n```\nThen get the data:\n\n```python\n# import GetData\nfrom cryptodatapy.extract.getdata import GetData\n# get data\nGetData(data_req).get_series()\n```\n\nWith the same data request parameters, you can retrieve the same data from a different source:\n\n```python\n# modify data source parameter\ndata_req = DataRequest(\n source='coinmetrics', \n tickers=['btc', 'eth'], \n fields=['close', 'add_act', 'hashrate'], \n freq='d',\n start_date='2016-01-01')\n# get data\nGetData(data_req).get_series()\n```\n\nFor more detailed code examples and interactive tutorials \nsee [here](https://github.com/systamental/cryptodatapy/blob/main/docs/example.ipynb).\n## Supported Data Sources\n\n- [CryptoCompare](https://min-api.cryptocompare.com/documentation)\n- [CCXT](https://docs.ccxt.com/en/latest/)\n- [Glassnode](https://docs.glassnode.com/)\n- [Coin Metrics](https://docs.coinmetrics.io/api/v4/)\n- [Tiingo](https://api.tiingo.com/documentation/general/overview)\n- [Yahoo Finance](https://github.com/ranaroussi/yfinance)\n- [Fama-French Data](http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html)\n- [AQR](https://www.aqr.com/Insights/Datasets)\n- [Federal Reserve Economic Data (FRED)](https://fred.stlouisfed.org/docs/api/fred/)\n- [DBnomics](https://db.nomics.world/docs/)\n- [WorldBank](https://documents.worldbank.org/en/publication/documents-reports/api)\n- [Pandas-datareader](https://pandas-datareader.readthedocs.io/en/latest/)\n\n## Contributing\n\nInterested in contributing? Check out the contributing guidelines and \ncontact us at info@systamental.com. Please note that this project is\nreleased with a Code of Conduct. By contributing to this project, you agree \nto abide by its terms.\n\n## License\n\n`cryptodatapy` was created by Systamental. \nIt is licensed under the terms of the Apache License 2.0 license.\n\n",
109
112
  'author': 'Systamental',
@@ -131,7 +131,7 @@ class CleanData:
131
131
  ).values * 100
132
132
 
133
133
  # filtered df
134
- self.df = self.filtered_df
134
+ self.df = self.filtered_df.sort_index()
135
135
 
136
136
  return self
137
137
 
@@ -161,11 +161,12 @@ class CleanData:
161
161
 
162
162
  # repaired df
163
163
  if self.excluded_cols is not None:
164
- self.df = pd.concat([self.repaired_df, self.raw_df[self.excluded_cols]], join="outer", axis=1)
164
+ self.df = pd.concat([self.repaired_df, self.raw_df[self.excluded_cols]], join="inner", axis=1)
165
165
  else:
166
166
  self.df = self.repaired_df
167
+
167
168
  # reorder cols
168
- self.df = self.df[self.raw_df.columns]
169
+ self.df = self.df[self.raw_df.columns].sort_index()
169
170
 
170
171
  return self
171
172
 
@@ -196,7 +197,7 @@ class CleanData:
196
197
  ).values * 100
197
198
 
198
199
  # filtered df
199
- self.df = self.filtered_df
200
+ self.df = self.filtered_df.sort_index()
200
201
 
201
202
  return self
202
203
 
@@ -226,7 +227,7 @@ class CleanData:
226
227
  ).values * 100
227
228
 
228
229
  # filtered df
229
- self.df = self.filtered_df
230
+ self.df = self.filtered_df.sort_index()
230
231
 
231
232
  return self
232
233
 
@@ -260,7 +261,41 @@ class CleanData:
260
261
  self.summary.loc["n_tickers_below_min_obs", self.df.unstack().columns] = len(self.filtered_tickers)
261
262
 
262
263
  # filtered df
263
- self.df = self.filtered_df
264
+ self.df = self.filtered_df.sort_index()
265
+
266
+ return self
267
+
268
+ def filter_delisted_tickers(self, field: str = 'close', n_unch_vals: int = 30) -> CleanData:
269
+ """
270
+ Removes delisted tickers from dataframe.
271
+
272
+ Parameters
273
+ ----------
274
+ field: str, default 'close'
275
+ Field/column to use for detecting delisted tickers.
276
+ n_unch_vals: int, default 30
277
+ Number of consecutive unchanged values to consider a ticker as delisted.
278
+
279
+ Returns
280
+ -------
281
+ CleanData
282
+ CleanData object
283
+ """
284
+ # filter tickers
285
+ self.filtered_df = Filter(self.df).remove_delisted(field=field, n_unch_vals=n_unch_vals)
286
+
287
+ # tickers removed as delisted (in self.df but not in the filtered df)
288
+ self.filtered_tickers = list(
289
+ set(self.filtered_df.index.droplevel(0).unique()).symmetric_difference(
290
+ set(self.df.index.droplevel(0).unique())
291
+ )
292
+ )
293
+
294
+ # add to summary
295
+ self.summary.loc["n_filtered_tickers", self.df.unstack().columns] = len(self.filtered_tickers)
296
+
297
+ # filtered df
298
+ self.df = self.filtered_df.sort_index()
264
299
 
265
300
  return self
266
301
 
@@ -283,6 +318,7 @@ class CleanData:
283
318
  self.filtered_df = Filter(self.df).tickers(tickers_list)
284
319
 
285
320
  # tickers removed by the filter (in self.df but not in the filtered df)
321
+
286
322
  self.filtered_tickers = list(
287
323
  set(self.filtered_df.index.droplevel(0).unique()).symmetric_difference(
288
324
  set(self.df.index.droplevel(0).unique())
@@ -293,7 +329,7 @@ class CleanData:
293
329
  self.summary.loc["n_filtered_tickers", self.df.unstack().columns] = len(self.filtered_tickers)
294
330
 
295
331
  # filtered df
296
- self.df = self.filtered_df
332
+ self.df = self.filtered_df.sort_index()
297
333
 
298
334
  return self
299
335