cryptodatapy 0.2.3__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
- cryptodatapy/transform/clean.py +43 -7
- cryptodatapy/transform/clean_perp_futures_ohlcv.ipynb +194 -808
- cryptodatapy/transform/filter.py +32 -4
- {cryptodatapy-0.2.3.dist-info → cryptodatapy-0.2.4.dist-info}/METADATA +4 -1
- {cryptodatapy-0.2.3.dist-info → cryptodatapy-0.2.4.dist-info}/RECORD +7 -7
- {cryptodatapy-0.2.3.dist-info → cryptodatapy-0.2.4.dist-info}/LICENSE +0 -0
- {cryptodatapy-0.2.3.dist-info → cryptodatapy-0.2.4.dist-info}/WHEEL +0 -0
cryptodatapy/transform/clean.py
CHANGED
@@ -131,7 +131,7 @@ class CleanData:
|
|
131
131
|
).values * 100
|
132
132
|
|
133
133
|
# filtered df
|
134
|
-
self.df = self.filtered_df
|
134
|
+
self.df = self.filtered_df.sort_index()
|
135
135
|
|
136
136
|
return self
|
137
137
|
|
@@ -161,11 +161,12 @@ class CleanData:
|
|
161
161
|
|
162
162
|
# repaired df
|
163
163
|
if self.excluded_cols is not None:
|
164
|
-
self.df = pd.concat([self.repaired_df, self.raw_df[self.excluded_cols]], join="
|
164
|
+
self.df = pd.concat([self.repaired_df, self.raw_df[self.excluded_cols]], join="inner", axis=1)
|
165
165
|
else:
|
166
166
|
self.df = self.repaired_df
|
167
|
+
|
167
168
|
# reorder cols
|
168
|
-
self.df = self.df[self.raw_df.columns]
|
169
|
+
self.df = self.df[self.raw_df.columns].sort_index()
|
169
170
|
|
170
171
|
return self
|
171
172
|
|
@@ -196,7 +197,7 @@ class CleanData:
|
|
196
197
|
).values * 100
|
197
198
|
|
198
199
|
# filtered df
|
199
|
-
self.df = self.filtered_df
|
200
|
+
self.df = self.filtered_df.sort_index()
|
200
201
|
|
201
202
|
return self
|
202
203
|
|
@@ -226,7 +227,7 @@ class CleanData:
|
|
226
227
|
).values * 100
|
227
228
|
|
228
229
|
# filtered df
|
229
|
-
self.df = self.filtered_df
|
230
|
+
self.df = self.filtered_df.sort_index()
|
230
231
|
|
231
232
|
return self
|
232
233
|
|
@@ -260,7 +261,41 @@ class CleanData:
|
|
260
261
|
self.summary.loc["n_tickers_below_min_obs", self.df.unstack().columns] = len(self.filtered_tickers)
|
261
262
|
|
262
263
|
# filtered df
|
263
|
-
self.df = self.filtered_df
|
264
|
+
self.df = self.filtered_df.sort_index()
|
265
|
+
|
266
|
+
return self
|
267
|
+
|
268
|
+
def filter_delisted_tickers(self, field: str = 'close', n_unch_vals: int = 30) -> CleanData:
|
269
|
+
"""
|
270
|
+
Removes delisted tickers from dataframe.
|
271
|
+
|
272
|
+
Parameters
|
273
|
+
----------
|
274
|
+
field: str, default 'close'
|
275
|
+
Field/column to use for detecting delisted tickers.
|
276
|
+
n_unch_vals: int, default 30
|
277
|
+
Number of consecutive unchanged values to consider a ticker as delisted.
|
278
|
+
|
279
|
+
Returns
|
280
|
+
-------
|
281
|
+
CleanData
|
282
|
+
CleanData object
|
283
|
+
"""
|
284
|
+
# filter tickers
|
285
|
+
self.filtered_df = Filter(self.df).remove_delisted(field=field, n_unch_vals=n_unch_vals)
|
286
|
+
|
287
|
+
# tickers < min obs
|
288
|
+
self.filtered_tickers = list(
|
289
|
+
set(self.filtered_df.index.droplevel(0).unique()).symmetric_difference(
|
290
|
+
set(self.df.index.droplevel(0).unique())
|
291
|
+
)
|
292
|
+
)
|
293
|
+
|
294
|
+
# add to summary
|
295
|
+
self.summary.loc["n_filtered_tickers", self.df.unstack().columns] = len(self.filtered_tickers)
|
296
|
+
|
297
|
+
# filtered df
|
298
|
+
self.df = self.filtered_df.sort_index()
|
264
299
|
|
265
300
|
return self
|
266
301
|
|
@@ -283,6 +318,7 @@ class CleanData:
|
|
283
318
|
self.filtered_df = Filter(self.df).tickers(tickers_list)
|
284
319
|
|
285
320
|
# tickers < min obs
|
321
|
+
|
286
322
|
self.filtered_tickers = list(
|
287
323
|
set(self.filtered_df.index.droplevel(0).unique()).symmetric_difference(
|
288
324
|
set(self.df.index.droplevel(0).unique())
|
@@ -293,7 +329,7 @@ class CleanData:
|
|
293
329
|
self.summary.loc["n_filtered_tickers", self.df.unstack().columns] = len(self.filtered_tickers)
|
294
330
|
|
295
331
|
# filtered df
|
296
|
-
self.df = self.filtered_df
|
332
|
+
self.df = self.filtered_df.sort_index()
|
297
333
|
|
298
334
|
return self
|
299
335
|
|