cryptodatapy 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cryptodatapy/conf/fx_tickers.csv +31 -0
- cryptodatapy/transform/clean.py +136 -173
- cryptodatapy/transform/clean_perp_futures_ohlcv.ipynb +1639 -0
- cryptodatapy/transform/filter.py +56 -143
- cryptodatapy/transform/impute.py +36 -83
- cryptodatapy/transform/od.py +221 -450
- {cryptodatapy-0.2.2.dist-info → cryptodatapy-0.2.3.dist-info}/METADATA +1 -1
- {cryptodatapy-0.2.2.dist-info → cryptodatapy-0.2.3.dist-info}/RECORD +10 -8
- {cryptodatapy-0.2.2.dist-info → cryptodatapy-0.2.3.dist-info}/LICENSE +0 -0
- {cryptodatapy-0.2.2.dist-info → cryptodatapy-0.2.3.dist-info}/WHEEL +0 -0
@@ -0,0 +1,31 @@
|
|
1
|
+
id,name,tiingo_id
|
2
|
+
eurusd,,
|
3
|
+
gbpusd,,
|
4
|
+
usdjpy,,
|
5
|
+
usdchf,,
|
6
|
+
usdcad,,
|
7
|
+
usdsek,,
|
8
|
+
usdnok,,
|
9
|
+
audusd,,
|
10
|
+
nzdusd,,
|
11
|
+
usdars,,
|
12
|
+
usdmxn,,
|
13
|
+
usdbrl,,
|
14
|
+
usdcop,,
|
15
|
+
usdclp,,
|
16
|
+
usdpen,,
|
17
|
+
usdils,,
|
18
|
+
usdrub,,
|
19
|
+
usdczk,,
|
20
|
+
usdpln,,
|
21
|
+
usdhuf,,
|
22
|
+
usdzar,,
|
23
|
+
usdtry,,
|
24
|
+
usdcny,,
|
25
|
+
usdhkd,,
|
26
|
+
usdsgd,,
|
27
|
+
usdtwd,,
|
28
|
+
usdkrw,,
|
29
|
+
usdphp,,
|
30
|
+
usdinr,,
|
31
|
+
usdidr,,
|
cryptodatapy/transform/clean.py
CHANGED
@@ -1,19 +1,57 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
from typing import Optional, Union
|
3
|
-
|
4
3
|
import pandas as pd
|
5
4
|
|
6
|
-
from cryptodatapy.transform.filter import Filter
|
7
|
-
from cryptodatapy.transform.impute import Impute
|
8
5
|
from cryptodatapy.transform.od import OutlierDetection
|
6
|
+
from cryptodatapy.transform.impute import Impute
|
7
|
+
from cryptodatapy.transform.filter import Filter
|
9
8
|
|
10
9
|
|
11
|
-
|
10
|
+
def stitch_dataframes(dfs):
    """
    Stitches together dataframes with different start dates.

    Dataframes are ordered from the most recent start date to the oldest and merged
    with ``combine_first``: where dataframes overlap, values from the dataframe with
    the more recent start date take precedence, and dataframes with older start dates
    only fill in missing values and earlier history.

    Parameters
    ----------
    dfs: list
        List of dataframes to be stitched together. Either all dataframes have a
        pd.MultiIndex (dates on level 0) or all have a pd.DatetimeIndex.
        The list itself is not modified.

    Returns
    -------
    combined_df: pd.DataFrame
        Combined dataframe with extended start date. Columns follow the order of the
        dataframe with the most columns.

    Raises
    ------
    TypeError
        If dfs is not a list, or if the dataframes do not all share one of the
        supported index types.
    ValueError
        If dfs is an empty list.
    """
    # check if dfs is a list
    if not isinstance(dfs, list):
        raise TypeError("Dataframes must be a list.")
    if not dfs:
        raise ValueError("Dataframes list must not be empty.")

    # check index types and pick the matching start date accessor
    if all(isinstance(df.index, pd.MultiIndex) for df in dfs):
        # use the dates actually present in the index: index.levels can still hold
        # stale (unused) dates after a dataframe has been sliced or filtered
        def start_date(df):
            return df.index.get_level_values(0).min()
    elif all(isinstance(df.index, pd.DatetimeIndex) for df in dfs):
        def start_date(df):
            return df.index.min()
    else:
        raise TypeError("Dataframes must be pd.MultiIndex or have DatetimeIndex.")

    # most recent start date first; sorted() leaves the caller's list untouched
    ordered = sorted(dfs, key=start_date, reverse=True)

    # combine dfs, older dataframes only fill what more recent ones are missing
    combined_df = ordered[0]
    for df in ordered[1:]:
        combined_df = combined_df.combine_first(df)

    # reorder cols to match the (first) dataframe with the most columns
    widest = max(ordered, key=lambda df: len(df.columns))
    combined_df = combined_df[widest.columns.tolist()]

    return combined_df
|
49
|
+
|
50
|
+
|
51
|
+
class CleanData:
|
52
|
+
"""
|
53
|
+
Cleans data to improve data quality.
|
54
|
+
"""
|
17
55
|
def __init__(self, df: pd.DataFrame):
|
18
56
|
"""
|
19
57
|
Constructor
|
@@ -22,26 +60,44 @@ class CleanData:
|
|
22
60
|
----------
|
23
61
|
df: pd.DataFrame
|
24
62
|
DataFrame MultiIndex with DatetimeIndex (level 0), ticker (level 1) and field (cols) values.
|
25
|
-
|
26
63
|
"""
|
27
|
-
self.
|
64
|
+
self.raw_df = df.copy()  # keep a copy of the raw dataframe
|
28
65
|
self.df = df
|
29
|
-
self.
|
30
|
-
self.
|
31
|
-
self.
|
32
|
-
self.
|
66
|
+
self.excluded_cols = None
|
67
|
+
self.outliers = None
|
68
|
+
self.yhat = None
|
69
|
+
self.filtered_df = None
|
70
|
+
self.filtered_tickers = None
|
71
|
+
self.repaired_df = None
|
72
|
+
self.summary = pd.DataFrame()
|
73
|
+
self.initialize_summary()
|
74
|
+
self.check_types()
|
75
|
+
|
76
|
+
def initialize_summary(self) -> None:
    """
    Initializes summary dataframe with data quality metrics.

    Adds two rows to ``self.summary``, with one column per column of the unstacked
    ``self.df``:

    - ``n_obs``: number of non-missing observations.
    - ``%_NaN_start``: percentage of missing values, relative to the number of rows
      of the unstacked dataframe.
    """
    # unstack once and reuse: every metric below is computed on the same wide frame
    wide_df = self.df.unstack()
    cols = wide_df.columns

    # add obs and missing vals
    self.summary.loc["n_obs", cols] = wide_df.notna().sum().values
    self.summary.loc["%_NaN_start", cols] = (wide_df.isnull().sum() / wide_df.shape[0]).values * 100
|
84
|
+
|
85
|
+
def check_types(self) -> None:
    """
    Checks that the data is a pandas DataFrame.

    No conversion is performed and nothing is returned; the method only validates
    the type of ``self.df``.

    Raises
    ------
    TypeError
        If ``self.df`` is not a pandas DataFrame.
    """
    if not isinstance(self.df, pd.DataFrame):
        raise TypeError("Data must be a pandas DataFrame.")
|
40
96
|
|
41
97
|
def filter_outliers(
|
42
98
|
self,
|
99
|
+
od_method: str = "mad",
|
43
100
|
excl_cols: Optional[Union[str, list]] = None,
|
44
|
-
od_method: str = "z_score",
|
45
101
|
**kwargs
|
46
102
|
) -> CleanData:
|
47
103
|
"""
|
@@ -49,92 +105,37 @@ class CleanData:
|
|
49
105
|
|
50
106
|
Parameters
|
51
107
|
----------
|
52
|
-
excl_cols: str or list
|
53
|
-
Name of columns to exclude from outlier filtering.
|
54
108
|
od_method: str, {'atr', 'iqr', 'mad', 'z_score', 'ewma', 'stl', 'seasonal_decomp', 'prophet'}, default 'mad'
|
55
109
|
Outlier detection method to use for filtering.
|
56
|
-
|
57
|
-
|
58
|
-
----------------
|
59
|
-
log: bool, default False
|
60
|
-
Converts series into log of series.
|
61
|
-
window_size: int, default 7
|
62
|
-
Number of observations in the rolling window.
|
63
|
-
model_type: str, {'estimation', 'prediction'}, default 'estimation'
|
64
|
-
Estimation models use past, current and future values to estimate the expected value of a series,
|
65
|
-
e.g. expected x_t of series x at time t uses values from [x_t-s, x_t+s].
|
66
|
-
Prediction models use only past and current values to estimate the expected value of a series,
|
67
|
-
e.g. expected x_t of series x at time t uses values from [x_t-s, x_t].
|
68
|
-
thresh_val: int, default 2
|
69
|
-
Value for upper and lower thresholds used in outlier detection.
|
70
|
-
period: int, optional, default 7
|
71
|
-
periodicity of the sequence.
|
72
|
-
model: str, {'additive', 'multiplicative'}, default 'additive'
|
73
|
-
Type of seasonal component.
|
74
|
-
filt: array-like, optional, default None
|
75
|
-
The filter coefficients for filtering out the seasonal component.
|
76
|
-
The concrete moving average method used in filtering is determined by two_sided.
|
77
|
-
two_sided: bool, optional, default True
|
78
|
-
The moving average method used in filtering. If True (default), a centered moving average is
|
79
|
-
computed using the filt. If False, the filter coefficients are for past values only.
|
80
|
-
extrapolate_trend: int, optional, default 0
|
81
|
-
If set to > 0, the trend resulting from the convolution is linear least-squares extrapolated
|
82
|
-
on both ends (or the single one if two_sided is False) considering this many (+1) closest points.
|
83
|
-
If set to ‘freq’, use freq closest points. Setting this parameter results in no NaN values in trend
|
84
|
-
or resid components.
|
85
|
-
seasonal_deg: int, optional, default 1
|
86
|
-
Degree of seasonal LOESS. 0 (constant) or 1 (constant and trend).
|
87
|
-
trend_deg: int, optional, default 1
|
88
|
-
Degree of trend LOESS. 0 (constant) or 1 (constant and trend).
|
89
|
-
low_pass_deg: int, optional, default 1
|
90
|
-
Degree of low pass LOESS. 0 (constant) or 1 (constant and trend).
|
91
|
-
robust: bool, optional, default False
|
92
|
-
Flag indicating whether to use a weighted version that is robust to some forms of outliers.
|
93
|
-
seasonal_jump: int, optional, default 1
|
94
|
-
Positive integer determining the linear interpolation step. If larger than 1,
|
95
|
-
the LOESS is used every seasonal_jump points and linear interpolation is between fitted points.
|
96
|
-
Higher values reduce estimation time.
|
97
|
-
trend_jump: int, optional, default 1
|
98
|
-
Positive integer determining the linear interpolation step. If larger than 1,
|
99
|
-
the LOESS is used every trend_jump points and values between the two are linearly interpolated.
|
100
|
-
Higher values reduce estimation time.
|
101
|
-
low_pass_jump: int, optional, default 1
|
102
|
-
Positive integer determining the linear interpolation step. If larger than 1,
|
103
|
-
the LOESS is used every low_pass_jump points and values between the two are linearly interpolated.
|
104
|
-
Higher values reduce estimation time.
|
105
|
-
interval_width: float, optional, default 0.99
|
106
|
-
Uncertainty interval estimated by Monte Carlo simulation. The larger the value,
|
107
|
-
the larger the upper/lower thresholds interval for outlier detection.
|
108
|
-
plot: bool, default False
|
109
|
-
Plots series with outliers highlighted (red dots).
|
110
|
-
plot_series: tuple, default ('BTC', 'close')
|
111
|
-
The specific time series to plot given by (ticker, field/column) tuple.
|
110
|
+
excl_cols: str or list
|
111
|
+
Name of columns to exclude from outlier filtering.
|
112
112
|
|
113
113
|
Returns
|
114
114
|
-------
|
115
115
|
CleanData
|
116
116
|
CleanData object
|
117
|
-
|
118
117
|
"""
|
119
118
|
# outlier detection
|
120
|
-
od =
|
121
|
-
|
122
|
-
|
123
|
-
self.fcsts = od["yhat"]
|
119
|
+
od = OutlierDetection(self.df, excl_cols=excl_cols, **kwargs)
|
120
|
+
self.excluded_cols = excl_cols
|
121
|
+
|
124
122
|
# filter outliers
|
125
|
-
|
123
|
+
getattr(od, od_method)()
|
124
|
+
self.filtered_df = od.filtered_df
|
125
|
+
self.outliers = od.outliers
|
126
|
+
self.yhat = od.yhat
|
127
|
+
|
126
128
|
# add to summary
|
127
|
-
self.summary.loc["%_outliers", self.
|
128
|
-
|
129
|
+
self.summary.loc["%_outliers", self.outliers.unstack().columns] = (
|
130
|
+
self.outliers.unstack().notna().sum() / self.df.unstack().shape[0]
|
129
131
|
).values * 100
|
132
|
+
|
130
133
|
# filtered df
|
131
|
-
self.df =
|
134
|
+
self.df = self.filtered_df
|
132
135
|
|
133
136
|
return self
|
134
137
|
|
135
|
-
def repair_outliers(
|
136
|
-
self, imp_method: str = "interpolate", **kwargs
|
137
|
-
) -> CleanData:
|
138
|
+
def repair_outliers(self, imp_method: str = "interpolate", **kwargs) -> CleanData:
|
138
139
|
"""
|
139
140
|
Repairs outliers using an imputation method.
|
140
141
|
|
@@ -143,46 +144,32 @@ class CleanData:
|
|
143
144
|
imp_method: str, {'fwd_fill', 'interpolate', 'fcst'}, default 'interpolate'
|
144
145
|
Imputation method used to replace filtered outliers.
|
145
146
|
|
146
|
-
Other Parameters
|
147
|
-
----------------
|
148
|
-
method: str, {'linear', ‘nearest’, ‘zero’, ‘slinear’, ‘quadratic’, ‘cubic’, ‘spline’, ‘barycentric’,
|
149
|
-
‘polynomial’, ‘krogh’, ‘piecewise_polynomial’, ‘pchip’, ‘akima’, ‘cubicspline’}, default spline
|
150
|
-
Interpolation method to use.
|
151
|
-
order: int, optional, default None
|
152
|
-
Order of polynomial or spline.
|
153
|
-
axis: {{0 or ‘index’, 1 or ‘columns’, None}}, default None
|
154
|
-
Axis to interpolate along.
|
155
|
-
limit: int, optional, default None
|
156
|
-
Maximum number of consecutive NaNs to fill. Must be greater than 0.
|
157
|
-
plot: bool, default False
|
158
|
-
Plots series with outliers highlighted with red dots.
|
159
|
-
plot_series: tuple, default ('BTC', 'close')
|
160
|
-
Plots the time series of a specific (ticker, field/column) tuple.
|
161
|
-
|
162
147
|
Returns
|
163
148
|
-------
|
164
149
|
CleanData
|
165
150
|
CleanData object
|
166
|
-
|
167
151
|
"""
|
168
152
|
# impute missing vals
|
169
153
|
if imp_method == "fcst":
|
170
|
-
|
154
|
+
self.repaired_df = getattr(Impute(self.df), imp_method)(self.yhat, **kwargs)
|
171
155
|
else:
|
172
|
-
|
156
|
+
self.repaired_df = getattr(Impute(self.df), imp_method)(**kwargs)
|
157
|
+
|
173
158
|
# add repaired % to summary
|
174
|
-
rep_vals =
|
175
|
-
self.summary.loc["%_imputed", self.df.unstack().columns] = (
|
176
|
-
|
177
|
-
) * 100
|
159
|
+
rep_vals = self.repaired_df.unstack().notna().sum() - self.df.unstack().notna().sum()
|
160
|
+
self.summary.loc["%_imputed", self.df.unstack().columns] = rep_vals / self.df.unstack().shape[0] * 100
|
161
|
+
|
178
162
|
# repaired df
|
179
|
-
self.
|
163
|
+
if self.excluded_cols is not None:
|
164
|
+
self.df = pd.concat([self.repaired_df, self.raw_df[self.excluded_cols]], join="outer", axis=1)
|
165
|
+
else:
|
166
|
+
self.df = self.repaired_df
|
167
|
+
# reorder cols
|
168
|
+
self.df = self.df[self.raw_df.columns]
|
180
169
|
|
181
170
|
return self
|
182
171
|
|
183
|
-
def filter_avg_trading_val(
|
184
|
-
self, thresh_val: int = 10000000, window_size: int = 30, **kwargs
|
185
|
-
) -> CleanData:
|
172
|
+
def filter_avg_trading_val(self, thresh_val: int = 10000000, window_size: int = 30) -> CleanData:
|
186
173
|
"""
|
187
174
|
Filters values below a threshold of average trading value (price * volume/size in quote currency) over some
|
188
175
|
lookback window, replacing them with NaNs.
|
@@ -194,34 +181,26 @@ class CleanData:
|
|
194
181
|
window_size: int, default 30
|
195
182
|
Size of rolling window.
|
196
183
|
|
197
|
-
Other Parameters
|
198
|
-
----------------
|
199
|
-
plot: bool, default False
|
200
|
-
Plots series with outliers highlighted with red dots.
|
201
|
-
plot_series: tuple, default ('BTC', 'close')
|
202
|
-
Plots the time series of a specific (ticker, field/column) tuple.
|
203
|
-
|
204
184
|
Returns
|
205
185
|
-------
|
206
186
|
CleanData
|
207
187
|
CleanData object
|
208
|
-
|
209
188
|
"""
|
210
189
|
# filter values below average trading value threshold
|
211
|
-
|
212
|
-
|
213
|
-
)
|
190
|
+
self.filtered_df = Filter(self.df).avg_trading_val(thresh_val=thresh_val, window_size=window_size)
|
191
|
+
|
214
192
|
# add to summary
|
215
|
-
|
193
|
+
filtered_vals = self.df.unstack().notna().sum() - self.filtered_df.unstack().notna().sum()
|
216
194
|
self.summary.loc["%_below_avg_trading_val", self.df.unstack().columns] = (
|
217
|
-
|
195
|
+
filtered_vals / self.df.unstack().shape[0]
|
218
196
|
).values * 100
|
197
|
+
|
219
198
|
# filtered df
|
220
|
-
self.df =
|
199
|
+
self.df = self.filtered_df
|
221
200
|
|
222
201
|
return self
|
223
202
|
|
224
|
-
def filter_missing_vals_gaps(self, gap_window: int = 30
|
203
|
+
def filter_missing_vals_gaps(self, gap_window: int = 30) -> CleanData:
|
225
204
|
"""
|
226
205
|
Filters values before a large gap of missing values, replacing them with NaNs.
|
227
206
|
|
@@ -230,37 +209,28 @@ class CleanData:
|
|
230
209
|
gap_window: int, default 30
|
231
210
|
Size of window where all values are missing (NaNs).
|
232
211
|
|
233
|
-
Other Parameters
|
234
|
-
----------------
|
235
|
-
plot: bool, default False
|
236
|
-
Plots series with outliers highlighted with red dots.
|
237
|
-
plot_series: tuple, default ('BTC', 'close')
|
238
|
-
Plots the time series of a specific (ticker, field/column) tuple.
|
239
|
-
|
240
212
|
Returns
|
241
213
|
-------
|
242
214
|
CleanData
|
243
215
|
CleanData object
|
244
|
-
|
245
216
|
"""
|
246
217
|
# filter values before large gaps of missing values
|
247
|
-
|
218
|
+
self.filtered_df = Filter(self.df).missing_vals_gaps(gap_window=gap_window)
|
219
|
+
|
248
220
|
# add to summary
|
249
221
|
missing_vals_gap = (
|
250
|
-
self.df.unstack().notna().sum() -
|
222
|
+
self.df.unstack().notna().sum() - self.filtered_df.unstack().notna().sum()
|
251
223
|
)
|
252
224
|
self.summary.loc["%_missing_vals_gaps", self.df.unstack().columns] = (
|
253
225
|
missing_vals_gap / self.df.unstack().shape[0]
|
254
226
|
).values * 100
|
227
|
+
|
255
228
|
# filtered df
|
256
|
-
self.df =
|
229
|
+
self.df = self.filtered_df
|
257
230
|
|
258
231
|
return self
|
259
232
|
|
260
|
-
def filter_min_nobs(self,
|
261
|
-
ts_obs: int = 100,
|
262
|
-
cs_obs: int = 2
|
263
|
-
) -> CleanData:
|
233
|
+
def filter_min_nobs(self, ts_obs: int = 100, cs_obs: int = 2) -> CleanData:
|
264
234
|
"""
|
265
235
|
Removes tickers from dataframe if the ticker has less than a minimum number of observations.
|
266
236
|
|
@@ -277,21 +247,20 @@ class CleanData:
|
|
277
247
|
CleanData object
|
278
248
|
"""
|
279
249
|
# filter tickers below minimum number of observations
|
280
|
-
|
250
|
+
self.filtered_df = Filter(self.df).min_nobs(ts_obs=ts_obs, cs_obs=cs_obs)
|
251
|
+
|
281
252
|
# tickers < min obs
|
282
|
-
|
283
|
-
set(
|
253
|
+
self.filtered_tickers = list(
|
254
|
+
set(self.filtered_df.index.droplevel(0).unique()).symmetric_difference(
|
284
255
|
set(self.df.index.droplevel(0).unique())
|
285
256
|
)
|
286
257
|
)
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
filt_tickers
|
292
|
-
)
|
258
|
+
|
259
|
+
# add to summary
|
260
|
+
self.summary.loc["n_tickers_below_min_obs", self.df.unstack().columns] = len(self.filtered_tickers)
|
261
|
+
|
293
262
|
# filtered df
|
294
|
-
self.df =
|
263
|
+
self.df = self.filtered_df
|
295
264
|
|
296
265
|
return self
|
297
266
|
|
@@ -309,30 +278,26 @@ class CleanData:
|
|
309
278
|
-------
|
310
279
|
CleanData
|
311
280
|
CleanData object
|
312
|
-
|
313
281
|
"""
|
314
282
|
# filter tickers
|
315
|
-
|
283
|
+
self.filtered_df = Filter(self.df).tickers(tickers_list)
|
284
|
+
|
316
285
|
# tickers removed by the filter
|
317
|
-
|
318
|
-
set(
|
286
|
+
self.filtered_tickers = list(
|
287
|
+
set(self.filtered_df.index.droplevel(0).unique()).symmetric_difference(
|
319
288
|
set(self.df.index.droplevel(0).unique())
|
320
289
|
)
|
321
290
|
)
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
filt_tickers
|
327
|
-
)
|
291
|
+
|
292
|
+
# add to summary
|
293
|
+
self.summary.loc["n_filtered_tickers", self.df.unstack().columns] = len(self.filtered_tickers)
|
294
|
+
|
328
295
|
# filtered df
|
329
|
-
self.df =
|
296
|
+
self.df = self.filtered_df
|
330
297
|
|
331
298
|
return self
|
332
299
|
|
333
|
-
def show_plot(
|
334
|
-
self, plot_series: tuple = ("BTC", "close"), compare_series: bool = True
|
335
|
-
) -> None:
|
300
|
+
def show_plot(self, plot_series: tuple = ("BTC", "close"), compare_series: bool = True) -> None:
|
336
301
|
"""
|
337
302
|
Plots clean time series and compares it to the raw series.
|
338
303
|
|
@@ -342,7 +307,6 @@ class CleanData:
|
|
342
307
|
Plots the time series of a specific (ticker, field) tuple.
|
343
308
|
compare_series: bool, default True
|
344
309
|
Compares clean time series with raw series
|
345
|
-
|
346
310
|
"""
|
347
311
|
ax = (
|
348
312
|
self.df.loc[pd.IndexSlice[:, plot_series[0]], plot_series[1]]
|
@@ -357,7 +321,7 @@ class CleanData:
|
|
357
321
|
)
|
358
322
|
if compare_series:
|
359
323
|
ax = (
|
360
|
-
self.
|
324
|
+
self.raw_df.loc[pd.IndexSlice[:, plot_series[0]], plot_series[1]]
|
361
325
|
.droplevel(1)
|
362
326
|
.plot(
|
363
327
|
linewidth=1,
|
@@ -382,14 +346,13 @@ class CleanData:
|
|
382
346
|
|
383
347
|
Parameters
|
384
348
|
----------
|
385
|
-
attr: str, {'df', 'outliers', '
|
349
|
+
attr: str, {'df', 'outliers', 'yhat', 'filtered_tickers', 'summary'}, default 'df'
|
386
350
|
CleanData object attribute to return
|
387
351
|
|
388
352
|
Returns
|
389
353
|
-------
|
390
354
|
CleanData
|
391
355
|
CleanData object
|
392
|
-
|
393
356
|
"""
|
394
357
|
self.summary.loc["%_NaN_end", self.df.unstack().columns] = (
|
395
358
|
self.df.unstack().isnull().sum() / self.df.unstack().shape[0]
|