AnomalyLab 0.2.8__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {anomalylab-0.2.8 → anomalylab-0.3.0/AnomalyLab.egg-info}/PKG-INFO +1 -1
- {anomalylab-0.2.8/AnomalyLab.egg-info → anomalylab-0.3.0}/PKG-INFO +1 -1
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/core/core.py +11 -5
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/empirical/correlation.py +1 -1
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/empirical/fm_regression.py +3 -3
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/empirical/persistence.py +5 -5
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/empirical/portfolio.py +26 -24
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/preprocess/outliers.py +2 -2
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/structure/data.py +1 -8
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/structure/panel_data.py +7 -3
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/structure/time_series.py +3 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/setup.py +1 -1
- {anomalylab-0.2.8 → anomalylab-0.3.0}/.gitattributes +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/.github/workflows/python-publish.yml +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/.gitignore +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/AnomalyLab.egg-info/SOURCES.txt +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/AnomalyLab.egg-info/dependency_links.txt +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/AnomalyLab.egg-info/requires.txt +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/AnomalyLab.egg-info/top_level.txt +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/LICENSE +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/MANIFEST.in +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/README.md +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/__init__.py +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/config.py +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/core/__init__.py +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/datasets/__init__.py +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/datasets/dataset.py +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/datasets/panel_data.csv +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/datasets/time_series_data.csv +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/datasets/transition_matrix.png +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/empirical/__init__.py +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/empirical/empirical.py +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/empirical/factor_return.py +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/empirical/summary.py +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/preprocess/__init__.py +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/preprocess/fillna.py +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/preprocess/normalize.py +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/preprocess/preprocessor.py +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/preprocess/shift.py +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/preprocess/truncate.py +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/structure/__init__.py +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/utils/__init__.py +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/utils/imports.py +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/utils/utils.py +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/visualization/__init__.py +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/visualization/format.py +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/requirements.txt +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/setup.cfg +0 -0
- {anomalylab-0.2.8 → anomalylab-0.3.0}/tests/__init__.py +0 -0
|
@@ -19,7 +19,7 @@ from anomalylab.visualization import FormatExcel
|
|
|
19
19
|
|
|
20
20
|
@dataclass
|
|
21
21
|
class Panel:
|
|
22
|
-
_df:
|
|
22
|
+
_df: DataFrame = field(repr=False)
|
|
23
23
|
name: Optional[str] = None
|
|
24
24
|
id: str = "permno"
|
|
25
25
|
time: str = "date"
|
|
@@ -27,6 +27,7 @@ class Panel:
|
|
|
27
27
|
ret: str = "return"
|
|
28
28
|
classifications: Optional[list[str] | str] = None
|
|
29
29
|
drop_all_chars_missing: bool = False
|
|
30
|
+
is_copy: bool = False
|
|
30
31
|
|
|
31
32
|
def __post_init__(self) -> None:
|
|
32
33
|
self.panel_data: PanelData = PanelData(
|
|
@@ -38,6 +39,7 @@ class Panel:
|
|
|
38
39
|
ret=self.ret,
|
|
39
40
|
classifications=self.classifications,
|
|
40
41
|
drop_all_chars_missing=self.drop_all_chars_missing,
|
|
42
|
+
is_copy=self.is_copy,
|
|
41
43
|
)
|
|
42
44
|
self._normalize_processor = None
|
|
43
45
|
self._fillna_processor = None
|
|
@@ -260,7 +262,7 @@ class Panel:
|
|
|
260
262
|
draw: bool = False,
|
|
261
263
|
path: Optional[str] = None,
|
|
262
264
|
decimal: Optional[int] = None,
|
|
263
|
-
) ->
|
|
265
|
+
) -> DataFrame:
|
|
264
266
|
return self.persistence_processor.transition_matrix(
|
|
265
267
|
var=var,
|
|
266
268
|
group=group,
|
|
@@ -304,7 +306,7 @@ class Panel:
|
|
|
304
306
|
factors_series: Optional[TimeSeries] = None,
|
|
305
307
|
pivot: bool = True,
|
|
306
308
|
format: bool = False,
|
|
307
|
-
|
|
309
|
+
sort_type: str = "dependent",
|
|
308
310
|
decimal: Optional[int] = None,
|
|
309
311
|
factor_return: bool = False,
|
|
310
312
|
) -> tuple:
|
|
@@ -317,7 +319,7 @@ class Panel:
|
|
|
317
319
|
core_g=core_g,
|
|
318
320
|
pivot=pivot,
|
|
319
321
|
format=format,
|
|
320
|
-
|
|
322
|
+
sort_type=sort_type,
|
|
321
323
|
decimal=decimal,
|
|
322
324
|
factor_return=factor_return,
|
|
323
325
|
)
|
|
@@ -372,7 +374,11 @@ if __name__ == "__main__":
|
|
|
372
374
|
}
|
|
373
375
|
|
|
374
376
|
panel = Panel(
|
|
375
|
-
df,
|
|
377
|
+
df,
|
|
378
|
+
name="Stocks",
|
|
379
|
+
classifications="industry",
|
|
380
|
+
drop_all_chars_missing=True,
|
|
381
|
+
is_copy=False,
|
|
376
382
|
)
|
|
377
383
|
time_series: TimeSeries = TimeSeries(df=ts, name="Factor Series")
|
|
378
384
|
pp(panel)
|
|
@@ -74,7 +74,7 @@ class Correlation(Empirical):
|
|
|
74
74
|
)
|
|
75
75
|
is_upper = False # Switch to lower triangle for the next method
|
|
76
76
|
|
|
77
|
-
return
|
|
77
|
+
return DataFrame(data=merged_corr, index=columns, columns=columns).map(
|
|
78
78
|
func=round_to_string,
|
|
79
79
|
decimal=decimal or self.decimal, # Round results to specified decimals
|
|
80
80
|
)
|
|
@@ -131,11 +131,11 @@ class FamaMacBethRegression(Empirical):
|
|
|
131
131
|
coefs = results.params
|
|
132
132
|
coefs[self.time] = time
|
|
133
133
|
coef_df.append(coefs)
|
|
134
|
-
coef_df =
|
|
134
|
+
coef_df = DataFrame(coef_df)
|
|
135
135
|
coef_df = coef_df[
|
|
136
136
|
[self.time] + [col for col in coef_df.columns if col != self.time]
|
|
137
137
|
]
|
|
138
|
-
return
|
|
138
|
+
return DataFrame(coef_df)
|
|
139
139
|
|
|
140
140
|
# Fama-MacBeth regression with Newey-West adjustment
|
|
141
141
|
fmb = FamaMacBeth(
|
|
@@ -207,7 +207,7 @@ class FamaMacBethRegression(Empirical):
|
|
|
207
207
|
Returns:
|
|
208
208
|
Series: Formatted regression results including parameters, t-values, and statistics.
|
|
209
209
|
"""
|
|
210
|
-
result: Series =
|
|
210
|
+
result: Series = DataFrame(
|
|
211
211
|
data={
|
|
212
212
|
"params": reg_result["params"].map(
|
|
213
213
|
arg=lambda x: round_to_string(value=x, decimal=decimal)
|
|
@@ -29,7 +29,7 @@ class Persistence(Empirical):
|
|
|
29
29
|
no_process_columns: Columns = None,
|
|
30
30
|
process_all_characteristics: bool = True,
|
|
31
31
|
decimal: Optional[int] = None,
|
|
32
|
-
) ->
|
|
32
|
+
) -> DataFrame:
|
|
33
33
|
"""
|
|
34
34
|
Computes average persistence (autocorrelation) for specified columns over defined time periods.
|
|
35
35
|
|
|
@@ -48,7 +48,7 @@ class Persistence(Empirical):
|
|
|
48
48
|
Defaults to None.
|
|
49
49
|
|
|
50
50
|
Returns:
|
|
51
|
-
|
|
51
|
+
DataFrame: A DataFrame containing the average persistence for specified columns.
|
|
52
52
|
|
|
53
53
|
Note:
|
|
54
54
|
The resulting DataFrame contains the average correlations for each lag, formatted to the
|
|
@@ -90,7 +90,7 @@ class Persistence(Empirical):
|
|
|
90
90
|
all_monthly_corrs.extend(monthly_corrs)
|
|
91
91
|
|
|
92
92
|
# Convert to DataFrame
|
|
93
|
-
all_monthly_corrs_df =
|
|
93
|
+
all_monthly_corrs_df = DataFrame(all_monthly_corrs)
|
|
94
94
|
|
|
95
95
|
# Calculate average monthly correlations
|
|
96
96
|
mean_corrs_df = (
|
|
@@ -116,7 +116,7 @@ class Persistence(Empirical):
|
|
|
116
116
|
draw: bool = False,
|
|
117
117
|
path: Optional[str] = None,
|
|
118
118
|
decimal: Optional[int] = None,
|
|
119
|
-
) ->
|
|
119
|
+
) -> DataFrame:
|
|
120
120
|
"""Calculate the transition matrix for a specified variable and lag.
|
|
121
121
|
|
|
122
122
|
This method computes the transition matrix that shows how groups change over time based on
|
|
@@ -173,7 +173,7 @@ class Persistence(Empirical):
|
|
|
173
173
|
)
|
|
174
174
|
|
|
175
175
|
# Create DataFrame for the transition matrix
|
|
176
|
-
transition_matrix_df =
|
|
176
|
+
transition_matrix_df = DataFrame(
|
|
177
177
|
transition_matrix, columns=range(1, group + 1), index=range(1, group + 1)
|
|
178
178
|
)
|
|
179
179
|
|
|
@@ -69,8 +69,8 @@ class PortfolioAnalysis(Empirical):
|
|
|
69
69
|
self,
|
|
70
70
|
vars: Union[str, list[str]],
|
|
71
71
|
groups: Union[int, list[int]],
|
|
72
|
-
|
|
73
|
-
) ->
|
|
72
|
+
sort_type: Optional[str] = None,
|
|
73
|
+
) -> DataFrame:
|
|
74
74
|
"""Group variables into portfolios based on specified groups.
|
|
75
75
|
|
|
76
76
|
This method creates portfolios for the specified variables in the panel data.
|
|
@@ -78,7 +78,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
78
78
|
Args:
|
|
79
79
|
vars (list of str): List of variables to group.
|
|
80
80
|
groups (list of int): List of integers defining the number of groups for each variable.
|
|
81
|
-
|
|
81
|
+
sort_type (str, optional): Type of sorting, can be 'dependent' to adjust based on the previous variable.
|
|
82
82
|
|
|
83
83
|
Returns:
|
|
84
84
|
DataFrame: A DataFrame with new columns for grouped variables.
|
|
@@ -105,7 +105,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
105
105
|
# Adjust group definitions
|
|
106
106
|
group_col = [self.time]
|
|
107
107
|
for i, var in enumerate(vars):
|
|
108
|
-
if
|
|
108
|
+
if sort_type == "dependent" and i > 0:
|
|
109
109
|
group_col.append(f"{vars[i-1]}_g{groups[i-1]}")
|
|
110
110
|
# Grouping dependent on the previous variable
|
|
111
111
|
out_df[f"{var}_g{groups[i]}"] = (
|
|
@@ -142,7 +142,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
142
142
|
|
|
143
143
|
return out_df
|
|
144
144
|
|
|
145
|
-
def _claculate_value(self, df:
|
|
145
|
+
def _claculate_value(self, df: DataFrame, decimal: Optional[int] = None) -> dict:
|
|
146
146
|
"""Calculate various portfolio performance metrics.
|
|
147
147
|
|
|
148
148
|
This method computes mean returns, t-values, Sharpe ratios, and model-adjusted alpha and t values.
|
|
@@ -159,7 +159,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
159
159
|
|
|
160
160
|
return {**stat_dict, **factors_dict, **sharpe_dict}
|
|
161
161
|
|
|
162
|
-
def _calculate_mean_and_t_value(self, df:
|
|
162
|
+
def _calculate_mean_and_t_value(self, df: DataFrame) -> dict:
|
|
163
163
|
"""Calculate mean and t-value for the dependent variable.
|
|
164
164
|
|
|
165
165
|
This method computes the mean return and its t-value assuming the null hypothesis
|
|
@@ -176,7 +176,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
176
176
|
lag = math.ceil(4 * (T / 100) ** (4 / 25))
|
|
177
177
|
|
|
178
178
|
Y = df[self.endog].values
|
|
179
|
-
X =
|
|
179
|
+
X = DataFrame({"constant": [1] * len(df[self.endog])}).values
|
|
180
180
|
reg = sm.OLS(Y, X).fit(
|
|
181
181
|
cov_type="HAC", cov_kwds={"maxlags": lag, "use_correction": False}
|
|
182
182
|
)
|
|
@@ -190,7 +190,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
190
190
|
|
|
191
191
|
return stat_dict
|
|
192
192
|
|
|
193
|
-
def _calculate_alpha_and_t_value(self, df:
|
|
193
|
+
def _calculate_alpha_and_t_value(self, df: DataFrame) -> dict:
|
|
194
194
|
"""Calculate alpha and t-value for specified models.
|
|
195
195
|
|
|
196
196
|
This method computes alpha values and their t-statistics for various regression models
|
|
@@ -239,7 +239,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
239
239
|
else:
|
|
240
240
|
return {}
|
|
241
241
|
|
|
242
|
-
def _calculate_sharpe(self, df:
|
|
242
|
+
def _calculate_sharpe(self, df: DataFrame, decimal: Optional[int] = 0) -> dict:
|
|
243
243
|
"""Calculate the Sharpe ratio for the dependent variable.
|
|
244
244
|
|
|
245
245
|
This method computes the annualized Sharpe ratio based on the mean and standard deviation
|
|
@@ -280,6 +280,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
280
280
|
core_g (int): The group number for portfolio grouping of the core variable.
|
|
281
281
|
format (bool): Whether to format the output for display. Defaults to False.
|
|
282
282
|
decimal (Optional[int]): The number of decimal places for formatting. Defaults to None.
|
|
283
|
+
factor_return (bool): Whether to output factor returns in the analysis. Defaults to False.
|
|
283
284
|
|
|
284
285
|
Returns:
|
|
285
286
|
tuple: A tuple containing the equal-weighted and value-weighted results DataFrames.
|
|
@@ -299,7 +300,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
299
300
|
) # type: ignore
|
|
300
301
|
vw_ret_d.index.names = [self.time, core_var]
|
|
301
302
|
|
|
302
|
-
def process_group(group:
|
|
303
|
+
def process_group(group: DataFrame) -> Series:
|
|
303
304
|
"""Process each group to calculate differences and prepare the output.
|
|
304
305
|
|
|
305
306
|
This function computes the difference between the highest portfolio and the lowest
|
|
@@ -309,7 +310,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
309
310
|
group (DataFrame): The grouped DataFrame for which to process data.
|
|
310
311
|
|
|
311
312
|
Returns:
|
|
312
|
-
|
|
313
|
+
Series: The processed Series with differences and averages.
|
|
313
314
|
"""
|
|
314
315
|
group = group.sort_index(axis=0, level=[0, 1])
|
|
315
316
|
|
|
@@ -318,7 +319,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
318
319
|
[(group.index.get_level_values(0)[0], "Diff")],
|
|
319
320
|
names=[self.time, core_var],
|
|
320
321
|
)
|
|
321
|
-
core_diff =
|
|
322
|
+
core_diff = Series(core_diff, index=new_index)
|
|
322
323
|
|
|
323
324
|
return pd.concat([group, core_diff])
|
|
324
325
|
|
|
@@ -358,7 +359,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
358
359
|
|
|
359
360
|
def calculate_time_series_metrics(
|
|
360
361
|
series: Series, format: bool = format
|
|
361
|
-
) ->
|
|
362
|
+
) -> DataFrame:
|
|
362
363
|
"""Calculate metrics for each time series and format results.
|
|
363
364
|
|
|
364
365
|
This function computes performance metrics for each time series and formats the results
|
|
@@ -383,7 +384,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
383
384
|
values[core_var] = key
|
|
384
385
|
data.append(values)
|
|
385
386
|
|
|
386
|
-
combined_results =
|
|
387
|
+
combined_results = DataFrame(data)
|
|
387
388
|
|
|
388
389
|
combined_results.set_index(core_var, inplace=True)
|
|
389
390
|
|
|
@@ -424,7 +425,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
424
425
|
core_g: int,
|
|
425
426
|
pivot: bool = True,
|
|
426
427
|
format: bool = False,
|
|
427
|
-
|
|
428
|
+
sort_type: str = "dependent",
|
|
428
429
|
decimal: Optional[int] = None,
|
|
429
430
|
factor_return: bool = False,
|
|
430
431
|
) -> tuple:
|
|
@@ -443,6 +444,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
443
444
|
format (bool): Whether to format the output for display. Defaults to False.
|
|
444
445
|
type (str): Type of grouping, can be 'dependent' or 'independent'. Defaults to 'dependent'.
|
|
445
446
|
decimal (Optional[int]): The number of decimal places to round to. Defaults to None.
|
|
447
|
+
factor_return (bool): Whether to output factor returns in the analysis. Defaults to False.
|
|
446
448
|
|
|
447
449
|
Returns:
|
|
448
450
|
tuple: A tuple containing the equal-weighted and value-weighted results DataFrames.
|
|
@@ -450,7 +452,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
450
452
|
data_d = self.GroupN(
|
|
451
453
|
[sort_var, core_var],
|
|
452
454
|
[sort_g, core_g],
|
|
453
|
-
|
|
455
|
+
sort_type=sort_type,
|
|
454
456
|
)
|
|
455
457
|
|
|
456
458
|
ew_ret_d = data_d.groupby(
|
|
@@ -465,7 +467,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
465
467
|
)
|
|
466
468
|
vw_ret_d.index.names = [self.time, sort_var, core_var]
|
|
467
469
|
|
|
468
|
-
def process_group(group:
|
|
470
|
+
def process_group(group: DataFrame) -> DataFrame:
|
|
469
471
|
"""Process each group to calculate differences and averages.
|
|
470
472
|
|
|
471
473
|
This function computes the difference between the highest portfolio and lowest portfolio,
|
|
@@ -527,7 +529,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
527
529
|
if factor_return:
|
|
528
530
|
return ew_ret_d, vw_ret_d
|
|
529
531
|
|
|
530
|
-
def generate_time_series_dict(df:
|
|
532
|
+
def generate_time_series_dict(df: DataFrame) -> dict:
|
|
531
533
|
"""Generate a dictionary of time series data from the DataFrame.
|
|
532
534
|
|
|
533
535
|
This function extracts time series for each unique combination of sorting and core variables.
|
|
@@ -554,8 +556,8 @@ class PortfolioAnalysis(Empirical):
|
|
|
554
556
|
return time_series_dict
|
|
555
557
|
|
|
556
558
|
def calculate_time_series_metrics(
|
|
557
|
-
df:
|
|
558
|
-
) ->
|
|
559
|
+
df: DataFrame, pivot: bool = pivot, format: bool = format
|
|
560
|
+
) -> DataFrame:
|
|
559
561
|
"""Calculate metrics for each time series and format results.
|
|
560
562
|
|
|
561
563
|
This function computes performance metrics for each time series and formats the results
|
|
@@ -583,7 +585,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
583
585
|
values[core_var] = key[1]
|
|
584
586
|
data.append(values)
|
|
585
587
|
|
|
586
|
-
combined_results =
|
|
588
|
+
combined_results = DataFrame(data)
|
|
587
589
|
|
|
588
590
|
combined_results.set_index([sort_var, core_var], inplace=True)
|
|
589
591
|
|
|
@@ -609,16 +611,16 @@ class PortfolioAnalysis(Empirical):
|
|
|
609
611
|
:, ~combined_results.columns.str.endswith("p")
|
|
610
612
|
]
|
|
611
613
|
|
|
612
|
-
def reorder_diff_avg(df:
|
|
614
|
+
def reorder_diff_avg(df: DataFrame) -> DataFrame:
|
|
613
615
|
"""Reorder the rows and columns of a DataFrame to place 'Diff' before 'Avg'.
|
|
614
616
|
|
|
615
617
|
This function rearranges the DataFrame to improve readability.
|
|
616
618
|
|
|
617
619
|
Args:
|
|
618
|
-
df (
|
|
620
|
+
df (DataFrame): The DataFrame to reorder.
|
|
619
621
|
|
|
620
622
|
Returns:
|
|
621
|
-
|
|
623
|
+
DataFrame: The reordered DataFrame.
|
|
622
624
|
"""
|
|
623
625
|
columns_order = [
|
|
624
626
|
col for col in df.columns if col not in ["Diff", "Avg"]
|
|
@@ -40,7 +40,7 @@ class OutlierMethod:
|
|
|
40
40
|
Returns:
|
|
41
41
|
Series: A new Series with winsorized values.
|
|
42
42
|
"""
|
|
43
|
-
return
|
|
43
|
+
return Series(
|
|
44
44
|
data=np.where(
|
|
45
45
|
series.isnull(),
|
|
46
46
|
np.nan,
|
|
@@ -69,7 +69,7 @@ class OutlierMethod:
|
|
|
69
69
|
Returns:
|
|
70
70
|
Series: A new Series with truncated values.
|
|
71
71
|
"""
|
|
72
|
-
return
|
|
72
|
+
return Series(
|
|
73
73
|
data=np.where(
|
|
74
74
|
series.isnull(),
|
|
75
75
|
np.nan,
|
|
@@ -21,18 +21,11 @@ class Data(ABC):
|
|
|
21
21
|
1. Check if the columns are valid.
|
|
22
22
|
2. Preprocess the data.
|
|
23
23
|
3. Set the flag if needed.
|
|
24
|
-
4. Call the other_init method if needed.
|
|
25
24
|
"""
|
|
26
|
-
|
|
27
|
-
self.name = "anomaly"
|
|
25
|
+
|
|
28
26
|
self._check_columns()
|
|
29
27
|
self._preprocess()
|
|
30
28
|
self.set_flag()
|
|
31
|
-
self.other_init()
|
|
32
|
-
|
|
33
|
-
def other_init(self) -> None:
|
|
34
|
-
"""This method is a placeholder for additional initialization logic."""
|
|
35
|
-
pass
|
|
36
29
|
|
|
37
30
|
def set_flag(self) -> None:
|
|
38
31
|
"""This method is meant to be overridden by subclasses to set flags."""
|
|
@@ -20,7 +20,7 @@ class PanelData(Data):
|
|
|
20
20
|
frequency (Literal["D", "M", "Y"]):
|
|
21
21
|
The frequency of the data. Defaults to "M".
|
|
22
22
|
ret (str):
|
|
23
|
-
The column name for the excess return. Defaults to
|
|
23
|
+
The column name for the excess return. Defaults to None.
|
|
24
24
|
classifications (list[str]):
|
|
25
25
|
The list of classification columns.
|
|
26
26
|
drop_all_chars_missing (bool):
|
|
@@ -30,9 +30,10 @@ class PanelData(Data):
|
|
|
30
30
|
id: str = "permno"
|
|
31
31
|
time: str = "date"
|
|
32
32
|
frequency: Literal["D", "M", "Y"] = "M"
|
|
33
|
-
ret: str =
|
|
33
|
+
ret: Optional[str] = None
|
|
34
34
|
classifications: Optional[list[str] | str] = None
|
|
35
35
|
drop_all_chars_missing: bool = False
|
|
36
|
+
is_copy: bool = False
|
|
36
37
|
|
|
37
38
|
def set_flag(self) -> None:
|
|
38
39
|
"""Set default flags for the `PanelData` object."""
|
|
@@ -57,10 +58,12 @@ class PanelData(Data):
|
|
|
57
58
|
|
|
58
59
|
This method identifies remaining columns as firm characteristics, excluding classifications.
|
|
59
60
|
"""
|
|
61
|
+
if self.is_copy:
|
|
62
|
+
self.df = copy.deepcopy(self.df)
|
|
60
63
|
self.df[self.id] = self.df[self.id].astype(int)
|
|
61
64
|
self.df[self.time] = pd.to_datetime(self.df[self.time], format="ISO8601")
|
|
62
65
|
self.df[self.time] = self.df[self.time].dt.to_period(freq=self.frequency)
|
|
63
|
-
self.df
|
|
66
|
+
self.df.sort_values(by=[self.time, self.id], inplace=True)
|
|
64
67
|
basic_column = (
|
|
65
68
|
[self.id, self.time] if self.ret is None else [self.id, self.time, self.ret]
|
|
66
69
|
)
|
|
@@ -208,6 +211,7 @@ if __name__ == "__main__":
|
|
|
208
211
|
ret="return",
|
|
209
212
|
classifications="industry",
|
|
210
213
|
drop_all_chars_missing=True,
|
|
214
|
+
is_copy=False,
|
|
211
215
|
)
|
|
212
216
|
pp(panel_data)
|
|
213
217
|
pp(panel_data.df)
|
|
@@ -24,6 +24,7 @@ class TimeSeries(Data):
|
|
|
24
24
|
time: str = "date"
|
|
25
25
|
frequency: Literal["D", "M", "Y"] = "M"
|
|
26
26
|
factors: list[str] = field(init=False)
|
|
27
|
+
is_copy: bool = False
|
|
27
28
|
|
|
28
29
|
def __repr__(self) -> str:
|
|
29
30
|
return f"TimeSeriesData({self.name})" # todo: add frequency
|
|
@@ -34,6 +35,8 @@ class TimeSeries(Data):
|
|
|
34
35
|
|
|
35
36
|
This method renames the time column to a standardized name and identifies remaining columns as factors.
|
|
36
37
|
"""
|
|
38
|
+
if self.is_copy:
|
|
39
|
+
self.df = copy.deepcopy(self.df)
|
|
37
40
|
self.df[self.time] = pd.to_datetime(self.df[self.time], format="ISO8601")
|
|
38
41
|
self.df[self.time] = self.df[self.time].dt.to_period(freq=self.frequency)
|
|
39
42
|
self.df = self.df.sort_values(by=self.time)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|