AnomalyLab 0.4.0__tar.gz → 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {anomalylab-0.4.0 → anomalylab-0.4.2}/AnomalyLab.egg-info/PKG-INFO +1 -1
- {anomalylab-0.4.0 → anomalylab-0.4.2}/AnomalyLab.egg-info/SOURCES.txt +0 -1
- {anomalylab-0.4.0 → anomalylab-0.4.2}/PKG-INFO +1 -1
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/core/core.py +42 -31
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/empirical/fm_regression.py +4 -31
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/structure/panel_data.py +8 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/structure/time_series.py +8 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/setup.py +1 -1
- anomalylab-0.4.0/.png +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/.gitattributes +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/.github/workflows/python-publish.yml +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/.gitignore +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/AnomalyLab.egg-info/dependency_links.txt +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/AnomalyLab.egg-info/requires.txt +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/AnomalyLab.egg-info/top_level.txt +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/LICENSE +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/MANIFEST.in +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/README.md +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/__init__.py +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/config.py +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/core/__init__.py +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/datasets/__init__.py +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/datasets/dataset.py +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/datasets/panel_data.csv +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/datasets/time_series_data.csv +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/empirical/__init__.py +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/empirical/correlation.py +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/empirical/empirical.py +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/empirical/persistence.py +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/empirical/portfolio.py +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/empirical/summary.py +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/preprocess/__init__.py +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/preprocess/fillna.py +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/preprocess/normalize.py +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/preprocess/outliers.py +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/preprocess/preprocessor.py +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/preprocess/shift.py +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/preprocess/truncate.py +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/structure/__init__.py +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/structure/data.py +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/utils/__init__.py +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/utils/imports.py +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/utils/utils.py +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/visualization/__init__.py +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/visualization/format.py +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/requirements.txt +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/setup.cfg +0 -0
- {anomalylab-0.4.0 → anomalylab-0.4.2}/tests/__init__.py +0 -0
|
@@ -499,12 +499,20 @@ class Panel:
|
|
|
499
499
|
DataFrame: If inplace=False (default), returns a new DataFrame with grouped variables.
|
|
500
500
|
None: If inplace=True, modifies the original dataset and returns None.
|
|
501
501
|
"""
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
502
|
+
if inplace:
|
|
503
|
+
self.panel_data.df = self.portfolio_analysis_processor(
|
|
504
|
+
endog=endog, weight=weight
|
|
505
|
+
).GroupN(
|
|
506
|
+
vars=vars,
|
|
507
|
+
groups=groups,
|
|
508
|
+
sort_type=sort_type,
|
|
509
|
+
)
|
|
510
|
+
else:
|
|
511
|
+
return self.portfolio_analysis_processor(endog=endog, weight=weight).GroupN(
|
|
512
|
+
vars=vars,
|
|
513
|
+
groups=groups,
|
|
514
|
+
sort_type=sort_type,
|
|
515
|
+
)
|
|
508
516
|
|
|
509
517
|
def univariate_analysis(
|
|
510
518
|
self,
|
|
@@ -618,7 +626,6 @@ class Panel:
|
|
|
618
626
|
industry_weighed_method: Literal["value", "equal"] = "value",
|
|
619
627
|
is_winsorize: bool = False,
|
|
620
628
|
is_normalize: bool = False,
|
|
621
|
-
dummy_no_norm: Optional[list[str] | str] = None,
|
|
622
629
|
decimal: Optional[int] = None,
|
|
623
630
|
return_intermediate: bool = False,
|
|
624
631
|
) -> DataFrame:
|
|
@@ -639,8 +646,6 @@ class Panel:
|
|
|
639
646
|
industry_weighed_method (Literal["value", "equal"]): Method for weighting industries.
|
|
640
647
|
is_winsorize (bool): Indicates whether to apply winsorization.
|
|
641
648
|
is_normalize (bool): Indicates whether to normalize exogenous variables.
|
|
642
|
-
dummy_no_norm (Optional[list[str] | str]): Name(s) of dummy variables (e.g., 0 or 1)
|
|
643
|
-
that should be excluded from normalization.
|
|
644
649
|
decimal (Optional[int]): Number of decimal places for rounding in output.
|
|
645
650
|
return_intermediate (bool): If True, returns the intermediate results (e.g., coefficients for each time period).
|
|
646
651
|
|
|
@@ -658,7 +663,6 @@ class Panel:
|
|
|
658
663
|
industry_weighed_method=industry_weighed_method,
|
|
659
664
|
is_winsorize=is_winsorize,
|
|
660
665
|
is_normalize=is_normalize,
|
|
661
|
-
dummy_no_norm=dummy_no_norm,
|
|
662
666
|
decimal=decimal,
|
|
663
667
|
return_intermediate=return_intermediate,
|
|
664
668
|
)
|
|
@@ -739,30 +743,37 @@ if __name__ == "__main__":
|
|
|
739
743
|
panel.winsorize(method="winsorize", group_columns="date")
|
|
740
744
|
pp(panel)
|
|
741
745
|
|
|
742
|
-
summary = panel.summary()
|
|
743
|
-
pp(summary)
|
|
744
|
-
|
|
745
|
-
correlation = panel.correlation()
|
|
746
|
-
pp(correlation)
|
|
747
|
-
|
|
748
|
-
persistence = panel.persistence(periods=[1, 3, 6, 12, 36, 60])
|
|
749
|
-
pp(persistence)
|
|
750
|
-
pp(
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
)
|
|
746
|
+
# summary = panel.summary()
|
|
747
|
+
# pp(summary)
|
|
748
|
+
|
|
749
|
+
# correlation = panel.correlation()
|
|
750
|
+
# pp(correlation)
|
|
751
|
+
|
|
752
|
+
# persistence = panel.persistence(periods=[1, 3, 6, 12, 36, 60])
|
|
753
|
+
# pp(persistence)
|
|
754
|
+
# pp(
|
|
755
|
+
# panel.transition_matrix(
|
|
756
|
+
# var="MktCap",
|
|
757
|
+
# group=10,
|
|
758
|
+
# lag=12,
|
|
759
|
+
# draw=False,
|
|
760
|
+
# # path=str(resources.files("anomalylab.datasets")) + "/transition_matrix.png",
|
|
761
|
+
# path="...",
|
|
762
|
+
# decimal=2,
|
|
763
|
+
# )
|
|
764
|
+
# )
|
|
761
765
|
|
|
762
|
-
|
|
766
|
+
panel.group("return", "MktCap", "Illiq", 10, inplace=True)
|
|
763
767
|
|
|
764
768
|
uni_ew, uni_vw = panel.univariate_analysis(
|
|
765
|
-
"return",
|
|
769
|
+
"return",
|
|
770
|
+
"MktCap",
|
|
771
|
+
"Illiq",
|
|
772
|
+
10,
|
|
773
|
+
Models,
|
|
774
|
+
time_series,
|
|
775
|
+
factor_return=False,
|
|
776
|
+
already_grouped=True,
|
|
766
777
|
)
|
|
767
778
|
pp(uni_ew)
|
|
768
779
|
pp(uni_vw)
|
|
@@ -92,7 +92,6 @@ class FamaMacBethRegression(Empirical):
|
|
|
92
92
|
df: DataFrame,
|
|
93
93
|
reg: RegModel,
|
|
94
94
|
is_normalize: bool,
|
|
95
|
-
dummy_no_norm: list[str] = [],
|
|
96
95
|
return_intermediate: bool = False,
|
|
97
96
|
) -> RegResult:
|
|
98
97
|
"""Performs Fama-MacBeth regression on the provided DataFrame.
|
|
@@ -105,9 +104,6 @@ class FamaMacBethRegression(Empirical):
|
|
|
105
104
|
df (DataFrame): DataFrame containing the data for regression.
|
|
106
105
|
reg (RegModel): Model specification containing endogenous and exogenous variables.
|
|
107
106
|
is_normalize (bool): Indicates whether to normalize the exogenous variables.
|
|
108
|
-
dummy_no_norm (list[str]): List of variable names that should be excluded from normalization.
|
|
109
|
-
Typically, these are dummy variables that take values like 0 or 1,
|
|
110
|
-
where normalization may not make sense.
|
|
111
107
|
return_intermediate (bool): If True, returns intermediate regression results
|
|
112
108
|
(e.g., coefficients, t-values, and R²) for each time period.
|
|
113
109
|
|
|
@@ -121,26 +117,9 @@ class FamaMacBethRegression(Empirical):
|
|
|
121
117
|
df = df.groupby(self.time).filter(lambda x: len(x) > 1)
|
|
122
118
|
lag: int = math.ceil(4 * (df[self.time].nunique() / 100) ** (4 / 25))
|
|
123
119
|
if is_normalize:
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
col for col in exogenous if col not in dummy_no_norm
|
|
128
|
-
]
|
|
129
|
-
|
|
130
|
-
for col in variables_to_normalize:
|
|
131
|
-
grouped_std = df.groupby(self.time)[col].std()
|
|
132
|
-
|
|
133
|
-
if (grouped_std == 0).any():
|
|
134
|
-
problematic_group = grouped_std[grouped_std == 0].index.tolist()
|
|
135
|
-
raise ValueError(
|
|
136
|
-
f"Standard deviation is 0 for variable '{col}' in group(s) {problematic_group}. "
|
|
137
|
-
f"Cannot normalize this variable. If '{col}' is a dummy variable, please add it to `dummy_no_norm`."
|
|
138
|
-
)
|
|
139
|
-
|
|
140
|
-
if variables_to_normalize:
|
|
141
|
-
df[variables_to_normalize] = df.groupby(self.time)[
|
|
142
|
-
variables_to_normalize
|
|
143
|
-
].transform(func=lambda x: (x - x.mean()) / x.std())
|
|
120
|
+
df[exogenous] = df.groupby(self.time)[exogenous].transform(
|
|
121
|
+
func=lambda x: (x - x.mean()) / x.std()
|
|
122
|
+
)
|
|
144
123
|
|
|
145
124
|
df[self.time] = df[self.time].dt.to_timestamp()
|
|
146
125
|
df = df.set_index([self.id, self.time])
|
|
@@ -172,7 +151,7 @@ class FamaMacBethRegression(Empirical):
|
|
|
172
151
|
params=fmb.params,
|
|
173
152
|
tvalues=fmb.tstats,
|
|
174
153
|
pvalues=fmb.pvalues,
|
|
175
|
-
mean_obs=str(
|
|
154
|
+
mean_obs=str(round(fmb.time_info["mean"])),
|
|
176
155
|
rsquared=(
|
|
177
156
|
df.reset_index(level=df.index.names[0], drop=True)
|
|
178
157
|
.groupby(self.time)
|
|
@@ -301,7 +280,6 @@ class FamaMacBethRegression(Empirical):
|
|
|
301
280
|
industry_weighed_method: Literal["value", "equal"] = "value",
|
|
302
281
|
is_winsorize: bool = False,
|
|
303
282
|
is_normalize: bool = False,
|
|
304
|
-
dummy_no_norm: Optional[list[str] | str] = None,
|
|
305
283
|
decimal: Optional[int] = None,
|
|
306
284
|
return_intermediate: bool = False, # New parameter to control whether intermediate results are returned
|
|
307
285
|
) -> DataFrame:
|
|
@@ -322,8 +300,6 @@ class FamaMacBethRegression(Empirical):
|
|
|
322
300
|
industry_weighed_method (Literal["value", "equal"]): Method for weighting industries.
|
|
323
301
|
is_winsorize (bool): Indicates whether to apply winsorization.
|
|
324
302
|
is_normalize (bool): Indicates whether to normalize exogenous variables.
|
|
325
|
-
dummy_no_norm (Optional[list[str] | str]): Name(s) of dummy variables (e.g., 0 or 1)
|
|
326
|
-
that should be excluded from normalization.
|
|
327
303
|
decimal (Optional[int]): Number of decimal places for rounding in output.
|
|
328
304
|
return_intermediate (bool): If True, returns the intermediate results (e.g., coefficients for each time period).
|
|
329
305
|
|
|
@@ -334,7 +310,6 @@ class FamaMacBethRegression(Empirical):
|
|
|
334
310
|
reg_models: RegModels = self._model_parse(
|
|
335
311
|
regs=regs, endog=endog, exog=columns_to_list(exog)
|
|
336
312
|
)
|
|
337
|
-
dummy_no_norm = columns_to_list(dummy_no_norm)
|
|
338
313
|
self._winsorize(is_winsorize=is_winsorize, exog=reg_models.exogenous)
|
|
339
314
|
self._industry_weighted(
|
|
340
315
|
endog=reg_models.dependent,
|
|
@@ -350,7 +325,6 @@ class FamaMacBethRegression(Empirical):
|
|
|
350
325
|
df=self.panel_data.df,
|
|
351
326
|
reg=model,
|
|
352
327
|
is_normalize=is_normalize,
|
|
353
|
-
dummy_no_norm=dummy_no_norm,
|
|
354
328
|
return_intermediate=True,
|
|
355
329
|
)
|
|
356
330
|
for model in reg_models.models
|
|
@@ -366,7 +340,6 @@ class FamaMacBethRegression(Empirical):
|
|
|
366
340
|
df=self.panel_data.df,
|
|
367
341
|
reg=model,
|
|
368
342
|
is_normalize=is_normalize,
|
|
369
|
-
dummy_no_norm=dummy_no_norm,
|
|
370
343
|
return_intermediate=False,
|
|
371
344
|
),
|
|
372
345
|
decimal=decimal or self.decimal,
|
|
@@ -112,9 +112,17 @@ class PanelData(Data):
|
|
|
112
112
|
"""Check if the required columns are present in the DataFrame.
|
|
113
113
|
|
|
114
114
|
Raises:
|
|
115
|
+
ValueError: If any duplicate column names are found in the DataFrame.
|
|
115
116
|
ValueError: If any required columns are missing from the DataFrame.
|
|
116
117
|
ValueError: If there are no firm characteristics remaining after checking.
|
|
117
118
|
"""
|
|
119
|
+
# Check for duplicate column names
|
|
120
|
+
duplicated_columns = self.df.columns[self.df.columns.duplicated()].tolist()
|
|
121
|
+
if duplicated_columns:
|
|
122
|
+
raise ValueError(
|
|
123
|
+
f"Duplicate column names found in the DataFrame: {duplicated_columns}"
|
|
124
|
+
)
|
|
125
|
+
|
|
118
126
|
if isinstance(self.classifications, str):
|
|
119
127
|
self.classifications = [self.classifications]
|
|
120
128
|
# Check if the required columns are present in the DataFrame
|
|
@@ -49,9 +49,17 @@ class TimeSeries(Data):
|
|
|
49
49
|
Check if the required column is present in the DataFrame and ensure there are additional columns.
|
|
50
50
|
|
|
51
51
|
Raises:
|
|
52
|
+
ValueError: If duplicate column names are found in the DataFrame.
|
|
52
53
|
ValueError: If the time column is missing from the DataFrame.
|
|
53
54
|
ValueError: If there are no additional columns for factor returns.
|
|
54
55
|
"""
|
|
56
|
+
# Check for duplicate column names
|
|
57
|
+
duplicated_columns = self.df.columns[self.df.columns.duplicated()].tolist()
|
|
58
|
+
if duplicated_columns:
|
|
59
|
+
raise ValueError(
|
|
60
|
+
f"Duplicate column names found in the DataFrame: {duplicated_columns}"
|
|
61
|
+
)
|
|
62
|
+
|
|
55
63
|
if self.time not in self.df.columns:
|
|
56
64
|
raise ValueError(f"Missing column in the DataFrame: {self.time}")
|
|
57
65
|
|
anomalylab-0.4.0/.png
DELETED
|
Binary file
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|