AnomalyLab 0.3.3__tar.gz → 0.3.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {anomalylab-0.3.3 → anomalylab-0.3.4/AnomalyLab.egg-info}/PKG-INFO +1 -1
- {anomalylab-0.3.3 → anomalylab-0.3.4}/AnomalyLab.egg-info/SOURCES.txt +0 -1
- {anomalylab-0.3.3/AnomalyLab.egg-info → anomalylab-0.3.4}/PKG-INFO +1 -1
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/core/core.py +2 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/empirical/correlation.py +3 -1
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/empirical/fm_regression.py +36 -7
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/empirical/persistence.py +3 -2
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/empirical/portfolio.py +3 -1
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/empirical/summary.py +3 -1
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/preprocess/normalize.py +3 -1
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/preprocess/outliers.py +3 -2
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/preprocess/shift.py +3 -1
- {anomalylab-0.3.3 → anomalylab-0.3.4}/setup.py +1 -1
- anomalylab-0.3.3/anomalylab/empirical/factor_return.py +0 -43
- {anomalylab-0.3.3 → anomalylab-0.3.4}/.gitattributes +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/.github/workflows/python-publish.yml +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/.gitignore +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/AnomalyLab.egg-info/dependency_links.txt +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/AnomalyLab.egg-info/requires.txt +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/AnomalyLab.egg-info/top_level.txt +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/LICENSE +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/MANIFEST.in +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/README.md +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/__init__.py +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/config.py +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/core/__init__.py +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/datasets/__init__.py +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/datasets/dataset.py +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/datasets/panel_data.csv +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/datasets/time_series_data.csv +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/datasets/transition_matrix.png +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/empirical/__init__.py +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/empirical/empirical.py +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/preprocess/__init__.py +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/preprocess/fillna.py +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/preprocess/preprocessor.py +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/preprocess/truncate.py +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/structure/__init__.py +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/structure/data.py +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/structure/panel_data.py +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/structure/time_series.py +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/utils/__init__.py +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/utils/imports.py +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/utils/utils.py +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/visualization/__init__.py +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/anomalylab/visualization/format.py +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/requirements.txt +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/setup.cfg +0 -0
- {anomalylab-0.3.3 → anomalylab-0.3.4}/tests/__init__.py +0 -0
|
@@ -23,7 +23,6 @@ anomalylab/datasets/transition_matrix.png
|
|
|
23
23
|
anomalylab/empirical/__init__.py
|
|
24
24
|
anomalylab/empirical/correlation.py
|
|
25
25
|
anomalylab/empirical/empirical.py
|
|
26
|
-
anomalylab/empirical/factor_return.py
|
|
27
26
|
anomalylab/empirical/fm_regression.py
|
|
28
27
|
anomalylab/empirical/persistence.py
|
|
29
28
|
anomalylab/empirical/portfolio.py
|
|
@@ -335,6 +335,7 @@ class Panel:
|
|
|
335
335
|
industry_weighed_method: Literal["value", "equal"] = "value",
|
|
336
336
|
is_winsorize: bool = False,
|
|
337
337
|
is_normalize: bool = False,
|
|
338
|
+
dummy_no_norm: Optional[list[str] | str] = None,
|
|
338
339
|
decimal: Optional[int] = None,
|
|
339
340
|
return_intermediate: bool = False,
|
|
340
341
|
) -> DataFrame:
|
|
@@ -349,6 +350,7 @@ class Panel:
|
|
|
349
350
|
industry_weighed_method=industry_weighed_method,
|
|
350
351
|
is_winsorize=is_winsorize,
|
|
351
352
|
is_normalize=is_normalize,
|
|
353
|
+
dummy_no_norm=dummy_no_norm,
|
|
352
354
|
decimal=decimal,
|
|
353
355
|
return_intermediate=return_intermediate,
|
|
354
356
|
)
|
|
@@ -85,6 +85,8 @@ if __name__ == "__main__":
|
|
|
85
85
|
|
|
86
86
|
df: DataFrame = DataSet.get_panel_data()
|
|
87
87
|
|
|
88
|
-
panel: PanelData = PanelData(
|
|
88
|
+
panel: PanelData = PanelData(
|
|
89
|
+
df=df, name="Stocks", ret="return", classifications="industry"
|
|
90
|
+
)
|
|
89
91
|
correlation: Correlation = Correlation(panel_data=panel)
|
|
90
92
|
pp(correlation.average_correlation())
|
|
@@ -92,6 +92,7 @@ class FamaMacBethRegression(Empirical):
|
|
|
92
92
|
df: DataFrame,
|
|
93
93
|
reg: RegModel,
|
|
94
94
|
is_normalize: bool,
|
|
95
|
+
dummy_no_norm: list[str] = [],
|
|
95
96
|
return_intermediate: bool = False,
|
|
96
97
|
) -> RegResult:
|
|
97
98
|
"""Performs Fama-MacBeth regression on the provided DataFrame.
|
|
@@ -104,6 +105,11 @@ class FamaMacBethRegression(Empirical):
|
|
|
104
105
|
df (DataFrame): DataFrame containing the data for regression.
|
|
105
106
|
reg (RegModel): Model specification containing endogenous and exogenous variables.
|
|
106
107
|
is_normalize (bool): Indicates whether to normalize the exogenous variables.
|
|
108
|
+
dummy_no_norm (list[str]): List of variable names that should be excluded from normalization.
|
|
109
|
+
Typically, these are dummy variables that take values like 0 or 1,
|
|
110
|
+
where normalization may not make sense.
|
|
111
|
+
return_intermediate (bool): If True, returns intermediate regression results
|
|
112
|
+
(e.g., coefficients, t-values, and R²) for each time period.
|
|
107
113
|
|
|
108
114
|
Returns:
|
|
109
115
|
RegResult: Results of the regression including parameters, t-values, p-values, and adjusted R².
|
|
@@ -115,9 +121,27 @@ class FamaMacBethRegression(Empirical):
|
|
|
115
121
|
df = df.groupby(self.time).filter(lambda x: len(x) > 1)
|
|
116
122
|
lag: int = math.ceil(4 * (df[self.time].nunique() / 100) ** (4 / 25))
|
|
117
123
|
if is_normalize:
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
124
|
+
dummy_no_norm = [col for col in dummy_no_norm if col in exogenous]
|
|
125
|
+
|
|
126
|
+
variables_to_normalize = [
|
|
127
|
+
col for col in exogenous if col not in dummy_no_norm
|
|
128
|
+
]
|
|
129
|
+
|
|
130
|
+
for col in variables_to_normalize:
|
|
131
|
+
grouped_std = df.groupby(self.time)[col].std()
|
|
132
|
+
|
|
133
|
+
if (grouped_std == 0).any():
|
|
134
|
+
problematic_group = grouped_std[grouped_std == 0].index.tolist()
|
|
135
|
+
raise ValueError(
|
|
136
|
+
f"Standard deviation is 0 for variable '{col}' in group(s) {problematic_group}. "
|
|
137
|
+
f"Cannot normalize this variable. If '{col}' is a dummy variable, please add it to `dummy_no_norm`."
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
if variables_to_normalize:
|
|
141
|
+
df[variables_to_normalize] = df.groupby(self.time)[
|
|
142
|
+
variables_to_normalize
|
|
143
|
+
].transform(func=lambda x: (x - x.mean()) / x.std())
|
|
144
|
+
|
|
121
145
|
df[self.time] = df[self.time].dt.to_timestamp()
|
|
122
146
|
df = df.set_index([self.id, self.time])
|
|
123
147
|
|
|
@@ -277,6 +301,7 @@ class FamaMacBethRegression(Empirical):
|
|
|
277
301
|
industry_weighed_method: Literal["value", "equal"] = "value",
|
|
278
302
|
is_winsorize: bool = False,
|
|
279
303
|
is_normalize: bool = False,
|
|
304
|
+
dummy_no_norm: Optional[list[str] | str] = None,
|
|
280
305
|
decimal: Optional[int] = None,
|
|
281
306
|
return_intermediate: bool = False, # New parameter to control whether intermediate results are returned
|
|
282
307
|
) -> DataFrame:
|
|
@@ -297,6 +322,8 @@ class FamaMacBethRegression(Empirical):
|
|
|
297
322
|
industry_weighed_method (Literal["value", "equal"]): Method for weighting industries.
|
|
298
323
|
is_winsorize (bool): Indicates whether to apply winsorization.
|
|
299
324
|
is_normalize (bool): Indicates whether to normalize exogenous variables.
|
|
325
|
+
dummy_no_norm (Optional[list[str] | str]): Name(s) of dummy variables (e.g., 0 or 1)
|
|
326
|
+
that should be excluded from normalization.
|
|
300
327
|
decimal (Optional[int]): Number of decimal places for rounding in output.
|
|
301
328
|
return_intermediate (bool): If True, returns the intermediate results (e.g., coefficients for each time period).
|
|
302
329
|
|
|
@@ -307,6 +334,7 @@ class FamaMacBethRegression(Empirical):
|
|
|
307
334
|
reg_models: RegModels = self._model_parse(
|
|
308
335
|
regs=regs, endog=endog, exog=columns_to_list(exog)
|
|
309
336
|
)
|
|
337
|
+
dummy_no_norm = columns_to_list(dummy_no_norm)
|
|
310
338
|
self._winsorize(is_winsorize=is_winsorize, exog=reg_models.exogenous)
|
|
311
339
|
self._industry_weighted(
|
|
312
340
|
endog=reg_models.dependent,
|
|
@@ -361,8 +389,9 @@ if __name__ == "__main__":
|
|
|
361
389
|
|
|
362
390
|
df: DataFrame = DataSet.get_panel_data()
|
|
363
391
|
|
|
364
|
-
panel: PanelData = PanelData(
|
|
365
|
-
|
|
392
|
+
panel: PanelData = PanelData(
|
|
393
|
+
df=df, name="Stocks", ret="return", classifications="industry"
|
|
394
|
+
)
|
|
366
395
|
fm = FamaMacBethRegression(panel_data=panel)
|
|
367
396
|
result = fm.fit(
|
|
368
397
|
# endog="return",
|
|
@@ -386,8 +415,8 @@ if __name__ == "__main__":
|
|
|
386
415
|
# weight="MktCap",
|
|
387
416
|
is_winsorize=True,
|
|
388
417
|
is_normalize=True,
|
|
389
|
-
return_intermediate=
|
|
418
|
+
return_intermediate=False,
|
|
390
419
|
# decimal=2,
|
|
391
420
|
)
|
|
392
421
|
pp(result)
|
|
393
|
-
pp(result[0])
|
|
422
|
+
# pp(result[0])
|
|
@@ -204,8 +204,9 @@ if __name__ == "__main__":
|
|
|
204
204
|
|
|
205
205
|
df: DataFrame = DataSet.get_panel_data()
|
|
206
206
|
|
|
207
|
-
panel: PanelData = PanelData(
|
|
208
|
-
|
|
207
|
+
panel: PanelData = PanelData(
|
|
208
|
+
df=df, name="Stocks", ret="return", classifications="industry"
|
|
209
|
+
)
|
|
209
210
|
persistence = Persistence(panel)
|
|
210
211
|
pp(persistence.average_persistence(periods=[1, 3, 6, 12, 36, 60]))
|
|
211
212
|
pp(
|
|
@@ -680,7 +680,9 @@ if __name__ == "__main__":
|
|
|
680
680
|
"FF5": ["MKT(5F)", "SMB(5F)", "HML(5F)", "RMW(5F)", "CMA(5F)"],
|
|
681
681
|
}
|
|
682
682
|
|
|
683
|
-
panel: PanelData = PanelData(
|
|
683
|
+
panel: PanelData = PanelData(
|
|
684
|
+
df=df, name="Stocks", ret="return", classifications="industry"
|
|
685
|
+
)
|
|
684
686
|
time_series: TimeSeries = TimeSeries(df=ts, name="Factor Series")
|
|
685
687
|
|
|
686
688
|
portfolio = PortfolioAnalysis(
|
|
@@ -125,7 +125,9 @@ if __name__ == "__main__":
|
|
|
125
125
|
|
|
126
126
|
df: DataFrame = DataSet.get_panel_data()
|
|
127
127
|
|
|
128
|
-
panel: PanelData = PanelData(
|
|
128
|
+
panel: PanelData = PanelData(
|
|
129
|
+
df=df, name="Stocks", ret="return", classifications="industry"
|
|
130
|
+
)
|
|
129
131
|
summary = Summary(panel_data=panel)
|
|
130
132
|
pp(
|
|
131
133
|
summary.average_statistics(
|
|
@@ -157,7 +157,9 @@ if __name__ == "__main__":
|
|
|
157
157
|
|
|
158
158
|
df: DataFrame = DataSet.get_panel_data()
|
|
159
159
|
|
|
160
|
-
panel: PanelData = PanelData(
|
|
160
|
+
panel: PanelData = PanelData(
|
|
161
|
+
df=df, name="Stocks", ret="return", classifications="industry"
|
|
162
|
+
)
|
|
161
163
|
norm: Normalize = Normalize(panel_data=panel)
|
|
162
164
|
norm.normalize(
|
|
163
165
|
# columns="MktCap",
|
|
@@ -193,8 +193,9 @@ if __name__ == "__main__":
|
|
|
193
193
|
|
|
194
194
|
df: DataFrame = DataSet.get_panel_data()
|
|
195
195
|
|
|
196
|
-
panel: PanelData = PanelData(
|
|
197
|
-
|
|
196
|
+
panel: PanelData = PanelData(
|
|
197
|
+
df=df, name="Stocks", ret="return", classifications="industry"
|
|
198
|
+
)
|
|
198
199
|
winsorize = OutlierHandler(panel_data=panel)
|
|
199
200
|
winsorize.winsorize(
|
|
200
201
|
# columns="MktCap",
|
|
@@ -110,7 +110,9 @@ if __name__ == "__main__":
|
|
|
110
110
|
|
|
111
111
|
df: DataFrame = DataSet.get_panel_data()
|
|
112
112
|
|
|
113
|
-
panel: PanelData = PanelData(
|
|
113
|
+
panel: PanelData = PanelData(
|
|
114
|
+
df=df, name="Stocks", ret="return", classifications="industry"
|
|
115
|
+
)
|
|
114
116
|
shift = Shift(panel_data=panel)
|
|
115
117
|
shift.shift(
|
|
116
118
|
# columns="MktCap",
|
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
from pandas.core.frame import DataFrame
|
|
2
|
-
|
|
3
|
-
from anomalylab.config import *
|
|
4
|
-
from anomalylab.empirical.empirical import Empirical
|
|
5
|
-
from anomalylab.empirical.portfolio import PortfolioAnalysis
|
|
6
|
-
from anomalylab.structure import PanelData, TimeSeries
|
|
7
|
-
from anomalylab.utils.imports import *
|
|
8
|
-
from anomalylab.utils.utils import *
|
|
9
|
-
|
|
10
|
-
if __name__ == "__main__":
|
|
11
|
-
from anomalylab.datasets import DataSet
|
|
12
|
-
|
|
13
|
-
df: DataFrame = DataSet.get_panel_data()
|
|
14
|
-
ts: DataFrame = DataSet.get_time_series_data()
|
|
15
|
-
Models: dict[str, list[str]] = {
|
|
16
|
-
"CAPM": ["MKT(3F)"],
|
|
17
|
-
"FF3": ["MKT(3F)", "SMB(3F)", "HML(3F)"],
|
|
18
|
-
"FF5": ["MKT(5F)", "SMB(5F)", "HML(5F)", "RMW(5F)", "CMA(5F)"],
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
panel: PanelData = PanelData(df=df, name="Stocks", classifications="industry")
|
|
22
|
-
time_series: TimeSeries = TimeSeries(df=ts, name="Factor Series")
|
|
23
|
-
|
|
24
|
-
portfolio = PortfolioAnalysis(
|
|
25
|
-
panel,
|
|
26
|
-
endog="return",
|
|
27
|
-
weight="MktCap",
|
|
28
|
-
# models=Models,
|
|
29
|
-
# factors_series=time_series,
|
|
30
|
-
)
|
|
31
|
-
|
|
32
|
-
group = portfolio.GroupN(["MktCap", "Illiq", "IdioVol"], [3, 3, 3])
|
|
33
|
-
pp(group)
|
|
34
|
-
|
|
35
|
-
# uni_ew, uni_vw = portfolio.univariate_analysis("Illiq", 10)
|
|
36
|
-
# pp(uni_ew)
|
|
37
|
-
# pp(uni_vw)
|
|
38
|
-
|
|
39
|
-
# bi_ew, bi_vw = portfolio.bivariate_analysis(
|
|
40
|
-
# "Illiq", "IdioVol", 10, 10, True, False, "dependent"
|
|
41
|
-
# )
|
|
42
|
-
# pp(bi_ew)
|
|
43
|
-
# pp(bi_vw)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|