AnomalyLab 0.4.2__tar.gz → 0.5.0__tar.gz
This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {anomalylab-0.4.2 → anomalylab-0.5.0/AnomalyLab.egg-info}/PKG-INFO +1 -1
- {anomalylab-0.4.2 → anomalylab-0.5.0}/AnomalyLab.egg-info/SOURCES.txt +0 -2
- {anomalylab-0.4.2/AnomalyLab.egg-info → anomalylab-0.5.0}/PKG-INFO +1 -1
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/__init__.py +1 -2
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/core/core.py +23 -10
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/datasets/dataset.py +4 -2
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/empirical/correlation.py +7 -3
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/empirical/empirical.py +2 -3
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/empirical/fm_regression.py +27 -7
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/empirical/persistence.py +10 -5
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/empirical/portfolio.py +83 -29
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/empirical/summary.py +6 -4
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/preprocess/fillna.py +9 -5
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/preprocess/normalize.py +42 -8
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/preprocess/outliers.py +8 -2
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/preprocess/preprocessor.py +4 -3
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/preprocess/shift.py +6 -2
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/preprocess/truncate.py +2 -6
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/structure/data.py +7 -2
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/structure/panel_data.py +12 -4
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/structure/time_series.py +11 -4
- anomalylab-0.5.0/anomalylab/utils/__init__.py +25 -0
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/utils/utils.py +8 -1
- anomalylab-0.5.0/anomalylab/visualization/__init__.py +5 -0
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/visualization/format.py +8 -3
- {anomalylab-0.4.2 → anomalylab-0.5.0}/setup.py +1 -1
- anomalylab-0.4.2/anomalylab/config.py +0 -1
- anomalylab-0.4.2/anomalylab/utils/__init__.py +0 -14
- anomalylab-0.4.2/anomalylab/utils/imports.py +0 -58
- anomalylab-0.4.2/anomalylab/visualization/__init__.py +0 -5
- {anomalylab-0.4.2 → anomalylab-0.5.0}/.gitattributes +0 -0
- {anomalylab-0.4.2 → anomalylab-0.5.0}/.github/workflows/python-publish.yml +0 -0
- {anomalylab-0.4.2 → anomalylab-0.5.0}/.gitignore +0 -0
- {anomalylab-0.4.2 → anomalylab-0.5.0}/AnomalyLab.egg-info/dependency_links.txt +0 -0
- {anomalylab-0.4.2 → anomalylab-0.5.0}/AnomalyLab.egg-info/requires.txt +0 -0
- {anomalylab-0.4.2 → anomalylab-0.5.0}/AnomalyLab.egg-info/top_level.txt +0 -0
- {anomalylab-0.4.2 → anomalylab-0.5.0}/LICENSE +0 -0
- {anomalylab-0.4.2 → anomalylab-0.5.0}/MANIFEST.in +0 -0
- {anomalylab-0.4.2 → anomalylab-0.5.0}/README.md +0 -0
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/core/__init__.py +0 -0
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/datasets/__init__.py +0 -0
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/datasets/panel_data.csv +0 -0
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/datasets/time_series_data.csv +0 -0
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/empirical/__init__.py +0 -0
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/preprocess/__init__.py +0 -0
- {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/structure/__init__.py +0 -0
- {anomalylab-0.4.2 → anomalylab-0.5.0}/requirements.txt +0 -0
- {anomalylab-0.4.2 → anomalylab-0.5.0}/setup.cfg +0 -0
- {anomalylab-0.4.2 → anomalylab-0.5.0}/tests/__init__.py +0 -0
|
@@ -12,7 +12,6 @@ AnomalyLab.egg-info/dependency_links.txt
|
|
|
12
12
|
AnomalyLab.egg-info/requires.txt
|
|
13
13
|
AnomalyLab.egg-info/top_level.txt
|
|
14
14
|
anomalylab/__init__.py
|
|
15
|
-
anomalylab/config.py
|
|
16
15
|
anomalylab/core/__init__.py
|
|
17
16
|
anomalylab/core/core.py
|
|
18
17
|
anomalylab/datasets/__init__.py
|
|
@@ -38,7 +37,6 @@ anomalylab/structure/data.py
|
|
|
38
37
|
anomalylab/structure/panel_data.py
|
|
39
38
|
anomalylab/structure/time_series.py
|
|
40
39
|
anomalylab/utils/__init__.py
|
|
41
|
-
anomalylab/utils/imports.py
|
|
42
40
|
anomalylab/utils/utils.py
|
|
43
41
|
anomalylab/visualization/__init__.py
|
|
44
42
|
anomalylab/visualization/format.py
|
|
@@ -11,8 +11,7 @@ from anomalylab.empirical import (
|
|
|
11
11
|
)
|
|
12
12
|
from anomalylab.preprocess import FillNa, Normalize, OutlierHandler, Shift
|
|
13
13
|
from anomalylab.structure import PanelData, TimeSeries
|
|
14
|
-
from anomalylab.utils import
|
|
15
|
-
from anomalylab.utils.imports import *
|
|
14
|
+
from anomalylab.utils import pp
|
|
16
15
|
from anomalylab.visualization import FormatExcel
|
|
17
16
|
|
|
18
17
|
__all__: list[str] = [
|
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from importlib import resources # noqa: F401
|
|
5
|
+
from typing import Literal, Optional, Union
|
|
6
|
+
|
|
7
|
+
import pandas as pd
|
|
8
|
+
from pandas import DataFrame
|
|
4
9
|
|
|
5
|
-
from anomalylab.config import *
|
|
6
10
|
from anomalylab.empirical import (
|
|
7
11
|
Correlation,
|
|
8
12
|
FamaMacBethRegression,
|
|
@@ -12,8 +16,7 @@ from anomalylab.empirical import (
|
|
|
12
16
|
)
|
|
13
17
|
from anomalylab.preprocess import FillNa, Normalize, OutlierHandler, Shift
|
|
14
18
|
from anomalylab.structure import PanelData, TimeSeries
|
|
15
|
-
from anomalylab.utils import
|
|
16
|
-
from anomalylab.utils.imports import *
|
|
19
|
+
from anomalylab.utils import Columns, Scalar, pp
|
|
17
20
|
from anomalylab.visualization import FormatExcel
|
|
18
21
|
|
|
19
22
|
|
|
@@ -126,8 +129,7 @@ class Panel:
|
|
|
126
129
|
return self._fm_preprocessor
|
|
127
130
|
|
|
128
131
|
def format_preprocessor(self, path: str) -> FormatExcel:
|
|
129
|
-
|
|
130
|
-
self._format_preprocessor = FormatExcel(path=path)
|
|
132
|
+
self._format_preprocessor = FormatExcel(path=path)
|
|
131
133
|
return self._format_preprocessor
|
|
132
134
|
|
|
133
135
|
def normalize(
|
|
@@ -137,6 +139,7 @@ class Panel:
|
|
|
137
139
|
group_columns: Columns = None,
|
|
138
140
|
no_process_columns: Columns = None,
|
|
139
141
|
process_all_characteristics: bool = True,
|
|
142
|
+
fillna_zero_after_norm: bool = False,
|
|
140
143
|
) -> Panel:
|
|
141
144
|
"""
|
|
142
145
|
Normalizes specified columns of the DataFrame using the chosen method.
|
|
@@ -157,6 +160,8 @@ class Panel:
|
|
|
157
160
|
normalization. Defaults to None.
|
|
158
161
|
process_all_characteristics (bool, optional): Whether to process all
|
|
159
162
|
characteristics or not. Defaults to True.
|
|
163
|
+
fillna_zero_after_norm (bool): If True, fills NaN values with zero after normalization.
|
|
164
|
+
Defaults to False.
|
|
160
165
|
|
|
161
166
|
Returns:
|
|
162
167
|
Normalize: The instance of the Normalize class with updated state.
|
|
@@ -171,6 +176,7 @@ class Panel:
|
|
|
171
176
|
group_columns=group_columns,
|
|
172
177
|
no_process_columns=no_process_columns,
|
|
173
178
|
process_all_characteristics=process_all_characteristics,
|
|
179
|
+
fillna_zero_after_norm=fillna_zero_after_norm,
|
|
174
180
|
).panel_data
|
|
175
181
|
return self
|
|
176
182
|
|
|
@@ -482,7 +488,7 @@ class Panel:
|
|
|
482
488
|
groups: Union[int, list[int]],
|
|
483
489
|
sort_type: Literal["independent", "dependent"] = "independent",
|
|
484
490
|
inplace: bool = False,
|
|
485
|
-
) ->
|
|
491
|
+
) -> Optional[pd.DataFrame]:
|
|
486
492
|
"""Group variables into portfolios based on specified groups.
|
|
487
493
|
|
|
488
494
|
This method creates portfolios for the specified variables in the panel data.
|
|
@@ -526,6 +532,7 @@ class Panel:
|
|
|
526
532
|
decimal: Optional[int] = None,
|
|
527
533
|
factor_return: bool = False,
|
|
528
534
|
already_grouped: bool = False,
|
|
535
|
+
is_endog_return: bool = True,
|
|
529
536
|
) -> tuple:
|
|
530
537
|
"""Perform univariate analysis on the specified core variable.
|
|
531
538
|
|
|
@@ -543,6 +550,7 @@ class Panel:
|
|
|
543
550
|
factor_return (bool): Whether to output factor returns in the analysis. Defaults to False.
|
|
544
551
|
already_grouped (bool): If True, skips the grouping step assuming data has been pre-grouped.
|
|
545
552
|
Defaults to False.
|
|
553
|
+
is_endog_return (bool): Whether the dependent variable is a return. Defaults to True.
|
|
546
554
|
|
|
547
555
|
Returns:
|
|
548
556
|
tuple: A tuple containing the equal-weighted and value-weighted results DataFrames.
|
|
@@ -556,6 +564,7 @@ class Panel:
|
|
|
556
564
|
decimal=decimal,
|
|
557
565
|
factor_return=factor_return,
|
|
558
566
|
already_grouped=already_grouped,
|
|
567
|
+
is_endog_return=is_endog_return,
|
|
559
568
|
)
|
|
560
569
|
|
|
561
570
|
def bivariate_analysis(
|
|
@@ -574,6 +583,7 @@ class Panel:
|
|
|
574
583
|
decimal: Optional[int] = None,
|
|
575
584
|
factor_return: bool = False,
|
|
576
585
|
already_grouped: bool = False,
|
|
586
|
+
is_endog_return: bool = True,
|
|
577
587
|
) -> tuple:
|
|
578
588
|
"""Perform bivariate analysis on two specified variables.
|
|
579
589
|
|
|
@@ -595,6 +605,7 @@ class Panel:
|
|
|
595
605
|
factor_return (bool): Whether to output factor returns in the analysis. Defaults to False.
|
|
596
606
|
already_grouped (bool): If True, skips the grouping step assuming data has been pre-grouped.
|
|
597
607
|
Defaults to False.
|
|
608
|
+
is_endog_return (bool): Whether the dependent variable is a return. Defaults to True.
|
|
598
609
|
|
|
599
610
|
Returns:
|
|
600
611
|
tuple: A tuple containing the equal-weighted and value-weighted results DataFrames.
|
|
@@ -612,6 +623,7 @@ class Panel:
|
|
|
612
623
|
decimal=decimal,
|
|
613
624
|
factor_return=factor_return,
|
|
614
625
|
already_grouped=already_grouped,
|
|
626
|
+
is_endog_return=is_endog_return,
|
|
615
627
|
)
|
|
616
628
|
|
|
617
629
|
def fm_reg(
|
|
@@ -731,6 +743,10 @@ if __name__ == "__main__":
|
|
|
731
743
|
# no_process_columns="MktCap",
|
|
732
744
|
# process_all_characteristics=True,
|
|
733
745
|
)
|
|
746
|
+
|
|
747
|
+
panel.winsorize(method="winsorize", group_columns="date")
|
|
748
|
+
pp(panel)
|
|
749
|
+
|
|
734
750
|
# panel.normalize(
|
|
735
751
|
# # columns="MktCap",
|
|
736
752
|
# method="zscore",
|
|
@@ -740,9 +756,6 @@ if __name__ == "__main__":
|
|
|
740
756
|
# )
|
|
741
757
|
# panel.shift(periods=1, drop_original=False)
|
|
742
758
|
|
|
743
|
-
panel.winsorize(method="winsorize", group_columns="date")
|
|
744
|
-
pp(panel)
|
|
745
|
-
|
|
746
759
|
# summary = panel.summary()
|
|
747
760
|
# pp(summary)
|
|
748
761
|
|
|
@@ -1,8 +1,12 @@
|
|
|
1
|
-
from
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
from pandas import DataFrame
|
|
6
|
+
|
|
2
7
|
from anomalylab.empirical.empirical import Empirical
|
|
3
8
|
from anomalylab.structure import PanelData
|
|
4
|
-
from anomalylab.utils
|
|
5
|
-
from anomalylab.utils.utils import *
|
|
9
|
+
from anomalylab.utils import Columns, columns_to_list, pp, round_to_string
|
|
6
10
|
|
|
7
11
|
|
|
8
12
|
@dataclass
|
|
@@ -1,9 +1,27 @@
|
|
|
1
|
-
|
|
1
|
+
import math
|
|
2
|
+
import warnings
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from functools import partial
|
|
5
|
+
from typing import Literal, Optional
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import pandas as pd
|
|
9
|
+
import statsmodels.api as sm
|
|
10
|
+
from linearmodels import FamaMacBeth
|
|
11
|
+
from pandas import DataFrame, Series
|
|
12
|
+
|
|
2
13
|
from anomalylab.empirical.empirical import Empirical
|
|
3
14
|
from anomalylab.preprocess import OutlierHandler
|
|
4
15
|
from anomalylab.structure import PanelData
|
|
5
|
-
from anomalylab.utils
|
|
6
|
-
|
|
16
|
+
from anomalylab.utils import (
|
|
17
|
+
RegModel,
|
|
18
|
+
RegModels,
|
|
19
|
+
RegResult,
|
|
20
|
+
columns_to_list,
|
|
21
|
+
get_significance_star,
|
|
22
|
+
pp,
|
|
23
|
+
round_to_string,
|
|
24
|
+
)
|
|
7
25
|
|
|
8
26
|
|
|
9
27
|
@dataclass
|
|
@@ -76,12 +94,14 @@ class FamaMacBethRegression(Empirical):
|
|
|
76
94
|
raise ValueError(
|
|
77
95
|
"When calculating the value-weighted industry return, the weight column must be specified!"
|
|
78
96
|
)
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
97
|
+
|
|
98
|
+
def func(x):
|
|
99
|
+
return np.average(
|
|
100
|
+
x, weights=self.panel_data.df.loc[x.index, weight]
|
|
101
|
+
)
|
|
82
102
|
else:
|
|
83
103
|
raise ValueError(
|
|
84
|
-
|
|
104
|
+
"industry_weighed_method must be one of ['value', 'equal']"
|
|
85
105
|
)
|
|
86
106
|
self.panel_data.df[endog] -= self.panel_data.df.groupby(
|
|
87
107
|
by=[self.time, industry]
|
|
@@ -1,11 +1,17 @@
|
|
|
1
|
-
from importlib import resources
|
|
1
|
+
from importlib import resources # noqa: F401
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
import matplotlib.pyplot as plt
|
|
6
|
+
import numpy as np
|
|
7
|
+
import pandas as pd
|
|
8
|
+
import seaborn as sns
|
|
9
|
+
from pandas import DataFrame
|
|
2
10
|
|
|
3
|
-
from anomalylab.config import *
|
|
4
11
|
from anomalylab.empirical.empirical import Empirical
|
|
5
12
|
from anomalylab.preprocess.shift import Shift
|
|
6
13
|
from anomalylab.structure import PanelData
|
|
7
|
-
from anomalylab.utils
|
|
8
|
-
from anomalylab.utils.utils import *
|
|
14
|
+
from anomalylab.utils import Columns, columns_to_list, pp, round_to_string
|
|
9
15
|
|
|
10
16
|
|
|
11
17
|
@dataclass
|
|
@@ -72,7 +78,6 @@ class Persistence(Empirical):
|
|
|
72
78
|
for var in columns:
|
|
73
79
|
all_monthly_corrs = []
|
|
74
80
|
for lag in periods:
|
|
75
|
-
|
|
76
81
|
# Store monthly correlations
|
|
77
82
|
monthly_corrs = []
|
|
78
83
|
|
|
@@ -1,10 +1,16 @@
|
|
|
1
|
-
|
|
1
|
+
import math
|
|
2
|
+
import warnings
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Literal, Optional, Union
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
import pandas as pd
|
|
8
|
+
import statsmodels.api as sm
|
|
9
|
+
from pandas import DataFrame, Series
|
|
2
10
|
|
|
3
|
-
from anomalylab.config import *
|
|
4
11
|
from anomalylab.empirical.empirical import Empirical
|
|
5
12
|
from anomalylab.structure import PanelData, TimeSeries
|
|
6
|
-
from anomalylab.utils
|
|
7
|
-
from anomalylab.utils.utils import *
|
|
13
|
+
from anomalylab.utils import pp, round_to_string
|
|
8
14
|
|
|
9
15
|
warnings.simplefilter(action="ignore", category=FutureWarning)
|
|
10
16
|
|
|
@@ -116,7 +122,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
116
122
|
group_col = [self.time]
|
|
117
123
|
for i, var in enumerate(vars):
|
|
118
124
|
if sort_type == "dependent" and i > 0:
|
|
119
|
-
group_col.append(f"{vars[i-1]}_g{groups[i-1]}")
|
|
125
|
+
group_col.append(f"{vars[i - 1]}_g{groups[i - 1]}")
|
|
120
126
|
out_df[f"{var}_g{groups[i]}"] = (
|
|
121
127
|
out_df.groupby(group_col, observed=False)[var]
|
|
122
128
|
.apply(
|
|
@@ -127,7 +133,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
127
133
|
)
|
|
128
134
|
)
|
|
129
135
|
.reset_index()
|
|
130
|
-
.set_index(f"level_{i+1}")
|
|
136
|
+
.set_index(f"level_{i + 1}")
|
|
131
137
|
.drop(group_col, axis=1)
|
|
132
138
|
)
|
|
133
139
|
else:
|
|
@@ -142,7 +148,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
142
148
|
)
|
|
143
149
|
)
|
|
144
150
|
.reset_index()
|
|
145
|
-
.set_index(
|
|
151
|
+
.set_index("level_1")
|
|
146
152
|
.drop(self.time, axis=1)
|
|
147
153
|
)
|
|
148
154
|
|
|
@@ -157,24 +163,33 @@ class PortfolioAnalysis(Empirical):
|
|
|
157
163
|
else:
|
|
158
164
|
return out_df
|
|
159
165
|
|
|
160
|
-
def _claculate_value(
|
|
166
|
+
def _claculate_value(
|
|
167
|
+
self, df: DataFrame, decimal: Optional[int] = None, is_endog_return: bool = True
|
|
168
|
+
) -> dict:
|
|
161
169
|
"""Calculate various portfolio performance metrics.
|
|
162
170
|
|
|
163
171
|
This method computes mean returns, t-values, Sharpe ratios, and model-adjusted alpha and t values.
|
|
164
172
|
|
|
165
173
|
Args:
|
|
166
174
|
df (DataFrame): The DataFrame containing the relevant data for calculations.
|
|
175
|
+
decimal (Optional[int]): The number of decimal places for formatting. Defaults to None.
|
|
176
|
+
is_endog_return (bool): Whether the dependent variable is a return. Defaults to True.
|
|
167
177
|
|
|
168
178
|
Returns:
|
|
169
179
|
dict: A dictionary containing computed metrics.
|
|
170
180
|
"""
|
|
171
|
-
stat_dict = self._calculate_mean_and_t_value(df)
|
|
172
|
-
factors_dict = self._calculate_alpha_and_t_value(df)
|
|
173
|
-
sharpe_dict = self._calculate_sharpe(df, decimal)
|
|
181
|
+
stat_dict = self._calculate_mean_and_t_value(df, is_endog_return)
|
|
174
182
|
|
|
175
|
-
|
|
183
|
+
if is_endog_return:
|
|
184
|
+
factors_dict = self._calculate_alpha_and_t_value(df)
|
|
185
|
+
sharpe_dict = self._calculate_sharpe(df, decimal)
|
|
186
|
+
return {**stat_dict, **factors_dict, **sharpe_dict}
|
|
187
|
+
|
|
188
|
+
return stat_dict
|
|
176
189
|
|
|
177
|
-
def _calculate_mean_and_t_value(
|
|
190
|
+
def _calculate_mean_and_t_value(
|
|
191
|
+
self, df: DataFrame, is_endog_return: bool = True
|
|
192
|
+
) -> dict:
|
|
178
193
|
"""Calculate mean and t-value for the dependent variable.
|
|
179
194
|
|
|
180
195
|
This method computes the mean return and its t-value assuming the null hypothesis
|
|
@@ -182,6 +197,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
182
197
|
|
|
183
198
|
Args:
|
|
184
199
|
df (DataFrame): The DataFrame containing the relevant data for calculations.
|
|
200
|
+
is_endog_return (bool): Whether the dependent variable is a return. Defaults to True.
|
|
185
201
|
|
|
186
202
|
Returns:
|
|
187
203
|
dict: A dictionary with mean, t-value, and p-value.
|
|
@@ -199,7 +215,9 @@ class PortfolioAnalysis(Empirical):
|
|
|
199
215
|
mean_value = reg.params[0]
|
|
200
216
|
t_value = reg.tvalues[0]
|
|
201
217
|
p_value = reg.pvalues[0]
|
|
202
|
-
|
|
218
|
+
|
|
219
|
+
key_name = "Return" if is_endog_return else self.endog
|
|
220
|
+
stat_dict[key_name] = mean_value
|
|
203
221
|
stat_dict["t"] = t_value
|
|
204
222
|
stat_dict["p"] = p_value
|
|
205
223
|
|
|
@@ -284,6 +302,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
284
302
|
decimal: Optional[int] = None,
|
|
285
303
|
factor_return: bool = False,
|
|
286
304
|
already_grouped: bool = False,
|
|
305
|
+
is_endog_return: bool = True,
|
|
287
306
|
) -> tuple:
|
|
288
307
|
"""Perform univariate analysis on the specified core variable.
|
|
289
308
|
|
|
@@ -299,6 +318,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
299
318
|
factor_return (bool): Whether to output factor returns in the analysis. Defaults to False.
|
|
300
319
|
already_grouped (bool): If True, skips the grouping step assuming data has been pre-grouped.
|
|
301
320
|
Defaults to False.
|
|
321
|
+
is_endog_return (bool): Whether the dependent variable is a return. Defaults to True.
|
|
302
322
|
|
|
303
323
|
Returns:
|
|
304
324
|
tuple: A tuple containing the equal-weighted and value-weighted results DataFrames.
|
|
@@ -399,10 +419,20 @@ class PortfolioAnalysis(Empirical):
|
|
|
399
419
|
results = {}
|
|
400
420
|
|
|
401
421
|
for key, sr in time_series_dict.items():
|
|
402
|
-
results[key] = self._claculate_value(
|
|
422
|
+
results[key] = self._claculate_value(
|
|
423
|
+
sr, decimal=decimal, is_endog_return=is_endog_return
|
|
424
|
+
)
|
|
403
425
|
|
|
426
|
+
key_name = "Return" if is_endog_return else self.endog
|
|
404
427
|
data = []
|
|
405
428
|
for key, values in results.items():
|
|
429
|
+
if key_name == core_var:
|
|
430
|
+
if key_name in values:
|
|
431
|
+
val = values.pop(key_name)
|
|
432
|
+
new_values = {f"{key_name}_val": val}
|
|
433
|
+
new_values.update(values)
|
|
434
|
+
values = new_values
|
|
435
|
+
|
|
406
436
|
values[core_var] = key
|
|
407
437
|
data.append(values)
|
|
408
438
|
|
|
@@ -429,7 +459,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
429
459
|
combined_results.iloc[:, i : i + 3] = subset
|
|
430
460
|
|
|
431
461
|
combined_results = combined_results.loc[
|
|
432
|
-
:, ~combined_results.columns.str.
|
|
462
|
+
:, ~combined_results.columns.str.match(r"(^p$|.*-p$)")
|
|
433
463
|
]
|
|
434
464
|
|
|
435
465
|
return combined_results
|
|
@@ -451,6 +481,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
451
481
|
decimal: Optional[int] = None,
|
|
452
482
|
factor_return: bool = False,
|
|
453
483
|
already_grouped: bool = False,
|
|
484
|
+
is_endog_return: bool = True,
|
|
454
485
|
) -> tuple:
|
|
455
486
|
"""Perform bivariate analysis on two specified variables.
|
|
456
487
|
|
|
@@ -470,6 +501,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
470
501
|
factor_return (bool): Whether to output factor returns in the analysis. Defaults to False.
|
|
471
502
|
already_grouped (bool): If True, skips the grouping step assuming data has been pre-grouped.
|
|
472
503
|
Defaults to False.
|
|
504
|
+
is_endog_return (bool): Whether the dependent variable is a return. Defaults to True.
|
|
473
505
|
|
|
474
506
|
Returns:
|
|
475
507
|
tuple: A tuple containing the equal-weighted and value-weighted results DataFrames.
|
|
@@ -544,15 +576,21 @@ class PortfolioAnalysis(Empirical):
|
|
|
544
576
|
|
|
545
577
|
return pd.concat([group, sort_diff, sort_avg])
|
|
546
578
|
|
|
579
|
+
# Handle potential name collision if endog is same as sort_var or core_var
|
|
580
|
+
value_col = self.endog
|
|
581
|
+
if value_col in [sort_var, core_var]:
|
|
582
|
+
value_col = f"{self.endog}_val"
|
|
583
|
+
|
|
584
|
+
ew_ret_d.name = value_col
|
|
547
585
|
ew_ret_d = ew_ret_d.reset_index()
|
|
548
586
|
ew_ret_d = ew_ret_d.pivot(
|
|
549
|
-
index=[self.time, sort_var], columns=core_var, values=
|
|
587
|
+
index=[self.time, sort_var], columns=core_var, values=value_col
|
|
550
588
|
)
|
|
551
589
|
|
|
552
|
-
vw_ret_d.name =
|
|
590
|
+
vw_ret_d.name = value_col
|
|
553
591
|
vw_ret_d = vw_ret_d.reset_index()
|
|
554
592
|
vw_ret_d = vw_ret_d.pivot(
|
|
555
|
-
index=[self.time, sort_var], columns=core_var, values=
|
|
593
|
+
index=[self.time, sort_var], columns=core_var, values=value_col
|
|
556
594
|
)
|
|
557
595
|
|
|
558
596
|
ew_ret_d = (
|
|
@@ -616,11 +654,21 @@ class PortfolioAnalysis(Empirical):
|
|
|
616
654
|
results = {}
|
|
617
655
|
|
|
618
656
|
for key, series in time_series_dict.items():
|
|
619
|
-
value_dict = self._claculate_value(
|
|
657
|
+
value_dict = self._claculate_value(
|
|
658
|
+
series, decimal=decimal, is_endog_return=is_endog_return
|
|
659
|
+
)
|
|
620
660
|
results[key] = value_dict
|
|
621
661
|
|
|
662
|
+
key_name = "Return" if is_endog_return else self.endog
|
|
622
663
|
data = []
|
|
623
664
|
for key, values in results.items():
|
|
665
|
+
if key_name in [sort_var, core_var]:
|
|
666
|
+
if key_name in values:
|
|
667
|
+
val = values.pop(key_name)
|
|
668
|
+
new_values = {f"{key_name}_val": val}
|
|
669
|
+
new_values.update(values)
|
|
670
|
+
values = new_values
|
|
671
|
+
|
|
624
672
|
values[sort_var] = key[0]
|
|
625
673
|
values[core_var] = key[1]
|
|
626
674
|
data.append(values)
|
|
@@ -648,7 +696,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
648
696
|
combined_results.iloc[:, i : i + 3] = subset
|
|
649
697
|
|
|
650
698
|
combined_results = combined_results.loc[
|
|
651
|
-
:, ~combined_results.columns.str.
|
|
699
|
+
:, ~combined_results.columns.str.match(r"(^p$|.*-p$)")
|
|
652
700
|
]
|
|
653
701
|
|
|
654
702
|
def reorder_diff_avg(df: DataFrame) -> DataFrame:
|
|
@@ -727,17 +775,22 @@ if __name__ == "__main__":
|
|
|
727
775
|
|
|
728
776
|
portfolio = PortfolioAnalysis(
|
|
729
777
|
panel,
|
|
730
|
-
endog="
|
|
778
|
+
endog="IdioVol",
|
|
731
779
|
weight="MktCap",
|
|
732
|
-
models=Models,
|
|
733
|
-
factors_series=time_series,
|
|
780
|
+
# models=Models,
|
|
781
|
+
# factors_series=time_series,
|
|
734
782
|
)
|
|
735
783
|
|
|
736
784
|
# portfolio.GroupN("Illiq", 10, inplace=True)
|
|
737
|
-
portfolio.GroupN(["MktCap", "Illiq"], [
|
|
785
|
+
portfolio.GroupN(["MktCap", "Illiq"], [5, 5], sort_type="dependent", inplace=True)
|
|
738
786
|
|
|
739
787
|
uni_ew, uni_vw = portfolio.univariate_analysis(
|
|
740
|
-
"Illiq",
|
|
788
|
+
"Illiq",
|
|
789
|
+
5,
|
|
790
|
+
format=True,
|
|
791
|
+
# factor_return=False,
|
|
792
|
+
already_grouped=True,
|
|
793
|
+
is_endog_return=False,
|
|
741
794
|
)
|
|
742
795
|
pp(uni_ew)
|
|
743
796
|
pp(uni_vw)
|
|
@@ -745,13 +798,14 @@ if __name__ == "__main__":
|
|
|
745
798
|
bi_ew, bi_vw = portfolio.bivariate_analysis(
|
|
746
799
|
"MktCap",
|
|
747
800
|
"Illiq",
|
|
748
|
-
|
|
801
|
+
5,
|
|
749
802
|
5,
|
|
750
803
|
False,
|
|
751
|
-
|
|
804
|
+
True,
|
|
752
805
|
"dependent",
|
|
753
|
-
factor_return=False,
|
|
806
|
+
# factor_return=False,
|
|
754
807
|
already_grouped=True,
|
|
808
|
+
is_endog_return=False,
|
|
755
809
|
)
|
|
756
810
|
pp(bi_ew)
|
|
757
811
|
pp(bi_vw)
|
|
@@ -1,12 +1,14 @@
|
|
|
1
|
-
from
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
from pandas import DataFrame, Series
|
|
5
|
+
|
|
2
6
|
from anomalylab.empirical.empirical import Empirical
|
|
3
7
|
from anomalylab.structure import PanelData
|
|
4
|
-
from anomalylab.utils
|
|
5
|
-
from anomalylab.utils.utils import *
|
|
8
|
+
from anomalylab.utils import Columns, columns_to_list, pp, round_to_string
|
|
6
9
|
|
|
7
10
|
|
|
8
11
|
class Statistics:
|
|
9
|
-
|
|
10
12
|
@staticmethod
|
|
11
13
|
def mean(series: Series) -> float:
|
|
12
14
|
return series.mean() if not series.isna().all() else None
|
|
@@ -1,9 +1,14 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import warnings
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Literal, Optional, Union
|
|
6
|
+
|
|
7
|
+
from pandas import DataFrame, Series
|
|
8
|
+
|
|
3
9
|
from anomalylab.preprocess.preprocessor import Preprocessor
|
|
4
10
|
from anomalylab.structure import PanelData
|
|
5
|
-
from anomalylab.utils
|
|
6
|
-
from anomalylab.utils.utils import *
|
|
11
|
+
from anomalylab.utils import Columns, Scalar, columns_to_list, pp
|
|
7
12
|
|
|
8
13
|
|
|
9
14
|
@dataclass
|
|
@@ -68,7 +73,6 @@ class FillMethod:
|
|
|
68
73
|
|
|
69
74
|
@dataclass
|
|
70
75
|
class FillNa(Preprocessor):
|
|
71
|
-
|
|
72
76
|
def fill(
|
|
73
77
|
self,
|
|
74
78
|
series: Series,
|
|
@@ -218,11 +222,11 @@ class FillNa(Preprocessor):
|
|
|
218
222
|
warnings.warn(message=f"Missing values not found in {fill_columns}.")
|
|
219
223
|
if self.panel_data.normalize:
|
|
220
224
|
warnings.warn(
|
|
221
|
-
message=
|
|
225
|
+
message="The data has already been normalized, and missing values have been filled with 0."
|
|
222
226
|
)
|
|
223
227
|
if self.panel_data.fillna:
|
|
224
228
|
warnings.warn(
|
|
225
|
-
message=
|
|
229
|
+
message="The missing values have already been handled earlier."
|
|
226
230
|
)
|
|
227
231
|
|
|
228
232
|
|
|
@@ -1,9 +1,15 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import warnings
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Literal
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
from pandas import DataFrame, Series
|
|
9
|
+
|
|
3
10
|
from anomalylab.preprocess.preprocessor import Preprocessor
|
|
4
11
|
from anomalylab.structure import PanelData
|
|
5
|
-
from anomalylab.utils
|
|
6
|
-
from anomalylab.utils.utils import *
|
|
12
|
+
from anomalylab.utils import Columns, columns_to_list, pp
|
|
7
13
|
|
|
8
14
|
|
|
9
15
|
class NormalizeMethod:
|
|
@@ -53,7 +59,12 @@ class NormalizeMethod:
|
|
|
53
59
|
return rescaled_df
|
|
54
60
|
|
|
55
61
|
@classmethod
|
|
56
|
-
def call_method(
|
|
62
|
+
def call_method(
|
|
63
|
+
cls,
|
|
64
|
+
method: str,
|
|
65
|
+
df: DataFrame | Series,
|
|
66
|
+
fillna_zero_after_norm: bool = False,
|
|
67
|
+
) -> DataFrame | Series:
|
|
57
68
|
"""
|
|
58
69
|
Calls a specified normalization method on the input DataFrame.
|
|
59
70
|
|
|
@@ -66,20 +77,38 @@ class NormalizeMethod:
|
|
|
66
77
|
cls: The class that is calling this method (NormalizeMethod).
|
|
67
78
|
method (str): The name of the method to call ('zscore' or 'rank').
|
|
68
79
|
df (DataFrame): The input DataFrame to be normalized.
|
|
80
|
+
fillna_zero_after_norm (bool): If True, fills NaN values with zero after normalization.
|
|
81
|
+
Defaults to False.
|
|
69
82
|
|
|
70
83
|
Returns:
|
|
71
|
-
DataFrame: The normalized DataFrame
|
|
84
|
+
DataFrame: The normalized DataFrame. NaN values are filled with zero
|
|
85
|
+
if `fillna_zero_after_norm=True` is set.
|
|
72
86
|
|
|
73
87
|
Raises:
|
|
74
88
|
AttributeError: If the specified method does not exist.
|
|
75
89
|
"""
|
|
76
|
-
if hasattr(cls, method):
|
|
77
|
-
return getattr(cls, method)(df).fillna(value=0)
|
|
78
|
-
else:
|
|
90
|
+
if not hasattr(cls, method):
|
|
79
91
|
raise AttributeError(
|
|
80
92
|
f"Method '{method}' not found, use 'zscore' or 'rank'."
|
|
81
93
|
)
|
|
82
94
|
|
|
95
|
+
normalized_df = getattr(cls, method)(df)
|
|
96
|
+
|
|
97
|
+
if fillna_zero_after_norm:
|
|
98
|
+
normalized_df = normalized_df.fillna(value=0)
|
|
99
|
+
else:
|
|
100
|
+
if isinstance(df, Series):
|
|
101
|
+
if df.isna().all():
|
|
102
|
+
warnings.warn(f"Column {df.name} contains only missing values.")
|
|
103
|
+
else:
|
|
104
|
+
all_nan_cols = df.columns[df.isna().all()].tolist()
|
|
105
|
+
if all_nan_cols:
|
|
106
|
+
warnings.warn(
|
|
107
|
+
f"Columns {all_nan_cols} contain only missing values."
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
return normalized_df
|
|
111
|
+
|
|
83
112
|
|
|
84
113
|
@dataclass
|
|
85
114
|
class Normalize(Preprocessor):
|
|
@@ -101,6 +130,7 @@ class Normalize(Preprocessor):
|
|
|
101
130
|
group_columns: Columns = None,
|
|
102
131
|
no_process_columns: Columns = None,
|
|
103
132
|
process_all_characteristics: bool = True,
|
|
133
|
+
fillna_zero_after_norm: bool = False,
|
|
104
134
|
) -> Normalize:
|
|
105
135
|
"""
|
|
106
136
|
Normalizes specified columns of the DataFrame using the chosen method.
|
|
@@ -121,6 +151,8 @@ class Normalize(Preprocessor):
|
|
|
121
151
|
normalization. Defaults to None.
|
|
122
152
|
process_all_characteristics (bool, optional): Whether to process all
|
|
123
153
|
characteristics or not. Defaults to True.
|
|
154
|
+
fillna_zero_after_norm (bool): If True, fills NaN values with zero after normalization.
|
|
155
|
+
Defaults to False.
|
|
124
156
|
|
|
125
157
|
Returns:
|
|
126
158
|
Normalize: The instance of the Normalize class with updated state.
|
|
@@ -144,7 +176,9 @@ class Normalize(Preprocessor):
|
|
|
144
176
|
# Normalize the selected columns
|
|
145
177
|
self.panel_data.transform(
|
|
146
178
|
columns=columns,
|
|
147
|
-
func=lambda df: NormalizeMethod.call_method(
|
|
179
|
+
func=lambda df: NormalizeMethod.call_method(
|
|
180
|
+
method=method, df=df, fillna_zero_after_norm=fillna_zero_after_norm
|
|
181
|
+
),
|
|
148
182
|
group_columns=group_columns,
|
|
149
183
|
)
|
|
150
184
|
|
|
@@ -1,10 +1,16 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Literal, Optional
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
from pandas import DataFrame, Series
|
|
8
|
+
from scipy.stats.mstats import winsorize as winsorization
|
|
9
|
+
|
|
3
10
|
from anomalylab.preprocess.preprocessor import Preprocessor
|
|
4
11
|
from anomalylab.preprocess.truncate import truncate as truncation
|
|
5
12
|
from anomalylab.structure import PanelData
|
|
6
|
-
from anomalylab.utils
|
|
7
|
-
from anomalylab.utils.utils import *
|
|
13
|
+
from anomalylab.utils import Columns, columns_to_list, pp
|
|
8
14
|
|
|
9
15
|
|
|
10
16
|
class OutlierMethod:
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from
|
|
4
|
-
from
|
|
5
|
-
|
|
3
|
+
from abc import ABC
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
from anomalylab.structure import PanelData
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
@dataclass
|
|
@@ -1,9 +1,13 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import warnings
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
from pandas import DataFrame
|
|
7
|
+
|
|
3
8
|
from anomalylab.preprocess.preprocessor import Preprocessor
|
|
4
9
|
from anomalylab.structure import PanelData
|
|
5
|
-
from anomalylab.utils
|
|
6
|
-
from anomalylab.utils.utils import *
|
|
10
|
+
from anomalylab.utils import Columns, columns_to_list, pp
|
|
7
11
|
|
|
8
12
|
|
|
9
13
|
@dataclass
|
|
@@ -1,11 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import numpy as np
|
|
3
4
|
import numpy.ma as ma
|
|
4
5
|
from scipy._lib._util import _contains_nan
|
|
5
6
|
|
|
6
|
-
from anomalylab.utils.imports import *
|
|
7
|
-
from anomalylab.utils.utils import *
|
|
8
|
-
|
|
9
7
|
|
|
10
8
|
def truncate(
|
|
11
9
|
a,
|
|
@@ -113,9 +111,7 @@ def truncate(
|
|
|
113
111
|
shp = a.shape # Store the shape of the array
|
|
114
112
|
return _truncate1D(
|
|
115
113
|
a.ravel(), lolim, uplim, loinc, upinc, contains_nan, nan_policy
|
|
116
|
-
).reshape(
|
|
117
|
-
shp
|
|
118
|
-
) # Truncate and reshape the array back to its original shape
|
|
114
|
+
).reshape(shp) # Truncate and reshape the array back to its original shape
|
|
119
115
|
else:
|
|
120
116
|
return ma.apply_along_axis(
|
|
121
117
|
_truncate1D, axis, a, lolim, uplim, loinc, upinc, contains_nan, nan_policy
|
|
@@ -1,5 +1,10 @@
|
|
|
1
|
-
|
|
2
|
-
from
|
|
1
|
+
import copy
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
from pandas import DataFrame
|
|
7
|
+
from typing_extensions import Self
|
|
3
8
|
|
|
4
9
|
|
|
5
10
|
@dataclass
|
|
@@ -1,6 +1,13 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import warnings
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Callable, Literal, Optional
|
|
5
|
+
|
|
6
|
+
import pandas as pd
|
|
7
|
+
from pandas import DataFrame
|
|
8
|
+
|
|
1
9
|
from anomalylab.structure.data import Data
|
|
2
|
-
from anomalylab.utils
|
|
3
|
-
from anomalylab.utils.utils import *
|
|
10
|
+
from anomalylab.utils import Columns, columns_to_list, pp
|
|
4
11
|
|
|
5
12
|
|
|
6
13
|
@dataclass
|
|
@@ -58,8 +65,6 @@ class PanelData(Data):
|
|
|
58
65
|
|
|
59
66
|
This method identifies remaining columns as firm characteristics, excluding classifications.
|
|
60
67
|
"""
|
|
61
|
-
if self.is_copy:
|
|
62
|
-
self.df = copy.deepcopy(self.df)
|
|
63
68
|
self.df[self.id] = self.df[self.id].astype(int)
|
|
64
69
|
if not isinstance(self.df[self.time].dtype, pd.PeriodDtype):
|
|
65
70
|
self.df[self.time] = pd.to_datetime(self.df[self.time], format="ISO8601")
|
|
@@ -116,6 +121,9 @@ class PanelData(Data):
|
|
|
116
121
|
ValueError: If any required columns are missing from the DataFrame.
|
|
117
122
|
ValueError: If there are no firm characteristics remaining after checking.
|
|
118
123
|
"""
|
|
124
|
+
if self.is_copy:
|
|
125
|
+
self.df = copy.deepcopy(self.df)
|
|
126
|
+
|
|
119
127
|
# Check for duplicate column names
|
|
120
128
|
duplicated_columns = self.df.columns[self.df.columns.duplicated()].tolist()
|
|
121
129
|
if duplicated_columns:
|
|
@@ -1,6 +1,12 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import Literal
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
from pandas import DataFrame
|
|
7
|
+
|
|
1
8
|
from anomalylab.structure.data import Data
|
|
2
|
-
from anomalylab.utils import
|
|
3
|
-
from anomalylab.utils.imports import *
|
|
9
|
+
from anomalylab.utils import pp
|
|
4
10
|
|
|
5
11
|
|
|
6
12
|
@dataclass
|
|
@@ -35,8 +41,6 @@ class TimeSeries(Data):
|
|
|
35
41
|
|
|
36
42
|
This method renames the time column to a standardized name and identifies remaining columns as factors.
|
|
37
43
|
"""
|
|
38
|
-
if self.is_copy:
|
|
39
|
-
self.df = copy.deepcopy(self.df)
|
|
40
44
|
if not isinstance(self.df[self.time].dtype, pd.PeriodDtype):
|
|
41
45
|
self.df[self.time] = pd.to_datetime(self.df[self.time], format="ISO8601")
|
|
42
46
|
self.df[self.time] = self.df[self.time].dt.to_period(freq=self.frequency)
|
|
@@ -53,6 +57,9 @@ class TimeSeries(Data):
|
|
|
53
57
|
ValueError: If the time column is missing from the DataFrame.
|
|
54
58
|
ValueError: If there are no additional columns for factor returns.
|
|
55
59
|
"""
|
|
60
|
+
if self.is_copy:
|
|
61
|
+
self.df = copy.deepcopy(self.df)
|
|
62
|
+
|
|
56
63
|
# Check for duplicate column names
|
|
57
64
|
duplicated_columns = self.df.columns[self.df.columns.duplicated()].tolist()
|
|
58
65
|
if duplicated_columns:
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from anomalylab.utils.utils import (
|
|
2
|
+
Columns,
|
|
3
|
+
Info,
|
|
4
|
+
RegModel,
|
|
5
|
+
RegModels,
|
|
6
|
+
RegResult,
|
|
7
|
+
Scalar,
|
|
8
|
+
columns_to_list,
|
|
9
|
+
get_significance_star,
|
|
10
|
+
pp,
|
|
11
|
+
round_to_string,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
__all__: list[str] = [
|
|
15
|
+
"Scalar",
|
|
16
|
+
"Columns",
|
|
17
|
+
"Info",
|
|
18
|
+
"RegModel",
|
|
19
|
+
"RegModels",
|
|
20
|
+
"RegResult",
|
|
21
|
+
"columns_to_list",
|
|
22
|
+
"round_to_string",
|
|
23
|
+
"get_significance_star",
|
|
24
|
+
"pp",
|
|
25
|
+
]
|
|
@@ -1,4 +1,11 @@
|
|
|
1
|
-
from
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from itertools import chain
|
|
3
|
+
from typing import Any, Optional, TypedDict, Union
|
|
4
|
+
|
|
5
|
+
from pandas import Series, Timedelta, Timestamp
|
|
6
|
+
from rich import print
|
|
7
|
+
from rich.panel import Panel as rich_Panel
|
|
8
|
+
from rich.pretty import Pretty
|
|
2
9
|
|
|
3
10
|
Scalar = Union[str, int, float, bool, Timestamp, Timedelta]
|
|
4
11
|
Columns = Optional[list[str] | str]
|
|
@@ -1,4 +1,9 @@
|
|
|
1
|
-
|
|
1
|
+
import os
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from glob import glob
|
|
4
|
+
|
|
5
|
+
from openpyxl import load_workbook
|
|
6
|
+
from openpyxl.styles import Alignment, Border, Side
|
|
2
7
|
|
|
3
8
|
|
|
4
9
|
@dataclass
|
|
@@ -45,7 +50,7 @@ class FormatExcel:
|
|
|
45
50
|
- Creates a thick border for the bottom of the first row.
|
|
46
51
|
"""
|
|
47
52
|
thin = Side(border_style="thin", color="000000")
|
|
48
|
-
thick = Side(border_style="thick", color="000000")
|
|
53
|
+
thick = Side(border_style="thick", color="000000") # noqa: F841
|
|
49
54
|
|
|
50
55
|
for ws in self.wb.worksheets:
|
|
51
56
|
for row in ws.iter_rows():
|
|
@@ -104,7 +109,7 @@ class FormatExcel:
|
|
|
104
109
|
2 if ord(char) > 127 else 1 for char in str(cell.value)
|
|
105
110
|
)
|
|
106
111
|
max_length = max(max_length, cell_length)
|
|
107
|
-
except:
|
|
112
|
+
except Exception:
|
|
108
113
|
pass
|
|
109
114
|
# Adjust for header row
|
|
110
115
|
# header_cell = ws[f"{col_letter}1"]
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
DECIMAL = 2
|
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
import copy
|
|
2
|
-
import functools
|
|
3
|
-
import math
|
|
4
|
-
import os
|
|
5
|
-
import warnings
|
|
6
|
-
from abc import ABC, ABCMeta, abstractmethod
|
|
7
|
-
from dataclasses import dataclass, field
|
|
8
|
-
from datetime import date, datetime, timedelta, tzinfo
|
|
9
|
-
from functools import partial, wraps
|
|
10
|
-
from glob import glob
|
|
11
|
-
from itertools import chain
|
|
12
|
-
from types import SimpleNamespace
|
|
13
|
-
from typing import (
|
|
14
|
-
Any,
|
|
15
|
-
Callable,
|
|
16
|
-
ClassVar,
|
|
17
|
-
Generic,
|
|
18
|
-
Iterable,
|
|
19
|
-
Literal,
|
|
20
|
-
Optional,
|
|
21
|
-
Sequence,
|
|
22
|
-
TypedDict,
|
|
23
|
-
TypeVar,
|
|
24
|
-
Union,
|
|
25
|
-
get_type_hints,
|
|
26
|
-
)
|
|
27
|
-
|
|
28
|
-
import matplotlib.pyplot as plt
|
|
29
|
-
import numpy as np
|
|
30
|
-
import pandas as pd
|
|
31
|
-
import seaborn as sns
|
|
32
|
-
import statsmodels.api as sm
|
|
33
|
-
import statsmodels.formula.api as smf
|
|
34
|
-
from deprecated import deprecated
|
|
35
|
-
from linearmodels import FamaMacBeth
|
|
36
|
-
from numpy import float32, float64
|
|
37
|
-
from numpy.typing import NDArray
|
|
38
|
-
from openpyxl import load_workbook
|
|
39
|
-
from openpyxl.styles import Alignment, Border, Side
|
|
40
|
-
from pandas import (
|
|
41
|
-
DataFrame,
|
|
42
|
-
DatetimeIndex,
|
|
43
|
-
Index,
|
|
44
|
-
Interval,
|
|
45
|
-
Period,
|
|
46
|
-
PeriodIndex,
|
|
47
|
-
Series,
|
|
48
|
-
Timedelta,
|
|
49
|
-
Timestamp,
|
|
50
|
-
)
|
|
51
|
-
from pandas.arrays import PeriodArray
|
|
52
|
-
from rich import print
|
|
53
|
-
from rich.panel import Panel as rich_Panel
|
|
54
|
-
from rich.pretty import Pretty, pprint
|
|
55
|
-
from scipy.stats import kurtosis, skew
|
|
56
|
-
from scipy.stats.mstats import winsorize as winsorization
|
|
57
|
-
from tqdm import tqdm
|
|
58
|
-
from typing_extensions import NotRequired, Required, Self
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|