AnomalyLab 0.4.2__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. {anomalylab-0.4.2 → anomalylab-0.5.0/AnomalyLab.egg-info}/PKG-INFO +1 -1
  2. {anomalylab-0.4.2 → anomalylab-0.5.0}/AnomalyLab.egg-info/SOURCES.txt +0 -2
  3. {anomalylab-0.4.2/AnomalyLab.egg-info → anomalylab-0.5.0}/PKG-INFO +1 -1
  4. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/__init__.py +1 -2
  5. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/core/core.py +23 -10
  6. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/datasets/dataset.py +4 -2
  7. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/empirical/correlation.py +7 -3
  8. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/empirical/empirical.py +2 -3
  9. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/empirical/fm_regression.py +27 -7
  10. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/empirical/persistence.py +10 -5
  11. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/empirical/portfolio.py +83 -29
  12. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/empirical/summary.py +6 -4
  13. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/preprocess/fillna.py +9 -5
  14. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/preprocess/normalize.py +42 -8
  15. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/preprocess/outliers.py +8 -2
  16. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/preprocess/preprocessor.py +4 -3
  17. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/preprocess/shift.py +6 -2
  18. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/preprocess/truncate.py +2 -6
  19. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/structure/data.py +7 -2
  20. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/structure/panel_data.py +12 -4
  21. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/structure/time_series.py +11 -4
  22. anomalylab-0.5.0/anomalylab/utils/__init__.py +25 -0
  23. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/utils/utils.py +8 -1
  24. anomalylab-0.5.0/anomalylab/visualization/__init__.py +5 -0
  25. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/visualization/format.py +8 -3
  26. {anomalylab-0.4.2 → anomalylab-0.5.0}/setup.py +1 -1
  27. anomalylab-0.4.2/anomalylab/config.py +0 -1
  28. anomalylab-0.4.2/anomalylab/utils/__init__.py +0 -14
  29. anomalylab-0.4.2/anomalylab/utils/imports.py +0 -58
  30. anomalylab-0.4.2/anomalylab/visualization/__init__.py +0 -5
  31. {anomalylab-0.4.2 → anomalylab-0.5.0}/.gitattributes +0 -0
  32. {anomalylab-0.4.2 → anomalylab-0.5.0}/.github/workflows/python-publish.yml +0 -0
  33. {anomalylab-0.4.2 → anomalylab-0.5.0}/.gitignore +0 -0
  34. {anomalylab-0.4.2 → anomalylab-0.5.0}/AnomalyLab.egg-info/dependency_links.txt +0 -0
  35. {anomalylab-0.4.2 → anomalylab-0.5.0}/AnomalyLab.egg-info/requires.txt +0 -0
  36. {anomalylab-0.4.2 → anomalylab-0.5.0}/AnomalyLab.egg-info/top_level.txt +0 -0
  37. {anomalylab-0.4.2 → anomalylab-0.5.0}/LICENSE +0 -0
  38. {anomalylab-0.4.2 → anomalylab-0.5.0}/MANIFEST.in +0 -0
  39. {anomalylab-0.4.2 → anomalylab-0.5.0}/README.md +0 -0
  40. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/core/__init__.py +0 -0
  41. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/datasets/__init__.py +0 -0
  42. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/datasets/panel_data.csv +0 -0
  43. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/datasets/time_series_data.csv +0 -0
  44. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/empirical/__init__.py +0 -0
  45. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/preprocess/__init__.py +0 -0
  46. {anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/structure/__init__.py +0 -0
  47. {anomalylab-0.4.2 → anomalylab-0.5.0}/requirements.txt +0 -0
  48. {anomalylab-0.4.2 → anomalylab-0.5.0}/setup.cfg +0 -0
  49. {anomalylab-0.4.2 → anomalylab-0.5.0}/tests/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: AnomalyLab
3
- Version: 0.4.2
3
+ Version: 0.5.0
4
4
  Summary: A Python package for empirical asset pricing analysis.
5
5
  Author: FinPhd
6
6
  Author-email: chenhaiwei@stu.sufe.edu.cn
@@ -12,7 +12,6 @@ AnomalyLab.egg-info/dependency_links.txt
12
12
  AnomalyLab.egg-info/requires.txt
13
13
  AnomalyLab.egg-info/top_level.txt
14
14
  anomalylab/__init__.py
15
- anomalylab/config.py
16
15
  anomalylab/core/__init__.py
17
16
  anomalylab/core/core.py
18
17
  anomalylab/datasets/__init__.py
@@ -38,7 +37,6 @@ anomalylab/structure/data.py
38
37
  anomalylab/structure/panel_data.py
39
38
  anomalylab/structure/time_series.py
40
39
  anomalylab/utils/__init__.py
41
- anomalylab/utils/imports.py
42
40
  anomalylab/utils/utils.py
43
41
  anomalylab/visualization/__init__.py
44
42
  anomalylab/visualization/format.py
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: AnomalyLab
3
- Version: 0.4.2
3
+ Version: 0.5.0
4
4
  Summary: A Python package for empirical asset pricing analysis.
5
5
  Author: FinPhd
6
6
  Author-email: chenhaiwei@stu.sufe.edu.cn
@@ -11,8 +11,7 @@ from anomalylab.empirical import (
11
11
  )
12
12
  from anomalylab.preprocess import FillNa, Normalize, OutlierHandler, Shift
13
13
  from anomalylab.structure import PanelData, TimeSeries
14
- from anomalylab.utils import *
15
- from anomalylab.utils.imports import *
14
+ from anomalylab.utils import pp
16
15
  from anomalylab.visualization import FormatExcel
17
16
 
18
17
  __all__: list[str] = [
@@ -1,8 +1,12 @@
1
1
  from __future__ import annotations
2
2
 
3
- from importlib import resources
3
+ from dataclasses import dataclass, field
4
+ from importlib import resources # noqa: F401
5
+ from typing import Literal, Optional, Union
6
+
7
+ import pandas as pd
8
+ from pandas import DataFrame
4
9
 
5
- from anomalylab.config import *
6
10
  from anomalylab.empirical import (
7
11
  Correlation,
8
12
  FamaMacBethRegression,
@@ -12,8 +16,7 @@ from anomalylab.empirical import (
12
16
  )
13
17
  from anomalylab.preprocess import FillNa, Normalize, OutlierHandler, Shift
14
18
  from anomalylab.structure import PanelData, TimeSeries
15
- from anomalylab.utils import *
16
- from anomalylab.utils.imports import *
19
+ from anomalylab.utils import Columns, Scalar, pp
17
20
  from anomalylab.visualization import FormatExcel
18
21
 
19
22
 
@@ -126,8 +129,7 @@ class Panel:
126
129
  return self._fm_preprocessor
127
130
 
128
131
  def format_preprocessor(self, path: str) -> FormatExcel:
129
- if self._format_preprocessor is None:
130
- self._format_preprocessor = FormatExcel(path=path)
132
+ self._format_preprocessor = FormatExcel(path=path)
131
133
  return self._format_preprocessor
132
134
 
133
135
  def normalize(
@@ -137,6 +139,7 @@ class Panel:
137
139
  group_columns: Columns = None,
138
140
  no_process_columns: Columns = None,
139
141
  process_all_characteristics: bool = True,
142
+ fillna_zero_after_norm: bool = False,
140
143
  ) -> Panel:
141
144
  """
142
145
  Normalizes specified columns of the DataFrame using the chosen method.
@@ -157,6 +160,8 @@ class Panel:
157
160
  normalization. Defaults to None.
158
161
  process_all_characteristics (bool, optional): Whether to process all
159
162
  characteristics or not. Defaults to True.
163
+ fillna_zero_after_norm (bool): If True, fills NaN values with zero after normalization.
164
+ Defaults to False.
160
165
 
161
166
  Returns:
162
167
  Normalize: The instance of the Normalize class with updated state.
@@ -171,6 +176,7 @@ class Panel:
171
176
  group_columns=group_columns,
172
177
  no_process_columns=no_process_columns,
173
178
  process_all_characteristics=process_all_characteristics,
179
+ fillna_zero_after_norm=fillna_zero_after_norm,
174
180
  ).panel_data
175
181
  return self
176
182
 
@@ -482,7 +488,7 @@ class Panel:
482
488
  groups: Union[int, list[int]],
483
489
  sort_type: Literal["independent", "dependent"] = "independent",
484
490
  inplace: bool = False,
485
- ) -> tuple:
491
+ ) -> Optional[pd.DataFrame]:
486
492
  """Group variables into portfolios based on specified groups.
487
493
 
488
494
  This method creates portfolios for the specified variables in the panel data.
@@ -526,6 +532,7 @@ class Panel:
526
532
  decimal: Optional[int] = None,
527
533
  factor_return: bool = False,
528
534
  already_grouped: bool = False,
535
+ is_endog_return: bool = True,
529
536
  ) -> tuple:
530
537
  """Perform univariate analysis on the specified core variable.
531
538
 
@@ -543,6 +550,7 @@ class Panel:
543
550
  factor_return (bool): Whether to output factor returns in the analysis. Defaults to False.
544
551
  already_grouped (bool): If True, skips the grouping step assuming data has been pre-grouped.
545
552
  Defaults to False.
553
+ is_endog_return (bool): Whether the dependent variable is a return. Defaults to True.
546
554
 
547
555
  Returns:
548
556
  tuple: A tuple containing the equal-weighted and value-weighted results DataFrames.
@@ -556,6 +564,7 @@ class Panel:
556
564
  decimal=decimal,
557
565
  factor_return=factor_return,
558
566
  already_grouped=already_grouped,
567
+ is_endog_return=is_endog_return,
559
568
  )
560
569
 
561
570
  def bivariate_analysis(
@@ -574,6 +583,7 @@ class Panel:
574
583
  decimal: Optional[int] = None,
575
584
  factor_return: bool = False,
576
585
  already_grouped: bool = False,
586
+ is_endog_return: bool = True,
577
587
  ) -> tuple:
578
588
  """Perform bivariate analysis on two specified variables.
579
589
 
@@ -595,6 +605,7 @@ class Panel:
595
605
  factor_return (bool): Whether to output factor returns in the analysis. Defaults to False.
596
606
  already_grouped (bool): If True, skips the grouping step assuming data has been pre-grouped.
597
607
  Defaults to False.
608
+ is_endog_return (bool): Whether the dependent variable is a return. Defaults to True.
598
609
 
599
610
  Returns:
600
611
  tuple: A tuple containing the equal-weighted and value-weighted results DataFrames.
@@ -612,6 +623,7 @@ class Panel:
612
623
  decimal=decimal,
613
624
  factor_return=factor_return,
614
625
  already_grouped=already_grouped,
626
+ is_endog_return=is_endog_return,
615
627
  )
616
628
 
617
629
  def fm_reg(
@@ -731,6 +743,10 @@ if __name__ == "__main__":
731
743
  # no_process_columns="MktCap",
732
744
  # process_all_characteristics=True,
733
745
  )
746
+
747
+ panel.winsorize(method="winsorize", group_columns="date")
748
+ pp(panel)
749
+
734
750
  # panel.normalize(
735
751
  # # columns="MktCap",
736
752
  # method="zscore",
@@ -740,9 +756,6 @@ if __name__ == "__main__":
740
756
  # )
741
757
  # panel.shift(periods=1, drop_original=False)
742
758
 
743
- panel.winsorize(method="winsorize", group_columns="date")
744
- pp(panel)
745
-
746
759
  # summary = panel.summary()
747
760
  # pp(summary)
748
761
 
@@ -1,7 +1,9 @@
1
1
  from importlib import resources
2
2
 
3
- from anomalylab.utils import *
4
- from anomalylab.utils.imports import *
3
+ import pandas as pd
4
+ from pandas import DataFrame
5
+
6
+ from anomalylab.utils import pp
5
7
 
6
8
 
7
9
  class DataSet:
@@ -1,8 +1,12 @@
1
- from anomalylab.config import *
1
+ from dataclasses import dataclass
2
+ from typing import Optional
3
+
4
+ import numpy as np
5
+ from pandas import DataFrame
6
+
2
7
  from anomalylab.empirical.empirical import Empirical
3
8
  from anomalylab.structure import PanelData
4
- from anomalylab.utils.imports import *
5
- from anomalylab.utils.utils import *
9
+ from anomalylab.utils import Columns, columns_to_list, pp, round_to_string
6
10
 
7
11
 
8
12
  @dataclass
@@ -1,7 +1,6 @@
1
- from anomalylab.config import *
1
+ from dataclasses import dataclass
2
+
2
3
  from anomalylab.preprocess.preprocessor import Preprocessor
3
- from anomalylab.utils.imports import *
4
- from anomalylab.utils.utils import *
5
4
 
6
5
 
7
6
  @dataclass
@@ -1,9 +1,27 @@
1
- from anomalylab.config import *
1
+ import math
2
+ import warnings
3
+ from dataclasses import dataclass
4
+ from functools import partial
5
+ from typing import Literal, Optional
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ import statsmodels.api as sm
10
+ from linearmodels import FamaMacBeth
11
+ from pandas import DataFrame, Series
12
+
2
13
  from anomalylab.empirical.empirical import Empirical
3
14
  from anomalylab.preprocess import OutlierHandler
4
15
  from anomalylab.structure import PanelData
5
- from anomalylab.utils.imports import *
6
- from anomalylab.utils.utils import *
16
+ from anomalylab.utils import (
17
+ RegModel,
18
+ RegModels,
19
+ RegResult,
20
+ columns_to_list,
21
+ get_significance_star,
22
+ pp,
23
+ round_to_string,
24
+ )
7
25
 
8
26
 
9
27
  @dataclass
@@ -76,12 +94,14 @@ class FamaMacBethRegression(Empirical):
76
94
  raise ValueError(
77
95
  "When calculating the value-weighted industry return, the weight column must be specified!"
78
96
  )
79
- func = lambda x: np.average(
80
- x, weights=self.panel_data.df.loc[x.index, weight]
81
- )
97
+
98
+ def func(x):
99
+ return np.average(
100
+ x, weights=self.panel_data.df.loc[x.index, weight]
101
+ )
82
102
  else:
83
103
  raise ValueError(
84
- f"industry_weighed_method must be one of ['value', 'equal']"
104
+ "industry_weighed_method must be one of ['value', 'equal']"
85
105
  )
86
106
  self.panel_data.df[endog] -= self.panel_data.df.groupby(
87
107
  by=[self.time, industry]
@@ -1,11 +1,17 @@
1
- from importlib import resources
1
+ from importlib import resources # noqa: F401
2
+ from dataclasses import dataclass
3
+ from typing import Optional
4
+
5
+ import matplotlib.pyplot as plt
6
+ import numpy as np
7
+ import pandas as pd
8
+ import seaborn as sns
9
+ from pandas import DataFrame
2
10
 
3
- from anomalylab.config import *
4
11
  from anomalylab.empirical.empirical import Empirical
5
12
  from anomalylab.preprocess.shift import Shift
6
13
  from anomalylab.structure import PanelData
7
- from anomalylab.utils.imports import *
8
- from anomalylab.utils.utils import *
14
+ from anomalylab.utils import Columns, columns_to_list, pp, round_to_string
9
15
 
10
16
 
11
17
  @dataclass
@@ -72,7 +78,6 @@ class Persistence(Empirical):
72
78
  for var in columns:
73
79
  all_monthly_corrs = []
74
80
  for lag in periods:
75
-
76
81
  # Store monthly correlations
77
82
  monthly_corrs = []
78
83
 
@@ -1,10 +1,16 @@
1
- from pandas import DataFrame
1
+ import math
2
+ import warnings
3
+ from dataclasses import dataclass
4
+ from typing import Literal, Optional, Union
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ import statsmodels.api as sm
9
+ from pandas import DataFrame, Series
2
10
 
3
- from anomalylab.config import *
4
11
  from anomalylab.empirical.empirical import Empirical
5
12
  from anomalylab.structure import PanelData, TimeSeries
6
- from anomalylab.utils.imports import *
7
- from anomalylab.utils.utils import *
13
+ from anomalylab.utils import pp, round_to_string
8
14
 
9
15
  warnings.simplefilter(action="ignore", category=FutureWarning)
10
16
 
@@ -116,7 +122,7 @@ class PortfolioAnalysis(Empirical):
116
122
  group_col = [self.time]
117
123
  for i, var in enumerate(vars):
118
124
  if sort_type == "dependent" and i > 0:
119
- group_col.append(f"{vars[i-1]}_g{groups[i-1]}")
125
+ group_col.append(f"{vars[i - 1]}_g{groups[i - 1]}")
120
126
  out_df[f"{var}_g{groups[i]}"] = (
121
127
  out_df.groupby(group_col, observed=False)[var]
122
128
  .apply(
@@ -127,7 +133,7 @@ class PortfolioAnalysis(Empirical):
127
133
  )
128
134
  )
129
135
  .reset_index()
130
- .set_index(f"level_{i+1}")
136
+ .set_index(f"level_{i + 1}")
131
137
  .drop(group_col, axis=1)
132
138
  )
133
139
  else:
@@ -142,7 +148,7 @@ class PortfolioAnalysis(Empirical):
142
148
  )
143
149
  )
144
150
  .reset_index()
145
- .set_index(f"level_{1}")
151
+ .set_index("level_1")
146
152
  .drop(self.time, axis=1)
147
153
  )
148
154
 
@@ -157,24 +163,33 @@ class PortfolioAnalysis(Empirical):
157
163
  else:
158
164
  return out_df
159
165
 
160
- def _claculate_value(self, df: DataFrame, decimal: Optional[int] = None) -> dict:
166
+ def _claculate_value(
167
+ self, df: DataFrame, decimal: Optional[int] = None, is_endog_return: bool = True
168
+ ) -> dict:
161
169
  """Calculate various portfolio performance metrics.
162
170
 
163
171
  This method computes mean returns, t-values, Sharpe ratios, and model-adjusted alpha and t values.
164
172
 
165
173
  Args:
166
174
  df (DataFrame): The DataFrame containing the relevant data for calculations.
175
+ decimal (Optional[int]): The number of decimal places for formatting. Defaults to None.
176
+ is_endog_return (bool): Whether the dependent variable is a return. Defaults to True.
167
177
 
168
178
  Returns:
169
179
  dict: A dictionary containing computed metrics.
170
180
  """
171
- stat_dict = self._calculate_mean_and_t_value(df)
172
- factors_dict = self._calculate_alpha_and_t_value(df)
173
- sharpe_dict = self._calculate_sharpe(df, decimal)
181
+ stat_dict = self._calculate_mean_and_t_value(df, is_endog_return)
174
182
 
175
- return {**stat_dict, **factors_dict, **sharpe_dict}
183
+ if is_endog_return:
184
+ factors_dict = self._calculate_alpha_and_t_value(df)
185
+ sharpe_dict = self._calculate_sharpe(df, decimal)
186
+ return {**stat_dict, **factors_dict, **sharpe_dict}
187
+
188
+ return stat_dict
176
189
 
177
- def _calculate_mean_and_t_value(self, df: DataFrame) -> dict:
190
+ def _calculate_mean_and_t_value(
191
+ self, df: DataFrame, is_endog_return: bool = True
192
+ ) -> dict:
178
193
  """Calculate mean and t-value for the dependent variable.
179
194
 
180
195
  This method computes the mean return and its t-value assuming the null hypothesis
@@ -182,6 +197,7 @@ class PortfolioAnalysis(Empirical):
182
197
 
183
198
  Args:
184
199
  df (DataFrame): The DataFrame containing the relevant data for calculations.
200
+ is_endog_return (bool): Whether the dependent variable is a return. Defaults to True.
185
201
 
186
202
  Returns:
187
203
  dict: A dictionary with mean, t-value, and p-value.
@@ -199,7 +215,9 @@ class PortfolioAnalysis(Empirical):
199
215
  mean_value = reg.params[0]
200
216
  t_value = reg.tvalues[0]
201
217
  p_value = reg.pvalues[0]
202
- stat_dict["Return"] = mean_value
218
+
219
+ key_name = "Return" if is_endog_return else self.endog
220
+ stat_dict[key_name] = mean_value
203
221
  stat_dict["t"] = t_value
204
222
  stat_dict["p"] = p_value
205
223
 
@@ -284,6 +302,7 @@ class PortfolioAnalysis(Empirical):
284
302
  decimal: Optional[int] = None,
285
303
  factor_return: bool = False,
286
304
  already_grouped: bool = False,
305
+ is_endog_return: bool = True,
287
306
  ) -> tuple:
288
307
  """Perform univariate analysis on the specified core variable.
289
308
 
@@ -299,6 +318,7 @@ class PortfolioAnalysis(Empirical):
299
318
  factor_return (bool): Whether to output factor returns in the analysis. Defaults to False.
300
319
  already_grouped (bool): If True, skips the grouping step assuming data has been pre-grouped.
301
320
  Defaults to False.
321
+ is_endog_return (bool): Whether the dependent variable is a return. Defaults to True.
302
322
 
303
323
  Returns:
304
324
  tuple: A tuple containing the equal-weighted and value-weighted results DataFrames.
@@ -399,10 +419,20 @@ class PortfolioAnalysis(Empirical):
399
419
  results = {}
400
420
 
401
421
  for key, sr in time_series_dict.items():
402
- results[key] = self._claculate_value(sr, decimal=decimal)
422
+ results[key] = self._claculate_value(
423
+ sr, decimal=decimal, is_endog_return=is_endog_return
424
+ )
403
425
 
426
+ key_name = "Return" if is_endog_return else self.endog
404
427
  data = []
405
428
  for key, values in results.items():
429
+ if key_name == core_var:
430
+ if key_name in values:
431
+ val = values.pop(key_name)
432
+ new_values = {f"{key_name}_val": val}
433
+ new_values.update(values)
434
+ values = new_values
435
+
406
436
  values[core_var] = key
407
437
  data.append(values)
408
438
 
@@ -429,7 +459,7 @@ class PortfolioAnalysis(Empirical):
429
459
  combined_results.iloc[:, i : i + 3] = subset
430
460
 
431
461
  combined_results = combined_results.loc[
432
- :, ~combined_results.columns.str.endswith("p")
462
+ :, ~combined_results.columns.str.match(r"(^p$|.*-p$)")
433
463
  ]
434
464
 
435
465
  return combined_results
@@ -451,6 +481,7 @@ class PortfolioAnalysis(Empirical):
451
481
  decimal: Optional[int] = None,
452
482
  factor_return: bool = False,
453
483
  already_grouped: bool = False,
484
+ is_endog_return: bool = True,
454
485
  ) -> tuple:
455
486
  """Perform bivariate analysis on two specified variables.
456
487
 
@@ -470,6 +501,7 @@ class PortfolioAnalysis(Empirical):
470
501
  factor_return (bool): Whether to output factor returns in the analysis. Defaults to False.
471
502
  already_grouped (bool): If True, skips the grouping step assuming data has been pre-grouped.
472
503
  Defaults to False.
504
+ is_endog_return (bool): Whether the dependent variable is a return. Defaults to True.
473
505
 
474
506
  Returns:
475
507
  tuple: A tuple containing the equal-weighted and value-weighted results DataFrames.
@@ -544,15 +576,21 @@ class PortfolioAnalysis(Empirical):
544
576
 
545
577
  return pd.concat([group, sort_diff, sort_avg])
546
578
 
579
+ # Handle potential name collision if endog is same as sort_var or core_var
580
+ value_col = self.endog
581
+ if value_col in [sort_var, core_var]:
582
+ value_col = f"{self.endog}_val"
583
+
584
+ ew_ret_d.name = value_col
547
585
  ew_ret_d = ew_ret_d.reset_index()
548
586
  ew_ret_d = ew_ret_d.pivot(
549
- index=[self.time, sort_var], columns=core_var, values=self.endog
587
+ index=[self.time, sort_var], columns=core_var, values=value_col
550
588
  )
551
589
 
552
- vw_ret_d.name = self.endog
590
+ vw_ret_d.name = value_col
553
591
  vw_ret_d = vw_ret_d.reset_index()
554
592
  vw_ret_d = vw_ret_d.pivot(
555
- index=[self.time, sort_var], columns=core_var, values=self.endog
593
+ index=[self.time, sort_var], columns=core_var, values=value_col
556
594
  )
557
595
 
558
596
  ew_ret_d = (
@@ -616,11 +654,21 @@ class PortfolioAnalysis(Empirical):
616
654
  results = {}
617
655
 
618
656
  for key, series in time_series_dict.items():
619
- value_dict = self._claculate_value(series, decimal=decimal)
657
+ value_dict = self._claculate_value(
658
+ series, decimal=decimal, is_endog_return=is_endog_return
659
+ )
620
660
  results[key] = value_dict
621
661
 
662
+ key_name = "Return" if is_endog_return else self.endog
622
663
  data = []
623
664
  for key, values in results.items():
665
+ if key_name in [sort_var, core_var]:
666
+ if key_name in values:
667
+ val = values.pop(key_name)
668
+ new_values = {f"{key_name}_val": val}
669
+ new_values.update(values)
670
+ values = new_values
671
+
624
672
  values[sort_var] = key[0]
625
673
  values[core_var] = key[1]
626
674
  data.append(values)
@@ -648,7 +696,7 @@ class PortfolioAnalysis(Empirical):
648
696
  combined_results.iloc[:, i : i + 3] = subset
649
697
 
650
698
  combined_results = combined_results.loc[
651
- :, ~combined_results.columns.str.endswith("p")
699
+ :, ~combined_results.columns.str.match(r"(^p$|.*-p$)")
652
700
  ]
653
701
 
654
702
  def reorder_diff_avg(df: DataFrame) -> DataFrame:
@@ -727,17 +775,22 @@ if __name__ == "__main__":
727
775
 
728
776
  portfolio = PortfolioAnalysis(
729
777
  panel,
730
- endog="return",
778
+ endog="IdioVol",
731
779
  weight="MktCap",
732
- models=Models,
733
- factors_series=time_series,
780
+ # models=Models,
781
+ # factors_series=time_series,
734
782
  )
735
783
 
736
784
  # portfolio.GroupN("Illiq", 10, inplace=True)
737
- portfolio.GroupN(["MktCap", "Illiq"], [3, 5], sort_type="dependent", inplace=True)
785
+ portfolio.GroupN(["MktCap", "Illiq"], [5, 5], sort_type="dependent", inplace=True)
738
786
 
739
787
  uni_ew, uni_vw = portfolio.univariate_analysis(
740
- "Illiq", 10, factor_return=False, already_grouped=False
788
+ "Illiq",
789
+ 5,
790
+ format=True,
791
+ # factor_return=False,
792
+ already_grouped=True,
793
+ is_endog_return=False,
741
794
  )
742
795
  pp(uni_ew)
743
796
  pp(uni_vw)
@@ -745,13 +798,14 @@ if __name__ == "__main__":
745
798
  bi_ew, bi_vw = portfolio.bivariate_analysis(
746
799
  "MktCap",
747
800
  "Illiq",
748
- 3,
801
+ 5,
749
802
  5,
750
803
  False,
751
- False,
804
+ True,
752
805
  "dependent",
753
- factor_return=False,
806
+ # factor_return=False,
754
807
  already_grouped=True,
808
+ is_endog_return=False,
755
809
  )
756
810
  pp(bi_ew)
757
811
  pp(bi_vw)
@@ -1,12 +1,14 @@
1
- from anomalylab.config import *
1
+ from dataclasses import dataclass
2
+ from typing import Optional
3
+
4
+ from pandas import DataFrame, Series
5
+
2
6
  from anomalylab.empirical.empirical import Empirical
3
7
  from anomalylab.structure import PanelData
4
- from anomalylab.utils.imports import *
5
- from anomalylab.utils.utils import *
8
+ from anomalylab.utils import Columns, columns_to_list, pp, round_to_string
6
9
 
7
10
 
8
11
  class Statistics:
9
-
10
12
  @staticmethod
11
13
  def mean(series: Series) -> float:
12
14
  return series.mean() if not series.isna().all() else None
@@ -1,9 +1,14 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import warnings
4
+ from dataclasses import dataclass
5
+ from typing import Literal, Optional, Union
6
+
7
+ from pandas import DataFrame, Series
8
+
3
9
  from anomalylab.preprocess.preprocessor import Preprocessor
4
10
  from anomalylab.structure import PanelData
5
- from anomalylab.utils.imports import *
6
- from anomalylab.utils.utils import *
11
+ from anomalylab.utils import Columns, Scalar, columns_to_list, pp
7
12
 
8
13
 
9
14
  @dataclass
@@ -68,7 +73,6 @@ class FillMethod:
68
73
 
69
74
  @dataclass
70
75
  class FillNa(Preprocessor):
71
-
72
76
  def fill(
73
77
  self,
74
78
  series: Series,
@@ -218,11 +222,11 @@ class FillNa(Preprocessor):
218
222
  warnings.warn(message=f"Missing values not found in {fill_columns}.")
219
223
  if self.panel_data.normalize:
220
224
  warnings.warn(
221
- message=f"The data has already been normalized, and missing values have been filled with 0."
225
+ message="The data has already been normalized, and missing values have been filled with 0."
222
226
  )
223
227
  if self.panel_data.fillna:
224
228
  warnings.warn(
225
- message=f"The missing values have already been handled earlier."
229
+ message="The missing values have already been handled earlier."
226
230
  )
227
231
 
228
232
 
@@ -1,9 +1,15 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import warnings
4
+ from dataclasses import dataclass
5
+ from typing import Literal
6
+
7
+ import numpy as np
8
+ from pandas import DataFrame, Series
9
+
3
10
  from anomalylab.preprocess.preprocessor import Preprocessor
4
11
  from anomalylab.structure import PanelData
5
- from anomalylab.utils.imports import *
6
- from anomalylab.utils.utils import *
12
+ from anomalylab.utils import Columns, columns_to_list, pp
7
13
 
8
14
 
9
15
  class NormalizeMethod:
@@ -53,7 +59,12 @@ class NormalizeMethod:
53
59
  return rescaled_df
54
60
 
55
61
  @classmethod
56
- def call_method(cls, method: str, df: DataFrame) -> DataFrame:
62
+ def call_method(
63
+ cls,
64
+ method: str,
65
+ df: DataFrame | Series,
66
+ fillna_zero_after_norm: bool = False,
67
+ ) -> DataFrame | Series:
57
68
  """
58
69
  Calls a specified normalization method on the input DataFrame.
59
70
 
@@ -66,20 +77,38 @@ class NormalizeMethod:
66
77
  cls: The class that is calling this method (NormalizeMethod).
67
78
  method (str): The name of the method to call ('zscore' or 'rank').
68
79
  df (DataFrame): The input DataFrame to be normalized.
80
+ fillna_zero_after_norm (bool): If True, fills NaN values with zero after normalization.
81
+ Defaults to False.
69
82
 
70
83
  Returns:
71
- DataFrame: The normalized DataFrame after applying the specified method.
84
+ DataFrame: The normalized DataFrame. NaN values are filled with zero
85
+ if `fillna_zero_after_norm=True` is set.
72
86
 
73
87
  Raises:
74
88
  AttributeError: If the specified method does not exist.
75
89
  """
76
- if hasattr(cls, method):
77
- return getattr(cls, method)(df).fillna(value=0)
78
- else:
90
+ if not hasattr(cls, method):
79
91
  raise AttributeError(
80
92
  f"Method '{method}' not found, use 'zscore' or 'rank'."
81
93
  )
82
94
 
95
+ normalized_df = getattr(cls, method)(df)
96
+
97
+ if fillna_zero_after_norm:
98
+ normalized_df = normalized_df.fillna(value=0)
99
+ else:
100
+ if isinstance(df, Series):
101
+ if df.isna().all():
102
+ warnings.warn(f"Column {df.name} contains only missing values.")
103
+ else:
104
+ all_nan_cols = df.columns[df.isna().all()].tolist()
105
+ if all_nan_cols:
106
+ warnings.warn(
107
+ f"Columns {all_nan_cols} contain only missing values."
108
+ )
109
+
110
+ return normalized_df
111
+
83
112
 
84
113
  @dataclass
85
114
  class Normalize(Preprocessor):
@@ -101,6 +130,7 @@ class Normalize(Preprocessor):
101
130
  group_columns: Columns = None,
102
131
  no_process_columns: Columns = None,
103
132
  process_all_characteristics: bool = True,
133
+ fillna_zero_after_norm: bool = False,
104
134
  ) -> Normalize:
105
135
  """
106
136
  Normalizes specified columns of the DataFrame using the chosen method.
@@ -121,6 +151,8 @@ class Normalize(Preprocessor):
121
151
  normalization. Defaults to None.
122
152
  process_all_characteristics (bool, optional): Whether to process all
123
153
  characteristics or not. Defaults to True.
154
+ fillna_zero_after_norm (bool): If True, fills NaN values with zero after normalization.
155
+ Defaults to False.
124
156
 
125
157
  Returns:
126
158
  Normalize: The instance of the Normalize class with updated state.
@@ -144,7 +176,9 @@ class Normalize(Preprocessor):
144
176
  # Normalize the selected columns
145
177
  self.panel_data.transform(
146
178
  columns=columns,
147
- func=lambda df: NormalizeMethod.call_method(method=method, df=df),
179
+ func=lambda df: NormalizeMethod.call_method(
180
+ method=method, df=df, fillna_zero_after_norm=fillna_zero_after_norm
181
+ ),
148
182
  group_columns=group_columns,
149
183
  )
150
184
 
@@ -1,10 +1,16 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from dataclasses import dataclass
4
+ from typing import Literal, Optional
5
+
6
+ import numpy as np
7
+ from pandas import DataFrame, Series
8
+ from scipy.stats.mstats import winsorize as winsorization
9
+
3
10
  from anomalylab.preprocess.preprocessor import Preprocessor
4
11
  from anomalylab.preprocess.truncate import truncate as truncation
5
12
  from anomalylab.structure import PanelData
6
- from anomalylab.utils.imports import *
7
- from anomalylab.utils.utils import *
13
+ from anomalylab.utils import Columns, columns_to_list, pp
8
14
 
9
15
 
10
16
  class OutlierMethod:
@@ -1,8 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
- from anomalylab.structure import PanelData, TimeSeries
4
- from anomalylab.utils.imports import *
5
- from anomalylab.utils.utils import *
3
+ from abc import ABC
4
+ from dataclasses import dataclass
5
+
6
+ from anomalylab.structure import PanelData
6
7
 
7
8
 
8
9
  @dataclass
@@ -1,9 +1,13 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import warnings
4
+ from dataclasses import dataclass
5
+
6
+ from pandas import DataFrame
7
+
3
8
  from anomalylab.preprocess.preprocessor import Preprocessor
4
9
  from anomalylab.structure import PanelData
5
- from anomalylab.utils.imports import *
6
- from anomalylab.utils.utils import *
10
+ from anomalylab.utils import Columns, columns_to_list, pp
7
11
 
8
12
 
9
13
  @dataclass
@@ -1,11 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import numpy as np
3
4
  import numpy.ma as ma
4
5
  from scipy._lib._util import _contains_nan
5
6
 
6
- from anomalylab.utils.imports import *
7
- from anomalylab.utils.utils import *
8
-
9
7
 
10
8
  def truncate(
11
9
  a,
@@ -113,9 +111,7 @@ def truncate(
113
111
  shp = a.shape # Store the shape of the array
114
112
  return _truncate1D(
115
113
  a.ravel(), lolim, uplim, loinc, upinc, contains_nan, nan_policy
116
- ).reshape(
117
- shp
118
- ) # Truncate and reshape the array back to its original shape
114
+ ).reshape(shp) # Truncate and reshape the array back to its original shape
119
115
  else:
120
116
  return ma.apply_along_axis(
121
117
  _truncate1D, axis, a, lolim, uplim, loinc, upinc, contains_nan, nan_policy
@@ -1,5 +1,10 @@
1
- from anomalylab.utils.imports import *
2
- from anomalylab.utils.utils import *
1
+ import copy
2
+ from abc import ABC, abstractmethod
3
+ from dataclasses import dataclass
4
+ from typing import Optional
5
+
6
+ from pandas import DataFrame
7
+ from typing_extensions import Self
3
8
 
4
9
 
5
10
  @dataclass
@@ -1,6 +1,13 @@
1
+ import copy
2
+ import warnings
3
+ from dataclasses import dataclass
4
+ from typing import Callable, Literal, Optional
5
+
6
+ import pandas as pd
7
+ from pandas import DataFrame
8
+
1
9
  from anomalylab.structure.data import Data
2
- from anomalylab.utils.imports import *
3
- from anomalylab.utils.utils import *
10
+ from anomalylab.utils import Columns, columns_to_list, pp
4
11
 
5
12
 
6
13
  @dataclass
@@ -58,8 +65,6 @@ class PanelData(Data):
58
65
 
59
66
  This method identifies remaining columns as firm characteristics, excluding classifications.
60
67
  """
61
- if self.is_copy:
62
- self.df = copy.deepcopy(self.df)
63
68
  self.df[self.id] = self.df[self.id].astype(int)
64
69
  if not isinstance(self.df[self.time].dtype, pd.PeriodDtype):
65
70
  self.df[self.time] = pd.to_datetime(self.df[self.time], format="ISO8601")
@@ -116,6 +121,9 @@ class PanelData(Data):
116
121
  ValueError: If any required columns are missing from the DataFrame.
117
122
  ValueError: If there are no firm characteristics remaining after checking.
118
123
  """
124
+ if self.is_copy:
125
+ self.df = copy.deepcopy(self.df)
126
+
119
127
  # Check for duplicate column names
120
128
  duplicated_columns = self.df.columns[self.df.columns.duplicated()].tolist()
121
129
  if duplicated_columns:
@@ -1,6 +1,12 @@
1
+ import copy
2
+ from dataclasses import dataclass, field
3
+ from typing import Literal
4
+
5
+ import pandas as pd
6
+ from pandas import DataFrame
7
+
1
8
  from anomalylab.structure.data import Data
2
- from anomalylab.utils import *
3
- from anomalylab.utils.imports import *
9
+ from anomalylab.utils import pp
4
10
 
5
11
 
6
12
  @dataclass
@@ -35,8 +41,6 @@ class TimeSeries(Data):
35
41
 
36
42
  This method renames the time column to a standardized name and identifies remaining columns as factors.
37
43
  """
38
- if self.is_copy:
39
- self.df = copy.deepcopy(self.df)
40
44
  if not isinstance(self.df[self.time].dtype, pd.PeriodDtype):
41
45
  self.df[self.time] = pd.to_datetime(self.df[self.time], format="ISO8601")
42
46
  self.df[self.time] = self.df[self.time].dt.to_period(freq=self.frequency)
@@ -53,6 +57,9 @@ class TimeSeries(Data):
53
57
  ValueError: If the time column is missing from the DataFrame.
54
58
  ValueError: If there are no additional columns for factor returns.
55
59
  """
60
+ if self.is_copy:
61
+ self.df = copy.deepcopy(self.df)
62
+
56
63
  # Check for duplicate column names
57
64
  duplicated_columns = self.df.columns[self.df.columns.duplicated()].tolist()
58
65
  if duplicated_columns:
@@ -0,0 +1,25 @@
1
+ from anomalylab.utils.utils import (
2
+ Columns,
3
+ Info,
4
+ RegModel,
5
+ RegModels,
6
+ RegResult,
7
+ Scalar,
8
+ columns_to_list,
9
+ get_significance_star,
10
+ pp,
11
+ round_to_string,
12
+ )
13
+
14
+ __all__: list[str] = [
15
+ "Scalar",
16
+ "Columns",
17
+ "Info",
18
+ "RegModel",
19
+ "RegModels",
20
+ "RegResult",
21
+ "columns_to_list",
22
+ "round_to_string",
23
+ "get_significance_star",
24
+ "pp",
25
+ ]
@@ -1,4 +1,11 @@
1
- from anomalylab.utils.imports import *
1
+ from dataclasses import dataclass
2
+ from itertools import chain
3
+ from typing import Any, Optional, TypedDict, Union
4
+
5
+ from pandas import Series, Timedelta, Timestamp
6
+ from rich import print
7
+ from rich.panel import Panel as rich_Panel
8
+ from rich.pretty import Pretty
2
9
 
3
10
  Scalar = Union[str, int, float, bool, Timestamp, Timedelta]
4
11
  Columns = Optional[list[str] | str]
@@ -0,0 +1,5 @@
1
+ from anomalylab.visualization.format import FormatExcel
2
+
3
+ __all__: list[str] = [
4
+ "FormatExcel",
5
+ ]
@@ -1,4 +1,9 @@
1
- from anomalylab.utils.imports import *
1
+ import os
2
+ from dataclasses import dataclass
3
+ from glob import glob
4
+
5
+ from openpyxl import load_workbook
6
+ from openpyxl.styles import Alignment, Border, Side
2
7
 
3
8
 
4
9
  @dataclass
@@ -45,7 +50,7 @@ class FormatExcel:
45
50
  - Creates a thick border for the bottom of the first row.
46
51
  """
47
52
  thin = Side(border_style="thin", color="000000")
48
- thick = Side(border_style="thick", color="000000")
53
+ thick = Side(border_style="thick", color="000000") # noqa: F841
49
54
 
50
55
  for ws in self.wb.worksheets:
51
56
  for row in ws.iter_rows():
@@ -104,7 +109,7 @@ class FormatExcel:
104
109
  2 if ord(char) > 127 else 1 for char in str(cell.value)
105
110
  )
106
111
  max_length = max(max_length, cell_length)
107
- except:
112
+ except Exception:
108
113
  pass
109
114
  # Adjust for header row
110
115
  # header_cell = ws[f"{col_letter}1"]
@@ -2,7 +2,7 @@ from setuptools import find_packages, setup
2
2
 
3
3
  setup(
4
4
  name="AnomalyLab",
5
- version="0.4.2",
5
+ version="0.5.0",
6
6
  author="FinPhd",
7
7
  author_email="chenhaiwei@stu.sufe.edu.cn",
8
8
  description="A Python package for empirical asset pricing analysis.",
@@ -1 +0,0 @@
1
- DECIMAL = 2
@@ -1,14 +0,0 @@
1
- from anomalylab.utils.utils import *
2
-
3
- __all__: list[str] = [
4
- "Scalar",
5
- "Columns",
6
- "Info",
7
- "RegModel",
8
- "RegModels",
9
- "RegResult",
10
- "columns_to_list",
11
- "round_to_string",
12
- "get_significance_star",
13
- "pp",
14
- ]
@@ -1,58 +0,0 @@
1
- import copy
2
- import functools
3
- import math
4
- import os
5
- import warnings
6
- from abc import ABC, ABCMeta, abstractmethod
7
- from dataclasses import dataclass, field
8
- from datetime import date, datetime, timedelta, tzinfo
9
- from functools import partial, wraps
10
- from glob import glob
11
- from itertools import chain
12
- from types import SimpleNamespace
13
- from typing import (
14
- Any,
15
- Callable,
16
- ClassVar,
17
- Generic,
18
- Iterable,
19
- Literal,
20
- Optional,
21
- Sequence,
22
- TypedDict,
23
- TypeVar,
24
- Union,
25
- get_type_hints,
26
- )
27
-
28
- import matplotlib.pyplot as plt
29
- import numpy as np
30
- import pandas as pd
31
- import seaborn as sns
32
- import statsmodels.api as sm
33
- import statsmodels.formula.api as smf
34
- from deprecated import deprecated
35
- from linearmodels import FamaMacBeth
36
- from numpy import float32, float64
37
- from numpy.typing import NDArray
38
- from openpyxl import load_workbook
39
- from openpyxl.styles import Alignment, Border, Side
40
- from pandas import (
41
- DataFrame,
42
- DatetimeIndex,
43
- Index,
44
- Interval,
45
- Period,
46
- PeriodIndex,
47
- Series,
48
- Timedelta,
49
- Timestamp,
50
- )
51
- from pandas.arrays import PeriodArray
52
- from rich import print
53
- from rich.panel import Panel as rich_Panel
54
- from rich.pretty import Pretty, pprint
55
- from scipy.stats import kurtosis, skew
56
- from scipy.stats.mstats import winsorize as winsorization
57
- from tqdm import tqdm
58
- from typing_extensions import NotRequired, Required, Self
@@ -1,5 +0,0 @@
1
- from anomalylab.visualization.format import *
2
-
3
- __all__: list[str] = [
4
- "FormatExcel",
5
- ]
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes