AnomalyLab 0.4.0__tar.gz → 0.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. {anomalylab-0.4.0 → anomalylab-0.4.2}/AnomalyLab.egg-info/PKG-INFO +1 -1
  2. {anomalylab-0.4.0 → anomalylab-0.4.2}/AnomalyLab.egg-info/SOURCES.txt +0 -1
  3. {anomalylab-0.4.0 → anomalylab-0.4.2}/PKG-INFO +1 -1
  4. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/core/core.py +42 -31
  5. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/empirical/fm_regression.py +4 -31
  6. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/structure/panel_data.py +8 -0
  7. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/structure/time_series.py +8 -0
  8. {anomalylab-0.4.0 → anomalylab-0.4.2}/setup.py +1 -1
  9. anomalylab-0.4.0/.png +0 -0
  10. {anomalylab-0.4.0 → anomalylab-0.4.2}/.gitattributes +0 -0
  11. {anomalylab-0.4.0 → anomalylab-0.4.2}/.github/workflows/python-publish.yml +0 -0
  12. {anomalylab-0.4.0 → anomalylab-0.4.2}/.gitignore +0 -0
  13. {anomalylab-0.4.0 → anomalylab-0.4.2}/AnomalyLab.egg-info/dependency_links.txt +0 -0
  14. {anomalylab-0.4.0 → anomalylab-0.4.2}/AnomalyLab.egg-info/requires.txt +0 -0
  15. {anomalylab-0.4.0 → anomalylab-0.4.2}/AnomalyLab.egg-info/top_level.txt +0 -0
  16. {anomalylab-0.4.0 → anomalylab-0.4.2}/LICENSE +0 -0
  17. {anomalylab-0.4.0 → anomalylab-0.4.2}/MANIFEST.in +0 -0
  18. {anomalylab-0.4.0 → anomalylab-0.4.2}/README.md +0 -0
  19. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/__init__.py +0 -0
  20. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/config.py +0 -0
  21. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/core/__init__.py +0 -0
  22. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/datasets/__init__.py +0 -0
  23. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/datasets/dataset.py +0 -0
  24. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/datasets/panel_data.csv +0 -0
  25. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/datasets/time_series_data.csv +0 -0
  26. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/empirical/__init__.py +0 -0
  27. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/empirical/correlation.py +0 -0
  28. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/empirical/empirical.py +0 -0
  29. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/empirical/persistence.py +0 -0
  30. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/empirical/portfolio.py +0 -0
  31. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/empirical/summary.py +0 -0
  32. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/preprocess/__init__.py +0 -0
  33. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/preprocess/fillna.py +0 -0
  34. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/preprocess/normalize.py +0 -0
  35. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/preprocess/outliers.py +0 -0
  36. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/preprocess/preprocessor.py +0 -0
  37. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/preprocess/shift.py +0 -0
  38. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/preprocess/truncate.py +0 -0
  39. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/structure/__init__.py +0 -0
  40. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/structure/data.py +0 -0
  41. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/utils/__init__.py +0 -0
  42. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/utils/imports.py +0 -0
  43. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/utils/utils.py +0 -0
  44. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/visualization/__init__.py +0 -0
  45. {anomalylab-0.4.0 → anomalylab-0.4.2}/anomalylab/visualization/format.py +0 -0
  46. {anomalylab-0.4.0 → anomalylab-0.4.2}/requirements.txt +0 -0
  47. {anomalylab-0.4.0 → anomalylab-0.4.2}/setup.cfg +0 -0
  48. {anomalylab-0.4.0 → anomalylab-0.4.2}/tests/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: AnomalyLab
3
- Version: 0.4.0
3
+ Version: 0.4.2
4
4
  Summary: A Python package for empirical asset pricing analysis.
5
5
  Author: FinPhd
6
6
  Author-email: chenhaiwei@stu.sufe.edu.cn
@@ -1,6 +1,5 @@
1
1
  .gitattributes
2
2
  .gitignore
3
- .png
4
3
  LICENSE
5
4
  MANIFEST.in
6
5
  README.md
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: AnomalyLab
3
- Version: 0.4.0
3
+ Version: 0.4.2
4
4
  Summary: A Python package for empirical asset pricing analysis.
5
5
  Author: FinPhd
6
6
  Author-email: chenhaiwei@stu.sufe.edu.cn
@@ -499,12 +499,20 @@ class Panel:
499
499
  DataFrame: If inplace=False (default), returns a new DataFrame with grouped variables.
500
500
  None: If inplace=True, modifies the original dataset and returns None.
501
501
  """
502
- return self.portfolio_analysis_processor(endog=endog, weight=weight).GroupN(
503
- vars=vars,
504
- groups=groups,
505
- sort_type=sort_type,
506
- inplace=inplace,
507
- )
502
+ if inplace:
503
+ self.panel_data.df = self.portfolio_analysis_processor(
504
+ endog=endog, weight=weight
505
+ ).GroupN(
506
+ vars=vars,
507
+ groups=groups,
508
+ sort_type=sort_type,
509
+ )
510
+ else:
511
+ return self.portfolio_analysis_processor(endog=endog, weight=weight).GroupN(
512
+ vars=vars,
513
+ groups=groups,
514
+ sort_type=sort_type,
515
+ )
508
516
 
509
517
  def univariate_analysis(
510
518
  self,
@@ -618,7 +626,6 @@ class Panel:
618
626
  industry_weighed_method: Literal["value", "equal"] = "value",
619
627
  is_winsorize: bool = False,
620
628
  is_normalize: bool = False,
621
- dummy_no_norm: Optional[list[str] | str] = None,
622
629
  decimal: Optional[int] = None,
623
630
  return_intermediate: bool = False,
624
631
  ) -> DataFrame:
@@ -639,8 +646,6 @@ class Panel:
639
646
  industry_weighed_method (Literal["value", "equal"]): Method for weighting industries.
640
647
  is_winsorize (bool): Indicates whether to apply winsorization.
641
648
  is_normalize (bool): Indicates whether to normalize exogenous variables.
642
- dummy_no_norm (Optional[list[str] | str]): Name(s) of dummy variables (e.g., 0 or 1)
643
- that should be excluded from normalization.
644
649
  decimal (Optional[int]): Number of decimal places for rounding in output.
645
650
  return_intermediate (bool): If True, returns the intermediate results (e.g., coefficients for each time period).
646
651
 
@@ -658,7 +663,6 @@ class Panel:
658
663
  industry_weighed_method=industry_weighed_method,
659
664
  is_winsorize=is_winsorize,
660
665
  is_normalize=is_normalize,
661
- dummy_no_norm=dummy_no_norm,
662
666
  decimal=decimal,
663
667
  return_intermediate=return_intermediate,
664
668
  )
@@ -739,30 +743,37 @@ if __name__ == "__main__":
739
743
  panel.winsorize(method="winsorize", group_columns="date")
740
744
  pp(panel)
741
745
 
742
- summary = panel.summary()
743
- pp(summary)
744
-
745
- correlation = panel.correlation()
746
- pp(correlation)
747
-
748
- persistence = panel.persistence(periods=[1, 3, 6, 12, 36, 60])
749
- pp(persistence)
750
- pp(
751
- panel.transition_matrix(
752
- var="MktCap",
753
- group=10,
754
- lag=12,
755
- draw=False,
756
- # path=str(resources.files("anomalylab.datasets")) + "/transition_matrix.png",
757
- path="...",
758
- decimal=2,
759
- )
760
- )
746
+ # summary = panel.summary()
747
+ # pp(summary)
748
+
749
+ # correlation = panel.correlation()
750
+ # pp(correlation)
751
+
752
+ # persistence = panel.persistence(periods=[1, 3, 6, 12, 36, 60])
753
+ # pp(persistence)
754
+ # pp(
755
+ # panel.transition_matrix(
756
+ # var="MktCap",
757
+ # group=10,
758
+ # lag=12,
759
+ # draw=False,
760
+ # # path=str(resources.files("anomalylab.datasets")) + "/transition_matrix.png",
761
+ # path="...",
762
+ # decimal=2,
763
+ # )
764
+ # )
761
765
 
762
- group_result = panel.group("return", "MktCap", "Illiq", 10)
766
+ panel.group("return", "MktCap", "Illiq", 10, inplace=True)
763
767
 
764
768
  uni_ew, uni_vw = panel.univariate_analysis(
765
- "return", "MktCap", "Illiq", 10, Models, time_series, factor_return=False
769
+ "return",
770
+ "MktCap",
771
+ "Illiq",
772
+ 10,
773
+ Models,
774
+ time_series,
775
+ factor_return=False,
776
+ already_grouped=True,
766
777
  )
767
778
  pp(uni_ew)
768
779
  pp(uni_vw)
@@ -92,7 +92,6 @@ class FamaMacBethRegression(Empirical):
92
92
  df: DataFrame,
93
93
  reg: RegModel,
94
94
  is_normalize: bool,
95
- dummy_no_norm: list[str] = [],
96
95
  return_intermediate: bool = False,
97
96
  ) -> RegResult:
98
97
  """Performs Fama-MacBeth regression on the provided DataFrame.
@@ -105,9 +104,6 @@ class FamaMacBethRegression(Empirical):
105
104
  df (DataFrame): DataFrame containing the data for regression.
106
105
  reg (RegModel): Model specification containing endogenous and exogenous variables.
107
106
  is_normalize (bool): Indicates whether to normalize the exogenous variables.
108
- dummy_no_norm (list[str]): List of variable names that should be excluded from normalization.
109
- Typically, these are dummy variables that take values like 0 or 1,
110
- where normalization may not make sense.
111
107
  return_intermediate (bool): If True, returns intermediate regression results
112
108
  (e.g., coefficients, t-values, and R²) for each time period.
113
109
 
@@ -121,26 +117,9 @@ class FamaMacBethRegression(Empirical):
121
117
  df = df.groupby(self.time).filter(lambda x: len(x) > 1)
122
118
  lag: int = math.ceil(4 * (df[self.time].nunique() / 100) ** (4 / 25))
123
119
  if is_normalize:
124
- dummy_no_norm = [col for col in dummy_no_norm if col in exogenous]
125
-
126
- variables_to_normalize = [
127
- col for col in exogenous if col not in dummy_no_norm
128
- ]
129
-
130
- for col in variables_to_normalize:
131
- grouped_std = df.groupby(self.time)[col].std()
132
-
133
- if (grouped_std == 0).any():
134
- problematic_group = grouped_std[grouped_std == 0].index.tolist()
135
- raise ValueError(
136
- f"Standard deviation is 0 for variable '{col}' in group(s) {problematic_group}. "
137
- f"Cannot normalize this variable. If '{col}' is a dummy variable, please add it to `dummy_no_norm`."
138
- )
139
-
140
- if variables_to_normalize:
141
- df[variables_to_normalize] = df.groupby(self.time)[
142
- variables_to_normalize
143
- ].transform(func=lambda x: (x - x.mean()) / x.std())
120
+ df[exogenous] = df.groupby(self.time)[exogenous].transform(
121
+ func=lambda x: (x - x.mean()) / x.std()
122
+ )
144
123
 
145
124
  df[self.time] = df[self.time].dt.to_timestamp()
146
125
  df = df.set_index([self.id, self.time])
@@ -172,7 +151,7 @@ class FamaMacBethRegression(Empirical):
172
151
  params=fmb.params,
173
152
  tvalues=fmb.tstats,
174
153
  pvalues=fmb.pvalues,
175
- mean_obs=str(int(fmb.time_info["mean"])),
154
+ mean_obs=str(round(fmb.time_info["mean"])),
176
155
  rsquared=(
177
156
  df.reset_index(level=df.index.names[0], drop=True)
178
157
  .groupby(self.time)
@@ -301,7 +280,6 @@ class FamaMacBethRegression(Empirical):
301
280
  industry_weighed_method: Literal["value", "equal"] = "value",
302
281
  is_winsorize: bool = False,
303
282
  is_normalize: bool = False,
304
- dummy_no_norm: Optional[list[str] | str] = None,
305
283
  decimal: Optional[int] = None,
306
284
  return_intermediate: bool = False, # New parameter to control whether intermediate results are returned
307
285
  ) -> DataFrame:
@@ -322,8 +300,6 @@ class FamaMacBethRegression(Empirical):
322
300
  industry_weighed_method (Literal["value", "equal"]): Method for weighting industries.
323
301
  is_winsorize (bool): Indicates whether to apply winsorization.
324
302
  is_normalize (bool): Indicates whether to normalize exogenous variables.
325
- dummy_no_norm (Optional[list[str] | str]): Name(s) of dummy variables (e.g., 0 or 1)
326
- that should be excluded from normalization.
327
303
  decimal (Optional[int]): Number of decimal places for rounding in output.
328
304
  return_intermediate (bool): If True, returns the intermediate results (e.g., coefficients for each time period).
329
305
 
@@ -334,7 +310,6 @@ class FamaMacBethRegression(Empirical):
334
310
  reg_models: RegModels = self._model_parse(
335
311
  regs=regs, endog=endog, exog=columns_to_list(exog)
336
312
  )
337
- dummy_no_norm = columns_to_list(dummy_no_norm)
338
313
  self._winsorize(is_winsorize=is_winsorize, exog=reg_models.exogenous)
339
314
  self._industry_weighted(
340
315
  endog=reg_models.dependent,
@@ -350,7 +325,6 @@ class FamaMacBethRegression(Empirical):
350
325
  df=self.panel_data.df,
351
326
  reg=model,
352
327
  is_normalize=is_normalize,
353
- dummy_no_norm=dummy_no_norm,
354
328
  return_intermediate=True,
355
329
  )
356
330
  for model in reg_models.models
@@ -366,7 +340,6 @@ class FamaMacBethRegression(Empirical):
366
340
  df=self.panel_data.df,
367
341
  reg=model,
368
342
  is_normalize=is_normalize,
369
- dummy_no_norm=dummy_no_norm,
370
343
  return_intermediate=False,
371
344
  ),
372
345
  decimal=decimal or self.decimal,
@@ -112,9 +112,17 @@ class PanelData(Data):
112
112
  """Check if the required columns are present in the DataFrame.
113
113
 
114
114
  Raises:
115
+ ValueError: If any duplicate column names are found in the DataFrame.
115
116
  ValueError: If any required columns are missing from the DataFrame.
116
117
  ValueError: If there are no firm characteristics remaining after checking.
117
118
  """
119
+ # Check for duplicate column names
120
+ duplicated_columns = self.df.columns[self.df.columns.duplicated()].tolist()
121
+ if duplicated_columns:
122
+ raise ValueError(
123
+ f"Duplicate column names found in the DataFrame: {duplicated_columns}"
124
+ )
125
+
118
126
  if isinstance(self.classifications, str):
119
127
  self.classifications = [self.classifications]
120
128
  # Check if the required columns are present in the DataFrame
@@ -49,9 +49,17 @@ class TimeSeries(Data):
49
49
  Check if the required column is present in the DataFrame and ensure there are additional columns.
50
50
 
51
51
  Raises:
52
+ ValueError: If duplicate column names are found in the DataFrame.
52
53
  ValueError: If the time column is missing from the DataFrame.
53
54
  ValueError: If there are no additional columns for factor returns.
54
55
  """
56
+ # Check for duplicate column names
57
+ duplicated_columns = self.df.columns[self.df.columns.duplicated()].tolist()
58
+ if duplicated_columns:
59
+ raise ValueError(
60
+ f"Duplicate column names found in the DataFrame: {duplicated_columns}"
61
+ )
62
+
55
63
  if self.time not in self.df.columns:
56
64
  raise ValueError(f"Missing column in the DataFrame: {self.time}")
57
65
 
@@ -2,7 +2,7 @@ from setuptools import find_packages, setup
2
2
 
3
3
  setup(
4
4
  name="AnomalyLab",
5
- version="0.4.0",
5
+ version="0.4.2",
6
6
  author="FinPhd",
7
7
  author_email="chenhaiwei@stu.sufe.edu.cn",
8
8
  description="A Python package for empirical asset pricing analysis.",
anomalylab-0.4.0/.png DELETED
Binary file
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes