PyPI - AnomalyLab - Versions diffs - 0.4.2__tar.gz → 0.5.0__tar.gz - Mend

AnomalyLab 0.4.2.tar.gz → 0.5.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

{anomalylab-0.4.2 → anomalylab-0.5.0/AnomalyLab.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: AnomalyLab
-Version: 0.4.2
+Version: 0.5.0
 Summary: A Python package for empirical asset pricing analysis.
 Author: FinPhd
 Author-email: chenhaiwei@stu.sufe.edu.cn

{anomalylab-0.4.2 → anomalylab-0.5.0}/AnomalyLab.egg-info/SOURCES.txt RENAMED Viewed

@@ -12,7 +12,6 @@ AnomalyLab.egg-info/dependency_links.txt
 AnomalyLab.egg-info/requires.txt
 AnomalyLab.egg-info/top_level.txt
 anomalylab/__init__.py
-anomalylab/config.py
 anomalylab/core/__init__.py
 anomalylab/core/core.py
 anomalylab/datasets/__init__.py
@@ -38,7 +37,6 @@ anomalylab/structure/data.py
 anomalylab/structure/panel_data.py
 anomalylab/structure/time_series.py
 anomalylab/utils/__init__.py
-anomalylab/utils/imports.py
 anomalylab/utils/utils.py
 anomalylab/visualization/__init__.py
 anomalylab/visualization/format.py

{anomalylab-0.4.2/AnomalyLab.egg-info → anomalylab-0.5.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: AnomalyLab
-Version: 0.4.2
+Version: 0.5.0
 Summary: A Python package for empirical asset pricing analysis.
 Author: FinPhd
 Author-email: chenhaiwei@stu.sufe.edu.cn

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/__init__.py RENAMED Viewed

@@ -11,8 +11,7 @@ from anomalylab.empirical import (
 )
 from anomalylab.preprocess import FillNa, Normalize, OutlierHandler, Shift
 from anomalylab.structure import PanelData, TimeSeries
-from anomalylab.utils import *
-from anomalylab.utils.imports import *
+from anomalylab.utils import pp
 from anomalylab.visualization import FormatExcel
 __all__: list[str] = [

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/core/core.py RENAMED Viewed

@@ -1,8 +1,12 @@
 from __future__ import annotations
-from importlib import resources
+from dataclasses import dataclass, field
+from importlib import resources  # noqa: F401
+from typing import Literal, Optional, Union
+import pandas as pd
+from pandas import DataFrame
-from anomalylab.config import *
 from anomalylab.empirical import (
     Correlation,
     FamaMacBethRegression,
@@ -12,8 +16,7 @@ from anomalylab.empirical import (
 )
 from anomalylab.preprocess import FillNa, Normalize, OutlierHandler, Shift
 from anomalylab.structure import PanelData, TimeSeries
-from anomalylab.utils import *
-from anomalylab.utils.imports import *
+from anomalylab.utils import Columns, Scalar, pp
 from anomalylab.visualization import FormatExcel
@@ -126,8 +129,7 @@ class Panel:
         return self._fm_preprocessor
     def format_preprocessor(self, path: str) -> FormatExcel:
-        if self._format_preprocessor is None:
-            self._format_preprocessor = FormatExcel(path=path)
+        self._format_preprocessor = FormatExcel(path=path)
         return self._format_preprocessor
     def normalize(
@@ -137,6 +139,7 @@ class Panel:
         group_columns: Columns = None,
         no_process_columns: Columns = None,
         process_all_characteristics: bool = True,
+        fillna_zero_after_norm: bool = False,
     ) -> Panel:
         """
         Normalizes specified columns of the DataFrame using the chosen method.
@@ -157,6 +160,8 @@ class Panel:
                 normalization. Defaults to None.
             process_all_characteristics (bool, optional): Whether to process all
                 characteristics or not. Defaults to True.
+            fillna_zero_after_norm (bool): If True, fills NaN values with zero after normalization.
+                Defaults to False.
         Returns:
             Normalize: The instance of the Normalize class with updated state.
@@ -171,6 +176,7 @@ class Panel:
             group_columns=group_columns,
             no_process_columns=no_process_columns,
             process_all_characteristics=process_all_characteristics,
+            fillna_zero_after_norm=fillna_zero_after_norm,
         ).panel_data
         return self
@@ -482,7 +488,7 @@ class Panel:
         groups: Union[int, list[int]],
         sort_type: Literal["independent", "dependent"] = "independent",
         inplace: bool = False,
-    ) -> tuple:
+    ) -> Optional[pd.DataFrame]:
         """Group variables into portfolios based on specified groups.
         This method creates portfolios for the specified variables in the panel data.
@@ -526,6 +532,7 @@ class Panel:
         decimal: Optional[int] = None,
         factor_return: bool = False,
         already_grouped: bool = False,
+        is_endog_return: bool = True,
     ) -> tuple:
         """Perform univariate analysis on the specified core variable.
@@ -543,6 +550,7 @@ class Panel:
             factor_return (bool): Whether to output factor returns in the analysis. Defaults to False.
             already_grouped (bool): If True, skips the grouping step assuming data has been pre-grouped.
                 Defaults to False.
+            is_endog_return (bool): Whether the dependent variable is a return. Defaults to True.
         Returns:
             tuple: A tuple containing the equal-weighted and value-weighted results DataFrames.
@@ -556,6 +564,7 @@ class Panel:
             decimal=decimal,
             factor_return=factor_return,
             already_grouped=already_grouped,
+            is_endog_return=is_endog_return,
         )
     def bivariate_analysis(
@@ -574,6 +583,7 @@ class Panel:
         decimal: Optional[int] = None,
         factor_return: bool = False,
         already_grouped: bool = False,
+        is_endog_return: bool = True,
     ) -> tuple:
         """Perform bivariate analysis on two specified variables.
@@ -595,6 +605,7 @@ class Panel:
             factor_return (bool): Whether to output factor returns in the analysis. Defaults to False.
             already_grouped (bool): If True, skips the grouping step assuming data has been pre-grouped.
                 Defaults to False.
+            is_endog_return (bool): Whether the dependent variable is a return. Defaults to True.
         Returns:
             tuple: A tuple containing the equal-weighted and value-weighted results DataFrames.
@@ -612,6 +623,7 @@ class Panel:
             decimal=decimal,
             factor_return=factor_return,
             already_grouped=already_grouped,
+            is_endog_return=is_endog_return,
         )
     def fm_reg(
@@ -731,6 +743,10 @@ if __name__ == "__main__":
         # no_process_columns="MktCap",
         # process_all_characteristics=True,
     )
+    panel.winsorize(method="winsorize", group_columns="date")
+    pp(panel)
     # panel.normalize(
     #     # columns="MktCap",
     #     method="zscore",
@@ -740,9 +756,6 @@ if __name__ == "__main__":
     # )
     # panel.shift(periods=1, drop_original=False)
-    panel.winsorize(method="winsorize", group_columns="date")
-    pp(panel)
     # summary = panel.summary()
     # pp(summary)

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/datasets/dataset.py RENAMED Viewed

@@ -1,7 +1,9 @@
 from importlib import resources
-from anomalylab.utils import *
-from anomalylab.utils.imports import *
+import pandas as pd
+from pandas import DataFrame
+from anomalylab.utils import pp
 class DataSet:

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/empirical/correlation.py RENAMED Viewed

@@ -1,8 +1,12 @@
-from anomalylab.config import *
+from dataclasses import dataclass
+from typing import Optional
+import numpy as np
+from pandas import DataFrame
 from anomalylab.empirical.empirical import Empirical
 from anomalylab.structure import PanelData
-from anomalylab.utils.imports import *
-from anomalylab.utils.utils import *
+from anomalylab.utils import Columns, columns_to_list, pp, round_to_string
 @dataclass

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/empirical/empirical.py RENAMED Viewed

@@ -1,7 +1,6 @@
-from anomalylab.config import *
+from dataclasses import dataclass
 from anomalylab.preprocess.preprocessor import Preprocessor
-from anomalylab.utils.imports import *
-from anomalylab.utils.utils import *
 @dataclass

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/empirical/fm_regression.py RENAMED Viewed

@@ -1,9 +1,27 @@
-from anomalylab.config import *
+import math
+import warnings
+from dataclasses import dataclass
+from functools import partial
+from typing import Literal, Optional
+import numpy as np
+import pandas as pd
+import statsmodels.api as sm
+from linearmodels import FamaMacBeth
+from pandas import DataFrame, Series
 from anomalylab.empirical.empirical import Empirical
 from anomalylab.preprocess import OutlierHandler
 from anomalylab.structure import PanelData
-from anomalylab.utils.imports import *
-from anomalylab.utils.utils import *
+from anomalylab.utils import (
+    RegModel,
+    RegModels,
+    RegResult,
+    columns_to_list,
+    get_significance_star,
+    pp,
+    round_to_string,
+)
 @dataclass
@@ -76,12 +94,14 @@ class FamaMacBethRegression(Empirical):
                     raise ValueError(
                         "When calculating the value-weighted industry return, the weight column must be specified!"
                     )
-                func = lambda x: np.average(
-                    x, weights=self.panel_data.df.loc[x.index, weight]
-                )
+                def func(x):
+                    return np.average(
+                        x, weights=self.panel_data.df.loc[x.index, weight]
+                    )
             else:
                 raise ValueError(
-                    f"industry_weighed_method must be one of ['value', 'equal']"
+                    "industry_weighed_method must be one of ['value', 'equal']"
                 )
             self.panel_data.df[endog] -= self.panel_data.df.groupby(
                 by=[self.time, industry]

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/empirical/persistence.py RENAMED Viewed

@@ -1,11 +1,17 @@
-from importlib import resources
+from importlib import resources  # noqa: F401
+from dataclasses import dataclass
+from typing import Optional
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import seaborn as sns
+from pandas import DataFrame
-from anomalylab.config import *
 from anomalylab.empirical.empirical import Empirical
 from anomalylab.preprocess.shift import Shift
 from anomalylab.structure import PanelData
-from anomalylab.utils.imports import *
-from anomalylab.utils.utils import *
+from anomalylab.utils import Columns, columns_to_list, pp, round_to_string
 @dataclass
@@ -72,7 +78,6 @@ class Persistence(Empirical):
         for var in columns:
             all_monthly_corrs = []
             for lag in periods:
                 # Store monthly correlations
                 monthly_corrs = []

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/empirical/portfolio.py RENAMED Viewed

@@ -1,10 +1,16 @@
-from pandas import DataFrame
+import math
+import warnings
+from dataclasses import dataclass
+from typing import Literal, Optional, Union
+import numpy as np
+import pandas as pd
+import statsmodels.api as sm
+from pandas import DataFrame, Series
-from anomalylab.config import *
 from anomalylab.empirical.empirical import Empirical
 from anomalylab.structure import PanelData, TimeSeries
-from anomalylab.utils.imports import *
-from anomalylab.utils.utils import *
+from anomalylab.utils import pp, round_to_string
 warnings.simplefilter(action="ignore", category=FutureWarning)
@@ -116,7 +122,7 @@ class PortfolioAnalysis(Empirical):
         group_col = [self.time]
         for i, var in enumerate(vars):
             if sort_type == "dependent" and i > 0:
-                group_col.append(f"{vars[i-1]}_g{groups[i-1]}")
+                group_col.append(f"{vars[i - 1]}_g{groups[i - 1]}")
                 out_df[f"{var}_g{groups[i]}"] = (
                     out_df.groupby(group_col, observed=False)[var]
                     .apply(
@@ -127,7 +133,7 @@ class PortfolioAnalysis(Empirical):
                         )
                     )
                     .reset_index()
-                    .set_index(f"level_{i+1}")
+                    .set_index(f"level_{i + 1}")
                     .drop(group_col, axis=1)
                 )
             else:
@@ -142,7 +148,7 @@ class PortfolioAnalysis(Empirical):
                         )
                     )
                     .reset_index()
-                    .set_index(f"level_{1}")
+                    .set_index("level_1")
                     .drop(self.time, axis=1)
                 )
@@ -157,24 +163,33 @@ class PortfolioAnalysis(Empirical):
         else:
             return out_df
-    def _claculate_value(self, df: DataFrame, decimal: Optional[int] = None) -> dict:
+    def _claculate_value(
+        self, df: DataFrame, decimal: Optional[int] = None, is_endog_return: bool = True
+    ) -> dict:
         """Calculate various portfolio performance metrics.
         This method computes mean returns, t-values, Sharpe ratios, and model-adjusted alpha and t values.
         Args:
             df (DataFrame): The DataFrame containing the relevant data for calculations.
+            decimal (Optional[int]): The number of decimal places for formatting. Defaults to None.
+            is_endog_return (bool): Whether the dependent variable is a return. Defaults to True.
         Returns:
             dict: A dictionary containing computed metrics.
         """
-        stat_dict = self._calculate_mean_and_t_value(df)
-        factors_dict = self._calculate_alpha_and_t_value(df)
-        sharpe_dict = self._calculate_sharpe(df, decimal)
+        stat_dict = self._calculate_mean_and_t_value(df, is_endog_return)
-        return {**stat_dict, **factors_dict, **sharpe_dict}
+        if is_endog_return:
+            factors_dict = self._calculate_alpha_and_t_value(df)
+            sharpe_dict = self._calculate_sharpe(df, decimal)
+            return {**stat_dict, **factors_dict, **sharpe_dict}
+        return stat_dict
-    def _calculate_mean_and_t_value(self, df: DataFrame) -> dict:
+    def _calculate_mean_and_t_value(
+        self, df: DataFrame, is_endog_return: bool = True
+    ) -> dict:
         """Calculate mean and t-value for the dependent variable.
         This method computes the mean return and its t-value assuming the null hypothesis
@@ -182,6 +197,7 @@ class PortfolioAnalysis(Empirical):
         Args:
             df (DataFrame): The DataFrame containing the relevant data for calculations.
+            is_endog_return (bool): Whether the dependent variable is a return. Defaults to True.
         Returns:
             dict: A dictionary with mean, t-value, and p-value.
@@ -199,7 +215,9 @@ class PortfolioAnalysis(Empirical):
         mean_value = reg.params[0]
         t_value = reg.tvalues[0]
         p_value = reg.pvalues[0]
-        stat_dict["Return"] = mean_value
+        key_name = "Return" if is_endog_return else self.endog
+        stat_dict[key_name] = mean_value
         stat_dict["t"] = t_value
         stat_dict["p"] = p_value
@@ -284,6 +302,7 @@ class PortfolioAnalysis(Empirical):
         decimal: Optional[int] = None,
         factor_return: bool = False,
         already_grouped: bool = False,
+        is_endog_return: bool = True,
     ) -> tuple:
         """Perform univariate analysis on the specified core variable.
@@ -299,6 +318,7 @@ class PortfolioAnalysis(Empirical):
             factor_return (bool): Whether to output factor returns in the analysis. Defaults to False.
             already_grouped (bool): If True, skips the grouping step assuming data has been pre-grouped.
                 Defaults to False.
+            is_endog_return (bool): Whether the dependent variable is a return. Defaults to True.
         Returns:
             tuple: A tuple containing the equal-weighted and value-weighted results DataFrames.
@@ -399,10 +419,20 @@ class PortfolioAnalysis(Empirical):
             results = {}
             for key, sr in time_series_dict.items():
-                results[key] = self._claculate_value(sr, decimal=decimal)
+                results[key] = self._claculate_value(
+                    sr, decimal=decimal, is_endog_return=is_endog_return
+                )
+            key_name = "Return" if is_endog_return else self.endog
             data = []
             for key, values in results.items():
+                if key_name == core_var:
+                    if key_name in values:
+                        val = values.pop(key_name)
+                        new_values = {f"{key_name}_val": val}
+                        new_values.update(values)
+                        values = new_values
                 values[core_var] = key
                 data.append(values)
@@ -429,7 +459,7 @@ class PortfolioAnalysis(Empirical):
                 combined_results.iloc[:, i : i + 3] = subset
             combined_results = combined_results.loc[
-                :, ~combined_results.columns.str.endswith("p")
+                :, ~combined_results.columns.str.match(r"(^p$|.*-p$)")
             ]
             return combined_results
@@ -451,6 +481,7 @@ class PortfolioAnalysis(Empirical):
         decimal: Optional[int] = None,
         factor_return: bool = False,
         already_grouped: bool = False,
+        is_endog_return: bool = True,
     ) -> tuple:
         """Perform bivariate analysis on two specified variables.
@@ -470,6 +501,7 @@ class PortfolioAnalysis(Empirical):
             factor_return (bool): Whether to output factor returns in the analysis. Defaults to False.
             already_grouped (bool): If True, skips the grouping step assuming data has been pre-grouped.
                 Defaults to False.
+            is_endog_return (bool): Whether the dependent variable is a return. Defaults to True.
         Returns:
             tuple: A tuple containing the equal-weighted and value-weighted results DataFrames.
@@ -544,15 +576,21 @@ class PortfolioAnalysis(Empirical):
             return pd.concat([group, sort_diff, sort_avg])
+        # Handle potential name collision if endog is same as sort_var or core_var
+        value_col = self.endog
+        if value_col in [sort_var, core_var]:
+            value_col = f"{self.endog}_val"
+        ew_ret_d.name = value_col
         ew_ret_d = ew_ret_d.reset_index()
         ew_ret_d = ew_ret_d.pivot(
-            index=[self.time, sort_var], columns=core_var, values=self.endog
+            index=[self.time, sort_var], columns=core_var, values=value_col
         )
-        vw_ret_d.name = self.endog
+        vw_ret_d.name = value_col
         vw_ret_d = vw_ret_d.reset_index()
         vw_ret_d = vw_ret_d.pivot(
-            index=[self.time, sort_var], columns=core_var, values=self.endog
+            index=[self.time, sort_var], columns=core_var, values=value_col
         )
         ew_ret_d = (
@@ -616,11 +654,21 @@ class PortfolioAnalysis(Empirical):
             results = {}
             for key, series in time_series_dict.items():
-                value_dict = self._claculate_value(series, decimal=decimal)
+                value_dict = self._claculate_value(
+                    series, decimal=decimal, is_endog_return=is_endog_return
+                )
                 results[key] = value_dict
+            key_name = "Return" if is_endog_return else self.endog
             data = []
             for key, values in results.items():
+                if key_name in [sort_var, core_var]:
+                    if key_name in values:
+                        val = values.pop(key_name)
+                        new_values = {f"{key_name}_val": val}
+                        new_values.update(values)
+                        values = new_values
                 values[sort_var] = key[0]
                 values[core_var] = key[1]
                 data.append(values)
@@ -648,7 +696,7 @@ class PortfolioAnalysis(Empirical):
                 combined_results.iloc[:, i : i + 3] = subset
             combined_results = combined_results.loc[
-                :, ~combined_results.columns.str.endswith("p")
+                :, ~combined_results.columns.str.match(r"(^p$|.*-p$)")
             ]
             def reorder_diff_avg(df: DataFrame) -> DataFrame:
@@ -727,17 +775,22 @@ if __name__ == "__main__":
     portfolio = PortfolioAnalysis(
         panel,
-        endog="return",
+        endog="IdioVol",
         weight="MktCap",
-        models=Models,
-        factors_series=time_series,
+        # models=Models,
+        # factors_series=time_series,
     )
     # portfolio.GroupN("Illiq", 10, inplace=True)
-    portfolio.GroupN(["MktCap", "Illiq"], [3, 5], sort_type="dependent", inplace=True)
+    portfolio.GroupN(["MktCap", "Illiq"], [5, 5], sort_type="dependent", inplace=True)
     uni_ew, uni_vw = portfolio.univariate_analysis(
-        "Illiq", 10, factor_return=False, already_grouped=False
+        "Illiq",
+        5,
+        format=True,
+        # factor_return=False,
+        already_grouped=True,
+        is_endog_return=False,
     )
     pp(uni_ew)
     pp(uni_vw)
@@ -745,13 +798,14 @@ if __name__ == "__main__":
     bi_ew, bi_vw = portfolio.bivariate_analysis(
         "MktCap",
         "Illiq",
-        3,
+        5,
         5,
         False,
-        False,
+        True,
         "dependent",
-        factor_return=False,
+        # factor_return=False,
         already_grouped=True,
+        is_endog_return=False,
     )
     pp(bi_ew)
     pp(bi_vw)

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/empirical/summary.py RENAMED Viewed

@@ -1,12 +1,14 @@
-from anomalylab.config import *
+from dataclasses import dataclass
+from typing import Optional
+from pandas import DataFrame, Series
 from anomalylab.empirical.empirical import Empirical
 from anomalylab.structure import PanelData
-from anomalylab.utils.imports import *
-from anomalylab.utils.utils import *
+from anomalylab.utils import Columns, columns_to_list, pp, round_to_string
 class Statistics:
     @staticmethod
     def mean(series: Series) -> float:
         return series.mean() if not series.isna().all() else None

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/preprocess/fillna.py RENAMED Viewed

@@ -1,9 +1,14 @@
 from __future__ import annotations
+import warnings
+from dataclasses import dataclass
+from typing import Literal, Optional, Union
+from pandas import DataFrame, Series
 from anomalylab.preprocess.preprocessor import Preprocessor
 from anomalylab.structure import PanelData
-from anomalylab.utils.imports import *
-from anomalylab.utils.utils import *
+from anomalylab.utils import Columns, Scalar, columns_to_list, pp
 @dataclass
@@ -68,7 +73,6 @@ class FillMethod:
 @dataclass
 class FillNa(Preprocessor):
     def fill(
         self,
         series: Series,
@@ -218,11 +222,11 @@ class FillNa(Preprocessor):
             warnings.warn(message=f"Missing values not found in {fill_columns}.")
         if self.panel_data.normalize:
             warnings.warn(
-                message=f"The data has already been normalized, and missing values have been filled with 0."
+                message="The data has already been normalized, and missing values have been filled with 0."
             )
         if self.panel_data.fillna:
             warnings.warn(
-                message=f"The missing values have already been handled earlier."
+                message="The missing values have already been handled earlier."
             )

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/preprocess/normalize.py RENAMED Viewed

@@ -1,9 +1,15 @@
 from __future__ import annotations
+import warnings
+from dataclasses import dataclass
+from typing import Literal
+import numpy as np
+from pandas import DataFrame, Series
 from anomalylab.preprocess.preprocessor import Preprocessor
 from anomalylab.structure import PanelData
-from anomalylab.utils.imports import *
-from anomalylab.utils.utils import *
+from anomalylab.utils import Columns, columns_to_list, pp
 class NormalizeMethod:
@@ -53,7 +59,12 @@ class NormalizeMethod:
         return rescaled_df
     @classmethod
-    def call_method(cls, method: str, df: DataFrame) -> DataFrame:
+    def call_method(
+        cls,
+        method: str,
+        df: DataFrame | Series,
+        fillna_zero_after_norm: bool = False,
+    ) -> DataFrame | Series:
         """
         Calls a specified normalization method on the input DataFrame.
@@ -66,20 +77,38 @@ class NormalizeMethod:
             cls: The class that is calling this method (NormalizeMethod).
             method (str): The name of the method to call ('zscore' or 'rank').
             df (DataFrame): The input DataFrame to be normalized.
+            fillna_zero_after_norm (bool): If True, fills NaN values with zero after normalization.
+                Defaults to False.
         Returns:
-            DataFrame: The normalized DataFrame after applying the specified method.
+            DataFrame: The normalized DataFrame. NaN values are filled with zero
+                if `fillna_zero_after_norm=True` is set.
         Raises:
             AttributeError: If the specified method does not exist.
         """
-        if hasattr(cls, method):
-            return getattr(cls, method)(df).fillna(value=0)
-        else:
+        if not hasattr(cls, method):
             raise AttributeError(
                 f"Method '{method}' not found, use 'zscore' or 'rank'."
             )
+        normalized_df = getattr(cls, method)(df)
+        if fillna_zero_after_norm:
+            normalized_df = normalized_df.fillna(value=0)
+        else:
+            if isinstance(df, Series):
+                if df.isna().all():
+                    warnings.warn(f"Column {df.name} contains only missing values.")
+            else:
+                all_nan_cols = df.columns[df.isna().all()].tolist()
+                if all_nan_cols:
+                    warnings.warn(
+                        f"Columns {all_nan_cols} contain only missing values."
+                    )
+        return normalized_df
 @dataclass
 class Normalize(Preprocessor):
@@ -101,6 +130,7 @@ class Normalize(Preprocessor):
         group_columns: Columns = None,
         no_process_columns: Columns = None,
         process_all_characteristics: bool = True,
+        fillna_zero_after_norm: bool = False,
     ) -> Normalize:
         """
         Normalizes specified columns of the DataFrame using the chosen method.
@@ -121,6 +151,8 @@ class Normalize(Preprocessor):
                 normalization. Defaults to None.
             process_all_characteristics (bool, optional): Whether to process all
                 characteristics or not. Defaults to True.
+            fillna_zero_after_norm (bool): If True, fills NaN values with zero after normalization.
+                Defaults to False.
         Returns:
             Normalize: The instance of the Normalize class with updated state.
@@ -144,7 +176,9 @@ class Normalize(Preprocessor):
         # Normalize the selected columns
         self.panel_data.transform(
             columns=columns,
-            func=lambda df: NormalizeMethod.call_method(method=method, df=df),
+            func=lambda df: NormalizeMethod.call_method(
+                method=method, df=df, fillna_zero_after_norm=fillna_zero_after_norm
+            ),
             group_columns=group_columns,
         )

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/preprocess/outliers.py RENAMED Viewed

@@ -1,10 +1,16 @@
 from __future__ import annotations
+from dataclasses import dataclass
+from typing import Literal, Optional
+import numpy as np
+from pandas import DataFrame, Series
+from scipy.stats.mstats import winsorize as winsorization
 from anomalylab.preprocess.preprocessor import Preprocessor
 from anomalylab.preprocess.truncate import truncate as truncation
 from anomalylab.structure import PanelData
-from anomalylab.utils.imports import *
-from anomalylab.utils.utils import *
+from anomalylab.utils import Columns, columns_to_list, pp
 class OutlierMethod:

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/preprocess/preprocessor.py RENAMED Viewed

@@ -1,8 +1,9 @@
 from __future__ import annotations
-from anomalylab.structure import PanelData, TimeSeries
-from anomalylab.utils.imports import *
-from anomalylab.utils.utils import *
+from abc import ABC
+from dataclasses import dataclass
+from anomalylab.structure import PanelData
 @dataclass

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/preprocess/shift.py RENAMED Viewed

@@ -1,9 +1,13 @@
 from __future__ import annotations
+import warnings
+from dataclasses import dataclass
+from pandas import DataFrame
 from anomalylab.preprocess.preprocessor import Preprocessor
 from anomalylab.structure import PanelData
-from anomalylab.utils.imports import *
-from anomalylab.utils.utils import *
+from anomalylab.utils import Columns, columns_to_list, pp
 @dataclass

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/preprocess/truncate.py RENAMED Viewed

@@ -1,11 +1,9 @@
 from __future__ import annotations
+import numpy as np
 import numpy.ma as ma
 from scipy._lib._util import _contains_nan
-from anomalylab.utils.imports import *
-from anomalylab.utils.utils import *
 def truncate(
     a,
@@ -113,9 +111,7 @@ def truncate(
         shp = a.shape  # Store the shape of the array
         return _truncate1D(
             a.ravel(), lolim, uplim, loinc, upinc, contains_nan, nan_policy
-        ).reshape(
-            shp
-        )  # Truncate and reshape the array back to its original shape
+        ).reshape(shp)  # Truncate and reshape the array back to its original shape
     else:
         return ma.apply_along_axis(
             _truncate1D, axis, a, lolim, uplim, loinc, upinc, contains_nan, nan_policy

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/structure/data.py RENAMED Viewed

@@ -1,5 +1,10 @@
-from anomalylab.utils.imports import *
-from anomalylab.utils.utils import *
+import copy
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Optional
+from pandas import DataFrame
+from typing_extensions import Self
 @dataclass

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/structure/panel_data.py RENAMED Viewed

@@ -1,6 +1,13 @@
+import copy
+import warnings
+from dataclasses import dataclass
+from typing import Callable, Literal, Optional
+import pandas as pd
+from pandas import DataFrame
 from anomalylab.structure.data import Data
-from anomalylab.utils.imports import *
-from anomalylab.utils.utils import *
+from anomalylab.utils import Columns, columns_to_list, pp
 @dataclass
@@ -58,8 +65,6 @@ class PanelData(Data):
         This method identifies remaining columns as firm characteristics, excluding classifications.
         """
-        if self.is_copy:
-            self.df = copy.deepcopy(self.df)
         self.df[self.id] = self.df[self.id].astype(int)
         if not isinstance(self.df[self.time].dtype, pd.PeriodDtype):
             self.df[self.time] = pd.to_datetime(self.df[self.time], format="ISO8601")
@@ -116,6 +121,9 @@ class PanelData(Data):
             ValueError: If any required columns are missing from the DataFrame.
             ValueError: If there are no firm characteristics remaining after checking.
         """
+        if self.is_copy:
+            self.df = copy.deepcopy(self.df)
         # Check for duplicate column names
         duplicated_columns = self.df.columns[self.df.columns.duplicated()].tolist()
         if duplicated_columns:

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/structure/time_series.py RENAMED Viewed

@@ -1,6 +1,12 @@
+import copy
+from dataclasses import dataclass, field
+from typing import Literal
+import pandas as pd
+from pandas import DataFrame
 from anomalylab.structure.data import Data
-from anomalylab.utils import *
-from anomalylab.utils.imports import *
+from anomalylab.utils import pp
 @dataclass
@@ -35,8 +41,6 @@ class TimeSeries(Data):
         This method renames the time column to a standardized name and identifies remaining columns as factors.
         """
-        if self.is_copy:
-            self.df = copy.deepcopy(self.df)
         if not isinstance(self.df[self.time].dtype, pd.PeriodDtype):
             self.df[self.time] = pd.to_datetime(self.df[self.time], format="ISO8601")
             self.df[self.time] = self.df[self.time].dt.to_period(freq=self.frequency)
@@ -53,6 +57,9 @@ class TimeSeries(Data):
             ValueError: If the time column is missing from the DataFrame.
             ValueError: If there are no additional columns for factor returns.
         """
+        if self.is_copy:
+            self.df = copy.deepcopy(self.df)
         # Check for duplicate column names
         duplicated_columns = self.df.columns[self.df.columns.duplicated()].tolist()
         if duplicated_columns:

anomalylab-0.5.0/anomalylab/utils/__init__.py ADDED Viewed

@@ -0,0 +1,25 @@
+from anomalylab.utils.utils import (
+    Columns,
+    Info,
+    RegModel,
+    RegModels,
+    RegResult,
+    Scalar,
+    columns_to_list,
+    get_significance_star,
+    pp,
+    round_to_string,
+)
+__all__: list[str] = [
+    "Scalar",
+    "Columns",
+    "Info",
+    "RegModel",
+    "RegModels",
+    "RegResult",
+    "columns_to_list",
+    "round_to_string",
+    "get_significance_star",
+    "pp",
+]

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/utils/utils.py RENAMED Viewed

@@ -1,4 +1,11 @@
-from anomalylab.utils.imports import *
+from dataclasses import dataclass
+from itertools import chain
+from typing import Any, Optional, TypedDict, Union
+from pandas import Series, Timedelta, Timestamp
+from rich import print
+from rich.panel import Panel as rich_Panel
+from rich.pretty import Pretty
 Scalar = Union[str, int, float, bool, Timestamp, Timedelta]
 Columns = Optional[list[str] | str]

anomalylab-0.5.0/anomalylab/visualization/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from anomalylab.visualization.format import FormatExcel
+__all__: list[str] = [
+    "FormatExcel",
+]

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/visualization/format.py RENAMED Viewed

@@ -1,4 +1,9 @@
-from anomalylab.utils.imports import *
+import os
+from dataclasses import dataclass
+from glob import glob
+from openpyxl import load_workbook
+from openpyxl.styles import Alignment, Border, Side
 @dataclass
@@ -45,7 +50,7 @@ class FormatExcel:
         - Creates a thick border for the bottom of the first row.
         """
         thin = Side(border_style="thin", color="000000")
-        thick = Side(border_style="thick", color="000000")
+        thick = Side(border_style="thick", color="000000")  # noqa: F841
         for ws in self.wb.worksheets:
             for row in ws.iter_rows():
@@ -104,7 +109,7 @@ class FormatExcel:
                             2 if ord(char) > 127 else 1 for char in str(cell.value)
                         )
                         max_length = max(max_length, cell_length)
-                    except:
+                    except Exception:
                         pass
                 # Adjust for header row
                 # header_cell = ws[f"{col_letter}1"]

{anomalylab-0.4.2 → anomalylab-0.5.0}/setup.py RENAMED Viewed

@@ -2,7 +2,7 @@ from setuptools import find_packages, setup
 setup(
     name="AnomalyLab",
-    version="0.4.2",
+    version="0.5.0",
     author="FinPhd",
     author_email="chenhaiwei@stu.sufe.edu.cn",
     description="A Python package for empirical asset pricing analysis.",

anomalylab-0.4.2/anomalylab/config.py DELETED Viewed

@@ -1 +0,0 @@
-DECIMAL = 2

anomalylab-0.4.2/anomalylab/utils/__init__.py DELETED Viewed

@@ -1,14 +0,0 @@
-from anomalylab.utils.utils import *
-__all__: list[str] = [
-    "Scalar",
-    "Columns",
-    "Info",
-    "RegModel",
-    "RegModels",
-    "RegResult",
-    "columns_to_list",
-    "round_to_string",
-    "get_significance_star",
-    "pp",
-]

anomalylab-0.4.2/anomalylab/utils/imports.py DELETED Viewed

@@ -1,58 +0,0 @@
-import copy
-import functools
-import math
-import os
-import warnings
-from abc import ABC, ABCMeta, abstractmethod
-from dataclasses import dataclass, field
-from datetime import date, datetime, timedelta, tzinfo
-from functools import partial, wraps
-from glob import glob
-from itertools import chain
-from types import SimpleNamespace
-from typing import (
-    Any,
-    Callable,
-    ClassVar,
-    Generic,
-    Iterable,
-    Literal,
-    Optional,
-    Sequence,
-    TypedDict,
-    TypeVar,
-    Union,
-    get_type_hints,
-)
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-import seaborn as sns
-import statsmodels.api as sm
-import statsmodels.formula.api as smf
-from deprecated import deprecated
-from linearmodels import FamaMacBeth
-from numpy import float32, float64
-from numpy.typing import NDArray
-from openpyxl import load_workbook
-from openpyxl.styles import Alignment, Border, Side
-from pandas import (
-    DataFrame,
-    DatetimeIndex,
-    Index,
-    Interval,
-    Period,
-    PeriodIndex,
-    Series,
-    Timedelta,
-    Timestamp,
-)
-from pandas.arrays import PeriodArray
-from rich import print
-from rich.panel import Panel as rich_Panel
-from rich.pretty import Pretty, pprint
-from scipy.stats import kurtosis, skew
-from scipy.stats.mstats import winsorize as winsorization
-from tqdm import tqdm
-from typing_extensions import NotRequired, Required, Self

anomalylab-0.4.2/anomalylab/visualization/__init__.py DELETED Viewed

@@ -1,5 +0,0 @@
-from anomalylab.visualization.format import *
-__all__: list[str] = [
-    "FormatExcel",
-]

{anomalylab-0.4.2 → anomalylab-0.5.0}/.gitattributes RENAMED Viewed

File without changes

{anomalylab-0.4.2 → anomalylab-0.5.0}/.github/workflows/python-publish.yml RENAMED Viewed

File without changes

{anomalylab-0.4.2 → anomalylab-0.5.0}/.gitignore RENAMED Viewed

File without changes

{anomalylab-0.4.2 → anomalylab-0.5.0}/AnomalyLab.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{anomalylab-0.4.2 → anomalylab-0.5.0}/AnomalyLab.egg-info/requires.txt RENAMED Viewed

File without changes

{anomalylab-0.4.2 → anomalylab-0.5.0}/AnomalyLab.egg-info/top_level.txt RENAMED Viewed

File without changes

{anomalylab-0.4.2 → anomalylab-0.5.0}/LICENSE RENAMED Viewed

File without changes

{anomalylab-0.4.2 → anomalylab-0.5.0}/MANIFEST.in RENAMED Viewed

File without changes

{anomalylab-0.4.2 → anomalylab-0.5.0}/README.md RENAMED Viewed

File without changes

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/core/__init__.py RENAMED Viewed

File without changes

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/datasets/__init__.py RENAMED Viewed

File without changes

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/datasets/panel_data.csv RENAMED Viewed

File without changes

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/datasets/time_series_data.csv RENAMED Viewed

File without changes

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/empirical/__init__.py RENAMED Viewed

File without changes

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/preprocess/__init__.py RENAMED Viewed

File without changes

{anomalylab-0.4.2 → anomalylab-0.5.0}/anomalylab/structure/__init__.py RENAMED Viewed

File without changes

{anomalylab-0.4.2 → anomalylab-0.5.0}/requirements.txt RENAMED Viewed

File without changes

{anomalylab-0.4.2 → anomalylab-0.5.0}/setup.cfg RENAMED Viewed

File without changes

{anomalylab-0.4.2 → anomalylab-0.5.0}/tests/__init__.py RENAMED Viewed

File without changes

AnomalyLab 0.4.2__tar.gz → 0.5.0__tar.gz

AnomalyLab 0.4.2__tar.gz → 0.5.0__tar.gz