AnomalyLab 0.2.8__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. {anomalylab-0.2.8 → anomalylab-0.3.0/AnomalyLab.egg-info}/PKG-INFO +1 -1
  2. {anomalylab-0.2.8/AnomalyLab.egg-info → anomalylab-0.3.0}/PKG-INFO +1 -1
  3. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/core/core.py +11 -5
  4. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/empirical/correlation.py +1 -1
  5. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/empirical/fm_regression.py +3 -3
  6. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/empirical/persistence.py +5 -5
  7. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/empirical/portfolio.py +26 -24
  8. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/preprocess/outliers.py +2 -2
  9. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/structure/data.py +1 -8
  10. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/structure/panel_data.py +7 -3
  11. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/structure/time_series.py +3 -0
  12. {anomalylab-0.2.8 → anomalylab-0.3.0}/setup.py +1 -1
  13. {anomalylab-0.2.8 → anomalylab-0.3.0}/.gitattributes +0 -0
  14. {anomalylab-0.2.8 → anomalylab-0.3.0}/.github/workflows/python-publish.yml +0 -0
  15. {anomalylab-0.2.8 → anomalylab-0.3.0}/.gitignore +0 -0
  16. {anomalylab-0.2.8 → anomalylab-0.3.0}/AnomalyLab.egg-info/SOURCES.txt +0 -0
  17. {anomalylab-0.2.8 → anomalylab-0.3.0}/AnomalyLab.egg-info/dependency_links.txt +0 -0
  18. {anomalylab-0.2.8 → anomalylab-0.3.0}/AnomalyLab.egg-info/requires.txt +0 -0
  19. {anomalylab-0.2.8 → anomalylab-0.3.0}/AnomalyLab.egg-info/top_level.txt +0 -0
  20. {anomalylab-0.2.8 → anomalylab-0.3.0}/LICENSE +0 -0
  21. {anomalylab-0.2.8 → anomalylab-0.3.0}/MANIFEST.in +0 -0
  22. {anomalylab-0.2.8 → anomalylab-0.3.0}/README.md +0 -0
  23. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/__init__.py +0 -0
  24. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/config.py +0 -0
  25. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/core/__init__.py +0 -0
  26. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/datasets/__init__.py +0 -0
  27. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/datasets/dataset.py +0 -0
  28. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/datasets/panel_data.csv +0 -0
  29. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/datasets/time_series_data.csv +0 -0
  30. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/datasets/transition_matrix.png +0 -0
  31. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/empirical/__init__.py +0 -0
  32. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/empirical/empirical.py +0 -0
  33. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/empirical/factor_return.py +0 -0
  34. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/empirical/summary.py +0 -0
  35. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/preprocess/__init__.py +0 -0
  36. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/preprocess/fillna.py +0 -0
  37. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/preprocess/normalize.py +0 -0
  38. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/preprocess/preprocessor.py +0 -0
  39. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/preprocess/shift.py +0 -0
  40. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/preprocess/truncate.py +0 -0
  41. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/structure/__init__.py +0 -0
  42. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/utils/__init__.py +0 -0
  43. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/utils/imports.py +0 -0
  44. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/utils/utils.py +0 -0
  45. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/visualization/__init__.py +0 -0
  46. {anomalylab-0.2.8 → anomalylab-0.3.0}/anomalylab/visualization/format.py +0 -0
  47. {anomalylab-0.2.8 → anomalylab-0.3.0}/requirements.txt +0 -0
  48. {anomalylab-0.2.8 → anomalylab-0.3.0}/setup.cfg +0 -0
  49. {anomalylab-0.2.8 → anomalylab-0.3.0}/tests/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: AnomalyLab
3
- Version: 0.2.8
3
+ Version: 0.3.0
4
4
  Summary: A Python package for empirical asset pricing analysis.
5
5
  Author: FinPhd
6
6
  Classifier: Programming Language :: Python :: 3
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: AnomalyLab
3
- Version: 0.2.8
3
+ Version: 0.3.0
4
4
  Summary: A Python package for empirical asset pricing analysis.
5
5
  Author: FinPhd
6
6
  Classifier: Programming Language :: Python :: 3
@@ -19,7 +19,7 @@ from anomalylab.visualization import FormatExcel
19
19
 
20
20
  @dataclass
21
21
  class Panel:
22
- _df: pd.DataFrame = field(repr=False)
22
+ _df: DataFrame = field(repr=False)
23
23
  name: Optional[str] = None
24
24
  id: str = "permno"
25
25
  time: str = "date"
@@ -27,6 +27,7 @@ class Panel:
27
27
  ret: str = "return"
28
28
  classifications: Optional[list[str] | str] = None
29
29
  drop_all_chars_missing: bool = False
30
+ is_copy: bool = False
30
31
 
31
32
  def __post_init__(self) -> None:
32
33
  self.panel_data: PanelData = PanelData(
@@ -38,6 +39,7 @@ class Panel:
38
39
  ret=self.ret,
39
40
  classifications=self.classifications,
40
41
  drop_all_chars_missing=self.drop_all_chars_missing,
42
+ is_copy=self.is_copy,
41
43
  )
42
44
  self._normalize_processor = None
43
45
  self._fillna_processor = None
@@ -260,7 +262,7 @@ class Panel:
260
262
  draw: bool = False,
261
263
  path: Optional[str] = None,
262
264
  decimal: Optional[int] = None,
263
- ) -> pd.DataFrame:
265
+ ) -> DataFrame:
264
266
  return self.persistence_processor.transition_matrix(
265
267
  var=var,
266
268
  group=group,
@@ -304,7 +306,7 @@ class Panel:
304
306
  factors_series: Optional[TimeSeries] = None,
305
307
  pivot: bool = True,
306
308
  format: bool = False,
307
- type: str = "dependent",
309
+ sort_type: str = "dependent",
308
310
  decimal: Optional[int] = None,
309
311
  factor_return: bool = False,
310
312
  ) -> tuple:
@@ -317,7 +319,7 @@ class Panel:
317
319
  core_g=core_g,
318
320
  pivot=pivot,
319
321
  format=format,
320
- type=type,
322
+ sort_type=sort_type,
321
323
  decimal=decimal,
322
324
  factor_return=factor_return,
323
325
  )
@@ -372,7 +374,11 @@ if __name__ == "__main__":
372
374
  }
373
375
 
374
376
  panel = Panel(
375
- df, name="Stocks", classifications="industry", drop_all_chars_missing=True
377
+ df,
378
+ name="Stocks",
379
+ classifications="industry",
380
+ drop_all_chars_missing=True,
381
+ is_copy=False,
376
382
  )
377
383
  time_series: TimeSeries = TimeSeries(df=ts, name="Factor Series")
378
384
  pp(panel)
@@ -74,7 +74,7 @@ class Correlation(Empirical):
74
74
  )
75
75
  is_upper = False # Switch to lower triangle for the next method
76
76
 
77
- return pd.DataFrame(data=merged_corr, index=columns, columns=columns).map(
77
+ return DataFrame(data=merged_corr, index=columns, columns=columns).map(
78
78
  func=round_to_string,
79
79
  decimal=decimal or self.decimal, # Round results to specified decimals
80
80
  )
@@ -131,11 +131,11 @@ class FamaMacBethRegression(Empirical):
131
131
  coefs = results.params
132
132
  coefs[self.time] = time
133
133
  coef_df.append(coefs)
134
- coef_df = pd.DataFrame(coef_df)
134
+ coef_df = DataFrame(coef_df)
135
135
  coef_df = coef_df[
136
136
  [self.time] + [col for col in coef_df.columns if col != self.time]
137
137
  ]
138
- return pd.DataFrame(coef_df)
138
+ return DataFrame(coef_df)
139
139
 
140
140
  # Fama-MacBeth regression with Newey-West adjustment
141
141
  fmb = FamaMacBeth(
@@ -207,7 +207,7 @@ class FamaMacBethRegression(Empirical):
207
207
  Returns:
208
208
  Series: Formatted regression results including parameters, t-values, and statistics.
209
209
  """
210
- result: Series = pd.DataFrame(
210
+ result: Series = DataFrame(
211
211
  data={
212
212
  "params": reg_result["params"].map(
213
213
  arg=lambda x: round_to_string(value=x, decimal=decimal)
@@ -29,7 +29,7 @@ class Persistence(Empirical):
29
29
  no_process_columns: Columns = None,
30
30
  process_all_characteristics: bool = True,
31
31
  decimal: Optional[int] = None,
32
- ) -> pd.DataFrame:
32
+ ) -> DataFrame:
33
33
  """
34
34
  Computes average persistence (autocorrelation) for specified columns over defined time periods.
35
35
 
@@ -48,7 +48,7 @@ class Persistence(Empirical):
48
48
  Defaults to None.
49
49
 
50
50
  Returns:
51
- pd.DataFrame: A DataFrame containing the average persistence for specified columns.
51
+ DataFrame: A DataFrame containing the average persistence for specified columns.
52
52
 
53
53
  Note:
54
54
  The resulting DataFrame contains the average correlations for each lag, formatted to the
@@ -90,7 +90,7 @@ class Persistence(Empirical):
90
90
  all_monthly_corrs.extend(monthly_corrs)
91
91
 
92
92
  # Convert to DataFrame
93
- all_monthly_corrs_df = pd.DataFrame(all_monthly_corrs)
93
+ all_monthly_corrs_df = DataFrame(all_monthly_corrs)
94
94
 
95
95
  # Calculate average monthly correlations
96
96
  mean_corrs_df = (
@@ -116,7 +116,7 @@ class Persistence(Empirical):
116
116
  draw: bool = False,
117
117
  path: Optional[str] = None,
118
118
  decimal: Optional[int] = None,
119
- ) -> pd.DataFrame:
119
+ ) -> DataFrame:
120
120
  """Calculate the transition matrix for a specified variable and lag.
121
121
 
122
122
  This method computes the transition matrix that shows how groups change over time based on
@@ -173,7 +173,7 @@ class Persistence(Empirical):
173
173
  )
174
174
 
175
175
  # Create DataFrame for the transition matrix
176
- transition_matrix_df = pd.DataFrame(
176
+ transition_matrix_df = DataFrame(
177
177
  transition_matrix, columns=range(1, group + 1), index=range(1, group + 1)
178
178
  )
179
179
 
@@ -69,8 +69,8 @@ class PortfolioAnalysis(Empirical):
69
69
  self,
70
70
  vars: Union[str, list[str]],
71
71
  groups: Union[int, list[int]],
72
- type: Optional[str] = None,
73
- ) -> pd.DataFrame:
72
+ sort_type: Optional[str] = None,
73
+ ) -> DataFrame:
74
74
  """Group variables into portfolios based on specified groups.
75
75
 
76
76
  This method creates portfolios for the specified variables in the panel data.
@@ -78,7 +78,7 @@ class PortfolioAnalysis(Empirical):
78
78
  Args:
79
79
  vars (list of str): List of variables to group.
80
80
  groups (list of int): List of integers defining the number of groups for each variable.
81
- type (str, optional): Type of grouping, can be 'dependent' to adjust based on the previous variable.
81
+ sort_type (str, optional): Type of sorting, can be 'dependent' to adjust based on the previous variable.
82
82
 
83
83
  Returns:
84
84
  DataFrame: A DataFrame with new columns for grouped variables.
@@ -105,7 +105,7 @@ class PortfolioAnalysis(Empirical):
105
105
  # Adjust group definitions
106
106
  group_col = [self.time]
107
107
  for i, var in enumerate(vars):
108
- if type == "dependent" and i > 0:
108
+ if sort_type == "dependent" and i > 0:
109
109
  group_col.append(f"{vars[i-1]}_g{groups[i-1]}")
110
110
  # Grouping dependent on the previous variable
111
111
  out_df[f"{var}_g{groups[i]}"] = (
@@ -142,7 +142,7 @@ class PortfolioAnalysis(Empirical):
142
142
 
143
143
  return out_df
144
144
 
145
- def _claculate_value(self, df: pd.DataFrame, decimal: Optional[int] = None) -> dict:
145
+ def _claculate_value(self, df: DataFrame, decimal: Optional[int] = None) -> dict:
146
146
  """Calculate various portfolio performance metrics.
147
147
 
148
148
  This method computes mean returns, t-values, Sharpe ratios, and model-adjusted alpha and t values.
@@ -159,7 +159,7 @@ class PortfolioAnalysis(Empirical):
159
159
 
160
160
  return {**stat_dict, **factors_dict, **sharpe_dict}
161
161
 
162
- def _calculate_mean_and_t_value(self, df: pd.DataFrame) -> dict:
162
+ def _calculate_mean_and_t_value(self, df: DataFrame) -> dict:
163
163
  """Calculate mean and t-value for the dependent variable.
164
164
 
165
165
  This method computes the mean return and its t-value assuming the null hypothesis
@@ -176,7 +176,7 @@ class PortfolioAnalysis(Empirical):
176
176
  lag = math.ceil(4 * (T / 100) ** (4 / 25))
177
177
 
178
178
  Y = df[self.endog].values
179
- X = pd.DataFrame({"constant": [1] * len(df[self.endog])}).values
179
+ X = DataFrame({"constant": [1] * len(df[self.endog])}).values
180
180
  reg = sm.OLS(Y, X).fit(
181
181
  cov_type="HAC", cov_kwds={"maxlags": lag, "use_correction": False}
182
182
  )
@@ -190,7 +190,7 @@ class PortfolioAnalysis(Empirical):
190
190
 
191
191
  return stat_dict
192
192
 
193
- def _calculate_alpha_and_t_value(self, df: pd.DataFrame) -> dict:
193
+ def _calculate_alpha_and_t_value(self, df: DataFrame) -> dict:
194
194
  """Calculate alpha and t-value for specified models.
195
195
 
196
196
  This method computes alpha values and their t-statistics for various regression models
@@ -239,7 +239,7 @@ class PortfolioAnalysis(Empirical):
239
239
  else:
240
240
  return {}
241
241
 
242
- def _calculate_sharpe(self, df: pd.DataFrame, decimal: Optional[int] = 0) -> dict:
242
+ def _calculate_sharpe(self, df: DataFrame, decimal: Optional[int] = 0) -> dict:
243
243
  """Calculate the Sharpe ratio for the dependent variable.
244
244
 
245
245
  This method computes the annualized Sharpe ratio based on the mean and standard deviation
@@ -280,6 +280,7 @@ class PortfolioAnalysis(Empirical):
280
280
  core_g (int): The group number for portfolio grouping of the core variable.
281
281
  format (bool): Whether to format the output for display. Defaults to False.
282
282
  decimal (Optional[int]): The number of decimal places for formatting. Defaults to None.
283
+ factor_return (bool): Whether to output factor returns in the analysis. Defaults to False.
283
284
 
284
285
  Returns:
285
286
  tuple: A tuple containing the equal-weighted and value-weighted results DataFrames.
@@ -299,7 +300,7 @@ class PortfolioAnalysis(Empirical):
299
300
  ) # type: ignore
300
301
  vw_ret_d.index.names = [self.time, core_var]
301
302
 
302
- def process_group(group: pd.DataFrame) -> pd.DataFrame:
303
+ def process_group(group: DataFrame) -> Series:
303
304
  """Process each group to calculate differences and prepare the output.
304
305
 
305
306
  This function computes the difference between the highest portfolio and the lowest
@@ -309,7 +310,7 @@ class PortfolioAnalysis(Empirical):
309
310
  group (DataFrame): The grouped DataFrame for which to process data.
310
311
 
311
312
  Returns:
312
- DataFrame: The processed DataFrame with differences and averages.
313
+ Series: The processed Series with differences and averages.
313
314
  """
314
315
  group = group.sort_index(axis=0, level=[0, 1])
315
316
 
@@ -318,7 +319,7 @@ class PortfolioAnalysis(Empirical):
318
319
  [(group.index.get_level_values(0)[0], "Diff")],
319
320
  names=[self.time, core_var],
320
321
  )
321
- core_diff = pd.Series(core_diff, index=new_index)
322
+ core_diff = Series(core_diff, index=new_index)
322
323
 
323
324
  return pd.concat([group, core_diff])
324
325
 
@@ -358,7 +359,7 @@ class PortfolioAnalysis(Empirical):
358
359
 
359
360
  def calculate_time_series_metrics(
360
361
  series: Series, format: bool = format
361
- ) -> pd.DataFrame:
362
+ ) -> DataFrame:
362
363
  """Calculate metrics for each time series and format results.
363
364
 
364
365
  This function computes performance metrics for each time series and formats the results
@@ -383,7 +384,7 @@ class PortfolioAnalysis(Empirical):
383
384
  values[core_var] = key
384
385
  data.append(values)
385
386
 
386
- combined_results = pd.DataFrame(data)
387
+ combined_results = DataFrame(data)
387
388
 
388
389
  combined_results.set_index(core_var, inplace=True)
389
390
 
@@ -424,7 +425,7 @@ class PortfolioAnalysis(Empirical):
424
425
  core_g: int,
425
426
  pivot: bool = True,
426
427
  format: bool = False,
427
- type: str = "dependent",
428
+ sort_type: str = "dependent",
428
429
  decimal: Optional[int] = None,
429
430
  factor_return: bool = False,
430
431
  ) -> tuple:
@@ -443,6 +444,7 @@ class PortfolioAnalysis(Empirical):
443
444
  format (bool): Whether to format the output for display. Defaults to False.
444
445
  type (str): Type of grouping, can be 'dependent' or 'independent'. Defaults to 'dependent'.
445
446
  decimal (Optional[int]): The number of decimal places to round to. Defaults to None.
447
+ factor_return (bool): Whether to output factor returns in the analysis. Defaults to False.
446
448
 
447
449
  Returns:
448
450
  tuple: A tuple containing the equal-weighted and value-weighted results DataFrames.
@@ -450,7 +452,7 @@ class PortfolioAnalysis(Empirical):
450
452
  data_d = self.GroupN(
451
453
  [sort_var, core_var],
452
454
  [sort_g, core_g],
453
- type=type,
455
+ sort_type=sort_type,
454
456
  )
455
457
 
456
458
  ew_ret_d = data_d.groupby(
@@ -465,7 +467,7 @@ class PortfolioAnalysis(Empirical):
465
467
  )
466
468
  vw_ret_d.index.names = [self.time, sort_var, core_var]
467
469
 
468
- def process_group(group: pd.DataFrame) -> pd.DataFrame:
470
+ def process_group(group: DataFrame) -> DataFrame:
469
471
  """Process each group to calculate differences and averages.
470
472
 
471
473
  This function computes the difference between the highest portfolio and lowest portfolio,
@@ -527,7 +529,7 @@ class PortfolioAnalysis(Empirical):
527
529
  if factor_return:
528
530
  return ew_ret_d, vw_ret_d
529
531
 
530
- def generate_time_series_dict(df: pd.DataFrame) -> dict:
532
+ def generate_time_series_dict(df: DataFrame) -> dict:
531
533
  """Generate a dictionary of time series data from the DataFrame.
532
534
 
533
535
  This function extracts time series for each unique combination of sorting and core variables.
@@ -554,8 +556,8 @@ class PortfolioAnalysis(Empirical):
554
556
  return time_series_dict
555
557
 
556
558
  def calculate_time_series_metrics(
557
- df: pd.DataFrame, pivot: bool = pivot, format: bool = format
558
- ) -> pd.DataFrame:
559
+ df: DataFrame, pivot: bool = pivot, format: bool = format
560
+ ) -> DataFrame:
559
561
  """Calculate metrics for each time series and format results.
560
562
 
561
563
  This function computes performance metrics for each time series and formats the results
@@ -583,7 +585,7 @@ class PortfolioAnalysis(Empirical):
583
585
  values[core_var] = key[1]
584
586
  data.append(values)
585
587
 
586
- combined_results = pd.DataFrame(data)
588
+ combined_results = DataFrame(data)
587
589
 
588
590
  combined_results.set_index([sort_var, core_var], inplace=True)
589
591
 
@@ -609,16 +611,16 @@ class PortfolioAnalysis(Empirical):
609
611
  :, ~combined_results.columns.str.endswith("p")
610
612
  ]
611
613
 
612
- def reorder_diff_avg(df: pd.DataFrame) -> pd.DataFrame:
614
+ def reorder_diff_avg(df: DataFrame) -> DataFrame:
613
615
  """Reorder the rows and columns of a DataFrame to place 'Diff' before 'Avg'.
614
616
 
615
617
  This function rearranges the DataFrame to improve readability.
616
618
 
617
619
  Args:
618
- df (pd.DataFrame): The DataFrame to reorder.
620
+ df (DataFrame): The DataFrame to reorder.
619
621
 
620
622
  Returns:
621
- pd.DataFrame: The reordered DataFrame.
623
+ DataFrame: The reordered DataFrame.
622
624
  """
623
625
  columns_order = [
624
626
  col for col in df.columns if col not in ["Diff", "Avg"]
@@ -40,7 +40,7 @@ class OutlierMethod:
40
40
  Returns:
41
41
  Series: A new Series with winsorized values.
42
42
  """
43
- return pd.Series(
43
+ return Series(
44
44
  data=np.where(
45
45
  series.isnull(),
46
46
  np.nan,
@@ -69,7 +69,7 @@ class OutlierMethod:
69
69
  Returns:
70
70
  Series: A new Series with truncated values.
71
71
  """
72
- return pd.Series(
72
+ return Series(
73
73
  data=np.where(
74
74
  series.isnull(),
75
75
  np.nan,
@@ -21,18 +21,11 @@ class Data(ABC):
21
21
  1. Check if the columns are valid.
22
22
  2. Preprocess the data.
23
23
  3. Set the flag if needed.
24
- 4. Call the other_init method if needed.
25
24
  """
26
- if self.name is None:
27
- self.name = "anomaly"
25
+
28
26
  self._check_columns()
29
27
  self._preprocess()
30
28
  self.set_flag()
31
- self.other_init()
32
-
33
- def other_init(self) -> None:
34
- """This method is a placeholder for additional initialization logic."""
35
- pass
36
29
 
37
30
  def set_flag(self) -> None:
38
31
  """This method is meant to be overridden by subclasses to set flags."""
@@ -20,7 +20,7 @@ class PanelData(Data):
20
20
  frequency (Literal["D", "M", "Y"]):
21
21
  The frequency of the data. Defaults to "M".
22
22
  ret (str):
23
- The column name for the excess return. Defaults to "return".
23
+ The column name for the excess return. Defaults to None.
24
24
  classifications (list[str]):
25
25
  The list of classification columns.
26
26
  drop_all_chars_missing (bool):
@@ -30,9 +30,10 @@ class PanelData(Data):
30
30
  id: str = "permno"
31
31
  time: str = "date"
32
32
  frequency: Literal["D", "M", "Y"] = "M"
33
- ret: str = "return"
33
+ ret: Optional[str] = None
34
34
  classifications: Optional[list[str] | str] = None
35
35
  drop_all_chars_missing: bool = False
36
+ is_copy: bool = False
36
37
 
37
38
  def set_flag(self) -> None:
38
39
  """Set default flags for the `PanelData` object."""
@@ -57,10 +58,12 @@ class PanelData(Data):
57
58
 
58
59
  This method identifies remaining columns as firm characteristics, excluding classifications.
59
60
  """
61
+ if self.is_copy:
62
+ self.df = copy.deepcopy(self.df)
60
63
  self.df[self.id] = self.df[self.id].astype(int)
61
64
  self.df[self.time] = pd.to_datetime(self.df[self.time], format="ISO8601")
62
65
  self.df[self.time] = self.df[self.time].dt.to_period(freq=self.frequency)
63
- self.df = self.df.sort_values(by=[self.time, self.id])
66
+ self.df.sort_values(by=[self.time, self.id], inplace=True)
64
67
  basic_column = (
65
68
  [self.id, self.time] if self.ret is None else [self.id, self.time, self.ret]
66
69
  )
@@ -208,6 +211,7 @@ if __name__ == "__main__":
208
211
  ret="return",
209
212
  classifications="industry",
210
213
  drop_all_chars_missing=True,
214
+ is_copy=False,
211
215
  )
212
216
  pp(panel_data)
213
217
  pp(panel_data.df)
@@ -24,6 +24,7 @@ class TimeSeries(Data):
24
24
  time: str = "date"
25
25
  frequency: Literal["D", "M", "Y"] = "M"
26
26
  factors: list[str] = field(init=False)
27
+ is_copy: bool = False
27
28
 
28
29
  def __repr__(self) -> str:
29
30
  return f"TimeSeriesData({self.name})" # todo: add frequency
@@ -34,6 +35,8 @@ class TimeSeries(Data):
34
35
 
35
36
  This method renames the time column to a standardized name and identifies remaining columns as factors.
36
37
  """
38
+ if self.is_copy:
39
+ self.df = copy.deepcopy(self.df)
37
40
  self.df[self.time] = pd.to_datetime(self.df[self.time], format="ISO8601")
38
41
  self.df[self.time] = self.df[self.time].dt.to_period(freq=self.frequency)
39
42
  self.df = self.df.sort_values(by=self.time)
@@ -2,7 +2,7 @@ from setuptools import find_packages, setup
2
2
 
3
3
  setup(
4
4
  name="AnomalyLab",
5
- version="0.2.8",
5
+ version="0.3.0",
6
6
  author="FinPhd",
7
7
  # author_email="your.email@example.com",
8
8
  description="A Python package for empirical asset pricing analysis.",
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes