AnomalyLab 0.2.9__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {anomalylab-0.2.9 → anomalylab-0.3.0/AnomalyLab.egg-info}/PKG-INFO +1 -1
- {anomalylab-0.2.9/AnomalyLab.egg-info → anomalylab-0.3.0}/PKG-INFO +1 -1
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/core/core.py +4 -4
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/empirical/correlation.py +1 -1
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/empirical/fm_regression.py +3 -3
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/empirical/persistence.py +5 -5
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/empirical/portfolio.py +24 -24
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/preprocess/outliers.py +2 -2
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/structure/data.py +1 -3
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/structure/panel_data.py +5 -1
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/structure/time_series.py +3 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/setup.py +1 -1
- {anomalylab-0.2.9 → anomalylab-0.3.0}/.gitattributes +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/.github/workflows/python-publish.yml +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/.gitignore +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/AnomalyLab.egg-info/SOURCES.txt +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/AnomalyLab.egg-info/dependency_links.txt +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/AnomalyLab.egg-info/requires.txt +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/AnomalyLab.egg-info/top_level.txt +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/LICENSE +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/MANIFEST.in +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/README.md +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/__init__.py +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/config.py +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/core/__init__.py +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/datasets/__init__.py +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/datasets/dataset.py +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/datasets/panel_data.csv +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/datasets/time_series_data.csv +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/datasets/transition_matrix.png +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/empirical/__init__.py +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/empirical/empirical.py +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/empirical/factor_return.py +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/empirical/summary.py +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/preprocess/__init__.py +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/preprocess/fillna.py +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/preprocess/normalize.py +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/preprocess/preprocessor.py +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/preprocess/shift.py +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/preprocess/truncate.py +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/structure/__init__.py +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/utils/__init__.py +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/utils/imports.py +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/utils/utils.py +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/visualization/__init__.py +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/anomalylab/visualization/format.py +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/requirements.txt +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/setup.cfg +0 -0
- {anomalylab-0.2.9 → anomalylab-0.3.0}/tests/__init__.py +0 -0
|
@@ -19,7 +19,7 @@ from anomalylab.visualization import FormatExcel
|
|
|
19
19
|
|
|
20
20
|
@dataclass
|
|
21
21
|
class Panel:
|
|
22
|
-
_df:
|
|
22
|
+
_df: DataFrame = field(repr=False)
|
|
23
23
|
name: Optional[str] = None
|
|
24
24
|
id: str = "permno"
|
|
25
25
|
time: str = "date"
|
|
@@ -262,7 +262,7 @@ class Panel:
|
|
|
262
262
|
draw: bool = False,
|
|
263
263
|
path: Optional[str] = None,
|
|
264
264
|
decimal: Optional[int] = None,
|
|
265
|
-
) ->
|
|
265
|
+
) -> DataFrame:
|
|
266
266
|
return self.persistence_processor.transition_matrix(
|
|
267
267
|
var=var,
|
|
268
268
|
group=group,
|
|
@@ -306,7 +306,7 @@ class Panel:
|
|
|
306
306
|
factors_series: Optional[TimeSeries] = None,
|
|
307
307
|
pivot: bool = True,
|
|
308
308
|
format: bool = False,
|
|
309
|
-
|
|
309
|
+
sort_type: str = "dependent",
|
|
310
310
|
decimal: Optional[int] = None,
|
|
311
311
|
factor_return: bool = False,
|
|
312
312
|
) -> tuple:
|
|
@@ -319,7 +319,7 @@ class Panel:
|
|
|
319
319
|
core_g=core_g,
|
|
320
320
|
pivot=pivot,
|
|
321
321
|
format=format,
|
|
322
|
-
|
|
322
|
+
sort_type=sort_type,
|
|
323
323
|
decimal=decimal,
|
|
324
324
|
factor_return=factor_return,
|
|
325
325
|
)
|
|
@@ -74,7 +74,7 @@ class Correlation(Empirical):
|
|
|
74
74
|
)
|
|
75
75
|
is_upper = False # Switch to lower triangle for the next method
|
|
76
76
|
|
|
77
|
-
return
|
|
77
|
+
return DataFrame(data=merged_corr, index=columns, columns=columns).map(
|
|
78
78
|
func=round_to_string,
|
|
79
79
|
decimal=decimal or self.decimal, # Round results to specified decimals
|
|
80
80
|
)
|
|
@@ -131,11 +131,11 @@ class FamaMacBethRegression(Empirical):
|
|
|
131
131
|
coefs = results.params
|
|
132
132
|
coefs[self.time] = time
|
|
133
133
|
coef_df.append(coefs)
|
|
134
|
-
coef_df =
|
|
134
|
+
coef_df = DataFrame(coef_df)
|
|
135
135
|
coef_df = coef_df[
|
|
136
136
|
[self.time] + [col for col in coef_df.columns if col != self.time]
|
|
137
137
|
]
|
|
138
|
-
return
|
|
138
|
+
return DataFrame(coef_df)
|
|
139
139
|
|
|
140
140
|
# Fama-MacBeth regression with Newey-West adjustment
|
|
141
141
|
fmb = FamaMacBeth(
|
|
@@ -207,7 +207,7 @@ class FamaMacBethRegression(Empirical):
|
|
|
207
207
|
Returns:
|
|
208
208
|
Series: Formatted regression results including parameters, t-values, and statistics.
|
|
209
209
|
"""
|
|
210
|
-
result: Series =
|
|
210
|
+
result: Series = DataFrame(
|
|
211
211
|
data={
|
|
212
212
|
"params": reg_result["params"].map(
|
|
213
213
|
arg=lambda x: round_to_string(value=x, decimal=decimal)
|
|
@@ -29,7 +29,7 @@ class Persistence(Empirical):
|
|
|
29
29
|
no_process_columns: Columns = None,
|
|
30
30
|
process_all_characteristics: bool = True,
|
|
31
31
|
decimal: Optional[int] = None,
|
|
32
|
-
) ->
|
|
32
|
+
) -> DataFrame:
|
|
33
33
|
"""
|
|
34
34
|
Computes average persistence (autocorrelation) for specified columns over defined time periods.
|
|
35
35
|
|
|
@@ -48,7 +48,7 @@ class Persistence(Empirical):
|
|
|
48
48
|
Defaults to None.
|
|
49
49
|
|
|
50
50
|
Returns:
|
|
51
|
-
|
|
51
|
+
DataFrame: A DataFrame containing the average persistence for specified columns.
|
|
52
52
|
|
|
53
53
|
Note:
|
|
54
54
|
The resulting DataFrame contains the average correlations for each lag, formatted to the
|
|
@@ -90,7 +90,7 @@ class Persistence(Empirical):
|
|
|
90
90
|
all_monthly_corrs.extend(monthly_corrs)
|
|
91
91
|
|
|
92
92
|
# Convert to DataFrame
|
|
93
|
-
all_monthly_corrs_df =
|
|
93
|
+
all_monthly_corrs_df = DataFrame(all_monthly_corrs)
|
|
94
94
|
|
|
95
95
|
# Calculate average monthly correlations
|
|
96
96
|
mean_corrs_df = (
|
|
@@ -116,7 +116,7 @@ class Persistence(Empirical):
|
|
|
116
116
|
draw: bool = False,
|
|
117
117
|
path: Optional[str] = None,
|
|
118
118
|
decimal: Optional[int] = None,
|
|
119
|
-
) ->
|
|
119
|
+
) -> DataFrame:
|
|
120
120
|
"""Calculate the transition matrix for a specified variable and lag.
|
|
121
121
|
|
|
122
122
|
This method computes the transition matrix that shows how groups change over time based on
|
|
@@ -173,7 +173,7 @@ class Persistence(Empirical):
|
|
|
173
173
|
)
|
|
174
174
|
|
|
175
175
|
# Create DataFrame for the transition matrix
|
|
176
|
-
transition_matrix_df =
|
|
176
|
+
transition_matrix_df = DataFrame(
|
|
177
177
|
transition_matrix, columns=range(1, group + 1), index=range(1, group + 1)
|
|
178
178
|
)
|
|
179
179
|
|
|
@@ -69,8 +69,8 @@ class PortfolioAnalysis(Empirical):
|
|
|
69
69
|
self,
|
|
70
70
|
vars: Union[str, list[str]],
|
|
71
71
|
groups: Union[int, list[int]],
|
|
72
|
-
|
|
73
|
-
) ->
|
|
72
|
+
sort_type: Optional[str] = None,
|
|
73
|
+
) -> DataFrame:
|
|
74
74
|
"""Group variables into portfolios based on specified groups.
|
|
75
75
|
|
|
76
76
|
This method creates portfolios for the specified variables in the panel data.
|
|
@@ -78,7 +78,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
78
78
|
Args:
|
|
79
79
|
vars (list of str): List of variables to group.
|
|
80
80
|
groups (list of int): List of integers defining the number of groups for each variable.
|
|
81
|
-
|
|
81
|
+
sort_type (str, optional): Type of sorting, can be 'dependent' to adjust based on the previous variable.
|
|
82
82
|
|
|
83
83
|
Returns:
|
|
84
84
|
DataFrame: A DataFrame with new columns for grouped variables.
|
|
@@ -105,7 +105,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
105
105
|
# Adjust group definitions
|
|
106
106
|
group_col = [self.time]
|
|
107
107
|
for i, var in enumerate(vars):
|
|
108
|
-
if
|
|
108
|
+
if sort_type == "dependent" and i > 0:
|
|
109
109
|
group_col.append(f"{vars[i-1]}_g{groups[i-1]}")
|
|
110
110
|
# Grouping dependent on the previous variable
|
|
111
111
|
out_df[f"{var}_g{groups[i]}"] = (
|
|
@@ -142,7 +142,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
142
142
|
|
|
143
143
|
return out_df
|
|
144
144
|
|
|
145
|
-
def _claculate_value(self, df:
|
|
145
|
+
def _claculate_value(self, df: DataFrame, decimal: Optional[int] = None) -> dict:
|
|
146
146
|
"""Calculate various portfolio performance metrics.
|
|
147
147
|
|
|
148
148
|
This method computes mean returns, t-values, Sharpe ratios, and model-adjusted alpha and t values.
|
|
@@ -159,7 +159,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
159
159
|
|
|
160
160
|
return {**stat_dict, **factors_dict, **sharpe_dict}
|
|
161
161
|
|
|
162
|
-
def _calculate_mean_and_t_value(self, df:
|
|
162
|
+
def _calculate_mean_and_t_value(self, df: DataFrame) -> dict:
|
|
163
163
|
"""Calculate mean and t-value for the dependent variable.
|
|
164
164
|
|
|
165
165
|
This method computes the mean return and its t-value assuming the null hypothesis
|
|
@@ -176,7 +176,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
176
176
|
lag = math.ceil(4 * (T / 100) ** (4 / 25))
|
|
177
177
|
|
|
178
178
|
Y = df[self.endog].values
|
|
179
|
-
X =
|
|
179
|
+
X = DataFrame({"constant": [1] * len(df[self.endog])}).values
|
|
180
180
|
reg = sm.OLS(Y, X).fit(
|
|
181
181
|
cov_type="HAC", cov_kwds={"maxlags": lag, "use_correction": False}
|
|
182
182
|
)
|
|
@@ -190,7 +190,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
190
190
|
|
|
191
191
|
return stat_dict
|
|
192
192
|
|
|
193
|
-
def _calculate_alpha_and_t_value(self, df:
|
|
193
|
+
def _calculate_alpha_and_t_value(self, df: DataFrame) -> dict:
|
|
194
194
|
"""Calculate alpha and t-value for specified models.
|
|
195
195
|
|
|
196
196
|
This method computes alpha values and their t-statistics for various regression models
|
|
@@ -239,7 +239,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
239
239
|
else:
|
|
240
240
|
return {}
|
|
241
241
|
|
|
242
|
-
def _calculate_sharpe(self, df:
|
|
242
|
+
def _calculate_sharpe(self, df: DataFrame, decimal: Optional[int] = 0) -> dict:
|
|
243
243
|
"""Calculate the Sharpe ratio for the dependent variable.
|
|
244
244
|
|
|
245
245
|
This method computes the annualized Sharpe ratio based on the mean and standard deviation
|
|
@@ -300,7 +300,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
300
300
|
) # type: ignore
|
|
301
301
|
vw_ret_d.index.names = [self.time, core_var]
|
|
302
302
|
|
|
303
|
-
def process_group(group:
|
|
303
|
+
def process_group(group: DataFrame) -> Series:
|
|
304
304
|
"""Process each group to calculate differences and prepare the output.
|
|
305
305
|
|
|
306
306
|
This function computes the difference between the highest portfolio and the lowest
|
|
@@ -310,7 +310,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
310
310
|
group (DataFrame): The grouped DataFrame for which to process data.
|
|
311
311
|
|
|
312
312
|
Returns:
|
|
313
|
-
|
|
313
|
+
Series: The processed Series with differences and averages.
|
|
314
314
|
"""
|
|
315
315
|
group = group.sort_index(axis=0, level=[0, 1])
|
|
316
316
|
|
|
@@ -319,7 +319,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
319
319
|
[(group.index.get_level_values(0)[0], "Diff")],
|
|
320
320
|
names=[self.time, core_var],
|
|
321
321
|
)
|
|
322
|
-
core_diff =
|
|
322
|
+
core_diff = Series(core_diff, index=new_index)
|
|
323
323
|
|
|
324
324
|
return pd.concat([group, core_diff])
|
|
325
325
|
|
|
@@ -359,7 +359,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
359
359
|
|
|
360
360
|
def calculate_time_series_metrics(
|
|
361
361
|
series: Series, format: bool = format
|
|
362
|
-
) ->
|
|
362
|
+
) -> DataFrame:
|
|
363
363
|
"""Calculate metrics for each time series and format results.
|
|
364
364
|
|
|
365
365
|
This function computes performance metrics for each time series and formats the results
|
|
@@ -384,7 +384,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
384
384
|
values[core_var] = key
|
|
385
385
|
data.append(values)
|
|
386
386
|
|
|
387
|
-
combined_results =
|
|
387
|
+
combined_results = DataFrame(data)
|
|
388
388
|
|
|
389
389
|
combined_results.set_index(core_var, inplace=True)
|
|
390
390
|
|
|
@@ -425,7 +425,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
425
425
|
core_g: int,
|
|
426
426
|
pivot: bool = True,
|
|
427
427
|
format: bool = False,
|
|
428
|
-
|
|
428
|
+
sort_type: str = "dependent",
|
|
429
429
|
decimal: Optional[int] = None,
|
|
430
430
|
factor_return: bool = False,
|
|
431
431
|
) -> tuple:
|
|
@@ -452,7 +452,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
452
452
|
data_d = self.GroupN(
|
|
453
453
|
[sort_var, core_var],
|
|
454
454
|
[sort_g, core_g],
|
|
455
|
-
|
|
455
|
+
sort_type=sort_type,
|
|
456
456
|
)
|
|
457
457
|
|
|
458
458
|
ew_ret_d = data_d.groupby(
|
|
@@ -467,7 +467,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
467
467
|
)
|
|
468
468
|
vw_ret_d.index.names = [self.time, sort_var, core_var]
|
|
469
469
|
|
|
470
|
-
def process_group(group:
|
|
470
|
+
def process_group(group: DataFrame) -> DataFrame:
|
|
471
471
|
"""Process each group to calculate differences and averages.
|
|
472
472
|
|
|
473
473
|
This function computes the difference between the highest portfolio and lowest portfolio,
|
|
@@ -529,7 +529,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
529
529
|
if factor_return:
|
|
530
530
|
return ew_ret_d, vw_ret_d
|
|
531
531
|
|
|
532
|
-
def generate_time_series_dict(df:
|
|
532
|
+
def generate_time_series_dict(df: DataFrame) -> dict:
|
|
533
533
|
"""Generate a dictionary of time series data from the DataFrame.
|
|
534
534
|
|
|
535
535
|
This function extracts time series for each unique combination of sorting and core variables.
|
|
@@ -556,8 +556,8 @@ class PortfolioAnalysis(Empirical):
|
|
|
556
556
|
return time_series_dict
|
|
557
557
|
|
|
558
558
|
def calculate_time_series_metrics(
|
|
559
|
-
df:
|
|
560
|
-
) ->
|
|
559
|
+
df: DataFrame, pivot: bool = pivot, format: bool = format
|
|
560
|
+
) -> DataFrame:
|
|
561
561
|
"""Calculate metrics for each time series and format results.
|
|
562
562
|
|
|
563
563
|
This function computes performance metrics for each time series and formats the results
|
|
@@ -585,7 +585,7 @@ class PortfolioAnalysis(Empirical):
|
|
|
585
585
|
values[core_var] = key[1]
|
|
586
586
|
data.append(values)
|
|
587
587
|
|
|
588
|
-
combined_results =
|
|
588
|
+
combined_results = DataFrame(data)
|
|
589
589
|
|
|
590
590
|
combined_results.set_index([sort_var, core_var], inplace=True)
|
|
591
591
|
|
|
@@ -611,16 +611,16 @@ class PortfolioAnalysis(Empirical):
|
|
|
611
611
|
:, ~combined_results.columns.str.endswith("p")
|
|
612
612
|
]
|
|
613
613
|
|
|
614
|
-
def reorder_diff_avg(df:
|
|
614
|
+
def reorder_diff_avg(df: DataFrame) -> DataFrame:
|
|
615
615
|
"""Reorder the rows and columns of a DataFrame to place 'Diff' before 'Avg'.
|
|
616
616
|
|
|
617
617
|
This function rearranges the DataFrame to improve readability.
|
|
618
618
|
|
|
619
619
|
Args:
|
|
620
|
-
df (
|
|
620
|
+
df (DataFrame): The DataFrame to reorder.
|
|
621
621
|
|
|
622
622
|
Returns:
|
|
623
|
-
|
|
623
|
+
DataFrame: The reordered DataFrame.
|
|
624
624
|
"""
|
|
625
625
|
columns_order = [
|
|
626
626
|
col for col in df.columns if col not in ["Diff", "Avg"]
|
|
@@ -40,7 +40,7 @@ class OutlierMethod:
|
|
|
40
40
|
Returns:
|
|
41
41
|
Series: A new Series with winsorized values.
|
|
42
42
|
"""
|
|
43
|
-
return
|
|
43
|
+
return Series(
|
|
44
44
|
data=np.where(
|
|
45
45
|
series.isnull(),
|
|
46
46
|
np.nan,
|
|
@@ -69,7 +69,7 @@ class OutlierMethod:
|
|
|
69
69
|
Returns:
|
|
70
70
|
Series: A new Series with truncated values.
|
|
71
71
|
"""
|
|
72
|
-
return
|
|
72
|
+
return Series(
|
|
73
73
|
data=np.where(
|
|
74
74
|
series.isnull(),
|
|
75
75
|
np.nan,
|
|
@@ -13,7 +13,6 @@ class Data(ABC):
|
|
|
13
13
|
|
|
14
14
|
df: DataFrame
|
|
15
15
|
name: Optional[str] = None
|
|
16
|
-
is_copy: bool = False
|
|
17
16
|
|
|
18
17
|
def __post_init__(self) -> None:
|
|
19
18
|
"""
|
|
@@ -23,8 +22,7 @@ class Data(ABC):
|
|
|
23
22
|
2. Preprocess the data.
|
|
24
23
|
3. Set the flag if needed.
|
|
25
24
|
"""
|
|
26
|
-
|
|
27
|
-
self.df = copy.deepcopy(self.df)
|
|
25
|
+
|
|
28
26
|
self._check_columns()
|
|
29
27
|
self._preprocess()
|
|
30
28
|
self.set_flag()
|
|
@@ -33,6 +33,7 @@ class PanelData(Data):
|
|
|
33
33
|
ret: Optional[str] = None
|
|
34
34
|
classifications: Optional[list[str] | str] = None
|
|
35
35
|
drop_all_chars_missing: bool = False
|
|
36
|
+
is_copy: bool = False
|
|
36
37
|
|
|
37
38
|
def set_flag(self) -> None:
|
|
38
39
|
"""Set default flags for the `PanelData` object."""
|
|
@@ -57,10 +58,12 @@ class PanelData(Data):
|
|
|
57
58
|
|
|
58
59
|
This method identifies remaining columns as firm characteristics, excluding classifications.
|
|
59
60
|
"""
|
|
61
|
+
if self.is_copy:
|
|
62
|
+
self.df = copy.deepcopy(self.df)
|
|
60
63
|
self.df[self.id] = self.df[self.id].astype(int)
|
|
61
64
|
self.df[self.time] = pd.to_datetime(self.df[self.time], format="ISO8601")
|
|
62
65
|
self.df[self.time] = self.df[self.time].dt.to_period(freq=self.frequency)
|
|
63
|
-
self.df
|
|
66
|
+
self.df.sort_values(by=[self.time, self.id], inplace=True)
|
|
64
67
|
basic_column = (
|
|
65
68
|
[self.id, self.time] if self.ret is None else [self.id, self.time, self.ret]
|
|
66
69
|
)
|
|
@@ -208,6 +211,7 @@ if __name__ == "__main__":
|
|
|
208
211
|
ret="return",
|
|
209
212
|
classifications="industry",
|
|
210
213
|
drop_all_chars_missing=True,
|
|
214
|
+
is_copy=False,
|
|
211
215
|
)
|
|
212
216
|
pp(panel_data)
|
|
213
217
|
pp(panel_data.df)
|
|
@@ -24,6 +24,7 @@ class TimeSeries(Data):
|
|
|
24
24
|
time: str = "date"
|
|
25
25
|
frequency: Literal["D", "M", "Y"] = "M"
|
|
26
26
|
factors: list[str] = field(init=False)
|
|
27
|
+
is_copy: bool = False
|
|
27
28
|
|
|
28
29
|
def __repr__(self) -> str:
|
|
29
30
|
return f"TimeSeriesData({self.name})" # todo: add frequency
|
|
@@ -34,6 +35,8 @@ class TimeSeries(Data):
|
|
|
34
35
|
|
|
35
36
|
This method renames the time column to a standardized name and identifies remaining columns as factors.
|
|
36
37
|
"""
|
|
38
|
+
if self.is_copy:
|
|
39
|
+
self.df = copy.deepcopy(self.df)
|
|
37
40
|
self.df[self.time] = pd.to_datetime(self.df[self.time], format="ISO8601")
|
|
38
41
|
self.df[self.time] = self.df[self.time].dt.to_period(freq=self.frequency)
|
|
39
42
|
self.df = self.df.sort_values(by=self.time)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|