openstatz 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstatz/__init__.py +161 -0
- openstatz/__main__.py +11 -0
- openstatz/_compat.py +430 -0
- openstatz/_context.py +123 -0
- openstatz/_kernels.py +83 -0
- openstatz/_montecarlo.py +295 -0
- openstatz/_numpy_compat.py +288 -0
- openstatz/_plotting/__init__.py +0 -0
- openstatz/_plotting/core.py +2137 -0
- openstatz/_plotting/wrappers.py +2114 -0
- openstatz/app/__init__.py +9 -0
- openstatz/app/cli.py +62 -0
- openstatz/app/schemas.py +149 -0
- openstatz/app/serializers.py +383 -0
- openstatz/app/server.py +183 -0
- openstatz/app/static/assets/index-ChOfSsMx.css +1 -0
- openstatz/app/static/assets/index-uPeRPPQG.js +76 -0
- openstatz/app/static/assets/index-uPeRPPQG.js.map +1 -0
- openstatz/app/static/index.html +27 -0
- openstatz/compat.py +56 -0
- openstatz/plots.py +27 -0
- openstatz/providers.py +138 -0
- openstatz/py.typed +0 -0
- openstatz/report.html +65 -0
- openstatz/reports.py +2515 -0
- openstatz/stats.py +3307 -0
- openstatz/utils.py +1002 -0
- openstatz/version.py +1 -0
- openstatz-0.1.0.dist-info/METADATA +189 -0
- openstatz-0.1.0.dist-info/RECORD +34 -0
- openstatz-0.1.0.dist-info/WHEEL +4 -0
- openstatz-0.1.0.dist-info/entry_points.txt +2 -0
- openstatz-0.1.0.dist-info/licenses/LICENSE.txt +202 -0
- openstatz-0.1.0.dist-info/licenses/NOTICE +16 -0
openstatz/__init__.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
#
|
|
3
|
+
# OpenStatz: Portfolio analytics for quants (a modern rebuild of QuantStats)
|
|
4
|
+
# https://github.com/ranaroussi/quantstats (upstream)
|
|
5
|
+
#
|
|
6
|
+
# Copyright 2019-2025 Ran Aroussi (original QuantStats)
|
|
7
|
+
# Copyright 2026 OpenStatz contributors
|
|
8
|
+
#
|
|
9
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
10
|
+
# you may not use this file except in compliance with the License.
|
|
11
|
+
# You may obtain a copy of the License at
|
|
12
|
+
#
|
|
13
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
14
|
+
#
|
|
15
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
16
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
17
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
18
|
+
# See the License for the specific language governing permissions and
|
|
19
|
+
# limitations under the License.
|
|
20
|
+
|
|
21
|
+
from . import version
|
|
22
|
+
|
|
23
|
+
__version__ = version.version
|
|
24
|
+
__author__ = "Ran Aroussi (QuantStats) / OpenStatz contributors"
|
|
25
|
+
|
|
26
|
+
from . import plots, providers, reports, stats, utils
|
|
27
|
+
|
|
28
|
+
__all__ = ["stats", "plots", "reports", "utils", "providers", "extend_pandas"]
|
|
29
|
+
|
|
30
|
+
# try automatic matplotlib inline
|
|
31
|
+
utils._in_notebook(matplotlib_inline=True)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def extend_pandas():
    """
    Attach the library's functions to pandas objects as methods.

    Every function listed below is set as an attribute on pandas'
    ``PandasObject`` base class, so that it can be used like:
    df.sharpe(), df.best('day'), ...
    """
    from pandas.core.base import PandasObject as _po  # type: ignore[import]

    # stats functions exposed under their own names
    stats_methods = (
        "compsum",
        "comp",
        "expected_return",
        "geometric_mean",
        "ghpr",
        "outliers",
        "remove_outliers",
        "best",
        "worst",
        "consecutive_wins",
        "consecutive_losses",
        "exposure",
        "win_rate",
        "avg_return",
        "avg_win",
        "avg_loss",
        "volatility",
        "rolling_volatility",
        "implied_volatility",
        "sharpe",
        "smart_sharpe",
        "rolling_sharpe",
        "sortino",
        "smart_sortino",
        "adjusted_sortino",
        "rolling_sortino",
        "omega",
        "cagr",
        "rar",
        "skew",
        "kurtosis",
        "calmar",
        "ulcer_index",
        "ulcer_performance_index",
        "upi",
        "serenity_index",
        "risk_of_ruin",
        "ror",
        "value_at_risk",
        "var",
        "conditional_value_at_risk",
        "cvar",
        "expected_shortfall",
        "tail_ratio",
        "payoff_ratio",
        "win_loss_ratio",
        "profit_ratio",
        "profit_factor",
        "gain_to_pain_ratio",
        "cpc_index",
        "common_sense_ratio",
        "outlier_win_ratio",
        "outlier_loss_ratio",
        "recovery_factor",
        "risk_return_ratio",
        "max_drawdown",
        "to_drawdown_series",
        "kelly_criterion",
        "monthly_returns",
        "pct_rank",
        "treynor_ratio",
        "probabilistic_sharpe_ratio",
        "probabilistic_sortino_ratio",
        "probabilistic_adjusted_sortino_ratio",
        # Monte Carlo simulation
        "montecarlo",
        "montecarlo_sharpe",
        "montecarlo_drawdown",
        "montecarlo_cagr",
    )
    for method_name in stats_methods:
        setattr(_po, method_name, getattr(stats, method_name))

    # methods from utils exposed under their own names
    utils_methods = (
        "to_returns",
        "to_prices",
        "to_log_returns",
        "log_returns",
        "exponential_stdev",
        "rebase",
        "aggregate_returns",
        "to_excess_returns",
        "multi_shift",
    )
    for method_name in utils_methods:
        setattr(_po, method_name, getattr(utils, method_name))

    # private utils helpers exposed under a public method name
    utils_aliases = (
        ("curr_month", "_pandas_current_month"),
        ("date", "_pandas_date"),
        ("mtd", "_mtd"),
        ("qtd", "_qtd"),
        ("ytd", "_ytd"),
    )
    for method_name, helper_name in utils_aliases:
        setattr(_po, method_name, getattr(utils, helper_name))

    # methods that requires benchmark stats
    benchmark_methods = (
        "r_squared",
        "r2",
        "information_ratio",
        "greeks",
        "rolling_greeks",
        "compare",
    )
    for method_name in benchmark_methods:
        setattr(_po, method_name, getattr(stats, method_name))

    # plotting methods, exposed with a "plot_" prefix
    plot_functions = (
        "snapshot",
        "earnings",
        "daily_returns",
        "distribution",
        "drawdown",
        "drawdowns_periods",
        "histogram",
        "log_returns",
        "returns",
        "rolling_beta",
        "rolling_sharpe",
        "rolling_sortino",
        "rolling_volatility",
        "yearly_returns",
        "monthly_heatmap",
        "montecarlo",
        "montecarlo_distribution",
    )
    for plot_name in plot_functions:
        setattr(_po, "plot_" + plot_name, getattr(plots, plot_name))

    # report-level metrics
    setattr(_po, "metrics", reports.metrics)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
# extend_pandas()
|
openstatz/__main__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
#
|
|
3
|
+
# Lets you run the CLI without the installed console script, e.g.:
|
|
4
|
+
# python -m openstatz serve --port 8200
|
|
5
|
+
#
|
|
6
|
+
# Licensed under the Apache License, Version 2.0.
|
|
7
|
+
|
|
8
|
+
from openstatz.app.cli import main
|
|
9
|
+
|
|
10
|
+
if __name__ == "__main__":
    # Run the CLI entry point and hand its return value to SystemExit,
    # so it becomes the interpreter's exit status.
    raise SystemExit(main())
|
openstatz/_compat.py
ADDED
|
@@ -0,0 +1,430 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
"""
|
|
3
|
+
Compatibility layer for pandas/numpy versions
|
|
4
|
+
Handles version differences and deprecated functionality
|
|
5
|
+
|
|
6
|
+
This module provides a unified interface for working with different versions of pandas
|
|
7
|
+
and numpy, ensuring that quantstats functions work consistently across various
|
|
8
|
+
dependency versions. It handles deprecated functionality and version-specific changes.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import pandas as pd
|
|
12
|
+
import numpy as np
|
|
13
|
+
import warnings
|
|
14
|
+
from packaging import version
|
|
15
|
+
import yfinance as yf
|
|
16
|
+
from typing import Union, Optional, List, Callable
|
|
17
|
+
|
|
18
|
+
# Version detection - parsed once so the versions can be compared below
PANDAS_VERSION = version.parse(pd.__version__)
NUMPY_VERSION = version.parse(np.__version__)

# Frequency alias mapping for pandas compatibility.
# From pandas 2.2.0 on, the legacy aliases are mapped to the explicit ones:
#   M -> ME (Month End), Q -> QE (Quarter End), A/Y -> YE (Year End)
# On older pandas versions every legacy alias maps to itself.
FREQUENCY_ALIASES = {
    legacy: (modern if PANDAS_VERSION >= version.parse("2.2.0") else legacy)
    for legacy, modern in (
        ("M", "ME"),
        ("Q", "QE"),
        ("A", "YE"),
        ("Y", "YE"),
    )
}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def get_frequency_alias(freq: str) -> str:
    """
    Translate a frequency string via the ``FREQUENCY_ALIASES`` table.

    Strings that have an entry in ``FREQUENCY_ALIASES`` are replaced by the
    mapped alias; any other string is handed back untouched.

    Parameters
    ----------
    freq : str
        The frequency string (e.g., 'M', 'Q', 'A', 'Y')

    Returns
    -------
    str
        The mapped alias, or ``freq`` itself when no mapping exists

    Examples
    --------
    >>> get_frequency_alias('M')  # Returns 'ME' in pandas 2.2.0+, 'M' in older versions
    >>> get_frequency_alias('D')  # Returns 'D' (unchanged)
    """
    try:
        return FREQUENCY_ALIASES[freq]
    except KeyError:
        # No entry for this frequency: pass it through as given
        return freq
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def normalize_timezone(data: Union[pd.Series, pd.DataFrame]) -> Union[pd.Series, pd.DataFrame]:
    """
    Return ``data`` with a timezone-naive index.

    When the index is a timezone-aware DatetimeIndex, a copy of ``data`` is
    returned whose index has been converted to UTC and then stripped of its
    timezone. In every other case the input object itself is returned.

    Parameters
    ----------
    data : pd.Series or pd.DataFrame
        Time series data, normally with a DatetimeIndex

    Returns
    -------
    pd.Series or pd.DataFrame
        A copy with a timezone-naive (UTC wall-clock) index, or ``data``
        itself when there was nothing to normalize
    """
    index = data.index
    needs_conversion = isinstance(index, pd.DatetimeIndex) and index.tz is not None
    if not needs_conversion:
        # Non-datetime index or already timezone naive: nothing to do
        return data

    # Work on a copy so the caller's object keeps its original index
    naive = data.copy()
    utc_index = naive.index.tz_convert('UTC')
    naive.index = utc_index.tz_localize(None)
    return naive
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def safe_resample(data: Union[pd.Series, pd.DataFrame],
                  freq: str,
                  func_name: Optional[Union[str, Callable]] = None,
                  **kwargs):
    """
    Resample time series data using a version-appropriate frequency alias.

    The frequency is first translated with ``get_frequency_alias``. If an
    aggregation is requested, it is applied and the result is passed
    through ``normalize_timezone``.

    Parameters
    ----------
    data : pd.Series or pd.DataFrame
        The time series data to resample
    freq : str
        The frequency to resample to (e.g., 'M', 'Q', 'A', 'D')
    func_name : str or callable, optional
        The aggregation to apply. A string naming an attribute of the
        resampler ('sum', 'mean', 'std', ...) is called as a method; any
        other string, and any callable, is passed to ``resampler.apply``
    **kwargs
        Additional arguments passed to the aggregation function

    Returns
    -------
    pandas resampler, pd.Series or pd.DataFrame
        The bare resampler object when ``func_name`` is None; otherwise the
        aggregated data, with a timezone-naive index if the aggregated
        index was a timezone-aware DatetimeIndex

    Examples
    --------
    >>> safe_resample(data, 'M', 'sum')  # Monthly sum aggregation
    >>> safe_resample(data, 'Q', 'mean')  # Quarterly mean aggregation
    """
    resampler = data.resample(get_frequency_alias(freq))

    # No aggregation requested: the caller wants the resampler itself
    if func_name is None:
        return resampler

    if isinstance(func_name, str):
        if hasattr(resampler, func_name):
            # Named aggregations (sum, mean, std, count, min, max, first,
            # last, ...) are called as resampler methods
            aggregate = getattr(resampler, func_name)
            aggregated = aggregate(**kwargs)
        else:
            # Unknown string: let pandas resolve it through apply
            aggregated = resampler.apply(func_name, **kwargs)
    else:
        # Callables go through apply; the FutureWarning about callable
        # usage is silenced because this use is intentional
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=FutureWarning,
                                    message=".*callable.*")
            aggregated = resampler.apply(func_name, **kwargs)

    return normalize_timezone(aggregated)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def safe_concat(objs: List[Union[pd.Series, pd.DataFrame]],
                axis: int = 0,
                ignore_index: bool = False,
                sort: bool = False,
                **kwargs) -> Union[pd.Series, pd.DataFrame]:
    """
    Concatenate pandas objects through ``pd.concat``.

    A thin wrapper that always forwards ``axis``, ``ignore_index`` and
    ``sort`` explicitly, together with any extra keyword arguments.

    Parameters
    ----------
    objs : list of pd.Series or pd.DataFrame
        Objects to concatenate along the specified axis
    axis : int, default 0
        Axis to concatenate along. 0 for rows, 1 for columns
    ignore_index : bool, default False
        Whether to ignore the index and create a new default integer index
    sort : bool, default False
        Forwarded to ``pd.concat`` as its ``sort`` argument
    **kwargs
        Additional arguments passed to pd.concat

    Returns
    -------
    pd.Series or pd.DataFrame
        The concatenated result

    Examples
    --------
    >>> safe_concat([df1, df2])  # Concatenate along rows
    >>> safe_concat([df1, df2], axis=1)  # Concatenate along columns
    """
    concat_options = {
        "axis": axis,
        "ignore_index": ignore_index,
        "sort": sort,
        **kwargs,
    }
    return pd.concat(objs, **concat_options)  # type: ignore[arg-type]
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def safe_append(df: pd.DataFrame,
                other: Union[pd.DataFrame, pd.Series],
                ignore_index: bool = False,
                sort: bool = False) -> pd.DataFrame:
    """
    Append ``other`` to ``df`` by concatenation.

    DataFrame.append() was removed in pandas 2.0.0, so the append is
    carried out with ``safe_concat`` and the outcome is coerced to a
    DataFrame.

    Parameters
    ----------
    df : pd.DataFrame
        The DataFrame to append to (base DataFrame)
    other : pd.DataFrame or pd.Series
        The data to append to the base DataFrame
    ignore_index : bool, default False
        Whether to ignore the index and create a new default integer index
    sort : bool, default False
        Forwarded to ``safe_concat`` as its ``sort`` argument

    Returns
    -------
    pd.DataFrame
        The result of the append operation

    Examples
    --------
    >>> safe_append(df, new_row)  # Append a new row
    >>> safe_append(df, other_df, ignore_index=True)  # Append and reset index
    """
    combined = safe_concat([df, other], ignore_index=ignore_index, sort=sort)

    # The common case: concat already produced a DataFrame
    if isinstance(combined, pd.DataFrame):
        return combined

    # A Series becomes a single-row frame; anything else is handed to the
    # DataFrame constructor as is
    frame_source = [combined] if isinstance(combined, pd.Series) else combined
    return pd.DataFrame(frame_source)
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def safe_frequency_conversion(data: Union[pd.Series, pd.DataFrame],
                              freq: str) -> Union[pd.Series, pd.DataFrame]:
    """
    Convert time series data to the given frequency.

    The frequency is translated with ``get_frequency_alias``. Objects that
    provide ``asfreq`` are converted with it; objects that do not are
    resampled with 'last' aggregation instead.

    Parameters
    ----------
    data : pd.Series or pd.DataFrame
        Time series data with a datetime index
    freq : str
        Target frequency (e.g., 'D', 'M', 'Q', 'A')

    Returns
    -------
    pd.Series or pd.DataFrame
        Data with converted frequency

    Examples
    --------
    >>> safe_frequency_conversion(data, 'M')  # Convert to monthly frequency
    >>> safe_frequency_conversion(data, 'D')  # Convert to daily frequency
    """
    target_freq = get_frequency_alias(freq)

    if not hasattr(data, "asfreq"):
        # No asfreq available: keep the last value of each period instead
        return safe_resample(data, target_freq, "last")

    # asfreq is the most direct conversion
    return data.asfreq(target_freq)
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def handle_pandas_warnings():
    """
    Create a context manager for scoped warning handling.

    Each call builds a fresh ``warnings.catch_warnings()`` object. No
    filters are installed by this function itself; callers configure the
    filters they need inside the ``with`` block.

    Returns
    -------
    warnings.catch_warnings
        A new, not yet entered, context manager for handling warnings

    Examples
    --------
    >>> with handle_pandas_warnings():
    ...     # Code that might generate pandas warnings
    ...     pass
    """
    warning_scope = warnings.catch_warnings()
    return warning_scope
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
# Pandas accessor compatibility functions
|
|
315
|
+
def get_datetime_accessor(series: pd.Series):
    """
    Return the ``.dt`` accessor of a pandas Series.

    A single access point for the datetime properties of a Series.

    Parameters
    ----------
    series : pd.Series
        The series with datetime data to get the accessor for

    Returns
    -------
    pd.Series.dt
        The datetime accessor for the series

    Examples
    --------
    >>> dt_accessor = get_datetime_accessor(date_series)
    >>> dt_accessor.year  # Access year component
    >>> dt_accessor.month  # Access month component
    """
    datetime_accessor = series.dt
    return datetime_accessor
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def get_string_accessor(series: pd.Series):
    """
    Return the ``.str`` accessor of a pandas Series.

    A single access point for the string methods of a Series.

    Parameters
    ----------
    series : pd.Series
        The series with string data to get the accessor for

    Returns
    -------
    pd.Series.str
        The string accessor for the series

    Examples
    --------
    >>> str_accessor = get_string_accessor(string_series)
    >>> str_accessor.lower()  # Convert to lowercase
    >>> str_accessor.contains('pattern')  # Check for pattern
    """
    string_accessor = series.str
    return string_accessor
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def safe_yfinance_download(tickers: Union[str, List[str]],
                           proxy: Optional[str] = None,
                           **kwargs) -> pd.DataFrame:
    """
    Download data with yfinance, applying the proxy in a version-aware way.

    When a proxy is given it is set through ``yf.set_config`` if the
    installed yfinance provides that function; otherwise it is passed to
    ``yf.download`` as the ``proxy`` keyword argument. FutureWarnings
    from the yfinance module are suppressed during the download.

    Parameters
    ----------
    tickers : str or list
        Ticker symbols to download data for. Can be a single ticker string
        or a list of ticker symbols
    proxy : str, optional
        Proxy configuration string (e.g., 'http://proxy.server:port')
    **kwargs
        Additional arguments passed to yfinance.download such as:
        - start: Start date for data download
        - end: End date for data download
        - period: Period to download (e.g., '1y', '6mo')
        - interval: Data interval (e.g., '1d', '1h')

    Returns
    -------
    pd.DataFrame
        Whatever ``yf.download`` returned, or an empty DataFrame with the
        columns Open, High, Low, Close, Volume and Adj Close if it
        returned None

    Examples
    --------
    >>> data = safe_yfinance_download('AAPL', start='2020-01-01', end='2021-01-01')
    >>> data = safe_yfinance_download(['AAPL', 'MSFT'], period='1y')
    """
    if proxy is not None:
        if not hasattr(yf, "set_config"):
            # Old method: no set_config available, so the proxy is passed
            # directly to the download call
            kwargs["proxy"] = proxy
        else:
            # New method: configure the proxy through set_config and make
            # sure it is not also forwarded as a download argument
            yf.set_config(proxy=proxy)
            kwargs.pop("proxy", None)

    # Keep the output clean: silence FutureWarnings coming from yfinance
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=FutureWarning, module="yfinance")
        downloaded = yf.download(tickers, **kwargs)

    if downloaded is not None:
        return downloaded

    # yfinance gave nothing back: fall back to an empty frame that has the
    # standard yfinance columns
    return pd.DataFrame(columns=['Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close'])
|