commodutil 3.2.3__tar.gz → 3.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {commodutil-3.2.3 → commodutil-3.3.0}/PKG-INFO +1 -1
- commodutil-3.3.0/commodutil/stats.py +453 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/transforms.py +34 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/commodutil.egg-info/PKG-INFO +1 -1
- commodutil-3.3.0/tests/test_stats.py +131 -0
- commodutil-3.2.3/commodutil/stats.py +0 -50
- commodutil-3.2.3/tests/test_stats.py +0 -57
- {commodutil-3.2.3 → commodutil-3.3.0}/.coveragerc +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/.github/workflows/1_tests.yml +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/.github/workflows/2_coverage.yml +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/.github/workflows/3_linting.yml +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/.github/workflows/4_release.yml +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/.gitignore +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/.pypirc +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/azure-build-pipelines.yml +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/__init__.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/arb.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/convfactors.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/dates.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/forward/__init__.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/forward/calendar.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/forward/continuous.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/forward/fly.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/forward/quarterly.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/forward/spreads.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/forward/structure.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/forward/util.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/forwards.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/pandasutil.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/commodutil.egg-info/SOURCES.txt +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/commodutil.egg-info/dependency_links.txt +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/commodutil.egg-info/requires.txt +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/commodutil.egg-info/top_level.txt +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/pyproject.toml +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/requirements-test.txt +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/requirements.txt +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/requirements_dev.txt +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/setup.cfg +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/tests/__init__.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/tests/conftest.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/tests/forward/__init__.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/tests/forward/conftest.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/tests/forward/test_calendar.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/tests/forward/test_continuous.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/tests/forward/test_fly.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/tests/forward/test_quarterly.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/tests/forward/test_spreads.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/tests/forward/test_structure.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/tests/forward/test_util.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/tests/test_arb.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/tests/test_cl.csv +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/tests/test_conv.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/tests/test_dates.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/tests/test_forwards.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/tests/test_pandasutils.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/tests/test_price_conv.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/tests/test_transforms.py +0 -0
- {commodutil-3.2.3 → commodutil-3.3.0}/tests/test_weekly.csv +0 -0
|
@@ -0,0 +1,453 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from typing import Iterable
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
from commodutil import dates
|
|
10
|
+
from commodutil import transforms
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def curve_seasonal_zscore(hist, fwd):
    """
    Calculate a monthly z-score along a forward curve relative to history.

    The history is reduced with `transforms.monthly_mean`, transposed and
    summarised with `describe()`. For every row of the forward curve the
    "mean" and "std" entries of the column matching the row's calendar month
    are used to score the value held in the first column.

    Sign convention: the score is (historical mean - forward value) / std, so
    a forward value above the historical mean gives a negative score.

    Args:
        hist: Historical timeseries, passed as-is to `transforms.monthly_mean`.
        fwd: Forward curve as a Series or DataFrame. Row labels must expose a
            `.month` attribute; the first column holds the values to score.

    Returns:
        A new DataFrame containing the forward curve plus a "zscore" column.
        The object passed in as `fwd` is not modified.
    """
    monthly = transforms.monthly_mean(hist).T.describe()

    # Work on a private frame: the original code added the column directly to
    # the caller's DataFrame, which mutates their data and writes into a slice
    # when `fwd` was obtained via column selection.
    if isinstance(fwd, pd.Series):
        fwd = pd.DataFrame(fwd)
    else:
        fwd = fwd.copy()

    def _score(row):
        month_stats = monthly[row.name.month]
        return (month_stats.loc["mean"] - row.iloc[0]) / month_stats.loc["std"]

    fwd["zscore"] = fwd.apply(_score, axis=1)
    return fwd
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def reindex_zscore(df, range=10, calc_year_start: int = None):
    """
    Calculate per-row z-scores of contract (or spread) columns versus prior years.

    Every column label is replaced by the first 4-digit number found in it.
    The reference set is the label slice `dates.curyear - range - 1` through
    `dates.curyear - 1`, with the last 10 rows dropped because of volatility
    close to expiry.

    Sign convention: (reference mean - value) / reference std, so a value
    above the reference mean gives a negative score.

    Args:
        df: DataFrame whose column labels each contain a 4-digit year.
        range: Number of years of history used for the reference set. The name
            shadows the builtin but is kept so keyword callers keep working.
        calc_year_start: First year to score. Falsy values (None, 0) fall back
            to `dates.curyear`.

    Returns:
        DataFrame with one z-score column per scored year, or None when no
        column falls in the scored range.
    """
    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    df = df.rename(
        columns={col: int(re.findall(r"\d\d\d\d", str(col))[0]) for col in df.columns}
    )  # turn columns into years

    reference = df.loc[:, dates.curyear - range - 1 : dates.curyear - 1]
    reference = reference[:-10]  # exclude last 10 rows due to volatility close to expiry

    # The reference statistics do not depend on the scored year; compute once.
    reference_mean = reference.mean(axis=1)
    reference_std = reference.std(axis=1)

    if not calc_year_start:
        calc_year_start = dates.curyear

    scores = []
    for year in df.loc[:, calc_year_start : df.columns[-1]]:
        z = (reference_mean - df.loc[:, year]) / reference_std
        z.name = year
        scores.append(z)

    if not scores:
        return None
    return pd.concat(scores, axis=1)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass(frozen=True)
|
|
58
|
+
class PointStats:
|
|
59
|
+
"""
|
|
60
|
+
Summary statistics for a single "as-of" point against a historical reference set.
|
|
61
|
+
|
|
62
|
+
Percentile is returned as an empirical CDF in [0, 1], i.e. fraction of reference
|
|
63
|
+
values <= current value.
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
asof: pd.Timestamp
|
|
67
|
+
current_year: int | None
|
|
68
|
+
current_value: float | None
|
|
69
|
+
reference_years: list[int]
|
|
70
|
+
reference_values: list[float]
|
|
71
|
+
mean: float | None
|
|
72
|
+
std: float | None
|
|
73
|
+
zscore: float | None
|
|
74
|
+
percentile: float | None
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def last_value_at_or_before(series: pd.Series, asof: datetime | str | pd.Timestamp) -> float | None:
|
|
78
|
+
"""
|
|
79
|
+
Return the last non-null value at or before `asof`.
|
|
80
|
+
|
|
81
|
+
Returns None if no value exists in the window.
|
|
82
|
+
"""
|
|
83
|
+
ts = pd.Timestamp(asof)
|
|
84
|
+
s = series.loc[:ts].dropna()
|
|
85
|
+
if s.empty:
|
|
86
|
+
return None
|
|
87
|
+
val = s.iloc[-1]
|
|
88
|
+
try:
|
|
89
|
+
return float(val)
|
|
90
|
+
except Exception:
|
|
91
|
+
return None
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def empirical_percentile(value: float, reference_values: Iterable[float]) -> float | None:
|
|
95
|
+
"""
|
|
96
|
+
Empirical percentile as fraction of reference values <= value.
|
|
97
|
+
|
|
98
|
+
Returns None if reference is empty after filtering.
|
|
99
|
+
"""
|
|
100
|
+
ref = [float(x) for x in reference_values if x is not None and not np.isnan(x)]
|
|
101
|
+
if not ref:
|
|
102
|
+
return None
|
|
103
|
+
return float(np.mean([x <= value for x in ref]))
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def point_stats(value: float | None, reference_values: Iterable[float]) -> tuple[float | None, float | None, float | None, float | None]:
|
|
107
|
+
"""
|
|
108
|
+
Compute (mean, std, zscore, percentile) for a value vs reference values.
|
|
109
|
+
|
|
110
|
+
Std is sample std (ddof=1) when at least 2 reference values exist, else 0.0.
|
|
111
|
+
"""
|
|
112
|
+
ref = np.array([float(x) for x in reference_values if x is not None and not np.isnan(x)], dtype=float)
|
|
113
|
+
if value is None or np.isnan(value) or ref.size == 0:
|
|
114
|
+
return None, None, None, None
|
|
115
|
+
|
|
116
|
+
mean = float(np.mean(ref))
|
|
117
|
+
std = float(np.std(ref, ddof=1)) if ref.size >= 2 else 0.0
|
|
118
|
+
z = None if std == 0.0 else float((float(value) - mean) / std)
|
|
119
|
+
p = empirical_percentile(float(value), ref.tolist())
|
|
120
|
+
return mean, std, z, p
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def select_reindex_prompt_column(df_reindexed: pd.DataFrame, *, within_days: int = 10):
    """
    Determine the "prompt" column for a reindex-year dataframe.

    Mirrors the legacy behavior in `commodplot.commodplotutil.reindex_year_df_rel_col`:
    - Prefer a column whose label contains `dates.curyear`.
    - If that column ends within `within_days` of the max x-date, prefer next year's column.

    Args:
        df_reindexed: Frame whose index supports date subtraction and whose
            column labels contain a year.
        within_days: If the current-year column's last non-null row is fewer
            than this many days before the last index entry, the next year's
            column is preferred when one exists.

    Returns:
        The label of the selected column, or None when the frame is None or
        empty. Falls back to the first column when no label contains
        `dates.curyear`.
    """
    if df_reindexed is None or df_reindexed.empty:
        return None

    year_map = dates.find_year(df_reindexed)
    columns = list(df_reindexed.columns)

    current_year_cols = [c for c in columns if str(dates.curyear) in str(c)]
    if not current_year_cols:
        return columns[0]

    prompt_col = current_year_cols[0]
    prompt_year = year_map.get(prompt_col)
    if not isinstance(prompt_year, int):
        return prompt_col

    # No row where every current-year column has data: nothing to measure.
    if len(df_reindexed[current_year_cols].dropna()) == 0:
        return prompt_col

    prompt_series = df_reindexed[prompt_col].dropna()
    if prompt_series.empty:
        return prompt_col

    days_to_end = (df_reindexed.index[-1] - prompt_series.index[-1]).days
    if days_to_end < within_days:
        # Plain integer arithmetic; no need to round-trip through a Timestamp.
        next_year = prompt_year + 1
        next_year_cols = [c for c in columns if str(next_year) in str(c)]
        if next_year_cols:
            return next_year_cols[0]

    return prompt_col
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def reindex_year_point_stats(
    df: pd.DataFrame,
    *,
    asof: datetime | str | pd.Timestamp | None = None,
    lookback_years: int = 5,
    within_days: int = 10,
) -> PointStats:
    """
    Compute point stats for a reindex-year view.

    Steps:
    - reindex the frame with `commodutil.transforms.reindex_year`;
    - choose the prompt column with `select_reindex_prompt_column`;
    - compare the prompt value at `asof` with the values of the previous
      `lookback_years` years at the same as-of date.

    Args:
        df: Frame whose columns encode a year.
        asof: As-of date; defaults to the last index entry of the reindexed frame.
        lookback_years: Number of years before the prompt year used as reference.
        within_days: Passed through to `select_reindex_prompt_column`.

    Returns:
        A `PointStats`. When `df` or its reindexed form is None/empty, every
        statistic is None, the reference lists are empty and `asof` is NaT.
    """

    def _no_data() -> PointStats:
        # Fresh lists on every call so results never share mutable state.
        return PointStats(
            asof=pd.NaT,
            current_year=None,
            current_value=None,
            reference_years=[],
            reference_values=[],
            mean=None,
            std=None,
            zscore=None,
            percentile=None,
        )

    if df is None or df.empty:
        return _no_data()

    reindexed = transforms.reindex_year(df)
    if reindexed is None or reindexed.empty:
        return _no_data()

    if asof is None:
        asof_ts = pd.Timestamp(reindexed.index.max())
    else:
        asof_ts = pd.Timestamp(asof)

    prompt_col = select_reindex_prompt_column(reindexed, within_days=within_days)
    year_by_col = dates.find_year(reindexed)

    if prompt_col is None:
        prompt_year = None
        current_value = None
    else:
        prompt_year = year_by_col.get(prompt_col)
        current_value = last_value_at_or_before(reindexed[prompt_col], asof_ts)
    if not isinstance(prompt_year, int):
        prompt_year = None

    reference_years: list[int] = []
    reference_values: list[float] = []
    if prompt_year is not None:
        first_year = prompt_year - lookback_years
        last_year = prompt_year - 1
        for col in reindexed.columns:
            col_year = year_by_col.get(col)
            if not isinstance(col_year, int):
                continue
            if not (first_year <= col_year <= last_year):
                continue
            observed = last_value_at_or_before(reindexed[col], asof_ts)
            if observed is None:
                continue
            reference_years.append(col_year)
            reference_values.append(observed)

    mean, std, z, p = point_stats(current_value, reference_values)
    return PointStats(
        asof=asof_ts,
        current_year=prompt_year,
        current_value=current_value,
        reference_years=reference_years,
        reference_values=reference_values,
        mean=mean,
        std=std,
        zscore=z,
        percentile=p,
    )
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def seasonal_point_stats(
    seas: pd.DataFrame,
    *,
    asof: datetime | str | pd.Timestamp | None = None,
    lookback_years: int = 5,
) -> PointStats:
    """
    Compute point stats from a seasonalized dataframe.

    Expects:
    - Index: current-year dates
    - Columns: years (ints)

    Uses `dates.curyear` as the current year column if present; otherwise uses max year column.

    Args:
        seas: Seasonalized frame as described above. Non-integer column labels
            are ignored.
        asof: As-of date; defaults to, and is capped at, the last index entry.
            The row actually used is the last index entry <= `asof`.
        lookback_years: Number of years before the current year used as reference.

    Returns:
        A `PointStats`. `reference_years` and `reference_values` are aligned
        element by element: a year is listed only when it has a non-NaN value
        on the as-of row. `current_value` is None when the current-year value
        is missing or NaN. When the frame is None/empty, has no integer
        columns, or has no row at or before `asof`, every statistic is None
        and `asof` is NaT.
    """

    def _empty(year: int | None = None) -> PointStats:
        return PointStats(
            asof=pd.NaT,
            current_year=year,
            current_value=None,
            reference_years=[],
            reference_values=[],
            mean=None,
            std=None,
            zscore=None,
            percentile=None,
        )

    if seas is None or seas.empty:
        return _empty()

    year_cols = [c for c in seas.columns if isinstance(c, (int, np.integer))]
    if not year_cols:
        return _empty()

    current_year = int(dates.curyear) if dates.curyear in year_cols else int(max(year_cols))

    last_index = pd.Timestamp(seas.index.max())
    asof_ts = last_index if asof is None else min(pd.Timestamp(asof), last_index)

    # Use the last index <= asof, to avoid KeyError for non-trading days.
    valid_idx = seas.index[seas.index <= asof_ts]
    if len(valid_idx) == 0:
        return _empty(current_year)
    asof_row = pd.Timestamp(valid_idx.max())

    def _value_at(year: int) -> float | None:
        # A missing cell, a non-scalar lookup or a non-numeric payload all
        # mean "no usable value"; NaN is treated the same way.
        try:
            cell = float(seas.loc[asof_row, year])
        except (KeyError, TypeError, ValueError):
            return None
        return None if np.isnan(cell) else cell

    current_value = _value_at(current_year)

    first_year = current_year - lookback_years
    last_year = current_year - 1
    reference_years: list[int] = []
    reference_values: list[float] = []
    for label in year_cols:
        year = int(label)
        if not (first_year <= year <= last_year):
            continue
        observed = _value_at(year)
        if observed is None:
            # Skip the year as well, so both lists stay aligned.
            continue
        reference_years.append(year)
        reference_values.append(observed)

    mean, std, z, p = point_stats(current_value, reference_values)
    return PointStats(
        asof=asof_row,
        current_year=current_year,
        current_value=current_value,
        reference_years=reference_years,
        reference_values=reference_values,
        mean=mean,
        std=std,
        zscore=z,
        percentile=p,
    )
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def _base_label_from_column(col) -> str | None:
|
|
344
|
+
"""
|
|
345
|
+
Derive a stable group key from a column label by stripping year-like tokens.
|
|
346
|
+
|
|
347
|
+
Intended for grouping structures like:
|
|
348
|
+
- "JunAug 2026" -> "JunAug"
|
|
349
|
+
- "Q1Q2 2026" -> "Q1Q2"
|
|
350
|
+
- "CAL 2025-2026" -> "CAL"
|
|
351
|
+
|
|
352
|
+
Returns None if the label cannot be converted to a non-empty key.
|
|
353
|
+
"""
|
|
354
|
+
s = str(col).strip()
|
|
355
|
+
if not s:
|
|
356
|
+
return None
|
|
357
|
+
|
|
358
|
+
# Remove year ranges and single years (prefer full year tokens).
|
|
359
|
+
s = re.sub(r"\b(19|20)\d{2}\s*-\s*(19|20)\d{2}\b", "", s)
|
|
360
|
+
s = re.sub(r"\b(19|20)\d{2}\b", "", s)
|
|
361
|
+
|
|
362
|
+
# Cleanup leftover separators/spaces.
|
|
363
|
+
s = re.sub(r"[-/]+", " ", s)
|
|
364
|
+
s = re.sub(r"\s+", " ", s).strip()
|
|
365
|
+
return s or None
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
def reindex_year_point_stats_table(
|
|
369
|
+
df: pd.DataFrame,
|
|
370
|
+
*,
|
|
371
|
+
asof: datetime | str | pd.Timestamp | None = None,
|
|
372
|
+
lookback_years: int = 5,
|
|
373
|
+
within_days: int = 10,
|
|
374
|
+
min_columns: int = 3,
|
|
375
|
+
) -> pd.DataFrame:
|
|
376
|
+
"""
|
|
377
|
+
Compute prompt-vs-history point stats for many structures in one dataframe.
|
|
378
|
+
|
|
379
|
+
This is designed for frames where columns encode both a structure key and a year,
|
|
380
|
+
e.g. "JunAug 2025", "JunAug 2026", "DecJan 2025", etc.
|
|
381
|
+
|
|
382
|
+
The function:
|
|
383
|
+
- groups columns by a "base label" (column label with year tokens stripped),
|
|
384
|
+
- runs `reindex_year_point_stats` per group,
|
|
385
|
+
- returns a sortable table (z-score/percentile) for scanning cheap/rich structures.
|
|
386
|
+
|
|
387
|
+
Notes:
|
|
388
|
+
- Columns must include a 4-digit year somewhere for `dates.find_year` to work reliably.
|
|
389
|
+
- Groups with fewer than `min_columns` columns are skipped.
|
|
390
|
+
"""
|
|
391
|
+
if df is None or df.empty:
|
|
392
|
+
return pd.DataFrame(
|
|
393
|
+
columns=[
|
|
394
|
+
"group",
|
|
395
|
+
"asof",
|
|
396
|
+
"prompt_year",
|
|
397
|
+
"value",
|
|
398
|
+
"mean",
|
|
399
|
+
"std",
|
|
400
|
+
"zscore",
|
|
401
|
+
"percentile",
|
|
402
|
+
"n_reference",
|
|
403
|
+
]
|
|
404
|
+
)
|
|
405
|
+
|
|
406
|
+
groups: dict[str, list] = {}
|
|
407
|
+
for col in df.columns:
|
|
408
|
+
key = _base_label_from_column(col)
|
|
409
|
+
if key is None:
|
|
410
|
+
continue
|
|
411
|
+
groups.setdefault(key, []).append(col)
|
|
412
|
+
|
|
413
|
+
rows: list[dict] = []
|
|
414
|
+
for key, cols in groups.items():
|
|
415
|
+
if len(cols) < min_columns:
|
|
416
|
+
continue
|
|
417
|
+
stats_res = reindex_year_point_stats(
|
|
418
|
+
df[cols],
|
|
419
|
+
asof=asof,
|
|
420
|
+
lookback_years=lookback_years,
|
|
421
|
+
within_days=within_days,
|
|
422
|
+
)
|
|
423
|
+
rows.append(
|
|
424
|
+
{
|
|
425
|
+
"group": key,
|
|
426
|
+
"asof": stats_res.asof,
|
|
427
|
+
"prompt_year": stats_res.current_year,
|
|
428
|
+
"value": stats_res.current_value,
|
|
429
|
+
"mean": stats_res.mean,
|
|
430
|
+
"std": stats_res.std,
|
|
431
|
+
"zscore": stats_res.zscore,
|
|
432
|
+
"percentile": stats_res.percentile,
|
|
433
|
+
"n_reference": len(stats_res.reference_values),
|
|
434
|
+
}
|
|
435
|
+
)
|
|
436
|
+
|
|
437
|
+
if not rows:
|
|
438
|
+
return pd.DataFrame(
|
|
439
|
+
columns=[
|
|
440
|
+
"group",
|
|
441
|
+
"asof",
|
|
442
|
+
"prompt_year",
|
|
443
|
+
"value",
|
|
444
|
+
"mean",
|
|
445
|
+
"std",
|
|
446
|
+
"zscore",
|
|
447
|
+
"percentile",
|
|
448
|
+
"n_reference",
|
|
449
|
+
]
|
|
450
|
+
)
|
|
451
|
+
|
|
452
|
+
res = pd.DataFrame(rows).set_index("group").sort_values(["zscore", "percentile"], ascending=[True, True])
|
|
453
|
+
return res
|
|
@@ -44,6 +44,40 @@ def seasonailse(df, fillna=True):
|
|
|
44
44
|
return seas
|
|
45
45
|
|
|
46
46
|
|
|
47
|
+
def seasonalize(data, histfreq: str | None = None, fillna: bool = True):
    """
    Canonical seasonalization helper.

    This mirrors the logic historically living in `commodplot.commodplottransform.seasonalise`,
    but keeps the core transformation in `commodutil`.

    Args:
        data: Series or DataFrame with a DatetimeIndex.
        histfreq: Optional frequency hint. If None, inferred from index; defaults to "D"
            when the frequency cannot be inferred.
        fillna: Passed through to `seasonailse` for the non-weekly path.

    Returns:
        Seasonalized DataFrame with columns that are entirely NaN removed.
    """
    if isinstance(data, pd.Series):
        data = pd.DataFrame(data)

    if histfreq is None:
        try:
            histfreq = pd.infer_freq(data.index)
        except (TypeError, ValueError):
            # infer_freq raises instead of returning None for indexes with
            # fewer than 3 entries or that are not datetime-like; honour the
            # documented fallback rather than propagating the error.
            histfreq = None
        if histfreq is None:
            histfreq = "D"

    if histfreq.startswith("W"):
        seas = seasonalise_weekly(data)
    else:
        # The input is always a DataFrame at this point (Series are wrapped
        # above), which keeps the call consistent with the legacy behavior.
        # NOTE(review): the original comment says `seasonailse` uses the first
        # column when handed a DataFrame - confirm against its implementation.
        seas = seasonailse(data, fillna=fillna)

    return seas.dropna(how="all", axis=1)
|
|
79
|
+
|
|
80
|
+
|
|
47
81
|
def cleanup_weekly_data(df):
|
|
48
82
|
"""
|
|
49
83
|
Processes dates in a DataFrame to ensure that the intended weekday data is present for each week.
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import unittest
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
from commodutil import dates
|
|
8
|
+
from commodutil import forwards
|
|
9
|
+
from commodutil import stats
|
|
10
|
+
from commodutil.forward.util import convert_contract_to_date
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TestForwards(unittest.TestCase):
    """Tests for `commodutil.stats`: legacy z-score helpers and point stats."""

    @staticmethod
    def _load_contracts():
        """Load test_cl.csv and relabel its columns as contract dates."""
        dirname = os.path.dirname(os.path.abspath(__file__))
        cl = pd.read_csv(
            os.path.join(dirname, "test_cl.csv"),
            index_col=0,
            parse_dates=True,
            dayfirst=True,
        )
        return cl.rename(
            columns={x: pd.to_datetime(convert_contract_to_date(x)) for x in cl.columns}
        )

    def test_curve_zscore(self):
        contracts = self._load_contracts()
        hist = contracts[["2020-01-01"]].dropna()
        fwd = contracts[["2020-01-01"]]

        res = stats.curve_seasonal_zscore(hist, fwd)
        self.assertAlmostEqual(res["zscore"]["2019-01-02"], 0.92, 2)

    def test_reindex_zscore(self):
        contracts = self._load_contracts()

        q = forwards.quarterly_contracts(contracts)
        q = q[[x for x in q.columns if "Q1" in x]]

        res = stats.reindex_zscore(q, calc_year_start=2022)
        self.assertIsNotNone(res)

    def test_select_reindex_prompt_column(self):
        """
        If the current-year column ends within 10 days of the max x-date, prefer next year.
        Otherwise prefer current year.
        """
        idx = pd.date_range(f"{dates.curyear}-01-01", f"{dates.curyear}-01-31", freq="D")
        df = pd.DataFrame(
            {
                f"Spread {dates.curyear}": np.arange(len(idx), dtype=float),
                f"Spread {dates.curyear + 1}": np.arange(len(idx), dtype=float) + 100.0,
            },
            index=idx,
        )

        # Current-year data stops 20 days before the end: stay on current year.
        df1 = df.copy()
        df1.loc[idx[-20:], f"Spread {dates.curyear}"] = np.nan
        sel1 = stats.select_reindex_prompt_column(df1, within_days=10)
        self.assertEqual(sel1, f"Spread {dates.curyear}")

        # Current-year data stops 5 days before the end: roll to next year.
        df2 = df.copy()
        df2.loc[idx[-5:], f"Spread {dates.curyear}"] = np.nan
        sel2 = stats.select_reindex_prompt_column(df2, within_days=10)
        self.assertEqual(sel2, f"Spread {dates.curyear + 1}")

    def test_reindex_year_point_stats(self):
        """
        Ensure point-stats uses aligned as-of values across years after reindexing.
        """
        values_by_year = {
            dates.curyear - 2: 10.0,
            dates.curyear - 1: 12.0,
            dates.curyear: 15.0,
            dates.curyear + 1: 99.0,
        }
        frames = []
        for y, level in values_by_year.items():
            # Extend y+1 so the overall max x-date is later than current-year column,
            # making the prompt selection stay on current year (delta>=10 days).
            end_day = "01-31" if y == dates.curyear + 1 else "01-10"
            idx = pd.date_range(f"{y}-01-01", f"{y}-{end_day}", freq="D")
            frames.append(pd.DataFrame({f"Spread {y}": np.full(len(idx), level)}, index=idx))

        df = pd.concat(frames, axis=1, join="outer")

        asof = f"{dates.curyear}-01-10"
        res = stats.reindex_year_point_stats(df, asof=asof, lookback_years=2, within_days=10)

        self.assertEqual(res.current_year, dates.curyear)
        self.assertAlmostEqual(res.current_value, 15.0, 6)
        self.assertEqual(sorted(res.reference_years), [dates.curyear - 2, dates.curyear - 1])
        self.assertAlmostEqual(res.mean, 11.0, 6)
        self.assertAlmostEqual(res.std, np.sqrt(2.0), 6)
        self.assertAlmostEqual(res.zscore, 2.828427, 4)
        self.assertAlmostEqual(res.percentile, 1.0, 6)

    def test_reindex_year_point_stats_table_groups(self):
        # Year labels are derived from dates.curyear: with hard-coded years the
        # final assertion only holds during one specific calendar year.
        cur = dates.curyear
        idx = pd.date_range(f"{cur}-01-01", f"{cur}-01-10", freq="D")
        df = pd.DataFrame(
            {
                # Group A (3 years -> should be included with min_columns=3)
                f"JunAug {cur - 2}": np.full(len(idx), 10.0),
                f"JunAug {cur - 1}": np.full(len(idx), 11.0),
                f"JunAug {cur}": np.full(len(idx), 12.0),
                # Group B (2 years -> should be excluded with min_columns=3)
                f"DecJan {cur - 1}": np.full(len(idx), 5.0),
                f"DecJan {cur}": np.full(len(idx), 6.0),
            },
            index=idx,
        )

        table = stats.reindex_year_point_stats_table(df, lookback_years=2, min_columns=3)
        self.assertIn("JunAug", table.index)
        self.assertNotIn("DecJan", table.index)
        self.assertIn("zscore", table.columns)
        self.assertEqual(int(table.loc["JunAug", "prompt_year"]), dates.curyear)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
if __name__ == "__main__":
|
|
131
|
+
unittest.main()
|
|
@@ -1,50 +0,0 @@
|
|
|
1
|
-
import re
|
|
2
|
-
|
|
3
|
-
import pandas as pd
|
|
4
|
-
|
|
5
|
-
from commodutil import dates
|
|
6
|
-
from commodutil import transforms
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
def curve_seasonal_zscore(hist, fwd):
|
|
10
|
-
"""
|
|
11
|
-
Given some history for a timeseries and a forward curve, calculate the monthly
|
|
12
|
-
z-score (std dev away from mean) along the forward curve
|
|
13
|
-
"""
|
|
14
|
-
|
|
15
|
-
d = transforms.monthly_mean(hist).T.describe()
|
|
16
|
-
|
|
17
|
-
if isinstance(fwd, pd.Series):
|
|
18
|
-
fwd = pd.DataFrame(fwd)
|
|
19
|
-
fwd["zscore"] = fwd.apply(
|
|
20
|
-
lambda x: (d[x.name.month].loc["mean"] - x.iloc[0])
|
|
21
|
-
/ d[x.name.month].loc["std"],
|
|
22
|
-
1,
|
|
23
|
-
)
|
|
24
|
-
return fwd
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def reindex_zscore(df, range=10, calc_year_start: int = None):
|
|
28
|
-
"""
|
|
29
|
-
Given a dataframe of contracts (or spreads), calculate z-score for current year onwards
|
|
30
|
-
Essentially returns how far away the 'curve' is from historical trading range
|
|
31
|
-
"""
|
|
32
|
-
df = df
|
|
33
|
-
df = df.rename(
|
|
34
|
-
columns={x: int(re.findall("\d\d\d\d", str(x))[0]) for x in df.columns}
|
|
35
|
-
) # turn columns into years
|
|
36
|
-
d = df.loc[
|
|
37
|
-
:, dates.curyear - range - 1 : dates.curyear - 1
|
|
38
|
-
] # get subset of range years
|
|
39
|
-
d = d[:-10] # exclude last 10 rows to due to volatility close to expire
|
|
40
|
-
|
|
41
|
-
dfs = []
|
|
42
|
-
if not calc_year_start:
|
|
43
|
-
calc_year_start = dates.curyear
|
|
44
|
-
for year in df.loc[:, calc_year_start : df.columns[-1]]:
|
|
45
|
-
z = (d.mean(axis=1) - df.loc[:, year]) / d.std(axis=1)
|
|
46
|
-
z.name = year
|
|
47
|
-
dfs.append(z)
|
|
48
|
-
if len(dfs) > 0:
|
|
49
|
-
res = pd.concat(dfs, axis=1)
|
|
50
|
-
return res
|
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import unittest
|
|
3
|
-
|
|
4
|
-
import pandas as pd
|
|
5
|
-
|
|
6
|
-
from commodutil import forwards
|
|
7
|
-
from commodutil import stats
|
|
8
|
-
from commodutil.forward.util import convert_contract_to_date
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class TestForwards(unittest.TestCase):
|
|
12
|
-
def test_curve_zscore(self):
|
|
13
|
-
dirname, filename = os.path.split(os.path.abspath(__file__))
|
|
14
|
-
cl = pd.read_csv(
|
|
15
|
-
os.path.join(dirname, "test_cl.csv"),
|
|
16
|
-
index_col=0,
|
|
17
|
-
parse_dates=True,
|
|
18
|
-
dayfirst=True,
|
|
19
|
-
)
|
|
20
|
-
contracts = cl.rename(
|
|
21
|
-
columns={
|
|
22
|
-
x: pd.to_datetime(convert_contract_to_date(x))
|
|
23
|
-
for x in cl.columns
|
|
24
|
-
}
|
|
25
|
-
)
|
|
26
|
-
hist = contracts[["2020-01-01"]].dropna()
|
|
27
|
-
|
|
28
|
-
fwd = contracts[["2020-01-01"]]
|
|
29
|
-
|
|
30
|
-
res = stats.curve_seasonal_zscore(hist, fwd)
|
|
31
|
-
|
|
32
|
-
self.assertAlmostEqual(res["zscore"]["2019-01-02"], 0.92, 2)
|
|
33
|
-
|
|
34
|
-
def test_reindex_zscore(self):
|
|
35
|
-
dirname, filename = os.path.split(os.path.abspath(__file__))
|
|
36
|
-
cl = pd.read_csv(
|
|
37
|
-
os.path.join(dirname, "test_cl.csv"),
|
|
38
|
-
index_col=0,
|
|
39
|
-
parse_dates=True,
|
|
40
|
-
dayfirst=True,
|
|
41
|
-
)
|
|
42
|
-
contracts = cl.rename(
|
|
43
|
-
columns={
|
|
44
|
-
x: pd.to_datetime(convert_contract_to_date(x))
|
|
45
|
-
for x in cl.columns
|
|
46
|
-
}
|
|
47
|
-
)
|
|
48
|
-
|
|
49
|
-
q = forwards.quarterly_contracts(contracts)
|
|
50
|
-
q = q[[x for x in q.columns if "Q1" in x]]
|
|
51
|
-
|
|
52
|
-
res = stats.reindex_zscore(q, calc_year_start=2022)
|
|
53
|
-
self.assertIsNotNone(res)
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
if __name__ == "__main__":
|
|
57
|
-
unittest.main()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|