commodutil 3.2.3__tar.gz → 3.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. {commodutil-3.2.3 → commodutil-3.3.0}/PKG-INFO +1 -1
  2. commodutil-3.3.0/commodutil/stats.py +453 -0
  3. {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/transforms.py +34 -0
  4. {commodutil-3.2.3 → commodutil-3.3.0}/commodutil.egg-info/PKG-INFO +1 -1
  5. commodutil-3.3.0/tests/test_stats.py +131 -0
  6. commodutil-3.2.3/commodutil/stats.py +0 -50
  7. commodutil-3.2.3/tests/test_stats.py +0 -57
  8. {commodutil-3.2.3 → commodutil-3.3.0}/.coveragerc +0 -0
  9. {commodutil-3.2.3 → commodutil-3.3.0}/.github/workflows/1_tests.yml +0 -0
  10. {commodutil-3.2.3 → commodutil-3.3.0}/.github/workflows/2_coverage.yml +0 -0
  11. {commodutil-3.2.3 → commodutil-3.3.0}/.github/workflows/3_linting.yml +0 -0
  12. {commodutil-3.2.3 → commodutil-3.3.0}/.github/workflows/4_release.yml +0 -0
  13. {commodutil-3.2.3 → commodutil-3.3.0}/.gitignore +0 -0
  14. {commodutil-3.2.3 → commodutil-3.3.0}/.pypirc +0 -0
  15. {commodutil-3.2.3 → commodutil-3.3.0}/azure-build-pipelines.yml +0 -0
  16. {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/__init__.py +0 -0
  17. {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/arb.py +0 -0
  18. {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/convfactors.py +0 -0
  19. {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/dates.py +0 -0
  20. {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/forward/__init__.py +0 -0
  21. {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/forward/calendar.py +0 -0
  22. {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/forward/continuous.py +0 -0
  23. {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/forward/fly.py +0 -0
  24. {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/forward/quarterly.py +0 -0
  25. {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/forward/spreads.py +0 -0
  26. {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/forward/structure.py +0 -0
  27. {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/forward/util.py +0 -0
  28. {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/forwards.py +0 -0
  29. {commodutil-3.2.3 → commodutil-3.3.0}/commodutil/pandasutil.py +0 -0
  30. {commodutil-3.2.3 → commodutil-3.3.0}/commodutil.egg-info/SOURCES.txt +0 -0
  31. {commodutil-3.2.3 → commodutil-3.3.0}/commodutil.egg-info/dependency_links.txt +0 -0
  32. {commodutil-3.2.3 → commodutil-3.3.0}/commodutil.egg-info/requires.txt +0 -0
  33. {commodutil-3.2.3 → commodutil-3.3.0}/commodutil.egg-info/top_level.txt +0 -0
  34. {commodutil-3.2.3 → commodutil-3.3.0}/pyproject.toml +0 -0
  35. {commodutil-3.2.3 → commodutil-3.3.0}/requirements-test.txt +0 -0
  36. {commodutil-3.2.3 → commodutil-3.3.0}/requirements.txt +0 -0
  37. {commodutil-3.2.3 → commodutil-3.3.0}/requirements_dev.txt +0 -0
  38. {commodutil-3.2.3 → commodutil-3.3.0}/setup.cfg +0 -0
  39. {commodutil-3.2.3 → commodutil-3.3.0}/tests/__init__.py +0 -0
  40. {commodutil-3.2.3 → commodutil-3.3.0}/tests/conftest.py +0 -0
  41. {commodutil-3.2.3 → commodutil-3.3.0}/tests/forward/__init__.py +0 -0
  42. {commodutil-3.2.3 → commodutil-3.3.0}/tests/forward/conftest.py +0 -0
  43. {commodutil-3.2.3 → commodutil-3.3.0}/tests/forward/test_calendar.py +0 -0
  44. {commodutil-3.2.3 → commodutil-3.3.0}/tests/forward/test_continuous.py +0 -0
  45. {commodutil-3.2.3 → commodutil-3.3.0}/tests/forward/test_fly.py +0 -0
  46. {commodutil-3.2.3 → commodutil-3.3.0}/tests/forward/test_quarterly.py +0 -0
  47. {commodutil-3.2.3 → commodutil-3.3.0}/tests/forward/test_spreads.py +0 -0
  48. {commodutil-3.2.3 → commodutil-3.3.0}/tests/forward/test_structure.py +0 -0
  49. {commodutil-3.2.3 → commodutil-3.3.0}/tests/forward/test_util.py +0 -0
  50. {commodutil-3.2.3 → commodutil-3.3.0}/tests/test_arb.py +0 -0
  51. {commodutil-3.2.3 → commodutil-3.3.0}/tests/test_cl.csv +0 -0
  52. {commodutil-3.2.3 → commodutil-3.3.0}/tests/test_conv.py +0 -0
  53. {commodutil-3.2.3 → commodutil-3.3.0}/tests/test_dates.py +0 -0
  54. {commodutil-3.2.3 → commodutil-3.3.0}/tests/test_forwards.py +0 -0
  55. {commodutil-3.2.3 → commodutil-3.3.0}/tests/test_pandasutils.py +0 -0
  56. {commodutil-3.2.3 → commodutil-3.3.0}/tests/test_price_conv.py +0 -0
  57. {commodutil-3.2.3 → commodutil-3.3.0}/tests/test_transforms.py +0 -0
  58. {commodutil-3.2.3 → commodutil-3.3.0}/tests/test_weekly.csv +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: commodutil
3
- Version: 3.2.3
3
+ Version: 3.3.0
4
4
  Summary: common commodity/oil analytics utils
5
5
  Author-email: aeorxc <author@example.com>
6
6
  Project-URL: Homepage, https://dev.azure.com/RWEST-MFI-TE/Oil/_git/commodutil
@@ -0,0 +1,453 @@
1
+ import re
2
+ from dataclasses import dataclass
3
+ from datetime import datetime
4
+ from typing import Iterable
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+
9
+ from commodutil import dates
10
+ from commodutil import transforms
11
+
12
+
13
def curve_seasonal_zscore(hist, fwd):
    """
    Given some history for a timeseries and a forward curve, calculate the monthly
    z-score (std dev away from mean) along the forward curve.

    Args:
        hist: Historical data passed to `transforms.monthly_mean`.
        fwd: Forward curve as a Series or DataFrame whose index entries expose
            `.month`; only the first column is used as the curve value.

    Returns:
        A new DataFrame holding the forward curve plus a "zscore" column. The
        caller's `fwd` object is left untouched.

    Note:
        The score is computed as (historical mean - forward value) / std, i.e.
        it is positive when the forward value sits below the historical mean.
    """
    # presumably one column per calendar month after the transpose - the lookup
    # below indexes the describe() output by month number; verify against
    # `transforms.monthly_mean`.
    monthly = transforms.monthly_mean(hist).T.describe()

    # Work on our own frame so that adding the "zscore" column never mutates
    # the DataFrame the caller passed in.
    if isinstance(fwd, pd.Series):
        fwd = pd.DataFrame(fwd)
    else:
        fwd = fwd.copy()

    def _row_zscore(row):
        month_stats = monthly[row.name.month]
        return (month_stats.loc["mean"] - row.iloc[0]) / month_stats.loc["std"]

    fwd["zscore"] = fwd.apply(_row_zscore, axis=1)
    return fwd
29
+
30
+
31
def reindex_zscore(df, range=10, calc_year_start: int | None = None):
    """
    Given a dataframe of contracts (or spreads), calculate z-score for current year onwards.
    Essentially returns how far away the 'curve' is from historical trading range.

    Args:
        df: Frame whose column labels each contain a 4-digit year.
        range: Number of years of history (counted back from last year) used
            for the mean/std. The name shadows the builtin but is kept for
            backward compatibility with keyword callers.
        calc_year_start: First year to score; defaults to `dates.curyear`.

    Returns:
        DataFrame with one z-score column per scored year, or None when no
        year column falls in the scored range.

    Raises:
        IndexError: If a column label contains no 4-digit number.

    Note:
        The score is (historical mean - value) / historical std.
    """
    year_pattern = re.compile(r"\d\d\d\d")
    df = df.rename(
        columns={col: int(year_pattern.findall(str(col))[0]) for col in df.columns}
    )  # turn columns into years

    # Subset of `range` years of history, ending with last year.
    history = df.loc[:, dates.curyear - range - 1 : dates.curyear - 1]
    # Exclude the last 10 rows due to volatility close to expiry.
    history = history[:-10]

    # Loop-invariant, so compute once.
    hist_mean = history.mean(axis=1)
    hist_std = history.std(axis=1)

    if not calc_year_start:
        calc_year_start = dates.curyear

    zscores = []
    for year in df.loc[:, calc_year_start : df.columns[-1]]:
        z = (hist_mean - df.loc[:, year]) / hist_std
        z.name = year
        zscores.append(z)

    if not zscores:
        return None
    return pd.concat(zscores, axis=1)
55
+
56
+
57
+ @dataclass(frozen=True)
58
+ class PointStats:
59
+ """
60
+ Summary statistics for a single "as-of" point against a historical reference set.
61
+
62
+ Percentile is returned as an empirical CDF in [0, 1], i.e. fraction of reference
63
+ values <= current value.
64
+ """
65
+
66
+ asof: pd.Timestamp
67
+ current_year: int | None
68
+ current_value: float | None
69
+ reference_years: list[int]
70
+ reference_values: list[float]
71
+ mean: float | None
72
+ std: float | None
73
+ zscore: float | None
74
+ percentile: float | None
75
+
76
+
77
+ def last_value_at_or_before(series: pd.Series, asof: datetime | str | pd.Timestamp) -> float | None:
78
+ """
79
+ Return the last non-null value at or before `asof`.
80
+
81
+ Returns None if no value exists in the window.
82
+ """
83
+ ts = pd.Timestamp(asof)
84
+ s = series.loc[:ts].dropna()
85
+ if s.empty:
86
+ return None
87
+ val = s.iloc[-1]
88
+ try:
89
+ return float(val)
90
+ except Exception:
91
+ return None
92
+
93
+
94
+ def empirical_percentile(value: float, reference_values: Iterable[float]) -> float | None:
95
+ """
96
+ Empirical percentile as fraction of reference values <= value.
97
+
98
+ Returns None if reference is empty after filtering.
99
+ """
100
+ ref = [float(x) for x in reference_values if x is not None and not np.isnan(x)]
101
+ if not ref:
102
+ return None
103
+ return float(np.mean([x <= value for x in ref]))
104
+
105
+
106
+ def point_stats(value: float | None, reference_values: Iterable[float]) -> tuple[float | None, float | None, float | None, float | None]:
107
+ """
108
+ Compute (mean, std, zscore, percentile) for a value vs reference values.
109
+
110
+ Std is sample std (ddof=1) when at least 2 reference values exist, else 0.0.
111
+ """
112
+ ref = np.array([float(x) for x in reference_values if x is not None and not np.isnan(x)], dtype=float)
113
+ if value is None or np.isnan(value) or ref.size == 0:
114
+ return None, None, None, None
115
+
116
+ mean = float(np.mean(ref))
117
+ std = float(np.std(ref, ddof=1)) if ref.size >= 2 else 0.0
118
+ z = None if std == 0.0 else float((float(value) - mean) / std)
119
+ p = empirical_percentile(float(value), ref.tolist())
120
+ return mean, std, z, p
121
+
122
+
123
def select_reindex_prompt_column(df_reindexed: pd.DataFrame, *, within_days: int = 10):
    """
    Pick the "prompt" column of a reindex-year dataframe.

    Mirrors the legacy behavior in `commodplot.commodplotutil.reindex_year_df_rel_col`:
    - Start from the first column whose label contains `dates.curyear`
      (falling back to the very first column when there is none).
    - If that column's last non-null row is fewer than `within_days` days
      before the frame's last index entry, switch to the first column whose
      label contains the following year, when such a column exists.

    Returns None for a None or empty frame.
    """
    if df_reindexed is None or df_reindexed.empty:
        return None

    year_by_column = dates.find_year(df_reindexed)
    frame_end = df_reindexed.index[-1]

    curyear_token = str(dates.curyear)
    candidates = [col for col in df_reindexed.columns if curyear_token in str(col)]
    if not candidates:
        return df_reindexed.columns[0]

    prompt = candidates[0]
    prompt_year = year_by_column.get(prompt)
    if not isinstance(prompt_year, int):
        return prompt

    year_start = pd.to_datetime(f"{prompt_year}-01-01")

    # No row where every current-year column is populated: keep the current pick.
    if len(df_reindexed[candidates].dropna()) == 0:
        return prompt

    prompt_observed = df_reindexed[prompt].dropna()
    if prompt_observed.empty:
        return prompt

    gap = frame_end - prompt_observed.index[-1]
    if gap.days >= within_days:
        return prompt

    # Prompt column ends close to the end of the frame: roll to next year if possible.
    following_year = (year_start + pd.DateOffset(years=1)).year
    following_cols = [col for col in df_reindexed.columns if str(following_year) in str(col)]
    if following_cols:
        return following_cols[0]
    return prompt
167
+
168
+
169
def reindex_year_point_stats(
    df: pd.DataFrame,
    *,
    asof: datetime | str | pd.Timestamp | None = None,
    lookback_years: int = 5,
    within_days: int = 10,
) -> PointStats:
    """
    Point statistics for a reindex-year view of `df`.

    Steps:
    - reindex the frame with `commodutil.transforms.reindex_year`,
    - choose the prompt column with `select_reindex_prompt_column`,
    - compare the prompt column's last value at or before `asof` with the
      corresponding values of the columns from the previous `lookback_years`
      years.

    `asof` defaults to the last index entry of the reindexed frame. A
    `PointStats` with `asof=pd.NaT` and no values is returned when `df` (or
    its reindexed form) is None or empty.
    """

    def _blank() -> PointStats:
        return PointStats(
            asof=pd.NaT,
            current_year=None,
            current_value=None,
            reference_years=[],
            reference_values=[],
            mean=None,
            std=None,
            zscore=None,
            percentile=None,
        )

    if df is None or df.empty:
        return _blank()

    reindexed = transforms.reindex_year(df)
    if reindexed is None or reindexed.empty:
        return _blank()

    if asof is None:
        asof_ts = pd.Timestamp(reindexed.index.max())
    else:
        asof_ts = pd.Timestamp(asof)

    prompt_col = select_reindex_prompt_column(reindexed, within_days=within_days)
    year_by_column = dates.find_year(reindexed)

    prompt_year_int = None
    current_value = None
    if prompt_col is not None:
        found_year = year_by_column.get(prompt_col)
        if isinstance(found_year, int):
            prompt_year_int = found_year
        current_value = last_value_at_or_before(reindexed[prompt_col], asof_ts)

    # (year, value) pairs for every column inside the lookback window that
    # has a usable value at the as-of date.
    history: list[tuple[int, float]] = []
    if prompt_year_int is not None:
        first_year = prompt_year_int - lookback_years
        last_year = prompt_year_int - 1
        for col in reindexed.columns:
            col_year = year_by_column.get(col)
            if not isinstance(col_year, int):
                continue
            if col_year < first_year or col_year > last_year:
                continue
            past_value = last_value_at_or_before(reindexed[col], asof_ts)
            if past_value is not None:
                history.append((col_year, past_value))

    reference_years: list[int] = [yr for yr, _ in history]
    reference_values: list[float] = [val for _, val in history]

    mean, std, z, p = point_stats(current_value, reference_values)
    return PointStats(
        asof=asof_ts,
        current_year=prompt_year_int,
        current_value=current_value,
        reference_years=reference_years,
        reference_values=reference_values,
        mean=mean,
        std=std,
        zscore=z,
        percentile=p,
    )
248
+
249
+
250
def seasonal_point_stats(
    seas: pd.DataFrame,
    *,
    asof: datetime | str | pd.Timestamp | None = None,
    lookback_years: int = 5,
) -> PointStats:
    """
    Compute point stats from a seasonalized dataframe.

    Expects:
    - Index: current-year dates
    - Columns: years (ints)

    Uses `dates.curyear` as the current year column if present; otherwise uses max year column.

    Args:
        seas: Seasonalized frame as described above.
        asof: Date to evaluate at; defaults to (and is capped at) the last
            index entry. The last index entry <= `asof` is the row used.
        lookback_years: Number of years before the current year to use as
            reference.

    Returns:
        A `PointStats`. `reference_years` and `reference_values` are always
        aligned one-to-one: a year whose value at the as-of row is missing
        (NaN) or not convertible to float is left out of both lists, matching
        how `reindex_year_point_stats` reports its references. When the frame
        is None/empty, has no integer year columns, or has no row at or before
        `asof`, the result has `asof=pd.NaT` and no values.
    """

    def _blank(current_year: int | None = None) -> PointStats:
        return PointStats(
            asof=pd.NaT,
            current_year=current_year,
            current_value=None,
            reference_years=[],
            reference_values=[],
            mean=None,
            std=None,
            zscore=None,
            percentile=None,
        )

    if seas is None or seas.empty:
        return _blank()

    year_cols = [c for c in seas.columns if isinstance(c, (int, np.integer))]
    if not year_cols:
        return _blank()

    current_year = int(dates.curyear if dates.curyear in year_cols else max(year_cols))

    last_date = pd.Timestamp(seas.index.max())
    asof_ts = last_date if asof is None else min(pd.Timestamp(asof), last_date)

    # Use the last index <= asof, to avoid KeyError for non-trading days.
    valid_idx = seas.index[seas.index <= asof_ts]
    if len(valid_idx) == 0:
        return _blank(current_year)

    asof_row = pd.Timestamp(valid_idx.max())

    def _cell(year) -> float | None:
        # A cell that is absent, non-scalar or non-numeric counts as "no value".
        try:
            return float(seas.loc[asof_row, year])
        except (KeyError, TypeError, ValueError):
            return None

    current_value = _cell(current_year)

    reference_years: list[int] = []
    reference_values: list[float] = []
    for y in year_cols:
        if not (current_year - lookback_years) <= int(y) <= (current_year - 1):
            continue
        past_value = _cell(y)
        # Append year and value together so the two lists can never drift apart.
        if past_value is None or np.isnan(past_value):
            continue
        reference_years.append(int(y))
        reference_values.append(past_value)

    mean, std, z, p = point_stats(current_value, reference_values)
    return PointStats(
        asof=asof_row,
        current_year=current_year,
        current_value=current_value,
        reference_years=reference_years,
        reference_values=reference_values,
        mean=mean,
        std=std,
        zscore=z,
        percentile=p,
    )
341
+
342
+
343
+ def _base_label_from_column(col) -> str | None:
344
+ """
345
+ Derive a stable group key from a column label by stripping year-like tokens.
346
+
347
+ Intended for grouping structures like:
348
+ - "JunAug 2026" -> "JunAug"
349
+ - "Q1Q2 2026" -> "Q1Q2"
350
+ - "CAL 2025-2026" -> "CAL"
351
+
352
+ Returns None if the label cannot be converted to a non-empty key.
353
+ """
354
+ s = str(col).strip()
355
+ if not s:
356
+ return None
357
+
358
+ # Remove year ranges and single years (prefer full year tokens).
359
+ s = re.sub(r"\b(19|20)\d{2}\s*-\s*(19|20)\d{2}\b", "", s)
360
+ s = re.sub(r"\b(19|20)\d{2}\b", "", s)
361
+
362
+ # Cleanup leftover separators/spaces.
363
+ s = re.sub(r"[-/]+", " ", s)
364
+ s = re.sub(r"\s+", " ", s).strip()
365
+ return s or None
366
+
367
+
368
+ def reindex_year_point_stats_table(
369
+ df: pd.DataFrame,
370
+ *,
371
+ asof: datetime | str | pd.Timestamp | None = None,
372
+ lookback_years: int = 5,
373
+ within_days: int = 10,
374
+ min_columns: int = 3,
375
+ ) -> pd.DataFrame:
376
+ """
377
+ Compute prompt-vs-history point stats for many structures in one dataframe.
378
+
379
+ This is designed for frames where columns encode both a structure key and a year,
380
+ e.g. "JunAug 2025", "JunAug 2026", "DecJan 2025", etc.
381
+
382
+ The function:
383
+ - groups columns by a "base label" (column label with year tokens stripped),
384
+ - runs `reindex_year_point_stats` per group,
385
+ - returns a sortable table (z-score/percentile) for scanning cheap/rich structures.
386
+
387
+ Notes:
388
+ - Columns must include a 4-digit year somewhere for `dates.find_year` to work reliably.
389
+ - Groups with fewer than `min_columns` columns are skipped.
390
+ """
391
+ if df is None or df.empty:
392
+ return pd.DataFrame(
393
+ columns=[
394
+ "group",
395
+ "asof",
396
+ "prompt_year",
397
+ "value",
398
+ "mean",
399
+ "std",
400
+ "zscore",
401
+ "percentile",
402
+ "n_reference",
403
+ ]
404
+ )
405
+
406
+ groups: dict[str, list] = {}
407
+ for col in df.columns:
408
+ key = _base_label_from_column(col)
409
+ if key is None:
410
+ continue
411
+ groups.setdefault(key, []).append(col)
412
+
413
+ rows: list[dict] = []
414
+ for key, cols in groups.items():
415
+ if len(cols) < min_columns:
416
+ continue
417
+ stats_res = reindex_year_point_stats(
418
+ df[cols],
419
+ asof=asof,
420
+ lookback_years=lookback_years,
421
+ within_days=within_days,
422
+ )
423
+ rows.append(
424
+ {
425
+ "group": key,
426
+ "asof": stats_res.asof,
427
+ "prompt_year": stats_res.current_year,
428
+ "value": stats_res.current_value,
429
+ "mean": stats_res.mean,
430
+ "std": stats_res.std,
431
+ "zscore": stats_res.zscore,
432
+ "percentile": stats_res.percentile,
433
+ "n_reference": len(stats_res.reference_values),
434
+ }
435
+ )
436
+
437
+ if not rows:
438
+ return pd.DataFrame(
439
+ columns=[
440
+ "group",
441
+ "asof",
442
+ "prompt_year",
443
+ "value",
444
+ "mean",
445
+ "std",
446
+ "zscore",
447
+ "percentile",
448
+ "n_reference",
449
+ ]
450
+ )
451
+
452
+ res = pd.DataFrame(rows).set_index("group").sort_values(["zscore", "percentile"], ascending=[True, True])
453
+ return res
@@ -44,6 +44,40 @@ def seasonailse(df, fillna=True):
44
44
  return seas
45
45
 
46
46
 
47
def seasonalize(data, histfreq: str | None = None, fillna: bool = True):
    """
    Canonical seasonalization helper.

    This mirrors the logic historically living in `commodplot.commodplottransform.seasonalise`,
    but keeps the core transformation in `commodutil`.

    Args:
        data: Series or DataFrame with a DatetimeIndex.
        histfreq: Optional frequency hint. If None, inferred from index; defaults to "D"
            when no frequency can be inferred (including indexes too short
            for `pd.infer_freq`).
        fillna: Passed through to `seasonailse` for daily/monthly paths.

    Returns:
        Seasonalized DataFrame aligned to the current year index, with year columns.
        Columns that are entirely NaN are dropped.
    """
    if isinstance(data, pd.Series):
        data = pd.DataFrame(data)

    if histfreq is None:
        try:
            histfreq = pd.infer_freq(data.index)
        except ValueError:
            # infer_freq raises for indexes with fewer than three values;
            # fall through to the documented daily default instead.
            histfreq = None
        if histfreq is None:
            histfreq = "D"

    if histfreq.startswith("W"):
        seas = seasonalise_weekly(data)
    else:
        # `seasonailse` expects a Series (takes first column when given DataFrame),
        # but we normalize above to DataFrame so this stays consistent with legacy behavior.
        seas = seasonailse(data, fillna=fillna)

    seas = seas.dropna(how="all", axis=1)
    return seas
79
+
80
+
47
81
  def cleanup_weekly_data(df):
48
82
  """
49
83
  Processes dates in a DataFrame to ensure that the intended weekday data is present for each week.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: commodutil
3
- Version: 3.2.3
3
+ Version: 3.3.0
4
4
  Summary: common commodity/oil analytics utils
5
5
  Author-email: aeorxc <author@example.com>
6
6
  Project-URL: Homepage, https://dev.azure.com/RWEST-MFI-TE/Oil/_git/commodutil
@@ -0,0 +1,131 @@
1
+ import os
2
+ import unittest
3
+
4
+ import numpy as np
5
+ import pandas as pd
6
+
7
+ from commodutil import dates
8
+ from commodutil import forwards
9
+ from commodutil import stats
10
+ from commodutil.forward.util import convert_contract_to_date
11
+
12
+
13
class TestForwards(unittest.TestCase):
    """Tests for `commodutil.stats` (z-score helpers and point statistics)."""

    @staticmethod
    def _load_contracts():
        """Load test_cl.csv (next to this file) with columns converted to contract dates."""
        dirname = os.path.dirname(os.path.abspath(__file__))
        cl = pd.read_csv(
            os.path.join(dirname, "test_cl.csv"),
            index_col=0,
            parse_dates=True,
            dayfirst=True,
        )
        return cl.rename(
            columns={x: pd.to_datetime(convert_contract_to_date(x)) for x in cl.columns}
        )

    def test_curve_zscore(self):
        contracts = self._load_contracts()
        hist = contracts[["2020-01-01"]].dropna()
        fwd = contracts[["2020-01-01"]]

        res = stats.curve_seasonal_zscore(hist, fwd)
        self.assertAlmostEqual(res["zscore"]["2019-01-02"], 0.92, 2)

    def test_reindex_zscore(self):
        contracts = self._load_contracts()

        q = forwards.quarterly_contracts(contracts)
        q = q[[x for x in q.columns if "Q1" in x]]

        res = stats.reindex_zscore(q, calc_year_start=2022)
        self.assertIsNotNone(res)

    def test_select_reindex_prompt_column(self):
        """
        If the current-year column ends within 10 days of the max x-date, prefer next year.
        Otherwise prefer current year.
        """
        idx = pd.date_range(f"{dates.curyear}-01-01", f"{dates.curyear}-01-31", freq="D")
        df = pd.DataFrame(
            {
                f"Spread {dates.curyear}": np.arange(len(idx), dtype=float),
                f"Spread {dates.curyear + 1}": np.arange(len(idx), dtype=float) + 100.0,
            },
            index=idx,
        )

        df1 = df.copy()
        df1.loc[idx[-20:], f"Spread {dates.curyear}"] = np.nan
        sel1 = stats.select_reindex_prompt_column(df1, within_days=10)
        self.assertEqual(sel1, f"Spread {dates.curyear}")

        df2 = df.copy()
        df2.loc[idx[-5:], f"Spread {dates.curyear}"] = np.nan
        sel2 = stats.select_reindex_prompt_column(df2, within_days=10)
        self.assertEqual(sel2, f"Spread {dates.curyear + 1}")

    def test_reindex_year_point_stats(self):
        """
        Ensure point-stats uses aligned as-of values across years after reindexing.
        """
        values_by_year = {
            dates.curyear - 2: 10.0,
            dates.curyear - 1: 12.0,
            dates.curyear: 15.0,
            dates.curyear + 1: 99.0,
        }
        frames = []
        for y, value in values_by_year.items():
            # Extend y+1 so the overall max x-date is later than current-year column,
            # making the prompt selection stay on current year (delta>=10 days).
            end_day = "01-31" if y == dates.curyear + 1 else "01-10"
            idx = pd.date_range(f"{y}-01-01", f"{y}-{end_day}", freq="D")
            frames.append(pd.DataFrame({f"Spread {y}": np.full(len(idx), value)}, index=idx))

        df = pd.concat(frames, axis=1, join="outer")

        asof = f"{dates.curyear}-01-10"
        res = stats.reindex_year_point_stats(df, asof=asof, lookback_years=2, within_days=10)

        self.assertEqual(res.current_year, dates.curyear)
        self.assertAlmostEqual(res.current_value, 15.0, 6)
        self.assertEqual(sorted(res.reference_years), [dates.curyear - 2, dates.curyear - 1])
        self.assertAlmostEqual(res.mean, 11.0, 6)
        self.assertAlmostEqual(res.std, np.sqrt(2.0), 6)
        self.assertAlmostEqual(res.zscore, 2.828427, 4)
        self.assertAlmostEqual(res.percentile, 1.0, 6)

    def test_reindex_year_point_stats_table_groups(self):
        # Column years are derived from dates.curyear (like the other tests) so
        # the prompt-year assertion does not depend on the calendar year in
        # which the suite is run.
        cur = dates.curyear
        idx = pd.date_range(f"{cur}-01-01", f"{cur}-01-10", freq="D")
        df = pd.DataFrame(
            {
                # Group A (3 years -> should be included with min_columns=3)
                f"JunAug {cur - 2}": np.full(len(idx), 10.0),
                f"JunAug {cur - 1}": np.full(len(idx), 11.0),
                f"JunAug {cur}": np.full(len(idx), 12.0),
                # Group B (2 years -> should be excluded with min_columns=3)
                f"DecJan {cur - 1}": np.full(len(idx), 5.0),
                f"DecJan {cur}": np.full(len(idx), 6.0),
            },
            index=idx,
        )

        table = stats.reindex_year_point_stats_table(df, lookback_years=2, min_columns=3)
        self.assertIn("JunAug", table.index)
        self.assertNotIn("DecJan", table.index)
        self.assertIn("zscore", table.columns)
        self.assertEqual(int(table.loc["JunAug", "prompt_year"]), dates.curyear)


if __name__ == "__main__":
    unittest.main()
@@ -1,50 +0,0 @@
1
- import re
2
-
3
- import pandas as pd
4
-
5
- from commodutil import dates
6
- from commodutil import transforms
7
-
8
-
9
- def curve_seasonal_zscore(hist, fwd):
10
- """
11
- Given some history for a timeseries and a forward curve, calculate the monthly
12
- z-score (std dev away from mean) along the forward curve
13
- """
14
-
15
- d = transforms.monthly_mean(hist).T.describe()
16
-
17
- if isinstance(fwd, pd.Series):
18
- fwd = pd.DataFrame(fwd)
19
- fwd["zscore"] = fwd.apply(
20
- lambda x: (d[x.name.month].loc["mean"] - x.iloc[0])
21
- / d[x.name.month].loc["std"],
22
- 1,
23
- )
24
- return fwd
25
-
26
-
27
- def reindex_zscore(df, range=10, calc_year_start: int = None):
28
- """
29
- Given a dataframe of contracts (or spreads), calculate z-score for current year onwards
30
- Essentially returns how far away the 'curve' is from historical trading range
31
- """
32
- df = df
33
- df = df.rename(
34
- columns={x: int(re.findall("\d\d\d\d", str(x))[0]) for x in df.columns}
35
- ) # turn columns into years
36
- d = df.loc[
37
- :, dates.curyear - range - 1 : dates.curyear - 1
38
- ] # get subset of range years
39
- d = d[:-10] # exclude last 10 rows to due to volatility close to expire
40
-
41
- dfs = []
42
- if not calc_year_start:
43
- calc_year_start = dates.curyear
44
- for year in df.loc[:, calc_year_start : df.columns[-1]]:
45
- z = (d.mean(axis=1) - df.loc[:, year]) / d.std(axis=1)
46
- z.name = year
47
- dfs.append(z)
48
- if len(dfs) > 0:
49
- res = pd.concat(dfs, axis=1)
50
- return res
@@ -1,57 +0,0 @@
1
- import os
2
- import unittest
3
-
4
- import pandas as pd
5
-
6
- from commodutil import forwards
7
- from commodutil import stats
8
- from commodutil.forward.util import convert_contract_to_date
9
-
10
-
11
- class TestForwards(unittest.TestCase):
12
- def test_curve_zscore(self):
13
- dirname, filename = os.path.split(os.path.abspath(__file__))
14
- cl = pd.read_csv(
15
- os.path.join(dirname, "test_cl.csv"),
16
- index_col=0,
17
- parse_dates=True,
18
- dayfirst=True,
19
- )
20
- contracts = cl.rename(
21
- columns={
22
- x: pd.to_datetime(convert_contract_to_date(x))
23
- for x in cl.columns
24
- }
25
- )
26
- hist = contracts[["2020-01-01"]].dropna()
27
-
28
- fwd = contracts[["2020-01-01"]]
29
-
30
- res = stats.curve_seasonal_zscore(hist, fwd)
31
-
32
- self.assertAlmostEqual(res["zscore"]["2019-01-02"], 0.92, 2)
33
-
34
- def test_reindex_zscore(self):
35
- dirname, filename = os.path.split(os.path.abspath(__file__))
36
- cl = pd.read_csv(
37
- os.path.join(dirname, "test_cl.csv"),
38
- index_col=0,
39
- parse_dates=True,
40
- dayfirst=True,
41
- )
42
- contracts = cl.rename(
43
- columns={
44
- x: pd.to_datetime(convert_contract_to_date(x))
45
- for x in cl.columns
46
- }
47
- )
48
-
49
- q = forwards.quarterly_contracts(contracts)
50
- q = q[[x for x in q.columns if "Q1" in x]]
51
-
52
- res = stats.reindex_zscore(q, calc_year_start=2022)
53
- self.assertIsNotNone(res)
54
-
55
-
56
- if __name__ == "__main__":
57
- unittest.main()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes