pwb-toolbox 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,465 @@
+from calendar import month_abbr
+from typing import Sequence, Tuple
+from math import sqrt
+from statistics import NormalDist
+
+try:
+    import pandas as pd  # type: ignore
+except ModuleNotFoundError:  # pragma: no cover - optional dependency
+    pd = None  # type: ignore
+
+
+def _to_list(data: Sequence[float]) -> list:
+    """Convert Series-like data to list."""
+    if hasattr(data, "values"):
+        return list(data.values)
+    return list(data)
+
+
+def total_return(prices: Sequence[float]) -> float:
+    """Return total return of a price series."""
+    p = _to_list(prices)
+    if not p:
+        return 0.0
+    return p[-1] / p[0] - 1
+
+
+def cagr(prices: Sequence[float], periods_per_year: int = 252) -> float:
+    """Compound annual growth rate from a price series."""
+    p = _to_list(prices)
+    if len(p) < 2:
+        return 0.0
+    years = (len(p) - 1) / periods_per_year
+    if years == 0:
+        return 0.0
+    return (p[-1] / p[0]) ** (1 / years) - 1
+
+
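+# Note: each monthly (and yearly) figure compounds only the prices observed inside
+# that period, so the gap from the prior period's close is not included.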
+def returns_table(prices: 'pd.Series') -> 'pd.DataFrame':  # type: ignore
+    """Return monthly and yearly percentage returns from a daily price series."""
+    if pd is None:
+        raise ImportError("pandas is required for returns_table")
+
+    price_list = _to_list(prices)
+    index = list(getattr(prices, 'index', range(len(price_list))))
+
+    years = sorted({dt.year for dt in index})
+    months = list(range(1, 13))
+    data = {month_abbr[m]: [] for m in months}
+    data["Year"] = []
+
+    for year in years:
+        year_start = None
+        year_end = None
+        for m in months:
+            # indices belonging to year & month
+            idx = [i for i, dt in enumerate(index) if dt.year == year and dt.month == m]
+            if idx:
+                start = idx[0]
+                end = idx[-1]
+                ret = price_list[end] / price_list[start] - 1
+                if year_start is None:
+                    year_start = price_list[start]
+                year_end = price_list[end]
+            else:
+                ret = None
+            data[month_abbr[m]].append(ret)
+        if year_start is None:
+            data["Year"].append(None)
+        else:
+            data["Year"].append(year_end / year_start - 1)
+
+    return pd.DataFrame(data, index=years)
+
+
+def rolling_cumulative_return(prices: 'pd.Series', window: int) -> 'pd.Series':  # type: ignore
+    """Rolling cumulative return over a specified window."""
+    if pd is None:
+        raise ImportError("pandas is required for rolling_cumulative_return")
+
+    p = _to_list(prices)
+    index = list(getattr(prices, 'index', range(len(p))))
+    out = []
+    for i in range(len(p)):
+        if i < window:
+            out.append(None)
+        else:
+            out.append(p[i] / p[i - window] - 1)
+    s = pd.Series(out)
+    s.index = index
+    return s
+
+
+def annualized_volatility(prices: Sequence[float], periods_per_year: int = 252) -> float:
+    """Annualized volatility from a price series."""
+    p = _to_list(prices)
+    if len(p) < 2:
+        return 0.0
+    rets = [p[i] / p[i - 1] - 1 for i in range(1, len(p))]
+    mean = sum(rets) / len(rets)
+    var = sum((r - mean) ** 2 for r in rets) / len(rets)
+    return sqrt(var) * sqrt(periods_per_year)
+
+
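+# Depth is the most negative price/peak - 1; duration is the longest run of
+# consecutive observations at or below the running peak.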
+def max_drawdown(prices: Sequence[float]) -> Tuple[float, int]:
+    """Maximum drawdown depth and duration."""
+    p = _to_list(prices)
+    if not p:
+        return 0.0, 0
+    peak = p[0]
+    max_depth = 0.0
+    duration = 0
+    cur_duration = 0
+    for price in p:
+        if price > peak:
+            peak = price
+            cur_duration = 0
+        else:
+            cur_duration += 1
+        dd = price / peak - 1
+        if dd < max_depth:
+            max_depth = dd
+        if cur_duration > duration:
+            duration = cur_duration
+    return max_depth, duration
+
+
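+# Root-mean-square of percentage drawdowns from the running peak.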
+def ulcer_index(prices: Sequence[float]) -> float:
+    """Ulcer index of a price series."""
+    p = _to_list(prices)
+    if not p:
+        return 0.0
+    peak = p[0]
+    sum_sq = 0.0
+    for price in p:
+        if price > peak:
+            peak = price
+        dd = max(0.0, (peak - price) / peak)
+        sum_sq += dd ** 2
+    return sqrt(sum_sq / len(p))
+
+
+def ulcer_performance_index(prices: Sequence[float], risk_free_rate: float = 0.0, periods_per_year: int = 252) -> float:
+    """Ulcer Performance Index."""
+    ui = ulcer_index(prices)
+    if ui == 0:
+        return 0.0
+    return (cagr(prices, periods_per_year) - risk_free_rate) / ui
+
+
+def _parametric_stats(prices: Sequence[float]) -> Tuple[float, float]:
+    p = _to_list(prices)
+    if len(p) < 2:
+        return 0.0, 0.0
+    rets = [p[i] / p[i - 1] - 1 for i in range(1, len(p))]
+    mu = sum(rets) / len(rets)
+    var = sum((r - mu) ** 2 for r in rets) / len(rets)
+    return mu, sqrt(var)
+
+
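+# Closed-form normal VaR: -(mu + sigma * z), with z the standard normal quantile at `level`.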
+def parametric_var(prices: Sequence[float], level: float = 0.05) -> float:
+    """Parametric (normal) Value at Risk."""
+    mu, sigma = _parametric_stats(prices)
+    z = NormalDist().inv_cdf(level)
+    return -(mu + sigma * z)
+
+
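+# Closed-form normal ES: -(mu - sigma * pdf(z) / level), the expected loss beyond the VaR quantile.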
+def parametric_expected_shortfall(prices: Sequence[float], level: float = 0.05) -> float:
+    """Parametric (normal) Expected Shortfall."""
+    mu, sigma = _parametric_stats(prices)
+    z = NormalDist().inv_cdf(level)
+    return -(mu - sigma * NormalDist().pdf(z) / level)
+
+
+def tail_ratio(prices: Sequence[float]) -> float:
+    """Tail ratio of returns (95th percentile over 5th percentile)."""
+    p = _to_list(prices)
+    if len(p) < 3:
+        return 0.0
+    rets = sorted(p[i] / p[i - 1] - 1 for i in range(1, len(p)))
+    n = len(rets)
+    q95 = rets[int(0.95 * (n - 1))]
+    q05 = rets[int(0.05 * (n - 1))]
+    if q05 == 0:
+        return 0.0
+    return abs(q95) / abs(q05)
+
+
+def sharpe_ratio(
+    prices: Sequence[float],
+    risk_free_rate: float = 0.0,
+    periods_per_year: int = 252,
+) -> float:
+    """Annualized Sharpe ratio of a price series."""
+    p = _to_list(prices)
+    if len(p) < 2:
+        return 0.0
+    rf_per = risk_free_rate / periods_per_year
+    rets = [p[i] / p[i - 1] - 1 - rf_per for i in range(1, len(p))]
+    mean = sum(rets) / len(rets)
+    var = sum((r - mean) ** 2 for r in rets) / len(rets)
+    if var == 0:
+        return 0.0
+    return mean / sqrt(var) * sqrt(periods_per_year)
+
+
+def sortino_ratio(
+    prices: Sequence[float],
+    risk_free_rate: float = 0.0,
+    periods_per_year: int = 252,
+) -> float:
+    """Annualized Sortino ratio of a price series."""
+    p = _to_list(prices)
+    if len(p) < 2:
+        return 0.0
+    rf_per = risk_free_rate / periods_per_year
+    rets = [p[i] / p[i - 1] - 1 for i in range(1, len(p))]
+    mean_excess = sum(r - rf_per for r in rets) / len(rets)
+    downside = [min(0.0, r - rf_per) for r in rets]
+    var = sum(d ** 2 for d in downside) / len(rets)
+    if var == 0:
+        return 0.0
+    return mean_excess / sqrt(var) * sqrt(periods_per_year)
+
+
+def calmar_ratio(prices: Sequence[float], periods_per_year: int = 252) -> float:
+    """Calmar ratio of a price series."""
+    mdd, _duration = max_drawdown(prices)
+    if mdd == 0:
+        return 0.0
+    return cagr(prices, periods_per_year) / abs(mdd)
+
+
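+# Sum of per-period gains above the (per-period) threshold divided by the sum of shortfalls below it.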
+def omega_ratio(
+    prices: Sequence[float],
+    threshold: float = 0.0,
+    periods_per_year: int = 252,
+) -> float:
+    """Omega ratio of returns relative to a threshold."""
+    p = _to_list(prices)
+    if len(p) < 2:
+        return 0.0
+    thr = threshold / periods_per_year
+    rets = [p[i] / p[i - 1] - 1 for i in range(1, len(p))]
+    gains = sum(max(r - thr, 0.0) for r in rets)
+    losses = sum(max(thr - r, 0.0) for r in rets)
+    if losses == 0:
+        return 0.0
+    return gains / losses
+
+
+def information_ratio(
+    prices: Sequence[float],
+    benchmark: Sequence[float],
+    periods_per_year: int = 252,
+) -> float:
+    """Information ratio of strategy vs. benchmark prices."""
+    p = _to_list(prices)
+    b = _to_list(benchmark)
+    n = min(len(p), len(b))
+    if n < 2:
+        return 0.0
+    strat_rets = [p[i] / p[i - 1] - 1 for i in range(1, n)]
+    bench_rets = [b[i] / b[i - 1] - 1 for i in range(1, n)]
+    active = [r - br for r, br in zip(strat_rets, bench_rets)]
+    mean = sum(active) / len(active)
+    var = sum((a - mean) ** 2 for a in active) / len(active)
+    if var == 0:
+        return 0.0
+    return mean / sqrt(var) * sqrt(periods_per_year)
+
+
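+# beta = cov(strategy, benchmark) / var(benchmark); alpha is the per-period intercept, not annualized.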
+def capm_alpha_beta(prices: Sequence[float], benchmark: Sequence[float]) -> Tuple[float, float]:
+    """CAPM alpha and beta relative to a benchmark."""
+    p = _to_list(prices)
+    b = _to_list(benchmark)
+    n = min(len(p), len(b))
+    if n < 2:
+        return 0.0, 0.0
+    strat = [p[i] / p[i - 1] - 1 for i in range(1, n)]
+    bench = [b[i] / b[i - 1] - 1 for i in range(1, n)]
+    mean_x = sum(bench) / len(bench)
+    mean_y = sum(strat) / len(strat)
+    cov = sum((x - mean_x) * (y - mean_y) for x, y in zip(bench, strat)) / len(bench)
+    var_x = sum((x - mean_x) ** 2 for x in bench) / len(bench)
+    beta = cov / var_x if var_x else 0.0
+    alpha = mean_y - beta * mean_x
+    return alpha, beta
+
+
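+# Gauss-Jordan elimination on the augmented matrix [A | I], swapping rows on a
+# near-zero pivot; returns None when the matrix is singular.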
+def _invert_matrix(matrix: Sequence[Sequence[float]]) -> Sequence[Sequence[float]] | None:
+    size = len(matrix)
+    aug = [list(row) + [1 if i == j else 0 for j in range(size)] for i, row in enumerate(matrix)]
+    for i in range(size):
+        pivot = aug[i][i]
+        if abs(pivot) < 1e-12:
+            swap = next((j for j in range(i + 1, size) if abs(aug[j][i]) > 1e-12), None)
+            if swap is None:
+                return None
+            aug[i], aug[swap] = aug[swap], aug[i]
+            pivot = aug[i][i]
+        inv_p = 1 / pivot
+        for j in range(2 * size):
+            aug[i][j] *= inv_p
+        for k in range(size):
+            if k != i:
+                factor = aug[k][i]
+                for j in range(2 * size):
+                    aug[k][j] -= factor * aug[i][j]
+    return [row[size:] for row in aug]
+
+
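+# Ordinary least squares via the normal equations: beta = (X'X)^-1 X'y,
+# falling back to a zero vector when X'X cannot be inverted.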
+def _ols(y: Sequence[float], X: Sequence[Sequence[float]]) -> Sequence[float]:
+    n = len(y)
+    k = len(X[0]) if X else 0
+    xtx = [[0.0 for _ in range(k)] for _ in range(k)]
+    xty = [0.0 for _ in range(k)]
+    for i in range(n):
+        for p in range(k):
+            xty[p] += X[i][p] * y[i]
+            for q in range(k):
+                xtx[p][q] += X[i][p] * X[i][q]
+    inv = _invert_matrix(xtx)
+    if inv is None:
+        return [0.0 for _ in range(k)]
+    beta = [sum(inv[i][j] * xty[j] for j in range(k)) for i in range(k)]
+    return beta
+
+
+def fama_french_regression(prices: Sequence[float], factors: 'pd.DataFrame', factor_cols: Sequence[str]) -> 'pd.Series':  # type: ignore
+    """Run regression of excess returns on Fama-French factors."""
+    if pd is None:
+        raise ImportError("pandas is required for fama_french_regression")
+
+    p = _to_list(prices)
+    n = min(len(p), len(factors))
+    if n < 2:
+        data = [0.0] * (len(factor_cols) + 1)
+        s = pd.Series(data)
+        s.index = ["alpha"] + list(factor_cols)
+        return s
+
+    rets = [p[i] / p[i - 1] - 1 for i in range(1, n)]
+    rf = _to_list(factors["RF"]) if "RF" in factors.columns else [0.0] * n
+    y = [rets[i - 1] - rf[i] for i in range(1, n)]
+    x = [[1.0] + [_to_list(factors[c])[i] for c in factor_cols] for i in range(1, n)]
+    beta = _ols(y, x)
+    s = pd.Series(beta)
+    s.index = ["alpha"] + list(factor_cols)
+    return s
+
+
+def fama_french_3factor(prices: Sequence[float], factors: 'pd.DataFrame') -> 'pd.Series':  # type: ignore
+    cols = [c for c in ["Mkt-RF", "SMB", "HML"] if c in getattr(factors, "columns", [])]
+    return fama_french_regression(prices, factors, cols)
+
+
+def fama_french_5factor(prices: Sequence[float], factors: 'pd.DataFrame') -> 'pd.Series':  # type: ignore
+    cols = [c for c in ["Mkt-RF", "SMB", "HML", "RMW", "CMA"] if c in getattr(factors, "columns", [])]
+    return fama_french_regression(prices, factors, cols)
+
+
+def cumulative_excess_return(prices: Sequence[float], benchmark: Sequence[float]) -> 'pd.Series':  # type: ignore
+    """Cumulative excess return of strategy versus a benchmark."""
+    if pd is None:
+        raise ImportError("pandas is required for cumulative_excess_return")
+
+    p = _to_list(prices)
+    b = _to_list(benchmark)
+    n = min(len(p), len(b))
+    index = list(getattr(prices, 'index', range(len(p))))[:n]
+    cum = []
+    total = 1.0
+    for i in range(n):
+        if i == 0:
+            cum.append(0.0)
+        else:
+            strat_ret = p[i] / p[i - 1] - 1
+            bench_ret = b[i] / b[i - 1] - 1
+            total *= 1 + (strat_ret - bench_ret)
+            cum.append(total - 1)
+    s = pd.Series(cum)
+    s.index = index
+    return s
+
+
+def skewness(prices: Sequence[float]) -> float:
+    """Skewness of returns of a price series."""
+    p = _to_list(prices)
+    if len(p) < 3:
+        return 0.0
+    rets = [p[i] / p[i - 1] - 1 for i in range(1, len(p))]
+    mean = sum(rets) / len(rets)
+    var = sum((r - mean) ** 2 for r in rets) / len(rets)
+    if var == 0:
+        return 0.0
+    std = sqrt(var)
+    m3 = sum((r - mean) ** 3 for r in rets) / len(rets)
+    return m3 / (std ** 3)
+
+
+def kurtosis(prices: Sequence[float]) -> float:
+    """Kurtosis of returns of a price series."""
+    p = _to_list(prices)
+    if len(p) < 3:
+        return 0.0
+    rets = [p[i] / p[i - 1] - 1 for i in range(1, len(p))]
+    mean = sum(rets) / len(rets)
+    var = sum((r - mean) ** 2 for r in rets) / len(rets)
+    if var == 0:
+        return 0.0
+    m4 = sum((r - mean) ** 4 for r in rets) / len(rets)
+    return m4 / (var ** 2)
+
+
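+# Variance of overlapping lag-period return sums divided by lag times the
+# one-period variance; values near 1 are consistent with uncorrelated returns.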
+def variance_ratio(prices: Sequence[float], lag: int = 2) -> float:
+    """Lo-MacKinlay variance ratio of returns."""
+    p = _to_list(prices)
+    if len(p) <= lag + 1:
+        return 0.0
+    rets = [p[i] / p[i - 1] - 1 for i in range(1, len(p))]
+    mean = sum(rets) / len(rets)
+    var = sum((r - mean) ** 2 for r in rets) / len(rets)
+    if var == 0:
+        return 0.0
+    agg = [sum(rets[i - j] for j in range(1, lag + 1)) for i in range(lag, len(rets))]
+    var_lag = sum((a - lag * mean) ** 2 for a in agg) / len(agg)
+    return var_lag / (var * lag)
+
+
+def acf(prices: Sequence[float], lags: Sequence[int]) -> list[float]:
+    """Autocorrelation of returns for specified lags."""
+    p = _to_list(prices)
+    if len(p) < 2:
+        return [0.0 for _ in lags]
+    rets = [p[i] / p[i - 1] - 1 for i in range(1, len(p))]
+    mean = sum(rets) / len(rets)
+    var = sum((r - mean) ** 2 for r in rets) / len(rets)
+    if var == 0:
+        return [0.0 for _ in lags]
+    out = []
+    for lag in lags:
+        if lag <= 0 or lag >= len(rets):
+            out.append(0.0)
+        else:
+            cov = sum((rets[i] - mean) * (rets[i - lag] - mean) for i in range(lag, len(rets))) / (len(rets) - lag)
+            out.append(cov / var)
+    return out
+
+
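+# Lag-k partial autocorrelation taken as the last coefficient of an OLS
+# regression of the return on an intercept and its k previous lags.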
+def pacf(prices: Sequence[float], lags: Sequence[int]) -> list[float]:
+    """Partial autocorrelation of returns for specified lags."""
+    p = _to_list(prices)
+    if len(p) < 2:
+        return [0.0 for _ in lags]
+    rets = [p[i] / p[i - 1] - 1 for i in range(1, len(p))]
+    out = []
+    for k in lags:
+        if k <= 0 or k >= len(rets):
+            out.append(0.0)
+            continue
+        y = [rets[i] for i in range(k, len(rets))]
+        X = [[1.0] + [rets[i - j - 1] for j in range(k)] for i in range(k, len(rets))]
+        beta = _ols(y, X)
+        out.append(beta[-1] if beta else 0.0)
+    return out
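
A minimal usage sketch of the new module (the import path `pwb_toolbox.performance` is an assumption inferred from the package name; the diff does not show where the file lives inside the wheel):

    from pwb_toolbox.performance import cagr, max_drawdown, sharpe_ratio  # hypothetical path

    prices = [100.0, 101.2, 99.8, 102.5, 103.1, 101.9]
    print(cagr(prices))           # compound annual growth rate, assuming 252 periods per year
    print(sharpe_ratio(prices))   # annualized Sharpe ratio with a zero risk-free rate
    print(max_drawdown(prices))   # (max depth as a negative fraction, duration in bars)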