quantlib-st 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ Metadata-Version: 2.4
2
+ Name: quantlib-st
3
+ Version: 0.1.0
4
+ Summary: quantlib-st - a quantitative finance library and cli for systematic trading
5
+ Requires-Python: >=3.12
6
+ Requires-Dist: pandas>=2.3.3
7
+ Requires-Dist: numpy>=2.4.0
8
+ Dynamic: requires-dist
9
+ Dynamic: requires-python
10
+ Dynamic: summary
@@ -0,0 +1,25 @@
1
+ # quantlib
2
+
3
+ Minimal, self-contained CLI tools and library for quantitative finance.
4
+
5
+ ## Subcommands
6
+
7
+ - **[corr](correlation/README.md)**: Compute correlation matrices over time from returns.
8
+ - **[costs](costs/README.md)**: Calculate Sharpe Ratio (SR) costs for instruments based on spread and fees.
9
+
10
+ ## Install (editable - for developers)
11
+
12
+ From the repo root:
13
+
14
+ - `cd quantlib`
15
+ - `python -m pip install -e .`
16
+
17
+ This installs the `quantlib` command.
18
+
19
+ ## Build a single binary with PyInstaller
20
+
21
+ From `quantlib/`:
22
+
23
+ - `make build`
24
+
25
+ The binary will be at `dist/quantlib`.
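For orientation, here is a minimal sketch of driving the `corr` subcommand from Python once the package is installed. The console-script name is taken from the README above (the packaged entry point may differ), and the CSV rows are made up; a real run would pipe a longer return history so the EWMA estimates have enough observations.

```python
import subprocess

# Toy returns CSV: first column is the datetime index, remaining columns are instruments.
csv_text = (
    "date,ES,GC\n"
    "2024-01-02,0.001,-0.002\n"
    "2024-01-03,0.003,0.001\n"
    "2024-01-04,-0.002,0.000\n"
)

# Pipe the CSV on stdin; the subcommand writes a JSON document to stdout.
# "quantlib" is assumed to be on PATH after the editable install described above.
result = subprocess.run(
    ["quantlib", "corr", "--frequency", "D", "--date-method", "in_sample"],
    input=csv_text,
    capture_output=True,
    text=True,
    check=True,
)
print(result.stdout)
```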
@@ -0,0 +1,3 @@
1
+ __all__ = ["main"]
2
+
3
+ from cli.main import main
@@ -0,0 +1,5 @@
1
+ from cli.main import main
2
+
3
+
4
+ if __name__ == "__main__":
5
+ raise SystemExit(main())
@@ -0,0 +1,132 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import sys
6
+ from io import StringIO
7
+
8
+ import pandas as pd
9
+
10
+ from correlation.correlation_over_time import (
11
+ correlation_over_time_for_returns,
12
+ correlation_list_to_jsonable,
13
+ )
14
+
15
+
16
+ def add_corr_subcommand(subparsers: argparse._SubParsersAction) -> None:
17
+ parser = subparsers.add_parser(
18
+ "corr",
19
+ help="Compute correlations over time from CSV piped on stdin (outputs JSON).",
20
+ )
21
+
22
+ parser.add_argument(
23
+ "--frequency",
24
+ default="D",
25
+ help="Resample frequency before correlation (default: D). Use W if you want weekly.",
26
+ )
27
+ parser.add_argument(
28
+ "--date-method",
29
+ default="in_sample",
30
+ choices=["expanding", "rolling", "in_sample"],
31
+ help="How to choose the fit window over time (default: in_sample)",
32
+ )
33
+ parser.add_argument(
34
+ "--rollyears",
35
+ type=int,
36
+ default=20,
37
+ help="Rolling years (used only if --date-method rolling; default: 20)",
38
+ )
39
+ parser.add_argument(
40
+ "--interval-frequency",
41
+ default="12M",
42
+ help="How often to emit a new correlation matrix (default: 12M)",
43
+ )
44
+
45
+ parser.add_argument(
46
+ "--using-exponent",
47
+ action=argparse.BooleanOptionalAction,
48
+ default=True,
49
+ help="Use EWMA correlation (default: true)",
50
+ )
51
+ parser.add_argument(
52
+ "--ew-lookback",
53
+ type=int,
54
+ default=250,
55
+ help="EWMA span/lookback (default: 250)",
56
+ )
57
+ parser.add_argument(
58
+ "--min-periods",
59
+ type=int,
60
+ default=20,
61
+ help="Minimum observations before correlations appear (default: 20)",
62
+ )
63
+
64
+ parser.add_argument(
65
+ "--floor-at-zero",
66
+ action=argparse.BooleanOptionalAction,
67
+ default=True,
68
+ help="Floor negative correlations at 0 (default: true)",
69
+ )
70
+ parser.add_argument(
71
+ "--clip",
72
+ type=float,
73
+ default=None,
74
+ help="Optional absolute clip value for correlations (e.g. 0.9)",
75
+ )
76
+ parser.add_argument(
77
+ "--shrinkage",
78
+ type=float,
79
+ default=0.0,
80
+ help="Optional shrinkage-to-average in [0,1] (default: 0)",
81
+ )
82
+
83
+ parser.add_argument(
84
+ "--forward-fill-price-index",
85
+ action=argparse.BooleanOptionalAction,
86
+ default=True,
87
+ help="Forward fill the synthetic price index before resampling (default: true)",
88
+ )
89
+
90
+ parser.add_argument(
91
+ "--index-col",
92
+ type=int,
93
+ default=0,
94
+ help="Which CSV column is the datetime index (default: 0)",
95
+ )
96
+
97
+ parser.set_defaults(_handler=run_corr)
98
+
99
+
100
+ def run_corr(args: argparse.Namespace) -> int:
101
+ csv_text = sys.stdin.read()
102
+ if not csv_text.strip():
103
+ print(json.dumps({"error": "no input on stdin"}), file=sys.stderr)
104
+ return 2
105
+
106
+ try:
107
+ df = pd.read_csv(StringIO(csv_text), index_col=args.index_col, parse_dates=True)
108
+ except Exception as e:
109
+ print(json.dumps({"error": f"failed to parse CSV: {e}"}), file=sys.stderr)
110
+ return 2
111
+
112
+ df = df.sort_index()
113
+
114
+ corr_list = correlation_over_time_for_returns(
115
+ df,
116
+ frequency=args.frequency,
117
+ forward_fill_price_index=args.forward_fill_price_index,
118
+ date_method=args.date_method,
119
+ rollyears=args.rollyears,
120
+ interval_frequency=args.interval_frequency,
121
+ using_exponent=args.using_exponent,
122
+ ew_lookback=args.ew_lookback,
123
+ min_periods=args.min_periods,
124
+ floor_at_zero=args.floor_at_zero,
125
+ clip=args.clip,
126
+ shrinkage=args.shrinkage,
127
+ )
128
+
129
+ out = correlation_list_to_jsonable(corr_list)
130
+ sys.stdout.write(json.dumps(out))
131
+ sys.stdout.write("\n")
132
+ return 0
@@ -0,0 +1,125 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import sys
6
+ import pandas as pd
7
+ from io import StringIO
8
+
9
+ from costs.data_source import ConfigFileCostDataSource, IBKRCostDataSource
10
+ from costs.calculator import (
11
+ calculate_sr_cost,
12
+ calculate_annualized_volatility,
13
+ calculate_recent_average_price,
14
+ calculate_cost_percentage_terms,
15
+ )
16
+
17
+
18
+ def add_costs_subcommand(subparsers: argparse._SubParsersAction) -> None:
19
+ parser = subparsers.add_parser(
20
+ "costs",
21
+ help="Calculate SR costs for an instrument from price CSV piped on stdin or provided via file.",
22
+ )
23
+
24
+ parser.add_argument(
25
+ "--instrument",
26
+ required=True,
27
+ help="Instrument code (e.g., ES, GC).",
28
+ )
29
+ parser.add_argument(
30
+ "--config",
31
+ help="Path to JSON file containing instrument cost configuration.",
32
+ )
33
+ parser.add_argument(
34
+ "--use-ibkr",
35
+ action="store_true",
36
+ help="Use IBKR API for cost data (currently a stub).",
37
+ )
38
+ parser.add_argument(
39
+ "--vol",
40
+ type=float,
41
+ help="Override annualized volatility (as a decimal, e.g., 0.15 for 15%%).",
42
+ )
43
+ parser.add_argument(
44
+ "--price",
45
+ type=float,
46
+ help="Override current price (otherwise uses the last price in the CSV).",
47
+ )
48
+
49
+ parser.set_defaults(_handler=handle_costs)
50
+
51
+
52
+ def handle_costs(args: argparse.Namespace) -> int:
53
+ # 1. Get Cost Config
54
+ if args.use_ibkr:
55
+ data_source = IBKRCostDataSource()
56
+ elif args.config:
57
+ data_source = ConfigFileCostDataSource(args.config)
58
+ else:
59
+ print("Error: Must provide either --config or --use-ibkr", file=sys.stderr)
60
+ return 1
61
+
62
+ try:
63
+ cost_config = data_source.get_cost_config(args.instrument)
64
+ except Exception as e:
65
+ print(f"Error fetching cost config: {e}", file=sys.stderr)
66
+ return 1
67
+
68
+ # 2. Get Price Data
69
+ if not sys.stdin.isatty():
70
+ # Read from stdin
71
+ input_data = sys.stdin.read()
72
+ df = pd.read_csv(StringIO(input_data), index_col=0, parse_dates=True)
73
+ else:
74
+ # If no stdin, we need at least --price and --vol if we want to calculate anything
75
+ df = pd.DataFrame()
76
+
77
+ if df.empty and (args.price is None or args.vol is None):
78
+ print(
79
+ "Error: Must pipe price CSV to stdin or provide both --price and --vol overrides.",
80
+ file=sys.stderr,
81
+ )
82
+ return 1
83
+
84
+ # 3. Determine Price and Volatility
85
+ if args.price is not None:
86
+ average_price = float(args.price)
87
+ else:
88
+ # Use average price over the last year (256 days)
89
+ average_price = calculate_recent_average_price(df.iloc[:, 0])
90
+
91
+ if args.vol is not None:
92
+ # If user provides --vol, we assume it's annualized volatility in price units
93
+ ann_stdev_price_units = float(args.vol)
94
+ else:
95
+ # Calculate annualized volatility in price units (average over last year)
96
+ ann_stdev_price_units = float(calculate_annualized_volatility(df.iloc[:, 0]))
97
+
98
+ # 4. Calculate Costs
99
+ sr_cost = float(
100
+ calculate_sr_cost(
101
+ cost_config,
102
+ price=average_price,
103
+ ann_stdev_price_units=ann_stdev_price_units,
104
+ )
105
+ )
106
+
107
+ pct_cost = float(
108
+ calculate_cost_percentage_terms(
109
+ cost_config,
110
+ blocks_traded=1.0,
111
+ price=average_price,
112
+ )
113
+ )
114
+
115
+ # 5. Output Results
116
+ result = {
117
+ "instrument": args.instrument,
118
+ "average_price": round(average_price, 4),
119
+ "ann_stdev_price_units": round(ann_stdev_price_units, 4),
120
+ "sr_cost": round(sr_cost, 5),
121
+ "percentage_cost": round(pct_cost, 6),
122
+ }
123
+
124
+ print(json.dumps(result, indent=2))
125
+ return 0
@@ -0,0 +1,29 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+
5
+ from cli.corr_cmd import add_corr_subcommand
6
+ from cli.costs_cmd import add_costs_subcommand
7
+
8
+
9
+ def main(argv: list[str] | None = None) -> int:
10
+ parser = argparse.ArgumentParser(
11
+ prog="quantlib",
12
+ description="quantlib CLI (corr is the first subcommand; more will be added).",
13
+ )
14
+
15
+ subparsers = parser.add_subparsers(dest="subcommand", required=True)
16
+
17
+ add_corr_subcommand(subparsers)
18
+ add_costs_subcommand(subparsers)
19
+
20
+ args = parser.parse_args(argv)
21
+
22
+ # Dispatch
23
+ if args.subcommand == "corr":
24
+ return args._handler(args)
25
+ elif args.subcommand == "costs":
26
+ return args._handler(args)
27
+
28
+ parser.error(f"Unknown subcommand: {args.subcommand}")
29
+ return 2
File without changes
@@ -0,0 +1,143 @@
1
+ from __future__ import annotations
2
+
3
+ import datetime
4
+ from dataclasses import dataclass
5
+
6
+ import pandas as pd
7
+
8
+ from .fitting_dates import generate_fitting_dates, listOfFittingDates
9
+ from .exponential_correlation import (
10
+ CorrelationEstimate,
11
+ ExponentialCorrelationResults,
12
+ create_boring_corr_matrix,
13
+ modify_correlation,
14
+ )
15
+
16
+
17
+ @dataclass
18
+ class CorrelationList:
19
+ corr_list: list[CorrelationEstimate]
20
+ column_names: list[str]
21
+ fit_dates: listOfFittingDates
22
+
23
+
24
+ def correlation_over_time_for_returns(
25
+ returns_for_correlation: pd.DataFrame,
26
+ frequency: str = "D",
27
+ forward_fill_price_index: bool = True,
28
+ **kwargs,
29
+ ) -> CorrelationList:
30
+ # Build a synthetic price index from returns, resample, then diff.
31
+ # For daily frequency, this is essentially a no-op aside from losing the first row.
32
+ index_prices_for_correlation = returns_for_correlation.cumsum()
33
+ if forward_fill_price_index:
34
+ index_prices_for_correlation = index_prices_for_correlation.ffill()
35
+
36
+ index_prices_for_correlation = index_prices_for_correlation.resample(frequency).last()
37
+ returns_for_correlation = index_prices_for_correlation.diff()
38
+
39
+ return correlation_over_time(returns_for_correlation, **kwargs)
40
+
41
+
42
+ def correlation_over_time(
43
+ data_for_correlation: pd.DataFrame,
44
+ date_method: str = "in_sample",
45
+ rollyears: int = 20,
46
+ interval_frequency: str = "12M",
47
+ using_exponent: bool = True,
48
+ ew_lookback: int = 250,
49
+ min_periods: int = 20,
50
+ no_data_offdiag: float = 0.99,
51
+ floor_at_zero: bool = True,
52
+ clip: float | None = None,
53
+ shrinkage: float = 0.0,
54
+ ) -> CorrelationList:
55
+ column_names = list(data_for_correlation.columns)
56
+
57
+ fit_dates = generate_fitting_dates(
58
+ data_for_correlation,
59
+ date_method=date_method,
60
+ rollyears=rollyears,
61
+ interval_frequency=interval_frequency,
62
+ )
63
+
64
+ corr_list: list[CorrelationEstimate] = []
65
+
66
+ if using_exponent:
67
+ results = ExponentialCorrelationResults(
68
+ data_for_correlation, ew_lookback=ew_lookback, min_periods=min_periods
69
+ )
70
+
71
+ for fit_period in fit_dates:
72
+ if getattr(fit_period, "no_data", False):
73
+ corr_list.append(
74
+ create_boring_corr_matrix(
75
+ len(column_names), column_names, offdiag=no_data_offdiag
76
+ )
77
+ )
78
+ continue
79
+
80
+ corr = results.last_valid_cor_matrix_for_date(fit_period.fit_end)
81
+ if pd.isna(corr.values).all():
82
+ corr = create_boring_corr_matrix(
83
+ len(column_names), column_names, offdiag=no_data_offdiag
84
+ )
85
+
86
+ corr = modify_correlation(
87
+ corr,
88
+ floor_at_zero=floor_at_zero,
89
+ clip_value=clip,
90
+ shrinkage=shrinkage,
91
+ )
92
+ corr_list.append(corr)
93
+
94
+ else:
95
+ for fit_period in fit_dates:
96
+ if getattr(fit_period, "no_data", False):
97
+ corr_list.append(
98
+ create_boring_corr_matrix(
99
+ len(column_names), column_names, offdiag=no_data_offdiag
100
+ )
101
+ )
102
+ continue
103
+
104
+ sub = data_for_correlation.loc[fit_period.fit_start : fit_period.fit_end]
105
+ corr_pd = sub.corr()
106
+ corr = CorrelationEstimate(values=corr_pd.values, columns=column_names)
107
+ corr = modify_correlation(
108
+ corr,
109
+ floor_at_zero=floor_at_zero,
110
+ clip_value=clip,
111
+ shrinkage=shrinkage,
112
+ )
113
+ corr_list.append(corr)
114
+
115
+ return CorrelationList(corr_list=corr_list, column_names=column_names, fit_dates=fit_dates)
116
+
117
+
118
+ def correlation_list_to_jsonable(corr_list: CorrelationList) -> dict:
119
+ periods = []
120
+ for fit_period, corr in zip(corr_list.fit_dates, corr_list.corr_list):
121
+ periods.append(
122
+ {
123
+ "fit_start": _dt_to_iso(fit_period.fit_start),
124
+ "fit_end": _dt_to_iso(fit_period.fit_end),
125
+ "period_start": _dt_to_iso(fit_period.period_start),
126
+ "period_end": _dt_to_iso(fit_period.period_end),
127
+ "no_data": bool(getattr(fit_period, "no_data", False)),
128
+ "correlation": corr.as_dict(),
129
+ }
130
+ )
131
+
132
+ return {
133
+ "columns": list(corr_list.column_names),
134
+ "periods": periods,
135
+ }
136
+
137
+
138
+ def _dt_to_iso(dt: datetime.datetime) -> str:
139
+ if isinstance(dt, pd.Timestamp):
140
+ dt = dt.to_pydatetime()
141
+ if dt.tzinfo is not None:
142
+ return dt.isoformat()
143
+ return dt.replace(tzinfo=datetime.timezone.utc).isoformat()
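As a rough sketch of how these pieces fit together outside the CLI: the snippet below feeds toy returns through `correlation_over_time_for_returns` and serializes the result. The lookback and minimum-period settings are deliberately smaller than the CLI defaults so a tiny sample produces numbers rather than the placeholder matrix.

```python
import numpy as np
import pandas as pd

from correlation.correlation_over_time import (
    correlation_over_time_for_returns,
    correlation_list_to_jsonable,
)

# 30 business days of made-up daily returns for two instruments.
dates = pd.date_range("2024-01-01", periods=30, freq="B")
returns = pd.DataFrame(
    np.random.default_rng(0).normal(0.0, 0.01, size=(30, 2)),
    index=dates,
    columns=["ES", "GC"],
)

# In-sample fit with shortened EWMA settings (illustrative values only).
corr_list = correlation_over_time_for_returns(
    returns,
    frequency="D",
    date_method="in_sample",
    ew_lookback=10,
    min_periods=2,
)

# One period is produced; its correlation matrix is JSON-ready.
print(correlation_list_to_jsonable(corr_list)["periods"][0]["correlation"])
```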
@@ -0,0 +1,115 @@
1
+ from __future__ import annotations
2
+
3
+ import datetime
4
+ from dataclasses import dataclass
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+
9
+
10
+ @dataclass
11
+ class CorrelationEstimate:
12
+ values: np.ndarray
13
+ columns: list[str]
14
+
15
+ def as_dict(self) -> dict:
16
+ return {
17
+ "columns": list(self.columns),
18
+ "values": self.values.tolist(),
19
+ }
20
+
21
+ def floor_at_zero(self) -> "CorrelationEstimate":
22
+ values = self.values.copy()
23
+ values[values < 0.0] = 0.0
24
+ np.fill_diagonal(values, 1.0)
25
+ return CorrelationEstimate(values=values, columns=self.columns)
26
+
27
+ def clip(self, clip_value: float | None) -> "CorrelationEstimate":
28
+ if clip_value is None:
29
+ return self
30
+ clip_value = abs(float(clip_value))
31
+ values = self.values.copy()
32
+ values[values < -clip_value] = -clip_value
33
+ values[values > clip_value] = clip_value
34
+ np.fill_diagonal(values, 1.0)
35
+ return CorrelationEstimate(values=values, columns=self.columns)
36
+
37
+ def shrink_to_average(self, shrinkage: float) -> "CorrelationEstimate":
38
+ shrinkage = float(shrinkage)
39
+ if shrinkage <= 0.0:
40
+ return self
41
+ if shrinkage >= 1.0:
42
+ shrinkage = 1.0
43
+
44
+ values = self.values.copy()
45
+ vals = values.copy()
46
+ np.fill_diagonal(vals, np.nan)
47
+ avg = np.nanmean(vals)
48
+ if np.isnan(avg):
49
+ return self
50
+
51
+ prior = np.full_like(values, avg, dtype=float)
52
+ np.fill_diagonal(prior, 1.0)
53
+ shrunk = shrinkage * prior + (1.0 - shrinkage) * values
54
+ np.fill_diagonal(shrunk, 1.0)
55
+ return CorrelationEstimate(values=shrunk, columns=self.columns)
56
+
57
+
58
+ def modify_correlation(
59
+ corr: CorrelationEstimate,
60
+ *,
61
+ floor_at_zero: bool = True,
62
+ shrinkage: float = 0.0,
63
+ clip_value: float | None = None,
64
+ ) -> CorrelationEstimate:
65
+ if floor_at_zero:
66
+ corr = corr.floor_at_zero()
67
+ corr = corr.clip(clip_value)
68
+ if shrinkage and shrinkage > 0.0:
69
+ corr = corr.shrink_to_average(shrinkage)
70
+ return corr
71
+
72
+
73
+ def create_boring_corr_matrix(
74
+ size: int, columns: list[str], offdiag: float = 0.99
75
+ ) -> CorrelationEstimate:
76
+ values = np.full((size, size), offdiag, dtype=float)
77
+ np.fill_diagonal(values, 1.0)
78
+ return CorrelationEstimate(values=values, columns=columns)
79
+
80
+
81
+ class ExponentialCorrelationResults:
82
+ def __init__(
83
+ self, data_for_correlation: pd.DataFrame, ew_lookback: int = 250, min_periods: int = 20
84
+ ):
85
+ self._columns = list(data_for_correlation.columns)
86
+ self._raw_correlations = data_for_correlation.ewm(
87
+ span=ew_lookback, min_periods=min_periods, ignore_na=True
88
+ ).corr(pairwise=True)
89
+
90
+ @property
91
+ def raw_correlations(self) -> pd.DataFrame:
92
+ return self._raw_correlations
93
+
94
+ @property
95
+ def columns(self) -> list[str]:
96
+ return self._columns
97
+
98
+ def last_valid_cor_matrix_for_date(self, date_point: datetime.datetime) -> CorrelationEstimate:
99
+ return last_valid_cor_matrix_for_date(self.raw_correlations, self.columns, date_point)
100
+
101
+
102
+ def last_valid_cor_matrix_for_date(
103
+ raw_correlations: pd.DataFrame, columns: list[str], date_point: datetime.datetime
104
+ ) -> CorrelationEstimate:
105
+ size_of_matrix = len(columns)
106
+ subset = raw_correlations[raw_correlations.index.get_level_values(0) < date_point]
107
+
108
+ if subset.shape[0] < size_of_matrix:
109
+ return CorrelationEstimate(
110
+ values=np.full((size_of_matrix, size_of_matrix), np.nan),
111
+ columns=columns,
112
+ )
113
+
114
+ corr_matrix_values = subset.tail(size_of_matrix).values
115
+ return CorrelationEstimate(values=corr_matrix_values, columns=columns)
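To make the floor/clip/shrinkage pipeline concrete, here is a small hand-checkable sketch of `modify_correlation` on a toy 3x3 matrix (instrument names are placeholders):

```python
import numpy as np

from correlation.exponential_correlation import CorrelationEstimate, modify_correlation

# Toy estimate with off-diagonal correlations of 0.8, 0.2 and 0.5 (average 0.5).
est = CorrelationEstimate(
    values=np.array(
        [
            [1.0, 0.8, 0.2],
            [0.8, 1.0, 0.5],
            [0.2, 0.5, 1.0],
        ]
    ),
    columns=["ES", "GC", "CL"],
)

# With shrinkage=0.5 each off-diagonal moves halfway towards the 0.5 average:
# 0.8 -> 0.65, 0.2 -> 0.35, 0.5 -> 0.5; the diagonal stays at 1.0.
shrunk = modify_correlation(est, floor_at_zero=False, shrinkage=0.5)
print(shrunk.values)
```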
@@ -0,0 +1,120 @@
1
+ from __future__ import annotations
2
+
3
+ import datetime
4
+ from dataclasses import dataclass
5
+ from typing import List
6
+
7
+ import pandas as pd
8
+
9
+
10
+ IN_SAMPLE = "in_sample"
11
+ ROLLING = "rolling"
12
+ EXPANDING = "expanding"
13
+
14
+ POSSIBLE_DATE_METHODS = [IN_SAMPLE, ROLLING, EXPANDING]
15
+
16
+
17
+ @dataclass
18
+ class fitDates:
19
+ fit_start: datetime.datetime
20
+ fit_end: datetime.datetime
21
+ period_start: datetime.datetime
22
+ period_end: datetime.datetime
23
+ no_data: bool = False
24
+
25
+
26
+ class listOfFittingDates(list):
27
+ def list_of_starting_periods(self) -> list:
28
+ return [period.period_start for period in self]
29
+
30
+ def index_of_most_recent_period_before_relevant_date(
31
+ self, relevant_date: datetime.datetime
32
+ ):
33
+ index = []
34
+ list_of_start_periods = self.list_of_starting_periods()
35
+ if relevant_date < list_of_start_periods[0]:
36
+ raise Exception(f"Date {relevant_date} is before first fitting date")
37
+
38
+ for index, start_date in enumerate(list_of_start_periods):
39
+ if relevant_date < start_date:
40
+ return index - 1
41
+
42
+ return index
43
+
44
+
45
+ def generate_fitting_dates(
46
+ data: pd.DataFrame,
47
+ date_method: str,
48
+ rollyears: int = 20,
49
+ interval_frequency: str = "12M",
50
+ ) -> listOfFittingDates:
51
+ if date_method not in POSSIBLE_DATE_METHODS:
52
+ raise ValueError(
53
+ f"Unknown date_method={date_method}; expected one of {POSSIBLE_DATE_METHODS}"
54
+ )
55
+
56
+ start_date = data.index[0]
57
+ end_date = data.index[-1]
58
+
59
+ if date_method == IN_SAMPLE:
60
+ return listOfFittingDates(
61
+ [fitDates(start_date, end_date, start_date, end_date, no_data=False)]
62
+ )
63
+
64
+ boundaries = _list_of_starting_dates_per_period(
65
+ start_date, end_date, interval_frequency=interval_frequency
66
+ )
67
+
68
+ # Short history: fall back to a single in-sample period.
69
+ if len(boundaries) < 2:
70
+ return listOfFittingDates(
71
+ [fitDates(start_date, end_date, start_date, end_date, no_data=False)]
72
+ )
73
+
74
+ periods: List[fitDates] = []
75
+ for period_index in range(len(boundaries))[1:-1]:
76
+ period_start = boundaries[period_index]
77
+ period_end = boundaries[period_index + 1]
78
+
79
+ if date_method == EXPANDING:
80
+ fit_start = start_date
81
+ elif date_method == ROLLING:
82
+ yearidx_to_use = max(0, period_index - rollyears)
83
+ fit_start = boundaries[yearidx_to_use]
84
+ else:
85
+ raise ValueError(f"Unknown date_method={date_method}")
86
+
87
+ fit_end = period_start
88
+ periods.append(fitDates(fit_start, fit_end, period_start, period_end, no_data=False))
89
+
90
+ if date_method in [ROLLING, EXPANDING] and len(boundaries) >= 2:
91
+ periods = [
92
+ fitDates(
93
+ start_date,
94
+ start_date,
95
+ start_date,
96
+ boundaries[1],
97
+ no_data=True,
98
+ )
99
+ ] + periods
100
+
101
+ return listOfFittingDates(periods)
102
+
103
+
104
+ def _list_of_starting_dates_per_period(
105
+ start_date: datetime.datetime,
106
+ end_date: datetime.datetime,
107
+ interval_frequency: str = "12M",
108
+ ):
109
+ if interval_frequency == "W":
110
+ use_interval_frequency = "7D"
111
+ elif interval_frequency == "M":
112
+ use_interval_frequency = "30D"
113
+ elif interval_frequency in ["12M", "Y"]:
114
+ use_interval_frequency = "365D"
115
+ else:
116
+ use_interval_frequency = interval_frequency
117
+
118
+ results = list(pd.date_range(end_date, start_date, freq="-" + use_interval_frequency))
119
+ results.reverse()
120
+ return results
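A quick sketch of what `generate_fitting_dates` returns for an expanding fit on a short, made-up business-day index (dates chosen only for illustration): the first entry is a "no data" warm-up period, and every later period keeps `fit_start` pinned at the first date while `fit_end` advances.

```python
import pandas as pd

from correlation.fitting_dates import generate_fitting_dates, EXPANDING

# Four years of business days; 12M intervals give roughly annual period boundaries.
idx = pd.date_range("2019-01-01", "2022-12-31", freq="B")
df = pd.DataFrame({"x": 0.0}, index=idx)

for p in generate_fitting_dates(df, date_method=EXPANDING, interval_frequency="12M"):
    print(p.fit_start.date(), p.fit_end.date(),
          p.period_start.date(), p.period_end.date(), p.no_data)
```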
@@ -0,0 +1 @@
1
+ from __future__ import annotations
@@ -0,0 +1,118 @@
1
+ from __future__ import annotations
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+ from costs.config import InstrumentCostConfig
6
+
7
+
8
+ def calculate_annualized_volatility(
9
+ prices: pd.Series, days_per_year: int = 256, vol_lookback: int = 35
10
+ ) -> float:
11
+ """
12
+ Calculate annualized volatility from a price series.
13
+ Matches the logic in pysystemtrade:
14
+ 1. Calculate absolute price changes.
15
+ 2. Calculate EWMA volatility (span=35).
16
+ 3. Take the mean of that volatility over the last year (256 days).
17
+ 4. Annualize it by multiplying by sqrt(256).
18
+ """
19
+ daily_returns = prices.diff().dropna()
20
+
21
+ # EWMA volatility of absolute returns
22
+ daily_vol = daily_returns.ewm(span=vol_lookback, adjust=True, min_periods=10).std()
23
+
24
+ # Average over the last year (256 business days)
25
+ recent_daily_vol = daily_vol.tail(days_per_year).mean()
26
+
27
+ # Annualize
28
+ return float(recent_daily_vol * np.sqrt(days_per_year))
29
+
30
+
31
+ def calculate_recent_average_price(
32
+ prices: pd.Series, days_per_year: int = 256
33
+ ) -> float:
34
+ """
35
+ Calculate the average price over the last year (256 business days).
36
+ Matches _recent_average_price in pysystemtrade.
37
+ """
38
+ return float(prices.tail(days_per_year).mean())
39
+
40
+
41
+ def calculate_sr_cost(
42
+ cost_config: InstrumentCostConfig,
43
+ price: float,
44
+ ann_stdev_price_units: float,
45
+ blocks_traded: float = 1.0,
46
+ ) -> float:
47
+ """
48
+ Calculates the expected reduction in Sharpe Ratio due to costs.
49
+ Ported from pysystemtrade sysobjects.instruments.instrumentCosts.calculate_sr_cost
50
+ """
51
+ cost_instrument_currency = calculate_cost_instrument_currency(
52
+ cost_config, blocks_traded=blocks_traded, price=price
53
+ )
54
+
55
+ # Annualized stdev in instrument currency (price units * point size)
56
+ ann_stdev_instrument_currency = ann_stdev_price_units * cost_config.point_size
57
+
58
+ if ann_stdev_instrument_currency == 0:
59
+ return 0.0
60
+
61
+ return cost_instrument_currency / ann_stdev_instrument_currency
62
+
63
+
64
+ def calculate_cost_instrument_currency(
65
+ cost_config: InstrumentCostConfig,
66
+ blocks_traded: float,
67
+ price: float,
68
+ include_slippage: bool = True,
69
+ ) -> float:
70
+ """
71
+ Ported from pysystemtrade sysobjects.instruments.instrumentCosts.calculate_cost_instrument_currency
72
+ """
73
+ value_per_block = price * cost_config.point_size
74
+
75
+ if include_slippage:
76
+ slippage = (
77
+ abs(blocks_traded) * cost_config.price_slippage * cost_config.point_size
78
+ )
79
+ else:
80
+ slippage = 0.0
81
+
82
+ commission = calculate_total_commission(
83
+ cost_config, blocks_traded=blocks_traded, value_per_block=value_per_block
84
+ )
85
+
86
+ return slippage + commission
87
+
88
+
89
+ def calculate_total_commission(
90
+ cost_config: InstrumentCostConfig, blocks_traded: float, value_per_block: float
91
+ ) -> float:
92
+ """
93
+ Ported from pysystemtrade sysobjects.instruments.instrumentCosts.calculate_total_commission
94
+ """
95
+ per_trade_commission = cost_config.per_trade_commission
96
+ per_block_commission = abs(blocks_traded) * cost_config.per_block_commission
97
+ percentage_commission = (
98
+ cost_config.percentage_commission * abs(blocks_traded) * value_per_block
99
+ )
100
+
101
+ return max([per_block_commission, per_trade_commission, percentage_commission])
102
+
103
+
104
+ def calculate_cost_percentage_terms(
105
+ cost_config: InstrumentCostConfig, blocks_traded: float, price: float
106
+ ) -> float:
107
+ """
108
+ Ported from pysystemtrade sysobjects.instruments.instrumentCosts.calculate_cost_percentage_terms
109
+ """
110
+ cost_in_currency = calculate_cost_instrument_currency(
111
+ cost_config, blocks_traded=blocks_traded, price=price
112
+ )
113
+ total_value = abs(blocks_traded) * price * cost_config.point_size
114
+
115
+ if total_value == 0:
116
+ return 0.0
117
+
118
+ return cost_in_currency / total_value
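A worked sketch of the cost arithmetic with made-up, ES-style numbers (the point size, spread, commission and volatility figures are purely illustrative, not real contract costs):

```python
from costs.config import InstrumentCostConfig
from costs.calculator import calculate_sr_cost, calculate_cost_percentage_terms

# Hypothetical instrument: $50 point size, half-spread of 0.125 points, $2.50 per contract.
config = InstrumentCostConfig(
    instrument_code="ES",
    point_size=50.0,
    price_slippage=0.125,
    per_block_commission=2.5,
)

price = 4500.0
ann_stdev_price_units = 675.0  # e.g. roughly 15% annualized vol on a 4500 price

# slippage = 0.125 * 50 = 6.25; commission = 2.50; cost per contract = 8.75
# annualized stdev in currency = 675 * 50 = 33750, so SR cost ~= 8.75 / 33750 ~= 0.00026
sr_cost = calculate_sr_cost(config, price=price, ann_stdev_price_units=ann_stdev_price_units)

# percentage cost = 8.75 / (4500 * 50) ~= 0.000039 of notional
pct_cost = calculate_cost_percentage_terms(config, blocks_traded=1.0, price=price)

print(round(sr_cost, 5), round(pct_cost, 6))
```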
@@ -0,0 +1,24 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+
6
+ @dataclass
7
+ class InstrumentCostConfig:
8
+ instrument_code: str
9
+ point_size: float
10
+ price_slippage: float # Half spread in price units
11
+ per_block_commission: float = 0.0
12
+ percentage_commission: float = 0.0
13
+ per_trade_commission: float = 0.0
14
+
15
+ @classmethod
16
+ def from_dict(cls, data: dict) -> InstrumentCostConfig:
17
+ return cls(
18
+ instrument_code=data["instrument_code"],
19
+ point_size=data["point_size"],
20
+ price_slippage=data["price_slippage"],
21
+ per_block_commission=data.get("per_block_commission", 0.0),
22
+ percentage_commission=data.get("percentage_commission", 0.0),
23
+ per_trade_commission=data.get("per_trade_commission", 0.0),
24
+ )
@@ -0,0 +1,46 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from abc import ABC, abstractmethod
5
+ from costs.config import InstrumentCostConfig
6
+
7
+
8
+ class CostDataSource(ABC):
9
+ @abstractmethod
10
+ def get_cost_config(self, instrument_code: str) -> InstrumentCostConfig:
11
+ pass
12
+
13
+
14
+ class ConfigFileCostDataSource(CostDataSource):
15
+ def __init__(self, config_path: str):
16
+ with open(config_path, "r") as f:
17
+ self.config_data = json.load(f)
18
+
19
+ def get_cost_config(self, instrument_code: str) -> InstrumentCostConfig:
20
+ # Assuming the config file is a list of instrument configs or a dict keyed by code
21
+ if isinstance(self.config_data, list):
22
+ for item in self.config_data:
23
+ if item["instrument_code"] == instrument_code:
24
+ return InstrumentCostConfig.from_dict(item)
25
+ elif isinstance(self.config_data, dict):
26
+ if instrument_code in self.config_data:
27
+ return InstrumentCostConfig.from_dict(self.config_data[instrument_code])
28
+ elif (
29
+ "instrument_code" in self.config_data
30
+ and self.config_data["instrument_code"] == instrument_code
31
+ ):
32
+ return InstrumentCostConfig.from_dict(self.config_data)
33
+
34
+ raise ValueError(f"No cost config found for instrument: {instrument_code}")
35
+
36
+
37
+ class IBKRCostDataSource(CostDataSource):
38
+ """
39
+ Stub for future IBKR API integration.
40
+ """
41
+
42
+ def get_cost_config(self, instrument_code: str) -> InstrumentCostConfig:
43
+ # TODO: Implement IBKR API calls to fetch real-time/historical spreads and commissions
44
+ raise NotImplementedError(
45
+ "IBKR API integration not yet implemented. Please use --config for now."
46
+ )
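A minimal sketch of a config file that `ConfigFileCostDataSource` accepts, assuming the dict-keyed-by-instrument-code shape mentioned in the comment above (a list of entries, each carrying `instrument_code`, also works; all values are illustrative):

```python
import json
import tempfile

from costs.data_source import ConfigFileCostDataSource

config = {
    "ES": {
        "instrument_code": "ES",
        "point_size": 50.0,
        "price_slippage": 0.125,
        "per_block_commission": 2.5,
    }
}

# Write the config to a temporary JSON file and load a single instrument back.
with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
    json.dump(config, f)
    path = f.name

source = ConfigFileCostDataSource(path)
print(source.get_cost_config("ES"))
```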
@@ -0,0 +1,10 @@
1
+ Metadata-Version: 2.4
2
+ Name: quantlib-st
3
+ Version: 0.1.0
4
+ Summary: quantlib-st - a quantitative finance library and cli for systematic trading
5
+ Requires-Python: >=3.12
6
+ Requires-Dist: pandas>=2.3.3
7
+ Requires-Dist: numpy>=2.4.0
8
+ Dynamic: requires-dist
9
+ Dynamic: requires-python
10
+ Dynamic: summary
@@ -0,0 +1,25 @@
1
+ README.md
2
+ setup.py
3
+ cli/__init__.py
4
+ cli/__main__.py
5
+ cli/corr_cmd.py
6
+ cli/costs_cmd.py
7
+ cli/main.py
8
+ correlation/__init__.py
9
+ correlation/correlation_over_time.py
10
+ correlation/exponential_correlation.py
11
+ correlation/fitting_dates.py
12
+ costs/__init__.py
13
+ costs/calculator.py
14
+ costs/config.py
15
+ costs/data_source.py
16
+ quantlib_st.egg-info/PKG-INFO
17
+ quantlib_st.egg-info/SOURCES.txt
18
+ quantlib_st.egg-info/dependency_links.txt
19
+ quantlib_st.egg-info/entry_points.txt
20
+ quantlib_st.egg-info/requires.txt
21
+ quantlib_st.egg-info/top_level.txt
22
+ tests/__init__.py
23
+ tests/correlation/__init__.py
24
+ tests/correlation/test_correlation.py
25
+ tests/correlation/test_fitting_dates.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ quantlib- = cli.main:main
@@ -0,0 +1,2 @@
1
+ pandas>=2.3.3
2
+ numpy>=2.4.0
@@ -0,0 +1,4 @@
1
+ cli
2
+ correlation
3
+ costs
4
+ tests
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,19 @@
1
+ from setuptools import setup, find_packages
2
+
3
+
4
+ setup(
5
+ name="quantlib-st",
6
+ version="0.1.0",
7
+ description="quantlib-st - a quantitative finance library and cli for systematic trading",
8
+ packages=find_packages(),
9
+ python_requires=">=3.12",
10
+ install_requires=[
11
+ "pandas>=2.3.3",
12
+ "numpy>=2.4.0",
13
+ ],
14
+ entry_points={
15
+ "console_scripts": [
16
+ "quantlib-=cli.main:main",
17
+ ]
18
+ },
19
+ )
File without changes
File without changes
@@ -0,0 +1,75 @@
1
+ import datetime
2
+ import numpy as np
3
+ import pandas as pd
4
+
5
+ from correlation.exponential_correlation import (
6
+ ExponentialCorrelationResults,
7
+ CorrelationEstimate,
8
+ )
9
+
10
+
11
+ def test_exponential_correlation_results_structure():
12
+ # Create dummy returns for 3 instruments over 5 days
13
+ dates = pd.date_range("2023-01-01", periods=5)
14
+ data = pd.DataFrame(np.random.randn(5, 3), index=dates, columns=["A", "B", "C"])
15
+
16
+ # Initialize with a small lookback for testing
17
+ results = ExponentialCorrelationResults(data, ew_lookback=5, min_periods=2)
18
+
19
+ # 1. Understanding _raw_correlations
20
+ # When we call .corr(pairwise=True) on an EWM object, pandas returns a MultiIndex DataFrame.
21
+ # Level 0: Date
22
+ # Level 1: Instrument name
23
+ raw = results.raw_correlations
24
+
25
+ assert isinstance(raw, pd.DataFrame)
26
+ # For 5 days and 3 instruments, we expect 5 * 3 = 15 rows
27
+ assert len(raw) == 15
28
+ # The columns should match the instrument names
29
+ assert list(raw.columns) == ["A", "B", "C"]
30
+
31
+ # Check the first date's matrix (it should be NaN if min_periods=2)
32
+ first_date = dates[0]
33
+ matrix_at_t0 = raw.loc[first_date]
34
+ assert matrix_at_t0.isna().all().all()
35
+
36
+ # Check the third date's matrix (should have values)
37
+ third_date = dates[2]
38
+ matrix_at_t2 = raw.loc[third_date]
39
+ assert not matrix_at_t2.isna().any().any()
40
+ assert matrix_at_t2.loc["A", "A"] == 1.0 # Diagonal is always 1.0
41
+
42
+
43
+ def test_last_valid_cor_matrix_for_date():
44
+ dates = pd.date_range("2023-01-01", periods=10)
45
+ data = pd.DataFrame(np.random.randn(10, 2), index=dates, columns=["X", "Y"])
46
+
47
+ results = ExponentialCorrelationResults(data, ew_lookback=5, min_periods=2)
48
+
49
+ # Test retrieving matrix for a specific point in time
50
+ # If we ask for 2023-01-05, it should give us the matrix from the last available date BEFORE 2023-01-05
51
+ target_date = datetime.datetime(2023, 1, 5)
52
+ estimate = results.last_valid_cor_matrix_for_date(target_date)
53
+
54
+ assert isinstance(estimate, CorrelationEstimate)
55
+ assert estimate.columns == ["X", "Y"]
56
+ assert estimate.values.shape == (2, 2)
57
+
58
+ # The values should match the raw correlation at 2023-01-04
59
+ expected_values = np.asarray(
60
+ results.raw_correlations.loc[pd.Timestamp("2023-01-04")].values
61
+ )
62
+ np.testing.assert_array_almost_equal(estimate.values, expected_values)
63
+
64
+
65
+ def test_last_valid_cor_matrix_no_data():
66
+ # Test what happens if we ask for a date before any data exists
67
+ dates = pd.date_range("2023-01-10", periods=5)
68
+ data = pd.DataFrame(np.random.randn(5, 2), index=dates, columns=["X", "Y"])
69
+ results = ExponentialCorrelationResults(data)
70
+
71
+ early_date = datetime.datetime(2023, 1, 1)
72
+ estimate = results.last_valid_cor_matrix_for_date(early_date)
73
+
74
+ # Should return a matrix of NaNs
75
+ assert np.isnan(estimate.values).all()
@@ -0,0 +1,54 @@
1
+ import pandas as pd
2
+
3
+ from correlation.fitting_dates import (
4
+ generate_fitting_dates,
5
+ IN_SAMPLE,
6
+ EXPANDING,
7
+ ROLLING,
8
+ )
9
+
10
+
11
+ def _make_df(start: str, end: str, freq: str = "M") -> pd.DataFrame:
12
+ idx = pd.date_range(start, end, freq=freq)
13
+ return pd.DataFrame(index=idx, data={"x": 0})
14
+
15
+
16
+ def test_in_sample_returns_single_period():
17
+ df = _make_df("2020-01-01", "2022-12-31")
18
+ periods = generate_fitting_dates(df, date_method=IN_SAMPLE)
19
+
20
+ assert len(periods) == 1
21
+ p = periods[0]
22
+ assert p.fit_start == df.index[0]
23
+ assert p.fit_end == df.index[-1]
24
+ assert p.period_start == df.index[0]
25
+ assert p.period_end == df.index[-1]
26
+ assert p.no_data is False
27
+
28
+
29
+ def test_expanding_uses_initial_start_for_estimation():
30
+ df = _make_df("2018-01-01", "2022-12-31")
31
+ periods = generate_fitting_dates(
32
+ df, date_method=EXPANDING, interval_frequency="12M"
33
+ )
34
+
35
+ # There should be at least one real (non-no-data) period
36
+ non_empty = [p for p in periods if not p.no_data]
37
+ assert len(non_empty) >= 1
38
+
39
+ # For expanding, all real periods should use the original start as fit_start
40
+ assert all(p.fit_start == df.index[0] for p in non_empty)
41
+
42
+
43
+ def test_rolling_moves_fit_start_forward():
44
+ df = _make_df("2018-01-01", "2022-12-31")
45
+ # use a small rollyears so the fit_start should advance away from the original start
46
+ periods = generate_fitting_dates(
47
+ df, date_method=ROLLING, rollyears=1, interval_frequency="12M"
48
+ )
49
+
50
+ non_empty = [p for p in periods if not p.no_data]
51
+ assert len(non_empty) >= 2
52
+
53
+ # At least one period should have a fit_start later than the original data start
54
+ assert any(p.fit_start > df.index[0] for p in non_empty)