quantlib_st-0.1.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quantlib_st-0.1.0/PKG-INFO +10 -0
- quantlib_st-0.1.0/README.md +25 -0
- quantlib_st-0.1.0/cli/__init__.py +3 -0
- quantlib_st-0.1.0/cli/__main__.py +5 -0
- quantlib_st-0.1.0/cli/corr_cmd.py +132 -0
- quantlib_st-0.1.0/cli/costs_cmd.py +125 -0
- quantlib_st-0.1.0/cli/main.py +29 -0
- quantlib_st-0.1.0/correlation/__init__.py +0 -0
- quantlib_st-0.1.0/correlation/correlation_over_time.py +143 -0
- quantlib_st-0.1.0/correlation/exponential_correlation.py +115 -0
- quantlib_st-0.1.0/correlation/fitting_dates.py +120 -0
- quantlib_st-0.1.0/costs/__init__.py +1 -0
- quantlib_st-0.1.0/costs/calculator.py +118 -0
- quantlib_st-0.1.0/costs/config.py +24 -0
- quantlib_st-0.1.0/costs/data_source.py +46 -0
- quantlib_st-0.1.0/quantlib_st.egg-info/PKG-INFO +10 -0
- quantlib_st-0.1.0/quantlib_st.egg-info/SOURCES.txt +25 -0
- quantlib_st-0.1.0/quantlib_st.egg-info/dependency_links.txt +1 -0
- quantlib_st-0.1.0/quantlib_st.egg-info/entry_points.txt +2 -0
- quantlib_st-0.1.0/quantlib_st.egg-info/requires.txt +2 -0
- quantlib_st-0.1.0/quantlib_st.egg-info/top_level.txt +4 -0
- quantlib_st-0.1.0/setup.cfg +4 -0
- quantlib_st-0.1.0/setup.py +19 -0
- quantlib_st-0.1.0/tests/__init__.py +0 -0
- quantlib_st-0.1.0/tests/correlation/__init__.py +0 -0
- quantlib_st-0.1.0/tests/correlation/test_correlation.py +75 -0
- quantlib_st-0.1.0/tests/correlation/test_fitting_dates.py +54 -0

quantlib_st-0.1.0/PKG-INFO
@@ -0,0 +1,10 @@
Metadata-Version: 2.4
Name: quantlib-st
Version: 0.1.0
Summary: quantlib-st - a quantitative finance library and cli for systematic trading
Requires-Python: >=3.12
Requires-Dist: pandas>=2.3.3
Requires-Dist: numpy>=2.4.0
Dynamic: requires-dist
Dynamic: requires-python
Dynamic: summary

quantlib_st-0.1.0/README.md
@@ -0,0 +1,25 @@
# quantlib

Minimal, self-contained CLI tools and library for quantitative finance.

## Subcommands

- **[corr](correlation/README.md)**: Compute correlation matrices over time from returns.
- **[costs](costs/README.md)**: Calculate Sharpe Ratio (SR) costs for instruments based on spread and fees.

## Install (editable - for developers)

From the repo root:

- `cd quantlib`
- `python -m pip install -e .`

This installs the `quantlib` command.

## Build a single binary with PyInstaller

From `quantlib/`:

- `make build`

Binary will be at `dist/quantlib`.

quantlib_st-0.1.0/cli/corr_cmd.py
@@ -0,0 +1,132 @@
from __future__ import annotations

import argparse
import json
import sys
from io import StringIO

import pandas as pd

from correlation.correlation_over_time import (
    correlation_over_time_for_returns,
    correlation_list_to_jsonable,
)


def add_corr_subcommand(subparsers: argparse._SubParsersAction) -> None:
    parser = subparsers.add_parser(
        "corr",
        help="Compute correlations over time from CSV piped on stdin (outputs JSON).",
    )

    parser.add_argument(
        "--frequency",
        default="D",
        help="Resample frequency before correlation (default: D). Use W if you want weekly.",
    )
    parser.add_argument(
        "--date-method",
        default="in_sample",
        choices=["expanding", "rolling", "in_sample"],
        help="How to choose the fit window over time (default: in_sample)",
    )
    parser.add_argument(
        "--rollyears",
        type=int,
        default=20,
        help="Rolling years (used only if --date-method rolling; default: 20)",
    )
    parser.add_argument(
        "--interval-frequency",
        default="12M",
        help="How often to emit a new correlation matrix (default: 12M)",
    )

    parser.add_argument(
        "--using-exponent",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Use EWMA correlation (default: true)",
    )
    parser.add_argument(
        "--ew-lookback",
        type=int,
        default=250,
        help="EWMA span/lookback (default: 250)",
    )
    parser.add_argument(
        "--min-periods",
        type=int,
        default=20,
        help="Minimum observations before correlations appear (default: 20)",
    )

    parser.add_argument(
        "--floor-at-zero",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Floor negative correlations at 0 (default: true)",
    )
    parser.add_argument(
        "--clip",
        type=float,
        default=None,
        help="Optional absolute clip value for correlations (e.g. 0.9)",
    )
    parser.add_argument(
        "--shrinkage",
        type=float,
        default=0.0,
        help="Optional shrinkage-to-average in [0,1] (default: 0)",
    )

    parser.add_argument(
        "--forward-fill-price-index",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Forward fill the synthetic price index before resampling (default: true)",
    )

    parser.add_argument(
        "--index-col",
        type=int,
        default=0,
        help="Which CSV column is the datetime index (default: 0)",
    )

    parser.set_defaults(_handler=run_corr)


def run_corr(args: argparse.Namespace) -> int:
    csv_text = sys.stdin.read()
    if not csv_text.strip():
        print(json.dumps({"error": "no input on stdin"}), file=sys.stderr)
        return 2

    try:
        df = pd.read_csv(StringIO(csv_text), index_col=args.index_col, parse_dates=True)
    except Exception as e:
        print(json.dumps({"error": f"failed to parse CSV: {e}"}), file=sys.stderr)
        return 2

    df = df.sort_index()

    corr_list = correlation_over_time_for_returns(
        df,
        frequency=args.frequency,
        forward_fill_price_index=args.forward_fill_price_index,
        date_method=args.date_method,
        rollyears=args.rollyears,
        interval_frequency=args.interval_frequency,
        using_exponent=args.using_exponent,
        ew_lookback=args.ew_lookback,
        min_periods=args.min_periods,
        floor_at_zero=args.floor_at_zero,
        clip=args.clip,
        shrinkage=args.shrinkage,
    )

    out = correlation_list_to_jsonable(corr_list)
    sys.stdout.write(json.dumps(out))
    sys.stdout.write("\n")
    return 0
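
Editor's note: the corr subcommand is pure stdin-to-stdout — it parses the piped CSV with `pd.read_csv(..., parse_dates=True)` and writes one JSON document. The sketch below (not part of the package, using made-up random returns and assuming the package is installed with `pip install -e .`) drives the handler in-process by swapping `sys.stdin`/`sys.stdout`; in normal use you would simply pipe a CSV into `quantlib corr` in the shell.

```python
# Illustrative only: drive `quantlib corr` in-process with synthetic data.
import json
import sys
from io import StringIO

import numpy as np
import pandas as pd

from cli.main import main

# Hypothetical daily returns for three instruments (~4 years of random data).
idx = pd.date_range("2020-01-01", periods=1000, freq="B")
returns = pd.DataFrame(
    np.random.default_rng(0).normal(0.0, 0.01, size=(1000, 3)),
    index=idx,
    columns=["ES", "NQ", "GC"],
)

old_stdin, old_stdout = sys.stdin, sys.stdout
sys.stdin, sys.stdout = StringIO(returns.to_csv()), StringIO()
try:
    exit_code = main(["corr", "--date-method", "expanding", "--ew-lookback", "100"])
    payload = json.loads(sys.stdout.getvalue())
finally:
    sys.stdin, sys.stdout = old_stdin, old_stdout

print(exit_code, payload["columns"], len(payload["periods"]))
```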

quantlib_st-0.1.0/cli/costs_cmd.py
@@ -0,0 +1,125 @@
from __future__ import annotations

import argparse
import json
import sys
import pandas as pd
from io import StringIO

from costs.data_source import ConfigFileCostDataSource, IBKRCostDataSource
from costs.calculator import (
    calculate_sr_cost,
    calculate_annualized_volatility,
    calculate_recent_average_price,
    calculate_cost_percentage_terms,
)


def add_costs_subcommand(subparsers: argparse._SubParsersAction) -> None:
    parser = subparsers.add_parser(
        "costs",
        help="Calculate SR costs for an instrument from price CSV piped on stdin or provided via file.",
    )

    parser.add_argument(
        "--instrument",
        required=True,
        help="Instrument code (e.g., ES, GC).",
    )
    parser.add_argument(
        "--config",
        help="Path to JSON file containing instrument cost configuration.",
    )
    parser.add_argument(
        "--use-ibkr",
        action="store_true",
        help="Use IBKR API for cost data (currently a stub).",
    )
    parser.add_argument(
        "--vol",
        type=float,
        help="Override annualized volatility (as a decimal, e.g., 0.15 for 15%%).",
    )
    parser.add_argument(
        "--price",
        type=float,
        help="Override current price (otherwise uses the last price in the CSV).",
    )

    parser.set_defaults(_handler=handle_costs)


def handle_costs(args: argparse.Namespace) -> int:
    # 1. Get Cost Config
    if args.use_ibkr:
        data_source = IBKRCostDataSource()
    elif args.config:
        data_source = ConfigFileCostDataSource(args.config)
    else:
        print("Error: Must provide either --config or --use-ibkr", file=sys.stderr)
        return 1

    try:
        cost_config = data_source.get_cost_config(args.instrument)
    except Exception as e:
        print(f"Error fetching cost config: {e}", file=sys.stderr)
        return 1

    # 2. Get Price Data
    if not sys.stdin.isatty():
        # Read from stdin
        input_data = sys.stdin.read()
        df = pd.read_csv(StringIO(input_data), index_col=0, parse_dates=True)
    else:
        # If no stdin, we need at least --price and --vol if we want to calculate anything
        df = pd.DataFrame()

    if df.empty and (args.price is None or args.vol is None):
        print(
            "Error: Must pipe price CSV to stdin or provide both --price and --vol overrides.",
            file=sys.stderr,
        )
        return 1

    # 3. Determine Price and Volatility
    if args.price is not None:
        average_price = float(args.price)
    else:
        # Use average price over the last year (256 days)
        average_price = calculate_recent_average_price(df.iloc[:, 0])

    if args.vol is not None:
        # If user provides --vol, we assume it's annualized volatility in price units
        ann_stdev_price_units = float(args.vol)
    else:
        # Calculate annualized volatility in price units (average over last year)
        ann_stdev_price_units = float(calculate_annualized_volatility(df.iloc[:, 0]))

    # 4. Calculate Costs
    sr_cost = float(
        calculate_sr_cost(
            cost_config,
            price=average_price,
            ann_stdev_price_units=ann_stdev_price_units,
        )
    )

    pct_cost = float(
        calculate_cost_percentage_terms(
            cost_config,
            blocks_traded=1.0,
            price=average_price,
        )
    )

    # 5. Output Results
    result = {
        "instrument": args.instrument,
        "average_price": round(average_price, 4),
        "ann_stdev_price_units": round(ann_stdev_price_units, 4),
        "sr_cost": round(sr_cost, 5),
        "percentage_cost": round(pct_cost, 6),
    }

    print(json.dumps(result, indent=2))
    return 0

quantlib_st-0.1.0/cli/main.py
@@ -0,0 +1,29 @@
from __future__ import annotations

import argparse

from cli.corr_cmd import add_corr_subcommand
from cli.costs_cmd import add_costs_subcommand


def main(argv: list[str] | None = None) -> int:
    parser = argparse.ArgumentParser(
        prog="quantlib",
        description="quantlib CLI (corr is the first subcommand; more will be added).",
    )

    subparsers = parser.add_subparsers(dest="subcommand", required=True)

    add_corr_subcommand(subparsers)
    add_costs_subcommand(subparsers)

    args = parser.parse_args(argv)

    # Dispatch
    if args.subcommand == "corr":
        return args._handler(args)
    elif args.subcommand == "costs":
        return args._handler(args)

    parser.error(f"Unknown subcommand: {args.subcommand}")
    return 2

quantlib_st-0.1.0/correlation/__init__.py
File without changes

quantlib_st-0.1.0/correlation/correlation_over_time.py
@@ -0,0 +1,143 @@
from __future__ import annotations

import datetime
from dataclasses import dataclass

import pandas as pd

from .fitting_dates import generate_fitting_dates, listOfFittingDates
from .exponential_correlation import (
    CorrelationEstimate,
    ExponentialCorrelationResults,
    create_boring_corr_matrix,
    modify_correlation,
)


@dataclass
class CorrelationList:
    corr_list: list[CorrelationEstimate]
    column_names: list[str]
    fit_dates: listOfFittingDates


def correlation_over_time_for_returns(
    returns_for_correlation: pd.DataFrame,
    frequency: str = "D",
    forward_fill_price_index: bool = True,
    **kwargs,
) -> CorrelationList:
    # Build a synthetic price index from returns, resample, then diff.
    # For daily frequency, this is essentially a no-op aside from losing the first row.
    index_prices_for_correlation = returns_for_correlation.cumsum()
    if forward_fill_price_index:
        index_prices_for_correlation = index_prices_for_correlation.ffill()

    index_prices_for_correlation = index_prices_for_correlation.resample(frequency).last()
    returns_for_correlation = index_prices_for_correlation.diff()

    return correlation_over_time(returns_for_correlation, **kwargs)


def correlation_over_time(
    data_for_correlation: pd.DataFrame,
    date_method: str = "in_sample",
    rollyears: int = 20,
    interval_frequency: str = "12M",
    using_exponent: bool = True,
    ew_lookback: int = 250,
    min_periods: int = 20,
    no_data_offdiag: float = 0.99,
    floor_at_zero: bool = True,
    clip: float | None = None,
    shrinkage: float = 0.0,
) -> CorrelationList:
    column_names = list(data_for_correlation.columns)

    fit_dates = generate_fitting_dates(
        data_for_correlation,
        date_method=date_method,
        rollyears=rollyears,
        interval_frequency=interval_frequency,
    )

    corr_list: list[CorrelationEstimate] = []

    if using_exponent:
        results = ExponentialCorrelationResults(
            data_for_correlation, ew_lookback=ew_lookback, min_periods=min_periods
        )

        for fit_period in fit_dates:
            if getattr(fit_period, "no_data", False):
                corr_list.append(
                    create_boring_corr_matrix(
                        len(column_names), column_names, offdiag=no_data_offdiag
                    )
                )
                continue

            corr = results.last_valid_cor_matrix_for_date(fit_period.fit_end)
            if pd.isna(corr.values).all():
                corr = create_boring_corr_matrix(
                    len(column_names), column_names, offdiag=no_data_offdiag
                )

            corr = modify_correlation(
                corr,
                floor_at_zero=floor_at_zero,
                clip_value=clip,
                shrinkage=shrinkage,
            )
            corr_list.append(corr)

    else:
        for fit_period in fit_dates:
            if getattr(fit_period, "no_data", False):
                corr_list.append(
                    create_boring_corr_matrix(
                        len(column_names), column_names, offdiag=no_data_offdiag
                    )
                )
                continue

            sub = data_for_correlation.loc[fit_period.fit_start : fit_period.fit_end]
            corr_pd = sub.corr()
            corr = CorrelationEstimate(values=corr_pd.values, columns=column_names)
            corr = modify_correlation(
                corr,
                floor_at_zero=floor_at_zero,
                clip_value=clip,
                shrinkage=shrinkage,
            )
            corr_list.append(corr)

    return CorrelationList(corr_list=corr_list, column_names=column_names, fit_dates=fit_dates)


def correlation_list_to_jsonable(corr_list: CorrelationList) -> dict:
    periods = []
    for fit_period, corr in zip(corr_list.fit_dates, corr_list.corr_list):
        periods.append(
            {
                "fit_start": _dt_to_iso(fit_period.fit_start),
                "fit_end": _dt_to_iso(fit_period.fit_end),
                "period_start": _dt_to_iso(fit_period.period_start),
                "period_end": _dt_to_iso(fit_period.period_end),
                "no_data": bool(getattr(fit_period, "no_data", False)),
                "correlation": corr.as_dict(),
            }
        )

    return {
        "columns": list(corr_list.column_names),
        "periods": periods,
    }


def _dt_to_iso(dt: datetime.datetime) -> str:
    if isinstance(dt, pd.Timestamp):
        dt = dt.to_pydatetime()
    if dt.tzinfo is not None:
        return dt.isoformat()
    return dt.replace(tzinfo=datetime.timezone.utc).isoformat()
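
Editor's note: the same pipeline can be called as a library, bypassing the CLI. A minimal sketch with made-up returns (the instrument names and parameter choices below are arbitrary, not from the package):

```python
# Illustrative only: weekly-resampled rolling correlations via the library API.
import numpy as np
import pandas as pd

from correlation.correlation_over_time import (
    correlation_over_time_for_returns,
    correlation_list_to_jsonable,
)

idx = pd.date_range("2019-01-01", periods=1200, freq="B")
returns = pd.DataFrame(
    np.random.default_rng(1).normal(0.0, 0.01, size=(1200, 2)),
    index=idx,
    columns=["SP500", "US10"],
)

corr_list = correlation_over_time_for_returns(
    returns,
    frequency="W",          # resample the synthetic price index to weekly before diffing
    date_method="rolling",
    rollyears=2,
    ew_lookback=52,
    min_periods=10,
)
out = correlation_list_to_jsonable(corr_list)
print(len(out["periods"]), out["periods"][-1]["correlation"]["values"])
```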

quantlib_st-0.1.0/correlation/exponential_correlation.py
@@ -0,0 +1,115 @@
from __future__ import annotations

import datetime
from dataclasses import dataclass

import numpy as np
import pandas as pd


@dataclass
class CorrelationEstimate:
    values: np.ndarray
    columns: list[str]

    def as_dict(self) -> dict:
        return {
            "columns": list(self.columns),
            "values": self.values.tolist(),
        }

    def floor_at_zero(self) -> "CorrelationEstimate":
        values = self.values.copy()
        values[values < 0.0] = 0.0
        np.fill_diagonal(values, 1.0)
        return CorrelationEstimate(values=values, columns=self.columns)

    def clip(self, clip_value: float | None) -> "CorrelationEstimate":
        if clip_value is None:
            return self
        clip_value = abs(float(clip_value))
        values = self.values.copy()
        values[values < -clip_value] = -clip_value
        values[values > clip_value] = clip_value
        np.fill_diagonal(values, 1.0)
        return CorrelationEstimate(values=values, columns=self.columns)

    def shrink_to_average(self, shrinkage: float) -> "CorrelationEstimate":
        shrinkage = float(shrinkage)
        if shrinkage <= 0.0:
            return self
        if shrinkage >= 1.0:
            shrinkage = 1.0

        values = self.values.copy()
        vals = values.copy()
        np.fill_diagonal(vals, np.nan)
        avg = np.nanmean(vals)
        if np.isnan(avg):
            return self

        prior = np.full_like(values, avg, dtype=float)
        np.fill_diagonal(prior, 1.0)
        shrunk = shrinkage * prior + (1.0 - shrinkage) * values
        np.fill_diagonal(shrunk, 1.0)
        return CorrelationEstimate(values=shrunk, columns=self.columns)


def modify_correlation(
    corr: CorrelationEstimate,
    *,
    floor_at_zero: bool = True,
    shrinkage: float = 0.0,
    clip_value: float | None = None,
) -> CorrelationEstimate:
    if floor_at_zero:
        corr = corr.floor_at_zero()
    corr = corr.clip(clip_value)
    if shrinkage and shrinkage > 0.0:
        corr = corr.shrink_to_average(shrinkage)
    return corr


def create_boring_corr_matrix(
    size: int, columns: list[str], offdiag: float = 0.99
) -> CorrelationEstimate:
    values = np.full((size, size), offdiag, dtype=float)
    np.fill_diagonal(values, 1.0)
    return CorrelationEstimate(values=values, columns=columns)


class ExponentialCorrelationResults:
    def __init__(
        self, data_for_correlation: pd.DataFrame, ew_lookback: int = 250, min_periods: int = 20
    ):
        self._columns = list(data_for_correlation.columns)
        self._raw_correlations = data_for_correlation.ewm(
            span=ew_lookback, min_periods=min_periods, ignore_na=True
        ).corr(pairwise=True)

    @property
    def raw_correlations(self) -> pd.DataFrame:
        return self._raw_correlations

    @property
    def columns(self) -> list[str]:
        return self._columns

    def last_valid_cor_matrix_for_date(self, date_point: datetime.datetime) -> CorrelationEstimate:
        return last_valid_cor_matrix_for_date(self.raw_correlations, self.columns, date_point)


def last_valid_cor_matrix_for_date(
    raw_correlations: pd.DataFrame, columns: list[str], date_point: datetime.datetime
) -> CorrelationEstimate:
    size_of_matrix = len(columns)
    subset = raw_correlations[raw_correlations.index.get_level_values(0) < date_point]

    if subset.shape[0] < size_of_matrix:
        return CorrelationEstimate(
            values=np.full((size_of_matrix, size_of_matrix), np.nan),
            columns=columns,
        )

    corr_matrix_values = subset.tail(size_of_matrix).values
    return CorrelationEstimate(values=corr_matrix_values, columns=columns)
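
Editor's note: `modify_correlation` composes the three post-processing steps (floor, clip, shrink-to-average) in that order. A small sketch on a hand-written 3x3 matrix (the numbers are invented for the example):

```python
# Illustrative only: flooring, clipping and shrinking a toy correlation matrix.
import numpy as np

from correlation.exponential_correlation import CorrelationEstimate, modify_correlation

raw = CorrelationEstimate(
    values=np.array([
        [1.0, -0.2, 0.95],
        [-0.2, 1.0, 0.40],
        [0.95, 0.40, 1.0],
    ]),
    columns=["A", "B", "C"],
)

# floor_at_zero turns -0.2 into 0.0, clip_value=0.9 caps 0.95 at 0.9,
# then shrinkage=0.5 pulls every off-diagonal halfway towards their average.
adjusted = modify_correlation(raw, floor_at_zero=True, clip_value=0.9, shrinkage=0.5)
print(np.round(adjusted.values, 3))
```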

quantlib_st-0.1.0/correlation/fitting_dates.py
@@ -0,0 +1,120 @@
from __future__ import annotations

import datetime
from dataclasses import dataclass
from typing import List

import pandas as pd


IN_SAMPLE = "in_sample"
ROLLING = "rolling"
EXPANDING = "expanding"

POSSIBLE_DATE_METHODS = [IN_SAMPLE, ROLLING, EXPANDING]


@dataclass
class fitDates:
    fit_start: datetime.datetime
    fit_end: datetime.datetime
    period_start: datetime.datetime
    period_end: datetime.datetime
    no_data: bool = False


class listOfFittingDates(list):
    def list_of_starting_periods(self) -> list:
        return [period.period_start for period in self]

    def index_of_most_recent_period_before_relevant_date(
        self, relevant_date: datetime.datetime
    ):
        index = []
        list_of_start_periods = self.list_of_starting_periods()
        if relevant_date < list_of_start_periods[0]:
            raise Exception(f"Date {relevant_date} is before first fitting date")

        for index, start_date in enumerate(list_of_start_periods):
            if relevant_date < start_date:
                return index - 1

        return index


def generate_fitting_dates(
    data: pd.DataFrame,
    date_method: str,
    rollyears: int = 20,
    interval_frequency: str = "12M",
) -> listOfFittingDates:
    if date_method not in POSSIBLE_DATE_METHODS:
        raise ValueError(
            f"Unknown date_method={date_method}; expected one of {POSSIBLE_DATE_METHODS}"
        )

    start_date = data.index[0]
    end_date = data.index[-1]

    if date_method == IN_SAMPLE:
        return listOfFittingDates(
            [fitDates(start_date, end_date, start_date, end_date, no_data=False)]
        )

    boundaries = _list_of_starting_dates_per_period(
        start_date, end_date, interval_frequency=interval_frequency
    )

    # Short history: fall back to a single in-sample period.
    if len(boundaries) < 2:
        return listOfFittingDates(
            [fitDates(start_date, end_date, start_date, end_date, no_data=False)]
        )

    periods: List[fitDates] = []
    for period_index in range(len(boundaries))[1:-1]:
        period_start = boundaries[period_index]
        period_end = boundaries[period_index + 1]

        if date_method == EXPANDING:
            fit_start = start_date
        elif date_method == ROLLING:
            yearidx_to_use = max(0, period_index - rollyears)
            fit_start = boundaries[yearidx_to_use]
        else:
            raise ValueError(f"Unknown date_method={date_method}")

        fit_end = period_start
        periods.append(fitDates(fit_start, fit_end, period_start, period_end, no_data=False))

    if date_method in [ROLLING, EXPANDING] and len(boundaries) >= 2:
        periods = [
            fitDates(
                start_date,
                start_date,
                start_date,
                boundaries[1],
                no_data=True,
            )
        ] + periods

    return listOfFittingDates(periods)


def _list_of_starting_dates_per_period(
    start_date: datetime.datetime,
    end_date: datetime.datetime,
    interval_frequency: str = "12M",
):
    if interval_frequency == "W":
        use_interval_frequency = "7D"
    elif interval_frequency == "M":
        use_interval_frequency = "30D"
    elif interval_frequency in ["12M", "Y"]:
        use_interval_frequency = "365D"
    else:
        use_interval_frequency = interval_frequency

    results = list(pd.date_range(end_date, start_date, freq="-" + use_interval_frequency))
    results.reverse()
    return results
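
Editor's note: to see what `generate_fitting_dates` actually emits, a small sketch (synthetic six-year index, rollyears=2; the dates are arbitrary) printing each fit window and the out-of-sample period it applies to:

```python
# Illustrative only: inspect the rolling fitting periods for a toy index.
import pandas as pd

from correlation.fitting_dates import generate_fitting_dates, ROLLING

idx = pd.date_range("2018-01-01", "2023-12-31", freq="B")
df = pd.DataFrame({"x": 0.0}, index=idx)

periods = generate_fitting_dates(df, date_method=ROLLING, rollyears=2, interval_frequency="12M")
for p in periods:
    print(
        "fit", p.fit_start.date(), "->", p.fit_end.date(),
        "| applies to", p.period_start.date(), "->", p.period_end.date(),
        "| no_data" if p.no_data else "",
    )
```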

quantlib_st-0.1.0/costs/__init__.py
@@ -0,0 +1 @@
from __future__ import annotations

quantlib_st-0.1.0/costs/calculator.py
@@ -0,0 +1,118 @@
from __future__ import annotations

import numpy as np
import pandas as pd
from costs.config import InstrumentCostConfig


def calculate_annualized_volatility(
    prices: pd.Series, days_per_year: int = 256, vol_lookback: int = 35
) -> float:
    """
    Calculate annualized volatility from a price series.
    Matches the logic in pysystemtrade:
    1. Calculate absolute price changes.
    2. Calculate EWMA volatility (span=35).
    3. Take the mean of that volatility over the last year (256 days).
    4. Annualize it by multiplying by sqrt(256).
    """
    daily_returns = prices.diff().dropna()

    # EWMA volatility of absolute returns
    daily_vol = daily_returns.ewm(span=vol_lookback, adjust=True, min_periods=10).std()

    # Average over the last year (256 business days)
    recent_daily_vol = daily_vol.tail(days_per_year).mean()

    # Annualize
    return float(recent_daily_vol * np.sqrt(days_per_year))


def calculate_recent_average_price(
    prices: pd.Series, days_per_year: int = 256
) -> float:
    """
    Calculate the average price over the last year (256 business days).
    Matches _recent_average_price in pysystemtrade.
    """
    return float(prices.tail(days_per_year).mean())


def calculate_sr_cost(
    cost_config: InstrumentCostConfig,
    price: float,
    ann_stdev_price_units: float,
    blocks_traded: float = 1.0,
) -> float:
    """
    Calculates the expected reduction in Sharpe Ratio due to costs.
    Ported from pysystemtrade sysobjects.instruments.instrumentCosts.calculate_sr_cost
    """
    cost_instrument_currency = calculate_cost_instrument_currency(
        cost_config, blocks_traded=blocks_traded, price=price
    )

    # Annualized stdev in instrument currency (price units * point size)
    ann_stdev_instrument_currency = ann_stdev_price_units * cost_config.point_size

    if ann_stdev_instrument_currency == 0:
        return 0.0

    return cost_instrument_currency / ann_stdev_instrument_currency


def calculate_cost_instrument_currency(
    cost_config: InstrumentCostConfig,
    blocks_traded: float,
    price: float,
    include_slippage: bool = True,
) -> float:
    """
    Ported from pysystemtrade sysobjects.instruments.instrumentCosts.calculate_cost_instrument_currency
    """
    value_per_block = price * cost_config.point_size

    if include_slippage:
        slippage = (
            abs(blocks_traded) * cost_config.price_slippage * cost_config.point_size
        )
    else:
        slippage = 0.0

    commission = calculate_total_commission(
        cost_config, blocks_traded=blocks_traded, value_per_block=value_per_block
    )

    return slippage + commission


def calculate_total_commission(
    cost_config: InstrumentCostConfig, blocks_traded: float, value_per_block: float
) -> float:
    """
    Ported from pysystemtrade sysobjects.instruments.instrumentCosts.calculate_total_commission
    """
    per_trade_commission = cost_config.per_trade_commission
    per_block_commission = abs(blocks_traded) * cost_config.per_block_commission
    percentage_commission = (
        cost_config.percentage_commission * abs(blocks_traded) * value_per_block
    )

    return max([per_block_commission, per_trade_commission, percentage_commission])


def calculate_cost_percentage_terms(
    cost_config: InstrumentCostConfig, blocks_traded: float, price: float
) -> float:
    """
    Ported from pysystemtrade sysobjects.instruments.instrumentCosts.calculate_cost_percentage_terms
    """
    cost_in_currency = calculate_cost_instrument_currency(
        cost_config, blocks_traded=blocks_traded, price=price
    )
    total_value = abs(blocks_traded) * price * cost_config.point_size

    if total_value == 0:
        return 0.0

    return cost_in_currency / total_value
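
Editor's note: in this module the cost of one trade in instrument currency is slippage plus the largest of the three commission terms, and the SR cost divides that by the annualized standard deviation in instrument currency. A short worked example with invented ES-like numbers (not from the package):

```python
# Worked example (illustrative numbers): SR cost and percentage cost of one block.
from costs.calculator import calculate_sr_cost, calculate_cost_percentage_terms
from costs.config import InstrumentCostConfig

cfg = InstrumentCostConfig(
    instrument_code="ES",
    point_size=50.0,        # assumed contract multiplier
    price_slippage=0.125,   # half spread in price units
    per_block_commission=2.0,
)

price = 4500.0
ann_stdev_price_units = 0.16 * price  # assume ~16% annualized vol, in price units

# slippage = 0.125 * 50 = 6.25; commission = max(2.0, 0, 0) = 2.0
# SR cost = (6.25 + 2.0) / (720 * 50) = 8.25 / 36000 ~= 0.00023
sr_cost = calculate_sr_cost(cfg, price=price, ann_stdev_price_units=ann_stdev_price_units)

# percentage cost of trading one block: 8.25 / (4500 * 50) ~= 3.7e-05
pct_cost = calculate_cost_percentage_terms(cfg, blocks_traded=1.0, price=price)
print(round(sr_cost, 6), round(pct_cost, 6))
```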

quantlib_st-0.1.0/costs/config.py
@@ -0,0 +1,24 @@
from __future__ import annotations

from dataclasses import dataclass


@dataclass
class InstrumentCostConfig:
    instrument_code: str
    point_size: float
    price_slippage: float  # Half spread in price units
    per_block_commission: float = 0.0
    percentage_commission: float = 0.0
    per_trade_commission: float = 0.0

    @classmethod
    def from_dict(cls, data: dict) -> InstrumentCostConfig:
        return cls(
            instrument_code=data["instrument_code"],
            point_size=data["point_size"],
            price_slippage=data["price_slippage"],
            per_block_commission=data.get("per_block_commission", 0.0),
            percentage_commission=data.get("percentage_commission", 0.0),
            per_trade_commission=data.get("per_trade_commission", 0.0),
        )
@@ -0,0 +1,46 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
5
|
+
from costs.config import InstrumentCostConfig
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class CostDataSource(ABC):
|
|
9
|
+
@abstractmethod
|
|
10
|
+
def get_cost_config(self, instrument_code: str) -> InstrumentCostConfig:
|
|
11
|
+
pass
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ConfigFileCostDataSource(CostDataSource):
|
|
15
|
+
def __init__(self, config_path: str):
|
|
16
|
+
with open(config_path, "r") as f:
|
|
17
|
+
self.config_data = json.load(f)
|
|
18
|
+
|
|
19
|
+
def get_cost_config(self, instrument_code: str) -> InstrumentCostConfig:
|
|
20
|
+
# Assuming the config file is a list of instrument configs or a dict keyed by code
|
|
21
|
+
if isinstance(self.config_data, list):
|
|
22
|
+
for item in self.config_data:
|
|
23
|
+
if item["instrument_code"] == instrument_code:
|
|
24
|
+
return InstrumentCostConfig.from_dict(item)
|
|
25
|
+
elif isinstance(self.config_data, dict):
|
|
26
|
+
if instrument_code in self.config_data:
|
|
27
|
+
return InstrumentCostConfig.from_dict(self.config_data[instrument_code])
|
|
28
|
+
elif (
|
|
29
|
+
"instrument_code" in self.config_data
|
|
30
|
+
and self.config_data["instrument_code"] == instrument_code
|
|
31
|
+
):
|
|
32
|
+
return InstrumentCostConfig.from_dict(self.config_data)
|
|
33
|
+
|
|
34
|
+
raise ValueError(f"No cost config found for instrument: {instrument_code}")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class IBKRCostDataSource(CostDataSource):
|
|
38
|
+
"""
|
|
39
|
+
Stub for future IBKR API integration.
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
def get_cost_config(self, instrument_code: str) -> InstrumentCostConfig:
|
|
43
|
+
# TODO: Implement IBKR API calls to fetch real-time/historical spreads and commissions
|
|
44
|
+
raise NotImplementedError(
|
|
45
|
+
"IBKR API integration not yet implemented. Please use --config for now."
|
|
46
|
+
)
|
|
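
Editor's note: the package does not ship a sample cost config, but `ConfigFileCostDataSource` accepts either a dict keyed by instrument code, a list of dicts, or a single dict, with the field names taken from `InstrumentCostConfig.from_dict`. A minimal sketch (the ES numbers are invented for the example):

```python
# Illustrative only: write a config file and load it back through the data source.
import json
import tempfile

from costs.data_source import ConfigFileCostDataSource

config = {
    "ES": {
        "instrument_code": "ES",
        "point_size": 50.0,        # assumed contract multiplier
        "price_slippage": 0.125,   # half spread in price units
        "per_block_commission": 2.0,
    }
}

with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
    json.dump(config, f)
    path = f.name

cost_config = ConfigFileCostDataSource(path).get_cost_config("ES")
print(cost_config.instrument_code, cost_config.point_size, cost_config.price_slippage)
```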

quantlib_st-0.1.0/quantlib_st.egg-info/PKG-INFO
@@ -0,0 +1,10 @@
Metadata-Version: 2.4
Name: quantlib-st
Version: 0.1.0
Summary: quantlib-st - a quantitative finance library and cli for systematic trading
Requires-Python: >=3.12
Requires-Dist: pandas>=2.3.3
Requires-Dist: numpy>=2.4.0
Dynamic: requires-dist
Dynamic: requires-python
Dynamic: summary

quantlib_st-0.1.0/quantlib_st.egg-info/SOURCES.txt
@@ -0,0 +1,25 @@
README.md
setup.py
cli/__init__.py
cli/__main__.py
cli/corr_cmd.py
cli/costs_cmd.py
cli/main.py
correlation/__init__.py
correlation/correlation_over_time.py
correlation/exponential_correlation.py
correlation/fitting_dates.py
costs/__init__.py
costs/calculator.py
costs/config.py
costs/data_source.py
quantlib_st.egg-info/PKG-INFO
quantlib_st.egg-info/SOURCES.txt
quantlib_st.egg-info/dependency_links.txt
quantlib_st.egg-info/entry_points.txt
quantlib_st.egg-info/requires.txt
quantlib_st.egg-info/top_level.txt
tests/__init__.py
tests/correlation/__init__.py
tests/correlation/test_correlation.py
tests/correlation/test_fitting_dates.py

quantlib_st-0.1.0/quantlib_st.egg-info/dependency_links.txt
@@ -0,0 +1 @@


quantlib_st-0.1.0/setup.py
@@ -0,0 +1,19 @@
from setuptools import setup, find_packages


setup(
    name="quantlib-st",
    version="0.1.0",
    description="quantlib-st - a quantitative finance library and cli for systematic trading",
    packages=find_packages(),
    python_requires=">=3.12",
    install_requires=[
        "pandas>=2.3.3",
        "numpy>=2.4.0",
    ],
    entry_points={
        "console_scripts": [
            "quantlib=cli.main:main",
        ]
    },
)

quantlib_st-0.1.0/tests/__init__.py
File without changes

quantlib_st-0.1.0/tests/correlation/__init__.py
File without changes

quantlib_st-0.1.0/tests/correlation/test_correlation.py
@@ -0,0 +1,75 @@
import datetime
import numpy as np
import pandas as pd

from correlation.exponential_correlation import (
    ExponentialCorrelationResults,
    CorrelationEstimate,
)


def test_exponential_correlation_results_structure():
    # Create dummy returns for 3 instruments over 5 days
    dates = pd.date_range("2023-01-01", periods=5)
    data = pd.DataFrame(np.random.randn(5, 3), index=dates, columns=["A", "B", "C"])

    # Initialize with a small lookback for testing
    results = ExponentialCorrelationResults(data, ew_lookback=5, min_periods=2)

    # 1. Understanding _raw_correlations
    # When we call .corr(pairwise=True) on an EWM object, pandas returns a MultiIndex DataFrame.
    # Level 0: Date
    # Level 1: Instrument name
    raw = results.raw_correlations

    assert isinstance(raw, pd.DataFrame)
    # For 5 days and 3 instruments, we expect 5 * 3 = 15 rows
    assert len(raw) == 15
    # The columns should match the instrument names
    assert list(raw.columns) == ["A", "B", "C"]

    # Check the first date's matrix (it should be NaN if min_periods=2)
    first_date = dates[0]
    matrix_at_t0 = raw.loc[first_date]
    assert matrix_at_t0.isna().all().all()

    # Check the third date's matrix (should have values)
    third_date = dates[2]
    matrix_at_t2 = raw.loc[third_date]
    assert not matrix_at_t2.isna().any().any()
    assert matrix_at_t2.loc["A", "A"] == 1.0  # Diagonal is always 1.0


def test_last_valid_cor_matrix_for_date():
    dates = pd.date_range("2023-01-01", periods=10)
    data = pd.DataFrame(np.random.randn(10, 2), index=dates, columns=["X", "Y"])

    results = ExponentialCorrelationResults(data, ew_lookback=5, min_periods=2)

    # Test retrieving matrix for a specific point in time
    # If we ask for 2023-01-05, it should give us the matrix from the last available date BEFORE 2023-01-05
    target_date = datetime.datetime(2023, 1, 5)
    estimate = results.last_valid_cor_matrix_for_date(target_date)

    assert isinstance(estimate, CorrelationEstimate)
    assert estimate.columns == ["X", "Y"]
    assert estimate.values.shape == (2, 2)

    # The values should match the raw correlation at 2023-01-04
    expected_values = np.asarray(
        results.raw_correlations.loc[pd.Timestamp("2023-01-04")].values
    )
    np.testing.assert_array_almost_equal(estimate.values, expected_values)


def test_last_valid_cor_matrix_no_data():
    # Test what happens if we ask for a date before any data exists
    dates = pd.date_range("2023-01-10", periods=5)
    data = pd.DataFrame(np.random.randn(5, 2), index=dates, columns=["X", "Y"])
    results = ExponentialCorrelationResults(data)

    early_date = datetime.datetime(2023, 1, 1)
    estimate = results.last_valid_cor_matrix_for_date(early_date)

    # Should return a matrix of NaNs
    assert np.isnan(estimate.values).all()

quantlib_st-0.1.0/tests/correlation/test_fitting_dates.py
@@ -0,0 +1,54 @@
import pandas as pd

from correlation.fitting_dates import (
    generate_fitting_dates,
    IN_SAMPLE,
    EXPANDING,
    ROLLING,
)


def _make_df(start: str, end: str, freq: str = "M") -> pd.DataFrame:
    idx = pd.date_range(start, end, freq=freq)
    return pd.DataFrame(index=idx, data={"x": 0})


def test_in_sample_returns_single_period():
    df = _make_df("2020-01-01", "2022-12-31")
    periods = generate_fitting_dates(df, date_method=IN_SAMPLE)

    assert len(periods) == 1
    p = periods[0]
    assert p.fit_start == df.index[0]
    assert p.fit_end == df.index[-1]
    assert p.period_start == df.index[0]
    assert p.period_end == df.index[-1]
    assert p.no_data is False


def test_expanding_uses_initial_start_for_estimation():
    df = _make_df("2018-01-01", "2022-12-31")
    periods = generate_fitting_dates(
        df, date_method=EXPANDING, interval_frequency="12M"
    )

    # There should be at least one real (non-no-data) period
    non_empty = [p for p in periods if not p.no_data]
    assert len(non_empty) >= 1

    # For expanding, all real periods should use the original start as fit_start
    assert all(p.fit_start == df.index[0] for p in non_empty)


def test_rolling_moves_fit_start_forward():
    df = _make_df("2018-01-01", "2022-12-31")
    # use a small rollyears so the fit_start should advance away from the original start
    periods = generate_fitting_dates(
        df, date_method=ROLLING, rollyears=1, interval_frequency="12M"
    )

    non_empty = [p for p in periods if not p.no_data]
    assert len(non_empty) >= 2

    # At least one period should have a fit_start later than the original data start
    assert any(p.fit_start > df.index[0] for p in non_empty)