gnomepy 2.2.4__tar.gz → 2.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gnomepy-2.2.4 → gnomepy-2.3.0}/PKG-INFO +12 -8
- gnomepy-2.3.0/gnomepy/__init__.py +83 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/cli.py +43 -0
- gnomepy-2.3.0/gnomepy/importer/__init__.py +10 -0
- gnomepy-2.3.0/gnomepy/importer/chunker.py +28 -0
- gnomepy-2.3.0/gnomepy/importer/encoder.py +53 -0
- gnomepy-2.3.0/gnomepy/importer/import_job.py +129 -0
- gnomepy-2.3.0/gnomepy/importer/mapping.py +44 -0
- gnomepy-2.3.0/gnomepy/importer/scaling.py +68 -0
- gnomepy-2.3.0/gnomepy/importer/tardis/__init__.py +7 -0
- gnomepy-2.3.0/gnomepy/importer/tardis/book.py +61 -0
- gnomepy-2.3.0/gnomepy/importer/tardis/client.py +51 -0
- gnomepy-2.3.0/gnomepy/importer/tardis/importer.py +167 -0
- gnomepy-2.3.0/gnomepy/importer/tardis/mappings.py +165 -0
- gnomepy-2.3.0/gnomepy/importer/uploader.py +30 -0
- gnomepy-2.3.0/gnomepy/importer/validators.py +42 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/backtest/runner.py +45 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/datastore.py +12 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/metadata.py +5 -0
- gnomepy-2.3.0/gnomepy/registry/__init__.py +31 -0
- gnomepy-2.3.0/gnomepy/registry/api.py +189 -0
- gnomepy-2.3.0/gnomepy/registry/types.py +146 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/pyproject.toml +17 -4
- gnomepy-2.2.4/gnomepy/__init__.py +0 -131
- gnomepy-2.2.4/gnomepy/registry/__init__.py +0 -0
- gnomepy-2.2.4/gnomepy/registry/api.py +0 -70
- gnomepy-2.2.4/gnomepy/registry/types.py +0 -35
- {gnomepy-2.2.4 → gnomepy-2.3.0}/README.md +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/_fs.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/auth.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/config.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/explorer/__init__.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/explorer/app.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/explorer/data.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/explorer/panels/__init__.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/explorer/panels/event_log.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/explorer/panels/pnl.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/explorer/panels/price.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/explorer/panels/signals.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/explorer/styles.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/__init__.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/_classpath.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/_jvm.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/backtest/__init__.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/backtest/config.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/backtest/orders.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/backtest/strategy.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/cache.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/enums.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/market_data.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/oms.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/recorder.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/sbe.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/schemas.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/statics.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/remote.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/reporting/__init__.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/reporting/metrics.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/reporting/plots.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/reporting/report.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/sweep.py +0 -0
- {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/utils.py +0 -0
|
@@ -1,21 +1,25 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gnomepy
|
|
3
|
-
Version: 2.2.4
|
|
3
|
+
Version: 2.3.0
|
|
4
4
|
Summary:
|
|
5
5
|
Author: mprey
|
|
6
6
|
Author-email: masonprey7@gmail.com
|
|
7
7
|
Requires-Python: >=3.13,<3.14
|
|
8
8
|
Classifier: Programming Language :: Python :: 3
|
|
9
9
|
Classifier: Programming Language :: Python :: 3.13
|
|
10
|
+
Provides-Extra: backtest
|
|
11
|
+
Requires-Dist: anthropic (>=0.40.0,<1.0.0) ; extra == "backtest"
|
|
10
12
|
Requires-Dist: boto3 (>=1.36.18,<2.0.0)
|
|
13
|
+
Requires-Dist: boto3-stubs (>=1.42.73,<1.43.0) ; extra == "backtest"
|
|
11
14
|
Requires-Dist: click (>=8.1,<9.0)
|
|
12
|
-
Requires-Dist: dash (>=2.18,<3.0)
|
|
13
|
-
Requires-Dist: dash-bootstrap-components (>=1.6,<2.0)
|
|
14
|
-
Requires-Dist: jpype1 (>=1.5.0,<2.0.0)
|
|
15
|
-
Requires-Dist: numpy (>=2.2.4,<3.0.0)
|
|
16
|
-
Requires-Dist: pandas (>=2.2.3,<3.0.0)
|
|
17
|
-
Requires-Dist:
|
|
18
|
-
Requires-Dist:
|
|
15
|
+
Requires-Dist: dash (>=2.18,<3.0) ; extra == "backtest"
|
|
16
|
+
Requires-Dist: dash-bootstrap-components (>=1.6,<2.0) ; extra == "backtest"
|
|
17
|
+
Requires-Dist: jpype1 (>=1.5.0,<2.0.0) ; extra == "backtest"
|
|
18
|
+
Requires-Dist: numpy (>=2.2.4,<3.0.0) ; extra == "backtest"
|
|
19
|
+
Requires-Dist: pandas (>=2.2.3,<3.0.0) ; extra == "backtest"
|
|
20
|
+
Requires-Dist: pandas-stubs (>=2.3.3,<2.4.0) ; extra == "backtest"
|
|
21
|
+
Requires-Dist: plotly (>=6.6.0,<6.7.0) ; extra == "backtest"
|
|
22
|
+
Requires-Dist: pyarrow (>=23.0.1,<24.0.0) ; extra == "backtest"
|
|
19
23
|
Requires-Dist: pytz (>=2025.1,<2026.0)
|
|
20
24
|
Requires-Dist: pyyaml (>=6.0,<7.0)
|
|
21
25
|
Requires-Dist: requests (>=2.32.3,<3.0.0)
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""gnomepy — backtesting infrastructure for the gnome trading system.
|
|
2
|
+
|
|
3
|
+
Public API:
|
|
4
|
+
|
|
5
|
+
from gnomepy import (
|
|
6
|
+
Strategy, Backtest, run_backtest,
|
|
7
|
+
BacktestConfig, ListingSimConfig, ExchangeProfileConfig,
|
|
8
|
+
StrategyConfig, RiskConfig,
|
|
9
|
+
StaticFeeConfig, StaticLatencyConfig, GaussianLatencyConfig,
|
|
10
|
+
OptimisticQueueConfig, RiskAverseQueueConfig, ProbabilisticQueueConfig,
|
|
11
|
+
Intent, ExecutionReport, OmsView,
|
|
12
|
+
SchemaType, Side, Action,
|
|
13
|
+
Mbp10Schema, ...
|
|
14
|
+
)
|
|
15
|
+
"""
|
|
16
|
+
import importlib
|
|
17
|
+
|
|
18
|
+
# Public names grouped by the module that defines them. Every name is exported
# under the same identifier it carries in its home module, so the lookup table
# below can be derived instead of spelled out entry by entry. Group order and
# in-group order determine the order of the resulting table.
_LAZY_EXPORTS_BY_MODULE: tuple[tuple[str, tuple[str, ...]], ...] = (
    (
        "gnomepy.java.backtest.config",
        (
            "BacktestConfig",
            "ExchangeProfileConfig",
            "GaussianLatencyConfig",
            "ListingSimConfig",
            "OptimisticQueueConfig",
            "ProbabilisticQueueConfig",
            "RiskAverseQueueConfig",
            "RiskConfig",
            "StaticFeeConfig",
            "StaticLatencyConfig",
            "StrategyConfig",
        ),
    ),
    ("gnomepy.java.backtest.orders", ("ExecutionReport",)),
    ("gnomepy.java.backtest.runner", ("Backtest", "run_backtest")),
    ("gnomepy.java.cache", ("MarketDataCache",)),
    ("gnomepy.java.backtest.strategy", ("Strategy",)),
    ("gnomepy.java.datastore", ("DataStore",)),
    (
        "gnomepy.java.enums",
        (
            "Action",
            "ExecType",
            "OrderStatus",
            "OrderType",
            "SchemaType",
            "Side",
            "TimeInForce",
        ),
    ),
    ("gnomepy.java.oms", ("Intent", "OmsView", "PositionInfo", "TrackedOrderInfo")),
    ("gnomepy.java.recorder", ("BacktestResults",)),
    (
        "gnomepy.java.schemas",
        (
            "Bbo1mSchema",
            "Bbo1sSchema",
            "BboSchema",
            "MboSchema",
            "Mbp1Schema",
            "Mbp10Schema",
            "Ohlcv1hSchema",
            "Ohlcv1mSchema",
            "Ohlcv1sSchema",
            "OhlcvSchema",
            "Schema",
            "TradesSchema",
            "wrap_schema",
        ),
    ),
    ("gnomepy.java.statics", ("Scales",)),
    ("gnomepy.metadata", ("BacktestMetadata",)),
    ("gnomepy.reporting", ("BacktestReport",)),
    ("gnomepy.reporting.metrics", ("Curves", "build_curves", "compute_sharpe")),
    ("gnomepy.reporting.plots", ("ReportSection",)),
    ("gnomepy.utils", ("generate_backtest_id", "uuid7")),
)

# Exported name -> (module path, attribute name) used to resolve it on demand.
_LAZY_IMPORTS: dict[str, tuple[str, str]] = {
    exported: (module_path, exported)
    for module_path, exported_names in _LAZY_EXPORTS_BY_MODULE
    for exported in exported_names
}
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def __getattr__(name: str):
    """Resolve a lazily exported attribute the first time it is accessed.

    Module-level ``__getattr__`` is only consulted when normal attribute
    lookup fails. Names listed in ``_LAZY_IMPORTS`` are imported from their
    home module and cached in this module's globals, so each name is resolved
    at most once.

    Raises:
        AttributeError: if ``name`` is not a lazily exported attribute.
    """
    target = _LAZY_IMPORTS.get(name)
    if target is None:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    module_path, attr_name = target
    resolved = getattr(importlib.import_module(module_path), attr_name)
    # Cache the resolved object so later lookups bypass __getattr__ entirely.
    globals()[name] = resolved
    return resolved
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# The public API is exactly the set of lazily importable names, in table order.
__all__ = [*_LAZY_IMPORTS]
|
|
@@ -170,6 +170,49 @@ def _human_size(n: int) -> str:
|
|
|
170
170
|
return f"{n:.1f} PB"
|
|
171
171
|
|
|
172
172
|
|
|
173
|
+
# ---------------------------------------------------------------------------
|
|
174
|
+
# Import commands
|
|
175
|
+
# ---------------------------------------------------------------------------
|
|
176
|
+
|
|
177
|
+
# Command group exposed on the CLI as "import"; the function carries a
# different name because ``import`` is a reserved word in Python.
@main.group(name="import")
def import_cmd() -> None:
    """Import historical market data from external vendors."""
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
@import_cmd.command("tardis")
@click.option("--exchange", required=True, help="Tardis exchange name (e.g., binance-futures, deribit)")
@click.option("--symbols", required=True, help="Comma-separated symbols (e.g., BTCUSDT,ETHUSDT)")
@click.option("--start", required=True, type=click.DateTime(formats=["%Y-%m-%d"]), help="Start date inclusive")
@click.option("--end", required=True, type=click.DateTime(formats=["%Y-%m-%d"]), help="End date inclusive")
@click.option("--dry-run", is_flag=True, help="Validate without uploading to S3")
@click.option("--bucket", default=None, help="Override S3 bucket")
def import_tardis(
    exchange: str,
    symbols: str,
    start,
    end,
    dry_run: bool,
    bucket: str | None,
) -> None:
    """Import Tardis incremental L2 + trades data as MBP_10 into gnome market data."""
    # Imported lazily so the rest of the CLI does not pay for (or require)
    # the importer package and its dependencies.
    from gnomepy.importer.tardis import TardisImporter, TardisImportRequest

    # Drop blank entries so input such as "BTCUSDT," or "A,,B" never turns
    # into a request for an empty symbol name.
    symbol_list = [name for name in (part.strip() for part in symbols.split(",")) if name]
    if not symbol_list:
        click.echo("error: --symbols must contain at least one symbol", err=True)
        raise SystemExit(2)

    request = TardisImportRequest(
        exchange=exchange,
        symbols=symbol_list,
        # click.DateTime yields datetime objects; the request takes plain dates.
        start_date=start.date(),
        end_date=end.date(),
        bucket=bucket,
        dry_run=dry_run,
    )
    results = TardisImporter().run(request)

    # One summary per result, with any collected errors reported on stderr.
    for r in results:
        click.echo(f"{r.exchange} / {r.symbol} (security_id={r.security_id}, exchange_id={r.exchange_id})")
        click.echo(f"  processed: {r.days_processed}  skipped: {r.days_skipped}  records: {r.total_records}")
        for err in r.errors:
            click.echo(f"  error: {err}", err=True)
|
|
214
|
+
|
|
215
|
+
|
|
173
216
|
# ---------------------------------------------------------------------------
|
|
174
217
|
# Backtest commands
|
|
175
218
|
# ---------------------------------------------------------------------------
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import datetime, timezone
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def chunk_by_minute(df: pd.DataFrame, ts_ns: pd.Series) -> dict[datetime, pd.DataFrame]:
    """Split df into per-minute buckets using ts_ns (int64 nanoseconds UTC).

    Args:
        df: Source rows. The caller's frame is not modified.
        ts_ns: Event timestamp of each row in nanoseconds since the UTC epoch.
            It is assigned as a column, so it is aligned to ``df`` by index.

    Returns:
        A dict mapping naive UTC datetime (truncated to the minute) to the
        subset of ``df`` in that minute, ordered by timestamp. Rows that share
        a timestamp keep their original relative order.
    """
    minute_ns = 60_000_000_000
    second_ns = 1_000_000_000

    # Work on a copy so the temporary helper columns never leak to the caller.
    work = df.copy()
    work["__ts_ns"] = ts_ns
    work["__minute_ns"] = (ts_ns // minute_ns) * minute_ns

    result: dict[datetime, pd.DataFrame] = {}
    for bucket_ns, group in work.groupby("__minute_ns", sort=True):
        # A stable sort is required: the default quicksort may reorder events
        # that carry the same nanosecond timestamp, and their source order is
        # meaningful for market data replay.
        ordered = group.sort_values("__ts_ns", kind="stable").drop(columns=["__ts_ns", "__minute_ns"])
        # Naive UTC datetime matching Java LocalDateTime used in MarketDataEntry.
        # Bucket starts are whole minutes, so integer division is exact.
        dt = datetime.fromtimestamp(int(bucket_ns) // second_ns, tz=timezone.utc).replace(tzinfo=None)
        result[dt] = ordered

    return result
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
from gnomepy.importer.mapping import FieldMapping, ImportConfig
|
|
8
|
+
from gnomepy.importer.scaling import parse_timestamp_ns, scale_price, scale_size
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _apply_mapping(value, mapping: FieldMapping) -> Any:
    """Convert one source value into the value stored in its target SBE field.

    Args:
        value: Raw cell value taken from the source data.
        mapping: Field mapping whose ``transform`` selects the conversion.

    Returns:
        ``None`` for missing values (as judged by ``pd.isna``); otherwise the
        converted value: an int for "none", "price", "size"/"volume" and
        "timestamp", or the mapped entry for "enum".

    Raises:
        ValueError: if the transform is unknown, if an "enum" mapping has no
            ``enum_map``, or if ``enum_map`` has no entry for the value.
    """
    if pd.isna(value):
        return None

    transform = mapping.transform
    if transform == "none":
        return int(value)
    if transform == "price":
        return scale_price(value)
    if transform in ("size", "volume"):
        return scale_size(value)
    if transform == "timestamp":
        return parse_timestamp_ns(value, mapping.timestamp_format, mapping.timestamp_tz)
    if transform == "enum":
        enum_map = mapping.enum_map
        # enum_map defaults to None on FieldMapping; fail with a clear message
        # instead of an AttributeError on None.
        if enum_map is None:
            raise ValueError(f"enum_map is required for enum transform of field {mapping.target_field!r}")
        # Keys are compared as strings, so non-string source values still match.
        mapped = enum_map.get(str(value))
        if mapped is None:
            raise ValueError(f"enum_map has no entry for value {value!r} in field {mapping.target_field!r}")
        return mapped
    raise ValueError(f"Unknown transform {mapping.transform!r} for field {mapping.target_field!r}")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def encode_chunk(chunk: pd.DataFrame, config: ImportConfig) -> bytes:
    """Encode a minute-chunk DataFrame to concatenated SBE bytes.

    Requires the JVM to be started before calling.

    Args:
        chunk: Rows of a single minute bucket, in the order they are to be written.
        config: Import configuration supplying the schema type, the constant
            exchange/security ids, default field values and per-column mappings.

    Returns:
        The encoded records of every row, concatenated in row order.
    """
    from gnomepy.java.schemas import get_schema_class

    schema_cls = get_schema_class(config.schema_type)
    mappings = config.field_mappings

    # Read each source column on its own instead of using DataFrame.iterrows():
    # iterrows() builds one Series per row and upcasts mixed int/float rows to
    # float64, which silently corrupts large int64 values such as nanosecond
    # timestamps. Per-column access keeps every value in its column's dtype.
    column_values = [chunk[mapping.source_column].tolist() for mapping in mappings]

    parts: list[bytes] = []
    for row_idx in range(len(chunk)):
        # Build kwargs: config-level IDs, then defaults, then per-row field mappings
        kwargs: dict[str, Any] = {
            "exchange_id": config.exchange_id,
            "security_id": config.security_id,
        }
        kwargs.update(config.defaults)
        for mapping, values in zip(mappings, column_values):
            kwargs[mapping.target_field] = _apply_mapping(values[row_idx], mapping)

        parts.append(schema_cls(**kwargs).encode())

    return b"".join(parts)
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
from gnomepy.importer.chunker import chunk_by_minute
|
|
10
|
+
from gnomepy.importer.encoder import encode_chunk
|
|
11
|
+
from gnomepy.importer.mapping import ImportConfig
|
|
12
|
+
from gnomepy.importer.scaling import apply_timestamp_transform
|
|
13
|
+
from gnomepy.importer.uploader import build_s3_key, compress, default_merged_bucket, upload
|
|
14
|
+
from gnomepy.importer.validators import validate
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class ImportResult:
    """Outcome of a completed import run."""

    # Number of chunk files uploaded.
    files_uploaded: int
    # Number of source rows contained in the uploaded chunks.
    total_records: int
    # Number of distinct minute buckets the source data was split into.
    minutes_covered: int
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class DryRunResult:
    """Outcome of validating an import without uploading anything."""

    # True when validation reported no errors.
    is_valid: bool
    # Validation error messages; empty when the run is valid.
    errors: list[str]
    # Number of minute buckets the data would be split into.
    minutes_count: int
    # Number of source rows that would be imported.
    total_records: int
    # A few example S3 keys the import would write.
    sample_keys: list[str] = field(default_factory=list)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class ImportJob:
    """Orchestrates converting a CSV/Parquet file into the gnome market data format and uploading to S3."""

    def __init__(self, config: ImportConfig, s3_client=None):
        """Store the import configuration and an optional pre-built S3 client.

        Args:
            config: Import configuration for one security/exchange/schema.
            s3_client: Client used for uploads. When omitted, a default boto3
                S3 client is created on first use.
        """
        self.config = config
        self._s3 = s3_client

    def _s3_client(self):
        """Return the S3 client, creating the default boto3 client lazily."""
        if self._s3 is None:
            # Imported here so boto3 is only needed when an upload actually happens.
            import boto3

            self._s3 = boto3.client("s3")
        return self._s3

    def _load(self, source, file_format: str = "auto") -> pd.DataFrame:
        """Load the source data as a DataFrame.

        Args:
            source: A DataFrame (copied, so the caller's frame is untouched) or
                a path to a CSV/Parquet file.
            file_format: "parquet", "csv" or "auto". With "auto" a path ending
                in ".parquet" (any letter case) is read as Parquet and anything
                else as CSV.
        """
        if isinstance(source, pd.DataFrame):
            return source.copy()
        path = str(source)
        fmt = file_format
        if fmt == "auto":
            # Case-insensitive so "DATA.PARQUET" is not mistaken for a CSV file.
            fmt = "parquet" if path.lower().endswith(".parquet") else "csv"
        if fmt == "parquet":
            return pd.read_parquet(path)
        return pd.read_csv(path)

    def _timestamp_ns(self, df: pd.DataFrame) -> pd.Series:
        """Return the event timestamps of ``df`` as nanoseconds since epoch.

        The source column is taken from the field mapping whose target is
        ``config.timestamp_field``.

        Raises:
            ValueError: if no field mapping targets ``config.timestamp_field``.
        """
        ts_mapping = next(
            (m for m in self.config.field_mappings if m.target_field == self.config.timestamp_field),
            None,
        )
        if ts_mapping is None:
            # Without a default, next() would leak a bare StopIteration here.
            raise ValueError(
                f"No field mapping targets timestamp field {self.config.timestamp_field!r}"
            )
        return apply_timestamp_transform(
            df[ts_mapping.source_column], ts_mapping.timestamp_format, ts_mapping.timestamp_tz
        )

    def _bucket(self) -> str:
        """Return the configured bucket, or the default merged bucket if none is set."""
        return self.config.bucket or default_merged_bucket()

    def dry_run(self, source, file_format: str = "auto") -> DryRunResult:
        """Validate the config and source data without uploading anything.

        Returns:
            A result with ``is_valid=False`` and the validation errors, or a
            valid result carrying the minute-bucket count, the row count and
            the S3 keys of up to the first five minute buckets.
        """
        df = self._load(source, file_format)
        errors = validate(self.config, df)
        if errors:
            return DryRunResult(is_valid=False, errors=errors, minutes_count=0, total_records=0)

        ts_ns = self._timestamp_ns(df)
        chunks = chunk_by_minute(df, ts_ns)
        config = self.config
        sample_keys = [
            build_s3_key(config.security_id, config.exchange_id, config.schema_type, dt)
            for dt in sorted(chunks)[:5]
        ]
        return DryRunResult(
            is_valid=True,
            errors=[],
            minutes_count=len(chunks),
            total_records=len(df),
            sample_keys=sample_keys,
        )

    def run(self, source, file_format: str = "auto") -> ImportResult:
        """Encode source data and upload to S3.

        Validates first; raises ValueError if config is invalid.
        The JVM is started here, after validation, because SBE encoding needs it.

        Returns:
            Counts of uploaded files, encoded rows and minute buckets.
        """
        from gnomepy.java._jvm import ensure_jvm_started

        df = self._load(source, file_format)
        errors = validate(self.config, df)
        if errors:
            raise ValueError("Import config validation failed:\n" + "\n".join(f"  - {e}" for e in errors))

        ensure_jvm_started()
        ts_ns = self._timestamp_ns(df)
        chunks = chunk_by_minute(df, ts_ns)

        bucket = self._bucket()
        s3 = self._s3_client()
        files_uploaded = 0
        total_records = 0

        # Upload one compressed object per minute, in chronological order.
        for minute_dt, chunk_df in sorted(chunks.items()):
            raw = encode_chunk(chunk_df, self.config)
            compressed = compress(raw)
            key = build_s3_key(
                self.config.security_id,
                self.config.exchange_id,
                self.config.schema_type,
                minute_dt,
            )
            upload(s3, bucket, key, compressed)
            files_uploaded += 1
            total_records += len(chunk_df)

        return ImportResult(
            files_uploaded=files_uploaded,
            total_records=total_records,
            minutes_covered=len(chunks),
        )
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from gnomepy.java.enums import SchemaType
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class FieldMapping:
    """Describes how one vendor column is converted into one SBE schema field.

    Supported ``transform`` values:
      "none"      — value is passed through as an int (e.g. a pre-scaled integer field)
      "price"     — float → int64 scaled by 1e9
      "size"      — float → int64 scaled by 1e6 (used for both size and volume fields)
      "timestamp" — one of several formats → int64 nanoseconds since epoch
      "enum"      — string → string looked up in enum_map (e.g. "buy" → "Bid")
    """

    # Column name in the vendor data.
    source_column: str
    # Name of the SBE schema field that receives the converted value.
    target_field: str
    # Conversion to apply; see the class docstring for the accepted values.
    transform: str = "none"
    # Needed for transform="timestamp": "epoch_s", "epoch_ms", "epoch_us",
    # "epoch_ns", "iso8601", or a strftime pattern.
    timestamp_format: str | None = None
    # Timezone name applied to tz-naive string timestamps, e.g. "US/Eastern".
    timestamp_tz: str | None = None
    # Needed for transform="enum": vendor value → schema value.
    enum_map: dict[str, str] | None = None
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass
class ImportConfig:
    """Everything needed to import data for a single security, exchange and schema type."""

    # Schema the rows are encoded as.
    schema_type: SchemaType
    # Identifiers of the security and exchange the data belongs to.
    security_id: int
    exchange_id: int
    # Per-column conversion rules.
    field_mappings: list[FieldMapping]
    # target_field whose timestamp decides which minute chunk a row lands in.
    timestamp_field: str = "timestamp_event"
    # Replaces the default merged bucket (gnome-market-data-merged-{STAGE}) when set.
    bucket: str | None = None
    # Fixed values for SBE fields that the source data does not provide.
    defaults: dict[str, Any] = field(default_factory=dict)
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
|
|
5
|
+
# Fixed-point multipliers: prices carry 9 decimal places, sizes carry 6.
_PRICE_SCALE = 1_000_000_000
_SIZE_SCALE = 1_000_000


def scale_price(value: float | int) -> int:
    """Return ``value`` as a fixed-point price integer, rounded to the nearest unit."""
    scaled = float(value) * _PRICE_SCALE
    return int(round(scaled))


def scale_size(value: float | int) -> int:
    """Return ``value`` as a fixed-point size integer, rounded to the nearest unit."""
    scaled = float(value) * _SIZE_SCALE
    return int(round(scaled))
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def parse_timestamp_ns(value, fmt: str, tz: str | None = None) -> int:
    """Convert a value to nanoseconds since UTC epoch.

    Args:
        value: Epoch number (int, float or numeric string) or a date/time string.
        fmt: "epoch_s", "epoch_ms", "epoch_us", "epoch_ns", "iso8601", or a strftime pattern.
        tz: timezone name for tz-naive string sources (e.g. "US/Eastern"). Ignored for
            epoch formats. A tz-naive string with no ``tz`` is taken as UTC.

    Returns:
        Nanoseconds since the UTC epoch. Fractional epoch values are rounded to
        the nearest nanosecond, matching ``apply_timestamp_transform``.
    """
    if fmt == "epoch_ns":
        return int(value)

    epoch_multipliers = {
        "epoch_us": 1_000,
        "epoch_ms": 1_000_000,
        "epoch_s": 1_000_000_000,
    }
    multiplier = epoch_multipliers.get(fmt)
    if multiplier is not None:
        # Integer inputs (Python or numpy ints) are scaled with exact integer
        # arithmetic: a nanosecond count near 1.7e18 exceeds float64's 53-bit
        # mantissa, so multiplying in float would shift the result by up to
        # 128 ns.
        if hasattr(value, "__index__"):
            return int(value) * multiplier
        as_float = float(value)
        # Scale the whole and fractional parts separately so the whole part
        # stays exact and only the sub-unit remainder goes through float math.
        whole = int(as_float)
        return whole * multiplier + round((as_float - whole) * multiplier)

    # String / datetime-like formats
    ts = pd.to_datetime(value, format=None if fmt == "iso8601" else fmt, utc=False)
    if ts.tzinfo is None and tz is not None:
        ts = ts.tz_localize(tz).tz_convert("UTC")
    elif ts.tzinfo is not None:
        ts = ts.tz_convert("UTC")
    # pd.Timestamp.value is nanoseconds since epoch
    return ts.value
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def apply_price_transform(series: pd.Series) -> pd.Series:
    """Vectorized price scaling: float values → rounded int64 fixed-point prices."""
    as_float = series.astype(float)
    scaled = as_float * _PRICE_SCALE
    return scaled.round().astype("int64")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def apply_size_transform(series: pd.Series) -> pd.Series:
    """Vectorized size scaling: float values → rounded int64 fixed-point sizes."""
    as_float = series.astype(float)
    scaled = as_float * _SIZE_SCALE
    return scaled.round().astype("int64")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def apply_timestamp_transform(series: pd.Series, fmt: str, tz: str | None) -> pd.Series:
    """Vectorized timestamp conversion → int64 nanoseconds since epoch.

    Args:
        series: Epoch numbers or date/time strings.
        fmt: "epoch_s", "epoch_ms", "epoch_us", "epoch_ns", "iso8601", or a strftime pattern.
        tz: Timezone name for tz-naive string sources; ignored for epoch formats.
            Tz-naive strings with no ``tz`` are taken as UTC.

    Returns:
        An int64 Series of nanoseconds since the UTC epoch.
    """
    if fmt == "epoch_ns":
        return series.astype("int64")

    epoch_multipliers = {
        "epoch_us": 1_000,
        "epoch_ms": 1_000_000,
        "epoch_s": 1_000_000_000,
    }
    multiplier = epoch_multipliers.get(fmt)
    if multiplier is not None:
        # Integer columns are scaled in int64: going through float64 would
        # lose nanosecond precision, because values near 1.7e18 do not fit in
        # float64's 53-bit mantissa.
        if series.dtype.kind in "iu":
            return series.astype("int64") * multiplier
        return (series.astype(float) * multiplier).round().astype("int64")

    # String-based: use pd.to_datetime then extract ns value
    fmt_arg = None if fmt == "iso8601" else fmt
    parsed = pd.to_datetime(series, format=fmt_arg, utc=False)
    if parsed.dt.tz is None and tz is not None:
        parsed = parsed.dt.tz_localize(tz).dt.tz_convert("UTC")
    elif parsed.dt.tz is not None:
        parsed = parsed.dt.tz_convert("UTC")
    return parsed.astype("int64")
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class L2Book:
    """Reconstructs a top-N order book from incremental L2 updates.

    Mirrors the depth computation in the Java MbpBufferBook/Mbp10Book gateways:
    depth = the shallowest (minimum) level index where the top-N changed.
    Returns None when the update falls entirely outside the top-N visible levels.
    """

    NUM_LEVELS = 10

    def __init__(self) -> None:
        # Full book per side: price -> amount.
        self._bids: dict[float, float] = {}
        self._asks: dict[float, float] = {}
        # Cached top-N (price, amount) pairs per side, best price first.
        self._top_bids: list[tuple[float, float]] = []
        self._top_asks: list[tuple[float, float]] = []

    def clear(self) -> None:
        """Drop every level on both sides along with the cached top-N views."""
        self._bids.clear()
        self._asks.clear()
        self._top_bids = []
        self._top_asks = []

    def update(self, side: str, price: float, amount: float) -> int | None:
        """Apply one L2 update. Returns depth if the top-N changed, else None.

        Args:
            side: "bid" updates the bid side; any other value updates the ask side.
            price: Price level being set or removed.
            amount: New amount at ``price``; zero removes the level.
        """
        is_bid = side == "bid"
        book = self._bids if is_bid else self._asks
        if amount == 0.0:
            book.pop(price, None)
        else:
            book[price] = amount

        prev_bids, prev_asks = self._top_bids, self._top_asks
        # An update touches exactly one side, so only that side's top-N is
        # rebuilt; the other side's cached view is still current.
        if is_bid:
            new_bids = sorted(book.items(), reverse=True)[: self.NUM_LEVELS]
            new_asks = prev_asks
        else:
            new_bids = prev_bids
            new_asks = sorted(book.items())[: self.NUM_LEVELS]

        depth = self._shallowest_change(prev_bids, new_bids, prev_asks, new_asks)

        self._top_bids = new_bids
        self._top_asks = new_asks
        return depth

    def top_levels(self) -> tuple[list[tuple[float, float]], list[tuple[float, float]]]:
        """Return (top_bids, top_asks) as lists of (price, amount), sorted best-first."""
        return self._top_bids, self._top_asks

    def _shallowest_change(
        self,
        prev_bids: list[tuple[float, float]],
        new_bids: list[tuple[float, float]],
        prev_asks: list[tuple[float, float]],
        new_asks: list[tuple[float, float]],
    ) -> int | None:
        """Return the first level index at which either side differs, else None.

        A level missing from one snapshot is treated as None, so a level that
        appears or disappears counts as a change at that index.
        """
        for i in range(self.NUM_LEVELS):
            prev_bid = prev_bids[i] if i < len(prev_bids) else None
            new_bid = new_bids[i] if i < len(new_bids) else None
            prev_ask = prev_asks[i] if i < len(prev_asks) else None
            new_ask = new_asks[i] if i < len(new_asks) else None
            if prev_bid != new_bid or prev_ask != new_ask:
                return i
        return None
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from datetime import date, timedelta
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TardisClient:
    """Thin wrapper around the tardis-dev Python package for downloading market data CSV files."""

    def __init__(self, api_key: str | None = None):
        """Check that tardis-dev is importable and resolve the API key.

        Args:
            api_key: Tardis API key. When omitted or empty, the
                ``TARDIS_API_KEY`` environment variable is used, falling back
                to an empty string.

        Raises:
            ImportError: if the tardis-dev package is not installed.
        """
        try:
            import tardis_dev  # noqa: F401
        except ImportError as exc:
            # Chain the original error so the real import failure stays visible.
            raise ImportError(
                "tardis-dev is required for Tardis market data imports. "
                "Install it with: poetry install -E tardis"
            ) from exc
        self._api_key = api_key or os.environ.get("TARDIS_API_KEY", "")

    def download(
        self,
        exchange: str,
        data_types: list[str],
        day: date,
        symbols: list[str],
        dest_dir: Path,
    ) -> None:
        """Download all data_types for a single day to dest_dir.

        Uses tardis-dev's download_datasets which handles auth, retries, and file naming.

        Args:
            exchange: Tardis exchange name.
            data_types: Dataset types to fetch.
            day: The day to download.
            symbols: Symbols to fetch.
            dest_dir: Directory the files are written to.
        """
        from tardis_dev import download_datasets

        # The request covers [day, day + 1 day), formatted as ISO dates.
        # NOTE(review): assumes download_datasets treats to_date as exclusive — confirm.
        from_date = day.strftime("%Y-%m-%d")
        to_date = (day + timedelta(days=1)).strftime("%Y-%m-%d")

        download_datasets(
            exchange=exchange,
            data_types=data_types,
            from_date=from_date,
            to_date=to_date,
            symbols=symbols,
            api_key=self._api_key,
            download_dir=str(dest_dir),
        )

    def get_exchange_details(self, exchange: str) -> dict:
        """Return exchange metadata from the Tardis API (available symbols, date ranges)."""
        from tardis_dev import get_exchange_details

        return get_exchange_details(exchange)
|