gnomepy 2.2.4__tar.gz → 2.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. {gnomepy-2.2.4 → gnomepy-2.3.0}/PKG-INFO +12 -8
  2. gnomepy-2.3.0/gnomepy/__init__.py +83 -0
  3. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/cli.py +43 -0
  4. gnomepy-2.3.0/gnomepy/importer/__init__.py +10 -0
  5. gnomepy-2.3.0/gnomepy/importer/chunker.py +28 -0
  6. gnomepy-2.3.0/gnomepy/importer/encoder.py +53 -0
  7. gnomepy-2.3.0/gnomepy/importer/import_job.py +129 -0
  8. gnomepy-2.3.0/gnomepy/importer/mapping.py +44 -0
  9. gnomepy-2.3.0/gnomepy/importer/scaling.py +68 -0
  10. gnomepy-2.3.0/gnomepy/importer/tardis/__init__.py +7 -0
  11. gnomepy-2.3.0/gnomepy/importer/tardis/book.py +61 -0
  12. gnomepy-2.3.0/gnomepy/importer/tardis/client.py +51 -0
  13. gnomepy-2.3.0/gnomepy/importer/tardis/importer.py +167 -0
  14. gnomepy-2.3.0/gnomepy/importer/tardis/mappings.py +165 -0
  15. gnomepy-2.3.0/gnomepy/importer/uploader.py +30 -0
  16. gnomepy-2.3.0/gnomepy/importer/validators.py +42 -0
  17. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/backtest/runner.py +45 -0
  18. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/datastore.py +12 -0
  19. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/metadata.py +5 -0
  20. gnomepy-2.3.0/gnomepy/registry/__init__.py +31 -0
  21. gnomepy-2.3.0/gnomepy/registry/api.py +189 -0
  22. gnomepy-2.3.0/gnomepy/registry/types.py +146 -0
  23. {gnomepy-2.2.4 → gnomepy-2.3.0}/pyproject.toml +17 -4
  24. gnomepy-2.2.4/gnomepy/__init__.py +0 -131
  25. gnomepy-2.2.4/gnomepy/registry/__init__.py +0 -0
  26. gnomepy-2.2.4/gnomepy/registry/api.py +0 -70
  27. gnomepy-2.2.4/gnomepy/registry/types.py +0 -35
  28. {gnomepy-2.2.4 → gnomepy-2.3.0}/README.md +0 -0
  29. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/_fs.py +0 -0
  30. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/auth.py +0 -0
  31. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/config.py +0 -0
  32. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/explorer/__init__.py +0 -0
  33. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/explorer/app.py +0 -0
  34. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/explorer/data.py +0 -0
  35. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/explorer/panels/__init__.py +0 -0
  36. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/explorer/panels/event_log.py +0 -0
  37. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/explorer/panels/pnl.py +0 -0
  38. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/explorer/panels/price.py +0 -0
  39. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/explorer/panels/signals.py +0 -0
  40. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/explorer/styles.py +0 -0
  41. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/__init__.py +0 -0
  42. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/_classpath.py +0 -0
  43. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/_jvm.py +0 -0
  44. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/backtest/__init__.py +0 -0
  45. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/backtest/config.py +0 -0
  46. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/backtest/orders.py +0 -0
  47. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/backtest/strategy.py +0 -0
  48. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/cache.py +0 -0
  49. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/enums.py +0 -0
  50. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/market_data.py +0 -0
  51. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/oms.py +0 -0
  52. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/recorder.py +0 -0
  53. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/sbe.py +0 -0
  54. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/schemas.py +0 -0
  55. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/java/statics.py +0 -0
  56. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/remote.py +0 -0
  57. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/reporting/__init__.py +0 -0
  58. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/reporting/metrics.py +0 -0
  59. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/reporting/plots.py +0 -0
  60. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/reporting/report.py +0 -0
  61. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/sweep.py +0 -0
  62. {gnomepy-2.2.4 → gnomepy-2.3.0}/gnomepy/utils.py +0 -0
@@ -1,21 +1,25 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gnomepy
3
- Version: 2.2.4
3
+ Version: 2.3.0
4
4
  Summary:
5
5
  Author: mprey
6
6
  Author-email: masonprey7@gmail.com
7
7
  Requires-Python: >=3.13,<3.14
8
8
  Classifier: Programming Language :: Python :: 3
9
9
  Classifier: Programming Language :: Python :: 3.13
10
+ Provides-Extra: backtest
11
+ Requires-Dist: anthropic (>=0.40.0,<1.0.0) ; extra == "backtest"
10
12
  Requires-Dist: boto3 (>=1.36.18,<2.0.0)
13
+ Requires-Dist: boto3-stubs (>=1.42.73,<1.43.0) ; extra == "backtest"
11
14
  Requires-Dist: click (>=8.1,<9.0)
12
- Requires-Dist: dash (>=2.18,<3.0)
13
- Requires-Dist: dash-bootstrap-components (>=1.6,<2.0)
14
- Requires-Dist: jpype1 (>=1.5.0,<2.0.0)
15
- Requires-Dist: numpy (>=2.2.4,<3.0.0)
16
- Requires-Dist: pandas (>=2.2.3,<3.0.0)
17
- Requires-Dist: plotly (>=6.6.0,<6.7.0)
18
- Requires-Dist: pyarrow (>=23.0.1,<24.0.0)
15
+ Requires-Dist: dash (>=2.18,<3.0) ; extra == "backtest"
16
+ Requires-Dist: dash-bootstrap-components (>=1.6,<2.0) ; extra == "backtest"
17
+ Requires-Dist: jpype1 (>=1.5.0,<2.0.0) ; extra == "backtest"
18
+ Requires-Dist: numpy (>=2.2.4,<3.0.0) ; extra == "backtest"
19
+ Requires-Dist: pandas (>=2.2.3,<3.0.0) ; extra == "backtest"
20
+ Requires-Dist: pandas-stubs (>=2.3.3,<2.4.0) ; extra == "backtest"
21
+ Requires-Dist: plotly (>=6.6.0,<6.7.0) ; extra == "backtest"
22
+ Requires-Dist: pyarrow (>=23.0.1,<24.0.0) ; extra == "backtest"
19
23
  Requires-Dist: pytz (>=2025.1,<2026.0)
20
24
  Requires-Dist: pyyaml (>=6.0,<7.0)
21
25
  Requires-Dist: requests (>=2.32.3,<3.0.0)
@@ -0,0 +1,83 @@
1
+ """gnomepy — backtesting infrastructure for the gnome trading system.
2
+
3
+ Public API:
4
+
5
+ from gnomepy import (
6
+ Strategy, Backtest, run_backtest,
7
+ BacktestConfig, ListingSimConfig, ExchangeProfileConfig,
8
+ StrategyConfig, RiskConfig,
9
+ StaticFeeConfig, StaticLatencyConfig, GaussianLatencyConfig,
10
+ OptimisticQueueConfig, RiskAverseQueueConfig, ProbabilisticQueueConfig,
11
+ Intent, ExecutionReport, OmsView,
12
+ SchemaType, Side, Action,
13
+ Mbp10Schema, ...
14
+ )
15
+ """
16
+ import importlib
17
+
18
# Public name -> (module path, attribute name). Every export is published
# under the same name it has in its defining module, so the table is expanded
# from a per-module listing. Insertion order follows the order written here.
_LAZY_IMPORTS: dict[str, tuple[str, str]] = {
    public_name: (module_path, public_name)
    for module_path, public_names in (
        (
            "gnomepy.java.backtest.config",
            (
                "BacktestConfig",
                "ExchangeProfileConfig",
                "GaussianLatencyConfig",
                "ListingSimConfig",
                "OptimisticQueueConfig",
                "ProbabilisticQueueConfig",
                "RiskAverseQueueConfig",
                "RiskConfig",
                "StaticFeeConfig",
                "StaticLatencyConfig",
                "StrategyConfig",
            ),
        ),
        ("gnomepy.java.backtest.orders", ("ExecutionReport",)),
        ("gnomepy.java.backtest.runner", ("Backtest", "run_backtest")),
        ("gnomepy.java.cache", ("MarketDataCache",)),
        ("gnomepy.java.backtest.strategy", ("Strategy",)),
        ("gnomepy.java.datastore", ("DataStore",)),
        (
            "gnomepy.java.enums",
            (
                "Action",
                "ExecType",
                "OrderStatus",
                "OrderType",
                "SchemaType",
                "Side",
                "TimeInForce",
            ),
        ),
        (
            "gnomepy.java.oms",
            ("Intent", "OmsView", "PositionInfo", "TrackedOrderInfo"),
        ),
        ("gnomepy.java.recorder", ("BacktestResults",)),
        (
            "gnomepy.java.schemas",
            (
                "Bbo1mSchema",
                "Bbo1sSchema",
                "BboSchema",
                "MboSchema",
                "Mbp1Schema",
                "Mbp10Schema",
                "Ohlcv1hSchema",
                "Ohlcv1mSchema",
                "Ohlcv1sSchema",
                "OhlcvSchema",
                "Schema",
                "TradesSchema",
                "wrap_schema",
            ),
        ),
        ("gnomepy.java.statics", ("Scales",)),
        ("gnomepy.metadata", ("BacktestMetadata",)),
        ("gnomepy.reporting", ("BacktestReport",)),
        (
            "gnomepy.reporting.metrics",
            ("Curves", "build_curves", "compute_sharpe"),
        ),
        ("gnomepy.reporting.plots", ("ReportSection",)),
        ("gnomepy.utils", ("generate_backtest_id", "uuid7")),
    )
    for public_name in public_names
}
71
+
72
+
73
def __getattr__(name: str):
    """Resolve a lazily exported name the first time it is accessed.

    Looks ``name`` up in ``_LAZY_IMPORTS``, imports the module that defines
    it, and stores the resolved object in this module's globals so later
    accesses no longer go through this hook.

    Raises:
        AttributeError: if ``name`` is not one of the lazy exports.
    """
    target = _LAZY_IMPORTS.get(name)
    if target is None:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    module_path, attribute = target
    resolved = getattr(importlib.import_module(module_path), attribute)
    globals()[name] = resolved
    return resolved
81
+
82
+
83
# The public API is exactly the set of lazily importable names, in table order.
__all__ = [*_LAZY_IMPORTS]
@@ -170,6 +170,49 @@ def _human_size(n: int) -> str:
170
170
  return f"{n:.1f} PB"
171
171
 
172
172
 
173
+ # ---------------------------------------------------------------------------
174
+ # Import commands
175
+ # ---------------------------------------------------------------------------
176
+
177
@main.group("import")
def import_cmd() -> None:
    """Import historical market data from external vendors."""
    # Intentionally empty: this function only registers the "import" command
    # group on ``main``. Vendor-specific subcommands attach themselves to it
    # with ``@import_cmd.command(...)``.
180
+
181
+
182
@import_cmd.command("tardis")
@click.option("--exchange", required=True, help="Tardis exchange name (e.g., binance-futures, deribit)")
@click.option("--symbols", required=True, help="Comma-separated symbols (e.g., BTCUSDT,ETHUSDT)")
@click.option("--start", required=True, type=click.DateTime(formats=["%Y-%m-%d"]), help="Start date inclusive")
@click.option("--end", required=True, type=click.DateTime(formats=["%Y-%m-%d"]), help="End date inclusive")
@click.option("--dry-run", is_flag=True, help="Validate without uploading to S3")
@click.option("--bucket", default=None, help="Override S3 bucket")
def import_tardis(
    exchange: str,
    symbols: str,
    start,
    end,
    dry_run: bool,
    bucket: str | None,
) -> None:
    """Import Tardis incremental L2 + trades data as MBP_10 into gnome market data."""
    # Local import: the importer package is only loaded when this command runs.
    from gnomepy.importer.tardis import TardisImporter, TardisImportRequest

    # Drop blank entries so a stray or trailing comma ("BTCUSDT,,ETHUSDT",
    # "BTCUSDT,") never reaches the importer as an empty symbol.
    symbol_list = [part.strip() for part in symbols.split(",") if part.strip()]
    if not symbol_list:
        raise click.BadParameter("at least one symbol is required", param_hint="--symbols")

    # Both bounds are inclusive, so start == end is a valid one-day import.
    if end < start:
        raise click.BadParameter("end date must not be before start date", param_hint="--end")

    request = TardisImportRequest(
        exchange=exchange,
        symbols=symbol_list,
        start_date=start.date(),
        end_date=end.date(),
        bucket=bucket,
        dry_run=dry_run,
    )
    results = TardisImporter().run(request)
    for r in results:
        click.echo(f"{r.exchange} / {r.symbol} (security_id={r.security_id}, exchange_id={r.exchange_id})")
        click.echo(f" processed: {r.days_processed} skipped: {r.days_skipped} records: {r.total_records}")
        # Per-symbol errors go to stderr so they can be separated from the summary.
        for err in r.errors:
            click.echo(f" error: {err}", err=True)
214
+
215
+
173
216
  # ---------------------------------------------------------------------------
174
217
  # Backtest commands
175
218
  # ---------------------------------------------------------------------------
@@ -0,0 +1,10 @@
1
+ from gnomepy.importer.import_job import DryRunResult, ImportJob, ImportResult
2
+ from gnomepy.importer.mapping import FieldMapping, ImportConfig
3
+
4
+ __all__ = [
5
+ "FieldMapping",
6
+ "ImportConfig",
7
+ "ImportJob",
8
+ "ImportResult",
9
+ "DryRunResult",
10
+ ]
@@ -0,0 +1,28 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime, timezone
4
+
5
+ import pandas as pd
6
+
7
+
8
def chunk_by_minute(df: pd.DataFrame, ts_ns: pd.Series) -> dict[datetime, pd.DataFrame]:
    """Split df into per-minute buckets using ts_ns (int64 nanoseconds UTC).

    Args:
        df: Source rows. The frame is copied; the caller's object is not modified.
        ts_ns: Event timestamp for each row of ``df``, in nanoseconds since the
            UTC epoch. It is assigned onto the copy as a column, so it must
            align with ``df``'s index.

    Returns:
        A dict mapping naive UTC datetime (truncated to the minute) to the
        subset of ``df`` in that minute. Buckets are inserted in ascending time
        order. Rows inside a bucket are sorted by timestamp, and rows sharing a
        timestamp keep their original relative order.
    """
    _MINUTE_NS = 60_000_000_000
    _SECOND_NS = 1_000_000_000

    minute_ns = (ts_ns // _MINUTE_NS) * _MINUTE_NS
    df = df.copy()
    df["__ts_ns"] = ts_ns
    df["__minute_ns"] = minute_ns

    result: dict[datetime, pd.DataFrame] = {}
    for bucket_ns, group in df.groupby("__minute_ns", sort=True):
        # A stable sort is required: the default quicksort may reorder rows
        # with identical timestamps, and event order within a tick matters.
        group = group.sort_values("__ts_ns", kind="stable").drop(columns=["__ts_ns", "__minute_ns"])
        # Naive UTC datetime matching Java LocalDateTime used in MarketDataEntry.
        # bucket_ns is a whole number of minutes, so integer division is exact.
        dt = datetime.fromtimestamp(int(bucket_ns) // _SECOND_NS, tz=timezone.utc).replace(tzinfo=None)
        result[dt] = group

    return result
@@ -0,0 +1,53 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ import pandas as pd
6
+
7
+ from gnomepy.importer.mapping import FieldMapping, ImportConfig
8
+ from gnomepy.importer.scaling import parse_timestamp_ns, scale_price, scale_size
9
+
10
+
11
def _apply_mapping(value, mapping: FieldMapping) -> Any:
    """Convert one source cell into the value expected by the target SBE field.

    Args:
        value: Raw cell value from the source column.
        mapping: Field mapping whose ``transform`` selects the conversion.

    Returns:
        ``None`` for missing values (as judged by ``pd.isna``); otherwise the
        converted value: an int for "none", "price", "size"/"volume" and
        "timestamp", or the mapped entry for "enum".

    Raises:
        ValueError: for an unknown transform, an "enum" mapping without an
            ``enum_map``, or a value that has no entry in ``enum_map``.
    """
    if pd.isna(value):
        return None

    transform = mapping.transform
    if transform == "none":
        return int(value)
    if transform == "price":
        return scale_price(value)
    if transform in ("size", "volume"):
        return scale_size(value)
    if transform == "timestamp":
        return parse_timestamp_ns(value, mapping.timestamp_format, mapping.timestamp_tz)
    if transform == "enum":
        # enum_map defaults to None on FieldMapping; fail with a clear message
        # instead of an AttributeError on ``None.get``.
        if mapping.enum_map is None:
            raise ValueError(f"transform 'enum' requires an enum_map for field {mapping.target_field!r}")
        # Keys are matched on the string form of the source value.
        mapped = mapping.enum_map.get(str(value))
        if mapped is None:
            raise ValueError(f"enum_map has no entry for value {value!r} in field {mapping.target_field!r}")
        return mapped
    raise ValueError(f"Unknown transform {mapping.transform!r} for field {mapping.target_field!r}")
28
+
29
+
30
def encode_chunk(chunk: pd.DataFrame, config: ImportConfig) -> bytes:
    """Encode a minute-chunk DataFrame to concatenated SBE bytes.

    Requires the JVM to be started before calling.

    Args:
        chunk: Rows to encode, one SBE record per row, in row order.
        config: Import configuration supplying the schema type, the
            exchange/security IDs, constant defaults and per-column mappings.

    Returns:
        The encoded records joined back-to-back (empty bytes for an empty chunk).
    """
    from gnomepy.java.schemas import get_schema_class

    schema_cls = get_schema_class(config.schema_type)
    mappings = list(config.field_mappings)

    # Loop-invariant part of every record: config-level IDs first, then
    # defaults. Per-row field mappings are applied last and override both.
    base_kwargs: dict[str, Any] = {
        "exchange_id": config.exchange_id,
        "security_id": config.security_id,
    }
    base_kwargs.update(config.defaults)

    if mappings:
        # Iterate column-wise so every value keeps its own column dtype.
        # ``DataFrame.iterrows`` builds one Series per row and coerces it to a
        # common dtype, which turns int64 nanosecond timestamps into float64
        # (losing precision) whenever all columns are numeric.
        rows = zip(*(chunk[m.source_column] for m in mappings))
    else:
        # No mapped columns: still emit one defaults-only record per row.
        rows = (() for _ in range(len(chunk)))

    parts: list[bytes] = []
    for values in rows:
        kwargs = dict(base_kwargs)
        for mapping, value in zip(mappings, values):
            kwargs[mapping.target_field] = _apply_mapping(value, mapping)
        parts.append(schema_cls(**kwargs).encode())

    return b"".join(parts)
@@ -0,0 +1,129 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+
7
+ import pandas as pd
8
+
9
+ from gnomepy.importer.chunker import chunk_by_minute
10
+ from gnomepy.importer.encoder import encode_chunk
11
+ from gnomepy.importer.mapping import ImportConfig
12
+ from gnomepy.importer.scaling import apply_timestamp_transform
13
+ from gnomepy.importer.uploader import build_s3_key, compress, default_merged_bucket, upload
14
+ from gnomepy.importer.validators import validate
15
+
16
+
17
@dataclass
class ImportResult:
    """Summary returned by ``ImportJob.run`` once all chunks have been uploaded."""

    files_uploaded: int  # incremented once per uploaded minute chunk
    total_records: int  # sum of the row counts of the uploaded chunks
    minutes_covered: int  # number of minute buckets produced from the source
22
+
23
+
24
@dataclass
class DryRunResult:
    """Outcome of ``ImportJob.dry_run``, which validates without uploading."""

    is_valid: bool  # False when validation returned at least one error
    errors: list[str]  # validation messages; empty when is_valid is True
    minutes_count: int  # number of minute buckets; 0 when validation failed
    total_records: int  # rows in the loaded source; 0 when validation failed
    # S3 keys for at most the first five minute buckets, in ascending time order
    sample_keys: list[str] = field(default_factory=list)
31
+
32
+
33
class ImportJob:
    """Orchestrates converting a CSV/Parquet file into the gnome market data format and uploading to S3."""

    def __init__(self, config: ImportConfig, s3_client=None):
        """Store the import config and an optional pre-built S3 client.

        Args:
            config: Import configuration for one security/exchange/schema.
            s3_client: Client used for uploads. When omitted, a boto3 S3
                client is created on first use.
        """
        self.config = config
        self._s3 = s3_client

    def _s3_client(self):
        """Return the S3 client, creating a default boto3 client on first use."""
        if self._s3 is None:
            import boto3
            self._s3 = boto3.client("s3")
        return self._s3

    def _load(self, source, file_format: str = "auto") -> pd.DataFrame:
        """Load ``source`` into a DataFrame.

        Args:
            source: A DataFrame (returned as a copy) or a path-like object.
            file_format: "parquet", "csv", or "auto" to choose by extension.
                Any value other than "parquet" is read as CSV.
        """
        if isinstance(source, pd.DataFrame):
            return source.copy()
        path = str(source)
        fmt = file_format
        if fmt == "auto":
            # Case-insensitive so "DATA.PARQUET" is not misread as CSV.
            fmt = "parquet" if path.lower().endswith(".parquet") else "csv"
        if fmt == "parquet":
            return pd.read_parquet(path)
        return pd.read_csv(path)

    def _timestamp_ns(self, df: pd.DataFrame) -> pd.Series:
        """Return the chunking timestamps of ``df`` as int64 nanoseconds.

        Raises:
            ValueError: if no field mapping targets ``config.timestamp_field``.
        """
        ts_mapping = next(
            (m for m in self.config.field_mappings if m.target_field == self.config.timestamp_field),
            None,
        )
        if ts_mapping is None:
            # Without the default, next() would leak a bare StopIteration.
            raise ValueError(
                f"No field mapping targets timestamp field {self.config.timestamp_field!r}"
            )
        return apply_timestamp_transform(
            df[ts_mapping.source_column], ts_mapping.timestamp_format, ts_mapping.timestamp_tz
        )

    def _bucket(self) -> str:
        """Return the configured bucket, or the default merged bucket when unset."""
        return self.config.bucket or default_merged_bucket()

    def dry_run(self, source, file_format: str = "auto") -> DryRunResult:
        """Validate the config and source data without uploading anything.

        Returns:
            A ``DryRunResult``. On validation failure it carries the errors
            and zero counts; otherwise the bucket/record counts and the S3
            keys of up to the first five minutes.
        """
        df = self._load(source, file_format)
        errors = validate(self.config, df)
        if errors:
            return DryRunResult(is_valid=False, errors=errors, minutes_count=0, total_records=0)

        ts_ns = self._timestamp_ns(df)
        chunks = chunk_by_minute(df, ts_ns)
        config = self.config
        sample_keys = [
            build_s3_key(config.security_id, config.exchange_id, config.schema_type, dt)
            for dt in sorted(chunks)[:5]
        ]
        return DryRunResult(
            is_valid=True,
            errors=[],
            minutes_count=len(chunks),
            total_records=len(df),
            sample_keys=sample_keys,
        )

    def run(self, source, file_format: str = "auto") -> ImportResult:
        """Encode source data and upload to S3.

        Validates first; raises ValueError if config is invalid.
        Requires the JVM to be started before calling (for SBE encoding).

        Returns:
            An ``ImportResult`` with the number of files uploaded, the total
            record count, and the number of minutes covered.
        """
        from gnomepy.java._jvm import ensure_jvm_started

        df = self._load(source, file_format)
        errors = validate(self.config, df)
        if errors:
            raise ValueError("Import config validation failed:\n" + "\n".join(f" - {e}" for e in errors))

        ensure_jvm_started()
        ts_ns = self._timestamp_ns(df)
        chunks = chunk_by_minute(df, ts_ns)

        bucket = self._bucket()
        s3 = self._s3_client()
        files_uploaded = 0
        total_records = 0

        # Upload in ascending minute order: one compressed object per minute.
        for minute_dt, chunk_df in sorted(chunks.items()):
            raw = encode_chunk(chunk_df, self.config)
            compressed = compress(raw)
            key = build_s3_key(
                self.config.security_id,
                self.config.exchange_id,
                self.config.schema_type,
                minute_dt,
            )
            upload(s3, bucket, key, compressed)
            files_uploaded += 1
            total_records += len(chunk_df)

        return ImportResult(
            files_uploaded=files_uploaded,
            total_records=total_records,
            minutes_covered=len(chunks),
        )
@@ -0,0 +1,44 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Any
5
+
6
+ from gnomepy.java.enums import SchemaType
7
+
8
+
9
@dataclass
class FieldMapping:
    """Maps one vendor column to one SBE schema field.

    transform types:
        "none"      — pass through as int (e.g. a pre-scaled integer field)
        "price"     — float → int64, scaled by 1e9
        "size"      — float → int64, scaled by 1e6 (covers size and volume fields;
                      the encoder also accepts "volume" as an alias)
        "timestamp" — various formats → int64 nanoseconds since epoch
        "enum"      — string → string via enum_map (e.g. "buy" → "Bid")
    """

    # Column name in the source data to read from
    source_column: str
    # Name of the SBE schema field (constructor keyword) to populate
    target_field: str
    # One of the transform types listed in the class docstring
    transform: str = "none"
    # Required when transform="timestamp"
    timestamp_format: str | None = None  # "epoch_s", "epoch_ms", "epoch_us", "epoch_ns", "iso8601", or strftime
    timestamp_tz: str | None = None  # tz name for tz-naive string sources, e.g. "US/Eastern"
    # Required when transform="enum"; keys are matched on str(source value)
    enum_map: dict[str, str] | None = None
29
+
30
+
31
@dataclass
class ImportConfig:
    """Complete configuration for one import job (one security, exchange, schema type)."""

    # Target schema; selects the SBE schema class and is part of the S3 key
    schema_type: SchemaType
    # Written into every encoded record and used in the S3 key
    security_id: int
    # Written into every encoded record and used in the S3 key
    exchange_id: int
    # One entry per source column that feeds an SBE field
    field_mappings: list[FieldMapping]
    # Which target_field holds the event timestamp used for minute-chunking
    timestamp_field: str = "timestamp_event"
    # Override the default merged bucket (gnome-market-data-merged-{STAGE})
    bucket: str | None = None
    # Constant values for SBE fields not present in the source data
    defaults: dict[str, Any] = field(default_factory=dict)
@@ -0,0 +1,68 @@
1
+ from __future__ import annotations
2
+
3
+ import pandas as pd
4
+
5
+ _PRICE_SCALE = 1_000_000_000
6
+ _SIZE_SCALE = 1_000_000
7
+
8
+
9
def scale_price(value: float | int) -> int:
    """Return ``value`` as a fixed-point integer: price × ``_PRICE_SCALE``, rounded."""
    scaled = float(value) * _PRICE_SCALE
    return int(round(scaled))
11
+
12
+
13
def scale_size(value: float | int) -> int:
    """Return ``value`` as a fixed-point integer: size × ``_SIZE_SCALE``, rounded."""
    scaled = float(value) * _SIZE_SCALE
    return int(round(scaled))
15
+
16
+
17
def parse_timestamp_ns(value, fmt: str, tz: str | None = None) -> int:
    """Convert a value to nanoseconds since UTC epoch.

    Args:
        value: Epoch number (or numeric string) for the ``epoch_*`` formats,
            otherwise a string / datetime-like accepted by ``pd.to_datetime``.
        fmt: "epoch_s", "epoch_ms", "epoch_us", "epoch_ns", "iso8601", or a
            strftime pattern.
        tz: Timezone name for tz-naive string sources (e.g. "US/Eastern").
            Ignored for epoch formats.

    Returns:
        Nanoseconds since the UTC epoch as a Python int.
    """
    import numbers

    epoch_scale = {
        "epoch_ns": 1,
        "epoch_us": 1_000,
        "epoch_ms": 1_000_000,
        "epoch_s": 1_000_000_000,
    }.get(fmt)

    if epoch_scale is not None:
        if epoch_scale == 1:
            return int(value)
        if isinstance(value, numbers.Integral):
            # Exact integer arithmetic. Going through float64 rounds results
            # above 2**53 ns, which is every present-day timestamp.
            return int(value) * epoch_scale
        # Fractional input: round (not truncate) to match the vectorized
        # apply_timestamp_transform.
        return int(round(float(value) * epoch_scale))

    # String / datetime-like formats
    ts = pd.to_datetime(value, format=None if fmt == "iso8601" else fmt, utc=False)
    if ts.tzinfo is None and tz is not None:
        ts = ts.tz_localize(tz).tz_convert("UTC")
    elif ts.tzinfo is not None:
        ts = ts.tz_convert("UTC")
    # pd.Timestamp.value is nanoseconds since epoch
    return ts.value
40
+
41
+
42
def apply_price_transform(series: pd.Series) -> pd.Series:
    """Vectorized price scaling: float prices × ``_PRICE_SCALE``, rounded to int64."""
    scaled = series.astype(float) * _PRICE_SCALE
    return scaled.round().astype("int64")
44
+
45
+
46
def apply_size_transform(series: pd.Series) -> pd.Series:
    """Vectorized size scaling: float sizes × ``_SIZE_SCALE``, rounded to int64."""
    scaled = series.astype(float) * _SIZE_SCALE
    return scaled.round().astype("int64")
48
+
49
+
50
def apply_timestamp_transform(series: pd.Series, fmt: str, tz: str | None) -> pd.Series:
    """Vectorized timestamp conversion → int64 nanoseconds since epoch.

    Args:
        series: Source timestamps.
        fmt: "epoch_s", "epoch_ms", "epoch_us", "epoch_ns", "iso8601", or a
            strftime pattern.
        tz: Timezone name applied to tz-naive parsed strings. Not used for
            epoch formats.

    Returns:
        An int64 Series with the same index as ``series``.
    """
    epoch_scale = {
        "epoch_ns": 1,
        "epoch_us": 1_000,
        "epoch_ms": 1_000_000,
        "epoch_s": 1_000_000_000,
    }.get(fmt)

    if epoch_scale is not None:
        if epoch_scale == 1:
            return series.astype("int64")
        if pd.api.types.is_integer_dtype(series.dtype):
            # Stay in int64: a float64 round-trip cannot represent
            # present-day nanosecond values exactly (they exceed 2**53).
            return series.astype("int64") * epoch_scale
        return (series.astype(float) * epoch_scale).round().astype("int64")

    # String-based: use pd.to_datetime then extract ns value
    fmt_arg = None if fmt == "iso8601" else fmt
    parsed = pd.to_datetime(series, format=fmt_arg, utc=False)
    if parsed.dt.tz is None and tz is not None:
        parsed = parsed.dt.tz_localize(tz).dt.tz_convert("UTC")
    elif parsed.dt.tz is not None:
        parsed = parsed.dt.tz_convert("UTC")
    return parsed.astype("int64")
@@ -0,0 +1,7 @@
1
+ from gnomepy.importer.tardis.importer import TardisImportRequest, TardisImportResult, TardisImporter
2
+
3
+ __all__ = [
4
+ "TardisImportRequest",
5
+ "TardisImportResult",
6
+ "TardisImporter",
7
+ ]
@@ -0,0 +1,61 @@
1
+ from __future__ import annotations
2
+
3
+
4
class L2Book:
    """Reconstructs a top-N order book from incremental L2 updates.

    Mirrors the depth computation in the Java MbpBufferBook/Mbp10Book gateways:
    depth = the shallowest (minimum) level index where the top-N changed.
    Returns None when the update falls entirely outside the top-N visible levels.
    """

    NUM_LEVELS = 10

    def __init__(self) -> None:
        # Full book per side: price -> amount.
        self._bids: dict[float, float] = {}
        self._asks: dict[float, float] = {}
        # Top-N snapshot as of the last update, best price first.
        self._top_bids: list[tuple[float, float]] = []
        self._top_asks: list[tuple[float, float]] = []

    def clear(self) -> None:
        """Drop all levels on both sides and reset the cached top-N snapshot."""
        self._bids.clear()
        self._asks.clear()
        self._top_bids = []
        self._top_asks = []

    def update(self, side: str, price: float, amount: float) -> int | None:
        """Apply one L2 update. Returns depth if the top-N changed, else None.

        Args:
            side: "bid" updates the bid side; any other value updates the asks.
            price: Price level being set or removed.
            amount: New amount at ``price``; zero removes the level.
        """
        import heapq

        book = self._bids if side == "bid" else self._asks
        if amount == 0.0:
            book.pop(price, None)
        else:
            book[price] = amount

        # Bounded top-N selection gives the same lists as sorting the full
        # side and slicing, without an O(n log n) sort on every update.
        new_bids = heapq.nlargest(self.NUM_LEVELS, self._bids.items())
        new_asks = heapq.nsmallest(self.NUM_LEVELS, self._asks.items())

        depth = self._shallowest_change(self._top_bids, new_bids, self._top_asks, new_asks)

        self._top_bids = new_bids
        self._top_asks = new_asks
        return depth

    def top_levels(self) -> tuple[list[tuple[float, float]], list[tuple[float, float]]]:
        """Return (top_bids, top_asks) as lists of (price, amount), sorted best-first."""
        return self._top_bids, self._top_asks

    def _shallowest_change(
        self,
        prev_bids: list[tuple[float, float]],
        new_bids: list[tuple[float, float]],
        prev_asks: list[tuple[float, float]],
        new_asks: list[tuple[float, float]],
    ) -> int | None:
        """Return the first level index at which either side differs, else None.

        A level missing from a list (fewer than NUM_LEVELS entries) is treated
        as None, so a level appearing or disappearing counts as a change.
        """
        for i in range(self.NUM_LEVELS):
            prev_bid = prev_bids[i] if i < len(prev_bids) else None
            new_bid = new_bids[i] if i < len(new_bids) else None
            prev_ask = prev_asks[i] if i < len(prev_asks) else None
            new_ask = new_asks[i] if i < len(new_asks) else None
            if prev_bid != new_bid or prev_ask != new_ask:
                return i
        return None
@@ -0,0 +1,51 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from datetime import date, timedelta
5
+ from pathlib import Path
6
+
7
+
8
class TardisClient:
    """Thin wrapper around the tardis-dev Python package for downloading market data CSV files."""

    def __init__(self, api_key: str | None = None):
        """Check that tardis-dev is importable and resolve the API key.

        Args:
            api_key: Tardis API key. Falls back to the ``TARDIS_API_KEY``
                environment variable, then to an empty string.

        Raises:
            ImportError: if the ``tardis_dev`` package is not installed.
        """
        try:
            import tardis_dev  # noqa: F401
        except ImportError as exc:
            # The package metadata declares no "tardis" extra, so point at the
            # distribution itself. Chain the original error for debugging.
            raise ImportError(
                "tardis-dev is required for Tardis market data imports. "
                "Install it with: pip install tardis-dev"
            ) from exc
        self._api_key = api_key or os.environ.get("TARDIS_API_KEY", "")

    def download(
        self,
        exchange: str,
        data_types: list[str],
        day: date,
        symbols: list[str],
        dest_dir: Path,
    ) -> None:
        """Download all data_types for a single day to dest_dir.

        Uses tardis-dev's download_datasets which handles auth, retries, and file naming.

        Args:
            exchange: Tardis exchange name.
            data_types: Tardis dataset types to fetch.
            day: The single calendar day to download.
            symbols: Symbols to fetch for that day.
            dest_dir: Directory the files are written to.
        """
        from tardis_dev import download_datasets

        # The request spans [day, day + 1 day) to cover exactly one day.
        from_date = day.strftime("%Y-%m-%d")
        to_date = (day + timedelta(days=1)).strftime("%Y-%m-%d")

        download_datasets(
            exchange=exchange,
            data_types=data_types,
            from_date=from_date,
            to_date=to_date,
            symbols=symbols,
            api_key=self._api_key,
            download_dir=str(dest_dir),
        )

    def get_exchange_details(self, exchange: str) -> dict:
        """Return exchange metadata from the Tardis API (available symbols, date ranges)."""
        from tardis_dev import get_exchange_details
        return get_exchange_details(exchange)