onesecondtrader 0.52.0__tar.gz → 0.54.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/PKG-INFO +1 -1
  2. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/pyproject.toml +1 -1
  3. onesecondtrader-0.54.0/src/onesecondtrader/datafeeds/__init__.py +11 -0
  4. onesecondtrader-0.54.0/src/onesecondtrader/datafeeds/base.py +90 -0
  5. onesecondtrader-0.54.0/src/onesecondtrader/datafeeds/simulated.py +209 -0
  6. onesecondtrader-0.54.0/src/onesecondtrader/secmaster/__init__.py +15 -0
  7. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/secmaster/schema_versions/secmaster_schema_v1.sql +63 -32
  8. onesecondtrader-0.54.0/src/onesecondtrader/secmaster/utils.py +611 -0
  9. onesecondtrader-0.52.0/src/onesecondtrader/secmaster/__init__.py +0 -3
  10. onesecondtrader-0.52.0/src/onesecondtrader/secmaster/utils.py +0 -64
  11. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/LICENSE +0 -0
  12. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/README.md +0 -0
  13. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/__init__.py +0 -0
  14. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/brokers/__init__.py +0 -0
  15. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/brokers/base.py +0 -0
  16. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/brokers/simulated.py +0 -0
  17. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/events/__init__.py +0 -0
  18. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/events/base.py +0 -0
  19. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/events/market/__init__.py +0 -0
  20. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/events/market/bar_processed.py +0 -0
  21. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/events/market/bar_received.py +0 -0
  22. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/events/orders/__init__.py +0 -0
  23. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/events/orders/base.py +0 -0
  24. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/events/orders/expirations.py +0 -0
  25. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/events/orders/fills.py +0 -0
  26. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/events/requests/__init__.py +0 -0
  27. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/events/requests/base.py +0 -0
  28. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/events/requests/order_cancellation.py +0 -0
  29. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/events/requests/order_modification.py +0 -0
  30. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/events/requests/order_submission.py +0 -0
  31. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/events/responses/__init__.py +0 -0
  32. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/events/responses/base.py +0 -0
  33. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/events/responses/cancellations.py +0 -0
  34. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/events/responses/modifications.py +0 -0
  35. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/events/responses/orders.py +0 -0
  36. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/indicators/__init__.py +0 -0
  37. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/indicators/base.py +0 -0
  38. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/indicators/market_fields.py +0 -0
  39. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/indicators/moving_averages.py +0 -0
  40. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/messaging/__init__.py +0 -0
  41. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/messaging/eventbus.py +0 -0
  42. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/messaging/subscriber.py +0 -0
  43. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/models/__init__.py +0 -0
  44. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/models/bar_fields.py +0 -0
  45. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/models/bar_period.py +0 -0
  46. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/models/order_types.py +0 -0
  47. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/models/rejection_reasons.py +0 -0
  48. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/models/trade_sides.py +0 -0
  49. {onesecondtrader-0.52.0 → onesecondtrader-0.54.0}/src/onesecondtrader/secmaster/schema_versions/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: onesecondtrader
3
- Version: 0.52.0
3
+ Version: 0.54.0
4
4
  Summary: The Trading Infrastructure Toolkit for Python. Research, simulate, and deploy algorithmic trading strategies — all in one place.
5
5
  License-File: LICENSE
6
6
  Author: Nils P. Kujath
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "onesecondtrader"
3
- version = "0.52.0"
3
+ version = "0.54.0"
4
4
  description = "The Trading Infrastructure Toolkit for Python. Research, simulate, and deploy algorithmic trading strategies — all in one place."
5
5
  authors = [
6
6
  {name = "Nils P. Kujath",email = "63961429+NilsKujath@users.noreply.github.com"}
@@ -0,0 +1,11 @@
1
+ """
2
+ Provides data feed components for ingesting market data into the system.
3
+ """
4
+
5
+ from .base import DatafeedBase
6
+ from .simulated import SimulatedDatafeed
7
+
8
+ __all__ = [
9
+ "DatafeedBase",
10
+ "SimulatedDatafeed",
11
+ ]
@@ -0,0 +1,90 @@
1
+ from __future__ import annotations
2
+
3
+ import abc
4
+
5
+ from onesecondtrader import events, messaging, models
6
+
7
+
8
+ class DatafeedBase(abc.ABC):
9
+ """
10
+ Abstract base class for market data feed implementations.
11
+
12
+ A data feed is responsible for connecting to an external data source, managing symbol and bar-period subscriptions, and publishing market data events onto the system event bus.
13
+
14
+ Concrete subclasses implement the mechanics of connectivity, subscription handling, and lifecycle management for a specific data source.
15
+ """
16
+
17
+ def __init__(self, event_bus: messaging.EventBus) -> None:
18
+ """
19
+ Initialize the data feed with an event bus.
20
+
21
+ parameters:
22
+ event_bus:
23
+ Event bus used to publish market data events produced by this data feed.
24
+ """
25
+ self._event_bus = event_bus
26
+
27
+ def _publish(self, event: events.EventBase) -> None:
28
+ """
29
+ Publish a market data event to the event bus.
30
+
31
+ This method is intended for use by subclasses to forward incoming data from the external source into the internal event-driven system.
32
+
33
+ parameters:
34
+ event:
35
+ Event instance to be published.
36
+ """
37
+ self._event_bus.publish(event)
38
+
39
+ @abc.abstractmethod
40
+ def connect(self) -> None:
41
+ """
42
+ Establish a connection to the underlying data source.
43
+
44
+ Implementations should perform any required setup, authentication, or resource allocation needed before subscriptions can be registered.
45
+ """
46
+ pass
47
+
48
+ @abc.abstractmethod
49
+ def disconnect(self) -> None:
50
+ """
51
+ Terminate the connection to the underlying data source.
52
+
53
+ Implementations should release resources and ensure that no further events are published after disconnection.
54
+ """
55
+ pass
56
+
57
+ @abc.abstractmethod
58
+ def subscribe(self, symbols: list[str], bar_period: models.BarPeriod) -> None:
59
+ """
60
+ Subscribe to market data for one or more symbols at a given bar period.
61
+
62
+ parameters:
63
+ symbols:
64
+ Instrument symbols to subscribe to, interpreted according to the conventions of the underlying data source.
65
+ bar_period:
66
+ Bar aggregation period specifying the granularity of market data.
67
+ """
68
+ pass
69
+
70
+ @abc.abstractmethod
71
+ def unsubscribe(self, symbols: list[str], bar_period: models.BarPeriod) -> None:
72
+ """
73
+ Cancel existing subscriptions for one or more symbols at a given bar period.
74
+
75
+ parameters:
76
+ symbols:
77
+ Instrument symbols for which subscriptions should be removed.
78
+ bar_period:
79
+ Bar aggregation period associated with the subscriptions.
80
+ """
81
+ pass
82
+
83
+ def wait_until_complete(self) -> None:
84
+ """
85
+ Block until the data feed has completed all pending work.
86
+
87
+ This method may be overridden by subclasses that perform asynchronous ingestion or background processing.
88
+ The default implementation does nothing.
89
+ """
90
+ pass
@@ -0,0 +1,209 @@
1
+ from __future__ import annotations
2
+
3
+ import itertools
4
+ import os
5
+ import sqlite3
6
+ import threading
7
+
8
+ from onesecondtrader import events, messaging, models
9
+ from onesecondtrader.datafeeds.base import DatafeedBase
10
+
11
+ _RTYPE_MAP = {
12
+ models.BarPeriod.SECOND: 32,
13
+ models.BarPeriod.MINUTE: 33,
14
+ models.BarPeriod.HOUR: 34,
15
+ models.BarPeriod.DAY: 35,
16
+ }
17
+
18
+ _RTYPE_TO_BAR_PERIOD = {v: k for k, v in _RTYPE_MAP.items()}
19
+
20
+
21
+ class SimulatedDatafeed(DatafeedBase):
22
+ """
23
+ Simulated market data feed backed by a secmaster SQLite database.
24
+
25
+ This datafeed replays historical OHLCV bars from a secmaster database, resolving symbols
26
+ via time-bounded symbology mappings. Bars are delivered in timestamp order, with all bars
27
+ sharing the same timestamp published before the feed calls `wait_until_system_idle` on the event bus.
28
+
29
+ Subclasses must set `publisher_name`, `dataset`, and `symbol_type` as class attributes to
30
+ scope the feed to a specific data source. The database must contain publishers with numeric
31
+ `source_instrument_id` values; symbol-only publishers (e.g., yfinance) are not supported.
32
+ """
33
+
34
+ db_path: str = ""
35
+ publisher_name: str = ""
36
+ dataset: str = ""
37
+ symbol_type: str = ""
38
+ price_scale: float = 1e9
39
+ start_ts: int | None = None
40
+ end_ts: int | None = None
41
+
42
+ def __init__(self, event_bus: messaging.EventBus) -> None:
43
+ """
44
+ Parameters:
45
+ event_bus:
46
+ Event bus used to publish bar events and synchronize with subscribers.
47
+ """
48
+ super().__init__(event_bus)
49
+ self._db_path = self.db_path or os.environ.get(
50
+ "SECMASTER_DB_PATH", "secmaster.db"
51
+ )
52
+ if not self.publisher_name:
53
+ raise ValueError("publisher_name is required")
54
+ if not self.dataset:
55
+ raise ValueError("dataset is required")
56
+ if not self.symbol_type:
57
+ raise ValueError("symbol_type is required")
58
+ self._subscriptions: set[tuple[str, models.BarPeriod]] = set()
59
+ self._subscriptions_lock = threading.Lock()
60
+ self._connection: sqlite3.Connection | None = None
61
+ self._thread: threading.Thread | None = None
62
+ self._stop_event = threading.Event()
63
+ self._publisher_id: int | None = None
64
+
65
+ def connect(self) -> None:
66
+ """
67
+ Open a connection to the secmaster database and resolve the publisher.
68
+
69
+ If already connected, this method returns immediately.
70
+ """
71
+ if self._connection:
72
+ return
73
+ self._connection = sqlite3.connect(self._db_path, check_same_thread=False)
74
+ self._connection.execute("PRAGMA foreign_keys = ON")
75
+ self._connection.execute("PRAGMA journal_mode = WAL")
76
+ row = self._connection.execute(
77
+ "SELECT publisher_id FROM publishers WHERE name = ? AND dataset = ?",
78
+ (self.publisher_name, self.dataset),
79
+ ).fetchone()
80
+ if row is None:
81
+ raise ValueError(
82
+ f"Publisher not found: {self.publisher_name}/{self.dataset}"
83
+ )
84
+ self._publisher_id = row[0]
85
+
86
+ def disconnect(self) -> None:
87
+ """
88
+ Close the database connection and stop any active streaming.
89
+
90
+ If not connected, this method returns immediately.
91
+ """
92
+ if not self._connection:
93
+ return
94
+ self._stop_event.set()
95
+ if self._thread and self._thread.is_alive():
96
+ self._thread.join()
97
+ self._connection.close()
98
+ self._connection = None
99
+ self._publisher_id = None
100
+
101
+ def subscribe(self, symbols: list[str], bar_period: models.BarPeriod) -> None:
102
+ """
103
+ Register symbols for bar delivery at the specified period.
104
+
105
+ Parameters:
106
+ symbols:
107
+ List of ticker symbols to subscribe.
108
+ bar_period:
109
+ Bar aggregation period for the subscription.
110
+ """
111
+ with self._subscriptions_lock:
112
+ self._subscriptions.update((s, bar_period) for s in symbols)
113
+
114
+ def unsubscribe(self, symbols: list[str], bar_period: models.BarPeriod) -> None:
115
+ """
116
+ Remove symbols from bar delivery at the specified period.
117
+
118
+ Parameters:
119
+ symbols:
120
+ List of ticker symbols to unsubscribe.
121
+ bar_period:
122
+ Bar aggregation period for the subscription.
123
+ """
124
+ with self._subscriptions_lock:
125
+ self._subscriptions.difference_update((s, bar_period) for s in symbols)
126
+
127
+ def wait_until_complete(self) -> None:
128
+ """
129
+ Stream all subscribed bars and block until delivery is complete.
130
+
131
+ Bars are published in timestamp order. After each timestamp batch, the method
132
+ waits for all event bus subscribers to become idle before proceeding.
133
+ """
134
+ with self._subscriptions_lock:
135
+ has_subscriptions = bool(self._subscriptions)
136
+ if not has_subscriptions:
137
+ return
138
+ if self._thread is None or not self._thread.is_alive():
139
+ self._stop_event.clear()
140
+ self._thread = threading.Thread(
141
+ target=self._stream,
142
+ name=self.__class__.__name__,
143
+ daemon=False,
144
+ )
145
+ self._thread.start()
146
+ self._thread.join()
147
+
148
+ def _stream(self) -> None:
149
+ if not self._connection or self._publisher_id is None:
150
+ return
151
+
152
+ with self._subscriptions_lock:
153
+ subscriptions = list(self._subscriptions)
154
+ if not subscriptions:
155
+ return
156
+
157
+ symbols = list({symbol for symbol, _ in subscriptions})
158
+ rtypes = list({_RTYPE_MAP[bp] for _, bp in subscriptions})
159
+ subscription_set = {(symbol, _RTYPE_MAP[bp]) for symbol, bp in subscriptions}
160
+
161
+ params: list = [self._publisher_id, self.symbol_type]
162
+ params.extend(symbols)
163
+ params.extend(rtypes)
164
+ if self.start_ts is not None:
165
+ params.append(self.start_ts)
166
+ if self.end_ts is not None:
167
+ params.append(self.end_ts)
168
+
169
+ query = f"""
170
+ SELECT s.symbol, o.rtype, o.ts_event, o.open, o.high, o.low, o.close, o.volume
171
+ FROM ohlcv o
172
+ JOIN instruments i ON i.instrument_id = o.instrument_id
173
+ JOIN symbology s
174
+ ON s.publisher_ref = i.publisher_ref
175
+ AND s.source_instrument_id = i.source_instrument_id
176
+ AND date(o.ts_event / 1000000000, 'unixepoch') >= s.start_date
177
+ AND date(o.ts_event / 1000000000, 'unixepoch') <= s.end_date
178
+ WHERE i.publisher_ref = ?
179
+ AND s.symbol_type = ?
180
+ AND s.symbol IN ({",".join("?" * len(symbols))})
181
+ AND o.rtype IN ({",".join("?" * len(rtypes))})
182
+ {"AND o.ts_event >= ?" if self.start_ts is not None else ""}
183
+ {"AND o.ts_event <= ?" if self.end_ts is not None else ""}
184
+ ORDER BY o.ts_event, s.symbol
185
+ """
186
+
187
+ rows = self._connection.execute(query, params)
188
+
189
+ def to_bar(row):
190
+ symbol, rtype, ts_event, open_, high, low, close, volume = row
191
+ if (symbol, rtype) not in subscription_set:
192
+ return None
193
+ return events.market.BarReceived(
194
+ ts_event_ns=ts_event,
195
+ symbol=symbol,
196
+ bar_period=_RTYPE_TO_BAR_PERIOD[rtype],
197
+ open=open_ / self.price_scale,
198
+ high=high / self.price_scale,
199
+ low=low / self.price_scale,
200
+ close=close / self.price_scale,
201
+ volume=volume,
202
+ )
203
+
204
+ for _, group in itertools.groupby(rows, key=lambda r: r[2]):
205
+ if self._stop_event.is_set():
206
+ return
207
+ for bar in filter(None, map(to_bar, group)):
208
+ self._publish(bar)
209
+ self._event_bus.wait_until_system_idle()
@@ -0,0 +1,15 @@
1
+ """
2
+ Provides a schema for creating, and utilities for populating, the security master database.
3
+ """
4
+
5
+ from .utils import (
6
+ create_secmaster_db,
7
+ ingest_databento_zip,
8
+ ingest_databento_dbn,
9
+ )
10
+
11
+ __all__ = [
12
+ "create_secmaster_db",
13
+ "ingest_databento_zip",
14
+ "ingest_databento_dbn",
15
+ ]
@@ -4,10 +4,16 @@
4
4
  -- Instrument identity is modeled per publisher namespace and supports either numeric upstream identifiers or symbols.
5
5
  -- Contract specifications and other static reference metadata are intentionally out of scope for this schema and should be stored separately if ingested.
6
6
  --
7
+ -- The schema is explicitly ingestion-safe in the sense that:
8
+ --
9
+ -- 1) publishers are uniquely identified by the pair (`name`, `dataset`), i.e. vendor plus dataset, rather than by vendor alone, allowing multiple feeds per vendor;
10
+ -- 2) symbology admits multiple mappings sharing the same start date by including the resolved instrument identifier
11
+ -- in the primary key, preventing accidental overwrites during bulk ingestion.
12
+ --
7
13
  -- | Table | Description |
8
14
  -- |---------------|-------------|
9
- -- | `publishers` | Registry of data sources and their identifier namespaces. |
10
- -- | `instruments` | Registry of instruments observed from market data ingestion within a publisher namespace. |
15
+ -- | `publishers` | Registry of vendor+dataset namespaces used for market data and instrument ingestion. |
16
+ -- | `instruments` | Registry of instruments observed from ingestion within a publisher namespace. |
11
17
  -- | `ohlcv` | Aggregated OHLCV bar data keyed by instrument, bar duration (`rtype`), and event timestamp (`ts_event`). |
12
18
  -- | `symbology` | Time-bounded mappings from publisher-native symbols to publisher-native instrument identifiers. |
13
19
 
@@ -15,15 +21,24 @@
15
21
 
16
22
  -- Registry of all data sources used for market data and instrument ingestion.
17
23
  --
18
- -- Each row represents a distinct data source.
19
- -- A publisher establishes the provenance of instrument definitions and price data and provides the context in which raw symbols and native instrument identifiers are interpreted.
24
+ -- Each row represents a distinct data product (feed) within a vendor namespace.
25
+ -- A publisher record is uniquely identified by the pair (`name`, `dataset`), not by `name` alone.
26
+ -- This allows a single vendor (e.g. Databento) to appear multiple times, once per concrete dataset/feed
27
+ -- (e.g. `GLBX.MDP3`, `XNAS.ITCH`).
28
+ --
29
+ -- A publisher establishes the provenance of instrument definitions and price data and provides the context
30
+ -- in which raw symbols and native instrument identifiers are interpreted.
20
31
  --
21
- -- | Field | Type | Constraints | Description |
22
- -- |-----------------|-----------|----------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
23
- -- | `publisher_id` | `INTEGER` | `PRIMARY KEY` | Internal surrogate key uniquely identifying a data source within the system. |
24
- -- | `name` | `TEXT` | `NOT NULL`, `UNIQUE` | Human-readable identifier for the data source or vendor (e.g. `databento`, `yfinance`). |
25
- -- | `dataset` | `TEXT` | `NOT NULL` | Identifier of the concrete data product or feed through which data is sourced; uses Databento dataset names (e.g. `GLBX.MDP3`) for Databento ingestion and internal identifiers for other sources (e.g. `YFINANCE`). |
26
- -- | `venue` | `TEXT` | | Optional ISO 10383 Market Identifier Code (MIC) describing the primary trading venue; may be NULL for aggregated or multi-venue sources. |
32
+ -- | Field | Type | Constraints | Description |
33
+ -- |-----------------|-----------|-------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
34
+ -- | `publisher_id` | `INTEGER` | `PRIMARY KEY` | Internal surrogate key uniquely identifying a publisher record within the system. |
35
+ -- | `name` | `TEXT` | `NOT NULL` | Human-readable vendor identifier for the data source (e.g. `databento`, `yfinance`). |
36
+ -- | `dataset` | `TEXT` | `NOT NULL` | Identifier of the concrete data product or feed through which data is sourced; uses Databento dataset names (e.g. `GLBX.MDP3`) for Databento ingestion and internal identifiers for other sources (e.g. `YFINANCE`). |
37
+ -- | `venue` | `TEXT` | | Optional ISO 10383 Market Identifier Code (MIC) describing the primary trading venue; may be NULL for aggregated or multi-venue sources. |
38
+ --
39
+ -- **Table constraints**
40
+ --
41
+ -- * `UNIQUE(name, dataset)` ensures that each vendor+feed combination is represented at most once.
27
42
  --
28
43
  -- **Examples**
29
44
  --
@@ -33,6 +48,12 @@
33
48
  -- * `dataset` = `'GLBX.MDP3'`
34
49
  -- * `venue` = `XCME`
35
50
  --
51
+ -- Databento NASDAQ TotalView feed:
52
+ --
53
+ -- * `name` = `'databento'`
54
+ -- * `dataset` = `'XNAS.ITCH'`
55
+ -- * `venue` = `XNAS`
56
+ --
36
57
  -- Yahoo Finance equity data:
37
58
  --
38
59
  -- * `name` = `'yfinance'`
@@ -41,14 +62,16 @@
41
62
  --
42
63
  CREATE TABLE publishers (
43
64
  publisher_id INTEGER PRIMARY KEY,
44
- name TEXT NOT NULL UNIQUE,
65
+ name TEXT NOT NULL,
45
66
  dataset TEXT NOT NULL,
46
- venue TEXT
67
+ venue TEXT,
68
+ UNIQUE (name, dataset)
47
69
  );
48
70
 
49
71
 
50
72
 
51
73
 
74
+
52
75
  -- Registry of instruments observed through market data ingestion.
53
76
  --
54
77
  -- Each row represents an instrument identity within a publisher namespace.
@@ -59,13 +82,13 @@ CREATE TABLE publishers (
59
82
  -- The table does not store contract specifications or other reference metadata.
60
83
  -- Such metadata must be stored separately when available.
61
84
  --
62
- -- | Field | Type | Constraints | Description |
63
- -- |------------------------|-----------|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|
64
- -- | `instrument_id` | `INTEGER` | `PRIMARY KEY` | Internal surrogate key identifying an instrument record within the system. |
65
- -- | `publisher_ref` | `INTEGER` | `NOT NULL`, `FK` | Foreign key reference to `publishers.publisher_id`, defining the publisher namespace in which this instrument identity is valid. |
85
+ -- | Field | Type | Constraints | Description |
86
+ -- |------------------------|-----------|------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------|
87
+ -- | `instrument_id` | `INTEGER` | `PRIMARY KEY` | Internal surrogate key identifying an instrument record within the system. |
88
+ -- | `publisher_ref` | `INTEGER` | `NOT NULL`, `FK` | Foreign key reference to `publishers.publisher_id`, defining the publisher namespace in which this instrument identity is valid. |
66
89
  -- | `source_instrument_id` | `INTEGER` | | Publisher-native numeric instrument identifier as provided by the upstream data source (e.g. Databento instrument_id); may be `NULL` for symbol-only sources. |
67
- -- | `symbol` | `TEXT` | | Publisher-native symbol string identifying the instrument (e.g. raw symbol, ticker); may be NULL when numeric identifiers are used. |
68
- -- | `symbol_type` | `TEXT` | | Identifier describing the symbol scheme or resolution type used by the publisher (e.g. `raw_symbol`, `continuous`, `ticker`). |
90
+ -- | `symbol` | `TEXT` | | Publisher-native symbol string identifying the instrument (e.g. raw symbol, ticker); may be NULL when numeric identifiers are used. |
91
+ -- | `symbol_type` | `TEXT` | | Identifier describing the symbol scheme or resolution type used by the publisher (e.g. `raw_symbol`, `continuous`, `ticker`). |
69
92
  --
70
93
  -- Each instrument must be identifiable by at least one of `source_instrument_id` or `symbol`.
71
94
  -- Uniqueness constraints ensure that instrument identities do not collide within a publisher namespace.
@@ -78,7 +101,7 @@ CREATE TABLE instruments (
78
101
 
79
102
  source_instrument_id INTEGER,
80
103
  symbol TEXT,
81
- symbol_type TEXT,
104
+ symbol_type TEXT,
82
105
 
83
106
  FOREIGN KEY (publisher_ref) REFERENCES publishers(publisher_id),
84
107
 
@@ -87,6 +110,8 @@ CREATE TABLE instruments (
87
110
  OR symbol IS NOT NULL
88
111
  ),
89
112
 
113
+ CHECK (symbol IS NULL OR symbol_type IS NOT NULL),
114
+
90
115
  UNIQUE (publisher_ref, source_instrument_id),
91
116
  UNIQUE (publisher_ref, symbol, symbol_type)
92
117
  );
@@ -98,16 +123,16 @@ CREATE TABLE instruments (
98
123
 
99
124
  -- Stores aggregated OHLCV bars for instruments at multiple time resolutions.
100
125
  --
101
- -- | Field | Type | Constraints | Description |
102
- -- |-----------------|-----------|---------------------------------------------|-------------------------------------------------------------------------------------------------------------------------|
103
- -- | `instrument_id` | `INTEGER` | `NOT NULL`, `FK` | Foreign key reference to `instruments.instrument_id`, identifying the instrument to which this bar belongs. |
126
+ -- | Field | Type | Constraints | Description |
127
+ -- |-----------------|-----------|---------------------------------------------|--------------------------------------------------------------------------------------------------------------------------|
128
+ -- | `instrument_id` | `INTEGER` | `NOT NULL`, `FK` | Foreign key reference to `instruments.instrument_id`, identifying the instrument to which this bar belongs. |
104
129
  -- | `rtype` | `INTEGER` | `NOT NULL`, `CHECK IN (32, 33, 34, 35, 36)` | Record type code encoding the bar duration using Databento OHLCV conventions (e.g. `32`=1s, `33`=1m, `34`=1h, `35`=1d). |
105
- -- | `ts_event` | `INTEGER` | `NOT NULL` | Event timestamp of the bar as provided by the upstream source, stored as nanoseconds since the UTC Unix epoch. |
106
- -- | `open` | `INTEGER` | `NOT NULL` | Opening price of the bar interval, stored as a fixed-point integer using the upstream price scaling convention. |
107
- -- | `high` | `INTEGER` | `NOT NULL` | Highest traded price during the bar interval, stored as a fixed-point integer. |
108
- -- | `low` | `INTEGER` | `NOT NULL`, `CHECK(low <= high)` | Lowest traded price during the bar interval, stored as a fixed-point integer. |
109
- -- | `close` | `INTEGER` | `NOT NULL` | Closing price of the bar interval, stored as a fixed-point integer. |
110
- -- | `volume` | `INTEGER` | `NOT NULL`, `CHECK(volume >= 0)` | Total traded volume during the bar interval. |
130
+ -- | `ts_event` | `INTEGER` | `NOT NULL` | Event timestamp of the bar as provided by the upstream source, stored as nanoseconds since the UTC Unix epoch. |
131
+ -- | `open` | `INTEGER` | `NOT NULL` | Opening price of the bar interval, stored as a fixed-point integer using the upstream price scaling convention. |
132
+ -- | `high` | `INTEGER` | `NOT NULL` | Highest traded price during the bar interval, stored as a fixed-point integer. |
133
+ -- | `low` | `INTEGER` | `NOT NULL`, `CHECK(low <= high)` | Lowest traded price during the bar interval, stored as a fixed-point integer. |
134
+ -- | `close` | `INTEGER` | `NOT NULL` | Closing price of the bar interval, stored as a fixed-point integer. |
135
+ -- | `volume` | `INTEGER` | `NOT NULL`, `CHECK(volume >= 0)` | Total traded volume during the bar interval. |
111
136
  --
112
137
  -- The composite primary key enforces uniqueness per instrument, bar duration, and event timestamp.
113
138
  -- Integrity constraints ensure basic OHLC consistency and prevent invalid price relationships from being stored.
@@ -135,7 +160,12 @@ CREATE TABLE ohlcv (
135
160
 
136
161
  -- Stores time-bounded mappings from publisher-native symbols to publisher-native instrument identifiers.
137
162
  --
138
- -- The table captures symbol resolution rules as provided by upstream data sources and must be interpreted within the namespace of a specific publisher.
163
+ -- The table captures symbol resolution rules as provided by upstream data sources and must be interpreted within the
164
+ -- namespace of a specific publisher.
165
+ --
166
+ -- The schema permits multiple mappings to share the same `start_date` for a given (`publisher_ref`, `symbol`, `symbol_type`)
167
+ -- by including `source_instrument_id` in the primary key. This prevents accidental overwrite when upstream symbology exports
168
+ -- contain same-day corrections, backfills, or parallel resolution segments.
139
169
  --
140
170
  -- | Field | Type | Constraints | Description |
141
171
  -- |------------------------|-----------|------------------|----------------------------------------------------------------------------------------------------------------------------|
@@ -146,7 +176,7 @@ CREATE TABLE ohlcv (
146
176
  -- | `start_date` | `TEXT` | `NOT NULL` | First calendar date (inclusive) on which this symbol-to-instrument mapping is valid, stored in YYYY-MM-DD format. |
147
177
  -- | `end_date` | `TEXT` | `NOT NULL` | Last calendar date (inclusive) on which this symbol-to-instrument mapping is valid, stored in YYYY-MM-DD format. |
148
178
  --
149
- -- The primary key enforces uniqueness of symbol mappings per publisher, symbol type, and start date.
179
+ -- The primary key enforces uniqueness of mappings per publisher, symbol, symbol type, start date, and resolved instrument (`source_instrument_id`).
150
180
  -- Date bounds are interpreted as closed intervals.
151
181
  --
152
182
  CREATE TABLE symbology (
@@ -157,8 +187,9 @@ CREATE TABLE symbology (
157
187
  start_date TEXT NOT NULL,
158
188
  end_date TEXT NOT NULL,
159
189
  FOREIGN KEY (publisher_ref) REFERENCES publishers(publisher_id),
160
- PRIMARY KEY (publisher_ref, symbol, symbol_type, start_date),
161
- UNIQUE (publisher_ref, symbol_type, source_instrument_id, start_date),
190
+ FOREIGN KEY (publisher_ref, source_instrument_id)
191
+ REFERENCES instruments(publisher_ref, source_instrument_id),
192
+ PRIMARY KEY (publisher_ref, symbol, symbol_type, start_date, source_instrument_id),
162
193
  CHECK (start_date <= end_date)
163
194
  );
164
195