liteseries 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
liteseries/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ from __future__ import annotations
2
+
3
+ from ._handlers import close_ls, launch_ls, ls_cache, threadpool_shutdown_ls
4
+
5
+ __all__ = ["close_ls", "launch_ls", "ls_cache", "threadpool_shutdown_ls"]
@@ -0,0 +1,3 @@
1
+ from ._handlers import close_ls, launch_ls, ls_cache, threadpool_shutdown_ls
2
+
3
+ __all__ = ["close_ls", "launch_ls", "ls_cache", "threadpool_shutdown_ls"]
@@ -0,0 +1,308 @@
1
+ from __future__ import annotations
2
+
3
+ import sqlite3
4
+ import threading as th
5
+ from datetime import datetime, time, timedelta
6
+ from functools import wraps
7
+ from typing import Any, Callable
8
+ from urllib.parse import quote
9
+
10
+ import pyarrow.compute as pc
11
+ from adbc_driver_sqlite import dbapi
12
+ from dateutil import tz
13
+ from pyarrow import Table, concat_tables
14
+
15
+ from . import _util as ut
16
+ from ._sql import insert_cols, last_upd_select, series_range_select, series_tmax_select, update_last_upd
17
+
18
+ LT = list | tuple
19
+ TimeWindow = tuple[time, time]
20
+ SeriesFn = Callable[..., Table]
21
+ CacheDecorator = Callable[[SeriesFn], SeriesFn]
22
+ DEFAULT_ACTIVE_IN = time(hour=16, second=1, tzinfo=tz.gettz("US/Eastern"))
23
+
24
+
25
class LocalADBC(th.local):
    """Thread-local holder for one SQLite ADBC connection and its cursor.

    The class derives from ``threading.local``, so every thread that touches
    the shared instance gets its own ``sqlite`` / ``cur`` attributes.
    """

    # Connection URI used by ``__init__``; assigned from ``launch_ls`` before
    # the first instance is created.
    uri = None  # set from another scope

    def __init__(self) -> None:
        self.sqlite = dbapi.connect(uri=self.uri, autocommit=False)
        self.cur = self.sqlite.cursor()
        self.cur.execute("PRAGMA busy_timeout = 1000")

    def close(self) -> None:
        """Close the cursor and then the connection of the calling thread.

        The connection is closed even when closing the cursor raises, so a
        failing cursor cannot leak the underlying connection.
        """
        try:
            self.cur.close()
        finally:
            self.sqlite.close()
37
+
38
+
39
+ local_adbc: LocalADBC
40
+
41
+
42
def close_ls() -> None:
    """Close the calling thread's liteseries connection.

    ``local_adbc`` only exists after ``launch_ls`` has run. When it has not
    been created yet this is a no-op, which keeps the function safe inside a
    ``finally`` block even if the launch itself failed.
    """
    runtime = globals().get("local_adbc")
    if runtime is not None:
        runtime.close()
44
+
45
+
46
def threadpool_shutdown_ls(thp) -> None:
    """Run ``close_ls`` on the pool's worker threads, then shut the pool down.

    One task is submitted per currently live worker. Each task first waits on
    a barrier shared with the caller, so all of the tasks must be parked at
    the same time (on distinct threads) before any of them goes on to close
    its thread-local connection.

    :param thp: executor exposing ``_threads``, ``submit`` and ``shutdown``.
    :return: whatever ``thp.shutdown(wait=True)`` returns.
    """
    worker_count = len(thp._threads)
    if worker_count == 0:
        return thp.shutdown(wait=True)

    # One extra party: the calling thread joins the rendezvous too.
    gate = th.Barrier(worker_count + 1)

    def _park_then_close():
        return gate.wait(), close_ls()

    for _ in range(worker_count):
        thp.submit(_park_then_close)
    gate.wait()
    return thp.shutdown(wait=True)
56
+
57
+
58
def launch_ls(pathuri=None, mem_rep: bool = False) -> None:
    """Resolve the database location and open the calling thread's connection.

    :param pathuri: path or ``file:`` URI handed to ``ut.get_dburi``; ``None``
        lets that helper pick the location.
    :param mem_rep: when true, WAL priming is skipped and the resolved value
        is used as the ADBC URI unchanged.
    """
    dburi = ut.get_dburi(pathuri)
    is_file_uri = dburi.startswith("file:")
    if not mem_rep:
        # The stdlib driver only interprets "file:..." as a URI when
        # uri=True; otherwise it would open (and create) a file literally
        # named like the URI and switch *that* file to WAL.
        sqlite_con = sqlite3.connect(dburi, uri=is_file_uri)
        try:
            sqlite_con.execute("PRAGMA journal_mode=WAL")
            sqlite_con.commit()
        finally:
            sqlite_con.close()

    if mem_rep:
        LocalADBC.uri = dburi
    elif is_file_uri:
        sep = "&" if "?" in dburi else "?"
        LocalADBC.uri = f"{dburi}{sep}cache=shared"
    else:
        qpath = quote(dburi.replace("\\", "/"), safe="/:")
        LocalADBC.uri = f"file:{qpath}?mode=rwc&cache=shared"
    global local_adbc
    local_adbc = LocalADBC()
    # The connection container is now initialised for the current thread.
    # Because it is a threading.local, each new thread gets its own object on
    # first use; this is not a new connection per task. As long as a thread
    # stays alive and keeps receiving work, its connection stays alive with
    # it. Nothing here interacts with a thread executor explicitly, so any
    # executor can be used.
83
+ _24H = timedelta(days=1)
84
+ _0D = timedelta()
85
+ _1MC = timedelta(microseconds=1)
86
+
87
+
88
def ls_cache(
    columns,
    time_keys: tuple[str, str],
    time_col: str,
    column_keys: tuple,  # need at least one. may change this req at some point.
    refresh_period: timedelta = timedelta(days=1),
    active_in: time | tuple[time, time] = DEFAULT_ACTIVE_IN,
    out_cols=None,
    table_keys=None,
    expires_after: timedelta|None=None,
    rollback: bool = False,
    table=None,
) -> CacheDecorator:
    """
    Build a decorator that caches a time-series function in SQLite.

    Note: ``keys`` are named kwargs of the data function that are used either as a column value for all rows or as an
    appended extension of the table name. The assumption is that they appear as input values of the data function but
    not in its output table.

    :param columns: All column names that are included in the sqlite table, in order. A sequence is converted to a
        ``{name: position}`` mapping. Columns the function output lacks are filled from ``column_keys``.
    :param time_keys: The two named args that represent the starting and ending date time of the query selection.
    :param time_col: Name of the time column; used for range selection and as the last primary-key column of the
        data table.
    :param column_keys: Keys that are stored in the database as values of their own column. Output columns of the
        function take precedence; otherwise the value of the matching input kwarg is filled in.
    :param refresh_period: The period of passed time necessary to elicit an update from the timeseries endpoint. For
        periods shorter than a day the boundary is computed inside the ``active_in`` window. For daily periods or
        greater, ``active_in`` is a single time (or the second time of a pair) marking when it is valid to query the
        endpoint, think EOD OHLC at 4 pm EST.
    :param active_in: Intervals are generated starting at active_in[0] and advanced by ``refresh_period``, capped at
        active_in[1]. Before active_in[0] of the current day the previous day's window is used.
    :param out_cols: The ordered columns of the arrow table produced by this wrapper. Always less than or equal to
        ``columns``. Defaults to ``columns`` without ``column_keys``.
    :param table_keys: Keys that make the full table name (example timeframes 1m, 1h, 1s).
    :param expires_after: When set, a key whose newest known timestamp plus this duration (at least twice
        ``refresh_period``) is older than its last update is served from the cache without querying the function.
    :param rollback: Not implemented yet (mainly for continuous futures that are backwards adjusted on the next roll
        date). If we are querying new data, then we include the latest existing datetime in our new data query,
        if the returned row is not equal to the row from our database, then we log a warning, assume
        that timeseries entries for that specific matching key group are obsolete, remove them then
        place them again backfilled to the first date (note replacing row values) likely much quicker
        for this strategy than full removal then reload.
    :param table: Table name, if None we use the data function name.
    :return: A decorator; the decorated function must be called with keyword arguments only.
    """
    tk, tak, ck = time_keys, () if table_keys is None else table_keys, column_keys
    if not isinstance(columns, dict):
        columns = {columns[i]: i for i in range(len(columns))}
    if out_cols is None:
        # This is a backup that could fail, recommended to set the actual
        # output columns of the function in matching order.
        out_cols = (*(cl for cl in columns if cl not in column_keys),)

    refr_micros = int(refresh_period / _1MC)
    if expires_after is not None:
        # Only bound when expires_after is given; every use below is guarded the same way.
        exp_micros = max(int(expires_after / _1MC), refr_micros * 2)

    def make_keys(kg):
        # Pull the range bounds plus the table-key and column-key values out of the call kwargs.
        sdate, edate = kg[tk[0]], kg[tk[1]]  # intentional fail if NE
        tav = (*(kg[k] for k in tak),)
        cv = (*(kg[k] for k in ck),)
        return sdate, edate, tav, cv

    def fix_range(sdt, edt, kg):
        # Overwrites the range kwargs in place before the wrapped function is called.
        # for simplicity now we will assume the datetime start and end has also been converted to micros pre-wrapper
        kg[tk[0]] = sdt
        kg[tk[1]] = edt

        return kg

    if refresh_period >= _24H:
        doff = active_in[1] if not isinstance(active_in, time) else active_in

        def last_qual() -> int:
            # Freshness boundary in Unix micros: the previous calendar day
            # (in doff's timezone) combined with the doff time.
            tod = datetime.now(doff.tzinfo).date()
            pperiod = tod - _24H
            comp = int(datetime.combine(pperiod, doff).timestamp() * 1_000_000)
            return comp
    else:
        active_window: TimeWindow = active_in  # pyrefly: ignore[bad-assignment]

        def last_qual() -> int:
            # Freshness boundary in Unix micros: the latest refresh_period step,
            # counted from the window start, that is not after min(now, window end).
            # Note on timechange days this can be an hour off, but it's not
            # really an issue for data queries.
            # If active_in is not a tuple with two time objects, will fail,
            # correct behavior.
            nw = datetime.now(active_window[1].tzinfo)
            tod = nw.date()
            sdt = datetime.combine(tod, active_window[0])
            # Before today's window start: fall back to yesterday's window.
            day = tod - (_24H if sdt > nw else _0D)
            sdt = datetime.combine(day, active_window[0])
            edt = datetime.combine(day, active_window[1])
            nw = min(nw, edt)

            n = (nw - sdt) // refresh_period
            cp = sdt + refresh_period * n
            return int(cp.timestamp() * 1_000_000)

    def cache_upd(edate, lsq) -> int:
        # Value stored as last_upd: the current system time when the request is
        # open-ended or reaches the freshness boundary, otherwise the requested end.
        if edate is None or edate >= lsq:
            return ut.sys_micros()
        return edate

    def _w(func: SeriesFn) -> SeriesFn:
        tbn = func.__qualname__ if table is None else table
        tbe = tbn
        tbe_info = f"{tbe}_info"

        @wraps(func)
        def get_series(**kwargs: Any) -> Table:
            con = local_adbc.sqlite
            cur = local_adbc.cur
            sdate, edate, tav, cv = make_keys(kwargs)
            table_ref = tbe
            info_table_ref = tbe_info
            if tav:
                # Table-key values are appended to the base table name.
                table_ref = "_".join((tbn, *(str(v) for v in tav)))
                info_table_ref = f"{table_ref}_info"
            # Three paths:
            #   fl == 2: the info select fails (no table yet) -> create tables and load.
            #   fl == 1: the key is not in the info table -> initial load for a new key.
            #   fl == 0: normal cached read, possibly followed by a forward refresh.
            last_upd = None
            fl = 0
            try:
                # at this point we are assuming there is at least one index column that isn't time. Fix this later.
                # and only input columns can act as column keys (this is actually needed).
                cur.execute(last_upd_select(info_table_ref, ck), cv)
                last_upd = cur.fetchone()
            except dbapi.DatabaseError:
                fl = 2
            if last_upd is None and fl != 2:
                fl = 1

            if fl > 0:
                # Initial load: ask the function for everything up to edate.
                fix_range(None, edate, kwargs)
                ltb = func(**kwargs)
                if not isinstance(ltb, Table) or ltb.num_rows == 0:
                    return ltb
                fl_tb = ut.mk_fullarrow(ltb, columns, ck, cv)
                inft: dict[str, str] | None = None
                if fl == 2:
                    inft = ut.infer_sqlite_types(cur, fl_tb)

                    ddl_nfo = ut.define_ls_infotable(info_table_ref, inft, ck)
                    cur.execute(ddl_nfo)
                # Assumption, take away the timestamp, then the endpoint request only captures a single 'id' for the
                # instrument. Otherwise re-enable the full pass check.
                # Update: if we need multi-id support, it should now be possible just by changing it to the full agg.
                # actually, would still need to handle the info table differently.
                # init info and last update unix micros timestamp
                cur.execute(insert_cols(info_table_ref, (*ck, ut.LAST_UPD)), (*cv, cache_upd(edate, last_qual())))

                if fl == 2:
                    # init the actual lite series table.
                    ddl = ut.define_ls_table(table_ref, columns, inft, ck, time_col)  # pyrefly: ignore[bad-argument-type]
                    cur.execute(ddl)
                cur.adbc_ingest(table_ref, fl_tb, "append")
                con.commit()
                ltb=fl_tb.select(out_cols)
                if sdate is not None:
                    # Drop rows before the requested start: slice from the first row
                    # with time >= sdate. NOTE(review): presumably relies on rows
                    # being ordered by time_col - confirm.
                    mask = pc.greater_equal(ltb[time_col], sdate)
                    offset = pc.index(mask, value=True).as_py()
                    ltb = ltb.slice(ltb.num_rows, 0) if offset == -1 else ltb.slice(offset)
            else:
                last_upd = last_upd[0]  # pyrefly: ignore[unsupported-operation]
                lsq = last_qual()
                en = edate is None
                # "the cache is older than the latest allowable freshness
                # boundary, and the request extends beyond what's known fresh"
                if lsq > last_upd and (en or edate > last_upd):
                    # Then the period we are asking for is not fully contained in our database.
                    # Selects the data here.
                    cur.execute(*series_range_select(table_ref, out_cols, ck, cv, time_col, sdate, edate))
                    ltb_s = cur.fetchallarrow()
                    if ltb_s.num_rows == 0:
                        cur.execute(series_tmax_select(table_ref, ck, time_col), cv)
                        mxt = cur.fetchone()[0]
                        if mxt is None:
                            return ltb_s
                        # we know it queries too far back and the endpoint
                        # doesn't have data there.
                        if not en and edate < mxt:
                            return ltb_s
                        if expires_after is not None and mxt + exp_micros < last_upd:
                            return ltb_s
                        # sdate > mxt is self evident in this case, so no extra check.
                        n_sdate = mxt + refr_micros
                    else:
                        prv_tm = ltb_s[time_col][-1].as_py()
                        if expires_after is not None and prv_tm + exp_micros < last_upd:
                            return ltb_s
                        n_sdate = prv_tm + refr_micros
                    # Ask the function only for the missing forward period.
                    fix_range(n_sdate, edate, kwargs)
                    ltb_t = func(**kwargs)
                    if not isinstance(ltb_t, Table) or ltb_t.num_rows == 0:
                        # Nothing new: still record that the check happened.
                        cur.execute(update_last_upd(info_table_ref, ck), (cache_upd(edate, lsq), *cv))
                        con.commit()
                        return ltb_s
                    fl_tb = ut.mk_fullarrow(ltb_t, columns, ck, cv)
                    # can be built from columns as well
                    cur.execute(update_last_upd(info_table_ref, ck), (cache_upd(edate, lsq), *cv))

                    cur.adbc_ingest(table_ref, fl_tb, "append")
                    con.commit()
                    # we do this before sending the data
                    # (ltb_s was already fetched by out_cols)
                    # we use fl_tb instead of ltb_t because out cols could contain more than what func produces
                    ltb_t = fl_tb.select(out_cols)
                    ltb = concat_tables([ltb_s, ltb_t], promote_options="none")

                else:  # We are requesting for data inside of edate or the new data query happened recently enough.
                    cur.execute(*series_range_select(table_ref, out_cols, ck, cv, time_col, sdate, edate))
                    ltb = cur.fetchallarrow()
            return ltb

        return get_series

    return _w
@@ -0,0 +1,24 @@
1
+ import threading as th
2
+ from datetime import time, timedelta
3
+ from typing import Callable, TypeAlias
4
+ from pyarrow import Table
5
+
6
+ SeriesFn = Callable[..., Table]
7
+ CacheDecorator: TypeAlias = Callable[[SeriesFn], SeriesFn]
8
+
9
+ def close_ls() -> None: ...
10
+ def threadpool_shutdown_ls(thp) -> None: ...
11
# Matches the runtime function, which accepts only ``pathuri`` and ``mem_rep``.
def launch_ls(pathuri=None, mem_rep: bool = False) -> None: ...
12
+ def ls_cache(
13
+ columns,
14
+ time_keys: tuple[str, str],
15
+ time_col: str,
16
+ column_keys: tuple,
17
+ refresh_period: timedelta = ...,
18
+ active_in: time | tuple[time, time] = ...,
19
+ out_cols=None,
20
+ table_keys=None,
21
+ expires_after: timedelta | None = ...,
22
+ rollback: bool = False,
23
+ table=None,
24
+ ) -> CacheDecorator: ...
liteseries/_sql.py ADDED
@@ -0,0 +1,104 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+
5
+ TABLE_EXISTS = "SELECT 1 FROM pragma_table_list WHERE name = ? AND type = 'table' LIMIT 1"
6
+
7
+ TABLE_COLUMNS = "SELECT name FROM pragma_table_xinfo(?) WHERE hidden = 0 ORDER BY cid"
8
+
9
+ LAST_UPD = "last_upd"
10
+
11
+ _ASEQ_C = {}
12
+ _QMRK_C = {}
13
+ _REQ_C = {}
14
+ _INS_C = {}
15
+ _UPD_C = {}
16
+
17
+
18
+ def _protect_names_enabled() -> bool:
19
+ """Return whether SQL identifiers should be double-quoted."""
20
+ return os.getenv("LITESERIES_PROTECTNAMES", "").casefold() == "true"
21
+
22
+
23
+ def _qident_protected(ident: str) -> str:
24
+ """Quote an SQLite identifier and escape embedded double quotes."""
25
+ dquote, escaped_dquote = '"', '""'
26
+ return f'"{ident.replace(dquote, escaped_dquote)}"'
27
+
28
+
29
def _qident_plain(ident: str) -> str:
    """Return *ident* unchanged.

    Nothing is quoted or escaped here, so the caller must pass a name that is
    already a valid SQLite identifier.
    """
    return ident
32
+
33
+
34
+ # Keep the hot path as a direct function binding chosen once at import time.
35
+ qident = _qident_protected if _protect_names_enabled() else _qident_plain
36
+
37
+
38
def and_seq(cols: tuple):
    """Return ``col = ?`` terms for *cols* joined with ``AND`` (memoised per tuple)."""
    try:
        return _ASEQ_C[cols]
    except KeyError:
        pass
    clause = " AND ".join(f"{qident(col)} = ?" for col in cols)
    _ASEQ_C[cols] = clause
    return clause
44
+
45
+
46
def qmarks(width: int):  # pragma: no cover
    """Return *width* ``?`` placeholders separated by ", " (memoised per width)."""
    try:
        return _QMRK_C[width]
    except KeyError:
        pass
    marks = ", ".join(["?"] * width)
    _QMRK_C[width] = marks
    return marks
52
+
53
+
54
def colreq(cols: tuple):
    """Return the identifiers in *cols* as a comma-separated list (memoised per tuple)."""
    try:
        return _REQ_C[cols]
    except KeyError:
        pass
    listing = ", ".join([qident(col) for col in cols])
    _REQ_C[cols] = listing
    return listing
60
+
61
+
62
def series_range_select(table_ref: str, req_cols: tuple, pidx: tuple, vidx, time_col, srange, erange):
    """Build the range SELECT for one key group.

    :return: ``(sql, params)``. The statement always filters on *pidx*; a
        ``>=`` bound on *time_col* is added when *srange* is given and a
        ``<=`` bound when *erange* is given. With neither bound, *vidx* is
        returned as the parameters unchanged.
    """
    cl, ps = colreq(req_cols), and_seq(pidx)
    tb, tc = qident(table_ref), qident(time_col)
    sql = f"SELECT {cl} FROM {tb} WHERE {ps}"
    if srange is None and erange is None:
        return sql, vidx

    params = [*vidx]
    if srange is not None:
        sql += f" AND {tc} >= ?"
        params.append(srange)
    if erange is not None:
        sql += f" AND {tc} <= ?"
        params.append(erange)
    return sql, tuple(params)
74
+
75
+
76
def series_tmax_select(table_ref: str, pidx: tuple, time_col) -> str:
    """Build the SELECT for the largest *time_col* value of one key group."""
    time_ident = qident(time_col)
    table_ident = qident(table_ref)
    key_filter = and_seq(pidx)
    return f"SELECT MAX({time_ident}) FROM {table_ident} WHERE {key_filter}"
78
+
79
+
80
def last_upd_select(table_ref: str, pidx: tuple) -> str:
    """Build the SELECT that reads the ``last_upd`` value of one key group."""
    stamp_ident = qident(LAST_UPD)
    table_ident = qident(table_ref)
    key_filter = and_seq(pidx)
    return f"SELECT {stamp_ident} FROM {table_ident} WHERE {key_filter}"
82
+
83
+
84
def insert_cols(table_ref: str, cols: tuple[str, ...]) -> str:
    """Return the INSERT statement for *cols* of *table_ref* (memoised per table and columns)."""
    cache_key = (table_ref, cols)
    try:
        return _INS_C[cache_key]
    except KeyError:
        pass
    table_ident = qident(table_ref)
    col_list = colreq(cols)
    placeholders = qmarks(len(cols))
    stmt = f"INSERT INTO {table_ident} ({col_list}) VALUES ({placeholders})"
    _INS_C[cache_key] = stmt
    return stmt
91
+
92
+
93
def update_last_upd(table_ref: str, pidx: tuple[str, ...]) -> str:
    """Return the UPDATE that sets ``last_upd`` for one key group (memoised).

    The first bound parameter is the new ``last_upd`` value; the key values
    for *pidx* follow.
    """
    cache_key = (table_ref, pidx)
    try:
        return _UPD_C[cache_key]
    except KeyError:
        pass
    table_ident = qident(table_ref)
    stamp_ident = qident(LAST_UPD)
    key_filter = and_seq(pidx)
    stmt = f"UPDATE {table_ident} SET {stamp_ident} = ? WHERE {key_filter}"
    _UPD_C[cache_key] = stmt
    return stmt
100
+
101
+
102
# TODO: change this to a cached statement later; ingest can stay in use
# (the original note breaks off after "but for small").
def insert_row(table_ref: str, width: int) -> str:  # pragma: no cover
    """Build a positional INSERT for *table_ref* with *width* placeholders."""
    table_ident = qident(table_ref)
    placeholders = qmarks(width)
    return f"INSERT INTO {table_ident} VALUES ({placeholders})"
liteseries/_sql.pyi ADDED
@@ -0,0 +1 @@
1
+ # pruned: intentionally empty stub
liteseries/_util.py ADDED
@@ -0,0 +1,153 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import time
5
+ from collections.abc import Sequence
6
+ from itertools import chain
7
+ from pathlib import Path
8
+
9
+ import pyarrow as pa
10
+
11
+ from . import _sql
12
+
13
+ LAST_UPD = _sql.LAST_UPD
14
+
15
+
16
+ _FIRST_IMPORT_ROOT: Path | None = None
17
+ _DEFAULT_DB_NAME = "liteseries_db.sqlite"
18
+
19
+
20
def _cached_import_root() -> Path:
    """Return the directory used for default database discovery.

    Computed once and stored in ``_FIRST_IMPORT_ROOT``: the resolved parent
    directory of ``__main__.__file__`` when that attribute exists, otherwise
    the current working directory.
    """
    global _FIRST_IMPORT_ROOT
    if _FIRST_IMPORT_ROOT is not None:
        return _FIRST_IMPORT_ROOT

    entry_script = getattr(__import__("__main__"), "__file__", None)
    if entry_script is None:
        _FIRST_IMPORT_ROOT = Path.cwd()
    else:
        _FIRST_IMPORT_ROOT = Path(entry_script).resolve().parent
    return _FIRST_IMPORT_ROOT
29
+
30
+
31
+ def _pick_sqlite_file(root: Path) -> Path | None:
32
+ sqlite_files = list(root.glob("*.sqlite"))
33
+ if not sqlite_files:
34
+ return None
35
+
36
+ for path in sqlite_files:
37
+ if "liteseries" in path.stem.casefold():
38
+ return path
39
+
40
+ return sqlite_files[0]
41
+
42
+
43
+ def _is_file_uri(path: str) -> bool:
44
+ return path.startswith("file:")
45
+
46
+
47
+ def _looks_like_dir(path: str) -> bool:
48
+ return path.endswith(("/", "\\"))
49
+
50
+
51
def _sqlite_path(path: str) -> Path:
    """Turn a user-supplied path into the ``.sqlite`` file path to use.

    An existing directory, or a string ending in a path separator, gets the
    default database file name appended. Any other path is returned with its
    suffix set to ``.sqlite`` unless it already has that suffix (compared
    case-insensitively).
    """
    candidate = Path(path).expanduser()
    if candidate.is_dir() or _looks_like_dir(path):
        return candidate / _DEFAULT_DB_NAME
    already_sqlite = candidate.suffix.casefold() == ".sqlite"
    return candidate if already_sqlite else candidate.with_suffix(".sqlite")
58
+
59
+
60
+ def _touch_sqlite(db_path: Path) -> str:
61
+ pte = db_path.exists()
62
+ db_path.parent.mkdir(parents=True, exist_ok=True)
63
+ db_path.touch(exist_ok=True)
64
+ if not pte:
65
+ print(f"Created new sqlite db at {db_path}")
66
+ return str(db_path.resolve())
67
+
68
+
69
def get_dburi(path: str | None) -> str:
    """Resolve the database location to open.

    Order of precedence: the explicit *path*; the ``LITESERIES_DB``
    environment variable when it is non-empty; an existing ``*.sqlite`` file
    in the import root; a new default-named file in the import root. A value
    starting with ``file:`` is returned untouched; every other location is
    created if needed and returned as a resolved path string.
    """
    if path is None:
        chosen = os.getenv("LITESERIES_DB")
        if not chosen:
            root = _cached_import_root()
            existing = _pick_sqlite_file(root)
            target = root / _DEFAULT_DB_NAME if existing is None else existing
            return _touch_sqlite(target)
    else:
        chosen = path

    if _is_file_uri(chosen):
        return chosen
    return _touch_sqlite(_sqlite_path(chosen))
89
+
90
+
91
def sys_micros() -> int:
    """Return the current wall-clock time as whole Unix microseconds."""
    # time.time() is the cheaper call here; perf_counter is deliberately not used.
    seconds = time.time()
    return int(seconds * 1_000_000)
94
+
95
+
96
def table_exists(cur, table: str) -> bool:  # pragma: no cover
    """Run the ``TABLE_EXISTS`` query for *table* and report whether a row came back."""
    cur.execute(_sql.TABLE_EXISTS, (table,))
    row = cur.fetchone()
    return row is not None
99
+
100
+
101
def insert_row_stmt(cur, table: str) -> str:  # pragma: no cover
    """Build a positional INSERT sized to the rows returned by ``TABLE_COLUMNS`` for *table*."""
    cur.execute(_sql.TABLE_COLUMNS, (table,))
    column_rows = cur.fetchall()
    return _sql.insert_row(table, len(column_rows))
104
+
105
+
106
def infer_sqlite_types(cur, data: pa.Table, sample_rows: int = 2) -> dict[str, str]:
    """Map each column of *data* to the SQLite type the ADBC driver assigns it.

    The first *sample_rows* rows are ingested into the temporary table
    ``_temp_types``, its ``PRAGMA table_info`` output is read, and the table
    is dropped again.

    :return: ``{column name: declared type}``.
    """
    tb = "_temp_types"
    cur.adbc_ingest(tb, data.slice(0, sample_rows), mode="create", temporary=True)
    quoted = _sql.qident(tb)
    cur.execute(f"PRAGMA table_info({quoted})")
    type_rows = cur.fetchall()
    cur.execute(f"DROP TABLE {quoted}")

    types = {}
    # Each row starts with a leading field, then the column name and its type.
    for _, column_name, column_type, *_ in type_rows:
        types[column_name] = column_type
    return types
114
+
115
+
116
def define_ls_table(
    table_ref: str,
    col_ord: dict[str, int],
    col_types: dict[str, str],
    column_keys: Sequence[str],
    time_col: str,
) -> str:
    """Build the ``CREATE TABLE IF NOT EXISTS`` statement for a series table.

    Columns are emitted in the order given by the positions in *col_ord*,
    each ``NOT NULL`` with its type from *col_types*. The primary key is
    *column_keys* followed by *time_col*; the table is ``STRICT, WITHOUT ROWID``.
    """
    ordered = sorted(col_ord, key=col_ord.__getitem__)  # in case we change the system later...
    key_cols = ", ".join(_sql.qident(col) for col in (*column_keys, time_col))
    table_ident = _sql.qident(table_ref)
    parts = [f"{_sql.qident(col)} {col_types[col]} NOT NULL" for col in ordered]
    parts.append(f"PRIMARY KEY ({key_cols})")
    body = ", ".join(parts)
    return f"CREATE TABLE IF NOT EXISTS {table_ident} ({body}) STRICT, WITHOUT ROWID"
128
+
129
+
130
def define_ls_infotable(
    table_ref: str,
    col_types: dict[str, str],
    column_keys: Sequence[str],
) -> str:
    """Build the ``CREATE TABLE IF NOT EXISTS`` statement for a metadata table.

    The table holds *column_keys* (the primary key) plus the ``last_upd``
    column. A column missing from *col_types* is declared ``INTEGER``.
    """
    key_cols = ", ".join(_sql.qident(col) for col in column_keys)
    table_ident = _sql.qident(table_ref)
    parts = []
    # The key columns come first; the rightmost column is the unix-micros last_upd.
    for col in (*column_keys, LAST_UPD):
        parts.append(f"{_sql.qident(col)} {col_types.get(col, 'INTEGER')} NOT NULL")
    parts.append(f"PRIMARY KEY ({key_cols})")
    body = ", ".join(parts)
    return f"CREATE TABLE IF NOT EXISTS {table_ident} ({body}) STRICT, WITHOUT ROWID"
141
+
142
+
143
def mk_fullarrow(ar_tbl: pa.Table, full_cols, col_k, col_v):
    """Return *ar_tbl* extended with missing key columns, in *full_cols* order.

    :param ar_tbl: table produced by the wrapped function.
    :param full_cols: ``{column name: position}`` mapping used for ordering;
        names absent from it sort after all known columns.
    :param col_k: key column names.
    :param col_v: key values, same length as *col_k* (enforced by ``zip(strict=True)``).
    :return: a new table; existing columns are reused, and each key column the
        input lacks is filled with its value repeated for every row.
    """
    present = set(ar_tbl.column_names)
    row_count = ar_tbl.num_rows
    filled = {}
    for key, value in zip(col_k, col_v, strict=True):
        if key not in present:
            filled[key] = pa.repeat(value, row_count)

    def _rank(name):
        return full_cols.get(name, len(full_cols))

    ordered = sorted((*ar_tbl.column_names, *filled), key=_rank)
    arrays = [ar_tbl.column(name) if name in present else filled[name] for name in ordered]
    return pa.Table.from_arrays(arrays, names=ordered)
liteseries/_util.pyi ADDED
@@ -0,0 +1 @@
1
+ # pruned: intentionally empty stub
liteseries/nexus.md ADDED
@@ -0,0 +1,11 @@
1
+ `_handlers.py` provides the public runtime API for launching thread-local SQLite ADBC connections, priming file-backed databases for WAL usage, shutting thread pools down cleanly, and decorating time-series fetchers with the local cache flow including metadata-row inserts, refresh updates, bounded historical watermarks, and optional expired-key lockouts.
2
+ - `_util` : Resolves database paths, shapes Arrow tables for ingest, and builds the `CREATE TABLE IF NOT EXISTS` statements used during cache initialization.
3
+ - `_sql` : Supplies the quoted SQL statement builders used for metadata lookups, metadata row inserts/updates, cached range reads, and max-timestamp queries.
4
+
5
+ `_util.py` contains lower-level helpers for default database discovery, SQLite file creation from default/directory/file paths, timestamp utilities, SQLite table inspection, `CREATE TABLE IF NOT EXISTS` generation, and Arrow table normalization before ingest while reusing existing Arrow column buffers.
6
+ - `_sql` : Provides identifier quoting, table constants, and insert/query helpers that `_util.py` reuses when inspecting or defining SQLite tables.
7
+
8
+ `_sql.py` centralizes reusable SQL fragments and tiny caches for generated SQL so the runtime can build common SQLite statements without repeating formatting work, including metadata-row insert/update statements for the cache info tables. Identifier handling is selected at import time: `LITESERIES_PROTECTNAMES=true` keeps the original double-quoted compatibility path, while the default path passes already-valid Python-style names through unchanged.
9
+ - os : Reads `LITESERIES_PROTECTNAMES` once during module import to choose the identifier function used by the SQL builders.
10
+
11
+ `README.md` introduces the stable public API, the single-threaded launch/cache/close lifecycle, the supported database and identifier environment variables, and beginner-oriented `ls_cache` usage examples for Arrow-backed endpoint functions.
liteseries/py.typed ADDED
File without changes
@@ -0,0 +1,250 @@
1
+ Metadata-Version: 2.4
2
+ Name: liteseries
3
+ Version: 1.0.0
4
+ Summary: SQLite-backed local caching helpers for time series data.
5
+ Author-email: Charles Marks <charlesmarksco@gmail.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 Charles Marks
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+ License-File: LICENSE
28
+ Classifier: License :: OSI Approved :: MIT License
29
+ Requires-Python: >=3.10
30
+ Requires-Dist: adbc-driver-sqlite
31
+ Requires-Dist: pyarrow
32
+ Requires-Dist: python-dateutil
33
+ Description-Content-Type: text/markdown
34
+
35
+ ![Liteseries](Liteseries-1.png)
36
+
37
+ An SQLite-backed replicator and cache for timeseries data.
38
+
39
+ Liteseries works as an invisible layer around a timeseries endpoint. Your function still returns the rows you would
40
+ normally get from the vendor, while liteseries stores them in SQLite for the next matching call. When a request reaches
41
+ past the saved range, it reads the local slice first and asks the vendor only for the missing forward period. Liteseries
42
+ is stable for single-threaded use, with parallel reads available where the Arrow backend supports them.
43
+
44
+ ```python
45
+ from liteseries import close_ls, launch_ls, ls_cache, threadpool_shutdown_ls
46
+ ```
47
+
48
+ - `launch_ls()` opens the local SQLite/ADBC runtime for the current thread.
49
+ - `ls_cache(...)` decorates endpoint functions that return `pyarrow.Table`
50
+ objects.
51
+ - `close_ls()` closes the runtime connection when your process is done with it.
52
+ - `threadpool_shutdown_ls(pool)` closes the connection held by each live worker thread of a thread pool, then shuts the pool down.
53
+
54
+ ## Usage
55
+
56
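+ Install the package, then call `launch_ls()` once before cached functions run. The examples import `datetime.UTC`, which requires Python 3.11 or newer; on Python 3.10 use `timezone.utc` instead: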
57
+
58
+ ```python
59
+ from __future__ import annotations
60
+
61
+ from datetime import UTC, datetime, time, timedelta
62
+
63
+ import pyarrow as pa
64
+
65
+ from liteseries import close_ls, launch_ls, ls_cache
66
+
67
+
68
+ def unix_micros(year: int, month: int, day: int) -> int:
69
+ """Return a UTC timestamp in the microsecond units liteseries stores."""
70
+ return int(datetime(year, month, day, tzinfo=UTC).timestamp() * 1_000_000)
71
+
72
+
73
+ launch_ls()
74
+ ```
75
+
76
+ By default, `launch_ls()` looks for an existing `.sqlite` file near the entry
77
+ script, prefers one with `liteseries` in its name, and otherwise creates
78
+ `liteseries_db.sqlite`. Pass a path when you want to choose the file yourself:
79
+
80
+ ```python
81
+ launch_ls("cache/prices.sqlite")
82
+ ```
83
+
84
+ You can also set `LITESERIES_DB` before launch when configuration belongs in the
85
+ environment:
86
+
87
+ ```powershell
88
+ $env:LITESERIES_DB = "D:\market-cache\prices.sqlite"
89
+ ```
90
+
91
+ ### Wrap An Endpoint
92
+
93
+ Your endpoint function should accept keyword arguments for the requested time
94
+ range and key values, then return a `pyarrow.Table`. The time column is expected
95
+ to use Unix microseconds, and `None` means an open-ended side of the range.
96
+
97
+ This example uses a local data source so the shape is easy to see. A real
98
+ function can call an HTTP API, a vendor SDK, or a file reader.
99
+
100
+ ```python
101
+ def vendor_prices(start, end, symbol, interval) -> pa.Table:
102
+ """Fetch rows from the source system and return only endpoint columns."""
103
+ rows = [
104
+ (unix_micros(2026, 1, 2), 101.25, 102.10),
105
+ (unix_micros(2026, 1, 3), 102.00, 103.40),
106
+ (unix_micros(2026, 1, 4), 103.25, 103.05),
107
+ ]
108
+
109
+ if start is not None:
110
+ rows = [row for row in rows if row[0] >= start]
111
+ if end is not None:
112
+ rows = [row for row in rows if row[0] <= end]
113
+
114
+ return pa.table(
115
+ {
116
+ "ts": [row[0] for row in rows],
117
+ "open": [row[1] for row in rows],
118
+ "close": [row[2] for row in rows],
119
+ }
120
+ )
121
+ ```
122
+
123
+ Decorate it with `ls_cache`. `columns` is the full SQLite schema, including key
124
+ columns that may come from function arguments instead of the returned Arrow
125
+ table. `column_keys` identify separate series inside one table, while
126
+ `table_keys` can split families such as intervals into separate SQLite tables.
127
+
128
+ ```python
129
+ @ls_cache(
130
+ columns=("symbol", "ts", "open", "close"),
131
+ time_keys=("start", "end"),
132
+ time_col="ts",
133
+ column_keys=("symbol",),
134
+ out_cols=("ts", "open", "close"),
135
+ table="prices",
136
+ refresh_period=timedelta(days=1),
137
+ active_in=time(0, tzinfo=UTC),
138
+ )
139
+ def prices(start, end, symbol):
140
+ """Return cached prices, refreshing from the endpoint when needed."""
141
+ return vendor_prices(start, end, symbol, interval="1d")
142
+ ```
143
+
144
+ Now call the wrapped function with keyword arguments:
145
+
146
+ ```python
147
+ try:
148
+ first = prices(
149
+ start=unix_micros(2026, 1, 2),
150
+ end=unix_micros(2026, 1, 4),
151
+ symbol="MSFT",
152
+ )
153
+
154
+ second = prices(
155
+ start=unix_micros(2026, 1, 2),
156
+ end=unix_micros(2026, 1, 4),
157
+ symbol="MSFT",
158
+ )
159
+
160
+ assert second.equals(first)
161
+ finally:
162
+ close_ls()
163
+ ```
164
+
165
+ On the first call, liteseries creates the data table and its metadata table,
166
+ fills missing key columns, writes the Arrow rows, and returns the requested
167
+ slice. On later calls, it serves rows from SQLite unless the request reaches
168
+ past the cached freshness boundary.
169
+
170
+ ### Refresh Windows
171
+
172
+ `refresh_period` and `active_in` tell liteseries when it is worth checking the
173
+ endpoint again.
174
+
175
+ For daily or slower data, pass one `time` that marks when the latest period is
176
+ expected to be complete:
177
+
178
+ ```python
179
+ @ls_cache(
180
+ columns=("symbol", "ts", "open", "close"),
181
+ time_keys=("start", "end"),
182
+ time_col="ts",
183
+ column_keys=("symbol",),
184
+ table="daily_prices",
185
+ refresh_period=timedelta(days=1),
186
+ active_in=time(16, 1, tzinfo=UTC),
187
+ )
188
+ def daily_prices(start, end, symbol):
189
+ """Cache daily bars after the market close boundary has passed."""
190
+ return vendor_prices(start, end, symbol, interval="1d")
191
+ ```
192
+
193
+ For intraday data, pass a `(start_time, end_time)` window. Liteseries advances
194
+ the freshness boundary by `refresh_period` steps inside that window:
195
+
196
+ ```python
197
+ @ls_cache(
198
+ columns=("symbol", "ts", "open", "close"),
199
+ time_keys=("start", "end"),
200
+ time_col="ts",
201
+ column_keys=("symbol",),
202
+ table_keys=("interval",),
203
+ out_cols=("ts", "open", "close"),
204
+ table="intraday_prices",
205
+ refresh_period=timedelta(minutes=5),
206
+ active_in=(time(9, 30, tzinfo=UTC), time(16, 0, tzinfo=UTC)),
207
+ )
208
+ def intraday_prices(start, end, symbol, interval):
209
+ """Cache one interval per SQLite table, keyed by symbol within each table."""
210
+ return vendor_prices(start, end, symbol, interval=interval)
211
+ ```
212
+
213
+ ### Columns And Names
214
+
215
+ Use plain Python-style column and table names when you can. That is the default
216
+ fast path. If your endpoint already produces names with spaces or quotes, set
217
+ `LITESERIES_PROTECTNAMES=true` before importing liteseries so SQL identifiers
218
+ are double-quoted:
219
+
220
+ ```powershell
221
+ $env:LITESERIES_PROTECTNAMES = "true"
222
+ ```
223
+
224
+ `out_cols` controls the columns returned by the decorated function. This is handy
225
+ when the database needs key columns such as `symbol`, but callers only want the
226
+ time-series values.
227
+
228
+ ### Empty Tails
229
+
230
+ Some providers stop returning rows after a symbol expires or a contract ends.
231
+ Pass `expires_after` when liteseries should stop asking forward for that key
231
+ once its newest stored row is older than the last refresh by more than that duration (never less than twice `refresh_period`):
233
+
234
+ ```python
235
+ @ls_cache(
236
+ columns=("symbol", "ts", "open", "close"),
237
+ time_keys=("start", "end"),
238
+ time_col="ts",
239
+ column_keys=("symbol",),
240
+ table="expired_prices",
241
+ refresh_period=timedelta(hours=1),
242
+ active_in=(time(0, tzinfo=UTC), time(23, 59, tzinfo=UTC)),
243
+ expires_after=timedelta(days=7),
244
+ )
245
+ def expired_prices(start, end, symbol):
246
+ """Cache sparse series without querying forever beyond the final row."""
247
+ return vendor_prices(start, end, symbol, interval="1h")
248
+ ```
249
+
250
+ For a live provider example with pandas/yfinance conversion, see `demo.py`.
@@ -0,0 +1,14 @@
1
+ liteseries/__init__.py,sha256=HVNs_WXvYR2-7xoDwyoI7Z0-QTMoFXX6CsAjlzd5X5I,188
2
+ liteseries/_handlers.py,sha256=ggdDMJG_0RNCStk_M-_T9nwRdEGiZ32mDAmldpMvyyk,14382
3
+ liteseries/_sql.py,sha256=rvV2LPO6FtleaRjprm_fL6276XQpcEUeBkHmjvQ-PPw,3231
4
+ liteseries/_util.py,sha256=HnNO9PywXFYcCmN5jhsWapa8tD3xFqVMdiWB6hm8aws,4764
5
+ liteseries/nexus.md,sha256=5y6qrg9dxjrNwYYipggHPHZ6NHO2j_Pgz_SiIWhHTSk,1948
6
+ liteseries/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ liteseries/__init__.pyi,sha256=BkBechYkS5kqtWmnTri6hOhkinM4uoGUR8Mr43OupRQ,152
8
+ liteseries/_handlers.pyi,sha256=0SfvmuTxbBrImMiYvjfTVkqo8y9JpeCPkFiFTjyoCqY,745
9
+ liteseries/_sql.pyi,sha256=POqiRkpQ8bJnb2e42_oCSpwKH9tHzNl1sKyWFDWgGx4,36
10
+ liteseries/_util.pyi,sha256=POqiRkpQ8bJnb2e42_oCSpwKH9tHzNl1sKyWFDWgGx4,36
11
+ liteseries-1.0.0.dist-info/METADATA,sha256=IbW1QX4kiqNhhVPOBKnByUxISCvZtUVz7pteeMHq-MM,8679
12
+ liteseries-1.0.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
13
+ liteseries-1.0.0.dist-info/licenses/LICENSE,sha256=ylIXgJBn5_PBiprN-8ObD_kWoCnxGbSPs422xH4cpZg,1091
14
+ liteseries-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Charles Marks
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.