PyPI - liteseries - Versions diffs - 1.0.0__py3-none-any.whl - Mend

liteseries 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

liteseries/__init__.py +5 -0
liteseries/__init__.pyi +3 -0
liteseries/_handlers.py +308 -0
liteseries/_handlers.pyi +24 -0
liteseries/_sql.py +104 -0
liteseries/_sql.pyi +1 -0
liteseries/_util.py +153 -0
liteseries/_util.pyi +1 -0
liteseries/nexus.md +11 -0
liteseries/py.typed +0 -0
liteseries-1.0.0.dist-info/METADATA +250 -0
liteseries-1.0.0.dist-info/RECORD +14 -0
liteseries-1.0.0.dist-info/WHEEL +4 -0
liteseries-1.0.0.dist-info/licenses/LICENSE +21 -0

liteseries/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from __future__ import annotations
+from ._handlers import close_ls, launch_ls, ls_cache, threadpool_shutdown_ls
+__all__ = ["close_ls", "launch_ls", "ls_cache", "threadpool_shutdown_ls"]

liteseries/__init__.pyi ADDED Viewed

@@ -0,0 +1,3 @@
+from ._handlers import close_ls, launch_ls, ls_cache, threadpool_shutdown_ls
+__all__ = ["close_ls", "launch_ls", "ls_cache", "threadpool_shutdown_ls"]

liteseries/_handlers.py ADDED Viewed

@@ -0,0 +1,308 @@
+from __future__ import annotations
+import sqlite3
+import threading as th
+from datetime import datetime, time, timedelta
+from functools import wraps
+from typing import Any, Callable
+from urllib.parse import quote
+import pyarrow.compute as pc
+from adbc_driver_sqlite import dbapi
+from dateutil import tz
+from pyarrow import Table, concat_tables
+from . import _util as ut
+from ._sql import insert_cols, last_upd_select, series_range_select, series_tmax_select, update_last_upd
+# Union of the list and tuple types; not referenced elsewhere in the visible code.
+LT = list | tuple
+# A pair of wall-clock times, used below as the (start, end) of an intraday window.
+TimeWindow = tuple[time, time]
+# Shape of a wrapped data function: any arguments, annotated as returning an Arrow table.
+SeriesFn = Callable[..., Table]
+# Type of the decorator returned by ``ls_cache``.
+CacheDecorator = Callable[[SeriesFn], SeriesFn]
+# Default ``active_in`` value: 16:00:01 with the tzinfo dateutil resolves for "US/Eastern".
+DEFAULT_ACTIVE_IN = time(hour=16, second=1, tzinfo=tz.gettz("US/Eastern"))
+class LocalADBC(th.local):
+    """Thread-local holder for one ADBC SQLite connection and its cursor.
+    Because this subclasses ``threading.local``, ``__init__`` runs again the first time the
+    shared instance is accessed from another thread, so each thread opens its own connection.
+    """
+    # Connection URI shared by all threads; assigned on the class by ``launch_ls``.
+    uri = None  # set from another scope
+    def __init__(self) -> None:
+        # autocommit is off: writes only persist when the caller commits explicitly.
+        self.sqlite = dbapi.connect(uri=self.uri, autocommit=False)
+        self.cur = self.sqlite.cursor()
+        self.cur.execute("PRAGMA busy_timeout = 1000")
+        #self.sqlite.commit()
+    def close(self) -> None:
+        """Close the calling thread's cursor, then its connection."""
+        self.cur.close()
+        self.sqlite.close()
+# Module-level thread-local container; only declared here, it is bound by ``launch_ls``.
+local_adbc: LocalADBC
+def close_ls() -> None:
+    """Close the liteseries cursor and connection that belong to the calling thread."""
+    local_adbc.close()
+def threadpool_shutdown_ls(thp) -> None:
+    live = len(thp._threads)
+    if not live:
+        return thp.shutdown(wait=True)
+    gate = th.Barrier(live + 1)
+    for _ in range(live):
+        thp.submit(lambda: (gate.wait(), close_ls()))
+    gate.wait()
+    return thp.shutdown(wait=True)
+def launch_ls(pathuri=None, mem_rep: bool = False) -> None:
+    dburi = ut.get_dburi(pathuri)
+    if not mem_rep:
+        sqlite_con = sqlite3.connect(dburi)
+        try:
+            sqlite_con.execute("PRAGMA journal_mode=WAL")  # ...idk man
+            sqlite_con.commit()
+        finally:
+            sqlite_con.close()
+    if mem_rep:
+        LocalADBC.uri = dburi
+    elif dburi.startswith("file:"):
+        sep = "&" if "?" in dburi else "?"
+        LocalADBC.uri = f"{dburi}{sep}cache=shared"
+    else:
+        qpath = quote(dburi.replace("\\", "/"), safe="/:")
+        LocalADBC.uri = f"file:{qpath}?mode=rwc&cache=shared"
+    global local_adbc
+    local_adbc = LocalADBC()
+    # The connection container is now initialized for the current thread.
+    # But because it's a threading local, a new object is created for each new thread, also notice that this is not
+    # a new connection every time a task is launched in a thread. So long as the thread stays alive and receives new
+    # work, this connection will stay alive with it. This also makes the system universally compatible with any thread
+    # executor because it simply doesn't interact with them explicitly.
+_24H = timedelta(days=1)
+_0D = timedelta()
+_1MC = timedelta(microseconds=1)
+def ls_cache(
+    columns,
+    time_keys: tuple[str, str],
+    time_col: str,
+    column_keys: tuple,  # need at least one. may change this req at some point.
+    refresh_period: timedelta = timedelta(days=1),
+    active_in: time | tuple[time, time] = DEFAULT_ACTIVE_IN,
+    out_cols=None,
+    table_keys=None,
+    expires_after: timedelta|None=None,
+    rollback: bool = False,
+    table=None,
+) -> CacheDecorator:
+    """
+    Note: ``keys`` refer to a named kwargs that are used as a column value for all rows, or as an appended
+    extension of the table name. Assumption is they appear as input values in the data function but not in the output
+    array.
+    :param columns: All column names that come are included in the sqlite table. Implicitly # of rows, and order of
+    columns. If the func endpoint doesn't have all those columns, they will be filled by those in column_keys, it's
+    possible that we can do without the extra column keys need.
+    :param refresh_period: The period of passed time necessary to elicit an update from the timeseries endpoint. This
+        time period pass is calculated using the active_in inclusive range for less-than daily. For daily
+        periods or greater, we assume time_keys is a single time that marks (assuming the current day) when it is valid
+        to query the timeseries endpoint, think EOD OHLC at 4 pm EST. Or we take the second time of the sequence.
+    :param active_in: Intervals are generated starting at active_in[0] and updated additively from the period. When the
+        most recent update period is less than today + min(floor_last_pd,active_in[1]), we request an update. Outside of
+        0 and 1 range, we only load data up to that previous range, until the current time intersects with active_in[0]
+        again.
+    :param time_keys: The two named args that represent the starting and ending date time of the query selection.
+    :param table: Table name, if None we use the data function name.
+    :param table_keys: keys that make the full table name (example timeframes 1m, 1h, 1s).
+    :param column_keys: keys that are included into the database as values for it's column. These will default to the
+        function output columns, and otherwise fill values from the matching input kwargs.
+    :param out_cols: Specifically the ordered columns of the arrow table that will be produced by this wrapper. Always
+    less than or equal to columns.
+    :param rollback:  Not implemented yet (mainly for continuous futures that are backwards adjusted on the next roll date).
+        If we are querying new data, then we include the latest existing datetime in our new data query,
+        if the returned row is not equal to the row from our database, then we log a warning, assume
+        that timeseries entries for that specific matching key group are obsolete, remove them then
+        place them again backfilled to the first date (note replacing row values) likely much quicker
+        for this strategy than full removal then reload.
+    :return:
+    """
+    tk, tak, ck = time_keys, () if table_keys is None else table_keys, column_keys
+    if not isinstance(columns, dict):
+        columns = {columns[i]: i for i in range(len(columns))}
+    if out_cols is None:
+        # This is a backup that could fail, recommended to set the actual
+        # output columns of the function in matching order.
+        out_cols = (*(cl for cl in columns if cl not in column_keys),)
+    refr_micros = int(refresh_period / _1MC)
+    if expires_after is not None:
+        exp_micros = max(int(expires_after / _1MC), refr_micros * 2)
+    def make_keys(kg):
+        sdate, edate = kg[tk[0]], kg[tk[1]]  # intentional fail if NE
+        tav = (*(kg[k] for k in tak),)
+        cv = (*(kg[k] for k in ck),)
+        return sdate, edate, tav, cv
+    def fix_range(sdt, edt, kg):
+        # for simplicity now we will assume the datetime start and end has also been converted to micros pre-wrapper
+        kg[tk[0]] = sdt
+        kg[tk[1]] = edt
+        return kg
+    if refresh_period >= _24H:
+        doff = active_in[1] if not isinstance(active_in, time) else active_in
+        def last_qual() -> int:
+            # ltime is unix micros
+            # curn=datetime.fromtimestamp(ltime,dt.UTC) #timezone should be irrelevant but if issues, use doff's
+            tod = datetime.now(doff.tzinfo).date()
+            pperiod = tod - _24H
+            comp = int(datetime.combine(pperiod, doff).timestamp() * 1_000_000)
+            return comp
+    else:
+        active_window: TimeWindow = active_in  # pyrefly: ignore[bad-assignment]
+        def last_qual() -> int:
+            # Note on timechange days this can be an hour off, but it's not
+            # really an issue for data queries.
+            # If active_in is not a tuple with two time objects, will fail,
+            # correct behavior.
+            nw = datetime.now(active_window[1].tzinfo)
+            tod = nw.date()
+            sdt = datetime.combine(tod, active_window[0])
+            day = tod - (_24H if sdt > nw else _0D)
+            sdt = datetime.combine(day, active_window[0])
+            edt = datetime.combine(day, active_window[1])
+            nw = min(nw, edt)
+            n = (nw - sdt) // refresh_period
+            cp = sdt + refresh_period * n
+            return int(cp.timestamp() * 1_000_000)
+    def cache_upd(edate, lsq) -> int:
+        if edate is None or edate >= lsq:
+            return ut.sys_micros()
+        return edate
+    def _w(func: SeriesFn) -> SeriesFn:
+        tbn = func.__qualname__ if table is None else table
+        tbe = tbn
+        tbe_info = f"{tbe}_info"
+        @wraps(func)
+        def get_series(**kwargs: Any) -> Table:
+            con = local_adbc.sqlite
+            cur = local_adbc.cur
+            sdate, edate, tav, cv = make_keys(kwargs)
+            table_ref = tbe
+            info_table_ref = tbe_info
+            if tav:
+                table_ref = "_".join((tbn, *(str(v) for v in tav)))
+                info_table_ref = f"{table_ref}_info"
+            # three paths if nfo select fails because no table, init table process (already below)
+            # if primary key not in the table, that means new data init.
+            # otherwise normal get process.
+            last_upd = None
+            fl = 0
+            try:
+                # at this point we are assuming there is at least one index column that isn't time. Fix this later.
+                # and only input columns can act as column keys (this is actually needed).
+                cur.execute(last_upd_select(info_table_ref, ck), cv)
+                last_upd = cur.fetchone()
+            except dbapi.DatabaseError:
+                fl = 2
+            if last_upd is None and fl != 2:
+                fl = 1
+            if fl > 0:
+                fix_range(None, edate, kwargs)
+                ltb = func(**kwargs)
+                if not isinstance(ltb, Table) or ltb.num_rows == 0:
+                    return ltb
+                fl_tb = ut.mk_fullarrow(ltb, columns, ck, cv)
+                inft: dict[str, str] | None = None
+                if fl == 2:
+                    inft = ut.infer_sqlite_types(cur, fl_tb)
+                    ddl_nfo = ut.define_ls_infotable(info_table_ref, inft, ck)
+                    cur.execute(ddl_nfo)
+                # Assumption, take away the timestamp, then the endpoint request only captures a single 'id' for the
+                # instrument. Otherwise re-enable the full pass check.
+                # Update: if we need multi-id support, it should now be possible just by changing it to the full agg.
+                # actually, would still need to handle the info table differently.
+                # init info and last update unix micros timestamp
+                cur.execute(insert_cols(info_table_ref, (*ck, ut.LAST_UPD)), (*cv, cache_upd(edate, last_qual())))
+                if fl == 2:
+                    # init the actual lite series table.
+                    ddl = ut.define_ls_table(table_ref, columns, inft, ck, time_col)  # pyrefly: ignore[bad-argument-type]
+                    cur.execute(ddl)
+                cur.adbc_ingest(table_ref, fl_tb, "append")
+                con.commit()
+                ltb=fl_tb.select(out_cols)
+                if sdate is not None:
+                    mask = pc.greater_equal(ltb[time_col], sdate)
+                    offset = pc.index(mask, value=True).as_py()
+                    ltb = ltb.slice(ltb.num_rows, 0) if offset == -1 else ltb.slice(offset)
+            else:
+                last_upd = last_upd[0]  # pyrefly: ignore[unsupported-operation]
+                lsq = last_qual()
+                en = edate is None
+                # “the cache is older than the latest allowable freshness
+                # boundary, and the request extends beyond what’s known fresh”
+                if lsq > last_upd and (en or edate > last_upd):
+                    # Then the period we are asking for is not fully contained in our database.
+                    # Selects the data here.
+                    cur.execute(*series_range_select(table_ref, out_cols, ck, cv, time_col, sdate, edate))
+                    ltb_s = cur.fetchallarrow()
+                    if ltb_s.num_rows == 0:
+                        cur.execute(series_tmax_select(table_ref, ck, time_col), cv)
+                        mxt = cur.fetchone()[0]
+                        if mxt is None:
+                            return ltb_s
+                        #mxt = mxt_row[0]  # assuming data exists now.
+                        # we know it queries too far back and the endpoint
+                        # doesn't have data there.
+                        if not en and edate < mxt:
+                            return ltb_s
+                        if expires_after is not None and mxt + exp_micros < last_upd:
+                            return ltb_s
+                        # if sdate>mxt: #we don't actually need this, as it's self evident for this case
+                        n_sdate = mxt + refr_micros
+                    else:
+                        prv_tm = ltb_s[time_col][-1].as_py()
+                        if expires_after is not None and prv_tm + exp_micros < last_upd:
+                            return ltb_s
+                        n_sdate = prv_tm + refr_micros
+                    fix_range(n_sdate, edate, kwargs)
+                    ltb_t = func(**kwargs)
+                    if not isinstance(ltb_t, Table) or ltb_t.num_rows == 0:
+                        cur.execute(update_last_upd(info_table_ref, ck), (cache_upd(edate, lsq), *cv))
+                        con.commit()
+                        return ltb_s
+                    fl_tb = ut.mk_fullarrow(ltb_t, columns, ck, cv)
+                    # can be built from columns as well
+                    cur.execute(update_last_upd(info_table_ref, ck), (cache_upd(edate, lsq), *cv))
+                    cur.adbc_ingest(table_ref, fl_tb, "append")
+                    con.commit()
+                    # we do this before sending the data
+                    #ltb_s = ltb_s.select(out_cols) #already fetching by out_cols
+                    # we use fl_tb instead of ltb_t because out cols could contain more than what func produces
+                    ltb_t = fl_tb.select(out_cols)
+                    ltb = concat_tables([ltb_s, ltb_t], promote_options="none")
+                else:  # We are requesting for data inside of edate or the new data query happened recently enough.
+                    cur.execute(*series_range_select(table_ref, out_cols, ck, cv, time_col, sdate, edate))
+                    ltb = cur.fetchallarrow()#.select(out_cols)
+            return ltb
+        return get_series
+    return _w

liteseries/_handlers.pyi ADDED Viewed

@@ -0,0 +1,24 @@
+import threading as th
+from datetime import time, timedelta
+from typing import Callable, TypeAlias
+from pyarrow import Table
+SeriesFn = Callable[..., Table]
+CacheDecorator: TypeAlias = Callable[[SeriesFn], SeriesFn]
+# Closes the calling thread's liteseries cursor and connection.
+def close_ls() -> None: ...
+# Submits a close task to each live worker of ``thp`` and then shuts the pool down.
+def threadpool_shutdown_ls(thp) -> None: ...
+def launch_ls(pathuri=None, mem_rep: bool = False, schema: str = "liteseries") -> None: ...
+# Decorator factory for the SQLite-backed cache; defaults are elided here and defined in _handlers.py.
+def ls_cache(
+    columns,
+    time_keys: tuple[str, str],
+    time_col: str,
+    column_keys: tuple,
+    refresh_period: timedelta = ...,
+    active_in: time | tuple[time, time] = ...,
+    out_cols=None,
+    table_keys=None,
+    expires_after: timedelta | None = ...,
+    rollback: bool = False,
+    table=None,
+) -> CacheDecorator: ...

liteseries/_sql.py ADDED Viewed

@@ -0,0 +1,104 @@
+from __future__ import annotations
+import os
+# Parameterised lookup: yields one row when a table with the bound name exists.
+TABLE_EXISTS = "SELECT 1 FROM pragma_table_list WHERE name = ?   AND type = 'table' LIMIT 1"
+# Parameterised lookup: non-hidden column names of the bound table, in column-id order.
+TABLE_COLUMNS = "SELECT name FROM pragma_table_xinfo(?) WHERE hidden = 0 ORDER BY cid"
+# Name of the metadata column that stores the last-update timestamp.
+LAST_UPD = "last_upd"
+# Module-level memo tables for generated SQL fragments (unbounded; keyed by the builder arguments).
+_ASEQ_C = {}  # and_seq: column tuple -> "a = ? AND b = ?"
+_QMRK_C = {}  # qmarks: width -> "?, ?, ?"
+_REQ_C = {}  # colreq: column tuple -> "a, b, c"
+_INS_C = {}  # insert_cols: (table, columns) -> INSERT statement
+_UPD_C = {}  # update_last_upd: (table, key columns) -> UPDATE statement
+def _protect_names_enabled() -> bool:
+    """Return whether SQL identifiers should be double-quoted."""
+    return os.getenv("LITESERIES_PROTECTNAMES", "").casefold() == "true"
+def _qident_protected(ident: str) -> str:
+    """Quote an SQLite identifier and escape embedded double quotes."""
+    dquote, escaped_dquote = '"', '""'
+    return f'"{ident.replace(dquote, escaped_dquote)}"'
+def _qident_plain(ident: str) -> str:
+    """Return ``ident`` unchanged; no quoting or escaping is applied on this path."""
+    return ident
+# Keep the hot path as a direct function binding chosen once at import time.
+# Changing LITESERIES_PROTECTNAMES after import therefore has no effect on ``qident``.
+qident = _qident_protected if _protect_names_enabled() else _qident_plain
+def and_seq(cols: tuple):
+    rs = _ASEQ_C.get(cols)
+    if rs is None:
+        rs = " AND ".join(f"{qident(col)} = ?" for col in cols)
+        _ASEQ_C[cols] = rs
+    return rs
+def qmarks(width: int):  # pragma: no cover
+    rs = _QMRK_C.get(width)
+    if rs is None:
+        rs = ", ".join("?" for _ in range(width))
+        _QMRK_C[width] = rs
+    return rs
+def colreq(cols: tuple):
+    rs = _REQ_C.get(cols)
+    if rs is None:
+        rs = ", ".join(qident(col) for col in cols)
+        _REQ_C[cols] = rs
+    return rs
+def series_range_select(table_ref: str, req_cols: tuple, pidx: tuple, vidx, time_col, srange, erange):
+    sn, en = srange is None, erange is None
+    cl, ps = colreq(req_cols), and_seq(pidx)
+    tb, tc = qident(table_ref), qident(time_col)
+    if sn and en:
+        return f"SELECT {cl} FROM {tb} WHERE {ps}", vidx
+    if sn:
+        return f"SELECT {cl} FROM {tb} WHERE {ps} AND {tc} <= ?", (*vidx, erange)
+    if en:
+        return f"SELECT {cl} FROM {tb} WHERE {ps} AND {tc} >= ?", (*vidx, srange)
+    # other is case 2
+    return (f"SELECT {cl} FROM {tb} WHERE {ps} AND {tc} >= ? AND {tc} <= ?", (*vidx, srange, erange))
+def series_tmax_select(table_ref: str, pidx: tuple, time_col) -> str:
+    return f"SELECT MAX({qident(time_col)}) FROM {qident(table_ref)} WHERE {and_seq(pidx)}"
+def last_upd_select(table_ref: str, pidx: tuple) -> str:
+    return f"SELECT {qident(LAST_UPD)} FROM {qident(table_ref)} WHERE {and_seq(pidx)}"
+def insert_cols(table_ref: str, cols: tuple[str, ...]) -> str:
+    key = (table_ref, cols)
+    rs = _INS_C.get(key)
+    if rs is None:
+        rs = f"INSERT INTO {qident(table_ref)} ({colreq(cols)}) VALUES ({qmarks(len(cols))})"
+        _INS_C[key] = rs
+    return rs
+def update_last_upd(table_ref: str, pidx: tuple[str, ...]) -> str:
+    key = (table_ref, pidx)
+    rs = _UPD_C.get(key)
+    if rs is None:
+        rs = f"UPDATE {qident(table_ref)} SET {qident(LAST_UPD)} = ? WHERE {and_seq(pidx)}"
+        _UPD_C[key] = rs
+    return rs
+# change this to a cached statement later, we can keep using ingest, but for small
+def insert_row(table_ref: str, width: int) -> str:  # pragma: no cover
+    return f"INSERT INTO {qident(table_ref)} VALUES ({qmarks(width)})"

liteseries/_sql.pyi ADDED Viewed

	@@ -0,0 +1 @@
1	+ # pruned: intentionally empty stub

liteseries/_util.py ADDED Viewed

@@ -0,0 +1,153 @@
+from __future__ import annotations
+import os
+import time
+from collections.abc import Sequence
+from itertools import chain
+from pathlib import Path
+import pyarrow as pa
+from . import _sql
+# Re-export of the metadata column name defined in ``_sql``.
+LAST_UPD = _sql.LAST_UPD
+# Memo for ``_cached_import_root``; stays None until the first lookup.
+_FIRST_IMPORT_ROOT: Path | None = None
+# File name used when only a directory (or nothing) is given for the database.
+_DEFAULT_DB_NAME = "liteseries_db.sqlite"
+def _cached_import_root() -> Path:
+    global _FIRST_IMPORT_ROOT
+    if _FIRST_IMPORT_ROOT is None:
+        main_file = getattr(__import__("__main__"), "__file__", None)
+        if main_file is not None:
+            _FIRST_IMPORT_ROOT = Path(main_file).resolve().parent
+        else:
+            _FIRST_IMPORT_ROOT = Path.cwd()
+    return _FIRST_IMPORT_ROOT
+def _pick_sqlite_file(root: Path) -> Path | None:
+    sqlite_files = list(root.glob("*.sqlite"))
+    if not sqlite_files:
+        return None
+    for path in sqlite_files:
+        if "liteseries" in path.stem.casefold():
+            return path
+    return sqlite_files[0]
+def _is_file_uri(path: str) -> bool:
+    return path.startswith("file:")
+def _looks_like_dir(path: str) -> bool:
+    return path.endswith(("/", "\\"))
+def _sqlite_path(path: str) -> Path:
+    db_path = Path(path).expanduser()
+    if db_path.is_dir() or _looks_like_dir(path):
+        return db_path / _DEFAULT_DB_NAME
+    if db_path.suffix.casefold() != ".sqlite":
+        return db_path.with_suffix(".sqlite")
+    return db_path
+def _touch_sqlite(db_path: Path) -> str:
+    pte = db_path.exists()
+    db_path.parent.mkdir(parents=True, exist_ok=True)
+    db_path.touch(exist_ok=True)
+    if not pte:
+        print(f"Created new sqlite db at {db_path}")
+    return str(db_path.resolve())
+def get_dburi(path: str | None) -> str:
+    if path is not None:
+        if _is_file_uri(path):
+            return path
+        return _touch_sqlite(_sqlite_path(path))
+    env_path = os.getenv("LITESERIES_DB")
+    if env_path:
+        if _is_file_uri(env_path):
+            return env_path
+        return _touch_sqlite(_sqlite_path(env_path))
+    root = _cached_import_root()
+    sqlite_file = _pick_sqlite_file(root)
+    if sqlite_file is not None:
+        return _touch_sqlite(sqlite_file)
+    db_path = root / _DEFAULT_DB_NAME
+    dburi = _touch_sqlite(db_path)
+    return dburi
+def sys_micros() -> int:
+    # The less expensive call, not perf counter.
+    return int(time.time() * 1_000_000)
+def table_exists(cur, table: str) -> bool:  # pragma: no cover
+    cur.execute(_sql.TABLE_EXISTS, (table,))
+    return cur.fetchone() is not None
+def insert_row_stmt(cur, table: str) -> str:  # pragma: no cover
+    cur.execute(_sql.TABLE_COLUMNS, (table,))
+    return _sql.insert_row(table, len(cur.fetchall()))
+def infer_sqlite_types(cur, data: pa.Table, sample_rows: int = 2) -> dict[str, str]:
+    tb = "_temp_types"
+    sample = data.slice(0, sample_rows)
+    cur.adbc_ingest(tb, sample, mode="create", temporary=True)
+    cur.execute(f"PRAGMA table_info({_sql.qident(tb)})")
+    type_rows = cur.fetchall()
+    cur.execute(f"DROP TABLE {_sql.qident(tb)}")
+    return {column_name: column_type for _, column_name, column_type, *_ in type_rows}
+def define_ls_table(
+    table_ref: str,
+    col_ord: dict[str, int],
+    col_types: dict[str, str],
+    column_keys: Sequence[str],
+    time_col: str,
+) -> str:
+    cols = sorted(col_ord, key=col_ord.__getitem__)  # in case we change the system later...
+    defs = (f"{_sql.qident(col)} {col_types[col]} NOT NULL" for col in cols)
+    pk = f"PRIMARY KEY ({', '.join(_sql.qident(col) for col in chain(column_keys, (time_col,)))})"
+    ddl = f"CREATE TABLE IF NOT EXISTS {_sql.qident(table_ref)} ({', '.join((*defs, pk))}) STRICT, WITHOUT ROWID"
+    return ddl
+def define_ls_infotable(
+    table_ref: str,
+    col_types: dict[str, str],
+    column_keys: Sequence[str],
+) -> str:
+    nfks = column_keys
+    # Everything but the final rightmost key which is the unix micros.
+    defs = (f"{_sql.qident(col)} {col_types.get(col, 'INTEGER')} NOT NULL" for col in chain(nfks, (LAST_UPD,)))
+    pk = f"PRIMARY KEY ({', '.join(_sql.qident(col) for col in nfks)})"
+    ddl = f"CREATE TABLE IF NOT EXISTS {_sql.qident(table_ref)} ({', '.join((*defs, pk))}) STRICT, WITHOUT ROWID"
+    return ddl
+def mk_fullarrow(ar_tbl: pa.Table, full_cols, col_k, col_v):
+    names0 = set(ar_tbl.column_names)
+    ln = ar_tbl.num_rows
+    # Reuse existing Arrow column views; only missing key columns allocate filled arrays.
+    new_cols = {
+        name: pa.repeat(v, ln) for name, v in zip(col_k, col_v, strict=True) if name not in names0
+    }
+    names = sorted((*ar_tbl.column_names, *new_cols), key=lambda name: full_cols.get(name, len(full_cols)))
+    cols = [ar_tbl.column(name) if name in names0 else new_cols[name] for name in names]
+    return pa.Table.from_arrays(cols, names=names)

liteseries/_util.pyi ADDED Viewed

	@@ -0,0 +1 @@
1	+ # pruned: intentionally empty stub

liteseries/nexus.md ADDED Viewed

@@ -0,0 +1,11 @@
+`_handlers.py` provides the public runtime API for launching thread-local SQLite ADBC connections, priming file-backed databases for WAL usage, shutting thread pools down cleanly, and decorating time-series fetchers with the local cache flow including metadata-row inserts, refresh updates, bounded historical watermarks, and optional expired-key lockouts.
+- `_util` : Resolves database paths, shapes Arrow tables for ingest, and builds the `CREATE TABLE IF NOT EXISTS` statements used during cache initialization.
+- `_sql` : Supplies the quoted SQL statement builders used for metadata lookups, metadata row inserts/updates, cached range reads, and max-timestamp queries.
+`_util.py` contains lower-level helpers for default database discovery, SQLite file creation from default/directory/file paths, timestamp utilities, SQLite table inspection, `CREATE TABLE IF NOT EXISTS` generation, and Arrow table normalization before ingest while reusing existing Arrow column buffers.
+- `_sql` : Provides identifier quoting, table constants, and insert/query helpers that `_util.py` reuses when inspecting or defining SQLite tables.
+`_sql.py` centralizes reusable SQL fragments and tiny caches for generated SQL so the runtime can build common SQLite statements without repeating formatting work, including metadata-row insert/update statements for the cache info tables. Identifier handling is selected at import time: `LITESERIES_PROTECTNAMES=true` keeps the original double-quoted compatibility path, while the default path passes already-valid Python-style names through unchanged.
+- os : Reads `LITESERIES_PROTECTNAMES` once during module import to choose the identifier function used by the SQL builders.
+`README.md` introduces the stable public API, the single-threaded launch/cache/close lifecycle, the supported database and identifier environment variables, and beginner-oriented `ls_cache` usage examples for Arrow-backed endpoint functions.

liteseries/py.typed ADDED Viewed

File without changes

liteseries-1.0.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,250 @@
+Metadata-Version: 2.4
+Name: liteseries
+Version: 1.0.0
+Summary: SQLite-backed local caching helpers for time series data.
+Author-email: Charles Marks <charlesmarksco@gmail.com>
+License: MIT License
+        Copyright (c) 2026 Charles Marks
+        Permission is hereby granted, free of charge, to any person obtaining a copy
+        of this software and associated documentation files (the "Software"), to deal
+        in the Software without restriction, including without limitation the rights
+        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+        copies of the Software, and to permit persons to whom the Software is
+        furnished to do so, subject to the following conditions:
+        The above copyright notice and this permission notice shall be included in all
+        copies or substantial portions of the Software.
+        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+        SOFTWARE.
+License-File: LICENSE
+Classifier: License :: OSI Approved :: MIT License
+Requires-Python: >=3.10
+Requires-Dist: adbc-driver-sqlite
+Requires-Dist: pyarrow
+Requires-Dist: python-dateutil
+Description-Content-Type: text/markdown
+![Liteseries](Liteseries-1.png)
+An SQLite-backed replicator and cache for timeseries data.
+Liteseries works as an invisible layer around a timeseries endpoint. Your function still returns the rows you would
+normally get from the vendor, while liteseries stores them in SQLite for the next matching call. When a request reaches
+past the saved range, it reads the local slice first and asks the vendor only for the missing forward period. Liteseries
+is stable for single-threaded use, with parallel reads available where the Arrow backend supports them.
+```python
+from liteseries import close_ls, launch_ls, ls_cache, threadpool_shutdown_ls
+```
+- `launch_ls()` opens the local SQLite/ADBC runtime for the current thread.
+- `ls_cache(...)` decorates endpoint functions that return `pyarrow.Table`
+  objects.
+- `close_ls()` closes the runtime connection when your process is done with it.
+- `threadpool_shutdown_ls(...)` shuts a thread pool down cleanly when liteseries
+  is used in a multithreaded environment.
+## Usage
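+Install the package, then call `launch_ls()` once before cached functions run.
+The examples below import `datetime.UTC`, which requires Python 3.11 or newer;
+on Python 3.10, use `datetime.timezone.utc` in its place: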
+```python
+from __future__ import annotations
+from datetime import UTC, datetime, time, timedelta
+import pyarrow as pa
+from liteseries import close_ls, launch_ls, ls_cache
+def unix_micros(year: int, month: int, day: int) -> int:
+    """Return a UTC timestamp in the microsecond units liteseries stores."""
+    return int(datetime(year, month, day, tzinfo=UTC).timestamp() * 1_000_000)
+launch_ls()
+```
+By default, `launch_ls()` looks for an existing `.sqlite` file near the entry
+script, prefers one with `liteseries` in its name, and otherwise creates
+`liteseries_db.sqlite`. Pass a path when you want to choose the file yourself:
+```python
+launch_ls("cache/prices.sqlite")
+```
+You can also set `LITESERIES_DB` before launch when configuration belongs in the
+environment:
+```powershell
+$env:LITESERIES_DB = "D:\market-cache\prices.sqlite"
+```
+### Wrap An Endpoint
+Your endpoint function should accept keyword arguments for the requested time
+range and key values, then return a `pyarrow.Table`. The time column is expected
+to use Unix microseconds, and `None` means an open-ended side of the range.
+This example uses a local data source so the shape is easy to see. A real
+function can call an HTTP API, a vendor SDK, or a file reader.
+```python
+def vendor_prices(start, end, symbol, interval) -> pa.Table:
+    """Fetch rows from the source system and return only endpoint columns."""
+    rows = [
+        (unix_micros(2026, 1, 2), 101.25, 102.10),
+        (unix_micros(2026, 1, 3), 102.00, 103.40),
+        (unix_micros(2026, 1, 4), 103.25, 103.05),
+    ]
+    if start is not None:
+        rows = [row for row in rows if row[0] >= start]
+    if end is not None:
+        rows = [row for row in rows if row[0] <= end]
+    return pa.table(
+        {
+            "ts": [row[0] for row in rows],
+            "open": [row[1] for row in rows],
+            "close": [row[2] for row in rows],
+        }
+    )
+```
+Decorate it with `ls_cache`. `columns` is the full SQLite schema, including key
+columns that may come from function arguments instead of the returned Arrow
+table. `column_keys` identify separate series inside one table, while
+`table_keys` can split families such as intervals into separate SQLite tables.
+```python
+@ls_cache(
+    columns=("symbol", "ts", "open", "close"),
+    time_keys=("start", "end"),
+    time_col="ts",
+    column_keys=("symbol",),
+    out_cols=("ts", "open", "close"),
+    table="prices",
+    refresh_period=timedelta(days=1),
+    active_in=time(0, tzinfo=UTC),
+)
+def prices(start, end, symbol):
+    """Return cached prices, refreshing from the endpoint when needed."""
+    return vendor_prices(start, end, symbol, interval="1d")
+```
+Now call the wrapped function with keyword arguments:
+```python
+try:
+    first = prices(
+        start=unix_micros(2026, 1, 2),
+        end=unix_micros(2026, 1, 4),
+        symbol="MSFT",
+    )
+    second = prices(
+        start=unix_micros(2026, 1, 2),
+        end=unix_micros(2026, 1, 4),
+        symbol="MSFT",
+    )
+    assert second.equals(first)
+finally:
+    close_ls()
+```
+On the first call, liteseries creates the data table and its metadata table,
+fills missing key columns, writes the Arrow rows, and returns the requested
+slice. On later calls, it serves rows from SQLite unless the request reaches
+past the cached freshness boundary.
+### Refresh Windows
+`refresh_period` and `active_in` tell liteseries when it is worth checking the
+endpoint again.
+For daily or slower data, pass one `time` that marks when the latest period is
+expected to be complete:
+```python
+@ls_cache(
+    columns=("symbol", "ts", "open", "close"),
+    time_keys=("start", "end"),
+    time_col="ts",
+    column_keys=("symbol",),
+    table="daily_prices",
+    refresh_period=timedelta(days=1),
+    active_in=time(16, 1, tzinfo=UTC),
+)
+def daily_prices(start, end, symbol):
+    """Cache daily bars after the market close boundary has passed."""
+    return vendor_prices(start, end, symbol, interval="1d")
+```
+For intraday data, pass a `(start_time, end_time)` window. Liteseries advances
+the freshness boundary by `refresh_period` steps inside that window:
+```python
+@ls_cache(
+    columns=("symbol", "ts", "open", "close"),
+    time_keys=("start", "end"),
+    time_col="ts",
+    column_keys=("symbol",),
+    table_keys=("interval",),
+    out_cols=("ts", "open", "close"),
+    table="intraday_prices",
+    refresh_period=timedelta(minutes=5),
+    active_in=(time(9, 30, tzinfo=UTC), time(16, 0, tzinfo=UTC)),
+)
+def intraday_prices(start, end, symbol, interval):
+    """Cache one interval per SQLite table, keyed by symbol within each table."""
+    return vendor_prices(start, end, symbol, interval=interval)
+```
+### Columns And Names
+Use plain Python-style column and table names when you can. That is the default
+fast path. If your endpoint already produces names with spaces or quotes, set
+`LITESERIES_PROTECTNAMES=true` before importing liteseries so SQL identifiers
+are double-quoted:
+```powershell
+$env:LITESERIES_PROTECTNAMES = "true"
+```
+`out_cols` controls the columns returned by the decorated function. This is handy
+when the database needs key columns such as `symbol`, but callers only want the
+time-series values.
+### Empty Tails
+Some providers stop returning rows after a symbol expires or a contract ends.
+Pass `expires_after` when liteseries should stop querying the endpoint for newer
+rows for that key after repeated empty tail checks:
+```python
+@ls_cache(
+    columns=("symbol", "ts", "open", "close"),
+    time_keys=("start", "end"),
+    time_col="ts",
+    column_keys=("symbol",),
+    table="expired_prices",
+    refresh_period=timedelta(hours=1),
+    active_in=(time(0, tzinfo=UTC), time(23, 59, tzinfo=UTC)),
+    expires_after=timedelta(days=7),
+)
+def expired_prices(start, end, symbol):
+    """Cache sparse series without querying forever beyond the final row."""
+    return vendor_prices(start, end, symbol, interval="1h")
+```
+For a live provider example with pandas/yfinance conversion, see `demo.py`.

liteseries-1.0.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,14 @@
+liteseries/__init__.py,sha256=HVNs_WXvYR2-7xoDwyoI7Z0-QTMoFXX6CsAjlzd5X5I,188
+liteseries/_handlers.py,sha256=ggdDMJG_0RNCStk_M-_T9nwRdEGiZ32mDAmldpMvyyk,14382
+liteseries/_sql.py,sha256=rvV2LPO6FtleaRjprm_fL6276XQpcEUeBkHmjvQ-PPw,3231
+liteseries/_util.py,sha256=HnNO9PywXFYcCmN5jhsWapa8tD3xFqVMdiWB6hm8aws,4764
+liteseries/nexus.md,sha256=5y6qrg9dxjrNwYYipggHPHZ6NHO2j_Pgz_SiIWhHTSk,1948
+liteseries/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+liteseries/__init__.pyi,sha256=BkBechYkS5kqtWmnTri6hOhkinM4uoGUR8Mr43OupRQ,152
+liteseries/_handlers.pyi,sha256=0SfvmuTxbBrImMiYvjfTVkqo8y9JpeCPkFiFTjyoCqY,745
+liteseries/_sql.pyi,sha256=POqiRkpQ8bJnb2e42_oCSpwKH9tHzNl1sKyWFDWgGx4,36
+liteseries/_util.pyi,sha256=POqiRkpQ8bJnb2e42_oCSpwKH9tHzNl1sKyWFDWgGx4,36
+liteseries-1.0.0.dist-info/METADATA,sha256=IbW1QX4kiqNhhVPOBKnByUxISCvZtUVz7pteeMHq-MM,8679
+liteseries-1.0.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
+liteseries-1.0.0.dist-info/licenses/LICENSE,sha256=ylIXgJBn5_PBiprN-8ObD_kWoCnxGbSPs422xH4cpZg,1091
+liteseries-1.0.0.dist-info/RECORD,,

liteseries-1.0.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.30.1
+Root-Is-Purelib: true
+Tag: py3-none-any

liteseries-1.0.0.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Charles Marks
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.