PyPI - liteseries - Versions diffs - 1.0.0__tar.gz - Mend

liteseries 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

liteseries-1.0.0/.gitattributes ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ # *.ipynb linguist-documentation
2	+ *.ipynb linguist-detectable=false

liteseries-1.0.0/.gitconfig ADDED Viewed

File without changes

liteseries-1.0.0/.gitignore ADDED Viewed

@@ -0,0 +1,25 @@
+.idea
+*.iml
+out
+gen
+**/__pycache__/
+**/*.py[cod]
+venv
+.venv
+testing.ipynb
+testing.py
+test.ipynb
+test.py
+**/AGENTS.md
+*.egg-info/
+uv.lock
+.coverage
+.obsidian
+dist/
+**/html
+progress.md
+nexus-task.md
+pyproject_temp.toml
+temp
+liteseries_db.sqlite
+scratch.md

liteseries-1.0.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Charles Marks
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

liteseries-1.0.0/Liteseries-1.png ADDED Viewed

Binary file

liteseries-1.0.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,250 @@
+Metadata-Version: 2.4
+Name: liteseries
+Version: 1.0.0
+Summary: SQLite-backed local caching helpers for time series data.
+Author-email: Charles Marks <charlesmarksco@gmail.com>
+License: MIT License
+        Copyright (c) 2026 Charles Marks
+        Permission is hereby granted, free of charge, to any person obtaining a copy
+        of this software and associated documentation files (the "Software"), to deal
+        in the Software without restriction, including without limitation the rights
+        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+        copies of the Software, and to permit persons to whom the Software is
+        furnished to do so, subject to the following conditions:
+        The above copyright notice and this permission notice shall be included in all
+        copies or substantial portions of the Software.
+        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+        SOFTWARE.
+License-File: LICENSE
+Classifier: License :: OSI Approved :: MIT License
+Requires-Python: >=3.10
+Requires-Dist: adbc-driver-sqlite
+Requires-Dist: pyarrow
+Requires-Dist: python-dateutil
+Description-Content-Type: text/markdown
+![Liteseries](Liteseries-1.png)
+An SQLite-backed replicator and cache for timeseries data.
+Liteseries works as an invisible layer around a timeseries endpoint. Your function still returns the rows you would
+normally get from the vendor, while liteseries stores them in SQLite for the next matching call. When a request reaches
+past the saved range, it reads the local slice first and asks the vendor only for the missing forward period. Liteseries
+is stable for single-threaded use, with parallel reads available where the Arrow backend supports them.
+```python
+from liteseries import close_ls, launch_ls, ls_cache, threadpool_shutdown_ls
+```
+- `launch_ls()` opens the local SQLite/ADBC runtime for the current thread.
+- `ls_cache(...)` decorates endpoint functions that return `pyarrow.Table`
+  objects.
+- `close_ls()` closes the runtime connection when your process is done with it.
+- `threadpool_shutdown_ls(...)` is intended for use in a multithreaded environment.
+## Usage
+Install the package, then call `launch_ls()` once before cached functions run:
+```python
+from __future__ import annotations
+from datetime import UTC, datetime, time, timedelta
+import pyarrow as pa
+from liteseries import close_ls, launch_ls, ls_cache
+def unix_micros(year: int, month: int, day: int) -> int:
+    """Return a UTC timestamp in the microsecond units liteseries stores."""
+    return int(datetime(year, month, day, tzinfo=UTC).timestamp() * 1_000_000)
+launch_ls()
+```
+By default, `launch_ls()` looks for an existing `.sqlite` file near the entry
+script, prefers one with `liteseries` in its name, and otherwise creates
+`liteseries_db.sqlite`. Pass a path when you want to choose the file yourself:
+```python
+launch_ls("cache/prices.sqlite")
+```
+You can also set `LITESERIES_DB` before launch when configuration belongs in the
+environment:
+```powershell
+$env:LITESERIES_DB = "D:\market-cache\prices.sqlite"
+```
+### Wrap An Endpoint
+Your endpoint function should accept keyword arguments for the requested time
+range and key values, then return a `pyarrow.Table`. The time column is expected
+to use Unix microseconds, and `None` means an open-ended side of the range.
+This example uses a local data source so the shape is easy to see. A real
+function can call an HTTP API, a vendor SDK, or a file reader.
+```python
+def vendor_prices(start, end, symbol, interval) -> pa.Table:
+    """Fetch rows from the source system and return only endpoint columns."""
+    rows = [
+        (unix_micros(2026, 1, 2), 101.25, 102.10),
+        (unix_micros(2026, 1, 3), 102.00, 103.40),
+        (unix_micros(2026, 1, 4), 103.25, 103.05),
+    ]
+    if start is not None:
+        rows = [row for row in rows if row[0] >= start]
+    if end is not None:
+        rows = [row for row in rows if row[0] <= end]
+    return pa.table(
+        {
+            "ts": [row[0] for row in rows],
+            "open": [row[1] for row in rows],
+            "close": [row[2] for row in rows],
+        }
+    )
+```
+Decorate it with `ls_cache`. `columns` is the full SQLite schema, including key
+columns that may come from function arguments instead of the returned Arrow
+table. `column_keys` identify separate series inside one table, while
+`table_keys` can split families such as intervals into separate SQLite tables.
+```python
+@ls_cache(
+    columns=("symbol", "ts", "open", "close"),
+    time_keys=("start", "end"),
+    time_col="ts",
+    column_keys=("symbol",),
+    out_cols=("ts", "open", "close"),
+    table="prices",
+    refresh_period=timedelta(days=1),
+    active_in=time(0, tzinfo=UTC),
+)
+def prices(start, end, symbol):
+    """Return cached prices, refreshing from the endpoint when needed."""
+    return vendor_prices(start, end, symbol, interval="1d")
+```
+Now call the wrapped function with keyword arguments:
+```python
+try:
+    first = prices(
+        start=unix_micros(2026, 1, 2),
+        end=unix_micros(2026, 1, 4),
+        symbol="MSFT",
+    )
+    second = prices(
+        start=unix_micros(2026, 1, 2),
+        end=unix_micros(2026, 1, 4),
+        symbol="MSFT",
+    )
+    assert second.equals(first)
+finally:
+    close_ls()
+```
+On the first call, liteseries creates the data table and its metadata table,
+fills missing key columns, writes the Arrow rows, and returns the requested
+slice. On later calls, it serves rows from SQLite unless the request reaches
+past the cached freshness boundary.
+### Refresh Windows
+`refresh_period` and `active_in` tell liteseries when it is worth checking the
+endpoint again.
+For daily or slower data, pass one `time` that marks when the latest period is
+expected to be complete:
+```python
+@ls_cache(
+    columns=("symbol", "ts", "open", "close"),
+    time_keys=("start", "end"),
+    time_col="ts",
+    column_keys=("symbol",),
+    table="daily_prices",
+    refresh_period=timedelta(days=1),
+    active_in=time(16, 1, tzinfo=UTC),
+)
+def daily_prices(start, end, symbol):
+    """Cache daily bars after the market close boundary has passed."""
+    return vendor_prices(start, end, symbol, interval="1d")
+```
+For intraday data, pass a `(start_time, end_time)` window. Liteseries advances
+the freshness boundary by `refresh_period` steps inside that window:
+```python
+@ls_cache(
+    columns=("symbol", "ts", "open", "close"),
+    time_keys=("start", "end"),
+    time_col="ts",
+    column_keys=("symbol",),
+    table_keys=("interval",),
+    out_cols=("ts", "open", "close"),
+    table="intraday_prices",
+    refresh_period=timedelta(minutes=5),
+    active_in=(time(9, 30, tzinfo=UTC), time(16, 0, tzinfo=UTC)),
+)
+def intraday_prices(start, end, symbol, interval):
+    """Cache one interval per SQLite table, keyed by symbol within each table."""
+    return vendor_prices(start, end, symbol, interval=interval)
+```
+### Columns And Names
+Use plain Python-style column and table names when you can. That is the default
+fast path. If your endpoint already produces names with spaces or quotes, set
+`LITESERIES_PROTECTNAMES=true` before importing liteseries so SQL identifiers
+are double-quoted:
+```powershell
+$env:LITESERIES_PROTECTNAMES = "true"
+```
+`out_cols` controls the columns returned by the decorated function. This is handy
+when the database needs key columns such as `symbol`, but callers only want the
+time-series values.
+### Empty Tails
+Some providers stop returning rows after a symbol expires or a contract ends.
+Pass `expires_after` when liteseries should stop asking forward for that key
+after repeated empty tail checks:
+```python
+@ls_cache(
+    columns=("symbol", "ts", "open", "close"),
+    time_keys=("start", "end"),
+    time_col="ts",
+    column_keys=("symbol",),
+    table="expired_prices",
+    refresh_period=timedelta(hours=1),
+    active_in=(time(0, tzinfo=UTC), time(23, 59, tzinfo=UTC)),
+    expires_after=timedelta(days=7),
+)
+def expired_prices(start, end, symbol):
+    """Cache sparse series without querying forever beyond the final row."""
+    return vendor_prices(start, end, symbol, interval="1h")
+```
+For a live provider example with pandas/yfinance conversion, see `demo.py`.

liteseries-1.0.0/README.md ADDED Viewed

@@ -0,0 +1,216 @@
+![Liteseries](Liteseries-1.png)
+An SQLite-backed replicator and cache for timeseries data.
+Liteseries works as an invisible layer around a timeseries endpoint. Your function still returns the rows you would
+normally get from the vendor, while liteseries stores them in SQLite for the next matching call. When a request reaches
+past the saved range, it reads the local slice first and asks the vendor only for the missing forward period. Liteseries
+is stable for single-threaded use, with parallel reads available where the Arrow backend supports them.
+```python
+from liteseries import close_ls, launch_ls, ls_cache, threadpool_shutdown_ls
+```
+- `launch_ls()` opens the local SQLite/ADBC runtime for the current thread.
+- `ls_cache(...)` decorates endpoint functions that return `pyarrow.Table`
+  objects.
+- `close_ls()` closes the runtime connection when your process is done with it.
+- `threadpool_shutdown_ls(...)` is intended for use in a multithreaded environment.
+## Usage
+Install the package, then call `launch_ls()` once before cached functions run:
+```python
+from __future__ import annotations
+from datetime import UTC, datetime, time, timedelta
+import pyarrow as pa
+from liteseries import close_ls, launch_ls, ls_cache
+def unix_micros(year: int, month: int, day: int) -> int:
+    """Return a UTC timestamp in the microsecond units liteseries stores."""
+    return int(datetime(year, month, day, tzinfo=UTC).timestamp() * 1_000_000)
+launch_ls()
+```
+By default, `launch_ls()` looks for an existing `.sqlite` file near the entry
+script, prefers one with `liteseries` in its name, and otherwise creates
+`liteseries_db.sqlite`. Pass a path when you want to choose the file yourself:
+```python
+launch_ls("cache/prices.sqlite")
+```
+You can also set `LITESERIES_DB` before launch when configuration belongs in the
+environment:
+```powershell
+$env:LITESERIES_DB = "D:\market-cache\prices.sqlite"
+```
+### Wrap An Endpoint
+Your endpoint function should accept keyword arguments for the requested time
+range and key values, then return a `pyarrow.Table`. The time column is expected
+to use Unix microseconds, and `None` means an open-ended side of the range.
+This example uses a local data source so the shape is easy to see. A real
+function can call an HTTP API, a vendor SDK, or a file reader.
+```python
+def vendor_prices(start, end, symbol, interval) -> pa.Table:
+    """Fetch rows from the source system and return only endpoint columns."""
+    rows = [
+        (unix_micros(2026, 1, 2), 101.25, 102.10),
+        (unix_micros(2026, 1, 3), 102.00, 103.40),
+        (unix_micros(2026, 1, 4), 103.25, 103.05),
+    ]
+    if start is not None:
+        rows = [row for row in rows if row[0] >= start]
+    if end is not None:
+        rows = [row for row in rows if row[0] <= end]
+    return pa.table(
+        {
+            "ts": [row[0] for row in rows],
+            "open": [row[1] for row in rows],
+            "close": [row[2] for row in rows],
+        }
+    )
+```
+Decorate it with `ls_cache`. `columns` is the full SQLite schema, including key
+columns that may come from function arguments instead of the returned Arrow
+table. `column_keys` identify separate series inside one table, while
+`table_keys` can split families such as intervals into separate SQLite tables.
+```python
+@ls_cache(
+    columns=("symbol", "ts", "open", "close"),
+    time_keys=("start", "end"),
+    time_col="ts",
+    column_keys=("symbol",),
+    out_cols=("ts", "open", "close"),
+    table="prices",
+    refresh_period=timedelta(days=1),
+    active_in=time(0, tzinfo=UTC),
+)
+def prices(start, end, symbol):
+    """Return cached prices, refreshing from the endpoint when needed."""
+    return vendor_prices(start, end, symbol, interval="1d")
+```
+Now call the wrapped function with keyword arguments:
+```python
+try:
+    first = prices(
+        start=unix_micros(2026, 1, 2),
+        end=unix_micros(2026, 1, 4),
+        symbol="MSFT",
+    )
+    second = prices(
+        start=unix_micros(2026, 1, 2),
+        end=unix_micros(2026, 1, 4),
+        symbol="MSFT",
+    )
+    assert second.equals(first)
+finally:
+    close_ls()
+```
+On the first call, liteseries creates the data table and its metadata table,
+fills missing key columns, writes the Arrow rows, and returns the requested
+slice. On later calls, it serves rows from SQLite unless the request reaches
+past the cached freshness boundary.
+### Refresh Windows
+`refresh_period` and `active_in` tell liteseries when it is worth checking the
+endpoint again.
+For daily or slower data, pass one `time` that marks when the latest period is
+expected to be complete:
+```python
+@ls_cache(
+    columns=("symbol", "ts", "open", "close"),
+    time_keys=("start", "end"),
+    time_col="ts",
+    column_keys=("symbol",),
+    table="daily_prices",
+    refresh_period=timedelta(days=1),
+    active_in=time(16, 1, tzinfo=UTC),
+)
+def daily_prices(start, end, symbol):
+    """Cache daily bars after the market close boundary has passed."""
+    return vendor_prices(start, end, symbol, interval="1d")
+```
+For intraday data, pass a `(start_time, end_time)` window. Liteseries advances
+the freshness boundary by `refresh_period` steps inside that window:
+```python
+@ls_cache(
+    columns=("symbol", "ts", "open", "close"),
+    time_keys=("start", "end"),
+    time_col="ts",
+    column_keys=("symbol",),
+    table_keys=("interval",),
+    out_cols=("ts", "open", "close"),
+    table="intraday_prices",
+    refresh_period=timedelta(minutes=5),
+    active_in=(time(9, 30, tzinfo=UTC), time(16, 0, tzinfo=UTC)),
+)
+def intraday_prices(start, end, symbol, interval):
+    """Cache one interval per SQLite table, keyed by symbol within each table."""
+    return vendor_prices(start, end, symbol, interval=interval)
+```
+### Columns And Names
+Use plain Python-style column and table names when you can. That is the default
+fast path. If your endpoint already produces names with spaces or quotes, set
+`LITESERIES_PROTECTNAMES=true` before importing liteseries so SQL identifiers
+are double-quoted:
+```powershell
+$env:LITESERIES_PROTECTNAMES = "true"
+```
+`out_cols` controls the columns returned by the decorated function. This is handy
+when the database needs key columns such as `symbol`, but callers only want the
+time-series values.
+### Empty Tails
+Some providers stop returning rows after a symbol expires or a contract ends.
+Pass `expires_after` when liteseries should stop asking forward for that key
+after repeated empty tail checks:
+```python
+@ls_cache(
+    columns=("symbol", "ts", "open", "close"),
+    time_keys=("start", "end"),
+    time_col="ts",
+    column_keys=("symbol",),
+    table="expired_prices",
+    refresh_period=timedelta(hours=1),
+    active_in=(time(0, tzinfo=UTC), time(23, 59, tzinfo=UTC)),
+    expires_after=timedelta(days=7),
+)
+def expired_prices(start, end, symbol):
+    """Cache sparse series without querying forever beyond the final row."""
+    return vendor_prices(start, end, symbol, interval="1h")
+```
+For a live provider example with pandas/yfinance conversion, see `demo.py`.

liteseries-1.0.0/demo.py ADDED Viewed

@@ -0,0 +1,138 @@
+from __future__ import annotations
+import os
+from datetime import UTC
+from datetime import datetime
+from datetime import time
+from datetime import timedelta
+import pandas as pd
+import pyarrow as pa
+import yfinance as yf
+from pyarrow import concat_tables
+from liteseries import close_ls
+from liteseries import launch_ls
+from liteseries import ls_cache
+os.environ["LITESERIES_PROTECTNAMES"]='true'
+launch_ls()
+def dt_micros(year: int, month: int, day: int) -> int:
+    return int(datetime(year, month, day, tzinfo=UTC).timestamp() * 1_000_000)
+def yf_to_arrow(frame: pd.DataFrame|None) -> pa.Table|None:
+    if frame is None: return None
+    frame = frame.rename_axis("Date").reset_index()
+    frame["ts"] = pd.to_datetime(frame.pop("Date"),utc=True).astype("int64")*1_000_000
+    frame["Volume"] = frame["Volume"].fillna(0).astype("int64")
+    return pa.Table.from_pandas(frame, preserve_index=False)
+def yf_prices_full(start, end, ticker, interval, auto_adjust=False) -> pa.Table|None:
+    start_dt = pd.Timestamp(start, unit="us", tz="UTC").to_pydatetime() if start is not None else None
+    end_dt = pd.Timestamp(end, unit="us", tz="UTC").to_pydatetime() + pd.Timedelta(days=1) if end is not None else None
+    now_utc = datetime.now(UTC)
+    def download_window(start_dt, end_dt)->pd.DataFrame|None:
+        return yf.download(
+            tickers=ticker,
+            start=start_dt,
+            end=end_dt,
+            interval=interval,
+            auto_adjust=auto_adjust,
+            actions=False,
+            progress=False,
+            threads=True,
+            multi_level_index=False,
+        )
+    if interval != "1m" and start_dt is None:
+        end_dt = None
+    if interval == "1m":
+        if start_dt is None:
+            start_dt = now_utc - timedelta(days=30)
+        if end_dt is None:
+            end_dt = now_utc
+        if end_dt - start_dt > timedelta(days=8):
+            chunks: list[pa.Table] = []
+            chunk_start = start_dt
+            while chunk_start < end_dt:
+                chunk_end = min(chunk_start + timedelta(days=8), end_dt)
+                frame = download_window(chunk_start, chunk_end)
+                if frame is not None and not frame.empty:
+                    chunks.append(yf_to_arrow(frame)) # type: ignore
+                chunk_start = chunk_end
+            if len(chunks)!=0:
+                return concat_tables(chunks, promote_options="none")
+            if not chunks:
+                return yf_to_arrow(download_window(start_dt, end_dt))
+            return concat_tables(chunks, promote_options="none")
+    return yf_to_arrow(download_window(start_dt, end_dt))
+@ls_cache(
+    columns=("Adj Close", "Close", "High", "Low", "Open", "Volume", "ts", "ticker"),
+    time_keys=("start", "end"),
+    time_col="ts",
+    column_keys=("ticker",),
+    table_keys=("interval",),
+    out_cols=("Adj Close", "Close", "High", "Low", "Open", "Volume", "ts"),
+    table="yf_prices",
+    refresh_period=timedelta(days=1),
+    active_in=time(0, tzinfo=UTC),
+)
+def yf_prices(start, end, ticker, interval):
+    return yf_prices_full(start, end, ticker, interval, auto_adjust=False)
+@ls_cache(
+    columns=("Close", "High", "Low", "Open", "Volume", "ts", "ticker"),
+    time_keys=("start", "end"),
+    time_col="ts",
+    column_keys=("ticker",),
+    table_keys=("interval",),
+    out_cols=("Close", "High", "Low", "Open", "Volume", "ts"),
+    table="yf_prices_adj",
+    refresh_period=timedelta(days=1),
+    active_in=time(0, tzinfo=UTC),
+)
+def yf_prices_adj(start, end, ticker, interval):
+    return yf_prices_full(start, end, ticker, interval, auto_adjust=True)
+def main() -> None:
+    try:
+        start = None #dt_micros(2026, 4, 8)
+        end = None #dt_micros(2026, 3, 25)
+        print(start, end)
+        raw_first = yf_prices(start=start, end=end, ticker="MSFT", interval="1m")
+        raw_second = yf_prices(start=start, end=end, ticker="MSFT", interval="1m")
+        adj_first = yf_prices_adj(start=start, end=end, ticker="MSFT", interval="5m")
+        adj_second = yf_prices_adj(start=start, end=end, ticker="MSFT", interval="5m")
+        print("raw columns:", raw_first.column_names)
+        print("raw rows:", raw_first.num_rows)
+        print("raw cached second call:", raw_second.num_rows == raw_first.num_rows)
+        print(raw_second)
+        print()
+        print("adjusted columns:", adj_first.column_names)
+        print("adjusted rows:", adj_first.num_rows)
+        print("adjusted cached second call:", adj_second.num_rows == adj_first.num_rows)
+        print(adj_second)
+        #print(yf_prices_full(start,end,"MSFT",'1m'))
+    finally:
+        close_ls()
+if __name__ == "__main__":
+    main()

liteseries-1.0.0/liteseries/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from __future__ import annotations
+from ._handlers import close_ls, launch_ls, ls_cache, threadpool_shutdown_ls
+__all__ = ["close_ls", "launch_ls", "ls_cache", "threadpool_shutdown_ls"]