PyPI - oq-data - Versions diffs - 0.1.0__tar.gz - Mend

oq-data 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

oq_data-0.1.0/.gitignore +82 -0
oq_data-0.1.0/PKG-INFO +51 -0
oq_data-0.1.0/README.md +22 -0
oq_data-0.1.0/pyproject.toml +44 -0
oq_data-0.1.0/src/oq_data/__init__.py +94 -0
oq_data-0.1.0/src/oq_data/announcements.py +176 -0
oq_data-0.1.0/src/oq_data/api.py +103 -0
oq_data-0.1.0/src/oq_data/bhavcopy.py +272 -0
oq_data-0.1.0/src/oq_data/cli.py +232 -0
oq_data-0.1.0/src/oq_data/config.py +105 -0
oq_data-0.1.0/src/oq_data/corporate_actions.py +172 -0
oq_data-0.1.0/src/oq_data/delivery.py +156 -0
oq_data-0.1.0/src/oq_data/flows.py +183 -0
oq_data-0.1.0/src/oq_data/fno.py +225 -0
oq_data-0.1.0/src/oq_data/storage.py +285 -0
oq_data-0.1.0/src/oq_data/symbols.py +146 -0
oq_data-0.1.0/src/oq_data/universes.py +130 -0
oq_data-0.1.0/tests/conftest.py +20 -0
oq_data-0.1.0/tests/fixtures/announcements_20240401.json +4 -0
oq_data-0.1.0/tests/fixtures/fii_dii_20240401.json +4 -0
oq_data-0.1.0/tests/fixtures/sec_bhavdata_full_01042024.csv +4 -0
oq_data-0.1.0/tests/test_announcements.py +58 -0
oq_data-0.1.0/tests/test_api.py +100 -0
oq_data-0.1.0/tests/test_bhavcopy.py +144 -0
oq_data-0.1.0/tests/test_cli.py +77 -0
oq_data-0.1.0/tests/test_config.py +38 -0
oq_data-0.1.0/tests/test_corporate_actions.py +117 -0
oq_data-0.1.0/tests/test_delivery.py +65 -0
oq_data-0.1.0/tests/test_flows.py +57 -0
oq_data-0.1.0/tests/test_fno.py +143 -0
oq_data-0.1.0/tests/test_storage.py +97 -0
oq_data-0.1.0/tests/test_symbols.py +43 -0
oq_data-0.1.0/tests/test_universes.py +49 -0

oq_data-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,82 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+*.egg
+*.egg-info/
+dist/
+build/
+wheels/
+develop-eggs/
+eggs/
+parts/
+sdist/
+var/
+*.manifest
+*.spec
+pip-log.txt
+pip-delete-this-directory.txt
+# uv
+.venv/
+venv/
+env/
+ENV/
+.python-version
+# Testing / coverage
+.pytest_cache/
+.coverage
+.coverage.*
+htmlcov/
+.tox/
+.nox/
+coverage.xml
+*.cover
+.cache
+# mypy / ruff
+.mypy_cache/
+.ruff_cache/
+.dmypy.json
+dmypy.json
+# Jupyter
+.ipynb_checkpoints/
+*.ipynb_checkpoints
+# Data / artifacts
+data/
+*.parquet
+*.duckdb
+*.duckdb.wal
+*.csv.gz
+*.zip
+.openquant/
+!packages/*/tests/fixtures/**
+# IDE / OS
+.idea/
+.vscode/
+*.swp
+*.swo
+.DS_Store
+Thumbs.db
+# Logs
+*.log
+logs/
+# Secrets
+.env
+.env.*
+!.env.example
+*.pem
+*.key
+# build artifacts
+dist/
+*.egg-info/

oq_data-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,51 @@
+Metadata-Version: 2.4
+Name: oq-data
+Version: 0.1.0
+Summary: NSE/BSE data pipeline for OpenQuant India: bhavcopy ingestion, corporate actions, point-in-time index universes.
+Project-URL: Homepage, https://github.com/revorhq/openquant
+Project-URL: Repository, https://github.com/revorhq/openquant
+Project-URL: Issues, https://github.com/revorhq/openquant/issues
+Author: OpenQuant India Contributors
+License: Apache-2.0
+Keywords: bhavcopy,bse,india,market-data,nse,quant
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Financial and Insurance Industry
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Office/Business :: Financial :: Investment
+Requires-Python: >=3.11
+Requires-Dist: click>=8.1
+Requires-Dist: duckdb>=1.0
+Requires-Dist: httpx>=0.27
+Requires-Dist: numpy>=1.24
+Requires-Dist: oq-core
+Requires-Dist: pandas>=2.0
+Requires-Dist: pyarrow>=15.0
+Description-Content-Type: text/markdown
+# oq-data
+NSE data pipeline for OpenQuant India.
+Bhavcopy ingestion (EOD equity + F&O), corporate-action adjustments, point-in-time
+index universes (Nifty 50/100/500), delivery %, FII/DII flows, and a Parquet +
+DuckDB query layer. Symbol master keyed on ISIN with merger/symbol-change
+mapping (HDFC merger included as a test fixture).
+```bash
+pip install oq-data
+oq sync --quick
+```
+```python
+import oq_data as oq
+prices = oq.prices("RELIANCE", "2015-01-01", "2024-12-31", adjusted=True)
+universe = oq.universe("NIFTY50", as_of="2018-06-30")
+```
+Part of [OpenQuant India](https://github.com/revorhq/openquant) — honest, open
+source quant infrastructure for Indian markets. Apache 2.0.

oq_data-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,22 @@
+# oq-data
+NSE data pipeline for OpenQuant India.
+Bhavcopy ingestion (EOD equity + F&O), corporate-action adjustments, point-in-time
+index universes (Nifty 50/100/500), delivery %, FII/DII flows, and a Parquet +
+DuckDB query layer. Symbol master keyed on ISIN with merger/symbol-change
+mapping (HDFC merger included as a test fixture).
+```bash
+pip install oq-data
+oq sync --quick
+```
+```python
+import oq_data as oq
+prices = oq.prices("RELIANCE", "2015-01-01", "2024-12-31", adjusted=True)
+universe = oq.universe("NIFTY50", as_of="2018-06-30")
+```
+Part of [OpenQuant India](https://github.com/revorhq/openquant) — honest, open
+source quant infrastructure for Indian markets. Apache 2.0.

oq_data-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,44 @@
+[project]
+name = "oq-data"
+version = "0.1.0"
+description = "NSE/BSE data pipeline for OpenQuant India: bhavcopy ingestion, corporate actions, point-in-time index universes."
+requires-python = ">=3.11"
+license = { text = "Apache-2.0" }
+readme = "README.md"
+authors = [{ name = "OpenQuant India Contributors" }]
+keywords = ["quant", "nse", "bse", "india", "bhavcopy", "market-data"]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Financial and Insurance Industry",
+    "License :: OSI Approved :: Apache Software License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Office/Business :: Financial :: Investment",
+]
+dependencies = [
+    "numpy>=1.24",
+    "pandas>=2.0",
+    "pyarrow>=15.0",
+    "duckdb>=1.0",
+    "httpx>=0.27",
+    "click>=8.1",
+    "oq-core",
+]
+[project.urls]
+Homepage = "https://github.com/revorhq/openquant"
+Repository = "https://github.com/revorhq/openquant"
+Issues = "https://github.com/revorhq/openquant/issues"
+[project.scripts]
+oq = "oq_data.cli:main"
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[tool.hatch.build.targets.wheel]
+packages = ["src/oq_data"]

oq_data-0.1.0/src/oq_data/__init__.py ADDED Viewed

@@ -0,0 +1,94 @@
+"""oq-data — NSE/BSE data pipeline for OpenQuant India.
+Top-level convenience imports mirror the most-used public API:
+    >>> from oq_data import prices, universe, wide_prices
+"""
+from __future__ import annotations
+from oq_data.announcements import (
+    download_announcements,
+    parse_announcements_blob,
+    read_announcements,
+    write_announcements,
+)
+from oq_data.api import list_symbols, prices, resolve_symbol, universe, wide_prices
+from oq_data.bhavcopy import build_url, download_bhavcopy, parse_bhavcopy_blob, sync_range
+from oq_data.config import DataPaths, default_root, get_paths
+from oq_data.corporate_actions import CorporateAction, add_actions, adjust_prices, load_actions
+from oq_data.delivery import (
+    download_delivery,
+    parse_delivery_blob,
+    read_delivery,
+    write_delivery,
+)
+from oq_data.flows import (
+    download_flows,
+    parse_flows_blob,
+    read_flows,
+    write_flows,
+)
+from oq_data.fno import (
+    download_fno,
+    parse_fno_blob,
+)
+from oq_data.storage import (
+    coverage,
+    query,
+    read_fno,
+    read_prices,
+    write_eod,
+    write_fno,
+)
+from oq_data.symbols import SymbolMaster, add_mapping, load_master
+from oq_data.universes import UniverseEntry, add_entries, load_universes, members_as_of
+__version__ = "0.1.0"
+__all__ = [
+    "CorporateAction",
+    "DataPaths",
+    "SymbolMaster",
+    "UniverseEntry",
+    "__version__",
+    "add_actions",
+    "add_entries",
+    "add_mapping",
+    "adjust_prices",
+    "build_url",
+    "coverage",
+    "default_root",
+    "download_announcements",
+    "download_bhavcopy",
+    "download_delivery",
+    "download_flows",
+    "download_fno",
+    "get_paths",
+    "list_symbols",
+    "load_actions",
+    "load_master",
+    "load_universes",
+    "members_as_of",
+    "parse_announcements_blob",
+    "parse_bhavcopy_blob",
+    "parse_delivery_blob",
+    "parse_flows_blob",
+    "parse_fno_blob",
+    "prices",
+    "query",
+    "read_announcements",
+    "read_delivery",
+    "read_flows",
+    "read_fno",
+    "read_prices",
+    "resolve_symbol",
+    "sync_range",
+    "universe",
+    "wide_prices",
+    "write_announcements",
+    "write_delivery",
+    "write_eod",
+    "write_flows",
+    "write_fno",
+]

oq_data-0.1.0/src/oq_data/announcements.py ADDED Viewed

@@ -0,0 +1,176 @@
+"""Corporate-announcements feed ingestion.
+NSE publishes a rolling JSON feed of corporate announcements at
+``https://www.nseindia.com/api/corporate-announcements?index=equities``.
+Each row carries the announcement timestamp, symbol, broad category, a
+short subject line, and an attachment URL.
+The canonical schema we persist is::
+    date, symbol, category, subject, attachment
+``date`` is the announcement business date (``date``-typed), suitable
+for the same year-partitioned storage layout used by the EOD writers.
+Network calls go through the same injectable ``Fetcher`` as the rest of
+the pipeline so the suite stays offline.
+"""
+from __future__ import annotations
+import io
+import json
+import logging
+from collections.abc import Iterable
+from dataclasses import dataclass
+from datetime import date, timedelta
+import pandas as pd
+from oq_data.bhavcopy import Fetcher, _default_fetcher
+from oq_data.config import DataPaths, get_paths
+from oq_data.storage import write_partitioned
+logger = logging.getLogger(__name__)
+_NORMALISED_COLUMNS = ["date", "symbol", "category", "subject", "attachment"]
+@dataclass(frozen=True, slots=True)
+class AnnouncementsSource:
+    """Immutable description of one day's announcements feed request."""
+    # Date the feed is requested for.
+    when: date
+    # Full URL of the feed for that date.
+    url: str
+    # File name associated with the payload for that date.
+    filename: str
+def build_url(when: date) -> AnnouncementsSource:
+    fname = f"announcements_{when:%Y%m%d}.json"
+    url = (
+        "https://www.nseindia.com/api/corporate-announcements"
+        f"?index=equities&from_date={when:%d-%m-%Y}&to_date={when:%d-%m-%Y}"
+    )
+    return AnnouncementsSource(when=when, url=url, filename=fname)
+def _pick(row: dict, *keys: str) -> object:
+    for k in keys:
+        if k in row and row[k] not in (None, ""):
+            return row[k]
+    return ""
+def parse_announcements_blob(blob: bytes, when: date) -> pd.DataFrame:
+    text = blob.decode("utf-8-sig", errors="ignore").lstrip()
+    if text.startswith("[") or text.startswith("{"):
+        data = json.loads(text)
+        rows = data if isinstance(data, list) else data.get("data", data.get("rows", []))
+    else:
+        rows = pd.read_csv(io.BytesIO(blob)).to_dict("records")
+    if not rows:
+        return pd.DataFrame(columns=_NORMALISED_COLUMNS)
+    df = pd.DataFrame(
+        {
+            "date": pd.to_datetime(when),
+            "symbol": [str(_pick(r, "symbol", "Symbol", "SYMBOL")).strip() for r in rows],
+            "category": [
+                str(_pick(r, "category", "Category", "broadcastsubject")).strip() for r in rows
+            ],
+            "subject": [
+                str(_pick(r, "subject", "Subject", "desc", "Description")).strip() for r in rows
+            ],
+            "attachment": [
+                str(_pick(r, "attchmntFile", "attachment", "attachmentUrl")).strip() for r in rows
+            ],
+        }
+    )
+    df = df[df["symbol"] != ""].reset_index(drop=True)
+    return df[_NORMALISED_COLUMNS]
+def _cache_dir(paths: DataPaths):
+    p = paths.raw / "announcements"
+    p.mkdir(parents=True, exist_ok=True)
+    return p
+def download_announcements(
+    when: date,
+    paths: DataPaths | None = None,
+    fetcher: Fetcher | None = None,
+    use_cache: bool = True,
+) -> pd.DataFrame:
+    paths = paths or get_paths()
+    paths.ensure()
+    src = build_url(when)
+    cache_path = _cache_dir(paths) / src.filename
+    fetch = fetcher or _default_fetcher
+    if use_cache and cache_path.exists():
+        blob = cache_path.read_bytes()
+    else:
+        blob = fetch(src.url)
+        cache_path.write_bytes(blob)
+    return parse_announcements_blob(blob, when)
+def write_announcements(df: pd.DataFrame, paths: DataPaths | None = None) -> int:
+    paths = paths or get_paths()
+    paths.ensure()
+    keys = ["date", "symbol", "subject"]
+    return write_partitioned(df, paths.announcements, keys)
+def read_announcements(
+    symbols: str | Iterable[str] | None = None,
+    start: date | str | None = None,
+    end: date | str | None = None,
+    paths: DataPaths | None = None,
+) -> pd.DataFrame:
+    paths = paths or get_paths()
+    parts = sorted(paths.announcements.glob("year=*/data.parquet"))
+    if not parts:
+        return pd.DataFrame(columns=_NORMALISED_COLUMNS)
+    df = pd.concat([pd.read_parquet(p) for p in parts], ignore_index=True)
+    if symbols is not None:
+        syms = {symbols} if isinstance(symbols, str) else set(symbols)
+        df = df[df["symbol"].isin(syms)]
+    if start is not None:
+        df = df[df["date"] >= pd.to_datetime(start)]
+    if end is not None:
+        df = df[df["date"] <= pd.to_datetime(end)]
+    return df.sort_values(["date", "symbol"]).reset_index(drop=True)
+def sync_range(
+    start: date,
+    end: date,
+    paths: DataPaths | None = None,
+    fetcher: Fetcher | None = None,
+    on_missing: str = "skip",
+) -> Iterable[date]:
+    if end < start:
+        raise ValueError("end must be >= start")
+    paths = paths or get_paths()
+    paths.ensure()
+    cur = start
+    one_day = timedelta(days=1)
+    while cur <= end:
+        if cur.weekday() < 5:
+            try:
+                download_announcements(cur, paths=paths, fetcher=fetcher)
+                yield cur
+            except Exception as exc:
+                if on_missing == "raise":
+                    raise
+                logger.info("announcements feed unavailable for %s: %s", cur, exc)
+        cur += one_day
+__all__ = [
+    "AnnouncementsSource",
+    "build_url",
+    "download_announcements",
+    "parse_announcements_blob",
+    "read_announcements",
+    "sync_range",
+    "write_announcements",
+]

oq_data-0.1.0/src/oq_data/api.py ADDED Viewed

@@ -0,0 +1,103 @@
+"""High-level Python API for downstream packages.
+This is the surface most users will touch: :func:`prices` for a clean
+adjusted price series, :func:`universe` for a point-in-time membership
+set, and :func:`wide_prices` for a date-indexed wide frame that drops
+straight into :func:`oq_backtest.backtest`.
+"""
+from __future__ import annotations
+from collections.abc import Iterable
+from datetime import date
+import pandas as pd
+from oq_data import corporate_actions, storage, symbols, universes
+from oq_data.config import DataPaths, get_paths
+def prices(
+    symbol: str | Iterable[str],
+    start: date | str | None = None,
+    end: date | str | None = None,
+    adjusted: bool = True,
+    paths: DataPaths | None = None,
+) -> pd.DataFrame:
+    """Read a long-form OHLCV frame for one or many symbols.
+    With ``adjusted=True`` (default), splits, bonuses, and dividends are
+    back-adjusted so the returned series is a continuous total-return
+    proxy suitable for backtesting.
+    """
+    paths = paths or get_paths()
+    df = storage.read_prices(symbols=symbol, start=start, end=end, paths=paths)
+    if df.empty or not adjusted:
+        return df
+    actions = corporate_actions.load_actions(paths=paths)
+    if actions.empty:
+        return df
+    return corporate_actions.adjust_prices(df, actions)
+def wide_prices(
+    universe_symbols: Iterable[str],
+    start: date | str | None = None,
+    end: date | str | None = None,
+    field: str = "close",
+    adjusted: bool = True,
+    paths: DataPaths | None = None,
+) -> pd.DataFrame:
+    """Return a date-indexed wide DataFrame ready for the backtester.
+    The output is what :func:`oq_backtest.backtest` consumes as
+    ``prices``: rows are trading dates, columns are symbols, values are
+    the requested field (default ``close``).
+    """
+    syms = list(universe_symbols)
+    if not syms:
+        raise ValueError("universe_symbols must be non-empty")
+    long_df = prices(syms, start=start, end=end, adjusted=adjusted, paths=paths)
+    if long_df.empty:
+        return pd.DataFrame()
+    if field not in long_df.columns:
+        raise KeyError(f"field {field!r} not in {sorted(long_df.columns)}")
+    wide = long_df.pivot_table(index="date", columns="symbol", values=field, aggfunc="last")
+    wide = wide.sort_index()
+    wide.index = pd.DatetimeIndex(wide.index)
+    return wide
+def universe(
+    index_name: str,
+    as_of: date | str,
+    paths: DataPaths | None = None,
+) -> list[str]:
+    """List the symbols that made up ``index_name`` on ``as_of``."""
+    paths = paths or get_paths()
+    when = pd.to_datetime(as_of).date()
+    members = universes.members_as_of(index_name, when, paths=paths)
+    return members["symbol"].tolist()
+def resolve_symbol(symbol: str, when: date | str, paths: DataPaths | None = None) -> str:
+    """Translate a current ticker to the symbol used on ``when``."""
+    paths = paths or get_paths()
+    master = symbols.load_master(paths=paths)
+    return master.resolve_as_of(symbol, pd.to_datetime(when).date())
+def list_symbols(paths: DataPaths | None = None) -> list[str]:
+    """All distinct ``symbol`` values present in the EOD dataset."""
+    paths = paths or get_paths()
+    df = storage.query("SELECT DISTINCT symbol FROM eod ORDER BY symbol", paths=paths)
+    return df["symbol"].tolist() if not df.empty else []
+__all__ = [
+    "list_symbols",
+    "prices",
+    "resolve_symbol",
+    "universe",
+    "wide_prices",
+]