oq-data 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,82 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ *.egg
8
+ *.egg-info/
9
+ dist/
10
+ build/
11
+ wheels/
12
+ develop-eggs/
13
+ eggs/
14
+ parts/
15
+ sdist/
16
+ var/
17
+ *.manifest
18
+ *.spec
19
+ pip-log.txt
20
+ pip-delete-this-directory.txt
21
+
22
+ # uv
23
+ .venv/
24
+ venv/
25
+ env/
26
+ ENV/
27
+ .python-version
28
+
29
+ # Testing / coverage
30
+ .pytest_cache/
31
+ .coverage
32
+ .coverage.*
33
+ htmlcov/
34
+ .tox/
35
+ .nox/
36
+ coverage.xml
37
+ *.cover
38
+ .cache
39
+
40
+ # mypy / ruff
41
+ .mypy_cache/
42
+ .ruff_cache/
43
+ .dmypy.json
44
+ dmypy.json
45
+
46
+ # Jupyter
47
+ .ipynb_checkpoints/
48
+ *.ipynb_checkpoints
49
+
50
+ # Data / artifacts
51
+ data/
52
+ *.parquet
53
+ *.duckdb
54
+ *.duckdb.wal
55
+ *.csv.gz
56
+ *.zip
57
+ .openquant/
58
+
59
+ !packages/*/tests/fixtures/**
60
+
61
+ # IDE / OS
62
+ .idea/
63
+ .vscode/
64
+ *.swp
65
+ *.swo
66
+ .DS_Store
67
+ Thumbs.db
68
+
69
+ # Logs
70
+ *.log
71
+ logs/
72
+
73
+ # Secrets
74
+ .env
75
+ .env.*
76
+ !.env.example
77
+ *.pem
78
+ *.key
79
+
80
+ # build artifacts
81
+ dist/
82
+ *.egg-info/
oq_data-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,51 @@
1
+ Metadata-Version: 2.4
2
+ Name: oq-data
3
+ Version: 0.1.0
4
+ Summary: NSE/BSE data pipeline for OpenQuant India: bhavcopy ingestion, corporate actions, point-in-time index universes.
5
+ Project-URL: Homepage, https://github.com/revorhq/openquant
6
+ Project-URL: Repository, https://github.com/revorhq/openquant
7
+ Project-URL: Issues, https://github.com/revorhq/openquant/issues
8
+ Author: OpenQuant India Contributors
9
+ License: Apache-2.0
10
+ Keywords: bhavcopy,bse,india,market-data,nse,quant
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Financial and Insurance Industry
14
+ Classifier: License :: OSI Approved :: Apache Software License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Office/Business :: Financial :: Investment
20
+ Requires-Python: >=3.11
21
+ Requires-Dist: click>=8.1
22
+ Requires-Dist: duckdb>=1.0
23
+ Requires-Dist: httpx>=0.27
24
+ Requires-Dist: numpy>=1.24
25
+ Requires-Dist: oq-core
26
+ Requires-Dist: pandas>=2.0
27
+ Requires-Dist: pyarrow>=15.0
28
+ Description-Content-Type: text/markdown
29
+
30
+ # oq-data
31
+
32
+ NSE data pipeline for OpenQuant India.
33
+
34
+ Bhavcopy ingestion (EOD equity + F&O), corporate-action adjustments, point-in-time
35
+ index universes (Nifty 50/100/500), delivery %, FII/DII flows, and a Parquet +
36
+ DuckDB query layer. Symbol master keyed on ISIN with merger/symbol-change
37
+ mapping (HDFC merger included as a test fixture).
38
+
39
+ ```bash
40
+ pip install oq-data
41
+ oq sync --quick
42
+ ```
43
+
44
+ ```python
45
+ import oq_data as oq
46
+ prices = oq.prices("RELIANCE", "2015-01-01", "2024-12-31", adjusted=True)
47
+ universe = oq.universe("NIFTY50", as_of="2018-06-30")
48
+ ```
49
+
50
+ Part of [OpenQuant India](https://github.com/revorhq/openquant) — honest,
51
+ open-source quant infrastructure for Indian markets. Licensed under Apache 2.0.
@@ -0,0 +1,22 @@
1
+ # oq-data
2
+
3
+ NSE data pipeline for OpenQuant India.
4
+
5
+ Bhavcopy ingestion (EOD equity + F&O), corporate-action adjustments, point-in-time
6
+ index universes (Nifty 50/100/500), delivery %, FII/DII flows, and a Parquet +
7
+ DuckDB query layer. Symbol master keyed on ISIN with merger/symbol-change
8
+ mapping (HDFC merger included as a test fixture).
9
+
10
+ ```bash
11
+ pip install oq-data
12
+ oq sync --quick
13
+ ```
14
+
15
+ ```python
16
+ import oq_data as oq
17
+ prices = oq.prices("RELIANCE", "2015-01-01", "2024-12-31", adjusted=True)
18
+ universe = oq.universe("NIFTY50", as_of="2018-06-30")
19
+ ```
20
+
21
+ Part of [OpenQuant India](https://github.com/revorhq/openquant) — honest,
22
+ open-source quant infrastructure for Indian markets. Licensed under Apache 2.0.
@@ -0,0 +1,44 @@
1
+ [project]
2
+ name = "oq-data"
3
+ version = "0.1.0"
4
+ description = "NSE/BSE data pipeline for OpenQuant India: bhavcopy ingestion, corporate actions, point-in-time index universes."
5
+ requires-python = ">=3.11"
6
+ license = { text = "Apache-2.0" }
7
+ readme = "README.md"
8
+ authors = [{ name = "OpenQuant India Contributors" }]
9
+ keywords = ["quant", "nse", "bse", "india", "bhavcopy", "market-data"]
10
+ classifiers = [
11
+ "Development Status :: 3 - Alpha",
12
+ "Intended Audience :: Developers",
13
+ "Intended Audience :: Financial and Insurance Industry",
14
+ "License :: OSI Approved :: Apache Software License",
15
+ "Operating System :: OS Independent",
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Python :: 3.11",
18
+ "Programming Language :: Python :: 3.12",
19
+ "Topic :: Office/Business :: Financial :: Investment",
20
+ ]
21
+ dependencies = [
22
+ "numpy>=1.24",
23
+ "pandas>=2.0",
24
+ "pyarrow>=15.0",
25
+ "duckdb>=1.0",
26
+ "httpx>=0.27",
27
+ "click>=8.1",
28
+ "oq-core",
29
+ ]
30
+
31
+ [project.urls]
32
+ Homepage = "https://github.com/revorhq/openquant"
33
+ Repository = "https://github.com/revorhq/openquant"
34
+ Issues = "https://github.com/revorhq/openquant/issues"
35
+
36
+ [project.scripts]
37
+ oq = "oq_data.cli:main"
38
+
39
+ [build-system]
40
+ requires = ["hatchling"]
41
+ build-backend = "hatchling.build"
42
+
43
+ [tool.hatch.build.targets.wheel]
44
+ packages = ["src/oq_data"]
@@ -0,0 +1,94 @@
1
+ """oq-data — NSE/BSE data pipeline for OpenQuant India.
2
+
3
+ Top-level convenience imports mirror the most-used public API:
4
+
5
+ >>> from oq_data import prices, universe, wide_prices
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from oq_data.announcements import (
11
+ download_announcements,
12
+ parse_announcements_blob,
13
+ read_announcements,
14
+ write_announcements,
15
+ )
16
+ from oq_data.api import list_symbols, prices, resolve_symbol, universe, wide_prices
17
+ from oq_data.bhavcopy import build_url, download_bhavcopy, parse_bhavcopy_blob, sync_range
18
+ from oq_data.config import DataPaths, default_root, get_paths
19
+ from oq_data.corporate_actions import CorporateAction, add_actions, adjust_prices, load_actions
20
+ from oq_data.delivery import (
21
+ download_delivery,
22
+ parse_delivery_blob,
23
+ read_delivery,
24
+ write_delivery,
25
+ )
26
+ from oq_data.flows import (
27
+ download_flows,
28
+ parse_flows_blob,
29
+ read_flows,
30
+ write_flows,
31
+ )
32
+ from oq_data.fno import (
33
+ download_fno,
34
+ parse_fno_blob,
35
+ )
36
+ from oq_data.storage import (
37
+ coverage,
38
+ query,
39
+ read_fno,
40
+ read_prices,
41
+ write_eod,
42
+ write_fno,
43
+ )
44
+ from oq_data.symbols import SymbolMaster, add_mapping, load_master
45
+ from oq_data.universes import UniverseEntry, add_entries, load_universes, members_as_of
46
+
47
+ __version__ = "0.1.0"
48
+
49
+ __all__ = [
50
+ "CorporateAction",
51
+ "DataPaths",
52
+ "SymbolMaster",
53
+ "UniverseEntry",
54
+ "__version__",
55
+ "add_actions",
56
+ "add_entries",
57
+ "add_mapping",
58
+ "adjust_prices",
59
+ "build_url",
60
+ "coverage",
61
+ "default_root",
62
+ "download_announcements",
63
+ "download_bhavcopy",
64
+ "download_delivery",
65
+ "download_flows",
66
+ "download_fno",
67
+ "get_paths",
68
+ "list_symbols",
69
+ "load_actions",
70
+ "load_master",
71
+ "load_universes",
72
+ "members_as_of",
73
+ "parse_announcements_blob",
74
+ "parse_bhavcopy_blob",
75
+ "parse_delivery_blob",
76
+ "parse_flows_blob",
77
+ "parse_fno_blob",
78
+ "prices",
79
+ "query",
80
+ "read_announcements",
81
+ "read_delivery",
82
+ "read_flows",
83
+ "read_fno",
84
+ "read_prices",
85
+ "resolve_symbol",
86
+ "sync_range",
87
+ "universe",
88
+ "wide_prices",
89
+ "write_announcements",
90
+ "write_delivery",
91
+ "write_eod",
92
+ "write_flows",
93
+ "write_fno",
94
+ ]
@@ -0,0 +1,176 @@
1
+ """Corporate-announcements feed ingestion.
2
+
3
+ NSE publishes a rolling JSON feed of corporate announcements at
4
+ ``https://www.nseindia.com/api/corporate-announcements?index=equities``.
5
+ Each row carries the announcement timestamp, symbol, broad category, a
6
+ short subject line, and an attachment URL.
7
+
8
+ The canonical schema we persist is::
9
+
10
+ date, symbol, category, subject, attachment
11
+
12
+ ``date`` is the announcement business date (stored as a pandas timestamp),
13
+ suitable for the same year-partitioned storage layout used by the EOD writers.
14
+
15
+ Network calls go through the same injectable ``Fetcher`` as the rest of
16
+ the pipeline so the suite stays offline.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import io
22
+ import json
23
+ import logging
24
+ from collections.abc import Iterable
25
+ from dataclasses import dataclass
26
+ from datetime import date, timedelta
27
+
28
+ import pandas as pd
29
+
30
+ from oq_data.bhavcopy import Fetcher, _default_fetcher
31
+ from oq_data.config import DataPaths, get_paths
32
+ from oq_data.storage import write_partitioned
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+ _NORMALISED_COLUMNS = ["date", "symbol", "category", "subject", "attachment"]
37
+
38
+
39
+ @dataclass(frozen=True, slots=True)
40
+ class AnnouncementsSource:
41
+ when: date
42
+ url: str
43
+ filename: str
44
+
45
+
46
def build_url(when: date) -> AnnouncementsSource:
    """Describe the announcements request for a single day.

    ``when`` is used for both ends of the feed's date window, so the query
    covers exactly that day. The file name embeds the day as ``YYYYMMDD``.
    """
    endpoint = "https://www.nseindia.com/api/corporate-announcements"
    query = f"?index=equities&from_date={when:%d-%m-%Y}&to_date={when:%d-%m-%Y}"
    return AnnouncementsSource(
        when=when,
        url=endpoint + query,
        filename=f"announcements_{when:%Y%m%d}.json",
    )
53
+
54
+
55
def _pick(row: dict, *keys: str) -> object:
    """Return the first non-blank value found under ``keys`` in ``row``.

    Keys are tried in the order given. A value counts as blank when it is
    ``None`` or the empty string; if every candidate is missing or blank,
    the empty string is returned.
    """
    blanks = (None, "")
    present = (row[name] for name in keys if name in row)
    return next((value for value in present if value not in blanks), "")
60
+
61
+
62
def parse_announcements_blob(blob: bytes, when: date) -> pd.DataFrame:
    """Normalise a raw announcements payload into the canonical frame.

    The payload is treated as JSON when its text starts with ``[`` or ``{``
    (either a bare list of rows, or an object carrying the rows under
    ``data`` or ``rows``); anything else is parsed as CSV.

    Args:
        blob: Raw bytes as fetched or read from the cache.
        when: Day the payload belongs to; stamped on every row as a pandas
            timestamp.

    Returns:
        A frame with the ``_NORMALISED_COLUMNS`` columns. Rows without a
        symbol are dropped. An empty payload, or one with no rows, yields
        an empty frame with those columns.

    Raises:
        json.JSONDecodeError: If a JSON-looking payload is malformed.
    """
    text = blob.decode("utf-8-sig", errors="ignore").lstrip()
    if not text:
        # An empty body would otherwise reach read_csv and raise
        # EmptyDataError; treat it as "no announcements".
        return pd.DataFrame(columns=_NORMALISED_COLUMNS)
    if text.startswith(("[", "{")):
        data = json.loads(text)
        rows = data if isinstance(data, list) else data.get("data", data.get("rows", []))
    else:
        # Read every cell as text and keep blanks as "" so that _pick's
        # blank check applies; default NaN handling would turn empty cells
        # into the literal string "nan" after str().
        rows = pd.read_csv(
            io.BytesIO(blob), dtype=str, keep_default_na=False
        ).to_dict("records")
    if not rows:
        return pd.DataFrame(columns=_NORMALISED_COLUMNS)

    def column(*keys: str) -> list[str]:
        # First non-blank value among the known key spellings, as clean text.
        return [str(_pick(r, *keys)).strip() for r in rows]

    df = pd.DataFrame(
        {
            "date": pd.to_datetime(when),
            "symbol": column("symbol", "Symbol", "SYMBOL"),
            "category": column("category", "Category", "broadcastsubject"),
            "subject": column("subject", "Subject", "desc", "Description"),
            "attachment": column("attchmntFile", "attachment", "attachmentUrl"),
        }
    )
    df = df[df["symbol"] != ""].reset_index(drop=True)
    return df[_NORMALISED_COLUMNS]
88
+
89
+
90
def _cache_dir(paths: DataPaths):
    """Return ``paths.raw / "announcements"``, creating it if it is absent."""
    target = paths.raw / "announcements"
    target.mkdir(parents=True, exist_ok=True)
    return target
94
+
95
+
96
def download_announcements(
    when: date,
    paths: DataPaths | None = None,
    fetcher: Fetcher | None = None,
    use_cache: bool = True,
) -> pd.DataFrame:
    """Fetch (or load from cache) and parse the announcements for one day.

    Args:
        when: Day to fetch.
        paths: Data layout to use; defaults to ``get_paths()``.
        fetcher: Callable invoked with the feed URL whose result is the raw
            payload; defaults to ``_default_fetcher``.
        use_cache: When true and a cached payload exists for ``when``, it
            is parsed instead of hitting the network.

    Returns:
        The normalised announcements frame for ``when``.

    Raises:
        Exception: Whatever the fetcher or the parser raises; nothing is
            swallowed here.
    """
    paths = paths or get_paths()
    paths.ensure()
    src = build_url(when)
    cache_path = _cache_dir(paths) / src.filename
    if use_cache and cache_path.exists():
        return parse_announcements_blob(cache_path.read_bytes(), when)
    fetch = fetcher or _default_fetcher
    blob = fetch(src.url)
    # Parse before caching: a payload that cannot be parsed must not be
    # written, otherwise every later cached call for this day would fail
    # on the same bad file.
    df = parse_announcements_blob(blob, when)
    cache_path.write_bytes(blob)
    return df
113
+
114
+
115
def write_announcements(df: pd.DataFrame, paths: DataPaths | None = None) -> int:
    """Write announcement rows to the partitioned announcements store.

    Rows are keyed on ``date``, ``symbol`` and ``subject``. The return
    value is whatever ``write_partitioned`` reports.
    """
    resolved = paths if paths else get_paths()
    resolved.ensure()
    return write_partitioned(df, resolved.announcements, ["date", "symbol", "subject"])
120
+
121
+
122
def read_announcements(
    symbols: str | Iterable[str] | None = None,
    start: date | str | None = None,
    end: date | str | None = None,
    paths: DataPaths | None = None,
) -> pd.DataFrame:
    """Load stored announcements, optionally filtered by symbol and date.

    Every ``year=*/data.parquet`` partition under the announcements store
    is read and concatenated before filtering. ``start`` and ``end`` are
    inclusive bounds. The result is sorted by ``date`` then ``symbol``.
    With no partitions on disk, an empty frame with the canonical columns
    is returned.
    """
    paths = paths or get_paths()
    partition_files = sorted(paths.announcements.glob("year=*/data.parquet"))
    if not partition_files:
        return pd.DataFrame(columns=_NORMALISED_COLUMNS)

    frames = [pd.read_parquet(part) for part in partition_files]
    out = pd.concat(frames, ignore_index=True)

    if symbols is not None:
        # A bare string is one symbol, not an iterable of characters.
        wanted = {symbols} if isinstance(symbols, str) else set(symbols)
        out = out.loc[out["symbol"].isin(wanted)]
    if start is not None:
        out = out.loc[out["date"] >= pd.to_datetime(start)]
    if end is not None:
        out = out.loc[out["date"] <= pd.to_datetime(end)]

    ordered = out.sort_values(["date", "symbol"])
    return ordered.reset_index(drop=True)
141
+
142
+
143
def sync_range(
    start: date,
    end: date,
    paths: DataPaths | None = None,
    fetcher: Fetcher | None = None,
    on_missing: str = "skip",
) -> Iterable[date]:
    """Download announcements for each weekday in ``[start, end]``.

    This is a generator: nothing happens, including the argument check,
    until it is iterated. Each successfully downloaded day is yielded.
    The parsed frame is discarded here; only the download itself (and any
    caching it performs) takes effect.

    Args:
        start: First day, inclusive.
        end: Last day, inclusive.
        paths: Data layout to use; defaults to ``get_paths()``.
        fetcher: Forwarded to ``download_announcements``.
        on_missing: ``"raise"`` re-raises a failed day's error; any other
            value logs it at INFO level and moves on.

    Yields:
        Each weekday whose download succeeded, in ascending order.

    Raises:
        ValueError: If ``end`` is earlier than ``start``.
    """
    if end < start:
        raise ValueError("end must be >= start")
    paths = paths or get_paths()
    paths.ensure()
    one_day = timedelta(days=1)
    cur = start
    while cur <= end:
        # Monday-Friday only; weekends are skipped without a request.
        if cur.weekday() < 5:
            try:
                download_announcements(cur, paths=paths, fetcher=fetcher)
            except Exception as exc:
                if on_missing == "raise":
                    raise
                logger.info("announcements feed unavailable for %s: %s", cur, exc)
            else:
                # Yield outside the try so an exception thrown into the
                # suspended generator is not mistaken for a feed failure.
                yield cur
        cur += one_day
166
+
167
+
168
+ __all__ = [
169
+ "AnnouncementsSource",
170
+ "build_url",
171
+ "download_announcements",
172
+ "parse_announcements_blob",
173
+ "read_announcements",
174
+ "sync_range",
175
+ "write_announcements",
176
+ ]
@@ -0,0 +1,103 @@
1
+ """High-level Python API for downstream packages.
2
+
3
+ This is the surface most users will touch: :func:`prices` for a clean
4
+ adjusted price series, :func:`universe` for a point-in-time membership
5
+ set, and :func:`wide_prices` for a date-indexed wide frame that drops
6
+ straight into :func:`oq_backtest.backtest`.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from collections.abc import Iterable
12
+ from datetime import date
13
+
14
+ import pandas as pd
15
+
16
+ from oq_data import corporate_actions, storage, symbols, universes
17
+ from oq_data.config import DataPaths, get_paths
18
+
19
+
20
def prices(
    symbol: str | Iterable[str],
    start: date | str | None = None,
    end: date | str | None = None,
    adjusted: bool = True,
    paths: DataPaths | None = None,
) -> pd.DataFrame:
    """Return long-form price rows for one symbol or several.

    The stored rows are read first. When ``adjusted`` is true (the
    default) and corporate actions are on record, the rows are passed
    through ``corporate_actions.adjust_prices`` so that splits, bonuses,
    and dividends are back-adjusted into a continuous series for
    backtesting. If nothing was read, adjustment is off, or no actions
    are recorded, the rows are returned as read.
    """
    paths = paths or get_paths()
    raw = storage.read_prices(symbols=symbol, start=start, end=end, paths=paths)
    if not adjusted or raw.empty:
        return raw
    actions = corporate_actions.load_actions(paths=paths)
    return raw if actions.empty else corporate_actions.adjust_prices(raw, actions)
41
+
42
+
43
def wide_prices(
    universe_symbols: Iterable[str],
    start: date | str | None = None,
    end: date | str | None = None,
    field: str = "close",
    adjusted: bool = True,
    paths: DataPaths | None = None,
) -> pd.DataFrame:
    """Return a date-indexed wide frame of one price field.

    Rows are dates, columns are symbols, and values are ``field``
    (default ``close``). This is the shape :func:`oq_backtest.backtest`
    consumes as ``prices``.

    Args:
        universe_symbols: Symbols to include. A single ticker passed as a
            plain string is treated as one symbol.
        start: Optional lower bound, forwarded to :func:`prices`.
        end: Optional upper bound, forwarded to :func:`prices`.
        field: Column of the long-form frame to pivot.
        adjusted: Forwarded to :func:`prices`.
        paths: Data layout to use, forwarded to :func:`prices`.

    Returns:
        The pivoted frame with a ``DatetimeIndex``, or a completely empty
        ``DataFrame`` when no rows are available.

    Raises:
        ValueError: If no symbols are given.
        KeyError: If ``field`` is not a column of the long-form frame.
    """
    if isinstance(universe_symbols, str):
        # list("RELIANCE") would explode the ticker into single characters;
        # a bare string means one symbol, as it does for prices().
        syms = [universe_symbols] if universe_symbols else []
    else:
        syms = list(universe_symbols)
    if not syms:
        raise ValueError("universe_symbols must be non-empty")
    long_df = prices(syms, start=start, end=end, adjusted=adjusted, paths=paths)
    if long_df.empty:
        return pd.DataFrame()
    if field not in long_df.columns:
        raise KeyError(f"field {field!r} not in {sorted(long_df.columns)}")
    # aggfunc="last" keeps the final row if a (date, symbol) pair repeats.
    wide = long_df.pivot_table(index="date", columns="symbol", values=field, aggfunc="last")
    wide = wide.sort_index()
    wide.index = pd.DatetimeIndex(wide.index)
    return wide
69
+
70
+
71
def universe(
    index_name: str,
    as_of: date | str,
    paths: DataPaths | None = None,
) -> list[str]:
    """Return the symbols recorded as members of ``index_name`` on ``as_of``.

    ``as_of`` may be a date or anything ``pd.to_datetime`` accepts; only
    its calendar date is used for the lookup.
    """
    resolved = paths or get_paths()
    as_of_day = pd.to_datetime(as_of).date()
    membership = universes.members_as_of(index_name, as_of_day, paths=resolved)
    return membership["symbol"].tolist()
81
+
82
+
83
def resolve_symbol(symbol: str, when: date | str, paths: DataPaths | None = None) -> str:
    """Map a current ticker to the symbol that was in use on ``when``.

    The symbol master is loaded from ``paths`` and asked to resolve
    ``symbol`` as of the calendar date of ``when``.
    """
    master = symbols.load_master(paths=paths or get_paths())
    as_of_day = pd.to_datetime(when).date()
    return master.resolve_as_of(symbol, as_of_day)
88
+
89
+
90
def list_symbols(paths: DataPaths | None = None) -> list[str]:
    """Return every distinct ``symbol`` in the EOD dataset, sorted.

    An empty query result yields an empty list.
    """
    resolved = paths or get_paths()
    found = storage.query("SELECT DISTINCT symbol FROM eod ORDER BY symbol", paths=resolved)
    if found.empty:
        return []
    return found["symbol"].tolist()
95
+
96
+
97
+ __all__ = [
98
+ "list_symbols",
99
+ "prices",
100
+ "resolve_symbol",
101
+ "universe",
102
+ "wide_prices",
103
+ ]