nsefast 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. nsefast-0.1.0/CHANGELOG.md +39 -0
  2. nsefast-0.1.0/LICENSE +21 -0
  3. nsefast-0.1.0/MANIFEST.in +14 -0
  4. nsefast-0.1.0/PKG-INFO +193 -0
  5. nsefast-0.1.0/README.md +141 -0
  6. nsefast-0.1.0/docs/PUBLISHING.md +81 -0
  7. nsefast-0.1.0/docs/USAGE.md +165 -0
  8. nsefast-0.1.0/nsefast/__init__.py +3 -0
  9. nsefast-0.1.0/nsefast/cli.py +341 -0
  10. nsefast-0.1.0/nsefast/collectors/__init__.py +1 -0
  11. nsefast-0.1.0/nsefast/collectors/_json_helpers.py +103 -0
  12. nsefast-0.1.0/nsefast/collectors/calendar.py +50 -0
  13. nsefast-0.1.0/nsefast/collectors/corporate.py +308 -0
  14. nsefast-0.1.0/nsefast/collectors/deals.py +255 -0
  15. nsefast-0.1.0/nsefast/collectors/delivery.py +5 -0
  16. nsefast-0.1.0/nsefast/collectors/derivatives.py +406 -0
  17. nsefast-0.1.0/nsefast/collectors/equity.py +309 -0
  18. nsefast-0.1.0/nsefast/collectors/historical.py +53 -0
  19. nsefast-0.1.0/nsefast/collectors/indices.py +311 -0
  20. nsefast-0.1.0/nsefast/collectors/market_breadth.py +47 -0
  21. nsefast-0.1.0/nsefast/collectors/master.py +67 -0
  22. nsefast-0.1.0/nsefast/collectors/report_links.py +62 -0
  23. nsefast-0.1.0/nsefast/collectors/surveillance.py +54 -0
  24. nsefast-0.1.0/nsefast/config.py +37 -0
  25. nsefast-0.1.0/nsefast/http_client.py +71 -0
  26. nsefast-0.1.0/nsefast/processing/__init__.py +1 -0
  27. nsefast-0.1.0/nsefast/processing/features.py +32 -0
  28. nsefast-0.1.0/nsefast/processing/normalize.py +18 -0
  29. nsefast-0.1.0/nsefast/processing/technicals.py +39 -0
  30. nsefast-0.1.0/nsefast/robots.py +34 -0
  31. nsefast-0.1.0/nsefast/storage/__init__.py +1 -0
  32. nsefast-0.1.0/nsefast/storage/duckdb_store.py +50 -0
  33. nsefast-0.1.0/nsefast/storage/parquet_store.py +29 -0
  34. nsefast-0.1.0/nsefast/storage/postgres_store.py +31 -0
  35. nsefast-0.1.0/nsefast.egg-info/PKG-INFO +193 -0
  36. nsefast-0.1.0/nsefast.egg-info/SOURCES.txt +40 -0
  37. nsefast-0.1.0/nsefast.egg-info/dependency_links.txt +1 -0
  38. nsefast-0.1.0/nsefast.egg-info/entry_points.txt +2 -0
  39. nsefast-0.1.0/nsefast.egg-info/requires.txt +28 -0
  40. nsefast-0.1.0/nsefast.egg-info/top_level.txt +1 -0
  41. nsefast-0.1.0/pyproject.toml +71 -0
  42. nsefast-0.1.0/setup.cfg +4 -0
@@ -0,0 +1,39 @@
1
+ # Changelog
2
+
3
+ All notable changes to **nsefast** are documented in this file.
4
+ The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
5
+ and the project adheres to [Semantic Versioning](https://semver.org/).
6
+
7
+ ## [0.1.0] - 2026-05-06
8
+
9
+ ### Added
10
+
11
+ - Initial public release.
12
+ - Polite HTTP client with warm-up cookie session, retries, and `robots.txt` enforcement.
13
+ - Live JSON collectors with full graceful-failure semantics:
14
+ - `deals.bulk_deals(start, end)`, `deals.block_deals(start, end)`
15
+ - `corporate.corporate_announcements(start, end)`, `corporate.corporate_actions(start, end)`
16
+ - `corporate.dividends`, `bonuses`, `splits`, `rights_issues`, `mergers_demergers`, `record_and_ex_dates`
17
+ - `derivatives.option_chain(symbol)`, `option_chain_equity`, `option_chain_index`
18
+ - `indices.all_indices()`, `indices.sector_strength()`, `indices.is_sector_index(name)`
19
+ - `equity.fifty_two_week_high_low()`, `fifty_two_week_high()`, `fifty_two_week_low()`
20
+ - Daily archive collectors:
21
+ - `equity.daily_bhavcopy(date)`, `equity.delivery_data(date)`
22
+ - `derivatives.fo_bhavcopy(date)`
23
+ - `master.symbol_master()`
24
+ - Polars-first dataframe layer with canonical schemas per collector.
25
+ - Parquet canonical storage (`storage.parquet_store`), DuckDB analytics views
26
+ (`storage.duckdb_store`), optional PostgreSQL (`storage.postgres_store`).
27
+ - Typer CLI: `nsefast collect …`, `nsefast features swing`, `nsefast export parquet`.
28
+ - Optional Rust core (`rust-core/`) exposing `fast_hash`, `dedup`, `snake_case` via PyO3.
29
+ - 77 unit tests covering field mapping, alternate field names, malformed payloads,
30
+ network failures, HTTP errors, JSON decode errors, robots.txt blocks, schema
31
+ ordering, chunk-window arithmetic, CE/PE leg flattening, and sector classification.
32
+
33
+ ### Notes
34
+
35
+ - All public collectors return a Polars DataFrame with the canonical schema on
36
+ **any** failure (network, JSON, malformed payload, polars error, etc.) and
37
+ never raise — pipelines stay crash-proof.
38
+ - NSE blocks many datacenter IP ranges; from such hosts, collectors will
39
+ return empty DataFrames rather than crash.
nsefast-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Nikhil Shinde
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,14 @@
1
+ include README.md
2
+ include LICENSE
3
+ include CHANGELOG.md
4
+ include pyproject.toml
5
+ recursive-include nsefast *.py
6
+ recursive-include docs *.md
7
+ recursive-exclude tests *
8
+ recursive-exclude rust-core *
9
+ recursive-exclude data *
10
+ recursive-exclude artifacts *
11
+ recursive-exclude .local *
12
+ global-exclude __pycache__
13
+ global-exclude *.py[cod]
14
+ global-exclude .DS_Store
nsefast-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,193 @@
1
+ Metadata-Version: 2.4
2
+ Name: nsefast
3
+ Version: 0.1.0
4
+ Summary: Fast, robots.txt-respecting NSE India market data collector for swing trading, quant research, and backtesting
5
+ Author: Nikhil Shinde
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/nikhilshinde/nsefast
8
+ Project-URL: Documentation, https://github.com/nikhilshinde/nsefast#readme
9
+ Project-URL: Repository, https://github.com/nikhilshinde/nsefast
10
+ Project-URL: Issues, https://github.com/nikhilshinde/nsefast/issues
11
+ Project-URL: Changelog, https://github.com/nikhilshinde/nsefast/blob/main/CHANGELOG.md
12
+ Keywords: nse,nse-india,stock-market,trading,quant,backtesting,bhavcopy,options,polars,duckdb
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Financial and Insurance Industry
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Topic :: Office/Business :: Financial :: Investment
23
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
24
+ Requires-Python: >=3.10
25
+ Description-Content-Type: text/markdown
26
+ License-File: LICENSE
27
+ Requires-Dist: requests>=2.28
28
+ Requires-Dist: urllib3>=1.26
29
+ Requires-Dist: beautifulsoup4>=4.11
30
+ Requires-Dist: lxml>=4.9
31
+ Requires-Dist: polars>=0.20
32
+ Requires-Dist: pyarrow>=12.0
33
+ Requires-Dist: duckdb>=0.9
34
+ Requires-Dist: python-dateutil>=2.8
35
+ Requires-Dist: typer>=0.9
36
+ Requires-Dist: rich>=13.0
37
+ Provides-Extra: pandas
38
+ Requires-Dist: pandas>=2.0; extra == "pandas"
39
+ Provides-Extra: postgres
40
+ Requires-Dist: psycopg2-binary>=2.9; extra == "postgres"
41
+ Requires-Dist: sqlalchemy>=2.0; extra == "postgres"
42
+ Provides-Extra: api
43
+ Requires-Dist: fastapi>=0.100; extra == "api"
44
+ Requires-Dist: uvicorn>=0.23; extra == "api"
45
+ Provides-Extra: dev
46
+ Requires-Dist: pytest>=7.4; extra == "dev"
47
+ Requires-Dist: pytest-cov>=4.1; extra == "dev"
48
+ Requires-Dist: ruff>=0.1; extra == "dev"
49
+ Requires-Dist: build>=1.0; extra == "dev"
50
+ Requires-Dist: twine>=4.0; extra == "dev"
51
+ Dynamic: license-file
52
+
53
+ # nsefast
54
+
55
+ Fast NSE India data collector for **swing trading**, **quant research**, **AI training**, **backtesting**, and **market intelligence**.
56
+
57
+ > ⚠️ **Ethics & Compliance:** `nsefast` only uses publicly downloadable NSE reports
58
+ > and pages allowed by NSE's `robots.txt`. It does **not** bypass logins, captchas,
59
+ > Cloudflare, anti-bot systems, or rate limits. Add appropriate delays and use
60
+ > responsibly. You are responsible for complying with NSE's terms of service.
61
+
62
+ ## Features
63
+
64
+ - Polite, retrying HTTP client with `robots.txt` checks
65
+ - Modular collectors for **equity**, **derivatives**, **corporate**, **deals**,
66
+ **indices**, **surveillance**, **calendar**, and **master** data
67
+ - [Polars](https://pola.rs) for fast dataframe processing
68
+ - [Parquet](https://parquet.apache.org/) primary storage, partitioned by dataset/date
69
+ - [DuckDB](https://duckdb.org) local analytics layer
70
+ - Optional PostgreSQL storage
71
+ - Optional Rust core (`rust-core/`) for hashing / dedup / large parsing
72
+ - Typer-based CLI
73
+
74
+ ## Install
75
+
76
+ ```bash
77
+ pip install nsefast
78
+ ```
79
+
80
+ Optional extras:
81
+
82
+ ```bash
83
+ pip install "nsefast[pandas]" # pandas export helpers
84
+ pip install "nsefast[postgres]" # PostgreSQL sink
85
+ pip install "nsefast[api]" # FastAPI server scaffold
86
+ pip install "nsefast[dev]" # pytest, ruff, build, twine
87
+ ```
88
+
89
+ For development:
90
+
91
+ ```bash
92
+ git clone https://github.com/nikhilshinde/nsefast
93
+ cd nsefast
94
+ pip install -e ".[dev]"
95
+ pytest -q
96
+ ```
97
+
98
+ ## Quick start
99
+
100
+ ```bash
101
+ # Discover all downloadable report links from NSE public pages
102
+ nsefast collect-reports
103
+
104
+ # Run the full scaffold
105
+ nsefast collect-all
106
+
107
+ # Equity bhavcopy for a date
108
+ nsefast collect equity-bhavcopy --date 2026-05-07
109
+
110
+ # Corporate announcements range
111
+ nsefast collect corporate-announcements --start 2026-05-01 --end 2026-05-07
112
+
113
+ # Build swing-trading features
114
+ nsefast features swing --date 2026-05-07
115
+
116
+ # Export a dataset to Parquet
117
+ nsefast export parquet --dataset daily_bhavcopy
118
+ ```
119
+
120
+ In Python:
121
+
122
+ ```python
123
+ from nsefast.collectors.report_links import collect_report_links
124
+ from nsefast.storage.parquet_store import save_parquet
125
+
126
+ df = collect_report_links() # polars DataFrame
127
+ save_parquet(df, dataset="report_links")
128
+ ```
129
+
130
+ ## Project layout
131
+
132
+ ```text
133
+ nsefast/
134
+ ├── pyproject.toml
135
+ ├── requirements.txt
136
+ ├── main.py
137
+ ├── README.md
138
+
139
+ ├── nsefast/
140
+ │ ├── config.py # URLs, headers, paths
141
+ │ ├── http_client.py # session + retries
142
+ │ ├── robots.py # robots.txt checker
143
+ │ ├── collectors/ # one module per data domain
144
+ │ ├── processing/ # normalize, features, technicals
145
+ │ ├── storage/ # parquet, duckdb, postgres
146
+ │ └── cli.py # Typer CLI
147
+
148
+ └── rust-core/ # optional pyo3 module
149
+ ├── Cargo.toml
150
+ └── src/lib.rs
151
+ ```
152
+
153
+ ## Storage zones
154
+
155
+ - `data/raw/` — raw downloads exactly as fetched
156
+ - `data/clean/` — normalized intermediate files
157
+ - `data/parquet/` — partitioned Parquet, the canonical store
158
+
159
+ ## Rust core (optional)
160
+
161
+ The `rust-core/` crate exposes a `nsefast_core` Python module via
162
+ [PyO3](https://pyo3.rs/) for CPU-bound work (SHA-256 hashing, dedup,
163
+ fast CSV normalization). HTTP scraping stays in Python — it's I/O bound.
164
+
165
+ Build with [maturin](https://www.maturin.rs/):
166
+
167
+ ```bash
168
+ cd rust-core
169
+ maturin develop --release
170
+ ```
171
+
172
+ ## Documentation
173
+
174
+ - [`docs/USAGE.md`](docs/USAGE.md) — full Python + CLI usage, canonical schemas, polite-use rules
175
+ - [`docs/PUBLISHING.md`](docs/PUBLISHING.md) — how to release new versions to PyPI
176
+ - [`CHANGELOG.md`](CHANGELOG.md) — version history
177
+
178
+ ## Failure semantics
179
+
180
+ Every public collector returns a Polars DataFrame with its canonical
181
+ schema on **any** failure (invalid input, network error, malformed
182
+ payload, polars error, robots block). Collectors **never raise** — your
183
+ pipelines stay crash-proof.
184
+
185
+ ## Tests
186
+
187
+ ```bash
188
+ pytest -q # 77 unit tests, no network calls
189
+ ```
190
+
191
+ ## License
192
+
193
+ MIT — see [`LICENSE`](LICENSE)
@@ -0,0 +1,141 @@
1
+ # nsefast
2
+
3
+ Fast NSE India data collector for **swing trading**, **quant research**, **AI training**, **backtesting**, and **market intelligence**.
4
+
5
+ > ⚠️ **Ethics & Compliance:** `nsefast` only uses publicly downloadable NSE reports
6
+ > and pages allowed by NSE's `robots.txt`. It does **not** bypass logins, captchas,
7
+ > Cloudflare, anti-bot systems, or rate limits. Add appropriate delays and use
8
+ > responsibly. You are responsible for complying with NSE's terms of service.
9
+
10
+ ## Features
11
+
12
+ - Polite, retrying HTTP client with `robots.txt` checks
13
+ - Modular collectors for **equity**, **derivatives**, **corporate**, **deals**,
14
+ **indices**, **surveillance**, **calendar**, and **master** data
15
+ - [Polars](https://pola.rs) for fast dataframe processing
16
+ - [Parquet](https://parquet.apache.org/) primary storage, partitioned by dataset/date
17
+ - [DuckDB](https://duckdb.org) local analytics layer
18
+ - Optional PostgreSQL storage
19
+ - Optional Rust core (`rust-core/`) for hashing / dedup / large parsing
20
+ - Typer-based CLI
21
+
22
+ ## Install
23
+
24
+ ```bash
25
+ pip install nsefast
26
+ ```
27
+
28
+ Optional extras:
29
+
30
+ ```bash
31
+ pip install "nsefast[pandas]" # pandas export helpers
32
+ pip install "nsefast[postgres]" # PostgreSQL sink
33
+ pip install "nsefast[api]" # FastAPI server scaffold
34
+ pip install "nsefast[dev]" # pytest, ruff, build, twine
35
+ ```
36
+
37
+ For development:
38
+
39
+ ```bash
40
+ git clone https://github.com/nikhilshinde/nsefast
41
+ cd nsefast
42
+ pip install -e ".[dev]"
43
+ pytest -q
44
+ ```
45
+
46
+ ## Quick start
47
+
48
+ ```bash
49
+ # Discover all downloadable report links from NSE public pages
50
+ nsefast collect-reports
51
+
52
+ # Run the full scaffold
53
+ nsefast collect-all
54
+
55
+ # Equity bhavcopy for a date
56
+ nsefast collect equity-bhavcopy --date 2026-05-07
57
+
58
+ # Corporate announcements range
59
+ nsefast collect corporate-announcements --start 2026-05-01 --end 2026-05-07
60
+
61
+ # Build swing-trading features
62
+ nsefast features swing --date 2026-05-07
63
+
64
+ # Export a dataset to Parquet
65
+ nsefast export parquet --dataset daily_bhavcopy
66
+ ```
67
+
68
+ In Python:
69
+
70
+ ```python
71
+ from nsefast.collectors.report_links import collect_report_links
72
+ from nsefast.storage.parquet_store import save_parquet
73
+
74
+ df = collect_report_links() # polars DataFrame
75
+ save_parquet(df, dataset="report_links")
76
+ ```
77
+
78
+ ## Project layout
79
+
80
+ ```text
81
+ nsefast/
82
+ ├── pyproject.toml
83
+ ├── requirements.txt
84
+ ├── main.py
85
+ ├── README.md
86
+
87
+ ├── nsefast/
88
+ │ ├── config.py # URLs, headers, paths
89
+ │ ├── http_client.py # session + retries
90
+ │ ├── robots.py # robots.txt checker
91
+ │ ├── collectors/ # one module per data domain
92
+ │ ├── processing/ # normalize, features, technicals
93
+ │ ├── storage/ # parquet, duckdb, postgres
94
+ │ └── cli.py # Typer CLI
95
+
96
+ └── rust-core/ # optional pyo3 module
97
+ ├── Cargo.toml
98
+ └── src/lib.rs
99
+ ```
100
+
101
+ ## Storage zones
102
+
103
+ - `data/raw/` — raw downloads exactly as fetched
104
+ - `data/clean/` — normalized intermediate files
105
+ - `data/parquet/` — partitioned Parquet, the canonical store
106
+
107
+ ## Rust core (optional)
108
+
109
+ The `rust-core/` crate exposes a `nsefast_core` Python module via
110
+ [PyO3](https://pyo3.rs/) for CPU-bound work (SHA-256 hashing, dedup,
111
+ fast CSV normalization). HTTP scraping stays in Python — it's I/O bound.
112
+
113
+ Build with [maturin](https://www.maturin.rs/) from a git checkout (the `rust-core/` crate is excluded from the PyPI sdist):
114
+
115
+ ```bash
116
+ cd rust-core
117
+ maturin develop --release
118
+ ```
119
+
120
+ ## Documentation
121
+
122
+ - [`docs/USAGE.md`](docs/USAGE.md) — full Python + CLI usage, canonical schemas, polite-use rules
123
+ - [`docs/PUBLISHING.md`](docs/PUBLISHING.md) — how to release new versions to PyPI
124
+ - [`CHANGELOG.md`](CHANGELOG.md) — version history
125
+
126
+ ## Failure semantics
127
+
128
+ Every public collector returns a Polars DataFrame with its canonical
129
+ schema on **any** failure (invalid input, network error, malformed
130
+ payload, polars error, robots block). Collectors **never raise** — your
131
+ pipelines stay crash-proof.
132
+
133
+ ## Tests
134
+
135
+ ```bash
136
+ pytest -q # 77 unit tests, no network calls
137
+ ```
138
+
139
+ ## License
140
+
141
+ MIT — see [`LICENSE`](LICENSE)
@@ -0,0 +1,81 @@
1
+ # Publishing nsefast to PyPI
2
+
3
+ > **Replit cannot publish to PyPI for you.** PyPI requires your account
4
+ > credentials. Use these steps from your own machine (or a CI you control).
5
+
6
+ ## One-time setup
7
+
8
+ 1. Create a PyPI account: <https://pypi.org/account/register/>
9
+ 2. Create an API token: <https://pypi.org/manage/account/token/>
10
+ - Scope: "Entire account" (first publish) or "Project: nsefast" (after first publish).
11
+ 3. Save the token in `~/.pypirc`:
12
+
13
+ ```ini
14
+ [pypi]
15
+ username = __token__
16
+ password = pypi-<your-long-token>
17
+
18
+ [testpypi]
19
+ username = __token__
20
+ password = pypi-<your-test-token>
21
+ ```
22
+
23
+ 4. (Optional) Create a TestPyPI token at <https://test.pypi.org> for dry-runs.
24
+
25
+ ## Build and publish
26
+
27
+ ```bash
28
+ # 1. From a clean checkout, install build tooling
29
+ pip install --upgrade build twine
30
+
31
+ # 2. Bump the version in pyproject.toml, nsefast/__init__.py (__version__), and CHANGELOG.md
32
+ # (PyPI rejects re-uploads of an existing version)
33
+
34
+ # 3. Wipe stale artifacts
35
+ rm -rf dist/ build/ *.egg-info
36
+
37
+ # 4. Build sdist + wheel
38
+ python -m build
39
+
40
+ # 5. Sanity-check the artifacts
41
+ twine check dist/*
42
+
43
+ # 6. (Optional) Dry-run on TestPyPI first
44
+ twine upload --repository testpypi dist/*
45
+ pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ nsefast
46
+
47
+ # 7. Real upload
48
+ twine upload dist/*
49
+ ```
50
+
51
+ ## After publish
52
+
53
+ - Tag the release: `git tag v0.1.0 && git push --tags`
54
+ - Verify install: `pip install nsefast` then `nsefast --help`
55
+ - Update the GitHub release notes from `CHANGELOG.md`.
56
+
57
+ ## Automated publishing (optional)
58
+
59
+ Use the included `.github/workflows/publish.yml` — it builds + publishes
60
+ to PyPI on every pushed tag matching `v*`. Configure a `PYPI_API_TOKEN`
61
+ secret in your GitHub repo settings.
62
+
63
+ ## Versioning
64
+
65
+ - Bump `version` in `pyproject.toml` and `__version__` in `nsefast/__init__.py` (keep them identical).
66
+ - Add a section to `CHANGELOG.md`.
67
+ - Follow SemVer: bump **patch** for fixes, **minor** for new collectors,
68
+ **major** for breaking schema changes.
69
+
70
+ ## What ships in the wheel
71
+
72
+ `MANIFEST.in` governs the sdist: it excludes tests, the Rust crate, the `data/`
73
+ cache, and Replit `artifacts/`. The wheel contains only the `nsefast/` Python
74
+ package plus its metadata (README text and LICENSE); CHANGELOG ships in the sdist.
75
+
76
+ Verify locally:
77
+
78
+ ```bash
79
+ python -m build
80
+ unzip -l dist/nsefast-*.whl
81
+ ```
@@ -0,0 +1,165 @@
1
+ # nsefast — Usage Guide
2
+
3
+ > Public NSE India data only. Respects `robots.txt`, no anti-bot bypass.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install nsefast
9
+ # with optional extras
10
+ pip install "nsefast[pandas,postgres,api]"
11
+ ```
12
+
13
+ For local development:
14
+
15
+ ```bash
16
+ git clone https://github.com/nikhilshinde/nsefast
17
+ cd nsefast
18
+ pip install -e ".[dev]"
19
+ pytest -q
20
+ ```
21
+
22
+ ## Failure semantics (read this first)
23
+
24
+ Every public collector returns an **empty Polars DataFrame with its
25
+ canonical schema** on any failure (invalid input, network exception, HTTP
26
+ non-200, JSON decode error, malformed payload, polars construction error,
27
+ robots block). Collectors **never raise**, so an empty DataFrame can mean
28
+ either "no data for that input" or "the fetch failed".
29
+
30
+ ```python
31
+ from nsefast.collectors.deals import bulk_deals
32
+ df = bulk_deals("not-a-date", "also-bad") # logs a warning, returns empty df
33
+ assert df.is_empty()
34
+ assert df.columns == ["date","symbol","security_name","client_name",
35
+ "buy_sell","quantity","price","remarks","source_url"]
36
+ ```
37
+
38
+ ## Quick CLI tour
39
+
40
+ ```bash
41
+ # Daily archives
42
+ nsefast collect equity-bhavcopy --date 2026-05-07
43
+ nsefast collect fo-bhavcopy --date 2026-05-07
44
+ nsefast collect delivery --date 2026-05-07
45
+ nsefast collect symbol-master
46
+
47
+ # Deals (date-range, 90-day chunked)
48
+ nsefast collect bulk-deals --start 2026-04-01 --end 2026-05-07
49
+ nsefast collect block-deals --start 2026-04-01 --end 2026-05-07
50
+
51
+ # Corporate filings
52
+ nsefast collect corporate-announcements --start 2026-04-01 --end 2026-05-07
53
+ nsefast collect corporate-actions --start 2026-04-01 --end 2026-05-07
54
+
55
+ # Live snapshots
56
+ nsefast collect option-chain --symbol NIFTY
57
+ nsefast collect option-chain --symbol RELIANCE
58
+ nsefast collect sector-strength
59
+ nsefast collect all-indices
60
+ nsefast collect 52w
61
+
62
+ # Discover all downloadable report URLs
63
+ nsefast collect-reports
64
+
65
+ # Build features
66
+ nsefast features swing --date 2026-05-07
67
+ ```
68
+
69
+ All commands save partitioned Parquet under `data/parquet/<dataset>/…`.
70
+
71
+ ## Python API
72
+
73
+ ### Daily archives
74
+
75
+ ```python
76
+ from nsefast.collectors import equity, derivatives, master
77
+
78
+ df = equity.daily_bhavcopy("2026-05-07") # OHLCV per security
79
+ df = equity.delivery_data("2026-05-07") # delivery quantity / %
80
+ df = derivatives.fo_bhavcopy("2026-05-07") # F&O snapshot
81
+ df = master.symbol_master() # all listed symbols
82
+ ```
83
+
84
+ ### Date-range JSON collectors (90-day chunked, polite)
85
+
86
+ ```python
87
+ from nsefast.collectors.deals import bulk_deals, block_deals
88
+ from nsefast.collectors.corporate import (
89
+ corporate_announcements, corporate_actions,
90
+ dividends, bonuses, splits, rights_issues, mergers_demergers,
91
+ )
92
+
93
+ deals = bulk_deals("2026-04-01", "2026-05-07")
94
+ ann = corporate_announcements("2026-04-01", "2026-05-07")
95
+ divs = dividends("2026-04-01", "2026-05-07") # filtered slice
96
+ ```
97
+
98
+ ### Live snapshots
99
+
100
+ ```python
101
+ from nsefast.collectors.derivatives import option_chain
102
+ from nsefast.collectors.indices import all_indices, sector_strength
103
+ from nsefast.collectors.equity import (
104
+ fifty_two_week_high_low, fifty_two_week_high, fifty_two_week_low,
105
+ )
106
+
107
+ chain = option_chain("NIFTY") # CE & PE flattened, one row per leg
108
+ chain_r = option_chain("RELIANCE") # auto-routes equity vs index endpoint
109
+
110
+ idx = all_indices() # every index NSE publishes
111
+ strong = sector_strength() # sector-only, sorted by % change desc
112
+
113
+ w52 = fifty_two_week_high_low() # 'category' column = "high" or "low"
114
+ highs = fifty_two_week_high()
115
+ lows = fifty_two_week_low()
116
+ ```
117
+
118
+ ### Storage
119
+
120
+ ```python
121
+ from nsefast.storage.parquet_store import save_parquet, read_parquet
122
+
123
+ path = save_parquet(df, dataset="bulk_deals", partition="2026-05")
124
+ df2 = read_parquet("bulk_deals")
125
+
126
+ # DuckDB analytics over the parquet lake
127
+ from nsefast.storage.duckdb_store import register_view, query
128
+ register_view("bulk_deals")
129
+ top = query("SELECT symbol, SUM(quantity) AS q FROM bulk_deals "
130
+ "GROUP BY symbol ORDER BY q DESC LIMIT 10")
131
+ ```
132
+
133
+ ### Optional PostgreSQL sink
134
+
135
+ ```python
136
+ # pip install "nsefast[postgres]"
137
+ from nsefast.storage.postgres_store import write_dataframe
138
+ write_dataframe(df, table="bulk_deals", dsn="postgresql://user:pw@host/db")
139
+ ```
140
+
141
+ ## Canonical schemas
142
+
143
+ | Collector | Columns |
144
+ |---|---|
145
+ | `bulk_deals` / `block_deals` | `date, symbol, security_name, client_name, buy_sell, quantity, price, remarks, source_url` |
146
+ | `corporate_announcements` | `date, symbol, company_name, subject, details, attachment_url, source_url` |
147
+ | `corporate_actions` | `ex_date, record_date, symbol, company_name, purpose, face_value, source_url` |
148
+ | `option_chain*` | `symbol, underlying_value, expiry_date, strike_price, option_type, open_interest, change_in_oi, volume, implied_volatility, last_price, change, bid_qty, bid_price, ask_price, ask_qty, source_url, fetched_at` |
149
+ | `all_indices` / `sector_strength` | `index_name, last, change, percent_change, open, day_high, day_low, prev_close, year_high, year_low, advances, declines, unchanged, source_url, fetched_at` |
150
+ | `fifty_two_week_high_low` | `symbol, series, company_name, category, last_price, prev_high_low, prev_high_low_date, change, percent_change, source_url, fetched_at` |
151
+
152
+ ## Network notes
153
+
154
+ - NSE blocks many datacenter IP ranges (AWS, GCP, Replit, etc.) at the
155
+ edge. Run from a residential IP if collectors silently return empty.
156
+ - The HTTP client warms up cookies via `nseindia.com` before any
157
+ `/api/...` call — if you build your own session, do the same.
158
+ - All endpoints are gated by `robots.txt`. Disallowed paths return empty.
159
+
160
+ ## Polite-use checklist
161
+
162
+ 1. Don't reduce `REQUEST_DELAY_SECONDS` (default in `nsefast.config`).
163
+ 2. Cache results to Parquet; don't re-fetch the same range repeatedly.
164
+ 3. Don't run more than one collector instance against NSE concurrently.
165
+ 4. Comply with NSE's terms of service. You are responsible for usage.
@@ -0,0 +1,3 @@
1
+ """nsefast — fast NSE India data collector."""
2
+
3
+ __version__ = "0.1.0"