mcx-data 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,94 @@
1
+ Metadata-Version: 2.4
2
+ Name: mcx-data
3
+ Version: 1.0.0
4
+ Summary: Download MCX India commodity market data as pandas DataFrames — spot prices (recent + archive), futures bhavcopy. Works on AWS Lambda.
5
+ Author-email: Nikhil Suthar <nikhil.suthar@lumiq.ai>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/NikhilSuthar/indian-market-data
8
+ Project-URL: Documentation, https://NikhilSuthar.github.io/indian-market-data
9
+ Project-URL: Repository, https://github.com/NikhilSuthar/indian-market-data
10
+ Project-URL: Bug Tracker, https://github.com/NikhilSuthar/indian-market-data/issues
11
+ Keywords: mcx,mcx-data,mcx-india,commodity,futures,options,gold,silver,crude-oil,natural-gas,india,market-data,financial-data,pandas,bhavcopy,trading,historical-data
12
+ Classifier: Development Status :: 5 - Production/Stable
13
+ Classifier: Intended Audience :: Financial and Insurance Industry
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Office/Business :: Financial :: Investment
22
+ Classifier: Typing :: Typed
23
+ Requires-Python: >=3.9
24
+ Description-Content-Type: text/markdown
25
+ Requires-Dist: requests>=2.31.0
26
+ Requires-Dist: pandas>=2.0.0
27
+ Requires-Dist: openpyxl>=3.1.0
28
+ Requires-Dist: curl-cffi>=0.7.0
29
+ Provides-Extra: s3
30
+ Requires-Dist: boto3>=1.28.0; extra == "s3"
31
+ Provides-Extra: dev
32
+ Requires-Dist: pytest; extra == "dev"
33
+ Requires-Dist: pytest-cov; extra == "dev"
34
+ Requires-Dist: ruff; extra == "dev"
35
+
36
+ # mcx-data
37
+
38
+ [![PyPI version](https://badge.fury.io/py/mcx-data.svg)](https://pypi.org/project/mcx-data/)
39
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
40
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
41
+
42
+ Download **MCX India** commodity spot market data as pandas DataFrames. Works from **AWS Lambda** and any cloud environment.
43
+
44
+ **Full Documentation → [NikhilSuthar.github.io/indian-market-data/mcx-spot](https://NikhilSuthar.github.io/indian-market-data/mcx-spot)**
45
+
46
+ Part of the [indian-market-data](https://github.com/NikhilSuthar/indian-market-data) monorepo — also see [`nse-data`](https://pypi.org/project/nse-data/).
47
+
48
+ ```bash
49
+ pip install mcx-data
50
+ ```
51
+
52
+ ## Quick Start
53
+
54
+ ```python
55
+ from mcxdata import mcx
56
+
57
+ # Today's spot prices — all 28 commodities
58
+ df = mcx.get_spot_recent()
59
+
60
+ # Single commodity
61
+ df = mcx.get_spot_recent(commodity="GOLD")
62
+
63
+ # Historical (requires specific commodity)
64
+ df = mcx.get_spot_archive("2026-05-01", "2026-05-22", commodity="GOLD")
65
+ df = mcx.get_spot_archive("2026-05-01", "2026-05-22", commodity="SILVER")
66
+
67
+ # Download to S3
68
+ mcx.download("spot", "market", "spot_recent",
69
+ s3_bucket="my-bucket", s3_prefix="raw/mcx/")
70
+
71
+ # Available commodities (28)
72
+ mcx.list_commodities()
73
+ ```
74
+
75
+ ## Datasets
76
+
77
+ | Dataset | Description | Date Param |
78
+ |---------|-------------|-----------|
79
+ | `spot_recent` | Today's spot prices — all 28 commodities | None |
80
+ | `spot_archive` | Historical spot prices by commodity + date range | `from_date`, `to_date` |
81
+
82
+ ## Available Commodities (28)
83
+
84
+ `ALUMINI, ALUMINIUM, CARDAMOM, COPPER, COTTON, COTTONOIL, CPO, CRUDEOIL, CRUDEOILM, ELECDMBL, GOLD, GOLDGUINEA, GOLDM, GOLDPETAL, GOLDTEN, KAPAS, LEAD, LEADMINI, MENTHAOIL, NATGASMINI, NATURALGAS, NICKEL, SILVER, SILVERM, SILVERMIC, STEELREBAR, ZINC, ZINCMINI`
85
+
86
+ ## Notes
87
+
88
+ - MCX archive requires a **specific commodity** — `"ALL"` returns empty (MCX API limitation)
89
+ - Uses `curl-cffi` Chrome TLS impersonation to bypass MCX Akamai WAF
90
+ - Lambda IPs are generally unblocked — works reliably on AWS
91
+
92
+ ## License
93
+
94
+ MIT — data from [MCX India](https://www.mcxindia.com).
@@ -0,0 +1,12 @@
1
+ mcxdata/__init__.py,sha256=HTrEkyf_omqFka303BFOMFtlD07dKL0EwbGxlQjVL00,1261
2
+ mcxdata/cli.py,sha256=5p8Jj9ES7EoqfB1a3IAZSUuOZ-esKaHDTdlRVeCh5iU,3508
3
+ mcxdata/fetcher.py,sha256=u-vKwjTTjkLJtF3HnEnjJ5_99Zh6GMVqr5oZoRJJjYk,7553
4
+ mcxdata/mcx.py,sha256=17pU0hwmYpGl5ZdUasoL8E9FTO8mBE75w6rfrGM9n7E,8303
5
+ mcxdata/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ mcxdata/registry.py,sha256=mSPdJjXtcsS6FGbVOSrdoBDokGZhVzGRYgltbf3Kce4,7336
7
+ mcxdata/session.py,sha256=etMHrx96JgZ7DjFRBBSX-B3HWTe27fiha37q3Jd3YZU,2986
8
+ mcx_data-1.0.0.dist-info/METADATA,sha256=So0uOTMksvy1xw6P0teBjNd7tipC4RB76ySM3NXs6Ao,3888
9
+ mcx_data-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
10
+ mcx_data-1.0.0.dist-info/entry_points.txt,sha256=vwAOj6RJNgiA9Ns-HN8-HhVfUL7lkexeNPkaGBAgm0k,46
11
+ mcx_data-1.0.0.dist-info/top_level.txt,sha256=0VKxoDvrLWSzugxSPsJM4h-B69pluDTiuaRC-BnUzE4,8
12
+ mcx_data-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ mcx-data = mcxdata.cli:main
@@ -0,0 +1 @@
1
+ mcxdata
mcxdata/__init__.py ADDED
@@ -0,0 +1,40 @@
1
+ """
2
+ mcx-data — Download MCX India commodity market data as pandas DataFrames.
3
+
4
+ MCX (Multi Commodity Exchange of India) is India's largest commodity exchange,
5
+ trading futures and options on metals, energy, and agri commodities.
6
+
7
+ Quick Start:
8
+ from mcxdata import mcx
9
+
10
+ # Today's spot prices (all commodities + locations)
11
+ df = mcx.get_spot_recent()
12
+
13
+ # Today's spot price for GOLD only
14
+ df = mcx.get_spot_recent(commodity="GOLD")
15
+
16
+ # Historical spot prices for a date range
17
+ df = mcx.get_spot_archive("01/05/2026", "22/05/2026")
18
+ df = mcx.get_spot_archive("01/05/2026", "22/05/2026", commodity="GOLD")
19
+
20
+ # Generic API (mirrors nse-data pattern)
21
+ df = mcx.get("spot", "market", "spot_recent")
22
+ df = mcx.get("spot", "market", "spot_archive",
23
+ from_date="01/05/2026", to_date="22/05/2026", commodity="GOLD")
24
+
25
+ # Download to S3
26
+ mcx.download("spot", "market", "spot_recent",
27
+ s3_bucket="my-bucket", s3_prefix="raw/mcx/")
28
+
29
+ # List all available datasets
30
+ mcx.list_datasets()
31
+
32
+ # List available commodity names
33
+ mcx.list_commodities()
34
+
35
+ See: https://NikhilSuthar.github.io/indian-market-data
36
+ """
37
+
38
+ __version__ = "1.0.0"
39
+
40
+ from mcxdata import mcx
mcxdata/cli.py ADDED
@@ -0,0 +1,91 @@
1
+ """
2
+ mcx-data CLI — command line interface for downloading MCX data.
3
+
4
+ Usage:
5
+ mcx-data spot-recent
6
+ mcx-data spot-archive --from 01/05/2026 --to 22/05/2026 --commodity GOLD
7
+ mcx-data list
8
+ mcx-data commodities
9
+ """
10
+
11
+ import argparse
12
+ import sys
13
+
14
+
15
def _cli_version() -> str:
    """Return the installed mcx-data version, or the release default if unknown."""
    from importlib.metadata import PackageNotFoundError, version

    try:
        return version("mcx-data")
    except PackageNotFoundError:
        # Running from a source tree that was never pip-installed.
        return "1.0.0"


def main():
    """
    Entry point for the ``mcx-data`` console script.

    Parses ``sys.argv`` and dispatches to ``mcxdata.mcx``:

        list          print the dataset table
        commodities   print one commodity name per line
        spot-recent   download today's spot prices (local directory or S3)
        spot-archive  download historical spot prices (local directory or S3)

    With no sub-command the help text is printed and the process exits
    with status 0.
    """
    parser = argparse.ArgumentParser(
        prog="mcx-data",
        description="Download MCX India commodity market data as CSV",
    )
    # Report the real package version (this was previously hard-coded to 0.1.0).
    parser.add_argument("--version", action="version",
                        version=f"%(prog)s {_cli_version()}")

    sub = parser.add_subparsers(dest="command", help="Command")

    # list command
    sub.add_parser("list", help="List all available MCX datasets")

    # commodities command
    sub.add_parser("commodities", help="List available commodity names from MCX")

    # spot recent
    spot_recent = sub.add_parser("spot-recent", help="Download today's spot market prices")
    spot_recent.add_argument("--commodity", default="ALL", help="Commodity name or ALL")
    spot_recent.add_argument("--location", default="ALL", help="Location name or ALL")
    spot_recent.add_argument("--out", default=".", help="Output directory")
    spot_recent.add_argument("--s3-bucket", default=None, help="S3 bucket name")
    spot_recent.add_argument("--s3-prefix", default="mcx-data/", help="S3 prefix")

    # spot archive
    spot_archive = sub.add_parser("spot-archive", help="Download historical spot market prices")
    spot_archive.add_argument("--from", dest="from_date", required=True,
                              help="From date DD/MM/YYYY")
    spot_archive.add_argument("--to", dest="to_date", required=True,
                              help="To date DD/MM/YYYY")
    spot_archive.add_argument("--commodity", default="ALL", help="Commodity name or ALL")
    spot_archive.add_argument("--out", default=".", help="Output directory")
    spot_archive.add_argument("--s3-bucket", default=None, help="S3 bucket name")
    spot_archive.add_argument("--s3-prefix", default="mcx-data/", help="S3 prefix")

    args = parser.parse_args()

    if args.command is None:
        parser.print_help()
        sys.exit(0)

    # Imported only after argument parsing, so --help/--version never load it.
    from mcxdata import mcx

    if args.command == "list":
        df = mcx.list_datasets()
        print(df.to_string(index=False))

    elif args.command == "commodities":
        print("Fetching commodity list from MCX...")
        commodities = mcx.list_commodities()
        for c in commodities:
            print(f" {c}")

    elif args.command == "spot-recent":
        print(f"Downloading MCX spot recent (commodity={args.commodity})...")
        # `location` is not a named parameter of download(); it travels
        # through **kwargs into mcx.get().
        mcx.download("spot", "market", "spot_recent",
                     commodity=args.commodity,
                     location=args.location,
                     output_dir=args.out,
                     s3_bucket=args.s3_bucket,
                     s3_prefix=args.s3_prefix)

    elif args.command == "spot-archive":
        print(f"Downloading MCX spot archive {args.from_date} → {args.to_date} (commodity={args.commodity})...")
        mcx.download("spot", "market", "spot_archive",
                     from_date=args.from_date,
                     to_date=args.to_date,
                     commodity=args.commodity,
                     output_dir=args.out,
                     s3_bucket=args.s3_bucket,
                     s3_prefix=args.s3_prefix)

    else:
        # Defensive only: argparse rejects unknown sub-commands before this point.
        parser.print_help()
88
+
89
+
90
+ if __name__ == "__main__":
91
+ main()
mcxdata/fetcher.py ADDED
@@ -0,0 +1,211 @@
1
+ """
2
+ MCX Data Fetcher — calls the two known MCX backpage.aspx endpoints.
3
+
4
+ Endpoints (confirmed from browser devtools):
5
+ Recent: POST https://www.mcxindia.com/backpage.aspx/GetSpotMarketPrice
6
+ Body: "" (empty string)
7
+
8
+ Archive: POST https://www.mcxindia.com/backpage.aspx/GetSpotMarketArchive
9
+ Body: {"Product":"GOLD","Location":"ALL","Fromdate":"20260524","Session":"0","Todate":"20260524"}
10
+
11
+ Response format:
12
+ {"d": "[{\"Symbol\":\"GOLD\",\"Unit\":\"10 GRMS\",\"Location\":\"AHMEDABAD\",
13
+ \"TodaysSpotPrice\":\"157549.00\",\"Change\":\"Down\"}]"}
14
+ i.e. JSON where .d is a JSON-encoded string of the data array.
15
+ """
16
+
17
+ import json
18
+ import io
19
+ import time
20
+ from typing import Optional
21
+
22
+ import pandas as pd
23
+
24
+ from mcxdata.session import get_session, reset_session, MCX_BASE
25
+
26
+ # ── Endpoint URLs ─────────────────────────────────────────────────────────────
27
# POST endpoints: today's spot prices and the historical spot archive.
_URL_RECENT = f"{MCX_BASE}/backpage.aspx/GetSpotMarketPrice"
_URL_ARCHIVE = f"{MCX_BASE}/backpage.aspx/GetSpotMarketArchive"

# Headers attached to every POST issued by _post(): a JSON content type, the
# XMLHttpRequest marker, and a Referer/Origin pointing at the MCX
# spot-market page.
_POST_HEADERS = {
    "Content-Type": "application/json; charset=UTF-8",
    "Accept": "application/json, text/javascript, */*; q=0.01",
    "X-Requested-With": "XMLHttpRequest",
    "Referer": f"{MCX_BASE}/market-data/spot-market-price",
    "Origin": MCX_BASE,
}
37
+
38
+
39
def fetch_recent(commodity: str = "ALL", location: str = "ALL",
                 session_val: str = "0") -> pd.DataFrame:
    """
    POST GetSpotMarketPrice — returns today's spot prices for all commodities.

    The request body is always the empty string, so none of the arguments
    below change what is requested; any filtering has to be done by the
    caller on the returned frame.

    Args:
        commodity: Not sent to MCX (Recent always returns all). Kept for API symmetry.
        location: Not sent to MCX.
        session_val: Not sent to MCX by this function; kept for symmetry
            with fetch_archive().

    Returns:
        DataFrame produced by _parse_response(). After its column renaming
        the known columns are: Commodity, Unit, Location, Spot Price (Rs.),
        Up/Down. An empty DataFrame is returned when the response has no rows.
    """
    raw = _post(_URL_RECENT, body="")
    return _parse_response(raw)
54
+
55
+
56
def fetch_archive(from_date: str, to_date: str,
                  commodity: str = "ALL", location: str = "ALL",
                  session_val: str = "0") -> pd.DataFrame:
    """
    POST GetSpotMarketArchive and return the historical spot prices.

    Args:
        from_date: Start date as YYYYMMDD, e.g. "20260501".
        to_date: End date as YYYYMMDD, e.g. "20260522".
        commodity: Commodity name such as "GOLD"; "ALL", None or "" are
            all sent as "ALL".
        location: Location name; "ALL", None or "" are all sent as "ALL".
        session_val: "0"=Both, "1"=Session1, "2"=Session2.

    Returns:
        DataFrame produced by _parse_response(). After its column renaming
        the known columns are: Commodity, Unit, Location, Spot Price (Rs.),
        Up/Down, Date. Empty when MCX returns no rows.
    """
    # Key order matches the payload documented at the top of this module.
    request_body = json.dumps({
        "Product": commodity or "ALL",
        "Location": location or "ALL",
        "Fromdate": from_date,
        "Session": session_val,
        "Todate": to_date,
    })
    return _parse_response(_post(_URL_ARCHIVE, body=request_body))
81
+
82
+
83
def _post(url: str, body: str) -> dict:
    """
    POST to MCX backpage endpoint. Returns parsed JSON dict.

    At most three attempts are made.

    Retry strategy on 403:
        - First 403: wait 3s and retry with the same session (cookies preserved)
        - Second 403: reset the cached session, wait 5s, fetch a fresh
          session (fresh warmup) and retry
        - Third 403: raise RuntimeError
    Never destroy cookies on first 403 — Akamai uses session state.

    Any other non-200 status raises RuntimeError immediately, without a
    retry. Exceptions other than RuntimeError (raised by the POST itself or
    by decoding the body as JSON) are retried after a 2s pause on the first
    two attempts and re-raised unchanged on the third.

    Args:
        url: Endpoint to POST to.
        body: Raw request body, sent as-is via ``data=``.

    Returns:
        The value of ``r.json()`` for the first 200 response.

    Raises:
        RuntimeError: on a third consecutive 403 or on any non-200 status.
    """
    session, stype = get_session()

    for attempt in range(3):
        try:
            if stype == "curl_cffi":
                # Headers are passed per request for curl_cffi sessions
                r = session.post(url, data=body, headers=_POST_HEADERS, timeout=25)
            else:
                # For other session types the headers are merged into the
                # session itself, so they persist for later requests too
                session.headers.update(_POST_HEADERS)
                r = session.post(url, data=body, timeout=25)

            if r.status_code == 403:
                if attempt == 0:
                    # First 403 — same session, just wait longer
                    time.sleep(3)
                    continue
                elif attempt == 1:
                    # Second 403 — rebuild session with fresh warmup
                    reset_session()
                    time.sleep(5)
                    session, stype = get_session()
                    continue
                else:
                    raise RuntimeError(f"HTTP 403: {url} (Akamai WAF blocking — ensure curl_cffi is installed)")

            if r.status_code != 200:
                raise RuntimeError(f"HTTP {r.status_code}: {url}")

            return r.json()

        except RuntimeError:
            # Our own status errors above must not be retried by the generic handler
            raise
        except Exception as e:
            if attempt < 2:
                time.sleep(2)
                continue
            raise

    # Defensive: every path of the third attempt above either returns or raises
    raise RuntimeError(f"Failed after 3 attempts: {url}")
130
+
131
+
132
+ def _parse_response(raw: dict) -> pd.DataFrame:
133
+ """
134
+ Parse MCX backpage response.
135
+
136
+ MCX returns:
137
+ {"d": {"Summary": {"AsOn": ..., "Count": 28}, "Data": [{...}, ...]}}
138
+ or for archive:
139
+ {"d": {"Summary": {...}, "Data": [{...}, ...]}}
140
+ """
141
+ if "d" not in raw:
142
+ raise RuntimeError(f"Unexpected response format: {list(raw.keys())}")
143
+
144
+ inner = raw["d"]
145
+
146
+ # inner can be a dict with "Data" key, a list, or a JSON string
147
+ if isinstance(inner, str):
148
+ if not inner.strip():
149
+ return pd.DataFrame()
150
+ inner = json.loads(inner)
151
+
152
+ if isinstance(inner, dict):
153
+ data = inner.get("Data", inner.get("data", []))
154
+ elif isinstance(inner, list):
155
+ data = inner
156
+ else:
157
+ raise RuntimeError(f"Cannot parse .d of type {type(inner)}: {str(inner)[:100]}")
158
+
159
+ if not data:
160
+ return pd.DataFrame()
161
+
162
+ df = pd.DataFrame(data)
163
+ return _clean_df(df)
164
+
165
+
166
+ def _clean_df(df: pd.DataFrame) -> pd.DataFrame:
167
+ """Standardise MCX response DataFrame."""
168
+ if df.empty:
169
+ return df
170
+
171
+ # Drop internal ASP.NET type field and junk columns
172
+ drop_cols = [c for c in df.columns if c.startswith("__")
173
+ or c in ("ExtensionData", "EnSymbol", "Enlocation")]
174
+ df = df.drop(columns=drop_cols, errors="ignore")
175
+
176
+ # Rename MCX field names → readable column names
177
+ rename = {
178
+ "Symbol": "Commodity",
179
+ "TodaysSpotPrice": "Spot Price (Rs.)",
180
+ "Change": "Up/Down",
181
+ }
182
+ df = df.rename(columns={k: v for k, v in rename.items() if k in df.columns})
183
+
184
+ # Clean price — remove commas, convert to float
185
+ price_col = "Spot Price (Rs.)"
186
+ if price_col in df.columns:
187
+ df[price_col] = (
188
+ df[price_col].astype(str)
189
+ .str.replace(",", "", regex=False)
190
+ .str.strip()
191
+ )
192
+ df[price_col] = pd.to_numeric(df[price_col], errors="coerce")
193
+
194
+ # Parse .NET JSON Date: /Date(milliseconds)/ → datetime
195
+ if "Date" in df.columns:
196
+ import re as _re
197
+ def _parse_net_date(val):
198
+ m = _re.search(r'/Date\((\d+)\)/', str(val))
199
+ if m:
200
+ return pd.to_datetime(int(m.group(1)), unit="ms")
201
+ return pd.NaT
202
+ df["Date"] = df["Date"].apply(_parse_net_date)
203
+ df["Date"] = df["Date"].dt.strftime("%Y-%m-%d %H:%M:%S") # ISO: "2026-05-22 12:33:11"
204
+
205
+ # Reorder columns sensibly
206
+ preferred_order = ["Commodity", "Unit", "Location", "Spot Price (Rs.)", "Up/Down", "Date"]
207
+ cols = [c for c in preferred_order if c in df.columns]
208
+ extra = [c for c in df.columns if c not in cols]
209
+ df = df[cols + extra]
210
+
211
+ return df.reset_index(drop=True)
mcxdata/mcx.py ADDED
@@ -0,0 +1,232 @@
1
+ """
2
+ mcx-data public API.
3
+
4
+ Usage:
5
+ from mcxdata import mcx
6
+
7
+ # Today's spot prices (all commodities)
8
+ df = mcx.get_spot_recent()
9
+ df = mcx.get_spot_recent(commodity="GOLD")
10
+
11
+ # Historical spot prices
12
+ df = mcx.get_spot_archive("2026-05-01", "2026-05-22")
13
+ df = mcx.get_spot_archive("2026-05-01", "2026-05-22", commodity="GOLD")
14
+
15
+ # Generic API (mirrors nse-data pattern)
16
+ df = mcx.get("spot", "market", "spot_recent")
17
+ df = mcx.get("spot", "market", "spot_archive",
18
+ from_date="2026-05-01", to_date="2026-05-22", commodity="GOLD")
19
+
20
+ # Download to local file or S3
21
+ mcx.download("spot", "market", "spot_recent", output_dir="./data")
22
+ mcx.download("spot", "market", "spot_archive",
23
+ from_date="2026-05-01", to_date="2026-05-22",
24
+ s3_bucket="my-bucket", s3_prefix="raw/mcx/")
25
+
26
+ # List datasets / commodities
27
+ mcx.list_datasets()
28
+ mcx.list_commodities()
29
+ """
30
+
31
+ import os
32
+ import re
33
+ from datetime import datetime
34
+ from typing import Optional
35
+
36
+ import pandas as pd
37
+
38
+ from mcxdata.registry import list_datasets as _list_datasets
39
+ from mcxdata.fetcher import fetch_recent, fetch_archive
40
+
41
+
42
+ # ── Public API ────────────────────────────────────────────────────────────────
43
+
44
def list_datasets(category: str = None) -> pd.DataFrame:
    """Return the registered MCX datasets as a DataFrame (one row per dataset).

    Args:
        category: Passed through to the registry's list_datasets(); None
            lists every category.
    """
    return pd.DataFrame(_list_datasets(category))
48
+
49
+
50
def list_commodities() -> list:
    """
    Return the sorted, de-duplicated commodity names found in today's spot data.

    The names come from get_spot_recent(), so the list reflects whatever MCX
    is currently publishing rather than a fixed catalogue.

    Returns:
        Sorted list of commodity names; an empty list when the spot feed
        returned no rows.
    """
    df = get_spot_recent()
    # An empty response has no columns at all — avoid a KeyError on lookup.
    if "Commodity" not in df.columns:
        return []
    # Missing values cannot be ordered against strings, so drop them first.
    return sorted(df["Commodity"].dropna().unique().tolist())
57
+
58
+
59
+ # ── Spot recent ───────────────────────────────────────────────────────────────
60
+
61
def get_spot_recent(commodity: str = "ALL", location: str = "ALL") -> pd.DataFrame:
    """
    Get today's spot prices for all (or one) MCX commodity.

    The full table is always fetched; commodity/location filtering happens
    client-side and is case-insensitive.

    Args:
        commodity: "ALL" or name e.g. "GOLD", "SILVER", "CRUDEOIL"
        location: "ALL" or location name

    Returns:
        DataFrame — Commodity, Unit, Location, Spot Price (Rs.), Up/Down.
        Empty when MCX returned no rows or nothing matches the filters.

    Example:
        df = mcx.get_spot_recent()
        df = mcx.get_spot_recent(commodity="GOLD")
    """
    df = fetch_recent()

    # An empty response has no columns, so the filters below would raise
    # KeyError — there is nothing to filter anyway.
    if df.empty:
        return df

    # Filter client-side if a specific commodity/location was requested
    if commodity and commodity.upper() != "ALL":
        df = df[df["Commodity"].str.upper() == commodity.upper()]
    if location and location.upper() != "ALL":
        df = df[df["Location"].str.upper() == location.upper()]

    return df.reset_index(drop=True)
87
+
88
+
89
+ # ── Spot archive ──────────────────────────────────────────────────────────────
90
+
91
def get_spot_archive(
    from_date: str,
    to_date: str,
    commodity: str = "ALL",
    location: str = "ALL",
) -> pd.DataFrame:
    """
    Fetch historical spot prices from the MCX archive.

    Both dates are normalised with _to_yyyymmdd() before the request is made,
    so any format that helper accepts can be used here.

    Args:
        from_date: Start date, e.g. "2026-05-01" or "01/05/2026".
        to_date: End date, e.g. "2026-05-22" or "22/05/2026".
        commodity: "ALL" or a commodity name such as "GOLD", "SILVER".
        location: "ALL" or a location name.

    Returns:
        DataFrame — Commodity, Unit, Location, Spot Price (Rs.), Up/Down, Date.

    Raises:
        ValueError: if either date is in an unrecognised format.

    Example:
        df = mcx.get_spot_archive("2026-05-01", "2026-05-22", commodity="GOLD")
    """
    start, end = (_to_yyyymmdd(value) for value in (from_date, to_date))
    return fetch_archive(start, end, commodity=commodity, location=location)
116
+
117
+
118
+ # ── Generic get() — mirrors nse-data API pattern ──────────────────────────────
119
+
120
def get(
    category: str,
    subcategory: str,
    dataset: str,
    date: str = None,
    *,
    from_date: str = None,
    to_date: str = None,
    commodity: str = "ALL",
    location: str = "ALL",
    **kwargs,
) -> pd.DataFrame:
    """
    Generic dataset getter — mirrors nse-data's nse.get() signature.

    The registry entry's ``date_type`` selects the fetch path:
        "recent" → get_spot_recent()
        "range"  → get_spot_archive(); ``date`` fills in whichever of
                   from_date / to_date is missing

    Extra keyword arguments are accepted and ignored.

    Raises:
        ValueError: unknown dataset, a "range" dataset without usable dates,
            or a registry entry with any other ``date_type``.

    Example:
        df = mcx.get("spot", "market", "spot_recent")
        df = mcx.get("spot", "market", "spot_archive",
                     from_date="2026-05-01", to_date="2026-05-22")
    """
    from mcxdata.registry import get_config

    cfg = get_config(category, subcategory, dataset)
    kind = cfg.date_type

    if kind == "recent":
        return get_spot_recent(commodity=commodity, location=location)

    if kind != "range":
        raise ValueError(f"Unsupported date_type '{cfg.date_type}' for '{dataset}'")

    if not (from_date and to_date):
        if not date:
            raise ValueError(
                f"'{dataset}' requires from_date and to_date.\n"
                "Example: mcx.get('spot','market','spot_archive', "
                "from_date='2026-05-01', to_date='2026-05-22')"
            )
        # A single `date` stands in for whichever bound was not supplied
        from_date = from_date or date
        to_date = to_date or date

    return get_spot_archive(from_date, to_date,
                            commodity=commodity, location=location)
163
+
164
+
165
+ # ── download() — save to file or S3 ──────────────────────────────────────────
166
+
167
def download(
    category: str,
    subcategory: str,
    dataset: str,
    date: str = None,
    *,
    from_date: str = None,
    to_date: str = None,
    commodity: str = "ALL",
    output_dir: str = ".",
    s3_bucket: Optional[str] = None,
    s3_prefix: str = "mcx-data/",
    **kwargs,
) -> str:
    """
    Download an MCX dataset and save it as CSV, locally or to S3.

    The file is named ``MCX_<dataset>_<COMMODITY>_<YYYYMMDD>.csv`` where the
    date is taken from to_date, from_date, date or today (first one set).

    Args:
        category, subcategory, dataset: Registry path, as for get().
        date: Single date, used when from_date / to_date are missing.
        from_date, to_date: Date range for range datasets.
        commodity: "ALL" or a commodity name.
        output_dir: Local target directory (created if needed); used only
            when s3_bucket is not given.
        s3_bucket: When set, upload to this bucket instead of writing locally.
        s3_prefix: Key prefix inside the bucket.
        **kwargs: Forwarded to get() (e.g. ``location``).

    Returns:
        The local file path, or the ``s3://bucket/key`` URI.
    """
    df = get(category, subcategory, dataset, date,
             from_date=from_date, to_date=to_date,
             commodity=commodity, **kwargs)

    # Build filename. Normalise the date so DD/MM/YYYY input also yields
    # YYYYMMDD (plain digit-stripping would give DDMMYYYY).
    stamp_source = to_date or from_date or date
    if stamp_source:
        try:
            ts = _to_yyyymmdd(stamp_source)
        except ValueError:
            ts = re.sub(r"[^0-9]", "", stamp_source)  # unknown format: keep digits only
    else:
        ts = datetime.today().strftime("%Y%m%d")

    # Keep the name filesystem/S3-safe: spaces and any path separators → "_"
    safe_comm = re.sub(r"[^A-Za-z0-9_-]", "_", commodity or "ALL").upper()
    fname = f"MCX_{dataset}_{safe_comm}_{ts}.csv"

    if s3_bucket:
        import boto3
        prefix = (s3_prefix or "").rstrip("/")
        # An empty prefix must not produce a key that starts with "/"
        key = f"{prefix}/{fname}" if prefix else fname
        boto3.client("s3").put_object(
            Bucket=s3_bucket, Key=key,
            Body=df.to_csv(index=False).encode("utf-8"),
            ContentType="text/csv",
        )
        uri = f"s3://{s3_bucket}/{key}"
        print(f"✓ {dataset} → {uri}")
        return uri
    else:
        os.makedirs(output_dir, exist_ok=True)
        path = os.path.join(output_dir, fname)
        df.to_csv(path, index=False)
        print(f"✓ {dataset} → {path}")
        return path
212
+
213
+
214
+ # ── Internal helpers ──────────────────────────────────────────────────────────
215
+
216
+ def _to_yyyymmdd(date_str: str) -> str:
217
+ """
218
+ Accept YYYY-MM-DD or DD/MM/YYYY → return YYYYMMDD (MCX format for archive).
219
+ """
220
+ s = date_str.strip()
221
+ if re.match(r'^\d{8}$', s): # already YYYYMMDD
222
+ return s
223
+ if re.match(r'^\d{4}-\d{2}-\d{2}$', s): # YYYY-MM-DD
224
+ return datetime.strptime(s, "%Y-%m-%d").strftime("%Y%m%d")
225
+ if re.match(r'^\d{2}/\d{2}/\d{4}$', s): # DD/MM/YYYY
226
+ return datetime.strptime(s, "%d/%m/%Y").strftime("%Y%m%d")
227
+ if re.match(r'^\d{2}-\d{2}-\d{4}$', s): # DD-MM-YYYY
228
+ return datetime.strptime(s, "%d-%m-%Y").strftime("%Y%m%d")
229
+ raise ValueError(
230
+ f"Unrecognised date format: '{date_str}'. "
231
+ "Use YYYY-MM-DD, DD/MM/YYYY, or YYYYMMDD."
232
+ )
mcxdata/py.typed ADDED
File without changes
mcxdata/registry.py ADDED
@@ -0,0 +1,147 @@
1
+ """
2
+ MCX Dataset Registry — single source of truth for all supported datasets.
3
+
4
+ Each DatasetConfig describes how to fetch one dataset:
5
+ - page_url: MCX page URL to load (for WebForms scraping)
6
+ - export_target: __doPostBack target for Excel export (or None for HTML parse)
7
+ - commodity_select_id: Select element ID for commodity filter
8
+ - location_select_id: Select element ID for location filter
9
+ - date_field: Form field name for date/from-date
10
+ - to_date_field: Form field name for to-date (archive only)
11
+ - date_type: "recent" | "daily" | "monthly" | "range" | "static"
12
+ - df_supported: True if data can be returned as DataFrame (parsed table)
13
+ - download_only: True if only Excel export is supported
14
+
15
+ Naming convention mirrors nse-data: REGISTRY[category][subcategory][dataset_key]
16
+ """
17
+
18
+ from dataclasses import dataclass, field
19
+ from typing import Optional, Literal
20
+
21
+
22
@dataclass
class DatasetConfig:
    """
    Configuration for one MCX dataset.

    Attributes:
        name: Display name of the dataset.
        description: Longer human-readable description.
        page_url: MCX page to load.
        table_id: HTML table ID to parse (None = first table).
        export_target: __doPostBack target for Excel export.
        commodity_field: Form field name for the commodity filter.
        location_field: Form field name for the location filter.
        date_field: Form field name for the (from-)date.
        to_date_field: Form field name for the to-date (range).
        commodity_select_id: <select> element ID for the commodity filter.
        location_select_id: <select> element ID for the location filter.
        date_type: One of "recent", "daily", "range", "monthly", "static".
        date_format: How MCX expects dates in POST.
        file_format: "html" (parse table) or "excel".
        df_supported: True if the data can be returned as a DataFrame.
        download_only: True if only Excel export is supported.
        portal_only: Not described in this module — TODO confirm meaning.
        skip_rows: Presumably leading rows to skip when parsing — TODO confirm.
        frequency: Free-text update frequency, e.g. "Daily".
        notes: Free-text remarks shown in dataset listings.
    """

    # Required
    name: str
    description: str
    page_url: str

    # Page / form scraping identifiers
    table_id: Optional[str] = None
    export_target: Optional[str] = None
    commodity_field: Optional[str] = None
    location_field: Optional[str] = None
    date_field: Optional[str] = None
    to_date_field: Optional[str] = None
    commodity_select_id: Optional[str] = None
    location_select_id: Optional[str] = None

    # Date handling and output format
    date_type: Literal["recent", "daily", "range", "monthly", "static"] = "recent"
    date_format: str = "%d/%m/%Y"
    file_format: str = "html"

    # Capability flags and descriptive metadata
    df_supported: bool = True
    download_only: bool = False
    portal_only: bool = False
    skip_rows: int = 0
    frequency: str = "Daily"
    notes: str = ""
45
+
46
+
47
# Root URL of the MCX website; every page_url below is built from it.
MCX_BASE = "https://www.mcxindia.com"

# ─── REGISTRY ──────────────────────────────────────────────────────────────────
# Nested mapping: REGISTRY[category][subcategory][dataset_key] -> DatasetConfig.
# All keys are lower-case; get_config() lower-cases its arguments before lookup.
REGISTRY: dict = {

    # ══════════════════════════════════════════════════════════════════════════
    # SPOT MARKET
    # URL: https://www.mcxindia.com/market-data/spot-market-price
    #
    # "Recent" tab: shows today's spot prices (all commodities + locations)
    #     — No filters, just load the page and parse the table
    #
    # "Archives" tab: filter by Commodity + date range → Excel export
    #     — POST form with __doPostBack export target
    #
    # NOTE(review): the page/form identifiers below describe the web form.
    # Within this module only name, description, frequency, date_type,
    # df_supported, download_only, file_format and notes are read
    # (by list_datasets).
    # ══════════════════════════════════════════════════════════════════════════
    "spot": {
        "market": {

            # Recent spot prices (today's data — no date param needed)
            "spot_recent": DatasetConfig(
                name="Spot Market Price — Recent",
                description=(
                    "Current day spot prices for all commodities and locations. "
                    "Commodity, Unit, Location, Spot Price (Rs.), and Up/Down."
                ),
                page_url=f"{MCX_BASE}/market-data/spot-market-price",
                table_id=None,  # parse first data table
                export_target="ctl00$cph_InnerContainerRight$C004$lnkExpToExcel",
                commodity_field="ctl00$cph_InnerContainerRight$C004$ddlCommodity",
                location_field="ctl00$cph_InnerContainerRight$C004$ddlLocation",
                date_type="recent",
                file_format="html",
                df_supported=True,
                frequency="Daily (intraday)",
                notes="Recent tab — no date filter. Exports all commodity spot prices as of market close.",
            ),

            # Archive spot prices (historical — date range + commodity filter)
            # NOTE(review): the notes text below advertises 'ALL' for all
            # commodities — confirm the archive endpoint actually returns
            # rows for 'ALL'.
            "spot_archive": DatasetConfig(
                name="Spot Market Price — Archive",
                description=(
                    "Historical spot prices with date range and optional commodity filter. "
                    "Returns daily spot price series per commodity."
                ),
                page_url=f"{MCX_BASE}/market-data/spot-market-price",
                export_target="ctl00$cph_InnerContainerRight$C004$lnkExpToExcelArchive",
                commodity_field="ctl00$cph_InnerContainerRight$C004$ddlCommodityArchive",
                date_field="ctl00$cph_InnerContainerRight$C004$txtFromDate",
                to_date_field="ctl00$cph_InnerContainerRight$C004$txtToDate",
                commodity_select_id="cph_InnerContainerRight_C004_ddlCommodityArchive",
                date_type="range",
                date_format="%d/%m/%Y",
                file_format="html",
                df_supported=True,
                frequency="Daily",
                notes=(
                    "Archives tab — requires from_date + to_date (DD/MM/YYYY). "
                    "Commodity filter: use 'ALL' for all commodities or specific name e.g. 'GOLD'."
                ),
            ),
        },
    },
}
110
+
111
+
112
+ # ─── Helper functions ─────────────────────────────────────────────────────────
113
+
114
def get_config(category: str, subcategory: str, dataset: str) -> DatasetConfig:
    """
    Look up a DatasetConfig by its registry path (case-insensitive).

    Args:
        category: Top-level registry key, e.g. "spot".
        subcategory: Second-level key, e.g. "market".
        dataset: Dataset key, e.g. "spot_recent".

    Returns:
        The matching DatasetConfig.

    Raises:
        ValueError: if the path is not registered; the message lists every
            available "category/subcategory/dataset" path.
    """
    try:
        return REGISTRY[category.lower()][subcategory.lower()][dataset.lower()]
    except KeyError:
        opts = [f"{r['category']}/{r['subcategory']}/{r['dataset']}"
                for r in list_datasets()]
        # `from None`: the internal KeyError adds nothing for the caller and
        # would otherwise show up as a confusing chained traceback.
        raise ValueError(
            f"Unknown MCX dataset: '{category}/{subcategory}/{dataset}'.\n"
            f"Available: {opts}"
        ) from None
125
+
126
+
127
def list_datasets(category: str = None) -> list:
    """
    Describe every registered dataset as a plain dict.

    Args:
        category: When given, only datasets of that category are listed
            (compared case-insensitively); None or "" lists everything.

    Returns:
        List of dicts with the keys category, subcategory, dataset, name,
        description, frequency, date_type, df_supported, format and notes,
        in registry order.
    """
    return [
        {
            "category": cat,
            "subcategory": sub,
            "dataset": key,
            "name": cfg.name,
            "description": cfg.description,
            "frequency": cfg.frequency,
            "date_type": cfg.date_type,
            # A download-only dataset is never reported as DataFrame-capable
            "df_supported": cfg.df_supported and not cfg.download_only,
            "format": cfg.file_format,
            "notes": cfg.notes,
        }
        for cat, subs in REGISTRY.items()
        if not category or cat == category.lower()
        for sub, datasets in subs.items()
        for key, cfg in datasets.items()
    ]
mcxdata/session.py ADDED
@@ -0,0 +1,96 @@
1
+ """
2
+ Shared HTTP session for mcx-data.
3
+
4
+ MCX India uses Akamai WAF which blocks plain Python requests.
5
+ Solution: curl_cffi with Chrome TLS fingerprint impersonation (same approach that
6
+ works for niftyindices.com TRI in nse-data).
7
+
8
+ Priority:
9
+ 1. curl_cffi — Chrome TLS impersonation, bypasses Akamai
10
+ 2. cloudscraper — partial Akamai bypass
11
+ 3. requests — fallback (may 403 on Akamai-blocked IPs)
12
+ """
13
+
14
+ import time
15
+ from typing import Optional, Tuple
16
+
17
# Site root and the spot-market page that _warmup() requests.
MCX_BASE = "https://www.mcxindia.com"
MCX_SPOT_PAGE = f"{MCX_BASE}/market-data/spot-market-price"

# Default headers applied to every session built by _build_session():
# a desktop Chrome 124 User-Agent and an Indian-English language preference.
_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/124.0.0.0 Safari/537.36"
    ),
    "Accept-Language": "en-IN,en-US;q=0.9,en;q=0.8",
}

# Module-level session cache: filled by get_session(), cleared by reset_session().
# _SESSION_TYPE is "curl_cffi", "cloudscraper" or "requests" once a session exists.
_SESSION = None
_SESSION_TYPE: Optional[str] = None
32
+
33
+
34
def get_session() -> Tuple[object, str]:
    """
    Return the cached ``(session, type_str)`` pair, building it on first use.

    The pair is stored in module globals, so later calls reuse the same
    session until reset_session() clears it.
    """
    global _SESSION, _SESSION_TYPE
    if _SESSION is not None:
        return _SESSION, _SESSION_TYPE
    _SESSION, _SESSION_TYPE = _build_session()
    return _SESSION, _SESSION_TYPE
40
+
41
+
42
def reset_session() -> None:
    """
    Drop the cached session so the next get_session() builds a fresh one
    (e.g. after a 403).

    The old session is closed on a best-effort basis so its pooled
    connections are released instead of lingering until garbage collection.
    """
    global _SESSION, _SESSION_TYPE
    stale = _SESSION
    # Clear the cache first: even if closing fails, callers get a new session.
    _SESSION = None
    _SESSION_TYPE = None

    close = getattr(stale, "close", None)
    if callable(close):
        try:
            close()
        except Exception:
            # Cleanup only — a failing close must not block the rebuild.
            pass
47
+
48
+
49
def _build_session() -> Tuple[object, str]:
    """
    Build the best available HTTP session and warm it up against the MCX page.

    Candidates are tried in this order; the first one that can be created is
    returned together with its label:
        1. "curl_cffi"    — Chrome TLS impersonation
        2. "cloudscraper" — JS challenge solver
        3. "requests"     — plain fallback, announced with a warning

    Every session gets the module's default headers and one _warmup() call.
    """
    def _make_curl():
        from curl_cffi.requests import Session as CurlSession
        return CurlSession(impersonate="chrome124")

    def _make_cloudscraper():
        import cloudscraper
        return cloudscraper.create_scraper(
            browser={"browser": "chrome", "platform": "windows", "mobile": False}
        )

    candidates = (
        ("curl_cffi", _make_curl),
        ("cloudscraper", _make_cloudscraper),
    )
    for label, factory in candidates:
        try:
            candidate = factory()
            candidate.headers.update(_HEADERS)
            _warmup(candidate, label)
            return candidate, label
        except ImportError as exc:
            # A missing cloudscraper is skipped silently; curl_cffi is reported
            if label == "curl_cffi":
                print(f"curl_cffi not available: {exc}")
        except Exception as exc:
            if label == "curl_cffi":
                print(f"curl_cffi failed to init: {exc}")
            else:
                print(f"cloudscraper failed: {exc}")

    # Last resort: plain requests
    print("WARNING: falling back to plain requests — MCX may return 403 (Akamai WAF)")
    import requests
    plain = requests.Session()
    plain.headers.update(_HEADERS)
    _warmup(plain, "requests")
    return plain, "requests"
85
+
86
+
87
def _warmup(session, stype: str) -> None:
    """
    GET the MCX spot page to acquire session cookies.

    Only the page itself is requested (no data endpoint is called), followed
    by a 1.5s pause. This is best-effort: any exception from the request is
    swallowed so session construction can continue.

    Args:
        session: Object exposing ``get(url, timeout=...)``.
        stype: Session type label; not used inside this function.
    """
    try:
        session.get(MCX_SPOT_PAGE, timeout=15)
        time.sleep(1.5)  # Akamai needs time to recognise the session
    except Exception:
        # Deliberately ignored — a failed warmup must not prevent returning a session
        pass