PyPI - mcx-data - Versions diffs - 1.0.0__py3-none-any.whl - Mend

mcx-data 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

mcx_data-1.0.0.dist-info/METADATA +94 -0
mcx_data-1.0.0.dist-info/RECORD +12 -0
mcx_data-1.0.0.dist-info/WHEEL +5 -0
mcx_data-1.0.0.dist-info/entry_points.txt +2 -0
mcx_data-1.0.0.dist-info/top_level.txt +1 -0
mcxdata/__init__.py +40 -0
mcxdata/cli.py +91 -0
mcxdata/fetcher.py +211 -0
mcxdata/mcx.py +232 -0
mcxdata/py.typed +0 -0
mcxdata/registry.py +147 -0
mcxdata/session.py +96 -0

mcx_data-1.0.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,94 @@
+Metadata-Version: 2.4
+Name: mcx-data
+Version: 1.0.0
+Summary: Download MCX India commodity spot market data as pandas DataFrames — spot prices (recent + archive). Works on AWS Lambda.
+Author-email: Nikhil Suthar <nikhil.suthar@lumiq.ai>
+License-Expression: MIT
+Project-URL: Homepage, https://github.com/NikhilSuthar/indian-market-data
+Project-URL: Documentation, https://NikhilSuthar.github.io/indian-market-data
+Project-URL: Repository, https://github.com/NikhilSuthar/indian-market-data
+Project-URL: Bug Tracker, https://github.com/NikhilSuthar/indian-market-data/issues
+Keywords: mcx,mcx-data,mcx-india,commodity,futures,options,gold,silver,crude-oil,natural-gas,india,market-data,financial-data,pandas,bhavcopy,trading,historical-data
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Financial and Insurance Industry
+Classifier: Intended Audience :: Developers
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Office/Business :: Financial :: Investment
+Classifier: Typing :: Typed
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+Requires-Dist: requests>=2.31.0
+Requires-Dist: pandas>=2.0.0
+Requires-Dist: openpyxl>=3.1.0
+Requires-Dist: curl-cffi>=0.7.0
+Provides-Extra: s3
+Requires-Dist: boto3>=1.28.0; extra == "s3"
+Provides-Extra: dev
+Requires-Dist: pytest; extra == "dev"
+Requires-Dist: pytest-cov; extra == "dev"
+Requires-Dist: ruff; extra == "dev"
+# mcx-data
+[![PyPI version](https://badge.fury.io/py/mcx-data.svg)](https://pypi.org/project/mcx-data/)
+[![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+Download **MCX India** commodity spot market data as pandas DataFrames. Works from **AWS Lambda** and any cloud environment.
+**Full Documentation → [NikhilSuthar.github.io/indian-market-data/mcx-spot](https://NikhilSuthar.github.io/indian-market-data/mcx-spot)**
+Part of the [indian-market-data](https://github.com/NikhilSuthar/indian-market-data) monorepo — also see [`nse-data`](https://pypi.org/project/nse-data/).
+```bash
+pip install mcx-data
+```
+## Quick Start
+```python
+from mcxdata import mcx
+# Today's spot prices — all 28 commodities
+df = mcx.get_spot_recent()
+# Single commodity
+df = mcx.get_spot_recent(commodity="GOLD")
+# Historical (requires specific commodity)
+df = mcx.get_spot_archive("2026-05-01", "2026-05-22", commodity="GOLD")
+df = mcx.get_spot_archive("2026-05-01", "2026-05-22", commodity="SILVER")
+# Download to S3
+mcx.download("spot", "market", "spot_recent",
+             s3_bucket="my-bucket", s3_prefix="raw/mcx/")
+# Available commodities (28)
+mcx.list_commodities()
+```
+## Datasets
+| Dataset | Description | Date Param |
+|---------|-------------|-----------|
+| `spot_recent` | Today's spot prices — all 28 commodities | None |
+| `spot_archive` | Historical spot prices by commodity + date range | `from_date`, `to_date` |
+## Available Commodities (28)
+`ALUMINI, ALUMINIUM, CARDAMOM, COPPER, COTTON, COTTONOIL, CPO, CRUDEOIL, CRUDEOILM, ELECDMBL, GOLD, GOLDGUINEA, GOLDM, GOLDPETAL, GOLDTEN, KAPAS, LEAD, LEADMINI, MENTHAOIL, NATGASMINI, NATURALGAS, NICKEL, SILVER, SILVERM, SILVERMIC, STEELREBAR, ZINC, ZINCMINI`
+## Notes
+- MCX archive requires a **specific commodity** — `"ALL"` returns empty (MCX API limitation)
+- Uses `curl-cffi` Chrome TLS impersonation to bypass MCX Akamai WAF
+- Lambda IPs are generally unblocked — works reliably on AWS
+## License
+MIT — data from [MCX India](https://www.mcxindia.com).

mcx_data-1.0.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,12 @@
+mcxdata/__init__.py,sha256=HTrEkyf_omqFka303BFOMFtlD07dKL0EwbGxlQjVL00,1261
+mcxdata/cli.py,sha256=5p8Jj9ES7EoqfB1a3IAZSUuOZ-esKaHDTdlRVeCh5iU,3508
+mcxdata/fetcher.py,sha256=u-vKwjTTjkLJtF3HnEnjJ5_99Zh6GMVqr5oZoRJJjYk,7553
+mcxdata/mcx.py,sha256=17pU0hwmYpGl5ZdUasoL8E9FTO8mBE75w6rfrGM9n7E,8303
+mcxdata/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mcxdata/registry.py,sha256=mSPdJjXtcsS6FGbVOSrdoBDokGZhVzGRYgltbf3Kce4,7336
+mcxdata/session.py,sha256=etMHrx96JgZ7DjFRBBSX-B3HWTe27fiha37q3Jd3YZU,2986
+mcx_data-1.0.0.dist-info/METADATA,sha256=So0uOTMksvy1xw6P0teBjNd7tipC4RB76ySM3NXs6Ao,3888
+mcx_data-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
+mcx_data-1.0.0.dist-info/entry_points.txt,sha256=vwAOj6RJNgiA9Ns-HN8-HhVfUL7lkexeNPkaGBAgm0k,46
+mcx_data-1.0.0.dist-info/top_level.txt,sha256=0VKxoDvrLWSzugxSPsJM4h-B69pluDTiuaRC-BnUzE4,8
+mcx_data-1.0.0.dist-info/RECORD,,

mcx_data-1.0.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (82.0.1)
+Root-Is-Purelib: true
+Tag: py3-none-any

mcx_data-1.0.0.dist-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ mcx-data = mcxdata.cli:main

mcx_data-1.0.0.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ mcxdata

mcxdata/__init__.py ADDED Viewed

@@ -0,0 +1,40 @@
+"""
+mcx-data — Download MCX India commodity market data as pandas DataFrames.
+MCX (Multi Commodity Exchange of India) is India's largest commodity exchange,
+trading futures and options on metals, energy, and agri commodities.
+Quick Start:
+    from mcxdata import mcx
+    # Today's spot prices (all commodities + locations)
+    df = mcx.get_spot_recent()
+    # Today's spot price for GOLD only
+    df = mcx.get_spot_recent(commodity="GOLD")
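+    # Historical spot prices for a date range (DD/MM/YYYY or YYYY-MM-DD).
+    # Pass a specific commodity: the MCX archive returns no rows for "ALL".
+    df = mcx.get_spot_archive("01/05/2026", "22/05/2026", commodity="GOLD")
+    df = mcx.get_spot_archive("2026-05-01", "2026-05-22", commodity="SILVER")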
+    # Generic API (mirrors nse-data pattern)
+    df = mcx.get("spot", "market", "spot_recent")
+    df = mcx.get("spot", "market", "spot_archive",
+                 from_date="01/05/2026", to_date="22/05/2026", commodity="GOLD")
+    # Download to S3
+    mcx.download("spot", "market", "spot_recent",
+                 s3_bucket="my-bucket", s3_prefix="raw/mcx/")
+    # List all available datasets
+    mcx.list_datasets()
+    # List available commodity names
+    mcx.list_commodities()
+See: https://NikhilSuthar.github.io/indian-market-data
+"""
+__version__ = "1.0.0"
+from mcxdata import mcx

mcxdata/cli.py ADDED Viewed

@@ -0,0 +1,91 @@
+"""
+mcx-data CLI — command line interface for downloading MCX data.
+Usage:
+    mcx-data spot-recent
+    mcx-data spot-archive --from 01/05/2026 --to 22/05/2026 --commodity GOLD
+    mcx-data list
+    mcx-data commodities
+"""
+import argparse
+import sys
+def main():
+    """
+    Entry point for the ``mcx-data`` console script.
+    Parses the command line and dispatches to the matching ``mcxdata.mcx`` call.
+    Sub-commands: ``list``, ``commodities``, ``spot-recent``, ``spot-archive``.
+    With no sub-command the help text is printed and the process exits with status 0.
+    """
+    # Report the installed distribution's version rather than a hard-coded
+    # literal (the previous "0.1.0" disagreed with the released 1.0.0).
+    try:
+        from importlib.metadata import version as _dist_version
+        pkg_version = _dist_version("mcx-data")
+    except ImportError:
+        # Covers PackageNotFoundError too (it subclasses ImportError), e.g. when
+        # running from a source checkout that was never pip-installed.
+        pkg_version = "1.0.0"
+    parser = argparse.ArgumentParser(
+        prog="mcx-data",
+        description="Download MCX India commodity market data as CSV",
+    )
+    parser.add_argument("--version", action="version", version=f"%(prog)s {pkg_version}")
+    sub = parser.add_subparsers(dest="command", help="Command")
+    # list command
+    sub.add_parser("list", help="List all available MCX datasets")
+    # commodities command
+    sub.add_parser("commodities", help="List available commodity names from MCX")
+    # spot recent
+    spot_recent = sub.add_parser("spot-recent", help="Download today's spot market prices")
+    spot_recent.add_argument("--commodity", default="ALL", help="Commodity name or ALL")
+    spot_recent.add_argument("--location",  default="ALL", help="Location name or ALL")
+    spot_recent.add_argument("--out",       default=".",   help="Output directory")
+    spot_recent.add_argument("--s3-bucket", default=None,  help="S3 bucket name")
+    spot_recent.add_argument("--s3-prefix", default="mcx-data/", help="S3 prefix")
+    # spot archive
+    spot_archive = sub.add_parser("spot-archive", help="Download historical spot market prices")
+    spot_archive.add_argument("--from", dest="from_date", required=True,
+                              help="From date DD/MM/YYYY")
+    spot_archive.add_argument("--to",   dest="to_date",   required=True,
+                              help="To date DD/MM/YYYY")
+    spot_archive.add_argument("--commodity", default="ALL", help="Commodity name or ALL")
+    spot_archive.add_argument("--out",       default=".",   help="Output directory")
+    spot_archive.add_argument("--s3-bucket", default=None,  help="S3 bucket name")
+    spot_archive.add_argument("--s3-prefix", default="mcx-data/", help="S3 prefix")
+    args = parser.parse_args()
+    if args.command is None:
+        parser.print_help()
+        sys.exit(0)
+    # Imported only after argument parsing, so --help / --version / usage errors
+    # are handled before the data layer is loaded.
+    from mcxdata import mcx
+    if args.command == "list":
+        df = mcx.list_datasets()
+        print(df.to_string(index=False))
+    elif args.command == "commodities":
+        print("Fetching commodity list from MCX...")
+        commodities = mcx.list_commodities()
+        for c in commodities:
+            print(f"  {c}")
+    elif args.command == "spot-recent":
+        print(f"Downloading MCX spot recent (commodity={args.commodity})...")
+        mcx.download("spot", "market", "spot_recent",
+                     commodity=args.commodity,
+                     location=args.location,
+                     output_dir=args.out,
+                     s3_bucket=args.s3_bucket,
+                     s3_prefix=args.s3_prefix)
+    elif args.command == "spot-archive":
+        print(f"Downloading MCX spot archive {args.from_date} → {args.to_date} (commodity={args.commodity})...")
+        mcx.download("spot", "market", "spot_archive",
+                     from_date=args.from_date,
+                     to_date=args.to_date,
+                     commodity=args.commodity,
+                     output_dir=args.out,
+                     s3_bucket=args.s3_bucket,
+                     s3_prefix=args.s3_prefix)
+    else:
+        parser.print_help()
+if __name__ == "__main__":
+    main()

mcxdata/fetcher.py ADDED Viewed

@@ -0,0 +1,211 @@
+"""
+MCX Data Fetcher — calls the two known MCX backpage.aspx endpoints.
+Endpoints (confirmed from browser devtools):
+  Recent:  POST https://www.mcxindia.com/backpage.aspx/GetSpotMarketPrice
+           Body: ""  (empty string)
+  Archive: POST https://www.mcxindia.com/backpage.aspx/GetSpotMarketArchive
+           Body: {"Product":"GOLD","Location":"ALL","Fromdate":"20260524","Session":"0","Todate":"20260524"}
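+Response format (one observed shape):
+  {"d": "[{\"Symbol\":\"GOLD\",\"Unit\":\"10 GRMS\",\"Location\":\"AHMEDABAD\",
+            \"TodaysSpotPrice\":\"157549.00\",\"Change\":\"Down\"}]"}
+  i.e. JSON where .d is a JSON-encoded string of the data array.
+  _parse_response() also accepts .d as an already-decoded list, or as an
+  object that carries the rows under a "Data" key.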
+"""
+import json
+import io
+import time
+from typing import Optional
+import pandas as pd
+from mcxdata.session import get_session, reset_session, MCX_BASE
+# ── Endpoint URLs ─────────────────────────────────────────────────────────────
+# Both are POST targets: fetch_recent() uses _URL_RECENT, fetch_archive() uses _URL_ARCHIVE.
+_URL_RECENT  = f"{MCX_BASE}/backpage.aspx/GetSpotMarketPrice"
+_URL_ARCHIVE = f"{MCX_BASE}/backpage.aspx/GetSpotMarketArchive"
+# Headers attached to every POST issued by _post(). Referer points at the MCX
+# spot-price page and Origin at the MCX site root.
+_POST_HEADERS = {
+    "Content-Type": "application/json; charset=UTF-8",
+    "Accept": "application/json, text/javascript, */*; q=0.01",
+    "X-Requested-With": "XMLHttpRequest",
+    "Referer": f"{MCX_BASE}/market-data/spot-market-price",
+    "Origin": MCX_BASE,
+}
+def fetch_recent(commodity: str = "ALL", location: str = "ALL",
+                 session_val: str = "0") -> pd.DataFrame:
+    """
+    POST GetSpotMarketPrice — returns today's spot prices for all commodities.
+    The request body is always the empty string; none of the arguments are
+    sent to MCX or otherwise read by this function.
+    Args:
+        commodity:   Not used by MCX for Recent (always returns all). Kept for API symmetry.
+        location:    Not used by MCX for Recent.
+        session_val: Ignored here; accepted only so the signature matches fetch_archive().
+    Returns:
+        DataFrame as produced by _parse_response() / _clean_df(): MCX's "Symbol",
+        "TodaysSpotPrice" and "Change" fields arrive renamed to "Commodity",
+        "Spot Price (Rs.)" and "Up/Down". Empty DataFrame when MCX returns no rows.
+    """
+    raw = _post(_URL_RECENT, body="")
+    return _parse_response(raw)
+def fetch_archive(from_date: str, to_date: str,
+                  commodity: str = "ALL", location: str = "ALL",
+                  session_val: str = "0") -> pd.DataFrame:
+    """
+    Fetch historical spot prices via POST GetSpotMarketArchive.
+    Args:
+        from_date:   Start date as YYYYMMDD, e.g. "20260501"
+        to_date:     End date as YYYYMMDD, e.g. "20260522"
+        commodity:   Commodity name such as "GOLD"; empty/None is sent as "ALL"
+        location:    Location name; empty/None is sent as "ALL"
+        session_val: "0"=Both, "1"=Session1, "2"=Session2
+    Returns:
+        DataFrame cleaned by _clean_df() (columns such as Commodity, Unit,
+        Location, Spot Price (Rs.), Up/Down, Date); empty when there are no rows.
+    """
+    # Key order mirrors the request body documented in the module docstring.
+    request_fields = {
+        "Product":  commodity or "ALL",
+        "Location": location or "ALL",
+        "Fromdate": from_date,
+        "Session":  session_val,
+        "Todate":   to_date,
+    }
+    encoded = json.dumps(request_fields)
+    return _parse_response(_post(_URL_ARCHIVE, body=encoded))
+def _post(url: str, body: str) -> dict:
+    """
+    POST to MCX backpage endpoint. Returns parsed JSON dict.
+    Makes at most three attempts (attempt indices 0, 1, 2).
+    Retry strategy on 403:
+      - Attempt 1: retry after 3s with same session (cookies preserved)
+      - Attempt 2: rebuild session (fresh warmup) and retry after 5s
+      - A 403 on the third attempt raises RuntimeError
+    Never destroy cookies on first 403 — Akamai uses session state.
+    Other failures:
+      - Any other non-200 status raises RuntimeError immediately (no retry).
+      - Any non-RuntimeError exception (including a failure inside r.json())
+        is retried after 2s; on the third attempt it is re-raised unchanged.
+    Args:
+        url:  Endpoint URL to POST to.
+        body: Raw request body, passed as ``data=``.
+    Returns:
+        Whatever ``r.json()`` yields for a 200 response.
+    Raises:
+        RuntimeError: On a persistent 403 or any other non-200 status.
+    """
+    session, stype = get_session()
+    for attempt in range(3):
+        try:
+            if stype == "curl_cffi":
+                # curl_cffi path: headers are passed per request
+                r = session.post(url, data=body, headers=_POST_HEADERS, timeout=25)
+            else:
+                # Other session types: headers are merged into the session itself
+                session.headers.update(_POST_HEADERS)
+                r = session.post(url, data=body, timeout=25)
+            if r.status_code == 403:
+                if attempt == 0:
+                    # First 403 — same session, just wait longer
+                    time.sleep(3)
+                    continue
+                elif attempt == 1:
+                    # Second 403 — rebuild session with fresh warmup
+                    reset_session()
+                    time.sleep(5)
+                    session, stype = get_session()
+                    continue
+                else:
+                    raise RuntimeError(f"HTTP 403: {url} (Akamai WAF blocking — ensure curl_cffi is installed)")
+            if r.status_code != 200:
+                raise RuntimeError(f"HTTP {r.status_code}: {url}")
+            return r.json()
+        except RuntimeError:
+            # Our own status errors are final — never retried
+            raise
+        except Exception as e:
+            # Transport / decode problems: two retries, then give up
+            if attempt < 2:
+                time.sleep(2)
+                continue
+            raise
+    # Reached only when the last loop iteration ended via `continue`
+    raise RuntimeError(f"Failed after 3 attempts: {url}")
+def _parse_response(raw: dict) -> pd.DataFrame:
+    """
+    Turn an MCX backpage JSON envelope into a cleaned DataFrame.
+    The rows live under the top-level "d" key, which may be:
+      - a JSON-encoded string (decoded first; blank string → empty DataFrame)
+      - an object holding the rows under "Data" (or "data")
+      - the list of rows itself
+    Returns an empty DataFrame when there are no rows.
+    Raises RuntimeError when "d" is missing or has any other type.
+    """
+    if "d" not in raw:
+        raise RuntimeError(f"Unexpected response format: {list(raw.keys())}")
+    payload = raw["d"]
+    if isinstance(payload, str):
+        if not payload.strip():
+            return pd.DataFrame()
+        payload = json.loads(payload)
+    if isinstance(payload, list):
+        rows = payload
+    elif isinstance(payload, dict):
+        rows = payload.get("Data", payload.get("data", []))
+    else:
+        raise RuntimeError(f"Cannot parse .d of type {type(payload)}: {str(payload)[:100]}")
+    return _clean_df(pd.DataFrame(rows)) if rows else pd.DataFrame()
+def _clean_df(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Normalise a raw MCX response frame.
+    Steps: drop ASP.NET/internal columns, rename MCX field names to readable
+    ones, convert the price column to numbers, convert .NET ``/Date(ms)/``
+    values to "YYYY-MM-DD HH:MM:SS" strings, and move the well-known columns
+    to the front. An empty frame is returned untouched.
+    """
+    if df.empty:
+        return df
+    # Internal ASP.NET type field ("__type" etc.) and junk columns
+    junk_names = ("ExtensionData", "EnSymbol", "Enlocation")
+    unwanted = [col for col in df.columns
+                if col.startswith("__") or col in junk_names]
+    df = df.drop(columns=unwanted, errors="ignore")
+    # MCX field names → readable column names (rename ignores absent keys)
+    df = df.rename(columns={
+        "Symbol":          "Commodity",
+        "TodaysSpotPrice": "Spot Price (Rs.)",
+        "Change":          "Up/Down",
+    })
+    # Price: strip thousands separators, then coerce to float (bad values → NaN)
+    if "Spot Price (Rs.)" in df.columns:
+        as_text = (
+            df["Spot Price (Rs.)"].astype(str)
+            .str.replace(",", "", regex=False)
+            .str.strip()
+        )
+        df["Spot Price (Rs.)"] = pd.to_numeric(as_text, errors="coerce")
+    # .NET JSON Date: /Date(milliseconds)/ → formatted timestamp string
+    if "Date" in df.columns:
+        import re as _re
+        net_date = _re.compile(r'/Date\((\d+)\)/')
+        def _from_net_date(value):
+            found = net_date.search(str(value))
+            if found is None:
+                return pd.NaT
+            return pd.to_datetime(int(found.group(1)), unit="ms")
+        df["Date"] = df["Date"].apply(_from_net_date).dt.strftime("%Y-%m-%d %H:%M:%S")
+    # Well-known columns first, anything else after in its existing order
+    wanted = ("Commodity", "Unit", "Location", "Spot Price (Rs.)", "Up/Down", "Date")
+    front = [col for col in wanted if col in df.columns]
+    rest = [col for col in df.columns if col not in front]
+    return df[front + rest].reset_index(drop=True)

mcxdata/mcx.py ADDED Viewed

@@ -0,0 +1,232 @@
+"""
+mcx-data public API.
+Usage:
+    from mcxdata import mcx
+    # Today's spot prices (all commodities)
+    df = mcx.get_spot_recent()
+    df = mcx.get_spot_recent(commodity="GOLD")
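+    # Historical spot prices (pass a specific commodity — the MCX archive
+    # returns no rows for the default "ALL")
+    df = mcx.get_spot_archive("2026-05-01", "2026-05-22", commodity="GOLD")
+    df = mcx.get_spot_archive("2026-05-01", "2026-05-22", commodity="SILVER")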
+    # Generic API (mirrors nse-data pattern)
+    df = mcx.get("spot", "market", "spot_recent")
+    df = mcx.get("spot", "market", "spot_archive",
+                 from_date="2026-05-01", to_date="2026-05-22", commodity="GOLD")
+    # Download to local file or S3
+    mcx.download("spot", "market", "spot_recent", output_dir="./data")
+    mcx.download("spot", "market", "spot_archive",
+                 from_date="2026-05-01", to_date="2026-05-22",
+                 s3_bucket="my-bucket", s3_prefix="raw/mcx/")
+    # List datasets / commodities
+    mcx.list_datasets()
+    mcx.list_commodities()
+"""
+import os
+import re
+from datetime import datetime
+from typing import Optional
+import pandas as pd
+from mcxdata.registry import list_datasets as _list_datasets
+from mcxdata.fetcher import fetch_recent, fetch_archive
+# ── Public API ────────────────────────────────────────────────────────────────
+def list_datasets(category: str = None) -> pd.DataFrame:
+    """Return the registered MCX datasets as a DataFrame (one row each), optionally limited to one category."""
+    return pd.DataFrame(_list_datasets(category))
+def list_commodities() -> list:
+    """
+    Return the sorted, de-duplicated commodity names present in today's spot data.
+    Uses get_spot_recent() so it always reflects live MCX data.
+    Returns:
+        Sorted list of names; an empty list when MCX returned no rows.
+    """
+    df = get_spot_recent()
+    # An empty response comes back as a column-less DataFrame, so indexing
+    # "Commodity" would raise KeyError — report "no commodities" instead.
+    if df.empty or "Commodity" not in df.columns:
+        return []
+    # dropna: sorted() cannot order NaN against strings
+    return sorted(df["Commodity"].dropna().unique().tolist())
+# ── Spot recent ───────────────────────────────────────────────────────────────
+def get_spot_recent(commodity: str = "ALL", location: str = "ALL") -> pd.DataFrame:
+    """
+    Get today's spot prices for all (or one) MCX commodity.
+    MCX always returns every commodity for the recent endpoint, so both
+    filters are applied client-side, case-insensitively.
+    Args:
+        commodity: "ALL" or name e.g. "GOLD", "SILVER", "CRUDEOIL"
+        location:  "ALL" or location name
+    Returns:
+        DataFrame — Commodity, Unit, Location, Spot Price (Rs.), Up/Down.
+        Empty DataFrame when MCX returned no rows or nothing matches.
+    Example:
+        df = mcx.get_spot_recent()
+        df = mcx.get_spot_recent(commodity="GOLD")
+    """
+    df = fetch_recent()
+    # An empty response has no columns at all; filtering it would raise
+    # KeyError on "Commodity"/"Location", so hand it back as-is.
+    if df.empty:
+        return df
+    # Filter client-side if a specific commodity requested
+    if commodity and commodity.upper() != "ALL":
+        mask = df["Commodity"].str.upper() == commodity.upper()
+        df = df[mask].reset_index(drop=True)
+    if location and location.upper() != "ALL":
+        mask = df["Location"].str.upper() == location.upper()
+        df = df[mask].reset_index(drop=True)
+    return df
+# ── Spot archive ──────────────────────────────────────────────────────────────
+def get_spot_archive(
+    from_date: str,
+    to_date: str,
+    commodity: str = "ALL",
+    location: str = "ALL",
+) -> pd.DataFrame:
+    """
+    Fetch historical spot prices from the MCX archive for a date range.
+    Args:
+        from_date: Range start, "YYYY-MM-DD" or "DD/MM/YYYY"  e.g. "2026-05-01"
+        to_date:   Range end,   "YYYY-MM-DD" or "DD/MM/YYYY"  e.g. "2026-05-22"
+        commodity: "ALL" or name e.g. "GOLD", "SILVER"
+        location:  "ALL"
+    Returns:
+        DataFrame — Commodity, Unit, Location, Date, Spot Price (Rs.), Up/Down
+    Raises:
+        ValueError: When either date is in an unrecognised format.
+    Example:
+        df = mcx.get_spot_archive("2026-05-01", "2026-05-22", commodity="GOLD")
+    """
+    # MCX wants both bounds as YYYYMMDD
+    start, end = (_to_yyyymmdd(bound) for bound in (from_date, to_date))
+    return fetch_archive(start, end, commodity=commodity, location=location)
+# ── Generic get() — mirrors nse-data API pattern ──────────────────────────────
+def get(
+    category: str,
+    subcategory: str,
+    dataset: str,
+    date: str = None,
+    *,
+    from_date: str = None,
+    to_date:   str = None,
+    commodity: str = "ALL",
+    location:  str = "ALL",
+    **kwargs,
+) -> pd.DataFrame:
+    """
+    Generic dataset getter — same call shape as nse-data's nse.get().
+    The registry entry's ``date_type`` selects the fetch path:
+      - "recent": today's prices; date arguments are ignored
+      - "range":  archive; needs from_date and to_date, and a lone ``date``
+                  fills in whichever of the two is missing
+    Extra keyword arguments are accepted and ignored.
+    Raises:
+        ValueError: Unknown dataset, missing range dates, or a date_type
+                    other than "recent"/"range".
+    Examples:
+        df = mcx.get("spot", "market", "spot_recent")
+        df = mcx.get("spot", "market", "spot_archive",
+                     from_date="2026-05-01", to_date="2026-05-22")
+    """
+    from mcxdata.registry import get_config
+    cfg = get_config(category, subcategory, dataset)
+    kind = cfg.date_type
+    if kind == "recent":
+        return get_spot_recent(commodity=commodity, location=location)
+    if kind != "range":
+        raise ValueError(f"Unsupported date_type '{cfg.date_type}' for '{dataset}'")
+    incomplete = not (from_date and to_date)
+    if incomplete and not date:
+        raise ValueError(
+            f"'{dataset}' requires from_date and to_date.\n"
+            "Example: mcx.get('spot','market','spot_archive', "
+            "from_date='2026-05-01', to_date='2026-05-22')"
+        )
+    if incomplete:
+        # A single `date` stands in for whichever bound was not supplied
+        from_date = from_date or date
+        to_date = to_date or date
+    return get_spot_archive(from_date, to_date,
+                            commodity=commodity, location=location)
+# ── download() — save to file or S3 ──────────────────────────────────────────
+def download(
+    category: str,
+    subcategory: str,
+    dataset: str,
+    date: str = None,
+    *,
+    from_date: str = None,
+    to_date:   str = None,
+    commodity: str = "ALL",
+    output_dir: str = ".",
+    s3_bucket: Optional[str] = None,
+    s3_prefix: str = "mcx-data/",
+    **kwargs,
+) -> str:
+    """
+    Download MCX dataset and save to local file or S3.
+    The data is fetched through get() and written as CSV named
+    ``MCX_<dataset>_<COMMODITY>_<YYYYMMDD>.csv``. The date stamp comes from
+    to_date, else from_date, else date, else today.
+    Args:
+        category, subcategory, dataset: Registry path of the dataset.
+        date / from_date / to_date:     Forwarded to get().
+        commodity:  Commodity filter; also part of the file name.
+        output_dir: Local directory (created if missing); used when s3_bucket is not set.
+        s3_bucket:  When set, upload to this bucket instead of writing locally.
+        s3_prefix:  Key prefix for the upload; may be empty.
+        **kwargs:   Forwarded to get() (e.g. ``location``).
+    Returns:
+        The saved path or S3 URI.
+    """
+    df = get(category, subcategory, dataset, date,
+             from_date=from_date, to_date=to_date,
+             commodity=commodity, **kwargs)
+    # Build filename
+    stamp_src = (to_date or from_date or date or datetime.today().strftime("%Y-%m-%d"))
+    try:
+        # Normalise first so "22/05/2026" yields "20260522", not "22052026"
+        ts = _to_yyyymmdd(stamp_src)
+    except ValueError:
+        # Unparseable (possible for "recent" datasets, where the date is not
+        # validated upstream): fall back to keeping the digits only
+        ts = re.sub(r"[^0-9]", "", stamp_src)
+    safe_comm = commodity.replace(" ", "_").upper()
+    fname = f"MCX_{dataset}_{safe_comm}_{ts}.csv"
+    if s3_bucket:
+        import boto3
+        prefix = (s3_prefix or "").rstrip("/")
+        # An empty prefix must not produce a key that starts with "/"
+        key = f"{prefix}/{fname}" if prefix else fname
+        boto3.client("s3").put_object(
+            Bucket=s3_bucket, Key=key,
+            Body=df.to_csv(index=False).encode("utf-8"),
+            ContentType="text/csv",
+        )
+        uri = f"s3://{s3_bucket}/{key}"
+        print(f"✓ {dataset} → {uri}")
+        return uri
+    else:
+        os.makedirs(output_dir, exist_ok=True)
+        path = os.path.join(output_dir, fname)
+        df.to_csv(path, index=False)
+        print(f"✓ {dataset} → {path}")
+        return path
+# ── Internal helpers ──────────────────────────────────────────────────────────
+def _to_yyyymmdd(date_str: str) -> str:
+    """
+    Convert a date to YYYYMMDD (MCX format for archive).
+    Accepted inputs: "YYYY-MM-DD", "DD/MM/YYYY", "DD-MM-YYYY", "YYYYMMDD"
+    (surrounding whitespace ignored), or any object with ``strftime``
+    such as ``datetime.date`` / ``datetime.datetime``.
+    An 8-digit string is always read as YYYYMMDD, never DDMMYYYY.
+    Raises:
+        ValueError: Unrecognised layout, or digits that do not form a real
+                    calendar date (e.g. "20261399").
+    """
+    # date / datetime objects: format directly instead of failing on .strip()
+    if hasattr(date_str, "strftime"):
+        return date_str.strftime("%Y%m%d")
+    s = date_str.strip()
+    layouts = (
+        (r'\d{8}',             "%Y%m%d"),    # already YYYYMMDD — still validated
+        (r'\d{4}-\d{2}-\d{2}', "%Y-%m-%d"),  # YYYY-MM-DD
+        (r'\d{2}/\d{2}/\d{4}', "%d/%m/%Y"),  # DD/MM/YYYY
+        (r'\d{2}-\d{2}-\d{4}', "%d-%m-%Y"),  # DD-MM-YYYY
+    )
+    for pattern, fmt in layouts:
+        if re.fullmatch(pattern, s):
+            # strptime rejects impossible dates with ValueError
+            return datetime.strptime(s, fmt).strftime("%Y%m%d")
+    raise ValueError(
+        f"Unrecognised date format: '{date_str}'. "
+        "Use YYYY-MM-DD, DD/MM/YYYY, or YYYYMMDD."
+    )

mcxdata/py.typed ADDED Viewed

File without changes

mcxdata/registry.py ADDED Viewed

@@ -0,0 +1,147 @@
+"""
+MCX Dataset Registry — single source of truth for all supported datasets.
+Each DatasetConfig describes how to fetch one dataset:
+  - page_url:       MCX page URL to load (for WebForms scraping)
+  - export_target:  __doPostBack target for Excel export (or None for HTML parse)
+  - commodity_field / location_field:         Form field names for the commodity / location filters
+  - commodity_select_id / location_select_id: <select> element IDs for those filters
+  - date_field:     Form field name for date/from-date
+  - to_date_field:  Form field name for to-date (range only)
+  - date_type:      "recent" | "daily" | "range" | "monthly" | "static"
+  - df_supported:   True if data can be returned as DataFrame (parsed table)
+  - download_only:  True if only Excel export is supported
+Naming convention mirrors nse-data: REGISTRY[category][subcategory][dataset_key]
+"""
+from dataclasses import dataclass, field
+from typing import Optional, Literal
+@dataclass
+class DatasetConfig:
+    """
+    Configuration for one MCX dataset.
+    Only ``name``, ``description`` and ``page_url`` are required; every other
+    field has a default. Field order is part of the constructor's positional
+    interface, so new fields belong at the end.
+    """
+    name: str                             # Human-readable dataset title
+    description: str                      # Longer description shown in listings
+    page_url: str                         # MCX page to load
+    table_id: Optional[str] = None        # HTML table ID to parse (None = first table)
+    export_target: Optional[str] = None   # __doPostBack target for Excel export
+    commodity_field: Optional[str] = None # Form field name for commodity filter
+    location_field: Optional[str] = None  # Form field name for location filter
+    date_field: Optional[str] = None      # Form field name for (from-)date
+    to_date_field: Optional[str] = None   # Form field name for to-date (range)
+    commodity_select_id: Optional[str] = None  # <select> element ID
+    location_select_id: Optional[str] = None   # <select> element ID
+    # How the dataset is addressed in time; defaults to "recent"
+    date_type: Literal["recent", "daily", "range", "monthly", "static"] = "recent"
+    date_format: str = "%d/%m/%Y"         # How MCX expects dates in POST
+    file_format: str = "html"             # "html" (parse table) or "excel"
+    df_supported: bool = True             # Can be returned as a DataFrame
+    download_only: bool = False           # Only a file export is available
+    portal_only: bool = False             # NOTE(review): not read anywhere in the visible code — confirm intent
+    skip_rows: int = 0                    # NOTE(review): presumably header rows to skip when parsing — confirm
+    frequency: str = "Daily"              # Update-frequency label shown in listings
+    notes: str = ""                       # Free-form remarks shown in listings
+MCX_BASE = "https://www.mcxindia.com"
+# ─── REGISTRY ──────────────────────────────────────────────────────────────────
+# Nested lookup table: REGISTRY[category][subcategory][dataset_key] -> DatasetConfig.
+# All keys are lower-case; get_config() lower-cases its arguments before indexing.
+REGISTRY: dict = {
+    # ══════════════════════════════════════════════════════════════════════════
+    # SPOT MARKET
+    # URL: https://www.mcxindia.com/market-data/spot-market-price
+    #
+    # "Recent" tab: shows today's spot prices (all commodities + locations)
+    #   — No filters on the page
+    #
+    # "Archives" tab: filter by Commodity + date range
+    #
+    # NOTE(review): the page/export/form-field values below describe the
+    # WebForms page. The visible mcx.get() consults only `date_type`, and the
+    # fetcher POSTs to the backpage.aspx JSON endpoints instead — confirm
+    # whether the WebForms fields are still needed.
+    # ══════════════════════════════════════════════════════════════════════════
+    "spot": {
+        "market": {
+            # Recent spot prices (today's data — no date param needed)
+            "spot_recent": DatasetConfig(
+                name="Spot Market Price — Recent",
+                description=(
+                    "Current day spot prices for all commodities and locations. "
+                    "Commodity, Unit, Location, Spot Price (Rs.), and Up/Down."
+                ),
+                page_url=f"{MCX_BASE}/market-data/spot-market-price",
+                table_id=None,  # parse first data table
+                export_target="ctl00$cph_InnerContainerRight$C004$lnkExpToExcel",
+                commodity_field="ctl00$cph_InnerContainerRight$C004$ddlCommodity",
+                location_field="ctl00$cph_InnerContainerRight$C004$ddlLocation",
+                date_type="recent",
+                file_format="html",
+                df_supported=True,
+                frequency="Daily (intraday)",
+                notes="Recent tab — no date filter. Exports all commodity spot prices as of market close.",
+            ),
+            # Archive spot prices (historical — date range + commodity filter)
+            "spot_archive": DatasetConfig(
+                name="Spot Market Price — Archive",
+                description=(
+                    "Historical spot prices with date range and optional commodity filter. "
+                    "Returns daily spot price series per commodity."
+                ),
+                page_url=f"{MCX_BASE}/market-data/spot-market-price",
+                export_target="ctl00$cph_InnerContainerRight$C004$lnkExpToExcelArchive",
+                commodity_field="ctl00$cph_InnerContainerRight$C004$ddlCommodityArchive",
+                date_field="ctl00$cph_InnerContainerRight$C004$txtFromDate",
+                to_date_field="ctl00$cph_InnerContainerRight$C004$txtToDate",
+                commodity_select_id="cph_InnerContainerRight_C004_ddlCommodityArchive",
+                date_type="range",
+                date_format="%d/%m/%Y",
+                file_format="html",
+                df_supported=True,
+                frequency="Daily",
+                notes=(
+                    "Archives tab — requires from_date + to_date (DD/MM/YYYY). "
+                    "Commodity filter: use 'ALL' for all commodities or specific name e.g. 'GOLD'."
+                ),
+            ),
+        },
+    },
+}
+# ─── Helper functions ─────────────────────────────────────────────────────────
+def get_config(category: str, subcategory: str, dataset: str) -> DatasetConfig:
+    """
+    Resolve ``category/subcategory/dataset`` (case-insensitive) to its DatasetConfig.
+    Raises:
+        ValueError: No such registry entry; the message lists every available path.
+    """
+    cat, sub, key = category.lower(), subcategory.lower(), dataset.lower()
+    try:
+        return REGISTRY[cat][sub][key]
+    except KeyError:
+        known = [
+            f"{row['category']}/{row['subcategory']}/{row['dataset']}"
+            for row in list_datasets()
+        ]
+        raise ValueError(
+            f"Unknown MCX dataset: '{category}/{subcategory}/{dataset}'.\n"
+            f"Available: {known}"
+        )
+def list_datasets(category: str = None) -> list:
+    """
+    Describe every registered dataset as a plain dict, in registry order.
+    Args:
+        category: Optional category name (case-insensitive); falsy means all.
+    Returns:
+        List of dicts with keys category, subcategory, dataset, name,
+        description, frequency, date_type, df_supported, format, notes.
+    """
+    wanted = category.lower() if category else None
+    rows = []
+    for cat, subs in REGISTRY.items():
+        if wanted is not None and cat != wanted:
+            continue
+        for sub, datasets in subs.items():
+            rows.extend(
+                {
+                    "category":    cat,
+                    "subcategory": sub,
+                    "dataset":     key,
+                    "name":        cfg.name,
+                    "description": cfg.description,
+                    "frequency":   cfg.frequency,
+                    "date_type":   cfg.date_type,
+                    # download-only datasets are never offered as DataFrames
+                    "df_supported": cfg.df_supported and not cfg.download_only,
+                    "format":      cfg.file_format,
+                    "notes":       cfg.notes,
+                }
+                for key, cfg in datasets.items()
+            )
+    return rows

mcxdata/session.py ADDED Viewed

@@ -0,0 +1,96 @@
+"""
+Shared HTTP session for mcx-data.
+MCX India uses Akamai WAF which blocks plain Python requests.
+Solution: curl_cffi with Chrome TLS fingerprint impersonation (same approach that
+works for niftyindices.com TRI in nse-data).
+Priority:
+  1. curl_cffi  — Chrome TLS impersonation, bypasses Akamai
+  2. cloudscraper — partial Akamai bypass
+  3. requests    — fallback (may 403 on Akamai-blocked IPs)
+"""
+import time
+from typing import Optional, Tuple
+# Site root; fetcher.py builds its endpoint URLs from this
+MCX_BASE = "https://www.mcxindia.com"
+# Page fetched by _warmup() before any data request
+MCX_SPOT_PAGE = f"{MCX_BASE}/market-data/spot-market-price"
+# Default headers applied to every session built by _build_session()
+_HEADERS = {
+    "User-Agent": (
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+        "AppleWebKit/537.36 (KHTML, like Gecko) "
+        "Chrome/124.0.0.0 Safari/537.36"
+    ),
+    "Accept-Language": "en-IN,en-US;q=0.9,en;q=0.8",
+}
+# Module-level session cache (filled by get_session(), cleared by reset_session())
+_SESSION = None
+_SESSION_TYPE: Optional[str] = None
+def get_session() -> Tuple[object, str]:
+    """Return the cached ``(session, type_str)`` pair, building it on first use."""
+    global _SESSION, _SESSION_TYPE
+    if _SESSION is not None:
+        return _SESSION, _SESSION_TYPE
+    _SESSION, _SESSION_TYPE = _build_session()
+    return _SESSION, _SESSION_TYPE
+def reset_session() -> None:
+    """Drop the cached session so the next get_session() builds a fresh one (e.g. after 403)."""
+    global _SESSION, _SESSION_TYPE
+    _SESSION = _SESSION_TYPE = None
+def _build_session() -> Tuple[object, str]:
+    """
+    Build the best available session and warm it up with the MCX page.
+    Tries, in order: curl_cffi, cloudscraper, plain requests. Each candidate
+    gets the shared _HEADERS and one _warmup() call. A candidate that cannot be
+    imported or fails during setup is skipped (with a printed message, except
+    for a missing cloudscraper, which is skipped silently).
+    Returns:
+        (session, type_str) where type_str is "curl_cffi", "cloudscraper" or "requests".
+    """
+    # 1. curl_cffi — best for Akamai (exact Chrome TLS fingerprint)
+    try:
+        from curl_cffi.requests import Session as CurlSession
+        s = CurlSession(impersonate="chrome124")
+        s.headers.update(_HEADERS)
+        _warmup(s, "curl_cffi")
+        return s, "curl_cffi"
+    except ImportError as e:
+        print(f"curl_cffi not available: {e}")
+    except Exception as e:
+        print(f"curl_cffi failed to init: {e}")
+    # 2. cloudscraper — JS challenge solver
+    try:
+        import cloudscraper
+        s = cloudscraper.create_scraper(
+            browser={"browser": "chrome", "platform": "windows", "mobile": False}
+        )
+        s.headers.update(_HEADERS)
+        _warmup(s, "cloudscraper")
+        return s, "cloudscraper"
+    except ImportError:
+        # cloudscraper is optional — fall through without a message
+        pass
+    except Exception as e:
+        print(f"cloudscraper failed: {e}")
+    # 3. Plain requests fallback (not wrapped in try: an error here propagates)
+    print("WARNING: falling back to plain requests — MCX may return 403 (Akamai WAF)")
+    import requests
+    s = requests.Session()
+    s.headers.update(_HEADERS)
+    _warmup(s, "requests")
+    return s, "requests"
+def _warmup(session, stype: str) -> None:
+    """
+    GET the MCX spot page to acquire session cookies.
+    Best-effort: a failed warm-up never raises — the caller goes on to use the
+    session anyway — but the failure is reported instead of being swallowed.
+    Args:
+        session: Any object exposing ``get(url, timeout=...)``.
+        stype:   Session kind label ("curl_cffi", "cloudscraper" or "requests");
+                 used only in the failure message.
+    """
+    try:
+        session.get(MCX_SPOT_PAGE, timeout=15)
+    except Exception as e:
+        print(f"{stype} warmup failed: {e}")
+        return
+    time.sleep(1.5)  # Akamai needs time to recognise the session