PyPI - finfetch - Versions diffs - 0.1.0__py3-none-any.whl - Mend

finfetch 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

finfetch/__init__.py +25 -0
finfetch/core.py +622 -0
finfetch/exceptions.py +57 -0
finfetch/models.py +315 -0
finfetch/parser.py +212 -0
finfetch/utils.py +128 -0
finfetch-0.1.0.dist-info/METADATA +188 -0
finfetch-0.1.0.dist-info/RECORD +11 -0
finfetch-0.1.0.dist-info/WHEEL +5 -0
finfetch-0.1.0.dist-info/licenses/LICENSE +21 -0
finfetch-0.1.0.dist-info/top_level.txt +1 -0

finfetch/__init__.py ADDED Viewed

@@ -0,0 +1,25 @@
+"""finfetch — Free financial data for Indian stocks. No API key needed."""
+from .core import Ticker, Tickers, get_price
+from .models import search
+from .exceptions import (
+    DataUnavailableError,
+    FinFetchError,
+    RateLimitError,
+    ScrapingError,
+    TickerNotFoundError,
+)
+# Package version string.
+__version__ = "0.1.0"
+# Public names of the package: everything listed here is imported above
+# from the .core, .models and .exceptions submodules.
+__all__ = [
+    "Ticker",
+    "Tickers",
+    "get_price",
+    "search",
+    # exceptions
+    "FinFetchError",
+    "TickerNotFoundError",
+    "DataUnavailableError",
+    "RateLimitError",
+    "ScrapingError",
+]

finfetch/core.py ADDED Viewed

@@ -0,0 +1,622 @@
+"""Core module: Ticker / Tickers classes and data-source implementations."""
+from __future__ import annotations
+import logging
+import os
+from abc import ABC, abstractmethod
+import pandas as pd
+import requests
+from bs4 import BeautifulSoup
+from .exceptions import DataUnavailableError, RateLimitError, ScrapingError
+from .models import TickerInfo, canonical_to_snake, resolve_ticker
+from .parser import (
+    parse_html_table,
+    parse_screener_top_ratios,
+    standardize_dataframe,
+)
+from .utils import (
+    DEFAULT_HEADERS,
+    TTLCache,
+    _rate_limiter,
+    retry_with_backoff,
+)
+logger = logging.getLogger("finfetch")
+# ======================================================================
+# Internal data sources
+# ======================================================================
+class _BaseSource(ABC):
+    """Common interface implemented by every internal data source.
+    Subclasses override ``name`` and ``SUPPORTED_SECTIONS`` and provide
+    :meth:`fetch`.
+    """
+    # Short identifier of the source.
+    name: str = "base"
+    # Section keys the source can serve; empty on the base class.
+    SUPPORTED_SECTIONS: list[str] = []
+    def supports(self, section: str) -> bool:
+        """Return ``True`` when *section* is listed in ``SUPPORTED_SECTIONS``."""
+        handled = self.SUPPORTED_SECTIONS
+        return section in handled
+    @abstractmethod
+    def fetch(self, ticker: str, section: str, **kw) -> pd.DataFrame | None:
+        """Return the table for *section* of *ticker*, or ``None`` if there is none."""
+        ...
+# ----------------------------------------------------------------------
+# Screener.in
+# ----------------------------------------------------------------------
+class _ScreenerSource(_BaseSource):
+    """Data source that scrapes company pages on screener.in.
+    When both ``SCREENER_EMAIL`` and ``SCREENER_PASSWORD`` are set in the
+    environment, a login is attempted while the source is constructed.
+    The login is best-effort: if a request fails, the error is logged and
+    the source keeps working with a session that is not logged in.
+    """
+    name = "screener"
+    BASE_URL = "https://www.screener.in"
+    SUPPORTED_SECTIONS = [
+        "profit-loss", "quarters", "balance-sheet",
+        "cash-flow", "ratios", "shareholding",
+    ]
+    def __init__(self) -> None:
+        self._session = requests.Session()
+        self._session.headers.update(
+            {**DEFAULT_HEADERS, "Referer": f"{self.BASE_URL}/"}
+        )
+        self._logged_in = False
+        email = os.getenv("SCREENER_EMAIL", "")
+        password = os.getenv("SCREENER_PASSWORD", "")
+        if email and password:
+            try:
+                self._login(email, password)
+            except requests.RequestException as exc:
+                # A failed login must not make the whole source unusable:
+                # an exception escaping the constructor makes
+                # _get_sources() skip this source for the rest of the process.
+                logger.warning(
+                    "screener.in login failed; continuing without login: %s", exc
+                )
+    def _login(self, email: str, password: str) -> None:
+        """Submit the login form and record the outcome in ``_logged_in``.
+        Parameters
+        ----------
+        email : str
+            Value sent as the form's ``username`` field.
+        password : str
+            Value sent as the form's ``password`` field.
+        Raises
+        ------
+        requests.RequestException
+            If either request fails or returns an error status.
+        """
+        _rate_limiter.wait("screener.in")
+        page = self._session.get(f"{self.BASE_URL}/login/", timeout=15)
+        page.raise_for_status()
+        soup = BeautifulSoup(page.text, "lxml")
+        csrf = soup.find("input", {"name": "csrfmiddlewaretoken"})
+        # .get() instead of ["value"]: an input without a value attribute
+        # is treated like a missing token rather than raising KeyError.
+        token = csrf.get("value") if csrf is not None else None
+        if not token:
+            # No CSRF token on the page: skip the login attempt.
+            return
+        _rate_limiter.wait("screener.in")
+        resp = self._session.post(
+            f"{self.BASE_URL}/login/",
+            data={
+                "username": email,
+                "password": password,
+                "csrfmiddlewaretoken": token,
+            },
+            headers={"Referer": f"{self.BASE_URL}/login/"},
+            allow_redirects=True,
+            timeout=15,
+        )
+        resp.raise_for_status()
+        # Treated as logged in when the final URL (after redirects) no
+        # longer contains "login".
+        self._logged_in = "login" not in resp.url.lower()
+    def fetch(self, ticker: str, section: str, **kw) -> pd.DataFrame | None:
+        """Fetch the first table inside the page element whose id is *section*.
+        Parameters
+        ----------
+        ticker : str
+            Symbol used in the company page URL.
+        section : str
+            Element id to look for (a ``<section>`` or, failing that, a ``<div>``).
+        **kw
+            ``consolidated`` (default ``True``) selects the ``consolidated``
+            or ``standalone`` page.
+        Returns
+        -------
+        pandas.DataFrame or None
+            Parsed table, or ``None`` when the element or its table is missing.
+        Raises
+        ------
+        RateLimitError
+            If the site answers with HTTP 429.
+        """
+        consolidated = kw.get("consolidated", True)
+        url_type = "consolidated" if consolidated else "standalone"
+        url = f"{self.BASE_URL}/company/{ticker}/{url_type}/"
+        def _do() -> requests.Response:
+            _rate_limiter.wait("screener.in")
+            r = self._session.get(url, timeout=15)
+            if r.status_code == 429:
+                raise RateLimitError("screener.in")
+            r.raise_for_status()
+            return r
+        resp = retry_with_backoff(_do)
+        soup = BeautifulSoup(resp.text, "lxml")
+        el = soup.find("section", id=section) or soup.find("div", id=section)
+        if el is None:
+            return None
+        table = el.find("table")
+        if table is None:
+            return None
+        return parse_html_table(table)
+# ----------------------------------------------------------------------
+# Trendlyne
+# ----------------------------------------------------------------------
+class _TrendlyneSource(_BaseSource):
+    """Data source that scrapes the fundamentals pages on trendlyne.com."""
+    name = "trendlyne"
+    BASE_URL = "https://trendlyne.com"
+    SUPPORTED_SECTIONS = [
+        "profit-loss", "quarters", "balance-sheet", "cash-flow", "ratios",
+    ]
+    # Section key -> URL path segment of the matching fundamentals page.
+    _SECTION_URL = {
+        "profit-loss": "income-statement",
+        "quarters": "quarterly-results",
+        "balance-sheet": "balance-sheet",
+        "cash-flow": "cash-flow",
+        "ratios": "financial-ratios",
+    }
+    def __init__(self) -> None:
+        self._session = requests.Session()
+        self._session.headers.update(
+            {**DEFAULT_HEADERS, "Referer": f"{self.BASE_URL}/"}
+        )
+    def fetch(self, ticker: str, section: str, **kw) -> pd.DataFrame | None:
+        """Fetch and parse the table on the page for *section*.
+        Request failures are not raised: they are logged at debug level
+        and reported as ``None``.
+        Returns
+        -------
+        pandas.DataFrame or None
+            Parsed table, or ``None`` when the section is unknown, the
+            request fails, or the page contains no table.
+        """
+        seg = self._SECTION_URL.get(section)
+        if seg is None:
+            return None
+        url = f"{self.BASE_URL}/fundamentals/{seg}/{ticker}/"
+        def _do() -> requests.Response:
+            _rate_limiter.wait("trendlyne.com")
+            r = self._session.get(url, timeout=15)
+            if r.status_code == 429:
+                raise RateLimitError("trendlyne.com")
+            r.raise_for_status()
+            return r
+        try:
+            resp = retry_with_backoff(_do)
+        except Exception as exc:
+            # Still best-effort, but leave a trace instead of failing silently.
+            logger.debug("trendlyne request failed for %s (%s): %s", ticker, section, exc)
+            return None
+        soup = BeautifulSoup(resp.text, "lxml")
+        # Prefer the dedicated table class; otherwise take the first table.
+        table = soup.find("table", class_="financial-table") or soup.find("table")
+        if table is None:
+            return None
+        return parse_html_table(table)
+# ----------------------------------------------------------------------
+# MoneyControl
+# ----------------------------------------------------------------------
+class _MoneyControlSource(_BaseSource):
+    """Data source that scrapes the financials pages on moneycontrol.com.
+    The page URL needs a company path and id, which are looked up first
+    through the site's autosuggestion endpoint (see :meth:`_search`).
+    """
+    name = "moneycontrol"
+    BASE_URL = "https://www.moneycontrol.com"
+    SEARCH_URL = (
+        "https://www.moneycontrol.com/mccode/common/autosuggestion_solr.php"
+    )
+    SUPPORTED_SECTIONS = [
+        "profit-loss", "quarters", "balance-sheet", "cash-flow", "ratios",
+    ]
+    # Section key -> URL path segment of the matching financials page.
+    _SECTION_URL = {
+        "profit-loss": "profit-lossVI",
+        "quarters": "quarterly-resultsVI",
+        "balance-sheet": "balance-sheetVI",
+        "cash-flow": "cash-flowVI",
+        "ratios": "ratiosVI",
+    }
+    def __init__(self) -> None:
+        self._session = requests.Session()
+        self._session.headers.update(
+            {**DEFAULT_HEADERS, "Referer": f"{self.BASE_URL}/"}
+        )
+    def _search(self, ticker: str) -> dict | None:
+        """Look up *ticker* and return the first usable match.
+        Returns
+        -------
+        dict or None
+            ``{"name", "id", "url_path"}`` taken from the first response
+            line with at least three ``|``-separated fields, or ``None``
+            when nothing matches or the request fails.
+        """
+        _rate_limiter.wait("moneycontrol.com")
+        try:
+            resp = self._session.get(
+                self.SEARCH_URL,
+                params={"classic": "true", "query": ticker, "type": "1", "format": "json"},
+                timeout=10,
+            )
+            resp.raise_for_status()
+            # NOTE(review): the request asks for format=json but the body is
+            # parsed as pipe-delimited lines — confirm the real response format.
+            for line in resp.text.strip().split("\n"):
+                parts = line.split("|")
+                if len(parts) >= 3:
+                    return {"name": parts[0].strip(), "id": parts[1].strip(), "url_path": parts[2].strip()}
+        except Exception as exc:
+            # Still best-effort, but leave a trace instead of failing silently.
+            logger.debug("moneycontrol search failed for %s: %s", ticker, exc)
+        return None
+    def fetch(self, ticker: str, section: str, **kw) -> pd.DataFrame | None:
+        """Fetch and parse the table on the financials page for *section*.
+        Request failures are not raised: they are logged at debug level
+        and reported as ``None``.
+        Parameters
+        ----------
+        ticker : str
+            Query passed to the company search.
+        section : str
+            One of the keys of ``_SECTION_URL``.
+        **kw
+            ``consolidated`` (default ``True``) selects the ``consolidated``
+            or ``standalone`` page.
+        Returns
+        -------
+        pandas.DataFrame or None
+            Parsed table, or ``None`` when the section is unknown, the
+            company is not found, the request fails, or no table is present.
+        """
+        seg = self._SECTION_URL.get(section)
+        if seg is None:
+            return None
+        company = self._search(ticker)
+        if company is None:
+            return None
+        consolidated = kw.get("consolidated", True)
+        cons = "consolidated" if consolidated else "standalone"
+        url = f"{self.BASE_URL}/financials/{company['url_path']}/{seg}/{company['id']}/{cons}"
+        def _do() -> requests.Response:
+            _rate_limiter.wait("moneycontrol.com")
+            r = self._session.get(url, timeout=15)
+            if r.status_code == 429:
+                raise RateLimitError("moneycontrol.com")
+            r.raise_for_status()
+            return r
+        try:
+            resp = retry_with_backoff(_do)
+        except Exception as exc:
+            logger.debug("moneycontrol request failed for %s (%s): %s", ticker, section, exc)
+            return None
+        soup = BeautifulSoup(resp.text, "lxml")
+        # Try the known table classes first, then fall back to the first table.
+        table = (
+            soup.find("table", class_="mctable1")
+            or soup.find("table", class_="table4")
+            or soup.find("table")
+        )
+        if table is None:
+            return None
+        return parse_html_table(table)
+# ----------------------------------------------------------------------
+# Yahoo Finance (via yfinance)
+# ----------------------------------------------------------------------
+class _YFinanceSource(_BaseSource):
+    """Data source backed by the ``yfinance`` package (imported lazily)."""
+    name = "yfinance"
+    SUPPORTED_SECTIONS = [
+        "profit-loss", "quarters", "balance-sheet", "cash-flow",
+    ]
+    # Section key -> name of the yfinance ``Ticker`` attribute to read.
+    _SECTION_ATTR = {
+        "profit-loss": "financials",
+        "quarters": "quarterly_financials",
+        "balance-sheet": "balance_sheet",
+        "cash-flow": "cashflow",
+    }
+    # Row label as delivered by yfinance -> replacement row label.
+    _ITEM_MAP = {
+        "Total Revenue": "Revenue",
+        "Cost Of Revenue": "COGS",
+        "Gross Profit": "Gross Profit",
+        "Operating Income": "Operating Profit",
+        "Operating Expense": "Operating Expenses",
+        "Ebitda": "EBITDA",
+        "EBITDA": "EBITDA",
+        "Interest Expense": "Interest",
+        "Net Income": "Net Profit",
+        "Basic EPS": "EPS",
+        "Diluted EPS": "Diluted EPS",
+        "Tax Provision": "Tax",
+        "Pretax Income": "Profit Before Tax",
+        "Total Assets": "Total Assets",
+        "Total Liabilities Net Minority Interest": "Total Liabilities",
+        "Stockholders Equity": "Shareholders Equity",
+        "Total Debt": "Total Debt",
+        "Current Assets": "Current Assets",
+        "Current Liabilities": "Current Liabilities",
+        "Cash And Cash Equivalents": "Cash",
+        "Net PPE": "Fixed Assets",
+        "Inventory": "Inventory",
+        "Accounts Receivable": "Receivables",
+        "Operating Cash Flow": "Operating Cash Flow",
+        "Investing Cash Flow": "Investing Cash Flow",
+        "Financing Cash Flow": "Financing Cash Flow",
+        "Free Cash Flow": "Free Cash Flow",
+        "Capital Expenditure": "CapEx",
+    }
+    # Renamed rows holding per-share figures; they are not currency totals
+    # and therefore must not be rescaled to Crores.
+    _PER_SHARE_ITEMS = frozenset({"EPS", "Diluted EPS"})
+    @staticmethod
+    def _yf_symbol(ticker: str) -> str:
+        """Return *ticker* with ``.NS`` appended unless it already ends in ``.NS``/``.BO``."""
+        if not ticker.endswith((".NS", ".BO")):
+            return f"{ticker}.NS"
+        return ticker
+    def fetch(self, ticker: str, section: str, **kw) -> pd.DataFrame | None:
+        """Return the statement for *section*, oldest period first.
+        Only the statement that was asked for is requested from yfinance.
+        Rows are renamed through ``_ITEM_MAP``, date columns become
+        ``"%b %Y"`` strings, and values are divided by 1e7 (Crores) except
+        for the per-share rows in ``_PER_SHARE_ITEMS``.
+        Returns
+        -------
+        pandas.DataFrame or None
+            ``None`` when the section is unknown, the lookup fails, or
+            yfinance returns nothing.
+        """
+        import yfinance as yf
+        sym = self._yf_symbol(ticker)
+        t = yf.Ticker(sym)
+        attr = self._SECTION_ATTR.get(section)
+        if attr is None:
+            return None
+        try:
+            # Read just one property: building a dict of all four would
+            # evaluate (and download) every statement on each call.
+            raw: pd.DataFrame | None = getattr(t, attr)
+        except Exception as exc:
+            logger.debug("yfinance lookup failed for %s (%s): %s", sym, section, exc)
+            return None
+        if raw is None or raw.empty:
+            return None
+        # Work on a copy so the frame owned by yfinance is left untouched.
+        frame = raw.copy()
+        # Rename index items
+        frame.index = [self._ITEM_MAP.get(i, i) for i in frame.index]
+        # Column dates → period strings
+        frame.columns = [
+            c.strftime("%b %Y") if hasattr(c, "strftime") else str(c)
+            for c in frame.columns
+        ]
+        numeric = frame.apply(pd.to_numeric, errors="coerce")
+        # Yahoo gives values in INR; convert to Crores, row by row, leaving
+        # per-share rows as they are.
+        divisors = [
+            1.0 if item in self._PER_SHARE_ITEMS else 1e7
+            for item in numeric.index
+        ]
+        numeric = numeric.div(divisors, axis=0)
+        # Oldest first; positional reversal stays correct even if two
+        # columns ended up with the same label.
+        return numeric.iloc[:, ::-1]
+# ======================================================================
+# Source ordering (lazy-initialised singletons)
+# ======================================================================
+_sources: list[_BaseSource] | None = None
+def _get_sources() -> list[_BaseSource]:
+    """Return the shared source instances, creating them on first use.
+    Sources are tried in the order Screener, Trendlyne, MoneyControl,
+    yfinance.  A source whose constructor raises is skipped and the
+    reason is logged at debug level.
+    """
+    global _sources
+    if _sources is None:
+        _sources = []
+        candidates = (
+            _ScreenerSource,
+            _TrendlyneSource,
+            _MoneyControlSource,
+            _YFinanceSource,
+        )
+        for source_cls in candidates:
+            try:
+                instance = source_cls()
+            except Exception as err:
+                logger.debug("Skipping source %s: %s", source_cls.name, err)
+            else:
+                _sources.append(instance)
+    return _sources
+# ======================================================================
+# Ticker
+# ======================================================================
+# Normalise shorthand period strings for history()
+_PERIOD_MAP = {"1m": "1mo", "3m": "3mo", "6m": "6mo"}
+class Ticker:
+    """Primary interface — wraps a single NSE-listed stock.
+    All heavy data is **lazily loaded** on first access and cached for
+    5 minutes by default.
+    Parameters
+    ----------
+    symbol : str
+        NSE ticker symbol or company name (fuzzy-matched).
+    consolidated : bool
+        Use consolidated financial statements (default ``True``).
+    cache_ttl : int
+        Cache lifetime in seconds (default 300 = 5 min).
+    """
+    def __init__(
+        self,
+        symbol: str,
+        *,
+        consolidated: bool = True,
+        cache_ttl: int = 300,
+    ) -> None:
+        self._symbol = resolve_ticker(symbol)
+        self._consolidated = consolidated
+        self._cache = TTLCache(ttl=cache_ttl)
+    def __repr__(self) -> str:
+        return f"Ticker('{self._symbol}')"
+    # ----- identity -----
+    @property
+    def symbol(self) -> str:
+        """Resolved NSE symbol."""
+        return self._symbol
+    # ----- price / history  (via yfinance) -----
+    @property
+    def price(self) -> float:
+        """Current market price (INR)."""
+        return self._cache.get_or_fetch("price", self._fetch_price)
+    def history(
+        self,
+        period: str = "1mo",
+        interval: str = "1d",
+    ) -> pd.DataFrame:
+        """OHLCV price history.
+        Parameters
+        ----------
+        period : str
+            Look-back window, e.g. ``"1mo"``, ``"3m"``, ``"1y"``, ``"max"``.
+            The shorthands ``"1m"``, ``"3m"`` and ``"6m"`` are read as months.
+        interval : str
+            Bar size, e.g. ``"1d"``, ``"1wk"``, ``"1mo"``.
+        Returns
+        -------
+        pandas.DataFrame
+            Columns: ``open``, ``high``, ``low``, ``close``, ``volume``.
+            Index: :class:`~pandas.DatetimeIndex`.
+        Raises
+        ------
+        DataUnavailableError
+            If yfinance returns an empty result.
+        """
+        period = _PERIOD_MAP.get(period, period)
+        # The cache key uses the normalised period, so "3m" and "3mo" share an entry.
+        key = f"history_{period}_{interval}"
+        return self._cache.get_or_fetch(
+            key, lambda: self._fetch_history(period, interval)
+        )
+    # ----- financial statements -----
+    @property
+    def financials(self) -> pd.DataFrame:
+        """Annual income statement."""
+        return self._cache.get_or_fetch("financials", lambda: self._fetch("profit-loss"))
+    @property
+    def quarterly_financials(self) -> pd.DataFrame:
+        """Quarterly income statement."""
+        return self._cache.get_or_fetch("quarters", lambda: self._fetch("quarters"))
+    @property
+    def balance_sheet(self) -> pd.DataFrame:
+        """Annual balance sheet."""
+        return self._cache.get_or_fetch("balance_sheet", lambda: self._fetch("balance-sheet"))
+    @property
+    def cashflow(self) -> pd.DataFrame:
+        """Annual cash-flow statement."""
+        return self._cache.get_or_fetch("cashflow", lambda: self._fetch("cash-flow"))
+    @property
+    def ratios(self) -> pd.DataFrame:
+        """Historical financial ratios."""
+        return self._cache.get_or_fetch("ratios", lambda: self._fetch("ratios"))
+    @property
+    def shareholding(self) -> pd.DataFrame:
+        """Shareholding pattern."""
+        return self._cache.get_or_fetch("shareholding", lambda: self._fetch("shareholding"))
+    # ----- info dict -----
+    @property
+    def info(self) -> dict:
+        """Key company information and ratios as a plain dict."""
+        return self._cache.get_or_fetch("info", self._fetch_info)
+    # ----- cache control -----
+    def clear_cache(self) -> None:
+        """Manually expire all cached data for this ticker."""
+        self._cache.clear()
+    # ==================================================================
+    # Private helpers
+    # ==================================================================
+    def _fetch(self, section: str) -> pd.DataFrame:
+        """Try every source in priority order and return a standardised DataFrame.
+        Raises
+        ------
+        DataUnavailableError
+            If no source yields a non-empty table.  It carries one
+            ``(source, reason)`` entry for every source that was tried.
+        """
+        errors: list[tuple[str, str]] = []
+        for src in _get_sources():
+            if not src.supports(section):
+                continue
+            try:
+                raw = src.fetch(self._symbol, section, consolidated=self._consolidated)
+                if raw is not None and not raw.empty:
+                    return standardize_dataframe(raw)
+                # Record empty answers too, so the final error names every
+                # source that was consulted, not only those that raised.
+                errors.append((src.name, "no data returned"))
+            except Exception as exc:
+                errors.append((src.name, str(exc)))
+        raise DataUnavailableError(self._symbol, section, errors)
+    def _fetch_price(self) -> float:
+        """Read the current price from yfinance.
+        Raises
+        ------
+        DataUnavailableError
+            If the lookup fails or neither ``currentPrice`` nor
+            ``regularMarketPrice`` is present.
+        """
+        import yfinance as yf
+        t = yf.Ticker(_YFinanceSource._yf_symbol(self._symbol))
+        try:
+            # "or {}" guards against an empty/None info payload, matching _fetch_info.
+            info = t.info or {}
+        except Exception as exc:
+            raise DataUnavailableError(self._symbol, "price", [("yfinance", str(exc))]) from exc
+        price = info.get("currentPrice") or info.get("regularMarketPrice")
+        if price is None:
+            raise DataUnavailableError(self._symbol, "price", [("yfinance", "no price field")])
+        return float(price)
+    def _fetch_history(self, period: str, interval: str) -> pd.DataFrame:
+        """Download price history from yfinance and normalise the column names.
+        Raises
+        ------
+        DataUnavailableError
+            If the returned frame is empty.
+        """
+        import yfinance as yf
+        t = yf.Ticker(_YFinanceSource._yf_symbol(self._symbol))
+        df = t.history(period=period, interval=interval)
+        if df.empty:
+            raise DataUnavailableError(self._symbol, "history", [("yfinance", "empty result")])
+        # Lower-case, underscore-separated column names.
+        df.columns = [c.lower().replace(" ", "_") for c in df.columns]
+        return df
+    def _fetch_info(self) -> dict:
+        """Build the info dict from yfinance, then fill gaps from Screener.
+        Both lookups are best-effort; failures are logged at debug level.
+        Values from yfinance win: a Screener ratio is only added when its
+        key is not already present.
+        Raises
+        ------
+        DataUnavailableError
+            If neither source contributed anything.
+        """
+        import yfinance as yf
+        info: dict = {}
+        # --- yfinance info ---
+        try:
+            t = yf.Ticker(_YFinanceSource._yf_symbol(self._symbol))
+            yf_info = t.info or {}
+        except Exception as exc:
+            logger.debug("yfinance info lookup failed for %s: %s", self._symbol, exc)
+            yf_info = {}
+        if yf_info:
+            info["symbol"] = self._symbol
+            info["name"] = yf_info.get("longName") or yf_info.get("shortName", "")
+            info["sector"] = yf_info.get("sector", "")
+            info["industry"] = yf_info.get("industry", "")
+            mc = yf_info.get("marketCap")
+            if mc is not None:
+                # 1 Crore = 1e7
+                info["market_cap_cr"] = round(mc / 1e7, 2)
+            for src_key, dst_key in (
+                ("currentPrice", "current_price"),
+                ("trailingPE", "pe_ratio"),
+                ("forwardPE", "forward_pe"),
+                ("priceToBook", "pb_ratio"),
+                ("enterpriseToEbitda", "ev_ebitda"),
+                ("returnOnEquity", "roe"),
+                ("debtToEquity", "debt_to_equity"),
+                ("currentRatio", "current_ratio"),
+                ("dividendYield", "dividend_yield"),
+                ("bookValue", "book_value"),
+                ("fiftyTwoWeekHigh", "high_52w"),
+                ("fiftyTwoWeekLow", "low_52w"),
+            ):
+                v = yf_info.get(src_key)
+                if v is not None:
+                    info[dst_key] = round(float(v), 4) if isinstance(v, float) else v
+        # --- Screener top-ratios (best-effort merge) ---
+        try:
+            # Reuse the shared Screener source (and its session) instead of
+            # building a new one, which could trigger a fresh login per call.
+            src = next(
+                (s for s in _get_sources() if isinstance(s, _ScreenerSource)),
+                None,
+            )
+            if src is None:
+                src = _ScreenerSource()
+            _rate_limiter.wait("screener.in")
+            # NOTE(review): always reads the consolidated page, regardless of
+            # self._consolidated — confirm this is intended.
+            resp = src._session.get(
+                f"{src.BASE_URL}/company/{self._symbol}/consolidated/",
+                timeout=15,
+            )
+            resp.raise_for_status()
+            soup = BeautifulSoup(resp.text, "lxml")
+            ratios = parse_screener_top_ratios(soup)
+            for key, val in ratios.items():
+                snake = canonical_to_snake(key)
+                if snake not in info:
+                    info[snake] = val
+        except Exception as exc:
+            logger.debug("screener top-ratios lookup failed for %s: %s", self._symbol, exc)
+        if not info:
+            raise DataUnavailableError(self._symbol, "info")
+        return info
+# ======================================================================
+# Tickers  (multi-stock convenience)
+# ======================================================================
+class Tickers:
+    """Convenience wrapper for multiple tickers at once.
+    Parameters
+    ----------
+    symbols : str
+        Space-separated ticker symbols, e.g. ``"RELIANCE TCS INFY"``.
+    **kw
+        Forwarded unchanged to every :class:`Ticker`.
+    """
+    def __init__(self, symbols: str, **kw) -> None:
+        # str.split() without arguments already drops empty and padded items.
+        self.symbols = symbols.split()
+        # Keyed by the symbol exactly as it was given.
+        self.tickers: dict[str, Ticker] = {
+            s: Ticker(s, **kw) for s in self.symbols
+        }
+    def __repr__(self) -> str:
+        return f"Tickers('{' '.join(self.symbols)}')"
+    def __getitem__(self, symbol: str) -> Ticker:
+        """Return the ticker for *symbol*.
+        The exact key is tried first; otherwise the lookup is
+        case-insensitive and matches either the symbol as it was given
+        or the ticker's resolved symbol.
+        Raises
+        ------
+        KeyError
+            If no ticker matches.
+        """
+        try:
+            return self.tickers[symbol]
+        except KeyError:
+            pass
+        # Keys keep the caller's spelling, so upper-casing only the lookup
+        # value would miss anything that was not typed in upper case.
+        wanted = symbol.upper()
+        for key, ticker in self.tickers.items():
+            if key.upper() == wanted or str(ticker.symbol).upper() == wanted:
+                return ticker
+        raise KeyError(symbol)
+    def __iter__(self):
+        return iter(self.tickers.values())
+    def history(self, period: str = "1mo", interval: str = "1d") -> dict[str, pd.DataFrame]:
+        """Fetch price history for every ticker, keyed by the symbol as given."""
+        return {s: t.history(period=period, interval=interval) for s, t in self.tickers.items()}
+# ======================================================================
+# Module-level convenience functions
+# ======================================================================
+def get_price(symbol: str) -> float:
+    """Return the current price of *symbol* using a one-off :class:`Ticker`."""
+    ticker = Ticker(symbol)
+    return ticker.price

finfetch/exceptions.py ADDED Viewed

@@ -0,0 +1,57 @@
+"""Custom exceptions for finfetch."""
+from __future__ import annotations
+class FinFetchError(Exception):
+    """Base exception for all finfetch errors.
+    Every other exception defined in this module subclasses it, so
+    ``except FinFetchError`` catches any of them.
+    """
+class TickerNotFoundError(FinFetchError):
+    """Signals that a ticker symbol could not be resolved.
+    The offending ``symbol`` and any ``suggestions`` are kept as
+    attributes; suggestions, when present, are appended to the message.
+    """
+    def __init__(self, symbol: str, suggestions: list[str] | None = None):
+        self.symbol = symbol
+        self.suggestions = suggestions if suggestions else []
+        parts = [f"Ticker '{symbol}' not found"]
+        if self.suggestions:
+            parts.append(f". Did you mean: {', '.join(self.suggestions)}?")
+        super().__init__("".join(parts))
+class DataUnavailableError(FinFetchError):
+    """Signals that no source could deliver the requested data.
+    ``symbol``, ``section`` and the per-source ``errors`` (a list of
+    ``(source, message)`` pairs) are kept as attributes; the pairs, when
+    present, are appended to the message in square brackets.
+    """
+    def __init__(
+        self,
+        symbol: str,
+        section: str,
+        errors: list[tuple[str, str]] | None = None,
+    ):
+        self.symbol = symbol
+        self.section = section
+        self.errors = errors if errors else []
+        message = f"Could not fetch '{section}' data for '{symbol}'"
+        if self.errors:
+            joined = "; ".join(f"{src}: {err}" for src, err in self.errors)
+            message = f"{message} [{joined}]"
+        super().__init__(message)
+class RateLimitError(FinFetchError):
+    """Signals an HTTP 429 answer; the source name is kept in ``source``."""
+    def __init__(self, source: str):
+        message = f"Rate limited by {source}. Try again later."
+        self.source = source
+        super().__init__(message)
+class ScrapingError(FinFetchError):
+    """Signals an unexpected HTML parsing failure.
+    The source name is kept in ``source``; a non-empty *detail* is
+    appended to the message after a colon.
+    """
+    def __init__(self, source: str, detail: str = ""):
+        self.source = source
+        suffix = f": {detail}" if detail else ""
+        super().__init__(f"Failed to parse data from {source}{suffix}")