PyPI - agrobr - Versions diffs - 0.1.0__py3-none-any.whl - Mend

agrobr 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

agrobr/__init__.py +10 -0
agrobr/alerts/__init__.py +7 -0
agrobr/alerts/notifier.py +167 -0
agrobr/cache/__init__.py +31 -0
agrobr/cache/duckdb_store.py +433 -0
agrobr/cache/history.py +317 -0
agrobr/cache/migrations.py +82 -0
agrobr/cache/policies.py +240 -0
agrobr/cepea/__init__.py +7 -0
agrobr/cepea/api.py +360 -0
agrobr/cepea/client.py +273 -0
agrobr/cepea/parsers/__init__.py +37 -0
agrobr/cepea/parsers/base.py +35 -0
agrobr/cepea/parsers/consensus.py +300 -0
agrobr/cepea/parsers/detector.py +108 -0
agrobr/cepea/parsers/fingerprint.py +226 -0
agrobr/cepea/parsers/v1.py +305 -0
agrobr/cli.py +323 -0
agrobr/conab/__init__.py +21 -0
agrobr/conab/api.py +239 -0
agrobr/conab/client.py +219 -0
agrobr/conab/parsers/__init__.py +7 -0
agrobr/conab/parsers/v1.py +383 -0
agrobr/constants.py +205 -0
agrobr/exceptions.py +104 -0
agrobr/health/__init__.py +23 -0
agrobr/health/checker.py +202 -0
agrobr/health/reporter.py +314 -0
agrobr/http/__init__.py +9 -0
agrobr/http/browser.py +214 -0
agrobr/http/rate_limiter.py +69 -0
agrobr/http/retry.py +93 -0
agrobr/http/user_agents.py +67 -0
agrobr/ibge/__init__.py +19 -0
agrobr/ibge/api.py +273 -0
agrobr/ibge/client.py +256 -0
agrobr/models.py +85 -0
agrobr/normalize/__init__.py +64 -0
agrobr/normalize/dates.py +303 -0
agrobr/normalize/encoding.py +102 -0
agrobr/normalize/regions.py +308 -0
agrobr/normalize/units.py +278 -0
agrobr/noticias_agricolas/__init__.py +6 -0
agrobr/noticias_agricolas/client.py +222 -0
agrobr/noticias_agricolas/parser.py +187 -0
agrobr/sync.py +147 -0
agrobr/telemetry/__init__.py +17 -0
agrobr/telemetry/collector.py +153 -0
agrobr/utils/__init__.py +5 -0
agrobr/utils/logging.py +59 -0
agrobr/validators/__init__.py +35 -0
agrobr/validators/sanity.py +286 -0
agrobr/validators/structural.py +313 -0
agrobr-0.1.0.dist-info/METADATA +243 -0
agrobr-0.1.0.dist-info/RECORD +58 -0
agrobr-0.1.0.dist-info/WHEEL +4 -0
agrobr-0.1.0.dist-info/entry_points.txt +2 -0
agrobr-0.1.0.dist-info/licenses/LICENSE +21 -0

agrobr/cepea/parsers/v1.py ADDED Viewed

@@ -0,0 +1,305 @@
+"""Parser v1 para indicadores CEPEA - Layout 2024."""
+from __future__ import annotations
+import re
+from datetime import date, datetime
+from decimal import Decimal, InvalidOperation
+from typing import Any
+import structlog
+from bs4 import BeautifulSoup
+from agrobr.constants import Fonte
+from agrobr.exceptions import ParseError
+from agrobr.models import Indicador
+from .base import BaseParser
+from .fingerprint import extract_fingerprint
+# Module-level structlog logger shared by the parser below.
+logger = structlog.get_logger()
+class CepeaParserV1(BaseParser):
+    """Parser para layout CEPEA 2024."""
+    version = 1
+    source = "cepea"
+    valid_from = date(2024, 1, 1)
+    valid_until = None
+    def can_parse(self, html: str) -> tuple[bool, float]:
+        """Verifica se este parser consegue processar o HTML."""
+        soup = BeautifulSoup(html, "lxml")
+        confidence = 0.0
+        checks_passed = 0
+        total_checks = 5
+        tables = soup.find_all("table")
+        if tables:
+            checks_passed += 1
+        indicador_table = soup.find("table", id=re.compile(r"indicador|preco|cotacao", re.I))
+        if not indicador_table:
+            indicador_table = soup.find(
+                "table", class_=re.compile(r"indicador|preco|cotacao", re.I)
+            )
+        if indicador_table:
+            checks_passed += 1
+        headers = soup.find_all("th")
+        header_texts = [th.get_text(strip=True).lower() for th in headers]
+        date_keywords = ["data", "dia", "date"]
+        value_keywords = ["valor", "preço", "preco", "price", "r$"]
+        if any(kw in " ".join(header_texts) for kw in date_keywords):
+            checks_passed += 1
+        if any(kw in " ".join(header_texts) for kw in value_keywords):
+            checks_passed += 1
+        cepea_indicators = soup.find_all(string=re.compile(r"cepea|esalq|indicador", re.I))
+        if cepea_indicators:
+            checks_passed += 1
+        confidence = checks_passed / total_checks
+        can_parse = confidence >= 0.4
+        logger.debug(
+            "can_parse_check",
+            parser_version=self.version,
+            confidence=confidence,
+            checks_passed=checks_passed,
+            total_checks=total_checks,
+        )
+        return can_parse, confidence
+    def parse(self, html: str, produto: str) -> list[Indicador]:
+        """Parseia HTML e retorna lista de indicadores."""
+        soup = BeautifulSoup(html, "lxml")
+        indicadores: list[Indicador] = []
+        tables = soup.find_all("table")
+        if not tables:
+            raise ParseError(
+                source=self.source,
+                parser_version=self.version,
+                reason="No tables found in HTML",
+                html_snippet=html[:500],
+            )
+        data_table = self._find_data_table(soup)
+        if not data_table:
+            raise ParseError(
+                source=self.source,
+                parser_version=self.version,
+                reason="Could not identify data table",
+                html_snippet=html[:500],
+            )
+        headers = self._extract_headers(data_table)
+        rows = data_table.find_all("tr")[1:]
+        for row in rows:
+            cells = row.find_all(["td", "th"])
+            if len(cells) < 2:
+                continue
+            try:
+                indicador = self._parse_row(cells, headers, produto)
+                if indicador:
+                    indicadores.append(indicador)
+            except (ValueError, InvalidOperation) as e:
+                logger.debug(
+                    "row_parse_failed",
+                    error=str(e),
+                    cells=[c.get_text(strip=True) for c in cells],
+                )
+                continue
+        if not indicadores:
+            raise ParseError(
+                source=self.source,
+                parser_version=self.version,
+                reason="No valid indicators extracted",
+                html_snippet=html[:500],
+            )
+        logger.info(
+            "parse_success",
+            source=self.source,
+            parser_version=self.version,
+            records_count=len(indicadores),
+        )
+        return indicadores
+    def extract_fingerprint(self, html: str) -> dict[str, Any]:
+        """Extrai assinatura estrutural do HTML."""
+        fp = extract_fingerprint(html, Fonte.CEPEA, "internal")
+        return fp.model_dump()
+    def _find_data_table(self, soup: BeautifulSoup) -> Any | None:
+        """Encontra a tabela com dados de indicadores."""
+        table = soup.find("table", id=re.compile(r"indicador|preco|cotacao|dados", re.I))
+        if table:
+            return table
+        table = soup.find("table", class_=re.compile(r"indicador|preco|cotacao|dados|table", re.I))
+        if table:
+            return table
+        tables = soup.find_all("table")
+        for table in tables:
+            headers = table.find_all("th")
+            header_text = " ".join(th.get_text(strip=True).lower() for th in headers)
+            if "data" in header_text and ("valor" in header_text or "r$" in header_text):
+                return table
+        if tables:
+            largest_table = max(tables, key=lambda t: len(t.find_all("tr")))
+            if len(largest_table.find_all("tr")) >= 3:
+                return largest_table
+        return None
+    def _extract_headers(self, table: Any) -> list[str]:
+        """Extrai headers da tabela."""
+        headers: list[str] = []
+        header_row = table.find("tr")
+        if header_row:
+            for cell in header_row.find_all(["th", "td"]):
+                text = cell.get_text(strip=True).lower()
+                text = re.sub(r"\s+", " ", text)
+                headers.append(text)
+        return headers
+    def _parse_row(self, cells: list[Any], headers: list[str], produto: str) -> Indicador | None:
+        """Parseia uma linha da tabela."""
+        cell_texts = [c.get_text(strip=True) for c in cells]
+        data_value = None
+        valor_value = None
+        variacao_value = None
+        for _i, (header, cell_text) in enumerate(zip(headers, cell_texts)):
+            header_lower = header.lower()
+            if any(kw in header_lower for kw in ["data", "dia", "date"]):
+                data_value = self._parse_date(cell_text)
+            elif any(kw in header_lower for kw in ["valor", "preço", "preco", "r$", "price"]):
+                valor_value = self._parse_decimal(cell_text)
+            elif "var" in header_lower or "%" in header_lower:
+                variacao_value = cell_text
+        if not data_value and cell_texts:
+            data_value = self._parse_date(cell_texts[0])
+        if not valor_value and len(cell_texts) > 1:
+            for text in cell_texts[1:]:
+                parsed = self._parse_decimal(text)
+                if parsed and parsed > 0:
+                    valor_value = parsed
+                    break
+        if not data_value or not valor_value:
+            return None
+        unidade = self._detect_unidade(produto, headers)
+        return Indicador(
+            fonte=Fonte.CEPEA,
+            produto=produto,
+            praca=None,
+            data=data_value,
+            valor=valor_value,
+            unidade=unidade,
+            metodologia="indicador_esalq",
+            revisao=0,
+            meta={"variacao": variacao_value} if variacao_value else {},
+            parser_version=self.version,
+        )
+    def _parse_date(self, text: str) -> date | None:
+        """Parseia data de diferentes formatos."""
+        text = text.strip()
+        patterns = [
+            (r"(\d{2})/(\d{2})/(\d{4})", "%d/%m/%Y"),
+            (r"(\d{2})-(\d{2})-(\d{4})", "%d-%m-%Y"),
+            (r"(\d{4})-(\d{2})-(\d{2})", "%Y-%m-%d"),
+            (r"(\d{2})/(\d{2})/(\d{2})", "%d/%m/%y"),
+        ]
+        for pattern, date_format in patterns:
+            match = re.search(pattern, text)
+            if match:
+                try:
+                    return datetime.strptime(match.group(), date_format).date()
+                except ValueError:
+                    continue
+        return None
+    def _parse_decimal(self, text: str) -> Decimal | None:
+        """Parseia valor decimal."""
+        text = text.strip()
+        text = re.sub(r"[R$\s]", "", text)
+        if "," in text and "." in text:
+            text = text.replace(".", "").replace(",", ".")
+        elif "," in text:
+            text = text.replace(",", ".")
+        text = re.sub(r"[^\d.\-]", "", text)
+        if not text or text == "." or text == "-":
+            return None
+        try:
+            value = Decimal(text)
+            return value if value > 0 else None
+        except InvalidOperation:
+            return None
+    def _detect_unidade(self, produto: str, headers: list[str]) -> str:
+        """Detecta unidade baseado no produto e headers."""
+        produto_lower = produto.lower()
+        unidades_produto = {
+            "soja": "BRL/sc60kg",
+            "milho": "BRL/sc60kg",
+            "cafe": "BRL/sc60kg",
+            "trigo": "BRL/sc60kg",
+            "arroz": "BRL/sc50kg",
+            "boi": "BRL/@",
+            "boi_gordo": "BRL/@",
+            "boi-gordo": "BRL/@",
+            "algodao": "BRL/@",
+            "frango": "BRL/kg",
+            "suino": "BRL/kg",
+            "acucar": "BRL/sc50kg",
+            "etanol": "BRL/L",
+        }
+        for key, unidade in unidades_produto.items():
+            if key in produto_lower:
+                return unidade
+        header_text = " ".join(headers).lower()
+        if "sc" in header_text or "saca" in header_text:
+            if "50" in header_text:
+                return "BRL/sc50kg"
+            return "BRL/sc60kg"
+        if "@" in header_text or "arroba" in header_text:
+            return "BRL/@"
+        if "kg" in header_text:
+            return "BRL/kg"
+        if "litro" in header_text or "/l" in header_text:
+            return "BRL/L"
+        return "BRL/sc60kg"

agrobr/cli.py ADDED Viewed

@@ -0,0 +1,323 @@
+"""CLI do agrobr usando Typer."""
+from __future__ import annotations
+import json
+from typing import Any
+import typer
+from agrobr import __version__, constants
+# Root Typer application; the sub-apps and commands below are attached to it.
+app = typer.Typer(
+    name="agrobr",
+    help="Dados agricolas brasileiros em uma linha de codigo",
+    add_completion=False,
+)
+def version_callback(value: bool) -> None:
+    if value:
+        typer.echo(f"agrobr version {__version__}")
+        raise typer.Exit()
+@app.callback()  # type: ignore[misc]
+def main(
+    _version: bool = typer.Option(
+        None,
+        "--version",
+        "-v",
+        help="Mostra a versao e sai",
+        callback=version_callback,
+        is_eager=True,
+    ),
+) -> None:
+    """agrobr - Dados agricolas brasileiros."""
+    pass
+# Sub-application mounted on the root app under the name "cepea".
+cepea_app = typer.Typer(help="Indicadores CEPEA")
+app.add_typer(cepea_app, name="cepea")
+@cepea_app.command("indicador")  # type: ignore[misc]
+def cepea_indicador(
+    produto: str = typer.Argument(..., help="Produto (soja, milho, cafe, boi, etc)"),
+    _inicio: str | None = typer.Option(None, "--inicio", "-i", help="Data inicio (YYYY-MM-DD)"),
+    _fim: str | None = typer.Option(None, "--fim", "-f", help="Data fim (YYYY-MM-DD)"),
+    _ultimo: bool = typer.Option(False, "--ultimo", "-u", help="Apenas ultimo valor"),
+    _formato: str = typer.Option("table", "--formato", "-o", help="Formato: table, csv, json"),
+) -> None:
+    """Consulta indicador CEPEA."""
+    typer.echo(f"Consultando {produto}...")
+    typer.echo("Funcionalidade em desenvolvimento")
+@app.command("health")  # type: ignore[misc]
+def health(
+    _all_sources: bool = typer.Option(False, "--all", "-a", help="Verifica todas as fontes"),
+    _source: str | None = typer.Option(None, "--source", "-s", help="Fonte especifica"),
+    output: str = typer.Option("text", "--output", "-o", help="Formato: text, json"),
+) -> None:
+    """Executa health checks."""
+    typer.echo("Health check em desenvolvimento")
+    if output == "json":
+        result = {"status": "ok", "checks": []}
+        typer.echo(json.dumps(result, indent=2))
+# Sub-application mounted on the root app under the name "cache".
+cache_app = typer.Typer(help="Gerenciamento de cache")
+app.add_typer(cache_app, name="cache")
+@cache_app.command("status")  # type: ignore[misc]
+def cache_status() -> None:
+    """Mostra status do cache."""
+    typer.echo("Status do cache em desenvolvimento")
+@cache_app.command("clear")  # type: ignore[misc]
+def cache_clear(
+    _source: str | None = typer.Option(None, "--source", "-s", help="Limpar apenas fonte"),
+    _older_than: str | None = typer.Option(None, "--older-than", help="Ex: 30d"),
+) -> None:
+    """Limpa o cache."""
+    typer.echo("Limpeza de cache em desenvolvimento")
+# Sub-application mounted on the root app under the name "conab".
+conab_app = typer.Typer(help="Dados CONAB - Safras e balanco")
+app.add_typer(conab_app, name="conab")
+@conab_app.command("safras")  # type: ignore[misc]
+def conab_safras(
+    produto: str = typer.Argument(..., help="Produto (soja, milho, arroz, feijao, etc)"),
+    safra: str | None = typer.Option(None, "--safra", "-s", help="Safra (ex: 2025/26)"),
+    uf: str | None = typer.Option(None, "--uf", "-u", help="Filtrar por UF"),
+    formato: str = typer.Option("table", "--formato", "-o", help="Formato: table, csv, json"),
+) -> None:
+    """Consulta dados de safra por produto."""
+    import asyncio
+    from agrobr import conab
+    typer.echo(f"Consultando safras de {produto}...")
+    try:
+        df = asyncio.run(conab.safras(produto=produto, safra=safra, uf=uf))
+        if df.empty:
+            typer.echo("Nenhum dado encontrado")
+            return
+        if formato == "json":
+            typer.echo(df.to_json(orient="records", indent=2))
+        elif formato == "csv":
+            typer.echo(df.to_csv(index=False))
+        else:
+            typer.echo(df.to_string(index=False))
+    except Exception as e:
+        typer.echo(f"Erro: {e}", err=True)
+        raise typer.Exit(1) from None
+@conab_app.command("balanco")  # type: ignore[misc]
+def conab_balanco(
+    produto: str | None = typer.Argument(None, help="Produto (opcional)"),
+    formato: str = typer.Option("table", "--formato", "-o", help="Formato: table, csv, json"),
+) -> None:
+    """Consulta balanco de oferta e demanda."""
+    import asyncio
+    from agrobr import conab
+    typer.echo("Consultando balanco oferta/demanda...")
+    try:
+        df = asyncio.run(conab.balanco(produto=produto))
+        if df.empty:
+            typer.echo("Nenhum dado encontrado")
+            return
+        if formato == "json":
+            typer.echo(df.to_json(orient="records", indent=2))
+        elif formato == "csv":
+            typer.echo(df.to_csv(index=False))
+        else:
+            typer.echo(df.to_string(index=False))
+    except Exception as e:
+        typer.echo(f"Erro: {e}", err=True)
+        raise typer.Exit(1) from None
+@conab_app.command("levantamentos")  # type: ignore[misc]
+def conab_levantamentos() -> None:
+    """Lista levantamentos disponiveis."""
+    import asyncio
+    from agrobr import conab
+    typer.echo("Listando levantamentos...")
+    try:
+        levs = asyncio.run(conab.levantamentos())
+        for lev in levs[:10]:
+            typer.echo(f"  {lev['safra']} - {lev['levantamento']}o levantamento")
+        if len(levs) > 10:
+            typer.echo(f"  ... e mais {len(levs) - 10} levantamentos")
+    except Exception as e:
+        typer.echo(f"Erro: {e}", err=True)
+        raise typer.Exit(1) from None
+@conab_app.command("produtos")  # type: ignore[misc]
+def conab_produtos() -> None:
+    """Lista produtos disponiveis."""
+    import asyncio
+    from agrobr import conab
+    prods = asyncio.run(conab.produtos())
+    typer.echo("Produtos disponiveis:")
+    for prod in prods:
+        typer.echo(f"  - {prod}")
+# =============================================================================
+# IBGE Commands
+# =============================================================================
+# Sub-application mounted on the root app under the name "ibge".
+ibge_app = typer.Typer(help="Dados IBGE - PAM e LSPA")
+app.add_typer(ibge_app, name="ibge")
+@ibge_app.command("pam")  # type: ignore[misc]
+def ibge_pam(
+    produto: str = typer.Argument(..., help="Produto (soja, milho, arroz, etc)"),
+    ano: str | None = typer.Option(
+        None, "--ano", "-a", help="Ano ou anos (ex: 2023 ou 2020,2021,2022)"
+    ),
+    uf: str | None = typer.Option(None, "--uf", "-u", help="Filtrar por UF"),
+    nivel: str = typer.Option("uf", "--nivel", "-n", help="Nivel: brasil, uf, municipio"),
+    formato: str = typer.Option("table", "--formato", "-o", help="Formato: table, csv, json"),
+) -> None:
+    """Consulta dados da Producao Agricola Municipal (PAM)."""
+    import asyncio
+    from agrobr import ibge
+    typer.echo(f"Consultando PAM para {produto}...")
+    try:
+        # Parse ano
+        ano_param: int | list[int] | None = None
+        if ano:
+            ano_param = [int(a.strip()) for a in ano.split(",")] if "," in ano else int(ano)
+        nivel_typed: Any = nivel  # type validated by ibge.pam at runtime
+        df = asyncio.run(ibge.pam(produto=produto, ano=ano_param, uf=uf, nivel=nivel_typed))
+        if df.empty:
+            typer.echo("Nenhum dado encontrado")
+            return
+        if formato == "json":
+            typer.echo(df.to_json(orient="records", indent=2))
+        elif formato == "csv":
+            typer.echo(df.to_csv(index=False))
+        else:
+            typer.echo(df.to_string(index=False))
+    except Exception as e:
+        typer.echo(f"Erro: {e}", err=True)
+        raise typer.Exit(1) from None
+@ibge_app.command("lspa")  # type: ignore[misc]
+def ibge_lspa(
+    produto: str = typer.Argument(..., help="Produto (soja, milho_1, milho_2, etc)"),
+    ano: int | None = typer.Option(None, "--ano", "-a", help="Ano de referencia"),
+    mes: int | None = typer.Option(None, "--mes", "-m", help="Mes (1-12)"),
+    uf: str | None = typer.Option(None, "--uf", "-u", help="Filtrar por UF"),
+    formato: str = typer.Option("table", "--formato", "-o", help="Formato: table, csv, json"),
+) -> None:
+    """Consulta dados do Levantamento Sistematico da Producao Agricola (LSPA)."""
+    import asyncio
+    from agrobr import ibge
+    typer.echo(f"Consultando LSPA para {produto}...")
+    try:
+        df = asyncio.run(ibge.lspa(produto=produto, ano=ano, mes=mes, uf=uf))
+        if df.empty:
+            typer.echo("Nenhum dado encontrado")
+            return
+        if formato == "json":
+            typer.echo(df.to_json(orient="records", indent=2))
+        elif formato == "csv":
+            typer.echo(df.to_csv(index=False))
+        else:
+            typer.echo(df.to_string(index=False))
+    except Exception as e:
+        typer.echo(f"Erro: {e}", err=True)
+        raise typer.Exit(1) from None
+@ibge_app.command("produtos")  # type: ignore[misc]
+def ibge_produtos(
+    pesquisa: str = typer.Option("pam", "--pesquisa", "-p", help="Pesquisa: pam ou lspa"),
+) -> None:
+    """Lista produtos disponiveis."""
+    import asyncio
+    from agrobr import ibge
+    if pesquisa == "pam":
+        prods = asyncio.run(ibge.produtos_pam())
+        typer.echo("Produtos disponiveis na PAM:")
+    else:
+        prods = asyncio.run(ibge.produtos_lspa())
+        typer.echo("Produtos disponiveis no LSPA:")
+    for prod in prods:
+        typer.echo(f"  - {prod}")
+# Sub-application mounted on the root app under the name "config".
+config_app = typer.Typer(help="Configuracoes")
+app.add_typer(config_app, name="config")
+@config_app.command("show")  # type: ignore[misc]
+def config_show() -> None:
+    """Mostra configuracoes atuais."""
+    typer.echo("=== Cache Settings ===")
+    settings = constants.CacheSettings()
+    typer.echo(f"  cache_dir: {settings.cache_dir}")
+    typer.echo(f"  db_name: {settings.db_name}")
+    typer.echo(f"  ttl_cepea_diario: {settings.ttl_cepea_diario}s")
+    typer.echo("\n=== HTTP Settings ===")
+    http = constants.HTTPSettings()
+    typer.echo(f"  timeout_read: {http.timeout_read}s")
+    typer.echo(f"  max_retries: {http.max_retries}")
+    typer.echo("\n=== Alert Settings ===")
+    alerts = constants.AlertSettings()
+    typer.echo(f"  enabled: {alerts.enabled}")
+    typer.echo(f"  slack_webhook: {'configured' if alerts.slack_webhook else 'not set'}")
+# Run the CLI when this module is executed as a script.
+if __name__ == "__main__":
+    app()

agrobr/conab/__init__.py ADDED Viewed

@@ -0,0 +1,21 @@
+"""Modulo CONAB - Dados de safras e balanco oferta/demanda."""
+from __future__ import annotations
+from agrobr.conab.api import (
+    balanco,
+    brasil_total,
+    levantamentos,
+    produtos,
+    safras,
+    ufs,
+)
+# Public API of the package: the six functions imported from agrobr.conab.api.
+__all__ = [
+    "safras",
+    "balanco",
+    "brasil_total",
+    "levantamentos",
+    "produtos",
+    "ufs",
+]