PyPI - agrobr - Versions diffs - 0.1.2__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

agrobr 0.1.2__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

agrobr/__init__.py +3 -2
agrobr/benchmark/__init__.py +343 -0
agrobr/cache/policies.py +3 -8
agrobr/cepea/api.py +87 -30
agrobr/cepea/client.py +0 -7
agrobr/cli.py +141 -5
agrobr/conab/api.py +72 -6
agrobr/config.py +137 -0
agrobr/constants.py +1 -2
agrobr/contracts/__init__.py +186 -0
agrobr/contracts/cepea.py +80 -0
agrobr/contracts/conab.py +181 -0
agrobr/contracts/ibge.py +146 -0
agrobr/export.py +251 -0
agrobr/health/__init__.py +10 -0
agrobr/health/doctor.py +321 -0
agrobr/http/browser.py +0 -9
agrobr/ibge/api.py +104 -25
agrobr/ibge/client.py +5 -20
agrobr/models.py +100 -1
agrobr/noticias_agricolas/client.py +0 -7
agrobr/noticias_agricolas/parser.py +0 -17
agrobr/plugins/__init__.py +205 -0
agrobr/quality.py +319 -0
agrobr/sla.py +249 -0
agrobr/snapshots.py +321 -0
agrobr/stability.py +148 -0
agrobr/validators/semantic.py +447 -0
{agrobr-0.1.2.dist-info → agrobr-0.5.0.dist-info}/METADATA +12 -12
{agrobr-0.1.2.dist-info → agrobr-0.5.0.dist-info}/RECORD +33 -19
{agrobr-0.1.2.dist-info → agrobr-0.5.0.dist-info}/WHEEL +0 -0
{agrobr-0.1.2.dist-info → agrobr-0.5.0.dist-info}/entry_points.txt +0 -0
{agrobr-0.1.2.dist-info → agrobr-0.5.0.dist-info}/licenses/LICENSE +0 -0

agrobr/ibge/api.py CHANGED Viewed

@@ -2,16 +2,22 @@
 from __future__ import annotations
-from typing import Literal
+import time
+from datetime import datetime
+from typing import Literal, overload
 import pandas as pd
 import structlog
+from agrobr import constants
+from agrobr.cache.policies import calculate_expiry
 from agrobr.ibge import client
+from agrobr.models import MetaInfo
 logger = structlog.get_logger()
+@overload
 async def pam(
     produto: str,
     ano: int | str | list[int] | None = None,
@@ -19,7 +25,33 @@ async def pam(
     nivel: Literal["brasil", "uf", "municipio"] = "uf",
     variaveis: list[str] | None = None,
     as_polars: bool = False,
-) -> pd.DataFrame:
+    *,
+    return_meta: Literal[False] = False,
+) -> pd.DataFrame: ...
+@overload
+async def pam(
+    produto: str,
+    ano: int | str | list[int] | None = None,
+    uf: str | None = None,
+    nivel: Literal["brasil", "uf", "municipio"] = "uf",
+    variaveis: list[str] | None = None,
+    as_polars: bool = False,
+    *,
+    return_meta: Literal[True],
+) -> tuple[pd.DataFrame, MetaInfo]: ...
+async def pam(
+    produto: str,
+    ano: int | str | list[int] | None = None,
+    uf: str | None = None,
+    nivel: Literal["brasil", "uf", "municipio"] = "uf",
+    variaveis: list[str] | None = None,
+    as_polars: bool = False,
+    return_meta: bool = False,
+) -> pd.DataFrame | tuple[pd.DataFrame, MetaInfo]:
     """
     Obtém dados da Produção Agrícola Municipal (PAM).
@@ -30,14 +62,22 @@ async def pam(
         nivel: Nível territorial ("brasil", "uf", "municipio")
         variaveis: Lista de variáveis (area_plantada, area_colhida, producao, rendimento)
         as_polars: Se True, retorna polars.DataFrame
+        return_meta: Se True, retorna tupla (DataFrame, MetaInfo)
     Returns:
-        DataFrame com dados da PAM
+        DataFrame com dados da PAM ou tupla (DataFrame, MetaInfo)
     Example:
         >>> df = await ibge.pam('soja', ano=2023)
-        >>> df = await ibge.pam('milho', ano=[2020, 2021, 2022], uf='MT')
+        >>> df, meta = await ibge.pam('milho', ano=[2020, 2021, 2022], uf='MT', return_meta=True)
     """
+    fetch_start = time.perf_counter()
+    meta = MetaInfo(
+        source="ibge_pam",
+        source_url="https://sidra.ibge.gov.br",
+        source_method="httpx",
+        fetched_at=datetime.now(),
+    )
     logger.info(
         "ibge_pam_request",
         produto=produto,
@@ -46,7 +86,6 @@ async def pam(
         nivel=nivel,
     )
-    # Mapeia produto para código SIDRA
     produto_lower = produto.lower()
     if produto_lower not in client.PRODUTOS_PAM:
         raise ValueError(
@@ -55,7 +94,6 @@ async def pam(
     produto_cod = client.PRODUTOS_PAM[produto_lower]
-    # Mapeia variáveis
     if variaveis is None:
         variaveis = ["area_plantada", "area_colhida", "producao", "rendimento"]
@@ -66,7 +104,6 @@ async def pam(
         else:
             logger.warning(f"Variável desconhecida: {var}")
-    # Mapeia nível territorial
     nivel_map = {
         "brasil": "1",
         "uf": "3",
@@ -74,12 +111,10 @@ async def pam(
     }
     territorial_level = nivel_map.get(nivel, "3")
-    # Define código territorial
     ibge_code = "all"
     if uf and nivel in ("uf", "municipio"):
         ibge_code = client.uf_to_ibge_code(uf)
-    # Define período
     if ano is None:
         period = "last"
     elif isinstance(ano, list):
@@ -87,7 +122,6 @@ async def pam(
     else:
         period = str(ano)
-    # Busca dados
     df = await client.fetch_sidra(
         table_code=client.TABELAS["pam_nova"],
         territorial_level=territorial_level,
@@ -97,10 +131,8 @@ async def pam(
         classifications={"782": produto_cod},
     )
-    # Processa resposta
     df = client.parse_sidra_response(df)
-    # Pivota para ter variáveis como colunas
     if "variavel" in df.columns and "valor" in df.columns:
         df_pivot = df.pivot_table(
             index=["localidade", "ano"] if "localidade" in df.columns else ["ano"],
@@ -109,7 +141,6 @@ async def pam(
             aggfunc="first",
         ).reset_index()
-        # Renomeia colunas para nomes mais simples
         rename_map = {
             "Área plantada": "area_plantada",
             "Área colhida": "area_colhida",
@@ -123,11 +154,20 @@ async def pam(
     df["produto"] = produto_lower
     df["fonte"] = "ibge_pam"
+    meta.fetch_duration_ms = int((time.perf_counter() - fetch_start) * 1000)
+    meta.records_count = len(df)
+    meta.columns = df.columns.tolist()
+    meta.cache_key = f"ibge:pam:{produto}:{ano}"
+    meta.cache_expires_at = calculate_expiry(constants.Fonte.IBGE, "pam")
     if as_polars:
         try:
             import polars as pl
-            return pl.from_pandas(df)  # type: ignore[no-any-return]
+            result_df = pl.from_pandas(df)
+            if return_meta:
+                return result_df, meta  # type: ignore[return-value,no-any-return]
+            return result_df  # type: ignore[return-value,no-any-return]
         except ImportError:
             logger.warning("polars_not_installed", fallback="pandas")
@@ -137,16 +177,43 @@ async def pam(
         records=len(df),
     )
+    if return_meta:
+        return df, meta
     return df
+@overload
+async def lspa(
+    produto: str,
+    ano: int | str | None = None,
+    mes: int | str | None = None,
+    uf: str | None = None,
+    as_polars: bool = False,
+    *,
+    return_meta: Literal[False] = False,
+) -> pd.DataFrame: ...
+@overload
 async def lspa(
     produto: str,
     ano: int | str | None = None,
     mes: int | str | None = None,
     uf: str | None = None,
     as_polars: bool = False,
-) -> pd.DataFrame:
+    *,
+    return_meta: Literal[True],
+) -> tuple[pd.DataFrame, MetaInfo]: ...
+async def lspa(
+    produto: str,
+    ano: int | str | None = None,
+    mes: int | str | None = None,
+    uf: str | None = None,
+    as_polars: bool = False,
+    return_meta: bool = False,
+) -> pd.DataFrame | tuple[pd.DataFrame, MetaInfo]:
     """
     Obtém dados do Levantamento Sistemático da Produção Agrícola (LSPA).
@@ -158,14 +225,22 @@ async def lspa(
         mes: Mês de referência (1-12). Se None, retorna todos os meses do ano.
         uf: Filtrar por UF (ex: "MT", "PR")
         as_polars: Se True, retorna polars.DataFrame
+        return_meta: Se True, retorna tupla (DataFrame, MetaInfo)
     Returns:
-        DataFrame com estimativas LSPA
+        DataFrame com estimativas LSPA ou tupla (DataFrame, MetaInfo)
     Example:
         >>> df = await ibge.lspa('soja', ano=2024)
-        >>> df = await ibge.lspa('milho_1', ano=2024, mes=6, uf='PR')
+        >>> df, meta = await ibge.lspa('milho_1', ano=2024, mes=6, uf='PR', return_meta=True)
     """
+    fetch_start = time.perf_counter()
+    meta = MetaInfo(
+        source="ibge_lspa",
+        source_url="https://sidra.ibge.gov.br",
+        source_method="httpx",
+        fetched_at=datetime.now(),
+    )
     logger.info(
         "ibge_lspa_request",
         produto=produto,
@@ -174,7 +249,6 @@ async def lspa(
         uf=uf,
     )
-    # Mapeia produto para código SIDRA
     produto_lower = produto.lower()
     if produto_lower not in client.PRODUTOS_LSPA:
         raise ValueError(
@@ -183,20 +257,16 @@ async def lspa(
     produto_cod = client.PRODUTOS_LSPA[produto_lower]
-    # Define período
     if ano is None:
         from datetime import date
         ano = date.today().year
-    # Define período
     period = f"{ano}{int(mes):02d}" if mes else ",".join(f"{ano}{m:02d}" for m in range(1, 13))
-    # Define nível territorial
     territorial_level = "3" if uf else "1"
     ibge_code = client.uf_to_ibge_code(uf) if uf else "all"
-    # Busca dados (não especifica variáveis - retorna todas)
     df = await client.fetch_sidra(
         table_code=client.TABELAS["lspa"],
         territorial_level=territorial_level,
@@ -205,10 +275,8 @@ async def lspa(
         classifications={"48": produto_cod},
     )
-    # Processa resposta
     df = client.parse_sidra_response(df)
-    # Adiciona período da consulta
     df["ano"] = ano
     if mes:
         df["mes"] = mes
@@ -216,11 +284,20 @@ async def lspa(
     df["produto"] = produto_lower
     df["fonte"] = "ibge_lspa"
+    meta.fetch_duration_ms = int((time.perf_counter() - fetch_start) * 1000)
+    meta.records_count = len(df)
+    meta.columns = df.columns.tolist()
+    meta.cache_key = f"ibge:lspa:{produto}:{ano}:{mes}"
+    meta.cache_expires_at = calculate_expiry(constants.Fonte.IBGE, "lspa")
     if as_polars:
         try:
             import polars as pl
-            return pl.from_pandas(df)  # type: ignore[no-any-return]
+            result_df = pl.from_pandas(df)
+            if return_meta:
+                return result_df, meta  # type: ignore[return-value,no-any-return]
+            return result_df  # type: ignore[return-value,no-any-return]
         except ImportError:
             logger.warning("polars_not_installed", fallback="pandas")
@@ -230,6 +307,8 @@ async def lspa(
         records=len(df),
     )
+    if return_meta:
+        return df, meta
     return df

agrobr/ibge/client.py CHANGED Viewed

@@ -14,38 +14,30 @@ from agrobr.http.rate_limiter import RateLimiter
 logger = structlog.get_logger()
-# Códigos das tabelas SIDRA
 TABELAS = {
-    # PAM - Produção Agrícola Municipal
-    "pam_temporarias": "1612",  # Lavouras temporárias (1974-2018)
-    "pam_permanentes": "1613",  # Lavouras permanentes (1974-2018)
-    "pam_nova": "5457",  # Nova série PAM (2018+)
-    # LSPA - Levantamento Sistemático da Produção Agrícola
-    "lspa": "6588",  # Série mensal (2006+)
-    "lspa_safra": "1618",  # Por ano de safra
+    "pam_temporarias": "1612",
+    "pam_permanentes": "1613",
+    "pam_nova": "5457",
+    "lspa": "6588",
+    "lspa_safra": "1618",
 }
-# Variáveis disponíveis
 VARIAVEIS = {
-    # PAM 5457
     "area_plantada": "214",
     "area_colhida": "215",
     "producao": "216",
     "rendimento": "112",
     "valor_producao": "215",
-    # PAM 1612 (lavouras temporárias)
     "area_plantada_1612": "109",
     "area_colhida_1612": "1000109",
     "producao_1612": "214",
     "rendimento_1612": "112",
     "valor_1612": "215",
-    # LSPA 6588
     "area_lspa": "109",
     "producao_lspa": "216",
     "rendimento_lspa": "112",
 }
-# Níveis territoriais
 NIVEIS_TERRITORIAIS = {
     "brasil": "1",
     "regiao": "2",
@@ -55,7 +47,6 @@ NIVEIS_TERRITORIAIS = {
     "municipio": "6",
 }
-# Códigos de produtos agrícolas (classificação 782 para tabela 5457)
 PRODUTOS_PAM = {
     "soja": "40124",
     "milho": "40126",
@@ -69,7 +60,6 @@ PRODUTOS_PAM = {
     "laranja": "40125",
 }
-# Códigos para LSPA (classificação 48 para tabela 6588)
 PRODUTOS_LSPA = {
     "soja": "39443",
     "milho_1": "39441",
@@ -125,7 +115,6 @@ async def fetch_sidra(
     )
     async with RateLimiter.acquire(constants.Fonte.IBGE):
-        # sidrapy é síncrono, então apenas chamamos diretamente
         kwargs: dict[str, Any] = {
             "table_code": table_code,
             "territorial_level": territorial_level,
@@ -151,7 +140,6 @@ async def fetch_sidra(
         try:
             df = sidrapy.get_table(**kwargs)
-            # Remove primeira linha que é o header descritivo
             if header == "n" and len(df) > 1:
                 df = df.iloc[1:].reset_index(drop=True)
@@ -186,7 +174,6 @@ def parse_sidra_response(
     Returns:
         DataFrame processado
     """
-    # Mapeamento padrão de colunas SIDRA
     default_rename = {
         "NC": "nivel_territorial_cod",
         "NN": "nivel_territorial",
@@ -206,11 +193,9 @@ def parse_sidra_response(
     if rename_columns:
         default_rename.update(rename_columns)
-    # Renomeia apenas colunas que existem
     rename_map = {k: v for k, v in default_rename.items() if k in df.columns}
     df = df.rename(columns=rename_map)
-    # Converte valor para numérico
     if "valor" in df.columns:
         df["valor"] = pd.to_numeric(df["valor"], errors="coerce")

agrobr/models.py CHANGED Viewed

@@ -2,14 +2,22 @@
 from __future__ import annotations
+import hashlib
+import json
+import sys
+from dataclasses import dataclass
+from dataclasses import field as dataclass_field
 from datetime import date, datetime
 from decimal import Decimal
-from typing import Any
+from typing import TYPE_CHECKING, Any
 from pydantic import BaseModel, Field, field_validator
 from .constants import Fonte
+if TYPE_CHECKING:
+    import pandas as pd
 class Indicador(BaseModel):
     fonte: Fonte
@@ -83,3 +91,94 @@ class Fingerprint(BaseModel):
     structure_hash: str
     table_headers: list[list[str]]
     element_counts: dict[str, int]
+@dataclass
+class MetaInfo:
+    """Metadados de proveniencia e rastreabilidade para data lineage."""
+    source: str
+    source_url: str
+    source_method: str
+    fetched_at: datetime
+    timestamp: datetime = dataclass_field(default_factory=datetime.now)
+    fetch_duration_ms: int = 0
+    parse_duration_ms: int = 0
+    from_cache: bool = False
+    cache_key: str | None = None
+    cache_expires_at: datetime | None = None
+    raw_content_hash: str | None = None
+    raw_content_size: int = 0
+    records_count: int = 0
+    columns: list[str] = dataclass_field(default_factory=list)
+    agrobr_version: str = ""
+    schema_version: str = "1.0"
+    parser_version: int = 1
+    python_version: str = ""
+    validation_passed: bool = True
+    validation_warnings: list[str] = dataclass_field(default_factory=list)
+    def __post_init__(self) -> None:
+        """Preenche versoes automaticamente."""
+        if not self.agrobr_version:
+            from agrobr import __version__
+            self.agrobr_version = __version__
+        if not self.python_version:
+            self.python_version = sys.version.split()[0]
+    def to_dict(self) -> dict[str, Any]:
+        """Converte para dicionario serializavel."""
+        return {
+            "source": self.source,
+            "source_url": self.source_url,
+            "source_method": self.source_method,
+            "fetched_at": self.fetched_at.isoformat(),
+            "timestamp": self.timestamp.isoformat(),
+            "fetch_duration_ms": self.fetch_duration_ms,
+            "parse_duration_ms": self.parse_duration_ms,
+            "from_cache": self.from_cache,
+            "cache_key": self.cache_key,
+            "cache_expires_at": (
+                self.cache_expires_at.isoformat() if self.cache_expires_at else None
+            ),
+            "raw_content_hash": self.raw_content_hash,
+            "raw_content_size": self.raw_content_size,
+            "records_count": self.records_count,
+            "columns": self.columns,
+            "agrobr_version": self.agrobr_version,
+            "schema_version": self.schema_version,
+            "parser_version": self.parser_version,
+            "python_version": self.python_version,
+            "validation_passed": self.validation_passed,
+            "validation_warnings": self.validation_warnings,
+        }
+    def to_json(self, indent: int = 2) -> str:
+        """Serializa para JSON."""
+        return json.dumps(self.to_dict(), indent=indent, ensure_ascii=False)
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> MetaInfo:
+        """Reconstroi a partir de dicionario."""
+        data = data.copy()
+        for key in ["fetched_at", "timestamp", "cache_expires_at"]:
+            if data.get(key) and isinstance(data[key], str):
+                data[key] = datetime.fromisoformat(data[key])
+        return cls(**data)
+    def compute_dataframe_hash(self, df: pd.DataFrame) -> str:
+        """Computa hash do DataFrame para verificacao de integridade."""
+        csv_bytes = df.to_csv(index=False).encode("utf-8")
+        return f"sha256:{hashlib.sha256(csv_bytes).hexdigest()}"
+    def verify_hash(self, df: pd.DataFrame) -> bool:
+        """Verifica se DataFrame corresponde ao hash original."""
+        if not self.raw_content_hash:
+            return True
+        current_hash = self.compute_dataframe_hash(df)
+        return current_hash == self.raw_content_hash

agrobr/noticias_agricolas/client.py CHANGED Viewed

@@ -17,7 +17,6 @@ from agrobr.normalize.encoding import decode_content
 logger = structlog.get_logger()
-# Por padrão usa browser pois a página carrega dados via AJAX
 _use_browser: bool = True
@@ -77,20 +76,17 @@ async def _fetch_with_browser(url: str, produto: str) -> str:
                     last_error="No response received",
                 )
-            # Aguarda tabela de cotações carregar
             try:
                 await page.wait_for_selector(
                     "table.cot-fisicas",
                     timeout=15000,
                 )
             except Exception:
-                # Tenta seletor alternativo
                 await page.wait_for_selector(
                     "table",
                     timeout=10000,
                 )
-            # Aguarda AJAX terminar
             await page.wait_for_timeout(2000)
             html: str = await page.content()
@@ -193,7 +189,6 @@ async def fetch_indicador_page(produto: str, force_httpx: bool = False) -> str:
         produto=produto,
     )
-    # Por padrão usa browser pois a página carrega dados via AJAX
     if not force_httpx and _use_browser:
         try:
             return await _fetch_with_browser(url, produto)
@@ -203,9 +198,7 @@ async def fetch_indicador_page(produto: str, force_httpx: bool = False) -> str:
                 source="noticias_agricolas",
                 url=url,
             )
-            # Fallback para httpx
-    # Tenta httpx (pode ter dados incompletos)
     try:
         return await _fetch_with_httpx(url)
     except httpx.HTTPError as e:

agrobr/noticias_agricolas/parser.py CHANGED Viewed

@@ -14,7 +14,6 @@ from agrobr.models import Indicador
 logger = structlog.get_logger()
-# Mapeamento de produtos para unidades
 UNIDADES = {
     "soja": "BRL/sc60kg",
     "soja_parana": "BRL/sc60kg",
@@ -27,7 +26,6 @@ UNIDADES = {
     "trigo": "BRL/ton",
 }
-# Mapeamento de produtos para praça
 PRACAS = {
     "soja": "Paranaguá/PR",
     "soja_parana": "Paraná",
@@ -45,7 +43,6 @@ def _parse_date(date_str: str) -> datetime | None:
     """Converte string de data para datetime."""
     date_str = date_str.strip()
-    # Formato: DD/MM/YYYY
     match = re.match(r"(\d{2})/(\d{2})/(\d{4})", date_str)
     if match:
         day, month, year = match.groups()
@@ -61,10 +58,8 @@ def _parse_valor(valor_str: str) -> Decimal | None:
     """Converte string de valor para Decimal."""
     valor_str = valor_str.strip()
-    # Remove "R$" e espaços
     valor_str = re.sub(r"R\$\s*", "", valor_str)
-    # Substitui vírgula por ponto
     valor_str = valor_str.replace(".", "").replace(",", ".")
     try:
@@ -77,10 +72,8 @@ def _parse_variacao(var_str: str) -> Decimal | None:
     """Converte string de variação para Decimal."""
     var_str = var_str.strip()
-    # Remove % e espaços
     var_str = re.sub(r"[%\s]", "", var_str)
-    # Substitui vírgula por ponto
     var_str = var_str.replace(",", ".")
     try:
@@ -107,26 +100,18 @@ def parse_indicador(html: str, produto: str) -> list[Indicador]:
     unidade = UNIDADES.get(produto_lower, "BRL/unidade")
     praca = PRACAS.get(produto_lower)
-    # Estrutura do Notícias Agrícolas:
-    # Tabela com classe "cot-fisicas" ou tabelas genéricas
-    # Headers: Data | Valor R$ | Variação (%)
-    # Primeiro tenta tabela específica de cotações
     tables = soup.find_all("table", class_="cot-fisicas")
-    # Se não encontrar, tenta todas as tabelas
     if not tables:
         tables = soup.find_all("table")
     for table in tables:
-        # Verifica se é tabela de cotação
         headers = table.find_all("th")
         header_text = " ".join(h.get_text(strip=True).lower() for h in headers)
         if "data" not in header_text or "valor" not in header_text:
             continue
-        # Extrai todas as linhas de dados (tbody > tr)
         tbody = table.find("tbody")
         rows = tbody.find_all("tr") if tbody else table.find_all("tr")[1:]
@@ -136,7 +121,6 @@ def parse_indicador(html: str, produto: str) -> list[Indicador]:
             if len(cells) < 2:
                 continue
-            # Extrai data e valor
             data_str = cells[0].get_text(strip=True)
             valor_str = cells[1].get_text(strip=True)
@@ -152,7 +136,6 @@ def parse_indicador(html: str, produto: str) -> list[Indicador]:
                 )
                 continue
-            # Extrai variação se disponível
             meta: dict[str, str | float] = {}
             if len(cells) >= 3:
                 var_str = cells[2].get_text(strip=True)

agrobr 0.1.2__py3-none-any.whl → 0.5.0__py3-none-any.whl

agrobr 0.1.2__py3-none-any.whl → 0.5.0__py3-none-any.whl