agrobr 0.1.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in a supported public registry. It is provided for informational purposes only.
agrobr/ibge/client.py CHANGED
@@ -14,38 +14,30 @@ from agrobr.http.rate_limiter import RateLimiter
 logger = structlog.get_logger()


-# Códigos das tabelas SIDRA
 TABELAS = {
-    # PAM - Produção Agrícola Municipal
-    "pam_temporarias": "1612",  # Lavouras temporárias (1974-2018)
-    "pam_permanentes": "1613",  # Lavouras permanentes (1974-2018)
-    "pam_nova": "5457",  # Nova série PAM (2018+)
-    # LSPA - Levantamento Sistemático da Produção Agrícola
-    "lspa": "6588",  # Série mensal (2006+)
-    "lspa_safra": "1618",  # Por ano de safra
+    "pam_temporarias": "1612",
+    "pam_permanentes": "1613",
+    "pam_nova": "5457",
+    "lspa": "6588",
+    "lspa_safra": "1618",
 }

-# Variáveis disponíveis
 VARIAVEIS = {
-    # PAM 5457
     "area_plantada": "214",
     "area_colhida": "215",
     "producao": "216",
     "rendimento": "112",
     "valor_producao": "215",
-    # PAM 1612 (lavouras temporárias)
     "area_plantada_1612": "109",
     "area_colhida_1612": "1000109",
     "producao_1612": "214",
     "rendimento_1612": "112",
     "valor_1612": "215",
-    # LSPA 6588
     "area_lspa": "109",
     "producao_lspa": "216",
     "rendimento_lspa": "112",
 }

-# Níveis territoriais
 NIVEIS_TERRITORIAIS = {
     "brasil": "1",
     "regiao": "2",
@@ -55,7 +47,6 @@ NIVEIS_TERRITORIAIS = {
     "municipio": "6",
 }

-# Códigos de produtos agrícolas (classificação 782 para tabela 5457)
 PRODUTOS_PAM = {
     "soja": "40124",
     "milho": "40126",
@@ -69,7 +60,6 @@ PRODUTOS_PAM = {
     "laranja": "40125",
 }

-# Códigos para LSPA (classificação 48 para tabela 6588)
 PRODUTOS_LSPA = {
     "soja": "39443",
     "milho_1": "39441",
@@ -125,7 +115,6 @@ async def fetch_sidra(
     )

     async with RateLimiter.acquire(constants.Fonte.IBGE):
-        # sidrapy é síncrono, então apenas chamamos diretamente
         kwargs: dict[str, Any] = {
             "table_code": table_code,
             "territorial_level": territorial_level,
@@ -151,7 +140,6 @@ async def fetch_sidra(
        try:
            df = sidrapy.get_table(**kwargs)

-           # Remove primeira linha que é o header descritivo
            if header == "n" and len(df) > 1:
                df = df.iloc[1:].reset_index(drop=True)

@@ -186,7 +174,6 @@ def parse_sidra_response(
     Returns:
         DataFrame processado
     """
-    # Mapeamento padrão de colunas SIDRA
     default_rename = {
         "NC": "nivel_territorial_cod",
         "NN": "nivel_territorial",
@@ -206,11 +193,9 @@ def parse_sidra_response(
     if rename_columns:
         default_rename.update(rename_columns)

-    # Renomeia apenas colunas que existem
     rename_map = {k: v for k, v in default_rename.items() if k in df.columns}
     df = df.rename(columns=rename_map)

-    # Converte valor para numérico
     if "valor" in df.columns:
         df["valor"] = pd.to_numeric(df["valor"], errors="coerce")

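For orientation, the lookup tables above resolve human-readable names to SIDRA codes before the module hands them to sidrapy. Below is a minimal sketch of that resolution, calling sidrapy.get_table directly rather than the async fetch_sidra wrapper; the product/variable filters that fetch_sidra assembles are not fully visible in this diff, so they are noted in comments rather than guessed at.

import sidrapy

from agrobr.ibge.client import NIVEIS_TERRITORIAIS, PRODUTOS_PAM, TABELAS, VARIAVEIS

# Resolve human-readable names to SIDRA codes via the module constants.
# PRODUTOS_PAM["soja"] (classification 782) and VARIAVEIS["producao"] would be
# passed through additional keywords assembled by fetch_sidra; they are left
# out here because their exact form is not shown in this diff.
df = sidrapy.get_table(
    table_code=TABELAS["pam_nova"],                   # tabela 5457 (nova série PAM)
    territorial_level=NIVEIS_TERRITORIAIS["brasil"],  # "1" = Brasil
    ibge_territorial_code="all",
    period="2022",
)

# As in fetch_sidra above, drop the descriptive header row SIDRA returns first.
df = df.iloc[1:].reset_index(drop=True)
print(df.head())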
agrobr/models.py CHANGED
@@ -2,14 +2,22 @@

 from __future__ import annotations

+import hashlib
+import json
+import sys
+from dataclasses import dataclass
+from dataclasses import field as dataclass_field
 from datetime import date, datetime
 from decimal import Decimal
-from typing import Any
+from typing import TYPE_CHECKING, Any

 from pydantic import BaseModel, Field, field_validator

 from .constants import Fonte

+if TYPE_CHECKING:
+    import pandas as pd
+

 class Indicador(BaseModel):
     fonte: Fonte
@@ -83,3 +91,94 @@ class Fingerprint(BaseModel):
     structure_hash: str
     table_headers: list[list[str]]
     element_counts: dict[str, int]
+
+
+@dataclass
+class MetaInfo:
+    """Metadados de proveniencia e rastreabilidade para data lineage."""
+
+    source: str
+    source_url: str
+    source_method: str
+    fetched_at: datetime
+    timestamp: datetime = dataclass_field(default_factory=datetime.now)
+    fetch_duration_ms: int = 0
+    parse_duration_ms: int = 0
+    from_cache: bool = False
+    cache_key: str | None = None
+    cache_expires_at: datetime | None = None
+    raw_content_hash: str | None = None
+    raw_content_size: int = 0
+    records_count: int = 0
+    columns: list[str] = dataclass_field(default_factory=list)
+    agrobr_version: str = ""
+    schema_version: str = "1.0"
+    parser_version: int = 1
+    python_version: str = ""
+    validation_passed: bool = True
+    validation_warnings: list[str] = dataclass_field(default_factory=list)
+
+    def __post_init__(self) -> None:
+        """Preenche versoes automaticamente."""
+        if not self.agrobr_version:
+            from agrobr import __version__
+
+            self.agrobr_version = __version__
+
+        if not self.python_version:
+            self.python_version = sys.version.split()[0]
+
+    def to_dict(self) -> dict[str, Any]:
+        """Converte para dicionario serializavel."""
+        return {
+            "source": self.source,
+            "source_url": self.source_url,
+            "source_method": self.source_method,
+            "fetched_at": self.fetched_at.isoformat(),
+            "timestamp": self.timestamp.isoformat(),
+            "fetch_duration_ms": self.fetch_duration_ms,
+            "parse_duration_ms": self.parse_duration_ms,
+            "from_cache": self.from_cache,
+            "cache_key": self.cache_key,
+            "cache_expires_at": (
+                self.cache_expires_at.isoformat() if self.cache_expires_at else None
+            ),
+            "raw_content_hash": self.raw_content_hash,
+            "raw_content_size": self.raw_content_size,
+            "records_count": self.records_count,
+            "columns": self.columns,
+            "agrobr_version": self.agrobr_version,
+            "schema_version": self.schema_version,
+            "parser_version": self.parser_version,
+            "python_version": self.python_version,
+            "validation_passed": self.validation_passed,
+            "validation_warnings": self.validation_warnings,
+        }
+
+    def to_json(self, indent: int = 2) -> str:
+        """Serializa para JSON."""
+        return json.dumps(self.to_dict(), indent=indent, ensure_ascii=False)
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> MetaInfo:
+        """Reconstroi a partir de dicionario."""
+        data = data.copy()
+
+        for key in ["fetched_at", "timestamp", "cache_expires_at"]:
+            if data.get(key) and isinstance(data[key], str):
+                data[key] = datetime.fromisoformat(data[key])
+
+        return cls(**data)
+
+    def compute_dataframe_hash(self, df: pd.DataFrame) -> str:
+        """Computa hash do DataFrame para verificacao de integridade."""
+        csv_bytes = df.to_csv(index=False).encode("utf-8")
+        return f"sha256:{hashlib.sha256(csv_bytes).hexdigest()}"
+
+    def verify_hash(self, df: pd.DataFrame) -> bool:
+        """Verifica se DataFrame corresponde ao hash original."""
+        if not self.raw_content_hash:
+            return True
+
+        current_hash = self.compute_dataframe_hash(df)
+        return current_hash == self.raw_content_hash
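The new MetaInfo dataclass carries provenance metadata alongside a fetched DataFrame and supports a serialize/restore/verify round trip. A short usage sketch based only on the API added above (the sample DataFrame and field values are illustrative):

from datetime import datetime

import pandas as pd

from agrobr.models import MetaInfo

df = pd.DataFrame({"produto": ["soja"], "valor": [130.5]})

meta = MetaInfo(
    source="ibge",
    source_url="https://sidra.ibge.gov.br/",
    source_method="api",
    fetched_at=datetime.now(),
    records_count=len(df),
    columns=list(df.columns),
)
# Pin the integrity hash to the parsed DataFrame.
meta.raw_content_hash = meta.compute_dataframe_hash(df)

# Round-trip through the serializable form and re-verify integrity.
restored = MetaInfo.from_dict(meta.to_dict())
assert restored.verify_hash(df)
print(restored.to_json())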
@@ -17,7 +17,6 @@ from agrobr.normalize.encoding import decode_content

 logger = structlog.get_logger()

-# Por padrão usa browser pois a página carrega dados via AJAX
 _use_browser: bool = True


@@ -77,20 +76,17 @@ async def _fetch_with_browser(url: str, produto: str) -> str:
            last_error="No response received",
        )

-    # Aguarda tabela de cotações carregar
    try:
        await page.wait_for_selector(
            "table.cot-fisicas",
            timeout=15000,
        )
    except Exception:
-        # Tenta seletor alternativo
        await page.wait_for_selector(
            "table",
            timeout=10000,
        )

-    # Aguarda AJAX terminar
    await page.wait_for_timeout(2000)

    html: str = await page.content()
@@ -193,7 +189,6 @@ async def fetch_indicador_page(produto: str, force_httpx: bool = False) -> str:
        produto=produto,
    )

-    # Por padrão usa browser pois a página carrega dados via AJAX
    if not force_httpx and _use_browser:
        try:
            return await _fetch_with_browser(url, produto)
@@ -203,9 +198,7 @@ async def fetch_indicador_page(produto: str, force_httpx: bool = False) -> str:
                source="noticias_agricolas",
                url=url,
            )
-            # Fallback para httpx

-    # Tenta httpx (pode ter dados incompletos)
    try:
        return await _fetch_with_httpx(url)
    except httpx.HTTPError as e:
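The comments removed here documented why the client waits for the quotes table before scraping: the page populates it via AJAX, so a plain HTTP GET can return incomplete data. The standalone sketch below illustrates the same wait-with-fallback pattern using Playwright's async API; it is independent of agrobr's internal helpers, and the URL is a placeholder.

import asyncio

from playwright.async_api import async_playwright


async def fetch_quotes_html(url: str) -> str:
    async with async_playwright() as p:
        browser = await p.chromium.launch()
        page = await browser.new_page()
        await page.goto(url)
        try:
            # Prefer the specific quotes table...
            await page.wait_for_selector("table.cot-fisicas", timeout=15000)
        except Exception:
            # ...but fall back to any table if the class name changes.
            await page.wait_for_selector("table", timeout=10000)
        await page.wait_for_timeout(2000)  # give the AJAX request time to finish
        html = await page.content()
        await browser.close()
        return html


# html = asyncio.run(fetch_quotes_html("https://www.noticiasagricolas.com.br/..."))  # placeholder URL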
@@ -14,7 +14,6 @@ from agrobr.models import Indicador

 logger = structlog.get_logger()

-# Mapeamento de produtos para unidades
 UNIDADES = {
     "soja": "BRL/sc60kg",
     "soja_parana": "BRL/sc60kg",
@@ -27,7 +26,6 @@ UNIDADES = {
     "trigo": "BRL/ton",
 }

-# Mapeamento de produtos para praça
 PRACAS = {
     "soja": "Paranaguá/PR",
     "soja_parana": "Paraná",
@@ -45,7 +43,6 @@ def _parse_date(date_str: str) -> datetime | None:
     """Converte string de data para datetime."""
     date_str = date_str.strip()

-    # Formato: DD/MM/YYYY
     match = re.match(r"(\d{2})/(\d{2})/(\d{4})", date_str)
     if match:
         day, month, year = match.groups()
@@ -61,10 +58,8 @@ def _parse_valor(valor_str: str) -> Decimal | None:
     """Converte string de valor para Decimal."""
     valor_str = valor_str.strip()

-    # Remove "R$" e espaços
     valor_str = re.sub(r"R\$\s*", "", valor_str)

-    # Substitui vírgula por ponto
     valor_str = valor_str.replace(".", "").replace(",", ".")

     try:
@@ -77,10 +72,8 @@ def _parse_variacao(var_str: str) -> Decimal | None:
     """Converte string de variação para Decimal."""
     var_str = var_str.strip()

-    # Remove % e espaços
     var_str = re.sub(r"[%\s]", "", var_str)

-    # Substitui vírgula por ponto
     var_str = var_str.replace(",", ".")

     try:
@@ -107,26 +100,18 @@ def parse_indicador(html: str, produto: str) -> list[Indicador]:
     unidade = UNIDADES.get(produto_lower, "BRL/unidade")
     praca = PRACAS.get(produto_lower)

-    # Estrutura do Notícias Agrícolas:
-    # Tabela com classe "cot-fisicas" ou tabelas genéricas
-    # Headers: Data | Valor R$ | Variação (%)
-
-    # Primeiro tenta tabela específica de cotações
     tables = soup.find_all("table", class_="cot-fisicas")

-    # Se não encontrar, tenta todas as tabelas
     if not tables:
         tables = soup.find_all("table")

     for table in tables:
-        # Verifica se é tabela de cotação
         headers = table.find_all("th")
         header_text = " ".join(h.get_text(strip=True).lower() for h in headers)

         if "data" not in header_text or "valor" not in header_text:
             continue

-        # Extrai todas as linhas de dados (tbody > tr)
         tbody = table.find("tbody")
         rows = tbody.find_all("tr") if tbody else table.find_all("tr")[1:]

@@ -136,7 +121,6 @@ def parse_indicador(html: str, produto: str) -> list[Indicador]:
            if len(cells) < 2:
                continue

-            # Extrai data e valor
            data_str = cells[0].get_text(strip=True)
            valor_str = cells[1].get_text(strip=True)

@@ -152,7 +136,6 @@ def parse_indicador(html: str, produto: str) -> list[Indicador]:
                )
                continue

-            # Extrai variação se disponível
            meta: dict[str, str | float] = {}
            if len(cells) >= 3:
                var_str = cells[2].get_text(strip=True)
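The comments deleted in this parser documented the Brazilian price format ("R$ 1.234,56" with a dot as thousands separator and a comma as decimal separator). The self-contained mirror below reproduces the value-parsing steps shown above for reference; the except clause is an assumption, since the hunk cuts off right after try:.

import re
from decimal import Decimal, InvalidOperation


def parse_valor_brl(valor_str: str) -> Decimal | None:
    """Illustrative copy of the _parse_valor steps visible in the diff."""
    valor_str = valor_str.strip()
    valor_str = re.sub(r"R\$\s*", "", valor_str)               # drop the "R$" prefix
    valor_str = valor_str.replace(".", "").replace(",", ".")   # "1.234,56" -> "1234.56"
    try:
        return Decimal(valor_str)
    except (InvalidOperation, ValueError):  # assumed error handling
        return None


print(parse_valor_brl("R$ 1.234,56"))  # Decimal('1234.56')
print(parse_valor_brl("abc"))          # None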
@@ -0,0 +1,205 @@
+"""Sistema de plugins para extensibilidade do agrobr."""
+
+from __future__ import annotations
+
+import importlib
+import importlib.util
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, TypeVar
+
+import structlog
+
+logger = structlog.get_logger()
+
+T = TypeVar("T", bound="Plugin")
+
+_registry: dict[str, type[Plugin]] = {}
+_instances: dict[str, Plugin] = {}
+
+
+@dataclass
+class PluginMeta:
+    name: str
+    version: str
+    description: str
+    author: str = ""
+    requires: list[str] = field(default_factory=list)
+    provides: list[str] = field(default_factory=list)
+
+
+class Plugin(ABC):
+    meta: PluginMeta
+
+    @abstractmethod
+    def setup(self) -> None:
+        pass
+
+    @abstractmethod
+    def teardown(self) -> None:
+        pass
+
+    def is_enabled(self) -> bool:
+        return True
+
+
+class SourcePlugin(Plugin):
+    @abstractmethod
+    async def fetch(self, **kwargs: Any) -> Any:
+        pass
+
+    @abstractmethod
+    async def parse(self, content: Any, **kwargs: Any) -> Any:
+        pass
+
+    def get_source_name(self) -> str:
+        return self.meta.name
+
+
+class ParserPlugin(Plugin):
+    @abstractmethod
+    def can_parse(self, content: str) -> bool:
+        pass
+
+    @abstractmethod
+    def parse(self, content: str, **kwargs: Any) -> Any:
+        pass
+
+    @property
+    def priority(self) -> int:
+        return 0
+
+
+class ExporterPlugin(Plugin):
+    @abstractmethod
+    def export(self, data: Any, path: Path, **kwargs: Any) -> Path:
+        pass
+
+    @abstractmethod
+    def get_extension(self) -> str:
+        pass
+
+
+class ValidatorPlugin(Plugin):
+    @abstractmethod
+    def validate(self, data: Any, **kwargs: Any) -> tuple[bool, list[str]]:
+        pass
+
+
+def register(plugin_class: type[T]) -> type[T]:
+    if not hasattr(plugin_class, "meta"):
+        raise ValueError(f"Plugin {plugin_class.__name__} must have 'meta' attribute")
+
+    name = plugin_class.meta.name
+    if name in _registry:
+        logger.warning(
+            "plugin_override", name=name, old=_registry[name].__name__, new=plugin_class.__name__
+        )
+
+    _registry[name] = plugin_class
+    logger.info("plugin_registered", name=name, version=plugin_class.meta.version)
+    return plugin_class
+
+
+def get_plugin(name: str) -> Plugin | None:
+    if name in _instances:
+        return _instances[name]
+
+    if name not in _registry:
+        return None
+
+    plugin_class = _registry[name]
+    instance = plugin_class()
+    instance.setup()
+    _instances[name] = instance
+    return instance
+
+
+def list_plugins() -> list[PluginMeta]:
+    return [cls.meta for cls in _registry.values()]
+
+
+def list_plugins_by_type(plugin_type: type[Plugin]) -> list[PluginMeta]:
+    return [cls.meta for cls in _registry.values() if issubclass(cls, plugin_type)]
+
+
+def load_plugin_from_file(path: Path) -> type[Plugin] | None:
+    if not path.exists():
+        logger.error("plugin_file_not_found", path=str(path))
+        return None
+
+    spec = importlib.util.spec_from_file_location(path.stem, path)
+    if spec is None or spec.loader is None:
+        logger.error("plugin_spec_failed", path=str(path))
+        return None
+
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+
+    for attr_name in dir(module):
+        attr = getattr(module, attr_name)
+        if (
+            isinstance(attr, type)
+            and issubclass(attr, Plugin)
+            and attr is not Plugin
+            and hasattr(attr, "meta")
+        ):
+            register(attr)
+            return attr
+
+    logger.warning("no_plugin_found", path=str(path))
+    return None
+
+
+def load_plugins_from_dir(directory: Path) -> list[type[Plugin]]:
+    loaded: list[type[Plugin]] = []
+    if not directory.exists():
+        return loaded
+
+    for path in directory.glob("*.py"):
+        if path.name.startswith("_"):
+            continue
+        plugin_class = load_plugin_from_file(path)
+        if plugin_class:
+            loaded.append(plugin_class)
+
+    return loaded
+
+
+def unload_plugin(name: str) -> bool:
+    if name in _instances:
+        _instances[name].teardown()
+        del _instances[name]
+
+    if name in _registry:
+        del _registry[name]
+        logger.info("plugin_unloaded", name=name)
+        return True
+
+    return False
+
+
+def unload_all() -> None:
+    for name in list(_instances.keys()):
+        _instances[name].teardown()
+    _instances.clear()
+    _registry.clear()
+
+
+__all__ = [
+    "Plugin",
+    "PluginMeta",
+    "SourcePlugin",
+    "ParserPlugin",
+    "ExporterPlugin",
+    "ValidatorPlugin",
+    "register",
+    "get_plugin",
+    "list_plugins",
+    "list_plugins_by_type",
+    "load_plugin_from_file",
+    "load_plugins_from_dir",
+    "unload_plugin",
+    "unload_all",
+]
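This new module adds a plugin registry: subclass one of the base types, attach a PluginMeta, and decorate with register; get_plugin then instantiates lazily and calls setup. A minimal usage sketch based on the code above; the module's file path is not shown in this diff, so the import below assumes it ships as agrobr.plugins, and the JsonExporter class is a hypothetical example plugin.

from pathlib import Path
from typing import Any

from agrobr.plugins import ExporterPlugin, PluginMeta, get_plugin, list_plugins, register


@register
class JsonExporter(ExporterPlugin):
    meta = PluginMeta(
        name="json_exporter",
        version="0.1.0",
        description="Exports DataFrames to JSON (example plugin).",
    )

    def setup(self) -> None:      # called lazily by get_plugin()
        pass

    def teardown(self) -> None:   # called by unload_plugin()/unload_all()
        pass

    def export(self, data: Any, path: Path, **kwargs: Any) -> Path:
        # Assumes a pandas DataFrame-like object with to_json().
        path.write_text(data.to_json(orient="records"), encoding="utf-8")
        return path

    def get_extension(self) -> str:
        return ".json"


exporter = get_plugin("json_exporter")    # instantiates the class and runs setup()
print([m.name for m in list_plugins()])   # ['json_exporter']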