agrobr 0.1.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
agrobr/__init__.py CHANGED
@@ -2,9 +2,10 @@
2
2
 
3
3
from __future__ import annotations

# Package version string.
__version__ = "0.2.0"
__author__ = "Bruno"

# Data-source submodules (CEPEA, CONAB, IBGE) and the shared metadata model.
from agrobr import cepea, conab, ibge
from agrobr.models import MetaInfo

# Explicit public API of the package.
__all__ = ["cepea", "conab", "ibge", "MetaInfo", "__version__"]
@@ -0,0 +1,343 @@
1
+ """Benchmark suite para testes de performance do agrobr."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import statistics
6
+ import time
7
+ from collections.abc import Callable, Coroutine
8
+ from dataclasses import dataclass, field
9
+ from datetime import datetime
10
+ from typing import Any
11
+
12
+ import structlog
13
+
14
+ logger = structlog.get_logger()
15
+
16
+
17
@dataclass
class BenchmarkResult:
    """Timing statistics collected for a single benchmark."""

    name: str
    iterations: int
    total_time_ms: float
    mean_time_ms: float
    median_time_ms: float
    min_time_ms: float
    max_time_ms: float
    std_dev_ms: float
    times_ms: list[float] = field(default_factory=list)
    timestamp: datetime = field(default_factory=datetime.now)
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict; timing fields rounded to 2 decimals.

        Note: ``times_ms`` (the raw samples) is intentionally not included.
        """
        rounded_fields = (
            "total_time_ms",
            "mean_time_ms",
            "median_time_ms",
            "min_time_ms",
            "max_time_ms",
            "std_dev_ms",
        )
        payload: dict[str, Any] = {"name": self.name, "iterations": self.iterations}
        for attr in rounded_fields:
            payload[attr] = round(getattr(self, attr), 2)
        payload["timestamp"] = self.timestamp.isoformat()
        payload["metadata"] = self.metadata
        return payload

    def summary(self) -> str:
        """Return a one-line human-readable summary of the run."""
        return (
            f"{self.name}: mean={self.mean_time_ms:.2f}ms, "
            f"median={self.median_time_ms:.2f}ms, min={self.min_time_ms:.2f}ms, "
            f"max={self.max_time_ms:.2f}ms ({self.iterations} iterations)"
        )
58
+
59
+
60
@dataclass
class BenchmarkSuite:
    """Named collection of benchmark results."""

    name: str
    results: list[BenchmarkResult] = field(default_factory=list)
    timestamp: datetime = field(default_factory=datetime.now)

    def add_result(self, result: BenchmarkResult) -> None:
        """Append one result to the suite."""
        self.results.append(result)

    def to_dict(self) -> dict[str, Any]:
        """Serialize the suite and every contained result to a plain dict."""
        return {
            "name": self.name,
            "timestamp": self.timestamp.isoformat(),
            "results": [item.to_dict() for item in self.results],
        }

    def summary(self) -> str:
        """Return a multi-line report: header, separator, one line per result."""
        header = [f"Benchmark Suite: {self.name}", "=" * 50]
        return "\n".join(header + [item.summary() for item in self.results])
86
+
87
+
88
async def benchmark_async(
    name: str,
    func: Callable[..., Coroutine[Any, Any, Any]],
    iterations: int = 10,
    warmup: int = 1,
    **kwargs: Any,
) -> BenchmarkResult:
    """
    Benchmark an async function.

    Runs *warmup* un-timed calls first, then times *iterations* calls with
    ``time.perf_counter`` and aggregates the per-call statistics.

    Args:
        name: Benchmark name.
        func: Async callable under test.
        iterations: Number of timed iterations.
        warmup: Number of un-timed warm-up iterations.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        BenchmarkResult with aggregate timing statistics in milliseconds.
    """
    for _ in range(warmup):
        await func(**kwargs)

    times: list[float] = []
    for _ in range(iterations):
        start = time.perf_counter()
        await func(**kwargs)
        times.append((time.perf_counter() - start) * 1000)

    return BenchmarkResult(
        name=name,
        iterations=iterations,
        total_time_ms=sum(times),
        mean_time_ms=statistics.mean(times),
        median_time_ms=statistics.median(times),
        min_time_ms=min(times),
        max_time_ms=max(times),
        # stdev is undefined for fewer than two samples; report 0.0
        # (float, matching the declared field type) instead of int 0.
        std_dev_ms=statistics.stdev(times) if len(times) > 1 else 0.0,
        times_ms=times,
        metadata={"warmup": warmup, "kwargs": str(kwargs)},
    )
130
+
131
+
132
def benchmark_sync(
    name: str,
    func: Callable[..., Any],
    iterations: int = 10,
    warmup: int = 1,
    **kwargs: Any,
) -> BenchmarkResult:
    """
    Benchmark a synchronous function.

    Runs *warmup* un-timed calls first, then times *iterations* calls with
    ``time.perf_counter`` and aggregates the per-call statistics.

    Args:
        name: Benchmark name.
        func: Callable under test.
        iterations: Number of timed iterations.
        warmup: Number of un-timed warm-up iterations.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        BenchmarkResult with aggregate timing statistics in milliseconds.
    """
    for _ in range(warmup):
        func(**kwargs)

    times: list[float] = []
    for _ in range(iterations):
        start = time.perf_counter()
        func(**kwargs)
        times.append((time.perf_counter() - start) * 1000)

    return BenchmarkResult(
        name=name,
        iterations=iterations,
        total_time_ms=sum(times),
        mean_time_ms=statistics.mean(times),
        median_time_ms=statistics.median(times),
        min_time_ms=min(times),
        max_time_ms=max(times),
        # stdev is undefined for fewer than two samples; report 0.0
        # (float, matching the declared field type) instead of int 0.
        std_dev_ms=statistics.stdev(times) if len(times) > 1 else 0.0,
        times_ms=times,
        metadata={"warmup": warmup, "kwargs": str(kwargs)},
    )
174
+
175
+
176
async def run_api_benchmarks(iterations: int = 5) -> BenchmarkSuite:
    """
    Run benchmarks against the main public APIs.

    Each target is benchmarked independently; a failing target is logged
    with ``benchmark_failed`` and skipped so the remaining targets still run.

    Args:
        iterations: Number of iterations per benchmark.

    Returns:
        BenchmarkSuite with the collected results.
    """
    from agrobr import cepea, conab, ibge

    suite = BenchmarkSuite(name="agrobr_api_benchmarks")

    # (benchmark label, short name used in the failure log, coroutine fn, kwargs)
    targets: list[tuple[str, str, Any, dict[str, Any]]] = [
        (
            "cepea.indicador(soja, offline=True)",
            "cepea.indicador",
            cepea.indicador,
            {"produto": "soja", "offline": True},
        ),
        ("cepea.produtos()", "cepea.produtos", cepea.produtos, {}),
        ("conab.produtos()", "conab.produtos", conab.produtos, {}),
        ("ibge.produtos_pam()", "ibge.produtos_pam", ibge.produtos_pam, {}),
    ]

    for label, short_name, func, kwargs in targets:
        try:
            result = await benchmark_async(label, func, iterations=iterations, **kwargs)
            suite.add_result(result)
        except Exception as e:
            logger.warning("benchmark_failed", name=short_name, error=str(e))

    return suite
233
+
234
+
235
def run_contract_benchmarks(iterations: int = 100) -> BenchmarkSuite:
    """
    Run contract-validation benchmarks.

    Validates synthetic CEPEA indicator frames of 10 and 1000 rows against
    ``CEPEA_INDICADOR_V1``.

    Args:
        iterations: Number of iterations per benchmark.

    Returns:
        BenchmarkSuite with the collected results.
    """
    from agrobr.contracts.cepea import CEPEA_INDICADOR_V1

    suite = BenchmarkSuite(name="contract_validation_benchmarks")

    # Same benchmark at two frame sizes to expose how validation scales.
    for rows, start in ((10, "2024-01-01"), (1000, "2020-01-01")):
        result = benchmark_sync(
            f"contract.validate({rows} rows)",
            CEPEA_INDICADOR_V1.validate,
            iterations=iterations,
            df=_make_cepea_frame(rows, start),
        )
        suite.add_result(result)

    return suite


def _make_cepea_frame(rows: int, start: str):
    """Build a synthetic CEPEA indicator DataFrame with *rows* daily rows."""
    import pandas as pd

    return pd.DataFrame(
        {
            "data": pd.date_range(start, periods=rows),
            "produto": ["soja"] * rows,
            "praca": ["paranagua"] * rows,
            "valor": [150.0] * rows,
            "unidade": ["BRL/sc60kg"] * rows,
            "fonte": ["cepea"] * rows,
            "metodologia": [None] * rows,
            "anomalies": [None] * rows,
        }
    )
294
+
295
+
296
def run_semantic_benchmarks(iterations: int = 50) -> BenchmarkSuite:
    """
    Run semantic-validation benchmarks.

    Benchmarks ``validate_semantic`` on a 100-row synthetic frame.

    Args:
        iterations: Number of iterations per benchmark.

    Returns:
        BenchmarkSuite with the collected results.
    """
    import pandas as pd

    from agrobr.validators.semantic import validate_semantic

    suite = BenchmarkSuite(name="semantic_validation_benchmarks")

    # Synthetic 100-row frame with price, yield and area columns;
    # "valor" ramps upward (150.0, 150.5, ...) rather than staying constant.
    df = pd.DataFrame(
        {
            "data": pd.date_range("2024-01-01", periods=100),
            "valor": [150.0 + i * 0.5 for i in range(100)],
            "produto": ["soja"] * 100,
            "produtividade": [3500.0] * 100,
            "area_plantada": [1000.0] * 100,
            "area_colhida": [950.0] * 100,
            "safra": ["2024/25"] * 100,
        }
    )

    result = benchmark_sync(
        "validate_semantic(100 rows)",
        validate_semantic,
        iterations=iterations,
        df=df,
    )
    suite.add_result(result)

    return suite
333
+
334
+
335
# Public surface of the benchmark module.
__all__ = [
    "BenchmarkResult",
    "BenchmarkSuite",
    "benchmark_async",
    "benchmark_sync",
    "run_api_benchmarks",
    "run_contract_benchmarks",
    "run_semantic_benchmarks",
]
agrobr/cache/policies.py CHANGED
@@ -1,10 +1,6 @@
1
- """
2
- Políticas de cache e TTL por fonte.
3
- """
4
-
5
1
  from __future__ import annotations
6
2
 
7
- from datetime import datetime, timedelta
3
+ from datetime import datetime, time, timedelta
8
4
  from enum import Enum
9
5
  from typing import NamedTuple
10
6
 
@@ -17,6 +13,7 @@ class CachePolicy(NamedTuple):
17
13
  ttl_seconds: int
18
14
  stale_max_seconds: int
19
15
  description: str
16
+ smart_expiry: bool = False
20
17
 
21
18
 
22
19
  class TTL(Enum):
@@ -33,41 +30,52 @@ class TTL(Enum):
33
30
  DAYS_90 = 90 * 24 * 60 * 60
34
31
 
35
32
 
33
# Local time at which CEPEA publishes its daily update (around 17-18h);
# smart-expiry cache policies are cut off at this moment.
CEPEA_UPDATE_HOUR = 18
CEPEA_UPDATE_MINUTE = 0
35
+
36
+
36
37
# Cache policy per source key.  Entries with ``smart_expiry=True`` expire at
# the fixed CEPEA update hour instead of after a fixed TTL; ``description``
# strings are user-facing and kept in pt-BR.
POLICIES: dict[str, CachePolicy] = {
    "cepea_diario": CachePolicy(
        ttl_seconds=TTL.HOURS_24.value,
        stale_max_seconds=TTL.HOURS_24.value * 2,
        description="CEPEA indicador diário (expira às 18h)",
        smart_expiry=True,
    ),
    "cepea_semanal": CachePolicy(
        ttl_seconds=TTL.HOURS_24.value,
        stale_max_seconds=TTL.DAYS_7.value,
        description="CEPEA indicador semanal (atualiza sexta)",
        smart_expiry=False,
    ),
    "conab_safras": CachePolicy(
        ttl_seconds=TTL.HOURS_24.value,
        stale_max_seconds=TTL.DAYS_30.value,
        description="CONAB safras (atualiza mensalmente)",
        smart_expiry=False,
    ),
    "conab_balanco": CachePolicy(
        ttl_seconds=TTL.HOURS_24.value,
        stale_max_seconds=TTL.DAYS_30.value,
        description="CONAB balanço (atualiza mensalmente)",
        smart_expiry=False,
    ),
    "ibge_pam": CachePolicy(
        ttl_seconds=TTL.DAYS_7.value,
        stale_max_seconds=TTL.DAYS_90.value,
        description="IBGE PAM (atualiza anualmente)",
        smart_expiry=False,
    ),
    "ibge_lspa": CachePolicy(
        ttl_seconds=TTL.HOURS_24.value,
        stale_max_seconds=TTL.DAYS_30.value,
        description="IBGE LSPA (atualiza mensalmente)",
        smart_expiry=False,
    ),
    "noticias_agricolas": CachePolicy(
        ttl_seconds=TTL.HOURS_24.value,
        stale_max_seconds=TTL.HOURS_24.value * 2,
        description="Notícias Agrícolas (expira às 18h, mirror CEPEA)",
        smart_expiry=True,
    ),
}
73
81
 
@@ -106,6 +114,35 @@ def get_policy(source: Fonte | str, endpoint: str | None = None) -> CachePolicy:
106
114
  return POLICIES[default_key]
107
115
 
108
116
 
117
def _get_smart_expiry_time() -> datetime:
    """
    Compute the next smart-expiry instant for CEPEA-backed caches.

    CEPEA updates its data around 17-18h, so the cache is set to expire at
    18:00 local time: today if that moment has not yet passed, otherwise
    tomorrow.

    Returns:
        Naive local datetime of the next expiry.
    """
    now = datetime.now()
    expiry = datetime.combine(now.date(), time(CEPEA_UPDATE_HOUR, CEPEA_UPDATE_MINUTE))
    if expiry <= now:
        # Today's boundary has passed; roll over to tomorrow's 18h.
        expiry += timedelta(days=1)
    return expiry
134
+
135
+
136
def _get_last_expiry_time() -> datetime:
    """
    Return the most recent past expiry instant (the previous 18h boundary).

    Returns:
        Naive local datetime of the last expiry.
    """
    # The next expiry is always less than 24h away, so stepping one day
    # back yields the most recent 18h boundary that has already passed.
    return _get_smart_expiry_time() - timedelta(days=1)
144
+
145
+
109
146
  def get_ttl(source: Fonte | str, endpoint: str | None = None) -> int:
110
147
  """
111
148
  Retorna TTL em segundos para uma fonte.
@@ -134,20 +171,29 @@ def get_stale_max(source: Fonte | str, endpoint: str | None = None) -> int:
134
171
  return get_policy(source, endpoint).stale_max_seconds
135
172
 
136
173
 
137
- def is_expired(created_at: datetime, source: Fonte | str) -> bool:
174
def is_expired(created_at: datetime, source: Fonte | str, endpoint: str | None = None) -> bool:
    """
    Check whether a cache entry is expired.

    Sources flagged with ``smart_expiry`` (CEPEA) expire at the fixed 18h
    boundary; every other source uses its policy's fixed TTL.

    Args:
        created_at: Creation time of the cache entry.
        source: Data source.
        endpoint: Specific endpoint.

    Returns:
        True if the entry is expired.
    """
    policy = get_policy(source, endpoint)

    if policy.smart_expiry:
        # Expired exactly when the entry predates the most recent 18h boundary.
        return created_at < _get_last_expiry_time()

    deadline = created_at + timedelta(seconds=policy.ttl_seconds)
    return datetime.now() > deadline
151
197
 
152
198
 
153
199
  def is_stale_acceptable(created_at: datetime, source: Fonte | str) -> bool:
@@ -163,13 +209,16 @@ def is_stale_acceptable(created_at: datetime, source: Fonte | str) -> bool:
163
209
  """
164
210
  stale_max = get_stale_max(source)
165
211
  max_acceptable = created_at + timedelta(seconds=stale_max)
166
- return datetime.utcnow() <= max_acceptable
212
+ return datetime.now() <= max_acceptable
167
213
 
168
214
 
169
215
def calculate_expiry(source: Fonte | str, endpoint: str | None = None) -> datetime:
    """
    Compute the expiry timestamp for a new cache entry.

    Sources flagged with ``smart_expiry`` (CEPEA) expire at the next 18h
    boundary; every other source expires after its policy's fixed TTL.

    Args:
        source: Data source.
        endpoint: Specific endpoint.

    Returns:
        Expiry datetime (naive local time).
    """
    policy = get_policy(source, endpoint)
    return (
        _get_smart_expiry_time()
        if policy.smart_expiry
        else datetime.now() + timedelta(seconds=policy.ttl_seconds)
    )
182
235
 
183
236
 
184
237
  class InvalidationReason(Enum):
@@ -196,6 +249,7 @@ def should_refresh(
196
249
  created_at: datetime,
197
250
  source: Fonte | str,
198
251
  force: bool = False,
252
+ endpoint: str | None = None,
199
253
  ) -> tuple[bool, str]:
200
254
  """
201
255
  Determina se cache deve ser atualizado.
@@ -204,6 +258,7 @@ def should_refresh(
204
258
  created_at: Data de criação do cache
205
259
  source: Fonte de dados
206
260
  force: Forçar atualização
261
+ endpoint: Endpoint específico
207
262
 
208
263
  Returns:
209
264
  Tupla (deve_atualizar, razão)
@@ -211,7 +266,7 @@ def should_refresh(
211
266
  if force:
212
267
  return True, "force_refresh"
213
268
 
214
- if is_expired(created_at, source):
269
+ if is_expired(created_at, source, endpoint):
215
270
  return True, "expired"
216
271
 
217
272
  return False, "fresh"
@@ -238,3 +293,30 @@ def format_ttl(seconds: int) -> str:
238
293
 
239
294
  days = seconds // 86400
240
295
  return f"{days} dia{'s' if days > 1 else ''}"
296
+
297
+
298
def get_next_update_info(source: Fonte | str) -> dict[str, str]:
    """
    Return information about when a source's cache next refreshes.

    Args:
        source: Data source.

    Returns:
        Dict describing the expiry: ``type`` is ``"smart"`` with an
        ``expires_at`` timestamp for smart-expiry sources, or ``"ttl"``
        with the formatted TTL for everything else.
    """
    policy = get_policy(source)

    if policy.smart_expiry:
        next_expiry = _get_smart_expiry_time()
        return {
            "type": "smart",
            "expires_at": next_expiry.strftime("%Y-%m-%d %H:%M"),
            "description": f"Expira às {CEPEA_UPDATE_HOUR}h (atualização CEPEA)",
        }

    return {
        "type": "ttl",
        "ttl": format_ttl(policy.ttl_seconds),
        "description": policy.description,
    }
+ }