PyPI - agrobr - Versions diffs - 0.1.0__py3-none-any.whl - Mend

agrobr 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58)

agrobr/__init__.py +10 -0
agrobr/alerts/__init__.py +7 -0
agrobr/alerts/notifier.py +167 -0
agrobr/cache/__init__.py +31 -0
agrobr/cache/duckdb_store.py +433 -0
agrobr/cache/history.py +317 -0
agrobr/cache/migrations.py +82 -0
agrobr/cache/policies.py +240 -0
agrobr/cepea/__init__.py +7 -0
agrobr/cepea/api.py +360 -0
agrobr/cepea/client.py +273 -0
agrobr/cepea/parsers/__init__.py +37 -0
agrobr/cepea/parsers/base.py +35 -0
agrobr/cepea/parsers/consensus.py +300 -0
agrobr/cepea/parsers/detector.py +108 -0
agrobr/cepea/parsers/fingerprint.py +226 -0
agrobr/cepea/parsers/v1.py +305 -0
agrobr/cli.py +323 -0
agrobr/conab/__init__.py +21 -0
agrobr/conab/api.py +239 -0
agrobr/conab/client.py +219 -0
agrobr/conab/parsers/__init__.py +7 -0
agrobr/conab/parsers/v1.py +383 -0
agrobr/constants.py +205 -0
agrobr/exceptions.py +104 -0
agrobr/health/__init__.py +23 -0
agrobr/health/checker.py +202 -0
agrobr/health/reporter.py +314 -0
agrobr/http/__init__.py +9 -0
agrobr/http/browser.py +214 -0
agrobr/http/rate_limiter.py +69 -0
agrobr/http/retry.py +93 -0
agrobr/http/user_agents.py +67 -0
agrobr/ibge/__init__.py +19 -0
agrobr/ibge/api.py +273 -0
agrobr/ibge/client.py +256 -0
agrobr/models.py +85 -0
agrobr/normalize/__init__.py +64 -0
agrobr/normalize/dates.py +303 -0
agrobr/normalize/encoding.py +102 -0
agrobr/normalize/regions.py +308 -0
agrobr/normalize/units.py +278 -0
agrobr/noticias_agricolas/__init__.py +6 -0
agrobr/noticias_agricolas/client.py +222 -0
agrobr/noticias_agricolas/parser.py +187 -0
agrobr/sync.py +147 -0
agrobr/telemetry/__init__.py +17 -0
agrobr/telemetry/collector.py +153 -0
agrobr/utils/__init__.py +5 -0
agrobr/utils/logging.py +59 -0
agrobr/validators/__init__.py +35 -0
agrobr/validators/sanity.py +286 -0
agrobr/validators/structural.py +313 -0
agrobr-0.1.0.dist-info/METADATA +243 -0
agrobr-0.1.0.dist-info/RECORD +58 -0
agrobr-0.1.0.dist-info/WHEEL +4 -0
agrobr-0.1.0.dist-info/entry_points.txt +2 -0
agrobr-0.1.0.dist-info/licenses/LICENSE +21 -0

agrobr/health/__init__.py ADDED Viewed

@@ -0,0 +1,23 @@
+"""Health checks automatizados."""
+from __future__ import annotations
+from .checker import (
+    CheckResult,
+    CheckStatus,
+    check_source,
+    run_all_checks,
+)
+from .reporter import (
+    HealthReport,
+    generate_report,
+)
+__all__: list[str] = [
+    "CheckResult",
+    "CheckStatus",
+    "check_source",
+    "run_all_checks",
+    "HealthReport",
+    "generate_report",
+]

agrobr/health/checker.py ADDED Viewed

@@ -0,0 +1,202 @@
+"""Health checks automatizados para fontes de dados."""
+from __future__ import annotations
+import asyncio
+import time
+from dataclasses import dataclass
+from datetime import datetime
+from enum import StrEnum
+from typing import Any
+import structlog
+from agrobr.constants import Fonte
+logger = structlog.get_logger()
+class CheckStatus(StrEnum):
+    """Outcome of a single health check; each member equals its lowercase string value."""
+    OK = "ok"
+    WARNING = "warning"
+    FAILED = "failed"
+@dataclass
+class CheckResult:
+    """Result of one health check."""
+    # Data source the check was run against.
+    source: Fonte
+    # Overall outcome (ok / warning / failed).
+    status: CheckStatus
+    # Elapsed time in milliseconds, as measured by the checker that built this result.
+    latency_ms: float
+    # Human-readable explanation of the outcome.
+    message: str
+    # Free-form diagnostic data collected while the check ran.
+    details: dict[str, Any]
+    # Moment the result was created; the checkers in this module pass datetime.utcnow().
+    timestamp: datetime
+async def check_cepea() -> CheckResult:
+    """Run the CEPEA health check.
+    Fetches the "soja" indicator page, then in order: flags slow responses,
+    compares the page fingerprint against the stored baseline (when one is
+    available), and finally verifies that a parser extracts at least one record.
+    Returns:
+        CheckResult describing the first problem found, or an OK/WARNING
+        result when every step completed. Any exception is logged and
+        reported as a FAILED result instead of propagating.
+    """
+    from agrobr.cepea import client as cepea_client
+    from agrobr.cepea.parsers import fingerprint as fp
+    from agrobr.cepea.parsers.detector import get_parser_with_fallback
+    start = time.monotonic()
+    details: dict[str, Any] = {}
+    def _outcome(status: CheckStatus, message: str, elapsed_ms: float) -> CheckResult:
+        # Single place that assembles the result; `details` is shared by reference
+        # so whatever was collected up to this point is included.
+        return CheckResult(
+            source=Fonte.CEPEA,
+            status=status,
+            latency_ms=elapsed_ms,
+            message=message,
+            details=details,
+            timestamp=datetime.utcnow(),
+        )
+    try:
+        html = await cepea_client.fetch_indicador_page("soja")
+        latency = (time.monotonic() - start) * 1000
+        details["fetch_ok"] = True
+        details["latency_ms"] = latency
+        # A slow fetch short-circuits the remaining steps with a warning.
+        if latency > 5000:
+            return _outcome(CheckStatus.WARNING, f"High latency: {latency:.0f}ms", latency)
+        current_fp = fp.extract_fingerprint(html, Fonte.CEPEA, "health_check")
+        baseline_fp = fp.load_baseline_fingerprint(".structures/cepea_baseline.json")
+        if baseline_fp:
+            similarity, diff = fp.compare_fingerprints(current_fp, baseline_fp)
+            details["fingerprint_similarity"] = similarity
+            details["fingerprint_diff"] = diff
+            if similarity < 0.70:
+                return _outcome(
+                    CheckStatus.FAILED,
+                    f"Layout changed significantly: {similarity:.1%} similarity",
+                    latency,
+                )
+            if similarity < 0.85:
+                # Not fatal: remembered and surfaced as a WARNING at the end.
+                details["warning"] = "Fingerprint drift detected"
+        parser, results = await get_parser_with_fallback(html, "soja")
+        details["parser_version"] = parser.version
+        details["records_parsed"] = len(results)
+        if not results:
+            return _outcome(CheckStatus.FAILED, "Parser returned no results", latency)
+        if details.get("warning"):
+            return _outcome(CheckStatus.WARNING, details["warning"], latency)
+        return _outcome(CheckStatus.OK, "All checks passed", latency)
+    except Exception as e:
+        # Latency is re-measured so it covers the time spent until the failure.
+        latency = (time.monotonic() - start) * 1000
+        logger.error("health_check_failed", source="cepea", error=str(e))
+        return _outcome(CheckStatus.FAILED, str(e), latency)
+async def check_conab() -> CheckResult:
+    """Run the CONAB health check (placeholder: always reports a WARNING)."""
+    began = time.monotonic()
+    # No work is performed between the two clock reads, so this is close to zero.
+    elapsed_ms = (time.monotonic() - began) * 1000
+    return CheckResult(
+        source=Fonte.CONAB,
+        status=CheckStatus.WARNING,
+        latency_ms=elapsed_ms,
+        message="CONAB health check not implemented yet",
+        details={},
+        timestamp=datetime.utcnow(),
+    )
+async def check_ibge() -> CheckResult:
+    """Run the IBGE health check (placeholder: always reports a WARNING)."""
+    began = time.monotonic()
+    # No work is performed between the two clock reads, so this is close to zero.
+    elapsed_ms = (time.monotonic() - began) * 1000
+    return CheckResult(
+        source=Fonte.IBGE,
+        status=CheckStatus.WARNING,
+        latency_ms=elapsed_ms,
+        message="IBGE health check not implemented yet",
+        details={},
+        timestamp=datetime.utcnow(),
+    )
+async def check_source(source: Fonte) -> CheckResult:
+    """
+    Run the health check for one specific source.
+    Args:
+        source: Source to verify
+    Returns:
+        CheckResult produced by the source's checker, or a FAILED result
+        with zero latency when no checker is registered for the source
+    """
+    registry = {
+        Fonte.CEPEA: check_cepea,
+        Fonte.CONAB: check_conab,
+        Fonte.IBGE: check_ibge,
+    }
+    probe = registry.get(source)
+    if probe:
+        return await probe()
+    # No checker registered for this source.
+    return CheckResult(
+        source=source,
+        status=CheckStatus.FAILED,
+        latency_ms=0,
+        message=f"Unknown source: {source}",
+        details={},
+        timestamp=datetime.utcnow(),
+    )
+async def run_all_checks() -> list[CheckResult]:
+    """Run the health checks for CEPEA, CONAB and IBGE concurrently.
+    Returns:
+        One CheckResult per source, in the order CEPEA, CONAB, IBGE
+    """
+    pending = [check_source(fonte) for fonte in (Fonte.CEPEA, Fonte.CONAB, Fonte.IBGE)]
+    gathered = await asyncio.gather(*pending)
+    return list(gathered)
+def format_results(results: list[CheckResult]) -> str:
+    """Format check results as plain text for display.
+    Args:
+        results: Results to render, one two-line entry each
+    Returns:
+        Newline-joined text starting with a title and a 40-character rule
+    """
+    symbols = {
+        CheckStatus.OK: "✓",
+        CheckStatus.WARNING: "⚠",
+        CheckStatus.FAILED: "✗",
+    }
+    output = ["Health Check Results", "=" * 40]
+    for item in results:
+        # Plain indexing on purpose: an unexpected status raises KeyError.
+        mark = symbols[item.status]
+        headline = (
+            f"{mark} {item.source.value.upper()}: "
+            f"{item.status.value} ({item.latency_ms:.0f}ms)"
+        )
+        output.extend([headline, f"  {item.message}"])
+    return "\n".join(output)

agrobr/health/reporter.py ADDED Viewed

@@ -0,0 +1,314 @@
+"""
+Geração de relatórios de health check.
+"""
+from __future__ import annotations
+import json
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+import structlog
+from ..constants import Fonte
+from .checker import CheckResult, CheckStatus, run_all_checks
+logger = structlog.get_logger()
+class HealthReport:
+    """Consolidated report over a list of health check results.
+    The report captures its own creation time and lazily computes summary
+    statistics. It can be rendered as a dict, JSON, Markdown, HTML, or
+    printed to the console.
+    """
+    def __init__(self, results: list[CheckResult]):
+        """Store the results and stamp the report with the current UTC time."""
+        self.results = results
+        self.timestamp = datetime.utcnow()
+        # Filled on first access of `summary`.
+        self._summary: dict[str, Any] | None = None
+    @property
+    def summary(self) -> dict[str, Any]:
+        """Summary statistics, computed once and then cached."""
+        if self._summary is None:
+            self._summary = self._calculate_summary()
+        return self._summary
+    def _calculate_summary(self) -> dict[str, Any]:
+        """Compute counts per status, success rate and average latency."""
+        total = len(self.results)
+        ok_count = sum(1 for r in self.results if r.status == CheckStatus.OK)
+        warning_count = sum(1 for r in self.results if r.status == CheckStatus.WARNING)
+        failed_count = sum(1 for r in self.results if r.status == CheckStatus.FAILED)
+        # Guard against division by zero for an empty report.
+        avg_latency = sum(r.latency_ms for r in self.results) / total if total > 0 else 0
+        return {
+            "total_checks": total,
+            "ok": ok_count,
+            "warnings": warning_count,
+            "failures": failed_count,
+            "success_rate": ok_count / total if total > 0 else 0,
+            "avg_latency_ms": avg_latency,
+            # Warnings do not count as failures here.
+            "all_passed": failed_count == 0,
+            "has_warnings": warning_count > 0,
+        }
+    @property
+    def all_passed(self) -> bool:
+        """True when no check has FAILED status (warnings are tolerated)."""
+        return bool(self.summary["all_passed"])
+    @property
+    def failures(self) -> list[CheckResult]:
+        """Results whose status is FAILED."""
+        return [r for r in self.results if r.status == CheckStatus.FAILED]
+    @property
+    def warnings(self) -> list[CheckResult]:
+        """Results whose status is WARNING."""
+        return [r for r in self.results if r.status == CheckStatus.WARNING]
+    def to_dict(self) -> dict[str, Any]:
+        """Convert the report to a plain dictionary.
+        Timestamps are ISO strings with a trailing "Z" appended.
+        """
+        return {
+            "timestamp": self.timestamp.isoformat() + "Z",
+            "summary": self.summary,
+            "checks": [
+                {
+                    "source": r.source.value,
+                    "status": r.status.value,
+                    "latency_ms": r.latency_ms,
+                    "message": r.message,
+                    "details": r.details,
+                    "timestamp": r.timestamp.isoformat() + "Z",
+                }
+                for r in self.results
+            ],
+        }
+    def to_json(self, indent: int = 2) -> str:
+        """Convert the report to JSON; non-serializable values fall back to str()."""
+        return json.dumps(self.to_dict(), indent=indent, default=str)
+    def save(self, path: str | Path, format: str = "json") -> None:
+        """
+        Save the report to a file, encoded as UTF-8.
+        Args:
+            path: Destination file path; missing parent directories are created
+            format: Output format ('json', 'html', 'md')
+        Raises:
+            ValueError: If the format is not supported. Raised before any
+                directory or file is created.
+        """
+        # Render first so an unsupported format leaves no filesystem side effects.
+        if format == "json":
+            content = self.to_json()
+        elif format == "html":
+            content = self.to_html()
+        elif format == "md":
+            content = self.to_markdown()
+        else:
+            raise ValueError(f"Formato não suportado: {format}")
+        path = Path(path)
+        path.parent.mkdir(parents=True, exist_ok=True)
+        # Explicit encoding: messages may contain non-ASCII text and the platform
+        # default encoding is not guaranteed to be able to represent it.
+        path.write_text(content, encoding="utf-8")
+        logger.info("health_report_saved", path=str(path), format=format)
+    @staticmethod
+    def _md_cell(text: Any) -> str:
+        """Make a value safe for a single Markdown table cell."""
+        # A raw pipe would start a new column and a newline would end the row.
+        return str(text).replace("|", "\\|").replace("\r\n", " ").replace("\n", " ")
+    def to_markdown(self) -> str:
+        """Convert the report to Markdown (summary, results table, failure details)."""
+        lines = [
+            "# Health Check Report",
+            "",
+            f"**Timestamp:** {self.timestamp.isoformat()}Z",
+            "",
+            "## Summary",
+            "",
+            f"- Total checks: {self.summary['total_checks']}",
+            f"- OK: {self.summary['ok']}",
+            f"- Warnings: {self.summary['warnings']}",
+            f"- Failures: {self.summary['failures']}",
+            f"- Success rate: {self.summary['success_rate']:.1%}",
+            f"- Average latency: {self.summary['avg_latency_ms']:.0f}ms",
+            "",
+            "## Results",
+            "",
+            "| Source | Status | Latency | Message |",
+            "|--------|--------|---------|---------|",
+        ]
+        for r in self.results:
+            status_emoji = {
+                CheckStatus.OK: ":white_check_mark:",
+                CheckStatus.WARNING: ":warning:",
+                CheckStatus.FAILED: ":x:",
+            }.get(r.status, "")
+            lines.append(
+                f"| {r.source.value} | {status_emoji} {r.status.value} | "
+                f"{r.latency_ms:.0f}ms | {self._md_cell(r.message)} |"
+            )
+        if self.failures:
+            lines.extend(
+                [
+                    "",
+                    "## Failures",
+                    "",
+                ]
+            )
+            for r in self.failures:
+                lines.extend(
+                    [
+                        f"### {r.source.value}",
+                        "",
+                        f"**Error:** {r.message}",
+                        "",
+                        "```json",
+                        json.dumps(r.details, indent=2, default=str),
+                        "```",
+                        "",
+                    ]
+                )
+        return "\n".join(lines)
+    def to_html(self) -> str:
+        """Convert the report to a standalone HTML page.
+        Text taken from the results is HTML-escaped, since messages may carry
+        arbitrary exception text.
+        """
+        from html import escape
+        status_colors = {
+            CheckStatus.OK: "#28a745",
+            CheckStatus.WARNING: "#ffc107",
+            CheckStatus.FAILED: "#dc3545",
+        }
+        rows = []
+        for r in self.results:
+            color = status_colors.get(r.status, "#6c757d")
+            rows.append(
+                f"""
+                <tr>
+                    <td>{escape(str(r.source.value))}</td>
+                    <td style="color: {color}; font-weight: bold;">{escape(str(r.status.value))}</td>
+                    <td>{r.latency_ms:.0f}ms</td>
+                    <td>{escape(str(r.message))}</td>
+                </tr>
+            """
+            )
+        return f"""
+<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="utf-8">
+    <title>Health Check Report</title>
+    <style>
+        body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; margin: 40px; }}
+        h1 {{ color: #333; }}
+        table {{ border-collapse: collapse; width: 100%; margin-top: 20px; }}
+        th, td {{ border: 1px solid #ddd; padding: 12px; text-align: left; }}
+        th {{ background-color: #f8f9fa; }}
+        .summary {{ background-color: #f8f9fa; padding: 20px; border-radius: 8px; margin-bottom: 20px; }}
+        .summary-item {{ display: inline-block; margin-right: 30px; }}
+        .summary-value {{ font-size: 24px; font-weight: bold; }}
+        .summary-label {{ color: #666; }}
+    </style>
+</head>
+<body>
+    <h1>Health Check Report</h1>
+    <p><strong>Timestamp:</strong> {self.timestamp.isoformat()}Z</p>
+    <div class="summary">
+        <div class="summary-item">
+            <div class="summary-value">{self.summary["total_checks"]}</div>
+            <div class="summary-label">Total</div>
+        </div>
+        <div class="summary-item">
+            <div class="summary-value" style="color: #28a745;">{self.summary["ok"]}</div>
+            <div class="summary-label">OK</div>
+        </div>
+        <div class="summary-item">
+            <div class="summary-value" style="color: #ffc107;">{self.summary["warnings"]}</div>
+            <div class="summary-label">Warnings</div>
+        </div>
+        <div class="summary-item">
+            <div class="summary-value" style="color: #dc3545;">{self.summary["failures"]}</div>
+            <div class="summary-label">Failures</div>
+        </div>
+        <div class="summary-item">
+            <div class="summary-value">{self.summary["avg_latency_ms"]:.0f}ms</div>
+            <div class="summary-label">Avg Latency</div>
+        </div>
+    </div>
+    <h2>Results</h2>
+    <table>
+        <thead>
+            <tr>
+                <th>Source</th>
+                <th>Status</th>
+                <th>Latency</th>
+                <th>Message</th>
+            </tr>
+        </thead>
+        <tbody>
+            {"".join(rows)}
+        </tbody>
+    </table>
+</body>
+</html>
+"""
+    def print_summary(self) -> None:
+        """Print a human-readable summary to the console."""
+        print("\n" + "=" * 60)
+        print("HEALTH CHECK REPORT")
+        print("=" * 60)
+        print(f"Timestamp: {self.timestamp.isoformat()}Z")
+        print()
+        for r in self.results:
+            status_symbol = {
+                CheckStatus.OK: "[OK]",
+                CheckStatus.WARNING: "[WARN]",
+                CheckStatus.FAILED: "[FAIL]",
+            }.get(r.status, "[?]")
+            print(f"  {status_symbol} {r.source.value}: {r.message} ({r.latency_ms:.0f}ms)")
+        print()
+        print("-" * 60)
+        print(
+            f"Total: {self.summary['total_checks']} | "
+            f"OK: {self.summary['ok']} | "
+            f"Warnings: {self.summary['warnings']} | "
+            f"Failures: {self.summary['failures']}"
+        )
+        print(
+            f"Success Rate: {self.summary['success_rate']:.1%} | "
+            f"Avg Latency: {self.summary['avg_latency_ms']:.0f}ms"
+        )
+        print("=" * 60 + "\n")
+async def generate_report(
+    sources: list[Fonte] | None = None,
+    save_path: str | Path | None = None,
+    format: str = "json",
+) -> HealthReport:
+    """
+    Generate a health check report.
+    Every check is executed via run_all_checks(); `sources` only filters
+    which of those results end up in the report.
+    Args:
+        sources: Sources to keep in the report (all when None or empty)
+        save_path: Path to save the report to (optional)
+        format: File format passed to HealthReport.save
+    Returns:
+        HealthReport
+    """
+    collected = await run_all_checks()
+    selected = [item for item in collected if item.source in sources] if sources else collected
+    report = HealthReport(selected)
+    if not save_path:
+        return report
+    report.save(save_path, format)
+    return report

agrobr/http/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+"""HTTP utilities - retry, rate limiting, user-agents."""
+from __future__ import annotations
+from agrobr.http.rate_limiter import RateLimiter
+from agrobr.http.retry import retry_async, with_retry
+from agrobr.http.user_agents import UserAgentRotator
+__all__ = ["retry_async", "with_retry", "RateLimiter", "UserAgentRotator"]