agrobr 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. agrobr/__init__.py +10 -0
  2. agrobr/alerts/__init__.py +7 -0
  3. agrobr/alerts/notifier.py +167 -0
  4. agrobr/cache/__init__.py +31 -0
  5. agrobr/cache/duckdb_store.py +433 -0
  6. agrobr/cache/history.py +317 -0
  7. agrobr/cache/migrations.py +82 -0
  8. agrobr/cache/policies.py +240 -0
  9. agrobr/cepea/__init__.py +7 -0
  10. agrobr/cepea/api.py +360 -0
  11. agrobr/cepea/client.py +273 -0
  12. agrobr/cepea/parsers/__init__.py +37 -0
  13. agrobr/cepea/parsers/base.py +35 -0
  14. agrobr/cepea/parsers/consensus.py +300 -0
  15. agrobr/cepea/parsers/detector.py +108 -0
  16. agrobr/cepea/parsers/fingerprint.py +226 -0
  17. agrobr/cepea/parsers/v1.py +305 -0
  18. agrobr/cli.py +323 -0
  19. agrobr/conab/__init__.py +21 -0
  20. agrobr/conab/api.py +239 -0
  21. agrobr/conab/client.py +219 -0
  22. agrobr/conab/parsers/__init__.py +7 -0
  23. agrobr/conab/parsers/v1.py +383 -0
  24. agrobr/constants.py +205 -0
  25. agrobr/exceptions.py +104 -0
  26. agrobr/health/__init__.py +23 -0
  27. agrobr/health/checker.py +202 -0
  28. agrobr/health/reporter.py +314 -0
  29. agrobr/http/__init__.py +9 -0
  30. agrobr/http/browser.py +214 -0
  31. agrobr/http/rate_limiter.py +69 -0
  32. agrobr/http/retry.py +93 -0
  33. agrobr/http/user_agents.py +67 -0
  34. agrobr/ibge/__init__.py +19 -0
  35. agrobr/ibge/api.py +273 -0
  36. agrobr/ibge/client.py +256 -0
  37. agrobr/models.py +85 -0
  38. agrobr/normalize/__init__.py +64 -0
  39. agrobr/normalize/dates.py +303 -0
  40. agrobr/normalize/encoding.py +102 -0
  41. agrobr/normalize/regions.py +308 -0
  42. agrobr/normalize/units.py +278 -0
  43. agrobr/noticias_agricolas/__init__.py +6 -0
  44. agrobr/noticias_agricolas/client.py +222 -0
  45. agrobr/noticias_agricolas/parser.py +187 -0
  46. agrobr/sync.py +147 -0
  47. agrobr/telemetry/__init__.py +17 -0
  48. agrobr/telemetry/collector.py +153 -0
  49. agrobr/utils/__init__.py +5 -0
  50. agrobr/utils/logging.py +59 -0
  51. agrobr/validators/__init__.py +35 -0
  52. agrobr/validators/sanity.py +286 -0
  53. agrobr/validators/structural.py +313 -0
  54. agrobr-0.1.0.dist-info/METADATA +243 -0
  55. agrobr-0.1.0.dist-info/RECORD +58 -0
  56. agrobr-0.1.0.dist-info/WHEEL +4 -0
  57. agrobr-0.1.0.dist-info/entry_points.txt +2 -0
  58. agrobr-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,23 @@
1
+ """Health checks automatizados."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .checker import (
6
+ CheckResult,
7
+ CheckStatus,
8
+ check_source,
9
+ run_all_checks,
10
+ )
11
+ from .reporter import (
12
+ HealthReport,
13
+ generate_report,
14
+ )
15
+
16
+ __all__: list[str] = [
17
+ "CheckResult",
18
+ "CheckStatus",
19
+ "check_source",
20
+ "run_all_checks",
21
+ "HealthReport",
22
+ "generate_report",
23
+ ]
@@ -0,0 +1,202 @@
1
+ """Health checks automatizados para fontes de dados."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import time
7
+ from dataclasses import dataclass
8
+ from datetime import datetime
9
+ from enum import StrEnum
10
+ from typing import Any
11
+
12
+ import structlog
13
+
14
+ from agrobr.constants import Fonte
15
+
16
+ logger = structlog.get_logger()
17
+
18
+
19
+ class CheckStatus(StrEnum):
20
+ OK = "ok"
21
+ WARNING = "warning"
22
+ FAILED = "failed"
23
+
24
+
25
@dataclass
class CheckResult:
    """Outcome of one health check run against a data source."""

    # Data source the check was executed for.
    source: Fonte
    # Overall verdict of the check.
    status: CheckStatus
    # Elapsed time of the check, in milliseconds.
    latency_ms: float
    # Human-readable summary of the outcome.
    message: str
    # Free-form diagnostics collected while the check ran.
    details: dict[str, Any]
    # Moment the result was produced.
    timestamp: datetime
35
+
36
+
37
async def check_cepea() -> CheckResult:
    """Run the health check for CEPEA.

    Steps, in order:

    1. Fetch the "soja" indicator page and measure the fetch latency.
       A latency above 5000 ms returns a WARNING immediately.
    2. Extract a structural fingerprint and compare it with the baseline
       stored at ``.structures/cepea_baseline.json`` (skipped when no
       baseline is loaded). Similarity below 0.70 is a FAILED result;
       below 0.85 is recorded as a drift warning.
    3. Parse the page; an empty parse result is a FAILED result.

    Any exception raised along the way is logged and converted into a
    FAILED result instead of propagating.

    Returns:
        CheckResult describing the outcome; ``details`` carries whatever
        diagnostics had been collected when the check finished.
    """
    from agrobr.cepea import client as cepea_client
    from agrobr.cepea.parsers import fingerprint as fp
    from agrobr.cepea.parsers.detector import get_parser_with_fallback

    start = time.monotonic()
    details: dict[str, Any] = {}

    def build(status: CheckStatus, latency_ms: float, message: str) -> CheckResult:
        # Single construction point so every exit path fills the same fields.
        return CheckResult(
            source=Fonte.CEPEA,
            status=status,
            latency_ms=latency_ms,
            message=message,
            details=details,
            timestamp=datetime.utcnow(),
        )

    try:
        html = await cepea_client.fetch_indicador_page("soja")
        latency = (time.monotonic() - start) * 1000

        details["fetch_ok"] = True
        details["latency_ms"] = latency

        if latency > 5000:
            return build(CheckStatus.WARNING, latency, f"High latency: {latency:.0f}ms")

        current_fp = fp.extract_fingerprint(html, Fonte.CEPEA, "health_check")
        baseline_fp = fp.load_baseline_fingerprint(".structures/cepea_baseline.json")

        if baseline_fp:
            similarity, diff = fp.compare_fingerprints(current_fp, baseline_fp)
            details["fingerprint_similarity"] = similarity
            details["fingerprint_diff"] = diff

            if similarity < 0.70:
                return build(
                    CheckStatus.FAILED,
                    latency,
                    f"Layout changed significantly: {similarity:.1%} similarity",
                )
            if similarity < 0.85:
                details["warning"] = "Fingerprint drift detected"

        parser, results = await get_parser_with_fallback(html, "soja")
        details["parser_version"] = parser.version
        details["records_parsed"] = len(results)

        if not results:
            return build(CheckStatus.FAILED, latency, "Parser returned no results")

        drift_note = details.get("warning")
        if drift_note:
            return build(CheckStatus.WARNING, latency, drift_note)
        return build(CheckStatus.OK, latency, "All checks passed")

    except Exception as e:
        latency = (time.monotonic() - start) * 1000
        error_type = type(e).__name__
        # Exceptions raised without arguments stringify to "", which would
        # leave the report with a blank failure message; fall back to the
        # exception class name so the failure is always identifiable.
        error_text = str(e) or error_type
        logger.error(
            "health_check_failed",
            source="cepea",
            error=error_text,
            error_type=error_type,
        )
        return build(CheckStatus.FAILED, latency, error_text)
118
+
119
+
120
async def check_conab() -> CheckResult:
    """Placeholder health check for CONAB.

    No probe is performed; the result is always a WARNING stating that the
    check is not implemented yet.
    """
    started_at = time.monotonic()
    elapsed_ms = (time.monotonic() - started_at) * 1000

    placeholder = CheckResult(
        source=Fonte.CONAB,
        status=CheckStatus.WARNING,
        latency_ms=elapsed_ms,
        message="CONAB health check not implemented yet",
        details={},
        timestamp=datetime.utcnow(),
    )
    return placeholder
132
+
133
+
134
async def check_ibge() -> CheckResult:
    """Placeholder health check for IBGE.

    No probe is performed; the result is always a WARNING stating that the
    check is not implemented yet.
    """
    started_at = time.monotonic()
    elapsed_ms = (time.monotonic() - started_at) * 1000

    placeholder = CheckResult(
        source=Fonte.IBGE,
        status=CheckStatus.WARNING,
        latency_ms=elapsed_ms,
        message="IBGE health check not implemented yet",
        details={},
        timestamp=datetime.utcnow(),
    )
    return placeholder
146
+
147
+
148
async def check_source(source: Fonte) -> CheckResult:
    """
    Run the health check registered for a single source.

    Args:
        source: Source to verify.

    Returns:
        The CheckResult produced by the source's checker, or a FAILED
        result with zero latency when no checker is registered for it.
    """
    dispatch = {
        Fonte.CEPEA: check_cepea,
        Fonte.CONAB: check_conab,
        Fonte.IBGE: check_ibge,
    }

    handler = dispatch.get(source)
    if handler is not None:
        return await handler()

    # No checker registered: report the problem instead of raising.
    return CheckResult(
        source=source,
        status=CheckStatus.FAILED,
        latency_ms=0,
        message=f"Unknown source: {source}",
        details={},
        timestamp=datetime.utcnow(),
    )
176
+
177
+
178
async def run_all_checks() -> list[CheckResult]:
    """Run the CEPEA, CONAB and IBGE health checks concurrently.

    Returns:
        One CheckResult per source, in the order CEPEA, CONAB, IBGE.
    """
    pending = [
        check_source(fonte)
        for fonte in (Fonte.CEPEA, Fonte.CONAB, Fonte.IBGE)
    ]
    gathered = await asyncio.gather(*pending)
    return list(gathered)
183
+
184
+
185
def format_results(results: list[CheckResult]) -> str:
    """Render check results as a plain-text block for display.

    The output starts with a two-line header, followed by two lines per
    result: a status line (symbol, upper-cased source, status, latency)
    and the result message.
    """

    def render(item: CheckResult) -> list[str]:
        # Indexing (not .get) on purpose: an unexpected status raises KeyError.
        marker = {
            CheckStatus.OK: "✓",
            CheckStatus.WARNING: "⚠",
            CheckStatus.FAILED: "✗",
        }[item.status]
        headline = (
            f"{marker} {item.source.value.upper()}: "
            f"{item.status.value} ({item.latency_ms:.0f}ms)"
        )
        return [headline, f" {item.message}"]

    output = ["Health Check Results", "=" * 40]
    for item in results:
        output.extend(render(item))

    return "\n".join(output)
@@ -0,0 +1,314 @@
1
+ """
2
+ Geração de relatórios de health check.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import json
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ import structlog
13
+
14
+ from ..constants import Fonte
15
+ from .checker import CheckResult, CheckStatus, run_all_checks
16
+
17
+ logger = structlog.get_logger()
18
+
19
+
20
class HealthReport:
    """Consolidated report over a batch of health-check results.

    The report keeps the results it was given, stamps its own creation time
    and can render itself as a dict, JSON, Markdown, HTML or console text.
    """

    def __init__(self, results: list[CheckResult]):
        """Store ``results`` and record the report creation time."""
        self.results = results
        # Naive UTC timestamp; the renderers below append "Z" themselves.
        self.timestamp = datetime.utcnow()
        # Summary is computed lazily on first access and then cached.
        self._summary: dict[str, Any] | None = None

    @property
    def summary(self) -> dict[str, Any]:
        """Aggregate statistics for the report (computed once, then cached)."""
        if self._summary is None:
            self._summary = self._calculate_summary()
        return self._summary

    def _calculate_summary(self) -> dict[str, Any]:
        """Compute counts per status, success rate and average latency."""
        total = len(self.results)
        ok_count = sum(1 for r in self.results if r.status == CheckStatus.OK)
        warning_count = sum(1 for r in self.results if r.status == CheckStatus.WARNING)
        failed_count = sum(1 for r in self.results if r.status == CheckStatus.FAILED)

        # Guard the divisions so an empty report yields zeros, not an error.
        avg_latency = sum(r.latency_ms for r in self.results) / total if total > 0 else 0

        return {
            "total_checks": total,
            "ok": ok_count,
            "warnings": warning_count,
            "failures": failed_count,
            "success_rate": ok_count / total if total > 0 else 0,
            "avg_latency_ms": avg_latency,
            # Warnings do not count as failures here.
            "all_passed": failed_count == 0,
            "has_warnings": warning_count > 0,
        }

    @property
    def all_passed(self) -> bool:
        """True when no check has the FAILED status."""
        return bool(self.summary["all_passed"])

    @property
    def failures(self) -> list[CheckResult]:
        """Results whose status is FAILED."""
        return [r for r in self.results if r.status == CheckStatus.FAILED]

    @property
    def warnings(self) -> list[CheckResult]:
        """Results whose status is WARNING."""
        return [r for r in self.results if r.status == CheckStatus.WARNING]

    @staticmethod
    def _md_cell(value: object) -> str:
        """Make ``value`` safe for a single Markdown table cell.

        Line breaks are folded into spaces and ``|`` is escaped, because
        either one would otherwise split the row.
        """
        single_line = " ".join(str(value).splitlines())
        return single_line.replace("|", "\\|")

    def to_dict(self) -> dict[str, Any]:
        """Convert the report into a plain dictionary."""
        return {
            "timestamp": self.timestamp.isoformat() + "Z",
            "summary": self.summary,
            "checks": [
                {
                    "source": r.source.value,
                    "status": r.status.value,
                    "latency_ms": r.latency_ms,
                    "message": r.message,
                    "details": r.details,
                    "timestamp": r.timestamp.isoformat() + "Z",
                }
                for r in self.results
            ],
        }

    def to_json(self, indent: int = 2) -> str:
        """Serialize the report to JSON.

        Values that ``json`` cannot encode natively are stringified.
        """
        return json.dumps(self.to_dict(), indent=indent, default=str)

    def save(self, path: str | Path, format: str = "json") -> None:
        """
        Write the report to a file, creating parent directories as needed.

        Args:
            path: Destination file path.
            format: Output format ('json', 'html', 'md').

        Raises:
            ValueError: If ``format`` is not one of the supported values.
        """
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)

        # Always write UTF-8: messages may contain non-ASCII text and the
        # platform default encoding is not guaranteed to represent it.
        if format == "json":
            path.write_text(self.to_json(), encoding="utf-8")
        elif format == "html":
            path.write_text(self.to_html(), encoding="utf-8")
        elif format == "md":
            path.write_text(self.to_markdown(), encoding="utf-8")
        else:
            raise ValueError(f"Formato não suportado: {format}")

        logger.info("health_report_saved", path=str(path), format=format)

    def to_markdown(self) -> str:
        """Render the report as Markdown (summary, results table, failures)."""
        lines = [
            "# Health Check Report",
            "",
            f"**Timestamp:** {self.timestamp.isoformat()}Z",
            "",
            "## Summary",
            "",
            f"- Total checks: {self.summary['total_checks']}",
            f"- OK: {self.summary['ok']}",
            f"- Warnings: {self.summary['warnings']}",
            f"- Failures: {self.summary['failures']}",
            f"- Success rate: {self.summary['success_rate']:.1%}",
            f"- Average latency: {self.summary['avg_latency_ms']:.0f}ms",
            "",
            "## Results",
            "",
            "| Source | Status | Latency | Message |",
            "|--------|--------|---------|---------|",
        ]

        for r in self.results:
            status_emoji = {
                CheckStatus.OK: ":white_check_mark:",
                CheckStatus.WARNING: ":warning:",
                CheckStatus.FAILED: ":x:",
            }.get(r.status, "")

            # Messages can carry arbitrary exception text; keep the row intact.
            lines.append(
                f"| {self._md_cell(r.source.value)} | {status_emoji} {r.status.value} | "
                f"{r.latency_ms:.0f}ms | {self._md_cell(r.message)} |"
            )

        if self.failures:
            lines.extend(
                [
                    "",
                    "## Failures",
                    "",
                ]
            )
            for r in self.failures:
                lines.extend(
                    [
                        f"### {r.source.value}",
                        "",
                        f"**Error:** {r.message}",
                        "",
                        "```json",
                        json.dumps(r.details, indent=2, default=str),
                        "```",
                        "",
                    ]
                )

        return "\n".join(lines)

    def to_html(self) -> str:
        """Render the report as a standalone HTML page.

        Source, status and message are HTML-escaped: messages may contain
        arbitrary exception text, which must not be interpreted as markup.
        """
        # Local import keeps this block self-contained.
        from html import escape

        status_colors = {
            CheckStatus.OK: "#28a745",
            CheckStatus.WARNING: "#ffc107",
            CheckStatus.FAILED: "#dc3545",
        }

        rows = []
        for r in self.results:
            color = status_colors.get(r.status, "#6c757d")
            rows.append(
                f"""
                <tr>
                    <td>{escape(str(r.source.value))}</td>
                    <td style="color: {color}; font-weight: bold;">{escape(str(r.status.value))}</td>
                    <td>{r.latency_ms:.0f}ms</td>
                    <td>{escape(str(r.message))}</td>
                </tr>
                """
            )

        return f"""
<!DOCTYPE html>
<html>
<head>
    <title>Health Check Report</title>
    <style>
        body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; margin: 40px; }}
        h1 {{ color: #333; }}
        table {{ border-collapse: collapse; width: 100%; margin-top: 20px; }}
        th, td {{ border: 1px solid #ddd; padding: 12px; text-align: left; }}
        th {{ background-color: #f8f9fa; }}
        .summary {{ background-color: #f8f9fa; padding: 20px; border-radius: 8px; margin-bottom: 20px; }}
        .summary-item {{ display: inline-block; margin-right: 30px; }}
        .summary-value {{ font-size: 24px; font-weight: bold; }}
        .summary-label {{ color: #666; }}
    </style>
</head>
<body>
    <h1>Health Check Report</h1>
    <p><strong>Timestamp:</strong> {self.timestamp.isoformat()}Z</p>

    <div class="summary">
        <div class="summary-item">
            <div class="summary-value">{self.summary["total_checks"]}</div>
            <div class="summary-label">Total</div>
        </div>
        <div class="summary-item">
            <div class="summary-value" style="color: #28a745;">{self.summary["ok"]}</div>
            <div class="summary-label">OK</div>
        </div>
        <div class="summary-item">
            <div class="summary-value" style="color: #ffc107;">{self.summary["warnings"]}</div>
            <div class="summary-label">Warnings</div>
        </div>
        <div class="summary-item">
            <div class="summary-value" style="color: #dc3545;">{self.summary["failures"]}</div>
            <div class="summary-label">Failures</div>
        </div>
        <div class="summary-item">
            <div class="summary-value">{self.summary["avg_latency_ms"]:.0f}ms</div>
            <div class="summary-label">Avg Latency</div>
        </div>
    </div>

    <h2>Results</h2>
    <table>
        <thead>
            <tr>
                <th>Source</th>
                <th>Status</th>
                <th>Latency</th>
                <th>Message</th>
            </tr>
        </thead>
        <tbody>
            {"".join(rows)}
        </tbody>
    </table>
</body>
</html>
"""

    def print_summary(self) -> None:
        """Print a console summary: one line per result, then the totals."""
        print("\n" + "=" * 60)
        print("HEALTH CHECK REPORT")
        print("=" * 60)
        print(f"Timestamp: {self.timestamp.isoformat()}Z")
        print()

        for r in self.results:
            status_symbol = {
                CheckStatus.OK: "[OK]",
                CheckStatus.WARNING: "[WARN]",
                CheckStatus.FAILED: "[FAIL]",
            }.get(r.status, "[?]")

            print(f" {status_symbol} {r.source.value}: {r.message} ({r.latency_ms:.0f}ms)")

        print()
        print("-" * 60)
        print(
            f"Total: {self.summary['total_checks']} | "
            f"OK: {self.summary['ok']} | "
            f"Warnings: {self.summary['warnings']} | "
            f"Failures: {self.summary['failures']}"
        )
        print(
            f"Success Rate: {self.summary['success_rate']:.1%} | "
            f"Avg Latency: {self.summary['avg_latency_ms']:.0f}ms"
        )
        print("=" * 60 + "\n")
286
+
287
+
288
async def generate_report(
    sources: list[Fonte] | None = None,
    save_path: str | Path | None = None,
    format: str = "json",
) -> HealthReport:
    """
    Generate a health-check report.

    When ``sources`` is given, only those sources are checked, so no
    fetches are made for sources the caller did not ask for. Each distinct
    source is checked once and results follow the order of ``sources``. A
    requested source without a registered checker appears in the report as
    whatever ``check_source`` returns for it, rather than being dropped.

    Args:
        sources: Sources to check (all of them when None or empty).
        save_path: Where to save the report (optional).
        format: File format passed to ``HealthReport.save``.

    Returns:
        HealthReport built from the collected results.
    """
    if sources:
        # Local imports: the module top only brings in run_all_checks.
        import asyncio

        from .checker import check_source

        # dict.fromkeys de-duplicates while preserving the caller's order.
        requested = list(dict.fromkeys(sources))
        results = list(await asyncio.gather(*(check_source(s) for s in requested)))
    else:
        results = await run_all_checks()

    report = HealthReport(results)

    if save_path:
        report.save(save_path, format)

    return report
@@ -0,0 +1,9 @@
1
+ """HTTP utilities - retry, rate limiting, user-agents."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from agrobr.http.rate_limiter import RateLimiter
6
+ from agrobr.http.retry import retry_async, with_retry
7
+ from agrobr.http.user_agents import UserAgentRotator
8
+
9
+ __all__ = ["retry_async", "with_retry", "RateLimiter", "UserAgentRotator"]