agrobr 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agrobr/__init__.py +10 -0
- agrobr/alerts/__init__.py +7 -0
- agrobr/alerts/notifier.py +167 -0
- agrobr/cache/__init__.py +31 -0
- agrobr/cache/duckdb_store.py +433 -0
- agrobr/cache/history.py +317 -0
- agrobr/cache/migrations.py +82 -0
- agrobr/cache/policies.py +240 -0
- agrobr/cepea/__init__.py +7 -0
- agrobr/cepea/api.py +360 -0
- agrobr/cepea/client.py +273 -0
- agrobr/cepea/parsers/__init__.py +37 -0
- agrobr/cepea/parsers/base.py +35 -0
- agrobr/cepea/parsers/consensus.py +300 -0
- agrobr/cepea/parsers/detector.py +108 -0
- agrobr/cepea/parsers/fingerprint.py +226 -0
- agrobr/cepea/parsers/v1.py +305 -0
- agrobr/cli.py +323 -0
- agrobr/conab/__init__.py +21 -0
- agrobr/conab/api.py +239 -0
- agrobr/conab/client.py +219 -0
- agrobr/conab/parsers/__init__.py +7 -0
- agrobr/conab/parsers/v1.py +383 -0
- agrobr/constants.py +205 -0
- agrobr/exceptions.py +104 -0
- agrobr/health/__init__.py +23 -0
- agrobr/health/checker.py +202 -0
- agrobr/health/reporter.py +314 -0
- agrobr/http/__init__.py +9 -0
- agrobr/http/browser.py +214 -0
- agrobr/http/rate_limiter.py +69 -0
- agrobr/http/retry.py +93 -0
- agrobr/http/user_agents.py +67 -0
- agrobr/ibge/__init__.py +19 -0
- agrobr/ibge/api.py +273 -0
- agrobr/ibge/client.py +256 -0
- agrobr/models.py +85 -0
- agrobr/normalize/__init__.py +64 -0
- agrobr/normalize/dates.py +303 -0
- agrobr/normalize/encoding.py +102 -0
- agrobr/normalize/regions.py +308 -0
- agrobr/normalize/units.py +278 -0
- agrobr/noticias_agricolas/__init__.py +6 -0
- agrobr/noticias_agricolas/client.py +222 -0
- agrobr/noticias_agricolas/parser.py +187 -0
- agrobr/sync.py +147 -0
- agrobr/telemetry/__init__.py +17 -0
- agrobr/telemetry/collector.py +153 -0
- agrobr/utils/__init__.py +5 -0
- agrobr/utils/logging.py +59 -0
- agrobr/validators/__init__.py +35 -0
- agrobr/validators/sanity.py +286 -0
- agrobr/validators/structural.py +313 -0
- agrobr-0.1.0.dist-info/METADATA +243 -0
- agrobr-0.1.0.dist-info/RECORD +58 -0
- agrobr-0.1.0.dist-info/WHEEL +4 -0
- agrobr-0.1.0.dist-info/entry_points.txt +2 -0
- agrobr-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Validação estrutural de páginas usando fingerprints.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
import structlog
|
|
12
|
+
|
|
13
|
+
from ..constants import Fonte
|
|
14
|
+
from ..models import Fingerprint
|
|
15
|
+
|
|
16
|
+
logger = structlog.get_logger()
|
|
17
|
+
|
|
18
|
+
# Similarity floors used by validate_structure to grade a comparison:
#   >= THRESHOLD_HIGH   -> level "high"     (passes)
#   >= THRESHOLD_MEDIUM -> level "medium"   (passes)
#   >= THRESHOLD_LOW    -> level "low"      (fails)
#   below THRESHOLD_LOW -> level "critical" (fails)
THRESHOLD_HIGH = 0.85
THRESHOLD_MEDIUM = 0.70
THRESHOLD_LOW = 0.50
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class StructuralValidationResult:
    """Outcome of comparing a page fingerprint against a baseline."""

    # Data source the validated fingerprint belongs to.
    source: Fonte
    # Weighted similarity score produced by the fingerprint comparison.
    similarity: float
    # Whether the structure is considered acceptable.
    passed: bool
    # Severity label: "high", "medium", "low", "critical" or "unknown".
    level: str
    # Per-aspect details of what differs from the baseline.
    differences: dict[str, Any]
    # Fingerprint that was validated.
    current_fingerprint: Fingerprint | None
    # Baseline it was compared against; None when no baseline was available.
    baseline_fingerprint: Fingerprint | None
    # Human-readable summary of the outcome.
    message: str
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def validate_structure(
    current: Fingerprint,
    baseline: Fingerprint,
    _threshold: float = THRESHOLD_HIGH,
) -> StructuralValidationResult:
    """
    Grade the current page structure against a baseline fingerprint.

    Args:
        current: Fingerprint of the page as it looks now.
        baseline: Reference fingerprint to compare against.
        _threshold: Accepted for call compatibility but never read; the
            level and the pass/fail outcome are derived solely from the
            module-level THRESHOLD_* constants.

    Returns:
        StructuralValidationResult carrying the similarity score, the
        severity level, the differences found and both fingerprints.
    """
    similarity, differences = compare_fingerprints(current, baseline)

    # Bands are ordered from strictest to loosest; the first floor that the
    # similarity reaches decides the outcome.
    bands = (
        (THRESHOLD_HIGH, "high", True, "Structure matches baseline"),
        (
            THRESHOLD_MEDIUM,
            "medium",
            True,
            f"Minor structural differences detected ({similarity:.1%} similarity)",
        ),
        (
            THRESHOLD_LOW,
            "low",
            False,
            f"Significant structural changes ({similarity:.1%} similarity)",
        ),
    )

    # Fallback when no band is reached.
    level = "critical"
    passed = False
    message = f"Major layout change detected ({similarity:.1%} similarity)"

    for floor, band_level, band_passed, band_message in bands:
        if similarity >= floor:
            level, passed, message = band_level, band_passed, band_message
            break

    logger.info(
        "structural_validation",
        source=current.source.value,
        similarity=similarity,
        level=level,
        passed=passed,
    )

    return StructuralValidationResult(
        source=current.source,
        similarity=similarity,
        passed=passed,
        level=level,
        differences=differences,
        current_fingerprint=current,
        baseline_fingerprint=baseline,
        message=message,
    )
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def compare_fingerprints(
    current: Fingerprint,
    reference: Fingerprint,
) -> tuple[float, dict[str, Any]]:
    """
    Compare two fingerprints and return a weighted similarity score.

    Five aspects are scored in [0, 1] and combined with fixed weights:
    structure hash (0.25), table classes (0.20), key ids (0.15),
    table headers (0.30) and element counts (0.10).

    Args:
        current: Fingerprint of the page as it looks now.
        reference: Fingerprint to compare against.

    Returns:
        Tuple ``(similarity, details)`` where ``similarity`` is the weighted
        score and ``details`` maps each aspect that differs to a description
        of the difference (empty when nothing noteworthy differs).
    """
    scores: dict[str, float] = {}
    details: dict[str, Any] = {}

    # Structure hash: all-or-nothing.
    same_structure = current.structure_hash == reference.structure_hash
    scores["structure"] = 1.0 if same_structure else 0.0
    if not same_structure:
        details["structure_changed"] = {
            "current": current.structure_hash,
            "reference": reference.structure_hash,
        }

    # Table classes: fraction of reference entries still present. Counting
    # over the reference (as the key_ids branch does) keeps the ratio within
    # [0, 1] even if either side contains duplicates.
    if reference.table_classes:
        matches = sum(1 for tc in reference.table_classes if tc in current.table_classes)
        scores["table_classes"] = matches / len(reference.table_classes)
        if scores["table_classes"] < 1.0:
            details["table_classes_diff"] = {
                "missing": [
                    tc for tc in reference.table_classes if tc not in current.table_classes
                ],
                "new": [tc for tc in current.table_classes if tc not in reference.table_classes],
            }
    else:
        scores["table_classes"] = 1.0

    # Key ids: fraction of reference ids still present.
    if reference.key_ids:
        matches = sum(1 for kid in reference.key_ids if kid in current.key_ids)
        scores["key_ids"] = matches / len(reference.key_ids)
        if scores["key_ids"] < 1.0:
            details["key_ids_diff"] = {
                "missing": [kid for kid in reference.key_ids if kid not in current.key_ids],
                "new": [kid for kid in current.key_ids if kid not in reference.key_ids],
            }
    else:
        scores["key_ids"] = 1.0

    # Table headers: best Jaccard overlap over every (reference, current)
    # pair of header rows.
    # NOTE(review): a single well-matching table yields a full score even if
    # other reference tables vanished - confirm this is intended.
    if reference.table_headers:
        header_score = 0.0
        for ref_headers in reference.table_headers:
            ref_set = set(ref_headers)
            for cur_headers in current.table_headers:
                cur_set = set(cur_headers)
                # Two empty header rows have an empty union; skip them to
                # avoid dividing by zero.
                if ref_set or cur_set:
                    jaccard = len(ref_set & cur_set) / len(ref_set | cur_set)
                    header_score = max(header_score, jaccard)
        scores["table_headers"] = header_score
        if header_score < 0.9:
            details["table_headers_diff"] = {
                "reference": reference.table_headers,
                "current": current.table_headers,
            }
    else:
        scores["table_headers"] = 1.0

    # Element counts: flag every reference element whose count moved by more
    # than 50%; each flagged element costs 0.2, floored at zero.
    count_diffs: dict[str, dict[str, int]] = {}
    for key, ref_count in reference.element_counts.items():
        cur_count = current.element_counts.get(key, 0)
        if ref_count > 0:
            diff_ratio = abs(cur_count - ref_count) / ref_count
            if diff_ratio > 0.5:
                count_diffs[key] = {"reference": ref_count, "current": cur_count}

    if count_diffs:
        scores["element_counts"] = max(0.0, 1 - len(count_diffs) * 0.2)
        details["element_counts_diff"] = count_diffs
    else:
        scores["element_counts"] = 1.0

    weights = {
        "structure": 0.25,
        "table_classes": 0.20,
        "key_ids": 0.15,
        "table_headers": 0.30,
        "element_counts": 0.10,
    }

    final_score = sum(scores[k] * weights[k] for k in weights)

    return final_score, details
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def load_baseline(source: Fonte, baselines_dir: str | Path = ".structures") -> Fingerprint | None:
    """
    Load the baseline fingerprint for a source.

    Looks for ``<source>_baseline.json`` first and falls back to a shared
    ``baseline.json`` in the same directory. A file may either hold a single
    fingerprint or a ``{"sources": {<source>: fingerprint}}`` mapping.

    Args:
        source: Data source whose baseline is wanted.
        baselines_dir: Directory containing the baseline files.

    Returns:
        The loaded Fingerprint, or None when no baseline file exists or the
        file cannot be read or validated (a warning is logged in that case).
    """
    import json

    path = Path(baselines_dir) / f"{source.value}_baseline.json"

    if not path.exists():
        path = Path(baselines_dir) / "baseline.json"
        if not path.exists():
            return None

    try:
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's default locale encoding.
        with open(path, encoding="utf-8") as f:
            data = json.load(f)

        if "sources" in data and source.value in data["sources"]:
            source_data = data["sources"][source.value]
            return Fingerprint.model_validate(source_data)

        return Fingerprint.model_validate(data)
    except Exception as e:
        # Best-effort by design: an unreadable or invalid baseline is
        # reported and treated the same as a missing one.
        logger.warning("baseline_load_failed", source=source.value, error=str(e))
        return None
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def save_baseline(
    fingerprint: Fingerprint,
    baselines_dir: str | Path = ".structures",
) -> None:
    """
    Persist a fingerprint as the baseline for its source.

    The fingerprint is written as indented JSON to
    ``<baselines_dir>/<source>_baseline.json``; the directory is created
    when it does not exist yet.

    Args:
        fingerprint: Fingerprint to store.
        baselines_dir: Directory where baseline files are kept.
    """
    import json

    path = Path(baselines_dir) / f"{fingerprint.source.value}_baseline.json"
    path.parent.mkdir(parents=True, exist_ok=True)

    # Write as UTF-8 explicitly so the file does not depend on the
    # platform's default locale encoding.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(fingerprint.model_dump(mode="json"), f, indent=2, default=str)

    logger.info("baseline_saved", source=fingerprint.source.value, path=str(path))
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def validate_against_baseline(
    current: Fingerprint,
    baselines_dir: str | Path = ".structures",
    threshold: float = THRESHOLD_HIGH,
) -> StructuralValidationResult:
    """
    Validate a fingerprint against the baseline stored on disk.

    Args:
        current: Fingerprint of the page as it looks now.
        baselines_dir: Directory containing the baseline files.
        threshold: Similarity threshold forwarded to validate_structure.

    Returns:
        The result of validate_structure when a baseline is available;
        otherwise a passing result with level "unknown" and similarity 1.0.
    """
    baseline = load_baseline(current.source, baselines_dir)

    if baseline is not None:
        return validate_structure(current, baseline, threshold)

    # Nothing to compare against: report the page as valid.
    return StructuralValidationResult(
        source=current.source,
        similarity=1.0,
        passed=True,
        level="unknown",
        differences={},
        current_fingerprint=current,
        baseline_fingerprint=None,
        message="No baseline found - treating as valid",
    )
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
class StructuralMonitor:
    """Continuous structure monitor that keeps every validation it runs."""

    def __init__(self, baselines_dir: str | Path = ".structures"):
        self.baselines_dir = Path(baselines_dir)
        # Every result produced by check(), in call order.
        self.history: list[StructuralValidationResult] = []

    async def check(self, source: Fonte) -> StructuralValidationResult:
        """
        Check the structure of a source against its stored baseline.

        Fetches the CEPEA "soja" indicator page, fingerprints it under the
        given source, validates it against the baselines directory and
        records the result in the history.

        Args:
            source: Source to check.

        Returns:
            StructuralValidationResult for this check.
        """
        from ..cepea import client as cepea_client
        from ..cepea.parsers.fingerprint import extract_fingerprint

        # NOTE(review): the page fetched is always the CEPEA "soja" page,
        # whatever `source` is - confirm before checking other sources.
        produto = "soja"
        html = await cepea_client.fetch_indicador_page(produto)
        fingerprint = extract_fingerprint(html, source, produto)

        outcome = validate_against_baseline(fingerprint, self.baselines_dir)
        self.history.append(outcome)
        return outcome

    async def check_all(self) -> list[StructuralValidationResult]:
        """Check every monitored source concurrently (currently only CEPEA)."""
        import asyncio

        pending = [self.check(fonte) for fonte in (Fonte.CEPEA,)]
        return list(await asyncio.gather(*pending))

    def get_drift_history(self) -> list[StructuralValidationResult]:
        """Return the recorded results that did not pass validation."""
        drifts = []
        for entry in self.history:
            if not entry.passed:
                drifts.append(entry)
        return drifts
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agrobr
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Dados agrícolas brasileiros em uma linha de código
|
|
5
|
+
Project-URL: Homepage, https://github.com/bruno-portfolio/agrobr
|
|
6
|
+
Project-URL: Documentation, https://agrobr.dev
|
|
7
|
+
Project-URL: Repository, https://github.com/bruno-portfolio/agrobr
|
|
8
|
+
Project-URL: Issues, https://github.com/bruno-portfolio/agrobr/issues
|
|
9
|
+
Author-email: Bruno <bruno@example.com>
|
|
10
|
+
License-Expression: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: agricultura,agro,brasil,cepea,commodities,conab,dados,ibge,milho,scraping,soja
|
|
13
|
+
Classifier: Development Status :: 3 - Alpha
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Office/Business :: Financial
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering
|
|
23
|
+
Classifier: Typing :: Typed
|
|
24
|
+
Requires-Python: >=3.11
|
|
25
|
+
Requires-Dist: beautifulsoup4>=4.12.0
|
|
26
|
+
Requires-Dist: chardet>=5.2.0
|
|
27
|
+
Requires-Dist: duckdb>=0.9.0
|
|
28
|
+
Requires-Dist: httpx>=0.25.0
|
|
29
|
+
Requires-Dist: lxml>=5.0.0
|
|
30
|
+
Requires-Dist: openpyxl>=3.1.0
|
|
31
|
+
Requires-Dist: pandas>=2.0.0
|
|
32
|
+
Requires-Dist: playwright>=1.40.0
|
|
33
|
+
Requires-Dist: pydantic-settings>=2.1.0
|
|
34
|
+
Requires-Dist: pydantic>=2.5.0
|
|
35
|
+
Requires-Dist: sidrapy>=0.1.4
|
|
36
|
+
Requires-Dist: structlog>=23.2.0
|
|
37
|
+
Requires-Dist: typer>=0.9.0
|
|
38
|
+
Provides-Extra: all
|
|
39
|
+
Requires-Dist: black>=23.11.0; extra == 'all'
|
|
40
|
+
Requires-Dist: mkdocs-material>=9.5.0; extra == 'all'
|
|
41
|
+
Requires-Dist: mkdocs>=1.5.0; extra == 'all'
|
|
42
|
+
Requires-Dist: mkdocstrings[python]>=0.24.0; extra == 'all'
|
|
43
|
+
Requires-Dist: mypy>=1.7.0; extra == 'all'
|
|
44
|
+
Requires-Dist: pandas-stubs>=2.0.0; extra == 'all'
|
|
45
|
+
Requires-Dist: polars>=0.19.0; extra == 'all'
|
|
46
|
+
Requires-Dist: pre-commit>=3.5.0; extra == 'all'
|
|
47
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == 'all'
|
|
48
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == 'all'
|
|
49
|
+
Requires-Dist: pytest-recording>=0.13.0; extra == 'all'
|
|
50
|
+
Requires-Dist: pytest>=7.4.0; extra == 'all'
|
|
51
|
+
Requires-Dist: ruff>=0.14.0; extra == 'all'
|
|
52
|
+
Provides-Extra: dev
|
|
53
|
+
Requires-Dist: black>=23.11.0; extra == 'dev'
|
|
54
|
+
Requires-Dist: mypy>=1.7.0; extra == 'dev'
|
|
55
|
+
Requires-Dist: pandas-stubs>=2.0.0; extra == 'dev'
|
|
56
|
+
Requires-Dist: pre-commit>=3.5.0; extra == 'dev'
|
|
57
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
|
|
58
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == 'dev'
|
|
59
|
+
Requires-Dist: pytest-recording>=0.13.0; extra == 'dev'
|
|
60
|
+
Requires-Dist: pytest>=7.4.0; extra == 'dev'
|
|
61
|
+
Requires-Dist: ruff>=0.14.0; extra == 'dev'
|
|
62
|
+
Provides-Extra: docs
|
|
63
|
+
Requires-Dist: mkdocs-material>=9.5.0; extra == 'docs'
|
|
64
|
+
Requires-Dist: mkdocs>=1.5.0; extra == 'docs'
|
|
65
|
+
Requires-Dist: mkdocstrings[python]>=0.24.0; extra == 'docs'
|
|
66
|
+
Provides-Extra: polars
|
|
67
|
+
Requires-Dist: polars>=0.19.0; extra == 'polars'
|
|
68
|
+
Description-Content-Type: text/markdown
|
|
69
|
+
|
|
70
|
+
# agrobr
|
|
71
|
+
|
|
72
|
+
> Dados agrícolas brasileiros em uma linha de código
|
|
73
|
+
|
|
74
|
+
[PyPI](https://pypi.org/project/agrobr/)
|
|
75
|
+
[Tests](https://github.com/bruno-portfolio/agrobr/actions/workflows/tests.yml)
|
|
76
|
+
[Health Check](https://github.com/bruno-portfolio/agrobr/actions/workflows/health_check.yml)
|
|
77
|
+
[Python 3.11+](https://www.python.org/downloads/)
|
|
78
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
79
|
+
[Ruff](https://github.com/astral-sh/ruff)
|
|
80
|
+
|
|
81
|
+
Wrapper Python production-grade para dados do **CEPEA**, **CONAB** e **IBGE**.
|
|
82
|
+
|
|
83
|
+
## Instalação
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
pip install agrobr
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Com suporte a Polars e Playwright (para fontes que requerem JavaScript):
|
|
90
|
+
```bash
|
|
91
|
+
pip install agrobr[polars]
|
|
92
|
+
playwright install chromium
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## Uso Rápido
|
|
96
|
+
|
|
97
|
+
### CEPEA - Indicadores de Preços
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
import asyncio
|
|
101
|
+
from agrobr import cepea
|
|
102
|
+
|
|
103
|
+
async def main():
|
|
104
|
+
# Série histórica de soja
|
|
105
|
+
df = await cepea.indicador('soja', periodo='2024')
|
|
106
|
+
print(df.head())
|
|
107
|
+
|
|
108
|
+
# Último valor disponível
|
|
109
|
+
ultimo = await cepea.ultimo('soja')
|
|
110
|
+
print(f"Soja: R$ {ultimo.valor}/sc em {ultimo.data}")
|
|
111
|
+
|
|
112
|
+
# Produtos disponíveis
|
|
113
|
+
print(cepea.produtos()) # ['soja', 'milho', 'boi_gordo', 'cafe', ...]
|
|
114
|
+
|
|
115
|
+
asyncio.run(main())
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### CONAB - Safras e Balanço
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
from agrobr import conab
|
|
122
|
+
|
|
123
|
+
async def main():
|
|
124
|
+
# Dados de safra por UF
|
|
125
|
+
df = await conab.safras('soja', safra='2024/25')
|
|
126
|
+
print(df[['uf', 'area_plantada', 'producao', 'produtividade']])
|
|
127
|
+
|
|
128
|
+
# Balanço oferta/demanda
|
|
129
|
+
balanco = await conab.balanco('soja')
|
|
130
|
+
print(balanco)
|
|
131
|
+
|
|
132
|
+
# Total Brasil
|
|
133
|
+
brasil = await conab.brasil_total()
|
|
134
|
+
print(brasil)
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### IBGE - PAM e LSPA
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
from agrobr import ibge
|
|
141
|
+
|
|
142
|
+
async def main():
|
|
143
|
+
# PAM - Produção Agrícola Municipal (anual)
|
|
144
|
+
df = await ibge.pam('soja', ano=2023, nivel='uf')
|
|
145
|
+
print(df[['localidade', 'area_plantada', 'producao']])
|
|
146
|
+
|
|
147
|
+
# LSPA - Levantamento Sistemático (mensal)
|
|
148
|
+
df = await ibge.lspa('soja', ano=2024, mes=6)
|
|
149
|
+
print(df)
|
|
150
|
+
|
|
151
|
+
# Múltiplos anos
|
|
152
|
+
df = await ibge.pam('milho', ano=[2020, 2021, 2022, 2023])
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
### Modo Síncrono
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
from agrobr.sync import cepea, conab, ibge
|
|
159
|
+
|
|
160
|
+
# Mesma API, sem async/await
|
|
161
|
+
df = cepea.indicador('soja', periodo='2024')
|
|
162
|
+
safras = conab.safras('milho')
|
|
163
|
+
pam = ibge.pam('soja', ano=2023)
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### Suporte Polars
|
|
167
|
+
|
|
168
|
+
```python
|
|
169
|
+
# Retorna polars.DataFrame em vez de pandas
|
|
170
|
+
df = await cepea.indicador('soja', as_polars=True)
|
|
171
|
+
df = await conab.safras('milho', as_polars=True)
|
|
172
|
+
df = await ibge.pam('soja', ano=2023, as_polars=True)
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### CLI
|
|
176
|
+
|
|
177
|
+
```bash
|
|
178
|
+
# CEPEA
|
|
179
|
+
agrobr cepea soja --ultimo
|
|
180
|
+
agrobr cepea milho --inicio 2024-01-01 --formato csv
|
|
181
|
+
|
|
182
|
+
# CONAB
|
|
183
|
+
agrobr conab safras soja --safra 2024/25
|
|
184
|
+
agrobr conab balanco milho
|
|
185
|
+
|
|
186
|
+
# IBGE
|
|
187
|
+
agrobr ibge pam soja --ano 2023 --nivel uf
|
|
188
|
+
agrobr ibge lspa milho --ano 2024 --mes 6
|
|
189
|
+
|
|
190
|
+
# Health check
|
|
191
|
+
agrobr health --all
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
## Fontes Suportadas
|
|
195
|
+
|
|
196
|
+
| Fonte | Dados | Status |
|
|
197
|
+
|-------|-------|--------|
|
|
198
|
+
| CEPEA | Indicadores de preços (soja, milho, café, boi, algodão, trigo) | ✅ Funcional |
|
|
199
|
+
| CONAB | Safras, balanço oferta/demanda | ✅ Funcional |
|
|
200
|
+
| IBGE | PAM (anual), LSPA (mensal) | ✅ Funcional |
|
|
201
|
+
|
|
202
|
+
## Diferenciais
|
|
203
|
+
|
|
204
|
+
- 🚀 **Async-first** para pipelines de alta performance
|
|
205
|
+
- 💾 **Cache inteligente** com DuckDB (analytics nativo)
|
|
206
|
+
- 📊 **Histórico permanente** - acumula dados automaticamente
|
|
207
|
+
- 🐼 **Suporte pandas + polars**
|
|
208
|
+
- ✅ **Validação com Pydantic v2**
|
|
209
|
+
- 📈 **Validação estatística** de sanidade (detecta anomalias)
|
|
210
|
+
- 🔍 **Fingerprinting de layout** para detecção proativa de mudanças
|
|
211
|
+
- 🔔 **Alertas multi-canal** (Slack, Discord, Email)
|
|
212
|
+
- 🖥️ **CLI completo** para debug e automação
|
|
213
|
+
- 🔄 **Fallback automático** entre fontes
|
|
214
|
+
|
|
215
|
+
## Como Funciona
|
|
216
|
+
|
|
217
|
+
O agrobr mantém um cache local em DuckDB que acumula dados ao longo do tempo:
|
|
218
|
+
|
|
219
|
+
```
|
|
220
|
+
Dia 1: Coleta 10 dias de dados → salva no DuckDB
|
|
221
|
+
Dia 30: 30 dias de histórico acumulado
|
|
222
|
+
Dia 365: 1 ano completo de dados locais
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
Consultas a períodos antigos são instantâneas (cache). Apenas dados recentes precisam de request HTTP.
|
|
226
|
+
|
|
227
|
+
## Documentação
|
|
228
|
+
|
|
229
|
+
📚 [Documentação completa](https://bruno-portfolio.github.io/agrobr/)
|
|
230
|
+
|
|
231
|
+
- [Guia Rápido](https://bruno-portfolio.github.io/agrobr/quickstart/)
|
|
232
|
+
- [API CEPEA](https://bruno-portfolio.github.io/agrobr/api/cepea/)
|
|
233
|
+
- [API CONAB](https://bruno-portfolio.github.io/agrobr/api/conab/)
|
|
234
|
+
- [API IBGE](https://bruno-portfolio.github.io/agrobr/api/ibge/)
|
|
235
|
+
- [Resiliência](https://bruno-portfolio.github.io/agrobr/advanced/resilience/)
|
|
236
|
+
|
|
237
|
+
## Contribuindo
|
|
238
|
+
|
|
239
|
+
Contribuições são bem-vindas! Veja [CONTRIBUTING.md](CONTRIBUTING.md) para detalhes.
|
|
240
|
+
|
|
241
|
+
## Licença
|
|
242
|
+
|
|
243
|
+
MIT - veja [LICENSE](LICENSE) para detalhes.
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
agrobr/__init__.py,sha256=8F6sBJaY9pRQCbyKBx8u4IhlZBRuwyYZE4sExT_b554,239
|
|
2
|
+
agrobr/cli.py,sha256=Xi772yw-axkNFeeXFI0aYsJpB1LOjPCbf5Is1ZheZc0,10122
|
|
3
|
+
agrobr/constants.py,sha256=TPMVRr4JJZ9CLq-FEpVk2HHbLIV0fjaqAjFYofstfsI,5130
|
|
4
|
+
agrobr/exceptions.py,sha256=zODmRKbqk7rRtkkFUclgHx-WriuTJG28zQs4JV5Zrow,2485
|
|
5
|
+
agrobr/models.py,sha256=UsV5yOzwT2zLUWswqhylWXNYAZOn1bGB45l2cGLd-PQ,2256
|
|
6
|
+
agrobr/sync.py,sha256=j0tM1PZ2lT7uqHJpcqatWgJykg8WDtUQBCpdS1NXSbg,3432
|
|
7
|
+
agrobr/alerts/__init__.py,sha256=g5M62usjNdccmus9YCooPO9MxmwiOz_ds1AHEOoh1qE,186
|
|
8
|
+
agrobr/alerts/notifier.py,sha256=q7ajH37I-KvERhUMmL262rnBTZav9bjOCBfJtm3St6w,4981
|
|
9
|
+
agrobr/cache/__init__.py,sha256=T9g1cZ-1dJ33M4vkKNZZXKRLYk17B0Z3NgG3qZoFHJE,612
|
|
10
|
+
agrobr/cache/duckdb_store.py,sha256=c7I4uqNVF5WJ_8yB0TKiS8bZhsCx7mvQs3mIJRT3Q0I,13064
|
|
11
|
+
agrobr/cache/history.py,sha256=fdnwSCvEUKY4nAJNXvHKuAQVk3a_G4dGRR3RABJsjcA,8629
|
|
12
|
+
agrobr/cache/migrations.py,sha256=VoiRU1nCgruGWArOWDyEaDFzSdg6j4gttnPLJ4b1DBE,2723
|
|
13
|
+
agrobr/cache/policies.py,sha256=Gm_yi5sxz71Yj-bsAAOeHEzLO6Wln3sJ_OTRvdQdWBA,6015
|
|
14
|
+
agrobr/cepea/__init__.py,sha256=QXYOJG_OoDpYYdtc1xvEnwKCdYppI6EmQg9tF0XA5Do,213
|
|
15
|
+
agrobr/cepea/api.py,sha256=FkOGqIuh_sJj-QHF-MG1nLJbam9ndvHMtSmtqvsquc0,11767
|
|
16
|
+
agrobr/cepea/client.py,sha256=eT5LzD77obH7BVWlSq3GZ0RY5s-d6h1I4cRhkoOQZv8,8057
|
|
17
|
+
agrobr/cepea/parsers/__init__.py,sha256=Cda0xDbu_gfkg90fTOngmnpRITKYJ0BIV2Y5Pymo5aM,970
|
|
18
|
+
agrobr/cepea/parsers/base.py,sha256=PdrL68ioeXw_ekp5AEwVCvK64Yt02ybmbZOANusei0I,948
|
|
19
|
+
agrobr/cepea/parsers/consensus.py,sha256=2J03DNUbVJRMQGrnz_FyHDlY6uqBb1zmHav2rzYSZX8,8732
|
|
20
|
+
agrobr/cepea/parsers/detector.py,sha256=cQq58t08UjrsBXKuai51NjcExvDxaDloBlteZ1pl4TA,3089
|
|
21
|
+
agrobr/cepea/parsers/fingerprint.py,sha256=HG2_s9fX6403IxB9oU9jOOMIkL7hnFl1RFJ2o2ZJtME,7382
|
|
22
|
+
agrobr/cepea/parsers/v1.py,sha256=ai1iKHZtXsTMp6M2crr8Mp4RTIvTamUx3JJEtYVUrQo,9907
|
|
23
|
+
agrobr/conab/__init__.py,sha256=-2T4uYh0hmL2WUUfETPc8etlbwvR-eE4QlhfaEb83fo,330
|
|
24
|
+
agrobr/conab/api.py,sha256=l-mFo2RAiwxQ15KjYXi7MQOPACfY2nD_3UIz9dGRc6k,5764
|
|
25
|
+
agrobr/conab/client.py,sha256=BF-VOUKy-ZTpgMQEH9lGsiWKig4YxnQf2PZYqOgXmOw,6113
|
|
26
|
+
agrobr/conab/parsers/__init__.py,sha256=QtDUjHnbgCr118I_301SPmx3guFK3DMmNfxjd9rPnXo,137
|
|
27
|
+
agrobr/conab/parsers/v1.py,sha256=2pDFTDZ_n28bBNuvjI8N8RdPidI_VvaJFjZb4XH3sCM,12716
|
|
28
|
+
agrobr/health/__init__.py,sha256=D78Istswk7-IKpwrkDEOi8wpzKQre2ftGJpB09wIvlM,383
|
|
29
|
+
agrobr/health/checker.py,sha256=prtjubt4m-ygLWGeIRAkc2oFP7_qDqAeeNtMv2P2hEg,5855
|
|
30
|
+
agrobr/health/reporter.py,sha256=2TO2C2NU39RFPuyZmX3bgn1NUEY95b9I9gpJVDcmYYY,10046
|
|
31
|
+
agrobr/http/__init__.py,sha256=tEc9cyPnMWhwwulC89Mlu-DKI6qb9HtFGSFIC07YaiY,327
|
|
32
|
+
agrobr/http/browser.py,sha256=CsicdVJBXqPRpYFk1zBbyz4h2iZzJF-rZ_RfHo2W3nw,6006
|
|
33
|
+
agrobr/http/rate_limiter.py,sha256=jZrny-CB_KFW6U9iN76PQM60U8SyOLO7dG3cDkQy45c,2099
|
|
34
|
+
agrobr/http/retry.py,sha256=DTwQVoF79FcfPDvmVgd4AjvIcoX912akVGrblDRGRnw,2765
|
|
35
|
+
agrobr/http/user_agents.py,sha256=sl0HeMnxwHmNKgqantXZePTUo3hwyfgqFunEUphQVv0,2765
|
|
36
|
+
agrobr/ibge/__init__.py,sha256=hfSTOMKBnpVxlsOxKobY4fpgyuDNbgO_JuNXCZju9t4,262
|
|
37
|
+
agrobr/ibge/api.py,sha256=9NydjberpCF5MO9rtfLOSsgwC5k1EP190nec4SeRn80,7355
|
|
38
|
+
agrobr/ibge/client.py,sha256=qDvx8NxNR0cM3msXKhuemTTLtFDdz8pWx-t62Edxdlw,6588
|
|
39
|
+
agrobr/normalize/__init__.py,sha256=K7pKKTlTC1iPtsv31U5A_C1ZbBCG9ePdUWORjaFHPx0,1346
|
|
40
|
+
agrobr/normalize/dates.py,sha256=t2dHd7IS-3joUkQN7jAvdBKLwvVPanY7AIWy27tlxfY,7021
|
|
41
|
+
agrobr/normalize/encoding.py,sha256=vMSJsD_1mxgWn51MBGf4M4yayp5ct3iJpnD1HP22yUA,2842
|
|
42
|
+
agrobr/normalize/regions.py,sha256=PIGyb4zruK2PCS1YVrMuWFXMIQxw18rX3O8HY1uU0jY,7643
|
|
43
|
+
agrobr/normalize/units.py,sha256=MCAIXoeCxRdk4gi3tYjqyn7j8p3wdIO_vbb3mU_e4bE,7467
|
|
44
|
+
agrobr/noticias_agricolas/__init__.py,sha256=jI-4dUvX-IR6mQALzntyWHzgCYvFKApA5rmbG_1-QGs,269
|
|
45
|
+
agrobr/noticias_agricolas/client.py,sha256=tVP7Kzl5V4Mz8XptV_h6_NXLrV6onuQ0gjBSjh_LGA4,6725
|
|
46
|
+
agrobr/noticias_agricolas/parser.py,sha256=rpDORBwCMa9uTTwE8nqgRJ2C5mqhUQoG5nFuf9XmFdM,5172
|
|
47
|
+
agrobr/telemetry/__init__.py,sha256=9B-h84G0B3Do10UkyLerjeV7gYVxONP_TdmB1cmho0M,305
|
|
48
|
+
agrobr/telemetry/collector.py,sha256=U91E68ZPI9T-u81qwC5aEyRwb-Zs4iPyE3mnt1xIxUI,4059
|
|
49
|
+
agrobr/utils/__init__.py,sha256=3NdxvsGHGrHIYZPfQeUsjmPTb7Ci0jTBwMYIM5q5dN0,88
|
|
50
|
+
agrobr/utils/logging.py,sha256=CgSfZSrLAR3Awx_LC0jzZhyZTussPz8-22uDzBI7FqE,1708
|
|
51
|
+
agrobr/validators/__init__.py,sha256=6lq_iLVgy9_tbxvgaLIei_E0TuzJYpILV7q12f7gXHg,745
|
|
52
|
+
agrobr/validators/sanity.py,sha256=LCL4Fi9KVPUoSjvjeel0-vvOYGkHpS8ASqC8sGiMoro,8808
|
|
53
|
+
agrobr/validators/structural.py,sha256=sQzzVoSVi50zHd9oHBAk6eBSlL7ep9CX-XHQEDSDGIw,9348
|
|
54
|
+
agrobr-0.1.0.dist-info/METADATA,sha256=pqbB6Jj2xcy3lXmAEl_U3n0WUeKDLZzSe8pBLspr-OE,7920
|
|
55
|
+
agrobr-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
56
|
+
agrobr-0.1.0.dist-info/entry_points.txt,sha256=coFf6hQniKkja1iHtSR-Q5xtEwdPBciTjsllHVw02qc,42
|
|
57
|
+
agrobr-0.1.0.dist-info/licenses/LICENSE,sha256=nbehzK8ZLuQrIhwvqVn1rdRp1livNvGvutr4Sl6bxEs,1067
|
|
58
|
+
agrobr-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024-2026 Bruno
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|