agrobr 0.1.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agrobr/__init__.py +3 -2
- agrobr/benchmark/__init__.py +343 -0
- agrobr/cache/policies.py +99 -17
- agrobr/cepea/api.py +87 -30
- agrobr/cepea/client.py +1 -8
- agrobr/cli.py +141 -5
- agrobr/conab/api.py +72 -6
- agrobr/config.py +137 -0
- agrobr/constants.py +1 -2
- agrobr/contracts/__init__.py +186 -0
- agrobr/contracts/cepea.py +80 -0
- agrobr/contracts/conab.py +181 -0
- agrobr/contracts/ibge.py +146 -0
- agrobr/export.py +251 -0
- agrobr/health/__init__.py +10 -0
- agrobr/health/doctor.py +321 -0
- agrobr/http/browser.py +0 -9
- agrobr/ibge/api.py +104 -25
- agrobr/ibge/client.py +5 -20
- agrobr/models.py +100 -1
- agrobr/noticias_agricolas/client.py +0 -7
- agrobr/noticias_agricolas/parser.py +0 -17
- agrobr/plugins/__init__.py +205 -0
- agrobr/quality.py +319 -0
- agrobr/sla.py +249 -0
- agrobr/snapshots.py +321 -0
- agrobr/stability.py +148 -0
- agrobr/validators/semantic.py +447 -0
- {agrobr-0.1.0.dist-info → agrobr-0.5.0.dist-info}/METADATA +12 -12
- {agrobr-0.1.0.dist-info → agrobr-0.5.0.dist-info}/RECORD +33 -19
- {agrobr-0.1.0.dist-info → agrobr-0.5.0.dist-info}/WHEEL +0 -0
- {agrobr-0.1.0.dist-info → agrobr-0.5.0.dist-info}/entry_points.txt +0 -0
- {agrobr-0.1.0.dist-info → agrobr-0.5.0.dist-info}/licenses/LICENSE +0 -0
agrobr/health/doctor.py
ADDED
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
"""Diagnóstico completo do sistema agrobr."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import time
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
import httpx
|
|
13
|
+
import structlog
|
|
14
|
+
|
|
15
|
+
from agrobr import __version__
|
|
16
|
+
from agrobr.cache.duckdb_store import get_store
|
|
17
|
+
from agrobr.cache.policies import get_next_update_info
|
|
18
|
+
|
|
19
|
+
logger = structlog.get_logger()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class SourceStatus:
    """Connectivity status of a single data source."""

    name: str  # human-readable source name, e.g. "CONAB"
    url: str  # base URL that was probed with an HTTP HEAD request
    status: str  # "ok", "slow" or "error" (assigned by _check_source)
    latency_ms: int  # round-trip time of the probe, in milliseconds
    error: str | None = None  # short error description when status == "error"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class CacheStats:
    """Aggregate statistics about the local cache database."""

    location: str  # filesystem path of the cache database file (or "unknown")
    size_bytes: int  # size of the database file on disk, in bytes
    total_records: int  # sum of per-source record counts
    # Per-source stats keyed by source name, each value a dict with
    # "count" (int), "oldest" (str | None) and "newest" (str | None).
    by_source: dict[str, dict[str, Any]] = field(default_factory=dict)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class DiagnosticsResult:
    """Full diagnostics report: source connectivity, cache state and config."""

    version: str  # agrobr package version
    timestamp: datetime  # when the diagnostic was run
    sources: list[SourceStatus]  # connectivity result per probed source
    cache: CacheStats  # local cache statistics
    last_collections: dict[str, datetime | None]  # latest collection per source
    cache_expiry: dict[str, dict[str, str]]  # expiry policy info per source
    config: dict[str, Any]  # relevant runtime configuration flags
    overall_status: str  # "healthy", "degraded" or "error"

    def to_dict(self) -> dict[str, Any]:
        """Serialize the report into plain JSON-compatible types."""

        def _source_entry(src: SourceStatus) -> dict[str, Any]:
            # Flatten one SourceStatus into a plain dict.
            return {
                "name": src.name,
                "url": src.url,
                "status": src.status,
                "latency_ms": src.latency_ms,
                "error": src.error,
            }

        collections = {
            key: (when.isoformat() if when else None)
            for key, when in self.last_collections.items()
        }
        return {
            "version": self.version,
            "timestamp": self.timestamp.isoformat(),
            "sources": [_source_entry(src) for src in self.sources],
            "cache": {
                "location": self.cache.location,
                "size_mb": round(self.cache.size_bytes / 1024 / 1024, 2),
                "total_records": self.cache.total_records,
                "by_source": self.cache.by_source,
            },
            "last_collections": collections,
            "cache_expiry": self.cache_expiry,
            "config": self.config,
            "overall_status": self.overall_status,
        }

    def to_rich(self) -> str:
        """Render the report as plain text for terminal output."""
        out: list[str] = [
            "",
            f"agrobr diagnostics v{self.version}",
            "=" * 50,
            "",
            "Sources Connectivity",
        ]

        # Map known statuses to their display icons; anything else is a failure.
        icons = {"ok": "[OK]", "slow": "[SLOW]"}
        for src in self.sources:
            icon = icons.get(src.status, "[FAIL]")
            entry = f" {icon} {src.name:<35} {src.latency_ms:>5}ms"
            if src.error:
                entry += f" ({src.error})"
            out.append(entry)

        out += [
            "",
            "Cache Status",
            f" Location: {self.cache.location}",
            f" Size: {self.cache.size_bytes / 1024 / 1024:.2f} MB",
            f" Total records: {self.cache.total_records:,}",
            "",
            " By source:",
        ]

        for src_name, stats in self.cache.by_source.items():
            record_count = stats.get("count", 0)
            oldest = stats.get("oldest", "-")
            newest = stats.get("newest", "-")
            out.append(f" {src_name.upper()}: {record_count:,} records ({oldest} to {newest})")

        out += ["", "Cache Expiry"]

        for src_name, info in self.cache_expiry.items():
            if info.get("type", "unknown") == "smart":
                out.append(f" {src_name.upper()}: {info.get('description', '')}")
            else:
                out.append(f" {src_name.upper()}: TTL {info.get('ttl', 'unknown')}")

        browser = "enabled" if self.config.get("browser_fallback") else "disabled"
        alternative = "enabled" if self.config.get("alternative_source") else "disabled"
        out += [
            "",
            "Configuration",
            f" Browser fallback: {browser}",
            f" Alternative source: {alternative}",
            "",
        ]

        summary = {
            "healthy": "[OK] All systems operational",
            "degraded": "[WARN] System degraded - some sources unavailable",
        }
        out.append(
            summary.get(self.overall_status, "[FAIL] System error - check source connectivity")
        )

        out.append("")
        return "\n".join(out)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
async def _check_source(name: str, url: str, timeout: float = 10.0) -> SourceStatus:
    """Probe a source URL with an HTTP HEAD request and report its health.

    Args:
        name: Human-readable source name.
        url: URL to probe.
        timeout: Request timeout in seconds.

    Returns:
        SourceStatus with status "ok" (latency under 2s), "slow" (2s or
        more) or "error" (HTTP >= 400, timeout, or connection failure).
    """
    started = time.perf_counter()

    def _elapsed_ms() -> int:
        # Milliseconds elapsed since the probe started.
        return int((time.perf_counter() - started) * 1000)

    try:
        async with httpx.AsyncClient(timeout=timeout) as probe:
            resp = await probe.head(url, follow_redirects=True)
            elapsed = _elapsed_ms()

            if resp.status_code >= 400:
                return SourceStatus(
                    name,
                    url,
                    "error",
                    elapsed,
                    error=f"HTTP {resp.status_code}",
                )

            return SourceStatus(name, url, "slow" if elapsed >= 2000 else "ok", elapsed)

    except httpx.TimeoutException:
        return SourceStatus(name, url, "error", _elapsed_ms(), error="timeout")

    except httpx.ConnectError as exc:
        return SourceStatus(name, url, "error", _elapsed_ms(), error=f"connection error: {exc}")

    except Exception as exc:
        # Catch-all so a single broken source never crashes the diagnostic.
        return SourceStatus(name, url, "error", _elapsed_ms(), error=str(exc))
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _get_cache_stats() -> CacheStats:
    """Collect size and per-source record statistics from the cache database.

    Returns:
        CacheStats for the local store; falls back to an empty CacheStats
        with location "unknown" if the store cannot be inspected at all.
    """
    try:
        store = get_store()
        cache_path = Path(store.db_path)
        size_bytes = cache_path.stat().st_size if cache_path.exists() else 0

        by_source: dict[str, dict[str, Any]] = {}
        # NOTE(review): reaches into the store's private connection via
        # _get_conn() — assumes the store keeps exposing it; confirm API.
        conn = store._get_conn()

        for fonte in ["cepea", "conab", "ibge"]:
            try:
                result = conn.execute(
                    """
                    SELECT COUNT(*), MIN(data), MAX(data)
                    FROM indicadores
                    WHERE LOWER(fonte) = ?
                    """,
                    [fonte],
                ).fetchone()

                # Only report sources that actually have cached records.
                if result and result[0] > 0:
                    by_source[fonte] = {
                        "count": result[0],
                        "oldest": str(result[1]) if result[1] else None,
                        "newest": str(result[2]) if result[2] else None,
                    }
            except Exception:
                # Best-effort: a failing query for one source must not break
                # the whole diagnostic; that source is simply omitted.
                pass

        total_records = sum(s.get("count", 0) for s in by_source.values())

        return CacheStats(
            location=str(cache_path),
            size_bytes=size_bytes,
            total_records=total_records,
            by_source=by_source,
        )

    except Exception as e:
        # Store unavailable (e.g. no database yet): log and return empty stats.
        logger.warning("cache_stats_failed", error=str(e))
        return CacheStats(
            location="unknown",
            size_bytes=0,
            total_records=0,
            by_source={},
        )
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _get_last_collections() -> dict[str, datetime | None]:
    """Return the most recent collection timestamp per source.

    Returns:
        Mapping of source name ("cepea", "conab", "ibge") to the latest
        collected_at value found in the cache, or None when unavailable.
        May be empty if the store itself cannot be opened.
    """
    collections: dict[str, datetime | None] = {}

    try:
        store = get_store()
        # NOTE(review): uses the store's private _get_conn() — confirm API.
        conn = store._get_conn()

        for fonte in ["cepea", "conab", "ibge"]:
            try:
                result = conn.execute(
                    """
                    SELECT MAX(collected_at)
                    FROM indicadores
                    WHERE LOWER(fonte) = ?
                    """,
                    [fonte],
                ).fetchone()

                collections[fonte] = result[0] if result and result[0] else None

            except Exception:
                # Per-source best-effort: report None instead of failing.
                collections[fonte] = None

    except Exception:
        # Store unavailable: return whatever was gathered (possibly empty).
        pass

    return collections
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
async def run_diagnostics(verbose: bool = False) -> DiagnosticsResult:  # noqa: ARG001
    """
    Run a full system diagnostic.

    Probes every source concurrently, gathers cache statistics, expiry
    policies and last-collection timestamps, and derives an overall status.

    Args:
        verbose: If True, include detailed information (reserved for future use)

    Returns:
        DiagnosticsResult with the complete system status
    """
    targets = [
        ("CEPEA (Noticias Agricolas)", "https://www.noticiasagricolas.com.br"),
        ("CONAB", "https://www.conab.gov.br"),
        ("IBGE/SIDRA", "https://sidra.ibge.gov.br"),
    ]

    # Probe all sources concurrently; _check_source never raises.
    sources = await asyncio.gather(*(_check_source(n, u) for n, u in targets))

    cache = _get_cache_stats()
    cache_expiry = {fonte: get_next_update_info(fonte) for fonte in ("cepea", "conab", "ibge")}
    last_collections = _get_last_collections()

    # Overall health: all failing -> error, any failing -> degraded.
    failures = [s for s in sources if s.status == "error"]
    if len(failures) == len(sources):
        overall = "error"
    elif failures:
        overall = "degraded"
    else:
        overall = "healthy"

    return DiagnosticsResult(
        version=__version__,
        timestamp=datetime.now(),
        sources=list(sources),
        cache=cache,
        last_collections=last_collections,
        cache_expiry=cache_expiry,
        config={
            "browser_fallback": False,
            "alternative_source": True,
        },
        overall_status=overall,
    )
|
agrobr/http/browser.py
CHANGED
|
@@ -15,7 +15,6 @@ from agrobr.http.user_agents import UserAgentRotator
|
|
|
15
15
|
|
|
16
16
|
logger = structlog.get_logger()
|
|
17
17
|
|
|
18
|
-
# Singleton para reutilizar browser
|
|
19
18
|
_playwright: Playwright | None = None
|
|
20
19
|
_browser: Browser | None = None
|
|
21
20
|
_lock = asyncio.Lock()
|
|
@@ -64,7 +63,6 @@ async def get_page() -> AsyncGenerator[Page, None]:
|
|
|
64
63
|
"""Context manager para obter uma página do browser."""
|
|
65
64
|
browser = await _get_browser()
|
|
66
65
|
|
|
67
|
-
# Cria contexto com fingerprint realista
|
|
68
66
|
ua = UserAgentRotator.get_random()
|
|
69
67
|
context = await browser.new_context(
|
|
70
68
|
user_agent=ua,
|
|
@@ -78,7 +76,6 @@ async def get_page() -> AsyncGenerator[Page, None]:
|
|
|
78
76
|
|
|
79
77
|
page = await context.new_page()
|
|
80
78
|
|
|
81
|
-
# Esconde sinais de automação
|
|
82
79
|
await page.add_init_script(
|
|
83
80
|
"""
|
|
84
81
|
Object.defineProperty(navigator, 'webdriver', {
|
|
@@ -124,7 +121,6 @@ async def fetch_with_browser(
|
|
|
124
121
|
|
|
125
122
|
try:
|
|
126
123
|
async with get_page() as page:
|
|
127
|
-
# Navega para a URL
|
|
128
124
|
response = await page.goto(
|
|
129
125
|
url,
|
|
130
126
|
wait_until="domcontentloaded",
|
|
@@ -138,7 +134,6 @@ async def fetch_with_browser(
|
|
|
138
134
|
last_error="No response received",
|
|
139
135
|
)
|
|
140
136
|
|
|
141
|
-
# Aguarda seletor específico se fornecido
|
|
142
137
|
if wait_selector:
|
|
143
138
|
try:
|
|
144
139
|
await page.wait_for_selector(
|
|
@@ -152,13 +147,10 @@ async def fetch_with_browser(
|
|
|
152
147
|
error=str(e),
|
|
153
148
|
)
|
|
154
149
|
|
|
155
|
-
# Aguarda Cloudflare resolver e JS terminar
|
|
156
150
|
await page.wait_for_timeout(5000)
|
|
157
151
|
|
|
158
|
-
# Verifica se foi bloqueado pelo Cloudflare
|
|
159
152
|
if response.status in (403, 503):
|
|
160
153
|
check_html: str = await page.content()
|
|
161
|
-
# Detecta página de challenge do Cloudflare
|
|
162
154
|
if "cloudflare" in check_html.lower() or "challenge" in check_html.lower():
|
|
163
155
|
raise SourceUnavailableError(
|
|
164
156
|
source=source,
|
|
@@ -166,7 +158,6 @@ async def fetch_with_browser(
|
|
|
166
158
|
last_error=f"Cloudflare block detected (status {response.status})",
|
|
167
159
|
)
|
|
168
160
|
|
|
169
|
-
# Obtém HTML
|
|
170
161
|
html: str = await page.content()
|
|
171
162
|
|
|
172
163
|
logger.info(
|
agrobr/ibge/api.py
CHANGED
|
@@ -2,16 +2,22 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
import time
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from typing import Literal, overload
|
|
6
8
|
|
|
7
9
|
import pandas as pd
|
|
8
10
|
import structlog
|
|
9
11
|
|
|
12
|
+
from agrobr import constants
|
|
13
|
+
from agrobr.cache.policies import calculate_expiry
|
|
10
14
|
from agrobr.ibge import client
|
|
15
|
+
from agrobr.models import MetaInfo
|
|
11
16
|
|
|
12
17
|
logger = structlog.get_logger()
|
|
13
18
|
|
|
14
19
|
|
|
20
|
+
@overload
|
|
15
21
|
async def pam(
|
|
16
22
|
produto: str,
|
|
17
23
|
ano: int | str | list[int] | None = None,
|
|
@@ -19,7 +25,33 @@ async def pam(
|
|
|
19
25
|
nivel: Literal["brasil", "uf", "municipio"] = "uf",
|
|
20
26
|
variaveis: list[str] | None = None,
|
|
21
27
|
as_polars: bool = False,
|
|
22
|
-
|
|
28
|
+
*,
|
|
29
|
+
return_meta: Literal[False] = False,
|
|
30
|
+
) -> pd.DataFrame: ...
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@overload
|
|
34
|
+
async def pam(
|
|
35
|
+
produto: str,
|
|
36
|
+
ano: int | str | list[int] | None = None,
|
|
37
|
+
uf: str | None = None,
|
|
38
|
+
nivel: Literal["brasil", "uf", "municipio"] = "uf",
|
|
39
|
+
variaveis: list[str] | None = None,
|
|
40
|
+
as_polars: bool = False,
|
|
41
|
+
*,
|
|
42
|
+
return_meta: Literal[True],
|
|
43
|
+
) -> tuple[pd.DataFrame, MetaInfo]: ...
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
async def pam(
|
|
47
|
+
produto: str,
|
|
48
|
+
ano: int | str | list[int] | None = None,
|
|
49
|
+
uf: str | None = None,
|
|
50
|
+
nivel: Literal["brasil", "uf", "municipio"] = "uf",
|
|
51
|
+
variaveis: list[str] | None = None,
|
|
52
|
+
as_polars: bool = False,
|
|
53
|
+
return_meta: bool = False,
|
|
54
|
+
) -> pd.DataFrame | tuple[pd.DataFrame, MetaInfo]:
|
|
23
55
|
"""
|
|
24
56
|
Obtém dados da Produção Agrícola Municipal (PAM).
|
|
25
57
|
|
|
@@ -30,14 +62,22 @@ async def pam(
|
|
|
30
62
|
nivel: Nível territorial ("brasil", "uf", "municipio")
|
|
31
63
|
variaveis: Lista de variáveis (area_plantada, area_colhida, producao, rendimento)
|
|
32
64
|
as_polars: Se True, retorna polars.DataFrame
|
|
65
|
+
return_meta: Se True, retorna tupla (DataFrame, MetaInfo)
|
|
33
66
|
|
|
34
67
|
Returns:
|
|
35
|
-
DataFrame com dados da PAM
|
|
68
|
+
DataFrame com dados da PAM ou tupla (DataFrame, MetaInfo)
|
|
36
69
|
|
|
37
70
|
Example:
|
|
38
71
|
>>> df = await ibge.pam('soja', ano=2023)
|
|
39
|
-
>>> df = await ibge.pam('milho', ano=[2020, 2021, 2022], uf='MT')
|
|
72
|
+
>>> df, meta = await ibge.pam('milho', ano=[2020, 2021, 2022], uf='MT', return_meta=True)
|
|
40
73
|
"""
|
|
74
|
+
fetch_start = time.perf_counter()
|
|
75
|
+
meta = MetaInfo(
|
|
76
|
+
source="ibge_pam",
|
|
77
|
+
source_url="https://sidra.ibge.gov.br",
|
|
78
|
+
source_method="httpx",
|
|
79
|
+
fetched_at=datetime.now(),
|
|
80
|
+
)
|
|
41
81
|
logger.info(
|
|
42
82
|
"ibge_pam_request",
|
|
43
83
|
produto=produto,
|
|
@@ -46,7 +86,6 @@ async def pam(
|
|
|
46
86
|
nivel=nivel,
|
|
47
87
|
)
|
|
48
88
|
|
|
49
|
-
# Mapeia produto para código SIDRA
|
|
50
89
|
produto_lower = produto.lower()
|
|
51
90
|
if produto_lower not in client.PRODUTOS_PAM:
|
|
52
91
|
raise ValueError(
|
|
@@ -55,7 +94,6 @@ async def pam(
|
|
|
55
94
|
|
|
56
95
|
produto_cod = client.PRODUTOS_PAM[produto_lower]
|
|
57
96
|
|
|
58
|
-
# Mapeia variáveis
|
|
59
97
|
if variaveis is None:
|
|
60
98
|
variaveis = ["area_plantada", "area_colhida", "producao", "rendimento"]
|
|
61
99
|
|
|
@@ -66,7 +104,6 @@ async def pam(
|
|
|
66
104
|
else:
|
|
67
105
|
logger.warning(f"Variável desconhecida: {var}")
|
|
68
106
|
|
|
69
|
-
# Mapeia nível territorial
|
|
70
107
|
nivel_map = {
|
|
71
108
|
"brasil": "1",
|
|
72
109
|
"uf": "3",
|
|
@@ -74,12 +111,10 @@ async def pam(
|
|
|
74
111
|
}
|
|
75
112
|
territorial_level = nivel_map.get(nivel, "3")
|
|
76
113
|
|
|
77
|
-
# Define código territorial
|
|
78
114
|
ibge_code = "all"
|
|
79
115
|
if uf and nivel in ("uf", "municipio"):
|
|
80
116
|
ibge_code = client.uf_to_ibge_code(uf)
|
|
81
117
|
|
|
82
|
-
# Define período
|
|
83
118
|
if ano is None:
|
|
84
119
|
period = "last"
|
|
85
120
|
elif isinstance(ano, list):
|
|
@@ -87,7 +122,6 @@ async def pam(
|
|
|
87
122
|
else:
|
|
88
123
|
period = str(ano)
|
|
89
124
|
|
|
90
|
-
# Busca dados
|
|
91
125
|
df = await client.fetch_sidra(
|
|
92
126
|
table_code=client.TABELAS["pam_nova"],
|
|
93
127
|
territorial_level=territorial_level,
|
|
@@ -97,10 +131,8 @@ async def pam(
|
|
|
97
131
|
classifications={"782": produto_cod},
|
|
98
132
|
)
|
|
99
133
|
|
|
100
|
-
# Processa resposta
|
|
101
134
|
df = client.parse_sidra_response(df)
|
|
102
135
|
|
|
103
|
-
# Pivota para ter variáveis como colunas
|
|
104
136
|
if "variavel" in df.columns and "valor" in df.columns:
|
|
105
137
|
df_pivot = df.pivot_table(
|
|
106
138
|
index=["localidade", "ano"] if "localidade" in df.columns else ["ano"],
|
|
@@ -109,7 +141,6 @@ async def pam(
|
|
|
109
141
|
aggfunc="first",
|
|
110
142
|
).reset_index()
|
|
111
143
|
|
|
112
|
-
# Renomeia colunas para nomes mais simples
|
|
113
144
|
rename_map = {
|
|
114
145
|
"Área plantada": "area_plantada",
|
|
115
146
|
"Área colhida": "area_colhida",
|
|
@@ -123,11 +154,20 @@ async def pam(
|
|
|
123
154
|
df["produto"] = produto_lower
|
|
124
155
|
df["fonte"] = "ibge_pam"
|
|
125
156
|
|
|
157
|
+
meta.fetch_duration_ms = int((time.perf_counter() - fetch_start) * 1000)
|
|
158
|
+
meta.records_count = len(df)
|
|
159
|
+
meta.columns = df.columns.tolist()
|
|
160
|
+
meta.cache_key = f"ibge:pam:{produto}:{ano}"
|
|
161
|
+
meta.cache_expires_at = calculate_expiry(constants.Fonte.IBGE, "pam")
|
|
162
|
+
|
|
126
163
|
if as_polars:
|
|
127
164
|
try:
|
|
128
165
|
import polars as pl
|
|
129
166
|
|
|
130
|
-
|
|
167
|
+
result_df = pl.from_pandas(df)
|
|
168
|
+
if return_meta:
|
|
169
|
+
return result_df, meta # type: ignore[return-value,no-any-return]
|
|
170
|
+
return result_df # type: ignore[return-value,no-any-return]
|
|
131
171
|
except ImportError:
|
|
132
172
|
logger.warning("polars_not_installed", fallback="pandas")
|
|
133
173
|
|
|
@@ -137,16 +177,43 @@ async def pam(
|
|
|
137
177
|
records=len(df),
|
|
138
178
|
)
|
|
139
179
|
|
|
180
|
+
if return_meta:
|
|
181
|
+
return df, meta
|
|
140
182
|
return df
|
|
141
183
|
|
|
142
184
|
|
|
185
|
+
@overload
|
|
186
|
+
async def lspa(
|
|
187
|
+
produto: str,
|
|
188
|
+
ano: int | str | None = None,
|
|
189
|
+
mes: int | str | None = None,
|
|
190
|
+
uf: str | None = None,
|
|
191
|
+
as_polars: bool = False,
|
|
192
|
+
*,
|
|
193
|
+
return_meta: Literal[False] = False,
|
|
194
|
+
) -> pd.DataFrame: ...
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
@overload
|
|
143
198
|
async def lspa(
|
|
144
199
|
produto: str,
|
|
145
200
|
ano: int | str | None = None,
|
|
146
201
|
mes: int | str | None = None,
|
|
147
202
|
uf: str | None = None,
|
|
148
203
|
as_polars: bool = False,
|
|
149
|
-
|
|
204
|
+
*,
|
|
205
|
+
return_meta: Literal[True],
|
|
206
|
+
) -> tuple[pd.DataFrame, MetaInfo]: ...
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
async def lspa(
|
|
210
|
+
produto: str,
|
|
211
|
+
ano: int | str | None = None,
|
|
212
|
+
mes: int | str | None = None,
|
|
213
|
+
uf: str | None = None,
|
|
214
|
+
as_polars: bool = False,
|
|
215
|
+
return_meta: bool = False,
|
|
216
|
+
) -> pd.DataFrame | tuple[pd.DataFrame, MetaInfo]:
|
|
150
217
|
"""
|
|
151
218
|
Obtém dados do Levantamento Sistemático da Produção Agrícola (LSPA).
|
|
152
219
|
|
|
@@ -158,14 +225,22 @@ async def lspa(
|
|
|
158
225
|
mes: Mês de referência (1-12). Se None, retorna todos os meses do ano.
|
|
159
226
|
uf: Filtrar por UF (ex: "MT", "PR")
|
|
160
227
|
as_polars: Se True, retorna polars.DataFrame
|
|
228
|
+
return_meta: Se True, retorna tupla (DataFrame, MetaInfo)
|
|
161
229
|
|
|
162
230
|
Returns:
|
|
163
|
-
DataFrame com estimativas LSPA
|
|
231
|
+
DataFrame com estimativas LSPA ou tupla (DataFrame, MetaInfo)
|
|
164
232
|
|
|
165
233
|
Example:
|
|
166
234
|
>>> df = await ibge.lspa('soja', ano=2024)
|
|
167
|
-
>>> df = await ibge.lspa('milho_1', ano=2024, mes=6, uf='PR')
|
|
235
|
+
>>> df, meta = await ibge.lspa('milho_1', ano=2024, mes=6, uf='PR', return_meta=True)
|
|
168
236
|
"""
|
|
237
|
+
fetch_start = time.perf_counter()
|
|
238
|
+
meta = MetaInfo(
|
|
239
|
+
source="ibge_lspa",
|
|
240
|
+
source_url="https://sidra.ibge.gov.br",
|
|
241
|
+
source_method="httpx",
|
|
242
|
+
fetched_at=datetime.now(),
|
|
243
|
+
)
|
|
169
244
|
logger.info(
|
|
170
245
|
"ibge_lspa_request",
|
|
171
246
|
produto=produto,
|
|
@@ -174,7 +249,6 @@ async def lspa(
|
|
|
174
249
|
uf=uf,
|
|
175
250
|
)
|
|
176
251
|
|
|
177
|
-
# Mapeia produto para código SIDRA
|
|
178
252
|
produto_lower = produto.lower()
|
|
179
253
|
if produto_lower not in client.PRODUTOS_LSPA:
|
|
180
254
|
raise ValueError(
|
|
@@ -183,20 +257,16 @@ async def lspa(
|
|
|
183
257
|
|
|
184
258
|
produto_cod = client.PRODUTOS_LSPA[produto_lower]
|
|
185
259
|
|
|
186
|
-
# Define período
|
|
187
260
|
if ano is None:
|
|
188
261
|
from datetime import date
|
|
189
262
|
|
|
190
263
|
ano = date.today().year
|
|
191
264
|
|
|
192
|
-
# Define período
|
|
193
265
|
period = f"{ano}{int(mes):02d}" if mes else ",".join(f"{ano}{m:02d}" for m in range(1, 13))
|
|
194
266
|
|
|
195
|
-
# Define nível territorial
|
|
196
267
|
territorial_level = "3" if uf else "1"
|
|
197
268
|
ibge_code = client.uf_to_ibge_code(uf) if uf else "all"
|
|
198
269
|
|
|
199
|
-
# Busca dados (não especifica variáveis - retorna todas)
|
|
200
270
|
df = await client.fetch_sidra(
|
|
201
271
|
table_code=client.TABELAS["lspa"],
|
|
202
272
|
territorial_level=territorial_level,
|
|
@@ -205,10 +275,8 @@ async def lspa(
|
|
|
205
275
|
classifications={"48": produto_cod},
|
|
206
276
|
)
|
|
207
277
|
|
|
208
|
-
# Processa resposta
|
|
209
278
|
df = client.parse_sidra_response(df)
|
|
210
279
|
|
|
211
|
-
# Adiciona período da consulta
|
|
212
280
|
df["ano"] = ano
|
|
213
281
|
if mes:
|
|
214
282
|
df["mes"] = mes
|
|
@@ -216,11 +284,20 @@ async def lspa(
|
|
|
216
284
|
df["produto"] = produto_lower
|
|
217
285
|
df["fonte"] = "ibge_lspa"
|
|
218
286
|
|
|
287
|
+
meta.fetch_duration_ms = int((time.perf_counter() - fetch_start) * 1000)
|
|
288
|
+
meta.records_count = len(df)
|
|
289
|
+
meta.columns = df.columns.tolist()
|
|
290
|
+
meta.cache_key = f"ibge:lspa:{produto}:{ano}:{mes}"
|
|
291
|
+
meta.cache_expires_at = calculate_expiry(constants.Fonte.IBGE, "lspa")
|
|
292
|
+
|
|
219
293
|
if as_polars:
|
|
220
294
|
try:
|
|
221
295
|
import polars as pl
|
|
222
296
|
|
|
223
|
-
|
|
297
|
+
result_df = pl.from_pandas(df)
|
|
298
|
+
if return_meta:
|
|
299
|
+
return result_df, meta # type: ignore[return-value,no-any-return]
|
|
300
|
+
return result_df # type: ignore[return-value,no-any-return]
|
|
224
301
|
except ImportError:
|
|
225
302
|
logger.warning("polars_not_installed", fallback="pandas")
|
|
226
303
|
|
|
@@ -230,6 +307,8 @@ async def lspa(
|
|
|
230
307
|
records=len(df),
|
|
231
308
|
)
|
|
232
309
|
|
|
310
|
+
if return_meta:
|
|
311
|
+
return df, meta
|
|
233
312
|
return df
|
|
234
313
|
|
|
235
314
|
|