agrobr 0.1.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,321 @@
1
+ """Diagnóstico completo do sistema agrobr."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import time
7
+ from dataclasses import dataclass, field
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ import httpx
13
+ import structlog
14
+
15
+ from agrobr import __version__
16
+ from agrobr.cache.duckdb_store import get_store
17
+ from agrobr.cache.policies import get_next_update_info
18
+
19
+ logger = structlog.get_logger()
20
+
21
+
22
@dataclass
class SourceStatus:
    """Connectivity status of a single data source."""

    name: str  # human-readable source name (e.g. "CONAB")
    url: str  # URL that was probed
    status: str  # "ok", "slow" or "error"
    latency_ms: int  # round-trip time of the probe, in milliseconds
    error: str | None = None  # error description when status == "error"
31
+
32
+
33
@dataclass
class CacheStats:
    """Aggregate statistics about the local cache database."""

    location: str  # filesystem path of the cache database file
    size_bytes: int  # on-disk size of the database file
    total_records: int  # total cached records across all sources
    # Per-source stats, keyed by source name:
    # {"count": int, "oldest": str | None, "newest": str | None}
    by_source: dict[str, dict[str, Any]] = field(default_factory=dict)
41
+
42
+
43
@dataclass
class DiagnosticsResult:
    """Complete result of a diagnostics run over sources, cache and config."""

    version: str  # installed agrobr version
    timestamp: datetime  # when the diagnostics run happened
    sources: list[SourceStatus]  # per-source connectivity results
    cache: CacheStats  # cache database statistics
    last_collections: dict[str, datetime | None]  # last collection per source
    cache_expiry: dict[str, dict[str, str]]  # expiry policy info per source
    config: dict[str, Any]  # relevant runtime configuration flags
    overall_status: str  # "healthy", "degraded" or "error"

    def to_dict(self) -> dict[str, Any]:
        """Convert the result into a JSON-serialisable dictionary."""
        source_entries = [
            {
                "name": src.name,
                "url": src.url,
                "status": src.status,
                "latency_ms": src.latency_ms,
                "error": src.error,
            }
            for src in self.sources
        ]
        collection_entries = {
            fonte: (ts.isoformat() if ts else None)
            for fonte, ts in self.last_collections.items()
        }
        return {
            "version": self.version,
            "timestamp": self.timestamp.isoformat(),
            "sources": source_entries,
            "cache": {
                "location": self.cache.location,
                "size_mb": round(self.cache.size_bytes / 1024 / 1024, 2),
                "total_records": self.cache.total_records,
                "by_source": self.cache.by_source,
            },
            "last_collections": collection_entries,
            "cache_expiry": self.cache_expiry,
            "config": self.config,
            "overall_status": self.overall_status,
        }

    def to_rich(self) -> str:
        """Render the result as a plain-text report for the terminal."""
        # Status -> icon dispatch; anything unknown counts as a failure.
        icons = {"ok": "[OK]", "slow": "[SLOW]"}

        out: list[str] = [
            "",
            f"agrobr diagnostics v{self.version}",
            "=" * 50,
            "",
            "Sources Connectivity",
        ]

        for src in self.sources:
            icon = icons.get(src.status, "[FAIL]")
            entry = f" {icon} {src.name:<35} {src.latency_ms:>5}ms"
            if src.error:
                entry = f"{entry} ({src.error})"
            out.append(entry)

        out += [
            "",
            "Cache Status",
            f" Location: {self.cache.location}",
            f" Size: {self.cache.size_bytes / 1024 / 1024:.2f} MB",
            f" Total records: {self.cache.total_records:,}",
            "",
            " By source:",
        ]

        for fonte, stats in self.cache.by_source.items():
            out.append(
                f" {fonte.upper()}: {stats.get('count', 0):,} records "
                f"({stats.get('oldest', '-')} to {stats.get('newest', '-')})"
            )

        out += ["", "Cache Expiry"]

        for fonte, info in self.cache_expiry.items():
            if info.get("type", "unknown") == "smart":
                out.append(f" {fonte.upper()}: {info.get('description', '')}")
            else:
                out.append(f" {fonte.upper()}: TTL {info.get('ttl', 'unknown')}")

        browser = "enabled" if self.config.get("browser_fallback") else "disabled"
        alternative = "enabled" if self.config.get("alternative_source") else "disabled"
        out += [
            "",
            "Configuration",
            f" Browser fallback: {browser}",
            f" Alternative source: {alternative}",
            "",
        ]

        # Overall-status footer; unknown statuses are reported as failures.
        footers = {
            "healthy": "[OK] All systems operational",
            "degraded": "[WARN] System degraded - some sources unavailable",
        }
        out.append(
            footers.get(self.overall_status, "[FAIL] System error - check source connectivity")
        )
        out.append("")
        return "\n".join(out)
159
+
160
+
161
async def _check_source(name: str, url: str, timeout: float = 10.0) -> SourceStatus:
    """Probe a single data source with a HEAD request and report its status.

    Args:
        name: Human-readable name of the source.
        url: URL to probe.
        timeout: Request timeout in seconds.

    Returns:
        SourceStatus — "ok" (< 2s), "slow" (>= 2s) or "error" with a reason.
    """
    started = time.perf_counter()

    def elapsed_ms() -> int:
        # Milliseconds elapsed since the probe started.
        return int((time.perf_counter() - started) * 1000)

    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            response = await client.head(url, follow_redirects=True)
            latency = elapsed_ms()

            if response.status_code >= 400:
                return SourceStatus(
                    name,
                    url,
                    "error",
                    latency,
                    error=f"HTTP {response.status_code}",
                )

            return SourceStatus(name, url, "slow" if latency >= 2000 else "ok", latency)

    except httpx.TimeoutException:
        return SourceStatus(name, url, "error", elapsed_ms(), error="timeout")

    except httpx.ConnectError as exc:
        return SourceStatus(name, url, "error", elapsed_ms(), error=f"connection error: {exc}")

    except Exception as exc:  # boundary: any other failure becomes an error status
        return SourceStatus(name, url, "error", elapsed_ms(), error=str(exc))
193
+
194
+
195
def _get_cache_stats() -> CacheStats:
    """Collect statistics about the local cache database.

    Best-effort: a failure to open the store is logged and an empty
    ``CacheStats`` is returned so diagnostics can still complete; a failure
    querying one source is logged at debug level and that source is simply
    omitted from the per-source stats.

    Returns:
        CacheStats with file location/size and per-source record counts.
    """
    try:
        store = get_store()
        cache_path = Path(store.db_path)
        size_bytes = cache_path.stat().st_size if cache_path.exists() else 0

        by_source: dict[str, dict[str, Any]] = {}
        conn = store._get_conn()

        for fonte in ["cepea", "conab", "ibge"]:
            try:
                result = conn.execute(
                    """
                    SELECT COUNT(*), MIN(data), MAX(data)
                    FROM indicadores
                    WHERE LOWER(fonte) = ?
                    """,
                    [fonte],
                ).fetchone()

                if result and result[0] > 0:
                    by_source[fonte] = {
                        "count": result[0],
                        "oldest": str(result[1]) if result[1] else None,
                        "newest": str(result[2]) if result[2] else None,
                    }
            except Exception as e:
                # Was a silent `pass`: still best-effort, but record why a
                # source is missing from the stats instead of hiding it.
                logger.debug("cache_stats_source_failed", fonte=fonte, error=str(e))

        total_records = sum(s.get("count", 0) for s in by_source.values())

        return CacheStats(
            location=str(cache_path),
            size_bytes=size_bytes,
            total_records=total_records,
            by_source=by_source,
        )

    except Exception as e:
        logger.warning("cache_stats_failed", error=str(e))
        # Degrade gracefully: diagnostics must not crash because of the cache.
        return CacheStats(
            location="unknown",
            size_bytes=0,
            total_records=0,
            by_source={},
        )
242
+
243
+
244
def _get_last_collections() -> dict[str, datetime | None]:
    """Return the timestamp of the most recent collection per source.

    Best-effort: a source that cannot be queried maps to ``None``; a failure
    to open the store is logged and yields an empty mapping.

    Returns:
        Mapping of source name ("cepea", "conab", "ibge") to the latest
        ``collected_at`` value, or ``None`` when unknown.
    """
    collections: dict[str, datetime | None] = {}

    try:
        store = get_store()
        conn = store._get_conn()

        for fonte in ["cepea", "conab", "ibge"]:
            try:
                result = conn.execute(
                    """
                    SELECT MAX(collected_at)
                    FROM indicadores
                    WHERE LOWER(fonte) = ?
                    """,
                    [fonte],
                ).fetchone()

                collections[fonte] = result[0] if result and result[0] else None

            except Exception:
                # Per-source query failed; treat as "never collected".
                collections[fonte] = None

    except Exception as e:
        # Was a silent `pass`: log so a broken store is visible in the
        # diagnostics logs (consistent with _get_cache_stats).
        logger.warning("last_collections_failed", error=str(e))

    return collections
272
+
273
+
274
async def run_diagnostics(verbose: bool = False) -> DiagnosticsResult:  # noqa: ARG001
    """
    Run a complete system diagnostic.

    Args:
        verbose: When True, include detailed information (reserved for future use)

    Returns:
        DiagnosticsResult with the full system status
    """
    targets = [
        ("CEPEA (Noticias Agricolas)", "https://www.noticiasagricolas.com.br"),
        ("CONAB", "https://www.conab.gov.br"),
        ("IBGE/SIDRA", "https://sidra.ibge.gov.br"),
    ]

    # Probe all sources concurrently.
    sources = await asyncio.gather(*(_check_source(n, u) for n, u in targets))

    cache = _get_cache_stats()
    cache_expiry = {fonte: get_next_update_info(fonte) for fonte in ["cepea", "conab", "ibge"]}
    last_collections = _get_last_collections()

    # All sources down -> error; some down -> degraded; none -> healthy.
    failures = sum(1 for s in sources if s.status == "error")
    if failures == len(sources):
        overall = "error"
    elif failures:
        overall = "degraded"
    else:
        overall = "healthy"

    return DiagnosticsResult(
        version=__version__,
        timestamp=datetime.now(),
        sources=list(sources),
        cache=cache,
        last_collections=last_collections,
        cache_expiry=cache_expiry,
        config={
            "browser_fallback": False,
            "alternative_source": True,
        },
        overall_status=overall,
    )
agrobr/http/browser.py CHANGED
@@ -15,7 +15,6 @@ from agrobr.http.user_agents import UserAgentRotator
15
15
 
16
16
  logger = structlog.get_logger()
17
17
 
18
- # Singleton para reutilizar browser
19
18
  _playwright: Playwright | None = None
20
19
  _browser: Browser | None = None
21
20
  _lock = asyncio.Lock()
@@ -64,7 +63,6 @@ async def get_page() -> AsyncGenerator[Page, None]:
64
63
  """Context manager para obter uma página do browser."""
65
64
  browser = await _get_browser()
66
65
 
67
- # Cria contexto com fingerprint realista
68
66
  ua = UserAgentRotator.get_random()
69
67
  context = await browser.new_context(
70
68
  user_agent=ua,
@@ -78,7 +76,6 @@ async def get_page() -> AsyncGenerator[Page, None]:
78
76
 
79
77
  page = await context.new_page()
80
78
 
81
- # Esconde sinais de automação
82
79
  await page.add_init_script(
83
80
  """
84
81
  Object.defineProperty(navigator, 'webdriver', {
@@ -124,7 +121,6 @@ async def fetch_with_browser(
124
121
 
125
122
  try:
126
123
  async with get_page() as page:
127
- # Navega para a URL
128
124
  response = await page.goto(
129
125
  url,
130
126
  wait_until="domcontentloaded",
@@ -138,7 +134,6 @@ async def fetch_with_browser(
138
134
  last_error="No response received",
139
135
  )
140
136
 
141
- # Aguarda seletor específico se fornecido
142
137
  if wait_selector:
143
138
  try:
144
139
  await page.wait_for_selector(
@@ -152,13 +147,10 @@ async def fetch_with_browser(
152
147
  error=str(e),
153
148
  )
154
149
 
155
- # Aguarda Cloudflare resolver e JS terminar
156
150
  await page.wait_for_timeout(5000)
157
151
 
158
- # Verifica se foi bloqueado pelo Cloudflare
159
152
  if response.status in (403, 503):
160
153
  check_html: str = await page.content()
161
- # Detecta página de challenge do Cloudflare
162
154
  if "cloudflare" in check_html.lower() or "challenge" in check_html.lower():
163
155
  raise SourceUnavailableError(
164
156
  source=source,
@@ -166,7 +158,6 @@ async def fetch_with_browser(
166
158
  last_error=f"Cloudflare block detected (status {response.status})",
167
159
  )
168
160
 
169
- # Obtém HTML
170
161
  html: str = await page.content()
171
162
 
172
163
  logger.info(
agrobr/ibge/api.py CHANGED
@@ -2,16 +2,22 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from typing import Literal
5
+ import time
6
+ from datetime import datetime
7
+ from typing import Literal, overload
6
8
 
7
9
  import pandas as pd
8
10
  import structlog
9
11
 
12
+ from agrobr import constants
13
+ from agrobr.cache.policies import calculate_expiry
10
14
  from agrobr.ibge import client
15
+ from agrobr.models import MetaInfo
11
16
 
12
17
  logger = structlog.get_logger()
13
18
 
14
19
 
20
+ @overload
15
21
  async def pam(
16
22
  produto: str,
17
23
  ano: int | str | list[int] | None = None,
@@ -19,7 +25,33 @@ async def pam(
19
25
  nivel: Literal["brasil", "uf", "municipio"] = "uf",
20
26
  variaveis: list[str] | None = None,
21
27
  as_polars: bool = False,
22
- ) -> pd.DataFrame:
28
+ *,
29
+ return_meta: Literal[False] = False,
30
+ ) -> pd.DataFrame: ...
31
+
32
+
33
+ @overload
34
+ async def pam(
35
+ produto: str,
36
+ ano: int | str | list[int] | None = None,
37
+ uf: str | None = None,
38
+ nivel: Literal["brasil", "uf", "municipio"] = "uf",
39
+ variaveis: list[str] | None = None,
40
+ as_polars: bool = False,
41
+ *,
42
+ return_meta: Literal[True],
43
+ ) -> tuple[pd.DataFrame, MetaInfo]: ...
44
+
45
+
46
+ async def pam(
47
+ produto: str,
48
+ ano: int | str | list[int] | None = None,
49
+ uf: str | None = None,
50
+ nivel: Literal["brasil", "uf", "municipio"] = "uf",
51
+ variaveis: list[str] | None = None,
52
+ as_polars: bool = False,
53
+ return_meta: bool = False,
54
+ ) -> pd.DataFrame | tuple[pd.DataFrame, MetaInfo]:
23
55
  """
24
56
  Obtém dados da Produção Agrícola Municipal (PAM).
25
57
 
@@ -30,14 +62,22 @@ async def pam(
30
62
  nivel: Nível territorial ("brasil", "uf", "municipio")
31
63
  variaveis: Lista de variáveis (area_plantada, area_colhida, producao, rendimento)
32
64
  as_polars: Se True, retorna polars.DataFrame
65
+ return_meta: Se True, retorna tupla (DataFrame, MetaInfo)
33
66
 
34
67
  Returns:
35
- DataFrame com dados da PAM
68
+ DataFrame com dados da PAM ou tupla (DataFrame, MetaInfo)
36
69
 
37
70
  Example:
38
71
  >>> df = await ibge.pam('soja', ano=2023)
39
- >>> df = await ibge.pam('milho', ano=[2020, 2021, 2022], uf='MT')
72
+ >>> df, meta = await ibge.pam('milho', ano=[2020, 2021, 2022], uf='MT', return_meta=True)
40
73
  """
74
+ fetch_start = time.perf_counter()
75
+ meta = MetaInfo(
76
+ source="ibge_pam",
77
+ source_url="https://sidra.ibge.gov.br",
78
+ source_method="httpx",
79
+ fetched_at=datetime.now(),
80
+ )
41
81
  logger.info(
42
82
  "ibge_pam_request",
43
83
  produto=produto,
@@ -46,7 +86,6 @@ async def pam(
46
86
  nivel=nivel,
47
87
  )
48
88
 
49
- # Mapeia produto para código SIDRA
50
89
  produto_lower = produto.lower()
51
90
  if produto_lower not in client.PRODUTOS_PAM:
52
91
  raise ValueError(
@@ -55,7 +94,6 @@ async def pam(
55
94
 
56
95
  produto_cod = client.PRODUTOS_PAM[produto_lower]
57
96
 
58
- # Mapeia variáveis
59
97
  if variaveis is None:
60
98
  variaveis = ["area_plantada", "area_colhida", "producao", "rendimento"]
61
99
 
@@ -66,7 +104,6 @@ async def pam(
66
104
  else:
67
105
  logger.warning(f"Variável desconhecida: {var}")
68
106
 
69
- # Mapeia nível territorial
70
107
  nivel_map = {
71
108
  "brasil": "1",
72
109
  "uf": "3",
@@ -74,12 +111,10 @@ async def pam(
74
111
  }
75
112
  territorial_level = nivel_map.get(nivel, "3")
76
113
 
77
- # Define código territorial
78
114
  ibge_code = "all"
79
115
  if uf and nivel in ("uf", "municipio"):
80
116
  ibge_code = client.uf_to_ibge_code(uf)
81
117
 
82
- # Define período
83
118
  if ano is None:
84
119
  period = "last"
85
120
  elif isinstance(ano, list):
@@ -87,7 +122,6 @@ async def pam(
87
122
  else:
88
123
  period = str(ano)
89
124
 
90
- # Busca dados
91
125
  df = await client.fetch_sidra(
92
126
  table_code=client.TABELAS["pam_nova"],
93
127
  territorial_level=territorial_level,
@@ -97,10 +131,8 @@ async def pam(
97
131
  classifications={"782": produto_cod},
98
132
  )
99
133
 
100
- # Processa resposta
101
134
  df = client.parse_sidra_response(df)
102
135
 
103
- # Pivota para ter variáveis como colunas
104
136
  if "variavel" in df.columns and "valor" in df.columns:
105
137
  df_pivot = df.pivot_table(
106
138
  index=["localidade", "ano"] if "localidade" in df.columns else ["ano"],
@@ -109,7 +141,6 @@ async def pam(
109
141
  aggfunc="first",
110
142
  ).reset_index()
111
143
 
112
- # Renomeia colunas para nomes mais simples
113
144
  rename_map = {
114
145
  "Área plantada": "area_plantada",
115
146
  "Área colhida": "area_colhida",
@@ -123,11 +154,20 @@ async def pam(
123
154
  df["produto"] = produto_lower
124
155
  df["fonte"] = "ibge_pam"
125
156
 
157
+ meta.fetch_duration_ms = int((time.perf_counter() - fetch_start) * 1000)
158
+ meta.records_count = len(df)
159
+ meta.columns = df.columns.tolist()
160
+ meta.cache_key = f"ibge:pam:{produto}:{ano}"
161
+ meta.cache_expires_at = calculate_expiry(constants.Fonte.IBGE, "pam")
162
+
126
163
  if as_polars:
127
164
  try:
128
165
  import polars as pl
129
166
 
130
- return pl.from_pandas(df) # type: ignore[no-any-return]
167
+ result_df = pl.from_pandas(df)
168
+ if return_meta:
169
+ return result_df, meta # type: ignore[return-value,no-any-return]
170
+ return result_df # type: ignore[return-value,no-any-return]
131
171
  except ImportError:
132
172
  logger.warning("polars_not_installed", fallback="pandas")
133
173
 
@@ -137,16 +177,43 @@ async def pam(
137
177
  records=len(df),
138
178
  )
139
179
 
180
+ if return_meta:
181
+ return df, meta
140
182
  return df
141
183
 
142
184
 
185
+ @overload
186
+ async def lspa(
187
+ produto: str,
188
+ ano: int | str | None = None,
189
+ mes: int | str | None = None,
190
+ uf: str | None = None,
191
+ as_polars: bool = False,
192
+ *,
193
+ return_meta: Literal[False] = False,
194
+ ) -> pd.DataFrame: ...
195
+
196
+
197
+ @overload
143
198
  async def lspa(
144
199
  produto: str,
145
200
  ano: int | str | None = None,
146
201
  mes: int | str | None = None,
147
202
  uf: str | None = None,
148
203
  as_polars: bool = False,
149
- ) -> pd.DataFrame:
204
+ *,
205
+ return_meta: Literal[True],
206
+ ) -> tuple[pd.DataFrame, MetaInfo]: ...
207
+
208
+
209
+ async def lspa(
210
+ produto: str,
211
+ ano: int | str | None = None,
212
+ mes: int | str | None = None,
213
+ uf: str | None = None,
214
+ as_polars: bool = False,
215
+ return_meta: bool = False,
216
+ ) -> pd.DataFrame | tuple[pd.DataFrame, MetaInfo]:
150
217
  """
151
218
  Obtém dados do Levantamento Sistemático da Produção Agrícola (LSPA).
152
219
 
@@ -158,14 +225,22 @@ async def lspa(
158
225
  mes: Mês de referência (1-12). Se None, retorna todos os meses do ano.
159
226
  uf: Filtrar por UF (ex: "MT", "PR")
160
227
  as_polars: Se True, retorna polars.DataFrame
228
+ return_meta: Se True, retorna tupla (DataFrame, MetaInfo)
161
229
 
162
230
  Returns:
163
- DataFrame com estimativas LSPA
231
+ DataFrame com estimativas LSPA ou tupla (DataFrame, MetaInfo)
164
232
 
165
233
  Example:
166
234
  >>> df = await ibge.lspa('soja', ano=2024)
167
- >>> df = await ibge.lspa('milho_1', ano=2024, mes=6, uf='PR')
235
+ >>> df, meta = await ibge.lspa('milho_1', ano=2024, mes=6, uf='PR', return_meta=True)
168
236
  """
237
+ fetch_start = time.perf_counter()
238
+ meta = MetaInfo(
239
+ source="ibge_lspa",
240
+ source_url="https://sidra.ibge.gov.br",
241
+ source_method="httpx",
242
+ fetched_at=datetime.now(),
243
+ )
169
244
  logger.info(
170
245
  "ibge_lspa_request",
171
246
  produto=produto,
@@ -174,7 +249,6 @@ async def lspa(
174
249
  uf=uf,
175
250
  )
176
251
 
177
- # Mapeia produto para código SIDRA
178
252
  produto_lower = produto.lower()
179
253
  if produto_lower not in client.PRODUTOS_LSPA:
180
254
  raise ValueError(
@@ -183,20 +257,16 @@ async def lspa(
183
257
 
184
258
  produto_cod = client.PRODUTOS_LSPA[produto_lower]
185
259
 
186
- # Define período
187
260
  if ano is None:
188
261
  from datetime import date
189
262
 
190
263
  ano = date.today().year
191
264
 
192
- # Define período
193
265
  period = f"{ano}{int(mes):02d}" if mes else ",".join(f"{ano}{m:02d}" for m in range(1, 13))
194
266
 
195
- # Define nível territorial
196
267
  territorial_level = "3" if uf else "1"
197
268
  ibge_code = client.uf_to_ibge_code(uf) if uf else "all"
198
269
 
199
- # Busca dados (não especifica variáveis - retorna todas)
200
270
  df = await client.fetch_sidra(
201
271
  table_code=client.TABELAS["lspa"],
202
272
  territorial_level=territorial_level,
@@ -205,10 +275,8 @@ async def lspa(
205
275
  classifications={"48": produto_cod},
206
276
  )
207
277
 
208
- # Processa resposta
209
278
  df = client.parse_sidra_response(df)
210
279
 
211
- # Adiciona período da consulta
212
280
  df["ano"] = ano
213
281
  if mes:
214
282
  df["mes"] = mes
@@ -216,11 +284,20 @@ async def lspa(
216
284
  df["produto"] = produto_lower
217
285
  df["fonte"] = "ibge_lspa"
218
286
 
287
+ meta.fetch_duration_ms = int((time.perf_counter() - fetch_start) * 1000)
288
+ meta.records_count = len(df)
289
+ meta.columns = df.columns.tolist()
290
+ meta.cache_key = f"ibge:lspa:{produto}:{ano}:{mes}"
291
+ meta.cache_expires_at = calculate_expiry(constants.Fonte.IBGE, "lspa")
292
+
219
293
  if as_polars:
220
294
  try:
221
295
  import polars as pl
222
296
 
223
- return pl.from_pandas(df) # type: ignore[no-any-return]
297
+ result_df = pl.from_pandas(df)
298
+ if return_meta:
299
+ return result_df, meta # type: ignore[return-value,no-any-return]
300
+ return result_df # type: ignore[return-value,no-any-return]
224
301
  except ImportError:
225
302
  logger.warning("polars_not_installed", fallback="pandas")
226
303
 
@@ -230,6 +307,8 @@ async def lspa(
230
307
  records=len(df),
231
308
  )
232
309
 
310
+ if return_meta:
311
+ return df, meta
233
312
  return df
234
313
 
235
314