agrobr 0.1.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
agrobr/snapshots.py ADDED
@@ -0,0 +1,321 @@
1
+ """Gerenciamento de snapshots para modo deterministico."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import shutil
7
+ from dataclasses import dataclass, field
8
+ from datetime import date, datetime
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ import pandas as pd
13
+ import structlog
14
+
15
+ from agrobr.config import get_config
16
+
17
+ logger = structlog.get_logger()
18
+
19
+
20
@dataclass
class SnapshotManifest:
    """Manifest describing a snapshot.

    Attributes:
        name: Snapshot name.
        created_at: Timestamp recorded for the snapshot.
        agrobr_version: agrobr version string stored with the snapshot.
        sources: Names of the sources listed for the snapshot.
        files: Per-file metadata, keyed by a string path.
        metadata: Free-form additional metadata.
    """

    name: str
    created_at: datetime
    agrobr_version: str
    sources: list[str] = field(default_factory=list)
    files: dict[str, dict[str, Any]] = field(default_factory=dict)
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the manifest as a dict, with ``created_at`` as an ISO string."""
        return dict(
            name=self.name,
            created_at=self.created_at.isoformat(),
            agrobr_version=self.agrobr_version,
            sources=self.sources,
            files=self.files,
            metadata=self.metadata,
        )

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> SnapshotManifest:
        """Build a manifest from a dict, parsing a string ``created_at``.

        The input dict is left untouched; any key that is not a field of the
        dataclass makes the constructor raise ``TypeError``.
        """
        raw_created_at = data.get("created_at")
        if not isinstance(raw_created_at, str):
            return cls(**data)
        parsed = datetime.fromisoformat(raw_created_at)
        return cls(**{**data, "created_at": parsed})
49
+
50
+
51
@dataclass
class SnapshotInfo:
    """Summary information about a single snapshot.

    Attributes:
        name: Snapshot name.
        path: Filesystem path associated with the snapshot.
        created_at: Creation timestamp.
        size_bytes: Size in bytes.
        sources: Source names listed for the snapshot.
        file_count: Number of files counted for the snapshot.
    """

    name: str
    path: Path
    created_at: datetime
    size_bytes: int
    sources: list[str]
    file_count: int
61
+
62
+
63
def get_snapshots_dir() -> Path:
    """Return the snapshot directory reported by the current configuration."""
    return get_config().get_snapshot_dir()
67
+
68
+
69
def list_snapshots() -> list[SnapshotInfo]:
    """List every readable snapshot found in the snapshots directory.

    Entries are visited in sorted path order. Anything that is not a directory
    or has no ``manifest.json`` is skipped. A snapshot whose manifest or files
    cannot be read is logged as a warning and left out of the result.

    Returns:
        One ``SnapshotInfo`` per readable snapshot; empty if the snapshots
        directory does not exist.
    """
    root = get_snapshots_dir()
    found: list[SnapshotInfo] = []
    if not root.exists():
        return found

    for entry in sorted(root.iterdir()):
        if not entry.is_dir():
            continue
        manifest_file = entry / "manifest.json"
        if not manifest_file.exists():
            continue

        try:
            with open(manifest_file) as handle:
                manifest = SnapshotManifest.from_dict(json.load(handle))

            # Total size covers every regular file below the snapshot directory.
            total_bytes = 0
            for item in entry.rglob("*"):
                if item.is_file():
                    total_bytes += item.stat().st_size

            # Only parquet entries count towards file_count.
            parquet_count = len(list(entry.rglob("*.parquet")))

            info = SnapshotInfo(
                name=manifest.name,
                path=entry,
                created_at=manifest.created_at,
                size_bytes=total_bytes,
                sources=manifest.sources,
                file_count=parquet_count,
            )
        except Exception as exc:
            logger.warning("snapshot_read_error", path=str(entry), error=str(exc))
        else:
            found.append(info)

    return found
106
+
107
+
108
def get_snapshot(name: str) -> SnapshotInfo | None:
    """Return the first listed snapshot whose name equals ``name``, or None."""
    matches = (info for info in list_snapshots() if info.name == name)
    return next(matches, None)
114
+
115
+
116
async def create_snapshot(
    name: str | None = None,
    sources: list[str] | None = None,
    _include_cache: bool = True,
) -> SnapshotInfo:
    """Create a new snapshot of the current data.

    A directory named ``name`` is created under the snapshots directory, each
    requested source is written into its own sub-directory, and a
    ``manifest.json`` describing the result is saved next to them. A failure
    while snapshotting one source is logged and does not abort the others.

    Args:
        name: Snapshot name (default: today's date as YYYY-MM-DD).
        sources: Sources to include (default: "cepea", "conab" and "ibge").
            Names other than those three are logged as a warning; nothing is
            written for them.
        _include_cache: Currently unused by this function.

    Returns:
        The ``SnapshotInfo`` looked up for the freshly written snapshot.

    Raises:
        ValueError: If a snapshot path with this name already exists.
    """
    import agrobr

    if name is None:
        name = date.today().isoformat()

    if sources is None:
        sources = ["cepea", "conab", "ibge"]

    snapshot_path = get_snapshots_dir() / name

    # Create the directory and detect a pre-existing snapshot in one step,
    # instead of checking exists() first and racing with another creator.
    try:
        snapshot_path.mkdir(parents=True, exist_ok=False)
    except FileExistsError as err:
        raise ValueError(f"Snapshot '{name}' already exists") from err

    manifest = SnapshotManifest(
        name=name,
        created_at=datetime.now(),
        agrobr_version=getattr(agrobr, "__version__", "unknown"),
        # Copy so later changes to the caller's list do not alter the manifest.
        sources=list(sources),
    )

    snapshotters = {
        "cepea": _snapshot_cepea,
        "conab": _snapshot_conab,
        "ibge": _snapshot_ibge,
    }

    for source in sources:
        source_path = snapshot_path / source
        source_path.mkdir(exist_ok=True)

        snapshotter = snapshotters.get(source)
        if snapshotter is None:
            # Previously ignored without a trace; make the no-op visible.
            logger.warning("snapshot_unknown_source", source=source)
            continue

        try:
            await snapshotter(source_path, manifest)
        except Exception as e:
            logger.error("snapshot_source_error", source=source, error=str(e))

    with open(snapshot_path / "manifest.json", "w", encoding="utf-8") as f:
        json.dump(manifest.to_dict(), f, indent=2)

    logger.info("snapshot_created", name=name, path=str(snapshot_path))

    # get_snapshot() is typed as Optional; the manifest was just written above.
    return get_snapshot(name)  # type: ignore
175
+
176
+
177
async def _snapshot_cepea(path: Path, manifest: SnapshotManifest) -> None:
    """Write one parquet file per CEPEA product and record it in the manifest.

    Args:
        path: Directory that receives the ``<produto>.parquet`` files.
        manifest: Manifest whose ``files`` mapping is updated in place.

    A failure for one product is logged as a warning and the loop continues.
    """
    from agrobr import cepea

    for produto in await cepea.produtos():
        try:
            frame = await cepea.indicador(produto, offline=True)
            if frame is None or frame.empty:
                continue

            target = path / f"{produto}.parquet"
            frame.to_parquet(target, index=False)
            manifest.files[f"cepea/{produto}.parquet"] = dict(
                rows=len(frame),
                columns=frame.columns.tolist(),
            )
        except Exception as exc:
            logger.warning("snapshot_produto_error", produto=produto, error=str(exc))
195
+
196
+
197
async def _snapshot_conab(path: Path, manifest: SnapshotManifest) -> None:
    """Write the CONAB datasets as parquet and record them in the manifest.

    Two datasets are attempted independently: ``safras`` (requested for
    produto "soja") and ``balanco``. A failure in one is logged as a warning
    and does not prevent the other from being written.

    Args:
        path: Directory that receives the parquet files.
        manifest: Manifest whose ``files`` mapping is updated in place.
    """
    from agrobr import conab

    # (dataset name, deferred fetch call, log event used on failure)
    jobs = (
        ("safras", lambda: conab.safras(produto="soja"), "snapshot_conab_safras_error"),
        ("balanco", lambda: conab.balanco(), "snapshot_conab_balanco_error"),
    )

    for dataset, fetch, error_event in jobs:
        try:
            frame = await fetch()
            if frame is None or frame.empty:
                continue

            frame.to_parquet(path / f"{dataset}.parquet", index=False)
            manifest.files[f"conab/{dataset}.parquet"] = dict(
                rows=len(frame),
                columns=frame.columns.tolist(),
            )
        except Exception as exc:
            logger.warning(error_event, error=str(exc))
224
+
225
+
226
async def _snapshot_ibge(path: Path, manifest: SnapshotManifest) -> None:
    """Write the IBGE datasets as parquet and record them in the manifest.

    Two datasets are attempted independently, both requested for produto
    "soja": ``pam`` and ``lspa``. A failure in one is logged as a warning and
    does not prevent the other from being written.

    Args:
        path: Directory that receives the parquet files.
        manifest: Manifest whose ``files`` mapping is updated in place.
    """
    from agrobr import ibge

    # (dataset name, deferred fetch call, log event used on failure)
    jobs = (
        ("pam", lambda: ibge.pam(produto="soja"), "snapshot_ibge_pam_error"),
        ("lspa", lambda: ibge.lspa(produto="soja"), "snapshot_ibge_lspa_error"),
    )

    for dataset, fetch, error_event in jobs:
        try:
            frame = await fetch()
            if frame is None or frame.empty:
                continue

            frame.to_parquet(path / f"{dataset}.parquet", index=False)
            manifest.files[f"ibge/{dataset}.parquet"] = dict(
                rows=len(frame),
                columns=frame.columns.tolist(),
            )
        except Exception as exc:
            logger.warning(error_event, error=str(exc))
253
+
254
+
255
def load_from_snapshot(
    source: str,
    dataset: str,
    snapshot_name: str | None = None,
) -> pd.DataFrame | None:
    """Load one dataset from a snapshot.

    Args:
        source: Source name (cepea, conab, ibge).
        dataset: Dataset name (soja, safras, pam, etc); ``.parquet`` is appended.
        snapshot_name: Snapshot name; when None, the ISO form of the
            configuration's ``snapshot_date`` is used.

    Returns:
        The DataFrame read from the parquet file, or None (after logging a
        warning) when the file does not exist.

    Raises:
        ValueError: If no snapshot name is given and the configuration has no
            ``snapshot_date``.
    """
    config = get_config()

    if snapshot_name is None:
        if not config.snapshot_date:
            raise ValueError("No snapshot specified and no snapshot_date in config")
        snapshot_name = config.snapshot_date.isoformat()

    parquet_file = get_snapshots_dir() / snapshot_name / source / f"{dataset}.parquet"

    if parquet_file.exists():
        return pd.read_parquet(parquet_file)

    logger.warning(
        "snapshot_file_not_found",
        source=source,
        dataset=dataset,
        path=str(parquet_file),
    )
    return None
291
+
292
+
293
def delete_snapshot(name: str) -> bool:
    """Remove a snapshot.

    Args:
        name: Snapshot name. It must be a single path component naming a
            direct child of the snapshots directory.

    Returns:
        True if removed, False if it did not exist.

    Raises:
        ValueError: If ``name`` is empty, ".", "..", absolute, or contains a
            directory part. Such names would make the recursive delete target
            the snapshots directory itself or a location outside it.
    """
    candidate = Path(name)
    # Validate before touching the filesystem: "" and "." resolve to the
    # snapshots root, ".." and absolute paths escape it entirely.
    if candidate.name in ("", "..") or candidate.parent != Path("."):
        raise ValueError(f"Invalid snapshot name: {name!r}")

    snapshot_path = get_snapshots_dir() / name

    if not snapshot_path.exists():
        return False

    shutil.rmtree(snapshot_path)
    logger.info("snapshot_deleted", name=name)
    return True
311
+
312
+
313
# Names exported by a star-import of this module.
__all__ = [
    "SnapshotManifest",
    "SnapshotInfo",
    "list_snapshots",
    "get_snapshot",
    "create_snapshot",
    "load_from_snapshot",
    "delete_snapshot",
]
agrobr/stability.py ADDED
@@ -0,0 +1,148 @@
1
+ """Decorators e utilitarios para estabilidade de API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import functools
6
+ import warnings
7
+ from collections.abc import Callable
8
+ from dataclasses import dataclass
9
+ from enum import StrEnum
10
+ from typing import Any, TypeVar
11
+
12
+ import structlog
13
+
14
+ logger = structlog.get_logger()
15
+
16
+ F = TypeVar("F", bound=Callable[..., Any])
17
+
18
+
19
+ class APIStatus(StrEnum):
20
+ STABLE = "stable"
21
+ EXPERIMENTAL = "experimental"
22
+ DEPRECATED = "deprecated"
23
+ INTERNAL = "internal"
24
+
25
+
26
@dataclass
class APIInfo:
    """Stability record kept for one API.

    Attributes:
        status: Stability level of the API.
        since: Version string supplied when the API was registered.
        deprecated_in: Version in which the API was deprecated, if any.
        removed_in: Version in which the API is to be removed, if any.
        replacement: Name of the suggested replacement, if any.
        notes: Free-form notes, if any.
    """

    status: APIStatus
    since: str
    deprecated_in: str | None = None
    removed_in: str | None = None
    replacement: str | None = None
    notes: str | None = None
34
+
35
+
36
+ _api_registry: dict[str, APIInfo] = {}
37
+
38
+
39
def stable(since: str, notes: str | None = None) -> Callable[[F], F]:
    """Return a decorator that marks a callable as a stable API.

    The decorator stores an ``APIInfo`` with status STABLE in the module
    registry under the callable's ``__qualname__`` and exposes the same object
    as ``_api_info`` on the returned wrapper. Calls are forwarded unchanged.

    Args:
        since: Version string recorded in the ``APIInfo``.
        notes: Optional notes recorded in the ``APIInfo``.
    """

    def decorator(func: F) -> F:
        api_info = APIInfo(status=APIStatus.STABLE, since=since, notes=notes)
        _api_registry[func.__qualname__] = api_info

        def wrapper(*args: Any, **kwargs: Any) -> Any:
            return func(*args, **kwargs)

        wrapped = functools.wraps(func)(wrapper)
        wrapped._api_info = api_info  # type: ignore[attr-defined]
        return wrapped  # type: ignore[return-value]

    return decorator
52
+
53
+
54
def experimental(since: str, notes: str | None = None) -> Callable[[F], F]:
    """Return a decorator that marks a callable as an experimental API.

    The decorator stores an ``APIInfo`` with status EXPERIMENTAL in the module
    registry under the callable's ``__qualname__`` and exposes the same object
    as ``_api_info`` on the returned wrapper. Every call to the wrapper emits
    a warning (default category) before forwarding to the callable.

    Args:
        since: Version string recorded in the ``APIInfo``.
        notes: Optional notes recorded in the ``APIInfo``.
    """

    def decorator(func: F) -> F:
        api_info = APIInfo(status=APIStatus.EXPERIMENTAL, since=since, notes=notes)
        _api_registry[func.__qualname__] = api_info

        def wrapper(*args: Any, **kwargs: Any) -> Any:
            # stacklevel=2 attributes the warning to the caller of the API.
            warnings.warn(
                f"{func.__qualname__} is experimental and may change without notice",
                stacklevel=2,
            )
            return func(*args, **kwargs)

        wrapped = functools.wraps(func)(wrapper)
        wrapped._api_info = api_info  # type: ignore[attr-defined]
        return wrapped  # type: ignore[return-value]

    return decorator
71
+
72
+
73
def deprecated(
    since: str,
    removed_in: str | None = None,
    replacement: str | None = None,
) -> Callable[[F], F]:
    """Return a decorator that marks a callable as a deprecated API.

    The decorator stores an ``APIInfo`` with status DEPRECATED in the module
    registry under the callable's ``__qualname__`` (``since`` is recorded both
    as ``since`` and as ``deprecated_in``) and exposes the same object as
    ``_api_info`` on the returned wrapper. Every call to the wrapper emits a
    ``DeprecationWarning`` before forwarding to the callable.

    Args:
        since: Version in which the API was deprecated.
        removed_in: Version in which the API will be removed; mentioned in
            the warning when truthy.
        replacement: Suggested replacement; mentioned in the warning when
            truthy.
    """

    def decorator(func: F) -> F:
        api_info = APIInfo(
            status=APIStatus.DEPRECATED,
            since=since,
            deprecated_in=since,
            removed_in=removed_in,
            replacement=replacement,
        )
        _api_registry[func.__qualname__] = api_info

        # The warning text depends only on decoration-time values.
        parts = [f"{func.__qualname__} is deprecated since {since}"]
        if removed_in:
            parts.append(f" and will be removed in {removed_in}")
        if replacement:
            parts.append(f". Use {replacement} instead")
        message = "".join(parts)

        def wrapper(*args: Any, **kwargs: Any) -> Any:
            # stacklevel=2 attributes the warning to the caller of the API.
            warnings.warn(message, DeprecationWarning, stacklevel=2)
            return func(*args, **kwargs)

        wrapped = functools.wraps(func)(wrapper)
        wrapped._api_info = api_info  # type: ignore[attr-defined]
        return wrapped  # type: ignore[return-value]

    return decorator
102
+
103
+
104
def internal(func: F) -> F:
    """Mark a callable as an internal API.

    An ``APIInfo`` with status INTERNAL and ``since="0.1.0"`` is stored in the
    module registry under the callable's ``__qualname__`` and exposed as
    ``_api_info`` on the returned wrapper. Calls are forwarded unchanged.
    """
    api_info = APIInfo(status=APIStatus.INTERNAL, since="0.1.0")
    _api_registry[func.__qualname__] = api_info

    def wrapper(*args: Any, **kwargs: Any) -> Any:
        return func(*args, **kwargs)

    wrapped = functools.wraps(func)(wrapper)
    wrapped._api_info = api_info  # type: ignore[attr-defined]
    return wrapped  # type: ignore[return-value]
114
+
115
+
116
def get_api_info(func: Callable[..., Any]) -> APIInfo | None:
    """Return the ``_api_info`` attribute of ``func``, or None if it has none."""
    try:
        return func._api_info  # type: ignore[attr-defined]
    except AttributeError:
        return None
118
+
119
+
120
def list_stable_apis() -> list[str]:
    """Return the registry names whose status equals ``APIStatus.STABLE``."""
    names: list[str] = []
    for qualname, api_info in _api_registry.items():
        if api_info.status == APIStatus.STABLE:
            names.append(qualname)
    return names
122
+
123
+
124
def list_experimental_apis() -> list[str]:
    """Return the registry names whose status equals ``APIStatus.EXPERIMENTAL``."""
    names: list[str] = []
    for qualname, api_info in _api_registry.items():
        if api_info.status == APIStatus.EXPERIMENTAL:
            names.append(qualname)
    return names
126
+
127
+
128
def list_deprecated_apis() -> list[str]:
    """Return the registry names whose status equals ``APIStatus.DEPRECATED``."""
    names: list[str] = []
    for qualname, api_info in _api_registry.items():
        if api_info.status == APIStatus.DEPRECATED:
            names.append(qualname)
    return names
130
+
131
+
132
def get_api_registry() -> dict[str, APIInfo]:
    """Return a shallow copy of the registry; the ``APIInfo`` values are shared."""
    return dict(_api_registry)
134
+
135
+
136
# Names exported by a star-import of this module.
__all__ = [
    "APIStatus",
    "APIInfo",
    "stable",
    "experimental",
    "deprecated",
    "internal",
    "get_api_info",
    "list_stable_apis",
    "list_experimental_apis",
    "list_deprecated_apis",
    "get_api_registry",
]