agrobr 0.1.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
agrobr/config.py ADDED
@@ -0,0 +1,137 @@
1
+ """Configuracao global do agrobr."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from datetime import date
7
+ from pathlib import Path
8
+ from typing import Literal
9
+
10
# Module-level singleton holding the active configuration.
# It stays None until get_config() or set_mode() creates an instance.
_config: AgrobrConfig | None = None
11
+
12
+
13
+ @dataclass
14
+ class AgrobrConfig:
15
+ """Configuracao global do agrobr."""
16
+
17
+ mode: Literal["normal", "deterministic"] = "normal"
18
+ snapshot_date: date | None = None
19
+ snapshot_path: Path | None = None
20
+
21
+ cache_enabled: bool = True
22
+ cache_path: Path | None = None
23
+
24
+ network_enabled: bool = True
25
+ timeout_seconds: int = 30
26
+
27
+ browser_fallback: bool = False
28
+ alternative_source: bool = True
29
+
30
+ log_level: str = "INFO"
31
+
32
+ def is_deterministic(self) -> bool:
33
+ """Verifica se esta em modo deterministico."""
34
+ return self.mode == "deterministic"
35
+
36
+ def get_snapshot_dir(self) -> Path:
37
+ """Retorna diretorio de snapshots."""
38
+ if self.snapshot_path:
39
+ return self.snapshot_path
40
+ return Path.home() / ".agrobr" / "snapshots"
41
+
42
+ def get_current_snapshot_path(self) -> Path | None:
43
+ """Retorna caminho do snapshot atual."""
44
+ if not self.snapshot_date:
45
+ return None
46
+ return self.get_snapshot_dir() / self.snapshot_date.isoformat()
47
+
48
+
49
def set_mode(
    mode: Literal["normal", "deterministic"],
    snapshot: str | date | None = None,
    snapshot_path: str | Path | None = None,
) -> None:
    """
    Set the agrobr operating mode.

    Only the mode-related fields (``mode``, ``snapshot_date``,
    ``snapshot_path``, ``network_enabled``) are replaced; options previously
    set through ``configure()`` (cache, timeout, fallbacks, log level) are
    kept instead of being reset to their defaults.

    Args:
        mode: "normal" or "deterministic"
        snapshot: Snapshot date (YYYY-MM-DD string or date)
        snapshot_path: Custom directory for snapshots

    Raises:
        ValueError: If ``mode`` is not a supported value, or if ``snapshot``
            is a string that is not a valid ISO date.

    Example:
        agrobr.set_mode("normal")
        agrobr.set_mode("deterministic", snapshot="2025-12-31")
    """
    # Local import: the module top only imports ``dataclass``.
    from dataclasses import replace

    global _config

    # An unknown mode used to be stored as-is while silently disabling the
    # network (network_enabled is derived from mode == "normal").
    if mode not in ("normal", "deterministic"):
        raise ValueError(f"Invalid mode {mode!r}: expected 'normal' or 'deterministic'")

    if isinstance(snapshot, str):
        snapshot = date.fromisoformat(snapshot)

    if isinstance(snapshot_path, str):
        snapshot_path = Path(snapshot_path)

    # Start from the current config so unrelated settings survive the switch.
    _config = replace(
        get_config(),
        mode=mode,
        snapshot_date=snapshot,
        snapshot_path=snapshot_path,
        network_enabled=(mode == "normal"),
    )
80
+
81
+
82
def get_config() -> AgrobrConfig:
    """Return the active configuration, creating a default one on first use."""
    global _config
    if _config is not None:
        return _config
    # First access: create the default configuration and store it.
    _config = AgrobrConfig()
    return _config
88
+
89
+
90
def reset_config() -> None:
    """Discard the active configuration.

    The module-level slot is cleared, so the next ``get_config()`` call
    builds a fresh default ``AgrobrConfig``.
    """
    global _config
    _config = None
94
+
95
+
96
def configure(
    cache_enabled: bool | None = None,
    cache_path: str | Path | None = None,
    timeout_seconds: int | None = None,
    browser_fallback: bool | None = None,
    alternative_source: bool | None = None,
    log_level: str | None = None,
) -> None:
    """
    Update agrobr options on the active configuration.

    Any argument left as None keeps its current value.

    Args:
        cache_enabled: Enable/disable the cache
        cache_path: Custom cache location (a str is converted to Path)
        timeout_seconds: Timeout for HTTP requests
        browser_fallback: Use a browser as fallback
        alternative_source: Use alternative data sources
        log_level: Log level (DEBUG, INFO, WARNING, ERROR)
    """
    # Field name -> requested value, in the order the fields are applied.
    overrides = {
        "cache_enabled": cache_enabled,
        "cache_path": Path(cache_path) if isinstance(cache_path, str) else cache_path,
        "timeout_seconds": timeout_seconds,
        "browser_fallback": browser_fallback,
        "alternative_source": alternative_source,
        "log_level": log_level,
    }

    settings = get_config()
    for field_name, value in overrides.items():
        if value is not None:
            setattr(settings, field_name, value)
129
+
130
+
131
# Public API of the configuration module.
__all__ = ["AgrobrConfig", "set_mode", "get_config", "reset_config", "configure"]
agrobr/constants.py CHANGED
@@ -12,7 +12,7 @@ class Fonte(StrEnum):
12
12
  CEPEA = "cepea"
13
13
  CONAB = "conab"
14
14
  IBGE = "ibge"
15
- NOTICIAS_AGRICOLAS = "noticias_agricolas" # Fonte alternativa para CEPEA
15
+ NOTICIAS_AGRICOLAS = "noticias_agricolas"
16
16
 
17
17
 
18
18
  URLS = {
@@ -35,7 +35,6 @@ URLS = {
35
35
  },
36
36
  }
37
37
 
38
- # Mapeamento de produtos para URLs do Notícias Agrícolas (indicadores CEPEA)
39
38
  NOTICIAS_AGRICOLAS_PRODUTOS = {
40
39
  "soja": "soja/soja-indicador-cepea-esalq-porto-paranagua",
41
40
  "soja_parana": "soja/indicador-cepea-esalq-soja-parana",
@@ -0,0 +1,186 @@
1
+ """Stability Contracts para garantia de schema e compatibilidade."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from enum import StrEnum
7
+ from typing import Any
8
+
9
+ import pandas as pd
10
+
11
+
12
+ class ColumnType(StrEnum):
13
+ """Tipos de dados suportados nos contratos."""
14
+
15
+ DATE = "date"
16
+ DATETIME = "datetime"
17
+ STRING = "str"
18
+ INTEGER = "int"
19
+ FLOAT = "float"
20
+ DECIMAL = "Decimal"
21
+ BOOLEAN = "bool"
22
+
23
+
24
+ class BreakingChangePolicy(StrEnum):
25
+ """Politica para mudancas que quebram contrato."""
26
+
27
+ MAJOR_VERSION = "major"
28
+ NEVER = "never"
29
+ DEPRECATE_FIRST = "deprecate"
30
+
31
+
32
@dataclass
class Column:
    """Definition of a single column in a contract."""

    name: str
    type: ColumnType
    nullable: bool = False
    unit: str | None = None
    description: str = ""
    stable: bool = True
    deprecated: bool = False
    deprecated_in: str | None = None
    removed_in: str | None = None

    def validate(self, series: pd.Series) -> list[str]:
        """Validate a series against this column definition.

        Checks performed:
          * nulls are rejected when ``nullable`` is False;
          * DATE / DATETIME columns must already have a datetime dtype or be
            convertible with ``pd.to_datetime``;
          * INTEGER columns must hold integral values (nulls ignored);
          * FLOAT / DECIMAL columns must have a numeric dtype.

        STRING and BOOLEAN columns only get the null check.

        Args:
            series: Column data to validate.

        Returns:
            List of error messages; empty when the series is valid.
        """
        errors: list[str] = []

        null_mask = series.isna()
        if not self.nullable and null_mask.any():
            null_count = null_mask.sum()
            errors.append(f"Column '{self.name}' has {null_count} null values but nullable=False")

        # DATETIME is checked like DATE; it was previously left unvalidated.
        if self.type in (ColumnType.DATE, ColumnType.DATETIME):
            if not pd.api.types.is_datetime64_any_dtype(series):
                try:
                    pd.to_datetime(series.dropna())
                except Exception:
                    # Deliberately broad: any parsing failure is reported as a
                    # validation error instead of escaping the validator.
                    errors.append(
                        f"Column '{self.name}' cannot be converted to {self.type.value}"
                    )

        elif self.type == ColumnType.INTEGER:
            if not pd.api.types.is_integer_dtype(series) and not self._is_integral(
                series.dropna()
            ):
                errors.append(f"Column '{self.name}' contains non-integer values")

        elif self.type in (
            ColumnType.FLOAT,
            ColumnType.DECIMAL,
        ) and not pd.api.types.is_numeric_dtype(series):
            errors.append(f"Column '{self.name}' is not numeric")

        return errors

    @staticmethod
    def _is_integral(values: pd.Series) -> bool:
        """Return True when every value in ``values`` is a whole number.

        ``astype(int)`` alone is not sufficient: it truncates fractional
        floats (1.5 -> 1) without raising, so the converted values are also
        compared against their float representation.
        """
        if len(values) == 0:
            return True
        try:
            as_int = values.astype(int)
        except (ValueError, TypeError):
            return False
        try:
            as_float = values.astype(float)
        except (ValueError, TypeError):
            # Castable to int but not to float: no fractional part to detect.
            return True
        return bool((as_int == as_float).all())
77
+
78
+
79
@dataclass
class Contract:
    """Stability contract for a dataset."""

    name: str
    version: str
    columns: list[Column]
    guarantees: list[str] = field(default_factory=list)
    breaking_policy: BreakingChangePolicy = BreakingChangePolicy.MAJOR_VERSION
    effective_from: str = ""

    def validate(self, df: pd.DataFrame) -> tuple[bool, list[str]]:
        """
        Validate a DataFrame against the contract.

        A column is required when it is both non-nullable and stable. Every
        contract column present in ``df`` is additionally validated with
        ``Column.validate``.

        Args:
            df: DataFrame to validate

        Returns:
            Tuple (is_valid, list of error messages)
        """
        errors: list[str] = []

        present = set(df.columns)
        # dict.fromkeys de-duplicates while keeping contract order, so the
        # message below is deterministic (a set's order varies between runs).
        required = dict.fromkeys(c.name for c in self.columns if not c.nullable and c.stable)
        missing = [col_name for col_name in required if col_name not in present]
        if missing:
            # Rendered as {'a', 'b'} to keep the shape of the message stable.
            listing = ", ".join(repr(col_name) for col_name in missing)
            errors.append(f"Missing required columns: {{{listing}}}")

        for col_def in self.columns:
            if col_def.name in present:
                errors.extend(col_def.validate(df[col_def.name]))

        return not errors, errors

    def get_column(self, name: str) -> Column | None:
        """Return the definition of the column called ``name``, or None."""
        return next((col for col in self.columns if col.name == name), None)

    def list_columns(self, stable_only: bool = False) -> list[str]:
        """List column names, optionally restricted to stable columns."""
        return [c.name for c in self.columns if c.stable or not stable_only]

    def to_markdown(self) -> str:
        """Render the contract as Markdown documentation."""
        lines = [
            f"# Contract: {self.name}",
            f"**Version:** {self.version}",
            f"**Effective from:** {self.effective_from}",
            f"**Breaking policy:** {self.breaking_policy.value}",
            "",
            "## Columns",
            "",
            "| Column | Type | Nullable | Unit | Stable | Description |",
            "|--------|------|----------|------|--------|-------------|",
        ]

        for col in self.columns:
            stable = "Yes" if col.stable else "No"
            nullable = "Yes" if col.nullable else "No"
            unit = col.unit or "-"
            desc = col.description or "-"
            deprecated = " (deprecated)" if col.deprecated else ""
            lines.append(
                f"| {col.name}{deprecated} | {col.type.value} | {nullable} | {unit} | {stable} | {desc} |"
            )

        if self.guarantees:
            lines.extend(["", "## Guarantees", ""])
            lines.extend(f"- {g}" for g in self.guarantees)

        return "\n".join(lines)

    def to_dict(self) -> dict[str, Any]:
        """Convert the contract to a plain dictionary."""
        return {
            "name": self.name,
            "version": self.version,
            "effective_from": self.effective_from,
            "breaking_policy": self.breaking_policy.value,
            "columns": [
                {
                    "name": c.name,
                    "type": c.type.value,
                    "nullable": c.nullable,
                    "unit": c.unit,
                    "stable": c.stable,
                    "deprecated": c.deprecated,
                    "description": c.description,
                }
                for c in self.columns
            ],
            "guarantees": self.guarantees,
        }
179
+
180
+
181
# Public API of the contracts module.
__all__ = ["Column", "ColumnType", "Contract", "BreakingChangePolicy"]
@@ -0,0 +1,80 @@
1
+ """Contratos de estabilidade para dados CEPEA."""
2
+
3
+ from agrobr.contracts import BreakingChangePolicy, Column, ColumnType, Contract
4
+
5
# Version 1.0 of the contract for the CEPEA indicator dataset.
CEPEA_INDICADOR_V1 = Contract(
    name="cepea.indicador",
    version="1.0",
    effective_from="0.3.0",
    breaking_policy=BreakingChangePolicy.MAJOR_VERSION,
    columns=[
        # Stable columns.
        Column(
            name="data", type=ColumnType.DATE, nullable=False, stable=True,
            description="Data do indicador",
        ),
        Column(
            name="produto", type=ColumnType.STRING, nullable=False, stable=True,
            description="Nome do produto (soja, milho, etc)",
        ),
        Column(
            name="praca", type=ColumnType.STRING, nullable=True, stable=True,
            description="Praca de referencia",
        ),
        Column(
            name="valor", type=ColumnType.FLOAT, nullable=False, unit="BRL", stable=True,
            description="Preco em reais",
        ),
        Column(
            name="unidade", type=ColumnType.STRING, nullable=False, stable=True,
            description="Unidade do preco (BRL/sc60kg, BRL/@, etc)",
        ),
        Column(
            name="fonte", type=ColumnType.STRING, nullable=False, stable=True,
            description="Fonte dos dados",
        ),
        # Columns not marked stable.
        Column(
            name="metodologia", type=ColumnType.STRING, nullable=True, stable=False,
            description="Descricao da metodologia",
        ),
        Column(
            name="anomalies", type=ColumnType.STRING, nullable=True, stable=False,
            description="Lista de anomalias detectadas",
        ),
    ],
    guarantees=[
        "Column names never change (additions only)",
        "Types only widen (int -> float, str -> categorical)",
        "Dates always in local timezone (Sao Paulo)",
        "Units explicit in 'unidade' column",
        "'valor' is always positive",
        "'data' is always a valid business day",
    ],
)


__all__ = ["CEPEA_INDICADOR_V1"]
@@ -0,0 +1,181 @@
1
+ """Contratos de estabilidade para dados CONAB."""
2
+
3
+ from agrobr.contracts import BreakingChangePolicy, Column, ColumnType, Contract
4
+
5
# Version 1.0 of the contract for the CONAB crop ("safras") dataset.
CONAB_SAFRA_V1 = Contract(
    name="conab.safras",
    version="1.0",
    effective_from="0.3.0",
    breaking_policy=BreakingChangePolicy.MAJOR_VERSION,
    columns=[
        # Identification columns.
        Column(
            name="fonte", type=ColumnType.STRING, nullable=False, stable=True,
            description="Fonte dos dados (conab)",
        ),
        Column(
            name="produto", type=ColumnType.STRING, nullable=False, stable=True,
            description="Nome do produto",
        ),
        Column(
            name="safra", type=ColumnType.STRING, nullable=False, stable=True,
            description="Safra no formato YYYY/YY",
        ),
        Column(
            name="uf", type=ColumnType.STRING, nullable=True, stable=True,
            description="Sigla da UF",
        ),
        # Nullable measures with explicit units.
        Column(
            name="area_plantada", type=ColumnType.FLOAT, nullable=True, unit="mil_ha",
            stable=True, description="Area plantada em mil hectares",
        ),
        Column(
            name="area_colhida", type=ColumnType.FLOAT, nullable=True, unit="mil_ha",
            stable=True, description="Area colhida em mil hectares",
        ),
        Column(
            name="produtividade", type=ColumnType.FLOAT, nullable=True, unit="kg/ha",
            stable=True, description="Produtividade em kg/ha",
        ),
        Column(
            name="producao", type=ColumnType.FLOAT, nullable=True, unit="mil_ton",
            stable=True, description="Producao em mil toneladas",
        ),
        # Survey metadata.
        Column(
            name="levantamento", type=ColumnType.INTEGER, nullable=False, stable=True,
            description="Numero do levantamento (1-12)",
        ),
        Column(
            name="data_publicacao", type=ColumnType.DATE, nullable=False, stable=True,
            description="Data de publicacao do levantamento",
        ),
    ],
    guarantees=[
        "Column names never change (additions only)",
        "'safra' always matches pattern YYYY/YY",
        "'uf' is always a valid Brazilian state code",
        "'levantamento' is between 1 and 12",
        "Numeric values are always >= 0",
    ],
)
94
+
95
# Version 1.0 of the contract for the CONAB supply/demand balance dataset.
CONAB_BALANCO_V1 = Contract(
    name="conab.balanco",
    version="1.0",
    effective_from="0.3.0",
    breaking_policy=BreakingChangePolicy.MAJOR_VERSION,
    columns=[
        # Identification columns.
        Column(
            name="produto", type=ColumnType.STRING, nullable=False, stable=True,
            description="Nome do produto",
        ),
        Column(
            name="safra", type=ColumnType.STRING, nullable=False, stable=True,
            description="Safra de referencia",
        ),
        # Balance components; every one is a nullable float in "mil_ton".
        Column(
            name="estoque_inicial", type=ColumnType.FLOAT, nullable=True, unit="mil_ton",
            stable=True, description="Estoque inicial",
        ),
        Column(
            name="producao", type=ColumnType.FLOAT, nullable=True, unit="mil_ton",
            stable=True, description="Producao",
        ),
        Column(
            name="importacao", type=ColumnType.FLOAT, nullable=True, unit="mil_ton",
            stable=True, description="Importacao",
        ),
        Column(
            name="suprimento", type=ColumnType.FLOAT, nullable=True, unit="mil_ton",
            stable=True, description="Suprimento total",
        ),
        Column(
            name="consumo", type=ColumnType.FLOAT, nullable=True, unit="mil_ton",
            stable=True, description="Consumo interno",
        ),
        Column(
            name="exportacao", type=ColumnType.FLOAT, nullable=True, unit="mil_ton",
            stable=True, description="Exportacao",
        ),
        Column(
            name="estoque_final", type=ColumnType.FLOAT, nullable=True, unit="mil_ton",
            stable=True, description="Estoque final",
        ),
    ],
    guarantees=[
        "Column names never change (additions only)",
        "All numeric values represent thousands of tons",
        "suprimento = estoque_inicial + producao + importacao",
        "estoque_final = suprimento - consumo - exportacao",
    ],
)


__all__ = ["CONAB_SAFRA_V1", "CONAB_BALANCO_V1"]