agrobr 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agrobr/__init__.py +10 -0
- agrobr/alerts/__init__.py +7 -0
- agrobr/alerts/notifier.py +167 -0
- agrobr/cache/__init__.py +31 -0
- agrobr/cache/duckdb_store.py +433 -0
- agrobr/cache/history.py +317 -0
- agrobr/cache/migrations.py +82 -0
- agrobr/cache/policies.py +240 -0
- agrobr/cepea/__init__.py +7 -0
- agrobr/cepea/api.py +360 -0
- agrobr/cepea/client.py +273 -0
- agrobr/cepea/parsers/__init__.py +37 -0
- agrobr/cepea/parsers/base.py +35 -0
- agrobr/cepea/parsers/consensus.py +300 -0
- agrobr/cepea/parsers/detector.py +108 -0
- agrobr/cepea/parsers/fingerprint.py +226 -0
- agrobr/cepea/parsers/v1.py +305 -0
- agrobr/cli.py +323 -0
- agrobr/conab/__init__.py +21 -0
- agrobr/conab/api.py +239 -0
- agrobr/conab/client.py +219 -0
- agrobr/conab/parsers/__init__.py +7 -0
- agrobr/conab/parsers/v1.py +383 -0
- agrobr/constants.py +205 -0
- agrobr/exceptions.py +104 -0
- agrobr/health/__init__.py +23 -0
- agrobr/health/checker.py +202 -0
- agrobr/health/reporter.py +314 -0
- agrobr/http/__init__.py +9 -0
- agrobr/http/browser.py +214 -0
- agrobr/http/rate_limiter.py +69 -0
- agrobr/http/retry.py +93 -0
- agrobr/http/user_agents.py +67 -0
- agrobr/ibge/__init__.py +19 -0
- agrobr/ibge/api.py +273 -0
- agrobr/ibge/client.py +256 -0
- agrobr/models.py +85 -0
- agrobr/normalize/__init__.py +64 -0
- agrobr/normalize/dates.py +303 -0
- agrobr/normalize/encoding.py +102 -0
- agrobr/normalize/regions.py +308 -0
- agrobr/normalize/units.py +278 -0
- agrobr/noticias_agricolas/__init__.py +6 -0
- agrobr/noticias_agricolas/client.py +222 -0
- agrobr/noticias_agricolas/parser.py +187 -0
- agrobr/sync.py +147 -0
- agrobr/telemetry/__init__.py +17 -0
- agrobr/telemetry/collector.py +153 -0
- agrobr/utils/__init__.py +5 -0
- agrobr/utils/logging.py +59 -0
- agrobr/validators/__init__.py +35 -0
- agrobr/validators/sanity.py +286 -0
- agrobr/validators/structural.py +313 -0
- agrobr-0.1.0.dist-info/METADATA +243 -0
- agrobr-0.1.0.dist-info/RECORD +58 -0
- agrobr-0.1.0.dist-info/WHEEL +4 -0
- agrobr-0.1.0.dist-info/entry_points.txt +2 -0
- agrobr-0.1.0.dist-info/licenses/LICENSE +21 -0
agrobr/sync.py
ADDED
@@ -0,0 +1,147 @@
+"""Synchronous wrappers for agrobr's async APIs."""
+
+from __future__ import annotations
+
+import asyncio
+import functools
+from collections.abc import Awaitable, Callable
+from typing import Any, TypeVar
+
+T = TypeVar("T")
+
+
+def _get_or_create_event_loop() -> asyncio.AbstractEventLoop:
+    """
+    Return the existing event loop or create a new one.
+
+    Handles the special cases:
+    - Jupyter notebooks (loop already running)
+    - secondary threads (no default loop)
+    """
+    try:
+        loop = asyncio.get_running_loop()
+        try:
+            import nest_asyncio
+
+            nest_asyncio.apply()
+            return loop
+        except ImportError:
+            raise RuntimeError(
+                "Event loop already running. Install nest_asyncio for Jupyter support: "
+                "pip install nest_asyncio"
+            ) from None
+    except RuntimeError:
+        try:
+            return asyncio.get_event_loop()
+        except RuntimeError:
+            loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(loop)
+            return loop
+
+
+def run_sync(coro: Awaitable[T]) -> T:
+    """
+    Run a coroutine synchronously.
+
+    Args:
+        coro: Coroutine to run
+
+    Returns:
+        Result of the coroutine
+    """
+    loop = _get_or_create_event_loop()
+
+    if loop.is_running():
+        import nest_asyncio
+
+        nest_asyncio.apply()
+        return loop.run_until_complete(coro)
+    else:
+        return asyncio.run(coro)  # type: ignore[arg-type]
+
+
+def sync_wrapper(async_func: Callable[..., Awaitable[T]]) -> Callable[..., T]:
+    """
+    Decorator that creates a synchronous version of an async function.
+
+    Usage:
+        @sync_wrapper
+        async def fetch_data():
+            ...
+
+        # Can now be called as:
+        fetch_data()  # synchronous
+    """
+
+    @functools.wraps(async_func)
+    def wrapper(*args: Any, **kwargs: Any) -> T:
+        return run_sync(async_func(*args, **kwargs))
+
+    if wrapper.__doc__:
+        wrapper.__doc__ = f"[SYNC] {wrapper.__doc__}"
+
+    return wrapper
+
+
+class _SyncModule:
+    """Module proxy that exposes synchronous versions of the API."""
+
+    def __init__(self, async_module: Any) -> None:
+        self._async_module = async_module
+
+    def __getattr__(self, name: str) -> Any:
+        attr = getattr(self._async_module, name)
+
+        if asyncio.iscoroutinefunction(attr):
+            return sync_wrapper(attr)
+
+        return attr
+
+
+class _SyncCepea(_SyncModule):
+    """Synchronous CEPEA API."""
+
+    pass
+
+
+class _SyncConab(_SyncModule):
+    """Synchronous CONAB API."""
+
+    pass
+
+
+class _SyncIbge(_SyncModule):
+    """Synchronous IBGE API."""
+
+    pass
+
+
+_cepea: _SyncCepea | None = None
+_conab: _SyncConab | None = None
+_ibge: _SyncIbge | None = None
+
+
+def __getattr__(name: str) -> Any:
+    """Lazy loading to avoid circular imports."""
+    global _cepea, _conab, _ibge
+
+    if name == "cepea":
+        if _cepea is None:
+            from agrobr import cepea as async_cepea
+
+            _cepea = _SyncCepea(async_cepea)
+        return _cepea
+    elif name == "conab":
+        if _conab is None:
+            from agrobr import conab as async_conab
+
+            _conab = _SyncConab(async_conab)
+        return _conab
+    elif name == "ibge":
+        if _ibge is None:
+            from agrobr import ibge as async_ibge
+
+            _ibge = _SyncIbge(async_ibge)
+        return _ibge
+
+    raise AttributeError(f"module 'agrobr.sync' has no attribute '{name}'")
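A minimal usage sketch of the two entry points above. The decorator path is self-contained; the lazy module path is shown only as comments, because the concrete CEPEA function names (e.g. an indicador coroutine) live in agrobr/cepea/api.py and are assumptions here, not part of this hunk:

    import asyncio

    from agrobr.sync import sync_wrapper

    @sync_wrapper
    async def fetch_data() -> str:
        """Stand-in for a network call."""
        await asyncio.sleep(0.1)
        return "ok"

    print(fetch_data())  # blocks via run_sync(); no event loop needed

    # Lazy module access: agrobr.sync.__getattr__ wraps every coroutine
    # function of the async sub-API on the fly, so a blocking call would
    # look like (hypothetical function name):
    #   from agrobr import sync
    #   dados = sync.cepea.indicador("soja")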
agrobr/telemetry/__init__.py
ADDED

@@ -0,0 +1,17 @@
+"""Opt-in telemetry."""
+
+from __future__ import annotations
+
+from .collector import (
+    TelemetryCollector,
+    track_cache_operation,
+    track_fetch,
+    track_parse_error,
+)
+
+__all__: list[str] = [
+    "TelemetryCollector",
+    "track_cache_operation",
+    "track_fetch",
+    "track_parse_error",
+]
agrobr/telemetry/collector.py
ADDED

@@ -0,0 +1,153 @@
+"""Opt-in telemetry collection."""
+
+from __future__ import annotations
+
+import asyncio
+import hashlib
+import platform
+import uuid
+from datetime import datetime
+from typing import Any
+
+import httpx
+import structlog
+
+from agrobr import __version__
+from agrobr.constants import TelemetrySettings
+
+logger = structlog.get_logger()
+
+
+class TelemetryCollector:
+    """
+    Collects opt-in telemetry non-intrusively.
+
+    Characteristics:
+    - Fire-and-forget (never blocks)
+    - Batching to reduce requests
+    - Silent failures
+    """
+
+    _instance_id: str | None = None
+    _buffer: list[dict[str, Any]] = []
+    _lock = asyncio.Lock()
+
+    @classmethod
+    def get_instance_id(cls) -> str:
+        """
+        Generate a unique per-installation ID.
+
+        Not traceable to a specific user; it only identifies
+        the installation, for deduplication.
+        """
+        if cls._instance_id is None:
+            machine_id = uuid.getnode().to_bytes(6, "big")
+            cls._instance_id = hashlib.sha256(machine_id).hexdigest()[:16]
+        return cls._instance_id
+
+    @classmethod
+    def get_context(cls) -> dict[str, Any]:
+        """Common context attached to every event."""
+        return {
+            "instance_id": cls.get_instance_id(),
+            "package_version": __version__,
+            "python_version": platform.python_version(),
+            "os": platform.system(),
+            "os_version": platform.release(),
+            "timestamp": datetime.utcnow().isoformat(),
+        }
+
+    @classmethod
+    async def track(
+        cls,
+        event: str,
+        properties: dict[str, Any] | None = None,
+    ) -> None:
+        """
+        Record a telemetry event.
+
+        Args:
+            event: Event name (e.g. "fetch", "parse_error", "cache_hit")
+            properties: Event properties
+        """
+        settings = TelemetrySettings()
+
+        if not settings.enabled:
+            return
+
+        payload = {
+            "event": event,
+            "context": cls.get_context(),
+            "properties": properties or {},
+        }
+
+        async with cls._lock:
+            cls._buffer.append(payload)
+
+            if len(cls._buffer) >= settings.batch_size:
+                asyncio.create_task(cls._flush())
+
+    @classmethod
+    async def _flush(cls) -> None:
+        """Send the buffer to the server."""
+        settings = TelemetrySettings()
+
+        async with cls._lock:
+            if not cls._buffer:
+                return
+
+            events = cls._buffer.copy()
+            cls._buffer.clear()
+
+        try:
+            async with httpx.AsyncClient() as client:
+                await client.post(
+                    settings.endpoint,
+                    json={"events": events},
+                    timeout=5.0,
+                )
+            logger.debug("telemetry_flushed", count=len(events))
+        except Exception as e:
+            logger.debug("telemetry_flush_failed", error=str(e))
+
+    @classmethod
+    def reset(cls) -> None:
+        """Reset the collector state (for tests)."""
+        cls._buffer.clear()
+        cls._instance_id = None
+
+
+async def track_fetch(source: str, produto: str, latency_ms: float, from_cache: bool) -> None:
+    """Record a fetch event."""
+    await TelemetryCollector.track(
+        "fetch",
+        {
+            "source": source,
+            "produto": produto,
+            "latency_ms": latency_ms,
+            "from_cache": from_cache,
+        },
+    )
+
+
+async def track_parse_error(source: str, parser_version: int, error_type: str) -> None:
+    """Record a parsing error."""
+    await TelemetryCollector.track(
+        "parse_error",
+        {
+            "source": source,
+            "parser_version": parser_version,
+            "error_type": error_type,
+        },
+    )
+
+
+async def track_cache_operation(operation: str, hit: bool) -> None:
+    """Record a cache operation."""
+    await TelemetryCollector.track(
+        "cache",
+        {
+            "operation": operation,
+            "hit": hit,
+        },
+    )
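For orientation, a short sketch of how the helpers above are meant to be called. Whether anything is actually sent depends on TelemetrySettings (defined in agrobr/constants.py, not shown in this diff): with telemetry disabled the call is a no-op, and with it enabled events sit in the buffer until batch_size is reached and _flush() POSTs them to the configured endpoint:

    import asyncio

    from agrobr.telemetry import track_fetch

    async def main() -> None:
        # Buffered, fire-and-forget; flush failures are swallowed,
        # so this never raises on network errors.
        await track_fetch(
            source="cepea",
            produto="soja",
            latency_ms=123.4,
            from_cache=False,
        )

    asyncio.run(main())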
agrobr/utils/__init__.py
ADDED
agrobr/utils/logging.py
ADDED
@@ -0,0 +1,59 @@
+"""Structured logging configuration with structlog."""
+
+from __future__ import annotations
+
+import logging
+import sys
+from pathlib import Path
+
+import structlog
+from structlog.types import Processor
+
+
+def configure_logging(
+    level: str = "INFO",
+    json_format: bool = True,
+    log_file: Path | str | None = None,
+) -> None:
+    """Configure structlog for structured logging."""
+    processors: list[Processor] = [
+        structlog.contextvars.merge_contextvars,
+        structlog.processors.add_log_level,
+        structlog.processors.TimeStamper(fmt="iso"),
+        structlog.stdlib.PositionalArgumentsFormatter(),
+        structlog.processors.StackInfoRenderer(),
+        structlog.processors.UnicodeDecoder(),
+    ]
+
+    if json_format:
+        processors.append(structlog.processors.JSONRenderer())
+    else:
+        processors.append(structlog.dev.ConsoleRenderer(colors=True))
+
+    structlog.configure(
+        processors=processors,
+        wrapper_class=structlog.stdlib.BoundLogger,
+        context_class=dict,
+        logger_factory=structlog.stdlib.LoggerFactory(),
+        cache_logger_on_first_use=True,
+    )
+
+    logging.basicConfig(
+        format="%(message)s",
+        stream=sys.stdout,
+        level=getattr(logging, level.upper()),
+    )
+
+    if log_file:
+        file_handler = logging.FileHandler(log_file)
+        file_handler.setLevel(getattr(logging, level.upper()))
+        logging.getLogger().addHandler(file_handler)
+
+
+def get_logger(name: str | None = None) -> structlog.stdlib.BoundLogger:
+    """Return a configured logger."""
+    logger: structlog.stdlib.BoundLogger = structlog.get_logger(name)
+    return logger
+
+
+configure_logging(level="INFO", json_format=True)
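A small usage sketch. Note that the module already calls configure_logging(level="INFO", json_format=True) at import time (the last line of the hunk), so calling it again is only needed to change the defaults, e.g. for a colored console renderer during development:

    from agrobr.utils.logging import configure_logging, get_logger

    # Override the JSON default set at import time.
    configure_logging(level="DEBUG", json_format=False)

    log = get_logger(__name__)
    log.info("fetch_started", source="cepea", produto="soja")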
agrobr/validators/__init__.py
ADDED

@@ -0,0 +1,35 @@
+"""Validators - sanity checks and structural validation."""
+
+from __future__ import annotations
+
+from .sanity import (
+    AnomalyReport,
+    SanityRule,
+    validate_batch,
+    validate_indicador,
+    validate_safra,
+)
+from .structural import (
+    StructuralMonitor,
+    StructuralValidationResult,
+    compare_fingerprints,
+    load_baseline,
+    save_baseline,
+    validate_against_baseline,
+    validate_structure,
+)
+
+__all__: list[str] = [
+    "AnomalyReport",
+    "SanityRule",
+    "validate_batch",
+    "validate_indicador",
+    "validate_safra",
+    "StructuralValidationResult",
+    "validate_structure",
+    "validate_against_baseline",
+    "compare_fingerprints",
+    "load_baseline",
+    "save_baseline",
+    "StructuralMonitor",
+]
agrobr/validators/sanity.py
ADDED

@@ -0,0 +1,286 @@
+"""Statistical sanity validation for agricultural data."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from decimal import Decimal
+from typing import Any
+
+import structlog
+
+from agrobr.exceptions import ValidationError
+from agrobr.models import Indicador, Safra
+
+logger = structlog.get_logger()
+
+
+@dataclass
+class SanityRule:
+    """Statistical validation rule."""
+
+    field: str
+    min_value: Decimal | None
+    max_value: Decimal | None
+    max_daily_change_pct: Decimal | None = None
+    description: str = ""
+
+
+PRICE_RULES: dict[str, SanityRule] = {
+    "soja": SanityRule(
+        field="valor",
+        min_value=Decimal("30"),
+        max_value=Decimal("300"),
+        max_daily_change_pct=Decimal("15"),
+        description="Soybeans (BRL/60kg sack)",
+    ),
+    "milho": SanityRule(
+        field="valor",
+        min_value=Decimal("15"),
+        max_value=Decimal("150"),
+        max_daily_change_pct=Decimal("15"),
+        description="Corn (BRL/60kg sack)",
+    ),
+    "cafe": SanityRule(
+        field="valor",
+        min_value=Decimal("200"),
+        max_value=Decimal("3000"),
+        max_daily_change_pct=Decimal("10"),
+        description="Arabica coffee (BRL/60kg sack)",
+    ),
+    "boi": SanityRule(
+        field="valor",
+        min_value=Decimal("100"),
+        max_value=Decimal("500"),
+        max_daily_change_pct=Decimal("10"),
+        description="Live cattle (BRL/@)",
+    ),
+    "boi_gordo": SanityRule(
+        field="valor",
+        min_value=Decimal("100"),
+        max_value=Decimal("500"),
+        max_daily_change_pct=Decimal("10"),
+        description="Live cattle (BRL/@)",
+    ),
+    "trigo": SanityRule(
+        field="valor",
+        min_value=Decimal("20"),
+        max_value=Decimal("150"),
+        max_daily_change_pct=Decimal("15"),
+        description="Wheat (BRL/60kg sack)",
+    ),
+    "algodao": SanityRule(
+        field="valor",
+        min_value=Decimal("50"),
+        max_value=Decimal("250"),
+        max_daily_change_pct=Decimal("10"),
+        description="Cotton (BRL/@)",
+    ),
+}
+
+SAFRA_RULES: dict[str, dict[str, SanityRule]] = {
+    "soja": {
+        "area_plantada": SanityRule(
+            field="area_plantada",
+            min_value=Decimal("20000"),
+            max_value=Decimal("50000"),
+            description="Soybean planted area, Brazil (thousand ha)",
+        ),
+        "producao": SanityRule(
+            field="producao",
+            min_value=Decimal("50000"),
+            max_value=Decimal("200000"),
+            description="Soybean production, Brazil (thousand t)",
+        ),
+    },
+    "milho": {
+        "area_plantada": SanityRule(
+            field="area_plantada",
+            min_value=Decimal("10000"),
+            max_value=Decimal("30000"),
+            description="Corn planted area, Brazil (thousand ha)",
+        ),
+        "producao": SanityRule(
+            field="producao",
+            min_value=Decimal("50000"),
+            max_value=Decimal("150000"),
+            description="Corn production, Brazil (thousand t)",
+        ),
+    },
+}
+
+
+@dataclass
+class AnomalyReport:
+    """Report of a detected anomaly."""
+
+    field: str
+    value: Any
+    expected_range: str
+    anomaly_type: str
+    severity: str
+    details: dict[str, Any]
+
+
+def validate_indicador(
+    indicador: Indicador,
+    valor_anterior: Decimal | None = None,
+) -> list[AnomalyReport]:
+    """
+    Validate an indicator against the statistical rules.
+
+    Args:
+        indicador: Indicator to validate
+        valor_anterior: Previous day's value (to validate the daily change)
+
+    Returns:
+        List of detected anomalies (empty if OK)
+    """
+    anomalies: list[AnomalyReport] = []
+    rule = PRICE_RULES.get(indicador.produto.lower())
+
+    if not rule:
+        logger.debug("sanity_no_rules", produto=indicador.produto)
+        return anomalies
+
+    if rule.min_value and indicador.valor < rule.min_value:
+        anomalies.append(
+            AnomalyReport(
+                field="valor",
+                value=indicador.valor,
+                expected_range=f"[{rule.min_value}, {rule.max_value}]",
+                anomaly_type="out_of_range",
+                severity="critical",
+                details={
+                    "produto": indicador.produto,
+                    "rule": rule.description,
+                    "below_min_by": float(rule.min_value - indicador.valor),
+                },
+            )
+        )
+
+    if rule.max_value and indicador.valor > rule.max_value:
+        anomalies.append(
+            AnomalyReport(
+                field="valor",
+                value=indicador.valor,
+                expected_range=f"[{rule.min_value}, {rule.max_value}]",
+                anomaly_type="out_of_range",
+                severity="critical",
+                details={
+                    "produto": indicador.produto,
+                    "rule": rule.description,
+                    "above_max_by": float(indicador.valor - rule.max_value),
+                },
+            )
+        )
+
+    if valor_anterior and rule.max_daily_change_pct:
+        change_pct = abs((indicador.valor - valor_anterior) / valor_anterior) * 100
+
+        if change_pct > rule.max_daily_change_pct:
+            severity = "critical" if change_pct > rule.max_daily_change_pct * 2 else "warning"
+            anomalies.append(
+                AnomalyReport(
+                    field="valor",
+                    value=indicador.valor,
+                    expected_range=f"±{rule.max_daily_change_pct}% of previous day",
+                    anomaly_type="excessive_change",
+                    severity=severity,
+                    details={
+                        "produto": indicador.produto,
+                        "valor_anterior": float(valor_anterior),
+                        "change_pct": float(change_pct),
+                        "max_allowed_pct": float(rule.max_daily_change_pct),
+                    },
+                )
+            )
+
+    if anomalies:
+        logger.warning(
+            "sanity_anomalies_detected",
+            produto=indicador.produto,
+            count=len(anomalies),
+            types=[a.anomaly_type for a in anomalies],
+        )
+    else:
+        logger.debug("sanity_check_passed", produto=indicador.produto)
+
+    return anomalies
+
+
+def validate_safra(safra: Safra) -> list[AnomalyReport]:
+    """Validate crop (safra) data against the statistical rules."""
+    anomalies: list[AnomalyReport] = []
+    rules = SAFRA_RULES.get(safra.produto.lower(), {})
+
+    for field_name, rule in rules.items():
+        value = getattr(safra, field_name)
+
+        if value is None:
+            continue
+
+        if rule.min_value and value < rule.min_value:
+            anomalies.append(
+                AnomalyReport(
+                    field=field_name,
+                    value=value,
+                    expected_range=f"[{rule.min_value}, {rule.max_value}]",
+                    anomaly_type="out_of_range",
+                    severity="critical",
+                    details={"rule": rule.description},
+                )
+            )
+
+        if rule.max_value and value > rule.max_value:
+            anomalies.append(
+                AnomalyReport(
+                    field=field_name,
+                    value=value,
+                    expected_range=f"[{rule.min_value}, {rule.max_value}]",
+                    anomaly_type="out_of_range",
+                    severity="critical",
+                    details={"rule": rule.description},
+                )
+            )
+
+    return anomalies
+
+
+async def validate_batch(
+    indicadores: list[Indicador],
+    strict: bool = False,
+) -> tuple[list[Indicador], list[AnomalyReport]]:
+    """
+    Validate a batch of indicators.
+
+    Args:
+        indicadores: List of indicators to validate
+        strict: If True, raise an exception on critical anomalies
+
+    Returns:
+        tuple: (indicators with their anomalies field populated, all anomalies)
+    """
+    all_anomalies: list[AnomalyReport] = []
+
+    sorted_indicadores = sorted(indicadores, key=lambda x: x.data)
+
+    for i, ind in enumerate(sorted_indicadores):
+        valor_anterior = None
+        if i > 0 and sorted_indicadores[i - 1].produto == ind.produto:
+            valor_anterior = sorted_indicadores[i - 1].valor
+
+        anomalies = validate_indicador(ind, valor_anterior)
+
+        if anomalies:
+            ind.anomalies = [f"{a.anomaly_type}: {a.field}" for a in anomalies]
+            all_anomalies.extend(anomalies)
+
+            if strict and any(a.severity == "critical" for a in anomalies):
+                raise ValidationError(
+                    source=ind.fonte.value,
+                    field=anomalies[0].field,
+                    value=anomalies[0].value,
+                    reason=anomalies[0].anomaly_type,
+                )
+
+    return sorted_indicadores, all_anomalies
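To make the rule tables above concrete, a short sketch using only names visible in this hunk. A real call to validate_indicador needs an Indicador instance, whose constructor is defined in agrobr/models.py and not shown here, so that part stays in comments:

    from decimal import Decimal

    from agrobr.validators.sanity import PRICE_RULES

    # The soybean rule bounds the spot indicator to 30-300 BRL per
    # 60 kg sack and flags day-over-day moves above 15%.
    rule = PRICE_RULES["soja"]

    valor = Decimal("350")  # outside the allowed band
    if rule.max_value is not None and valor > rule.max_value:
        print("out_of_range, above max by", valor - rule.max_value)

    # With a real model instance:
    #   anomalies = validate_indicador(indicador, valor_anterior)
    #   -> empty list when the value is in range and the daily move is small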