agrobr 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agrobr/__init__.py +10 -0
- agrobr/alerts/__init__.py +7 -0
- agrobr/alerts/notifier.py +167 -0
- agrobr/cache/__init__.py +31 -0
- agrobr/cache/duckdb_store.py +433 -0
- agrobr/cache/history.py +317 -0
- agrobr/cache/migrations.py +82 -0
- agrobr/cache/policies.py +240 -0
- agrobr/cepea/__init__.py +7 -0
- agrobr/cepea/api.py +360 -0
- agrobr/cepea/client.py +273 -0
- agrobr/cepea/parsers/__init__.py +37 -0
- agrobr/cepea/parsers/base.py +35 -0
- agrobr/cepea/parsers/consensus.py +300 -0
- agrobr/cepea/parsers/detector.py +108 -0
- agrobr/cepea/parsers/fingerprint.py +226 -0
- agrobr/cepea/parsers/v1.py +305 -0
- agrobr/cli.py +323 -0
- agrobr/conab/__init__.py +21 -0
- agrobr/conab/api.py +239 -0
- agrobr/conab/client.py +219 -0
- agrobr/conab/parsers/__init__.py +7 -0
- agrobr/conab/parsers/v1.py +383 -0
- agrobr/constants.py +205 -0
- agrobr/exceptions.py +104 -0
- agrobr/health/__init__.py +23 -0
- agrobr/health/checker.py +202 -0
- agrobr/health/reporter.py +314 -0
- agrobr/http/__init__.py +9 -0
- agrobr/http/browser.py +214 -0
- agrobr/http/rate_limiter.py +69 -0
- agrobr/http/retry.py +93 -0
- agrobr/http/user_agents.py +67 -0
- agrobr/ibge/__init__.py +19 -0
- agrobr/ibge/api.py +273 -0
- agrobr/ibge/client.py +256 -0
- agrobr/models.py +85 -0
- agrobr/normalize/__init__.py +64 -0
- agrobr/normalize/dates.py +303 -0
- agrobr/normalize/encoding.py +102 -0
- agrobr/normalize/regions.py +308 -0
- agrobr/normalize/units.py +278 -0
- agrobr/noticias_agricolas/__init__.py +6 -0
- agrobr/noticias_agricolas/client.py +222 -0
- agrobr/noticias_agricolas/parser.py +187 -0
- agrobr/sync.py +147 -0
- agrobr/telemetry/__init__.py +17 -0
- agrobr/telemetry/collector.py +153 -0
- agrobr/utils/__init__.py +5 -0
- agrobr/utils/logging.py +59 -0
- agrobr/validators/__init__.py +35 -0
- agrobr/validators/sanity.py +286 -0
- agrobr/validators/structural.py +313 -0
- agrobr-0.1.0.dist-info/METADATA +243 -0
- agrobr-0.1.0.dist-info/RECORD +58 -0
- agrobr-0.1.0.dist-info/WHEEL +4 -0
- agrobr-0.1.0.dist-info/entry_points.txt +2 -0
- agrobr-0.1.0.dist-info/licenses/LICENSE +21 -0
agrobr/alerts/notifier.py
ADDED
@@ -0,0 +1,167 @@
"""Dispatcher de alertas multi-canal."""

from __future__ import annotations

import asyncio
import json
from enum import StrEnum
from typing import Any

import httpx
import structlog

from agrobr import constants

logger = structlog.get_logger()


class AlertLevel(StrEnum):
    INFO = "info"
    WARNING = "warning"
    CRITICAL = "critical"


async def send_alert(
    level: AlertLevel | str,
    title: str,
    details: dict[str, Any],
    source: str | None = None,
) -> None:
    """Envia alerta para todos os canais configurados."""
    settings = constants.AlertSettings()

    if not settings.enabled:
        logger.debug("alerts_disabled", title=title)
        return

    if isinstance(level, str):
        level = AlertLevel(level)

    tasks = []

    if settings.slack_webhook:
        tasks.append(_send_slack(settings.slack_webhook, level, title, details, source))

    if settings.discord_webhook:
        tasks.append(_send_discord(settings.discord_webhook, level, title, details, source))

    if settings.sendgrid_api_key and settings.email_to:
        tasks.append(_send_email(settings, level, title, details, source))

    if tasks:
        results = await asyncio.gather(*tasks, return_exceptions=True)
        for i, result in enumerate(results):
            if isinstance(result, Exception):
                logger.error("alert_send_failed", channel=i, error=str(result))
    else:
        logger.warning("no_alert_channels_configured", title=title)


async def _send_slack(
    webhook: str,
    level: AlertLevel,
    title: str,
    details: dict[str, Any],
    source: str | None,
) -> None:
    emoji = {"info": "info", "warning": "warning", "critical": "rotating_light"}[level.value]
    color = {"info": "#36a64f", "warning": "#ff9800", "critical": "#dc3545"}[level.value]

    blocks: list[dict[str, Any]] = [
        {"type": "header", "text": {"type": "plain_text", "text": f":{emoji}: {title}"}},
    ]

    if source:
        blocks.append(
            {
                "type": "section",
                "fields": [
                    {"type": "mrkdwn", "text": f"*Source:* {source}"},
                    {"type": "mrkdwn", "text": f"*Level:* {level.value.upper()}"},
                ],
            }
        )

    if details:
        detail_text = json.dumps(details, indent=2, default=str)[:2900]
        blocks.append(
            {"type": "section", "text": {"type": "mrkdwn", "text": f"```{detail_text}```"}}
        )

    payload = {"attachments": [{"color": color, "blocks": blocks}]}

    async with httpx.AsyncClient() as client:
        response = await client.post(webhook, json=payload, timeout=10.0)
        response.raise_for_status()

    logger.info("alert_sent", channel="slack", level=level.value, title=title)


async def _send_discord(
    webhook: str,
    level: AlertLevel,
    title: str,
    details: dict[str, Any],
    source: str | None,
) -> None:
    emoji = {"info": "info", "warning": "warning", "critical": "rotating_light"}[level.value]
    color = {"info": 0x36A64F, "warning": 0xFF9800, "critical": 0xDC3545}[level.value]

    detail_text = json.dumps(details, indent=2, default=str)[:1900]

    embed: dict[str, Any] = {
        "title": f":{emoji}: {title}",
        "color": color,
        "fields": [],
    }

    if source:
        embed["fields"].append({"name": "Source", "value": source, "inline": True})
        embed["fields"].append({"name": "Level", "value": level.value.upper(), "inline": True})

    if details:
        embed["description"] = f"```json\n{detail_text}\n```"

    payload = {"embeds": [embed]}

    async with httpx.AsyncClient() as client:
        response = await client.post(webhook, json=payload, timeout=10.0)
        response.raise_for_status()

    logger.info("alert_sent", channel="discord", level=level.value, title=title)


async def _send_email(
    settings: constants.AlertSettings,
    level: AlertLevel,
    title: str,
    details: dict[str, Any],
    source: str | None,
) -> None:
    detail_text = json.dumps(details, indent=2, default=str)

    html_content = f"""
    <h2>{title}</h2>
    <p><strong>Level:</strong> {level.value.upper()}</p>
    {"<p><strong>Source:</strong> " + source + "</p>" if source else ""}
    <h3>Details</h3>
    <pre>{detail_text}</pre>
    """

    payload = {
        "personalizations": [{"to": [{"email": e} for e in settings.email_to]}],
        "from": {"email": settings.email_from},
        "subject": f"[agrobr {level.value.upper()}] {title}",
        "content": [{"type": "text/html", "value": html_content}],
    }

    async with httpx.AsyncClient() as client:
        response = await client.post(
            "https://api.sendgrid.com/v3/mail/send",
            json=payload,
            headers={"Authorization": f"Bearer {settings.sendgrid_api_key}"},
            timeout=10.0,
        )
        response.raise_for_status()

    logger.info("alert_sent", channel="email", level=level.value, title=title)
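For reference, a minimal usage sketch of the dispatcher above. It assumes at least one channel (a Slack or Discord webhook, or a SendGrid key plus recipients) has been configured through constants.AlertSettings, whose definition is not part of this diff; the title and details payload are illustrative.

import asyncio

from agrobr.alerts.notifier import AlertLevel, send_alert

# Fan-out to every configured channel; a failure on one channel is logged and
# does not block the others (asyncio.gather with return_exceptions=True).
# With no channel configured, the call only logs "no_alert_channels_configured".
asyncio.run(
    send_alert(
        AlertLevel.WARNING,
        "CEPEA layout fingerprint changed",        # illustrative title
        {"parser_version": 1, "produto": "soja"},  # illustrative details
        source="cepea",
    )
)
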
agrobr/cache/__init__.py
ADDED
@@ -0,0 +1,31 @@
"""Cache DuckDB com separação cache/histórico."""

from __future__ import annotations

from .duckdb_store import DuckDBStore, get_store
from .history import HistoryManager, get_history_manager
from .policies import (
    TTL,
    CachePolicy,
    calculate_expiry,
    get_policy,
    get_stale_max,
    get_ttl,
    is_expired,
    is_stale_acceptable,
)

__all__ = [
    "DuckDBStore",
    "get_store",
    "CachePolicy",
    "TTL",
    "get_policy",
    "get_ttl",
    "get_stale_max",
    "is_expired",
    "is_stale_acceptable",
    "calculate_expiry",
    "HistoryManager",
    "get_history_manager",
]

agrobr/cache/duckdb_store.py
ADDED
@@ -0,0 +1,433 @@
"""Storage DuckDB com separação cache/histórico."""

from __future__ import annotations

from datetime import datetime, timedelta
from typing import Any

import duckdb
import structlog

from agrobr import constants

logger = structlog.get_logger()

SCHEMA_CACHE = """
CREATE TABLE IF NOT EXISTS cache_entries (
    key TEXT PRIMARY KEY,
    data BLOB NOT NULL,
    source TEXT NOT NULL,
    created_at TIMESTAMP NOT NULL,
    expires_at TIMESTAMP NOT NULL,
    last_accessed_at TIMESTAMP NOT NULL,
    hit_count INTEGER DEFAULT 0,
    version INTEGER DEFAULT 1,
    stale BOOLEAN DEFAULT FALSE
);

CREATE INDEX IF NOT EXISTS idx_cache_source ON cache_entries(source);
CREATE INDEX IF NOT EXISTS idx_cache_expires ON cache_entries(expires_at);
"""

SCHEMA_HISTORY = """
CREATE TABLE IF NOT EXISTS history_entries (
    id INTEGER PRIMARY KEY,
    key TEXT NOT NULL,
    data BLOB NOT NULL,
    source TEXT NOT NULL,
    data_date DATE NOT NULL,
    collected_at TIMESTAMP NOT NULL,
    parser_version INTEGER NOT NULL,
    fingerprint_hash TEXT,
    UNIQUE(key, data_date, collected_at)
);

CREATE INDEX IF NOT EXISTS idx_history_source ON history_entries(source);
CREATE INDEX IF NOT EXISTS idx_history_date ON history_entries(data_date);
CREATE INDEX IF NOT EXISTS idx_history_key ON history_entries(key);
"""

SCHEMA_INDICADORES = """
CREATE SEQUENCE IF NOT EXISTS seq_indicadores_id START 1;

CREATE TABLE IF NOT EXISTS indicadores (
    id INTEGER DEFAULT nextval('seq_indicadores_id') PRIMARY KEY,
    produto TEXT NOT NULL,
    praca TEXT,
    data DATE NOT NULL,
    valor DECIMAL(18,4) NOT NULL,
    unidade TEXT NOT NULL,
    fonte TEXT NOT NULL,
    metodologia TEXT,
    variacao_percentual DECIMAL(8,4),
    collected_at TIMESTAMP NOT NULL,
    parser_version INTEGER DEFAULT 1,
    UNIQUE(produto, praca, data, fonte)
);

CREATE INDEX IF NOT EXISTS idx_ind_produto ON indicadores(produto);
CREATE INDEX IF NOT EXISTS idx_ind_data ON indicadores(data);
CREATE INDEX IF NOT EXISTS idx_ind_produto_data ON indicadores(produto, data);
"""


class DuckDBStore:
    """Storage com DuckDB separando cache volátil e histórico permanente."""

    def __init__(self, settings: constants.CacheSettings | None = None) -> None:
        self.settings = settings or constants.CacheSettings()
        self.db_path = self.settings.cache_dir / self.settings.db_name
        self._conn: duckdb.DuckDBPyConnection | None = None

    def _get_conn(self) -> duckdb.DuckDBPyConnection:
        if self._conn is None:
            self.settings.cache_dir.mkdir(parents=True, exist_ok=True)
            self._conn = duckdb.connect(str(self.db_path))
            self._init_schema()
        return self._conn

    def _init_schema(self) -> None:
        from agrobr.cache.migrations import migrate

        conn = self._conn
        if conn:
            conn.execute(SCHEMA_CACHE)
            conn.execute(SCHEMA_HISTORY)
            conn.execute(SCHEMA_INDICADORES)
            migrate(conn)

    def cache_get(self, key: str) -> tuple[bytes | None, bool]:
        """Busca entrada no cache. Retorna (dados, is_stale)."""
        conn = self._get_conn()
        now = datetime.utcnow()

        result = conn.execute(
            "SELECT data, expires_at, stale FROM cache_entries WHERE key = ?",
            [key],
        ).fetchone()

        if result is None:
            logger.debug("cache_miss", key=key, reason="not_found")
            return None, False

        data, expires_at, stale = result

        conn.execute(
            "UPDATE cache_entries SET hit_count = hit_count + 1, last_accessed_at = ? WHERE key = ?",
            [now, key],
        )

        if expires_at < now:
            logger.debug("cache_hit", key=key, stale=True, reason="expired")
            return data, True

        if stale:
            logger.debug("cache_hit", key=key, stale=True, reason="marked_stale")
            return data, True

        logger.debug("cache_hit", key=key, stale=False)
        return data, False

    def cache_set(
        self,
        key: str,
        data: bytes,
        source: constants.Fonte,
        ttl_seconds: int,
    ) -> None:
        """Grava entrada no cache."""
        conn = self._get_conn()
        now = datetime.utcnow()
        expires_at = now + timedelta(seconds=ttl_seconds)

        conn.execute(
            """
            INSERT OR REPLACE INTO cache_entries
            (key, data, source, created_at, expires_at, last_accessed_at, hit_count, version, stale)
            VALUES (?, ?, ?, ?, ?, ?, 0, 1, FALSE)
            """,
            [key, data, source.value, now, expires_at, now],
        )

        logger.debug("cache_write", key=key, ttl_seconds=ttl_seconds)

    def cache_invalidate(self, key: str) -> None:
        """Marca entrada como stale."""
        conn = self._get_conn()
        conn.execute("UPDATE cache_entries SET stale = TRUE WHERE key = ?", [key])

    def cache_delete(self, key: str) -> None:
        """Remove entrada do cache."""
        conn = self._get_conn()
        conn.execute("DELETE FROM cache_entries WHERE key = ?", [key])

    def cache_clear(
        self,
        source: constants.Fonte | None = None,
        older_than_days: int | None = None,
    ) -> int:
        """Limpa cache com filtros opcionais. Retorna número de entradas removidas."""
        conn = self._get_conn()

        conditions = []
        params: list[Any] = []

        if source:
            conditions.append("source = ?")
            params.append(source.value)

        if older_than_days:
            cutoff = datetime.utcnow() - timedelta(days=older_than_days)
            conditions.append("created_at < ?")
            params.append(cutoff)

        where = " AND ".join(conditions) if conditions else "1=1"
        result = conn.execute(f"DELETE FROM cache_entries WHERE {where} RETURNING *", params)

        count = len(result.fetchall()) if result else 0
        logger.info("cache_cleared", count=count, source=source, older_than_days=older_than_days)
        return count

    def history_save(
        self,
        key: str,
        data: bytes,
        source: constants.Fonte,
        data_date: datetime,
        parser_version: int,
        fingerprint_hash: str | None = None,
    ) -> None:
        """Salva dados no histórico permanente."""
        if not self.settings.save_to_history:
            return

        conn = self._get_conn()
        now = datetime.utcnow()

        try:
            conn.execute(
                """
                INSERT INTO history_entries
                (key, data, source, data_date, collected_at, parser_version, fingerprint_hash)
                VALUES (?, ?, ?, ?, ?, ?, ?)
                """,
                [key, data, source.value, data_date, now, parser_version, fingerprint_hash],
            )
            logger.debug("history_saved", key=key, data_date=data_date)
        except duckdb.ConstraintException:
            logger.debug("history_exists", key=key, data_date=data_date)

    def history_get(
        self,
        key: str,
        data_date: datetime | None = None,
    ) -> bytes | None:
        """Busca dados no histórico. Se data_date não especificado, retorna mais recente."""
        conn = self._get_conn()

        if data_date:
            result = conn.execute(
                """
                SELECT data FROM history_entries
                WHERE key = ? AND data_date = ?
                ORDER BY collected_at DESC LIMIT 1
                """,
                [key, data_date],
            ).fetchone()
        else:
            result = conn.execute(
                """
                SELECT data FROM history_entries
                WHERE key = ?
                ORDER BY data_date DESC, collected_at DESC LIMIT 1
                """,
                [key],
            ).fetchone()

        return result[0] if result else None

    def indicadores_query(
        self,
        produto: str,
        inicio: datetime | None = None,
        fim: datetime | None = None,
        praca: str | None = None,
    ) -> list[dict[str, Any]]:
        """
        Busca indicadores no histórico local.

        Args:
            produto: Nome do produto
            inicio: Data inicial
            fim: Data final
            praca: Praça específica (opcional)

        Returns:
            Lista de dicts com dados dos indicadores
        """
        conn = self._get_conn()

        conditions = ["produto = ?"]
        params: list[Any] = [produto.lower()]

        if inicio:
            conditions.append("data >= ?")
            params.append(inicio)

        if fim:
            conditions.append("data <= ?")
            params.append(fim)

        if praca:
            conditions.append("praca = ?")
            params.append(praca)

        where = " AND ".join(conditions)

        result = conn.execute(
            f"""
            SELECT produto, praca, data, valor, unidade, fonte, metodologia,
                   variacao_percentual, collected_at, parser_version
            FROM indicadores
            WHERE {where}
            ORDER BY data DESC
            """,
            params,
        ).fetchall()

        columns = [
            "produto",
            "praca",
            "data",
            "valor",
            "unidade",
            "fonte",
            "metodologia",
            "variacao_percentual",
            "collected_at",
            "parser_version",
        ]

        indicadores = [dict(zip(columns, row)) for row in result]

        logger.debug(
            "indicadores_query",
            produto=produto,
            count=len(indicadores),
            inicio=inicio,
            fim=fim,
        )

        return indicadores

    def indicadores_upsert(self, indicadores: list[dict[str, Any]]) -> int:
        """
        Salva indicadores no histórico (upsert).

        Args:
            indicadores: Lista de dicts com dados dos indicadores

        Returns:
            Número de indicadores salvos/atualizados
        """
        if not indicadores:
            return 0

        conn = self._get_conn()
        now = datetime.utcnow()
        count = 0

        for ind in indicadores:
            try:
                conn.execute(
                    """
                    INSERT INTO indicadores
                    (produto, praca, data, valor, unidade, fonte, metodologia,
                     variacao_percentual, collected_at, parser_version)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                    ON CONFLICT (produto, praca, data, fonte)
                    DO UPDATE SET
                        valor = EXCLUDED.valor,
                        variacao_percentual = EXCLUDED.variacao_percentual,
                        collected_at = EXCLUDED.collected_at
                    """,
                    [
                        ind.get("produto", "").lower(),
                        ind.get("praca"),
                        ind["data"],
                        float(ind["valor"]),
                        ind.get("unidade", "BRL/unidade"),
                        ind.get("fonte", "unknown"),
                        ind.get("metodologia"),
                        ind.get("variacao_percentual"),
                        now,
                        ind.get("parser_version", 1),
                    ],
                )
                count += 1
            except Exception as e:
                logger.warning(
                    "indicador_upsert_failed",
                    data=ind.get("data"),
                    error=str(e),
                )

        logger.info("indicadores_upsert", count=count, total=len(indicadores))
        return count

    def indicadores_get_dates(
        self,
        produto: str,
        inicio: datetime | None = None,
        fim: datetime | None = None,
    ) -> set[datetime]:
        """
        Retorna conjunto de datas com indicadores no histórico.

        Args:
            produto: Nome do produto
            inicio: Data inicial
            fim: Data final

        Returns:
            Set de datas presentes no histórico
        """
        conn = self._get_conn()

        conditions = ["produto = ?"]
        params: list[Any] = [produto.lower()]

        if inicio:
            conditions.append("data >= ?")
            params.append(inicio)

        if fim:
            conditions.append("data <= ?")
            params.append(fim)

        where = " AND ".join(conditions)

        result = conn.execute(
            f"SELECT DISTINCT data FROM indicadores WHERE {where}",
            params,
        ).fetchall()

        dates = {row[0] for row in result}
        return dates

    def close(self) -> None:
        """Fecha conexão."""
        if self._conn:
            self._conn.close()
            self._conn = None


_store: DuckDBStore | None = None


def get_store() -> DuckDBStore:
    """Obtém instância global do store."""
    global _store
    if _store is None:
        _store = DuckDBStore()
    return _store
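A short sketch of the indicadores round trip implemented by DuckDBStore above (upsert followed by query). Only "data" and "valor" are strictly required per record; the other fields fall back to the defaults visible in indicadores_upsert, and the concrete values below are illustrative.

from datetime import datetime

from agrobr.cache.duckdb_store import get_store

store = get_store()  # module-level singleton; connects lazily on first use

# Upsert is keyed on (produto, praca, data, fonte); produto is lower-cased on write.
store.indicadores_upsert(
    [
        {
            "produto": "Soja",
            "praca": "Paranaguá",
            "data": datetime(2024, 1, 15),
            "valor": 135.50,
            "unidade": "BRL/sc 60kg",  # illustrative unit label
            "fonte": "cepea",          # illustrative source tag
        }
    ]
)

# Query back by product and date range; rows come out as dicts, newest first.
for row in store.indicadores_query("soja", inicio=datetime(2024, 1, 1)):
    print(row["data"], row["valor"], row["unidade"])

store.close()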