agrobr-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. agrobr/__init__.py +10 -0
  2. agrobr/alerts/__init__.py +7 -0
  3. agrobr/alerts/notifier.py +167 -0
  4. agrobr/cache/__init__.py +31 -0
  5. agrobr/cache/duckdb_store.py +433 -0
  6. agrobr/cache/history.py +317 -0
  7. agrobr/cache/migrations.py +82 -0
  8. agrobr/cache/policies.py +240 -0
  9. agrobr/cepea/__init__.py +7 -0
  10. agrobr/cepea/api.py +360 -0
  11. agrobr/cepea/client.py +273 -0
  12. agrobr/cepea/parsers/__init__.py +37 -0
  13. agrobr/cepea/parsers/base.py +35 -0
  14. agrobr/cepea/parsers/consensus.py +300 -0
  15. agrobr/cepea/parsers/detector.py +108 -0
  16. agrobr/cepea/parsers/fingerprint.py +226 -0
  17. agrobr/cepea/parsers/v1.py +305 -0
  18. agrobr/cli.py +323 -0
  19. agrobr/conab/__init__.py +21 -0
  20. agrobr/conab/api.py +239 -0
  21. agrobr/conab/client.py +219 -0
  22. agrobr/conab/parsers/__init__.py +7 -0
  23. agrobr/conab/parsers/v1.py +383 -0
  24. agrobr/constants.py +205 -0
  25. agrobr/exceptions.py +104 -0
  26. agrobr/health/__init__.py +23 -0
  27. agrobr/health/checker.py +202 -0
  28. agrobr/health/reporter.py +314 -0
  29. agrobr/http/__init__.py +9 -0
  30. agrobr/http/browser.py +214 -0
  31. agrobr/http/rate_limiter.py +69 -0
  32. agrobr/http/retry.py +93 -0
  33. agrobr/http/user_agents.py +67 -0
  34. agrobr/ibge/__init__.py +19 -0
  35. agrobr/ibge/api.py +273 -0
  36. agrobr/ibge/client.py +256 -0
  37. agrobr/models.py +85 -0
  38. agrobr/normalize/__init__.py +64 -0
  39. agrobr/normalize/dates.py +303 -0
  40. agrobr/normalize/encoding.py +102 -0
  41. agrobr/normalize/regions.py +308 -0
  42. agrobr/normalize/units.py +278 -0
  43. agrobr/noticias_agricolas/__init__.py +6 -0
  44. agrobr/noticias_agricolas/client.py +222 -0
  45. agrobr/noticias_agricolas/parser.py +187 -0
  46. agrobr/sync.py +147 -0
  47. agrobr/telemetry/__init__.py +17 -0
  48. agrobr/telemetry/collector.py +153 -0
  49. agrobr/utils/__init__.py +5 -0
  50. agrobr/utils/logging.py +59 -0
  51. agrobr/validators/__init__.py +35 -0
  52. agrobr/validators/sanity.py +286 -0
  53. agrobr/validators/structural.py +313 -0
  54. agrobr-0.1.0.dist-info/METADATA +243 -0
  55. agrobr-0.1.0.dist-info/RECORD +58 -0
  56. agrobr-0.1.0.dist-info/WHEEL +4 -0
  57. agrobr-0.1.0.dist-info/entry_points.txt +2 -0
  58. agrobr-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,300 @@
+ """
+ Multi-parser consensus for cross-validation.
+
+ Runs multiple parsers and compares their results to detect
+ parsing problems.
+ """
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+ from typing import Any
+
+ import structlog
+
+ from ...alerts.notifier import AlertLevel, send_alert
+ from ...exceptions import ParseError
+ from ...models import Indicador
+ from .base import BaseParser
+ from .v1 import CepeaParserV1
+
+ logger = structlog.get_logger()
+
+ CONSENSUS_PARSERS: list[type[BaseParser]] = [
+     CepeaParserV1,
+ ]
+
+ DIVERGENCE_THRESHOLD_COUNT = 0.1
+ DIVERGENCE_THRESHOLD_VALUE = 0.01
+
+
+ @dataclass
+ class ConsensusResult:
+     """Parsing result with consensus metadata."""
+
+     indicadores: list[Indicador]
+     parser_used: BaseParser
+     all_results: dict[int, list[Indicador]]
+     has_consensus: bool
+     divergences: list[dict[str, Any]]
+     report: dict[str, Any]
+
+
+ @dataclass
+ class ParserDivergence:
+     """Divergence between parsers."""
+
+     divergence_type: str
+     versions: list[int]
+     details: dict[str, Any]
+
+
+ async def parse_with_consensus(
+     html: str,
+     produto: str,
+     require_consensus: bool = False,
+ ) -> ConsensusResult:
+     """
+     Runs multiple parsers and compares their results.
+
+     Args:
+         html: HTML content
+         produto: Product to parse
+         require_consensus: If True, raises an error when parsers diverge
+
+     Returns:
+         ConsensusResult
+
+     Raises:
+         ParseError: If require_consensus is set and parsers diverge
+     """
+     results: dict[int, list[Indicador]] = {}
+     errors: dict[int, str] = {}
+
+     for parser_cls in CONSENSUS_PARSERS:
+         parser = parser_cls()
+         try:
+             can_parse, confidence = parser.can_parse(html)
+             if can_parse and confidence > 0.5:
+                 parsed = parser.parse(html, produto)
+                 results[parser.version] = parsed
+                 logger.debug(
+                     "consensus_parser_success",
+                     version=parser.version,
+                     count=len(parsed),
+                 )
+         except Exception as e:
+             errors[parser.version] = str(e)
+             logger.warning(
+                 "consensus_parser_failed",
+                 version=parser.version,
+                 error=str(e),
+             )
+
+     divergences, report = analyze_consensus(results, errors)
+
+     has_consensus = len(divergences) == 0
+
+     if not has_consensus:
+         logger.warning(
+             "consensus_divergence_detected",
+             divergence_count=len(divergences),
+         )
+
+         if require_consensus:
+             await send_alert(
+                 level=AlertLevel.WARNING,
+                 title="Parser consensus failed",
+                 details=report,
+             )
+             raise ParseError(
+                 source="cepea",
+                 parser_version=0,
+                 reason=f"Parsers diverged: {len(divergences)} differences",
+             )
+
+     latest_version = max(results.keys()) if results else 0
+     best_results = results.get(latest_version, [])
+
+     parser_used: BaseParser = CepeaParserV1()
+     for parser_cls in CONSENSUS_PARSERS:
+         if parser_cls().version == latest_version:
+             parser_used = parser_cls()
+             break
+
+     return ConsensusResult(
+         indicadores=best_results,
+         parser_used=parser_used,
+         all_results=results,
+         has_consensus=has_consensus,
+         divergences=divergences,
+         report=report,
+     )
+
+
+ def analyze_consensus(
+     results: dict[int, list[Indicador]],
+     errors: dict[int, str],
+ ) -> tuple[list[dict[str, Any]], dict[str, Any]]:
+     """
+     Analyzes results from multiple parsers.
+
+     Args:
+         results: Results per parser version
+         errors: Errors per parser version
+
+     Returns:
+         Tuple (list of divergences, full report)
+     """
+     report = {
+         "parser_count": len(CONSENSUS_PARSERS),
+         "successful": list(results.keys()),
+         "failed": list(errors.keys()),
+         "errors": errors,
+     }
+
+     divergences: list[dict[str, Any]] = []
+
+     if len(results) < 2:
+         return divergences, report
+
+     counts = {v: len(r) for v, r in results.items()}
+     unique_counts = set(counts.values())
+
+     if len(unique_counts) > 1:
+         divergences.append(
+             {
+                 "type": "count_mismatch",
+                 "versions": list(counts.keys()),
+                 "counts": counts,
+                 "description": f"Different record counts: {counts}",
+             }
+         )
+
+     versions = list(results.keys())
+     base_version = versions[0]
+     base_results = results[base_version]
+
+     for other_version in versions[1:]:
+         other_results = results[other_version]
+
+         if base_results and other_results:
+             if base_results[0].data != other_results[0].data:
+                 divergences.append(
+                     {
+                         "type": "first_date_mismatch",
+                         "versions": [base_version, other_version],
+                         "values": [str(base_results[0].data), str(other_results[0].data)],
+                     }
+                 )
+
+             first_diff = abs(float(base_results[0].valor) - float(other_results[0].valor))
+             if first_diff > DIVERGENCE_THRESHOLD_VALUE:
+                 divergences.append(
+                     {
+                         "type": "first_value_mismatch",
+                         "versions": [base_version, other_version],
+                         "values": [str(base_results[0].valor), str(other_results[0].valor)],
+                         "difference": first_diff,
+                     }
+                 )
+
+             if base_results[-1].data != other_results[-1].data:
+                 divergences.append(
+                     {
+                         "type": "last_date_mismatch",
+                         "versions": [base_version, other_version],
+                         "values": [str(base_results[-1].data), str(other_results[-1].data)],
+                     }
+                 )
+
+             last_diff = abs(float(base_results[-1].valor) - float(other_results[-1].valor))
+             if last_diff > DIVERGENCE_THRESHOLD_VALUE:
+                 divergences.append(
+                     {
+                         "type": "last_value_mismatch",
+                         "versions": [base_version, other_version],
+                         "values": [str(base_results[-1].valor), str(other_results[-1].valor)],
+                         "difference": last_diff,
+                     }
+                 )
+
+     report["divergences"] = divergences
+     report["has_divergence"] = len(divergences) > 0
+
+     return divergences, report
+
+
+ def select_best_result(
+     results: dict[int, list[Indicador]],
+     divergences: list[dict[str, Any]],
+ ) -> tuple[int, list[Indicador]]:
+     """
+     Selects the best result when parsers diverge.
+
+     Strategy:
+     1. Prefer the most recent parser
+     2. If counts differ, prefer the result with more data
+     3. As a last resort, use the most recent
+
+     Args:
+         results: Results per version
+         divergences: Detected divergences
+
+     Returns:
+         Tuple (selected version, indicators)
+     """
+     if not results:
+         return 0, []
+
+     has_count_mismatch = any(d["type"] == "count_mismatch" for d in divergences)
+
+     if has_count_mismatch:
+         best_version = max(results.keys(), key=lambda v: len(results[v]))
+     else:
+         best_version = max(results.keys())
+
+     return best_version, results[best_version]
+
+
+ class ConsensusValidator:
+     """Consensus validator for continuous use."""
+
+     def __init__(self) -> None:
+         self.history: list[ConsensusResult] = []
+         self.divergence_count = 0
+
+     async def validate(self, html: str, produto: str) -> ConsensusResult:
+         """
+         Runs validation with history tracking.
+
+         Args:
+             html: HTML content
+             produto: Product
+
+         Returns:
+             ConsensusResult
+         """
+         result = await parse_with_consensus(html, produto, require_consensus=False)
+
+         self.history.append(result)
+         if not result.has_consensus:
+             self.divergence_count += 1
+
+         return result
+
+     @property
+     def divergence_rate(self) -> float:
+         """Divergence rate over the history."""
+         if not self.history:
+             return 0.0
+         return self.divergence_count / len(self.history)
+
+     def get_statistics(self) -> dict[str, Any]:
+         """Returns validator statistics."""
+         return {
+             "total_validations": len(self.history),
+             "divergence_count": self.divergence_count,
+             "divergence_rate": self.divergence_rate,
+             "consensus_rate": 1 - self.divergence_rate,
+         }
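
For reference, a minimal usage sketch of parse_with_consensus (the HTML string and the "soja" product key are hypothetical placeholders, not values shipped with the package):

    import asyncio

    from agrobr.cepea.parsers.consensus import parse_with_consensus

    async def main() -> None:
        html = "<html>...</html>"  # placeholder: a fetched CEPEA page would go here
        result = await parse_with_consensus(html, produto="soja", require_consensus=False)
        # ConsensusResult carries the winning parser's output plus the comparison report
        print(result.has_consensus, len(result.indicadores))
        for d in result.divergences:
            print(d["type"], d.get("versions"))

    asyncio.run(main())

With only CepeaParserV1 registered in CONSENSUS_PARSERS, analyze_consensus short-circuits (fewer than two results), so divergences only appear once a second parser version is added.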
@@ -0,0 +1,108 @@
+ """Parser detector and selector with cascading fallback."""
+
+ from __future__ import annotations
+
+ from datetime import date
+ from typing import TYPE_CHECKING
+
+ import structlog
+
+ from agrobr import constants, exceptions
+ from agrobr.cepea.parsers import base
+ from agrobr.cepea.parsers.v1 import CepeaParserV1
+
+ if TYPE_CHECKING:
+     from agrobr import models
+
+ logger = structlog.get_logger()
+
+ PARSERS: list[type[base.BaseParser]] = [
+     CepeaParserV1,
+ ]
+
+
+ async def get_parser_with_fallback(
+     html: str,
+     produto: str,
+     data_referencia: date | None = None,
+     strict: bool = False,
+ ) -> tuple[base.BaseParser, list[models.Indicador]]:
+     """Selects a parser and runs it with cascading fallback."""
+     if not PARSERS:
+         raise exceptions.ParseError(
+             source="cepea",
+             parser_version=0,
+             reason="No parsers registered. CEPEA parser will be implemented in WEEK 3.",
+             html_snippet=html[:200],
+         )
+
+     errors: list[tuple[str, str]] = []
+     warnings: list[str] = []
+
+     for parser_cls in reversed(PARSERS):
+         parser = parser_cls()
+
+         if data_referencia:
+             if parser.valid_from > data_referencia:
+                 continue
+             if parser.valid_until and data_referencia > parser.valid_until:
+                 continue
+
+         can_parse, confidence = parser.can_parse(html)
+
+         logger.debug(
+             "parser_check",
+             parser_version=parser.version,
+             can_parse=can_parse,
+             confidence=confidence,
+         )
+
+         if not can_parse:
+             continue
+
+         if confidence < constants.CONFIDENCE_LOW and strict:
+             raise exceptions.FingerprintMismatchError(
+                 source=parser.source,
+                 similarity=confidence,
+                 threshold=constants.CONFIDENCE_LOW,
+             )
+
+         if confidence < constants.CONFIDENCE_HIGH:
+             warnings.append(
+                 f"Parser v{parser.version} confidence {confidence:.1%} "
+                 f"(expected >= {constants.CONFIDENCE_HIGH:.1%})"
+             )
+
+         try:
+             result = parser.parse(html, produto)
+
+             if not result:
+                 errors.append((f"v{parser.version}", "No data extracted"))
+                 continue
+
+             if warnings:
+                 logger.warning(
+                     "parser_low_confidence",
+                     parser_version=parser.version,
+                     confidence=confidence,
+                     warnings=warnings,
+                 )
+
+             return parser, result
+
+         except Exception as e:
+             errors.append((f"v{parser.version}", str(e)))
+             logger.warning(
+                 "parser_failed",
+                 parser_version=parser.version,
+                 error=str(e),
+             )
+             continue
+
+     error_summary = "; ".join(f"{v}: {e}" for v, e in errors)
+     raise exceptions.ParseError(
+         source=PARSERS[0]().source if PARSERS else "cepea",
+         parser_version=0,
+         reason=f"All parsers failed: {error_summary}",
+         html_snippet=html[:500],
+     )
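
A hedged sketch of calling this fallback entry point; the HTML, product key, and reference date are illustrative, and ParseError is raised only when every registered parser fails, as in the module above:

    import asyncio
    from datetime import date

    from agrobr import exceptions
    from agrobr.cepea.parsers.detector import get_parser_with_fallback

    async def main() -> None:
        html = "<html>...</html>"  # placeholder for a fetched CEPEA page
        try:
            parser, indicadores = await get_parser_with_fallback(
                html,
                produto="milho",                    # illustrative product key
                data_referencia=date(2024, 1, 15),  # limits candidates to parsers valid on this date
                strict=False,                       # strict=True raises on low fingerprint confidence
            )
            print(f"parsed with v{parser.version}: {len(indicadores)} records")
        except exceptions.ParseError as exc:
            print(f"all parsers failed: {exc}")

    asyncio.run(main())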
@@ -0,0 +1,226 @@
+ """Layout fingerprinting for change detection."""
+
+ from __future__ import annotations
+
+ import hashlib
+ import json
+ from datetime import datetime
+ from pathlib import Path
+ from typing import Any
+
+ import structlog
+ from bs4 import BeautifulSoup
+
+ from agrobr.constants import Fonte
+ from agrobr.models import Fingerprint
+
+ logger = structlog.get_logger()
+
+
+ def extract_fingerprint(
+     html: str,
+     source: Fonte,
+     url: str,
+ ) -> Fingerprint:
+     """
+     Extracts a structural signature from the HTML.
+
+     Args:
+         html: HTML content
+         source: Data source
+         url: Original URL
+
+     Returns:
+         Fingerprint with the structural signature
+     """
+     soup = BeautifulSoup(html, "lxml")
+
+     table_classes: list[list[str]] = []
+     for table in soup.find_all("table")[:10]:
+         classes_raw = table.get("class")
+         if classes_raw is None:
+             classes: list[str] = []
+         elif isinstance(classes_raw, str):
+             classes = [classes_raw]
+         else:
+             classes = list(classes_raw)
+         table_classes.append(sorted(classes))
+
+     keywords = ["preco", "indicador", "cotacao", "valor", "tabela", "dados"]
+     key_ids: list[str] = []
+     for elem in soup.find_all(id=True):
+         elem_id_raw = elem.get("id")
+         if elem_id_raw is None or not isinstance(elem_id_raw, str):
+             continue
+         elem_id = elem_id_raw.lower()
+         if any(kw in elem_id for kw in keywords):
+             key_ids.append(elem_id_raw)
+     key_ids = sorted(set(key_ids))[:20]
+
+     table_headers: list[list[str]] = []
+     for table in soup.find_all("table")[:5]:
+         headers: list[str] = []
+         for th in table.find_all("th"):
+             text = th.get_text(strip=True)[:50]
+             if text:
+                 headers.append(text)
+         if headers:
+             table_headers.append(headers)
+
+     element_counts = {
+         "tables": len(soup.find_all("table")),
+         "forms": len(soup.find_all("form")),
+         "divs_with_id": len(soup.find_all("div", id=True)),
+         "inputs": len(soup.find_all("input")),
+         "selects": len(soup.find_all("select")),
+         "links": len(soup.find_all("a")),
+         "scripts": len(soup.find_all("script")),
+     }
+
+     structure_elements: list[tuple[str, int, tuple[str, ...]]] = []
+     for tag in soup.find_all(["table", "div", "form", "section", "article"])[:30]:
+         tag_classes_raw = tag.get("class")
+         if tag_classes_raw is None:
+             tag_classes: list[str] = []
+         elif isinstance(tag_classes_raw, str):
+             tag_classes = [tag_classes_raw]
+         else:
+             tag_classes = list(tag_classes_raw)
+         structure_elements.append(
+             (
+                 tag.name or "",
+                 len(tag.find_all(recursive=False)),
+                 tuple(sorted(tag_classes))[:3] if tag_classes else (),
+             )
+         )
+
+     structure_hash = hashlib.md5(str(structure_elements).encode()).hexdigest()[:12]
+
+     return Fingerprint(
+         source=source,
+         url=url,
+         collected_at=datetime.utcnow(),
+         table_classes=table_classes,
+         key_ids=key_ids,
+         structure_hash=structure_hash,
+         table_headers=table_headers,
+         element_counts=element_counts,
+     )
+
+
+ def compare_fingerprints(
+     current: Fingerprint,
+     reference: Fingerprint,
+ ) -> tuple[float, dict[str, Any]]:
+     """
+     Compares two fingerprints and returns their similarity.
+
+     Args:
+         current: Current fingerprint
+         reference: Reference (baseline) fingerprint
+
+     Returns:
+         tuple[float, dict]: (similarity 0-1, details of the differences)
+     """
+     scores: dict[str, float] = {}
+     details: dict[str, Any] = {}
+
+     scores["structure"] = 1.0 if current.structure_hash == reference.structure_hash else 0.0
+     if scores["structure"] == 0:
+         details["structure_changed"] = {
+             "current": current.structure_hash,
+             "reference": reference.structure_hash,
+         }
+
+     if reference.table_classes:
+         matches = sum(1 for tc in current.table_classes if tc in reference.table_classes)
+         scores["table_classes"] = matches / len(reference.table_classes)
+         if scores["table_classes"] < 1.0:
+             details["table_classes_diff"] = {
+                 "missing": [
+                     tc for tc in reference.table_classes if tc not in current.table_classes
+                 ],
+                 "new": [tc for tc in current.table_classes if tc not in reference.table_classes],
+             }
+     else:
+         scores["table_classes"] = 1.0
+
+     if reference.key_ids:
+         matches = sum(1 for kid in reference.key_ids if kid in current.key_ids)
+         scores["key_ids"] = matches / len(reference.key_ids)
+         if scores["key_ids"] < 1.0:
+             details["key_ids_diff"] = {
+                 "missing": [kid for kid in reference.key_ids if kid not in current.key_ids],
+                 "new": [kid for kid in current.key_ids if kid not in reference.key_ids],
+             }
+     else:
+         scores["key_ids"] = 1.0
+
+     if reference.table_headers:
+         header_score = 0.0
+         for ref_headers in reference.table_headers:
+             for cur_headers in current.table_headers:
+                 ref_set = set(ref_headers)
+                 cur_set = set(cur_headers)
+                 if ref_set or cur_set:
+                     jaccard = len(ref_set & cur_set) / len(ref_set | cur_set)
+                     header_score = max(header_score, jaccard)
+         scores["table_headers"] = header_score
+         if scores["table_headers"] < 0.9:
+             details["table_headers_diff"] = {
+                 "reference": reference.table_headers,
+                 "current": current.table_headers,
+             }
+     else:
+         scores["table_headers"] = 1.0
+
+     count_diffs: dict[str, dict[str, int]] = {}
+     for key in reference.element_counts:
+         ref_count = reference.element_counts.get(key, 0)
+         cur_count = current.element_counts.get(key, 0)
+         if ref_count > 0:
+             diff_ratio = abs(cur_count - ref_count) / ref_count
+             if diff_ratio > 0.5:
+                 count_diffs[key] = {"reference": ref_count, "current": cur_count}
+
+     if count_diffs:
+         scores["element_counts"] = max(0, 1 - len(count_diffs) * 0.2)
+         details["element_counts_diff"] = count_diffs
+     else:
+         scores["element_counts"] = 1.0
+
+     weights = {
+         "structure": 0.25,
+         "table_classes": 0.20,
+         "key_ids": 0.15,
+         "table_headers": 0.30,
+         "element_counts": 0.10,
+     }
+
+     final_score = sum(scores[k] * weights[k] for k in weights)
+
+     logger.debug(
+         "fingerprint_comparison",
+         scores=scores,
+         final_score=final_score,
+         has_changes=bool(details),
+     )
+
+     return final_score, details
+
+
+ def save_baseline_fingerprint(fingerprint: Fingerprint, path: str) -> None:
+     """Saves a fingerprint as the reference baseline."""
+     Path(path).parent.mkdir(parents=True, exist_ok=True)
+     with open(path, "w", encoding="utf-8") as f:
+         json.dump(fingerprint.model_dump(mode="json"), f, indent=2, default=str)
+
+
+ def load_baseline_fingerprint(path: str) -> Fingerprint | None:
+     """Loads the reference fingerprint."""
+     if not Path(path).exists():
+         return None
+
+     with open(path, encoding="utf-8") as f:
+         data = json.load(f)
+     return Fingerprint.model_validate(data)
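
A minimal sketch of the baseline workflow these helpers support: extract a fingerprint, compare it against a saved baseline, and record a baseline on first run. The Fonte.CEPEA enum member, the file path, the URL, and the 0.9 threshold are assumptions for illustration, not values defined by this module:

    from agrobr.cepea.parsers.fingerprint import (
        compare_fingerprints,
        extract_fingerprint,
        load_baseline_fingerprint,
        save_baseline_fingerprint,
    )
    from agrobr.constants import Fonte

    html = "<html>...</html>"  # placeholder for a fetched page
    # Fonte.CEPEA and the URL are assumed here for illustration
    current = extract_fingerprint(html, source=Fonte.CEPEA, url="https://example.invalid/indicador")

    baseline = load_baseline_fingerprint("baselines/cepea.json")  # hypothetical path
    if baseline is None:
        save_baseline_fingerprint(current, "baselines/cepea.json")  # first run: record the baseline
    else:
        similarity, details = compare_fingerprints(current, baseline)
        if similarity < 0.9:  # threshold chosen for illustration
            print("layout drift detected:", details)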