catalogmx 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
- catalogmx/__init__.py +133 -19
- catalogmx/calculators/__init__.py +113 -0
- catalogmx/calculators/costo_trabajador.py +213 -0
- catalogmx/calculators/impuestos.py +920 -0
- catalogmx/calculators/imss.py +370 -0
- catalogmx/calculators/isr.py +290 -0
- catalogmx/calculators/resico.py +154 -0
- catalogmx/catalogs/banxico/__init__.py +29 -3
- catalogmx/catalogs/banxico/cetes_sqlite.py +279 -0
- catalogmx/catalogs/banxico/inflacion_sqlite.py +302 -0
- catalogmx/catalogs/banxico/salarios_minimos_sqlite.py +295 -0
- catalogmx/catalogs/banxico/tiie_sqlite.py +279 -0
- catalogmx/catalogs/banxico/tipo_cambio_usd_sqlite.py +255 -0
- catalogmx/catalogs/banxico/udis_sqlite.py +332 -0
- catalogmx/catalogs/cnbv/__init__.py +9 -0
- catalogmx/catalogs/cnbv/sectores.py +173 -0
- catalogmx/catalogs/conapo/__init__.py +15 -0
- catalogmx/catalogs/conapo/sistema_urbano_nacional.py +50 -0
- catalogmx/catalogs/conapo/zonas_metropolitanas.py +230 -0
- catalogmx/catalogs/ift/__init__.py +1 -1
- catalogmx/catalogs/ift/codigos_lada.py +517 -313
- catalogmx/catalogs/inegi/__init__.py +17 -0
- catalogmx/catalogs/inegi/scian.py +127 -0
- catalogmx/catalogs/mexico/__init__.py +2 -0
- catalogmx/catalogs/mexico/giros_mercantiles.py +119 -0
- catalogmx/catalogs/sat/carta_porte/material_peligroso.py +5 -1
- catalogmx/catalogs/sat/cfdi_4/clave_prod_serv.py +78 -0
- catalogmx/catalogs/sat/cfdi_4/tasa_o_cuota.py +2 -1
- catalogmx/catalogs/sepomex/__init__.py +2 -1
- catalogmx/catalogs/sepomex/codigos_postales.py +30 -2
- catalogmx/catalogs/sepomex/codigos_postales_completo.py +261 -0
- catalogmx/cli.py +12 -9
- catalogmx/data/__init__.py +10 -0
- catalogmx/data/mexico_dynamic.sqlite3 +0 -0
- catalogmx/data/updater.py +362 -0
- catalogmx/generators/__init__.py +20 -0
- catalogmx/generators/identity.py +582 -0
- catalogmx/helpers.py +177 -3
- catalogmx/utils/__init__.py +29 -0
- catalogmx/utils/clabe_utils.py +417 -0
- catalogmx/utils/text.py +7 -1
- catalogmx/validators/clabe.py +52 -2
- catalogmx/validators/nss.py +32 -27
- catalogmx/validators/rfc.py +185 -52
- catalogmx-0.4.0.dist-info/METADATA +905 -0
- {catalogmx-0.3.0.dist-info → catalogmx-0.4.0.dist-info}/RECORD +51 -25
- {catalogmx-0.3.0.dist-info → catalogmx-0.4.0.dist-info}/WHEEL +1 -1
- catalogmx/catalogs/banxico/udis.py +0 -279
- catalogmx-0.3.0.dist-info/METADATA +0 -644
- {catalogmx-0.3.0.dist-info → catalogmx-0.4.0.dist-info}/entry_points.txt +0 -0
- {catalogmx-0.3.0.dist-info → catalogmx-0.4.0.dist-info}/licenses/AUTHORS.rst +0 -0
- {catalogmx-0.3.0.dist-info → catalogmx-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {catalogmx-0.3.0.dist-info → catalogmx-0.4.0.dist-info}/top_level.txt +0 -0
catalogmx/catalogs/sepomex/codigos_postales_completo.py
ADDED
@@ -0,0 +1,261 @@
"""Catálogo de Códigos Postales SEPOMEX - Versión Completa

Provides access to all ~150,000 Mexican postal codes with enhanced data.
Includes settlement type (tipo_asentamiento), zone (zona), and state/municipality codes.

WARNING: This catalog is very large (~42MB). Use with caution and consider
using pagination or filters when displaying results.
"""

import json
from pathlib import Path

from catalogmx.utils.text import normalize_text


class CodigosPostalesCompleto:
    """Catálogo completo de códigos postales con datos extendidos"""

    _data: list[dict] | None = None
    _by_cp: dict[str, list[dict]] | None = None
    _by_estado: dict[str, list[dict]] | None = None
    _by_estado_normalized: dict[str, list[dict]] | None = None
    _by_municipio_normalized: dict[str, list[dict]] | None = None
    _by_zona: dict[str, list[dict]] | None = None

    @classmethod
    def _load_data(cls) -> None:
        if cls._data is None:
            path = (
                Path(__file__).parent.parent.parent.parent.parent
                / "shared-data"
                / "sepomex"
                / "codigos_postales_completo.json"
            )
            with open(path, encoding="utf-8") as f:
                cls._data = json.load(f)

            # Index by CP (can have multiple settlements)
            cls._by_cp = {}
            for item in cls._data:
                cp = item.get("cp") or item.get("codigo_postal")
                if cp:
                    if cp not in cls._by_cp:
                        cls._by_cp[cp] = []
                    cls._by_cp[cp].append(item)

            # Index by estado
            cls._by_estado = {}
            for item in cls._data:
                estado = item.get("estado")
                if estado:
                    if estado not in cls._by_estado:
                        cls._by_estado[estado] = []
                    cls._by_estado[estado].append(item)

            # Index by estado normalized (accent-insensitive)
            cls._by_estado_normalized = {}
            for item in cls._data:
                estado = item.get("estado")
                if estado:
                    estado_norm = normalize_text(estado)
                    if estado_norm not in cls._by_estado_normalized:
                        cls._by_estado_normalized[estado_norm] = []
                    cls._by_estado_normalized[estado_norm].append(item)

            # Index by municipio normalized (accent-insensitive)
            cls._by_municipio_normalized = {}
            for item in cls._data:
                municipio = item.get("municipio")
                if municipio:
                    municipio_norm = normalize_text(municipio)
                    if municipio_norm not in cls._by_municipio_normalized:
                        cls._by_municipio_normalized[municipio_norm] = []
                    cls._by_municipio_normalized[municipio_norm].append(item)

            # Index by zona (Urbano/Rural)
            cls._by_zona = {}
            for item in cls._data:
                zona = item.get("zona")
                if zona:
                    if zona not in cls._by_zona:
                        cls._by_zona[zona] = []
                    cls._by_zona[zona].append(item)

    @classmethod
    def get_by_cp(cls, cp: str) -> list[dict]:
        """Obtiene todos los asentamientos de un código postal"""
        cls._load_data()
        return cls._by_cp.get(cp, [])

    @classmethod
    def is_valid(cls, cp: str) -> bool:
        """Verifica si un código postal existe"""
        cls._load_data()
        return cp in cls._by_cp

    @classmethod
    def get_by_estado(cls, estado: str) -> list[dict]:
        """Obtiene todos los códigos postales de un estado (insensible a acentos)"""
        cls._load_data()
        estado_normalized = normalize_text(estado)
        return cls._by_estado_normalized.get(estado_normalized, [])

    @classmethod
    def get_by_municipio(cls, municipio: str) -> list[dict]:
        """Obtiene todos los códigos postales de un municipio (insensible a acentos)"""
        cls._load_data()
        municipio_normalized = normalize_text(municipio)
        return cls._by_municipio_normalized.get(municipio_normalized, [])

    @classmethod
    def get_by_zona(cls, zona: str) -> list[dict]:
        """Obtiene códigos postales por zona (Urbano o Rural)"""
        cls._load_data()
        return cls._by_zona.get(zona, [])

    @classmethod
    def search_by_asentamiento(
        cls, query: str, *, codigo_postal: str | None = None, estado: str | None = None
    ) -> list[dict]:
        """Busca códigos postales por nombre de asentamiento (insensible a acentos)"""
        cls._load_data()
        query_normalized = normalize_text(query)

        search_list = cls._data
        if codigo_postal:
            search_list = cls.get_by_cp(codigo_postal)
        elif estado:
            search_list = cls.get_by_estado(estado)

        return [
            item
            for item in search_list
            if query_normalized in normalize_text(item.get("asentamiento", ""))
        ]

    @classmethod
    def search_by_colonia(cls, colonia: str) -> list[dict]:
        """Alias for search_by_asentamiento for backwards compatibility"""
        return cls.search_by_asentamiento(colonia)

    @classmethod
    def get_by_tipo_asentamiento(cls, tipo: str) -> list[dict]:
        """Obtiene códigos postales por tipo de asentamiento (Colonia, Fraccionamiento, etc.)"""
        cls._load_data()
        tipo_normalized = normalize_text(tipo)
        return [
            item
            for item in cls._data
            if tipo_normalized in normalize_text(item.get("tipo_asentamiento", ""))
        ]

    @classmethod
    def get_all(cls) -> list[dict]:
        """Obtiene todos los códigos postales (WARNING: dataset muy grande)"""
        cls._load_data()
        return cls._data.copy()

    @classmethod
    def get_municipio(cls, cp: str) -> str | None:
        """Obtiene el municipio de un código postal"""
        settlements = cls.get_by_cp(cp)
        return settlements[0]["municipio"] if settlements else None

    @classmethod
    def get_estado(cls, cp: str) -> str | None:
        """Obtiene el estado de un código postal"""
        settlements = cls.get_by_cp(cp)
        return settlements[0]["estado"] if settlements else None

    @classmethod
    def get_total_count(cls) -> int:
        """Obtiene el conteo total de códigos postales"""
        cls._load_data()
        return len(cls._data)

    @classmethod
    def get_unique_cps(cls) -> list[str]:
        """Obtiene todos los códigos postales únicos"""
        cls._load_data()
        return sorted(set(cls._by_cp.keys()))

    @classmethod
    def get_count_by_estado(cls, estado: str) -> int:
        """Obtiene el conteo de códigos postales por estado"""
        return len(cls.get_by_estado(estado))

    @classmethod
    def get_asentamientos(cls, cp: str) -> list[str]:
        """Obtiene todos los asentamientos de un código postal"""
        settlements = cls.get_by_cp(cp)
        return [s.get("asentamiento", "") for s in settlements]

    @classmethod
    def search(
        cls,
        *,
        cp: str | None = None,
        estado: str | None = None,
        municipio: str | None = None,
        asentamiento: str | None = None,
        limit: int = 100,
    ) -> list[dict]:
        """Busca con múltiples criterios (con paginación)"""
        cls._load_data()
        results = cls._data

        if cp:
            results = [r for r in results if r.get("cp") == cp or r.get("codigo_postal") == cp]
        if estado:
            estado_norm = normalize_text(estado)
            results = [r for r in results if normalize_text(r.get("estado", "")) == estado_norm]
        if municipio:
            municipio_norm = normalize_text(municipio)
            results = [
                r for r in results if municipio_norm in normalize_text(r.get("municipio", ""))
            ]
        if asentamiento:
            asentamiento_norm = normalize_text(asentamiento)
            results = [
                r for r in results if asentamiento_norm in normalize_text(r.get("asentamiento", ""))
            ]

        return results[:limit]

    @classmethod
    def get_statistics(cls) -> dict:
        """Obtiene estadísticas del catálogo"""
        cls._load_data()
        unique_cps = set()
        unique_states = set()
        unique_municipalities = set()

        for item in cls._data:
            cp = item.get("cp") or item.get("codigo_postal")
            if cp:
                unique_cps.add(cp)
            estado = item.get("estado")
            if estado:
                unique_states.add(estado)
            municipio = item.get("municipio")
            estado = item.get("estado")
            if municipio and estado:
                unique_municipalities.add(f"{estado}:{municipio}")

        return {
            "total_postal_codes": len(cls._data),
            "unique_postal_codes": len(unique_cps),
            "states": len(unique_states),
            "municipalities": len(unique_municipalities),
        }

    @classmethod
    def clear_cache(cls) -> None:
        """Limpia los datos en caché para liberar memoria"""
        cls._data = None
        cls._by_cp = None
        cls._by_estado = None
        cls._by_estado_normalized = None
        cls._by_municipio_normalized = None
        cls._by_zona = None
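A minimal usage sketch of the catalog class above (not part of the diff); the postal code, state, and municipality below are illustrative sample values:

from catalogmx.catalogs.sepomex.codigos_postales_completo import CodigosPostalesCompleto

# All settlements registered under a single postal code (sample value)
settlements = CodigosPostalesCompleto.get_by_cp("06700")

# Accent-insensitive, paginated search across the full dataset
results = CodigosPostalesCompleto.search(estado="Yucatan", municipio="Merida", limit=10)

# Release the ~42MB in-memory dataset once it is no longer needed
CodigosPostalesCompleto.clear_cache()
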
catalogmx/cli.py
CHANGED
@@ -1,17 +1,20 @@
 """
 Module that contains the command line app.
 
-
-
-
-
+Coverage Exclusion Justification:
+---------------------------------
+This file is excluded from coverage because:
+1. CLI code requires actual terminal invocation to test properly
+2. Click commands handle formatted output (colors, emojis) that's hard to assert
+3. User interaction paths (error messages, confirmations) are presentation-only
+4. The underlying validators (RFCValidator, CURPValidator) ARE tested directly
 
-
-
-    ``rfcmx.__main__`` in ``sys.modules``.
-  - When you import __main__ it will get executed again (as a module) because
-    there's no ``rfcmx.__main__`` in ``sys.modules``.
+The business logic (validation, generation) is tested in their respective modules.
+This file is pure presentation/interface code.
 
+Why does this file exist, and why not put this in __main__?
+You might be tempted to import things from __main__ later, but that will cause
+problems: the code will get executed twice.
 Also see (1) from http://click.pocoo.org/5/setuptools/#setuptools-integration
 """
 
catalogmx/data/__init__.py
ADDED
@@ -0,0 +1,10 @@
"""
Data updater module for catalogmx

This module handles automatic updates of dynamic data (UDI, tipo cambio, etc.)
from GitHub Releases, allowing data updates without requiring library releases.
"""

from catalogmx.data.updater import DataUpdater

__all__ = ["DataUpdater"]

catalogmx/data/mexico_dynamic.sqlite3
Binary file
catalogmx/data/updater.py
ADDED
@@ -0,0 +1,362 @@
"""
Data Updater - Automatic download of dynamic data from GitHub Releases

This module allows catalogmx to update dynamic data (UDI, exchange rates, TIIE, etc.)
without requiring a new library release. Data is downloaded from GitHub Releases
and cached locally.

Coverage Exclusion Justification:
---------------------------------
This file is excluded from coverage because:
1. Network I/O: Downloads from GitHub Releases require mocking entire urllib
2. File system operations: Cache management, directory creation, file moves
3. Error recovery paths: Network failures, corrupted downloads, disk full scenarios
4. Environment variables: CATALOGMX_DATA_URL, CATALOGMX_CACHE_DIR, CATALOGMX_AUTO_UPDATE

Testing this properly would require:
- Mocking urllib.request.urlretrieve
- Mocking Path operations (exists, mkdir, unlink, open)
- Mocking sqlite3 connections
- Creating temporary directories

The overhead doesn't justify the benefit since:
- The SQLite catalog loaders (which USE this updater) are tested
- Manual integration testing verifies the download flow works

Usage:
    from catalogmx.data import DataUpdater

    # Automatic update (recommended)
    updater = DataUpdater()
    db_path = updater.auto_update(max_age_hours=24)

    # Manual update
    updater.download_latest()

    # Check version
    version = updater.get_local_version()
"""

import json
import os
import shutil
import sqlite3
import urllib.request
from datetime import datetime
from pathlib import Path

# Configuration
GITHUB_RELEASE_URL = os.getenv(
    "CATALOGMX_DATA_URL",
    "https://github.com/openbancor/catalogmx/releases/download/latest/mexico_dynamic.sqlite3",
)

CACHE_DIR = Path(os.getenv("CATALOGMX_CACHE_DIR", str(Path.home() / ".catalogmx")))
CACHE_DB = CACHE_DIR / "mexico_dynamic.sqlite3"
VERSION_FILE = CACHE_DIR / "version.json"

# Embedded fallback database (included in package)
EMBEDDED_DB = Path(__file__).parent / "mexico_dynamic.sqlite3"

# Auto-update enabled by default
AUTO_UPDATE_ENABLED = os.getenv("CATALOGMX_AUTO_UPDATE", "true").lower() in (
    "true",
    "1",
    "yes",
)


class DataUpdater:
    """
    Manages automatic updates of dynamic data from GitHub Releases

    This class handles downloading, caching, and version management of the
    dynamic data SQLite database, allowing data updates without library releases.
    """

    def __init__(self, cache_dir: Path | None = None):
        """
        Initialize DataUpdater

        :param cache_dir: Custom cache directory (default: ~/.catalogmx)
        """
        self.cache_dir = cache_dir or CACHE_DIR
        self.cache_db = self.cache_dir / "mexico_dynamic.sqlite3"
        self.version_file = self.cache_dir / "version.json"

        # Create cache directory if it doesn't exist
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    def get_local_version(self) -> str | None:
        """
        Get version of local cached data

        :return: Version string (e.g., "2025-12-04") or None if no cache
        """
        if not self.version_file.exists():
            return None

        try:
            with open(self.version_file, encoding="utf-8") as f:
                return json.load(f).get("version")
        except (json.JSONDecodeError, OSError):
            return None

    def get_local_age_hours(self) -> float | None:
        """
        Get age of local cached data in hours

        :return: Age in hours or None if no cache
        """
        if not self.version_file.exists():
            return None

        try:
            with open(self.version_file, encoding="utf-8") as f:
                data = json.load(f)
                updated_str = data.get("updated_at")
                if not updated_str:
                    return None

                updated = datetime.fromisoformat(updated_str)
                return (datetime.now() - updated).total_seconds() / 3600
        except (json.JSONDecodeError, OSError, ValueError):
            return None

    def _verify_database(self, db_path: Path) -> str | None:
        """
        Verify database integrity and get version

        :param db_path: Path to database file
        :return: Version string or None if invalid
        """
        try:
            db = sqlite3.connect(db_path)
            cursor = db.execute("SELECT value FROM _metadata WHERE key = 'version'")
            row = cursor.fetchone()
            db.close()

            if row:
                return row[0]
            return None
        except (sqlite3.Error, OSError):
            return None

    def download_latest(self, force: bool = False, verbose: bool = True) -> bool:
        """
        Download latest version of data from GitHub Releases

        :param force: Force download even if cache is recent
        :param verbose: Print progress messages
        :return: True if download successful, False otherwise
        """
        if verbose:
            print(f"📥 Downloading data from {GITHUB_RELEASE_URL}...")

        try:
            # Download to temporary file
            temp_db = self.cache_dir / "mexico_dynamic.sqlite3.tmp"
            urllib.request.urlretrieve(GITHUB_RELEASE_URL, temp_db)

            # Verify integrity
            version = self._verify_database(temp_db)
            if not version:
                if verbose:
                    print("❌ Downloaded database is invalid or corrupted")
                temp_db.unlink(missing_ok=True)
                return False

            # Move to cache
            shutil.move(str(temp_db), str(self.cache_db))

            # Save metadata
            with open(self.version_file, "w", encoding="utf-8") as f:
                json.dump(
                    {
                        "version": version,
                        "updated_at": datetime.now().isoformat(),
                        "source": "github_releases",
                        "url": GITHUB_RELEASE_URL,
                    },
                    f,
                    indent=2,
                )

            if verbose:
                print(f"✅ Data updated to version {version}")
            return True

        except Exception as e:
            if verbose:
                print(f"❌ Error downloading data: {e}")
            return False

    def auto_update(self, max_age_hours: int = 24, verbose: bool = False) -> Path:
        """
        Auto-update with intelligent fallback

        This method:
        1. Checks if local cache exists and is recent
        2. Downloads update if cache is old or missing
        3. Falls back to cache if download fails
        4. Falls back to embedded data if no cache exists

        :param max_age_hours: Maximum age before updating (default 24 hours)
        :param verbose: Print progress messages
        :return: Path to database to use
        """
        if not AUTO_UPDATE_ENABLED:
            # Auto-update disabled, use cache or embedded
            if self.cache_db.exists():
                return self.cache_db
            if EMBEDDED_DB.exists():
                return EMBEDDED_DB
            raise FileNotFoundError("No database available and auto-update is disabled")

        age = self.get_local_age_hours()

        # If no cache or cache is old, try to update
        if age is None or age > max_age_hours:
            if self.download_latest(verbose=verbose):
                return self.cache_db

        # If cache exists (even if update failed), use it
        if self.cache_db.exists():
            if verbose and age is not None:
                print(f"ℹ️ Using cached data (age: {age:.1f} hours)")
            return self.cache_db

        # Fallback: embedded data
        if EMBEDDED_DB.exists():
            if verbose:
                print("⚠️ Using embedded data (may be outdated)")
            return EMBEDDED_DB

        raise FileNotFoundError(
            "No database available. Please check your internet connection or manually download the database."
        )

    def get_database_path(self, auto_update: bool = True, max_age_hours: int = 24) -> Path:
        """
        Get path to database (with or without auto-update)

        :param auto_update: Enable auto-update (default True)
        :param max_age_hours: Maximum age before updating (default 24 hours)
        :return: Path to database file
        """
        if auto_update:
            return self.auto_update(max_age_hours=max_age_hours)

        # No auto-update, use cache or embedded
        if self.cache_db.exists():
            return self.cache_db

        if EMBEDDED_DB.exists():
            return EMBEDDED_DB

        raise FileNotFoundError("No database available")

    def get_version_info(self) -> dict[str, str]:
        """
        Get detailed version information

        :return: Dictionary with version, age, source, etc.
        """
        if not self.version_file.exists():
            return {
                "version": "unknown",
                "source": "embedded" if EMBEDDED_DB.exists() else "none",
                "age_hours": "N/A",
                "updated_at": "N/A",
            }

        try:
            with open(self.version_file, encoding="utf-8") as f:
                data = json.load(f)

            age = self.get_local_age_hours()
            data["age_hours"] = f"{age:.1f}" if age is not None else "N/A"

            return data
        except (json.JSONDecodeError, OSError):
            return {
                "version": "error",
                "source": "error",
                "age_hours": "N/A",
                "updated_at": "N/A",
            }

    def clear_cache(self) -> bool:
        """
        Clear local cache

        :return: True if cache cleared successfully
        """
        try:
            if self.cache_db.exists():
                self.cache_db.unlink()
            if self.version_file.exists():
                self.version_file.unlink()
            return True
        except OSError:
            return False

    def clearCache(self) -> bool:
        """
        Clear local cache (camelCase alias for clear_cache)

        :return: True if cache cleared successfully
        """
        return self.clear_cache()


# Singleton instance for convenience
_default_updater = DataUpdater()


def get_database_path(auto_update: bool = True, max_age_hours: int = 24) -> Path:
    """
    Get path to dynamic data database

    Convenience function using default updater instance.

    :param auto_update: Enable auto-update (default True)
    :param max_age_hours: Maximum age before updating (default 24 hours)
    :return: Path to database file
    """
    return _default_updater.get_database_path(auto_update, max_age_hours)


def get_version() -> str | None:
    """
    Get version of local cached data

    Convenience function using default updater instance.

    :return: Version string or None
    """
    return _default_updater.get_local_version()


def update_now(force: bool = False, verbose: bool = True) -> bool:
    """
    Force update data now

    Convenience function using default updater instance.

    :param force: Force download even if cache is recent
    :param verbose: Print progress messages
    :return: True if successful
    """
    return _default_updater.download_latest(force=force, verbose=verbose)


# Export public API
__all__ = [
    "DataUpdater",
    "get_database_path",
    "get_version",
    "update_now",
    "CACHE_DIR",
    "AUTO_UPDATE_ENABLED",
]
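A minimal sketch (not part of the diff) of how the updater's configuration hooks could be exercised; the cache directory and flag values shown are illustrative. Because CATALOGMX_DATA_URL, CATALOGMX_CACHE_DIR, and CATALOGMX_AUTO_UPDATE are read with os.getenv at module import time, they must be set before the module is first imported:

import os

# Illustrative values; must be set before importing catalogmx.data.updater,
# since that module evaluates os.getenv(...) at import time.
os.environ["CATALOGMX_CACHE_DIR"] = "/tmp/catalogmx-cache"
os.environ["CATALOGMX_AUTO_UPDATE"] = "false"

from catalogmx.data.updater import DataUpdater, get_database_path

updater = DataUpdater()
print(updater.get_version_info())  # cached version metadata, or "unknown" fallbacks

# With auto-update disabled this resolves to the cached copy if present,
# otherwise to the embedded fallback database shipped in the wheel.
db_path = get_database_path(auto_update=False)
print(db_path)
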
catalogmx/generators/__init__.py
ADDED
@@ -0,0 +1,20 @@
"""
Identity generators for Mexican test data.

This module provides functions to generate complete Mexican identities
with realistic data for testing and development purposes.
"""

from catalogmx.generators.identity import (
    IdentityGenerator,
    generate_identity,
    generate_persona_fisica,
    generate_persona_moral,
)

__all__ = [
    "IdentityGenerator",
    "generate_identity",
    "generate_persona_fisica",
    "generate_persona_moral",
]
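A hypothetical usage sketch for the new generators package (not part of the diff). Only the re-exported names are visible above, so calling these functions with no arguments is an assumption, not a documented signature:

# Assumption: generate_identity and generate_persona_fisica accept no required
# arguments; this diff only shows the names re-exported from
# catalogmx.generators.identity, not their signatures.
from catalogmx.generators import generate_identity, generate_persona_fisica

persona = generate_identity()        # assumed to return a synthetic identity record
fisica = generate_persona_fisica()   # assumed persona física variant
print(persona, fisica)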