catalogmx 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- catalogmx/__init__.py +56 -0
- catalogmx/catalogs/__init__.py +5 -0
- catalogmx/catalogs/banxico/__init__.py +24 -0
- catalogmx/catalogs/banxico/banks.py +136 -0
- catalogmx/catalogs/banxico/codigos_plaza.py +287 -0
- catalogmx/catalogs/banxico/instituciones_financieras.py +338 -0
- catalogmx/catalogs/banxico/monedas_divisas.py +386 -0
- catalogmx/catalogs/banxico/udis.py +279 -0
- catalogmx/catalogs/ift/__init__.py +15 -0
- catalogmx/catalogs/ift/codigos_lada.py +426 -0
- catalogmx/catalogs/ift/operadores_moviles.py +315 -0
- catalogmx/catalogs/inegi/__init__.py +21 -0
- catalogmx/catalogs/inegi/localidades.py +207 -0
- catalogmx/catalogs/inegi/municipios.py +73 -0
- catalogmx/catalogs/inegi/municipios_completo.py +236 -0
- catalogmx/catalogs/inegi/states.py +148 -0
- catalogmx/catalogs/mexico/__init__.py +17 -0
- catalogmx/catalogs/mexico/hoy_no_circula.py +215 -0
- catalogmx/catalogs/mexico/placas_formatos.py +184 -0
- catalogmx/catalogs/mexico/salarios_minimos.py +156 -0
- catalogmx/catalogs/mexico/uma.py +207 -0
- catalogmx/catalogs/sat/__init__.py +13 -0
- catalogmx/catalogs/sat/carta_porte/__init__.py +19 -0
- catalogmx/catalogs/sat/carta_porte/aeropuertos.py +76 -0
- catalogmx/catalogs/sat/carta_porte/carreteras.py +59 -0
- catalogmx/catalogs/sat/carta_porte/config_autotransporte.py +54 -0
- catalogmx/catalogs/sat/carta_porte/material_peligroso.py +66 -0
- catalogmx/catalogs/sat/carta_porte/puertos_maritimos.py +63 -0
- catalogmx/catalogs/sat/carta_porte/tipo_embalaje.py +48 -0
- catalogmx/catalogs/sat/carta_porte/tipo_permiso.py +54 -0
- catalogmx/catalogs/sat/cfdi_4/__init__.py +42 -0
- catalogmx/catalogs/sat/cfdi_4/clave_prod_serv.py +383 -0
- catalogmx/catalogs/sat/cfdi_4/clave_unidad.py +298 -0
- catalogmx/catalogs/sat/cfdi_4/exportacion.py +45 -0
- catalogmx/catalogs/sat/cfdi_4/forma_pago.py +45 -0
- catalogmx/catalogs/sat/cfdi_4/impuesto.py +57 -0
- catalogmx/catalogs/sat/cfdi_4/meses.py +34 -0
- catalogmx/catalogs/sat/cfdi_4/metodo_pago.py +45 -0
- catalogmx/catalogs/sat/cfdi_4/objeto_imp.py +45 -0
- catalogmx/catalogs/sat/cfdi_4/periodicidad.py +34 -0
- catalogmx/catalogs/sat/cfdi_4/regimen_fiscal.py +57 -0
- catalogmx/catalogs/sat/cfdi_4/tasa_o_cuota.py +42 -0
- catalogmx/catalogs/sat/cfdi_4/tipo_comprobante.py +45 -0
- catalogmx/catalogs/sat/cfdi_4/tipo_factor.py +34 -0
- catalogmx/catalogs/sat/cfdi_4/tipo_relacion.py +45 -0
- catalogmx/catalogs/sat/cfdi_4/uso_cfdi.py +45 -0
- catalogmx/catalogs/sat/comercio_exterior/__init__.py +39 -0
- catalogmx/catalogs/sat/comercio_exterior/claves_pedimento.py +77 -0
- catalogmx/catalogs/sat/comercio_exterior/estados.py +122 -0
- catalogmx/catalogs/sat/comercio_exterior/incoterms.py +226 -0
- catalogmx/catalogs/sat/comercio_exterior/monedas.py +107 -0
- catalogmx/catalogs/sat/comercio_exterior/motivos_traslado.py +54 -0
- catalogmx/catalogs/sat/comercio_exterior/paises.py +88 -0
- catalogmx/catalogs/sat/comercio_exterior/registro_ident_trib.py +76 -0
- catalogmx/catalogs/sat/comercio_exterior/unidades_aduana.py +54 -0
- catalogmx/catalogs/sat/comercio_exterior/validator.py +212 -0
- catalogmx/catalogs/sat/nomina/__init__.py +19 -0
- catalogmx/catalogs/sat/nomina/banco.py +50 -0
- catalogmx/catalogs/sat/nomina/periodicidad_pago.py +48 -0
- catalogmx/catalogs/sat/nomina/riesgo_puesto.py +56 -0
- catalogmx/catalogs/sat/nomina/tipo_contrato.py +47 -0
- catalogmx/catalogs/sat/nomina/tipo_jornada.py +42 -0
- catalogmx/catalogs/sat/nomina/tipo_nomina.py +52 -0
- catalogmx/catalogs/sat/nomina/tipo_regimen.py +47 -0
- catalogmx/catalogs/sepomex/__init__.py +5 -0
- catalogmx/catalogs/sepomex/codigos_postales.py +184 -0
- catalogmx/cli.py +185 -0
- catalogmx/helpers.py +324 -0
- catalogmx/utils/text.py +55 -0
- catalogmx/validators/__init__.py +0 -0
- catalogmx/validators/clabe.py +233 -0
- catalogmx/validators/curp.py +623 -0
- catalogmx/validators/nss.py +255 -0
- catalogmx/validators/rfc.py +1004 -0
- catalogmx-0.3.0.dist-info/METADATA +644 -0
- catalogmx-0.3.0.dist-info/RECORD +81 -0
- catalogmx-0.3.0.dist-info/WHEEL +5 -0
- catalogmx-0.3.0.dist-info/entry_points.txt +2 -0
- catalogmx-0.3.0.dist-info/licenses/AUTHORS.rst +5 -0
- catalogmx-0.3.0.dist-info/licenses/LICENSE +19 -0
- catalogmx-0.3.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1004 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
import datetime
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
import unidecode
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class RFCGeneral:
    """
    Shared constants for working with the RFC, the Mexican Tax ID Code
    (Registro Federal de Contribuyentes).

    Attributes:
        general_regex:
            Compiled pattern every RFC has to satisfy: 3 or 4 characters from
            [A-Z&Ñ] (derived from the name or company), a YYMMDD date (birth or
            foundation date), 2 homoclave characters from [A-Z0-9] and a final
            checksum character from [0-9A].
        date_regex:
            Same pattern with a capture group around the date element.
        homoclave_regex:
            Same pattern with a capture group around the 2 homoclave characters.
        homoclave_characters:
            Characters accepted in the first 2 homoclave positions (no letter O).
        checksum_table:
            Two-digit code of each RFC character, used to compute the checksum.
        quotient_remaining_table:
            Two-digit code of each name character, used to compute the homoclave.
        homoclave_assign_table:
            Characters a homoclave quotient/remainder (0-33) is translated to.
    """

    general_regex = re.compile(r"[A-Z&Ñ]{3,4}[0-9]{6}[A-Z0-9]{2}[0-9A]")
    date_regex = r"[A-Z&Ñ]{3,4}([0-9]{6})[A-Z0-9]{2}[0-9A]"
    homoclave_regex = r"[A-Z&Ñ]{3,4}[0-9]{6}([A-Z0-9]{2})[0-9A]"
    homoclave_characters = "ABCDEFGHIJKLMNPQRSTUVWXYZ0123456789"

    # Each character's code is its position in this alphabet ("0" -> "00",
    # "A" -> "10", "&" -> "24", "O" -> "25", " " -> "37", "Ñ" -> "38").
    checksum_table = {
        symbol: f"{position:02d}"
        for position, symbol in enumerate("0123456789ABCDEFGHIJKLMN&OPQRSTUVWXYZ Ñ")
    }

    # The letter codes are not contiguous: 20, 30 and 31 are skipped.
    quotient_remaining_table = {
        " ": "00",
        **{digit: f"0{digit}" for digit in "0123456789"},
        "&": "10",
        **{letter: str(code) for code, letter in enumerate("ABCDEFGHI", start=11)},
        **{letter: str(code) for code, letter in enumerate("JKLMNOPQR", start=21)},
        **{letter: str(code) for code, letter in enumerate("STUVWXYZ", start=32)},
        "Ñ": "40",
    }

    # 34 entries: digits 1-9 followed by the alphabet without the letter O.
    homoclave_assign_table = list("123456789ABCDEFGHIJKLMNPQRSTUVWXYZ")
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
class RFCValidator(RFCGeneral):
    """
    Loads an RFC, Mexican Tax ID Code (Registro Federal de Contribuyentes),
    and provides functions to determine its validity.
    """

    def __init__(self, rfc: str):
        """
        :param rfc: The RFC code to be validated. A non-empty ``str`` is upper-cased
            and stripped; any other value leaves ``self.rfc`` empty and marks the
            general validation as failed.
        """
        self.rfc = ""
        if bool(rfc) and isinstance(rfc, str):
            self.rfc = rfc.upper().strip()
            # None means "not evaluated yet"; validate_general_regex() caches its verdict.
            self._general_validation = None
        else:
            self._general_validation = False

    def validators(self, strict: bool = True) -> dict:
        """
        Returns a dictionary with the validations.

        :param strict: If False then the checksum test won't be run.
        :return: A dictionary mapping each validation name to its boolean result.
        """
        validations = {
            "general_regex": self.validate_general_regex,
            "date_format": self.validate_date,
            "homoclave": self.validate_homoclave,
        }
        if strict:
            validations["checksum"] = self.validate_checksum
        return {name: check() for name, check in validations.items()}

    def validate(self, strict: bool = True) -> bool:
        """
        Retrieves the result of the validations and verifies all of them passed.

        :param strict: If False the checksum won't be checked.
        :return: True if the RFC is valid, False if the RFC is invalid.
        """
        return all(self.validators(strict=strict).values())

    is_valid = validate

    def validate_date(self) -> bool:
        """
        Checks if the date element (YYMMDD) in the RFC code is a real calendar date.
        """
        if not self.validate_general_regex():
            return False
        found = re.findall(self.date_regex, self.rfc)
        if not found:
            return False
        try:
            datetime.datetime.strptime(found[0], "%y%m%d")
        except ValueError:
            return False
        return True

    def validate_homoclave(self) -> bool:
        """
        Checks if the homoclave's first 2 characters are allowed characters.
        """
        if not self.validate_general_regex():
            return False
        found = re.findall(self.homoclave_regex, self.rfc)
        if not found:
            return False
        return all(character in self.homoclave_characters for character in found[0])

    def validate_general_regex(self) -> bool:
        """
        Checks the length of the RFC (12 or 13) and that the WHOLE string matches
        the general regex. The verdict is cached after the first evaluation.
        """
        if self._general_validation is None:
            # fullmatch, not match: a prefix match would accept a 12-character RFC
            # followed by one arbitrary trailing character (13 characters total).
            self._general_validation = len(self.rfc) in (12, 13) and bool(
                self.general_regex.fullmatch(self.rfc)
            )
        return self._general_validation

    def detect_fisica_moral(self) -> str:
        """
        Returns a string based on the kind of RFC
        ("Persona Moral", "Persona Física", "Genérico" or "RFC Inválido").
        """
        if not self.validate_general_regex():
            return "RFC Inválido"
        if self.is_generic():
            return "Genérico"
        if self.is_moral():
            return "Persona Moral"
        return "Persona Física"

    def is_generic(self) -> bool:
        """
        Checks if the RFC is a Generic one.

        Generic RFC is used for non-specific recipients of Electronic Invoices.
        XAXX010101000 for Mexican non-specific recipients
        XEXX010101000 for Non-Mexican recipients, usually export invoices.

        >>> RFCValidator('XAXX010101000').is_generic()
        True
        """
        return self.rfc in ("XAXX010101000", "XEXX010101000")

    def is_fisica(self) -> bool:
        """
        Check if the code belongs to a "persona física" (individual).

        :raises ValueError: if the RFC does not pass the general validation.
        """
        if not self.validate_general_regex():
            raise ValueError("Invalid RFC")
        # After a full match the 4th character is either a digit (persona moral)
        # or one of [A-Z&Ñ]; "&" is not alphabetic, so test for "not a digit".
        return not self.rfc[3].isdigit() and not self.is_generic()

    def is_moral(self) -> bool:
        """
        Check if the code belongs to "persona moral" (corporation or association).

        :raises ValueError: if the RFC does not pass the general validation.
        """
        if not self.validate_general_regex():
            raise ValueError("Invalid RFC")
        return self.rfc[3].isdigit()

    def validate_checksum(self) -> bool:
        """
        Calculates the checksum of the RFC and verifies it's equal to the last character.

        Generic RFCs' checksums are not calculated since they are incorrect (they're always 0).
        In 99% of the RFC codes this is correct. In 1% of them, for unknown reasons not
        clarified by the Tax Authority, the checksum doesn't fit this algorithm. Be aware
        that an RFC may have an "invalid" checksum but still be valid if a
        "Cédula de Identificación Fiscal" is given.
        """
        if self.validate_general_regex():
            return (
                self.rfc[-1] == self.calculate_last_digit(self.rfc, with_checksum=True)
                or self.is_generic()
            )
        return False

    @classmethod
    def calculate_last_digit(cls, rfc: str, with_checksum: bool = True) -> str | bool:
        """
        Calculates the checksum of an RFC.

        The checksum is calculated with the first 12 characters of the RFC.
        If only 11 are available, a space is added at the beginning of the string.

        :param rfc: RFC text; stripped and upper-cased before use.
        :param with_checksum: True when ``rfc`` already ends with a checksum
            character, which is then dropped before calculating.
        :return: The checksum character ("0"-"9" or "A"), or False when ``rfc``
            is empty or not a string.
        :raises KeyError: if ``rfc`` contains a character missing from ``checksum_table``.
        """
        if not (bool(rfc) and isinstance(rfc, str)):
            return False
        payload = rfc.strip().upper()
        if with_checksum:
            payload = payload[:-1]
        # Kept as an assert so existing callers still see AssertionError.
        assert len(payload) in (11, 12), "RFC without checksum must have 11 or 12 characters"
        if len(payload) == 11:
            payload = payload.rjust(12)
        # Weights run from 13 (first character) down to 2 (twelfth character).
        total = sum(
            int(cls.checksum_table[character]) * weight
            for weight, character in zip(range(13, 1, -1), payload, strict=False)
        )
        remainder = total % 11
        if remainder == 0:
            return "0"
        complement = 11 - remainder
        return "A" if complement == 10 else str(complement)
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
class RFCGeneratorUtils(RFCGeneral):
    """Word lists, number tables and name-cleaning helpers shared by the RFC generators."""

    vocales = "AEIOU"
    # Particles dropped from the names of individuals before taking letters.
    excluded_words_fisicas = ["DE", "LA", "LAS", "MC", "VON", "DEL", "LOS", "Y", "MAC", "VAN", "MI"]
    # Four-letter keys considered offensive; the generator replaces their last letter with "X".
    cacophonic_words = [
        "BUEI",
        "BUEY",
        "CACA",
        "CACO",
        "CAGA",
        "CAGO",
        "CAKA",
        "COGE",
        "COJA",
        "COJE",
        "COJI",
        "COJO",
        "CULO",
        "FETO",
        "GUEY",
        "JOTO",
        "KACA",
        "KACO",
        "KAGA",
        "KAGO",
        "KOGE",
        "KOJO",
        "KAKA",
        "KULO",
        "MAME",
        "MAMO",
        "MEAR",
        "MEON",
        "MION",
        "MOCO",
        "MULA",
        "PEDA",
        "PEDO",
        "PENE",
        "PUTA",
        "PUTO",
        "QULO",
        "RATA",
        "RUIN",
    ]
    # Full list of excluded words according to the SAT document
    excluded_words_morales = [
        "EL",
        "LA",
        "DE",
        "LOS",
        "LAS",
        "Y",
        "DEL",
        "MI",
        "COMPAÑIA",
        "COMPAÑÍA",
        "CIA",
        "CIA.",
        "SOCIEDAD",
        "SOC",
        "SOC.",
        "COOPERATIVA",
        "COOP",
        "COOP.",
        "S.A.",
        "SA",
        "S.A",
        "S. A.",
        "S. A",
        "S.A.B.",
        "SAB",
        "S.A.B",
        "S. A. B.",
        "S. A. B",
        "S. DE R.L.",
        "S DE RL",
        "SRL",
        "S.R.L.",
        "S. R. L.",
        "S. EN C.",
        "S EN C",
        "S.C.",
        "SC",
        "S. EN C. POR A.",
        "S EN C POR A",
        "S. EN N.C.",
        "S EN NC",
        "A.C.",
        "AC",
        "A. C.",
        "A. EN P.",
        "A EN P",
        "S.C.L.",
        "SCL",
        "S.N.C.",
        "SNC",
        "C.V.",
        "CV",
        "C. V.",
        "SA DE CV",
        "S.A. DE C.V.",
        "SA DE CV MI",
        "S.A. DE C.V. MI",
        "S.A.B. DE C.V.",
        "SAB DE CV",
        "S.A.B DE C.V",
        "SRL DE CV",
        "S.R.L. DE C.V.",
        "SRL DE CV MI",
        "SRL MI",
        "THE",
        "OF",
        "COMPANY",
        "AND",
        "CO",
        "CO.",
        "MC",
        "VON",
        "MAC",
        "VAN",
        "PARA",
        "POR",
        "AL",
        "E",
        "EN",
        "CON",
        "SUS",
        "A",
    ]

    # Characters kept verbatim by clean_name(); everything else goes through unidecode.
    allowed_chars = list("ABCDEFGHIJKLMNÑOPQRSTUVWXYZ&")

    # Number-to-text conversion table
    numeros_texto = {
        "0": "CERO",
        "1": "UNO",
        "2": "DOS",
        "3": "TRES",
        "4": "CUATRO",
        "5": "CINCO",
        "6": "SEIS",
        "7": "SIETE",
        "8": "OCHO",
        "9": "NUEVE",
        "10": "DIEZ",
        "11": "ONCE",
        "12": "DOCE",
        "13": "TRECE",
        "14": "CATORCE",
        "15": "QUINCE",
        "16": "DIECISEIS",
        "17": "DIECISIETE",
        "18": "DIECIOCHO",
        "19": "DIECINUEVE",
        "20": "VEINTE",
    }

    # Roman-to-arabic numeral table
    numeros_romanos = {
        "I": 1,
        "II": 2,
        "III": 3,
        "IV": 4,
        "V": 5,
        "VI": 6,
        "VII": 7,
        "VIII": 8,
        "IX": 9,
        "X": 10,
        "XI": 11,
        "XII": 12,
        "XIII": 13,
        "XIV": 14,
        "XV": 15,
        "XVI": 16,
        "XVII": 17,
        "XVIII": 18,
        "XIX": 19,
        "XX": 20,
    }

    @classmethod
    def convertir_numero_a_texto(cls, numero_str: str) -> str:
        """
        Convert a number (arabic or roman, 0-20) to its Spanish text representation.

        :param numero_str: The number as text; stripped and upper-cased before lookup.
        :return: The Spanish word, or the normalized input when it cannot be converted.
        """
        numero_str = numero_str.strip().upper()

        # Try as a roman numeral
        if numero_str in cls.numeros_romanos:
            numero_arabigo = str(cls.numeros_romanos[numero_str])
            if numero_arabigo in cls.numeros_texto:
                return cls.numeros_texto[numero_arabigo]

        # Try as an arabic number
        if numero_str in cls.numeros_texto:
            return cls.numeros_texto[numero_str]

        # Not in the table as written (e.g. "07"): try parsing the digits
        try:
            num = int(numero_str)
        except ValueError:
            return numero_str  # Cannot be converted, return the normalized input
        if 0 <= num <= 20:
            return cls.numeros_texto[str(num)]
        return numero_str

    @classmethod
    def clean_name(cls, nombre: str) -> str:
        """
        Normalize a personal name for RFC calculation.

        The name is upper-cased, the words in ``excluded_words_fisicas`` are dropped,
        and every character outside ``allowed_chars`` is transliterated with unidecode.

        :param nombre: Name or surname to clean.
        :return: The cleaned, upper-cased name.
        """
        # Upper-case BEFORE filtering so lower-case particles ("de", "la") are dropped too.
        kept_words = " ".join(
            word for word in nombre.upper().split(" ") if word not in cls.excluded_words_fisicas
        )
        normalized = kept_words.strip().upper()
        transliterated = "".join(
            char if char in cls.allowed_chars else unidecode.unidecode(char)
            for char in normalized
        )
        return transliterated.strip().upper()

    @staticmethod
    def name_adapter(name: str, non_strict: bool = False) -> str:
        """
        Return ``name`` upper-cased and stripped.

        :param name: The value to adapt.
        :param non_strict: When True, a missing/empty non-string value (e.g. None)
            is accepted and adapted to an empty string.
        :return: The adapted name.
        :raises ValueError: if ``name`` is not a string and cannot be adapted.
        """
        if isinstance(name, str):
            return name.upper().strip()
        if non_strict and not name:
            return ""
        # Previously a truthy non-string with non_strict=True fell through and
        # returned None, breaking the declared ``str`` return type.
        raise ValueError("name must be a string")
|
|
591
|
+
|
|
592
|
+
|
|
593
|
+
class RFCGeneratorFisicas(RFCGeneratorUtils):
    """
    RFC generator for Persona Física (individuals).

    The generated RFC is: 4 letters derived from the names, the birth date as
    YYMMDD, a 2-character homoclave and 1 checksum character.
    """

    def __init__(self, paterno: str, materno: str, nombre: str, fecha: datetime.date):
        """
        :param paterno: Paternal surname (required, non-blank).
        :param materno: Maternal surname (may be empty or None).
        :param nombre: Given name(s) (required, non-blank).
        :param fecha: Date of birth.
        :raises ValueError: if ``paterno`` or ``nombre`` is blank or ``fecha`` is not a date.
        """
        if paterno.strip() and nombre.strip() and isinstance(fecha, datetime.date):
            self.paterno = paterno
            self.materno = materno
            self.nombre = nombre
            self.dob = fecha
            self._rfc = ""
        else:
            raise ValueError("Invalid Values")

    @property
    def paterno(self) -> str:
        """Paternal surname, upper-cased and stripped."""
        return self._paterno

    @paterno.setter
    def paterno(self, name: str):
        self._paterno = self.name_adapter(name)
        self._rfc = ""  # drop the cached RFC so it is regenerated from the new value

    @property
    def materno(self) -> str:
        """Maternal surname, upper-cased and stripped ("" when missing)."""
        return self._materno

    @materno.setter
    def materno(self, name: str):
        self._materno = self.name_adapter(name, non_strict=True)
        self._rfc = ""  # drop the cached RFC so it is regenerated from the new value

    @property
    def nombre(self) -> str:
        """Given name(s), upper-cased and stripped."""
        return self._nombre

    @nombre.setter
    def nombre(self, name: str):
        self._nombre = self.name_adapter(name)
        self._rfc = ""  # drop the cached RFC so it is regenerated from the new value

    @property
    def dob(self) -> datetime.date:
        """Date of birth."""
        return self._dob

    @dob.setter
    def dob(self, date: datetime.date):
        # Values that are not dates are ignored and the previous value is kept.
        if isinstance(date, datetime.date):
            self._dob = date
            self._rfc = ""  # drop the cached RFC so it is regenerated from the new value

    @property
    def rfc(self) -> str:
        """The complete RFC; generated on first access and cached until an input changes."""
        if not self._rfc:
            partial_rfc = self.generate_letters() + self.generate_date() + self.homoclave
            self._rfc = partial_rfc + RFCValidator.calculate_last_digit(
                partial_rfc, with_checksum=False
            )
        return self._rfc

    def generate_date(self) -> str:
        """Return the date of birth formatted as YYMMDD."""
        return self.dob.strftime("%y%m%d")

    def generate_letters(self) -> str:
        """
        Build the 4-letter key from the cleaned names.

        Normally: first letter of the paternal surname, its first vowel after the
        first letter, first letter of the maternal surname and first letter of the
        given name. When a piece is missing (no such vowel, or no maternal surname),
        the second letter of the given name fills the gap; when both are missing,
        the first two letters of the paternal surname and of the given name are used.
        A key found in ``cacophonic_words`` gets its last letter replaced by "X".

        :raises IndexError: if a cleaned name is too short to supply a needed letter.
        """
        paterno = self.paterno_calculo
        needs_second_name_letter = False
        letters = [paterno[0]]
        # str.find returns -1 for vowels that do not appear after the first letter.
        vowel_positions = [pos for pos in map(paterno[1:].find, self.vocales) if pos >= 0]
        if vowel_positions:
            letters.append(paterno[min(vowel_positions) + 1])
        else:
            needs_second_name_letter = True
        materno = self.materno_calculo
        if materno:
            letters.append(materno[0])
        elif needs_second_name_letter:
            letters.append(paterno[1])
        else:
            needs_second_name_letter = True
        iniciales = self.nombre_iniciales
        letters.append(iniciales[0])
        if needs_second_name_letter:
            letters.append(iniciales[1])
        clave = "".join(letters)
        if clave in self.cacophonic_words:
            clave = clave[:-1] + "X"
        return clave

    @property
    def paterno_calculo(self) -> str:
        """Paternal surname after ``clean_name``."""
        return self.clean_name(self.paterno)

    @property
    def materno_calculo(self) -> str:
        """Maternal surname after ``clean_name``."""
        return self.clean_name(self.materno)

    @property
    def nombre_calculo(self) -> str:
        """Given name(s) after ``clean_name``."""
        return self.clean_name(self.nombre)

    def nombre_iscompound(self) -> bool:
        """Return True when the cleaned given name has more than one space-separated part."""
        return len(self.nombre_calculo.split(" ")) > 1

    @property
    def nombre_iniciales(self) -> str:
        """
        Given name used to take initials from: a compound name that starts with
        MARIA or JOSE drops that first word; any other name is used as is.
        """
        cleaned = self.nombre_calculo
        if self.nombre_iscompound():
            first, *rest = cleaned.split(" ")
            if first in ("MARIA", "JOSE"):
                # strip(): repeated spaces in the input would otherwise leave a
                # leading blank that ends up as an RFC letter.
                return " ".join(rest).strip()
        return cleaned

    @property
    def nombre_completo(self) -> str:
        """Cleaned paternal, maternal and given names joined by single spaces (empty parts skipped)."""
        return " ".join(
            comp
            for comp in (self.paterno_calculo, self.materno_calculo, self.nombre_calculo)
            if comp
        )

    @property
    def cadena_homoclave(self) -> str:
        """
        Digit string for the homoclave: a leading "0" followed by the
        ``quotient_remaining_table`` code of every character of the full name.

        :raises KeyError: if the full name has a character missing from the table.
        """
        return "0" + "".join(
            self.quotient_remaining_table[character] for character in self.nombre_completo
        )

    @property
    def homoclave(self) -> str:
        """
        Two-character homoclave derived from the full name.

        Every overlapping digit pair of ``cadena_homoclave`` is multiplied by its
        own second digit; the last three digits of the sum are divided by 34 and
        the quotient and remainder are looked up in ``homoclave_assign_table``.
        """
        cadena = self.cadena_homoclave
        suma = (
            sum(int(cadena[n : n + 2]) * int(cadena[n + 1]) for n in range(len(cadena) - 1)) % 1000
        )
        cociente, residuo = divmod(suma, 34)
        return self.homoclave_assign_table[cociente] + self.homoclave_assign_table[residuo]
|
|
726
|
+
|
|
727
|
+
|
|
728
|
+
class RFCGeneratorMorales(RFCGeneratorUtils):
|
|
729
|
+
"""
|
|
730
|
+
RFC Generator for Persona Moral (Legal Entities/Companies)
|
|
731
|
+
|
|
732
|
+
The RFC for a legal entity is composed of:
|
|
733
|
+
- 3 letters derived from the company name
|
|
734
|
+
- 6 digits for the incorporation/foundation date (YYMMDD)
|
|
735
|
+
- 2 alphanumeric characters for homoclave
|
|
736
|
+
- 1 checksum digit
|
|
737
|
+
Total: 12 characters
|
|
738
|
+
"""
|
|
739
|
+
|
|
740
|
+
def __init__(self, razon_social: str, fecha: datetime.date):
    """
    Set up the generator for a Persona Moral.

    :param razon_social: Company name (razón social); must not be blank
    :param fecha: Incorporation/foundation date
    :raises ValueError: if ``razon_social`` is blank or ``fecha`` is not a date
    """
    arguments_ok = razon_social.strip() and isinstance(fecha, datetime.date)
    if not arguments_ok:
        raise ValueError(
            "Invalid Values: razon_social must be non-empty and fecha must be a date"
        )
    self.razon_social = razon_social
    self.fecha = fecha
    # Empty string means "not generated yet"; the rfc property fills it lazily.
    self._rfc = ""
|
|
755
|
+
|
|
756
|
+
@property
def razon_social(self) -> str:
    """Company name, stored upper-cased and stripped."""
    return self._razon_social

@razon_social.setter
def razon_social(self, name: str):
    """
    Store the company name upper-cased and stripped.

    :raises ValueError: if ``name`` is not a string
    """
    if not isinstance(name, str):
        raise ValueError("razon_social must be a string")
    self._razon_social = name.upper().strip()
    # Drop any cached RFC so the next read of ``rfc`` reflects the new name.
    self._rfc = ""
|
|
766
|
+
|
|
767
|
+
@property
def fecha(self) -> datetime.date:
    """Incorporation/foundation date."""
    return self._fecha

@fecha.setter
def fecha(self, date: datetime.date):
    """
    Store the incorporation/foundation date.

    :raises ValueError: if ``date`` is not a ``datetime.date``
    """
    if not isinstance(date, datetime.date):
        raise ValueError("fecha must be a datetime.date")
    self._fecha = date
    # Drop any cached RFC so the next read of ``rfc`` reflects the new date.
    self._rfc = ""
|
|
777
|
+
|
|
778
|
+
@property
def rfc(self) -> str:
    """Return the complete RFC, building and caching it on first access."""
    if self._rfc:
        return self._rfc
    letters = self.generate_letters()
    date_part = self.generate_date()
    unchecked = letters + date_part + self.homoclave
    # with_checksum=False: the text passed in does not yet end with a check character.
    check_character = RFCValidator.calculate_last_digit(unchecked, with_checksum=False)
    self._rfc = unchecked + check_character
    return self._rfc
|
|
787
|
+
|
|
788
|
+
def generate_date(self) -> str:
    """Return ``self.fecha`` rendered as six digits: year, month, day (YYMMDD)."""
    return f"{self.fecha:%y%m%d}"
|
|
791
|
+
|
|
792
|
+
@property
def razon_social_calculo(self) -> str:
    """
    Return the company name normalised for RFC calculation.

    The upper-cased, stripped ``razon_social`` goes through these stages,
    in order:

    1. Excluded words (``excluded_words_morales``) are removed when they
       appear as whole tokens, longest first, including the forms followed
       by a comma or a dot and the forms at the start or end of the name.
    2. Every character that is not an ASCII letter, a digit, a space, a dot
       or one of the accented Spanish letters is replaced by a space.
    3. "Ñ"/"ñ" are replaced by "X".
    4. Dotted initials such as "F.A.Z." are split into separate one- or
       two-letter words and remembered as initials; ordinary words lose
       their trailing dots.
    5. Words that are all digits or listed in ``numeros_romanos`` are
       converted with ``convertir_numero_a_texto``.
    6. Excluded words are filtered once more, except for initials.
    7. Characters outside ``allowed_chars`` are transliterated with
       ``unidecode`` and dropped if the result is still not allowed.
    8. A leading "CH" becomes "C" and a leading "LL" becomes "L" in each
       word.

    Returns:
        The remaining words joined by single spaces (possibly empty).
    """
    import string

    name = self.razon_social.upper().strip()

    # Stage 1: punctuation-aware removal of excluded words. Longer entries
    # go first so that e.g. "S.A.B." is handled before "S.A.".
    longest_first = sorted(self.excluded_words_morales, key=len, reverse=True)
    for stop_word in longest_first:
        # In the middle of the name, followed by a space, comma or dot.
        for trailer in (" ", ",", "."):
            name = name.replace(" " + stop_word + trailer, " ")
        # At the very beginning of the name.
        name = name.removeprefix(stop_word + " ")
        # At the very end of the name, preceded by a space or a comma.
        name = name.removesuffix(" " + stop_word)
        name = name.removesuffix("," + stop_word)

    # Stage 2: blank out special characters (&, @, %, #, !, $, ", -, /, +,
    # parentheses, ...); dots are kept because stage 4 needs them.
    processable = set(string.ascii_uppercase + string.digits + " .ÑÁÉÍÓÚÜñáéíóúü")
    name = "".join(ch if ch in processable else " " for ch in name)

    # Stage 3: Ñ is represented as X.
    for enie in ("Ñ", "ñ"):
        name = name.replace(enie, "X")

    # Stage 4: expand dotted initials into individual words and tag them so
    # the second exclusion pass does not discard them.
    tagged_words = []  # (word, is_initial) pairs
    for token in name.split():
        # 15 characters is taken as a reasonable upper bound for initials.
        if "." in token and len(token) <= 15:
            pieces = [piece for piece in token.split(".") if piece]
            # Only groups made of 1-2 letter alphabetic pieces are initials.
            if pieces and all(len(piece) <= 2 and piece.isalpha() for piece in pieces):
                tagged_words.extend((piece, True) for piece in pieces)
                continue
        # Ordinary word: drop trailing dots.
        token = token.rstrip(".")
        if token:
            tagged_words.append((token, False))

    # Stages 5 and 6: spell out arabic/roman numbers, then drop excluded
    # words unless the word came from a group of initials.
    kept_words = []
    for token, came_from_initials in tagged_words:
        if token.isdigit() or token in self.numeros_romanos:
            token = self.convertir_numero_a_texto(token)
        token = token.strip().upper()
        if token and (came_from_initials or token not in self.excluded_words_morales):
            kept_words.append(token)

    # Stage 7: keep allowed characters and spaces; anything else is
    # transliterated (accents removed) and kept only if the result is allowed.
    pieces_out = []
    for ch in " ".join(kept_words):
        if ch in self.allowed_chars or ch == " ":
            pieces_out.append(ch)
        else:
            plain = unidecode.unidecode(ch)
            if plain in self.allowed_chars:
                pieces_out.append(plain)
    normalised = "".join(pieces_out).strip().upper()

    # Stage 8: collapse the leading digraphs CH -> C and LL -> L.
    final_words = []
    for token in normalised.split():
        if token[:2] in ("CH", "LL"):
            token = token[0] + token[2:]
        final_words.append(token)

    return " ".join(final_words)
|
|
907
|
+
|
|
908
|
+
def generate_letters(self) -> str:
    """
    Build the 3-letter prefix of the RFC from the cleaned company name.

    The rule depends on how many words ``razon_social_calculo`` contains:

    - 1 word: its first three letters, right-padded with "X" when the word
      is shorter than three letters.
    - 2 words: the first letter of the first word followed by the first
      two letters of the second word (padded with "X" when the second word
      has a single letter).
    - 3 or more words: the first letter of each of the first three words.

    When the resulting code is listed in ``cacophonic_words`` its last
    letter is replaced with "X".

    Raises:
        ValueError: if the cleaned name is empty or contains no words.
    """
    cleaned_name = self.razon_social_calculo
    if not cleaned_name:
        raise ValueError("Company name is empty after cleaning")

    words = cleaned_name.split()
    if not words:
        raise ValueError("No valid words in company name")

    if len(words) == 1:
        # Single word: first three letters, padded with X.
        letters = (words[0] + "XX")[:3]
    elif len(words) == 2:
        # Two words: initial of the first, first two letters of the second.
        first, second = words
        letters = first[0] + (second + "X")[:2]
    else:
        # Three or more words: initials of the first three.
        letters = "".join(word[0] for word in words[:3])

    # Cacophonic (offensive-sounding) codes get their last letter masked.
    if letters in self.cacophonic_words:
        letters = letters[:-1] + "X"

    return letters
|
|
963
|
+
|
|
964
|
+
@property
def nombre_completo(self) -> str:
    """Cleaned company name that feeds the homoclave calculation.

    This is an alias of ``razon_social_calculo``.
    """
    cleaned_name = self.razon_social_calculo
    return cleaned_name
|
|
968
|
+
|
|
969
|
+
@property
def cadena_homoclave(self) -> str:
    """Build the string of codes from which the homoclave is computed.

    The result starts with "0" and is followed by the
    ``quotient_remaining_table`` entry of every character of
    ``nombre_completo`` that has one. Characters without an entry are
    skipped, except the space, which is always looked up.
    """
    table = self.quotient_remaining_table
    codes = ["0"]
    for symbol in self.nombre_completo:
        # A space is looked up even when it is absent from the table, so a
        # table without a " " entry fails loudly instead of dropping it.
        if symbol in table or symbol == " ":
            codes.append(table[symbol])
    return "".join(codes)
|
|
979
|
+
|
|
980
|
+
@property
def homoclave(self) -> str:
    """Compute the two-character homoclave from ``cadena_homoclave``.

    Every pair of adjacent characters is read as a two-digit number and
    multiplied by its second digit. The sum of those products, modulo
    1000, is split into quotient and remainder of a division by 34, and
    each of the two is mapped through ``homoclave_assign_table``.
    """
    digits = self.cadena_homoclave

    total = 0
    for left, right in zip(digits, digits[1:]):
        total += int(left + right) * int(right)

    quotient, remainder = divmod(total % 1000, 34)
    table = self.homoclave_assign_table
    return table[quotient] + table[remainder]
|
|
989
|
+
|
|
990
|
+
|
|
991
|
+
class RFCGenerator:
    """
    Entry point for generating an RFC, either for an individual
    (Persona Física) or for a legal entity (Persona Moral).
    """

    @staticmethod
    def generate_fisica(nombre: str, paterno: str, materno: str, fecha: datetime.date) -> str:
        """Return the RFC of an individual.

        Delegates to ``RFCGeneratorFisicas`` with the given name, paternal
        and maternal surnames and date, and returns its ``rfc``.
        """
        generator = RFCGeneratorFisicas(
            nombre=nombre,
            paterno=paterno,
            materno=materno,
            fecha=fecha,
        )
        return generator.rfc

    @staticmethod
    def generate_moral(razon_social: str, fecha: datetime.date) -> str:
        """Return the RFC of a company or other legal entity.

        Delegates to ``RFCGeneratorMorales`` with the given company name
        and date, and returns its ``rfc``.
        """
        generator = RFCGeneratorMorales(razon_social=razon_social, fecha=fecha)
        return generator.rfc
|