catalogmx 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. catalogmx/__init__.py +56 -0
  2. catalogmx/catalogs/__init__.py +5 -0
  3. catalogmx/catalogs/banxico/__init__.py +24 -0
  4. catalogmx/catalogs/banxico/banks.py +136 -0
  5. catalogmx/catalogs/banxico/codigos_plaza.py +287 -0
  6. catalogmx/catalogs/banxico/instituciones_financieras.py +338 -0
  7. catalogmx/catalogs/banxico/monedas_divisas.py +386 -0
  8. catalogmx/catalogs/banxico/udis.py +279 -0
  9. catalogmx/catalogs/ift/__init__.py +15 -0
  10. catalogmx/catalogs/ift/codigos_lada.py +426 -0
  11. catalogmx/catalogs/ift/operadores_moviles.py +315 -0
  12. catalogmx/catalogs/inegi/__init__.py +21 -0
  13. catalogmx/catalogs/inegi/localidades.py +207 -0
  14. catalogmx/catalogs/inegi/municipios.py +73 -0
  15. catalogmx/catalogs/inegi/municipios_completo.py +236 -0
  16. catalogmx/catalogs/inegi/states.py +148 -0
  17. catalogmx/catalogs/mexico/__init__.py +17 -0
  18. catalogmx/catalogs/mexico/hoy_no_circula.py +215 -0
  19. catalogmx/catalogs/mexico/placas_formatos.py +184 -0
  20. catalogmx/catalogs/mexico/salarios_minimos.py +156 -0
  21. catalogmx/catalogs/mexico/uma.py +207 -0
  22. catalogmx/catalogs/sat/__init__.py +13 -0
  23. catalogmx/catalogs/sat/carta_porte/__init__.py +19 -0
  24. catalogmx/catalogs/sat/carta_porte/aeropuertos.py +76 -0
  25. catalogmx/catalogs/sat/carta_porte/carreteras.py +59 -0
  26. catalogmx/catalogs/sat/carta_porte/config_autotransporte.py +54 -0
  27. catalogmx/catalogs/sat/carta_porte/material_peligroso.py +66 -0
  28. catalogmx/catalogs/sat/carta_porte/puertos_maritimos.py +63 -0
  29. catalogmx/catalogs/sat/carta_porte/tipo_embalaje.py +48 -0
  30. catalogmx/catalogs/sat/carta_porte/tipo_permiso.py +54 -0
  31. catalogmx/catalogs/sat/cfdi_4/__init__.py +42 -0
  32. catalogmx/catalogs/sat/cfdi_4/clave_prod_serv.py +383 -0
  33. catalogmx/catalogs/sat/cfdi_4/clave_unidad.py +298 -0
  34. catalogmx/catalogs/sat/cfdi_4/exportacion.py +45 -0
  35. catalogmx/catalogs/sat/cfdi_4/forma_pago.py +45 -0
  36. catalogmx/catalogs/sat/cfdi_4/impuesto.py +57 -0
  37. catalogmx/catalogs/sat/cfdi_4/meses.py +34 -0
  38. catalogmx/catalogs/sat/cfdi_4/metodo_pago.py +45 -0
  39. catalogmx/catalogs/sat/cfdi_4/objeto_imp.py +45 -0
  40. catalogmx/catalogs/sat/cfdi_4/periodicidad.py +34 -0
  41. catalogmx/catalogs/sat/cfdi_4/regimen_fiscal.py +57 -0
  42. catalogmx/catalogs/sat/cfdi_4/tasa_o_cuota.py +42 -0
  43. catalogmx/catalogs/sat/cfdi_4/tipo_comprobante.py +45 -0
  44. catalogmx/catalogs/sat/cfdi_4/tipo_factor.py +34 -0
  45. catalogmx/catalogs/sat/cfdi_4/tipo_relacion.py +45 -0
  46. catalogmx/catalogs/sat/cfdi_4/uso_cfdi.py +45 -0
  47. catalogmx/catalogs/sat/comercio_exterior/__init__.py +39 -0
  48. catalogmx/catalogs/sat/comercio_exterior/claves_pedimento.py +77 -0
  49. catalogmx/catalogs/sat/comercio_exterior/estados.py +122 -0
  50. catalogmx/catalogs/sat/comercio_exterior/incoterms.py +226 -0
  51. catalogmx/catalogs/sat/comercio_exterior/monedas.py +107 -0
  52. catalogmx/catalogs/sat/comercio_exterior/motivos_traslado.py +54 -0
  53. catalogmx/catalogs/sat/comercio_exterior/paises.py +88 -0
  54. catalogmx/catalogs/sat/comercio_exterior/registro_ident_trib.py +76 -0
  55. catalogmx/catalogs/sat/comercio_exterior/unidades_aduana.py +54 -0
  56. catalogmx/catalogs/sat/comercio_exterior/validator.py +212 -0
  57. catalogmx/catalogs/sat/nomina/__init__.py +19 -0
  58. catalogmx/catalogs/sat/nomina/banco.py +50 -0
  59. catalogmx/catalogs/sat/nomina/periodicidad_pago.py +48 -0
  60. catalogmx/catalogs/sat/nomina/riesgo_puesto.py +56 -0
  61. catalogmx/catalogs/sat/nomina/tipo_contrato.py +47 -0
  62. catalogmx/catalogs/sat/nomina/tipo_jornada.py +42 -0
  63. catalogmx/catalogs/sat/nomina/tipo_nomina.py +52 -0
  64. catalogmx/catalogs/sat/nomina/tipo_regimen.py +47 -0
  65. catalogmx/catalogs/sepomex/__init__.py +5 -0
  66. catalogmx/catalogs/sepomex/codigos_postales.py +184 -0
  67. catalogmx/cli.py +185 -0
  68. catalogmx/helpers.py +324 -0
  69. catalogmx/utils/text.py +55 -0
  70. catalogmx/validators/__init__.py +0 -0
  71. catalogmx/validators/clabe.py +233 -0
  72. catalogmx/validators/curp.py +623 -0
  73. catalogmx/validators/nss.py +255 -0
  74. catalogmx/validators/rfc.py +1004 -0
  75. catalogmx-0.3.0.dist-info/METADATA +644 -0
  76. catalogmx-0.3.0.dist-info/RECORD +81 -0
  77. catalogmx-0.3.0.dist-info/WHEEL +5 -0
  78. catalogmx-0.3.0.dist-info/entry_points.txt +2 -0
  79. catalogmx-0.3.0.dist-info/licenses/AUTHORS.rst +5 -0
  80. catalogmx-0.3.0.dist-info/licenses/LICENSE +19 -0
  81. catalogmx-0.3.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1004 @@
1
+ #!/usr/bin/env python3
2
+ import datetime
3
+ import re
4
+
5
+ import unidecode
6
+
7
+
8
class RFCGeneral:
    """
    Shared constants for the RFC, the Mexican Tax ID Code
    (Registro Federal de Contribuyentes).

    Variables:
        general_regex:
            compiled pattern describing the overall RFC layout: 3 or 4
            characters [A-Z&Ñ], 6 digits (a date as YYMMDD), 2 characters
            [A-Z0-9] and a final check character [0-9A].
        date_regex:
            same layout, capturing the 6-digit date element.
        homoclave_regex:
            same layout, capturing the first 2 homoclave characters.
        homoclave_characters:
            characters accepted in the first 2 homoclave positions
            (letters without "O", plus digits).
        checksum_table:
            character -> two-digit value used when computing the checksum.
        quotient_remaining_table:
            character -> two-digit value used when computing the homoclave.
        homoclave_assign_table:
            index -> homoclave character (34 entries, no "0" and no "O").
    """

    general_regex = re.compile(r"[A-Z&Ñ]{3,4}[0-9]{6}[A-Z0-9]{2}[0-9A]")
    date_regex = r"[A-Z&Ñ]{3,4}([0-9]{6})[A-Z0-9]{2}[0-9A]"
    homoclave_regex = r"[A-Z&Ñ]{3,4}[0-9]{6}([A-Z0-9]{2})[0-9A]"
    homoclave_characters = "ABCDEFGHIJKLMNPQRSTUVWXYZ0123456789"

    # The value of each character is simply its position in this sequence
    # ("&" sits between "N" and "O"; the blank and "Ñ" come last).
    checksum_table = {
        char: f"{position:02d}"
        for position, char in enumerate("0123456789ABCDEFGHIJKLMN&OPQRSTUVWXYZ Ñ")
    }

    # The values are not contiguous: 20, 30 and 31 are never assigned.
    quotient_remaining_table = {
        " ": "00",
        **{str(digit): f"{digit:02d}" for digit in range(10)},
        "&": "10",
        **{char: str(11 + offset) for offset, char in enumerate("ABCDEFGHI")},
        **{char: str(21 + offset) for offset, char in enumerate("JKLMNOPQR")},
        **{char: str(32 + offset) for offset, char in enumerate("STUVWXYZ")},
        "Ñ": "40",
    }

    homoclave_assign_table = list("123456789ABCDEFGHIJKLMNPQRSTUVWXYZ")
151
+
152
+
153
class RFCValidator(RFCGeneral):
    """
    Loads an RFC, Mexican Tax ID Code (Registro Federal de Contribuyentes),
    and provides functions to determine its validity.
    """

    def __init__(self, rfc: str):
        """
        :param rfc: The RFC code to be validated. A non-empty string is
            upper-cased and stripped; anything else is stored as "" and is
            reported as invalid by every check.
        """
        if isinstance(rfc, str) and rfc:
            self.rfc = rfc.upper().strip()
            # None means "not evaluated yet"; validate_general_regex caches here.
            self._general_validation = None
        else:
            self.rfc = ""
            self._general_validation = False

    def validators(self, strict: bool = True) -> dict:
        """
        Runs the individual validations.

        :param strict: If False the checksum test is skipped.
        :return: A dictionary mapping each validation name to its boolean result.
        """
        validations = {
            "general_regex": self.validate_general_regex,
            "date_format": self.validate_date,
            "homoclave": self.validate_homoclave,
        }
        if strict:
            validations["checksum"] = self.validate_checksum
        return {name: check() for name, check in validations.items()}

    def validate(self, strict: bool = True) -> bool:
        """
        Retrieves the result of the validations and verifies all of them passed.

        :param strict: If True (default) the checksum is also verified;
            if False the checksum is not checked.
        :return: True if the RFC is valid, False if the RFC is invalid.
        """
        return all(self.validators(strict=strict).values())

    is_valid = validate

    def validate_date(self) -> bool:
        """
        Checks if the date element in the RFC code is a real YYMMDD date.
        """
        if not self.validate_general_regex():
            return False
        found = re.fullmatch(self.date_regex, self.rfc)
        if found is None:
            return False
        try:
            datetime.datetime.strptime(found.group(1), "%y%m%d")
        except ValueError:
            return False
        return True

    def validate_homoclave(self) -> bool:
        """
        Checks if the homoclave's first 2 characters are correct, i.e. all of
        them belong to ``homoclave_characters`` (which excludes the letter O).
        """
        if not self.validate_general_regex():
            return False
        found = re.fullmatch(self.homoclave_regex, self.rfc)
        if found is None:
            return False
        return all(char in self.homoclave_characters for char in found.group(1))

    def validate_general_regex(self) -> bool:
        """
        Checks the length of the RFC and that the WHOLE string matches the
        general regex. The result is cached after the first evaluation.

        ``fullmatch`` is required here: a prefix-only match would accept a
        13-character string made of a valid 12-character RFC followed by any
        trailing character.
        """
        if self._general_validation is None:
            self._general_validation = (
                len(self.rfc) in (12, 13)
                and self.general_regex.fullmatch(self.rfc) is not None
            )
        return self._general_validation

    def detect_fisica_moral(self) -> str:
        """
        Returns a string based on the kind of RFC
        (Persona Moral, Persona Física, Genérico or RFC Inválido).
        """
        if not self.validate_general_regex():
            return "RFC Inválido"
        if self.is_generic():
            return "Genérico"
        if self.is_fisica():
            return "Persona Física"
        # A structurally valid, non-generic RFC that is not 13 characters
        # long can only be the 12-character form.
        return "Persona Moral"

    def is_generic(self) -> bool:
        """
        Checks if the RFC is a Generic one.

        Generic RFC is used for non-specific recipients of Electronic Invoices.
        XAXX010101000 for Mexican non-specific recipients
        XEXX010101000 for Non-Mexican recipients, usually export invoices.

        >>> RFCValidator('XAXX010101000').is_generic()
        True
        """
        return self.rfc in ("XAXX010101000", "XEXX010101000")

    def is_fisica(self) -> bool:
        """
        Check if the code belongs to a "persona física" (individual).

        Decided by structure: the 13-character form (4-character prefix)
        that is not one of the generic codes.

        :raises ValueError: if the RFC does not pass the general validation.
        """
        if not self.validate_general_regex():
            raise ValueError("Invalid RFC")
        return len(self.rfc) == 13 and not self.is_generic()

    def is_moral(self) -> bool:
        """
        Check if the code belongs to "persona moral" (corporation or association).

        Decided by structure: the 12-character form (3-character prefix).

        :raises ValueError: if the RFC does not pass the general validation.
        """
        if not self.validate_general_regex():
            raise ValueError("Invalid RFC")
        return len(self.rfc) == 12

    def validate_checksum(self) -> bool:
        """
        Calculates the checksum of the RFC and verifies it's equal to the last character.
        Generic RFCs' checksums are not calculated since they are incorrect (they're always 0)
        In 99% of the RFC codes this is correct. In 1% of them for unknown reasons not clarified by the Tax Authority,
        the checksum doesn't fit this checksum. Be aware that an RFC may have an "invalid" checksum but still be
        valid if a "Cédula de Identificación Fiscal" is given.
        """
        if not self.validate_general_regex():
            return False
        if self.is_generic():
            return True
        return self.rfc[-1] == self.calculate_last_digit(self.rfc, with_checksum=True)

    @classmethod
    def calculate_last_digit(cls, rfc: str, with_checksum: bool = True) -> str | bool:
        """
        Calculates the checksum of an RFC.

        The checksum is calculated with the first 12 characters of the RFC.
        If only 11 are available, a blank is added at the beginning of the string.

        :param rfc: The RFC, with or without its trailing check character.
        :param with_checksum: True if ``rfc`` already ends with a check
            character, which is dropped before calculating.
        :return: The check character ("0"-"9" or "A"), or False when ``rfc``
            is empty or not a string.
        :raises ValueError: if the text to be summed is not 11 or 12 characters
            long (previously a bare ``assert``, which is skipped under ``-O``).
        :raises KeyError: if a character has no entry in ``checksum_table``.
        """
        if not (isinstance(rfc, str) and rfc):
            return False
        body = rfc.strip().upper()
        if with_checksum:
            body = body[:-1]
        if len(body) not in (11, 12):
            raise ValueError("RFC must have 11 or 12 characters before the check character")
        body = body.rjust(12)

        # Weights run from 13 (first character) down to 2 (twelfth character).
        total = sum(
            int(cls.checksum_table[char]) * weight
            for weight, char in zip(range(13, 1, -1), body)
        )
        remainder = total % 11
        if remainder == 0:
            return "0"
        complement = 11 - remainder
        return "A" if complement == 10 else str(complement)
357
+
358
+
359
class RFCGeneratorUtils(RFCGeneral):
    """
    Word lists, conversion tables and text helpers shared by the RFC generators.
    """

    vocales = "AEIOU"

    # Particles ignored in the names of individuals.
    excluded_words_fisicas = ["DE", "LA", "LAS", "MC", "VON", "DEL", "LOS", "Y", "MAC", "VAN", "MI"]

    # Four-letter prefixes that get their last letter replaced by "X".
    cacophonic_words = [
        "BUEI", "BUEY", "CACA", "CACO", "CAGA", "CAGO", "CAKA", "COGE",
        "COJA", "COJE", "COJI", "COJO", "CULO", "FETO", "GUEY", "JOTO",
        "KACA", "KACO", "KAGA", "KAGO", "KOGE", "KOJO", "KAKA", "KULO",
        "MAME", "MAMO", "MEAR", "MEON", "MION", "MOCO", "MULA", "PEDA",
        "PEDO", "PENE", "PUTA", "PUTO", "QULO", "RATA", "RUIN",
    ]

    # Words ignored in company names. The order is significant for callers
    # that sort this list by length with a stable sort.
    excluded_words_morales = [
        "EL", "LA", "DE", "LOS", "LAS", "Y", "DEL", "MI",
        "COMPAÑIA", "COMPAÑÍA", "CIA", "CIA.",
        "SOCIEDAD", "SOC", "SOC.",
        "COOPERATIVA", "COOP", "COOP.",
        "S.A.", "SA", "S.A", "S. A.", "S. A",
        "S.A.B.", "SAB", "S.A.B", "S. A. B.", "S. A. B",
        "S. DE R.L.", "S DE RL", "SRL", "S.R.L.", "S. R. L.",
        "S. EN C.", "S EN C",
        "S.C.", "SC",
        "S. EN C. POR A.", "S EN C POR A",
        "S. EN N.C.", "S EN NC",
        "A.C.", "AC", "A. C.",
        "A. EN P.", "A EN P",
        "S.C.L.", "SCL",
        "S.N.C.", "SNC",
        "C.V.", "CV", "C. V.",
        "SA DE CV", "S.A. DE C.V.", "SA DE CV MI", "S.A. DE C.V. MI",
        "S.A.B. DE C.V.", "SAB DE CV", "S.A.B DE C.V",
        "SRL DE CV", "S.R.L. DE C.V.", "SRL DE CV MI", "SRL MI",
        "THE", "OF", "COMPANY", "AND", "CO", "CO.",
        "MC", "VON", "MAC", "VAN",
        "PARA", "POR", "AL", "E", "EN", "CON", "SUS", "A",
    ]

    allowed_chars = list("ABCDEFGHIJKLMNÑOPQRSTUVWXYZ&")

    # Number (as a decimal string) -> Spanish word, for 0 through 20.
    numeros_texto = {
        "0": "CERO", "1": "UNO", "2": "DOS", "3": "TRES", "4": "CUATRO",
        "5": "CINCO", "6": "SEIS", "7": "SIETE", "8": "OCHO", "9": "NUEVE",
        "10": "DIEZ", "11": "ONCE", "12": "DOCE", "13": "TRECE", "14": "CATORCE",
        "15": "QUINCE", "16": "DIECISEIS", "17": "DIECISIETE", "18": "DIECIOCHO",
        "19": "DIECINUEVE", "20": "VEINTE",
    }

    # Roman numeral -> integer, for I through XX.
    numeros_romanos = {
        "I": 1, "II": 2, "III": 3, "IV": 4, "V": 5,
        "VI": 6, "VII": 7, "VIII": 8, "IX": 9, "X": 10,
        "XI": 11, "XII": 12, "XIII": 13, "XIV": 14, "XV": 15,
        "XVI": 16, "XVII": 17, "XVIII": 18, "XIX": 19, "XX": 20,
    }

    @classmethod
    def convertir_numero_a_texto(cls, numero_str: str) -> str:
        """
        Convert a number (arabic or roman, 0-20) to its Spanish word.

        :param numero_str: The number as text; it is stripped and upper-cased.
        :return: The word from ``numeros_texto``, or the stripped/upper-cased
            input unchanged when it cannot be converted.
        """
        token = numero_str.strip().upper()

        # Roman numeral first, so that "V" or "X" become words.
        roman_value = cls.numeros_romanos.get(token)
        if roman_value is not None:
            word = cls.numeros_texto.get(str(roman_value))
            if word is not None:
                return word

        if token in cls.numeros_texto:
            return cls.numeros_texto[token]

        # Last resort: forms like "07" that int() normalises.
        try:
            value = int(token)
        except ValueError:
            return token
        if 0 <= value <= 20:
            return cls.numeros_texto[str(value)]
        return token

    @classmethod
    def clean_name(cls, nombre: str) -> str:
        """
        Normalise a personal name for RFC calculation.

        The name is upper-cased, the space-separated words listed in
        ``excluded_words_fisicas`` are dropped, and every character outside
        ``allowed_chars`` is transliterated with ``unidecode``.

        Upper-casing happens BEFORE the excluded-word filter so that
        lowercase particles ("de", "la") are removed too.

        :param nombre: The raw name.
        :return: The cleaned, upper-cased name.
        """
        kept_words = [
            word for word in nombre.upper().split(" ") if word not in cls.excluded_words_fisicas
        ]
        text = " ".join(kept_words).strip()
        transliterated = "".join(
            char if char in cls.allowed_chars else unidecode.unidecode(char) for char in text
        )
        # unidecode may emit lowercase or padded output, hence the final pass.
        return transliterated.strip().upper()

    @staticmethod
    def name_adapter(name: str, non_strict: bool = False) -> str:
        """
        Coerce a name argument into an upper-cased, stripped string.

        :param name: The value to adapt.
        :param non_strict: If True, a missing value (None or any other falsy
            non-string) is accepted and becomes "".
        :return: The adapted string.
        :raises ValueError: if ``name`` is not a string and is not an accepted
            missing value. This includes truthy non-strings under
            ``non_strict``, which previously fell through and returned None.
        """
        if isinstance(name, str):
            return name.upper().strip()
        if non_strict and not name:
            return ""
        raise ValueError("name must be a string")
591
+
592
+
593
class RFCGeneratorFisicas(RFCGeneratorUtils):
    """
    RFC generator for "personas físicas" (individuals).

    The generated RFC is: 4 letters derived from the names, the date as
    YYMMDD, a 2-character homoclave and 1 check character (13 characters).
    """

    def __init__(self, paterno: str, materno: str, nombre: str, fecha: datetime.date):
        """
        :param paterno: First (paternal) surname; must be a non-blank string.
        :param materno: Second (maternal) surname; may be empty or None.
        :param nombre: Given name(s); must be a non-blank string.
        :param fecha: Date of birth.
        :raises ValueError: if a required value is missing or of the wrong type.
        """
        names_ok = (
            isinstance(paterno, str)
            and isinstance(nombre, str)
            and paterno.strip()
            and nombre.strip()
        )
        if not (names_ok and isinstance(fecha, datetime.date)):
            raise ValueError("Invalid Values")
        self._rfc = ""
        self.paterno = paterno
        self.materno = materno
        self.nombre = nombre
        self.dob = fecha

    @property
    def paterno(self) -> str:
        return self._paterno

    @paterno.setter
    def paterno(self, name: str):
        self._paterno = self.name_adapter(name)
        self._rfc = ""  # invalidate the cached RFC

    @property
    def materno(self) -> str:
        return self._materno

    @materno.setter
    def materno(self, name: str):
        self._materno = self.name_adapter(name, non_strict=True)
        self._rfc = ""  # invalidate the cached RFC

    @property
    def nombre(self) -> str:
        return self._nombre

    @nombre.setter
    def nombre(self, name: str):
        self._nombre = self.name_adapter(name)
        self._rfc = ""  # invalidate the cached RFC

    @property
    def dob(self) -> datetime.date:
        return self._dob

    @dob.setter
    def dob(self, date: datetime.date):
        # Reject bad values instead of silently keeping the previous date.
        if not isinstance(date, datetime.date):
            raise ValueError("dob must be a datetime.date")
        self._dob = date
        self._rfc = ""  # invalidate the cached RFC

    @property
    def rfc(self) -> str:
        """The complete RFC; computed on first access and cached until an input changes."""
        if not self._rfc:
            partial_rfc = self.generate_letters() + self.generate_date() + self.homoclave
            self._rfc = partial_rfc + RFCValidator.calculate_last_digit(
                partial_rfc, with_checksum=False
            )
        return self._rfc

    def generate_date(self) -> str:
        """Return the date of birth formatted as YYMMDD."""
        return self.dob.strftime("%y%m%d")

    def generate_letters(self) -> str:
        """
        Build the 4-letter prefix.

        Base recipe: first letter of the paternal surname, its first vowel
        after that letter, first letter of the maternal surname, first letter
        of the given name. When the paternal surname has no such vowel, or
        there is no maternal surname, the gap is filled with the second
        letter of the given name (and, if both are missing, the second
        letter of the paternal surname as well). A prefix listed in
        ``cacophonic_words`` gets its last letter replaced by "X".

        NOTE(review): indexing raises IndexError when a cleaned name is
        shorter than the recipe needs; behavior kept as is.
        """
        paterno = self.paterno_calculo
        materno = self.materno_calculo
        iniciales = self.nombre_iniciales

        letters = [paterno[0]]
        # Position (within paterno[1:]) of each vowel that occurs at all.
        vowel_positions = [pos for pos in map(paterno[1:].find, self.vocales) if pos >= 0]
        needs_extra = not vowel_positions
        if vowel_positions:
            letters.append(paterno[min(vowel_positions) + 1])

        if materno:
            letters.append(materno[0])
        elif needs_extra:
            letters.append(paterno[1])
        else:
            needs_extra = True

        letters.append(iniciales[0])
        if needs_extra:
            letters.append(iniciales[1])

        clave = "".join(letters)
        if clave in self.cacophonic_words:
            clave = clave[:-1] + "X"
        return clave

    @property
    def paterno_calculo(self) -> str:
        return self.clean_name(self.paterno)

    @property
    def materno_calculo(self) -> str:
        return self.clean_name(self.materno)

    @property
    def nombre_calculo(self) -> str:
        return self.clean_name(self.nombre)

    def nombre_iscompound(self) -> bool:
        """Return True when the cleaned given name has more than one word."""
        return len(self.nombre_calculo.split(" ")) > 1

    @property
    def nombre_iniciales(self) -> str:
        """
        The part of the given name used for initials: a leading MARIA or
        JOSE is skipped when the name is compound.
        """
        nombre = self.nombre_calculo
        parts = nombre.split(" ")
        if len(parts) > 1 and parts[0] in ("MARIA", "JOSE"):
            return " ".join(parts[1:])
        return nombre

    @property
    def nombre_completo(self) -> str:
        """Cleaned paternal, maternal and given names joined by single spaces."""
        components = (self.paterno_calculo, self.materno_calculo, self.nombre_calculo)
        return " ".join(comp for comp in components if comp)

    @property
    def cadena_homoclave(self) -> str:
        """
        Digit string for the homoclave: a leading "0" followed by the
        two-digit ``quotient_remaining_table`` value of every character of
        the full name. Raises KeyError for a character without an entry.
        """
        table = self.quotient_remaining_table
        return "0" + "".join(table[character] for character in self.nombre_completo)

    @property
    def homoclave(self) -> str:
        """
        The 2-character homoclave.

        Each adjacent digit pair of ``cadena_homoclave`` is multiplied by its
        own second digit; the sum modulo 1000 is split into quotient and
        remainder by 34, and both index ``homoclave_assign_table``.
        """
        cadena = self.cadena_homoclave
        suma = (
            sum(int(cadena[n : n + 2]) * int(cadena[n + 1]) for n in range(len(cadena) - 1)) % 1000
        )
        cociente, residuo = divmod(suma, 34)
        return self.homoclave_assign_table[cociente] + self.homoclave_assign_table[residuo]
726
+
727
+
728
class RFCGeneratorMorales(RFCGeneratorUtils):
    """
    RFC Generator for Persona Moral (Legal Entities/Companies)

    The RFC for a legal entity is composed of:
    - 3 letters derived from the company name
    - 6 digits for the incorporation/foundation date (YYMMDD)
    - 2 alphanumeric characters for homoclave
    - 1 checksum digit
    Total: 12 characters
    """

    def __init__(self, razon_social: str, fecha: datetime.date):
        """
        Initialize RFC Generator for Persona Moral

        :param razon_social: Company name (razón social); a non-blank string.
        :param fecha: Incorporation/foundation date
        :raises ValueError: if either value is missing or of the wrong type.
        """
        name_ok = isinstance(razon_social, str) and razon_social.strip()
        if not (name_ok and isinstance(fecha, datetime.date)):
            raise ValueError(
                "Invalid Values: razon_social must be non-empty and fecha must be a date"
            )
        self._rfc = ""
        self.razon_social = razon_social
        self.fecha = fecha

    @property
    def razon_social(self) -> str:
        return self._razon_social

    @razon_social.setter
    def razon_social(self, name: str):
        if not isinstance(name, str):
            raise ValueError("razon_social must be a string")
        self._razon_social = name.upper().strip()
        self._rfc = ""  # invalidate the cached RFC

    @property
    def fecha(self) -> datetime.date:
        return self._fecha

    @fecha.setter
    def fecha(self, date: datetime.date):
        if not isinstance(date, datetime.date):
            raise ValueError("fecha must be a datetime.date")
        self._fecha = date
        self._rfc = ""  # invalidate the cached RFC

    @property
    def rfc(self) -> str:
        """Generate and return the complete RFC (cached until an input changes)."""
        if not self._rfc:
            partial_rfc = self.generate_letters() + self.generate_date() + self.homoclave
            self._rfc = partial_rfc + RFCValidator.calculate_last_digit(
                partial_rfc, with_checksum=False
            )
        return self._rfc

    def generate_date(self) -> str:
        """Generate date portion in YYMMDD format"""
        return self.fecha.strftime("%y%m%d")

    @property
    def razon_social_calculo(self) -> str:
        """
        Clean the company name for RFC calculation. Steps, in order:
        - Remove excluded words/abbreviations as whole phrases (S.A., DE, LA, ...)
        - Replace characters other than A-Z, 0-9, space, dot and Spanish
          accented letters with a space
        - Substitute Ñ with X
        - Expand dotted initials (F.A.Z. -> F A Z), each becoming a word
        - Convert numbers (arabic and roman, 0-20) to words
        - Remove excluded words again, keeping expanded initials
        - Strip accents and any remaining disallowed character
        - Reduce a leading CH to C and a leading LL to L in every word
        """
        import string

        razon = self.razon_social.upper().strip()

        # Step 1: phrase-level removal. Longest entries go first so that
        # "S.A.B." is removed before "S.A." can match part of it.
        for excluded in sorted(self.excluded_words_morales, key=len, reverse=True):
            for trailer in (" ", ",", "."):
                razon = razon.replace(" " + excluded + trailer, " ")
            razon = razon.removeprefix(excluded + " ")
            razon = razon.removesuffix(" " + excluded)
            razon = razon.removesuffix("," + excluded)

        # Step 2: blank out special characters (&, @, %, #, !, $, ", -, /, +, (, ), ...).
        allowed_for_processing = string.ascii_uppercase + string.digits + " .ÑÁÉÍÓÚÜñáéíóúü"
        razon_limpia = "".join(c if c in allowed_for_processing else " " for c in razon)

        # Step 3: Ñ counts as X.
        razon_limpia = razon_limpia.replace("Ñ", "X").replace("ñ", "X")

        # Step 4: split into (word, is_initial) pairs. Tokens made only of
        # 1-2 letter pieces separated by dots are expanded into initials and
        # flagged so that the second excluded-word pass leaves them alone.
        tagged_words = []
        for token in razon_limpia.split():
            if "." in token and len(token) <= 15:  # reasonable upper bound for initials
                parts = [piece.strip() for piece in token.split(".") if piece.strip()]
                if parts and all(len(p) <= 2 and p.isalpha() for p in parts):
                    tagged_words.extend((part, True) for part in parts)
                    continue
            token = token.rstrip(".")  # trailing dots of ordinary words
            if token:
                tagged_words.append((token, False))

        # Steps 5 and 6: numbers to words, then drop excluded words
        # (initials are always kept).
        filtered_words = []
        for word, is_init in tagged_words:
            if word.isdigit() or word in self.numeros_romanos:
                word = self.convertir_numero_a_texto(word)
            word = word.strip().upper()
            if word and (is_init or word not in self.excluded_words_morales):
                filtered_words.append(word)

        # Step 7: keep allowed characters and spaces; transliterate the rest
        # and keep the result only if it is a single allowed character.
        kept_chars = []
        for char in " ".join(filtered_words):
            if char == " " or char in self.allowed_chars:
                kept_chars.append(char)
            else:
                decoded = unidecode.unidecode(char)
                if decoded in self.allowed_chars:
                    kept_chars.append(decoded)
        result = "".join(kept_chars).strip().upper()

        # Step 8: consonant compounds at the beginning of words.
        words_final = []
        for word in result.split():
            if word.startswith("CH"):
                word = "C" + word[2:]
            elif word.startswith("LL"):
                word = "L" + word[2:]
            words_final.append(word)

        return " ".join(words_final)

    def generate_letters(self) -> str:
        """
        Generate the 3-letter code from the cleaned company name:

        1 word:   its first 3 letters, padded with X if it is shorter
        2 words:  first letter of the 1st word + first two letters of the
                  2nd word (padded with X if the 2nd word has one letter)
        3+ words: first letter of each of the first 3 words

        :raises ValueError: if nothing is left of the name after cleaning.
        """
        cleaned_name = self.razon_social_calculo
        if not cleaned_name:
            raise ValueError("Company name is empty after cleaning")

        words = cleaned_name.split()
        if not words:
            raise ValueError("No valid words in company name")

        if len(words) == 1:
            result = (words[0] + "XX")[:3]
        elif len(words) == 2:
            result = words[0][0] + (words[1] + "X")[:2]
        else:
            result = "".join(word[0] for word in words[:3])

        # Same cacophonic-word rule as for individuals: replace the last
        # character with "X" when the code is a listed word.
        if result in self.cacophonic_words:
            result = result[:-1] + "X"

        return result

    @property
    def nombre_completo(self) -> str:
        """Return the complete cleaned company name for homoclave calculation"""
        return self.razon_social_calculo

    @property
    def cadena_homoclave(self) -> str:
        """
        Generate the string used for homoclave calculation: a leading "0"
        followed by the two-digit ``quotient_remaining_table`` value of each
        character of the cleaned name. Characters without an entry are skipped.
        """
        table = self.quotient_remaining_table
        return "0" + "".join(
            table[character] for character in self.nombre_completo if character in table
        )

    @property
    def homoclave(self) -> str:
        """
        Calculate the 2-character homoclave.

        Each adjacent digit pair of ``cadena_homoclave`` is multiplied by its
        own second digit; the sum modulo 1000 is split into quotient and
        remainder by 34, and both index ``homoclave_assign_table``.
        """
        cadena = self.cadena_homoclave
        suma = (
            sum(int(cadena[n : n + 2]) * int(cadena[n + 1]) for n in range(len(cadena) - 1)) % 1000
        )
        cociente, residuo = divmod(suma, 34)
        return self.homoclave_assign_table[cociente] + self.homoclave_assign_table[residuo]
989
+
990
+
991
class RFCGenerator:
    """
    Convenience entry points that build the matching generator (Persona
    Física or Persona Moral) and return the RFC it produces.
    """

    @staticmethod
    def generate_fisica(nombre: str, paterno: str, materno: str, fecha: datetime.date) -> str:
        """Return the RFC of an individual (Persona Física)."""
        generator = RFCGeneratorFisicas(
            paterno=paterno,
            materno=materno,
            nombre=nombre,
            fecha=fecha,
        )
        return generator.rfc

    @staticmethod
    def generate_moral(razon_social: str, fecha: datetime.date) -> str:
        """Return the RFC of a legal entity (Persona Moral)."""
        generator = RFCGeneratorMorales(razon_social=razon_social, fecha=fecha)
        return generator.rfc