datamarket 0.6.0__py3-none-any.whl → 0.10.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datamarket might be problematic. Click here for more details.

Files changed (38)
  1. datamarket/__init__.py +0 -1
  2. datamarket/exceptions/__init__.py +1 -0
  3. datamarket/exceptions/main.py +118 -0
  4. datamarket/interfaces/alchemy.py +1934 -25
  5. datamarket/interfaces/aws.py +81 -14
  6. datamarket/interfaces/azure.py +127 -0
  7. datamarket/interfaces/drive.py +60 -10
  8. datamarket/interfaces/ftp.py +37 -14
  9. datamarket/interfaces/llm.py +1220 -0
  10. datamarket/interfaces/nominatim.py +314 -42
  11. datamarket/interfaces/peerdb.py +272 -104
  12. datamarket/interfaces/proxy.py +354 -50
  13. datamarket/interfaces/tinybird.py +7 -15
  14. datamarket/params/nominatim.py +439 -0
  15. datamarket/utils/__init__.py +1 -1
  16. datamarket/utils/airflow.py +10 -7
  17. datamarket/utils/alchemy.py +2 -1
  18. datamarket/utils/logs.py +88 -0
  19. datamarket/utils/main.py +138 -10
  20. datamarket/utils/nominatim.py +201 -0
  21. datamarket/utils/playwright/__init__.py +0 -0
  22. datamarket/utils/playwright/async_api.py +274 -0
  23. datamarket/utils/playwright/sync_api.py +281 -0
  24. datamarket/utils/requests.py +655 -0
  25. datamarket/utils/selenium.py +6 -12
  26. datamarket/utils/strings/__init__.py +1 -0
  27. datamarket/utils/strings/normalization.py +217 -0
  28. datamarket/utils/strings/obfuscation.py +153 -0
  29. datamarket/utils/strings/standardization.py +40 -0
  30. datamarket/utils/typer.py +2 -1
  31. datamarket/utils/types.py +1 -0
  32. datamarket-0.10.3.dist-info/METADATA +172 -0
  33. datamarket-0.10.3.dist-info/RECORD +38 -0
  34. {datamarket-0.6.0.dist-info → datamarket-0.10.3.dist-info}/WHEEL +1 -2
  35. datamarket-0.6.0.dist-info/METADATA +0 -49
  36. datamarket-0.6.0.dist-info/RECORD +0 -24
  37. datamarket-0.6.0.dist-info/top_level.txt +0 -1
  38. {datamarket-0.6.0.dist-info → datamarket-0.10.3.dist-info/licenses}/LICENSE +0 -0
datamarket/params/nominatim.py
@@ -1,3 +1,9 @@
1
+ import re
2
+
3
+ from unidecode import unidecode
4
+
5
+ CITY_TO_PROVINCE = {"Madrid": "Madrid"}
6
+
1
7
  POSTCODES = {
2
8
  "01": "Álava",
3
9
  "02": "Albacete",
@@ -52,3 +58,436 @@ POSTCODES = {
52
58
  "51": "Ceuta",
53
59
  "52": "Melilla",
54
60
  }
61
+
62
+ # Mapping of normalized names (for comparison) to standardized names (for storing)
63
+ # for each corresponding country code
64
+ STATES = {
65
+ "es": {
66
+ "andalucia": "Andalucía",
67
+ "aragon": "Aragón",
68
+ "asturias": "Asturias",
69
+ "baleares": "Baleares",
70
+ "canarias": "Canarias",
71
+ "cantabria": "Cantabria",
72
+ "castilla la mancha": "Castilla-La Mancha",
73
+ "castilla y leon": "Castilla y León",
74
+ "cataluna": "Cataluña",
75
+ "ceuta": "Ceuta",
76
+ "comunidad valenciana": "Comunidad Valenciana",
77
+ "extremadura": "Extremadura",
78
+ "galicia": "Galicia",
79
+ "la rioja": "La Rioja",
80
+ "madrid": "Comunidad de Madrid",
81
+ "melilla": "Melilla",
82
+ "murcia": "Murcia",
83
+ "navarra": "Navarra",
84
+ "pais vasco": "País Vasco",
85
+ "euskadi": "País Vasco", # Alias not caught by rapidfuzz
86
+ }
87
+ }
88
+
89
+ PROVINCES = {
90
+ "es": {
91
+ "alava": "Álava",
92
+ "araba": "Álava", # Alias not caught by rapidfuzz
93
+ "albacete": "Albacete",
94
+ "alicante": "Alicante",
95
+ "almeria": "Almería",
96
+ "asturias": "Asturias",
97
+ "avila": "Ávila",
98
+ "badajoz": "Badajoz",
99
+ "barcelona": "Barcelona",
100
+ "bizkaia": "Vizcaya",
101
+ "burgos": "Burgos",
102
+ "caceres": "Cáceres",
103
+ "cadiz": "Cádiz",
104
+ "cantabria": "Cantabria",
105
+ "castellon": "Castellón",
106
+ "ceuta": "Ceuta", # Considered province by opensm and/or geonames
107
+ "ciudad real": "Ciudad Real",
108
+ "cordoba": "Córdoba",
109
+ "cuenca": "Cuenca",
110
+ "gipuzkoa": "Gipuzkoa",
111
+ "gerona": "Gerona",
112
+ "granada": "Granada",
113
+ "guadalajara": "Guadalajara",
114
+ "huelva": "Huelva",
115
+ "huesca": "Huesca",
116
+ "islas baleares": "Islas Baleares",
117
+ "jaen": "Jaén",
118
+ "la coruna": "La Coruña",
119
+ "la rioja": "La Rioja",
120
+ "las palmas": "Las Palmas",
121
+ "leon": "León",
122
+ "lerida": "Lérida",
123
+ "lugo": "Lugo",
124
+ "madrid": "Madrid",
125
+ "malaga": "Málaga",
126
+ "melilla": "Melilla", # Considered province by opensm and/or geonames
127
+ "murcia": "Murcia",
128
+ "navarra": "Navarra",
129
+ "orense": "Orense",
130
+ "palencia": "Palencia",
131
+ "pontevedra": "Pontevedra",
132
+ "salamanca": "Salamanca",
133
+ "santa cruz de tenerife": "Santa Cruz de Tenerife",
134
+ "segovia": "Segovia",
135
+ "sevilla": "Sevilla",
136
+ "soria": "Soria",
137
+ "tarragona": "Tarragona",
138
+ "teruel": "Teruel",
139
+ "toledo": "Toledo",
140
+ "valencia": "Valencia",
141
+ "valladolid": "Valladolid",
142
+ "zamora": "Zamora",
143
+ "zaragoza": "Zaragoza",
144
+ }
145
+ }
146
+
147
+
148
+ PROVINCE_TO_POSTCODE = {
149
+ "es": {
150
+ "A Coruña": "15",
151
+ "Álava": "01",
152
+ "Araba": "01",
153
+ "Alacant": "03",
154
+ "Alicante": "03",
155
+ "Albacete": "02",
156
+ "Almería": "04",
157
+ "Asturias": "33",
158
+ "Ávila": "05",
159
+ "Badajoz": "06",
160
+ "Baleares": "07",
161
+ "Barcelona": "08",
162
+ "Bizkaia": "48",
163
+ "Burgos": "09",
164
+ "Cáceres": "10",
165
+ "Cádiz": "11",
166
+ "Cantabria": "39",
167
+ "Castelló": "12",
168
+ "Castellón": "12",
169
+ "Ceuta": "51",
170
+ "Ciudad Real": "13",
171
+ "Córdoba": "14",
172
+ "Cuenca": "16",
173
+ "Gerona": "17",
174
+ "Gipuzkoa": "20",
175
+ "Girona": "17",
176
+ "Granada": "18",
177
+ "Guadalajara": "19",
178
+ "Guipúzcoa": "20",
179
+ "Huelva": "21",
180
+ "Huesca": "22",
181
+ "Illes Balears": "07",
182
+ "Jaén": "23",
183
+ "La Coruña": "15",
184
+ "La Rioja": "26",
185
+ "Las Palmas": "35",
186
+ "León": "24",
187
+ "Lérida": "25",
188
+ "Lleida": "25",
189
+ "Lugo": "27",
190
+ "Madrid": "28",
191
+ "Málaga": "29",
192
+ "Melilla": "52",
193
+ "Murcia": "30",
194
+ "Navarra": "31",
195
+ "Orense": "32",
196
+ "Ourense": "32",
197
+ "Palencia": "34",
198
+ "Pontevedra": "36",
199
+ "Salamanca": "37",
200
+ "Santa Cruz de Tenerife": "38",
201
+ "Segovia": "40",
202
+ "Sevilla": "41",
203
+ "Soria": "42",
204
+ "Tarragona": "43",
205
+ "Teruel": "44",
206
+ "Toledo": "45",
207
+ "València": "46",
208
+ "Valencia": "46",
209
+ "Valladolid": "47",
210
+ "Vizcaya": "48",
211
+ "Zamora": "49",
212
+ "Zaragoza": "50",
213
+ },
214
+ "pt": {
215
+ "Aveiro": "3",
216
+ "Beja": "7",
217
+ "Braga": "4",
218
+ "Bragança": "5",
219
+ "Castelo Branco": "6",
220
+ "Coimbra": "3",
221
+ "Évora": "7",
222
+ "Faro": "8",
223
+ "Guarda": "6",
224
+ "Leiria": "2",
225
+ "Lisboa": "1",
226
+ "Portalegre": "7",
227
+ "Porto": "4",
228
+ "Santarém": "2",
229
+ "Setúbal": "2",
230
+ "Viana do Castelo": "4",
231
+ "Vila Real": "5",
232
+ "Viseu": "3",
233
+ "Açores": "9",
234
+ "Madeira": "9",
235
+ },
236
+ }
237
+
238
+
239
+ POSTCODE_TO_STATES = {
240
+ "es": {
241
+ # Andalucía
242
+ "04": "Andalucía",
243
+ "11": "Andalucía",
244
+ "14": "Andalucía",
245
+ "18": "Andalucía",
246
+ "21": "Andalucía",
247
+ "23": "Andalucía",
248
+ "29": "Andalucía",
249
+ "41": "Andalucía",
250
+ # Aragón
251
+ "22": "Aragón",
252
+ "44": "Aragón",
253
+ "50": "Aragón",
254
+ # Asturias
255
+ "33": "Principado de Asturias",
256
+ # Baleares
257
+ "07": "Islas Baleares",
258
+ # Canarias
259
+ "35": "Canarias",
260
+ "38": "Canarias",
261
+ # Cantabria
262
+ "39": "Cantabria",
263
+ # Castilla y León
264
+ "05": "Castilla y León",
265
+ "09": "Castilla y León",
266
+ "24": "Castilla y León",
267
+ "34": "Castilla y León",
268
+ "37": "Castilla y León",
269
+ "40": "Castilla y León",
270
+ "42": "Castilla y León",
271
+ "47": "Castilla y León",
272
+ "49": "Castilla y León",
273
+ # Castilla-La Mancha
274
+ "02": "Castilla-La Mancha",
275
+ "13": "Castilla-La Mancha",
276
+ "16": "Castilla-La Mancha",
277
+ "19": "Castilla-La Mancha",
278
+ "45": "Castilla-La Mancha",
279
+ # Cataluña
280
+ "08": "Cataluña",
281
+ "17": "Cataluña",
282
+ "25": "Cataluña",
283
+ "43": "Cataluña",
284
+ # Comunidad Valenciana
285
+ "03": "Comunidad Valenciana",
286
+ "12": "Comunidad Valenciana",
287
+ "46": "Comunidad Valenciana",
288
+ # Extremadura
289
+ "06": "Extremadura",
290
+ "10": "Extremadura",
291
+ # Galicia
292
+ "15": "Galicia",
293
+ "27": "Galicia",
294
+ "32": "Galicia",
295
+ "36": "Galicia",
296
+ # Madrid
297
+ "28": "Comunidad de Madrid",
298
+ # Murcia
299
+ "30": "Región de Murcia",
300
+ # Navarra
301
+ "31": "Comunidad Foral de Navarra",
302
+ # País Vasco
303
+ "01": "País Vasco",
304
+ "20": "País Vasco",
305
+ "48": "País Vasco",
306
+ # La Rioja
307
+ "26": "La Rioja",
308
+ # Ciudades Autónomas
309
+ "51": "Ceuta",
310
+ "52": "Melilla",
311
+ },
312
+ "pt": { # --- NORTE ---
313
+ "40": "Porto",
314
+ "41": "Porto",
315
+ "42": "Porto",
316
+ "43": "Porto",
317
+ "44": "Porto",
318
+ "45": "Aveiro", # Concelhos do norte de Aveiro, na fronteira com Porto.
319
+ "47": "Braga",
320
+ "48": "Braga", # Guimarães.
321
+ "49": "Viana do Castelo",
322
+ "50": "Vila Real",
323
+ "51": "Vila Real",
324
+ "52": "Vila Real",
325
+ "53": "Vila Real / Bragança", # Zona fronteiriça.
326
+ "54": "Bragança",
327
+ # --- CENTRO ---
328
+ "60": "Castelo Branco",
329
+ "61": "Castelo Branco",
330
+ "62": "Castelo Branco",
331
+ "63": "Guarda",
332
+ "30": "Coimbra",
333
+ "31": "Coimbra",
334
+ "32": "Coimbra",
335
+ "33": "Coimbra",
336
+ "34": "Viseu",
337
+ "35": "Viseu",
338
+ "37": "Aveiro",
339
+ "38": "Aveiro",
340
+ "24": "Leiria",
341
+ # --- ÁREA METROPOLITANA DE LISBOA e arredores ---
342
+ "10": "Lisboa",
343
+ "11": "Lisboa",
344
+ "12": "Lisboa",
345
+ "13": "Lisboa",
346
+ "14": "Lisboa",
347
+ "15": "Lisboa",
348
+ "16": "Lisboa",
349
+ "17": "Lisboa",
350
+ "18": "Lisboa",
351
+ "19": "Lisboa",
352
+ "20": "Santarém",
353
+ "21": "Santarém",
354
+ "22": "Santarém",
355
+ "23": "Santarém", # Tomar e Torres Novas.
356
+ "25": "Lisboa", # Concelhos como Torres Vedras, Mafra, Alenquer.
357
+ "26": "Lisboa", # Concelhos como Loures, Amadora, Odivelas.
358
+ "27": "Lisboa", # Concelhos como Sintra, Cascais, Oeiras.
359
+ "28": "Setúbal",
360
+ "29": "Setúbal",
361
+ # --- ALENTEJO ---
362
+ "70": "Évora",
363
+ "71": "Évora",
364
+ "72": "Évora",
365
+ "73": "Portalegre",
366
+ "74": "Portalegre",
367
+ "75": "Setúbal", # Litoral Alentejano (Sines, Grândola), administrativamente de Setúbal.
368
+ "76": "Beja",
369
+ "77": "Beja",
370
+ "78": "Beja",
371
+ "79": "Beja",
372
+ # --- ALGARVE ---
373
+ "80": "Faro",
374
+ "81": "Faro",
375
+ "82": "Faro",
376
+ "83": "Faro",
377
+ "84": "Faro",
378
+ "85": "Faro",
379
+ "86": "Faro",
380
+ "87": "Faro",
381
+ "88": "Faro",
382
+ "89": "Faro",
383
+ # --- REGIÕES AUTÓNOMAS ---
384
+ "90": "Madeira",
385
+ "91": "Madeira",
386
+ "92": "Madeira",
387
+ "93": "Madeira",
388
+ "95": "Açores", # Ilha de São Miguel (Ponta Delgada).
389
+ "96": "Açores", # Ilha de São Miguel (Ribeira Grande) e Santa Maria.
390
+ "97": "Açores", # Ilha Terceira (Angra do Heroísmo).
391
+ "98": "Açores", # Ilhas de São Jorge, Graciosa, Faial, Pico.
392
+ "99": "Açores", # Ilhas de Flores e Corvo.
393
+ },
394
+ }
395
+
396
+ _NORMALIZED_PROVINCE_CACHE = {}
397
+ for country, provinces in PROVINCE_TO_POSTCODE.items():
398
+ # Get the original keys (e.g., "A Coruña", "Álava")
399
+ original_keys = list(provinces.keys())
400
+
401
+ # Create the normalized list (e.g., "a coruna", "alava")
402
+ normalized_choices = [unidecode(p).lower() for p in original_keys]
403
+
404
+ _NORMALIZED_PROVINCE_CACHE[country] = {
405
+ "choices": normalized_choices, # The list for rapidfuzz to search in
406
+ "keys": original_keys, # The list to find the name by index
407
+ }
408
+
409
+ # Source: https://github.com/ariankoochak/regex-patterns-of-all-countries
410
+ COUNTRY_PARSING_RULES = {
411
+ "es": {
412
+ "zip_validate_pattern": re.compile(r"^\d{5}$"),
413
+ "zip_search_pattern": re.compile(r"\b\d{5}\b"),
414
+ "phone_validate_pattern": re.compile(r"^(\+?34)?[67]\d{8}$"),
415
+ },
416
+ "pt": {
417
+ "zip_validate_pattern": re.compile(r"^\d{4}[- ]{0,1}\d{3}$|^\d{4}$"),
418
+ "zip_search_pattern": re.compile(r"\b\d{4}[- ]?\d{3}\b|\b\d{4}\b"),
419
+ "phone_validate_pattern": re.compile(r"^(\+?351)?9[1236]\d{7}$"),
420
+ },
421
+ }
422
+
423
+ MADRID_DISTRICT_DIRECT_PATCH = {
424
+ # Correcciones directas
425
+ "Aravaca": "Moncloa-Aravaca",
426
+ "Puerta de Hierro": "Fuencarral-El Pardo",
427
+ "Palacio": "Centro",
428
+ "Argüelles": "Moncloa-Aravaca",
429
+ "Barrio de La Estación": "Latina",
430
+ "Casa de Campo": "Moncloa-Aravaca",
431
+ "Universidad": "Centro",
432
+ "Valdezarza": "Moncloa-Aravaca",
433
+ "Cortes": "Centro",
434
+ "Barrio de la Latina": "Centro",
435
+ "Ciudad Universitaria": "Moncloa-Aravaca",
436
+ "Embajadores": "Centro",
437
+ "Justicia": "Centro",
438
+ "Sol": "Centro",
439
+ "Barrio de los Austrias": "Centro",
440
+ }
441
+
442
+ MADRID_DISTRICT_QUARTER_PATCH = {
443
+ # Reglas dependientes del quarter
444
+ ("Centro", "Atocha"): "Arganzuela",
445
+ ("Centro", "Gaztambide"): "Chamberí",
446
+ ("Centro", "Imperial"): "Arganzuela",
447
+ ("Centro", "Palos de Moguer"): "Arganzuela",
448
+ ("Arganzuela", "Embajadores"): "Centro",
449
+ ("Salamanca", "La Elipa"): "Ciudad Lineal",
450
+ ("Salamanca", "Ventas"): "Ciudad Lineal",
451
+ ("Tetuán", "La Paz"): "Fuencarral-El Pardo",
452
+ ("Tetuán", "San Cristóbal"): "Villaverde",
453
+ ("Tetuán", "Colonia de San Cristóbal"): "Villaverde",
454
+ ("Tetuán", "Valdezarza"): "Moncloa-Aravaca",
455
+ ("Chamberí", "Ciudad Universitaria"): "Moncloa-Aravaca",
456
+ ("Chamberí", "Justicia"): "Centro",
457
+ ("Chamberí", "Universidad"): "Centro",
458
+ ("Fuencarral-El Pardo", "Castilla"): "Chamartín",
459
+ ("Fuencarral-El Pardo", "Valdeacederas"): "Tetuán",
460
+ ("Fuencarral-El Pardo", "Valdezarza"): "Moncloa-Aravaca",
461
+ ("Moncloa-Aravaca", "Bellas Vistas"): "Tetuán",
462
+ ("Moncloa-Aravaca", "Berruguete"): "Tetuán",
463
+ ("Moncloa-Aravaca", "Campamento"): "Latina",
464
+ ("Moncloa-Aravaca", "Gaztambide"): "Chamberí",
465
+ ("Moncloa-Aravaca", "Lucero"): "Latina",
466
+ ("Moncloa-Aravaca", "Valdeacederas"): "Tetuán",
467
+ ("Moncloa-Aravaca", "Vallehermoso"): "Chamberí",
468
+ ("Latina", "Casa de Campo"): "Moncloa-Aravaca",
469
+ ("Villaverde", "San Fermín"): "Usera",
470
+ ("San Blas - Canillejas", "Concepción"): "Ciudad Lineal",
471
+ ("San Blas - Canillejas", "Quintana"): "Ciudad Lineal",
472
+ ("Barajas", "Palomas"): "Hortaleza",
473
+ }
474
+
475
+ MADRID_QUARTER_DIRECT_PATCH = {
476
+ "Barrio de la Latina": "Palacio",
477
+ "Barrio de las Letras": "Cortes",
478
+ "Barrio de los Austrias": "Palacio",
479
+ "Colonia de San Cristóbal": "San Cristóbal",
480
+ "Encinar de los Reyes": "Valdefuentes",
481
+ "La Elipa": "Ventas",
482
+ "Las Cárcavas - San Antonio": "Valdefuentes",
483
+ "Lavapiés": "Embajadores",
484
+ "Montecarmelo": "El Goloso",
485
+ "Puerta de Hierro": "Ciudad Universitaria",
486
+ "Villaverde Alto, Casco Histórico de Villaverde": "San Andrés",
487
+ "Villaverde Bajo": "Los Rosales",
488
+ "Virgen del Cortijo": "Valdefuentes",
489
+ "Las Acacias": "Acacias",
490
+ }
491
+
492
+ # Cutoff score for rapidfuzz in the name standardization function
493
+ STANDARD_THRESHOLD = 40
datamarket/utils/__init__.py
@@ -1 +1 @@
1
- from .main import *
1
+ from .main import * # noqa: F403
datamarket/utils/airflow.py
@@ -3,20 +3,23 @@
3
3
 
4
4
  import re
5
5
  import unicodedata
6
+
6
7
  import inflection
7
8
 
8
9
  ########################################################################################################################
9
10
  # FUNCTIONS
10
11
 
12
+
11
13
  def process_task_name(task_id):
12
- task_id = ''.join(
13
- f"_{unicodedata.name(c)}_" if not c.isalnum() else c for c in task_id
14
- if c.isalnum() or (unicodedata.category(c) not in ('Cc', 'Cf', 'Cs', 'Co', 'Cn'))
14
+ task_id = "".join(
15
+ f"_{unicodedata.name(c)}_" if not c.isalnum() else c
16
+ for c in task_id
17
+ if c.isalnum() or (unicodedata.category(c) not in ("Cc", "Cf", "Cs", "Co", "Cn"))
15
18
  )
16
- task_id = inflection.parameterize(task_id, separator='_')
19
+ task_id = inflection.parameterize(task_id, separator="_")
17
20
  task_id = task_id.lower()
18
- task_id = task_id.strip('_')
19
- task_id = re.sub(r'_+', '_', task_id)
21
+ task_id = task_id.strip("_")
22
+ task_id = re.sub(r"_+", "_", task_id)
20
23
  if task_id[0].isdigit():
21
- task_id = 'task_' + task_id
24
+ task_id = "task_" + task_id
22
25
  return task_id
datamarket/utils/alchemy.py
@@ -8,6 +8,7 @@ from sqlalchemy.ext.declarative import declarative_base
8
8
 
9
9
  Base = declarative_base()
10
10
 
11
+
11
12
  class View(Base):
12
13
  __abstract__ = True
13
14
  is_view = True
@@ -19,4 +20,4 @@ class View(Base):
19
20
  """
20
21
  conn.execute(f"""
21
22
  CREATE OR REPLACE VIEW {cls.__tablename__} AS {query}
22
- """)
23
+ """)
datamarket/utils/logs.py
@@ -0,0 +1,88 @@
1
+ from typing import Any, TypeAlias
2
+
3
+ # --- Type Definitions ---
4
+ AnsiCode: TypeAlias = str
5
+ StyleCode: TypeAlias = AnsiCode # e.g. BOLD, UNDERLINE
6
+ ShadeCode: TypeAlias = AnsiCode # e.g. GREEN, BLUE
7
+ ColorCode: TypeAlias = AnsiCode # Final combined result
8
+
9
+
10
+ class Color:
11
+ """Raw ANSI escape codes. Internal building blocks for the library."""
12
+
13
+ RESET: AnsiCode = "\033[0m"
14
+
15
+ # Styles (Combinable)
16
+ BOLD: StyleCode = "\033[1m"
17
+ UNDERLINE: StyleCode = "\033[4m"
18
+
19
+ # --- THE 14 PROTECTED SHADES ---
20
+ # Standard (Deeper tones)
21
+ S_GREEN: ShadeCode = "\033[32m"
22
+ S_BLUE: ShadeCode = "\033[34m"
23
+ S_PURPLE: ShadeCode = "\033[35m"
24
+ S_CYAN: ShadeCode = "\033[36m"
25
+ S_WHITE: ShadeCode = "\033[37m"
26
+
27
+ # High-Intensity (Vibrant tones)
28
+ H_GREY: ShadeCode = "\033[90m"
29
+ H_GREEN: ShadeCode = "\033[92m"
30
+ H_BLUE: ShadeCode = "\033[94m"
31
+ H_PURPLE: ShadeCode = "\033[95m"
32
+ H_CYAN: ShadeCode = "\033[96m"
33
+ H_WHITE: ShadeCode = "\033[97m"
34
+
35
+ # Extended Palette
36
+ TEAL: ShadeCode = "\033[38;5;30m"
37
+ LAVENDER: ShadeCode = "\033[38;5;147m"
38
+ OLIVE: ShadeCode = "\033[38;5;64m"
39
+
40
+
41
+ def combine(*codes: AnsiCode) -> ColorCode:
42
+ """Combines multiple ANSI codes (e.g., BOLD + GREEN)."""
43
+ return "".join(codes)
44
+
45
+
46
+ class SystemColor:
47
+ """
48
+ RESERVED: For core library internals.
49
+ Strictly BOLD or UNDERLINED to distinguish from scraper data.
50
+ """
51
+
52
+ # Verbs / Actions (BOLD)
53
+ BATCH_PIPELINE_STATS: ColorCode = combine(Color.BOLD, Color.H_PURPLE)
54
+
55
+ # State / Nouns (UNDERLINED)
56
+ PROCESS_BATCH_PROGRESS: ColorCode = combine(Color.UNDERLINE, Color.S_CYAN)
57
+
58
+
59
+ class ScraperColor:
60
+ """
61
+ USER-FACING: Standard colors for everyday scraper logic.
62
+ Raw shades only. It is up to the user how to apply these.
63
+ """
64
+
65
+ GREY: ShadeCode = Color.H_GREY
66
+ EMERALD: ShadeCode = Color.H_GREEN
67
+ FOREST: ShadeCode = Color.S_GREEN
68
+ SKY: ShadeCode = Color.H_BLUE
69
+ NAVY: ShadeCode = Color.S_BLUE
70
+ VIOLET: ShadeCode = Color.H_PURPLE
71
+ PLUM: ShadeCode = Color.S_PURPLE
72
+ CYAN: ShadeCode = Color.H_CYAN
73
+ TEAL: ShadeCode = Color.S_CYAN
74
+ WHITE: ShadeCode = Color.H_WHITE
75
+ SILVER: ShadeCode = Color.S_WHITE
76
+ LAVENDER: ShadeCode = Color.LAVENDER
77
+ OLIVE: ShadeCode = Color.OLIVE
78
+
79
+
80
+ def colorize(text: Any, color_code: ColorCode) -> str:
81
+ """
82
+ Wraps text in ANSI color codes.
83
+
84
+ Args:
85
+ text: The content to colorize (supports any type).
86
+ color_code: A ShadeCode, StyleCode, or combined ColorCode.
87
+ """
88
+ return f"{color_code}{text}{Color.RESET}"