datacontract-cli 0.10.23__py3-none-any.whl → 0.10.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. datacontract/__init__.py +13 -0
  2. datacontract/api.py +12 -5
  3. datacontract/catalog/catalog.py +5 -3
  4. datacontract/cli.py +116 -10
  5. datacontract/data_contract.py +143 -65
  6. datacontract/engines/data_contract_checks.py +366 -60
  7. datacontract/engines/data_contract_test.py +50 -4
  8. datacontract/engines/fastjsonschema/check_jsonschema.py +37 -19
  9. datacontract/engines/fastjsonschema/s3/s3_read_files.py +3 -2
  10. datacontract/engines/soda/check_soda_execute.py +22 -3
  11. datacontract/engines/soda/connections/athena.py +79 -0
  12. datacontract/engines/soda/connections/duckdb_connection.py +65 -6
  13. datacontract/engines/soda/connections/kafka.py +4 -2
  14. datacontract/export/avro_converter.py +20 -3
  15. datacontract/export/bigquery_converter.py +1 -1
  16. datacontract/export/dbt_converter.py +36 -7
  17. datacontract/export/dqx_converter.py +126 -0
  18. datacontract/export/duckdb_type_converter.py +57 -0
  19. datacontract/export/excel_exporter.py +923 -0
  20. datacontract/export/exporter.py +3 -0
  21. datacontract/export/exporter_factory.py +17 -1
  22. datacontract/export/great_expectations_converter.py +55 -5
  23. datacontract/export/{html_export.py → html_exporter.py} +31 -20
  24. datacontract/export/markdown_converter.py +134 -5
  25. datacontract/export/mermaid_exporter.py +110 -0
  26. datacontract/export/odcs_v3_exporter.py +187 -145
  27. datacontract/export/protobuf_converter.py +163 -69
  28. datacontract/export/rdf_converter.py +2 -2
  29. datacontract/export/sodacl_converter.py +9 -1
  30. datacontract/export/spark_converter.py +31 -4
  31. datacontract/export/sql_converter.py +6 -2
  32. datacontract/export/sql_type_converter.py +20 -8
  33. datacontract/imports/avro_importer.py +63 -12
  34. datacontract/imports/csv_importer.py +111 -57
  35. datacontract/imports/excel_importer.py +1111 -0
  36. datacontract/imports/importer.py +16 -3
  37. datacontract/imports/importer_factory.py +17 -0
  38. datacontract/imports/json_importer.py +325 -0
  39. datacontract/imports/odcs_importer.py +2 -2
  40. datacontract/imports/odcs_v3_importer.py +351 -151
  41. datacontract/imports/protobuf_importer.py +264 -0
  42. datacontract/imports/spark_importer.py +117 -13
  43. datacontract/imports/sql_importer.py +32 -16
  44. datacontract/imports/unity_importer.py +84 -38
  45. datacontract/init/init_template.py +1 -1
  46. datacontract/integration/datamesh_manager.py +16 -2
  47. datacontract/lint/resolve.py +112 -23
  48. datacontract/lint/schema.py +24 -15
  49. datacontract/model/data_contract_specification/__init__.py +1 -0
  50. datacontract/model/odcs.py +13 -0
  51. datacontract/model/run.py +3 -0
  52. datacontract/output/junit_test_results.py +3 -3
  53. datacontract/schemas/datacontract-1.1.0.init.yaml +1 -1
  54. datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
  55. datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
  56. datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
  57. datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
  58. datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
  59. datacontract/templates/datacontract.html +54 -3
  60. datacontract/templates/datacontract_odcs.html +685 -0
  61. datacontract/templates/index.html +5 -2
  62. datacontract/templates/partials/server.html +2 -0
  63. datacontract/templates/style/output.css +319 -145
  64. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/METADATA +656 -431
  65. datacontract_cli-0.10.37.dist-info/RECORD +119 -0
  66. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
  67. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
  68. datacontract/export/csv_type_converter.py +0 -36
  69. datacontract/lint/lint.py +0 -142
  70. datacontract/lint/linters/description_linter.py +0 -35
  71. datacontract/lint/linters/field_pattern_linter.py +0 -34
  72. datacontract/lint/linters/field_reference_linter.py +0 -48
  73. datacontract/lint/linters/notice_period_linter.py +0 -55
  74. datacontract/lint/linters/quality_schema_linter.py +0 -52
  75. datacontract/lint/linters/valid_constraints_linter.py +0 -100
  76. datacontract/model/data_contract_specification.py +0 -327
  77. datacontract_cli-0.10.23.dist-info/RECORD +0 -113
  78. /datacontract/{lint/linters → output}/__init__.py +0 -0
  79. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
  80. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1111 @@
1
+ import logging
2
+ import os
3
+ from decimal import Decimal
4
+ from typing import Any, Dict, List, Optional
5
+
6
+ import openpyxl
7
+ from open_data_contract_standard.model import (
8
+ AuthoritativeDefinition,
9
+ CustomProperty,
10
+ DataQuality,
11
+ OpenDataContractStandard,
12
+ Role,
13
+ SchemaObject,
14
+ SchemaProperty,
15
+ Server,
16
+ ServiceLevelAgreementProperty,
17
+ Support,
18
+ Team,
19
+ )
20
+ from openpyxl.cell.cell import Cell
21
+ from openpyxl.workbook.workbook import Workbook
22
+ from openpyxl.worksheet.worksheet import Worksheet
23
+
24
+ from datacontract.imports.importer import Importer
25
+ from datacontract.model.data_contract_specification import (
26
+ DataContractSpecification,
27
+ )
28
+ from datacontract.model.exceptions import DataContractException
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
class ExcelImporter(Importer):
    """Importer that reads an ODCS data contract from an Excel workbook template."""

    def import_source(
        self,
        data_contract_specification: DataContractSpecification | OpenDataContractStandard,
        source: str,
        import_args: dict,
    ) -> DataContractSpecification | OpenDataContractStandard:
        # The passed-in specification and import_args are intentionally unused:
        # the Excel template always yields a complete OpenDataContractStandard.
        return import_excel_as_odcs(source)
41
+
42
+
43
def import_excel_as_odcs(excel_file_path: str) -> OpenDataContractStandard:
    """
    Import an Excel file and convert it to an OpenDataContractStandard object

    Args:
        excel_file_path: Path to the Excel file

    Returns:
        OpenDataContractStandard object

    Raises:
        FileNotFoundError: if no file exists at excel_file_path
        DataContractException: if the workbook cannot be opened or parsed
    """
    if not os.path.exists(excel_file_path):
        raise FileNotFoundError(f"Excel file not found: {excel_file_path}")

    try:
        # data_only=True reads the last computed cell values instead of formulas
        workbook = openpyxl.load_workbook(excel_file_path, data_only=True)
    except Exception as e:
        raise DataContractException(
            type="schema",
            name="Parse excel contract",
            reason=f"Failed to open Excel file: {excel_file_path}",
            engine="datacontract",
            original_exception=e,
        )

    try:
        # Get description values
        purpose = get_cell_value_by_name(workbook, "description.purpose")
        limitations = get_cell_value_by_name(workbook, "description.limitations")
        usage = get_cell_value_by_name(workbook, "description.usage")

        # Build description dict
        description = None
        if purpose or limitations or usage:
            description = {"purpose": purpose, "limitations": limitations, "usage": usage}

        # Get tags as a list
        tags_str = get_cell_value_by_name(workbook, "tags")
        tags = None
        if tags_str:
            tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()]

        # Import quality data first (standalone from schemas)
        quality_map = import_quality(workbook)

        # Import schemas
        schemas = import_schemas(workbook)

        # Attach quality to schemas and properties
        schemas_with_quality = attach_quality_to_schemas(schemas, quality_map)

        # Import other components
        support = import_support(workbook)
        team = import_team(workbook)
        roles = import_roles(workbook)
        sla_properties = import_sla_properties(workbook)
        servers = import_servers(workbook)
        price = import_price(workbook)
        custom_properties = import_custom_properties(workbook)

        # Create the ODCS object with proper object creation
        odcs = OpenDataContractStandard(
            apiVersion=get_cell_value_by_name(workbook, "apiVersion"),
            kind=get_cell_value_by_name(workbook, "kind"),
            id=get_cell_value_by_name(workbook, "id"),
            name=get_cell_value_by_name(workbook, "name"),
            version=get_cell_value_by_name(workbook, "version"),
            status=get_cell_value_by_name(workbook, "status"),
            domain=get_cell_value_by_name(workbook, "domain"),
            dataProduct=get_cell_value_by_name(workbook, "dataProduct"),
            tenant=get_cell_value_by_name(workbook, "tenant"),
            description=description,
            tags=tags,
            schema=schemas_with_quality,
            support=support,
            price=price,
            team=team,
            roles=roles,
            slaDefaultElement=get_cell_value_by_name(workbook, "slaDefaultElement"),
            slaProperties=sla_properties,
            servers=servers,
            customProperties=custom_properties,
        )

        return odcs
    except DataContractException:
        # A nested step already raised a well-formed contract error; re-wrapping
        # it in another DataContractException would bury the specific reason.
        raise
    except Exception as e:
        logger.error(f"Error importing Excel file: {str(e)}")
        raise DataContractException(
            type="schema",
            name="Parse excel contract",
            reason=f"Failed to parse Excel file: {excel_file_path}",
            engine="datacontract",
            original_exception=e,
        )
    finally:
        workbook.close()
138
+
139
+
140
def import_schemas(workbook) -> Optional[List[SchemaObject]]:
    """Collect schema definitions from every worksheet named 'Schema <name>'."""
    collected: List[SchemaObject] = []

    for title in workbook.sheetnames:
        # Only real schema sheets; the placeholder template sheet is skipped.
        if not title.startswith("Schema ") or title == "Schema <table_name>":
            continue

        worksheet = workbook[title]
        name = get_cell_value_by_name_in_sheet(worksheet, "schema.name")
        if not name:
            continue

        # Comma-separated tags cell, trimmed and filtered of empties.
        raw_tags = get_cell_value_by_name_in_sheet(worksheet, "schema.tags")
        tag_list = None
        if raw_tags:
            tag_list = [item.strip() for item in raw_tags.split(",") if item.strip()]

        collected.append(
            SchemaObject(
                name=name,
                logicalType="object",
                physicalType=get_cell_value_by_name_in_sheet(worksheet, "schema.physicalType"),
                physicalName=get_cell_value_by_name_in_sheet(worksheet, "schema.physicalName"),
                description=get_cell_value_by_name_in_sheet(worksheet, "schema.description"),
                businessName=get_cell_value_by_name_in_sheet(worksheet, "schema.businessName"),
                dataGranularityDescription=get_cell_value_by_name_in_sheet(
                    worksheet, "schema.dataGranularityDescription"
                ),
                authoritativeDefinitions=None,
                properties=import_properties(worksheet),
                quality=None,  # Quality is attached later from the quality map
                customProperties=None,
                tags=tag_list,
            )
        )

    return collected or None
175
+
176
+
177
def import_properties(sheet) -> Optional[List[SchemaProperty]]:
    """Extract properties from the schema sheet.

    Reads the rows covered by the "schema.properties" named range, builds one
    SchemaProperty per non-empty row, then re-assembles dotted names
    ("parent.child") into nested properties (or array items when the parent's
    logical type is "array").

    Returns:
        The list of root-level properties, or None when nothing usable was
        found or the sheet could not be parsed (logged as a warning).
    """
    try:
        # Find the properties table
        properties_range = get_range_by_name_in_sheet(sheet, "schema.properties")
        if not properties_range:
            return None

        # Materialize the rows once: sheet.rows is a fresh generator-backed
        # view on every access, so re-listing it per iteration was O(n^2).
        rows = list(sheet.rows)

        # Get header row to map column names to indices
        header_row = rows[properties_range[0] - 1]  # Convert to 0-based indexing
        headers = {}
        for i, cell in enumerate(header_row):
            if cell.value:
                headers[cell.value.lower()] = i

        # Process property rows
        property_lookup = {}  # Dictionary to keep track of properties by name for nesting

        # First, create all properties
        for row_idx in range(properties_range[0], properties_range[1]):
            if len(rows) < row_idx + 1:
                break
            row = rows[row_idx]

            # Skip empty rows or header row
            property_name = get_cell_value(row, headers.get("property"))
            if not property_name or row_idx == properties_range[0] - 1:
                continue

            # Create property object
            property_obj = SchemaProperty(
                name=property_name,
                logicalType=get_cell_value(row, headers.get("logical type")),
                logicalTypeOptions=import_logical_type_options(row, headers),
                physicalType=get_cell_value(row, headers.get("physical type")),
                physicalName=get_cell_value(row, headers.get("physical name")),
                description=get_cell_value(row, headers.get("description")),
                businessName=get_cell_value(row, headers.get("business name")),
                required=parse_boolean(get_cell_value(row, headers.get("required"))),
                unique=parse_boolean(get_cell_value(row, headers.get("unique"))),
                primaryKey=parse_boolean(get_cell_value(row, headers.get("primary key"))),
                primaryKeyPosition=parse_integer(get_cell_value(row, headers.get("primary key position"))),
                partitioned=parse_boolean(get_cell_value(row, headers.get("partitioned"))),
                partitionKeyPosition=parse_integer(get_cell_value(row, headers.get("partition key position"))),
                criticalDataElement=parse_boolean(get_cell_value(row, headers.get("critical data element status"))),
                classification=get_cell_value(row, headers.get("classification")),
                transformLogic=get_cell_value(row, headers.get("transform logic")),
                transformDescription=get_cell_value(row, headers.get("transform description")),
                encryptedName=get_cell_value(row, headers.get("encrypted name")),
                properties=None,
                items=None,
                tags=get_property_tags(headers, row),
            )

            # Authoritative definitions (only when both URL and type are given)
            authoritative_definition_url = get_cell_value(row, headers.get("authoritative definition url"))
            authoritative_definition_type = get_cell_value(row, headers.get("authoritative definition type"))
            if authoritative_definition_url and authoritative_definition_type:
                property_obj.authoritativeDefinitions = [
                    AuthoritativeDefinition(
                        url=authoritative_definition_url,
                        type=authoritative_definition_type,
                    )
                ]

            # Quality will be attached later via quality_map
            property_obj.quality = None

            # Transform sources (comma-separated)
            transform_sources = get_cell_value(row, headers.get("transform sources"))
            if transform_sources:
                property_obj.transformSourceObjects = [
                    src.strip() for src in transform_sources.split(",") if src.strip()
                ]

            # Examples (comma-separated)
            examples = get_cell_value(row, headers.get("example(s)"))
            if examples:
                property_obj.examples = [ex.strip() for ex in examples.split(",") if ex.strip()]

            # Add to lookup dictionary
            property_lookup[property_name] = property_obj

        # Now organize nested properties: a dotted name attaches to its parent;
        # a nested property whose parent is absent is silently dropped.
        root_properties = []
        for name, prop in property_lookup.items():
            if "." in name:
                # This is a nested property
                parent_name = name.rsplit(".", 1)[0]
                child_name = name.rsplit(".", 1)[1]

                if parent_name in property_lookup:
                    parent_prop = property_lookup[parent_name]
                    # Update the property name to be just the child part
                    prop.name = child_name

                    # If parent is an array, set as items
                    if parent_prop.logicalType == "array":
                        parent_prop.items = prop
                    else:
                        # Otherwise add to properties list
                        if parent_prop.properties is None:
                            parent_prop.properties = []
                        parent_prop.properties.append(prop)
            else:
                # This is a root property
                root_properties.append(prop)

        return root_properties if root_properties else None
    except Exception as e:
        logger.warning(f"Error importing properties: {str(e)}")
        return None
289
+
290
+
291
def import_logical_type_options(row, headers):
    """Build the logicalTypeOptions mapping for a property row.

    Returns a dict containing only the options that were actually set in the
    row, or None when every option is absent.
    """
    raw_required = get_cell_value(row, headers.get("required properties"))
    required = None
    if raw_required:
        required = [entry.strip() for entry in raw_required.split(",") if entry.strip()]

    candidates = {
        "minLength": parse_integer(get_cell_value(row, headers.get("minimum length"))),
        "maxLength": parse_integer(get_cell_value(row, headers.get("maximum length"))),
        "pattern": get_cell_value(row, headers.get("pattern")),
        "format": get_cell_value(row, headers.get("format")),
        "exclusiveMaximum": parse_boolean(get_cell_value(row, headers.get("exclusive maximum"))),
        "exclusiveMinimum": parse_boolean(get_cell_value(row, headers.get("exclusive minimum"))),
        "minimum": get_cell_value(row, headers.get("minimum")),
        "maximum": get_cell_value(row, headers.get("maximum")),
        "multipleOf": get_cell_value(row, headers.get("multiple of")),
        "minItems": parse_integer(get_cell_value(row, headers.get("minimum items"))),
        "maxItems": parse_integer(get_cell_value(row, headers.get("maximum items"))),
        "uniqueItems": parse_boolean(get_cell_value(row, headers.get("unique items"))),
        "maxProperties": parse_integer(get_cell_value(row, headers.get("maximum properties"))),
        "minProperties": parse_integer(get_cell_value(row, headers.get("minimum properties"))),
        "required": required,
    }

    # Keep only the options that were set; an empty result means "no options".
    options = {key: value for key, value in candidates.items() if value is not None}
    return options if options else None
325
+
326
+
327
def get_property_tags(headers, row):
    """Return the comma-separated tags of a property row as a list, or None."""
    raw = get_cell_value(row, headers.get("tags"))
    if not raw:
        return None
    return [entry.strip() for entry in raw.split(",") if entry.strip()]
332
+
333
+
334
def parse_boolean(value):
    """Parse a cell value to a boolean.

    Recognizes "true", "yes" and "1" (case-insensitive, whitespace-trimmed)
    as True; anything else is False. Returns None for a missing value.

    Non-string inputs (e.g. a bool read straight from a cell) are converted
    via str() first instead of raising AttributeError on .lower().
    """
    if value is None:
        return None
    normalized = str(value).strip().lower()
    return normalized in ("true", "yes", "1")
340
+
341
+
342
def parse_integer(value):
    """Parse a cell value to an int.

    openpyxl (with data_only=True) renders numeric cells as floats, so integer
    columns frequently arrive as "5.0"; int("5.0") raises ValueError and the
    previous implementation silently dropped them. Integral floats are now
    accepted; non-integral or unparseable values still return None.
    """
    if value is None:
        return None
    try:
        return int(value)
    except (ValueError, TypeError):
        pass
    try:
        as_float = float(value)
    except (ValueError, TypeError):
        return None
    # Only accept exact integers ("5.0"), never truncate ("5.5" -> None).
    return int(as_float) if as_float.is_integer() else None
350
+
351
+
352
def get_range_by_name_in_workbook(workbook: Workbook, name: str) -> tuple | None:
    """Resolve a workbook-level defined name to its (start_row, end_row) pair."""
    try:
        for defined in workbook.defined_names:
            if defined != name:
                continue
            for _sheet_title, address in workbook.defined_names[defined].destinations:
                if ":" not in address:
                    # Single cell: a one-row range.
                    only_row = int("".join(filter(str.isdigit, address)))
                    return only_row, only_row
                # Multi-row range such as "$A$3:$F$10" -> (3, 10).
                first_ref, last_ref = address.split(":")
                first_row = int("".join(filter(str.isdigit, first_ref)))
                last_row = int("".join(filter(str.isdigit, last_ref)))
                return first_row, last_row
    except Exception as e:
        logger.warning(f"Error finding range by name {name}: {str(e)}")
    return None
372
+
373
+
374
def get_range_by_name_in_sheet(sheet: Worksheet, name: str) -> tuple | None:
    """Resolve a sheet-scoped defined name to its (start_row, end_row) pair.

    Only destinations on this very sheet are considered; others are skipped.
    """
    try:
        for candidate in sheet.defined_names:
            if candidate != name:
                continue
            for sheet_title, address in sheet.defined_names[candidate].destinations:
                # Ignore destinations that point at a different sheet.
                if sheet_title != sheet.title:
                    continue
                if ":" in address:
                    # Multi-row range such as "$A$2:$E$9" -> (2, 9).
                    lo_ref, hi_ref = address.split(":")
                    lo = int("".join(filter(str.isdigit, lo_ref)))
                    hi = int("".join(filter(str.isdigit, hi_ref)))
                    return (lo, hi)
                # Single cell: a one-row range.
                row_number = int("".join(filter(str.isdigit, address)))
                return (row_number, row_number)
    except Exception as e:
        logger.warning(f"Error finding range by name {name}: {str(e)}")
    return None
396
+
397
+
398
def get_cell_by_name_in_workbook(workbook: Workbook, name: str) -> Cell | None:
    """Find a cell by a workbook-level defined name.

    Returns the cell of the first destination of the defined name, or None
    when the name does not exist or resolution fails (logged as a warning).
    """
    try:
        for defined_name in workbook.defined_names:
            if defined_name != name:
                continue
            for sheet_title, coordinate in workbook.defined_names[defined_name].destinations:
                # The destination's sheet title indexes the workbook directly;
                # the previous `sheet_title == sheet.title` comparison against
                # the sheet just fetched by that very title was a tautology.
                return workbook[sheet_title][coordinate]
    except Exception as e:
        logger.warning(f"Error finding cell by name {name}: {str(e)}")
    return None
411
+
412
+
413
def get_cell_value_by_name(workbook: Workbook, name: str) -> str | None:
    """Get the trimmed string value of a workbook-level named cell.

    Returns None when the name does not resolve, the cell is empty, or the
    value is blank after trimming.
    """
    try:
        cell = get_cell_by_name_in_workbook(workbook, name)
        # Guard against a missing name: previously `cell.value` raised
        # AttributeError on None, which the broad except below swallowed
        # while logging a spurious warning for every absent optional field.
        if cell is not None and cell.value is not None:
            value = str(cell.value).strip()
            return value if value else None
    except Exception as e:
        logger.warning(f"Error getting cell value by name {name}: {str(e)}")
    return None
423
+
424
+
425
def get_cell_value_by_name_in_sheet(sheet: Worksheet, name: str) -> str | None:
    """Get the trimmed string value of a named cell scoped to this sheet.

    Destinations pointing at other sheets, and empty cells, are skipped;
    returns None when nothing usable is found.
    """
    try:
        defined = sheet.defined_names
        for candidate in defined:
            if candidate != name:
                continue
            for sheet_title, coordinate in defined[candidate].destinations:
                if sheet_title != sheet.title:
                    continue
                raw = sheet[coordinate].value
                if raw is not None:
                    trimmed = str(raw).strip()
                    return trimmed if trimmed else None
    except Exception as e:
        logger.warning(f"Error getting cell value by name {name} in sheet {sheet.title}: {str(e)}")
    return None
440
+
441
+
442
def get_cell_value(row, col_idx):
    """Safely read a trimmed string from a row at the given column index.

    Returns None for a missing column index, an out-of-range index, an empty
    cell, or a value that is blank after trimming.
    """
    if col_idx is None:
        return None
    try:
        raw = row[col_idx].value
    except (IndexError, AttributeError):
        return None
    if raw is None:
        return None
    text = str(raw).strip()
    return text or None
454
+
455
+
456
def get_cell_value_by_position(sheet, row_idx, col_idx):
    """Get a trimmed cell value by 0-based row and column indices."""
    try:
        # openpyxl addresses cells with 1-based indices.
        raw = sheet.cell(row=row_idx + 1, column=col_idx + 1).value
        if raw is None:
            return None
        text = str(raw).strip()
        return text if text else None
    except Exception as e:
        logger.warning(f"Error getting cell value by position ({row_idx}, {col_idx}): {str(e)}")
        return None
467
+
468
+
469
def import_support(workbook: Workbook) -> Optional[List[Support]]:
    """Extract support channels from the "Support" sheet.

    Returns None when the sheet, the "support" named range, or usable rows
    are missing, or when parsing fails (logged as a warning).
    """
    try:
        support_sheet = workbook["Support"]
        if not support_sheet:
            return None

        support_range = get_range_by_name_in_workbook(workbook, "support")
        if not support_range:
            return None

        # Materialize rows once: sheet.rows is a fresh generator per access,
        # so re-listing it for every row made the loop quadratic.
        rows = list(support_sheet.rows)

        header_row = rows[support_range[0] - 1]
        headers = {}
        for i, cell in enumerate(header_row):
            if cell.value:
                headers[cell.value.lower()] = i

        support_channels = []
        for row_idx in range(support_range[0], support_range[1]):
            if len(rows) < row_idx + 1:
                break
            row = rows[row_idx]

            # A row without a channel (or the header row itself) is skipped.
            channel = get_cell_value(row, headers.get("channel"))
            if not channel or row_idx == support_range[0] - 1:
                continue

            support_channel = Support(
                channel=channel,
                url=get_cell_value(row, headers.get("channel url")),
                description=get_cell_value(row, headers.get("description")),
                tool=get_cell_value(row, headers.get("tool")),
                scope=get_cell_value(row, headers.get("scope")),
                invitationUrl=get_cell_value(row, headers.get("invitation url")),
            )

            support_channels.append(support_channel)
    except Exception as e:
        logger.warning(f"Error importing support: {str(e)}")
        return None

    return support_channels if support_channels else None
511
+
512
+
513
def import_team(workbook: Workbook) -> Optional[List[Team]]:
    """Extract team members from the "Team" sheet.

    Returns None when the sheet, the "team" named range, or usable rows are
    missing, or when parsing fails (logged as a warning).
    """
    try:
        team_sheet = workbook["Team"]
        if not team_sheet:
            return None

        team_range = get_range_by_name_in_workbook(workbook, "team")
        if not team_range:
            return None

        # Materialize rows once: sheet.rows is a fresh generator per access,
        # so re-listing it for every row made the loop quadratic.
        rows = list(team_sheet.rows)

        header_row = rows[team_range[0] - 1]
        headers = {}
        for i, cell in enumerate(header_row):
            if cell.value:
                headers[cell.value.lower()] = i

        team_members = []
        for row_idx in range(team_range[0], team_range[1]):
            if len(rows) < row_idx + 1:
                break
            row = rows[row_idx]

            username = get_cell_value(row, headers.get("username"))
            name = get_cell_value(row, headers.get("name"))
            role = get_cell_value(row, headers.get("role"))

            # A row counts only when at least one identifying field is set.
            if (not (username or name or role)) or row_idx == team_range[0] - 1:
                continue

            team_member = Team(
                username=username,
                name=name,
                description=get_cell_value(row, headers.get("description")),
                role=role,
                dateIn=get_cell_value(row, headers.get("date in")),
                dateOut=get_cell_value(row, headers.get("date out")),
                replacedByUsername=get_cell_value(row, headers.get("replaced by username")),
            )

            team_members.append(team_member)
    except Exception as e:
        logger.warning(f"Error importing team: {str(e)}")
        return None

    return team_members if team_members else None
559
+
560
+
561
def import_roles(workbook: Workbook) -> Optional[List[Role]]:
    """Extract roles from the "Roles" sheet.

    Returns None when the sheet, the sheet-scoped "roles" named range, or
    usable rows are missing, or when parsing fails (logged as a warning).
    """
    try:
        roles_sheet = workbook["Roles"]
        if not roles_sheet:
            return None

        roles_range = get_range_by_name_in_sheet(roles_sheet, "roles")
        if not roles_range:
            return None

        # Materialize rows once: sheet.rows is a fresh generator per access,
        # so re-listing it for every row made the loop quadratic.
        rows = list(roles_sheet.rows)

        header_row = rows[roles_range[0] - 1]
        headers = {}
        for i, cell in enumerate(header_row):
            if cell.value:
                headers[cell.value.lower()] = i

        roles_list = []
        for row_idx in range(roles_range[0], roles_range[1]):
            if len(rows) < row_idx + 1:
                break
            row = rows[row_idx]

            # A row without a role name (or the header row itself) is skipped.
            role_name = get_cell_value(row, headers.get("role"))
            if not role_name or row_idx == roles_range[0] - 1:
                continue

            role = Role(
                role=role_name,
                description=get_cell_value(row, headers.get("description")),
                access=get_cell_value(row, headers.get("access")),
                firstLevelApprovers=get_cell_value(row, headers.get("1st level approvers")),
                secondLevelApprovers=get_cell_value(row, headers.get("2nd level approvers")),
                customProperties=None,
            )

            roles_list.append(role)
    except Exception as e:
        logger.warning(f"Error importing roles: {str(e)}")
        return None

    return roles_list if roles_list else None
603
+
604
+
605
def import_sla_properties(workbook: Workbook) -> Optional[List[ServiceLevelAgreementProperty]]:
    """Extract SLA properties from the "SLA" sheet.

    Returns None when the sheet, the sheet-scoped "slaProperties" named range,
    or usable rows are missing, or when parsing fails (logged as a warning).
    """
    try:
        sla_sheet = workbook["SLA"]
        if not sla_sheet:
            return None

        sla_range = get_range_by_name_in_sheet(sla_sheet, "slaProperties")
        if not sla_range:
            return None

        # Materialize rows once: sheet.rows is a fresh generator per access,
        # so re-listing it for every row made the loop quadratic.
        rows = list(sla_sheet.rows)

        header_row = rows[sla_range[0] - 1]
        headers = {}
        for i, cell in enumerate(header_row):
            if cell.value:
                headers[cell.value.lower()] = i

        sla_properties = []
        for row_idx in range(sla_range[0], sla_range[1]):
            if len(rows) < row_idx + 1:
                break
            row = rows[row_idx]

            # A row without a property name (or the header row) is skipped.
            property_name = get_cell_value(row, headers.get("property"))
            if not property_name or row_idx == sla_range[0] - 1:
                continue

            sla_property = ServiceLevelAgreementProperty(
                property=property_name,
                value=get_cell_value(row, headers.get("value")),
                valueExt=get_cell_value(row, headers.get("extended value")),
                unit=get_cell_value(row, headers.get("unit")),
                element=get_cell_value(row, headers.get("element")),
                driver=get_cell_value(row, headers.get("driver")),
            )

            sla_properties.append(sla_property)
    except Exception as e:
        logger.warning(f"Error importing SLA properties: {str(e)}")
        return None

    return sla_properties if sla_properties else None
647
+
648
+
649
+ def import_servers(workbook) -> Optional[List[Server]]:
650
+ """Extract server information from the Servers sheet"""
651
+ try:
652
+ sheet = workbook["Servers"]
653
+ if not sheet:
654
+ return None
655
+
656
+ # Find the server cells
657
+ server_cell = get_cell_by_name_in_workbook(workbook, "servers.server")
658
+ if not server_cell:
659
+ return None
660
+
661
+ # Get servers (horizontally arranged in the sheet)
662
+ servers = []
663
+ col_idx = server_cell.column - 1 # 0-based index
664
+ row_idx = server_cell.row - 1 # 0-based index
665
+
666
+ index = 0
667
+ while True:
668
+ server_name = get_cell_value_by_position(sheet, row_idx, col_idx + index)
669
+ if not server_name:
670
+ break
671
+
672
+ server = Server(
673
+ server=server_name,
674
+ description=get_server_cell_value(workbook, sheet, "servers.description", index),
675
+ environment=get_server_cell_value(workbook, sheet, "servers.environment", index),
676
+ type=get_server_cell_value(workbook, sheet, "servers.type", index),
677
+ )
678
+
679
+ # Get type-specific fields
680
+ server_type = server.type
681
+ if server_type:
682
+ if server_type == "azure":
683
+ server.location = get_server_cell_value(workbook, sheet, "servers.azure.location", index)
684
+ server.format = get_server_cell_value(workbook, sheet, "servers.azure.format", index)
685
+ server.delimiter = get_server_cell_value(workbook, sheet, "servers.azure.delimiter", index)
686
+ elif server_type == "bigquery":
687
+ server.project = get_server_cell_value(workbook, sheet, "servers.bigquery.project", index)
688
+ server.dataset = get_server_cell_value(workbook, sheet, "servers.bigquery.dataset", index)
689
+ elif server_type == "databricks":
690
+ server.catalog = get_server_cell_value(workbook, sheet, "servers.databricks.catalog", index)
691
+ server.host = get_server_cell_value(workbook, sheet, "servers.databricks.host", index)
692
+ server.schema_ = get_server_cell_value(workbook, sheet, "servers.databricks.schema", index)
693
+ elif server_type == "glue":
694
+ server.account = get_server_cell_value(workbook, sheet, "servers.glue.account", index)
695
+ server.database = get_server_cell_value(workbook, sheet, "servers.glue.database", index)
696
+ server.format = get_server_cell_value(workbook, sheet, "servers.glue.format", index)
697
+ server.location = get_server_cell_value(workbook, sheet, "servers.glue.location", index)
698
+ elif server_type == "kafka":
699
+ server.format = get_server_cell_value(workbook, sheet, "servers.kafka.format", index)
700
+ server.host = get_server_cell_value(workbook, sheet, "servers.kafka.host", index)
701
+ server.topic = get_server_cell_value(workbook, sheet, "servers.kafka.topic", index)
702
+ elif server_type == "postgres":
703
+ server.database = get_server_cell_value(workbook, sheet, "servers.postgres.database", index)
704
+ server.host = get_server_cell_value(workbook, sheet, "servers.postgres.host", index)
705
+ server.port = get_server_cell_value(workbook, sheet, "servers.postgres.port", index)
706
+ server.schema_ = get_server_cell_value(workbook, sheet, "servers.postgres.schema", index)
707
+ elif server_type == "s3":
708
+ server.delimiter = get_server_cell_value(workbook, sheet, "servers.s3.delimiter", index)
709
+ server.endpointUrl = get_server_cell_value(workbook, sheet, "servers.s3.endpointUrl", index)
710
+ server.format = get_server_cell_value(workbook, sheet, "servers.s3.format", index)
711
+ server.location = get_server_cell_value(workbook, sheet, "servers.s3.location", index)
712
+ elif server_type == "snowflake":
713
+ server.account = get_server_cell_value(workbook, sheet, "servers.snowflake.account", index)
714
+ server.database = get_server_cell_value(workbook, sheet, "servers.snowflake.database", index)
715
+ server.host = get_server_cell_value(workbook, sheet, "servers.snowflake.host", index)
716
+ server.port = get_server_cell_value(workbook, sheet, "servers.snowflake.port", index)
717
+ server.schema_ = get_server_cell_value(workbook, sheet, "servers.snowflake.schema", index)
718
+ server.warehouse = get_server_cell_value(workbook, sheet, "servers.snowflake.warehouse", index)
719
+ elif server_type == "sqlserver":
720
+ server.database = get_server_cell_value(workbook, sheet, "servers.sqlserver.database", index)
721
+ server.host = get_server_cell_value(workbook, sheet, "servers.sqlserver.host", index)
722
+ server.port = get_server_cell_value(workbook, sheet, "servers.sqlserver.port", index)
723
+ server.schema_ = get_server_cell_value(workbook, sheet, "servers.sqlserver.schema", index)
724
+ else:
725
+ # Custom server type - grab all possible fields
726
+ server.account = get_server_cell_value(workbook, sheet, "servers.custom.account", index)
727
+ server.catalog = get_server_cell_value(workbook, sheet, "servers.custom.catalog", index)
728
+ server.database = get_server_cell_value(workbook, sheet, "servers.custom.database", index)
729
+ server.dataset = get_server_cell_value(workbook, sheet, "servers.custom.dataset", index)
730
+ server.delimiter = get_server_cell_value(workbook, sheet, "servers.custom.delimiter", index)
731
+ server.endpointUrl = get_server_cell_value(workbook, sheet, "servers.custom.endpointUrl", index)
732
+ server.format = get_server_cell_value(workbook, sheet, "servers.custom.format", index)
733
+ server.host = get_server_cell_value(workbook, sheet, "servers.custom.host", index)
734
+ server.location = get_server_cell_value(workbook, sheet, "servers.custom.location", index)
735
+ server.path = get_server_cell_value(workbook, sheet, "servers.custom.path", index)
736
+ server.port = get_server_cell_value(workbook, sheet, "servers.custom.port", index)
737
+ server.project = get_server_cell_value(workbook, sheet, "servers.custom.project", index)
738
+ server.schema_ = get_server_cell_value(workbook, sheet, "servers.custom.schema", index)
739
+ server.stagingDir = get_server_cell_value(workbook, sheet, "servers.custom.stagingDir", index)
740
+ server.table = get_server_cell_value(workbook, sheet, "servers.custom.table", index)
741
+ server.view = get_server_cell_value(workbook, sheet, "servers.custom.view", index)
742
+ server.warehouse = get_server_cell_value(workbook, sheet, "servers.custom.warehouse", index)
743
+ server.region = get_server_cell_value(workbook, sheet, "servers.custom.region", index)
744
+ server.regionName = get_server_cell_value(workbook, sheet, "servers.custom.regionName", index)
745
+ server.serviceName = get_server_cell_value(workbook, sheet, "servers.custom.serviceName", index)
746
+
747
+ servers.append(server)
748
+ index += 1
749
+ except Exception as e:
750
+ logger.warning(f"Error importing servers: {str(e)}")
751
+ return None
752
+
753
+ return servers if servers else None
754
+
755
+
756
def get_server_cell_value(workbook: Workbook, sheet: Worksheet, name: str, col_offset: int):
    """Read the value of a horizontally-arranged server property cell.

    Server entries occupy one column each, so the defined name is resolved
    to its anchor cell and then shifted right by ``col_offset`` columns
    before looking up the value on ``sheet``. Returns None when the name
    does not exist or the lookup fails.
    """
    try:
        anchor = get_cell_by_name_in_workbook(workbook, name)
        if anchor:
            # anchor.row/column are 1-based; the position helper expects 0-based.
            zero_based_row = anchor.row - 1
            zero_based_col = anchor.column - 1 + col_offset
            return get_cell_value_by_position(sheet, zero_based_row, zero_based_col)
        return None
    except Exception as e:
        logger.warning(f"Error getting server cell value for {name}: {str(e)}")
        return None
769
+
770
+
771
def import_price(workbook) -> Optional[Dict[str, Any]]:
    """Extract the price block (amount, currency, unit) as a plain dict.

    A dict is used because no dedicated price model class is available.
    Returns None when none of the three price cells hold a value or when
    reading the workbook fails.
    """
    try:
        price = {
            "priceAmount": get_cell_value_by_name(workbook, "price.priceAmount"),
            "priceCurrency": get_cell_value_by_name(workbook, "price.priceCurrency"),
            "priceUnit": get_cell_value_by_name(workbook, "price.priceUnit"),
        }
        # Only report a price if at least one of the three cells is filled in.
        if any(price.values()):
            return price
        return None
    except Exception as e:
        logger.warning(f"Error importing price: {str(e)}")
        return None
790
+
791
+
792
def import_custom_properties(workbook: Workbook) -> Optional[List[CustomProperty]]:
    """Extract custom properties from the "Custom Properties" sheet.

    The workbook's "owner" cell is mapped to a custom property named
    ``owner``; all other rows of the named range ``CustomProperties`` are
    imported with their values coerced via parse_property_value.

    Returns:
        List of CustomProperty objects, or None when none were found.
    """
    custom_properties = []

    owner = get_cell_value_by_name(workbook, "owner")

    # Add owner as a custom property
    if owner:
        custom_properties.append(
            CustomProperty(
                property="owner",
                value=owner,
            )
        )

    try:
        # Get other custom properties
        custom_properties_sheet = workbook["Custom Properties"]
        if custom_properties_sheet:
            custom_properties_range = get_range_by_name_in_workbook(workbook, "CustomProperties")
            if custom_properties_range:
                # The named range starts on the first data row (the header row
                # sits one row above it), so every row in the range is data.
                # NOTE: a previous "skip header row" check compared row_idx
                # against range[0]-1 inside range(range[0], range[1]) and
                # could never fire; it has been removed as dead code.
                for row_idx in range(custom_properties_range[0], custom_properties_range[1]):
                    property_name = get_cell_value_by_position(custom_properties_sheet, row_idx, 0)
                    # Skip empty rows and the owner row (already added above).
                    if not property_name or property_name == "owner":
                        continue

                    property_value = get_cell_value_by_position(custom_properties_sheet, row_idx, 1)
                    parsed_value = parse_property_value(property_value)

                    custom_properties.append(
                        CustomProperty(
                            property=property_name,
                            value=parsed_value,
                        )
                    )
    except Exception as e:
        logger.warning(f"Error importing custom properties: {str(e)}")

    return custom_properties if custom_properties else None
835
+
836
+
837
def parse_property_value(value: Any) -> Any:
    """Parse a raw Excel cell value into the most appropriate Python type.

    Conversion rules:
      - None stays None.
      - Booleans (and the strings "true"/"false", case-insensitive) stay/become bool.
      - Whole numbers (digit strings, whole floats) become int.
      - Other numeric values become float.
      - Anything else is returned unchanged.
    """
    if value is None:
        return None

    # Booleans first: bool is a subclass of int, so the numeric conversion
    # below would otherwise silently turn True/False into 1/0.
    if isinstance(value, bool):
        return value

    # Try to convert to boolean (simple case)
    if isinstance(value, str):
        value_lower = value.lower().strip()
        if value_lower == "true":
            return True
        if value_lower == "false":
            return False

    # Try numeric conversions
    try:
        # Check if it's an integer
        if isinstance(value, str) and value.isdigit():
            return int(value)

        # Try float conversion
        float_val = float(value)
        # If it's a whole number, return as int
        if float_val.is_integer():
            return int(float_val)
        return float_val
    except (ValueError, TypeError, AttributeError):
        # If conversion fails, return original value
        return value
865
+
866
+
867
def import_quality(workbook: Workbook) -> Dict[str, List[DataQuality]]:
    """
    Import quality data from the "Quality" sheet and organize it by key.

    Each data row yields one DataQuality object; rows are grouped under the
    key "<schema>" (schema-level checks) or "<schema>.<property>"
    (property-level checks).

    Returns:
        Dictionary mapping schema[.property] keys to lists of DataQuality
        objects; empty dict when the sheet or named range is missing or an
        error occurs.
    """
    try:
        quality_sheet = workbook["Quality"]
        if not quality_sheet:
            return {}
    except KeyError:
        logger.warning("Quality sheet not found")
        return {}

    try:
        quality_range = get_range_by_name_in_workbook(workbook, "quality")
        if not quality_range:
            logger.warning("Quality range not found")
            return {}

        # The header row sits directly above the named range.
        quality_header_row_index = quality_range[0] - 1
        headers = get_headers_from_header_row(quality_sheet, quality_header_row_index)

        # Materialize the rows exactly once. The previous code rebuilt
        # list(quality_sheet.rows) twice per loop iteration, which made the
        # import quadratic in the sheet size.
        rows = list(quality_sheet.rows)

        quality_map = {}

        for row_idx in range(quality_range[0], quality_range[1]):
            if row_idx >= len(rows):
                break
            row = rows[row_idx]

            # Extract quality fields from row
            schema_name = get_cell_value(row, headers.get("schema"))
            property_name = get_cell_value(row, headers.get("property"))
            quality_type = get_cell_value(row, headers.get("quality type"))
            description = get_cell_value(row, headers.get("description"))
            rule = get_cell_value(row, headers.get("rule (library)"))
            query = get_cell_value(row, headers.get("query (sql)"))
            engine = get_cell_value(row, headers.get("quality engine (custom)"))
            implementation = get_cell_value(row, headers.get("implementation (custom)"))
            severity = get_cell_value(row, headers.get("severity"))
            scheduler = get_cell_value(row, headers.get("scheduler"))
            schedule = get_cell_value(row, headers.get("schedule"))
            threshold_operator = get_cell_value(row, headers.get("threshold operator"))
            threshold_value = get_cell_value(row, headers.get("threshold value"))

            # Skip if no schema name or insufficient quality data
            if not schema_name or (not quality_type and not description and not rule):
                continue

            # Parse threshold values based on operator
            threshold_dict = parse_threshold_values(threshold_operator, threshold_value)

            # Create DataQuality object with parsed thresholds
            quality = DataQuality(
                name=None,
                description=description,
                type=quality_type,
                rule=rule,
                unit=None,
                validValues=None,
                query=query,
                engine=engine,
                implementation=implementation,
                dimension=None,
                method=None,
                severity=severity,
                businessImpact=None,
                customProperties=None,
                authoritativeDefinitions=None,
                tags=None,
                scheduler=scheduler,
                schedule=schedule,
                **threshold_dict,  # Unpack threshold values
            )

            # Create key for mapping - use schema.property format
            key = schema_name if not property_name else f"{schema_name}.{property_name}"

            quality_map.setdefault(key, []).append(quality)

    except Exception as e:
        logger.warning(f"Error importing quality: {str(e)}")
        return {}

    return quality_map
955
+
956
+
957
def parse_threshold_values(threshold_operator: str, threshold_value: str) -> Dict[str, Any]:
    """
    Parse threshold operator and value into DataQuality threshold fields

    Args:
        threshold_operator: The threshold operator (e.g., "mustBe", "mustBeBetween")
        threshold_value: The threshold value (string representation; non-string
            cell values such as Excel numbers are accepted and stringified)

    Returns:
        Dictionary with appropriate threshold fields set (empty when the
        operator/value is missing or the value cannot be parsed)
    """
    threshold_dict = {}

    if not threshold_operator or not threshold_value:
        return threshold_dict

    # Excel cells may yield ints/floats rather than strings; normalize so the
    # string handling below never raises AttributeError.
    if not isinstance(threshold_value, str):
        threshold_value = str(threshold_value)

    # Parse threshold values based on operator
    if threshold_operator in ["mustBeBetween", "mustNotBeBetween"]:
        # Parse "[value1, value2]" format
        if threshold_value.startswith("[") and threshold_value.endswith("]"):
            content = threshold_value[1:-1]  # Remove brackets
            try:
                values = [Decimal(v.strip()) for v in content.split(",") if v.strip()]
                if len(values) >= 2:
                    threshold_dict[threshold_operator] = values[:2]  # Take first two values
            except (ValueError, TypeError, ArithmeticError) as e:
                # Decimal raises InvalidOperation (an ArithmeticError) on
                # malformed numbers; (ValueError, TypeError) alone missed it.
                logger.warning(f"Failed to parse between values: {threshold_value}, error: {e}")
    else:
        # Single value for other operators
        try:
            # Quick sanity check before handing the string to Decimal
            if threshold_value.replace(".", "").replace("-", "").isdigit():
                value = Decimal(threshold_value)
                threshold_dict[threshold_operator] = value
        except (ValueError, TypeError, ArithmeticError) as e:
            # The digit check still lets through e.g. "1.2.3" or "--5", which
            # Decimal rejects with InvalidOperation.
            logger.warning(f"Failed to parse threshold value: {threshold_value}, error: {e}")

    return threshold_dict
995
+
996
+
997
def attach_quality_to_schemas(
    schemas: Optional[List[SchemaObject]], quality_map: Dict[str, List[DataQuality]]
) -> Optional[List[SchemaObject]]:
    """
    Wire quality checks from quality_map onto schemas and their properties.

    Schema-level checks are looked up under the bare schema name; property
    level checks are delegated to attach_quality_to_properties. Schema
    objects are modified in place.

    Args:
        schemas: List of schema objects (may be None)
        quality_map: Dictionary mapping schema.property keys to quality lists

    Returns:
        List of schema objects with quality attached, or None for no input
    """
    if not schemas:
        return None

    result = []
    for schema in schemas:
        name = schema.name
        if name:
            # Checks keyed on the schema name itself apply to the schema.
            quality_for_schema = quality_map.get(name)
            if quality_for_schema:
                schema.quality = quality_for_schema
            # Property-level checks are resolved recursively.
            if schema.properties:
                schema.properties = attach_quality_to_properties(schema.properties, name, quality_map)
        result.append(schema)

    return result
1033
+
1034
+
1035
def attach_quality_to_properties(
    properties: List[SchemaProperty], schema_name: str, quality_map: Dict[str, List[DataQuality]], prefix: str = ""
) -> List[SchemaProperty]:
    """
    Recursively wire quality checks onto properties, nested properties and
    array items.

    Quality entries are looked up under "<schema>.<property path>", where the
    path uses dots for nesting and an ".items" segment for array elements.
    Property objects are modified in place.

    Args:
        properties: List of property objects
        schema_name: Name of the parent schema
        quality_map: Dictionary mapping schema.property keys to quality lists
        prefix: Current property path prefix for nested properties

    Returns:
        List of property objects with quality attached
    """
    result = []

    for prop in properties:
        if not prop.name:
            # Anonymous properties cannot be addressed by a quality key.
            result.append(prop)
            continue

        path = prop.name if not prefix else f"{prefix}.{prop.name}"

        # Quality attached directly to this property.
        own_quality = quality_map.get(f"{schema_name}.{path}")
        if own_quality:
            prop.quality = own_quality

        # Recurse into nested object properties.
        if prop.properties:
            prop.properties = attach_quality_to_properties(prop.properties, schema_name, quality_map, path)

        # Array items get their own ".items" path segment.
        if prop.items:
            items_quality = quality_map.get(f"{schema_name}.{path}.items")
            if items_quality:
                prop.items.quality = items_quality

            if prop.items.properties:
                prop.items.properties = attach_quality_to_properties(
                    prop.items.properties, schema_name, quality_map, f"{path}.items"
                )

        result.append(prop)

    return result
1089
+
1090
+
1091
def get_headers_from_header_row(sheet: Worksheet, header_row_index: int) -> Dict[str, int]:
    """
    Map header labels of one sheet row to their column indices.

    Args:
        sheet: The worksheet
        header_row_index: 0-based row index of the header row

    Returns:
        Dictionary mapping header names (lowercase, stripped) to 0-based
        column indices; empty (or partial) when the row cannot be read
    """
    header_to_column = {}
    try:
        target_row = list(sheet.rows)[header_row_index]
        for column_index, cell in enumerate(target_row):
            label = cell.value
            if label:
                # Normalize the label so lookups are case/whitespace tolerant.
                header_to_column[str(label).lower().strip()] = column_index
    except (IndexError, AttributeError) as e:
        logger.warning(f"Error getting headers from row {header_row_index}: {e}")

    return header_to_column