datacontract-cli 0.10.23__py3-none-any.whl → 0.10.40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. datacontract/__init__.py +13 -0
  2. datacontract/api.py +12 -5
  3. datacontract/catalog/catalog.py +5 -3
  4. datacontract/cli.py +119 -13
  5. datacontract/data_contract.py +145 -67
  6. datacontract/engines/data_contract_checks.py +366 -60
  7. datacontract/engines/data_contract_test.py +50 -4
  8. datacontract/engines/fastjsonschema/check_jsonschema.py +37 -19
  9. datacontract/engines/fastjsonschema/s3/s3_read_files.py +3 -2
  10. datacontract/engines/soda/check_soda_execute.py +27 -3
  11. datacontract/engines/soda/connections/athena.py +79 -0
  12. datacontract/engines/soda/connections/duckdb_connection.py +65 -6
  13. datacontract/engines/soda/connections/kafka.py +4 -2
  14. datacontract/engines/soda/connections/oracle.py +50 -0
  15. datacontract/export/avro_converter.py +20 -3
  16. datacontract/export/bigquery_converter.py +1 -1
  17. datacontract/export/dbt_converter.py +36 -7
  18. datacontract/export/dqx_converter.py +126 -0
  19. datacontract/export/duckdb_type_converter.py +57 -0
  20. datacontract/export/excel_exporter.py +923 -0
  21. datacontract/export/exporter.py +3 -0
  22. datacontract/export/exporter_factory.py +17 -1
  23. datacontract/export/great_expectations_converter.py +55 -5
  24. datacontract/export/{html_export.py → html_exporter.py} +31 -20
  25. datacontract/export/markdown_converter.py +134 -5
  26. datacontract/export/mermaid_exporter.py +110 -0
  27. datacontract/export/odcs_v3_exporter.py +193 -149
  28. datacontract/export/protobuf_converter.py +163 -69
  29. datacontract/export/rdf_converter.py +2 -2
  30. datacontract/export/sodacl_converter.py +9 -1
  31. datacontract/export/spark_converter.py +31 -4
  32. datacontract/export/sql_converter.py +6 -2
  33. datacontract/export/sql_type_converter.py +124 -8
  34. datacontract/imports/avro_importer.py +63 -12
  35. datacontract/imports/csv_importer.py +111 -57
  36. datacontract/imports/excel_importer.py +1112 -0
  37. datacontract/imports/importer.py +16 -3
  38. datacontract/imports/importer_factory.py +17 -0
  39. datacontract/imports/json_importer.py +325 -0
  40. datacontract/imports/odcs_importer.py +2 -2
  41. datacontract/imports/odcs_v3_importer.py +367 -151
  42. datacontract/imports/protobuf_importer.py +264 -0
  43. datacontract/imports/spark_importer.py +117 -13
  44. datacontract/imports/sql_importer.py +32 -16
  45. datacontract/imports/unity_importer.py +84 -38
  46. datacontract/init/init_template.py +1 -1
  47. datacontract/integration/entropy_data.py +126 -0
  48. datacontract/lint/resolve.py +112 -23
  49. datacontract/lint/schema.py +24 -15
  50. datacontract/lint/urls.py +17 -3
  51. datacontract/model/data_contract_specification/__init__.py +1 -0
  52. datacontract/model/odcs.py +13 -0
  53. datacontract/model/run.py +3 -0
  54. datacontract/output/junit_test_results.py +3 -3
  55. datacontract/schemas/datacontract-1.1.0.init.yaml +1 -1
  56. datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
  57. datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
  58. datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
  59. datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
  60. datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
  61. datacontract/schemas/odcs-3.1.0.schema.json +2809 -0
  62. datacontract/templates/datacontract.html +54 -3
  63. datacontract/templates/datacontract_odcs.html +685 -0
  64. datacontract/templates/index.html +5 -2
  65. datacontract/templates/partials/server.html +2 -0
  66. datacontract/templates/style/output.css +319 -145
  67. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info}/METADATA +711 -433
  68. datacontract_cli-0.10.40.dist-info/RECORD +121 -0
  69. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info}/WHEEL +1 -1
  70. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info/licenses}/LICENSE +1 -1
  71. datacontract/export/csv_type_converter.py +0 -36
  72. datacontract/integration/datamesh_manager.py +0 -72
  73. datacontract/lint/lint.py +0 -142
  74. datacontract/lint/linters/description_linter.py +0 -35
  75. datacontract/lint/linters/field_pattern_linter.py +0 -34
  76. datacontract/lint/linters/field_reference_linter.py +0 -48
  77. datacontract/lint/linters/notice_period_linter.py +0 -55
  78. datacontract/lint/linters/quality_schema_linter.py +0 -52
  79. datacontract/lint/linters/valid_constraints_linter.py +0 -100
  80. datacontract/model/data_contract_specification.py +0 -327
  81. datacontract_cli-0.10.23.dist-info/RECORD +0 -113
  82. /datacontract/{lint/linters → output}/__init__.py +0 -0
  83. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info}/entry_points.txt +0 -0
  84. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1112 @@
1
+ import logging
2
+ import os
3
+ from decimal import Decimal
4
+ from typing import Any, Dict, List, Optional
5
+
6
+ import openpyxl
7
+ from open_data_contract_standard.model import (
8
+ AuthoritativeDefinition,
9
+ CustomProperty,
10
+ DataQuality,
11
+ OpenDataContractStandard,
12
+ Role,
13
+ SchemaObject,
14
+ SchemaProperty,
15
+ Server,
16
+ ServiceLevelAgreementProperty,
17
+ Support,
18
+ Team,
19
+ TeamMember,
20
+ )
21
+ from openpyxl.cell.cell import Cell
22
+ from openpyxl.workbook.workbook import Workbook
23
+ from openpyxl.worksheet.worksheet import Worksheet
24
+
25
+ from datacontract.imports.importer import Importer
26
+ from datacontract.model.data_contract_specification import (
27
+ DataContractSpecification,
28
+ )
29
+ from datacontract.model.exceptions import DataContractException
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
class ExcelImporter(Importer):
    """Importer that reads an ODCS data contract from an Excel workbook.

    Regardless of the ``data_contract_specification`` passed in, the import
    always produces an :class:`OpenDataContractStandard` built from the
    named ranges in the workbook.
    """

    def import_source(
        self,
        data_contract_specification: DataContractSpecification | OpenDataContractStandard,
        source: str,
        import_args: dict,
    ) -> DataContractSpecification | OpenDataContractStandard:
        # NOTE: `data_contract_specification` and `import_args` are ignored;
        # the Excel file alone determines the resulting ODCS document.
        return import_excel_as_odcs(source)
42
+
43
+
44
def import_excel_as_odcs(excel_file_path: str) -> OpenDataContractStandard:
    """
    Import an Excel file and convert it to an OpenDataContractStandard object.

    The workbook is expected to follow the ODCS Excel template: top-level
    fields are read via workbook-scoped named cells (e.g. "apiVersion",
    "description.purpose"), and schemas/support/team/roles/SLA/servers come
    from their dedicated sheets.

    Args:
        excel_file_path: Path to the Excel file

    Returns:
        OpenDataContractStandard object

    Raises:
        FileNotFoundError: if the path does not exist.
        DataContractException: if the workbook cannot be opened or parsed.
    """
    if not os.path.exists(excel_file_path):
        raise FileNotFoundError(f"Excel file not found: {excel_file_path}")

    try:
        # data_only=True reads the cached computed values of formula cells
        # instead of the formula strings themselves.
        workbook = openpyxl.load_workbook(excel_file_path, data_only=True)
    except Exception as e:
        raise DataContractException(
            type="schema",
            name="Parse excel contract",
            reason=f"Failed to open Excel file: {excel_file_path}",
            engine="datacontract",
            original_exception=e,
        )

    try:
        # Get description values
        purpose = get_cell_value_by_name(workbook, "description.purpose")
        limitations = get_cell_value_by_name(workbook, "description.limitations")
        usage = get_cell_value_by_name(workbook, "description.usage")

        # Build description dict (None when all three fields are empty so the
        # resulting YAML omits the section entirely)
        description = None
        if purpose or limitations or usage:
            description = {"purpose": purpose, "limitations": limitations, "usage": usage}

        # Get tags as a list (comma-separated cell value)
        tags_str = get_cell_value_by_name(workbook, "tags")
        tags = None
        if tags_str:
            tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()]

        # Import quality data first (standalone from schemas)
        quality_map = import_quality(workbook)

        # Import schemas
        schemas = import_schemas(workbook)

        # Attach quality to schemas and properties
        schemas_with_quality = attach_quality_to_schemas(schemas, quality_map)

        # Import other components
        support = import_support(workbook)
        team = import_team(workbook)
        roles = import_roles(workbook)
        sla_properties = import_sla_properties(workbook)
        servers = import_servers(workbook)
        price = import_price(workbook)
        custom_properties = import_custom_properties(workbook)

        # Create the ODCS object with proper object creation
        odcs = OpenDataContractStandard(
            apiVersion=get_cell_value_by_name(workbook, "apiVersion"),
            kind=get_cell_value_by_name(workbook, "kind"),
            id=get_cell_value_by_name(workbook, "id"),
            name=get_cell_value_by_name(workbook, "name"),
            version=get_cell_value_by_name(workbook, "version"),
            status=get_cell_value_by_name(workbook, "status"),
            domain=get_cell_value_by_name(workbook, "domain"),
            dataProduct=get_cell_value_by_name(workbook, "dataProduct"),
            tenant=get_cell_value_by_name(workbook, "tenant"),
            description=description,
            tags=tags,
            schema=schemas_with_quality,
            support=support,
            price=price,
            team=team,
            roles=roles,
            slaDefaultElement=get_cell_value_by_name(workbook, "slaDefaultElement"),
            slaProperties=sla_properties,
            servers=servers,
            customProperties=custom_properties,
        )

        return odcs
    except Exception as e:
        # Any parse failure is wrapped in a DataContractException so callers
        # get a uniform error type with the original exception attached.
        logger.error(f"Error importing Excel file: {str(e)}")
        raise DataContractException(
            type="schema",
            name="Parse excel contract",
            reason=f"Failed to parse Excel file: {excel_file_path}",
            engine="datacontract",
            original_exception=e,
        )
    finally:
        # Always release the workbook file handle, even on failure.
        workbook.close()
139
+
140
+
141
def import_schemas(workbook) -> Optional[List[SchemaObject]]:
    """Collect one SchemaObject per worksheet whose title starts with 'Schema '.

    The template placeholder sheet "Schema <table_name>" is ignored, as is any
    schema sheet without a 'schema.name' named cell. Returns None when no
    schema could be extracted.
    """
    collected: List[SchemaObject] = []

    for title in workbook.sheetnames:
        if not title.startswith("Schema ") or title == "Schema <table_name>":
            continue

        worksheet = workbook[title]
        name_value = get_cell_value_by_name_in_sheet(worksheet, "schema.name")
        if not name_value:
            continue

        # Comma-separated tags cell -> list (or None when absent/blank).
        raw_tags = get_cell_value_by_name_in_sheet(worksheet, "schema.tags")
        tag_list = None
        if raw_tags:
            tag_list = [tag.strip() for tag in raw_tags.split(",") if tag.strip()]

        collected.append(
            SchemaObject(
                name=name_value,
                logicalType="object",
                physicalType=get_cell_value_by_name_in_sheet(worksheet, "schema.physicalType"),
                physicalName=get_cell_value_by_name_in_sheet(worksheet, "schema.physicalName"),
                description=get_cell_value_by_name_in_sheet(worksheet, "schema.description"),
                businessName=get_cell_value_by_name_in_sheet(worksheet, "schema.businessName"),
                dataGranularityDescription=get_cell_value_by_name_in_sheet(worksheet, "schema.dataGranularityDescription"),
                authoritativeDefinitions=None,
                properties=import_properties(worksheet),
                quality=None,  # Quality is attached later from the quality map
                customProperties=None,
                tags=tag_list,
            )
        )

    return collected if collected else None
176
+
177
+
178
def import_properties(sheet) -> Optional[List[SchemaProperty]]:
    """Extract the property rows of a schema sheet into SchemaProperty objects.

    Rows come from the 'schema.properties' named range. Dotted property names
    (e.g. "address.street") are re-nested under their parent after all rows
    are read; a child of an array-typed parent becomes the parent's `items`.
    A dotted property whose parent row is missing is dropped, matching the
    previous behavior.

    Returns None when the named range is missing, no rows parse, or an error
    occurs (logged as a warning).
    """
    try:
        # Find the properties table
        properties_range = get_range_by_name_in_sheet(sheet, "schema.properties")
        if not properties_range:
            return None

        # Materialize the rows a single time: the previous code rebuilt
        # list(sheet.rows) on every loop iteration, making the loop quadratic
        # in the number of rows.
        rows = list(sheet.rows)

        # Get header row to map column names to indices
        header_row = rows[properties_range[0] - 1]  # Convert to 0-based indexing
        headers = {}
        for i, cell in enumerate(header_row):
            if cell.value:
                headers[cell.value.lower()] = i

        # Process property rows
        property_lookup = {}  # Dictionary to keep track of properties by name for nesting

        # First, create all properties
        for row_idx in range(properties_range[0], properties_range[1]):
            if len(rows) < row_idx + 1:
                break
            row = rows[row_idx]

            # Skip empty rows. (The old `row_idx == properties_range[0] - 1`
            # header check was dead code: row_idx always starts past it.)
            property_name = get_cell_value(row, headers.get("property"))
            if not property_name:
                continue

            # Create property object
            property_obj = SchemaProperty(
                name=property_name,
                logicalType=get_cell_value(row, headers.get("logical type")),
                logicalTypeOptions=import_logical_type_options(row, headers),
                physicalType=get_cell_value(row, headers.get("physical type")),
                physicalName=get_cell_value(row, headers.get("physical name")),
                description=get_cell_value(row, headers.get("description")),
                businessName=get_cell_value(row, headers.get("business name")),
                required=parse_boolean(get_cell_value(row, headers.get("required"))),
                unique=parse_boolean(get_cell_value(row, headers.get("unique"))),
                primaryKey=parse_boolean(get_cell_value(row, headers.get("primary key"))),
                primaryKeyPosition=parse_integer(get_cell_value(row, headers.get("primary key position"))),
                partitioned=parse_boolean(get_cell_value(row, headers.get("partitioned"))),
                partitionKeyPosition=parse_integer(get_cell_value(row, headers.get("partition key position"))),
                criticalDataElement=parse_boolean(get_cell_value(row, headers.get("critical data element status"))),
                classification=get_cell_value(row, headers.get("classification")),
                transformLogic=get_cell_value(row, headers.get("transform logic")),
                transformDescription=get_cell_value(row, headers.get("transform description")),
                encryptedName=get_cell_value(row, headers.get("encrypted name")),
                properties=None,
                items=None,
                tags=get_property_tags(headers, row),
            )

            # Authoritative definitions (only when both URL and type are set)
            authoritative_definition_url = get_cell_value(row, headers.get("authoritative definition url"))
            authoritative_definition_type = get_cell_value(row, headers.get("authoritative definition type"))
            if authoritative_definition_url and authoritative_definition_type:
                property_obj.authoritativeDefinitions = [
                    AuthoritativeDefinition(
                        url=authoritative_definition_url,
                        type=authoritative_definition_type,
                    )
                ]

            # Quality will be attached later via quality_map
            property_obj.quality = None

            # Transform sources (comma-separated)
            transform_sources = get_cell_value(row, headers.get("transform sources"))
            if transform_sources:
                property_obj.transformSourceObjects = [
                    src.strip() for src in transform_sources.split(",") if src.strip()
                ]

            # Examples (comma-separated)
            examples = get_cell_value(row, headers.get("example(s)"))
            if examples:
                property_obj.examples = [ex.strip() for ex in examples.split(",") if ex.strip()]

            # Add to lookup dictionary
            property_lookup[property_name] = property_obj

        # Now organize nested properties
        root_properties = []
        for name, prop in property_lookup.items():
            if "." in name:
                # This is a nested property: split off the last path segment
                parent_name, child_name = name.rsplit(".", 1)

                if parent_name in property_lookup:
                    parent_prop = property_lookup[parent_name]
                    # Update the property name to be just the child part
                    prop.name = child_name

                    # If parent is an array, set as items
                    if parent_prop.logicalType == "array":
                        parent_prop.items = prop
                    else:
                        # Otherwise add to properties list
                        if parent_prop.properties is None:
                            parent_prop.properties = []
                        parent_prop.properties.append(prop)
            else:
                # This is a root property
                root_properties.append(prop)

        return root_properties if root_properties else None
    except Exception as e:
        logger.warning(f"Error importing properties: {str(e)}")
        return None
290
+
291
+
292
def import_logical_type_options(row, headers):
    """Build the logicalTypeOptions dict for one property row.

    Returns None when every option column is empty, so callers can omit the
    field entirely.
    """

    raw_required = get_cell_value(row, headers.get("required properties"))
    parsed_required = None
    if raw_required:
        parsed_required = [prop.strip() for prop in raw_required.split(",") if prop.strip()]

    candidates = {
        "minLength": parse_integer(get_cell_value(row, headers.get("minimum length"))),
        "maxLength": parse_integer(get_cell_value(row, headers.get("maximum length"))),
        "pattern": get_cell_value(row, headers.get("pattern")),
        "format": get_cell_value(row, headers.get("format")),
        "exclusiveMaximum": parse_boolean(get_cell_value(row, headers.get("exclusive maximum"))),
        "exclusiveMinimum": parse_boolean(get_cell_value(row, headers.get("exclusive minimum"))),
        "minimum": get_cell_value(row, headers.get("minimum")),
        "maximum": get_cell_value(row, headers.get("maximum")),
        "multipleOf": get_cell_value(row, headers.get("multiple of")),
        "minItems": parse_integer(get_cell_value(row, headers.get("minimum items"))),
        "maxItems": parse_integer(get_cell_value(row, headers.get("maximum items"))),
        "uniqueItems": parse_boolean(get_cell_value(row, headers.get("unique items"))),
        "maxProperties": parse_integer(get_cell_value(row, headers.get("maximum properties"))),
        "minProperties": parse_integer(get_cell_value(row, headers.get("minimum properties"))),
        "required": parsed_required,
    }

    # Keep only the options that were actually set in the sheet.
    options = {key: value for key, value in candidates.items() if value is not None}
    return options if options else None
326
+
327
+
328
def get_property_tags(headers, row):
    """Split the comma-separated 'tags' cell of a property row into a list.

    Returns None when the cell is absent or blank.
    """
    raw = get_cell_value(row, headers.get("tags"))
    if not raw:
        return None
    return [entry.strip() for entry in raw.split(",") if entry.strip()]
333
+
334
+
335
def parse_boolean(value):
    """Parse a truthy string ("true"/"yes"/"1", any case) to a boolean.

    Returns None when value is None. The previous version called
    value.lower() directly and crashed with AttributeError on non-string
    input; now a bool passes through unchanged and any other value is
    compared via its string form.
    """
    if value is None:
        return None
    if isinstance(value, bool):
        return value
    normalized = str(value).lower().strip()
    return normalized == "true" or normalized == "yes" or normalized == "1"
341
+
342
+
343
def parse_integer(value):
    """Parse a string value to an integer.

    Returns None for None or unparseable input. Excel stores all numbers as
    floats, so whole-number cells can surface as strings like "3.0"; those
    are accepted and converted losslessly. Non-integral values (e.g. "3.7")
    still return None rather than silently truncating.
    """
    if value is None:
        return None
    try:
        return int(value)
    except (ValueError, TypeError):
        pass
    try:
        as_float = float(value)
    except (ValueError, TypeError):
        return None
    return int(as_float) if as_float.is_integer() else None
351
+
352
+
353
def get_range_by_name_in_workbook(workbook: Workbook, name: str) -> tuple | None:
    """Resolve a workbook-level defined name to a (start_row, end_row) pair.

    The first destination of the matching name wins. Returns None when the
    name does not exist or resolution fails (logged as a warning).
    """
    try:
        for defined in workbook.defined_names:
            if defined != name:
                continue
            for sheet_title, range_address in workbook.defined_names[defined].destinations:
                if ":" not in range_address:
                    # A single-cell name spans exactly one row.
                    only_row = int("".join(filter(str.isdigit, range_address)))
                    return only_row, only_row
                # Multi-cell range: keep just the row components of both ends.
                first_ref, last_ref = range_address.split(":")
                first_row = int("".join(filter(str.isdigit, first_ref)))
                last_row = int("".join(filter(str.isdigit, last_ref)))
                return first_row, last_row
    except Exception as e:
        logger.warning(f"Error finding range by name {name}: {str(e)}")
    return None
373
+
374
+
375
def get_range_by_name_in_sheet(sheet: Worksheet, name: str) -> tuple | None:
    """Resolve a sheet-scoped defined name to a (start_row, end_row) pair.

    Only destinations pointing at this very sheet are considered. Returns
    None when no matching destination exists or resolution fails (logged as
    a warning).
    """
    try:
        for defined in sheet.defined_names:
            if defined != name:
                continue
            for sheet_title, range_address in sheet.defined_names[defined].destinations:
                if sheet_title != sheet.title:
                    continue
                # For named ranges that refer to entire rows or multiple rows
                if ":" not in range_address:
                    # A single-cell name spans exactly one row.
                    only_row = int("".join(filter(str.isdigit, range_address)))
                    return (only_row, only_row)
                first_ref, last_ref = range_address.split(":")
                first_row = int("".join(filter(str.isdigit, first_ref)))
                last_row = int("".join(filter(str.isdigit, last_ref)))
                return (first_row, last_row)
    except Exception as e:
        logger.warning(f"Error finding range by name {name}: {str(e)}")
    return None
397
+
398
+
399
def get_cell_by_name_in_workbook(workbook: Workbook, name: str) -> Cell | None:
    """Resolve a workbook-level defined name to its first destination Cell.

    Returns None when the name does not exist or the lookup fails (e.g. the
    destination references a missing sheet); failures are logged as warnings.

    The previous implementation re-checked `sheet_title == sheet.title`
    after fetching `workbook[sheet_title]` — a tautology, since openpyxl
    looks sheets up by title. That dead branch is removed.
    """
    try:
        for named_range in workbook.defined_names:
            if named_range == name:
                for sheet_title, coordinate in workbook.defined_names[named_range].destinations:
                    # A missing sheet raises KeyError here, handled below.
                    return workbook[sheet_title][coordinate]
    except Exception as e:
        logger.warning(f"Error finding cell by name {name}: {str(e)}")
    return None
412
+
413
+
414
def get_cell_value_by_name(workbook: Workbook, name: str) -> str | None:
    """Get the stripped string value of a workbook-level named cell.

    Returns None when the name does not exist, the cell is empty, or the
    value is only whitespace.

    The previous version dereferenced `cell.value` without a None check, so
    every absent optional field raised AttributeError internally and logged a
    misleading "Error getting cell value" warning; the explicit guard keeps
    the same None result without the noise.
    """
    try:
        cell = get_cell_by_name_in_workbook(workbook, name)
        if cell is not None and cell.value is not None:
            value = str(cell.value).strip()
            return value if value else None
    except Exception as e:
        logger.warning(f"Error getting cell value by name {name}: {str(e)}")
    return None
424
+
425
+
426
def get_cell_value_by_name_in_sheet(sheet: Worksheet, name: str) -> str | None:
    """Get the stripped value of a defined name scoped to this sheet.

    Only destinations pointing at this sheet are considered; empty and
    whitespace-only cells yield None, as do lookup failures (logged as a
    warning).
    """
    try:
        for defined in sheet.defined_names:
            if defined != name:
                continue
            for sheet_title, coordinate in sheet.defined_names[defined].destinations:
                if sheet_title != sheet.title:
                    continue
                raw = sheet[coordinate].value
                if raw is not None:
                    text = str(raw).strip()
                    return text if text else None
    except Exception as e:
        logger.warning(f"Error getting cell value by name {name} in sheet {sheet.title}: {str(e)}")
    return None
441
+
442
+
443
def get_cell_value(row, col_idx):
    """Return the stripped string value of row[col_idx], or None.

    None is returned when col_idx is None, the index is out of range, the
    cell holds no value, or the value is only whitespace.
    """
    if col_idx is None:
        return None
    try:
        raw = row[col_idx].value
    except (IndexError, AttributeError):
        return None
    if raw is None:
        return None
    text = str(raw).strip()
    return text or None
455
+
456
+
457
def get_cell_value_by_position(sheet, row_idx, col_idx):
    """Get a stripped cell value by 0-based row/column indices, or None.

    Empty and whitespace-only cells yield None; any lookup error is logged
    as a warning and also yields None.
    """
    try:
        # openpyxl's Worksheet.cell is 1-based, so shift both indices.
        raw = sheet.cell(row=row_idx + 1, column=col_idx + 1).value
        if raw is None:
            return None
        text = str(raw).strip()
        return text if text else None
    except Exception as e:
        logger.warning(f"Error getting cell value by position ({row_idx}, {col_idx}): {str(e)}")
        return None
468
+
469
+
470
def import_support(workbook: Workbook) -> Optional[List[Support]]:
    """Extract support channel rows from the "Support" sheet.

    Rows come from the workbook-level 'support' named range. Returns None
    when the sheet or the named range is missing, when no channel rows are
    present, or when parsing fails (logged as a warning).
    """
    try:
        support_sheet = workbook["Support"]
        if not support_sheet:
            return None

        support_range = get_range_by_name_in_workbook(workbook, "support")
        if not support_range:
            return None

        # Materialize rows once; the previous per-iteration list(rows) calls
        # made this loop quadratic in sheet size.
        rows = list(support_sheet.rows)

        header_row = rows[support_range[0] - 1]
        headers = {}
        for i, cell in enumerate(header_row):
            if cell.value:
                headers[cell.value.lower()] = i

        support_channels = []
        for row_idx in range(support_range[0], support_range[1]):
            if len(rows) < row_idx + 1:
                break
            row = rows[row_idx]

            # Skip blank rows. (The old header-row check was dead code:
            # row_idx always starts past the header.)
            channel = get_cell_value(row, headers.get("channel"))
            if not channel:
                continue

            support_channel = Support(
                channel=channel,
                url=get_cell_value(row, headers.get("channel url")),
                description=get_cell_value(row, headers.get("description")),
                tool=get_cell_value(row, headers.get("tool")),
                scope=get_cell_value(row, headers.get("scope")),
                invitationUrl=get_cell_value(row, headers.get("invitation url")),
            )

            support_channels.append(support_channel)
    except Exception as e:
        logger.warning(f"Error importing support: {str(e)}")
        return None

    return support_channels if support_channels else None
512
+
513
+
514
def import_team(workbook: Workbook) -> Optional[List[Team]]:
    """Extract team member rows from the "Team" sheet.

    Rows come from the workbook-level 'team' named range. Returns None when
    the sheet or range is missing, no member rows are present, or parsing
    fails (logged as a warning).

    NOTE(review): annotated as List[Team] but the rows are built as
    TeamMember objects — confirm against the ODCS model whether the
    annotation or the construction is intended.
    """
    try:
        team_sheet = workbook["Team"]
        if not team_sheet:
            return None

        team_range = get_range_by_name_in_workbook(workbook, "team")
        if not team_range:
            return None

        # Materialize rows once; the previous per-iteration list(rows) calls
        # made this loop quadratic in sheet size.
        rows = list(team_sheet.rows)

        header_row = rows[team_range[0] - 1]
        headers = {}
        for i, cell in enumerate(header_row):
            if cell.value:
                headers[cell.value.lower()] = i

        team_members = []
        for row_idx in range(team_range[0], team_range[1]):
            if len(rows) < row_idx + 1:
                break
            row = rows[row_idx]

            username = get_cell_value(row, headers.get("username"))
            name = get_cell_value(row, headers.get("name"))
            role = get_cell_value(row, headers.get("role"))

            # Skip rows with none of the identifying fields set. (The old
            # header-row check was dead code: row_idx starts past the header.)
            if not (username or name or role):
                continue

            team_member = TeamMember(
                username=username,
                name=name,
                description=get_cell_value(row, headers.get("description")),
                role=role,
                dateIn=get_cell_value(row, headers.get("date in")),
                dateOut=get_cell_value(row, headers.get("date out")),
                replacedByUsername=get_cell_value(row, headers.get("replaced by username")),
            )

            team_members.append(team_member)
    except Exception as e:
        logger.warning(f"Error importing team: {str(e)}")
        return None

    return team_members if team_members else None
560
+
561
+
562
def import_roles(workbook: Workbook) -> Optional[List[Role]]:
    """Extract role rows from the "Roles" sheet.

    Rows come from the sheet-scoped 'roles' named range. Returns None when
    the sheet or range is missing, no role rows are present, or parsing
    fails (logged as a warning).
    """
    try:
        roles_sheet = workbook["Roles"]
        if not roles_sheet:
            return None

        roles_range = get_range_by_name_in_sheet(roles_sheet, "roles")
        if not roles_range:
            return None

        # Materialize rows once; the previous per-iteration list(rows) calls
        # made this loop quadratic in sheet size.
        rows = list(roles_sheet.rows)

        header_row = rows[roles_range[0] - 1]
        headers = {}
        for i, cell in enumerate(header_row):
            if cell.value:
                headers[cell.value.lower()] = i

        roles_list = []
        for row_idx in range(roles_range[0], roles_range[1]):
            if len(rows) < row_idx + 1:
                break
            row = rows[row_idx]

            # Skip blank rows. (The old header-row check was dead code:
            # row_idx always starts past the header.)
            role_name = get_cell_value(row, headers.get("role"))
            if not role_name:
                continue

            role = Role(
                role=role_name,
                description=get_cell_value(row, headers.get("description")),
                access=get_cell_value(row, headers.get("access")),
                firstLevelApprovers=get_cell_value(row, headers.get("1st level approvers")),
                secondLevelApprovers=get_cell_value(row, headers.get("2nd level approvers")),
                customProperties=None,
            )

            roles_list.append(role)
    except Exception as e:
        logger.warning(f"Error importing roles: {str(e)}")
        return None

    return roles_list if roles_list else None
604
+
605
+
606
def import_sla_properties(workbook: Workbook) -> Optional[List[ServiceLevelAgreementProperty]]:
    """Extract SLA property rows from the "SLA" sheet.

    Rows come from the sheet-scoped 'slaProperties' named range. Returns
    None when the sheet or range is missing, no property rows are present,
    or parsing fails (logged as a warning).
    """
    try:
        sla_sheet = workbook["SLA"]
        if not sla_sheet:
            return None

        sla_range = get_range_by_name_in_sheet(sla_sheet, "slaProperties")
        if not sla_range:
            return None

        # Materialize rows once; the previous per-iteration list(rows) calls
        # made this loop quadratic in sheet size.
        rows = list(sla_sheet.rows)

        header_row = rows[sla_range[0] - 1]
        headers = {}
        for i, cell in enumerate(header_row):
            if cell.value:
                headers[cell.value.lower()] = i

        sla_properties = []
        for row_idx in range(sla_range[0], sla_range[1]):
            if len(rows) < row_idx + 1:
                break
            row = rows[row_idx]

            # Skip blank rows. (The old header-row check was dead code:
            # row_idx always starts past the header.)
            property_name = get_cell_value(row, headers.get("property"))
            if not property_name:
                continue

            sla_property = ServiceLevelAgreementProperty(
                property=property_name,
                value=get_cell_value(row, headers.get("value")),
                valueExt=get_cell_value(row, headers.get("extended value")),
                unit=get_cell_value(row, headers.get("unit")),
                element=get_cell_value(row, headers.get("element")),
                driver=get_cell_value(row, headers.get("driver")),
            )

            sla_properties.append(sla_property)
    except Exception as e:
        logger.warning(f"Error importing SLA properties: {str(e)}")
        return None

    return sla_properties if sla_properties else None
648
+
649
+
650
+ def import_servers(workbook) -> Optional[List[Server]]:
651
+ """Extract server information from the Servers sheet"""
652
+ try:
653
+ sheet = workbook["Servers"]
654
+ if not sheet:
655
+ return None
656
+
657
+ # Find the server cells
658
+ server_cell = get_cell_by_name_in_workbook(workbook, "servers.server")
659
+ if not server_cell:
660
+ return None
661
+
662
+ # Get servers (horizontally arranged in the sheet)
663
+ servers = []
664
+ col_idx = server_cell.column - 1 # 0-based index
665
+ row_idx = server_cell.row - 1 # 0-based index
666
+
667
+ index = 0
668
+ while True:
669
+ server_name = get_cell_value_by_position(sheet, row_idx, col_idx + index)
670
+ if not server_name:
671
+ break
672
+
673
+ server = Server(
674
+ server=server_name,
675
+ description=get_server_cell_value(workbook, sheet, "servers.description", index),
676
+ environment=get_server_cell_value(workbook, sheet, "servers.environment", index),
677
+ type=get_server_cell_value(workbook, sheet, "servers.type", index),
678
+ )
679
+
680
+ # Get type-specific fields
681
+ server_type = server.type
682
+ if server_type:
683
+ if server_type == "azure":
684
+ server.location = get_server_cell_value(workbook, sheet, "servers.azure.location", index)
685
+ server.format = get_server_cell_value(workbook, sheet, "servers.azure.format", index)
686
+ server.delimiter = get_server_cell_value(workbook, sheet, "servers.azure.delimiter", index)
687
+ elif server_type == "bigquery":
688
+ server.project = get_server_cell_value(workbook, sheet, "servers.bigquery.project", index)
689
+ server.dataset = get_server_cell_value(workbook, sheet, "servers.bigquery.dataset", index)
690
+ elif server_type == "databricks":
691
+ server.catalog = get_server_cell_value(workbook, sheet, "servers.databricks.catalog", index)
692
+ server.host = get_server_cell_value(workbook, sheet, "servers.databricks.host", index)
693
+ server.schema_ = get_server_cell_value(workbook, sheet, "servers.databricks.schema", index)
694
+ elif server_type == "glue":
695
+ server.account = get_server_cell_value(workbook, sheet, "servers.glue.account", index)
696
+ server.database = get_server_cell_value(workbook, sheet, "servers.glue.database", index)
697
+ server.format = get_server_cell_value(workbook, sheet, "servers.glue.format", index)
698
+ server.location = get_server_cell_value(workbook, sheet, "servers.glue.location", index)
699
+ elif server_type == "kafka":
700
+ server.format = get_server_cell_value(workbook, sheet, "servers.kafka.format", index)
701
+ server.host = get_server_cell_value(workbook, sheet, "servers.kafka.host", index)
702
+ server.topic = get_server_cell_value(workbook, sheet, "servers.kafka.topic", index)
703
+ elif server_type == "postgres":
704
+ server.database = get_server_cell_value(workbook, sheet, "servers.postgres.database", index)
705
+ server.host = get_server_cell_value(workbook, sheet, "servers.postgres.host", index)
706
+ server.port = get_server_cell_value(workbook, sheet, "servers.postgres.port", index)
707
+ server.schema_ = get_server_cell_value(workbook, sheet, "servers.postgres.schema", index)
708
+ elif server_type == "s3":
709
+ server.delimiter = get_server_cell_value(workbook, sheet, "servers.s3.delimiter", index)
710
+ server.endpointUrl = get_server_cell_value(workbook, sheet, "servers.s3.endpointUrl", index)
711
+ server.format = get_server_cell_value(workbook, sheet, "servers.s3.format", index)
712
+ server.location = get_server_cell_value(workbook, sheet, "servers.s3.location", index)
713
+ elif server_type == "snowflake":
714
+ server.account = get_server_cell_value(workbook, sheet, "servers.snowflake.account", index)
715
+ server.database = get_server_cell_value(workbook, sheet, "servers.snowflake.database", index)
716
+ server.host = get_server_cell_value(workbook, sheet, "servers.snowflake.host", index)
717
+ server.port = get_server_cell_value(workbook, sheet, "servers.snowflake.port", index)
718
+ server.schema_ = get_server_cell_value(workbook, sheet, "servers.snowflake.schema", index)
719
+ server.warehouse = get_server_cell_value(workbook, sheet, "servers.snowflake.warehouse", index)
720
+ elif server_type == "sqlserver":
721
+ server.database = get_server_cell_value(workbook, sheet, "servers.sqlserver.database", index)
722
+ server.host = get_server_cell_value(workbook, sheet, "servers.sqlserver.host", index)
723
+ server.port = get_server_cell_value(workbook, sheet, "servers.sqlserver.port", index)
724
+ server.schema_ = get_server_cell_value(workbook, sheet, "servers.sqlserver.schema", index)
725
+ else:
726
+ # Custom server type - grab all possible fields
727
+ server.account = get_server_cell_value(workbook, sheet, "servers.custom.account", index)
728
+ server.catalog = get_server_cell_value(workbook, sheet, "servers.custom.catalog", index)
729
+ server.database = get_server_cell_value(workbook, sheet, "servers.custom.database", index)
730
+ server.dataset = get_server_cell_value(workbook, sheet, "servers.custom.dataset", index)
731
+ server.delimiter = get_server_cell_value(workbook, sheet, "servers.custom.delimiter", index)
732
+ server.endpointUrl = get_server_cell_value(workbook, sheet, "servers.custom.endpointUrl", index)
733
+ server.format = get_server_cell_value(workbook, sheet, "servers.custom.format", index)
734
+ server.host = get_server_cell_value(workbook, sheet, "servers.custom.host", index)
735
+ server.location = get_server_cell_value(workbook, sheet, "servers.custom.location", index)
736
+ server.path = get_server_cell_value(workbook, sheet, "servers.custom.path", index)
737
+ server.port = get_server_cell_value(workbook, sheet, "servers.custom.port", index)
738
+ server.project = get_server_cell_value(workbook, sheet, "servers.custom.project", index)
739
+ server.schema_ = get_server_cell_value(workbook, sheet, "servers.custom.schema", index)
740
+ server.stagingDir = get_server_cell_value(workbook, sheet, "servers.custom.stagingDir", index)
741
+ server.table = get_server_cell_value(workbook, sheet, "servers.custom.table", index)
742
+ server.view = get_server_cell_value(workbook, sheet, "servers.custom.view", index)
743
+ server.warehouse = get_server_cell_value(workbook, sheet, "servers.custom.warehouse", index)
744
+ server.region = get_server_cell_value(workbook, sheet, "servers.custom.region", index)
745
+ server.regionName = get_server_cell_value(workbook, sheet, "servers.custom.regionName", index)
746
+ server.serviceName = get_server_cell_value(workbook, sheet, "servers.custom.serviceName", index)
747
+
748
+ servers.append(server)
749
+ index += 1
750
+ except Exception as e:
751
+ logger.warning(f"Error importing servers: {str(e)}")
752
+ return None
753
+
754
+ return servers if servers else None
755
+
756
+
757
def get_server_cell_value(workbook: Workbook, sheet: Worksheet, name: str, col_offset: int):
    """Read the value of a server property cell.

    Server entries are laid out horizontally (one column per server), so the
    named anchor cell is shifted right by ``col_offset`` columns before the
    value is read. Returns None when the named cell does not exist or when
    the lookup fails for any reason.
    """
    try:
        anchor = get_cell_by_name_in_workbook(workbook, name)
        if not anchor:
            return None

        # Convert openpyxl's 1-based coordinates to the 0-based positions
        # expected by get_cell_value_by_position, applying the column shift.
        zero_based_row = anchor.row - 1
        zero_based_col = anchor.column - 1 + col_offset
        return get_cell_value_by_position(sheet, zero_based_row, zero_based_col)
    except Exception as e:
        logger.warning(f"Error getting server cell value for {name}: {str(e)}")
        return None
770
+
771
+
772
def import_price(workbook) -> Optional[Dict[str, Any]]:
    """Extract price information from its named cells.

    A plain dict is returned (no dedicated model class is used for price)
    with the keys "priceAmount", "priceCurrency" and "priceUnit". Returns
    None when no price cell is filled in or when reading fails.
    """
    try:
        fields = {
            "priceAmount": get_cell_value_by_name(workbook, "price.priceAmount"),
            "priceCurrency": get_cell_value_by_name(workbook, "price.priceCurrency"),
            "priceUnit": get_cell_value_by_name(workbook, "price.priceUnit"),
        }
    except Exception as e:
        logger.warning(f"Error importing price: {str(e)}")
        return None

    # Only emit a price object when at least one component is present.
    return fields if any(fields.values()) else None
791
+
792
+
793
def import_custom_properties(workbook: Workbook) -> Optional[List[CustomProperty]]:
    """Extract custom properties from the "Custom Properties" sheet.

    The contract owner lives in its own named cell but is exported as a
    custom property, so it is prepended first; any row in the
    "CustomProperties" named range whose property column is "owner" is
    skipped to avoid a duplicate.

    Returns:
        The collected custom properties, or None when there are none.
    """
    custom_properties = []

    owner = get_cell_value_by_name(workbook, "owner")

    # Add owner as a custom property
    if owner:
        custom_properties.append(
            CustomProperty(
                property="owner",
                value=owner,
            )
        )

    try:
        # Get other custom properties
        custom_properties_sheet = workbook["Custom Properties"]
        if custom_properties_sheet:
            custom_properties_range = get_range_by_name_in_workbook(workbook, "CustomProperties")
            if custom_properties_range:
                # NOTE: a previous header-skip check compared row_idx against
                # range[0] - 1, a value that range(range[0], range[1]) can
                # never yield — removed as unreachable dead code.
                for row_idx in range(custom_properties_range[0], custom_properties_range[1]):
                    property_name = get_cell_value_by_position(custom_properties_sheet, row_idx, 0)
                    if not property_name or property_name == "owner":
                        continue

                    property_value = get_cell_value_by_position(custom_properties_sheet, row_idx, 1)
                    parsed_value = parse_property_value(property_value)

                    custom_properties.append(
                        CustomProperty(
                            property=property_name,
                            value=parsed_value,
                        )
                    )
    except Exception as e:
        logger.warning(f"Error importing custom properties: {str(e)}")

    return custom_properties if custom_properties else None
836
+
837
+
838
def parse_property_value(value: str) -> Any:
    """Parse a raw cell value into the most specific Python type.

    Attempts, in order: None passthrough, booleans (both real bools as
    produced by Excel boolean cells and the strings "true"/"false"),
    integers, floats (collapsed to int when whole), and finally the
    original value unchanged.

    Args:
        value: The raw cell value; usually a string, but the workbook
            reader may also yield bool, int or float.

    Returns:
        The parsed value as bool, int or float, or the input itself when
        no conversion applies.
    """
    if value is None:
        return None

    # Excel boolean cells arrive as real bools; return them as-is instead of
    # letting float() below silently coerce them to 1.0 / 0.0.
    if isinstance(value, bool):
        return value

    # String representations of booleans (simple case).
    if isinstance(value, str):
        value_lower = value.lower().strip()
        if value_lower == "true":
            return True
        if value_lower == "false":
            return False

    # Try numeric conversions
    try:
        # Plain digit strings are unambiguous integers.
        if isinstance(value, str) and value.isdigit():
            return int(value)

        # Anything float() accepts; whole numbers are returned as int.
        float_val = float(value)
        if float_val.is_integer():
            return int(float_val)
        return float_val
    except (ValueError, TypeError, AttributeError):
        # Not numeric — hand back the original value untouched.
        return value
866
+
867
+
868
def import_quality(workbook: Workbook) -> Dict[str, List[DataQuality]]:
    """
    Import quality data from Quality sheet and organize by schema.property key

    Returns:
        Dictionary mapping schema.property keys to lists of DataQuality
        objects; empty dict when the sheet or named range is missing or an
        error occurs while reading.
    """
    try:
        quality_sheet = workbook["Quality"]
        if not quality_sheet:
            return {}
    except KeyError:
        logger.warning("Quality sheet not found")
        return {}

    try:
        quality_range = get_range_by_name_in_workbook(workbook, "quality")
        if not quality_range:
            logger.warning("Quality range not found")
            return {}

        quality_header_row_index = quality_range[0] - 1
        headers = get_headers_from_header_row(quality_sheet, quality_header_row_index)

        # Materialize the row list once. openpyxl's Worksheet.rows is a
        # generator; the previous code rebuilt the full list twice per loop
        # iteration, making the scan O(n^2) in sheet size.
        all_rows = list(quality_sheet.rows)

        quality_map = {}

        for row_idx in range(quality_range[0], quality_range[1]):
            if len(all_rows) < row_idx + 1:
                break
            row = all_rows[row_idx]

            # Extract quality fields from row
            schema_name = get_cell_value(row, headers.get("schema"))
            property_name = get_cell_value(row, headers.get("property"))
            quality_type = get_cell_value(row, headers.get("quality type"))
            description = get_cell_value(row, headers.get("description"))
            rule = get_cell_value(row, headers.get("rule (library)"))
            query = get_cell_value(row, headers.get("query (sql)"))
            engine = get_cell_value(row, headers.get("quality engine (custom)"))
            implementation = get_cell_value(row, headers.get("implementation (custom)"))
            severity = get_cell_value(row, headers.get("severity"))
            scheduler = get_cell_value(row, headers.get("scheduler"))
            schedule = get_cell_value(row, headers.get("schedule"))
            threshold_operator = get_cell_value(row, headers.get("threshold operator"))
            threshold_value = get_cell_value(row, headers.get("threshold value"))

            # Skip if no schema name or insufficient quality data
            if not schema_name or (not quality_type and not description and not rule):
                continue

            # Parse threshold values based on operator
            threshold_dict = parse_threshold_values(threshold_operator, threshold_value)

            # Create DataQuality object with parsed thresholds
            quality = DataQuality(
                name=None,
                description=description,
                type=quality_type,
                rule=rule,
                unit=None,
                validValues=None,
                query=query,
                engine=engine,
                implementation=implementation,
                dimension=None,
                method=None,
                severity=severity,
                businessImpact=None,
                customProperties=None,
                authoritativeDefinitions=None,
                tags=None,
                scheduler=scheduler,
                schedule=schedule,
                **threshold_dict,  # Unpack threshold values
            )

            # Create key for mapping - use schema.property format
            key = schema_name if not property_name else f"{schema_name}.{property_name}"

            if key not in quality_map:
                quality_map[key] = []
            quality_map[key].append(quality)

    except Exception as e:
        logger.warning(f"Error importing quality: {str(e)}")
        return {}

    return quality_map
956
+
957
+
958
def parse_threshold_values(threshold_operator: str, threshold_value: str) -> Dict[str, Any]:
    """
    Parse threshold operator and value into DataQuality threshold fields

    Args:
        threshold_operator: The threshold operator (e.g., "mustBe", "mustBeBetween")
        threshold_value: The threshold value; normally a string such as "10"
            or "[1, 5]", but numeric cell values are tolerated as well.

    Returns:
        Dictionary with the matching threshold field set; empty when the
        value is absent or cannot be parsed.
    """
    threshold_dict = {}

    if not threshold_operator or not threshold_value:
        return threshold_dict

    # Excel may hand back a number rather than a string; normalize so the
    # string operations below cannot raise AttributeError.
    text = str(threshold_value).strip()

    # Parse threshold values based on operator
    if threshold_operator in ["mustBeBetween", "mustNotBeBetween"]:
        # Parse "[value1, value2]" format
        if text.startswith("[") and text.endswith("]"):
            content = text[1:-1]  # Remove brackets
            try:
                values = [Decimal(v.strip()) for v in content.split(",") if v.strip()]
                if len(values) >= 2:
                    threshold_dict[threshold_operator] = values[:2]  # Take first two values
            # ArithmeticError is required: decimal.InvalidOperation derives
            # from it, not from ValueError/TypeError, and would otherwise
            # escape on malformed numerics like "1.2.3".
            except (ValueError, TypeError, ArithmeticError) as e:
                logger.warning(f"Failed to parse between values: {threshold_value}, error: {e}")
    else:
        # Single value for other operators. Let Decimal do the validation
        # directly instead of a digit pre-check, which rejected valid forms
        # such as "1e3" and let "1.2.3" through to crash Decimal.
        try:
            threshold_dict[threshold_operator] = Decimal(text)
        except (ValueError, TypeError, ArithmeticError) as e:
            logger.warning(f"Failed to parse threshold value: {threshold_value}, error: {e}")

    return threshold_dict
996
+
997
+
998
def attach_quality_to_schemas(
    schemas: Optional[List[SchemaObject]], quality_map: Dict[str, List[DataQuality]]
) -> Optional[List[SchemaObject]]:
    """
    Attach quality attributes to schemas and their properties.

    Quality entries keyed by the bare schema name become schema-level
    quality; "schema.property" keys are handled by
    attach_quality_to_properties, which also walks nested properties.

    Args:
        schemas: List of schema objects (may be None)
        quality_map: Dictionary mapping schema.property keys to quality lists

    Returns:
        The schema objects (mutated in place), or None when no schemas given.
    """
    if not schemas:
        return None

    result = []
    for schema in schemas:
        name = schema.name
        if name:
            # Schema-level quality: key without a property suffix.
            schema_level_quality = quality_map.get(name)
            if schema_level_quality:
                schema.quality = schema_level_quality

            # Property-level quality, including nested property paths.
            if schema.properties:
                schema.properties = attach_quality_to_properties(schema.properties, name, quality_map)
        result.append(schema)

    return result
1034
+
1035
+
1036
def attach_quality_to_properties(
    properties: List[SchemaProperty], schema_name: str, quality_map: Dict[str, List[DataQuality]], prefix: str = ""
) -> List[SchemaProperty]:
    """
    Recursively attach quality attributes to properties and nested properties.

    Lookup keys have the form "<schema>.<dotted property path>"; quality for
    array items uses an extra ".items" suffix on the path.

    Args:
        properties: List of property objects
        schema_name: Name of the parent schema
        quality_map: Dictionary mapping schema.property keys to quality lists
        prefix: Dotted path of the enclosing property for recursive calls

    Returns:
        The property objects, mutated in place where quality matched.
    """
    result = []

    for prop in properties:
        if not prop.name:
            # Unnamed properties cannot be addressed by a quality key.
            result.append(prop)
            continue

        path = f"{prefix}.{prop.name}" if prefix else prop.name

        # Quality addressed directly at this property.
        matched = quality_map.get(f"{schema_name}.{path}")
        if matched:
            prop.quality = matched

        # Recurse into nested object properties.
        if prop.properties:
            prop.properties = attach_quality_to_properties(prop.properties, schema_name, quality_map, path)

        # Array items: quality for the item itself plus its nested properties.
        if prop.items:
            item_quality = quality_map.get(f"{schema_name}.{path}.items")
            if item_quality:
                prop.items.quality = item_quality

            if prop.items.properties:
                prop.items.properties = attach_quality_to_properties(
                    prop.items.properties, schema_name, quality_map, f"{path}.items"
                )

        result.append(prop)

    return result
1090
+
1091
+
1092
def get_headers_from_header_row(sheet: Worksheet, header_row_index: int) -> Dict[str, int]:
    """
    Map the header names in a given row to their column indices.

    Args:
        sheet: The worksheet
        header_row_index: 0-based row index of the header row

    Returns:
        Dictionary mapping header names (lowercased, stripped) to 0-based
        column indices; empty when the row is missing or unreadable.
    """
    headers: Dict[str, int] = {}
    try:
        for column_index, cell in enumerate(list(sheet.rows)[header_row_index]):
            if cell.value:
                headers[str(cell.value).lower().strip()] = column_index
    except (IndexError, AttributeError) as e:
        logger.warning(f"Error getting headers from row {header_row_index}: {e}")

    return headers