datacontract-cli 0.10.23__py3-none-any.whl → 0.10.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. datacontract/__init__.py +13 -0
  2. datacontract/api.py +12 -5
  3. datacontract/catalog/catalog.py +5 -3
  4. datacontract/cli.py +116 -10
  5. datacontract/data_contract.py +143 -65
  6. datacontract/engines/data_contract_checks.py +366 -60
  7. datacontract/engines/data_contract_test.py +50 -4
  8. datacontract/engines/fastjsonschema/check_jsonschema.py +37 -19
  9. datacontract/engines/fastjsonschema/s3/s3_read_files.py +3 -2
  10. datacontract/engines/soda/check_soda_execute.py +22 -3
  11. datacontract/engines/soda/connections/athena.py +79 -0
  12. datacontract/engines/soda/connections/duckdb_connection.py +65 -6
  13. datacontract/engines/soda/connections/kafka.py +4 -2
  14. datacontract/export/avro_converter.py +20 -3
  15. datacontract/export/bigquery_converter.py +1 -1
  16. datacontract/export/dbt_converter.py +36 -7
  17. datacontract/export/dqx_converter.py +126 -0
  18. datacontract/export/duckdb_type_converter.py +57 -0
  19. datacontract/export/excel_exporter.py +923 -0
  20. datacontract/export/exporter.py +3 -0
  21. datacontract/export/exporter_factory.py +17 -1
  22. datacontract/export/great_expectations_converter.py +55 -5
  23. datacontract/export/{html_export.py → html_exporter.py} +31 -20
  24. datacontract/export/markdown_converter.py +134 -5
  25. datacontract/export/mermaid_exporter.py +110 -0
  26. datacontract/export/odcs_v3_exporter.py +187 -145
  27. datacontract/export/protobuf_converter.py +163 -69
  28. datacontract/export/rdf_converter.py +2 -2
  29. datacontract/export/sodacl_converter.py +9 -1
  30. datacontract/export/spark_converter.py +31 -4
  31. datacontract/export/sql_converter.py +6 -2
  32. datacontract/export/sql_type_converter.py +20 -8
  33. datacontract/imports/avro_importer.py +63 -12
  34. datacontract/imports/csv_importer.py +111 -57
  35. datacontract/imports/excel_importer.py +1111 -0
  36. datacontract/imports/importer.py +16 -3
  37. datacontract/imports/importer_factory.py +17 -0
  38. datacontract/imports/json_importer.py +325 -0
  39. datacontract/imports/odcs_importer.py +2 -2
  40. datacontract/imports/odcs_v3_importer.py +351 -151
  41. datacontract/imports/protobuf_importer.py +264 -0
  42. datacontract/imports/spark_importer.py +117 -13
  43. datacontract/imports/sql_importer.py +32 -16
  44. datacontract/imports/unity_importer.py +84 -38
  45. datacontract/init/init_template.py +1 -1
  46. datacontract/integration/datamesh_manager.py +16 -2
  47. datacontract/lint/resolve.py +112 -23
  48. datacontract/lint/schema.py +24 -15
  49. datacontract/model/data_contract_specification/__init__.py +1 -0
  50. datacontract/model/odcs.py +13 -0
  51. datacontract/model/run.py +3 -0
  52. datacontract/output/junit_test_results.py +3 -3
  53. datacontract/schemas/datacontract-1.1.0.init.yaml +1 -1
  54. datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
  55. datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
  56. datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
  57. datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
  58. datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
  59. datacontract/templates/datacontract.html +54 -3
  60. datacontract/templates/datacontract_odcs.html +685 -0
  61. datacontract/templates/index.html +5 -2
  62. datacontract/templates/partials/server.html +2 -0
  63. datacontract/templates/style/output.css +319 -145
  64. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/METADATA +656 -431
  65. datacontract_cli-0.10.37.dist-info/RECORD +119 -0
  66. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
  67. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
  68. datacontract/export/csv_type_converter.py +0 -36
  69. datacontract/lint/lint.py +0 -142
  70. datacontract/lint/linters/description_linter.py +0 -35
  71. datacontract/lint/linters/field_pattern_linter.py +0 -34
  72. datacontract/lint/linters/field_reference_linter.py +0 -48
  73. datacontract/lint/linters/notice_period_linter.py +0 -55
  74. datacontract/lint/linters/quality_schema_linter.py +0 -52
  75. datacontract/lint/linters/valid_constraints_linter.py +0 -100
  76. datacontract/model/data_contract_specification.py +0 -327
  77. datacontract_cli-0.10.23.dist-info/RECORD +0 -113
  78. /datacontract/{lint/linters → output}/__init__.py +0 -0
  79. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
  80. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,923 @@
1
+ import io
2
+ import logging
3
+ from decimal import Decimal
4
+ from typing import Any, List, Optional
5
+
6
+ import openpyxl
7
+ import requests
8
+ from open_data_contract_standard.model import (
9
+ DataQuality,
10
+ OpenDataContractStandard,
11
+ SchemaObject,
12
+ SchemaProperty,
13
+ )
14
+ from openpyxl.cell.cell import Cell
15
+ from openpyxl.workbook.defined_name import DefinedName
16
+ from openpyxl.workbook.workbook import Workbook
17
+ from openpyxl.worksheet.worksheet import Worksheet
18
+
19
+ from datacontract.export.exporter import Exporter
20
+ from datacontract.model.data_contract_specification import DataContractSpecification
21
+
22
# Module-level logger used by every helper in this exporter.
logger = logging.getLogger(__name__)

# Location of the ODCS Excel template that is downloaded when the caller
# does not supply a template of their own.
ODCS_EXCEL_TEMPLATE_URL = (
    "https://github.com/datacontract/open-data-contract-standard-excel-template"
    "/raw/refs/heads/main/odcs-template.xlsx"
)
27
+
28
+
29
class ExcelExporter(Exporter):
    """Exporter that renders a data contract into the ODCS Excel template."""

    def __init__(self, export_format):
        super().__init__(export_format)

    def export(self, data_contract, model, server, sql_server_type, export_args) -> bytes:
        """
        Render the given data contract as an Excel workbook.

        Args:
            data_contract: DataContractSpecification or OpenDataContractStandard to export
            model: Model name (ignored by this exporter)
            server: Server name (ignored by this exporter)
            sql_server_type: SQL server type (ignored by this exporter)
            export_args: Optional mapping; its "template" entry, if any, selects the template

        Returns:
            The populated workbook serialized as bytes
        """
        # A DataContractSpecification is re-parsed as ODCS from its YAML form;
        # anything else is passed through unchanged.
        odcs = data_contract
        if isinstance(data_contract, DataContractSpecification):
            odcs = OpenDataContractStandard.from_string(data_contract.to_yaml())

        # A missing or empty export_args means "use the default template".
        template = None
        if export_args:
            template = export_args.get("template")
        return export_to_excel_bytes(odcs, template)
60
+
61
+
62
def export_to_excel_bytes(odcs: OpenDataContractStandard, template_path: Optional[str] = None) -> bytes:
    """
    Populate the Excel template with the ODCS contract and serialize it.

    Args:
        odcs: OpenDataContractStandard object to export
        template_path: Optional path/URL of a custom template; a falsy value
            selects ODCS_EXCEL_TEMPLATE_URL.

    Returns:
        Excel file as bytes
    """
    workbook = create_workbook_from_template(template_path if template_path else ODCS_EXCEL_TEMPLATE_URL)

    try:
        # Each filler writes one section of the contract into the workbook.
        section_fillers = (
            fill_fundamentals,
            fill_schema,
            fill_quality,
            fill_custom_properties,
            fill_support,
            fill_team,
            fill_roles,
            fill_sla_properties,
            fill_servers,
            fill_pricing,
        )
        for fill_section in section_fillers:
            fill_section(workbook, odcs)

        # Make the Fundamentals sheet the active one
        workbook.active = workbook["Fundamentals"]

        # Ask for automatic formula recalculation; ignore workbooks that do
        # not expose or accept this setting.
        try:
            workbook.calculation.calcMode = "auto"
        except (AttributeError, ValueError):
            pass

        # Serialize the workbook into an in-memory buffer
        buffer = io.BytesIO()
        workbook.save(buffer)
        buffer.seek(0)
        return buffer.getvalue()
    finally:
        workbook.close()
107
+
108
+
109
def create_workbook_from_template(template_path: str) -> Workbook:
    """
    Load the Excel template from a local file path or an http(s) URL.

    Args:
        template_path: File path or URL of the template; converted with str() first.

    Returns:
        The loaded openpyxl workbook

    Raises:
        RuntimeError: If the template cannot be downloaded or opened. The
            original exception is attached as the explicit cause.
    """
    try:
        # Convert Path object to string if needed
        template_path_str = str(template_path)
        logger.info(f"Processing template path: {template_path_str}")

        if template_path_str.startswith(("http://", "https://")):
            logger.info(f"Identified as URL, downloading from: {template_path_str}")
            # Download the template and open it from memory
            response = requests.get(template_path_str, timeout=30)
            response.raise_for_status()
            template_bytes = response.content
            workbook = openpyxl.load_workbook(io.BytesIO(template_bytes))
        else:
            logger.info(f"Identified as local file: {template_path_str}")
            workbook = openpyxl.load_workbook(template_path_str)

        return workbook
    except Exception as e:
        logger.error(f"Failed to load Excel template from {template_path}: {e}")
        # Chain explicitly so the traceback shows the underlying failure as the cause.
        raise RuntimeError(f"Failed to load Excel template: {e}") from e
133
+
134
+
135
def fill_fundamentals(workbook: Workbook, odcs: OpenDataContractStandard):
    """Write the top-level contract attributes into their named cells"""
    # These contract attributes are written to named cells of the same name.
    for field_name in (
        "apiVersion",
        "kind",
        "id",
        "name",
        "version",
        "status",
        "domain",
        "dataProduct",
        "tenant",
    ):
        set_cell_value_by_name(workbook, field_name, getattr(odcs, field_name))

    # The owner comes from the first custom property named "owner", if any.
    owner_value = None
    if odcs.customProperties:
        owner_value = next(
            (prop.value for prop in odcs.customProperties if prop.property == "owner"),
            None,
        )
    set_cell_value_by_name(workbook, "owner", owner_value)

    set_cell_value_by_name(workbook, "slaDefaultElement", odcs.slaDefaultElement)

    # Description sub-fields are only written when a description exists.
    description = odcs.description
    if description:
        for part in ("purpose", "limitations", "usage"):
            set_cell_value_by_name(workbook, f"description.{part}", getattr(description, part))

    # Tags are stored as a single comma-separated string.
    if odcs.tags:
        set_cell_value_by_name(workbook, "tags", ",".join(odcs.tags))
167
+
168
+
169
def fill_pricing(workbook: Workbook, odcs: OpenDataContractStandard):
    """Write the price amount, currency and unit into their named cells"""
    price = odcs.price
    if not price:
        # Nothing to write when the contract has no price.
        return

    set_cell_value_by_name(workbook, "price.priceAmount", price.priceAmount)
    set_cell_value_by_name(workbook, "price.priceCurrency", price.priceCurrency)
    set_cell_value_by_name(workbook, "price.priceUnit", price.priceUnit)
175
+
176
+
177
def fill_schema(workbook: Workbook, odcs: OpenDataContractStandard):
    """Create one sheet per schema by cloning the schema template sheet"""
    template_sheet = workbook["Schema <table_name>"]

    if not odcs.schema_:
        # The template sheet is dropped even when there is nothing to export.
        workbook.remove(template_sheet)
        return

    # First pass: clone the template once per schema.
    clones = []
    for schema in odcs.schema_:
        clone = workbook.copy_worksheet(template_sheet)
        clone.title = f"Schema {schema.name}"

        # Carry the template's sheet-scoped defined names over to the clone.
        copy_sheet_names(workbook, template_sheet, clone)

        # Shift the clone to the template's current position, i.e. just before it.
        shift = workbook.index(template_sheet) - workbook.index(clone)
        workbook.move_sheet(clone, offset=shift)

        clones.append((clone, schema))

    # The template itself is removed before any clone is filled.
    workbook.remove(template_sheet)

    # Second pass: write each schema into its own sheet.
    for clone, schema in clones:
        fill_single_schema(clone, schema)
214
+
215
+
216
def copy_sheet_names(workbook: Workbook, template_sheet: Worksheet, new_sheet: Worksheet):
    """Recreate the template sheet's worksheet-scoped defined names on the new sheet"""
    try:
        for name_str in template_sheet.defined_names:
            try:
                source_name = template_sheet.defined_names[name_str]

                # Point the reference at the new sheet by swapping the sheet title in it.
                retargeted_ref = source_name.attr_text.replace(template_sheet.title, new_sheet.title)

                # Register the name on the new sheet (worksheet-scoped).
                new_sheet.defined_names.add(DefinedName(name_str, attr_text=retargeted_ref))

            except Exception as e:
                # A name that cannot be copied is logged; the remaining names are still processed.
                logger.warning(f"Failed to copy worksheet-scoped named range {name_str}: {e}")

    except Exception as e:
        logger.warning(f"Error copying sheet names: {e}")
239
+
240
+
241
def fill_single_schema(sheet: Worksheet, schema: SchemaObject):
    """Write one schema's attributes and properties into its sheet"""

    def put(range_name: str, value):
        # Writes through the sheet-scoped named ranges of this sheet.
        set_cell_value_by_name_in_sheet(sheet, range_name, value)

    put("schema.name", schema.name)
    # A missing physical type is exported as "table".
    put("schema.physicalType", schema.physicalType or "table")
    put("schema.description", schema.description)
    put("schema.businessName", schema.businessName)
    put("schema.physicalName", schema.physicalName)
    put("schema.dataGranularityDescription", schema.dataGranularityDescription)

    # Tags are stored as a single comma-separated string.
    if schema.tags:
        put("schema.tags", ",".join(schema.tags))

    # Properties go into the sheet's properties table.
    if schema.properties:
        fill_properties_in_schema_sheet(sheet, schema.properties)
260
+
261
+
262
def fill_properties_in_schema_sheet(sheet: Worksheet, properties: List[SchemaProperty], prefix: str = ""):
    """Write the given properties into the sheet's properties table, one row after another"""
    try:
        # The properties table's header row is taken to be row 13.
        header_row_index = 13

        # Build a lookup from lower-cased header text to column index.
        header_map = {}
        for col_idx, header_name in get_headers_from_header_row(sheet, header_row_index).items():
            header_map[header_name.lower()] = col_idx

        # Each property reports the next free row, since nested properties
        # occupy additional rows.
        next_row = header_row_index + 1
        for schema_property in properties:
            next_row = fill_single_property_template(sheet, next_row, prefix, schema_property, header_map)

    except Exception as e:
        # Failures are logged rather than raised.
        logger.warning(f"Error filling properties: {e}")
280
+
281
+
282
def fill_single_property_template(
    sheet: Worksheet, row_index: int, prefix: str, property: SchemaProperty, header_map: dict
) -> int:
    """
    Write one property into row_index, then its nested properties and array items below it.

    Returns:
        The index of the first row after everything written for this property
    """
    # With a prefix the name is dotted onto it; a nameless property keeps just the prefix.
    if prefix:
        property_name = f"{prefix}.{property.name}" if property.name else f"{prefix}"
    else:
        property_name = property.name

    def write(header_name: str, value: Any):
        # Columns missing from the header map are skipped silently.
        column = header_map.get(header_name.lower())
        if column is None:
            return
        # header_map is 0-based, sheet.cell() columns are 1-based.
        sheet.cell(row=row_index, column=column + 1).value = value

    # Header text paired with a reader for its value. Readers run in this
    # order, one at a time, immediately before the cell is written.
    columns = (
        ("Property", lambda: property_name),
        ("Business Name", lambda: property.businessName),
        ("Logical Type", lambda: property.logicalType),
        ("Physical Type", lambda: property.physicalType),
        ("Physical Name", lambda: property.physicalName),
        ("Description", lambda: property.description),
        ("Required", lambda: property.required),
        ("Unique", lambda: property.unique),
        ("Primary Key", lambda: property.primaryKey),
        ("Primary Key Position", lambda: property.primaryKeyPosition),
        ("Partitioned", lambda: property.partitioned),
        ("Partition Key Position", lambda: property.partitionKeyPosition),
        ("Classification", lambda: property.classification),
        ("Tags", lambda: ",".join(property.tags) if property.tags else ""),
        # "Example(s)" is the header text to match here
        ("Example(s)", lambda: ",".join(map(str, property.examples)) if property.examples else ""),
        ("Encrypted Name", lambda: property.encryptedName),
        (
            "Transform Sources",
            lambda: ",".join(property.transformSourceObjects) if property.transformSourceObjects else "",
        ),
        ("Transform Logic", lambda: property.transformLogic),
        ("Critical Data Element Status", lambda: property.criticalDataElement),
    )
    for header_name, read_value in columns:
        write(header_name, read_value())

    # Only the first authoritative definition is exported.
    definitions = property.authoritativeDefinitions
    if definitions and len(definitions) > 0:
        primary = definitions[0]
        write("Authoritative Definition URL", primary.url)
        write("Authoritative Definition Type", primary.type)

    next_row_index = row_index + 1

    # Nested properties follow directly below, prefixed with this property's name.
    if property.properties:
        for nested_property in property.properties:
            next_row_index = fill_single_property_template(
                sheet, next_row_index, property_name, nested_property, header_map
            )

    # Array items are written under the "<name>.items" prefix.
    if property.items:
        next_row_index = fill_single_property_template(
            sheet, next_row_index, f"{property_name}.items", property.items, header_map
        )

    return next_row_index
340
+
341
+
342
def fill_single_property_simple(
    sheet: Worksheet, row_index: int, prefix: str, property: SchemaProperty, header_map: dict = None
) -> int:
    """Deprecated wrapper around fill_single_property_template, kept for backward compatibility"""
    if header_map is None:
        # No mapping supplied: derive it from the header row (row 13) of the sheet.
        sheet_headers = get_headers_from_header_row(sheet, 13)
        header_map = {}
        for col_idx, header_name in sheet_headers.items():
            header_map[header_name.lower()] = col_idx

    # All actual work is delegated to the template-based function.
    return fill_single_property_template(sheet, row_index, prefix, property, header_map)
355
+
356
+
357
def fill_quality(workbook: Workbook, odcs: OpenDataContractStandard):
    """Write all schema-level and property-level quality rules into the Quality sheet"""
    # This lookup is outside the try block, so a missing "Quality" sheet
    # raises instead of being logged.
    quality_sheet = workbook["Quality"]

    try:
        ref = name_to_ref(workbook, "quality")
        if not ref:
            logger.warning("No quality range found")
            return

        # The header row is derived from the "quality" named range.
        header_row_index = parse_range_safely(ref)
        headers = get_headers_from_header_row(quality_sheet, header_row_index)
        next_row = header_row_index + 1

        for schema in odcs.schema_ or []:
            # Schema-level rules carry no property name.
            for quality in schema.quality or []:
                target_row = get_or_create_row(quality_sheet, next_row)
                fill_quality_row(target_row, headers, schema.name, None, quality)
                next_row += 1

            # Property-level rules, including nested properties.
            if schema.properties:
                next_row = fill_properties_quality(
                    quality_sheet, headers, schema.name, schema.properties, next_row
                )

    except Exception as e:
        logger.warning(f"Error filling quality: {e}")
391
+
392
+
393
def fill_properties_quality(
    sheet: Worksheet,
    headers: dict,
    schema_name: str,
    properties: List[SchemaProperty],
    start_row_index: int,
    prefix: str = "",
) -> int:
    """
    Write quality rows for the given properties, descending into nested properties and array items.

    Returns:
        The index of the first row after the last one written
    """
    row_cursor = start_row_index

    for prop in properties:
        # Properties without a name are skipped together with everything below them.
        if not prop.name:
            continue

        qualified_name = f"{prefix}.{prop.name}" if prefix else prop.name

        # Rules attached directly to this property
        for quality in prop.quality or []:
            fill_quality_row(get_or_create_row(sheet, row_cursor), headers, schema_name, qualified_name, quality)
            row_cursor += 1

        # Rules of nested properties
        if prop.properties:
            row_cursor = fill_properties_quality(
                sheet, headers, schema_name, prop.properties, row_cursor, qualified_name
            )

        # Rules of array items, reported under "<name>.items"
        items = prop.items
        if items:
            items_name = f"{qualified_name}.items"
            for quality in items.quality or []:
                fill_quality_row(get_or_create_row(sheet, row_cursor), headers, schema_name, items_name, quality)
                row_cursor += 1

            # Rules of properties nested inside the array items
            if items.properties:
                row_cursor = fill_properties_quality(
                    sheet, headers, schema_name, items.properties, row_cursor, items_name
                )

    return row_cursor
439
+
440
+
441
def fill_quality_row(row, headers: dict, schema_name: str, property_name: Optional[str], quality: DataQuality):
    """Write one quality rule into the row, matching columns by header text"""
    # Normalized header text paired with a reader for the cell value. A
    # reader only runs when its header is actually present.
    value_for_header = {
        "schema": lambda: schema_name,
        "property": lambda: property_name,
        "quality type": lambda: quality.type,
        "description": lambda: quality.description,
        "rule (library)": lambda: quality.rule,
        "query (sql)": lambda: quality.query,
        "threshold operator": lambda: get_threshold_operator(quality),
        "threshold value": lambda: get_threshold_value(quality),
        "quality engine (custom)": lambda: quality.engine,
        "implementation (custom)": lambda: quality.implementation,
        "severity": lambda: quality.severity,
        "scheduler": lambda: quality.scheduler,
        "schedule": lambda: quality.schedule,
    }

    for cell_index, header_name in headers.items():
        read_value = value_for_header.get(header_name.lower().strip())
        if read_value is not None:
            set_cell_value(row, cell_index, read_value())
474
+
475
+
476
def get_threshold_operator(quality: "DataQuality") -> Optional[str]:
    """Return the name of the first threshold operator set on the quality object, or None"""
    # Checked in this order; the first attribute that exists and is not None wins.
    operator_names = (
        "mustBe",
        "mustNotBe",
        "mustBeGreaterThan",
        "mustBeGreaterThanOrEqualTo",
        "mustBeLessThan",
        "mustBeLessThanOrEqualTo",
        "mustBeBetween",
        "mustNotBeBetween",
    )
    for operator_name in operator_names:
        if getattr(quality, operator_name, None) is not None:
            return operator_name
    return None
495
+
496
+
497
def get_threshold_value(quality: "DataQuality") -> Optional[str]:
    """Return the threshold of the quality object as a string, or None if none is set"""
    # Single-valued operators: the first one that is set is returned via str().
    scalar_operators = (
        "mustBe",
        "mustNotBe",
        "mustBeGreaterThan",
        "mustBeGreaterThanOrEqualTo",
        "mustBeLessThan",
        "mustBeLessThanOrEqualTo",
    )
    for operator_name in scalar_operators:
        bound = getattr(quality, operator_name, None)
        if bound is not None:
            return str(bound)

    # Range operators are rendered as "[low, high]". A range with fewer than
    # two entries is skipped, so the next operator is still considered.
    for operator_name in ("mustBeBetween", "mustNotBeBetween"):
        bounds = getattr(quality, operator_name, None)
        if bounds is not None and len(bounds) >= 2:
            return f"[{bounds[0]}, {bounds[1]}]"

    return None
520
+
521
+
522
def fill_custom_properties(workbook: Workbook, odcs: OpenDataContractStandard):
    """Write all custom properties except "owner" into the Custom Properties sheet"""
    try:
        ref = name_to_ref(workbook, "CustomProperties")
        if not ref:
            logger.warning("No CustomProperties range found")
            return

        sheet = workbook["Custom Properties"]

        # The header row is derived from the "CustomProperties" named range.
        header_row_index = parse_range_safely(ref)

        if not odcs.customProperties:
            return

        # "owner" and entries without a property name are left out. The
        # filter is lazy, so entries are examined one at a time as rows are written.
        exportable = (prop for prop in odcs.customProperties if prop.property != "owner" and prop.property)
        for row_index, prop in enumerate(exportable, start=header_row_index + 1):
            row = get_or_create_row(sheet, row_index)
            set_cell_value(row, 0, prop.property)  # Property column
            set_cell_value(row, 1, prop.value)  # Value column

    except Exception as e:
        logger.warning(f"Error filling custom properties: {e}")
547
+
548
+
549
def fill_support(workbook: Workbook, odcs: OpenDataContractStandard):
    """Write one row per support channel into the Support sheet"""
    # Lower-cased header text -> attribute of the support channel
    attribute_by_header = {
        "channel": "channel",
        "channel url": "url",
        "description": "description",
        "tool": "tool",
        "scope": "scope",
        "invitation url": "invitationUrl",
    }

    try:
        ref = name_to_ref(workbook, "support")
        if not ref:
            logger.warning("No support range found")
            return

        support_sheet = workbook["Support"]

        # The header row is derived from the "support" named range.
        header_row_index = parse_range_safely(ref)
        headers = get_headers_from_header_row(support_sheet, header_row_index)

        # Rows are filled consecutively, starting right below the header row.
        for offset, support_channel in enumerate(odcs.support or [], start=1):
            row = get_or_create_row(support_sheet, header_row_index + offset)

            for cell_index, header_name in headers.items():
                attribute = attribute_by_header.get(header_name.lower())
                if attribute is not None:
                    set_cell_value(row, cell_index, getattr(support_channel, attribute))

    except Exception as e:
        logger.warning(f"Error filling support: {e}")
585
+
586
+
587
def fill_team(workbook: Workbook, odcs: OpenDataContractStandard):
    """Write one row per team member into the Team sheet"""
    # Lower-cased header text -> attribute of the team member
    attribute_by_header = {
        "username": "username",
        "name": "name",
        "description": "description",
        "role": "role",
        "date in": "dateIn",
        "date out": "dateOut",
        "replaced by username": "replacedByUsername",
    }

    try:
        ref = name_to_ref(workbook, "team")
        if not ref:
            logger.warning("No team range found")
            return

        team_sheet = workbook["Team"]

        # The header row is derived from the "team" named range.
        header_row_index = parse_range_safely(ref)
        headers = get_headers_from_header_row(team_sheet, header_row_index)

        # Rows are filled consecutively, starting right below the header row.
        for offset, team_member in enumerate(odcs.team or [], start=1):
            row = get_or_create_row(team_sheet, header_row_index + offset)

            for cell_index, header_name in headers.items():
                attribute = attribute_by_header.get(header_name.lower())
                if attribute is not None:
                    set_cell_value(row, cell_index, getattr(team_member, attribute))

    except Exception as e:
        logger.warning(f"Error filling team: {e}")
625
+
626
+
627
def fill_roles(workbook: Workbook, odcs: OpenDataContractStandard):
    """Write one row per role into the Roles sheet, whose header row is fixed"""
    # Lower-cased header text -> attribute of the role
    attribute_by_header = {
        "role": "role",
        "description": "description",
        "access": "access",
        "1st level approvers": "firstLevelApprovers",
        "2nd level approvers": "secondLevelApprovers",
    }

    try:
        roles_sheet = workbook["Roles"]

        # The headers are taken to be in row 4.
        header_row_index = 4
        headers = get_headers_from_header_row(roles_sheet, header_row_index)

        # Rows are filled consecutively, starting right below the header row.
        for offset, role in enumerate(odcs.roles or [], start=1):
            row = get_or_create_row(roles_sheet, header_row_index + offset)

            for cell_index, header_name in headers.items():
                attribute = attribute_by_header.get(header_name.lower())
                if attribute is not None:
                    set_cell_value(row, cell_index, getattr(role, attribute))

    except Exception as e:
        logger.warning(f"Error filling roles: {e}")
655
+
656
+
657
def fill_sla_properties(workbook: Workbook, odcs: OpenDataContractStandard):
    """Write one row per ODCS SLA property into the "SLA" sheet.

    Column positions come from the sheet's header row; columns whose header
    is not one of the known SLA headers are left untouched. Any exception is
    logged as a warning instead of being raised.
    """
    # Lower-cased header text -> attribute of the SLA property written there
    attribute_by_header = {
        "property": "property",
        "value": "value",
        "extended value": "valueExt",
        "unit": "unit",
        "element": "element",
        "driver": "driver",
    }
    try:
        sla_sheet = workbook["SLA"]

        # From template analysis: Row 6 has the SLA properties table headers
        header_row_index = 6
        headers = get_headers_from_header_row(sla_sheet, header_row_index)

        if not odcs.slaProperties:
            return

        # Data rows start directly below the header row
        first_data_row = header_row_index + 1
        for sla_index, sla_prop in enumerate(odcs.slaProperties):
            row = get_or_create_row(sla_sheet, first_data_row + sla_index)

            for cell_index, header_name in headers.items():
                attribute = attribute_by_header.get(header_name.lower())
                if attribute is not None:
                    set_cell_value(row, cell_index, getattr(sla_prop, attribute))

    except Exception as e:
        logger.warning(f"Error filling SLA properties: {e}")
688
+
689
+
690
def fill_servers(workbook: Workbook, odcs: OpenDataContractStandard):
    """Write the ODCS servers into the "Servers" sheet.

    Each value is written relative to a named range, shifted by the server's
    position in ``odcs.servers``. Only azure, bigquery and databricks have
    type-specific fields. Any exception is logged as a warning instead of
    being raised.
    """
    # (named range, server attribute) pairs written for every server
    common_fields = (
        ("servers.server", "server"),
        ("servers.description", "description"),
        ("servers.environment", "environment"),
        ("servers.type", "type"),
    )
    # Extra (named range, server attribute) pairs keyed by server type
    # Add other server types as needed...
    type_specific_fields = {
        "azure": (
            ("servers.azure.location", "location"),
            ("servers.azure.format", "format"),
            ("servers.azure.delimiter", "delimiter"),
        ),
        "bigquery": (
            ("servers.bigquery.project", "project"),
            ("servers.bigquery.dataset", "dataset"),
        ),
        "databricks": (
            ("servers.databricks.catalog", "catalog"),
            ("servers.databricks.host", "host"),
            ("servers.databricks.schema", "schema_"),
        ),
    }
    try:
        servers_sheet = workbook["Servers"]

        if not odcs.servers:
            return

        for index, server in enumerate(odcs.servers):
            for range_name, attribute in common_fields:
                set_cell_value_by_column_index(servers_sheet, range_name, index, getattr(server, attribute))

            # Type-specific fields
            for range_name, attribute in type_specific_fields.get(server.type, ()):
                set_cell_value_by_column_index(servers_sheet, range_name, index, getattr(server, attribute))

    except Exception as e:
        logger.warning(f"Error filling servers: {e}")
719
+
720
+
721
+ # Helper functions
722
+
723
+
724
def find_cell_by_name(workbook: Workbook, name: str) -> Optional[Cell]:
    """Resolve a workbook-level named range to a cell.

    Returns None when the name has no reference or when anything goes wrong
    while resolving it.
    """
    try:
        reference = name_to_ref(workbook, name)
        return find_cell_by_ref(workbook, reference) if reference else None
    except Exception:
        return None
733
+
734
+
735
def find_cell_by_name_in_sheet(sheet: Worksheet, name: str) -> Optional[Cell]:
    """Resolve a worksheet-scoped named range to a cell of that worksheet.

    Only destinations whose sheet title equals ``sheet.title`` are
    considered. Returns None when nothing matches or on any error.
    """
    try:
        # Worksheet-scoped defined names are looked up on the sheet itself
        scoped_names = sheet.defined_names
        for candidate in scoped_names:
            if candidate != name:
                continue
            for target_title, target_coordinate in scoped_names[candidate].destinations:
                if target_title == sheet.title:
                    return sheet[target_coordinate]
    except Exception:
        return None
    return None
748
+
749
+
750
def find_cell_by_ref(workbook: Workbook, cell_ref: str) -> Optional[Cell]:
    """Find a cell by its reference.

    Args:
        workbook: Workbook to look the cell up in.
        cell_ref: Single-cell reference such as ``"A1"``, ``"Roles!$B$4"`` or
            ``"'My Sheet'!$B$4"``. Without a sheet part the active sheet is
            used.

    Returns:
        The referenced cell, or None when the reference cannot be resolved
        (unknown sheet, malformed coordinate, ...).
    """
    try:
        from openpyxl.utils.cell import column_index_from_string, coordinate_from_string

        # Parse the reference
        if "!" in cell_ref:
            # Split on the LAST "!" only: a sheet title may itself contain "!",
            # and a plain split would then fail to unpack into two parts.
            sheet_name, coord = cell_ref.rsplit("!", 1)
            # Drop the surrounding quotes and undo the doubled-apostrophe
            # escaping used inside quoted sheet names.
            sheet_name = sheet_name.strip("'").replace("''", "'")
            sheet = workbook[sheet_name]
        else:
            coord = cell_ref
            sheet = workbook.active

        # Remove $ signs
        coord = coord.replace("$", "")
        col_letter, row_num = coordinate_from_string(coord)
        col_num = column_index_from_string(col_letter)

        return sheet.cell(row=int(row_num), column=col_num)
    except Exception:
        return None
772
+
773
+
774
def find_cell_by_ref_in_sheet(sheet: Worksheet, cell_ref: str) -> Optional[Cell]:
    """Find a cell by its reference within a specific sheet.

    Args:
        sheet: Worksheet the cell is taken from; any sheet part in
            ``cell_ref`` is ignored.
        cell_ref: Single-cell reference such as ``"A1"`` or ``"Roles!$B$4"``.

    Returns:
        The referenced cell of ``sheet``, or None when the coordinate cannot
        be parsed.
    """
    try:
        from openpyxl.utils.cell import column_index_from_string, coordinate_from_string

        # Remove sheet name if present. Everything after the LAST "!" is the
        # coordinate; a plain split would fail to unpack when the sheet title
        # itself contains "!".
        coord = cell_ref.rpartition("!")[2]

        # Remove $ signs
        coord = coord.replace("$", "")
        col_letter, row_num = coordinate_from_string(coord)
        col_num = column_index_from_string(col_letter)

        return sheet.cell(row=int(row_num), column=col_num)
    except Exception:
        return None
793
+
794
+
795
def name_to_ref(workbook: Workbook, name: str) -> Optional[str]:
    """Return the reference text of a workbook-level named range.

    Returns None when the name is not defined or the lookup fails.
    """
    reference = None
    try:
        entry = workbook.defined_names.get(name)
        if entry:
            reference = entry.attr_text
    except Exception:
        # Best-effort lookup: any failure is reported as "not found"
        reference = None
    return reference
804
+
805
+
806
def name_to_ref_in_sheet(sheet: Worksheet, name: str) -> Optional[str]:
    """Get the reference for a named range that points into a specific sheet.

    Args:
        sheet: Worksheet whose title must appear in the reference text.
        name: Name of the defined range, looked up on the sheet's workbook.

    Returns:
        The reference text of the named range, or None when the name is not
        defined, does not mention ``sheet.title``, or the lookup fails.
    """
    try:
        # Look the name up by key, exactly as name_to_ref does. Iterating the
        # defined-names mapping yields its string keys, which carry no
        # ``.name`` attribute, so a scan over it can never match.
        defined_name = sheet.parent.defined_names.get(name)
        if defined_name and sheet.title in defined_name.attr_text:
            return defined_name.attr_text
    except Exception:
        # Best-effort lookup: any failure is reported as "not found"
        pass
    return None
817
+
818
+
819
def set_cell_value_by_name(workbook: Workbook, cell_name: str, value: Any):
    """Write ``value`` into the cell addressed by a workbook-level named range.

    Logs a warning and writes nothing when the named cell cannot be found.
    """
    target = find_cell_by_name(workbook, cell_name)
    if not target:
        logger.warning(f"Cell with name {cell_name} not found in workbook")
        return
    set_cell_value_direct(target, value)
826
+
827
+
828
def set_cell_value_by_name_in_sheet(sheet: Worksheet, cell_name: str, value: Any):
    """Write ``value`` into the cell addressed by a worksheet-scoped named range.

    Logs a warning and writes nothing when the named cell cannot be found.
    """
    target = find_cell_by_name_in_sheet(sheet, cell_name)
    if not target:
        logger.warning(f"Cell with name {cell_name} not found in sheet {sheet.title}")
        return
    set_cell_value_direct(target, value)
835
+
836
+
837
def set_cell_value_by_column_index(sheet: Worksheet, name: str, column_index: int, value: Any):
    """Write ``value`` ``column_index`` columns to the right of a named cell.

    The named range is resolved on the sheet's workbook; the target cell is
    taken from ``sheet`` in the same row as the named cell. Nothing is
    written when the name cannot be resolved, and any exception is logged as
    a warning.
    """
    try:
        anchor = find_cell_by_name(sheet.parent, name)
        if not anchor:
            return
        shifted_column = anchor.column + column_index
        set_cell_value_direct(sheet.cell(row=anchor.row, column=shifted_column), value)
    except Exception as e:
        logger.warning(f"Error setting cell value by column index: {e}")
847
+
848
+
849
def set_cell_value_direct(cell: Cell, value: Any):
    """Assign ``value`` to ``cell.value`` after normalising its type.

    None and booleans are stored unchanged, other numbers (int, float,
    Decimal) are stored as float, and everything else is stored as its
    string representation.
    """
    # bool is checked before the numeric branch because it is an int subclass
    if value is None or isinstance(value, bool):
        cell.value = value
        return
    if isinstance(value, (int, float, Decimal)):
        cell.value = float(value)
        return
    cell.value = str(value)
860
+
861
+
862
def set_cell_value(row, cell_index: int, value: Any):
    """Write ``value`` into the cell at position ``cell_index`` of ``row``."""
    set_cell_value_direct(get_or_create_cell(row, cell_index), value)
866
+
867
+
868
def get_or_create_row(sheet: Worksheet, row_index: int):
    """Get the row at ``row_index``, padding the sheet with empty rows if needed.

    Args:
        sheet: Worksheet (or compatible object) supporting ``sheet[index]``,
            ``sheet.rows`` and ``sheet.append``.
        row_index: Index of the requested row, as accepted by ``sheet[...]``.

    Returns:
        Whatever ``sheet[row_index]`` yields.

    Raises:
        IndexError, KeyError: If the row still cannot be read after padding.
    """
    try:
        return sheet[row_index]
    except (IndexError, KeyError):
        # Count the existing rows once and append exactly the missing number.
        # Re-counting after every append is quadratic and never terminates
        # if appending an empty row does not increase the reported row count.
        existing_rows = sum(1 for _ in sheet.rows)
        for _ in range(row_index - existing_rows):
            sheet.append([])
        return sheet[row_index]
877
+
878
+
879
def get_or_create_cell(row, cell_index: int) -> Cell:
    """Get or create a cell at the specified index in a row.

    Args:
        row: Sequence of cells. Worksheet rows are immutable tuples of cells;
            plain mutable sequences are accepted as well.
        cell_index: Zero-based position within ``row``.

    Returns:
        The cell at ``cell_index``. For a mutable row that had to be padded,
        the padding value (None) is returned, as before.

    Raises:
        IndexError: If ``cell_index`` is negative and out of range, or the
            row is an empty immutable sequence and offers no cell from which
            to reach its worksheet.
    """
    try:
        return row[cell_index]
    except IndexError:
        if cell_index < 0:
            # A negative index cannot be satisfied by extending to the right
            raise

    if hasattr(row, "append"):
        # Mutable row: extend it with empty slots up to the requested index
        while len(row) <= cell_index:
            row.append(None)
        return row[cell_index]

    # Immutable row (tuple of cells): it cannot be extended in place, so the
    # cell is created through the worksheet the existing cells belong to.
    if not row:
        raise IndexError(f"Cannot create cell {cell_index} in an empty row")
    anchor = row[0]
    return anchor.parent.cell(row=anchor.row, column=anchor.column + cell_index)
888
+
889
+
890
def parse_range_safely(ref: str) -> int:
    """Parse a cell or range reference and return its starting row number.

    Args:
        ref: Reference such as ``"A4:AZ300"``, ``"Quality!$A$4:$AZ$300"`` or
            ``"Quality!$A$4"``.

    Returns:
        The first row of the reference, or 1 when no row number can be
        determined (for example a column-only range or a non-string input).
    """
    if not isinstance(ref, str):
        return 1

    try:
        from openpyxl.utils import range_boundaries

        min_row = range_boundaries(ref)[1]
        # Column-only ranges have no row; fall through to the default
        if min_row is not None:
            return min_row
    except Exception:
        # Sheet-qualified or otherwise unparsable refs are handled manually
        pass

    import re

    # Keep only the first corner of the range, then drop the sheet prefix and
    # the absolute markers, e.g. "Quality!$A$4:$AZ$300" -> "A4". This also
    # covers single-cell references, which contain no ":".
    start_ref = ref.split(":")[0].split("!")[-1].replace("$", "")
    row_match = re.search(r"(\d+)", start_ref)
    if row_match:
        return int(row_match.group(1))
    return 1
911
+
912
+
913
def get_headers_from_header_row(sheet: Worksheet, header_row_index: int) -> dict:
    """Collect the non-empty header texts of one row.

    Returns a dict mapping the zero-based position of each cell within the
    row to its stripped text; cells with a falsy value are skipped. If
    reading fails, a warning is logged and the headers gathered so far are
    returned.
    """
    headers = {}
    try:
        for position, cell in enumerate(sheet[header_row_index]):
            raw_value = cell.value
            if not raw_value:
                continue
            headers[position] = str(raw_value).strip()
    except Exception as e:
        logger.warning(f"Error getting headers from row {header_row_index}: {e}")
    return headers