datacontract-cli 0.10.23__py3-none-any.whl → 0.10.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacontract-cli might be problematic. Click here for more details.

Files changed (43)
  1. datacontract/__init__.py +13 -0
  2. datacontract/api.py +3 -3
  3. datacontract/catalog/catalog.py +2 -2
  4. datacontract/cli.py +1 -1
  5. datacontract/data_contract.py +5 -3
  6. datacontract/engines/data_contract_test.py +13 -4
  7. datacontract/engines/fastjsonschema/s3/s3_read_files.py +3 -2
  8. datacontract/engines/soda/check_soda_execute.py +16 -3
  9. datacontract/engines/soda/connections/duckdb_connection.py +61 -5
  10. datacontract/engines/soda/connections/kafka.py +3 -2
  11. datacontract/export/avro_converter.py +8 -1
  12. datacontract/export/bigquery_converter.py +1 -1
  13. datacontract/export/duckdb_type_converter.py +57 -0
  14. datacontract/export/great_expectations_converter.py +49 -2
  15. datacontract/export/odcs_v3_exporter.py +162 -136
  16. datacontract/export/protobuf_converter.py +163 -69
  17. datacontract/export/spark_converter.py +1 -1
  18. datacontract/imports/avro_importer.py +30 -5
  19. datacontract/imports/csv_importer.py +111 -57
  20. datacontract/imports/excel_importer.py +850 -0
  21. datacontract/imports/importer.py +5 -2
  22. datacontract/imports/importer_factory.py +10 -0
  23. datacontract/imports/odcs_v3_importer.py +226 -127
  24. datacontract/imports/protobuf_importer.py +264 -0
  25. datacontract/lint/linters/description_linter.py +1 -3
  26. datacontract/lint/linters/field_reference_linter.py +1 -2
  27. datacontract/lint/linters/notice_period_linter.py +2 -2
  28. datacontract/lint/linters/valid_constraints_linter.py +3 -3
  29. datacontract/lint/resolve.py +23 -8
  30. datacontract/model/data_contract_specification/__init__.py +1 -0
  31. datacontract/model/run.py +3 -0
  32. datacontract/output/__init__.py +0 -0
  33. datacontract/templates/datacontract.html +2 -1
  34. datacontract/templates/index.html +2 -1
  35. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.25.dist-info}/METADATA +305 -195
  36. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.25.dist-info}/RECORD +40 -38
  37. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.25.dist-info}/WHEEL +1 -1
  38. datacontract/export/csv_type_converter.py +0 -36
  39. datacontract/lint/linters/quality_schema_linter.py +0 -52
  40. datacontract/model/data_contract_specification.py +0 -327
  41. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.25.dist-info}/entry_points.txt +0 -0
  42. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.25.dist-info/licenses}/LICENSE +0 -0
  43. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.25.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,850 @@
1
+ import logging
2
+ import os
3
+ from typing import Any, Dict, List, Optional
4
+
5
+ import openpyxl
6
+ from open_data_contract_standard.model import (
7
+ AuthoritativeDefinition,
8
+ CustomProperty,
9
+ DataQuality,
10
+ OpenDataContractStandard,
11
+ Role,
12
+ SchemaObject,
13
+ SchemaProperty,
14
+ Server,
15
+ ServiceLevelAgreementProperty,
16
+ Support,
17
+ Team,
18
+ )
19
+ from openpyxl.cell.cell import Cell
20
+ from openpyxl.workbook.workbook import Workbook
21
+ from openpyxl.worksheet.worksheet import Worksheet
22
+
23
+ from datacontract.imports.importer import Importer
24
+ from datacontract.model.data_contract_specification import (
25
+ DataContractSpecification,
26
+ )
27
+ from datacontract.model.exceptions import DataContractException
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
class ExcelImporter(Importer):
    """Importer that reads a data contract from an Excel workbook."""

    def import_source(
        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
    ) -> OpenDataContractStandard:
        """Convert the Excel file at ``source`` into an ODCS object.

        ``data_contract_specification`` and ``import_args`` are part of the
        importer signature but are not read by this implementation.
        """
        odcs_contract = import_excel_as_odcs(source)
        return odcs_contract
37
+
38
+
39
def import_excel_as_odcs(excel_file_path: str) -> OpenDataContractStandard:
    """
    Load an Excel workbook and build an OpenDataContractStandard object from it.

    Args:
        excel_file_path: Path to the Excel file

    Returns:
        OpenDataContractStandard object

    Raises:
        FileNotFoundError: if the path does not exist.
        DataContractException: if the workbook cannot be opened or parsed.
    """
    if not os.path.exists(excel_file_path):
        raise FileNotFoundError(f"Excel file not found: {excel_file_path}")

    try:
        workbook = openpyxl.load_workbook(excel_file_path, data_only=True)
    except Exception as e:
        raise DataContractException(
            type="schema",
            name="Parse excel contract",
            reason=f"Failed to open Excel file: {excel_file_path}",
            engine="datacontract",
            original_exception=e,
        )

    def named(key):
        # Shorthand for reading one workbook-level named cell as a string.
        return get_cell_value_by_name(workbook, key)

    try:
        # The description is only emitted when at least one of its parts is filled in.
        description_parts = {
            "purpose": named("description.purpose"),
            "limitations": named("description.limitations"),
            "usage": named("description.usage"),
        }
        description = description_parts if any(description_parts.values()) else None

        # Tags are stored as one comma-separated cell.
        raw_tags = named("tags")
        tags = [piece.strip() for piece in raw_tags.split(",") if piece.strip()] if raw_tags else None

        # Sheet-backed sections.
        schemas = import_schemas(workbook)
        support = import_support(workbook)
        team = import_team(workbook)
        roles = import_roles(workbook)
        sla_properties = import_sla_properties(workbook)
        servers = import_servers(workbook)
        price = import_price(workbook)
        custom_properties = import_custom_properties(workbook)

        contract_fields = {
            "apiVersion": named("apiVersion"),
            "kind": named("kind"),
            "id": named("id"),
            "name": named("name"),
            "version": named("version"),
            "status": named("status"),
            "domain": named("domain"),
            "dataProduct": named("dataProduct"),
            "tenant": named("tenant"),
            "description": description,
            "tags": tags,
            "schema": schemas,
            "support": support,
            "price": price,
            "team": team,
            "roles": roles,
            "slaDefaultElement": named("slaDefaultElement"),
            "slaProperties": sla_properties,
            "servers": servers,
            "customProperties": custom_properties,
        }
        return OpenDataContractStandard(**contract_fields)
    except Exception as e:
        logger.error(f"Error importing Excel file: {str(e)}")
        raise DataContractException(
            type="schema",
            name="Parse excel contract",
            reason=f"Failed to parse Excel file: {excel_file_path}",
            engine="datacontract",
            original_exception=e,
        )
    finally:
        # Always release the workbook, whether parsing succeeded or not.
        workbook.close()
126
+
127
+
128
def import_schemas(workbook) -> Optional[List[SchemaObject]]:
    """Build one SchemaObject per sheet whose title starts with 'Schema '.

    The sheet titled 'Schema <table_name>' is skipped, as are sheets without a
    ``schema.name`` value. Returns None when no schema was collected.
    """
    collected = []

    for title in workbook.sheetnames:
        if not title.startswith("Schema ") or title == "Schema <table_name>":
            continue

        worksheet = workbook[title]
        name_value = get_cell_value_by_name_in_sheet(worksheet, "schema.name")
        if not name_value:
            continue

        schema_object = SchemaObject(
            name=name_value,
            logicalType="object",
            physicalType=get_cell_value_by_name_in_sheet(worksheet, "schema.physicalType"),
            physicalName=get_cell_value_by_name_in_sheet(worksheet, "schema.physicalName"),
            description=get_cell_value_by_name_in_sheet(worksheet, "schema.description"),
            businessName=get_cell_value_by_name_in_sheet(worksheet, "schema.businessName"),
            dataGranularityDescription=get_cell_value_by_name_in_sheet(
                worksheet, "schema.dataGranularityDescription"
            ),
            authoritativeDefinitions=None,
            properties=import_properties(worksheet),
            quality=None,
            customProperties=None,
            tags=None,
        )

        # Tags live in a single comma-separated cell.
        raw_tags = get_cell_value_by_name_in_sheet(worksheet, "schema.tags")
        if raw_tags:
            schema_object.tags = [piece.strip() for piece in raw_tags.split(",") if piece.strip()]

        collected.append(schema_object)

    return collected or None
163
+
164
+
165
def _read_schema_property(row, headers, property_name) -> SchemaProperty:
    """Build a single SchemaProperty from one data row of the properties table."""

    def cell(column_title):
        # Look the column up by its lower-cased header title; missing columns yield None.
        return get_cell_value(row, headers.get(column_title))

    property_obj = SchemaProperty(
        name=property_name,
        logicalType=cell("logical type"),
        logicalTypeOptions=import_logical_type_options(row, headers),
        physicalType=cell("physical type"),
        physicalName=cell("physical name"),
        description=cell("description"),
        businessName=cell("business name"),
        required=parse_boolean(cell("required")),
        unique=parse_boolean(cell("unique")),
        primaryKey=parse_boolean(cell("primary key")),
        primaryKeyPosition=parse_integer(cell("primary key position")),
        partitioned=parse_boolean(cell("partitioned")),
        partitionKeyPosition=parse_integer(cell("partition key position")),
        criticalDataElement=parse_boolean(cell("critical data element status")),
        classification=cell("classification"),
        transformLogic=cell("transform logic"),
        transformDescription=cell("transform description"),
        encryptedName=cell("encrypted name"),
        properties=None,
        items=None,
        tags=get_property_tags(headers, row),
    )

    # Authoritative definition: only set when both URL and type are present.
    authoritative_definition_url = cell("authoritative definition url")
    authoritative_definition_type = cell("authoritative definition type")
    if authoritative_definition_url and authoritative_definition_type:
        property_obj.authoritativeDefinitions = [
            AuthoritativeDefinition(
                url=authoritative_definition_url,
                type=authoritative_definition_type,
            )
        ]

    # Quality: only set when both type and description are present.
    quality_type = cell("quality type")
    quality_description = cell("quality description")
    if quality_type and quality_description:
        property_obj.quality = [
            DataQuality(
                type=quality_type,
                description=quality_description,
            )
        ]

    # Transform sources and examples are comma-separated cells.
    transform_sources = cell("transform sources")
    if transform_sources:
        property_obj.transformSourceObjects = [src.strip() for src in transform_sources.split(",") if src.strip()]

    examples = cell("example(s)")
    if examples:
        property_obj.examples = [ex.strip() for ex in examples.split(",") if ex.strip()]

    return property_obj


def _nest_schema_properties(property_lookup) -> List[SchemaProperty]:
    """Attach dotted-name properties to their parents and return the root properties."""
    root_properties = []
    for name, prop in property_lookup.items():
        if "." not in name:
            root_properties.append(prop)
            continue

        parent_name, child_name = name.rsplit(".", 1)
        parent_prop = property_lookup.get(parent_name)
        if parent_prop is None:
            # Previously dropped without any trace; still skipped, but now visible in the log.
            logger.warning(f"Skipping nested property {name}: parent property {parent_name} not found")
            continue

        # Inside its parent the property is known by the last name segment only.
        prop.name = child_name
        if parent_prop.logicalType == "array":
            parent_prop.items = prop
        else:
            if parent_prop.properties is None:
                parent_prop.properties = []
            parent_prop.properties.append(prop)
    return root_properties


def import_properties(sheet) -> Optional[List[SchemaProperty]]:
    """Extract properties from the schema sheet.

    The named range ``schema.properties`` locates the table: its first row is
    the header row, and the rows after it (up to the end of the range) are
    data rows. Rows without a value in the "property" column are skipped.
    Dotted names (``parent.child``) are nested under their parent.

    Returns:
        The list of root properties, or None when the range is missing, no
        property was found, or an error occurred (logged as a warning).
    """
    try:
        properties_range = get_range_by_name_in_sheet(sheet, "schema.properties")
        if not properties_range:
            return None

        # Materialise the rows once instead of once per loop iteration.
        rows = list(sheet.rows)

        # Map lower-cased header titles to 0-based column indices.
        header_row = rows[properties_range[0] - 1]
        headers = {}
        for i, cell in enumerate(header_row):
            if cell.value:
                # str() keeps a non-text header cell from aborting the whole import.
                headers[str(cell.value).lower()] = i

        # Keyed by full (possibly dotted) name so children can find their parents.
        property_lookup = {}
        # Slicing clamps to the populated rows, so a range that extends past them is harmless.
        for row in rows[properties_range[0] : properties_range[1]]:
            property_name = get_cell_value(row, headers.get("property"))
            if not property_name:
                continue
            property_lookup[property_name] = _read_schema_property(row, headers, property_name)

        root_properties = _nest_schema_properties(property_lookup)
        return root_properties if root_properties else None
    except Exception as e:
        logger.warning(f"Error importing properties: {str(e)}")
        return None
285
+
286
+
287
def import_logical_type_options(row, headers):
    """Collect the logical type options found in a property row.

    Returns a dict holding only the options that have a value, or None when
    no option is set at all.
    """

    def raw(column_title):
        return get_cell_value(row, headers.get(column_title))

    # "required properties" is a comma-separated cell.
    required_raw = raw("required properties")
    required_names = None
    if required_raw:
        required_names = [piece.strip() for piece in required_raw.split(",") if piece.strip()]

    candidates = (
        ("minLength", parse_integer(raw("minimum length"))),
        ("maxLength", parse_integer(raw("maximum length"))),
        ("pattern", raw("pattern")),
        ("format", raw("format")),
        ("exclusiveMaximum", parse_boolean(raw("exclusive maximum"))),
        ("exclusiveMinimum", parse_boolean(raw("exclusive minimum"))),
        ("minimum", raw("minimum")),
        ("maximum", raw("maximum")),
        ("multipleOf", raw("multiple of")),
        ("minItems", parse_integer(raw("minimum items"))),
        ("maxItems", parse_integer(raw("maximum items"))),
        ("uniqueItems", parse_boolean(raw("unique items"))),
        ("maxProperties", parse_integer(raw("maximum properties"))),
        ("minProperties", parse_integer(raw("minimum properties"))),
        ("required", required_names),
    )

    # Keep only the options that were actually provided.
    options = {key: value for key, value in candidates if value is not None}
    return options or None
321
+
322
+
323
def get_property_tags(headers, row):
    """Return the tags of a property row as a list, or None when the cell is empty.

    The "tags" column holds a comma-separated string; blank entries are dropped.
    """
    raw_tags = get_cell_value(row, headers.get("tags"))
    if not raw_tags:
        return None
    stripped = (piece.strip() for piece in raw_tags.split(","))
    return [piece for piece in stripped if piece]
328
+
329
+
330
def parse_boolean(value):
    """Parse a cell value to a boolean.

    Args:
        value: None, a string, or a bool/number.

    Returns:
        None for None; otherwise True when the value is the boolean True, the
        number 1, or (case-insensitively, ignoring surrounding whitespace) one
        of "true", "yes", "1"; False for anything else.
    """
    if value is None:
        return None
    # Non-string inputs used to crash on .lower(); handle them explicitly.
    if isinstance(value, bool):
        return value
    if isinstance(value, (int, float)):
        return value == 1
    return str(value).strip().lower() in ("true", "yes", "1")
336
+
337
+
338
def parse_integer(value):
    """Parse a cell value to an integer.

    Args:
        value: None, a string, or a number.

    Returns:
        The integer value, or None when the input is None or does not
        represent a whole number. Integral decimals such as "3.0" are
        accepted; non-integral strings such as "3.5" yield None.
    """
    if value is None:
        return None
    try:
        return int(value)
    except (ValueError, TypeError):
        pass
    # int("3.0") raises, so retry through float for integral decimal strings.
    try:
        as_float = float(value)
    except (ValueError, TypeError):
        return None
    return int(as_float) if as_float.is_integer() else None
346
+
347
+
348
def get_range_by_name_in_workbook(workbook: Workbook, name: str) -> tuple | None:
    """Return (start_row, end_row) for a workbook-level named range.

    Only the first destination of the matching name is considered. A
    single-cell reference yields the same row twice. Returns None when the
    name is not defined or the lookup fails (failure is logged as a warning).
    """

    def row_number(cell_ref):
        # Keep only the digits of a reference such as "$A$5".
        return int("".join(ch for ch in cell_ref if ch.isdigit()))

    try:
        for defined_name in workbook.defined_names:
            if defined_name != name:
                continue
            for _sheet_title, range_address in workbook.defined_names[defined_name].destinations:
                if ":" not in range_address:
                    # Single cell
                    single_row = row_number(range_address)
                    return single_row, single_row
                first_ref, last_ref = range_address.split(":")
                first_row = row_number(first_ref)
                last_row = row_number(last_ref)
                return first_row, last_row
    except Exception as e:
        logger.warning(f"Error finding range by name {name}: {str(e)}")
    return None
368
+
369
+
370
def get_range_by_name_in_sheet(sheet: Worksheet, name: str) -> tuple | None:
    """Return (start_row, end_row) for a named range defined on a sheet.

    Destinations that point at a different sheet are ignored. A single-cell
    reference yields the same row twice. Returns None when nothing matches or
    the lookup fails (failure is logged as a warning).
    """

    def row_number(cell_ref):
        # Keep only the digits of a reference such as "$A$5".
        return int("".join(ch for ch in cell_ref if ch.isdigit()))

    try:
        for defined_name in sheet.defined_names:
            if defined_name != name:
                continue
            for sheet_title, range_address in sheet.defined_names[defined_name].destinations:
                if sheet_title != sheet.title:
                    continue
                if ":" not in range_address:
                    # Single cell
                    single_row = row_number(range_address)
                    return (single_row, single_row)
                # Multi-row range: take the row of each end of the reference.
                first_ref, last_ref = range_address.split(":")
                first_row = row_number(first_ref)
                last_row = row_number(last_ref)
                return (first_row, last_row)
    except Exception as e:
        logger.warning(f"Error finding range by name {name}: {str(e)}")
    return None
392
+
393
+
394
def get_cell_by_name_in_workbook(workbook: Workbook, name: str) -> Cell | None:
    """Resolve a workbook-level defined name to the object it points at.

    Returns whatever indexing the target sheet with the first destination
    coordinate yields, or None when the name is not defined or the lookup
    fails (failure is logged as a warning).
    """
    try:
        for defined_name in workbook.defined_names:
            if defined_name != name:
                continue
            for sheet_title, coordinate in workbook.defined_names[defined_name].destinations:
                target_sheet = workbook[sheet_title]
                if target_sheet.title != sheet_title:
                    continue
                return target_sheet[coordinate]
    except Exception as e:
        logger.warning(f"Error finding cell by name {name}: {str(e)}")
    return None
407
+
408
+
409
def get_cell_value_by_name(workbook: Workbook, name: str) -> str | None:
    """Get the value of a workbook-level named cell as a string.

    Returns:
        ``str(cell.value)``, or None when the name is not defined, the cell is
        empty, or reading the value fails (failure is logged as a warning).
    """
    try:
        cell = get_cell_by_name_in_workbook(workbook, name)
        if cell is None:
            # An undefined name is an expected case; it used to surface as a
            # misleading "'NoneType' object has no attribute 'value'" warning.
            logger.debug(f"Named cell {name} not found in workbook")
            return None
        if cell.value is not None:
            return str(cell.value)
    except Exception as e:
        logger.warning(f"Error getting cell value by name {name}: {str(e)}")
    return None
418
+
419
+
420
def get_cell_value_by_name_in_sheet(sheet: Worksheet, name: str) -> str | None:
    """Read a sheet-level named cell and return its value as a string.

    Only destinations on this sheet are considered, and the first one holding
    a non-empty value wins. Returns None when nothing is found or the lookup
    fails (failure is logged as a warning).
    """
    try:
        for defined_name in sheet.defined_names:
            if defined_name != name:
                continue
            for sheet_title, coordinate in sheet.defined_names[defined_name].destinations:
                if sheet_title != sheet.title:
                    continue
                target = sheet[coordinate]
                if target.value is None:
                    continue
                return str(target.value)
    except Exception as e:
        logger.warning(f"Error getting cell value by name {name} in sheet {sheet.title}: {str(e)}")
    return None
434
+
435
+
436
def get_cell_value(row, col_idx):
    """Return the string value of ``row[col_idx]``, or None.

    None is returned when ``col_idx`` is None, the index is out of range, the
    element has no ``value`` attribute, or the value itself is None.
    """
    if col_idx is None:
        return None
    try:
        raw = row[col_idx].value
        return None if raw is None else str(raw)
    except (IndexError, AttributeError):
        return None
445
+
446
+
447
def get_cell_value_by_position(sheet, row_idx, col_idx):
    """Return the string value of the cell at 0-based (row_idx, col_idx), or None.

    The indices are shifted by one because ``sheet.cell`` is 1-based. Any
    error is logged as a warning and reported as None.
    """
    try:
        raw = sheet.cell(row=row_idx + 1, column=col_idx + 1).value
        if raw is None:
            return None
        return str(raw)
    except Exception as e:
        logger.warning(f"Error getting cell value by position ({row_idx}, {col_idx}): {str(e)}")
        return None
455
+
456
+
457
def import_support(workbook: Workbook) -> Optional[List[Support]]:
    """Extract support channels from the "Support" sheet.

    The workbook-level named range ``support`` locates the table: its first
    row is the header row and the following rows (up to the end of the range)
    are data rows. Rows without a "channel" value are skipped.

    Returns:
        A list of Support entries, or None when the sheet or range is
        missing, no channel was found, or an error occurred (logged as a
        warning).
    """
    try:
        # workbook["Support"] raises KeyError for a missing sheet, so test membership instead.
        if "Support" not in workbook.sheetnames:
            return None
        support_sheet = workbook["Support"]

        support_range = get_range_by_name_in_workbook(workbook, "support")
        if not support_range:
            return None

        # Materialise the rows once instead of once per loop iteration.
        rows = list(support_sheet.rows)

        # Map lower-cased header titles to 0-based column indices.
        header_row = rows[support_range[0] - 1]
        headers = {}
        for i, cell in enumerate(header_row):
            if cell.value:
                # str() keeps a non-text header cell from aborting the import.
                headers[str(cell.value).lower()] = i

        support_channels = []
        # Slicing clamps to the populated rows, so a range that extends past them is harmless.
        for row in rows[support_range[0] : support_range[1]]:
            channel = get_cell_value(row, headers.get("channel"))
            if not channel:
                continue

            support_channels.append(
                Support(
                    channel=channel,
                    url=get_cell_value(row, headers.get("channel url")),
                    description=get_cell_value(row, headers.get("description")),
                    tool=get_cell_value(row, headers.get("tool")),
                    scope=get_cell_value(row, headers.get("scope")),
                    invitationUrl=get_cell_value(row, headers.get("invitation url")),
                )
            )
    except Exception as e:
        logger.warning(f"Error importing support: {str(e)}")
        return None

    return support_channels if support_channels else None
499
+
500
+
501
def import_team(workbook: Workbook) -> Optional[List[Team]]:
    """Extract team members from the "Team" sheet.

    The workbook-level named range ``team`` locates the table: its first row
    is the header row and the following rows (up to the end of the range) are
    data rows. Rows with no username, name and role are skipped.

    Returns:
        A list of Team entries, or None when the sheet or range is missing,
        no member was found, or an error occurred (logged as a warning).
    """
    try:
        # workbook["Team"] raises KeyError for a missing sheet, so test membership instead.
        if "Team" not in workbook.sheetnames:
            return None
        team_sheet = workbook["Team"]

        team_range = get_range_by_name_in_workbook(workbook, "team")
        if not team_range:
            return None

        # Materialise the rows once instead of once per loop iteration.
        rows = list(team_sheet.rows)

        # Map lower-cased header titles to 0-based column indices.
        header_row = rows[team_range[0] - 1]
        headers = {}
        for i, cell in enumerate(header_row):
            if cell.value:
                # str() keeps a non-text header cell from aborting the import.
                headers[str(cell.value).lower()] = i

        team_members = []
        # Slicing clamps to the populated rows, so a range that extends past them is harmless.
        for row in rows[team_range[0] : team_range[1]]:
            username = get_cell_value(row, headers.get("username"))
            name = get_cell_value(row, headers.get("name"))
            role = get_cell_value(row, headers.get("role"))

            if not (username or name or role):
                continue

            team_members.append(
                Team(
                    username=username,
                    name=name,
                    description=get_cell_value(row, headers.get("description")),
                    role=role,
                    dateIn=get_cell_value(row, headers.get("date in")),
                    dateOut=get_cell_value(row, headers.get("date out")),
                    replacedByUsername=get_cell_value(row, headers.get("replaced by username")),
                )
            )
    except Exception as e:
        logger.warning(f"Error importing team: {str(e)}")
        return None

    return team_members if team_members else None
547
+
548
+
549
def import_roles(workbook: Workbook) -> Optional[List[Role]]:
    """Extract roles from the "Roles" sheet.

    The sheet-level named range ``roles`` locates the table: its first row is
    the header row and the following rows (up to the end of the range) are
    data rows. Rows without a "role" value are skipped.

    Returns:
        A list of Role entries, or None when the sheet or range is missing,
        no role was found, or an error occurred (logged as a warning).
    """
    try:
        # workbook["Roles"] raises KeyError for a missing sheet, so test membership instead.
        if "Roles" not in workbook.sheetnames:
            return None
        roles_sheet = workbook["Roles"]

        roles_range = get_range_by_name_in_sheet(roles_sheet, "roles")
        if not roles_range:
            return None

        # Materialise the rows once instead of once per loop iteration.
        rows = list(roles_sheet.rows)

        # Map lower-cased header titles to 0-based column indices.
        header_row = rows[roles_range[0] - 1]
        headers = {}
        for i, cell in enumerate(header_row):
            if cell.value:
                # str() keeps a non-text header cell from aborting the import.
                headers[str(cell.value).lower()] = i

        roles_list = []
        # Slicing clamps to the populated rows. Indexing past them used to raise
        # IndexError here and discard every role that had already been parsed.
        for row in rows[roles_range[0] : roles_range[1]]:
            role_name = get_cell_value(row, headers.get("role"))
            if not role_name:
                continue

            roles_list.append(
                Role(
                    role=role_name,
                    description=get_cell_value(row, headers.get("description")),
                    access=get_cell_value(row, headers.get("access")),
                    firstLevelApprovers=get_cell_value(row, headers.get("1st level approvers")),
                    secondLevelApprovers=get_cell_value(row, headers.get("2nd level approvers")),
                    customProperties=None,
                )
            )
    except Exception as e:
        logger.warning(f"Error importing roles: {str(e)}")
        return None

    return roles_list if roles_list else None
589
+
590
+
591
def import_sla_properties(workbook: Workbook) -> Optional[List[ServiceLevelAgreementProperty]]:
    """Extract SLA properties from the "SLA" sheet.

    The sheet-level named range ``slaProperties`` locates the table: its first
    row is the header row and the following rows (up to the end of the range)
    are data rows. Rows without a "property" value are skipped.

    Returns:
        A list of ServiceLevelAgreementProperty entries, or None when the
        sheet or range is missing, no property was found, or an error
        occurred (logged as a warning).
    """
    try:
        # workbook["SLA"] raises KeyError for a missing sheet, so test membership instead.
        if "SLA" not in workbook.sheetnames:
            return None
        sla_sheet = workbook["SLA"]

        sla_range = get_range_by_name_in_sheet(sla_sheet, "slaProperties")
        if not sla_range:
            return None

        # Materialise the rows once instead of once per loop iteration.
        rows = list(sla_sheet.rows)

        # Map lower-cased header titles to 0-based column indices.
        header_row = rows[sla_range[0] - 1]
        headers = {}
        for i, cell in enumerate(header_row):
            if cell.value:
                # str() keeps a non-text header cell from aborting the import.
                headers[str(cell.value).lower()] = i

        sla_properties = []
        # Slicing clamps to the populated rows, so a range that extends past them is harmless.
        for row in rows[sla_range[0] : sla_range[1]]:
            property_name = get_cell_value(row, headers.get("property"))
            if not property_name:
                continue

            sla_properties.append(
                ServiceLevelAgreementProperty(
                    property=property_name,
                    value=get_cell_value(row, headers.get("value")),
                    valueExt=get_cell_value(row, headers.get("extended value")),
                    unit=get_cell_value(row, headers.get("unit")),
                    element=get_cell_value(row, headers.get("element")),
                    driver=get_cell_value(row, headers.get("driver")),
                )
            )
    except Exception as e:
        logger.warning(f"Error importing SLA properties: {str(e)}")
        return None

    return sla_properties if sla_properties else None
633
+
634
+
635
def import_servers(workbook) -> Optional[List[Server]]:
    """Read the servers laid out side by side on the "Servers" sheet.

    The named cell ``servers.server`` marks the first server; further servers
    are read from the cells to its right until an empty cell is reached. For
    each server the type-specific fields are read from named cells of the
    form ``servers.<type>.<field>``; unrecognised types use the
    ``servers.custom.*`` names. Returns None when nothing was found or an
    error occurred (logged as a warning).
    """
    # Fields read per known server type, in assignment order.
    fields_by_type = {
        "azure": ("location", "format", "delimiter"),
        "bigquery": ("project", "dataset"),
        "databricks": ("catalog", "host", "schema"),
        "glue": ("account", "database", "format", "location"),
        "kafka": ("format", "host", "topic"),
        "postgres": ("database", "host", "port", "schema"),
        "s3": ("delimiter", "endpointUrl", "format", "location"),
        "snowflake": ("account", "database", "host", "port", "schema", "warehouse"),
        "sqlserver": ("database", "host", "port", "schema"),
    }
    # Any other server type: grab all possible fields.
    custom_fields = (
        "account",
        "catalog",
        "database",
        "dataset",
        "delimiter",
        "endpointUrl",
        "format",
        "host",
        "location",
        "path",
        "port",
        "project",
        "schema",
        "stagingDir",
        "table",
        "view",
        "warehouse",
        "region",
        "regionName",
        "serviceName",
    )

    try:
        sheet = workbook["Servers"]
        if not sheet:
            return None

        anchor_cell = get_cell_by_name_in_workbook(workbook, "servers.server")
        if not anchor_cell:
            return None

        servers = []
        anchor_col = anchor_cell.column - 1  # 0-based index
        anchor_row = anchor_cell.row - 1  # 0-based index

        index = 0
        # Walk to the right from the anchor until an empty server name is hit.
        while server_name := get_cell_value_by_position(sheet, anchor_row, anchor_col + index):
            server = Server(
                server=server_name,
                description=get_server_cell_value(workbook, sheet, "servers.description", index),
                environment=get_server_cell_value(workbook, sheet, "servers.environment", index),
                type=get_server_cell_value(workbook, sheet, "servers.type", index),
            )

            server_type = server.type
            if server_type:
                if server_type in fields_by_type:
                    prefix, field_names = server_type, fields_by_type[server_type]
                else:
                    prefix, field_names = "custom", custom_fields
                for field_name in field_names:
                    setattr(
                        server,
                        field_name,
                        get_server_cell_value(workbook, sheet, f"servers.{prefix}.{field_name}", index),
                    )

            servers.append(server)
            index += 1
    except Exception as e:
        logger.warning(f"Error importing servers: {str(e)}")
        return None

    return servers or None
740
+
741
+
742
def get_server_cell_value(workbook: Workbook, sheet: Worksheet, name: str, col_offset: int):
    """Read a single server property from the horizontally laid-out servers table.

    The defined name ``name`` is resolved with ``get_cell_by_name_in_workbook``.
    The value is then read from ``sheet`` on that cell's row, with the column
    shifted by ``col_offset``.

    Returns the result of ``get_cell_value_by_position``, or ``None`` when the
    name does not resolve to a cell or when any error occurs (the error is
    logged as a warning and swallowed).
    """
    try:
        anchor = get_cell_by_name_in_workbook(workbook, name)
        if anchor:
            # The anchor's row/column are converted to the 0-based indices
            # that get_cell_value_by_position is called with.
            return get_cell_value_by_position(sheet, anchor.row - 1, anchor.column - 1 + col_offset)
        return None
    except Exception as exc:
        logger.warning(f"Error getting server cell value for {name}: {str(exc)}")
        return None
755
+
756
+
757
def import_price(workbook) -> Optional[Dict[str, Any]]:
    """Build the price section from the workbook's ``price.*`` named cells.

    Returns a plain dict with the keys ``priceAmount``, ``priceCurrency`` and
    ``priceUnit``, or ``None`` when all three values are falsy or when reading
    them fails (the failure is logged as a warning).
    """
    try:
        amount = get_cell_value_by_name(workbook, "price.priceAmount")
        currency = get_cell_value_by_name(workbook, "price.priceCurrency")
        unit = get_cell_value_by_name(workbook, "price.priceUnit")

        # At least one of the three cells must carry a value.
        if any((amount, currency, unit)):
            # A plain dict is returned rather than a model instance.
            return {
                "priceAmount": amount,
                "priceCurrency": currency,
                "priceUnit": unit,
            }
        return None
    except Exception as exc:
        logger.warning(f"Error importing price: {str(exc)}")
        return None
776
+
777
+
778
def import_custom_properties(workbook: Workbook) -> List[CustomProperty]:
    """Extract custom properties from the workbook.

    The value of the ``owner`` named cell, when truthy, is emitted first as a
    custom property named ``"owner"``. Further properties are read from the
    ``"Custom Properties"`` sheet: for every row index covered by the
    ``CustomProperties`` named range, column 0 is the property name and
    column 1 the raw value, which is converted with ``parse_property_value``.
    Rows with an empty name, or with the name ``"owner"``, are skipped.

    Any error while reading the sheet is logged as a warning; the properties
    collected up to that point are still returned.
    """
    custom_properties = []

    owner = get_cell_value_by_name(workbook, "owner")

    # Add owner as a custom property
    if owner:
        custom_properties.append(CustomProperty(property="owner", value=owner))

    try:
        # Get other custom properties
        custom_properties_sheet = workbook["Custom Properties"]
        if not custom_properties_sheet:
            return custom_properties

        custom_properties_range = get_range_by_name_in_workbook(workbook, "CustomProperties")
        if not custom_properties_range:
            return custom_properties

        # NOTE(review): the previous "skip header row" check compared row_idx
        # with range[0] - 1, a value range(range[0], range[1]) never yields, so
        # it was dead code and has been removed. Whether the header row is
        # excluded depends on how get_range_by_name_in_workbook numbers rows
        # relative to get_cell_value_by_position - confirm against the helper.
        for row_idx in range(custom_properties_range[0], custom_properties_range[1]):
            property_name = get_cell_value_by_position(custom_properties_sheet, row_idx, 0)
            # "owner" is sourced from its own named cell above; avoid a duplicate.
            if not property_name or property_name == "owner":
                continue

            property_value = get_cell_value_by_position(custom_properties_sheet, row_idx, 1)
            custom_properties.append(
                CustomProperty(
                    property=property_name,
                    value=parse_property_value(property_value),
                )
            )
    except Exception as e:
        logger.warning(f"Error importing custom properties: {str(e)}")

    return custom_properties
821
+
822
+
823
def parse_property_value(value: str) -> Any:
    """Parse a property value into the appropriate type based on Excel values.

    Conversion rules, applied in order:

    * ``None`` stays ``None``.
    * ``bool`` and ``int`` inputs are returned unchanged.
    * The strings ``"true"`` / ``"false"`` (case-insensitive, surrounding
      whitespace ignored) become ``True`` / ``False``.
    * Digit-only strings become ``int``.
    * Anything ``float()`` accepts becomes ``int`` when it is a whole number,
      otherwise ``float``.
    * Everything else, including text that parses to NaN or infinity such as
      ``"nan"``, ``"Infinity"`` or ``"1e400"``, is returned unchanged.
    """
    import math  # local import: keeps this block self-contained

    if value is None:
        return None

    # bool is a subclass of int, and float(True) == 1.0 would silently turn a
    # boolean cell into 1. Other ints would lose precision above 2**53 (or
    # raise OverflowError) if round-tripped through float. Keep both as-is.
    if isinstance(value, (bool, int)):
        return value

    # Try to convert to boolean (simple case)
    if isinstance(value, str):
        value_lower = value.lower().strip()
        if value_lower == "true":
            return True
        if value_lower == "false":
            return False

    # Try numeric conversions
    try:
        # Digit-only strings are parsed exactly, without a float round-trip.
        if isinstance(value, str) and value.isdigit():
            return int(value)

        float_val = float(value)
    except (ValueError, TypeError, AttributeError, OverflowError):
        # If conversion fails, return the original value
        return value

    # "nan", "inf", "Infinity", "1e400" ... are far more likely to be text
    # than intended non-finite numbers, so leave them untouched.
    if not math.isfinite(float_val):
        return value

    # If it's a whole number, return as int
    if float_val.is_integer():
        return int(float_val)
    return float_val