datacontract-cli 0.10.37__py3-none-any.whl → 0.10.40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
datacontract/cli.py CHANGED
@@ -15,8 +15,8 @@ from datacontract.catalog.catalog import create_data_contract_html, create_index
15
15
  from datacontract.data_contract import DataContract, ExportFormat
16
16
  from datacontract.imports.importer import ImportFormat, Spec
17
17
  from datacontract.init.init_template import get_init_template
18
- from datacontract.integration.datamesh_manager import (
19
- publish_data_contract_to_datamesh_manager,
18
+ from datacontract.integration.entropy_data import (
19
+ publish_data_contract_to_entropy_data,
20
20
  )
21
21
  from datacontract.lint.resolve import resolve_data_contract_dict
22
22
  from datacontract.model.exceptions import DataContractException
@@ -406,7 +406,7 @@ def publish(
406
406
  """
407
407
  enable_debug_logging(debug)
408
408
 
409
- publish_data_contract_to_datamesh_manager(
409
+ publish_data_contract_to_entropy_data(
410
410
  data_contract_dict=resolve_data_contract_dict(location),
411
411
  ssl_verification=ssl_verification,
412
412
  )
@@ -24,7 +24,7 @@ from datacontract.export.exporter import ExportFormat
24
24
  from datacontract.export.exporter_factory import exporter_factory
25
25
  from datacontract.imports.importer_factory import importer_factory
26
26
  from datacontract.init.init_template import get_init_template
27
- from datacontract.integration.datamesh_manager import publish_test_results_to_datamesh_manager
27
+ from datacontract.integration.entropy_data import publish_test_results_to_entropy_data
28
28
  from datacontract.lint import resolve
29
29
  from datacontract.model.data_contract_specification import DataContractSpecification, Info
30
30
  from datacontract.model.exceptions import DataContractException
@@ -151,7 +151,7 @@ class DataContract:
151
151
  run.finish()
152
152
 
153
153
  if self._publish_url is not None or self._publish_test_results:
154
- publish_test_results_to_datamesh_manager(run, self._publish_url, self._ssl_verification)
154
+ publish_test_results_to_entropy_data(run, self._publish_url, self._ssl_verification)
155
155
 
156
156
  return run
157
157
 
@@ -3,6 +3,7 @@ import typing
3
3
  import uuid
4
4
 
5
5
  from datacontract.engines.soda.connections.athena import to_athena_soda_configuration
6
+ from datacontract.engines.soda.connections.oracle import initialize_client_and_create_soda_configuration
6
7
 
7
8
  if typing.TYPE_CHECKING:
8
9
  from pyspark.sql import SparkSession
@@ -104,6 +105,10 @@ def check_soda_execute(
104
105
  soda_configuration_str = to_sqlserver_soda_configuration(server)
105
106
  scan.add_configuration_yaml_str(soda_configuration_str)
106
107
  scan.set_data_source_name(server.type)
108
+ elif server.type == "oracle":
109
+ soda_configuration_str = initialize_client_and_create_soda_configuration(server)
110
+ scan.add_configuration_yaml_str(soda_configuration_str)
111
+ scan.set_data_source_name(server.type)
107
112
  elif server.type == "trino":
108
113
  soda_configuration_str = to_trino_soda_configuration(server)
109
114
  scan.add_configuration_yaml_str(soda_configuration_str)
@@ -71,7 +71,7 @@ def to_athena_soda_configuration(server):
71
71
  data_source["catalog"] = server.catalog
72
72
 
73
73
  if s3_session_token:
74
- data_source["aws_session_token"] = s3_session_token
74
+ data_source["session_token"] = s3_session_token
75
75
 
76
76
  soda_configuration = {f"data_source {server.type}": data_source}
77
77
 
@@ -0,0 +1,50 @@
1
+ import os
2
+
3
+ import yaml
4
+
5
+ from datacontract.model.data_contract_specification import Server
6
+
7
+
8
+ def initialize_client_and_create_soda_configuration(server: Server) -> str:
9
+ import oracledb
10
+ soda_config = to_oracle_soda_configuration(server)
11
+
12
+ oracle_client_dir = os.getenv("DATACONTRACT_ORACLE_CLIENT_DIR")
13
+ if oracle_client_dir is not None:
14
+ # Soda Core currently does not support thick mode natively, see https://github.com/sodadata/soda-core/issues/2036
15
+ # but the oracledb client can be configured accordingly before Soda initializes as a work-around
16
+ oracledb.init_oracle_client(lib_dir=oracle_client_dir)
17
+
18
+ return soda_config
19
+
20
+
21
+ def to_oracle_soda_configuration(server: Server) -> str:
22
+ """Serialize server config to soda configuration.
23
+
24
+
25
+ ### Example:
26
+ type: oracle
27
+ host: database-1.us-east-1.rds.amazonaws.com
28
+ port: '1521'
29
+ username: simple
30
+ password: simple_pass
31
+ connectstring: database-1.us-east-1.rds.amazonaws.com:1521/ORCL (database is equal to service name at oracle)
32
+ schema: SYSTEM
33
+ """
34
+
35
+ service_name = server.service_name or server.database
36
+ # with service account key, using an external json file
37
+ soda_configuration = {
38
+ f"data_source {server.type}": {
39
+ "type": "oracle",
40
+ "host": server.host,
41
+ "port": str(server.port),
42
+ "username": os.getenv("DATACONTRACT_ORACLE_USERNAME", ""),
43
+ "password": os.getenv("DATACONTRACT_ORACLE_PASSWORD", ""),
44
+ "connectstring": f"{server.host}:{server.port}/{service_name}",
45
+ "schema": server.schema_,
46
+ }
47
+ }
48
+
49
+ soda_configuration_str = yaml.dump(soda_configuration)
50
+ return soda_configuration_str
@@ -11,6 +11,7 @@ from open_data_contract_standard.model import (
11
11
  Server,
12
12
  ServiceLevelAgreementProperty,
13
13
  Support,
14
+ Team,
14
15
  )
15
16
 
16
17
  from datacontract.export.exporter import Exporter
@@ -30,7 +31,7 @@ def to_odcs_v3_yaml(data_contract_spec: DataContractSpecification) -> str:
30
31
 
31
32
  def to_odcs_v3(data_contract_spec: DataContractSpecification) -> OpenDataContractStandard:
32
33
  result = OpenDataContractStandard(
33
- apiVersion="v3.0.1",
34
+ apiVersion="v3.1.0",
34
35
  kind="DataContract",
35
36
  id=data_contract_spec.id,
36
37
  name=data_contract_spec.info.title,
@@ -88,7 +89,10 @@ def to_odcs_v3(data_contract_spec: DataContractSpecification) -> OpenDataContrac
88
89
  if server_value.account is not None:
89
90
  server.account = server_value.account
90
91
  if server_value.database is not None:
91
- server.database = server_value.database
92
+ if server.type == "oracle":
93
+ server.serviceName = server_value.database
94
+ else:
95
+ server.database = server_value.database
92
96
  if server_value.schema_ is not None:
93
97
  server.schema_ = server_value.schema_
94
98
  if server_value.format is not None:
@@ -127,9 +131,9 @@ def to_odcs_v3(data_contract_spec: DataContractSpecification) -> OpenDataContrac
127
131
 
128
132
  if len(servers) > 0:
129
133
  result.servers = servers
134
+ if (data_contract_spec.info.owner is not None) and (data_contract_spec.info.owner != ""):
135
+ result.team = Team(name=data_contract_spec.info.owner)
130
136
  custom_properties = []
131
- if data_contract_spec.info.owner is not None:
132
- custom_properties.append(CustomProperty(property="owner", value=data_contract_spec.info.owner))
133
137
  if data_contract_spec.info.model_extra is not None:
134
138
  for key, value in data_contract_spec.info.model_extra.items():
135
139
  custom_properties.append(CustomProperty(property=key, value=value))
@@ -194,14 +198,10 @@ def to_logical_type(type: str) -> str | None:
194
198
  return "integer"
195
199
  if type.lower() in ["boolean"]:
196
200
  return "boolean"
197
- if type.lower() in ["object", "record", "struct"]:
201
+ if type.lower() in ["object", "record", "struct", "map", "variant"]:
198
202
  return "object"
199
- if type.lower() in ["bytes"]:
203
+ if type.lower() in ["bytes", "array"]:
200
204
  return "array"
201
- if type.lower() in ["array"]:
202
- return "array"
203
- if type.lower() in ["variant"]:
204
- return "variant"
205
205
  if type.lower() in ["null"]:
206
206
  return None
207
207
  return None
@@ -224,6 +224,8 @@ def to_physical_type(config: Dict[str, Any]) -> str | None:
224
224
  return config["databricksType"]
225
225
  elif "physicalType" in config:
226
226
  return config["physicalType"]
227
+ elif "oracleType" in config:
228
+ return config["oracleType"]
227
229
  return None
228
230
 
229
231
 
@@ -22,6 +22,8 @@ def convert_to_sql_type(field: Field, server_type: str) -> str:
22
22
  return convert_type_to_bigquery(field)
23
23
  elif server_type == "trino":
24
24
  return convert_type_to_trino(field)
25
+ elif server_type == "oracle":
26
+ return convert_type_to_oracle(field)
25
27
 
26
28
  return field.type
27
29
 
@@ -390,3 +392,105 @@ def convert_type_to_trino(field: Field) -> None | str:
390
392
  return "varbinary"
391
393
  if field_type in ["object", "record", "struct"]:
392
394
  return "json"
395
+
396
+
397
+ def convert_type_to_oracle(field: Field) -> None | str:
398
+ """Convert from supported datacontract types to equivalent Oracle types
399
+
400
+ Oracle returns types WITH precision/scale/length through Soda, so we need to match that.
401
+ For example:
402
+ - NUMBER -> NUMBER (base types without precision return without it)
403
+ - TIMESTAMP -> TIMESTAMP(6) (Oracle default precision)
404
+ - CHAR -> CHAR (but may need explicit handling)
405
+
406
+ For fields that were created with specific Oracle types (like NCHAR, ROWID, BLOB),
407
+ users should use config.oracleType to override the default mapping.
408
+
409
+ Reference: https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Data-Types.html
410
+ """
411
+ # config.oracleType always wins - use it as-is without stripping
412
+ if field.config and "oracleType" in field.config:
413
+ return field.config["oracleType"]
414
+
415
+ if field.config and "physicalType" in field.config:
416
+ return field.config["physicalType"]
417
+
418
+ field_type = field.type
419
+ if not field_type:
420
+ return None
421
+
422
+ field_type = field_type.lower()
423
+
424
+ # String types - default to NVARCHAR2 for strings
425
+ if field_type in ["string", "varchar"]:
426
+ return "NVARCHAR2"
427
+
428
+ if field_type == "text":
429
+ # text could be NVARCHAR2 or NCLOB depending on size
430
+ if field.config and field.config.get("large"):
431
+ return "NCLOB"
432
+ return "NVARCHAR2"
433
+
434
+ # Numeric types - NUMBER without precision (Oracle returns just NUMBER)
435
+ if field_type in ["number", "decimal", "numeric", "int", "integer", "long", "bigint", "smallint"]:
436
+ return "NUMBER"
437
+
438
+ # Float types - BINARY_FLOAT/BINARY_DOUBLE by default
439
+ if field_type == "float":
440
+ return "BINARY_FLOAT"
441
+
442
+ if field_type in ["double", "double precision"]:
443
+ return "BINARY_DOUBLE"
444
+
445
+ # Boolean - maps to CHAR
446
+ if field_type == "boolean":
447
+ return "CHAR"
448
+
449
+ # Temporal types - Oracle returns with precision
450
+ if field_type in ["timestamp_tz", "timestamp with time zone", "timestamptz"]:
451
+ return "TIMESTAMP(6) WITH TIME ZONE"
452
+
453
+ if field_type in ["timestamp_ntz", "timestamp", "timestamp without time zone"]:
454
+ return "TIMESTAMP(6)"
455
+
456
+ if field_type == "date":
457
+ return "DATE"
458
+
459
+ if field_type == "time":
460
+ # Oracle's INTERVAL DAY TO SECOND has default precision
461
+ return "INTERVAL DAY(0) TO SECOND(6)"
462
+
463
+ # Binary types
464
+ if field_type in ["bytes", "binary"]:
465
+ # Default to RAW for bytes
466
+ return "RAW"
467
+
468
+ # LOB types
469
+ if field_type == "blob":
470
+ return "BLOB"
471
+
472
+ if field_type == "nclob":
473
+ return "NCLOB"
474
+
475
+ if field_type == "clob":
476
+ return "CLOB"
477
+
478
+ # Oracle-specific types
479
+ if field_type == "bfile":
480
+ return "BFILE"
481
+
482
+ if field_type in ["long raw", "longraw"]:
483
+ return "LONG RAW"
484
+
485
+ if field_type == "rowid":
486
+ return "ROWID"
487
+
488
+ if field_type == "urowid":
489
+ return "UROWID"
490
+
491
+ # Complex/JSON types -> CLOB (emulated)
492
+ if field_type in ["array", "map", "object", "record", "struct", "variant", "json"]:
493
+ return "CLOB"
494
+
495
+ # Default to CLOB for unknown types
496
+ return "CLOB"
@@ -16,6 +16,7 @@ from open_data_contract_standard.model import (
16
16
  ServiceLevelAgreementProperty,
17
17
  Support,
18
18
  Team,
19
+ TeamMember,
19
20
  )
20
21
  from openpyxl.cell.cell import Cell
21
22
  from openpyxl.workbook.workbook import Workbook
@@ -540,7 +541,7 @@ def import_team(workbook: Workbook) -> Optional[List[Team]]:
540
541
  if (not (username or name or role)) or row_idx == team_range[0] - 1:
541
542
  continue
542
543
 
543
- team_member = Team(
544
+ team_member = TeamMember(
544
545
  username=username,
545
546
  name=name,
546
547
  description=get_cell_value(row, headers.get("description")),
@@ -128,6 +128,7 @@ def import_servers(odcs: OpenDataContractStandard) -> Dict[str, Server] | None:
128
128
  server.account = odcs_server.account
129
129
  server.database = odcs_server.database
130
130
  server.schema_ = odcs_server.schema_
131
+ server.service_name = odcs_server.serviceName
131
132
  server.host = odcs_server.host
132
133
  server.port = odcs_server.port
133
134
  server.catalog = odcs_server.catalog
@@ -196,6 +197,17 @@ def get_server_type(odcs: OpenDataContractStandard) -> str | None:
196
197
  return server.type
197
198
 
198
199
 
200
+ def get_composite_primary_keys(properties: List[SchemaProperty]) -> list[str]:
201
+ primary_keys = [
202
+ (property.name, property.primaryKeyPosition)
203
+ for property in properties
204
+ if property.name and property.primaryKey is not None and property.primaryKey
205
+ ]
206
+
207
+ primary_keys.sort(key=lambda x: x[1] or -1)
208
+ return [name for name, _ in primary_keys]
209
+
210
+
199
211
  def import_models(odcs: Any) -> Dict[str, Model]:
200
212
  custom_type_mappings = get_custom_type_mappings(odcs.customProperties)
201
213
 
@@ -213,6 +225,8 @@ def import_models(odcs: Any) -> Dict[str, Model]:
213
225
  tags=odcs_schema.tags if odcs_schema.tags is not None else None,
214
226
  )
215
227
  model.fields = import_fields(odcs_schema.properties, custom_type_mappings, server_type=get_server_type(odcs))
228
+ if has_composite_primary_key(odcs_properties=odcs_schema.properties):
229
+ model.primaryKey = get_composite_primary_keys(odcs_schema.properties)
216
230
  if odcs_schema.quality is not None:
217
231
  model.quality = convert_quality_list(odcs_schema.quality)
218
232
  model.title = schema_name
@@ -313,6 +327,8 @@ def import_field_config(odcs_property: SchemaProperty, server_type=None) -> dict
313
327
  config["sqlserverType"] = physical_type
314
328
  elif server_type == "databricks":
315
329
  config["databricksType"] = physical_type
330
+ elif server_type == "oracle":
331
+ config["oracleType"] = physical_type
316
332
  else:
317
333
  config["physicalType"] = physical_type
318
334
 
@@ -0,0 +1,126 @@
1
+ import os
2
+ from urllib.parse import urlparse
3
+
4
+ import requests
5
+
6
+ from datacontract.model.run import Run
7
+
8
+ # used to retrieve the HTML location of the published data contract or test results
9
+ RESPONSE_HEADER_LOCATION_HTML = "location-html"
10
+
11
+
12
+ def publish_test_results_to_entropy_data(run: Run, publish_url: str, ssl_verification: bool):
13
+ try:
14
+ host = publish_url
15
+ if publish_url is None:
16
+ # this url supports Data Mesh Manager and Data Contract Manager
17
+ host = _get_host()
18
+ url = "%s/api/test-results" % host
19
+ else:
20
+ url = publish_url
21
+
22
+ api_key = _get_api_key()
23
+
24
+ if run.dataContractId is None:
25
+ raise Exception("Cannot publish run results for unknown data contract ID")
26
+
27
+ headers = {"Content-Type": "application/json", "x-api-key": api_key}
28
+ request_body = run.model_dump_json()
29
+ # print("Request Body:", request_body)
30
+ response = requests.post(
31
+ url,
32
+ data=request_body,
33
+ headers=headers,
34
+ verify=ssl_verification,
35
+ )
36
+ # print("Status Code:", response.status_code)
37
+ # print("Response Body:", response.text)
38
+ if response.status_code != 200:
39
+ display_host = _extract_hostname(host)
40
+ run.log_error(f"Error publishing test results to {display_host}: {response.text}")
41
+ return
42
+ run.log_info("Published test results successfully")
43
+
44
+ location_html = response.headers.get(RESPONSE_HEADER_LOCATION_HTML)
45
+ if location_html is not None and len(location_html) > 0:
46
+ print(f"🚀 Open {location_html}")
47
+
48
+ except Exception as e:
49
+ run.log_error(f"Failed publishing test results. Error: {str(e)}")
50
+
51
+
52
+ def publish_data_contract_to_entropy_data(data_contract_dict: dict, ssl_verification: bool):
53
+ try:
54
+ api_key = _get_api_key()
55
+ host = _get_host()
56
+ headers = {"Content-Type": "application/json", "x-api-key": api_key}
57
+ id = data_contract_dict["id"]
58
+ url = f"{host}/api/datacontracts/{id}"
59
+ response = requests.put(
60
+ url=url,
61
+ json=data_contract_dict,
62
+ headers=headers,
63
+ verify=ssl_verification,
64
+ )
65
+ if response.status_code != 200:
66
+ display_host = _extract_hostname(host)
67
+ print(f"Error publishing data contract to {display_host}: {response.text}")
68
+ exit(1)
69
+
70
+ print("✅ Published data contract successfully")
71
+
72
+ location_html = response.headers.get(RESPONSE_HEADER_LOCATION_HTML)
73
+ if location_html is not None and len(location_html) > 0:
74
+ print(f"🚀 Open {location_html}")
75
+
76
+ except Exception as e:
77
+ print(f"Failed publishing data contract. Error: {str(e)}")
78
+
79
+
80
+ def _get_api_key() -> str:
81
+ """
82
+ Get API key from environment variables with fallback priority:
83
+ 1. ENTROPY_DATA_API_KEY
84
+ 2. DATAMESH_MANAGER_API_KEY
85
+ 3. DATACONTRACT_MANAGER_API_KEY
86
+ """
87
+ api_key = os.getenv("ENTROPY_DATA_API_KEY")
88
+ if api_key is None:
89
+ api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
90
+ if api_key is None:
91
+ api_key = os.getenv("DATACONTRACT_MANAGER_API_KEY")
92
+ if api_key is None:
93
+ raise Exception(
94
+ "Cannot publish, as neither ENTROPY_DATA_API_KEY, DATAMESH_MANAGER_API_KEY, nor DATACONTRACT_MANAGER_API_KEY is set"
95
+ )
96
+ return api_key
97
+
98
+
99
+ def _get_host() -> str:
100
+ """
101
+ Get host from environment variables with fallback priority:
102
+ 1. ENTROPY_DATA_HOST
103
+ 2. DATAMESH_MANAGER_HOST
104
+ 3. DATACONTRACT_MANAGER_HOST
105
+ 4. Default: https://api.entropy-data.com
106
+ """
107
+ host = os.getenv("ENTROPY_DATA_HOST")
108
+ if host is None:
109
+ host = os.getenv("DATAMESH_MANAGER_HOST")
110
+ if host is None:
111
+ host = os.getenv("DATACONTRACT_MANAGER_HOST")
112
+ if host is None:
113
+ host = "https://api.entropy-data.com"
114
+ return host
115
+
116
+
117
+ def _extract_hostname(url: str) -> str:
118
+ """
119
+ Extract the hostname (including subdomains and top-level domain) from a URL.
120
+
121
+ Examples:
122
+ - https://app.entropy-data.com/path -> app.entropy-data.com
123
+ - http://api.example.com:8080/api -> api.example.com
124
+ """
125
+ parsed = urlparse(url)
126
+ return parsed.netloc.split(":")[0] if parsed.netloc else url
@@ -312,9 +312,9 @@ def _resolve_data_contract_from_str(
312
312
 
313
313
  if schema_location is None:
314
314
  if is_open_data_contract_standard(yaml_dict):
315
- logging.info("Using ODCS 3.0.2 schema to validate data contract")
315
+ logging.info("Using ODCS 3.1.0 schema to validate data contract")
316
316
  # TODO refactor this to a specific function
317
- schema_location = resources.files("datacontract").joinpath("schemas", "odcs-3.0.2.schema.json")
317
+ schema_location = resources.files("datacontract").joinpath("schemas", "odcs-3.1.0.schema.json")
318
318
 
319
319
  _validate_json_schema(yaml_dict, schema_location)
320
320
 
datacontract/lint/urls.py CHANGED
@@ -28,10 +28,22 @@ def fetch_resource(url: str):
28
28
  def _set_api_key(headers, url):
29
29
  hostname = urlparse(url).hostname
30
30
 
31
+ entropy_data_api_key = os.getenv("ENTROPY_DATA_API_KEY")
31
32
  datamesh_manager_api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
32
33
  datacontract_manager_api_key = os.getenv("DATACONTRACT_MANAGER_API_KEY")
33
34
 
34
- if hostname == "datamesh-manager.com" or hostname.endswith(".datamesh-manager.com"):
35
+ if hostname == "entropy-data.com" or hostname.endswith(".entropy-data.com"):
36
+ if entropy_data_api_key is None or entropy_data_api_key == "":
37
+ print("Error: Entropy Data API key is not set. Set env variable ENTROPY_DATA_API_KEY.")
38
+ raise DataContractException(
39
+ type="lint",
40
+ name=f"Reading data contract from {url}",
41
+ reason="Error: Entropy Data API key is not set. Set env variable ENTROPY_DATA_API_KEY.",
42
+ engine="datacontract",
43
+ result="error",
44
+ )
45
+ headers["x-api-key"] = entropy_data_api_key
46
+ elif hostname == "datamesh-manager.com" or hostname.endswith(".datamesh-manager.com"):
35
47
  if datamesh_manager_api_key is None or datamesh_manager_api_key == "":
36
48
  print("Error: Data Mesh Manager API key is not set. Set env variable DATAMESH_MANAGER_API_KEY.")
37
49
  raise DataContractException(
@@ -54,7 +66,9 @@ def _set_api_key(headers, url):
54
66
  )
55
67
  headers["x-api-key"] = datacontract_manager_api_key
56
68
 
57
- if datamesh_manager_api_key is not None and datamesh_manager_api_key != "":
58
- headers["x-api-key"] = datamesh_manager_api_key
59
69
  if datacontract_manager_api_key is not None and datacontract_manager_api_key != "":
60
70
  headers["x-api-key"] = datacontract_manager_api_key
71
+ if datamesh_manager_api_key is not None and datamesh_manager_api_key != "":
72
+ headers["x-api-key"] = datamesh_manager_api_key
73
+ if entropy_data_api_key is not None and entropy_data_api_key != "":
74
+ headers["x-api-key"] = entropy_data_api_key