datacontract-cli 0.10.37__py3-none-any.whl → 0.10.40__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacontract/cli.py +3 -3
- datacontract/data_contract.py +2 -2
- datacontract/engines/soda/check_soda_execute.py +5 -0
- datacontract/engines/soda/connections/athena.py +1 -1
- datacontract/engines/soda/connections/oracle.py +50 -0
- datacontract/export/odcs_v3_exporter.py +12 -10
- datacontract/export/sql_type_converter.py +104 -0
- datacontract/imports/excel_importer.py +2 -1
- datacontract/imports/odcs_v3_importer.py +16 -0
- datacontract/integration/entropy_data.py +126 -0
- datacontract/lint/resolve.py +2 -2
- datacontract/lint/urls.py +17 -3
- datacontract/schemas/odcs-3.1.0.schema.json +2809 -0
- datacontract/templates/datacontract_odcs.html +2 -2
- {datacontract_cli-0.10.37.dist-info → datacontract_cli-0.10.40.dist-info}/METADATA +63 -10
- {datacontract_cli-0.10.37.dist-info → datacontract_cli-0.10.40.dist-info}/RECORD +20 -18
- datacontract/integration/datamesh_manager.py +0 -86
- {datacontract_cli-0.10.37.dist-info → datacontract_cli-0.10.40.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.10.37.dist-info → datacontract_cli-0.10.40.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.37.dist-info → datacontract_cli-0.10.40.dist-info}/licenses/LICENSE +0 -0
- {datacontract_cli-0.10.37.dist-info → datacontract_cli-0.10.40.dist-info}/top_level.txt +0 -0
datacontract/cli.py
CHANGED
|
@@ -15,8 +15,8 @@ from datacontract.catalog.catalog import create_data_contract_html, create_index
|
|
|
15
15
|
from datacontract.data_contract import DataContract, ExportFormat
|
|
16
16
|
from datacontract.imports.importer import ImportFormat, Spec
|
|
17
17
|
from datacontract.init.init_template import get_init_template
|
|
18
|
-
from datacontract.integration.
|
|
19
|
-
|
|
18
|
+
from datacontract.integration.entropy_data import (
|
|
19
|
+
publish_data_contract_to_entropy_data,
|
|
20
20
|
)
|
|
21
21
|
from datacontract.lint.resolve import resolve_data_contract_dict
|
|
22
22
|
from datacontract.model.exceptions import DataContractException
|
|
@@ -406,7 +406,7 @@ def publish(
|
|
|
406
406
|
"""
|
|
407
407
|
enable_debug_logging(debug)
|
|
408
408
|
|
|
409
|
-
|
|
409
|
+
publish_data_contract_to_entropy_data(
|
|
410
410
|
data_contract_dict=resolve_data_contract_dict(location),
|
|
411
411
|
ssl_verification=ssl_verification,
|
|
412
412
|
)
|
datacontract/data_contract.py
CHANGED
|
@@ -24,7 +24,7 @@ from datacontract.export.exporter import ExportFormat
|
|
|
24
24
|
from datacontract.export.exporter_factory import exporter_factory
|
|
25
25
|
from datacontract.imports.importer_factory import importer_factory
|
|
26
26
|
from datacontract.init.init_template import get_init_template
|
|
27
|
-
from datacontract.integration.
|
|
27
|
+
from datacontract.integration.entropy_data import publish_test_results_to_entropy_data
|
|
28
28
|
from datacontract.lint import resolve
|
|
29
29
|
from datacontract.model.data_contract_specification import DataContractSpecification, Info
|
|
30
30
|
from datacontract.model.exceptions import DataContractException
|
|
@@ -151,7 +151,7 @@ class DataContract:
|
|
|
151
151
|
run.finish()
|
|
152
152
|
|
|
153
153
|
if self._publish_url is not None or self._publish_test_results:
|
|
154
|
-
|
|
154
|
+
publish_test_results_to_entropy_data(run, self._publish_url, self._ssl_verification)
|
|
155
155
|
|
|
156
156
|
return run
|
|
157
157
|
|
|
@@ -3,6 +3,7 @@ import typing
|
|
|
3
3
|
import uuid
|
|
4
4
|
|
|
5
5
|
from datacontract.engines.soda.connections.athena import to_athena_soda_configuration
|
|
6
|
+
from datacontract.engines.soda.connections.oracle import initialize_client_and_create_soda_configuration
|
|
6
7
|
|
|
7
8
|
if typing.TYPE_CHECKING:
|
|
8
9
|
from pyspark.sql import SparkSession
|
|
@@ -104,6 +105,10 @@ def check_soda_execute(
|
|
|
104
105
|
soda_configuration_str = to_sqlserver_soda_configuration(server)
|
|
105
106
|
scan.add_configuration_yaml_str(soda_configuration_str)
|
|
106
107
|
scan.set_data_source_name(server.type)
|
|
108
|
+
elif server.type == "oracle":
|
|
109
|
+
soda_configuration_str = initialize_client_and_create_soda_configuration(server)
|
|
110
|
+
scan.add_configuration_yaml_str(soda_configuration_str)
|
|
111
|
+
scan.set_data_source_name(server.type)
|
|
107
112
|
elif server.type == "trino":
|
|
108
113
|
soda_configuration_str = to_trino_soda_configuration(server)
|
|
109
114
|
scan.add_configuration_yaml_str(soda_configuration_str)
|
|
@@ -71,7 +71,7 @@ def to_athena_soda_configuration(server):
|
|
|
71
71
|
data_source["catalog"] = server.catalog
|
|
72
72
|
|
|
73
73
|
if s3_session_token:
|
|
74
|
-
data_source["
|
|
74
|
+
data_source["session_token"] = s3_session_token
|
|
75
75
|
|
|
76
76
|
soda_configuration = {f"data_source {server.type}": data_source}
|
|
77
77
|
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
import yaml
|
|
4
|
+
|
|
5
|
+
from datacontract.model.data_contract_specification import Server
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def initialize_client_and_create_soda_configuration(server: Server) -> str:
|
|
9
|
+
import oracledb
|
|
10
|
+
soda_config = to_oracle_soda_configuration(server)
|
|
11
|
+
|
|
12
|
+
oracle_client_dir = os.getenv("DATACONTRACT_ORACLE_CLIENT_DIR")
|
|
13
|
+
if oracle_client_dir is not None:
|
|
14
|
+
# Soda Core currently does not support thick mode natively, see https://github.com/sodadata/soda-core/issues/2036
|
|
15
|
+
# but the oracledb client can be configured accordingly before Soda initializes as a work-around
|
|
16
|
+
oracledb.init_oracle_client(lib_dir=oracle_client_dir)
|
|
17
|
+
|
|
18
|
+
return soda_config
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def to_oracle_soda_configuration(server: Server) -> str:
|
|
22
|
+
"""Serialize server config to soda configuration.
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
### Example:
|
|
26
|
+
type: oracle
|
|
27
|
+
host: database-1.us-east-1.rds.amazonaws.com
|
|
28
|
+
port: '1521'
|
|
29
|
+
username: simple
|
|
30
|
+
password: simple_pass
|
|
31
|
+
connectstring: database-1.us-east-1.rds.amazonaws.com:1521/ORCL (database is equal to service name at oracle)
|
|
32
|
+
schema: SYSTEM
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
service_name = server.service_name or server.database
|
|
36
|
+
# with service account key, using an external json file
|
|
37
|
+
soda_configuration = {
|
|
38
|
+
f"data_source {server.type}": {
|
|
39
|
+
"type": "oracle",
|
|
40
|
+
"host": server.host,
|
|
41
|
+
"port": str(server.port),
|
|
42
|
+
"username": os.getenv("DATACONTRACT_ORACLE_USERNAME", ""),
|
|
43
|
+
"password": os.getenv("DATACONTRACT_ORACLE_PASSWORD", ""),
|
|
44
|
+
"connectstring": f"{server.host}:{server.port}/{service_name}",
|
|
45
|
+
"schema": server.schema_,
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
soda_configuration_str = yaml.dump(soda_configuration)
|
|
50
|
+
return soda_configuration_str
|
|
@@ -11,6 +11,7 @@ from open_data_contract_standard.model import (
|
|
|
11
11
|
Server,
|
|
12
12
|
ServiceLevelAgreementProperty,
|
|
13
13
|
Support,
|
|
14
|
+
Team,
|
|
14
15
|
)
|
|
15
16
|
|
|
16
17
|
from datacontract.export.exporter import Exporter
|
|
@@ -30,7 +31,7 @@ def to_odcs_v3_yaml(data_contract_spec: DataContractSpecification) -> str:
|
|
|
30
31
|
|
|
31
32
|
def to_odcs_v3(data_contract_spec: DataContractSpecification) -> OpenDataContractStandard:
|
|
32
33
|
result = OpenDataContractStandard(
|
|
33
|
-
apiVersion="v3.0
|
|
34
|
+
apiVersion="v3.1.0",
|
|
34
35
|
kind="DataContract",
|
|
35
36
|
id=data_contract_spec.id,
|
|
36
37
|
name=data_contract_spec.info.title,
|
|
@@ -88,7 +89,10 @@ def to_odcs_v3(data_contract_spec: DataContractSpecification) -> OpenDataContrac
|
|
|
88
89
|
if server_value.account is not None:
|
|
89
90
|
server.account = server_value.account
|
|
90
91
|
if server_value.database is not None:
|
|
91
|
-
server.
|
|
92
|
+
if server.type == "oracle":
|
|
93
|
+
server.serviceName = server_value.database
|
|
94
|
+
else:
|
|
95
|
+
server.database = server_value.database
|
|
92
96
|
if server_value.schema_ is not None:
|
|
93
97
|
server.schema_ = server_value.schema_
|
|
94
98
|
if server_value.format is not None:
|
|
@@ -127,9 +131,9 @@ def to_odcs_v3(data_contract_spec: DataContractSpecification) -> OpenDataContrac
|
|
|
127
131
|
|
|
128
132
|
if len(servers) > 0:
|
|
129
133
|
result.servers = servers
|
|
134
|
+
if (data_contract_spec.info.owner is not None) and (data_contract_spec.info.owner != ""):
|
|
135
|
+
result.team = Team(name=data_contract_spec.info.owner)
|
|
130
136
|
custom_properties = []
|
|
131
|
-
if data_contract_spec.info.owner is not None:
|
|
132
|
-
custom_properties.append(CustomProperty(property="owner", value=data_contract_spec.info.owner))
|
|
133
137
|
if data_contract_spec.info.model_extra is not None:
|
|
134
138
|
for key, value in data_contract_spec.info.model_extra.items():
|
|
135
139
|
custom_properties.append(CustomProperty(property=key, value=value))
|
|
@@ -194,14 +198,10 @@ def to_logical_type(type: str) -> str | None:
|
|
|
194
198
|
return "integer"
|
|
195
199
|
if type.lower() in ["boolean"]:
|
|
196
200
|
return "boolean"
|
|
197
|
-
if type.lower() in ["object", "record", "struct"]:
|
|
201
|
+
if type.lower() in ["object", "record", "struct", "map", "variant"]:
|
|
198
202
|
return "object"
|
|
199
|
-
if type.lower() in ["bytes"]:
|
|
203
|
+
if type.lower() in ["bytes", "array"]:
|
|
200
204
|
return "array"
|
|
201
|
-
if type.lower() in ["array"]:
|
|
202
|
-
return "array"
|
|
203
|
-
if type.lower() in ["variant"]:
|
|
204
|
-
return "variant"
|
|
205
205
|
if type.lower() in ["null"]:
|
|
206
206
|
return None
|
|
207
207
|
return None
|
|
@@ -224,6 +224,8 @@ def to_physical_type(config: Dict[str, Any]) -> str | None:
|
|
|
224
224
|
return config["databricksType"]
|
|
225
225
|
elif "physicalType" in config:
|
|
226
226
|
return config["physicalType"]
|
|
227
|
+
elif "oracleType" in config:
|
|
228
|
+
return config["oracleType"]
|
|
227
229
|
return None
|
|
228
230
|
|
|
229
231
|
|
|
@@ -22,6 +22,8 @@ def convert_to_sql_type(field: Field, server_type: str) -> str:
|
|
|
22
22
|
return convert_type_to_bigquery(field)
|
|
23
23
|
elif server_type == "trino":
|
|
24
24
|
return convert_type_to_trino(field)
|
|
25
|
+
elif server_type == "oracle":
|
|
26
|
+
return convert_type_to_oracle(field)
|
|
25
27
|
|
|
26
28
|
return field.type
|
|
27
29
|
|
|
@@ -390,3 +392,105 @@ def convert_type_to_trino(field: Field) -> None | str:
|
|
|
390
392
|
return "varbinary"
|
|
391
393
|
if field_type in ["object", "record", "struct"]:
|
|
392
394
|
return "json"
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def convert_type_to_oracle(field: Field) -> None | str:
|
|
398
|
+
"""Convert from supported datacontract types to equivalent Oracle types
|
|
399
|
+
|
|
400
|
+
Oracle returns types WITH precision/scale/length through Soda, so we need to match that.
|
|
401
|
+
For example:
|
|
402
|
+
- NUMBER -> NUMBER (base types without precision return without it)
|
|
403
|
+
- TIMESTAMP -> TIMESTAMP(6) (Oracle default precision)
|
|
404
|
+
- CHAR -> CHAR (but may need explicit handling)
|
|
405
|
+
|
|
406
|
+
For fields that were created with specific Oracle types (like NCHAR, ROWID, BLOB),
|
|
407
|
+
users should use config.oracleType to override the default mapping.
|
|
408
|
+
|
|
409
|
+
Reference: https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Data-Types.html
|
|
410
|
+
"""
|
|
411
|
+
# config.oracleType always wins - use it as-is without stripping
|
|
412
|
+
if field.config and "oracleType" in field.config:
|
|
413
|
+
return field.config["oracleType"]
|
|
414
|
+
|
|
415
|
+
if field.config and "physicalType" in field.config:
|
|
416
|
+
return field.config["physicalType"]
|
|
417
|
+
|
|
418
|
+
field_type = field.type
|
|
419
|
+
if not field_type:
|
|
420
|
+
return None
|
|
421
|
+
|
|
422
|
+
field_type = field_type.lower()
|
|
423
|
+
|
|
424
|
+
# String types - default to NVARCHAR2 for strings
|
|
425
|
+
if field_type in ["string", "varchar"]:
|
|
426
|
+
return "NVARCHAR2"
|
|
427
|
+
|
|
428
|
+
if field_type == "text":
|
|
429
|
+
# text could be NVARCHAR2 or NCLOB depending on size
|
|
430
|
+
if field.config and field.config.get("large"):
|
|
431
|
+
return "NCLOB"
|
|
432
|
+
return "NVARCHAR2"
|
|
433
|
+
|
|
434
|
+
# Numeric types - NUMBER without precision (Oracle returns just NUMBER)
|
|
435
|
+
if field_type in ["number", "decimal", "numeric", "int", "integer", "long", "bigint", "smallint"]:
|
|
436
|
+
return "NUMBER"
|
|
437
|
+
|
|
438
|
+
# Float types - BINARY_FLOAT/BINARY_DOUBLE by default
|
|
439
|
+
if field_type == "float":
|
|
440
|
+
return "BINARY_FLOAT"
|
|
441
|
+
|
|
442
|
+
if field_type in ["double", "double precision"]:
|
|
443
|
+
return "BINARY_DOUBLE"
|
|
444
|
+
|
|
445
|
+
# Boolean - maps to CHAR
|
|
446
|
+
if field_type == "boolean":
|
|
447
|
+
return "CHAR"
|
|
448
|
+
|
|
449
|
+
# Temporal types - Oracle returns with precision
|
|
450
|
+
if field_type in ["timestamp_tz", "timestamp with time zone", "timestamptz"]:
|
|
451
|
+
return "TIMESTAMP(6) WITH TIME ZONE"
|
|
452
|
+
|
|
453
|
+
if field_type in ["timestamp_ntz", "timestamp", "timestamp without time zone"]:
|
|
454
|
+
return "TIMESTAMP(6)"
|
|
455
|
+
|
|
456
|
+
if field_type == "date":
|
|
457
|
+
return "DATE"
|
|
458
|
+
|
|
459
|
+
if field_type == "time":
|
|
460
|
+
# Oracle's INTERVAL DAY TO SECOND has default precision
|
|
461
|
+
return "INTERVAL DAY(0) TO SECOND(6)"
|
|
462
|
+
|
|
463
|
+
# Binary types
|
|
464
|
+
if field_type in ["bytes", "binary"]:
|
|
465
|
+
# Default to RAW for bytes
|
|
466
|
+
return "RAW"
|
|
467
|
+
|
|
468
|
+
# LOB types
|
|
469
|
+
if field_type == "blob":
|
|
470
|
+
return "BLOB"
|
|
471
|
+
|
|
472
|
+
if field_type == "nclob":
|
|
473
|
+
return "NCLOB"
|
|
474
|
+
|
|
475
|
+
if field_type == "clob":
|
|
476
|
+
return "CLOB"
|
|
477
|
+
|
|
478
|
+
# Oracle-specific types
|
|
479
|
+
if field_type == "bfile":
|
|
480
|
+
return "BFILE"
|
|
481
|
+
|
|
482
|
+
if field_type in ["long raw", "longraw"]:
|
|
483
|
+
return "LONG RAW"
|
|
484
|
+
|
|
485
|
+
if field_type == "rowid":
|
|
486
|
+
return "ROWID"
|
|
487
|
+
|
|
488
|
+
if field_type == "urowid":
|
|
489
|
+
return "UROWID"
|
|
490
|
+
|
|
491
|
+
# Complex/JSON types -> CLOB (emulated)
|
|
492
|
+
if field_type in ["array", "map", "object", "record", "struct", "variant", "json"]:
|
|
493
|
+
return "CLOB"
|
|
494
|
+
|
|
495
|
+
# Default to CLOB for unknown types
|
|
496
|
+
return "CLOB"
|
|
@@ -16,6 +16,7 @@ from open_data_contract_standard.model import (
|
|
|
16
16
|
ServiceLevelAgreementProperty,
|
|
17
17
|
Support,
|
|
18
18
|
Team,
|
|
19
|
+
TeamMember,
|
|
19
20
|
)
|
|
20
21
|
from openpyxl.cell.cell import Cell
|
|
21
22
|
from openpyxl.workbook.workbook import Workbook
|
|
@@ -540,7 +541,7 @@ def import_team(workbook: Workbook) -> Optional[List[Team]]:
|
|
|
540
541
|
if (not (username or name or role)) or row_idx == team_range[0] - 1:
|
|
541
542
|
continue
|
|
542
543
|
|
|
543
|
-
team_member =
|
|
544
|
+
team_member = TeamMember(
|
|
544
545
|
username=username,
|
|
545
546
|
name=name,
|
|
546
547
|
description=get_cell_value(row, headers.get("description")),
|
|
@@ -128,6 +128,7 @@ def import_servers(odcs: OpenDataContractStandard) -> Dict[str, Server] | None:
|
|
|
128
128
|
server.account = odcs_server.account
|
|
129
129
|
server.database = odcs_server.database
|
|
130
130
|
server.schema_ = odcs_server.schema_
|
|
131
|
+
server.service_name = odcs_server.serviceName
|
|
131
132
|
server.host = odcs_server.host
|
|
132
133
|
server.port = odcs_server.port
|
|
133
134
|
server.catalog = odcs_server.catalog
|
|
@@ -196,6 +197,17 @@ def get_server_type(odcs: OpenDataContractStandard) -> str | None:
|
|
|
196
197
|
return server.type
|
|
197
198
|
|
|
198
199
|
|
|
200
|
+
def get_composite_primary_keys(properties: List[SchemaProperty]) -> list[str]:
|
|
201
|
+
primary_keys = [
|
|
202
|
+
(property.name, property.primaryKeyPosition)
|
|
203
|
+
for property in properties
|
|
204
|
+
if property.name and property.primaryKey is not None and property.primaryKey
|
|
205
|
+
]
|
|
206
|
+
|
|
207
|
+
primary_keys.sort(key=lambda x: x[1] or -1)
|
|
208
|
+
return [name for name, _ in primary_keys]
|
|
209
|
+
|
|
210
|
+
|
|
199
211
|
def import_models(odcs: Any) -> Dict[str, Model]:
|
|
200
212
|
custom_type_mappings = get_custom_type_mappings(odcs.customProperties)
|
|
201
213
|
|
|
@@ -213,6 +225,8 @@ def import_models(odcs: Any) -> Dict[str, Model]:
|
|
|
213
225
|
tags=odcs_schema.tags if odcs_schema.tags is not None else None,
|
|
214
226
|
)
|
|
215
227
|
model.fields = import_fields(odcs_schema.properties, custom_type_mappings, server_type=get_server_type(odcs))
|
|
228
|
+
if has_composite_primary_key(odcs_properties=odcs_schema.properties):
|
|
229
|
+
model.primaryKey = get_composite_primary_keys(odcs_schema.properties)
|
|
216
230
|
if odcs_schema.quality is not None:
|
|
217
231
|
model.quality = convert_quality_list(odcs_schema.quality)
|
|
218
232
|
model.title = schema_name
|
|
@@ -313,6 +327,8 @@ def import_field_config(odcs_property: SchemaProperty, server_type=None) -> dict
|
|
|
313
327
|
config["sqlserverType"] = physical_type
|
|
314
328
|
elif server_type == "databricks":
|
|
315
329
|
config["databricksType"] = physical_type
|
|
330
|
+
elif server_type == "oracle":
|
|
331
|
+
config["oracleType"] = physical_type
|
|
316
332
|
else:
|
|
317
333
|
config["physicalType"] = physical_type
|
|
318
334
|
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from urllib.parse import urlparse
|
|
3
|
+
|
|
4
|
+
import requests
|
|
5
|
+
|
|
6
|
+
from datacontract.model.run import Run
|
|
7
|
+
|
|
8
|
+
# used to retrieve the HTML location of the published data contract or test results
|
|
9
|
+
RESPONSE_HEADER_LOCATION_HTML = "location-html"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def publish_test_results_to_entropy_data(run: Run, publish_url: str, ssl_verification: bool):
|
|
13
|
+
try:
|
|
14
|
+
host = publish_url
|
|
15
|
+
if publish_url is None:
|
|
16
|
+
# this url supports Data Mesh Manager and Data Contract Manager
|
|
17
|
+
host = _get_host()
|
|
18
|
+
url = "%s/api/test-results" % host
|
|
19
|
+
else:
|
|
20
|
+
url = publish_url
|
|
21
|
+
|
|
22
|
+
api_key = _get_api_key()
|
|
23
|
+
|
|
24
|
+
if run.dataContractId is None:
|
|
25
|
+
raise Exception("Cannot publish run results for unknown data contract ID")
|
|
26
|
+
|
|
27
|
+
headers = {"Content-Type": "application/json", "x-api-key": api_key}
|
|
28
|
+
request_body = run.model_dump_json()
|
|
29
|
+
# print("Request Body:", request_body)
|
|
30
|
+
response = requests.post(
|
|
31
|
+
url,
|
|
32
|
+
data=request_body,
|
|
33
|
+
headers=headers,
|
|
34
|
+
verify=ssl_verification,
|
|
35
|
+
)
|
|
36
|
+
# print("Status Code:", response.status_code)
|
|
37
|
+
# print("Response Body:", response.text)
|
|
38
|
+
if response.status_code != 200:
|
|
39
|
+
display_host = _extract_hostname(host)
|
|
40
|
+
run.log_error(f"Error publishing test results to {display_host}: {response.text}")
|
|
41
|
+
return
|
|
42
|
+
run.log_info("Published test results successfully")
|
|
43
|
+
|
|
44
|
+
location_html = response.headers.get(RESPONSE_HEADER_LOCATION_HTML)
|
|
45
|
+
if location_html is not None and len(location_html) > 0:
|
|
46
|
+
print(f"🚀 Open {location_html}")
|
|
47
|
+
|
|
48
|
+
except Exception as e:
|
|
49
|
+
run.log_error(f"Failed publishing test results. Error: {str(e)}")
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def publish_data_contract_to_entropy_data(data_contract_dict: dict, ssl_verification: bool):
|
|
53
|
+
try:
|
|
54
|
+
api_key = _get_api_key()
|
|
55
|
+
host = _get_host()
|
|
56
|
+
headers = {"Content-Type": "application/json", "x-api-key": api_key}
|
|
57
|
+
id = data_contract_dict["id"]
|
|
58
|
+
url = f"{host}/api/datacontracts/{id}"
|
|
59
|
+
response = requests.put(
|
|
60
|
+
url=url,
|
|
61
|
+
json=data_contract_dict,
|
|
62
|
+
headers=headers,
|
|
63
|
+
verify=ssl_verification,
|
|
64
|
+
)
|
|
65
|
+
if response.status_code != 200:
|
|
66
|
+
display_host = _extract_hostname(host)
|
|
67
|
+
print(f"Error publishing data contract to {display_host}: {response.text}")
|
|
68
|
+
exit(1)
|
|
69
|
+
|
|
70
|
+
print("✅ Published data contract successfully")
|
|
71
|
+
|
|
72
|
+
location_html = response.headers.get(RESPONSE_HEADER_LOCATION_HTML)
|
|
73
|
+
if location_html is not None and len(location_html) > 0:
|
|
74
|
+
print(f"🚀 Open {location_html}")
|
|
75
|
+
|
|
76
|
+
except Exception as e:
|
|
77
|
+
print(f"Failed publishing data contract. Error: {str(e)}")
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _get_api_key() -> str:
|
|
81
|
+
"""
|
|
82
|
+
Get API key from environment variables with fallback priority:
|
|
83
|
+
1. ENTROPY_DATA_API_KEY
|
|
84
|
+
2. DATAMESH_MANAGER_API_KEY
|
|
85
|
+
3. DATACONTRACT_MANAGER_API_KEY
|
|
86
|
+
"""
|
|
87
|
+
api_key = os.getenv("ENTROPY_DATA_API_KEY")
|
|
88
|
+
if api_key is None:
|
|
89
|
+
api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
|
|
90
|
+
if api_key is None:
|
|
91
|
+
api_key = os.getenv("DATACONTRACT_MANAGER_API_KEY")
|
|
92
|
+
if api_key is None:
|
|
93
|
+
raise Exception(
|
|
94
|
+
"Cannot publish, as neither ENTROPY_DATA_API_KEY, DATAMESH_MANAGER_API_KEY, nor DATACONTRACT_MANAGER_API_KEY is set"
|
|
95
|
+
)
|
|
96
|
+
return api_key
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _get_host() -> str:
|
|
100
|
+
"""
|
|
101
|
+
Get host from environment variables with fallback priority:
|
|
102
|
+
1. ENTROPY_DATA_HOST
|
|
103
|
+
2. DATAMESH_MANAGER_HOST
|
|
104
|
+
3. DATACONTRACT_MANAGER_HOST
|
|
105
|
+
4. Default: https://api.entropy-data.com
|
|
106
|
+
"""
|
|
107
|
+
host = os.getenv("ENTROPY_DATA_HOST")
|
|
108
|
+
if host is None:
|
|
109
|
+
host = os.getenv("DATAMESH_MANAGER_HOST")
|
|
110
|
+
if host is None:
|
|
111
|
+
host = os.getenv("DATACONTRACT_MANAGER_HOST")
|
|
112
|
+
if host is None:
|
|
113
|
+
host = "https://api.entropy-data.com"
|
|
114
|
+
return host
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _extract_hostname(url: str) -> str:
|
|
118
|
+
"""
|
|
119
|
+
Extract the hostname (including subdomains and top-level domain) from a URL.
|
|
120
|
+
|
|
121
|
+
Examples:
|
|
122
|
+
- https://app.entropy-data.com/path -> app.entropy-data.com
|
|
123
|
+
- http://api.example.com:8080/api -> api.example.com
|
|
124
|
+
"""
|
|
125
|
+
parsed = urlparse(url)
|
|
126
|
+
return parsed.netloc.split(":")[0] if parsed.netloc else url
|
datacontract/lint/resolve.py
CHANGED
|
@@ -312,9 +312,9 @@ def _resolve_data_contract_from_str(
|
|
|
312
312
|
|
|
313
313
|
if schema_location is None:
|
|
314
314
|
if is_open_data_contract_standard(yaml_dict):
|
|
315
|
-
logging.info("Using ODCS 3.0
|
|
315
|
+
logging.info("Using ODCS 3.1.0 schema to validate data contract")
|
|
316
316
|
# TODO refactor this to a specific function
|
|
317
|
-
schema_location = resources.files("datacontract").joinpath("schemas", "odcs-3.0.
|
|
317
|
+
schema_location = resources.files("datacontract").joinpath("schemas", "odcs-3.1.0.schema.json")
|
|
318
318
|
|
|
319
319
|
_validate_json_schema(yaml_dict, schema_location)
|
|
320
320
|
|
datacontract/lint/urls.py
CHANGED
|
@@ -28,10 +28,22 @@ def fetch_resource(url: str):
|
|
|
28
28
|
def _set_api_key(headers, url):
|
|
29
29
|
hostname = urlparse(url).hostname
|
|
30
30
|
|
|
31
|
+
entropy_data_api_key = os.getenv("ENTROPY_DATA_API_KEY")
|
|
31
32
|
datamesh_manager_api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
|
|
32
33
|
datacontract_manager_api_key = os.getenv("DATACONTRACT_MANAGER_API_KEY")
|
|
33
34
|
|
|
34
|
-
if hostname == "
|
|
35
|
+
if hostname == "entropy-data.com" or hostname.endswith(".entropy-data.com"):
|
|
36
|
+
if entropy_data_api_key is None or entropy_data_api_key == "":
|
|
37
|
+
print("Error: Entropy Data API key is not set. Set env variable ENTROPY_DATA_API_KEY.")
|
|
38
|
+
raise DataContractException(
|
|
39
|
+
type="lint",
|
|
40
|
+
name=f"Reading data contract from {url}",
|
|
41
|
+
reason="Error: Entropy Data API key is not set. Set env variable ENTROPY_DATA_API_KEY.",
|
|
42
|
+
engine="datacontract",
|
|
43
|
+
result="error",
|
|
44
|
+
)
|
|
45
|
+
headers["x-api-key"] = entropy_data_api_key
|
|
46
|
+
elif hostname == "datamesh-manager.com" or hostname.endswith(".datamesh-manager.com"):
|
|
35
47
|
if datamesh_manager_api_key is None or datamesh_manager_api_key == "":
|
|
36
48
|
print("Error: Data Mesh Manager API key is not set. Set env variable DATAMESH_MANAGER_API_KEY.")
|
|
37
49
|
raise DataContractException(
|
|
@@ -54,7 +66,9 @@ def _set_api_key(headers, url):
|
|
|
54
66
|
)
|
|
55
67
|
headers["x-api-key"] = datacontract_manager_api_key
|
|
56
68
|
|
|
57
|
-
if datamesh_manager_api_key is not None and datamesh_manager_api_key != "":
|
|
58
|
-
headers["x-api-key"] = datamesh_manager_api_key
|
|
59
69
|
if datacontract_manager_api_key is not None and datacontract_manager_api_key != "":
|
|
60
70
|
headers["x-api-key"] = datacontract_manager_api_key
|
|
71
|
+
if datamesh_manager_api_key is not None and datamesh_manager_api_key != "":
|
|
72
|
+
headers["x-api-key"] = datamesh_manager_api_key
|
|
73
|
+
if entropy_data_api_key is not None and entropy_data_api_key != "":
|
|
74
|
+
headers["x-api-key"] = entropy_data_api_key
|