datacontract-cli 0.10.0__py3-none-any.whl → 0.10.2__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- datacontract/breaking/breaking_rules.py +4 -0
- datacontract/catalog/catalog.py +74 -0
- datacontract/cli.py +39 -3
- datacontract/data_contract.py +12 -1
- datacontract/engines/fastjsonschema/check_jsonschema.py +1 -2
- datacontract/engines/soda/check_soda_execute.py +9 -15
- datacontract/engines/soda/connections/duckdb.py +83 -14
- datacontract/engines/soda/connections/kafka.py +108 -105
- datacontract/export/avro_idl_converter.py +1 -2
- datacontract/export/dbt_converter.py +1 -2
- datacontract/export/great_expectations_converter.py +1 -2
- datacontract/export/html_export.py +3 -4
- datacontract/export/jsonschema_converter.py +1 -2
- datacontract/export/odcs_converter.py +1 -2
- datacontract/export/rdf_converter.py +1 -1
- datacontract/export/sodacl_converter.py +1 -2
- datacontract/export/terraform_converter.py +1 -2
- datacontract/imports/avro_importer.py +1 -2
- datacontract/imports/glue_importer.py +183 -0
- datacontract/imports/sql_importer.py +20 -9
- datacontract/integration/publish_opentelemetry.py +3 -6
- datacontract/lint/linters/example_model_linter.py +1 -2
- datacontract/lint/linters/field_pattern_linter.py +1 -2
- datacontract/lint/linters/notice_period_linter.py +1 -2
- datacontract/lint/linters/quality_schema_linter.py +1 -2
- datacontract/lint/resolve.py +9 -6
- datacontract/model/data_contract_specification.py +2 -0
- datacontract/templates/datacontract.html +76 -21
- datacontract/templates/style/output.css +113 -4
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.2.dist-info}/METADATA +185 -107
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.2.dist-info}/RECORD +35 -33
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.2.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.2.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.2.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.2.dist-info}/top_level.txt +0 -0
datacontract/breaking/breaking_rules.py
CHANGED

@@ -20,6 +20,10 @@ class BreakingRules:
     field_ref_removed = Severity.WARNING
     field_ref_updated = Severity.WARNING
 
+    field_title_added = Severity.INFO
+    field_title_removed = Severity.INFO
+    field_title_updated = Severity.INFO
+
     field_type_added = Severity.WARNING
     field_type_removed = Severity.WARNING
     field_type_updated = Severity.ERROR
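The three new field_title_* rules rank title changes as INFO, the lowest severity in this table, alongside the existing WARNING and ERROR rules for refs and types. As a sketch of how such a severity table can be consumed (severity_for is a hypothetical helper, not part of the package):

    from datacontract.breaking.breaking_rules import BreakingRules

    def severity_for(rule_name: str):
        # Rule names map 1:1 to class attributes,
        # e.g. severity_for("field_title_added") -> Severity.INFO
        return getattr(BreakingRules, rule_name)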
datacontract/catalog/catalog.py
ADDED

@@ -0,0 +1,74 @@
+from dataclasses import dataclass
+from datetime import datetime
+from pathlib import Path
+
+import pytz
+from jinja2 import PackageLoader, Environment, select_autoescape
+
+from datacontract.data_contract import DataContract
+from datacontract.export.html_export import get_version
+from datacontract.model.data_contract_specification import \
+    DataContractSpecification
+
+
+def create_data_contract_html(contracts, file: Path, path: Path):
+    data_contract = DataContract(data_contract_file=f"{file.absolute()}", inline_definitions=True, inline_quality=True)
+    html = data_contract.export(export_format="html")
+    spec = data_contract.get_data_contract_specification()
+    file_without_suffix = file.with_suffix(".html")
+    html_filepath = path / file_without_suffix
+    html_filepath.parent.mkdir(parents=True, exist_ok=True)
+    with open(html_filepath, "w") as f:
+        f.write(html)
+    contracts.append(
+        DataContractView(
+            html_filepath=html_filepath,
+            html_link=file_without_suffix,
+            spec=spec,
+        )
+    )
+    print(f"Created {html_filepath}")
+
+
+@dataclass
+class DataContractView:
+    """Class for keeping track of an item in inventory."""
+
+    html_filepath: Path
+    html_link: Path
+    spec: DataContractSpecification
+
+
+def create_index_html(contracts, path):
+    index_filepath = path / "index.html"
+    with open(index_filepath, "w") as f:
+        # Load templates from templates folder
+        package_loader = PackageLoader("datacontract", "templates")
+        env = Environment(
+            loader=package_loader,
+            autoescape=select_autoescape(
+                enabled_extensions="html",
+                default_for_string=True,
+            ),
+        )
+
+        # Load the required template
+        template = env.get_template("index.html")
+
+        style_content, _, _ = package_loader.get_source(env, "style/output.css")
+
+        tz = pytz.timezone("UTC")
+        now = datetime.now(tz)
+        formatted_date = now.strftime("%d %b %Y %H:%M:%S UTC")
+        datacontract_cli_version = get_version()
+
+        # Render the template with necessary data
+        html_string = template.render(
+            style=style_content,
+            formatted_date=formatted_date,
+            datacontract_cli_version=datacontract_cli_version,
+            contracts=contracts,
+            contracts_size=len(contracts),
+        )
+        f.write(html_string)
+    print(f"Created {index_filepath}")
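Taken together, these helpers render one HTML page per data contract plus an index page. A minimal driver sketch, mirroring the catalog command that cli.py wires up below (same calls and glob pattern as in that command):

    from pathlib import Path
    from datacontract.catalog.catalog import create_data_contract_html, create_index_html

    path = Path("catalog/")
    path.mkdir(parents=True, exist_ok=True)

    contracts = []  # create_data_contract_html appends a DataContractView per file
    for file in Path().glob("*.yaml"):
        create_data_contract_html(contracts, file, path)
    create_index_html(contracts, path)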
datacontract/cli.py
CHANGED
@@ -1,5 +1,6 @@
 from enum import Enum
 from importlib import metadata
+from pathlib import Path
 from typing import Iterable, Optional
 
 import typer

@@ -10,6 +11,8 @@ from rich.table import Table
 from typer.core import TyperGroup
 from typing_extensions import Annotated
 
+from datacontract.catalog.catalog import create_index_html, \
+    create_data_contract_html
 from datacontract.data_contract import DataContract
 from datacontract.init.download_datacontract_file import \
     download_datacontract_file, FileExistsException

@@ -160,6 +163,7 @@ class ExportFormat(str, Enum):
 @app.command()
 def export(
     format: Annotated[ExportFormat, typer.Option(help="The export format.")],
+    output: Annotated[Path, typer.Option(help="Specify the file path where the exported data will be saved. If no path is provided, the output will be printed to stdout.")] = None,
     server: Annotated[str, typer.Option(help="The server name to export.")] = None,
     model: Annotated[
         str,

@@ -169,10 +173,12 @@ def export(
             "models (default)."
         ),
     ] = "all",
+    # TODO: this should be a subcommand
     rdf_base: Annotated[
         Optional[str],
         typer.Option(help="[rdf] The base URI used to generate the RDF graph.", rich_help_panel="RDF Options"),
     ] = None,
+    # TODO: this should be a subcommand
     sql_server_type: Annotated[
         Optional[str],
         typer.Option(

@@ -195,26 +201,56 @@ def export(
         sql_server_type=sql_server_type,
     )
     # Don't interpret console markup in output.
-    console.print(result, markup=False)
+    if output is None:
+        console.print(result, markup=False)
+    else:
+        with output.open('w') as f:
+            f.write(result)
+        console.print(f"Written result to {output}")
 
 
 class ImportFormat(str, Enum):
     sql = "sql"
     avro = "avro"
+    glue = "glue"
 
 
 @app.command(name="import")
 def import_(
     format: Annotated[ImportFormat, typer.Option(help="The format of the source file.")],
-    source: Annotated[str, typer.Option(help="The path to the file that should be imported.")],
+    source: Annotated[str, typer.Option(help="The path to the file or Glue Database that should be imported.")],
 ):
     """
-    Create a data contract from the given source
+    Create a data contract from the given source location. Prints to stdout.
     """
     result = DataContract().import_from_source(format, source)
     console.print(result.to_yaml())
 
 
+@app.command(name="catalog")
+def catalog(
+    files: Annotated[
+        Optional[str], typer.Option(help="Glob pattern for the data contract files to include in the catalog.")
+    ] = "*.yaml",
+    output: Annotated[Optional[str], typer.Option(help="Output directory for the catalog html files.")] = "catalog/",
+):
+    """
+    Create an html catalog of data contracts.
+    """
+    path = Path(output)
+    path.mkdir(parents=True, exist_ok=True)
+    console.print(f"Created {output}")
+
+    contracts = []
+    for file in Path().glob(files):
+        try:
+            create_data_contract_html(contracts, file, path)
+        except Exception as e:
+            console.print(f"Skipped {file} due to error: {e}")
+
+    create_index_html(contracts, path)
+
+
 @app.command()
 def breaking(
     location_old: Annotated[str, typer.Argument(help="The location (url or path) of the old data contract yaml.")],
datacontract/data_contract.py
CHANGED
@@ -30,6 +30,7 @@ from datacontract.export.sodacl_converter import to_sodacl_yaml
 from datacontract.export.sql_converter import to_sql_ddl, to_sql_query
 from datacontract.export.terraform_converter import to_terraform
 from datacontract.imports.avro_importer import import_avro
+from datacontract.imports.glue_importer import import_glue
 from datacontract.imports.sql_importer import import_sql
 from datacontract.integration.publish_datamesh_manager import \
     publish_datamesh_manager

@@ -66,6 +67,7 @@ class DataContract:
         publish_to_opentelemetry: bool = False,
         spark: SparkSession = None,
         inline_definitions: bool = False,
+        inline_quality: bool = False,
     ):
         self._data_contract_file = data_contract_file
         self._data_contract_str = data_contract_str

@@ -77,6 +79,7 @@ class DataContract:
         self._publish_to_opentelemetry = publish_to_opentelemetry
         self._spark = spark
         self._inline_definitions = inline_definitions
+        self._inline_quality = inline_quality
         self.all_linters = {
             ExampleModelLinter(),
             QualityUsesSchemaLinter(),

@@ -105,6 +108,7 @@ class DataContract:
             self._data_contract,
             self._schema_location,
             inline_definitions=True,
+            inline_quality=True,
         )
         run.checks.append(
             Check(type="lint", result="passed", name="Data contract is syntactically valid", engine="datacontract")

@@ -273,11 +277,16 @@ class DataContract:
             data_contract=self._data_contract,
             schema_location=self._schema_location,
             inline_definitions=self._inline_definitions,
+            inline_quality=self._inline_quality,
         )
 
     def export(self, export_format, model: str = "all", rdf_base: str = None, sql_server_type: str = "auto") -> str:
         data_contract = resolve.resolve_data_contract(
-            self._data_contract_file,
+            self._data_contract_file,
+            self._data_contract_str,
+            self._data_contract,
+            inline_definitions=True,
+            inline_quality=True,
         )
         if export_format == "jsonschema":
             if data_contract.models is None:

@@ -482,6 +491,8 @@ class DataContract:
             data_contract_specification = import_sql(data_contract_specification, format, source)
         elif format == "avro":
             data_contract_specification = import_avro(data_contract_specification, source)
+        elif format == "glue":
+            data_contract_specification = import_glue(data_contract_specification, source)
         else:
             print(f"Import format {format} not supported.")
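A minimal sketch of the new constructor flag in use; judging from this diff, inline_quality mirrors inline_definitions and resolves referenced quality attributes into the specification before linting, testing, or exporting (the file name is a placeholder):

    from datacontract.data_contract import DataContract

    dc = DataContract(
        data_contract_file="datacontract.yaml",  # placeholder path
        inline_definitions=True,
        inline_quality=True,  # new flag in this release
    )
    print(dc.export(export_format="html"))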
datacontract/engines/fastjsonschema/check_jsonschema.py
CHANGED

@@ -6,8 +6,7 @@ import fastjsonschema
 
 from datacontract.engines.fastjsonschema.s3.s3_read_files import yield_s3_files
 from datacontract.export.jsonschema_converter import to_jsonschema
-from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Server
+from datacontract.model.data_contract_specification import DataContractSpecification, Server
 from datacontract.model.exceptions import DataContractException
 from datacontract.model.run import Run, Check
datacontract/engines/soda/check_soda_execute.py
CHANGED

@@ -3,20 +3,14 @@ import logging
 from pyspark.sql import SparkSession
 from soda.scan import Scan
 
-from datacontract.engines.soda.connections.bigquery import \
-    to_bigquery_soda_configuration
-from datacontract.engines.soda.connections.databricks import \
-    to_databricks_soda_configuration
+from datacontract.engines.soda.connections.bigquery import to_bigquery_soda_configuration
+from datacontract.engines.soda.connections.databricks import to_databricks_soda_configuration
 from datacontract.engines.soda.connections.duckdb import get_duckdb_connection
-from datacontract.engines.soda.connections.kafka import create_spark_session, \
-    read_kafka_topic
-from datacontract.engines.soda.connections.postgres import \
-    to_postgres_soda_configuration
-from datacontract.engines.soda.connections.snowflake import \
-    to_snowflake_soda_configuration
+from datacontract.engines.soda.connections.kafka import create_spark_session, read_kafka_topic
+from datacontract.engines.soda.connections.postgres import to_postgres_soda_configuration
+from datacontract.engines.soda.connections.snowflake import to_snowflake_soda_configuration
 from datacontract.export.sodacl_converter import to_sodacl_yaml
-from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Server
+from datacontract.model.data_contract_specification import DataContractSpecification, Server
 from datacontract.model.run import Run, Check, Log

@@ -30,9 +24,9 @@ def check_soda_execute(
     run.log_info("Running engine soda-core")
     scan = Scan()
 
-    if server.type in ["s3", "local"]:
-        if server.format in ["json", "parquet", "csv"]:
-            con = get_duckdb_connection(data_contract, server)
+    if server.type in ["s3", "azure", "local"]:
+        if server.format in ["json", "parquet", "csv", "delta"]:
+            con = get_duckdb_connection(data_contract, server, run)
             scan.add_duckdb_connection(duckdb_connection=con, data_source_name=server.type)
             scan.set_data_source_name(server.type)
     else:
datacontract/engines/soda/connections/duckdb.py
CHANGED

@@ -1,23 +1,28 @@
-import logging
 import os
 
+from deltalake import DeltaTable
+
 import duckdb
 from datacontract.export.csv_type_converter import convert_to_duckdb_csv_type
+from datacontract.model.run import Run
 
 
-def get_duckdb_connection(data_contract, server):
+def get_duckdb_connection(data_contract, server, run: Run):
     con = duckdb.connect(database=":memory:")
     path: str = ""
     if server.type == "local":
         path = server.path
     if server.type == "s3":
         path = server.location
-        setup_s3_connection(con, server)
+        setup_s3_connection(con, server)
+    if server.type == "azure":
+        path = server.location
+        setup_azure_connection(con, server)
     for model_name, model in data_contract.models.items():
         model_path = path
         if "{model}" in model_path:
             model_path = model_path.format(model=model_name)
-        logging.info(f"Creating table {model_name} for {model_path}")
+        run.log_info(f"Creating table {model_name} for {model_path}")
 
         if server.format == "json":
             format = "auto"

@@ -34,6 +39,7 @@ def get_duckdb_connection(data_contract, server):
             """)
         elif server.format == "csv":
             columns = to_csv_types(model)
+            run.log_info("Using columns: " + str(columns))
             if columns is None:
                 con.sql(
                     f"""CREATE VIEW "{model_name}" AS SELECT * FROM read_csv('{model_path}', hive_partitioning=1);"""

@@ -42,6 +48,21 @@ def get_duckdb_connection(data_contract, server):
                 con.sql(
                     f"""CREATE VIEW "{model_name}" AS SELECT * FROM read_csv('{model_path}', hive_partitioning=1, columns={columns});"""
                 )
+        elif server.format == "delta":
+            if server.type == "azure":
+                raise NotImplementedError("Support for Delta Tables on Azure Storage is not implemented yet")
+
+            storage_options = {
+                "AWS_ENDPOINT_URL": server.endpointUrl,
+                "AWS_ACCESS_KEY_ID": os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID"),
+                "AWS_SECRET_ACCESS_KEY": os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY"),
+                "AWS_REGION": os.getenv("DATACONTRACT_S3_REGION", "us-east-1"),
+                "AWS_ALLOW_HTTP": "True" if server.endpointUrl.startswith("http://") else "False",
+            }
+
+            delta_table_arrow = DeltaTable(model_path, storage_options=storage_options).to_pyarrow_dataset()
+
+            con.register(model_name, delta_table_arrow)
     return con
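For reference, the delta branch above in isolation: deltalake opens the table as a PyArrow dataset, which DuckDB can register and query like a view. A sketch under assumed credentials; the bucket path and endpoint are placeholders, and the DATACONTRACT_S3_* variables are the ones read in the diff:

    import os

    import duckdb
    from deltalake import DeltaTable

    storage_options = {
        "AWS_ENDPOINT_URL": "http://localhost:9000",  # placeholder S3-compatible endpoint
        "AWS_ACCESS_KEY_ID": os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID"),
        "AWS_SECRET_ACCESS_KEY": os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY"),
        "AWS_REGION": os.getenv("DATACONTRACT_S3_REGION", "us-east-1"),
        "AWS_ALLOW_HTTP": "True",  # needed because the placeholder endpoint is plain http
    }
    dataset = DeltaTable("s3://my-bucket/my_table", storage_options=storage_options).to_pyarrow_dataset()

    con = duckdb.connect(database=":memory:")
    con.register("my_table", dataset)  # expose the Delta table to SQL
    print(con.sql("SELECT COUNT(*) FROM my_table"))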
@@ -59,18 +80,66 @@ def setup_s3_connection(con, server):
     s3_region = os.getenv("DATACONTRACT_S3_REGION")
     s3_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
     s3_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
-    s3_endpoint = "s3.amazonaws.com"
-    use_ssl = "true"
+    s3_endpoint = "s3.amazonaws.com"
+    use_ssl = "true"
+    url_style = "vhost"
     if server.endpointUrl is not None:
         s3_endpoint = server.endpointUrl.removeprefix("http://").removeprefix("https://")
         if server.endpointUrl.startswith("http://"):
-            use_ssl = "false"
+            use_ssl = "false"
+            url_style = 'path'
+
+
     if s3_access_key_id is not None:
         con.sql(f"""
-        SET s3_region = '{s3_region}';
-        SET s3_access_key_id = '{s3_access_key_id}';
-        SET s3_secret_access_key = '{s3_secret_access_key}';
-        """)
+        CREATE OR REPLACE SECRET s3_secret (
+            TYPE S3,
+            PROVIDER CREDENTIAL_CHAIN,
+            REGION '{s3_region}',
+            KEY_ID '{s3_access_key_id}',
+            SECRET '{s3_secret_access_key}',
+            ENDPOINT '{s3_endpoint}',
+            USE_SSL '{use_ssl}',
+            URL_STYLE '{url_style}'
+        );
+        """)
+
+    # con.sql(f"""
+    # SET s3_region = '{s3_region}';
+    # SET s3_access_key_id = '{s3_access_key_id}';
+    # SET s3_secret_access_key = '{s3_secret_access_key}';
+    # """)
+    # else:
+    #     con.sql("""
+    #     RESET s3_region;
+    #     RESET s3_access_key_id;
+    #     RESET s3_secret_access_key;
+    #     """)
+    # con.sql("RESET s3_session_token")
+    # print(con.sql("SELECT * FROM duckdb_settings() WHERE name like 's3%'"))
+
+
+def setup_azure_connection(con, server):
+    tenant_id = os.getenv("DATACONTRACT_AZURE_TENANT_ID")
+    client_id = os.getenv("DATACONTRACT_AZURE_CLIENT_ID")
+    client_secret = os.getenv("DATACONTRACT_AZURE_CLIENT_SECRET")
+
+    if tenant_id is None:
+        raise ValueError("Error: Environment variable DATACONTRACT_AZURE_TENANT_ID is not set")
+    if client_id is None:
+        raise ValueError("Error: Environment variable DATACONTRACT_AZURE_CLIENT_ID is not set")
+    if client_secret is None:
+        raise ValueError("Error: Environment variable DATACONTRACT_AZURE_CLIENT_SECRET is not set")
+
+    con.install_extension("azure")
+    con.load_extension("azure")
+
+    con.sql(f"""
+        CREATE SECRET azure_spn (
+            TYPE AZURE,
+            PROVIDER SERVICE_PRINCIPAL,
+            TENANT_ID '{tenant_id}',
+            CLIENT_ID '{client_id}',
+            CLIENT_SECRET '{client_secret}'
+        );
+    """)