acryl-datahub 1.1.1rc3__py3-none-any.whl → 1.1.1rc4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.1.1rc4.dist-info}/METADATA +2613 -2613
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.1.1rc4.dist-info}/RECORD +29 -27
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.1.1rc4.dist-info}/WHEEL +1 -1
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +1 -0
- datahub/cli/ingest_cli.py +9 -1
- datahub/emitter/response_helper.py +86 -1
- datahub/emitter/rest_emitter.py +1 -1
- datahub/ingestion/source/datahub/config.py +11 -0
- datahub/ingestion/source/datahub/datahub_database_reader.py +186 -33
- datahub/ingestion/source/datahub/datahub_source.py +1 -1
- datahub/ingestion/source/openapi.py +12 -0
- datahub/ingestion/source/openapi_parser.py +56 -37
- datahub/ingestion/source/snowflake/snowflake_config.py +13 -0
- datahub/ingestion/source/snowflake/snowflake_v2.py +17 -6
- datahub/metadata/_internal_schema_classes.py +514 -514
- datahub/metadata/_urns/urn_defs.py +1785 -1785
- datahub/metadata/schema.avsc +17354 -17725
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +4 -0
- datahub/sdk/_all_entities.py +4 -0
- datahub/sdk/_shared.py +2 -1
- datahub/sdk/dataflow.py +302 -0
- datahub/sdk/datajob.py +335 -0
- datahub/sdk/entity_client.py +8 -0
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.1.1rc4.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.1.1rc4.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.1.1rc4.dist-info}/top_level.txt +0 -0
|
@@ -82,6 +82,9 @@ class OpenApiConfig(ConfigModel):
|
|
|
82
82
|
get_token: dict = Field(
|
|
83
83
|
default={}, description="Retrieving a token from the endpoint."
|
|
84
84
|
)
|
|
85
|
+
verify_ssl: bool = Field(
|
|
86
|
+
default=True, description="Enable SSL certificate verification"
|
|
87
|
+
)
|
|
85
88
|
|
|
86
89
|
@validator("bearer_token", always=True)
|
|
87
90
|
def ensure_only_one_token(
|
|
@@ -129,12 +132,14 @@ class OpenApiConfig(ConfigModel):
|
|
|
129
132
|
tok_url=url4req,
|
|
130
133
|
method=self.get_token["request_type"],
|
|
131
134
|
proxies=self.proxies,
|
|
135
|
+
verify_ssl=self.verify_ssl,
|
|
132
136
|
)
|
|
133
137
|
sw_dict = get_swag_json(
|
|
134
138
|
self.url,
|
|
135
139
|
token=self.token,
|
|
136
140
|
swagger_file=self.swagger_file,
|
|
137
141
|
proxies=self.proxies,
|
|
142
|
+
verify_ssl=self.verify_ssl,
|
|
138
143
|
) # load the swagger file
|
|
139
144
|
|
|
140
145
|
else: # using basic auth for accessing endpoints
|
|
@@ -144,6 +149,7 @@ class OpenApiConfig(ConfigModel):
|
|
|
144
149
|
password=self.password,
|
|
145
150
|
swagger_file=self.swagger_file,
|
|
146
151
|
proxies=self.proxies,
|
|
152
|
+
verify_ssl=self.verify_ssl,
|
|
147
153
|
)
|
|
148
154
|
return sw_dict
|
|
149
155
|
|
|
@@ -343,6 +349,7 @@ class APISource(Source, ABC):
|
|
|
343
349
|
tot_url,
|
|
344
350
|
token=config.token,
|
|
345
351
|
proxies=config.proxies,
|
|
352
|
+
verify_ssl=config.verify_ssl,
|
|
346
353
|
)
|
|
347
354
|
else:
|
|
348
355
|
response = request_call(
|
|
@@ -350,6 +357,7 @@ class APISource(Source, ABC):
|
|
|
350
357
|
username=config.username,
|
|
351
358
|
password=config.password,
|
|
352
359
|
proxies=config.proxies,
|
|
360
|
+
verify_ssl=config.verify_ssl,
|
|
353
361
|
)
|
|
354
362
|
if response.status_code == 200:
|
|
355
363
|
fields2add, root_dataset_samples[dataset_name] = extract_fields(
|
|
@@ -380,6 +388,7 @@ class APISource(Source, ABC):
|
|
|
380
388
|
tot_url,
|
|
381
389
|
token=config.token,
|
|
382
390
|
proxies=config.proxies,
|
|
391
|
+
verify_ssl=config.verify_ssl,
|
|
383
392
|
)
|
|
384
393
|
else:
|
|
385
394
|
response = request_call(
|
|
@@ -387,6 +396,7 @@ class APISource(Source, ABC):
|
|
|
387
396
|
username=config.username,
|
|
388
397
|
password=config.password,
|
|
389
398
|
proxies=config.proxies,
|
|
399
|
+
verify_ssl=config.verify_ssl,
|
|
390
400
|
)
|
|
391
401
|
if response.status_code == 200:
|
|
392
402
|
fields2add, _ = extract_fields(response, dataset_name)
|
|
@@ -415,6 +425,7 @@ class APISource(Source, ABC):
|
|
|
415
425
|
tot_url,
|
|
416
426
|
token=config.token,
|
|
417
427
|
proxies=config.proxies,
|
|
428
|
+
verify_ssl=config.verify_ssl,
|
|
418
429
|
)
|
|
419
430
|
else:
|
|
420
431
|
response = request_call(
|
|
@@ -422,6 +433,7 @@ class APISource(Source, ABC):
|
|
|
422
433
|
username=config.username,
|
|
423
434
|
password=config.password,
|
|
424
435
|
proxies=config.proxies,
|
|
436
|
+
verify_ssl=config.verify_ssl,
|
|
425
437
|
)
|
|
426
438
|
if response.status_code == 200:
|
|
427
439
|
fields2add, _ = extract_fields(response, dataset_name)
|
|
@@ -59,17 +59,21 @@ def request_call(
|
|
|
59
59
|
username: Optional[str] = None,
|
|
60
60
|
password: Optional[str] = None,
|
|
61
61
|
proxies: Optional[dict] = None,
|
|
62
|
+
verify_ssl: bool = True,
|
|
62
63
|
) -> requests.Response:
|
|
63
64
|
headers = {"accept": "application/json"}
|
|
64
65
|
if username is not None and password is not None:
|
|
65
66
|
return requests.get(
|
|
66
|
-
url,
|
|
67
|
+
url,
|
|
68
|
+
headers=headers,
|
|
69
|
+
auth=HTTPBasicAuth(username, password),
|
|
70
|
+
verify=verify_ssl,
|
|
67
71
|
)
|
|
68
72
|
elif token is not None:
|
|
69
73
|
headers["Authorization"] = f"{token}"
|
|
70
|
-
return requests.get(url, proxies=proxies, headers=headers)
|
|
74
|
+
return requests.get(url, proxies=proxies, headers=headers, verify=verify_ssl)
|
|
71
75
|
else:
|
|
72
|
-
return requests.get(url, headers=headers)
|
|
76
|
+
return requests.get(url, headers=headers, verify=verify_ssl)
|
|
73
77
|
|
|
74
78
|
|
|
75
79
|
def get_swag_json(
|
|
@@ -79,10 +83,16 @@ def get_swag_json(
|
|
|
79
83
|
password: Optional[str] = None,
|
|
80
84
|
swagger_file: str = "",
|
|
81
85
|
proxies: Optional[dict] = None,
|
|
86
|
+
verify_ssl: bool = True,
|
|
82
87
|
) -> Dict:
|
|
83
88
|
tot_url = url + swagger_file
|
|
84
89
|
response = request_call(
|
|
85
|
-
url=tot_url,
|
|
90
|
+
url=tot_url,
|
|
91
|
+
token=token,
|
|
92
|
+
username=username,
|
|
93
|
+
password=password,
|
|
94
|
+
proxies=proxies,
|
|
95
|
+
verify_ssl=verify_ssl,
|
|
86
96
|
)
|
|
87
97
|
|
|
88
98
|
if response.status_code != 200:
|
|
@@ -127,37 +137,45 @@ def get_endpoints(sw_dict: dict) -> dict:
|
|
|
127
137
|
check_sw_version(sw_dict)
|
|
128
138
|
|
|
129
139
|
for p_k, p_o in sw_dict["paths"].items():
|
|
130
|
-
method
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
url_details[p_k]
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
140
|
+
for method, method_spec in p_o.items():
|
|
141
|
+
# skip non-method keys like "parameters"
|
|
142
|
+
if method.lower() not in [
|
|
143
|
+
"get",
|
|
144
|
+
"post",
|
|
145
|
+
"put",
|
|
146
|
+
"delete",
|
|
147
|
+
"patch",
|
|
148
|
+
"options",
|
|
149
|
+
"head",
|
|
150
|
+
]:
|
|
151
|
+
continue
|
|
152
|
+
|
|
153
|
+
responses = method_spec.get("responses", {})
|
|
154
|
+
base_res = responses.get("200") or responses.get(200)
|
|
155
|
+
if not base_res:
|
|
156
|
+
# if there is no 200 response, we skip this method
|
|
157
|
+
continue
|
|
158
|
+
|
|
159
|
+
# if the description is not present, we will use the summary
|
|
160
|
+
# if both are not present, we will use an empty string
|
|
161
|
+
desc = method_spec.get("description") or method_spec.get("summary", "")
|
|
162
|
+
|
|
163
|
+
# if the tags are not present, we will use an empty list
|
|
164
|
+
tags = method_spec.get("tags", [])
|
|
165
|
+
|
|
166
|
+
url_details[p_k] = {
|
|
167
|
+
"description": desc,
|
|
168
|
+
"tags": tags,
|
|
169
|
+
"method": method.upper(),
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
example_data = check_for_api_example_data(base_res, p_k)
|
|
173
|
+
if example_data:
|
|
174
|
+
url_details[p_k]["data"] = example_data
|
|
175
|
+
|
|
176
|
+
# checking whether there are defined parameters to execute the call...
|
|
177
|
+
if "parameters" in p_o[method]:
|
|
178
|
+
url_details[p_k]["parameters"] = p_o[method]["parameters"]
|
|
161
179
|
|
|
162
180
|
return dict(sorted(url_details.items()))
|
|
163
181
|
|
|
@@ -358,6 +376,7 @@ def get_tok(
|
|
|
358
376
|
tok_url: str = "",
|
|
359
377
|
method: str = "post",
|
|
360
378
|
proxies: Optional[dict] = None,
|
|
379
|
+
verify_ssl: bool = True,
|
|
361
380
|
) -> str:
|
|
362
381
|
"""
|
|
363
382
|
Trying to post username/password to get auth.
|
|
@@ -368,7 +387,7 @@ def get_tok(
|
|
|
368
387
|
# this will make a POST call with username and password
|
|
369
388
|
data = {"username": username, "password": password, "maxDuration": True}
|
|
370
389
|
# url2post = url + "api/authenticate/"
|
|
371
|
-
response = requests.post(url4req, proxies=proxies, json=data)
|
|
390
|
+
response = requests.post(url4req, proxies=proxies, json=data, verify=verify_ssl)
|
|
372
391
|
if response.status_code == 200:
|
|
373
392
|
cont = json.loads(response.content)
|
|
374
393
|
if "token" in cont: # other authentication scheme
|
|
@@ -377,7 +396,7 @@ def get_tok(
|
|
|
377
396
|
token = f"Bearer {cont['tokens']['access']}"
|
|
378
397
|
elif method == "get":
|
|
379
398
|
# this will make a GET call with username and password
|
|
380
|
-
response = requests.get(url4req)
|
|
399
|
+
response = requests.get(url4req, verify=verify_ssl)
|
|
381
400
|
if response.status_code == 200:
|
|
382
401
|
cont = json.loads(response.content)
|
|
383
402
|
token = cont["token"]
|
|
@@ -22,6 +22,7 @@ from datahub.ingestion.api.incremental_properties_helper import (
|
|
|
22
22
|
from datahub.ingestion.glossary.classification_mixin import (
|
|
23
23
|
ClassificationSourceConfigMixin,
|
|
24
24
|
)
|
|
25
|
+
from datahub.ingestion.source.snowflake.constants import SnowflakeEdition
|
|
25
26
|
from datahub.ingestion.source.snowflake.snowflake_connection import (
|
|
26
27
|
SnowflakeConnectionConfig,
|
|
27
28
|
)
|
|
@@ -326,6 +327,18 @@ class SnowflakeV2Config(
|
|
|
326
327
|
" Map of share name -> details of share.",
|
|
327
328
|
)
|
|
328
329
|
|
|
330
|
+
known_snowflake_edition: Optional[SnowflakeEdition] = Field(
|
|
331
|
+
default=None,
|
|
332
|
+
description="Explicitly specify the Snowflake edition (STANDARD or ENTERPRISE). If unset, the edition will be inferred automatically using 'SHOW TAGS'.",
|
|
333
|
+
)
|
|
334
|
+
|
|
335
|
+
# Allows empty containers to be ingested before datasets are added, avoiding permission errors
|
|
336
|
+
warn_no_datasets: bool = Field(
|
|
337
|
+
hidden_from_docs=True,
|
|
338
|
+
default=False,
|
|
339
|
+
description="If True, warns when no datasets are found during ingestion. If False, ingestion fails when no datasets are found.",
|
|
340
|
+
)
|
|
341
|
+
|
|
329
342
|
include_assertion_results: bool = Field(
|
|
330
343
|
default=False,
|
|
331
344
|
description="Whether to ingest assertion run results for assertions created using Datahub"
|
|
@@ -9,6 +9,7 @@ import re
|
|
|
9
9
|
from dataclasses import dataclass
|
|
10
10
|
from typing import Dict, Iterable, List, Optional, Union
|
|
11
11
|
|
|
12
|
+
from datahub.configuration.time_window_config import BaseTimeWindowConfig
|
|
12
13
|
from datahub.ingestion.api.common import PipelineContext
|
|
13
14
|
from datahub.ingestion.api.decorators import (
|
|
14
15
|
SupportStatus,
|
|
@@ -551,11 +552,15 @@ class SnowflakeV2Source(
|
|
|
551
552
|
and len(discovered_views) == 0
|
|
552
553
|
and len(discovered_streams) == 0
|
|
553
554
|
):
|
|
554
|
-
self.
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
555
|
+
if self.config.warn_no_datasets:
|
|
556
|
+
self.structured_reporter.warning(
|
|
557
|
+
"No tables/views/streams found. Verify dataset permissions if Snowflake source is not empty.",
|
|
558
|
+
)
|
|
559
|
+
else:
|
|
560
|
+
self.structured_reporter.failure(
|
|
561
|
+
GENERIC_PERMISSION_ERROR_KEY,
|
|
562
|
+
"No tables/views/streams found. Verify dataset permissions in Snowflake.",
|
|
563
|
+
)
|
|
559
564
|
|
|
560
565
|
self.discovered_datasets = (
|
|
561
566
|
discovered_tables + discovered_views + discovered_streams
|
|
@@ -571,7 +576,11 @@ class SnowflakeV2Source(
|
|
|
571
576
|
queries_extractor = SnowflakeQueriesExtractor(
|
|
572
577
|
connection=self.connection,
|
|
573
578
|
config=SnowflakeQueriesExtractorConfig(
|
|
574
|
-
window=
|
|
579
|
+
window=BaseTimeWindowConfig(
|
|
580
|
+
start_time=self.config.start_time,
|
|
581
|
+
end_time=self.config.end_time,
|
|
582
|
+
bucket_duration=self.config.bucket_duration,
|
|
583
|
+
),
|
|
575
584
|
temporary_tables_pattern=self.config.temporary_tables_pattern,
|
|
576
585
|
include_lineage=self.config.include_table_lineage,
|
|
577
586
|
include_usage_statistics=self.config.include_usage_stats,
|
|
@@ -732,6 +741,8 @@ class SnowflakeV2Source(
|
|
|
732
741
|
return None
|
|
733
742
|
|
|
734
743
|
def is_standard_edition(self) -> bool:
|
|
744
|
+
if self.config.known_snowflake_edition is not None:
|
|
745
|
+
return self.config.known_snowflake_edition == SnowflakeEdition.STANDARD
|
|
735
746
|
try:
|
|
736
747
|
self.connection.query(SnowflakeQuery.show_tags())
|
|
737
748
|
return False
|