acryl-datahub 1.1.1rc3__py3-none-any.whl → 1.1.1rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic; review the per-file changes below for details.

Files changed (29)
  1. {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.1.1rc4.dist-info}/METADATA +2613 -2613
  2. {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.1.1rc4.dist-info}/RECORD +29 -27
  3. {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.1.1rc4.dist-info}/WHEEL +1 -1
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/dataset/dataset.py +1 -0
  6. datahub/cli/ingest_cli.py +9 -1
  7. datahub/emitter/response_helper.py +86 -1
  8. datahub/emitter/rest_emitter.py +1 -1
  9. datahub/ingestion/source/datahub/config.py +11 -0
  10. datahub/ingestion/source/datahub/datahub_database_reader.py +186 -33
  11. datahub/ingestion/source/datahub/datahub_source.py +1 -1
  12. datahub/ingestion/source/openapi.py +12 -0
  13. datahub/ingestion/source/openapi_parser.py +56 -37
  14. datahub/ingestion/source/snowflake/snowflake_config.py +13 -0
  15. datahub/ingestion/source/snowflake/snowflake_v2.py +17 -6
  16. datahub/metadata/_internal_schema_classes.py +514 -514
  17. datahub/metadata/_urns/urn_defs.py +1785 -1785
  18. datahub/metadata/schema.avsc +17354 -17725
  19. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  20. datahub/metadata/schemas/__init__.py +3 -3
  21. datahub/sdk/__init__.py +4 -0
  22. datahub/sdk/_all_entities.py +4 -0
  23. datahub/sdk/_shared.py +2 -1
  24. datahub/sdk/dataflow.py +302 -0
  25. datahub/sdk/datajob.py +335 -0
  26. datahub/sdk/entity_client.py +8 -0
  27. {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.1.1rc4.dist-info}/entry_points.txt +0 -0
  28. {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.1.1rc4.dist-info}/licenses/LICENSE +0 -0
  29. {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.1.1rc4.dist-info}/top_level.txt +0 -0
@@ -82,6 +82,9 @@ class OpenApiConfig(ConfigModel):
82
82
  get_token: dict = Field(
83
83
  default={}, description="Retrieving a token from the endpoint."
84
84
  )
85
+ verify_ssl: bool = Field(
86
+ default=True, description="Enable SSL certificate verification"
87
+ )
85
88
 
86
89
  @validator("bearer_token", always=True)
87
90
  def ensure_only_one_token(
@@ -129,12 +132,14 @@ class OpenApiConfig(ConfigModel):
129
132
  tok_url=url4req,
130
133
  method=self.get_token["request_type"],
131
134
  proxies=self.proxies,
135
+ verify_ssl=self.verify_ssl,
132
136
  )
133
137
  sw_dict = get_swag_json(
134
138
  self.url,
135
139
  token=self.token,
136
140
  swagger_file=self.swagger_file,
137
141
  proxies=self.proxies,
142
+ verify_ssl=self.verify_ssl,
138
143
  ) # load the swagger file
139
144
 
140
145
  else: # using basic auth for accessing endpoints
@@ -144,6 +149,7 @@ class OpenApiConfig(ConfigModel):
144
149
  password=self.password,
145
150
  swagger_file=self.swagger_file,
146
151
  proxies=self.proxies,
152
+ verify_ssl=self.verify_ssl,
147
153
  )
148
154
  return sw_dict
149
155
 
@@ -343,6 +349,7 @@ class APISource(Source, ABC):
343
349
  tot_url,
344
350
  token=config.token,
345
351
  proxies=config.proxies,
352
+ verify_ssl=config.verify_ssl,
346
353
  )
347
354
  else:
348
355
  response = request_call(
@@ -350,6 +357,7 @@ class APISource(Source, ABC):
350
357
  username=config.username,
351
358
  password=config.password,
352
359
  proxies=config.proxies,
360
+ verify_ssl=config.verify_ssl,
353
361
  )
354
362
  if response.status_code == 200:
355
363
  fields2add, root_dataset_samples[dataset_name] = extract_fields(
@@ -380,6 +388,7 @@ class APISource(Source, ABC):
380
388
  tot_url,
381
389
  token=config.token,
382
390
  proxies=config.proxies,
391
+ verify_ssl=config.verify_ssl,
383
392
  )
384
393
  else:
385
394
  response = request_call(
@@ -387,6 +396,7 @@ class APISource(Source, ABC):
387
396
  username=config.username,
388
397
  password=config.password,
389
398
  proxies=config.proxies,
399
+ verify_ssl=config.verify_ssl,
390
400
  )
391
401
  if response.status_code == 200:
392
402
  fields2add, _ = extract_fields(response, dataset_name)
@@ -415,6 +425,7 @@ class APISource(Source, ABC):
415
425
  tot_url,
416
426
  token=config.token,
417
427
  proxies=config.proxies,
428
+ verify_ssl=config.verify_ssl,
418
429
  )
419
430
  else:
420
431
  response = request_call(
@@ -422,6 +433,7 @@ class APISource(Source, ABC):
422
433
  username=config.username,
423
434
  password=config.password,
424
435
  proxies=config.proxies,
436
+ verify_ssl=config.verify_ssl,
425
437
  )
426
438
  if response.status_code == 200:
427
439
  fields2add, _ = extract_fields(response, dataset_name)
@@ -59,17 +59,21 @@ def request_call(
59
59
  username: Optional[str] = None,
60
60
  password: Optional[str] = None,
61
61
  proxies: Optional[dict] = None,
62
+ verify_ssl: bool = True,
62
63
  ) -> requests.Response:
63
64
  headers = {"accept": "application/json"}
64
65
  if username is not None and password is not None:
65
66
  return requests.get(
66
- url, headers=headers, auth=HTTPBasicAuth(username, password)
67
+ url,
68
+ headers=headers,
69
+ auth=HTTPBasicAuth(username, password),
70
+ verify=verify_ssl,
67
71
  )
68
72
  elif token is not None:
69
73
  headers["Authorization"] = f"{token}"
70
- return requests.get(url, proxies=proxies, headers=headers)
74
+ return requests.get(url, proxies=proxies, headers=headers, verify=verify_ssl)
71
75
  else:
72
- return requests.get(url, headers=headers)
76
+ return requests.get(url, headers=headers, verify=verify_ssl)
73
77
 
74
78
 
75
79
  def get_swag_json(
@@ -79,10 +83,16 @@ def get_swag_json(
79
83
  password: Optional[str] = None,
80
84
  swagger_file: str = "",
81
85
  proxies: Optional[dict] = None,
86
+ verify_ssl: bool = True,
82
87
  ) -> Dict:
83
88
  tot_url = url + swagger_file
84
89
  response = request_call(
85
- url=tot_url, token=token, username=username, password=password, proxies=proxies
90
+ url=tot_url,
91
+ token=token,
92
+ username=username,
93
+ password=password,
94
+ proxies=proxies,
95
+ verify_ssl=verify_ssl,
86
96
  )
87
97
 
88
98
  if response.status_code != 200:
@@ -127,37 +137,45 @@ def get_endpoints(sw_dict: dict) -> dict:
127
137
  check_sw_version(sw_dict)
128
138
 
129
139
  for p_k, p_o in sw_dict["paths"].items():
130
- method = list(p_o)[0]
131
- if "200" in p_o[method]["responses"]:
132
- base_res = p_o[method]["responses"]["200"]
133
- elif 200 in p_o[method]["responses"]:
134
- # if you read a plain yml file the 200 will be an integer
135
- base_res = p_o[method]["responses"][200]
136
- else:
137
- # the endpoint does not have a 200 response
138
- continue
139
-
140
- if "description" in p_o[method]:
141
- desc = p_o[method]["description"]
142
- elif "summary" in p_o[method]:
143
- desc = p_o[method]["summary"]
144
- else: # still testing
145
- desc = ""
146
-
147
- try:
148
- tags = p_o[method]["tags"]
149
- except KeyError:
150
- tags = []
151
-
152
- url_details[p_k] = {"description": desc, "tags": tags, "method": method}
153
-
154
- example_data = check_for_api_example_data(base_res, p_k)
155
- if example_data:
156
- url_details[p_k]["data"] = example_data
157
-
158
- # checking whether there are defined parameters to execute the call...
159
- if "parameters" in p_o[method]:
160
- url_details[p_k]["parameters"] = p_o[method]["parameters"]
140
+ for method, method_spec in p_o.items():
141
+ # skip non-method keys like "parameters"
142
+ if method.lower() not in [
143
+ "get",
144
+ "post",
145
+ "put",
146
+ "delete",
147
+ "patch",
148
+ "options",
149
+ "head",
150
+ ]:
151
+ continue
152
+
153
+ responses = method_spec.get("responses", {})
154
+ base_res = responses.get("200") or responses.get(200)
155
+ if not base_res:
156
+ # if there is no 200 response, we skip this method
157
+ continue
158
+
159
+ # if the description is not present, we will use the summary
160
+ # if both are not present, we will use an empty string
161
+ desc = method_spec.get("description") or method_spec.get("summary", "")
162
+
163
+ # if the tags are not present, we will use an empty list
164
+ tags = method_spec.get("tags", [])
165
+
166
+ url_details[p_k] = {
167
+ "description": desc,
168
+ "tags": tags,
169
+ "method": method.upper(),
170
+ }
171
+
172
+ example_data = check_for_api_example_data(base_res, p_k)
173
+ if example_data:
174
+ url_details[p_k]["data"] = example_data
175
+
176
+ # checking whether there are defined parameters to execute the call...
177
+ if "parameters" in p_o[method]:
178
+ url_details[p_k]["parameters"] = p_o[method]["parameters"]
161
179
 
162
180
  return dict(sorted(url_details.items()))
163
181
 
@@ -358,6 +376,7 @@ def get_tok(
358
376
  tok_url: str = "",
359
377
  method: str = "post",
360
378
  proxies: Optional[dict] = None,
379
+ verify_ssl: bool = True,
361
380
  ) -> str:
362
381
  """
363
382
  Trying to post username/password to get auth.
@@ -368,7 +387,7 @@ def get_tok(
368
387
  # this will make a POST call with username and password
369
388
  data = {"username": username, "password": password, "maxDuration": True}
370
389
  # url2post = url + "api/authenticate/"
371
- response = requests.post(url4req, proxies=proxies, json=data)
390
+ response = requests.post(url4req, proxies=proxies, json=data, verify=verify_ssl)
372
391
  if response.status_code == 200:
373
392
  cont = json.loads(response.content)
374
393
  if "token" in cont: # other authentication scheme
@@ -377,7 +396,7 @@ def get_tok(
377
396
  token = f"Bearer {cont['tokens']['access']}"
378
397
  elif method == "get":
379
398
  # this will make a GET call with username and password
380
- response = requests.get(url4req)
399
+ response = requests.get(url4req, verify=verify_ssl)
381
400
  if response.status_code == 200:
382
401
  cont = json.loads(response.content)
383
402
  token = cont["token"]
@@ -22,6 +22,7 @@ from datahub.ingestion.api.incremental_properties_helper import (
22
22
  from datahub.ingestion.glossary.classification_mixin import (
23
23
  ClassificationSourceConfigMixin,
24
24
  )
25
+ from datahub.ingestion.source.snowflake.constants import SnowflakeEdition
25
26
  from datahub.ingestion.source.snowflake.snowflake_connection import (
26
27
  SnowflakeConnectionConfig,
27
28
  )
@@ -326,6 +327,18 @@ class SnowflakeV2Config(
326
327
  " Map of share name -> details of share.",
327
328
  )
328
329
 
330
+ known_snowflake_edition: Optional[SnowflakeEdition] = Field(
331
+ default=None,
332
+ description="Explicitly specify the Snowflake edition (STANDARD or ENTERPRISE). If unset, the edition will be inferred automatically using 'SHOW TAGS'.",
333
+ )
334
+
335
+ # Allows empty containers to be ingested before datasets are added, avoiding permission errors
336
+ warn_no_datasets: bool = Field(
337
+ hidden_from_docs=True,
338
+ default=False,
339
+ description="If True, warns when no datasets are found during ingestion. If False, ingestion fails when no datasets are found.",
340
+ )
341
+
329
342
  include_assertion_results: bool = Field(
330
343
  default=False,
331
344
  description="Whether to ingest assertion run results for assertions created using Datahub"
@@ -9,6 +9,7 @@ import re
9
9
  from dataclasses import dataclass
10
10
  from typing import Dict, Iterable, List, Optional, Union
11
11
 
12
+ from datahub.configuration.time_window_config import BaseTimeWindowConfig
12
13
  from datahub.ingestion.api.common import PipelineContext
13
14
  from datahub.ingestion.api.decorators import (
14
15
  SupportStatus,
@@ -551,11 +552,15 @@ class SnowflakeV2Source(
551
552
  and len(discovered_views) == 0
552
553
  and len(discovered_streams) == 0
553
554
  ):
554
- self.structured_reporter.failure(
555
- GENERIC_PERMISSION_ERROR_KEY,
556
- "No tables/views/streams found. Please check permissions.",
557
- )
558
- return
555
+ if self.config.warn_no_datasets:
556
+ self.structured_reporter.warning(
557
+ "No tables/views/streams found. Verify dataset permissions if Snowflake source is not empty.",
558
+ )
559
+ else:
560
+ self.structured_reporter.failure(
561
+ GENERIC_PERMISSION_ERROR_KEY,
562
+ "No tables/views/streams found. Verify dataset permissions in Snowflake.",
563
+ )
559
564
 
560
565
  self.discovered_datasets = (
561
566
  discovered_tables + discovered_views + discovered_streams
@@ -571,7 +576,11 @@ class SnowflakeV2Source(
571
576
  queries_extractor = SnowflakeQueriesExtractor(
572
577
  connection=self.connection,
573
578
  config=SnowflakeQueriesExtractorConfig(
574
- window=self.config,
579
+ window=BaseTimeWindowConfig(
580
+ start_time=self.config.start_time,
581
+ end_time=self.config.end_time,
582
+ bucket_duration=self.config.bucket_duration,
583
+ ),
575
584
  temporary_tables_pattern=self.config.temporary_tables_pattern,
576
585
  include_lineage=self.config.include_table_lineage,
577
586
  include_usage_statistics=self.config.include_usage_stats,
@@ -732,6 +741,8 @@ class SnowflakeV2Source(
732
741
  return None
733
742
 
734
743
  def is_standard_edition(self) -> bool:
744
+ if self.config.known_snowflake_edition is not None:
745
+ return self.config.known_snowflake_edition == SnowflakeEdition.STANDARD
735
746
  try:
736
747
  self.connection.query(SnowflakeQuery.show_tags())
737
748
  return False