acryl-datahub 1.1.1rc3__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Files changed (226)
  1. {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/METADATA +2559 -2532
  2. {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/RECORD +226 -190
  3. {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/entry_points.txt +2 -0
  5. datahub/_version.py +1 -1
  6. datahub/api/entities/dataset/dataset.py +2 -1
  7. datahub/api/entities/external/__init__.py +0 -0
  8. datahub/api/entities/external/external_entities.py +239 -0
  9. datahub/api/entities/external/external_tag.py +145 -0
  10. datahub/api/entities/external/lake_formation_external_entites.py +161 -0
  11. datahub/api/entities/external/restricted_text.py +247 -0
  12. datahub/api/entities/external/unity_catalog_external_entites.py +173 -0
  13. datahub/cli/check_cli.py +88 -7
  14. datahub/cli/cli_utils.py +63 -0
  15. datahub/cli/container_cli.py +5 -0
  16. datahub/cli/delete_cli.py +124 -27
  17. datahub/cli/docker_check.py +107 -12
  18. datahub/cli/docker_cli.py +149 -227
  19. datahub/cli/exists_cli.py +0 -2
  20. datahub/cli/get_cli.py +0 -2
  21. datahub/cli/iceberg_cli.py +5 -0
  22. datahub/cli/ingest_cli.py +12 -16
  23. datahub/cli/migrate.py +2 -0
  24. datahub/cli/put_cli.py +1 -4
  25. datahub/cli/quickstart_versioning.py +50 -7
  26. datahub/cli/specific/assertions_cli.py +0 -4
  27. datahub/cli/specific/datacontract_cli.py +0 -3
  28. datahub/cli/specific/dataproduct_cli.py +0 -11
  29. datahub/cli/specific/dataset_cli.py +1 -8
  30. datahub/cli/specific/forms_cli.py +0 -4
  31. datahub/cli/specific/group_cli.py +0 -2
  32. datahub/cli/specific/structuredproperties_cli.py +1 -4
  33. datahub/cli/specific/user_cli.py +0 -2
  34. datahub/cli/state_cli.py +0 -2
  35. datahub/cli/timeline_cli.py +0 -2
  36. datahub/emitter/response_helper.py +86 -1
  37. datahub/emitter/rest_emitter.py +71 -13
  38. datahub/entrypoints.py +4 -3
  39. datahub/ingestion/api/decorators.py +15 -3
  40. datahub/ingestion/api/report.py +332 -3
  41. datahub/ingestion/api/sink.py +3 -0
  42. datahub/ingestion/api/source.py +48 -44
  43. datahub/ingestion/autogenerated/__init__.py +0 -0
  44. datahub/ingestion/autogenerated/capability_summary.json +3449 -0
  45. datahub/ingestion/autogenerated/lineage.json +401 -0
  46. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  47. datahub/ingestion/extractor/schema_util.py +13 -4
  48. datahub/ingestion/glossary/classification_mixin.py +5 -0
  49. datahub/ingestion/graph/client.py +100 -15
  50. datahub/ingestion/graph/config.py +1 -0
  51. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +20 -10
  52. datahub/ingestion/run/pipeline.py +54 -2
  53. datahub/ingestion/sink/datahub_rest.py +13 -0
  54. datahub/ingestion/source/abs/source.py +1 -1
  55. datahub/ingestion/source/aws/aws_common.py +4 -0
  56. datahub/ingestion/source/aws/glue.py +489 -244
  57. datahub/ingestion/source/aws/tag_entities.py +292 -0
  58. datahub/ingestion/source/azure/azure_common.py +2 -2
  59. datahub/ingestion/source/bigquery_v2/bigquery.py +50 -23
  60. datahub/ingestion/source/bigquery_v2/bigquery_config.py +1 -1
  61. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -0
  62. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +2 -0
  63. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  64. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  65. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  66. datahub/ingestion/source/cassandra/cassandra.py +1 -1
  67. datahub/ingestion/source/cassandra/cassandra_profiling.py +6 -5
  68. datahub/ingestion/source/common/subtypes.py +45 -0
  69. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  70. datahub/ingestion/source/data_lake_common/path_spec.py +10 -21
  71. datahub/ingestion/source/datahub/config.py +11 -0
  72. datahub/ingestion/source/datahub/datahub_database_reader.py +187 -35
  73. datahub/ingestion/source/datahub/datahub_source.py +1 -1
  74. datahub/ingestion/source/dbt/dbt_cloud.py +10 -2
  75. datahub/ingestion/source/dbt/dbt_common.py +6 -2
  76. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  77. datahub/ingestion/source/debug/__init__.py +0 -0
  78. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  79. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  80. datahub/ingestion/source/dremio/dremio_config.py +2 -0
  81. datahub/ingestion/source/dremio/dremio_reporting.py +23 -2
  82. datahub/ingestion/source/dremio/dremio_source.py +94 -81
  83. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  84. datahub/ingestion/source/file.py +3 -0
  85. datahub/ingestion/source/fivetran/fivetran.py +34 -26
  86. datahub/ingestion/source/gcs/gcs_source.py +13 -2
  87. datahub/ingestion/source/ge_data_profiler.py +76 -28
  88. datahub/ingestion/source/ge_profiling_config.py +11 -0
  89. datahub/ingestion/source/hex/api.py +26 -1
  90. datahub/ingestion/source/iceberg/iceberg.py +3 -1
  91. datahub/ingestion/source/identity/azure_ad.py +1 -1
  92. datahub/ingestion/source/identity/okta.py +1 -14
  93. datahub/ingestion/source/kafka/kafka.py +16 -0
  94. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  95. datahub/ingestion/source/kafka_connect/source_connectors.py +59 -4
  96. datahub/ingestion/source/looker/looker_source.py +1 -0
  97. datahub/ingestion/source/mlflow.py +11 -1
  98. datahub/ingestion/source/mock_data/__init__.py +0 -0
  99. datahub/ingestion/source/mock_data/datahub_mock_data.py +472 -0
  100. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  101. datahub/ingestion/source/mock_data/table_naming_helper.py +91 -0
  102. datahub/ingestion/source/nifi.py +1 -1
  103. datahub/ingestion/source/openapi.py +12 -0
  104. datahub/ingestion/source/openapi_parser.py +56 -37
  105. datahub/ingestion/source/powerbi/powerbi.py +1 -5
  106. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  107. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  108. datahub/ingestion/source/preset.py +2 -2
  109. datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -0
  110. datahub/ingestion/source/redshift/redshift.py +21 -1
  111. datahub/ingestion/source/redshift/usage.py +4 -3
  112. datahub/ingestion/source/s3/report.py +4 -2
  113. datahub/ingestion/source/s3/source.py +367 -115
  114. datahub/ingestion/source/sac/sac.py +3 -1
  115. datahub/ingestion/source/salesforce.py +6 -3
  116. datahub/ingestion/source/sigma/sigma.py +7 -1
  117. datahub/ingestion/source/slack/slack.py +2 -1
  118. datahub/ingestion/source/snowflake/snowflake_config.py +43 -7
  119. datahub/ingestion/source/snowflake/snowflake_queries.py +348 -82
  120. datahub/ingestion/source/snowflake/snowflake_summary.py +5 -0
  121. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  122. datahub/ingestion/source/snowflake/snowflake_utils.py +2 -7
  123. datahub/ingestion/source/snowflake/snowflake_v2.py +33 -8
  124. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  125. datahub/ingestion/source/sql/athena.py +119 -11
  126. datahub/ingestion/source/sql/athena_properties_extractor.py +777 -0
  127. datahub/ingestion/source/sql/clickhouse.py +3 -1
  128. datahub/ingestion/source/sql/cockroachdb.py +0 -1
  129. datahub/ingestion/source/sql/hana.py +3 -1
  130. datahub/ingestion/source/sql/hive_metastore.py +3 -11
  131. datahub/ingestion/source/sql/mariadb.py +0 -1
  132. datahub/ingestion/source/sql/mssql/source.py +239 -34
  133. datahub/ingestion/source/sql/mysql.py +0 -1
  134. datahub/ingestion/source/sql/oracle.py +1 -1
  135. datahub/ingestion/source/sql/postgres.py +0 -1
  136. datahub/ingestion/source/sql/sql_common.py +121 -34
  137. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  138. datahub/ingestion/source/sql/teradata.py +997 -235
  139. datahub/ingestion/source/sql/vertica.py +10 -6
  140. datahub/ingestion/source/sql_queries.py +2 -2
  141. datahub/ingestion/source/state/stateful_ingestion_base.py +1 -1
  142. datahub/ingestion/source/superset.py +58 -3
  143. datahub/ingestion/source/tableau/tableau.py +58 -37
  144. datahub/ingestion/source/tableau/tableau_common.py +4 -2
  145. datahub/ingestion/source/tableau/tableau_constant.py +0 -4
  146. datahub/ingestion/source/unity/config.py +5 -0
  147. datahub/ingestion/source/unity/proxy.py +118 -0
  148. datahub/ingestion/source/unity/source.py +195 -17
  149. datahub/ingestion/source/unity/tag_entities.py +295 -0
  150. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  151. datahub/ingestion/source/usage/starburst_trino_usage.py +3 -0
  152. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  153. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  154. datahub/metadata/_internal_schema_classes.py +1446 -559
  155. datahub/metadata/_urns/urn_defs.py +1721 -1553
  156. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  157. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  158. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  159. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
  160. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +27 -0
  161. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +4 -0
  162. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +25 -0
  163. datahub/metadata/schema.avsc +18055 -17802
  164. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  165. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  166. datahub/metadata/schemas/Applications.avsc +38 -0
  167. datahub/metadata/schemas/ChartKey.avsc +1 -0
  168. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  169. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  170. datahub/metadata/schemas/CorpUserSettings.avsc +41 -0
  171. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  172. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  173. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  174. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  175. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +200 -0
  176. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  177. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +175 -0
  178. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  179. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  180. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  181. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  182. datahub/metadata/schemas/DataProductKey.avsc +1 -0
  183. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  184. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  185. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  186. datahub/metadata/schemas/GlobalSettingsInfo.avsc +62 -0
  187. datahub/metadata/schemas/GlossaryTermKey.avsc +1 -0
  188. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  189. datahub/metadata/schemas/LogicalParent.avsc +140 -0
  190. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  191. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  192. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  193. datahub/metadata/schemas/MLModelGroupKey.avsc +9 -0
  194. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  195. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  196. datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -1
  197. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  198. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  199. datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
  200. datahub/sdk/__init__.py +6 -0
  201. datahub/sdk/_all_entities.py +11 -0
  202. datahub/sdk/_shared.py +118 -1
  203. datahub/sdk/chart.py +315 -0
  204. datahub/sdk/container.py +7 -0
  205. datahub/sdk/dashboard.py +432 -0
  206. datahub/sdk/dataflow.py +309 -0
  207. datahub/sdk/datajob.py +367 -0
  208. datahub/sdk/dataset.py +8 -2
  209. datahub/sdk/entity_client.py +90 -2
  210. datahub/sdk/lineage_client.py +683 -82
  211. datahub/sdk/main_client.py +46 -16
  212. datahub/sdk/mlmodel.py +101 -38
  213. datahub/sdk/mlmodelgroup.py +7 -0
  214. datahub/sdk/search_client.py +4 -3
  215. datahub/specific/chart.py +1 -1
  216. datahub/specific/dataproduct.py +4 -0
  217. datahub/sql_parsing/sql_parsing_aggregator.py +29 -17
  218. datahub/sql_parsing/sqlglot_lineage.py +62 -13
  219. datahub/telemetry/telemetry.py +17 -11
  220. datahub/testing/sdk_v2_helpers.py +7 -1
  221. datahub/upgrade/upgrade.py +46 -13
  222. datahub/utilities/server_config_util.py +8 -0
  223. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  224. datahub/utilities/stats_collections.py +4 -0
  225. {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/licenses/LICENSE +0 -0
  226. {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/openapi_parser.py

@@ -59,17 +59,21 @@ def request_call(
     username: Optional[str] = None,
     password: Optional[str] = None,
     proxies: Optional[dict] = None,
+    verify_ssl: bool = True,
 ) -> requests.Response:
     headers = {"accept": "application/json"}
     if username is not None and password is not None:
         return requests.get(
-            url, headers=headers, auth=HTTPBasicAuth(username, password)
+            url,
+            headers=headers,
+            auth=HTTPBasicAuth(username, password),
+            verify=verify_ssl,
         )
     elif token is not None:
         headers["Authorization"] = f"{token}"
-        return requests.get(url, proxies=proxies, headers=headers)
+        return requests.get(url, proxies=proxies, headers=headers, verify=verify_ssl)
     else:
-        return requests.get(url, headers=headers)
+        return requests.get(url, headers=headers, verify=verify_ssl)
 
 
 def get_swag_json(
@@ -79,10 +83,16 @@ def get_swag_json(
     password: Optional[str] = None,
     swagger_file: str = "",
     proxies: Optional[dict] = None,
+    verify_ssl: bool = True,
 ) -> Dict:
     tot_url = url + swagger_file
     response = request_call(
-        url=tot_url, token=token, username=username, password=password, proxies=proxies
+        url=tot_url,
+        token=token,
+        username=username,
+        password=password,
+        proxies=proxies,
+        verify_ssl=verify_ssl,
     )
 
     if response.status_code != 200:
@@ -127,37 +137,45 @@ def get_endpoints(sw_dict: dict) -> dict:
     check_sw_version(sw_dict)
 
     for p_k, p_o in sw_dict["paths"].items():
-        method = list(p_o)[0]
-        if "200" in p_o[method]["responses"]:
-            base_res = p_o[method]["responses"]["200"]
-        elif 200 in p_o[method]["responses"]:
-            # if you read a plain yml file the 200 will be an integer
-            base_res = p_o[method]["responses"][200]
-        else:
-            # the endpoint does not have a 200 response
-            continue
-
-        if "description" in p_o[method]:
-            desc = p_o[method]["description"]
-        elif "summary" in p_o[method]:
-            desc = p_o[method]["summary"]
-        else:  # still testing
-            desc = ""
-
-        try:
-            tags = p_o[method]["tags"]
-        except KeyError:
-            tags = []
-
-        url_details[p_k] = {"description": desc, "tags": tags, "method": method}
-
-        example_data = check_for_api_example_data(base_res, p_k)
-        if example_data:
-            url_details[p_k]["data"] = example_data
-
-        # checking whether there are defined parameters to execute the call...
-        if "parameters" in p_o[method]:
-            url_details[p_k]["parameters"] = p_o[method]["parameters"]
+        for method, method_spec in p_o.items():
+            # skip non-method keys like "parameters"
+            if method.lower() not in [
+                "get",
+                "post",
+                "put",
+                "delete",
+                "patch",
+                "options",
+                "head",
+            ]:
+                continue
+
+            responses = method_spec.get("responses", {})
+            base_res = responses.get("200") or responses.get(200)
+            if not base_res:
+                # if there is no 200 response, we skip this method
+                continue
+
+            # if the description is not present, we will use the summary
+            # if both are not present, we will use an empty string
+            desc = method_spec.get("description") or method_spec.get("summary", "")
+
+            # if the tags are not present, we will use an empty list
+            tags = method_spec.get("tags", [])
+
+            url_details[p_k] = {
+                "description": desc,
+                "tags": tags,
+                "method": method.upper(),
+            }
+
+            example_data = check_for_api_example_data(base_res, p_k)
+            if example_data:
+                url_details[p_k]["data"] = example_data
+
+            # checking whether there are defined parameters to execute the call...
+            if "parameters" in p_o[method]:
+                url_details[p_k]["parameters"] = p_o[method]["parameters"]
 
     return dict(sorted(url_details.items()))
 
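Note: the rewritten loop fixes a real gap. The old code inspected only the first key of each path object (`method = list(p_o)[0]`), so a path whose first key was a non-method entry such as `parameters`, or one exposing several HTTP verbs, was mishandled. A hypothetical OpenAPI fragment the new code now handles correctly:

    paths = {
        "/pets": {
            "parameters": [],  # non-method key: skipped by the allow-list
            "get": {
                "summary": "List pets",  # used because "description" is absent
                "responses": {"200": {"description": "OK"}},
            },
            "post": {
                # no 200 response, so this method is skipped
                "responses": {"201": {"description": "Created"}},
            },
        }
    }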
@@ -358,6 +376,7 @@ def get_tok(
     tok_url: str = "",
     method: str = "post",
     proxies: Optional[dict] = None,
+    verify_ssl: bool = True,
 ) -> str:
     """
     Trying to post username/password to get auth.
@@ -368,7 +387,7 @@ def get_tok(
         # this will make a POST call with username and password
         data = {"username": username, "password": password, "maxDuration": True}
         # url2post = url + "api/authenticate/"
-        response = requests.post(url4req, proxies=proxies, json=data)
+        response = requests.post(url4req, proxies=proxies, json=data, verify=verify_ssl)
         if response.status_code == 200:
             cont = json.loads(response.content)
             if "token" in cont:  # other authentication scheme
@@ -377,7 +396,7 @@ def get_tok(
                 token = f"Bearer {cont['tokens']['access']}"
     elif method == "get":
         # this will make a GET call with username and password
-        response = requests.get(url4req)
+        response = requests.get(url4req, verify=verify_ssl)
         if response.status_code == 200:
             cont = json.loads(response.content)
             token = cont["token"]
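Together these hunks thread a single `verify_ssl` flag from the public helpers down to every `requests` call. A minimal usage sketch (the URL, token, and swagger file name are placeholders):

    sw_dict = get_swag_json(
        url="https://internal-api.example.com/",
        token="Bearer <access-token>",
        swagger_file="swagger.json",
        verify_ssl=False,  # accept self-signed certificates
    )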
datahub/ingestion/source/powerbi/powerbi.py

@@ -294,8 +294,6 @@ class Mapper:
         logger.debug(f"Dataset urn = {ds_urn} and its lineage = {upstream_lineage}")
 
         mcp = MetadataChangeProposalWrapper(
-            entityType=Constant.DATASET,
-            changeType=ChangeTypeClass.UPSERT,
             entityUrn=ds_urn,
             aspect=upstream_lineage_class,
         )
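This and the following hunks drop arguments that `MetadataChangeProposalWrapper` can infer on its own: it derives `entityType` from the URN, takes `aspectName` from the aspect instance, and defaults `changeType` to `UPSERT`. A minimal sketch of the equivalent call (the URN is a placeholder):

    from datahub.emitter.mcp import MetadataChangeProposalWrapper
    from datahub.metadata.schema_classes import StatusClass

    # entityType ("dataset"), aspectName ("status"), and changeType (UPSERT)
    # are all inferred; only the URN and the aspect are required.
    mcp = MetadataChangeProposalWrapper(
        entityUrn="urn:li:dataset:(urn:li:dataPlatform:powerbi,db.table,PROD)",
        aspect=StatusClass(removed=False),
    )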
@@ -538,9 +536,7 @@ class Mapper:
         profile.columnCount = table.column_count
 
         mcp = MetadataChangeProposalWrapper(
-            entityType="dataset",
             entityUrn=ds_urn,
-            aspectName="datasetProfile",
             aspect=profile,
         )
         dataset_mcps.append(mcp)
@@ -796,7 +792,6 @@ class Mapper:
             guid=container_key.guid(),
         )
         mcp = MetadataChangeProposalWrapper(
-            changeType=ChangeTypeClass.UPSERT,
             entityUrn=entity_urn,
             aspect=ContainerClass(container=f"{container_urn}"),
         )
@@ -1253,6 +1248,7 @@ class Mapper:
     SourceCapability.DATA_PROFILING,
     "Optionally enabled via configuration profiling.enabled",
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin extracts the following:
datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py

@@ -673,7 +673,6 @@ class PowerBiAPI:
         fill_dashboard_tags()
         self._fill_independent_datasets(workspace=workspace)
 
-    # flake8: noqa: C901
     def fill_workspaces(
         self, workspaces: List[Workspace], reporter: PowerBiDashboardSourceReport
     ) -> Iterable[Workspace]:
datahub/ingestion/source/powerbi_report_server/report_server.py

@@ -52,7 +52,6 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
 from datahub.metadata.com.linkedin.pegasus2avro.common import ChangeAuditStamps
 from datahub.metadata.schema_classes import (
     BrowsePathsClass,
-    ChangeTypeClass,
     CorpUserInfoClass,
     CorpUserKeyClass,
     DashboardInfoClass,
@@ -243,20 +242,14 @@ class Mapper:
 
     @staticmethod
     def new_mcp(
-        entity_type,
         entity_urn,
-        aspect_name,
         aspect,
-        change_type=ChangeTypeClass.UPSERT,
     ):
         """
         Create MCP
         """
         return MetadataChangeProposalWrapper(
-            entityType=entity_type,
-            changeType=change_type,
             entityUrn=entity_urn,
-            aspectName=aspect_name,
             aspect=aspect,
         )
 
@@ -343,17 +336,13 @@ class Mapper:
         )
 
         info_mcp = self.new_mcp(
-            entity_type=Constant.DASHBOARD,
             entity_urn=dashboard_urn,
-            aspect_name=Constant.DASHBOARD_INFO,
             aspect=dashboard_info_cls,
         )
 
         # removed status mcp
         removed_status_mcp = self.new_mcp(
-            entity_type=Constant.DASHBOARD,
             entity_urn=dashboard_urn,
-            aspect_name=Constant.STATUS,
             aspect=StatusClass(removed=False),
         )
 
@@ -365,9 +354,7 @@ class Mapper:
 
         # Dashboard key
         dashboard_key_mcp = self.new_mcp(
-            entity_type=Constant.DASHBOARD,
             entity_urn=dashboard_urn,
-            aspect_name=Constant.DASHBOARD_KEY,
             aspect=dashboard_key_cls,
         )
 
@@ -378,9 +365,7 @@ class Mapper:
         ownership = OwnershipClass(owners=owners)
         # Dashboard owner MCP
         owner_mcp = self.new_mcp(
-            entity_type=Constant.DASHBOARD,
             entity_urn=dashboard_urn,
-            aspect_name=Constant.OWNERSHIP,
             aspect=ownership,
         )
 
@@ -396,9 +381,7 @@ class Mapper:
             ]
         )
         browse_path_mcp = self.new_mcp(
-            entity_type=Constant.DASHBOARD,
             entity_urn=dashboard_urn,
-            aspect_name=Constant.BROWSERPATH,
             aspect=browse_path,
         )
 
@@ -429,27 +412,21 @@ class Mapper:
         )
 
         info_mcp = self.new_mcp(
-            entity_type=Constant.CORP_USER,
             entity_urn=user_urn,
-            aspect_name=Constant.CORP_USER_INFO,
             aspect=user_info_instance,
         )
         user_mcps.append(info_mcp)
 
         # removed status mcp
         status_mcp = self.new_mcp(
-            entity_type=Constant.CORP_USER,
             entity_urn=user_urn,
-            aspect_name=Constant.STATUS,
             aspect=StatusClass(removed=False),
         )
         user_mcps.append(status_mcp)
         user_key = CorpUserKeyClass(username=user.username)
 
         user_key_mcp = self.new_mcp(
-            entity_type=Constant.CORP_USER,
             entity_urn=user_urn,
-            aspect_name=Constant.CORP_USER_KEY,
             aspect=user_key,
         )
         user_mcps.append(user_key_mcp)
datahub/ingestion/source/preset.py

@@ -69,9 +69,9 @@ class PresetConfig(SupersetConfig):
 
 @platform_name("Preset")
 @config_class(PresetConfig)
-@support_status(SupportStatus.TESTING)
+@support_status(SupportStatus.CERTIFIED)
 @capability(
-    SourceCapability.DELETION_DETECTION, "Optionally enabled via stateful_ingestion"
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
 )
 class PresetSource(SupersetSource):
     """
datahub/ingestion/source/qlik_sense/qlik_sense.py

@@ -109,6 +109,7 @@ logger = logging.getLogger(__name__)
     "Enabled by default, configured using `ingest_owner`",
 )
 @capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class QlikSenseSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin extracts the following:
datahub/ingestion/source/redshift/redshift.py

@@ -10,6 +10,7 @@ import humanfriendly
 import pydantic
 import redshift_connector
 
+from datahub.configuration.common import AllowDenyPattern
 from datahub.configuration.pattern_utils import is_schema_allowed
 from datahub.emitter.mce_builder import (
     make_data_platform_urn,
@@ -140,12 +141,15 @@ logger: logging.Logger = logging.getLogger(__name__)
     SourceCapability.USAGE_STATS,
     "Enabled by default, can be disabled via configuration `include_usage_statistics`",
 )
-@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 @capability(
     SourceCapability.CLASSIFICATION,
     "Optionally enabled via `classification.enabled`",
     supported=True,
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin extracts the following:
@@ -354,7 +358,23 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
             ).workunit_processor,
         ]
 
+    def _warn_deprecated_configs(self):
+        if (
+            self.config.match_fully_qualified_names is not None
+            and not self.config.match_fully_qualified_names
+            and self.config.schema_pattern is not None
+            and self.config.schema_pattern != AllowDenyPattern.allow_all()
+        ):
+            self.report.report_warning(
+                message="Please update `schema_pattern` to match against fully qualified schema name `<database_name>.<schema_name>` and set config `match_fully_qualified_names : True`."
+                "Current default `match_fully_qualified_names: False` is only to maintain backward compatibility. "
+                "The config option `match_fully_qualified_names` will be removed in future and the default behavior will be like `match_fully_qualified_names: True`.",
+                context="Config option deprecation warning",
+                title="Config option deprecation warning",
+            )
+
     def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:
+        self._warn_deprecated_configs()
         connection = self._try_get_redshift_connection(self.config)
 
         if connection is None:
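For recipes that hit this new warning, the forward-compatible configuration opts in to `match_fully_qualified_names: True` and writes `schema_pattern` entries against `<database_name>.<schema_name>`. A hypothetical fragment, shown as the dict a YAML recipe parses to (pattern values are placeholders):

    redshift_source_config = {
        "match_fully_qualified_names": True,
        "schema_pattern": {
            # patterns now match "<database_name>.<schema_name>"
            "allow": [r"^analytics\.public$"],
        },
    }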
datahub/ingestion/source/redshift/usage.py

@@ -182,9 +182,10 @@ class RedshiftUsageExtractor:
         self.report.num_operational_stats_filtered = 0
 
         if self.config.include_operational_stats:
-            with self.report.new_stage(
-                USAGE_EXTRACTION_OPERATIONAL_STATS
-            ), PerfTimer() as timer:
+            with (
+                self.report.new_stage(USAGE_EXTRACTION_OPERATIONAL_STATS),
+                PerfTimer() as timer,
+            ):
                 # Generate operation aspect workunits
                 yield from self._gen_operation_aspect_workunits(
                     self.connection, all_tables
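The parenthesized multi-item `with` is purely a readability change, but it does raise the syntax floor: the parenthesized form is documented for Python 3.10+ (CPython 3.9's parser already accepts it). Both forms below are equivalent:

    from contextlib import nullcontext

    # pre-3.10 style: one long line or backslash continuations
    with nullcontext("a") as first, nullcontext("b") as second:
        print(first, second)

    # parenthesized style used in this change
    with (
        nullcontext("a") as first,
        nullcontext("b") as second,
    ):
        print(first, second)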
datahub/ingestion/source/s3/report.py

@@ -1,19 +1,21 @@
 import dataclasses
 from dataclasses import field as dataclass_field
-from typing import List
 
 from datahub.ingestion.source.state.stale_entity_removal_handler import (
     StaleEntityRemovalSourceReport,
 )
+from datahub.utilities.lossy_collections import LossyList
 
 
 @dataclasses.dataclass
 class DataLakeSourceReport(StaleEntityRemovalSourceReport):
     files_scanned = 0
-    filtered: List[str] = dataclass_field(default_factory=list)
+    filtered: LossyList[str] = dataclass_field(default_factory=LossyList)
+    number_of_files_filtered: int = 0
 
     def report_file_scanned(self) -> None:
         self.files_scanned += 1
 
     def report_file_dropped(self, file: str) -> None:
         self.filtered.append(file)
+        self.number_of_files_filtered += 1
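Swapping the plain list for `LossyList` keeps the ingestion report bounded when a bucket has millions of skipped files. Assuming `LossyList` retains only a capped sample of appended items (its exact cap is an implementation detail), the new counter preserves the true total:

    report = DataLakeSourceReport()
    for i in range(100_000):
        report.report_file_dropped(f"s3://bucket/skipped/{i}.csv")

    # report.filtered holds only a bounded sample of paths,
    # while number_of_files_filtered keeps the exact count.
    assert report.number_of_files_filtered == 100_000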