acryl-datahub 1.1.1rc3__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/METADATA +2559 -2532
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/RECORD +226 -190
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/entry_points.txt +2 -0
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +2 -1
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +239 -0
- datahub/api/entities/external/external_tag.py +145 -0
- datahub/api/entities/external/lake_formation_external_entites.py +161 -0
- datahub/api/entities/external/restricted_text.py +247 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +173 -0
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +124 -27
- datahub/cli/docker_check.py +107 -12
- datahub/cli/docker_cli.py +149 -227
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +12 -16
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +50 -7
- datahub/cli/specific/assertions_cli.py +0 -4
- datahub/cli/specific/datacontract_cli.py +0 -3
- datahub/cli/specific/dataproduct_cli.py +0 -11
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +0 -2
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/emitter/response_helper.py +86 -1
- datahub/emitter/rest_emitter.py +71 -13
- datahub/entrypoints.py +4 -3
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +332 -3
- datahub/ingestion/api/sink.py +3 -0
- datahub/ingestion/api/source.py +48 -44
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3449 -0
- datahub/ingestion/autogenerated/lineage.json +401 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +100 -15
- datahub/ingestion/graph/config.py +1 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +20 -10
- datahub/ingestion/run/pipeline.py +54 -2
- datahub/ingestion/sink/datahub_rest.py +13 -0
- datahub/ingestion/source/abs/source.py +1 -1
- datahub/ingestion/source/aws/aws_common.py +4 -0
- datahub/ingestion/source/aws/glue.py +489 -244
- datahub/ingestion/source/aws/tag_entities.py +292 -0
- datahub/ingestion/source/azure/azure_common.py +2 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +50 -23
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -0
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +2 -0
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/cassandra/cassandra_profiling.py +6 -5
- datahub/ingestion/source/common/subtypes.py +45 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +10 -21
- datahub/ingestion/source/datahub/config.py +11 -0
- datahub/ingestion/source/datahub/datahub_database_reader.py +187 -35
- datahub/ingestion/source/datahub/datahub_source.py +1 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +10 -2
- datahub/ingestion/source/dbt/dbt_common.py +6 -2
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_config.py +2 -0
- datahub/ingestion/source/dremio/dremio_reporting.py +23 -2
- datahub/ingestion/source/dremio/dremio_source.py +94 -81
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/fivetran.py +34 -26
- datahub/ingestion/source/gcs/gcs_source.py +13 -2
- datahub/ingestion/source/ge_data_profiler.py +76 -28
- datahub/ingestion/source/ge_profiling_config.py +11 -0
- datahub/ingestion/source/hex/api.py +26 -1
- datahub/ingestion/source/iceberg/iceberg.py +3 -1
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +59 -4
- datahub/ingestion/source/looker/looker_source.py +1 -0
- datahub/ingestion/source/mlflow.py +11 -1
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +472 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +91 -0
- datahub/ingestion/source/nifi.py +1 -1
- datahub/ingestion/source/openapi.py +12 -0
- datahub/ingestion/source/openapi_parser.py +56 -37
- datahub/ingestion/source/powerbi/powerbi.py +1 -5
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/preset.py +2 -2
- datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -0
- datahub/ingestion/source/redshift/redshift.py +21 -1
- datahub/ingestion/source/redshift/usage.py +4 -3
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +367 -115
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +6 -3
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +2 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +43 -7
- datahub/ingestion/source/snowflake/snowflake_queries.py +348 -82
- datahub/ingestion/source/snowflake/snowflake_summary.py +5 -0
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +2 -7
- datahub/ingestion/source/snowflake/snowflake_v2.py +33 -8
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +119 -11
- datahub/ingestion/source/sql/athena_properties_extractor.py +777 -0
- datahub/ingestion/source/sql/clickhouse.py +3 -1
- datahub/ingestion/source/sql/cockroachdb.py +0 -1
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive_metastore.py +3 -11
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/source.py +239 -34
- datahub/ingestion/source/sql/mysql.py +0 -1
- datahub/ingestion/source/sql/oracle.py +1 -1
- datahub/ingestion/source/sql/postgres.py +0 -1
- datahub/ingestion/source/sql/sql_common.py +121 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/teradata.py +997 -235
- datahub/ingestion/source/sql/vertica.py +10 -6
- datahub/ingestion/source/sql_queries.py +2 -2
- datahub/ingestion/source/state/stateful_ingestion_base.py +1 -1
- datahub/ingestion/source/superset.py +58 -3
- datahub/ingestion/source/tableau/tableau.py +58 -37
- datahub/ingestion/source/tableau/tableau_common.py +4 -2
- datahub/ingestion/source/tableau/tableau_constant.py +0 -4
- datahub/ingestion/source/unity/config.py +5 -0
- datahub/ingestion/source/unity/proxy.py +118 -0
- datahub/ingestion/source/unity/source.py +195 -17
- datahub/ingestion/source/unity/tag_entities.py +295 -0
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +3 -0
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +1446 -559
- datahub/metadata/_urns/urn_defs.py +1721 -1553
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +27 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +25 -0
- datahub/metadata/schema.avsc +18055 -17802
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserSettings.avsc +41 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +200 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +175 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +1 -0
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +62 -0
- datahub/metadata/schemas/GlossaryTermKey.avsc +1 -0
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/LogicalParent.avsc +140 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +9 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -1
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/sdk/__init__.py +6 -0
- datahub/sdk/_all_entities.py +11 -0
- datahub/sdk/_shared.py +118 -1
- datahub/sdk/chart.py +315 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +432 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +8 -2
- datahub/sdk/entity_client.py +90 -2
- datahub/sdk/lineage_client.py +683 -82
- datahub/sdk/main_client.py +46 -16
- datahub/sdk/mlmodel.py +101 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +4 -3
- datahub/specific/chart.py +1 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +29 -17
- datahub/sql_parsing/sqlglot_lineage.py +62 -13
- datahub/telemetry/telemetry.py +17 -11
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +46 -13
- datahub/utilities/server_config_util.py +8 -0
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/openapi_parser.py
@@ -59,17 +59,21 @@ def request_call(
     username: Optional[str] = None,
     password: Optional[str] = None,
     proxies: Optional[dict] = None,
+    verify_ssl: bool = True,
 ) -> requests.Response:
     headers = {"accept": "application/json"}
     if username is not None and password is not None:
         return requests.get(
-            url,
+            url,
+            headers=headers,
+            auth=HTTPBasicAuth(username, password),
+            verify=verify_ssl,
         )
     elif token is not None:
         headers["Authorization"] = f"{token}"
-        return requests.get(url, proxies=proxies, headers=headers)
+        return requests.get(url, proxies=proxies, headers=headers, verify=verify_ssl)
     else:
-        return requests.get(url, headers=headers)
+        return requests.get(url, headers=headers, verify=verify_ssl)


 def get_swag_json(
@@ -79,10 +83,16 @@ def get_swag_json(
     password: Optional[str] = None,
     swagger_file: str = "",
     proxies: Optional[dict] = None,
+    verify_ssl: bool = True,
 ) -> Dict:
     tot_url = url + swagger_file
     response = request_call(
-        url=tot_url,
+        url=tot_url,
+        token=token,
+        username=username,
+        password=password,
+        proxies=proxies,
+        verify_ssl=verify_ssl,
     )

     if response.status_code != 200:
@@ -127,37 +137,45 @@ def get_endpoints(sw_dict: dict) -> dict:
     check_sw_version(sw_dict)

     for p_k, p_o in sw_dict["paths"].items():
-        method
         [old lines 131-155 were not captured in this extract]
-        url_details[p_k]
         [old lines 157-160 were not captured in this extract]
+        for method, method_spec in p_o.items():
+            # skip non-method keys like "parameters"
+            if method.lower() not in [
+                "get",
+                "post",
+                "put",
+                "delete",
+                "patch",
+                "options",
+                "head",
+            ]:
+                continue
+
+            responses = method_spec.get("responses", {})
+            base_res = responses.get("200") or responses.get(200)
+            if not base_res:
+                # if there is no 200 response, we skip this method
+                continue
+
+            # if the description is not present, we will use the summary
+            # if both are not present, we will use an empty string
+            desc = method_spec.get("description") or method_spec.get("summary", "")
+
+            # if the tags are not present, we will use an empty list
+            tags = method_spec.get("tags", [])
+
+            url_details[p_k] = {
+                "description": desc,
+                "tags": tags,
+                "method": method.upper(),
+            }
+
+            example_data = check_for_api_example_data(base_res, p_k)
+            if example_data:
+                url_details[p_k]["data"] = example_data
+
+            # checking whether there are defined parameters to execute the call...
+            if "parameters" in p_o[method]:
+                url_details[p_k]["parameters"] = p_o[method]["parameters"]

     return dict(sorted(url_details.items()))
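Note on the rewritten get_endpoints loop: it now visits every HTTP method under each path, skipping non-method keys and methods without a 200 response. A small illustration of the resulting url_details shape, assuming check_for_api_example_data returns nothing for this invented spec:

    sw_dict = {
        "openapi": "3.0.0",
        "paths": {
            "/pets": {
                "parameters": [],  # non-method key: filtered out by the method check
                "get": {
                    "summary": "List pets",  # used as the description fallback
                    "tags": ["pets"],
                    "responses": {"200": {"description": "ok"}},
                },
                "post": {
                    # no 200 response, so this method is skipped
                    "responses": {"201": {"description": "created"}},
                },
            },
        },
    }

    # get_endpoints(sw_dict) would then return roughly:
    # {"/pets": {"description": "List pets", "tags": ["pets"], "method": "GET"}}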
@@ -358,6 +376,7 @@ def get_tok(
     tok_url: str = "",
     method: str = "post",
     proxies: Optional[dict] = None,
+    verify_ssl: bool = True,
 ) -> str:
     """
     Trying to post username/password to get auth.
@@ -368,7 +387,7 @@ def get_tok(
         # this will make a POST call with username and password
         data = {"username": username, "password": password, "maxDuration": True}
         # url2post = url + "api/authenticate/"
-        response = requests.post(url4req, proxies=proxies, json=data)
+        response = requests.post(url4req, proxies=proxies, json=data, verify=verify_ssl)
         if response.status_code == 200:
             cont = json.loads(response.content)
             if "token" in cont:  # other authentication scheme
@@ -377,7 +396,7 @@ def get_tok(
                 token = f"Bearer {cont['tokens']['access']}"
     elif method == "get":
         # this will make a GET call with username and password
-        response = requests.get(url4req)
+        response = requests.get(url4req, verify=verify_ssl)
         if response.status_code == 200:
             cont = json.loads(response.content)
             token = cont["token"]
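Taken together, these openapi_parser.py hunks thread a single verify_ssl flag (defaulting to True, so existing callers keep strict TLS verification) down to every requests call. A caller-side sketch — the endpoint and credentials are made up:

    from datahub.ingestion.source.openapi_parser import get_swag_json

    # A deployment behind a self-signed certificate can now opt out of
    # certificate verification; the flag is forwarded to requests as verify=False.
    spec = get_swag_json(
        url="https://api.internal.example.com/",
        username="svc_user",
        password="s3cret",
        swagger_file="swagger.json",
        verify_ssl=False,
    )
    print(sorted(spec.get("paths", {})))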
datahub/ingestion/source/powerbi/powerbi.py
@@ -294,8 +294,6 @@ class Mapper:
         logger.debug(f"Dataset urn = {ds_urn} and its lineage = {upstream_lineage}")

         mcp = MetadataChangeProposalWrapper(
-            entityType=Constant.DATASET,
-            changeType=ChangeTypeClass.UPSERT,
             entityUrn=ds_urn,
             aspect=upstream_lineage_class,
         )
@@ -538,9 +536,7 @@ class Mapper:
         profile.columnCount = table.column_count

         mcp = MetadataChangeProposalWrapper(
-            entityType="dataset",
             entityUrn=ds_urn,
-            aspectName="datasetProfile",
             aspect=profile,
         )
         dataset_mcps.append(mcp)
@@ -796,7 +792,6 @@ class Mapper:
             guid=container_key.guid(),
         )
         mcp = MetadataChangeProposalWrapper(
-            changeType=ChangeTypeClass.UPSERT,
             entityUrn=entity_urn,
             aspect=ContainerClass(container=f"{container_urn}"),
         )
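These Mapper hunks all lean on the same convenience: MetadataChangeProposalWrapper derives entityType from the URN, infers aspectName from the aspect class, and defaults changeType to UPSERT, so spelling those out is redundant. A minimal sketch (the URN is illustrative):

    from datahub.emitter.mcp import MetadataChangeProposalWrapper
    from datahub.metadata.schema_classes import StatusClass

    # entityType ("dataset"), aspectName ("status"), and changeType (UPSERT)
    # are all filled in automatically from the two arguments below.
    mcp = MetadataChangeProposalWrapper(
        entityUrn="urn:li:dataset:(urn:li:dataPlatform:powerbi,demo.table,PROD)",
        aspect=StatusClass(removed=False),
    )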
@@ -1253,6 +1248,7 @@
     SourceCapability.DATA_PROFILING,
     "Optionally enabled via configuration profiling.enabled",
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin extracts the following:
datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py
@@ -673,7 +673,6 @@ class PowerBiAPI:
         fill_dashboard_tags()
         self._fill_independent_datasets(workspace=workspace)

-    # flake8: noqa: C901
     def fill_workspaces(
         self, workspaces: List[Workspace], reporter: PowerBiDashboardSourceReport
     ) -> Iterable[Workspace]:
datahub/ingestion/source/powerbi_report_server/report_server.py
@@ -52,7 +52,6 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
 from datahub.metadata.com.linkedin.pegasus2avro.common import ChangeAuditStamps
 from datahub.metadata.schema_classes import (
     BrowsePathsClass,
-    ChangeTypeClass,
     CorpUserInfoClass,
     CorpUserKeyClass,
     DashboardInfoClass,
@@ -243,20 +242,14 @@ class Mapper:

     @staticmethod
     def new_mcp(
-        entity_type,
         entity_urn,
-        aspect_name,
         aspect,
-        change_type=ChangeTypeClass.UPSERT,
     ):
         """
         Create MCP
         """
         return MetadataChangeProposalWrapper(
-            entityType=entity_type,
-            changeType=change_type,
             entityUrn=entity_urn,
-            aspectName=aspect_name,
             aspect=aspect,
         )

@@ -343,17 +336,13 @@ class Mapper:
         )

         info_mcp = self.new_mcp(
-            entity_type=Constant.DASHBOARD,
             entity_urn=dashboard_urn,
-            aspect_name=Constant.DASHBOARD_INFO,
             aspect=dashboard_info_cls,
         )

         # removed status mcp
         removed_status_mcp = self.new_mcp(
-            entity_type=Constant.DASHBOARD,
             entity_urn=dashboard_urn,
-            aspect_name=Constant.STATUS,
             aspect=StatusClass(removed=False),
         )

@@ -365,9 +354,7 @@ class Mapper:

         # Dashboard key
         dashboard_key_mcp = self.new_mcp(
-            entity_type=Constant.DASHBOARD,
             entity_urn=dashboard_urn,
-            aspect_name=Constant.DASHBOARD_KEY,
             aspect=dashboard_key_cls,
         )

@@ -378,9 +365,7 @@ class Mapper:
         ownership = OwnershipClass(owners=owners)
         # Dashboard owner MCP
         owner_mcp = self.new_mcp(
-            entity_type=Constant.DASHBOARD,
             entity_urn=dashboard_urn,
-            aspect_name=Constant.OWNERSHIP,
             aspect=ownership,
         )

@@ -396,9 +381,7 @@ class Mapper:
             ]
         )
         browse_path_mcp = self.new_mcp(
-            entity_type=Constant.DASHBOARD,
             entity_urn=dashboard_urn,
-            aspect_name=Constant.BROWSERPATH,
             aspect=browse_path,
         )

@@ -429,27 +412,21 @@ class Mapper:
         )

         info_mcp = self.new_mcp(
-            entity_type=Constant.CORP_USER,
             entity_urn=user_urn,
-            aspect_name=Constant.CORP_USER_INFO,
             aspect=user_info_instance,
         )
         user_mcps.append(info_mcp)

         # removed status mcp
         status_mcp = self.new_mcp(
-            entity_type=Constant.CORP_USER,
             entity_urn=user_urn,
-            aspect_name=Constant.STATUS,
             aspect=StatusClass(removed=False),
         )
         user_mcps.append(status_mcp)
         user_key = CorpUserKeyClass(username=user.username)

         user_key_mcp = self.new_mcp(
-            entity_type=Constant.CORP_USER,
             entity_urn=user_urn,
-            aspect_name=Constant.CORP_USER_KEY,
             aspect=user_key,
         )
         user_mcps.append(user_key_mcp)
datahub/ingestion/source/preset.py
@@ -69,9 +69,9 @@ class PresetConfig(SupersetConfig):

 @platform_name("Preset")
 @config_class(PresetConfig)
-@support_status(SupportStatus.
+@support_status(SupportStatus.CERTIFIED)
 @capability(
-    SourceCapability.DELETION_DETECTION, "
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
 )
 class PresetSource(SupersetSource):
     """
datahub/ingestion/source/qlik_sense/qlik_sense.py
@@ -109,6 +109,7 @@ logger = logging.getLogger(__name__)
     "Enabled by default, configured using `ingest_owner`",
 )
 @capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class QlikSenseSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin extracts the following:
datahub/ingestion/source/redshift/redshift.py
@@ -10,6 +10,7 @@ import humanfriendly
 import pydantic
 import redshift_connector

+from datahub.configuration.common import AllowDenyPattern
 from datahub.configuration.pattern_utils import is_schema_allowed
 from datahub.emitter.mce_builder import (
     make_data_platform_urn,
@@ -140,12 +141,15 @@ logger: logging.Logger = logging.getLogger(__name__)
     SourceCapability.USAGE_STATS,
     "Enabled by default, can be disabled via configuration `include_usage_statistics`",
 )
-@capability(
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 @capability(
     SourceCapability.CLASSIFICATION,
     "Optionally enabled via `classification.enabled`",
     supported=True,
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin extracts the following:
@@ -354,7 +358,23 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
         ).workunit_processor,
     ]

+    def _warn_deprecated_configs(self):
+        if (
+            self.config.match_fully_qualified_names is not None
+            and not self.config.match_fully_qualified_names
+            and self.config.schema_pattern is not None
+            and self.config.schema_pattern != AllowDenyPattern.allow_all()
+        ):
+            self.report.report_warning(
+                message="Please update `schema_pattern` to match against fully qualified schema name `<database_name>.<schema_name>` and set config `match_fully_qualified_names : True`."
+                "Current default `match_fully_qualified_names: False` is only to maintain backward compatibility. "
+                "The config option `match_fully_qualified_names` will be removed in future and the default behavior will be like `match_fully_qualified_names: True`.",
+                context="Config option deprecation warning",
+                title="Config option deprecation warning",
+            )
+
     def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:
+        self._warn_deprecated_configs()
         connection = self._try_get_redshift_connection(self.config)

         if connection is None:
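The new warning asks recipes to match schema patterns against fully qualified `<database_name>.<schema_name>` names. A hedged sketch of the recommended config shape, written as a plain dict (database and schema names invented):

    # Opt in to the future default and qualify the pattern with the database name.
    config = {
        "match_fully_qualified_names": True,
        "schema_pattern": {
            "allow": [
                r"analytics\.public",  # <database_name>.<schema_name>
            ],
        },
    }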
datahub/ingestion/source/redshift/usage.py
@@ -182,9 +182,10 @@ class RedshiftUsageExtractor:
         self.report.num_operational_stats_filtered = 0

         if self.config.include_operational_stats:
-            with
-            USAGE_EXTRACTION_OPERATIONAL_STATS
-
             [the three removed lines above were truncated in this extract]
+            with (
+                self.report.new_stage(USAGE_EXTRACTION_OPERATIONAL_STATS),
+                PerfTimer() as timer,
+            ):
                 # Generate operation aspect workunits
                 yield from self._gen_operation_aspect_workunits(
                     self.connection, all_tables
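The replacement uses a parenthesized multi-item with statement, which entered the official grammar in Python 3.10 (CPython 3.9's parser also happens to accept it). It is equivalent to nesting, as this stand-in sketch shows:

    from contextlib import nullcontext

    # Parenthesized form, as in the hunk above (nullcontext stands in for
    # self.report.new_stage(...) and PerfTimer()):
    with (
        nullcontext("operational stats stage"),
        nullcontext() as timer,
    ):
        pass

    # Equivalent nested form that also parses on older interpreters:
    with nullcontext("operational stats stage"):
        with nullcontext() as timer:
            pass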
datahub/ingestion/source/s3/report.py
@@ -1,19 +1,21 @@
 import dataclasses
 from dataclasses import field as dataclass_field
-from typing import List

 from datahub.ingestion.source.state.stale_entity_removal_handler import (
     StaleEntityRemovalSourceReport,
 )
+from datahub.utilities.lossy_collections import LossyList


 @dataclasses.dataclass
 class DataLakeSourceReport(StaleEntityRemovalSourceReport):
     files_scanned = 0
-    filtered:
+    filtered: LossyList[str] = dataclass_field(default_factory=LossyList)
+    number_of_files_filtered: int = 0

     def report_file_scanned(self) -> None:
         self.files_scanned += 1

     def report_file_dropped(self, file: str) -> None:
         self.filtered.append(file)
+        self.number_of_files_filtered += 1
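LossyList (from datahub.utilities.lossy_collections) is a bounded list used in ingestion reports: past a size limit it retains only a sample of appended items, which is presumably why the exact count moves into the new number_of_files_filtered field. A usage sketch:

    from datahub.utilities.lossy_collections import LossyList

    filtered: LossyList[str] = LossyList()
    for i in range(1000):
        filtered.append(f"s3://bucket/prefix/file_{i}.csv")

    # Only a bounded sample of the 1000 appends is kept; str(filtered)
    # marks the list as sampled. The report's number_of_files_filtered
    # counter preserves the exact total.
    print(str(filtered))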