acryl-datahub 1.3.1__py3-none-any.whl → 1.3.1.1rc1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/METADATA +2501 -2501
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/RECORD +193 -193
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +2 -2
- datahub/api/entities/corpgroup/corpgroup.py +11 -6
- datahub/api/entities/corpuser/corpuser.py +11 -11
- datahub/api/entities/dataproduct/dataproduct.py +47 -27
- datahub/api/entities/dataset/dataset.py +32 -21
- datahub/api/entities/external/lake_formation_external_entites.py +5 -6
- datahub/api/entities/external/unity_catalog_external_entites.py +5 -7
- datahub/api/entities/forms/forms.py +16 -14
- datahub/api/entities/structuredproperties/structuredproperties.py +23 -16
- datahub/cli/check_cli.py +2 -2
- datahub/cli/config_utils.py +3 -3
- datahub/cli/lite_cli.py +9 -7
- datahub/cli/migrate.py +4 -4
- datahub/cli/quickstart_versioning.py +3 -3
- datahub/cli/specific/group_cli.py +1 -1
- datahub/cli/specific/structuredproperties_cli.py +1 -1
- datahub/cli/specific/user_cli.py +1 -1
- datahub/configuration/common.py +14 -2
- datahub/configuration/connection_resolver.py +2 -2
- datahub/configuration/git.py +47 -30
- datahub/configuration/import_resolver.py +2 -2
- datahub/configuration/kafka.py +4 -3
- datahub/configuration/time_window_config.py +26 -26
- datahub/configuration/validate_field_deprecation.py +2 -2
- datahub/configuration/validate_field_removal.py +2 -2
- datahub/configuration/validate_field_rename.py +2 -2
- datahub/configuration/validate_multiline_string.py +2 -1
- datahub/emitter/kafka_emitter.py +3 -1
- datahub/emitter/rest_emitter.py +2 -4
- datahub/ingestion/api/decorators.py +1 -1
- datahub/ingestion/api/report.py +1 -1
- datahub/ingestion/api/sink.py +1 -1
- datahub/ingestion/api/source.py +1 -1
- datahub/ingestion/glossary/datahub_classifier.py +11 -8
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
- datahub/ingestion/reporting/file_reporter.py +5 -4
- datahub/ingestion/run/pipeline.py +6 -6
- datahub/ingestion/run/pipeline_config.py +12 -14
- datahub/ingestion/run/sink_callback.py +1 -1
- datahub/ingestion/sink/datahub_rest.py +6 -4
- datahub/ingestion/source/abs/config.py +19 -19
- datahub/ingestion/source/abs/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/abs/source.py +2 -2
- datahub/ingestion/source/aws/aws_common.py +1 -1
- datahub/ingestion/source/aws/glue.py +6 -4
- datahub/ingestion/source/aws/sagemaker.py +1 -1
- datahub/ingestion/source/azure/azure_common.py +8 -12
- datahub/ingestion/source/bigquery_v2/bigquery.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +43 -30
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -1
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/common/gcp_credentials_config.py +10 -10
- datahub/ingestion/source/data_lake_common/path_spec.py +85 -89
- datahub/ingestion/source/datahub/config.py +8 -8
- datahub/ingestion/source/datahub/datahub_source.py +1 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +9 -3
- datahub/ingestion/source/dbt/dbt_common.py +39 -37
- datahub/ingestion/source/dbt/dbt_core.py +10 -12
- datahub/ingestion/source/debug/datahub_debug.py +1 -1
- datahub/ingestion/source/delta_lake/config.py +6 -4
- datahub/ingestion/source/dremio/dremio_config.py +10 -6
- datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
- datahub/ingestion/source/elastic_search.py +4 -3
- datahub/ingestion/source/excel/source.py +1 -1
- datahub/ingestion/source/feast.py +1 -1
- datahub/ingestion/source/file.py +5 -4
- datahub/ingestion/source/fivetran/config.py +17 -16
- datahub/ingestion/source/fivetran/fivetran.py +2 -2
- datahub/ingestion/source/gc/datahub_gc.py +1 -1
- datahub/ingestion/source/gcs/gcs_source.py +8 -10
- datahub/ingestion/source/ge_profiling_config.py +8 -5
- datahub/ingestion/source/grafana/grafana_api.py +2 -2
- datahub/ingestion/source/grafana/grafana_config.py +4 -3
- datahub/ingestion/source/grafana/grafana_source.py +1 -1
- datahub/ingestion/source/grafana/models.py +23 -5
- datahub/ingestion/source/hex/api.py +7 -5
- datahub/ingestion/source/hex/hex.py +4 -3
- datahub/ingestion/source/iceberg/iceberg.py +1 -1
- datahub/ingestion/source/iceberg/iceberg_common.py +5 -3
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +10 -10
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +7 -5
- datahub/ingestion/source/looker/looker_config.py +21 -20
- datahub/ingestion/source/looker/lookml_config.py +47 -47
- datahub/ingestion/source/metabase.py +8 -8
- datahub/ingestion/source/metadata/business_glossary.py +2 -2
- datahub/ingestion/source/metadata/lineage.py +13 -8
- datahub/ingestion/source/mlflow.py +1 -1
- datahub/ingestion/source/mode.py +6 -4
- datahub/ingestion/source/mongodb.py +4 -3
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +17 -23
- datahub/ingestion/source/openapi.py +6 -8
- datahub/ingestion/source/powerbi/config.py +33 -32
- datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +2 -2
- datahub/ingestion/source/powerbi/powerbi.py +1 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +2 -2
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +8 -6
- datahub/ingestion/source/preset.py +8 -8
- datahub/ingestion/source/pulsar.py +1 -1
- datahub/ingestion/source/qlik_sense/data_classes.py +15 -8
- datahub/ingestion/source/qlik_sense/qlik_api.py +7 -7
- datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
- datahub/ingestion/source/redshift/config.py +18 -20
- datahub/ingestion/source/redshift/redshift.py +2 -2
- datahub/ingestion/source/redshift/usage.py +23 -3
- datahub/ingestion/source/s3/config.py +83 -62
- datahub/ingestion/source/s3/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/s3/source.py +8 -5
- datahub/ingestion/source/sac/sac.py +5 -4
- datahub/ingestion/source/salesforce.py +3 -2
- datahub/ingestion/source/schema/json_schema.py +2 -2
- datahub/ingestion/source/sigma/data_classes.py +3 -2
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/sigma/sigma_api.py +7 -7
- datahub/ingestion/source/slack/slack.py +1 -1
- datahub/ingestion/source/snaplogic/snaplogic.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_assertion.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +35 -31
- datahub/ingestion/source/snowflake/snowflake_connection.py +35 -13
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_queries.py +1 -1
- datahub/ingestion/source/sql/athena.py +1 -1
- datahub/ingestion/source/sql/clickhouse.py +4 -2
- datahub/ingestion/source/sql/cockroachdb.py +1 -1
- datahub/ingestion/source/sql/druid.py +1 -1
- datahub/ingestion/source/sql/hana.py +1 -1
- datahub/ingestion/source/sql/hive.py +7 -5
- datahub/ingestion/source/sql/hive_metastore.py +1 -1
- datahub/ingestion/source/sql/mssql/source.py +13 -6
- datahub/ingestion/source/sql/mysql.py +1 -1
- datahub/ingestion/source/sql/oracle.py +17 -10
- datahub/ingestion/source/sql/postgres.py +2 -2
- datahub/ingestion/source/sql/presto.py +1 -1
- datahub/ingestion/source/sql/sql_config.py +8 -9
- datahub/ingestion/source/sql/sql_generic.py +1 -1
- datahub/ingestion/source/sql/teradata.py +1 -1
- datahub/ingestion/source/sql/trino.py +1 -1
- datahub/ingestion/source/sql/vertica.py +5 -4
- datahub/ingestion/source/sql_queries.py +11 -8
- datahub/ingestion/source/state/checkpoint.py +2 -2
- datahub/ingestion/source/state/entity_removal_state.py +2 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +55 -45
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/superset.py +9 -9
- datahub/ingestion/source/tableau/tableau.py +14 -16
- datahub/ingestion/source/unity/config.py +33 -34
- datahub/ingestion/source/unity/proxy.py +203 -0
- datahub/ingestion/source/unity/proxy_types.py +91 -0
- datahub/ingestion/source/unity/source.py +27 -2
- datahub/ingestion/source/usage/clickhouse_usage.py +1 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +1 -1
- datahub/ingestion/source/usage/usage_common.py +5 -3
- datahub/ingestion/source_config/csv_enricher.py +7 -6
- datahub/ingestion/source_config/operation_config.py +7 -4
- datahub/ingestion/source_config/pulsar.py +11 -15
- datahub/ingestion/transformer/add_dataset_browse_path.py +1 -1
- datahub/ingestion/transformer/add_dataset_dataproduct.py +6 -5
- datahub/ingestion/transformer/add_dataset_ownership.py +3 -3
- datahub/ingestion/transformer/add_dataset_properties.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_tags.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_terms.py +2 -2
- datahub/ingestion/transformer/add_dataset_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +1 -1
- datahub/ingestion/transformer/extract_dataset_tags.py +1 -1
- datahub/ingestion/transformer/extract_ownership_from_tags.py +1 -1
- datahub/ingestion/transformer/mark_dataset_status.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_ownership.py +1 -1
- datahub/ingestion/transformer/remove_dataset_ownership.py +1 -1
- datahub/ingestion/transformer/replace_external_url.py +2 -2
- datahub/ingestion/transformer/set_browse_path.py +1 -1
- datahub/ingestion/transformer/tags_to_terms.py +1 -1
- datahub/lite/duckdb_lite.py +1 -1
- datahub/lite/lite_util.py +2 -2
- datahub/sdk/search_filters.py +68 -40
- datahub/secret/datahub_secret_store.py +7 -4
- datahub/secret/file_secret_store.py +1 -1
- datahub/sql_parsing/sqlglot_lineage.py +5 -2
- datahub/testing/check_sql_parser_result.py +2 -2
- datahub/utilities/ingest_utils.py +1 -1
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/top_level.txt +0 -0
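
Almost all of the per-file changes below follow a single pattern: validators and parsing calls written against the Pydantic v1 API are replaced with their Pydantic v2 equivalents (`@field_validator`, `@model_validator`, and `Model.model_validate()`; the one removed decorator still fully visible in the hunks is `@root_validator(skip_on_failure=True)`). A minimal sketch of that pattern, using a hypothetical `ExampleConfig` model rather than any class from this package:

from typing import Optional

from pydantic import BaseModel, field_validator, model_validator


class ExampleConfig(BaseModel):
    connect_uri: str
    display_uri: Optional[str] = None

    # Pydantic v2 field validator; replaces the v1 @validator decorator.
    @field_validator("connect_uri", mode="after")
    @classmethod
    def remove_trailing_slash(cls, v: str) -> str:
        return v.rstrip("/")

    # Pydantic v2 model validator; replaces the v1 @root_validator. It runs on
    # the constructed model, mutates fields via self, and returns self instead
    # of a values dict.
    @model_validator(mode="after")
    def default_display_uri_to_connect_uri(self) -> "ExampleConfig":
        if self.display_uri is None:
            self.display_uri = self.connect_uri
        return self


# parse_obj() (v1) becomes model_validate() (v2), as in the source create() methods below.
config = ExampleConfig.model_validate({"connect_uri": "https://example.invalid/"})

In `mode="after"` model validators the validator receives the constructed model as `self` and must return it, which is why the hunks below consistently replace `return values` with `return self`. Note that the removed ("-") side of many hunks is truncated in this listing; only the surviving fragments are reproduced.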
@@ -1,10 +1,11 @@
 import logging
+from copy import deepcopy
 from dataclasses import dataclass, field as dataclass_field
 from datetime import timedelta
 from typing import Any, Dict, Literal, Optional, Union

 import pydantic
-from pydantic import
+from pydantic import model_validator
 from pydantic.fields import Field

 from datahub.configuration.common import AllowDenyPattern
@@ -210,75 +211,74 @@ class LookMLSourceConfig(
         "All if comments are evaluated to true for configured looker_environment value",
     )

-    @
+    @model_validator(mode="before")
+    @classmethod
+    def convert_string_to_connection_def(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+        values = deepcopy(values)
+        conn_map = values.get("connection_to_platform_map")
+        if conn_map:
+            # Previous version of config supported strings in connection map. This upconverts strings to ConnectionMap
+            for key in conn_map:
+                if isinstance(conn_map[key], str):
+                    platform = conn_map[key]
+                    if "." in platform:
+                        platform_db_split = conn_map[key].split(".")
+                        connection = LookerConnectionDefinition(
+                            platform=platform_db_split[0],
+                            default_db=platform_db_split[1],
+                            default_schema="",
+                        )
+                        conn_map[key] = connection
+                    else:
+                        logger.warning(
+                            f"Connection map for {key} provides platform {platform} but does not provide a default "
+                            f"database name. This might result in failed resolution"
+                        )
+                        conn_map[key] = LookerConnectionDefinition(
+                            platform=platform, default_db="", default_schema=""
+                        )
+        return values

-    @
-    def check_either_connection_map_or_connection_provided(
+    @model_validator(mode="after")
+    def check_either_connection_map_or_connection_provided(self):
         """Validate that we must either have a connection map or an api credential"""
-        if not
-            "api", {}
-        ):
+        if not (self.connection_to_platform_map or {}) and not (self.api):
             raise ValueError(
                 "Neither api not connection_to_platform_map config was found. LookML source requires either api "
                 "credentials for Looker or a map of connection names to platform identifiers to work correctly"
             )
-        return
+        return self

-    @
-    def check_either_project_name_or_api_provided(
+    @model_validator(mode="after")
+    def check_either_project_name_or_api_provided(self):
         """Validate that we must either have a project name or an api credential to fetch project names"""
-        if not
+        if not self.project_name and not self.api:
             raise ValueError(
                 "Neither project_name not an API credential was found. LookML source requires either api credentials "
                 "for Looker or a project_name to accurately name views and models."
             )
-        return
+        return self

-    @
-    def check_api_provided_for_view_lineage(
+    @model_validator(mode="after")
+    def check_api_provided_for_view_lineage(self):
         """Validate that we must have an api credential to use Looker API for view's column lineage"""
-        if not
+        if not self.api and self.use_api_for_view_lineage:
             raise ValueError(
                 "API credential was not found. LookML source requires api credentials "
                 "for Looker to use Looker APIs for view's column lineage extraction."
                 "Set `use_api_for_view_lineage` to False to skip using Looker APIs."
             )
-        return
+        return self

-    @
-    def check_base_folder_if_not_provided(
-        git_info: Optional[GitInfo] = values.get("git_info")
-        if git_info:
-            if not git_info.deploy_key:
+    @model_validator(mode="after")
+    def check_base_folder_if_not_provided(self):
+        if self.base_folder is None:
+            if self.git_info:
+                if not self.git_info.deploy_key:
                     logger.warning(
                         "git_info is provided, but no SSH key is present. If the repo is not public, we'll fail to "
                         "clone it."
                     )
             else:
                 raise ValueError("Neither base_folder nor git_info has been provided.")
-        return
+        return self
@@ -9,7 +9,7 @@ from typing import Dict, Iterable, List, Optional, Tuple, Union
 import dateutil.parser as dp
 import pydantic
 import requests
-from pydantic import Field,
+from pydantic import Field, field_validator, model_validator
 from requests.models import HTTPError

 import datahub.emitter.mce_builder as builder
@@ -115,16 +115,16 @@ class MetabaseConfig(
     )
     stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None

-    @
+    @field_validator("connect_uri", "display_uri", mode="after")
+    @classmethod
     def remove_trailing_slash(cls, v):
         return config_clean.remove_trailing_slashes(v)

-    @
-    def default_display_uri_to_connect_uri(
-        return values
+    @model_validator(mode="after")
+    def default_display_uri_to_connect_uri(self) -> "MetabaseConfig":
+        if self.display_uri is None:
+            self.display_uri = self.connect_uri
+        return self


 @dataclass
@@ -563,7 +563,7 @@ class BusinessGlossaryFileSource(Source):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = BusinessGlossarySourceConfig.
+        config = BusinessGlossarySourceConfig.model_validate(config_dict)
         return cls(ctx, config)

     @classmethod
@@ -571,7 +571,7 @@ class BusinessGlossaryFileSource(Source):
         cls, file_name: Union[str, pathlib.Path]
     ) -> BusinessGlossaryConfig:
         config = load_config_file(file_name, resolve_env_vars=True)
-        glossary_cfg = BusinessGlossaryConfig.
+        glossary_cfg = BusinessGlossaryConfig.model_validate(config)
         return glossary_cfg

     def get_workunits_internal(
@@ -3,7 +3,7 @@ from dataclasses import dataclass, field
 from functools import partial
 from typing import Any, Dict, Iterable, List, Optional

-from pydantic import
+from pydantic import field_validator
 from pydantic.fields import Field

 import datahub.metadata.schema_classes as models
@@ -51,7 +51,8 @@ class EntityConfig(EnvConfigMixin):
     platform: str
     platform_instance: Optional[str] = None

-    @
+    @field_validator("type", mode="after")
+    @classmethod
     def type_must_be_supported(cls, v: str) -> str:
         allowed_types = ["dataset"]
         if v not in allowed_types:
@@ -60,7 +61,8 @@
             )
         return v

-    @
+    @field_validator("name", mode="after")
+    @classmethod
     def validate_name(cls, v: str) -> str:
         if v.startswith("urn:li:"):
             raise ValueError(
@@ -77,7 +79,8 @@ class FineGrainedLineageConfig(ConfigModel):
     transformOperation: Optional[str]
     confidenceScore: Optional[float] = 1.0

-    @
+    @field_validator("upstreamType", mode="after")
+    @classmethod
     def upstream_type_must_be_supported(cls, v: str) -> str:
         allowed_types = [
             FineGrainedLineageUpstreamTypeClass.FIELD_SET,
@@ -90,7 +93,8 @@
             )
         return v

-    @
+    @field_validator("downstreamType", mode="after")
+    @classmethod
     def downstream_type_must_be_supported(cls, v: str) -> str:
         allowed_types = [
             FineGrainedLineageDownstreamTypeClass.FIELD_SET,
@@ -124,7 +128,8 @@ class LineageFileSourceConfig(ConfigModel):
 class LineageConfig(VersionedConfig):
     lineage: List[EntityNodeConfig]

-    @
+    @field_validator("version", mode="after")
+    @classmethod
     def version_must_be_1(cls, v):
         if v != "1":
             raise ValueError("Only version 1 is supported")
@@ -148,13 +153,13 @@ class LineageFileSource(Source):
     def create(
         cls, config_dict: Dict[str, Any], ctx: PipelineContext
     ) -> "LineageFileSource":
-        config = LineageFileSourceConfig.
+        config = LineageFileSourceConfig.model_validate(config_dict)
         return cls(ctx, config)

     @staticmethod
     def load_lineage_config(file_name: str) -> LineageConfig:
         config = load_config_file(file_name, resolve_env_vars=True)
-        lineage_config = LineageConfig.
+        lineage_config = LineageConfig.model_validate(config)
         return lineage_config

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
@@ -892,5 +892,5 @@ class MLflowSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "MLflowSource":
-        config = MLflowConfig.
+        config = MLflowConfig.model_validate(config_dict)
         return cls(ctx, config)
datahub/ingestion/source/mode.py
CHANGED
@@ -26,7 +26,7 @@ import sqlglot
 import tenacity
 import yaml
 from liquid import Template, Undefined
-from pydantic import Field,
+from pydantic import Field, field_validator
 from requests.adapters import HTTPAdapter, Retry
 from requests.exceptions import ConnectionError
 from requests.models import HTTPBasicAuth, HTTPError
@@ -218,11 +218,13 @@ class ModeConfig(
         default=False, description="Exclude archived reports"
     )

-    @
+    @field_validator("connect_uri", mode="after")
+    @classmethod
     def remove_trailing_slash(cls, v):
         return config_clean.remove_trailing_slashes(v)

-    @
+    @field_validator("items_per_page", mode="after")
+    @classmethod
     def validate_items_per_page(cls, v):
         if 1 <= v <= DEFAULT_API_ITEMS_PER_PAGE:
             return v
@@ -1824,7 +1826,7 @@ class ModeSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "ModeSource":
-        config: ModeConfig = ModeConfig.
+        config: ModeConfig = ModeConfig.model_validate(config_dict)
         return cls(ctx, config)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
@@ -6,7 +6,7 @@ from typing import Dict, Iterable, List, Optional, Tuple, Type, Union, ValuesVie
 import bson.timestamp
 import pymongo.collection
 from packaging import version
-from pydantic import PositiveInt,
+from pydantic import PositiveInt, field_validator
 from pydantic.fields import Field
 from pymongo.mongo_client import MongoClient

@@ -138,7 +138,8 @@ class MongoDBConfig(
     # Custom Stateful Ingestion settings
     stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None

-    @
+    @field_validator("maxDocumentSize", mode="after")
+    @classmethod
     def check_max_doc_size_filter_is_valid(cls, doc_size_filter_value):
         if doc_size_filter_value > 16793600:
             raise ValueError("maxDocumentSize must be a positive value <= 16793600.")
@@ -311,7 +312,7 @@ class MongoDBSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "MongoDBSource":
-        config = MongoDBConfig.
+        config = MongoDBConfig.model_validate(config_dict)
         return cls(ctx, config)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
@@ -78,7 +78,7 @@ class Neo4jSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict: Dict, ctx: PipelineContext) -> "Neo4jSource":
-        config = Neo4jConfig.
+        config = Neo4jConfig.model_validate(config_dict)
         return cls(config, ctx)

     def create_schema_field_tuple(
datahub/ingestion/source/nifi.py
CHANGED
@@ -13,7 +13,7 @@ import requests
 from cached_property import cached_property
 from dateutil import parser
 from packaging import version
-from pydantic import
+from pydantic import field_validator, model_validator
 from pydantic.fields import Field
 from requests import Response
 from requests.adapters import HTTPAdapter
@@ -165,39 +165,33 @@ class NifiSourceConfig(StatefulIngestionConfigBase, EnvConfigMixin):
         " When disabled, re-states lineage on each run.",
     )

-    @
-    def validate_auth_params(
-        if
-            "client_cert_file"
-        ):
+    @model_validator(mode="after")
+    def validate_auth_params(self) -> "NifiSourceConfig":
+        if self.auth is NifiAuthType.CLIENT_CERT and not self.client_cert_file:
             raise ValueError(
                 "Config `client_cert_file` is required for CLIENT_CERT auth"
             )
-        elif
+        elif self.auth in (
             NifiAuthType.SINGLE_USER,
             NifiAuthType.BASIC_AUTH,
-        ) and (not
+        ) and (not self.username or not self.password):
             raise ValueError(
-                f"Config `username` and `password` is required for {
+                f"Config `username` and `password` is required for {self.auth.value} auth"
             )
-        return
-
-    @root_validator(skip_on_failure=True)
-    def validator_site_url_to_site_name(cls, values):
-        site_url_to_site_name = values.get("site_url_to_site_name")
-        site_url = values.get("site_url")
-        site_name = values.get("site_name")
+        return self

+    @model_validator(mode="after")
+    def validator_site_url_to_site_name(self) -> "NifiSourceConfig":
+        if self.site_url_to_site_name is None:
+            self.site_url_to_site_name = {}

-        if site_url not in site_url_to_site_name:
-            site_url_to_site_name[site_url] = site_name
+        if self.site_url not in self.site_url_to_site_name:
+            self.site_url_to_site_name[self.site_url] = self.site_name

-        return
+        return self

-    @
+    @field_validator("site_url", mode="after")
+    @classmethod
     def validator_site_url(cls, site_url: str) -> str:
         assert site_url.startswith(("http://", "https://")), (
             "site_url must start with http:// or https://"
@@ -4,7 +4,7 @@ import warnings
 from abc import ABC
 from typing import Dict, Iterable, List, Optional, Tuple

-from pydantic import
+from pydantic import model_validator
 from pydantic.fields import Field

 from datahub.configuration.common import ConfigModel
@@ -86,13 +86,11 @@ class OpenApiConfig(ConfigModel):
         default=True, description="Enable SSL certificate verification"
     )

-    @
-    def ensure_only_one_token(
-    ) -> Optional[str]:
-        if bearer_token is not None and values.get("token") is not None:
+    @model_validator(mode="after")
+    def ensure_only_one_token(self) -> "OpenApiConfig":
+        if self.bearer_token is not None and self.token is not None:
             raise ValueError("Unable to use 'token' and 'bearer_token' together.")
-        return
+        return self

     def get_swagger(self) -> Dict:
         if self.get_token or self.token or self.bearer_token is not None:
@@ -463,5 +461,5 @@ class OpenApiSource(APISource):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = OpenApiConfig.
+        config = OpenApiConfig.model_validate(config_dict)
         return cls(config, ctx)
@@ -4,7 +4,7 @@ from enum import Enum
 from typing import Dict, List, Literal, Optional, Union

 import pydantic
-from pydantic import
+from pydantic import field_validator, model_validator

 import datahub.emitter.mce_builder as builder
 from datahub.configuration.common import AllowDenyPattern, ConfigModel, HiddenFromDocs
@@ -540,8 +540,8 @@ class PowerBiDashboardSourceConfig(
         description="timeout in seconds for Metadata Rest Api.",
     )

-    @
-    def validate_extract_column_level_lineage(
+    @model_validator(mode="after")
+    def validate_extract_column_level_lineage(self) -> "PowerBiDashboardSourceConfig":
         flags = [
             "native_query_parsing",
             "enable_advance_lineage_sql_construct",
@@ -549,26 +549,23 @@
             "extract_dataset_schema",
         ]

-        if
-            "extract_column_level_lineage" in values
-            and values["extract_column_level_lineage"] is False
-        ):
+        if self.extract_column_level_lineage is False:
             # Flag is not set. skip validation
-            return
+            return self

         logger.debug(f"Validating additional flags: {flags}")

         is_flag_enabled: bool = True
         for flag in flags:
-            if
+            if not getattr(self, flag, True):
                 is_flag_enabled = False

         if not is_flag_enabled:
             raise ValueError(f"Enable all these flags in recipe: {flags} ")

-        return
+        return self

-    @
+    @field_validator("dataset_type_mapping", mode="after")
     @classmethod
     def map_data_platform(cls, value):
         # For backward compatibility convert input PostgreSql to PostgreSQL
@@ -580,28 +577,32 @@

         return value

-    @
-    def workspace_id_backward_compatibility(
+    @model_validator(mode="after")
+    def workspace_id_backward_compatibility(self) -> "PowerBiDashboardSourceConfig":
+        if (
+            self.workspace_id_pattern == AllowDenyPattern.allow_all()
+            and self.workspace_id
+        ):
             logger.warning(
                 "workspace_id_pattern is not set but workspace_id is set, setting workspace_id as "
                 "workspace_id_pattern. workspace_id will be deprecated, please use workspace_id_pattern instead."
             )
-            allow=[f"^{workspace_id}$"]
+            self.workspace_id_pattern = AllowDenyPattern(
+                allow=[f"^{self.workspace_id}$"]
             )
-        elif
+        elif (
+            self.workspace_id_pattern != AllowDenyPattern.allow_all()
+            and self.workspace_id
+        ):
             logger.warning(
                 "workspace_id will be ignored in favour of workspace_id_pattern. workspace_id will be deprecated, "
                 "please use workspace_id_pattern only."
             )
-        return
+            self.workspace_id = None
+        return self

-    @
+    @model_validator(mode="before")
+    @classmethod
     def raise_error_for_dataset_type_mapping(cls, values: Dict) -> Dict:
         if (
             values.get("dataset_type_mapping") is not None
@@ -613,18 +614,18 @@

         return values

-    @
-    def validate_extract_dataset_schema(
-        if
+    @model_validator(mode="after")
+    def validate_extract_dataset_schema(self) -> "PowerBiDashboardSourceConfig":
+        if self.extract_dataset_schema is False:
             add_global_warning(
                 "Please use `extract_dataset_schema: true`, otherwise dataset schema extraction will be skipped."
             )
-        return
+        return self

-    @
-    def validate_dsn_to_database_schema(
-        if
-            dsn_mapping =
+    @model_validator(mode="after")
+    def validate_dsn_to_database_schema(self) -> "PowerBiDashboardSourceConfig":
+        if self.dsn_to_database_schema is not None:
+            dsn_mapping = self.dsn_to_database_schema
             if not isinstance(dsn_mapping, dict):
                 raise ValueError("dsn_to_database_schema must contain key-value pairs")

@@ -639,4 +640,4 @@
                     f"dsn_to_database_schema invalid mapping value: {value}"
                 )

-        return
+        return self
@@ -41,7 +41,7 @@ class ResolvePlatformInstanceFromDatasetTypeMapping(
         if isinstance(platform, PlatformDetail):
             return platform

-        return PlatformDetail.
+        return PlatformDetail.model_validate({})


 class ResolvePlatformInstanceFromServerToPlatformInstance(
@@ -56,7 +56,7 @@ class ResolvePlatformInstanceFromServerToPlatformInstance(
             ]
             if data_platform_detail.data_platform_server
             in self.config.server_to_platform_instance
-            else PlatformDetail.
+            else PlatformDetail.model_validate({})
         )

@@ -1316,7 +1316,7 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = PowerBiDashboardSourceConfig.
+        config = PowerBiDashboardSourceConfig.model_validate(config_dict)
         return cls(config, ctx)

     def get_allowed_workspaces(self) -> List[powerbi_data_classes.Workspace]:
@@ -213,7 +213,7 @@ class PowerBiReportServerAPI:

         if response_dict.get("value"):
             reports.extend(
-                report_types_mapping[report_type].
+                report_types_mapping[report_type].model_validate(report)
                 for report in response_dict.get("value")
             )

@@ -517,7 +517,7 @@ class PowerBiReportServerDashboardSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = PowerBiReportServerDashboardSourceConfig.
+        config = PowerBiReportServerDashboardSourceConfig.model_validate(config_dict)
         return cls(config, ctx)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
@@ -1,7 +1,7 @@
 from datetime import datetime
 from typing import Any, Dict, List, Optional

-from pydantic import BaseModel, Field,
+from pydantic import BaseModel, Field, model_validator

 from datahub.ingestion.source.powerbi_report_server.constants import (
     RelationshipDirection,
@@ -30,11 +30,13 @@ class CatalogItem(BaseModel):
     has_data_sources: bool = Field(False, alias="HasDataSources")
     data_sources: Optional[List["DataSource"]] = Field(None, alias="DataSources")

-    @
-    def validate_diplay_name(
-        if
+    @model_validator(mode="after")
+    def validate_diplay_name(self):
+        if self.created_by:
+            self.display_name = self.created_by.split("\\")[-1]
+        else:
+            self.display_name = ""
+        return self

     def get_urn_part(self):
         return f"reports.{self.id}"