acryl-datahub 1.3.1__py3-none-any.whl → 1.3.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/METADATA +2582 -2582
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/RECORD +203 -201
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +2 -2
- datahub/api/entities/corpgroup/corpgroup.py +11 -6
- datahub/api/entities/corpuser/corpuser.py +11 -11
- datahub/api/entities/dataproduct/dataproduct.py +47 -27
- datahub/api/entities/dataset/dataset.py +32 -21
- datahub/api/entities/external/lake_formation_external_entites.py +5 -6
- datahub/api/entities/external/unity_catalog_external_entites.py +5 -7
- datahub/api/entities/forms/forms.py +16 -14
- datahub/api/entities/structuredproperties/structuredproperties.py +23 -16
- datahub/cli/check_cli.py +2 -2
- datahub/cli/config_utils.py +3 -3
- datahub/cli/lite_cli.py +9 -7
- datahub/cli/migrate.py +4 -4
- datahub/cli/quickstart_versioning.py +3 -3
- datahub/cli/specific/group_cli.py +1 -1
- datahub/cli/specific/structuredproperties_cli.py +1 -1
- datahub/cli/specific/user_cli.py +1 -1
- datahub/configuration/common.py +14 -2
- datahub/configuration/connection_resolver.py +2 -2
- datahub/configuration/git.py +47 -30
- datahub/configuration/import_resolver.py +2 -2
- datahub/configuration/kafka.py +4 -3
- datahub/configuration/time_window_config.py +26 -26
- datahub/configuration/validate_field_deprecation.py +2 -2
- datahub/configuration/validate_field_removal.py +2 -2
- datahub/configuration/validate_field_rename.py +2 -2
- datahub/configuration/validate_multiline_string.py +2 -1
- datahub/emitter/kafka_emitter.py +3 -1
- datahub/emitter/rest_emitter.py +2 -4
- datahub/ingestion/api/decorators.py +1 -1
- datahub/ingestion/api/report.py +1 -1
- datahub/ingestion/api/sink.py +1 -1
- datahub/ingestion/api/source.py +1 -1
- datahub/ingestion/glossary/datahub_classifier.py +11 -8
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
- datahub/ingestion/reporting/file_reporter.py +5 -4
- datahub/ingestion/run/pipeline.py +6 -6
- datahub/ingestion/run/pipeline_config.py +12 -14
- datahub/ingestion/run/sink_callback.py +1 -1
- datahub/ingestion/sink/datahub_rest.py +6 -4
- datahub/ingestion/source/abs/config.py +19 -19
- datahub/ingestion/source/abs/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/abs/source.py +2 -2
- datahub/ingestion/source/aws/aws_common.py +1 -1
- datahub/ingestion/source/aws/glue.py +6 -4
- datahub/ingestion/source/aws/sagemaker.py +1 -1
- datahub/ingestion/source/azure/azure_common.py +8 -12
- datahub/ingestion/source/bigquery_v2/bigquery.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +43 -30
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -1
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/common/gcp_credentials_config.py +10 -10
- datahub/ingestion/source/data_lake_common/path_spec.py +85 -89
- datahub/ingestion/source/datahub/config.py +8 -8
- datahub/ingestion/source/datahub/datahub_source.py +1 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +9 -3
- datahub/ingestion/source/dbt/dbt_common.py +39 -37
- datahub/ingestion/source/dbt/dbt_core.py +10 -12
- datahub/ingestion/source/debug/datahub_debug.py +1 -1
- datahub/ingestion/source/delta_lake/config.py +6 -4
- datahub/ingestion/source/dremio/dremio_config.py +10 -6
- datahub/ingestion/source/dremio/dremio_source.py +15 -15
- datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
- datahub/ingestion/source/elastic_search.py +4 -3
- datahub/ingestion/source/excel/source.py +1 -1
- datahub/ingestion/source/feast.py +1 -1
- datahub/ingestion/source/file.py +5 -4
- datahub/ingestion/source/fivetran/config.py +17 -16
- datahub/ingestion/source/fivetran/fivetran.py +2 -2
- datahub/ingestion/source/gc/datahub_gc.py +1 -1
- datahub/ingestion/source/gcs/gcs_source.py +8 -10
- datahub/ingestion/source/ge_profiling_config.py +8 -5
- datahub/ingestion/source/grafana/grafana_api.py +2 -2
- datahub/ingestion/source/grafana/grafana_config.py +4 -3
- datahub/ingestion/source/grafana/grafana_source.py +1 -1
- datahub/ingestion/source/grafana/models.py +23 -5
- datahub/ingestion/source/hex/api.py +7 -5
- datahub/ingestion/source/hex/hex.py +4 -3
- datahub/ingestion/source/iceberg/iceberg.py +1 -1
- datahub/ingestion/source/iceberg/iceberg_common.py +5 -3
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +10 -10
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +7 -5
- datahub/ingestion/source/looker/looker_config.py +21 -20
- datahub/ingestion/source/looker/lookml_config.py +47 -47
- datahub/ingestion/source/metabase.py +8 -8
- datahub/ingestion/source/metadata/business_glossary.py +2 -2
- datahub/ingestion/source/metadata/lineage.py +13 -8
- datahub/ingestion/source/mlflow.py +1 -1
- datahub/ingestion/source/mode.py +6 -4
- datahub/ingestion/source/mongodb.py +4 -3
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +17 -23
- datahub/ingestion/source/openapi.py +6 -8
- datahub/ingestion/source/powerbi/config.py +33 -32
- datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +2 -2
- datahub/ingestion/source/powerbi/powerbi.py +1 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +2 -2
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +8 -6
- datahub/ingestion/source/preset.py +8 -8
- datahub/ingestion/source/pulsar.py +1 -1
- datahub/ingestion/source/qlik_sense/data_classes.py +15 -8
- datahub/ingestion/source/qlik_sense/qlik_api.py +7 -7
- datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
- datahub/ingestion/source/redshift/config.py +18 -20
- datahub/ingestion/source/redshift/redshift.py +2 -2
- datahub/ingestion/source/redshift/usage.py +23 -3
- datahub/ingestion/source/s3/config.py +83 -62
- datahub/ingestion/source/s3/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/s3/source.py +8 -5
- datahub/ingestion/source/sac/sac.py +5 -4
- datahub/ingestion/source/salesforce.py +3 -2
- datahub/ingestion/source/schema/json_schema.py +2 -2
- datahub/ingestion/source/sigma/data_classes.py +3 -2
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/sigma/sigma_api.py +7 -7
- datahub/ingestion/source/slack/slack.py +1 -1
- datahub/ingestion/source/snaplogic/snaplogic.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_assertion.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +35 -31
- datahub/ingestion/source/snowflake/snowflake_connection.py +35 -13
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_queries.py +1 -1
- datahub/ingestion/source/sql/athena.py +1 -1
- datahub/ingestion/source/sql/clickhouse.py +4 -2
- datahub/ingestion/source/sql/cockroachdb.py +1 -1
- datahub/ingestion/source/sql/druid.py +1 -1
- datahub/ingestion/source/sql/hana.py +1 -1
- datahub/ingestion/source/sql/hive.py +7 -5
- datahub/ingestion/source/sql/hive_metastore.py +1 -1
- datahub/ingestion/source/sql/mssql/source.py +13 -6
- datahub/ingestion/source/sql/mysql.py +1 -1
- datahub/ingestion/source/sql/oracle.py +17 -10
- datahub/ingestion/source/sql/postgres.py +2 -2
- datahub/ingestion/source/sql/presto.py +1 -1
- datahub/ingestion/source/sql/sql_config.py +8 -9
- datahub/ingestion/source/sql/sql_generic.py +1 -1
- datahub/ingestion/source/sql/teradata.py +1 -1
- datahub/ingestion/source/sql/trino.py +1 -1
- datahub/ingestion/source/sql/vertica.py +5 -4
- datahub/ingestion/source/sql_queries.py +11 -8
- datahub/ingestion/source/state/checkpoint.py +2 -2
- datahub/ingestion/source/state/entity_removal_state.py +2 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +55 -45
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/superset.py +9 -9
- datahub/ingestion/source/tableau/tableau.py +14 -16
- datahub/ingestion/source/unity/azure_auth_config.py +15 -0
- datahub/ingestion/source/unity/config.py +51 -34
- datahub/ingestion/source/unity/connection.py +7 -1
- datahub/ingestion/source/unity/connection_test.py +1 -1
- datahub/ingestion/source/unity/proxy.py +216 -7
- datahub/ingestion/source/unity/proxy_types.py +91 -0
- datahub/ingestion/source/unity/source.py +29 -3
- datahub/ingestion/source/usage/clickhouse_usage.py +1 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +1 -1
- datahub/ingestion/source/usage/usage_common.py +5 -3
- datahub/ingestion/source_config/csv_enricher.py +7 -6
- datahub/ingestion/source_config/operation_config.py +7 -4
- datahub/ingestion/source_config/pulsar.py +11 -15
- datahub/ingestion/transformer/add_dataset_browse_path.py +1 -1
- datahub/ingestion/transformer/add_dataset_dataproduct.py +6 -5
- datahub/ingestion/transformer/add_dataset_ownership.py +3 -3
- datahub/ingestion/transformer/add_dataset_properties.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_tags.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_terms.py +2 -2
- datahub/ingestion/transformer/add_dataset_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +1 -1
- datahub/ingestion/transformer/extract_dataset_tags.py +1 -1
- datahub/ingestion/transformer/extract_ownership_from_tags.py +1 -1
- datahub/ingestion/transformer/mark_dataset_status.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_ownership.py +1 -1
- datahub/ingestion/transformer/remove_dataset_ownership.py +1 -1
- datahub/ingestion/transformer/replace_external_url.py +2 -2
- datahub/ingestion/transformer/set_browse_path.py +1 -1
- datahub/ingestion/transformer/tags_to_terms.py +1 -1
- datahub/lite/duckdb_lite.py +1 -1
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/schema.avsc +7 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -1
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc +6 -1
- datahub/sdk/__init__.py +1 -0
- datahub/sdk/_all_entities.py +2 -0
- datahub/sdk/search_filters.py +68 -40
- datahub/sdk/tag.py +112 -0
- datahub/secret/datahub_secret_store.py +7 -4
- datahub/secret/file_secret_store.py +1 -1
- datahub/sql_parsing/sqlglot_lineage.py +5 -2
- datahub/testing/check_sql_parser_result.py +2 -2
- datahub/utilities/ingest_utils.py +1 -1
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/top_level.txt +0 -0
|
@@ -3,7 +3,7 @@ from dataclasses import dataclass
|
|
|
3
3
|
from typing import Any, Dict, Generic, Optional, Type, TypeVar
|
|
4
4
|
|
|
5
5
|
import pydantic
|
|
6
|
-
from pydantic import
|
|
6
|
+
from pydantic import model_validator
|
|
7
7
|
from pydantic.fields import Field
|
|
8
8
|
|
|
9
9
|
from datahub.configuration.common import (
|
|
@@ -73,14 +73,14 @@ class StatefulIngestionConfig(ConfigModel):
|
|
|
73
73
|
description="If set to True, ignores the current checkpoint state.",
|
|
74
74
|
)
|
|
75
75
|
|
|
76
|
-
@
|
|
77
|
-
def validate_config(
|
|
78
|
-
if
|
|
79
|
-
if
|
|
80
|
-
|
|
76
|
+
@model_validator(mode="after")
|
|
77
|
+
def validate_config(self) -> "StatefulIngestionConfig":
|
|
78
|
+
if self.enabled:
|
|
79
|
+
if self.state_provider is None:
|
|
80
|
+
self.state_provider = DynamicTypedStateProviderConfig(
|
|
81
81
|
type="datahub", config={}
|
|
82
82
|
)
|
|
83
|
-
return
|
|
83
|
+
return self
|
|
84
84
|
|
|
85
85
|
|
|
86
86
|
CustomConfig = TypeVar("CustomConfig", bound=StatefulIngestionConfig)
|
|
@@ -110,17 +110,19 @@ class StatefulLineageConfigMixin(ConfigModel):
|
|
|
110
110
|
"store_last_lineage_extraction_timestamp", "enable_stateful_lineage_ingestion"
|
|
111
111
|
)
|
|
112
112
|
|
|
113
|
-
@
|
|
114
|
-
def lineage_stateful_option_validator(
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
if
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
113
|
+
@model_validator(mode="after")
|
|
114
|
+
def lineage_stateful_option_validator(self) -> "StatefulLineageConfigMixin":
|
|
115
|
+
try:
|
|
116
|
+
sti = getattr(self, "stateful_ingestion", None)
|
|
117
|
+
if not sti or not getattr(sti, "enabled", False):
|
|
118
|
+
if getattr(self, "enable_stateful_lineage_ingestion", False):
|
|
119
|
+
logger.warning(
|
|
120
|
+
"Stateful ingestion is disabled, disabling enable_stateful_lineage_ingestion config option as well"
|
|
121
|
+
)
|
|
122
|
+
self.enable_stateful_lineage_ingestion = False
|
|
123
|
+
except (AttributeError, RecursionError) as e:
|
|
124
|
+
logger.debug(f"Skipping stateful lineage validation due to: {e}")
|
|
125
|
+
return self
|
|
124
126
|
|
|
125
127
|
|
|
126
128
|
class StatefulProfilingConfigMixin(ConfigModel):
|
|
@@ -135,16 +137,19 @@ class StatefulProfilingConfigMixin(ConfigModel):
|
|
|
135
137
|
"store_last_profiling_timestamps", "enable_stateful_profiling"
|
|
136
138
|
)
|
|
137
139
|
|
|
138
|
-
@
|
|
139
|
-
def profiling_stateful_option_validator(
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
if
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
140
|
+
@model_validator(mode="after")
|
|
141
|
+
def profiling_stateful_option_validator(self) -> "StatefulProfilingConfigMixin":
|
|
142
|
+
try:
|
|
143
|
+
sti = getattr(self, "stateful_ingestion", None)
|
|
144
|
+
if not sti or not getattr(sti, "enabled", False):
|
|
145
|
+
if getattr(self, "enable_stateful_profiling", False):
|
|
146
|
+
logger.warning(
|
|
147
|
+
"Stateful ingestion is disabled, disabling enable_stateful_profiling config option as well"
|
|
148
|
+
)
|
|
149
|
+
self.enable_stateful_profiling = False
|
|
150
|
+
except (AttributeError, RecursionError) as e:
|
|
151
|
+
logger.debug(f"Skipping stateful profiling validation due to: {e}")
|
|
152
|
+
return self
|
|
148
153
|
|
|
149
154
|
|
|
150
155
|
class StatefulUsageConfigMixin(BaseTimeWindowConfig):
|
|
@@ -161,16 +166,21 @@ class StatefulUsageConfigMixin(BaseTimeWindowConfig):
|
|
|
161
166
|
"store_last_usage_extraction_timestamp", "enable_stateful_usage_ingestion"
|
|
162
167
|
)
|
|
163
168
|
|
|
164
|
-
@
|
|
165
|
-
def last_usage_extraction_stateful_option_validator(
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
)
|
|
172
|
-
|
|
173
|
-
|
|
169
|
+
@model_validator(mode="after")
|
|
170
|
+
def last_usage_extraction_stateful_option_validator(
|
|
171
|
+
self,
|
|
172
|
+
) -> "StatefulUsageConfigMixin":
|
|
173
|
+
try:
|
|
174
|
+
sti = getattr(self, "stateful_ingestion", None)
|
|
175
|
+
if not sti or not getattr(sti, "enabled", False):
|
|
176
|
+
if getattr(self, "enable_stateful_usage_ingestion", False):
|
|
177
|
+
logger.warning(
|
|
178
|
+
"Stateful ingestion is disabled, disabling enable_stateful_usage_ingestion config option as well"
|
|
179
|
+
)
|
|
180
|
+
self.enable_stateful_usage_ingestion = False
|
|
181
|
+
except (AttributeError, RecursionError) as e:
|
|
182
|
+
logger.debug(f"Skipping stateful usage validation due to: {e}")
|
|
183
|
+
return self
|
|
174
184
|
|
|
175
185
|
|
|
176
186
|
class StatefulTimeWindowConfigMixin(BaseTimeWindowConfig):
|
|
@@ -185,16 +195,16 @@ class StatefulTimeWindowConfigMixin(BaseTimeWindowConfig):
|
|
|
185
195
|
"and queries together from a single audit log and uses a unified time window.",
|
|
186
196
|
)
|
|
187
197
|
|
|
188
|
-
@
|
|
189
|
-
def time_window_stateful_option_validator(
|
|
190
|
-
sti =
|
|
191
|
-
if not sti or not sti
|
|
192
|
-
if
|
|
198
|
+
@model_validator(mode="after")
|
|
199
|
+
def time_window_stateful_option_validator(self) -> "StatefulTimeWindowConfigMixin":
|
|
200
|
+
sti = getattr(self, "stateful_ingestion", None)
|
|
201
|
+
if not sti or not getattr(sti, "enabled", False):
|
|
202
|
+
if getattr(self, "enable_stateful_time_window", False):
|
|
193
203
|
logger.warning(
|
|
194
204
|
"Stateful ingestion is disabled, disabling enable_stateful_time_window config option as well"
|
|
195
205
|
)
|
|
196
|
-
|
|
197
|
-
return
|
|
206
|
+
self.enable_stateful_time_window = False
|
|
207
|
+
return self
|
|
198
208
|
|
|
199
209
|
|
|
200
210
|
@dataclass
|
|
@@ -40,7 +40,7 @@ class DatahubIngestionCheckpointingProvider(IngestionCheckpointingProviderBase):
|
|
|
40
40
|
def create(
|
|
41
41
|
cls, config_dict: Dict[str, Any], ctx: PipelineContext
|
|
42
42
|
) -> "DatahubIngestionCheckpointingProvider":
|
|
43
|
-
config = DatahubIngestionStateProviderConfig.
|
|
43
|
+
config = DatahubIngestionStateProviderConfig.model_validate(config_dict)
|
|
44
44
|
if config.datahub_api is not None:
|
|
45
45
|
return cls(DataHubGraph(config.datahub_api))
|
|
46
46
|
elif ctx.graph:
|
|
@@ -32,7 +32,7 @@ class FileIngestionCheckpointingProvider(IngestionCheckpointingProviderBase):
|
|
|
32
32
|
def create(
|
|
33
33
|
cls, config_dict: Dict[str, Any], ctx: PipelineContext
|
|
34
34
|
) -> "FileIngestionCheckpointingProvider":
|
|
35
|
-
config = FileIngestionStateProviderConfig.
|
|
35
|
+
config = FileIngestionStateProviderConfig.model_validate(config_dict)
|
|
36
36
|
return cls(config)
|
|
37
37
|
|
|
38
38
|
def get_latest_checkpoint(
|
|
@@ -9,7 +9,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
|
|
|
9
9
|
import dateutil.parser as dp
|
|
10
10
|
import requests
|
|
11
11
|
import sqlglot
|
|
12
|
-
from pydantic import BaseModel,
|
|
12
|
+
from pydantic import BaseModel, field_validator, model_validator
|
|
13
13
|
from pydantic.fields import Field
|
|
14
14
|
from requests.adapters import HTTPAdapter
|
|
15
15
|
from urllib3.util.retry import Retry
|
|
@@ -246,16 +246,16 @@ class SupersetConfig(
|
|
|
246
246
|
# This is required to allow preset configs to get parsed
|
|
247
247
|
extra = "allow"
|
|
248
248
|
|
|
249
|
-
@
|
|
250
|
-
|
|
249
|
+
@field_validator("connect_uri", "display_uri", mode="after")
|
|
250
|
+
@classmethod
|
|
251
|
+
def remove_trailing_slash(cls, v: str) -> str:
|
|
251
252
|
return config_clean.remove_trailing_slashes(v)
|
|
252
253
|
|
|
253
|
-
@
|
|
254
|
-
def default_display_uri_to_connect_uri(
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
return values
|
|
254
|
+
@model_validator(mode="after")
|
|
255
|
+
def default_display_uri_to_connect_uri(self) -> "SupersetConfig":
|
|
256
|
+
if self.display_uri is None:
|
|
257
|
+
self.display_uri = self.connect_uri
|
|
258
|
+
return self
|
|
259
259
|
|
|
260
260
|
|
|
261
261
|
def get_metric_name(metric):
|
|
@@ -25,7 +25,7 @@ from urllib.parse import quote, urlparse
|
|
|
25
25
|
|
|
26
26
|
import dateutil.parser as dp
|
|
27
27
|
import tableauserverclient as TSC
|
|
28
|
-
from pydantic import
|
|
28
|
+
from pydantic import field_validator, model_validator
|
|
29
29
|
from pydantic.fields import Field
|
|
30
30
|
from requests.adapters import HTTPAdapter
|
|
31
31
|
from tableauserverclient import (
|
|
@@ -257,8 +257,9 @@ class TableauConnectionConfig(ConfigModel):
|
|
|
257
257
|
description="When enabled, extracts column-level lineage from Tableau Datasources",
|
|
258
258
|
)
|
|
259
259
|
|
|
260
|
-
@
|
|
261
|
-
|
|
260
|
+
@field_validator("connect_uri", mode="after")
|
|
261
|
+
@classmethod
|
|
262
|
+
def remove_trailing_slash(cls, v: str) -> str:
|
|
262
263
|
return config_clean.remove_trailing_slashes(v)
|
|
263
264
|
|
|
264
265
|
def get_tableau_auth(
|
|
@@ -652,8 +653,9 @@ class TableauConfig(
|
|
|
652
653
|
"fetch_size",
|
|
653
654
|
)
|
|
654
655
|
|
|
655
|
-
#
|
|
656
|
-
@
|
|
656
|
+
# mode = "before" because we want to take some decision before pydantic initialize the configuration to default values
|
|
657
|
+
@model_validator(mode="before")
|
|
658
|
+
@classmethod
|
|
657
659
|
def projects_backward_compatibility(cls, values: Dict) -> Dict:
|
|
658
660
|
# In-place update of the input dict would cause state contamination. This was discovered through test failures
|
|
659
661
|
# in test_hex.py where the same dict is reused.
|
|
@@ -683,27 +685,23 @@ class TableauConfig(
|
|
|
683
685
|
|
|
684
686
|
return values
|
|
685
687
|
|
|
686
|
-
@
|
|
687
|
-
def validate_config_values(
|
|
688
|
-
tags_for_hidden_assets = values.get("tags_for_hidden_assets")
|
|
689
|
-
ingest_tags = values.get("ingest_tags")
|
|
688
|
+
@model_validator(mode="after")
|
|
689
|
+
def validate_config_values(self) -> "TableauConfig":
|
|
690
690
|
if (
|
|
691
|
-
not ingest_tags
|
|
692
|
-
and tags_for_hidden_assets
|
|
693
|
-
and len(tags_for_hidden_assets) > 0
|
|
691
|
+
not self.ingest_tags
|
|
692
|
+
and self.tags_for_hidden_assets
|
|
693
|
+
and len(self.tags_for_hidden_assets) > 0
|
|
694
694
|
):
|
|
695
695
|
raise ValueError(
|
|
696
696
|
"tags_for_hidden_assets is only allowed with ingest_tags enabled. Be aware that this will overwrite tags entered from the UI."
|
|
697
697
|
)
|
|
698
698
|
|
|
699
|
-
use_email_as_username
|
|
700
|
-
ingest_owner = values.get("ingest_owner")
|
|
701
|
-
if use_email_as_username and not ingest_owner:
|
|
699
|
+
if self.use_email_as_username and not self.ingest_owner:
|
|
702
700
|
raise ValueError(
|
|
703
701
|
"use_email_as_username requires ingest_owner to be enabled."
|
|
704
702
|
)
|
|
705
703
|
|
|
706
|
-
return
|
|
704
|
+
return self
|
|
707
705
|
|
|
708
706
|
|
|
709
707
|
class WorkbookKey(ContainerKey):
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from pydantic import Field, SecretStr
|
|
2
|
+
|
|
3
|
+
from datahub.configuration import ConfigModel
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class AzureAuthConfig(ConfigModel):
|
|
7
|
+
client_secret: SecretStr = Field(
|
|
8
|
+
description="Azure application client secret used for authentication. This is a confidential credential that should be kept secure."
|
|
9
|
+
)
|
|
10
|
+
client_id: str = Field(
|
|
11
|
+
description="Azure application (client) ID. This is the unique identifier for the registered Azure AD application.",
|
|
12
|
+
)
|
|
13
|
+
tenant_id: str = Field(
|
|
14
|
+
description="Azure tenant (directory) ID. This identifies the Azure AD tenant where the application is registered.",
|
|
15
|
+
)
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import os
|
|
3
3
|
from datetime import datetime, timedelta, timezone
|
|
4
|
-
from typing import
|
|
4
|
+
from typing import Dict, List, Optional, Union
|
|
5
5
|
|
|
6
6
|
import pydantic
|
|
7
|
-
from pydantic import Field
|
|
7
|
+
from pydantic import Field, field_validator, model_validator
|
|
8
8
|
from typing_extensions import Literal
|
|
9
9
|
|
|
10
10
|
from datahub.configuration.common import (
|
|
@@ -397,13 +397,15 @@ class UnityCatalogSourceConfig(
|
|
|
397
397
|
default=None, description="Unity Catalog Stateful Ingestion Config."
|
|
398
398
|
)
|
|
399
399
|
|
|
400
|
-
@
|
|
400
|
+
@field_validator("start_time", mode="after")
|
|
401
|
+
@classmethod
|
|
401
402
|
def within_thirty_days(cls, v: datetime) -> datetime:
|
|
402
403
|
if (datetime.now(timezone.utc) - v).days > 30:
|
|
403
404
|
raise ValueError("Query history is only maintained for 30 days.")
|
|
404
405
|
return v
|
|
405
406
|
|
|
406
|
-
@
|
|
407
|
+
@field_validator("workspace_url", mode="after")
|
|
408
|
+
@classmethod
|
|
407
409
|
def workspace_url_should_start_with_http_scheme(cls, workspace_url: str) -> str:
|
|
408
410
|
if not workspace_url.lower().startswith(("http://", "https://")):
|
|
409
411
|
raise ValueError(
|
|
@@ -411,7 +413,26 @@ class UnityCatalogSourceConfig(
|
|
|
411
413
|
)
|
|
412
414
|
return workspace_url
|
|
413
415
|
|
|
414
|
-
@
|
|
416
|
+
@model_validator(mode="before")
|
|
417
|
+
def either_token_or_azure_auth_provided(cls, values: dict) -> dict:
|
|
418
|
+
token = values.get("token")
|
|
419
|
+
azure_auth = values.get("azure_auth")
|
|
420
|
+
|
|
421
|
+
# Check if exactly one of the authentication methods is provided
|
|
422
|
+
if not token and not azure_auth:
|
|
423
|
+
raise ValueError(
|
|
424
|
+
"Either 'azure_auth' or 'token' (personal access token) must be provided in the configuration."
|
|
425
|
+
)
|
|
426
|
+
|
|
427
|
+
if token and azure_auth:
|
|
428
|
+
raise ValueError(
|
|
429
|
+
"Cannot specify both 'token' and 'azure_auth'. Please provide only one authentication method."
|
|
430
|
+
)
|
|
431
|
+
|
|
432
|
+
return values
|
|
433
|
+
|
|
434
|
+
@field_validator("include_metastore", mode="after")
|
|
435
|
+
@classmethod
|
|
415
436
|
def include_metastore_warning(cls, v: bool) -> bool:
|
|
416
437
|
if v:
|
|
417
438
|
msg = (
|
|
@@ -424,60 +445,56 @@ class UnityCatalogSourceConfig(
|
|
|
424
445
|
add_global_warning(msg)
|
|
425
446
|
return v
|
|
426
447
|
|
|
427
|
-
@
|
|
428
|
-
def set_warehouse_id_from_profiling(
|
|
429
|
-
profiling
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
if not values.get("warehouse_id") and profiling and profiling.warehouse_id:
|
|
433
|
-
values["warehouse_id"] = profiling.warehouse_id
|
|
448
|
+
@model_validator(mode="after")
|
|
449
|
+
def set_warehouse_id_from_profiling(self):
|
|
450
|
+
profiling = self.profiling
|
|
451
|
+
if not self.warehouse_id and profiling and profiling.warehouse_id:
|
|
452
|
+
self.warehouse_id = profiling.warehouse_id
|
|
434
453
|
if (
|
|
435
|
-
|
|
454
|
+
self.warehouse_id
|
|
436
455
|
and profiling
|
|
437
456
|
and profiling.warehouse_id
|
|
438
|
-
and
|
|
457
|
+
and self.warehouse_id != profiling.warehouse_id
|
|
439
458
|
):
|
|
440
459
|
raise ValueError(
|
|
441
460
|
"When `warehouse_id` is set, it must match the `warehouse_id` in `profiling`."
|
|
442
461
|
)
|
|
443
462
|
|
|
444
|
-
if
|
|
445
|
-
profiling.warehouse_id =
|
|
463
|
+
if self.warehouse_id and profiling and not profiling.warehouse_id:
|
|
464
|
+
profiling.warehouse_id = self.warehouse_id
|
|
446
465
|
|
|
447
466
|
if profiling and profiling.enabled and not profiling.warehouse_id:
|
|
448
467
|
raise ValueError("warehouse_id must be set when profiling is enabled.")
|
|
449
468
|
|
|
450
|
-
return
|
|
469
|
+
return self
|
|
451
470
|
|
|
452
|
-
@
|
|
453
|
-
def validate_lineage_data_source_with_warehouse(
|
|
454
|
-
|
|
455
|
-
) -> Dict[str, Any]:
|
|
456
|
-
lineage_data_source = values.get("lineage_data_source", LineageDataSource.AUTO)
|
|
457
|
-
warehouse_id = values.get("warehouse_id")
|
|
471
|
+
@model_validator(mode="after")
|
|
472
|
+
def validate_lineage_data_source_with_warehouse(self):
|
|
473
|
+
lineage_data_source = self.lineage_data_source or LineageDataSource.AUTO
|
|
458
474
|
|
|
459
|
-
if
|
|
475
|
+
if (
|
|
476
|
+
lineage_data_source == LineageDataSource.SYSTEM_TABLES
|
|
477
|
+
and not self.warehouse_id
|
|
478
|
+
):
|
|
460
479
|
raise ValueError(
|
|
461
480
|
f"lineage_data_source='{LineageDataSource.SYSTEM_TABLES.value}' requires warehouse_id to be set"
|
|
462
481
|
)
|
|
463
482
|
|
|
464
|
-
return
|
|
483
|
+
return self
|
|
465
484
|
|
|
466
|
-
@
|
|
467
|
-
def validate_usage_data_source_with_warehouse(
|
|
468
|
-
|
|
469
|
-
) -> Dict[str, Any]:
|
|
470
|
-
usage_data_source = values.get("usage_data_source", UsageDataSource.AUTO)
|
|
471
|
-
warehouse_id = values.get("warehouse_id")
|
|
485
|
+
@model_validator(mode="after")
|
|
486
|
+
def validate_usage_data_source_with_warehouse(self):
|
|
487
|
+
usage_data_source = self.usage_data_source or UsageDataSource.AUTO
|
|
472
488
|
|
|
473
|
-
if usage_data_source == UsageDataSource.SYSTEM_TABLES and not warehouse_id:
|
|
489
|
+
if usage_data_source == UsageDataSource.SYSTEM_TABLES and not self.warehouse_id:
|
|
474
490
|
raise ValueError(
|
|
475
491
|
f"usage_data_source='{UsageDataSource.SYSTEM_TABLES.value}' requires warehouse_id to be set"
|
|
476
492
|
)
|
|
477
493
|
|
|
478
|
-
return
|
|
494
|
+
return self
|
|
479
495
|
|
|
480
|
-
@
|
|
496
|
+
@field_validator("schema_pattern", mode="before")
|
|
497
|
+
@classmethod
|
|
481
498
|
def schema_pattern_should__always_deny_information_schema(
|
|
482
499
|
cls, v: AllowDenyPattern
|
|
483
500
|
) -> AllowDenyPattern:
|
|
@@ -8,6 +8,7 @@ from pydantic import Field
|
|
|
8
8
|
|
|
9
9
|
from datahub.configuration.common import ConfigModel
|
|
10
10
|
from datahub.ingestion.source.sql.sqlalchemy_uri import make_sqlalchemy_uri
|
|
11
|
+
from datahub.ingestion.source.unity.azure_auth_config import AzureAuthConfig
|
|
11
12
|
|
|
12
13
|
DATABRICKS = "databricks"
|
|
13
14
|
|
|
@@ -19,7 +20,12 @@ class UnityCatalogConnectionConfig(ConfigModel):
|
|
|
19
20
|
"""
|
|
20
21
|
|
|
21
22
|
scheme: str = DATABRICKS
|
|
22
|
-
token: str = pydantic.Field(
|
|
23
|
+
token: Optional[str] = pydantic.Field(
|
|
24
|
+
default=None, description="Databricks personal access token"
|
|
25
|
+
)
|
|
26
|
+
azure_auth: Optional[AzureAuthConfig] = Field(
|
|
27
|
+
default=None, description="Azure configuration"
|
|
28
|
+
)
|
|
23
29
|
workspace_url: str = pydantic.Field(
|
|
24
30
|
description="Databricks workspace url. e.g. https://my-workspace.cloud.databricks.com"
|
|
25
31
|
)
|
|
@@ -16,10 +16,10 @@ class UnityCatalogConnectionTest:
|
|
|
16
16
|
self.report = UnityCatalogReport()
|
|
17
17
|
self.proxy = UnityCatalogApiProxy(
|
|
18
18
|
self.config.workspace_url,
|
|
19
|
-
self.config.token,
|
|
20
19
|
self.config.profiling.warehouse_id,
|
|
21
20
|
report=self.report,
|
|
22
21
|
databricks_api_page_size=self.config.databricks_api_page_size,
|
|
22
|
+
personal_access_token=self.config.token,
|
|
23
23
|
)
|
|
24
24
|
|
|
25
25
|
def get_connection_test(self) -> TestConnectionReport:
|