acryl-datahub 1.3.1__py3-none-any.whl → 1.3.1.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only and reflects the changes between those published versions.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/METADATA +2582 -2582
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/RECORD +203 -201
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +2 -2
- datahub/api/entities/corpgroup/corpgroup.py +11 -6
- datahub/api/entities/corpuser/corpuser.py +11 -11
- datahub/api/entities/dataproduct/dataproduct.py +47 -27
- datahub/api/entities/dataset/dataset.py +32 -21
- datahub/api/entities/external/lake_formation_external_entites.py +5 -6
- datahub/api/entities/external/unity_catalog_external_entites.py +5 -7
- datahub/api/entities/forms/forms.py +16 -14
- datahub/api/entities/structuredproperties/structuredproperties.py +23 -16
- datahub/cli/check_cli.py +2 -2
- datahub/cli/config_utils.py +3 -3
- datahub/cli/lite_cli.py +9 -7
- datahub/cli/migrate.py +4 -4
- datahub/cli/quickstart_versioning.py +3 -3
- datahub/cli/specific/group_cli.py +1 -1
- datahub/cli/specific/structuredproperties_cli.py +1 -1
- datahub/cli/specific/user_cli.py +1 -1
- datahub/configuration/common.py +14 -2
- datahub/configuration/connection_resolver.py +2 -2
- datahub/configuration/git.py +47 -30
- datahub/configuration/import_resolver.py +2 -2
- datahub/configuration/kafka.py +4 -3
- datahub/configuration/time_window_config.py +26 -26
- datahub/configuration/validate_field_deprecation.py +2 -2
- datahub/configuration/validate_field_removal.py +2 -2
- datahub/configuration/validate_field_rename.py +2 -2
- datahub/configuration/validate_multiline_string.py +2 -1
- datahub/emitter/kafka_emitter.py +3 -1
- datahub/emitter/rest_emitter.py +2 -4
- datahub/ingestion/api/decorators.py +1 -1
- datahub/ingestion/api/report.py +1 -1
- datahub/ingestion/api/sink.py +1 -1
- datahub/ingestion/api/source.py +1 -1
- datahub/ingestion/glossary/datahub_classifier.py +11 -8
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
- datahub/ingestion/reporting/file_reporter.py +5 -4
- datahub/ingestion/run/pipeline.py +6 -6
- datahub/ingestion/run/pipeline_config.py +12 -14
- datahub/ingestion/run/sink_callback.py +1 -1
- datahub/ingestion/sink/datahub_rest.py +6 -4
- datahub/ingestion/source/abs/config.py +19 -19
- datahub/ingestion/source/abs/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/abs/source.py +2 -2
- datahub/ingestion/source/aws/aws_common.py +1 -1
- datahub/ingestion/source/aws/glue.py +6 -4
- datahub/ingestion/source/aws/sagemaker.py +1 -1
- datahub/ingestion/source/azure/azure_common.py +8 -12
- datahub/ingestion/source/bigquery_v2/bigquery.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +43 -30
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -1
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/common/gcp_credentials_config.py +10 -10
- datahub/ingestion/source/data_lake_common/path_spec.py +85 -89
- datahub/ingestion/source/datahub/config.py +8 -8
- datahub/ingestion/source/datahub/datahub_source.py +1 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +9 -3
- datahub/ingestion/source/dbt/dbt_common.py +39 -37
- datahub/ingestion/source/dbt/dbt_core.py +10 -12
- datahub/ingestion/source/debug/datahub_debug.py +1 -1
- datahub/ingestion/source/delta_lake/config.py +6 -4
- datahub/ingestion/source/dremio/dremio_config.py +10 -6
- datahub/ingestion/source/dremio/dremio_source.py +15 -15
- datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
- datahub/ingestion/source/elastic_search.py +4 -3
- datahub/ingestion/source/excel/source.py +1 -1
- datahub/ingestion/source/feast.py +1 -1
- datahub/ingestion/source/file.py +5 -4
- datahub/ingestion/source/fivetran/config.py +17 -16
- datahub/ingestion/source/fivetran/fivetran.py +2 -2
- datahub/ingestion/source/gc/datahub_gc.py +1 -1
- datahub/ingestion/source/gcs/gcs_source.py +8 -10
- datahub/ingestion/source/ge_profiling_config.py +8 -5
- datahub/ingestion/source/grafana/grafana_api.py +2 -2
- datahub/ingestion/source/grafana/grafana_config.py +4 -3
- datahub/ingestion/source/grafana/grafana_source.py +1 -1
- datahub/ingestion/source/grafana/models.py +23 -5
- datahub/ingestion/source/hex/api.py +7 -5
- datahub/ingestion/source/hex/hex.py +4 -3
- datahub/ingestion/source/iceberg/iceberg.py +1 -1
- datahub/ingestion/source/iceberg/iceberg_common.py +5 -3
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +10 -10
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +7 -5
- datahub/ingestion/source/looker/looker_config.py +21 -20
- datahub/ingestion/source/looker/lookml_config.py +47 -47
- datahub/ingestion/source/metabase.py +8 -8
- datahub/ingestion/source/metadata/business_glossary.py +2 -2
- datahub/ingestion/source/metadata/lineage.py +13 -8
- datahub/ingestion/source/mlflow.py +1 -1
- datahub/ingestion/source/mode.py +6 -4
- datahub/ingestion/source/mongodb.py +4 -3
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +17 -23
- datahub/ingestion/source/openapi.py +6 -8
- datahub/ingestion/source/powerbi/config.py +33 -32
- datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +2 -2
- datahub/ingestion/source/powerbi/powerbi.py +1 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +2 -2
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +8 -6
- datahub/ingestion/source/preset.py +8 -8
- datahub/ingestion/source/pulsar.py +1 -1
- datahub/ingestion/source/qlik_sense/data_classes.py +15 -8
- datahub/ingestion/source/qlik_sense/qlik_api.py +7 -7
- datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
- datahub/ingestion/source/redshift/config.py +18 -20
- datahub/ingestion/source/redshift/redshift.py +2 -2
- datahub/ingestion/source/redshift/usage.py +23 -3
- datahub/ingestion/source/s3/config.py +83 -62
- datahub/ingestion/source/s3/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/s3/source.py +8 -5
- datahub/ingestion/source/sac/sac.py +5 -4
- datahub/ingestion/source/salesforce.py +3 -2
- datahub/ingestion/source/schema/json_schema.py +2 -2
- datahub/ingestion/source/sigma/data_classes.py +3 -2
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/sigma/sigma_api.py +7 -7
- datahub/ingestion/source/slack/slack.py +1 -1
- datahub/ingestion/source/snaplogic/snaplogic.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_assertion.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +35 -31
- datahub/ingestion/source/snowflake/snowflake_connection.py +35 -13
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_queries.py +1 -1
- datahub/ingestion/source/sql/athena.py +1 -1
- datahub/ingestion/source/sql/clickhouse.py +4 -2
- datahub/ingestion/source/sql/cockroachdb.py +1 -1
- datahub/ingestion/source/sql/druid.py +1 -1
- datahub/ingestion/source/sql/hana.py +1 -1
- datahub/ingestion/source/sql/hive.py +7 -5
- datahub/ingestion/source/sql/hive_metastore.py +1 -1
- datahub/ingestion/source/sql/mssql/source.py +13 -6
- datahub/ingestion/source/sql/mysql.py +1 -1
- datahub/ingestion/source/sql/oracle.py +17 -10
- datahub/ingestion/source/sql/postgres.py +2 -2
- datahub/ingestion/source/sql/presto.py +1 -1
- datahub/ingestion/source/sql/sql_config.py +8 -9
- datahub/ingestion/source/sql/sql_generic.py +1 -1
- datahub/ingestion/source/sql/teradata.py +1 -1
- datahub/ingestion/source/sql/trino.py +1 -1
- datahub/ingestion/source/sql/vertica.py +5 -4
- datahub/ingestion/source/sql_queries.py +11 -8
- datahub/ingestion/source/state/checkpoint.py +2 -2
- datahub/ingestion/source/state/entity_removal_state.py +2 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +55 -45
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/superset.py +9 -9
- datahub/ingestion/source/tableau/tableau.py +14 -16
- datahub/ingestion/source/unity/azure_auth_config.py +15 -0
- datahub/ingestion/source/unity/config.py +51 -34
- datahub/ingestion/source/unity/connection.py +7 -1
- datahub/ingestion/source/unity/connection_test.py +1 -1
- datahub/ingestion/source/unity/proxy.py +216 -7
- datahub/ingestion/source/unity/proxy_types.py +91 -0
- datahub/ingestion/source/unity/source.py +29 -3
- datahub/ingestion/source/usage/clickhouse_usage.py +1 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +1 -1
- datahub/ingestion/source/usage/usage_common.py +5 -3
- datahub/ingestion/source_config/csv_enricher.py +7 -6
- datahub/ingestion/source_config/operation_config.py +7 -4
- datahub/ingestion/source_config/pulsar.py +11 -15
- datahub/ingestion/transformer/add_dataset_browse_path.py +1 -1
- datahub/ingestion/transformer/add_dataset_dataproduct.py +6 -5
- datahub/ingestion/transformer/add_dataset_ownership.py +3 -3
- datahub/ingestion/transformer/add_dataset_properties.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_tags.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_terms.py +2 -2
- datahub/ingestion/transformer/add_dataset_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +1 -1
- datahub/ingestion/transformer/extract_dataset_tags.py +1 -1
- datahub/ingestion/transformer/extract_ownership_from_tags.py +1 -1
- datahub/ingestion/transformer/mark_dataset_status.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_ownership.py +1 -1
- datahub/ingestion/transformer/remove_dataset_ownership.py +1 -1
- datahub/ingestion/transformer/replace_external_url.py +2 -2
- datahub/ingestion/transformer/set_browse_path.py +1 -1
- datahub/ingestion/transformer/tags_to_terms.py +1 -1
- datahub/lite/duckdb_lite.py +1 -1
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/schema.avsc +7 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -1
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc +6 -1
- datahub/sdk/__init__.py +1 -0
- datahub/sdk/_all_entities.py +2 -0
- datahub/sdk/search_filters.py +68 -40
- datahub/sdk/tag.py +112 -0
- datahub/secret/datahub_secret_store.py +7 -4
- datahub/secret/file_secret_store.py +1 -1
- datahub/sql_parsing/sqlglot_lineage.py +5 -2
- datahub/testing/check_sql_parser_result.py +2 -2
- datahub/utilities/ingest_utils.py +1 -1
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/top_level.txt +0 -0

datahub/ingestion/source/snowflake/snowflake_connection.py

@@ -6,6 +6,7 @@ import pydantic
 import snowflake.connector
 from cryptography.hazmat.backends import default_backend
 from cryptography.hazmat.primitives import serialization
+from pydantic import field_validator, model_validator
 from snowflake.connector import SnowflakeConnection as NativeSnowflakeConnection
 from snowflake.connector.cursor import DictCursor
 from snowflake.connector.network import (

@@ -125,26 +126,28 @@ class SnowflakeConnectionConfig(ConfigModel):
 
     rename_host_port_to_account_id = pydantic_renamed_field("host_port", "account_id") # type: ignore[pydantic-field]
 
-    @
-
+    @field_validator("account_id", mode="after")
+    @classmethod
+    def validate_account_id(cls, account_id: str, info: pydantic.ValidationInfo) -> str:
         account_id = remove_protocol(account_id)
         account_id = remove_trailing_slashes(account_id)
         # Get the domain from config, fallback to default
-        domain =
+        domain = info.data.get("snowflake_domain", DEFAULT_SNOWFLAKE_DOMAIN)
         snowflake_host_suffix = f".{domain}"
         account_id = remove_suffix(account_id, snowflake_host_suffix)
         return account_id
 
-    @
-
+    @field_validator("authentication_type", mode="before")
+    @classmethod
+    def authenticator_type_is_valid(cls, v: Any, info: pydantic.ValidationInfo) -> Any:
         if v not in _VALID_AUTH_TYPES:
             raise ValueError(
                 f"unsupported authenticator type '{v}' was provided,"
                 f" use one of {list(_VALID_AUTH_TYPES.keys())}"
             )
         if (
-
-            or
+            info.data.get("private_key") is not None
+            or info.data.get("private_key_path") is not None
         ) and v != "KEY_PAIR_AUTHENTICATOR":
             raise ValueError(
                 f"Either `private_key` and `private_key_path` is set but `authentication_type` is {v}. "

@@ -153,21 +156,22 @@ class SnowflakeConnectionConfig(ConfigModel):
         if v == "KEY_PAIR_AUTHENTICATOR":
             # If we are using key pair auth, we need the private key path and password to be set
             if (
-
-                and
+                info.data.get("private_key") is None
+                and info.data.get("private_key_path") is None
             ):
                 raise ValueError(
                     f"Both `private_key` and `private_key_path` are none. "
                     f"At least one should be set when using {v} authentication"
                 )
         elif v == "OAUTH_AUTHENTICATOR":
-            cls._check_oauth_config(
+            cls._check_oauth_config(info.data.get("oauth_config"))
         logger.info(f"using authenticator type '{v}'")
         return v
 
-    @
-
-
+    @field_validator("token", mode="before")
+    @classmethod
+    def validate_token_oauth_config(cls, v: Any, info: pydantic.ValidationInfo) -> Any:
+        auth_type = info.data.get("authentication_type")
         if auth_type == "OAUTH_AUTHENTICATOR_TOKEN":
             if not v:
                 raise ValueError("Token required for OAUTH_AUTHENTICATOR_TOKEN.")

@@ -177,6 +181,24 @@ class SnowflakeConnectionConfig(ConfigModel):
             )
         return v
 
+    @model_validator(mode="after")
+    def validate_authentication_config(self):
+        """Validate authentication configuration consistency."""
+        # Check token requirement for OAUTH_AUTHENTICATOR_TOKEN
+        if self.authentication_type == "OAUTH_AUTHENTICATOR_TOKEN":
+            if not self.token:
+                raise ValueError("Token required for OAUTH_AUTHENTICATOR_TOKEN.")
+
+        # Check private key authentication consistency
+        if self.private_key is not None or self.private_key_path is not None:
+            if self.authentication_type != "KEY_PAIR_AUTHENTICATOR":
+                raise ValueError(
+                    f"Either `private_key` and `private_key_path` is set but `authentication_type` is {self.authentication_type}. "
+                    f"Should be set to 'KEY_PAIR_AUTHENTICATOR' when using key pair authentication"
+                )
+
+        return self
+
     @staticmethod
     def _check_oauth_config(oauth_config: Optional[OAuthConfiguration]) -> None:
         if oauth_config is None:
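
The snowflake_connection.py hunks above replace pydantic v1 validators with pydantic v2's @field_validator, which reads previously validated sibling fields through ValidationInfo.data, and they add a @model_validator(mode="after") that checks the fully constructed model and returns it. The following standalone sketch, not code from the package, shows the same two patterns on a hypothetical ExampleConnectionConfig:

from typing import Optional

import pydantic
from pydantic import BaseModel, field_validator, model_validator


class ExampleConnectionConfig(BaseModel):
    domain: str = "snowflakecomputing.com"  # declared first so it is visible below
    account_id: str
    authentication_type: str = "DEFAULT_AUTHENTICATOR"
    private_key: Optional[str] = None

    @field_validator("account_id", mode="after")
    @classmethod
    def strip_host_suffix(cls, v: str, info: pydantic.ValidationInfo) -> str:
        # info.data holds the fields validated before this one.
        suffix = "." + info.data.get("domain", "snowflakecomputing.com")
        return v[: -len(suffix)] if v.endswith(suffix) else v

    @model_validator(mode="after")
    def check_key_pair_auth(self) -> "ExampleConnectionConfig":
        # An "after" model validator sees the whole instance and must return it.
        if self.private_key is not None and self.authentication_type != "KEY_PAIR_AUTHENTICATOR":
            raise ValueError("private_key requires KEY_PAIR_AUTHENTICATOR")
        return self


print(ExampleConnectionConfig(account_id="acme-xy12345.snowflakecomputing.com").account_id)
# -> acme-xy12345

This is why the new validate_authentication_config above ends with return self: "after" validators operate on the instance rather than on a raw values dict.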

datahub/ingestion/source/snowflake/snowflake_lineage_v2.py

@@ -14,7 +14,7 @@ from typing import (
     Type,
 )
 
-from pydantic import BaseModel, Field,
+from pydantic import BaseModel, Field, field_validator
 
 from datahub.configuration.datetimes import parse_absolute_time
 from datahub.ingestion.api.closeable import Closeable

@@ -70,7 +70,7 @@ def pydantic_parse_json(field: str) -> "V1Validator":
             return json.loads(v)
         return v
 
-    return
+    return field_validator(field, mode="before")(_parse_from_json)
 
 
 class UpstreamColumnNode(BaseModel):

@@ -379,7 +379,7 @@ class SnowflakeLineageExtractor(SnowflakeCommonMixin, Closeable):
             # To avoid that causing a pydantic error we are setting it to an empty list
             # instead of a list with an empty object
             db_row["QUERIES"] = "[]"
-            return UpstreamLineageEdge.
+            return UpstreamLineageEdge.model_validate(db_row)
         except Exception as e:
             self.report.num_upstream_lineage_edge_parsing_failed += 1
             upstream_tables = db_row.get("UPSTREAM_TABLES")
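
The snowflake_lineage_v2.py hunks keep the pydantic_parse_json helper but have it return field_validator(field, mode="before")(_parse_from_json), i.e. a validator built programmatically and attached to a model by assignment, and they switch UpstreamLineageEdge parsing to model_validate. A minimal sketch of the factory idea, using a hypothetical Edge model rather than the package's classes:

import json
from typing import Any, List

from pydantic import BaseModel, field_validator


def pydantic_parse_json(field: str):
    # Returns a reusable "before" validator that decodes JSON strings into
    # Python objects before normal field validation runs.
    def _parse_from_json(cls, v: Any) -> Any:
        if isinstance(v, str):
            return json.loads(v)
        return v

    return field_validator(field, mode="before")(_parse_from_json)


class Edge(BaseModel):
    upstream_tables: List[str] = []

    parse_upstreams = pydantic_parse_json("upstream_tables")


print(Edge.model_validate({"upstream_tables": '["db.schema.a", "db.schema.b"]'}))

Pydantic picks up the returned validator when it is assigned in a model's class body, which is how the helper above appears to be used.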

datahub/ingestion/source/snowflake/snowflake_queries.py

@@ -806,7 +806,7 @@ class SnowflakeQueriesSource(Source):
 
     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> Self:
-        config = SnowflakeQueriesSourceConfig.
+        config = SnowflakeQueriesSourceConfig.model_validate(config_dict)
         return cls(ctx, config)
 
     def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:

datahub/ingestion/source/sql/athena.py

@@ -386,7 +386,7 @@ class AthenaSource(SQLAlchemySource):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = AthenaConfig.
+        config = AthenaConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     # overwrite this method to allow to specify the usage of a custom dialect
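
As in the SnowflakeQueriesSource and AthenaSource hunks just above, most of the remaining changes swap the truncated v1 dict-parsing call on the removed line for model_validate, pydantic v2's method for validating a raw dict into a model instance. A short sketch of the create-factory pattern with hypothetical names:

from typing import Any, Dict

from pydantic import BaseModel


class ExampleSourceConfig(BaseModel):
    host_port: str
    include_views: bool = True


class ExampleSource:
    def __init__(self, config: ExampleSourceConfig) -> None:
        self.config = config

    @classmethod
    def create(cls, config_dict: Dict[str, Any]) -> "ExampleSource":
        # model_validate validates the recipe dict and raises
        # pydantic.ValidationError on bad input.
        config = ExampleSourceConfig.model_validate(config_dict)
        return cls(config)


source = ExampleSource.create({"host_port": "localhost:9092"})
print(source.config.include_views)  # True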

datahub/ingestion/source/sql/clickhouse.py

@@ -10,6 +10,7 @@ import clickhouse_sqlalchemy.types as custom_types
 import pydantic
 from clickhouse_sqlalchemy.drivers import base
 from clickhouse_sqlalchemy.drivers.base import ClickHouseDialect
+from pydantic import model_validator
 from pydantic.fields import Field
 from sqlalchemy import create_engine, text
 from sqlalchemy.engine import reflection

@@ -175,7 +176,8 @@ class ClickHouseConfig(
         return str(url)
 
     # pre = True because we want to take some decision before pydantic initialize the configuration to default values
-    @
+    @model_validator(mode="before")
+    @classmethod
     def projects_backward_compatibility(cls, values: Dict) -> Dict:
         secure = values.get("secure")
         protocol = values.get("protocol")

@@ -423,7 +425,7 @@ class ClickHouseSource(TwoTierSQLAlchemySource):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = ClickHouseConfig.
+        config = ClickHouseConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:
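
The ClickHouseConfig hunk uses @model_validator(mode="before") together with @classmethod: a "before" model validator receives the raw input mapping ahead of any field validation and may rewrite it, which is how the backward-compatibility shim works. A standalone sketch with a hypothetical config and illustrative option names:

from typing import Any, Dict

from pydantic import BaseModel, model_validator


class ExampleClickHouseConfig(BaseModel):
    secure: bool = False
    protocol: str = "native"

    @model_validator(mode="before")
    @classmethod
    def projects_backward_compatibility(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        # Rewrite the raw input, e.g. to honour a deprecated option,
        # before pydantic validates individual fields.
        if values.get("secure") and "protocol" not in values:
            values["protocol"] = "https"
        return values


print(ExampleClickHouseConfig.model_validate({"secure": True}).protocol)  # https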

datahub/ingestion/source/sql/hive.py

@@ -6,7 +6,7 @@ from enum import Enum
 from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
 from urllib.parse import urlparse
 
-from pydantic import
+from pydantic import field_validator
 from pydantic.fields import Field
 
 # This import verifies that the dependencies are available.

@@ -674,11 +674,13 @@ class HiveConfig(TwoTierSQLAlchemyConfig):
         description="Platform instance for the storage system",
     )
 
-    @
-
+    @field_validator("host_port", mode="after")
+    @classmethod
+    def clean_host_port(cls, v: str) -> str:
         return config_clean.remove_protocol(v)
 
-    @
+    @field_validator("hive_storage_lineage_direction", mode="after")
+    @classmethod
     def _validate_direction(cls, v: str) -> str:
         """Validate the lineage direction."""
         if v.lower() not in ["upstream", "downstream"]:

@@ -725,7 +727,7 @@ class HiveSource(TwoTierSQLAlchemySource):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = HiveConfig.
+        config = HiveConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:

datahub/ingestion/source/sql/hive_metastore.py

@@ -351,7 +351,7 @@ class HiveMetastoreSource(SQLAlchemySource):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = HiveMetastore.
+        config = HiveMetastore.model_validate(config_dict)
         return cls(config, ctx)
 
     def gen_database_containers(

datahub/ingestion/source/sql/mssql/source.py

@@ -3,8 +3,8 @@ import re
 import urllib.parse
 from typing import Any, Dict, Iterable, List, Optional, Tuple
 
-import pydantic
 import sqlalchemy.dialects.mssql
+from pydantic import ValidationInfo, field_validator
 from pydantic.fields import Field
 from sqlalchemy import create_engine, inspect
 from sqlalchemy.engine.base import Connection

@@ -140,11 +140,18 @@ class SQLServerConfig(BasicSQLAlchemyConfig):
         description="Indicates if the SQL Server instance is running on AWS RDS. When None (default), automatic detection will be attempted using server name analysis.",
     )
 
-    @
-
-
+    @field_validator("uri_args", mode="after")
+    @classmethod
+    def passwords_match(
+        cls, v: Dict[str, Any], info: ValidationInfo, **kwargs: Any
+    ) -> Dict[str, Any]:
+        if (
+            info.data["use_odbc"]
+            and not info.data["sqlalchemy_uri"]
+            and "driver" not in v
+        ):
             raise ValueError("uri_args must contain a 'driver' option")
-        elif not
+        elif not info.data["use_odbc"] and v:
             raise ValueError("uri_args is not supported when ODBC is disabled")
         return v
 

@@ -314,7 +321,7 @@ class SQLServerSource(SQLAlchemySource):
 
     @classmethod
     def create(cls, config_dict: Dict, ctx: PipelineContext) -> "SQLServerSource":
-        config = SQLServerConfig.
+        config = SQLServerConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     # override to get table descriptions

datahub/ingestion/source/sql/mysql.py

@@ -150,7 +150,7 @@ class MySQLSource(TwoTierSQLAlchemySource):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = MySQLConfig.
+        config = MySQLConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def _setup_rds_iam_event_listener(

datahub/ingestion/source/sql/oracle.py

@@ -10,8 +10,8 @@ from typing import Any, Dict, Iterable, List, NoReturn, Optional, Tuple, Union,
 from unittest.mock import patch
 
 import oracledb
-import pydantic
 import sqlalchemy.engine
+from pydantic import ValidationInfo, field_validator
 from pydantic.fields import Field
 from sqlalchemy import event, sql
 from sqlalchemy.dialects.oracle.base import ischema_names

@@ -101,25 +101,32 @@ class OracleConfig(BasicSQLAlchemyConfig):
         "On Linux, this value is ignored, as ldconfig or LD_LIBRARY_PATH will define the location.",
     )
 
-    @
-
-
+    @field_validator("service_name", mode="after")
+    @classmethod
+    def check_service_name(
+        cls, v: Optional[str], info: ValidationInfo
+    ) -> Optional[str]:
+        if info.data.get("database") and v:
             raise ValueError(
                 "specify one of 'database' and 'service_name', but not both"
             )
         return v
 
-    @
-
+    @field_validator("data_dictionary_mode", mode="after")
+    @classmethod
+    def check_data_dictionary_mode(cls, value: str) -> str:
         if value not in ("ALL", "DBA"):
             raise ValueError("Specify one of data dictionary views mode: 'ALL', 'DBA'.")
         return value
 
-    @
-
+    @field_validator("thick_mode_lib_dir", mode="before")
+    @classmethod
+    def check_thick_mode_lib_dir(
+        cls, v: Optional[str], info: ValidationInfo
+    ) -> Optional[str]:
         if (
             v is None
-            and
+            and info.data.get("enable_thick_mode")
             and (platform.system() == "Darwin" or platform.system() == "Windows")
         ):
             raise ValueError(

@@ -659,7 +666,7 @@ class OracleSource(SQLAlchemySource):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = OracleConfig.
+        config = OracleConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def get_db_name(self, inspector: Inspector) -> str:
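
Validators such as check_service_name and check_thick_mode_lib_dir read sibling options via info.data. In pydantic v2, ValidationInfo.data only contains fields that have already been validated, i.e. fields declared before the one being validated, so cross-field checks of this kind depend on declaration order. A small illustrative sketch (a hypothetical model, not the package's OracleConfig):

from typing import Optional

from pydantic import BaseModel, ValidationInfo, field_validator


class ExampleOracleConfig(BaseModel):
    database: Optional[str] = None  # declared before service_name, so visible below
    service_name: Optional[str] = None

    @field_validator("service_name", mode="after")
    @classmethod
    def check_service_name(cls, v: Optional[str], info: ValidationInfo) -> Optional[str]:
        # "database" is already in info.data here; the reverse lookup would not be.
        if info.data.get("database") and v:
            raise ValueError("specify one of 'database' and 'service_name', but not both")
        return v


ExampleOracleConfig(service_name="orclpdb1")  # ok; setting both fields raises ValidationError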

datahub/ingestion/source/sql/postgres.py

@@ -212,7 +212,7 @@ class PostgresSource(SQLAlchemySource):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = PostgresConfig.
+        config = PostgresConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def _setup_rds_iam_event_listener(

@@ -288,7 +288,7 @@ class PostgresSource(SQLAlchemySource):
             return {}
 
         for row in results:
-            data.append(ViewLineageEntry.
+            data.append(ViewLineageEntry.model_validate(row))
 
         lineage_elements: Dict[Tuple[str, str], List[str]] = defaultdict(list)
         # Loop over the lineages in the JSON data.

datahub/ingestion/source/sql/sql_config.py

@@ -3,7 +3,7 @@ from abc import abstractmethod
 from typing import Any, Dict, Optional
 
 import pydantic
-from pydantic import Field
+from pydantic import Field, model_validator
 
 from datahub.configuration.common import AllowDenyPattern, ConfigModel
 from datahub.configuration.source_common import (

@@ -49,7 +49,8 @@ class SQLFilterConfig(ConfigModel):
         description="Regex patterns for views to filter in ingestion. Note: Defaults to table_pattern if not specified. Specify regex to match the entire view name in database.schema.view format. e.g. to match all views starting with customer in Customer database and public schema, use the regex 'Customer.public.customer.*'",
     )
 
-    @
+    @model_validator(mode="before")
+    @classmethod
     def view_pattern_is_table_pattern_unless_specified(
         cls, values: Dict[str, Any]
     ) -> Dict[str, Any]:

@@ -120,11 +121,9 @@ class SQLCommonConfig(
             self.profiling.operation_config
         )
 
-    @
-    def ensure_profiling_pattern_is_passed_to_profiling(
-
-    ) -> Dict[str, Any]:
-        profiling: Optional[GEProfilingConfig] = values.get("profiling")
+    @model_validator(mode="after")
+    def ensure_profiling_pattern_is_passed_to_profiling(self):
+        profiling = self.profiling
         # Note: isinstance() check is required here as unity-catalog source reuses
         # SQLCommonConfig with different profiling config than GEProfilingConfig
         if (

@@ -132,8 +131,8 @@ class SQLCommonConfig(
             and isinstance(profiling, GEProfilingConfig)
             and profiling.enabled
         ):
-            profiling._allow_deny_patterns =
-            return
+            profiling._allow_deny_patterns = self.profile_pattern
+        return self
 
     @abstractmethod
     def get_sql_alchemy_url(self):
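
The SQLCommonConfig hunk converts a v1 root validator into @model_validator(mode="after"), which runs on the constructed instance, can adjust nested config in place, and must return self. A minimal sketch of that pattern with hypothetical profiling fields:

from pydantic import BaseModel, Field, model_validator


class ExampleProfilingConfig(BaseModel):
    enabled: bool = False
    pattern: str = ".*"


class ExampleSQLConfig(BaseModel):
    profile_pattern: str = ".*"
    profiling: ExampleProfilingConfig = Field(default_factory=ExampleProfilingConfig)

    @model_validator(mode="after")
    def ensure_profiling_pattern_is_passed_to_profiling(self) -> "ExampleSQLConfig":
        # Runs after all fields are validated; self is the real instance,
        # so nested config can be mutated, and self must be returned.
        if self.profiling.enabled:
            self.profiling.pattern = self.profile_pattern
        return self


cfg = ExampleSQLConfig(profile_pattern="prod\\..*", profiling={"enabled": True})
print(cfg.profiling.pattern)  # prod\..*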

datahub/ingestion/source/sql/teradata.py

@@ -860,7 +860,7 @@ ORDER by DataBaseName, TableName;
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = TeradataConfig.
+        config = TeradataConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def _init_schema_resolver(self) -> SchemaResolver:

datahub/ingestion/source/sql/trino.py

@@ -413,7 +413,7 @@ class TrinoSource(SQLAlchemySource):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = TrinoConfig.
+        config = TrinoConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def get_schema_fields_for_column(

datahub/ingestion/source/sql/vertica.py

@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Set, Tupl
 
 import pydantic
 import pytest
-from pydantic import
+from pydantic import field_validator
 from vertica_sqlalchemy_dialect.base import VerticaInspector
 
 from datahub.configuration.common import AllowDenyPattern

@@ -105,8 +105,9 @@ class VerticaConfig(BasicSQLAlchemyConfig):
     # defaults
     scheme: str = pydantic.Field(default="vertica+vertica_python")
 
-    @
-
+    @field_validator("host_port", mode="after")
+    @classmethod
+    def clean_host_port(cls, v: str) -> str:
         return config_clean.remove_protocol(v)
 
 

@@ -138,7 +139,7 @@ class VerticaSource(SQLAlchemySource):
 
     @classmethod
     def create(cls, config_dict: Dict, ctx: PipelineContext) -> "VerticaSource":
-        config = VerticaConfig.
+        config = VerticaConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:

datahub/ingestion/source/sql_queries.py

@@ -5,10 +5,10 @@ import re
 from dataclasses import dataclass, field
 from datetime import datetime
 from functools import partial
-from typing import ClassVar, Iterable, List, Optional, Union, cast
+from typing import Any, ClassVar, Iterable, List, Optional, Union, cast
 
 import smart_open
-from pydantic import BaseModel, Field,
+from pydantic import BaseModel, Field, field_validator
 
 from datahub.configuration.common import HiddenFromDocs
 from datahub.configuration.datetimes import parse_user_datetime

@@ -450,19 +450,22 @@ class QueryEntry(BaseModel):
     class Config:
         arbitrary_types_allowed = True
 
-    @
-
+    @field_validator("timestamp", mode="before")
+    @classmethod
+    def parse_timestamp(cls, v: Any) -> Any:
         return None if v is None else parse_user_datetime(str(v))
 
-    @
-
+    @field_validator("user", mode="before")
+    @classmethod
+    def parse_user(cls, v: Any) -> Any:
         if v is None:
             return None
 
         return v if isinstance(v, CorpUserUrn) else CorpUserUrn(v)
 
-    @
-
+    @field_validator("downstream_tables", "upstream_tables", mode="before")
+    @classmethod
+    def parse_tables(cls, v: Any) -> Any:
         if not v:
             return []
 
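
The QueryEntry hunk registers one validator for several fields at once ("downstream_tables", "upstream_tables") with mode="before", coercing raw input before list validation. A standalone sketch of multi-field coercion on a hypothetical model:

from typing import Any, List

from pydantic import BaseModel, field_validator


class ExampleQueryEntry(BaseModel):
    upstream_tables: List[str] = []
    downstream_tables: List[str] = []

    @field_validator("upstream_tables", "downstream_tables", mode="before")
    @classmethod
    def parse_tables(cls, v: Any) -> Any:
        # One "before" validator for both fields: normalise None or missing
        # input to an empty list before the List[str] validation runs.
        if not v:
            return []
        return v


entry = ExampleQueryEntry.model_validate({"upstream_tables": None})
print(entry.upstream_tables, entry.downstream_tables)  # [] []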

datahub/ingestion/source/state/checkpoint.py

@@ -163,7 +163,7 @@ class Checkpoint(Generic[StateType]):
         )
         state_as_dict["version"] = checkpoint_aspect.state.formatVersion
         state_as_dict["serde"] = checkpoint_aspect.state.serde
-        return state_class.
+        return state_class.model_validate(state_as_dict)
 
     @staticmethod
     def _from_base85_json_bytes(

@@ -179,7 +179,7 @@ class Checkpoint(Generic[StateType]):
         state_as_dict = json.loads(state_uncompressed.decode("utf-8"))
         state_as_dict["version"] = checkpoint_aspect.state.formatVersion
         state_as_dict["serde"] = checkpoint_aspect.state.serde
-        return state_class.
+        return state_class.model_validate(state_as_dict)
 
     def to_checkpoint_aspect(
         self, max_allowed_state_size: int

datahub/ingestion/source/state/entity_removal_state.py

@@ -1,6 +1,7 @@
 from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Tuple, Type
 
 import pydantic
+from pydantic import model_validator
 
 from datahub.emitter.mce_builder import make_assertion_urn, make_container_urn
 from datahub.ingestion.source.state.checkpoint import CheckpointStateBase

@@ -59,7 +60,7 @@ def pydantic_state_migrator(mapping: Dict[str, str]) -> "V1RootValidator":
 
         return values
 
-    return
+    return model_validator(mode="before")(_validate_field_rename)
 
 
 class GenericCheckpointState(CheckpointStateBase):
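
Finally, pydantic_state_migrator now returns model_validator(mode="before")(_validate_field_rename): a model-level "before" validator built at class-definition time that rewrites legacy keys in previously serialized state. A hedged sketch of that factory idea, with a made-up field mapping rather than the package's:

from typing import Any, Dict, List

from pydantic import BaseModel, model_validator


def state_migrator(mapping: Dict[str, str]):
    # Builds a "before" model validator that renames legacy keys so old
    # serialized state can still be loaded by the new model.
    def _rename_legacy_keys(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        for old_key, new_key in mapping.items():
            if old_key in values:
                values.setdefault(new_key, values.pop(old_key))
        return values

    return model_validator(mode="before")(_rename_legacy_keys)


class ExampleCheckpointState(BaseModel):
    urns: List[str] = []

    migrate_legacy_keys = state_migrator({"encoded_urns": "urns"})


print(ExampleCheckpointState.model_validate({"encoded_urns": ["urn:li:dataset:1"]}).urns)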