acryl-datahub 1.3.0.1rc9__py3-none-any.whl → 1.3.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/METADATA +2550 -2543
- {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/RECORD +263 -261
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +2 -2
- datahub/api/entities/corpgroup/corpgroup.py +11 -6
- datahub/api/entities/corpuser/corpuser.py +11 -11
- datahub/api/entities/dataproduct/dataproduct.py +47 -27
- datahub/api/entities/dataset/dataset.py +32 -21
- datahub/api/entities/external/lake_formation_external_entites.py +5 -6
- datahub/api/entities/external/unity_catalog_external_entites.py +5 -7
- datahub/api/entities/forms/forms.py +16 -14
- datahub/api/entities/structuredproperties/structuredproperties.py +23 -16
- datahub/cli/check_cli.py +2 -2
- datahub/cli/config_utils.py +3 -3
- datahub/cli/lite_cli.py +9 -7
- datahub/cli/migrate.py +4 -4
- datahub/cli/quickstart_versioning.py +3 -3
- datahub/cli/specific/group_cli.py +1 -1
- datahub/cli/specific/structuredproperties_cli.py +1 -1
- datahub/cli/specific/user_cli.py +1 -1
- datahub/configuration/common.py +14 -2
- datahub/configuration/connection_resolver.py +2 -2
- datahub/configuration/git.py +47 -30
- datahub/configuration/import_resolver.py +2 -2
- datahub/configuration/kafka.py +4 -3
- datahub/configuration/time_window_config.py +26 -26
- datahub/configuration/validate_field_deprecation.py +2 -2
- datahub/configuration/validate_field_removal.py +2 -2
- datahub/configuration/validate_field_rename.py +2 -2
- datahub/configuration/validate_multiline_string.py +2 -1
- datahub/emitter/kafka_emitter.py +3 -1
- datahub/emitter/rest_emitter.py +2 -4
- datahub/ingestion/api/decorators.py +1 -1
- datahub/ingestion/api/report.py +1 -1
- datahub/ingestion/api/sink.py +1 -1
- datahub/ingestion/api/source.py +1 -1
- datahub/ingestion/glossary/datahub_classifier.py +11 -8
- datahub/ingestion/graph/client.py +5 -1
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
- datahub/ingestion/reporting/file_reporter.py +5 -4
- datahub/ingestion/run/pipeline.py +7 -6
- datahub/ingestion/run/pipeline_config.py +12 -14
- datahub/ingestion/run/sink_callback.py +1 -1
- datahub/ingestion/sink/datahub_rest.py +6 -4
- datahub/ingestion/source/abs/config.py +19 -19
- datahub/ingestion/source/abs/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/abs/source.py +2 -2
- datahub/ingestion/source/aws/aws_common.py +1 -1
- datahub/ingestion/source/aws/glue.py +6 -4
- datahub/ingestion/source/aws/sagemaker.py +1 -1
- datahub/ingestion/source/azure/azure_common.py +8 -12
- datahub/ingestion/source/bigquery_v2/bigquery.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +43 -30
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -1
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/common/gcp_credentials_config.py +10 -10
- datahub/ingestion/source/data_lake_common/path_spec.py +85 -89
- datahub/ingestion/source/datahub/config.py +8 -8
- datahub/ingestion/source/datahub/datahub_source.py +1 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +9 -3
- datahub/ingestion/source/dbt/dbt_common.py +39 -37
- datahub/ingestion/source/dbt/dbt_core.py +10 -12
- datahub/ingestion/source/debug/datahub_debug.py +1 -1
- datahub/ingestion/source/delta_lake/config.py +6 -4
- datahub/ingestion/source/dremio/dremio_api.py +212 -78
- datahub/ingestion/source/dremio/dremio_config.py +10 -6
- datahub/ingestion/source/dremio/dremio_entities.py +55 -39
- datahub/ingestion/source/dremio/dremio_profiling.py +14 -3
- datahub/ingestion/source/dremio/dremio_source.py +24 -26
- datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
- datahub/ingestion/source/elastic_search.py +110 -32
- datahub/ingestion/source/excel/source.py +1 -1
- datahub/ingestion/source/feast.py +1 -1
- datahub/ingestion/source/file.py +5 -4
- datahub/ingestion/source/fivetran/config.py +17 -16
- datahub/ingestion/source/fivetran/fivetran.py +2 -2
- datahub/ingestion/source/gc/datahub_gc.py +1 -1
- datahub/ingestion/source/gcs/gcs_source.py +8 -10
- datahub/ingestion/source/ge_profiling_config.py +8 -5
- datahub/ingestion/source/grafana/grafana_api.py +2 -2
- datahub/ingestion/source/grafana/grafana_config.py +4 -3
- datahub/ingestion/source/grafana/grafana_source.py +1 -1
- datahub/ingestion/source/grafana/models.py +23 -5
- datahub/ingestion/source/hex/api.py +7 -5
- datahub/ingestion/source/hex/hex.py +4 -3
- datahub/ingestion/source/iceberg/iceberg.py +1 -1
- datahub/ingestion/source/iceberg/iceberg_common.py +5 -3
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +10 -10
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +7 -5
- datahub/ingestion/source/looker/looker_config.py +21 -20
- datahub/ingestion/source/looker/lookml_config.py +47 -47
- datahub/ingestion/source/metabase.py +8 -8
- datahub/ingestion/source/metadata/business_glossary.py +2 -2
- datahub/ingestion/source/metadata/lineage.py +13 -8
- datahub/ingestion/source/mlflow.py +1 -1
- datahub/ingestion/source/mode.py +6 -4
- datahub/ingestion/source/mongodb.py +4 -3
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +17 -23
- datahub/ingestion/source/openapi.py +6 -8
- datahub/ingestion/source/powerbi/config.py +33 -32
- datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +2 -2
- datahub/ingestion/source/powerbi/powerbi.py +1 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +2 -2
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +8 -6
- datahub/ingestion/source/preset.py +8 -8
- datahub/ingestion/source/pulsar.py +1 -1
- datahub/ingestion/source/qlik_sense/data_classes.py +15 -8
- datahub/ingestion/source/qlik_sense/qlik_api.py +7 -7
- datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
- datahub/ingestion/source/redshift/config.py +18 -20
- datahub/ingestion/source/redshift/redshift.py +2 -2
- datahub/ingestion/source/redshift/usage.py +23 -3
- datahub/ingestion/source/s3/config.py +83 -62
- datahub/ingestion/source/s3/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/s3/source.py +8 -5
- datahub/ingestion/source/sac/sac.py +5 -4
- datahub/ingestion/source/salesforce.py +3 -2
- datahub/ingestion/source/schema/json_schema.py +2 -2
- datahub/ingestion/source/sigma/data_classes.py +3 -2
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/sigma/sigma_api.py +7 -7
- datahub/ingestion/source/slack/slack.py +1 -1
- datahub/ingestion/source/snaplogic/snaplogic.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_assertion.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +35 -31
- datahub/ingestion/source/snowflake/snowflake_connection.py +35 -13
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_queries.py +28 -4
- datahub/ingestion/source/sql/athena.py +1 -1
- datahub/ingestion/source/sql/clickhouse.py +4 -2
- datahub/ingestion/source/sql/cockroachdb.py +1 -1
- datahub/ingestion/source/sql/druid.py +1 -1
- datahub/ingestion/source/sql/hana.py +1 -1
- datahub/ingestion/source/sql/hive.py +7 -5
- datahub/ingestion/source/sql/hive_metastore.py +1 -1
- datahub/ingestion/source/sql/mssql/source.py +13 -6
- datahub/ingestion/source/sql/mysql.py +1 -1
- datahub/ingestion/source/sql/oracle.py +17 -10
- datahub/ingestion/source/sql/postgres.py +2 -2
- datahub/ingestion/source/sql/presto.py +1 -1
- datahub/ingestion/source/sql/sql_config.py +8 -9
- datahub/ingestion/source/sql/sql_generic.py +1 -1
- datahub/ingestion/source/sql/teradata.py +1 -1
- datahub/ingestion/source/sql/trino.py +1 -1
- datahub/ingestion/source/sql/vertica.py +5 -4
- datahub/ingestion/source/sql_queries.py +174 -22
- datahub/ingestion/source/state/checkpoint.py +2 -2
- datahub/ingestion/source/state/entity_removal_state.py +2 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +55 -45
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/superset.py +9 -9
- datahub/ingestion/source/tableau/tableau.py +14 -16
- datahub/ingestion/source/unity/azure_auth_config.py +15 -0
- datahub/ingestion/source/unity/config.py +51 -34
- datahub/ingestion/source/unity/connection.py +7 -1
- datahub/ingestion/source/unity/connection_test.py +1 -1
- datahub/ingestion/source/unity/proxy.py +216 -7
- datahub/ingestion/source/unity/proxy_types.py +91 -0
- datahub/ingestion/source/unity/source.py +29 -3
- datahub/ingestion/source/usage/clickhouse_usage.py +1 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +1 -1
- datahub/ingestion/source/usage/usage_common.py +5 -3
- datahub/ingestion/source_config/csv_enricher.py +7 -6
- datahub/ingestion/source_config/operation_config.py +7 -4
- datahub/ingestion/source_config/pulsar.py +11 -15
- datahub/ingestion/transformer/add_dataset_browse_path.py +1 -1
- datahub/ingestion/transformer/add_dataset_dataproduct.py +6 -5
- datahub/ingestion/transformer/add_dataset_ownership.py +3 -3
- datahub/ingestion/transformer/add_dataset_properties.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_tags.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_terms.py +2 -2
- datahub/ingestion/transformer/add_dataset_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +1 -1
- datahub/ingestion/transformer/extract_dataset_tags.py +1 -1
- datahub/ingestion/transformer/extract_ownership_from_tags.py +1 -1
- datahub/ingestion/transformer/mark_dataset_status.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_ownership.py +1 -1
- datahub/ingestion/transformer/remove_dataset_ownership.py +1 -1
- datahub/ingestion/transformer/replace_external_url.py +2 -2
- datahub/ingestion/transformer/set_browse_path.py +1 -1
- datahub/ingestion/transformer/tags_to_terms.py +1 -1
- datahub/lite/duckdb_lite.py +1 -1
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/_internal_schema_classes.py +62 -2
- datahub/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +2 -0
- datahub/metadata/schema.avsc +271 -91
- datahub/metadata/schemas/ApplicationProperties.avsc +5 -2
- datahub/metadata/schemas/AssertionInfo.avsc +48 -5
- datahub/metadata/schemas/BusinessAttributeInfo.avsc +8 -4
- datahub/metadata/schemas/ChartInfo.avsc +12 -5
- datahub/metadata/schemas/ContainerProperties.avsc +12 -5
- datahub/metadata/schemas/CorpGroupEditableInfo.avsc +2 -1
- datahub/metadata/schemas/CorpGroupInfo.avsc +7 -3
- datahub/metadata/schemas/CorpUserInfo.avsc +5 -2
- datahub/metadata/schemas/CorpUserSettings.avsc +4 -2
- datahub/metadata/schemas/DashboardInfo.avsc +16 -4
- datahub/metadata/schemas/DataFlowInfo.avsc +11 -5
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +4 -2
- datahub/metadata/schemas/DataJobInfo.avsc +9 -4
- datahub/metadata/schemas/DataPlatformInfo.avsc +3 -1
- datahub/metadata/schemas/DataPlatformInstanceProperties.avsc +5 -2
- datahub/metadata/schemas/DataProductProperties.avsc +5 -2
- datahub/metadata/schemas/DataTypeInfo.avsc +5 -0
- datahub/metadata/schemas/DatasetKey.avsc +2 -1
- datahub/metadata/schemas/DatasetProperties.avsc +12 -5
- datahub/metadata/schemas/DomainProperties.avsc +7 -3
- datahub/metadata/schemas/EditableContainerProperties.avsc +2 -1
- datahub/metadata/schemas/EditableDashboardProperties.avsc +2 -1
- datahub/metadata/schemas/EditableDataFlowProperties.avsc +2 -1
- datahub/metadata/schemas/EditableDataJobProperties.avsc +2 -1
- datahub/metadata/schemas/EditableDatasetProperties.avsc +2 -1
- datahub/metadata/schemas/EditableERModelRelationshipProperties.avsc +2 -1
- datahub/metadata/schemas/EditableMLFeatureProperties.avsc +2 -1
- datahub/metadata/schemas/EditableMLFeatureTableProperties.avsc +2 -1
- datahub/metadata/schemas/EditableMLModelGroupProperties.avsc +2 -1
- datahub/metadata/schemas/EditableMLModelProperties.avsc +2 -1
- datahub/metadata/schemas/EditableNotebookProperties.avsc +2 -1
- datahub/metadata/schemas/EditableSchemaMetadata.avsc +5 -3
- datahub/metadata/schemas/EntityTypeInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalTags.avsc +3 -2
- datahub/metadata/schemas/GlossaryNodeInfo.avsc +3 -1
- datahub/metadata/schemas/GlossaryTermInfo.avsc +3 -1
- datahub/metadata/schemas/InputFields.avsc +3 -2
- datahub/metadata/schemas/MLFeatureKey.avsc +3 -1
- datahub/metadata/schemas/MLFeatureTableKey.avsc +3 -1
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +3 -1
- datahub/metadata/schemas/MLModelGroupKey.avsc +3 -1
- datahub/metadata/schemas/MLModelKey.avsc +3 -1
- datahub/metadata/schemas/MLModelProperties.avsc +4 -2
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +3 -1
- datahub/metadata/schemas/MetadataChangeEvent.avsc +124 -50
- datahub/metadata/schemas/NotebookInfo.avsc +5 -2
- datahub/metadata/schemas/Ownership.avsc +3 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -1
- datahub/metadata/schemas/RoleProperties.avsc +3 -1
- datahub/metadata/schemas/SchemaFieldInfo.avsc +3 -1
- datahub/metadata/schemas/SchemaMetadata.avsc +3 -2
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc +15 -4
- datahub/metadata/schemas/TagProperties.avsc +3 -1
- datahub/metadata/schemas/TestInfo.avsc +2 -1
- datahub/sdk/__init__.py +1 -0
- datahub/sdk/_all_entities.py +2 -0
- datahub/sdk/search_filters.py +68 -40
- datahub/sdk/tag.py +112 -0
- datahub/secret/datahub_secret_store.py +7 -4
- datahub/secret/file_secret_store.py +1 -1
- datahub/sql_parsing/schema_resolver.py +29 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +15 -0
- datahub/sql_parsing/sqlglot_lineage.py +5 -2
- datahub/testing/check_sql_parser_result.py +2 -2
- datahub/utilities/ingest_utils.py +1 -1
- {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/mongodb.py
CHANGED

@@ -6,7 +6,7 @@ from typing import Dict, Iterable, List, Optional, Tuple, Type, Union, ValuesVie
 import bson.timestamp
 import pymongo.collection
 from packaging import version
-from pydantic import PositiveInt,
+from pydantic import PositiveInt, field_validator
 from pydantic.fields import Field
 from pymongo.mongo_client import MongoClient

@@ -138,7 +138,8 @@ class MongoDBConfig(
     # Custom Stateful Ingestion settings
     stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None

-    @
+    @field_validator("maxDocumentSize", mode="after")
+    @classmethod
     def check_max_doc_size_filter_is_valid(cls, doc_size_filter_value):
         if doc_size_filter_value > 16793600:
             raise ValueError("maxDocumentSize must be a positive value <= 16793600.")
@@ -311,7 +312,7 @@ class MongoDBSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "MongoDBSource":
-        config = MongoDBConfig.
+        config = MongoDBConfig.model_validate(config_dict)
         return cls(ctx, config)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
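Note: the mongodb.py hunks above are part of this release's package-wide move from pydantic v1 to v2 idioms: parse_obj(...) becomes model_validate(...) and per-field validators become @field_validator stacked with @classmethod. A minimal sketch of the v2 pattern, assuming pydantic >= 2 is installed; DocLimitConfig and max_document_size are illustrative names, not DataHub code:

from pydantic import BaseModel, field_validator


class DocLimitConfig(BaseModel):
    max_document_size: int = 16793600

    # v2 field validators are plain classmethods registered per field name.
    @field_validator("max_document_size", mode="after")
    @classmethod
    def check_size(cls, value: int) -> int:
        if value > 16793600:
            raise ValueError("max_document_size must be a positive value <= 16793600.")
        return value


# v1 Model.parse_obj(d) becomes Model.model_validate(d) in v2.
config = DocLimitConfig.model_validate({"max_document_size": 1024})
print(config.max_document_size)  # 1024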
datahub/ingestion/source/neo4j/neo4j_source.py
CHANGED

@@ -78,7 +78,7 @@ class Neo4jSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict: Dict, ctx: PipelineContext) -> "Neo4jSource":
-        config = Neo4jConfig.
+        config = Neo4jConfig.model_validate(config_dict)
         return cls(config, ctx)

     def create_schema_field_tuple(
datahub/ingestion/source/nifi.py
CHANGED

@@ -13,7 +13,7 @@ import requests
 from cached_property import cached_property
 from dateutil import parser
 from packaging import version
-from pydantic import
+from pydantic import field_validator, model_validator
 from pydantic.fields import Field
 from requests import Response
 from requests.adapters import HTTPAdapter
@@ -165,39 +165,33 @@ class NifiSourceConfig(StatefulIngestionConfigBase, EnvConfigMixin):
         " When disabled, re-states lineage on each run.",
     )

-    @
-    def validate_auth_params(
-        if
-            "client_cert_file"
-        ):
+    @model_validator(mode="after")
+    def validate_auth_params(self) -> "NifiSourceConfig":
+        if self.auth is NifiAuthType.CLIENT_CERT and not self.client_cert_file:
             raise ValueError(
                 "Config `client_cert_file` is required for CLIENT_CERT auth"
             )
-        elif
+        elif self.auth in (
             NifiAuthType.SINGLE_USER,
             NifiAuthType.BASIC_AUTH,
-        ) and (not
+        ) and (not self.username or not self.password):
             raise ValueError(
-                f"Config `username` and `password` is required for {
+                f"Config `username` and `password` is required for {self.auth.value} auth"
             )
-        return
-
-    @root_validator(skip_on_failure=True)
-    def validator_site_url_to_site_name(cls, values):
-        site_url_to_site_name = values.get("site_url_to_site_name")
-        site_url = values.get("site_url")
-        site_name = values.get("site_name")
+        return self

-
-
-
+    @model_validator(mode="after")
+    def validator_site_url_to_site_name(self) -> "NifiSourceConfig":
+        if self.site_url_to_site_name is None:
+            self.site_url_to_site_name = {}

-        if site_url not in site_url_to_site_name:
-            site_url_to_site_name[site_url] = site_name
+        if self.site_url not in self.site_url_to_site_name:
+            self.site_url_to_site_name[self.site_url] = self.site_name

-        return
+        return self

-    @
+    @field_validator("site_url", mode="after")
+    @classmethod
     def validator_site_url(cls, site_url: str) -> str:
         assert site_url.startswith(("http://", "https://")), (
             "site_url must start with http:// or https://"
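Note: the NiFi config hunks above replace pydantic v1 root_validator methods (classmethods receiving a values dict) with v2 model_validator(mode="after") methods that run on the constructed instance and must return it. A minimal sketch of that shape, assuming pydantic >= 2; AuthConfig and its fields are illustrative stand-ins, not the NiFi config itself:

from typing import Optional

from pydantic import BaseModel, model_validator


class AuthConfig(BaseModel):
    auth: str = "BASIC_AUTH"
    username: Optional[str] = None
    password: Optional[str] = None

    # Runs after field parsing; cross-field checks read self and return self.
    @model_validator(mode="after")
    def validate_auth_params(self) -> "AuthConfig":
        if self.auth == "BASIC_AUTH" and (not self.username or not self.password):
            raise ValueError(f"Config `username` and `password` is required for {self.auth} auth")
        return self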
datahub/ingestion/source/openapi.py
CHANGED

@@ -4,7 +4,7 @@ import warnings
 from abc import ABC
 from typing import Dict, Iterable, List, Optional, Tuple

-from pydantic import
+from pydantic import model_validator
 from pydantic.fields import Field

 from datahub.configuration.common import ConfigModel
@@ -86,13 +86,11 @@ class OpenApiConfig(ConfigModel):
         default=True, description="Enable SSL certificate verification"
     )

-    @
-    def ensure_only_one_token(
-
-    ) -> Optional[str]:
-        if bearer_token is not None and values.get("token") is not None:
+    @model_validator(mode="after")
+    def ensure_only_one_token(self) -> "OpenApiConfig":
+        if self.bearer_token is not None and self.token is not None:
             raise ValueError("Unable to use 'token' and 'bearer_token' together.")
-        return
+        return self

     def get_swagger(self) -> Dict:
         if self.get_token or self.token or self.bearer_token is not None:
@@ -463,5 +461,5 @@ class OpenApiSource(APISource):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = OpenApiConfig.
+        config = OpenApiConfig.model_validate(config_dict)
         return cls(config, ctx)
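Note: with the after-mode validator above, the token exclusivity check now fires when the config is built via model_validate. A small usage sketch, assuming pydantic >= 2; TokenConfig is an illustrative stand-in for OpenApiConfig:

from typing import Optional

from pydantic import BaseModel, ValidationError, model_validator


class TokenConfig(BaseModel):
    token: Optional[str] = None
    bearer_token: Optional[str] = None

    @model_validator(mode="after")
    def ensure_only_one_token(self) -> "TokenConfig":
        if self.bearer_token is not None and self.token is not None:
            raise ValueError("Unable to use 'token' and 'bearer_token' together.")
        return self


try:
    TokenConfig.model_validate({"token": "a", "bearer_token": "b"})
except ValidationError as e:
    print(e.errors()[0]["msg"])  # the ValueError message, wrapped by pydantic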
datahub/ingestion/source/powerbi/config.py
CHANGED

@@ -4,7 +4,7 @@ from enum import Enum
 from typing import Dict, List, Literal, Optional, Union

 import pydantic
-from pydantic import
+from pydantic import field_validator, model_validator

 import datahub.emitter.mce_builder as builder
 from datahub.configuration.common import AllowDenyPattern, ConfigModel, HiddenFromDocs
@@ -540,8 +540,8 @@ class PowerBiDashboardSourceConfig(
         description="timeout in seconds for Metadata Rest Api.",
     )

-    @
-    def validate_extract_column_level_lineage(
+    @model_validator(mode="after")
+    def validate_extract_column_level_lineage(self) -> "PowerBiDashboardSourceConfig":
         flags = [
             "native_query_parsing",
             "enable_advance_lineage_sql_construct",
@@ -549,26 +549,23 @@ class PowerBiDashboardSourceConfig(
             "extract_dataset_schema",
         ]

-        if
-            "extract_column_level_lineage" in values
-            and values["extract_column_level_lineage"] is False
-        ):
+        if self.extract_column_level_lineage is False:
             # Flag is not set. skip validation
-            return
+            return self

         logger.debug(f"Validating additional flags: {flags}")

         is_flag_enabled: bool = True
         for flag in flags:
-            if
+            if not getattr(self, flag, True):
                 is_flag_enabled = False

         if not is_flag_enabled:
             raise ValueError(f"Enable all these flags in recipe: {flags} ")

-        return
+        return self

-    @
+    @field_validator("dataset_type_mapping", mode="after")
     @classmethod
     def map_data_platform(cls, value):
         # For backward compatibility convert input PostgreSql to PostgreSQL
@@ -580,28 +577,32 @@ class PowerBiDashboardSourceConfig(

         return value

-    @
-    def workspace_id_backward_compatibility(
-
-
-
-
+    @model_validator(mode="after")
+    def workspace_id_backward_compatibility(self) -> "PowerBiDashboardSourceConfig":
+        if (
+            self.workspace_id_pattern == AllowDenyPattern.allow_all()
+            and self.workspace_id
+        ):
             logger.warning(
                 "workspace_id_pattern is not set but workspace_id is set, setting workspace_id as "
                 "workspace_id_pattern. workspace_id will be deprecated, please use workspace_id_pattern instead."
             )
-
-                allow=[f"^{workspace_id}$"]
+            self.workspace_id_pattern = AllowDenyPattern(
+                allow=[f"^{self.workspace_id}$"]
             )
-        elif
+        elif (
+            self.workspace_id_pattern != AllowDenyPattern.allow_all()
+            and self.workspace_id
+        ):
             logger.warning(
                 "workspace_id will be ignored in favour of workspace_id_pattern. workspace_id will be deprecated, "
                 "please use workspace_id_pattern only."
             )
-
-        return
+            self.workspace_id = None
+        return self

-    @
+    @model_validator(mode="before")
+    @classmethod
     def raise_error_for_dataset_type_mapping(cls, values: Dict) -> Dict:
         if (
             values.get("dataset_type_mapping") is not None
@@ -613,18 +614,18 @@ class PowerBiDashboardSourceConfig(

         return values

-    @
-    def validate_extract_dataset_schema(
-        if
+    @model_validator(mode="after")
+    def validate_extract_dataset_schema(self) -> "PowerBiDashboardSourceConfig":
+        if self.extract_dataset_schema is False:
             add_global_warning(
                 "Please use `extract_dataset_schema: true`, otherwise dataset schema extraction will be skipped."
             )
-        return
+        return self

-    @
-    def validate_dsn_to_database_schema(
-        if
-            dsn_mapping =
+    @model_validator(mode="after")
+    def validate_dsn_to_database_schema(self) -> "PowerBiDashboardSourceConfig":
+        if self.dsn_to_database_schema is not None:
+            dsn_mapping = self.dsn_to_database_schema
             if not isinstance(dsn_mapping, dict):
                 raise ValueError("dsn_to_database_schema must contain key-value pairs")

@@ -639,4 +640,4 @@ class PowerBiDashboardSourceConfig(
                         f"dsn_to_database_schema invalid mapping value: {value}"
                     )

-        return
+        return self
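Note: the PowerBI config mixes both model_validator modes: mode="before" classmethods see the raw input dict (and can reject or rewrite keys before parsing), while mode="after" instance methods can rewrite already-parsed fields such as workspace_id_pattern. A minimal sketch of the two shapes side by side, assuming pydantic >= 2; the model and key names are illustrative, not the actual PowerBI config:

from typing import Dict, Optional

from pydantic import BaseModel, model_validator


class DashboardConfig(BaseModel):
    workspace_id: Optional[str] = None
    workspace_id_pattern: Optional[str] = None

    # mode="before": classmethod over the raw input, before any field parsing.
    @model_validator(mode="before")
    @classmethod
    def reject_legacy_keys(cls, values: Dict) -> Dict:
        if isinstance(values, dict) and "legacy_workspace" in values:
            raise ValueError("legacy_workspace is no longer supported")
        return values

    # mode="after": instance method that may rewrite parsed fields, then return self.
    @model_validator(mode="after")
    def workspace_id_backward_compatibility(self) -> "DashboardConfig":
        if self.workspace_id and not self.workspace_id_pattern:
            self.workspace_id_pattern = f"^{self.workspace_id}$"
        return self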
datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py
CHANGED

@@ -41,7 +41,7 @@ class ResolvePlatformInstanceFromDatasetTypeMapping(
         if isinstance(platform, PlatformDetail):
             return platform

-        return PlatformDetail.
+        return PlatformDetail.model_validate({})


 class ResolvePlatformInstanceFromServerToPlatformInstance(
@@ -56,7 +56,7 @@ class ResolvePlatformInstanceFromServerToPlatformInstance(
             ]
             if data_platform_detail.data_platform_server
             in self.config.server_to_platform_instance
-            else PlatformDetail.
+            else PlatformDetail.model_validate({})
         )

datahub/ingestion/source/powerbi/powerbi.py
CHANGED

@@ -1316,7 +1316,7 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = PowerBiDashboardSourceConfig.
+        config = PowerBiDashboardSourceConfig.model_validate(config_dict)
         return cls(config, ctx)

     def get_allowed_workspaces(self) -> List[powerbi_data_classes.Workspace]:
datahub/ingestion/source/powerbi_report_server/report_server.py
CHANGED

@@ -213,7 +213,7 @@ class PowerBiReportServerAPI:

         if response_dict.get("value"):
             reports.extend(
-                report_types_mapping[report_type].
+                report_types_mapping[report_type].model_validate(report)
                 for report in response_dict.get("value")
             )

@@ -517,7 +517,7 @@ class PowerBiReportServerDashboardSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = PowerBiReportServerDashboardSourceConfig.
+        config = PowerBiReportServerDashboardSourceConfig.model_validate(config_dict)
         return cls(config, ctx)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
datahub/ingestion/source/powerbi_report_server/report_server_domain.py
CHANGED

@@ -1,7 +1,7 @@
 from datetime import datetime
 from typing import Any, Dict, List, Optional

-from pydantic import BaseModel, Field,
+from pydantic import BaseModel, Field, model_validator

 from datahub.ingestion.source.powerbi_report_server.constants import (
     RelationshipDirection,
@@ -30,11 +30,13 @@ class CatalogItem(BaseModel):
     has_data_sources: bool = Field(False, alias="HasDataSources")
     data_sources: Optional[List["DataSource"]] = Field(None, alias="DataSources")

-    @
-    def validate_diplay_name(
-        if
-
-
+    @model_validator(mode="after")
+    def validate_diplay_name(self):
+        if self.created_by:
+            self.display_name = self.created_by.split("\\")[-1]
+        else:
+            self.display_name = ""
+        return self

     def get_urn_part(self):
         return f"reports.{self.id}"
datahub/ingestion/source/preset.py
CHANGED

@@ -2,7 +2,7 @@ import logging
 from typing import Dict, Optional

 import requests
-from pydantic import
+from pydantic import field_validator, model_validator
 from pydantic.fields import Field

 from datahub.emitter.mce_builder import DEFAULT_ENV
@@ -55,16 +55,16 @@ class PresetConfig(SupersetConfig):
         description="Can be used to change mapping for database names in superset to what you have in datahub",
     )

-    @
+    @field_validator("connect_uri", "display_uri", mode="after")
+    @classmethod
     def remove_trailing_slash(cls, v):
         return config_clean.remove_trailing_slashes(v)

-    @
-    def default_display_uri_to_connect_uri(
-
-
-
-        return values
+    @model_validator(mode="after")
+    def default_display_uri_to_connect_uri(self) -> "PresetConfig":
+        if self.display_uri is None:
+            self.display_uri = self.connect_uri
+        return self


 @platform_name("Preset")
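Note: the Preset hunk shows one field_validator registered for several field names plus an after-mode default. A small sketch of the same combination, assuming pydantic >= 2; UriConfig is an illustrative stand-in for PresetConfig:

from typing import Optional

from pydantic import BaseModel, field_validator, model_validator


class UriConfig(BaseModel):
    connect_uri: str = ""
    display_uri: Optional[str] = None

    # One validator body can serve multiple field names.
    @field_validator("connect_uri", "display_uri", mode="after")
    @classmethod
    def remove_trailing_slash(cls, v: Optional[str]) -> Optional[str]:
        return v.rstrip("/") if v else v

    # Defaulting one field from another happens once all fields are parsed.
    @model_validator(mode="after")
    def default_display_uri_to_connect_uri(self) -> "UriConfig":
        if self.display_uri is None:
            self.display_uri = self.connect_uri
        return self


print(UriConfig.model_validate({"connect_uri": "https://workspace.example/"}).display_uri)
# -> https://workspace.example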
datahub/ingestion/source/pulsar.py
CHANGED

@@ -235,7 +235,7 @@ class PulsarSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = PulsarSourceConfig.
+        config = PulsarSourceConfig.model_validate(config_dict)

         # Do not include each individual partition for partitioned topics,
         if config.exclude_individual_partitions:
datahub/ingestion/source/qlik_sense/data_classes.py
CHANGED

@@ -3,7 +3,7 @@ from datetime import datetime
 from enum import Enum
 from typing import Dict, List, Optional, Type, Union

-from pydantic import BaseModel, ConfigDict, Field,
+from pydantic import BaseModel, ConfigDict, Field, model_validator

 from datahub.emitter.mcp_builder import ContainerKey
 from datahub.ingestion.source.qlik_sense.config import QLIK_DATETIME_FORMAT, Constant
@@ -92,7 +92,8 @@ class Space(_QlikBaseModel):
     updatedAt: datetime
     ownerId: Optional[str] = None

-    @
+    @model_validator(mode="before")
+    @classmethod
     def update_values(cls, values: Dict) -> Dict:
         # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
         values = deepcopy(values)
@@ -121,7 +122,8 @@ class SchemaField(_QlikBaseModel):
     primaryKey: Optional[bool] = None
     nullable: Optional[bool] = None

-    @
+    @model_validator(mode="before")
+    @classmethod
     def update_values(cls, values: Dict) -> Dict:
         # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
         values = deepcopy(values)
@@ -138,7 +140,8 @@ class QlikDataset(Item):
     itemId: str
     datasetSchema: List[SchemaField]

-    @
+    @model_validator(mode="before")
+    @classmethod
     def update_values(cls, values: Dict) -> Dict:
         # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
         values = deepcopy(values)
@@ -174,7 +177,8 @@ class Chart(_QlikBaseModel):
     qDimension: List[AxisProperty]
     qMeasure: List[AxisProperty]

-    @
+    @model_validator(mode="before")
+    @classmethod
     def update_values(cls, values: Dict) -> Dict:
         # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
         values = deepcopy(values)
@@ -193,7 +197,8 @@ class Sheet(_QlikBaseModel):
     updatedAt: datetime
     charts: List[Chart] = []

-    @
+    @model_validator(mode="before")
+    @classmethod
     def update_values(cls, values: Dict) -> Dict:
         # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
         values = deepcopy(values)
@@ -220,7 +225,8 @@ class QlikTable(_QlikBaseModel):
     databaseName: Optional[str] = None
     schemaName: Optional[str] = None

-    @
+    @model_validator(mode="before")
+    @classmethod
     def update_values(cls, values: Dict) -> Dict:
         # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
         values = deepcopy(values)
@@ -239,7 +245,8 @@ class App(Item):
     sheets: List[Sheet] = []
     tables: List[QlikTable] = []

-    @
+    @model_validator(mode="before")
+    @classmethod
     def update_values(cls, values: Dict) -> Dict:
         # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
         values = deepcopy(values)
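Note: every Qlik model above keeps its update_values hook but registers it as a mode="before" classmethod, which reshapes the raw API payload (on a copy, so the caller's dict is untouched) before field parsing. A minimal sketch of that pattern, assuming pydantic >= 2; SpaceModel and the payload keys are illustrative, not the Qlik data classes:

from copy import deepcopy
from typing import Dict

from pydantic import BaseModel, model_validator


class SpaceModel(BaseModel):
    id: str
    name: str

    @model_validator(mode="before")
    @classmethod
    def update_values(cls, values: Dict) -> Dict:
        values = deepcopy(values)  # leave the caller's dictionary unchanged
        # Promote a nested key so it lines up with the declared fields.
        if isinstance(values, dict) and "meta" in values:
            values["name"] = values["meta"].get("name", "")
        return values


space = SpaceModel.model_validate({"id": "s1", "meta": {"name": "Shared dev"}})
print(space.name)  # Shared dev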
datahub/ingestion/source/qlik_sense/qlik_api.py
CHANGED

@@ -56,7 +56,7 @@ class QlikAPI:
             response.raise_for_status()
             response_dict = response.json()
             for space_dict in response_dict[Constant.DATA]:
-                space = Space.
+                space = Space.model_validate(space_dict)
                 spaces.append(space)
                 self.spaces[space.id] = space.name
             if Constant.NEXT in response_dict[Constant.LINKS]:
@@ -64,7 +64,7 @@ class QlikAPI:
             else:
                 break
         # Add personal space entity
-        spaces.append(Space.
+        spaces.append(Space.model_validate(PERSONAL_SPACE_DICT))
         self.spaces[PERSONAL_SPACE_DICT[Constant.ID]] = PERSONAL_SPACE_DICT[
             Constant.NAME
         ]
@@ -78,7 +78,7 @@ class QlikAPI:
             response.raise_for_status()
             response_dict = response.json()
             response_dict[Constant.ITEMID] = item_id
-            return QlikDataset.
+            return QlikDataset.model_validate(response_dict)
         except Exception as e:
             self._log_http_error(
                 message=f"Unable to fetch dataset with id {dataset_id}. Exception: {e}"
@@ -119,7 +119,7 @@ class QlikAPI:
                     f"Chart with id {chart_id} of sheet {sheet_id} does not have hypercube. q_layout: {q_layout}"
                 )
                 return None
-            return Chart.
+            return Chart.model_validate(q_layout)
         except Exception as e:
             self._log_http_error(
                 message=f"Unable to fetch chart {chart_id} of sheet {sheet_id}. Exception: {e}"
@@ -140,7 +140,7 @@ class QlikAPI:
             if Constant.OWNERID not in sheet_dict[Constant.QMETA]:
                 # That means sheet is private sheet
                 return None
-            sheet = Sheet.
+            sheet = Sheet.model_validate(sheet_dict[Constant.QMETA])
             if Constant.QCHILDLIST not in sheet_dict:
                 logger.warning(
                     f"Sheet {sheet.title} with id {sheet_id} does not have any charts. sheet_dict: {sheet_dict}"
@@ -222,7 +222,7 @@ class QlikAPI:
                 return []
             response = websocket_connection.websocket_send_request(method="GetLayout")
             for table_dict in response[Constant.QLAYOUT][Constant.TABLES]:
-                tables.append(QlikTable.
+                tables.append(QlikTable.model_validate(table_dict))
             websocket_connection.handle.pop()
             self._add_qri_of_tables(tables, app_id)
         except Exception as e:
@@ -270,7 +270,7 @@ class QlikAPI:
             response = websocket_connection.websocket_send_request(
                 method="GetAppLayout"
             )
-            app = App.
+            app = App.model_validate(response[Constant.QLAYOUT])
             app.sheets = self._get_app_sheets(websocket_connection, app_id)
             app.tables = self._get_app_used_tables(websocket_connection, app_id)
             websocket_connection.close_websocket()
datahub/ingestion/source/qlik_sense/qlik_sense.py
CHANGED

@@ -148,7 +148,7 @@ class QlikSenseSource(StatefulIngestionSourceBase, TestableSource):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = QlikSourceConfig.
+        config = QlikSourceConfig.model_validate(config_dict)
         return cls(config, ctx)

     def _gen_space_key(self, space_id: str) -> SpaceKey:
datahub/ingestion/source/redshift/config.py
CHANGED

@@ -3,7 +3,7 @@ from copy import deepcopy
 from enum import Enum
 from typing import Any, Dict, List, Optional

-from pydantic import
+from pydantic import model_validator
 from pydantic.fields import Field

 from datahub.configuration import ConfigModel
@@ -182,7 +182,8 @@ class RedshiftConfig(
         description="Whether to skip EXTERNAL tables.",
     )

-    @
+    @model_validator(mode="before")
+    @classmethod
     def check_email_is_set_on_usage(cls, values):
         if values.get("include_usage_statistics"):
             assert "email_domain" in values and values["email_domain"], (
@@ -190,31 +191,28 @@ class RedshiftConfig(
             )
         return values

-    @
-    def check_database_is_set(
-        assert
-        return
-
-    @root_validator(skip_on_failure=True)
-    def backward_compatibility_configs_set(cls, values: Dict) -> Dict:
-        match_fully_qualified_names = values.get("match_fully_qualified_names")
-
-        schema_pattern: Optional[AllowDenyPattern] = values.get("schema_pattern")
+    @model_validator(mode="after")
+    def check_database_is_set(self) -> "RedshiftConfig":
+        assert self.database, "database must be set"
+        return self

+    @model_validator(mode="after")
+    def backward_compatibility_configs_set(self) -> "RedshiftConfig":
         if (
-            schema_pattern is not None
-            and schema_pattern != AllowDenyPattern.allow_all()
-            and match_fully_qualified_names is not None
-            and not match_fully_qualified_names
+            self.schema_pattern is not None
+            and self.schema_pattern != AllowDenyPattern.allow_all()
+            and self.match_fully_qualified_names is not None
+            and not self.match_fully_qualified_names
         ):
             logger.warning(
                 "Please update `schema_pattern` to match against fully qualified schema name `<database_name>.<schema_name>` and set config `match_fully_qualified_names : True`."
                 "Current default `match_fully_qualified_names: False` is only to maintain backward compatibility. "
                 "The config option `match_fully_qualified_names` will be deprecated in future and the default behavior will assume `match_fully_qualified_names: True`."
             )
-        return
+        return self

-    @
+    @model_validator(mode="before")
+    @classmethod
     def connection_config_compatibility_set(cls, values: Dict) -> Dict:
         # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
         values = deepcopy(values)
@@ -231,8 +229,8 @@ class RedshiftConfig(
         if "options" in values and "connect_args" in values["options"]:
             values["extra_client_options"] = values["options"]["connect_args"]

-        if values
-            if values
+        if values.get("extra_client_options"):
+            if values.get("options"):
                 values["options"]["connect_args"] = values["extra_client_options"]
             else:
                 values["options"] = {"connect_args": values["extra_client_options"]}
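Note: the Redshift config pairs a mode="before" validator that rewrites connection keys in the raw input with mode="after" checks (including a bare assert) on the parsed model; pydantic v2 surfaces ValueError and AssertionError from validators as a ValidationError. A sketch under those assumptions; WarehouseConfig and the legacy key name are illustrative, not the real Redshift options:

from typing import Dict, Optional

from pydantic import BaseModel, model_validator


class WarehouseConfig(BaseModel):
    database: Optional[str] = None

    @model_validator(mode="before")
    @classmethod
    def rewrite_legacy_keys(cls, values: Dict) -> Dict:
        values = dict(values)  # work on a copy of the raw input
        if "legacy_database" in values and "database" not in values:
            values["database"] = values.pop("legacy_database")
        return values

    @model_validator(mode="after")
    def check_database_is_set(self) -> "WarehouseConfig":
        assert self.database, "database must be set"
        return self


print(WarehouseConfig.model_validate({"legacy_database": "dev"}).database)  # dev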
datahub/ingestion/source/redshift/redshift.py
CHANGED

@@ -236,7 +236,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
         RedshiftConfig.Config.extra = (
             pydantic.Extra.allow
         )  # we are okay with extra fields during this stage
-        config = RedshiftConfig.
+        config = RedshiftConfig.model_validate(config_dict)
         # source = RedshiftSource(config, report)
         connection: redshift_connector.Connection = (
             RedshiftSource.get_redshift_connection(config)
@@ -316,7 +316,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = RedshiftConfig.
+        config = RedshiftConfig.model_validate(config_dict)
         return cls(config, ctx)

     @staticmethod