acryl-datahub 1.3.1__py3-none-any.whl → 1.3.1.1__py3-none-any.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic.
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/METADATA +2582 -2582
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/RECORD +203 -201
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +2 -2
- datahub/api/entities/corpgroup/corpgroup.py +11 -6
- datahub/api/entities/corpuser/corpuser.py +11 -11
- datahub/api/entities/dataproduct/dataproduct.py +47 -27
- datahub/api/entities/dataset/dataset.py +32 -21
- datahub/api/entities/external/lake_formation_external_entites.py +5 -6
- datahub/api/entities/external/unity_catalog_external_entites.py +5 -7
- datahub/api/entities/forms/forms.py +16 -14
- datahub/api/entities/structuredproperties/structuredproperties.py +23 -16
- datahub/cli/check_cli.py +2 -2
- datahub/cli/config_utils.py +3 -3
- datahub/cli/lite_cli.py +9 -7
- datahub/cli/migrate.py +4 -4
- datahub/cli/quickstart_versioning.py +3 -3
- datahub/cli/specific/group_cli.py +1 -1
- datahub/cli/specific/structuredproperties_cli.py +1 -1
- datahub/cli/specific/user_cli.py +1 -1
- datahub/configuration/common.py +14 -2
- datahub/configuration/connection_resolver.py +2 -2
- datahub/configuration/git.py +47 -30
- datahub/configuration/import_resolver.py +2 -2
- datahub/configuration/kafka.py +4 -3
- datahub/configuration/time_window_config.py +26 -26
- datahub/configuration/validate_field_deprecation.py +2 -2
- datahub/configuration/validate_field_removal.py +2 -2
- datahub/configuration/validate_field_rename.py +2 -2
- datahub/configuration/validate_multiline_string.py +2 -1
- datahub/emitter/kafka_emitter.py +3 -1
- datahub/emitter/rest_emitter.py +2 -4
- datahub/ingestion/api/decorators.py +1 -1
- datahub/ingestion/api/report.py +1 -1
- datahub/ingestion/api/sink.py +1 -1
- datahub/ingestion/api/source.py +1 -1
- datahub/ingestion/glossary/datahub_classifier.py +11 -8
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
- datahub/ingestion/reporting/file_reporter.py +5 -4
- datahub/ingestion/run/pipeline.py +6 -6
- datahub/ingestion/run/pipeline_config.py +12 -14
- datahub/ingestion/run/sink_callback.py +1 -1
- datahub/ingestion/sink/datahub_rest.py +6 -4
- datahub/ingestion/source/abs/config.py +19 -19
- datahub/ingestion/source/abs/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/abs/source.py +2 -2
- datahub/ingestion/source/aws/aws_common.py +1 -1
- datahub/ingestion/source/aws/glue.py +6 -4
- datahub/ingestion/source/aws/sagemaker.py +1 -1
- datahub/ingestion/source/azure/azure_common.py +8 -12
- datahub/ingestion/source/bigquery_v2/bigquery.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +43 -30
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -1
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/common/gcp_credentials_config.py +10 -10
- datahub/ingestion/source/data_lake_common/path_spec.py +85 -89
- datahub/ingestion/source/datahub/config.py +8 -8
- datahub/ingestion/source/datahub/datahub_source.py +1 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +9 -3
- datahub/ingestion/source/dbt/dbt_common.py +39 -37
- datahub/ingestion/source/dbt/dbt_core.py +10 -12
- datahub/ingestion/source/debug/datahub_debug.py +1 -1
- datahub/ingestion/source/delta_lake/config.py +6 -4
- datahub/ingestion/source/dremio/dremio_config.py +10 -6
- datahub/ingestion/source/dremio/dremio_source.py +15 -15
- datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
- datahub/ingestion/source/elastic_search.py +4 -3
- datahub/ingestion/source/excel/source.py +1 -1
- datahub/ingestion/source/feast.py +1 -1
- datahub/ingestion/source/file.py +5 -4
- datahub/ingestion/source/fivetran/config.py +17 -16
- datahub/ingestion/source/fivetran/fivetran.py +2 -2
- datahub/ingestion/source/gc/datahub_gc.py +1 -1
- datahub/ingestion/source/gcs/gcs_source.py +8 -10
- datahub/ingestion/source/ge_profiling_config.py +8 -5
- datahub/ingestion/source/grafana/grafana_api.py +2 -2
- datahub/ingestion/source/grafana/grafana_config.py +4 -3
- datahub/ingestion/source/grafana/grafana_source.py +1 -1
- datahub/ingestion/source/grafana/models.py +23 -5
- datahub/ingestion/source/hex/api.py +7 -5
- datahub/ingestion/source/hex/hex.py +4 -3
- datahub/ingestion/source/iceberg/iceberg.py +1 -1
- datahub/ingestion/source/iceberg/iceberg_common.py +5 -3
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +10 -10
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +7 -5
- datahub/ingestion/source/looker/looker_config.py +21 -20
- datahub/ingestion/source/looker/lookml_config.py +47 -47
- datahub/ingestion/source/metabase.py +8 -8
- datahub/ingestion/source/metadata/business_glossary.py +2 -2
- datahub/ingestion/source/metadata/lineage.py +13 -8
- datahub/ingestion/source/mlflow.py +1 -1
- datahub/ingestion/source/mode.py +6 -4
- datahub/ingestion/source/mongodb.py +4 -3
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +17 -23
- datahub/ingestion/source/openapi.py +6 -8
- datahub/ingestion/source/powerbi/config.py +33 -32
- datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +2 -2
- datahub/ingestion/source/powerbi/powerbi.py +1 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +2 -2
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +8 -6
- datahub/ingestion/source/preset.py +8 -8
- datahub/ingestion/source/pulsar.py +1 -1
- datahub/ingestion/source/qlik_sense/data_classes.py +15 -8
- datahub/ingestion/source/qlik_sense/qlik_api.py +7 -7
- datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
- datahub/ingestion/source/redshift/config.py +18 -20
- datahub/ingestion/source/redshift/redshift.py +2 -2
- datahub/ingestion/source/redshift/usage.py +23 -3
- datahub/ingestion/source/s3/config.py +83 -62
- datahub/ingestion/source/s3/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/s3/source.py +8 -5
- datahub/ingestion/source/sac/sac.py +5 -4
- datahub/ingestion/source/salesforce.py +3 -2
- datahub/ingestion/source/schema/json_schema.py +2 -2
- datahub/ingestion/source/sigma/data_classes.py +3 -2
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/sigma/sigma_api.py +7 -7
- datahub/ingestion/source/slack/slack.py +1 -1
- datahub/ingestion/source/snaplogic/snaplogic.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_assertion.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +35 -31
- datahub/ingestion/source/snowflake/snowflake_connection.py +35 -13
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_queries.py +1 -1
- datahub/ingestion/source/sql/athena.py +1 -1
- datahub/ingestion/source/sql/clickhouse.py +4 -2
- datahub/ingestion/source/sql/cockroachdb.py +1 -1
- datahub/ingestion/source/sql/druid.py +1 -1
- datahub/ingestion/source/sql/hana.py +1 -1
- datahub/ingestion/source/sql/hive.py +7 -5
- datahub/ingestion/source/sql/hive_metastore.py +1 -1
- datahub/ingestion/source/sql/mssql/source.py +13 -6
- datahub/ingestion/source/sql/mysql.py +1 -1
- datahub/ingestion/source/sql/oracle.py +17 -10
- datahub/ingestion/source/sql/postgres.py +2 -2
- datahub/ingestion/source/sql/presto.py +1 -1
- datahub/ingestion/source/sql/sql_config.py +8 -9
- datahub/ingestion/source/sql/sql_generic.py +1 -1
- datahub/ingestion/source/sql/teradata.py +1 -1
- datahub/ingestion/source/sql/trino.py +1 -1
- datahub/ingestion/source/sql/vertica.py +5 -4
- datahub/ingestion/source/sql_queries.py +11 -8
- datahub/ingestion/source/state/checkpoint.py +2 -2
- datahub/ingestion/source/state/entity_removal_state.py +2 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +55 -45
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/superset.py +9 -9
- datahub/ingestion/source/tableau/tableau.py +14 -16
- datahub/ingestion/source/unity/azure_auth_config.py +15 -0
- datahub/ingestion/source/unity/config.py +51 -34
- datahub/ingestion/source/unity/connection.py +7 -1
- datahub/ingestion/source/unity/connection_test.py +1 -1
- datahub/ingestion/source/unity/proxy.py +216 -7
- datahub/ingestion/source/unity/proxy_types.py +91 -0
- datahub/ingestion/source/unity/source.py +29 -3
- datahub/ingestion/source/usage/clickhouse_usage.py +1 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +1 -1
- datahub/ingestion/source/usage/usage_common.py +5 -3
- datahub/ingestion/source_config/csv_enricher.py +7 -6
- datahub/ingestion/source_config/operation_config.py +7 -4
- datahub/ingestion/source_config/pulsar.py +11 -15
- datahub/ingestion/transformer/add_dataset_browse_path.py +1 -1
- datahub/ingestion/transformer/add_dataset_dataproduct.py +6 -5
- datahub/ingestion/transformer/add_dataset_ownership.py +3 -3
- datahub/ingestion/transformer/add_dataset_properties.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_tags.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_terms.py +2 -2
- datahub/ingestion/transformer/add_dataset_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +1 -1
- datahub/ingestion/transformer/extract_dataset_tags.py +1 -1
- datahub/ingestion/transformer/extract_ownership_from_tags.py +1 -1
- datahub/ingestion/transformer/mark_dataset_status.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_ownership.py +1 -1
- datahub/ingestion/transformer/remove_dataset_ownership.py +1 -1
- datahub/ingestion/transformer/replace_external_url.py +2 -2
- datahub/ingestion/transformer/set_browse_path.py +1 -1
- datahub/ingestion/transformer/tags_to_terms.py +1 -1
- datahub/lite/duckdb_lite.py +1 -1
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/schema.avsc +7 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -1
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc +6 -1
- datahub/sdk/__init__.py +1 -0
- datahub/sdk/_all_entities.py +2 -0
- datahub/sdk/search_filters.py +68 -40
- datahub/sdk/tag.py +112 -0
- datahub/secret/datahub_secret_store.py +7 -4
- datahub/secret/file_secret_store.py +1 -1
- datahub/sql_parsing/sqlglot_lineage.py +5 -2
- datahub/testing/check_sql_parser_result.py +2 -2
- datahub/utilities/ingest_utils.py +1 -1
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/dremio/dremio_source.py
CHANGED

@@ -336,10 +336,10 @@ class DremioSource(StatefulIngestionSourceBase):
                 return

             dataset_urn = make_dataset_urn_with_platform_instance(
-                platform=self.get_platform(),
-                name=dataset_name,
-                platform_instance=self.config.platform_instance,
+                platform=make_data_platform_urn(self.get_platform()),
+                name=f"dremio.{dataset_name}",
                 env=self.config.env,
+                platform_instance=self.config.platform_instance,
             )

             for dremio_mcp in self.dremio_aspects.populate_dataset_mcp(
@@ -419,10 +419,10 @@ class DremioSource(StatefulIngestionSourceBase):
             schema_str = ".".join(dataset_info.path)
             dataset_name = f"{schema_str}.{dataset_info.resource_name}".lower()
             dataset_urn = make_dataset_urn_with_platform_instance(
-                platform=self.get_platform(),
-                name=dataset_name,
-                platform_instance=self.config.platform_instance,
+                platform=make_data_platform_urn(self.get_platform()),
+                name=f"dremio.{dataset_name}",
                 env=self.config.env,
+                platform_instance=self.config.platform_instance,
             )
             yield from self.profiler.get_workunits(dataset_info, dataset_urn)

@@ -434,10 +434,10 @@ class DremioSource(StatefulIngestionSourceBase):
         """
         upstream_urns = [
             make_dataset_urn_with_platform_instance(
-                platform=self.get_platform(),
-                name=upstream_table.lower(),
-                platform_instance=self.config.platform_instance,
+                platform=make_data_platform_urn(self.get_platform()),
+                name=f"dremio.{upstream_table.lower()}",
                 env=self.config.env,
+                platform_instance=self.config.platform_instance,
             )
             for upstream_table in parents
         ]
@@ -496,19 +496,19 @@ class DremioSource(StatefulIngestionSourceBase):
         if query.query and query.affected_dataset:
             upstream_urns = [
                 make_dataset_urn_with_platform_instance(
-                    platform=self.get_platform(),
-                    name=ds.lower(),
-                    platform_instance=self.config.platform_instance,
+                    platform=make_data_platform_urn(self.get_platform()),
+                    name=f"dremio.{ds.lower()}",
                     env=self.config.env,
+                    platform_instance=self.config.platform_instance,
                 )
                 for ds in query.queried_datasets
            ]

            downstream_urn = make_dataset_urn_with_platform_instance(
-                platform=self.get_platform(),
-                name=query.affected_dataset.lower(),
-                platform_instance=self.config.platform_instance,
+                platform=make_data_platform_urn(self.get_platform()),
+                name=f"dremio.{query.affected_dataset.lower()}",
                 env=self.config.env,
+                platform_instance=self.config.platform_instance,
            )

            # Add query to SqlParsingAggregator
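Across these hunks the change is uniform: the platform argument is wrapped in make_data_platform_urn and every dataset name gains a "dremio." prefix, so the dataset URNs the source emits change shape. A minimal sketch of the effect, using DataHub's standard URN builders and a hypothetical table name that is not taken from the package:

from datahub.emitter.mce_builder import (
    make_data_platform_urn,
    make_dataset_urn_with_platform_instance,
)

# Hypothetical Dremio dataset path, lowercased the same way the source does.
dataset_name = "my_space.my_folder.orders"

# 1.3.1 behaviour: bare platform id and unprefixed name.
old_urn = make_dataset_urn_with_platform_instance(
    platform="dremio",
    name=dataset_name,
    platform_instance=None,
    env="PROD",
)
# urn:li:dataset:(urn:li:dataPlatform:dremio,my_space.my_folder.orders,PROD)

# 1.3.1.1 behaviour: platform passed as a dataPlatform URN, name prefixed with "dremio."
new_urn = make_dataset_urn_with_platform_instance(
    platform=make_data_platform_urn("dremio"),
    name=f"dremio.{dataset_name}",
    env="PROD",
    platform_instance=None,
)
# urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.my_space.my_folder.orders,PROD)

If this reading is right, URNs emitted by the Dremio source in 1.3.1.1 will not match those emitted by 1.3.1 for the same tables, which is worth checking before upgrading an existing ingestion setup.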
datahub/ingestion/source/dynamodb/dynamodb.py
CHANGED

@@ -200,7 +200,7 @@ class DynamoDBSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "DynamoDBSource":
-        config = DynamoDBConfig.
+        config = DynamoDBConfig.model_validate(config_dict)
         return cls(ctx, config, "dynamodb")

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
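This create() change repeats across most of the sources below: the truncated removed lines end in a bare "Config.", presumably the Pydantic v1 parse_obj call, and the replacement is the Pydantic v2 equivalent. A minimal sketch on a made-up config class that is not part of the package:

from typing import Optional
from pydantic import BaseModel

class ExampleSourceConfig(BaseModel):  # stand-in for the various *Config classes
    table: str
    platform_instance: Optional[str] = None

config_dict = {"table": "orders"}

# Pydantic v1 spelling: ExampleSourceConfig.parse_obj(config_dict)
# Pydantic v2 spelling used in 1.3.1.1:
config = ExampleSourceConfig.model_validate(config_dict)
assert config.table == "orders"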
datahub/ingestion/source/elastic_search.py
CHANGED

@@ -8,7 +8,7 @@ from hashlib import md5
 from typing import Any, Dict, Generator, Iterable, List, Optional, Tuple, Type, Union

 from elasticsearch import Elasticsearch
-from pydantic import
+from pydantic import field_validator
 from pydantic.fields import Field

 from datahub.configuration.common import AllowDenyPattern, ConfigModel
@@ -330,7 +330,8 @@ class ElasticsearchSourceConfig(
             self.profiling.operation_config
         )

-    @
+    @field_validator("host", mode="after")
+    @classmethod
     def host_colon_port_comma(cls, host_val: str) -> str:
         for entry in host_val.split(","):
             entry = remove_protocol(entry)
@@ -382,7 +383,7 @@ class ElasticsearchSource(StatefulIngestionSourceBase):
     def create(
         cls, config_dict: Dict[str, Any], ctx: PipelineContext
     ) -> "ElasticsearchSource":
-        config = ElasticsearchSourceConfig.
+        config = ElasticsearchSourceConfig.model_validate(config_dict)
         return cls(config, ctx)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
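The bare "@" in the removed lines is a truncated Pydantic v1 @validator decorator; in v2 the same check is declared with @field_validator plus an explicit @classmethod. A reduced sketch of the pattern, using an illustrative host field rather than the real ElasticsearchSourceConfig:

from pydantic import BaseModel, field_validator

class HostConfig(BaseModel):  # illustrative only
    host: str = "localhost:9200"

    @field_validator("host", mode="after")
    @classmethod
    def host_colon_port_comma(cls, host_val: str) -> str:
        # mode="after" runs once the value is already a validated str;
        # the validator must return the (possibly adjusted) value.
        for entry in host_val.split(","):
            if ":" not in entry:
                raise ValueError(f"host entry {entry!r} is missing a port")
        return host_val

HostConfig(host="es1:9200,es2:9200")   # ok
# HostConfig(host="es1")               # would raise a ValidationError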
datahub/ingestion/source/excel/source.py
CHANGED

@@ -156,7 +156,7 @@ class ExcelSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "ExcelSource":
-        config = ExcelSourceConfig.
+        config = ExcelSourceConfig.model_validate(config_dict)
         return cls(ctx, config)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
datahub/ingestion/source/feast.py
CHANGED

@@ -462,7 +462,7 @@ class FeastRepositorySource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = FeastRepositorySourceConfig.
+        config = FeastRepositorySourceConfig.model_validate(config_dict)
         return cls(config, ctx)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
datahub/ingestion/source/file.py
CHANGED
@@ -9,7 +9,7 @@ from functools import partial
 from typing import Any, Iterable, Iterator, List, Optional, Tuple, Union

 import ijson
-from pydantic import
+from pydantic import field_validator
 from pydantic.fields import Field

 from datahub.configuration.common import ConfigEnum
@@ -103,7 +103,8 @@ class FileSourceConfig(StatefulIngestionConfigBase):

     stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None

-    @
+    @field_validator("file_extension", mode="after")
+    @classmethod
     def add_leading_dot_to_extension(cls, v: str) -> str:
         if v:
             if v.startswith("."):
@@ -205,7 +206,7 @@ class GenericFileSource(StatefulIngestionSourceBase, TestableSource):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = FileSourceConfig.
+        config = FileSourceConfig.model_validate(config_dict)
         return cls(ctx, config)

     def get_filenames(self) -> Iterable[FileInfo]:
@@ -358,7 +359,7 @@ class GenericFileSource(StatefulIngestionSourceBase, TestableSource):

     @staticmethod
     def test_connection(config_dict: dict) -> TestConnectionReport:
-        config = FileSourceConfig.
+        config = FileSourceConfig.model_validate(config_dict)
         exists = os.path.exists(config.path)
         if not exists:
             return TestConnectionReport(
datahub/ingestion/source/fivetran/config.py
CHANGED

@@ -1,10 +1,10 @@
 import dataclasses
 import logging
 import warnings
-from typing import Dict, Optional
+from typing import Any, Dict, Optional

 import pydantic
-from pydantic import Field,
+from pydantic import Field, field_validator, model_validator
 from typing_extensions import Literal

 from datahub.configuration.common import (
@@ -98,7 +98,8 @@ class DatabricksDestinationConfig(UnityCatalogConnectionConfig):
     catalog: str = Field(description="The fivetran connector log catalog.")
     log_schema: str = Field(description="The fivetran connector log schema.")

-    @
+    @field_validator("warehouse_id", mode="after")
+    @classmethod
     def warehouse_id_should_not_be_empty(cls, warehouse_id: Optional[str]) -> str:
         if warehouse_id is None or (warehouse_id and warehouse_id.strip() == ""):
             raise ValueError("Fivetran requires warehouse_id to be set")
@@ -141,29 +142,28 @@ class FivetranLogConfig(ConfigModel):
         "destination_config", "snowflake_destination_config"
     )

-    @
-    def
-        destination_platform
-
-        if "snowflake_destination_config" not in values:
+    @model_validator(mode="after")
+    def validate_destination_platform_and_config(self) -> "FivetranLogConfig":
+        if self.destination_platform == "snowflake":
+            if self.snowflake_destination_config is None:
                 raise ValueError(
                     "If destination platform is 'snowflake', user must provide snowflake destination configuration in the recipe."
                 )
-        elif destination_platform == "bigquery":
-            if
+        elif self.destination_platform == "bigquery":
+            if self.bigquery_destination_config is None:
                 raise ValueError(
                     "If destination platform is 'bigquery', user must provide bigquery destination configuration in the recipe."
                 )
-        elif destination_platform == "databricks":
-            if
+        elif self.destination_platform == "databricks":
+            if self.databricks_destination_config is None:
                 raise ValueError(
                     "If destination platform is 'databricks', user must provide databricks destination configuration in the recipe."
                 )
         else:
             raise ValueError(
-                f"Destination platform '{destination_platform}' is not yet supported."
+                f"Destination platform '{self.destination_platform}' is not yet supported."
             )
-        return
+        return self


 @dataclasses.dataclass
@@ -267,8 +267,9 @@ class FivetranSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin
         description="Fivetran REST API configuration, used to provide wider support for connections.",
     )

-    @
-
+    @model_validator(mode="before")
+    @classmethod
+    def compat_sources_to_database(cls, values: Any) -> Any:
         if "sources_to_database" in values:
             warnings.warn(
                 "The sources_to_database field is deprecated, please use sources_to_platform_instance instead.",
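The FivetranLogConfig change shows the other half of the migration: a v1 root_validator over a values dict becomes a v2 @model_validator(mode="after") that reads attributes off self and returns self. A reduced, self-contained sketch with field names simplified from the original:

from typing import Literal, Optional
from pydantic import BaseModel, ValidationError, model_validator

class LogConfig(BaseModel):  # reduced stand-in for FivetranLogConfig
    destination_platform: Literal["snowflake", "bigquery", "databricks"] = "snowflake"
    snowflake_destination_config: Optional[dict] = None
    bigquery_destination_config: Optional[dict] = None
    databricks_destination_config: Optional[dict] = None

    @model_validator(mode="after")
    def validate_destination_platform_and_config(self) -> "LogConfig":
        # Runs after all fields are validated, so cross-field checks can use self directly.
        required_field = f"{self.destination_platform}_destination_config"
        if getattr(self, required_field) is None:
            raise ValueError(
                f"{required_field} must be provided when destination_platform is "
                f"'{self.destination_platform}'"
            )
        return self

try:
    LogConfig(destination_platform="bigquery")
except ValidationError as e:
    print(e)  # bigquery_destination_config must be provided ...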
datahub/ingestion/source/fivetran/fivetran.py
CHANGED

@@ -234,12 +234,12 @@ class FivetranSource(StatefulIngestionSourceBase):
         return dict(
             **{
                 f"source.{k}": str(v)
-                for k, v in source_details.
+                for k, v in source_details.model_dump().items()
                 if v is not None and not isinstance(v, bool)
             },
             **{
                 f"destination.{k}": str(v)
-                for k, v in destination_details.
+                for k, v in destination_details.model_dump().items()
                 if v is not None and not isinstance(v, bool)
             },
         )
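Here the truncated v1 .dict() calls on the details models become .model_dump(); the surrounding filtering is unchanged. A small sketch of the same comprehension on a made-up model:

from typing import Optional
from pydantic import BaseModel

class ConnectorDetails(BaseModel):  # illustrative stand-in for the source/destination details
    database: Optional[str] = "analytics"
    region: Optional[str] = None
    paused: bool = False

details = ConnectorDetails()
properties = {
    f"source.{k}": str(v)
    for k, v in details.model_dump().items()  # v1 spelling: details.dict().items()
    if v is not None and not isinstance(v, bool)
}
# {'source.database': 'analytics'} -- None values and booleans are filtered out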
datahub/ingestion/source/gc/datahub_gc.py
CHANGED

@@ -127,7 +127,7 @@ class DataHubGcSource(Source):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = DataHubGcSourceConfig.
+        config = DataHubGcSourceConfig.model_validate(config_dict)
         return cls(ctx, config)

     # auto_work_unit_report is overriden to disable a couple of automation like auto status aspect, etc. which is not needed her.
datahub/ingestion/source/gcs/gcs_source.py
CHANGED

@@ -1,7 +1,7 @@
 import logging
-from typing import
+from typing import Iterable, List, Optional

-from pydantic import Field, SecretStr,
+from pydantic import Field, SecretStr, model_validator

 from datahub.configuration.common import ConfigModel
 from datahub.configuration.source_common import DatasetSourceConfigMixin
@@ -64,18 +64,16 @@ class GCSSourceConfig(

     stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None

-    @
-    def check_path_specs_and_infer_platform(
-
-    ) -> List[PathSpec]:
-        if len(path_specs) == 0:
+    @model_validator(mode="after")
+    def check_path_specs_and_infer_platform(self) -> "GCSSourceConfig":
+        if len(self.path_specs) == 0:
             raise ValueError("path_specs must not be empty")

         # Check that all path specs have the gs:// prefix.
-        if any([not is_gcs_uri(path_spec.include) for path_spec in path_specs]):
+        if any([not is_gcs_uri(path_spec.include) for path_spec in self.path_specs]):
             raise ValueError("All path_spec.include should start with gs://")

-        return
+        return self


 class GCSSourceReport(DataLakeSourceReport):
@@ -105,7 +103,7 @@ class GCSSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = GCSSourceConfig.
+        config = GCSSourceConfig.model_validate(config_dict)
         return cls(config, ctx)

     def create_equivalent_s3_config(self):
datahub/ingestion/source/ge_profiling_config.py
CHANGED

@@ -4,6 +4,7 @@ import os
 from typing import Annotated, Any, Dict, List, Optional

 import pydantic
+from pydantic import model_validator
 from pydantic.fields import Field

 from datahub.configuration.common import AllowDenyPattern, ConfigModel, SupportedSources
@@ -212,7 +213,8 @@ class GEProfilingConfig(GEProfilingBaseConfig):
         description="Whether to profile complex types like structs, arrays and maps. ",
     )

-    @
+    @model_validator(mode="before")
+    @classmethod
     def deprecate_bigquery_temp_table_schema(cls, values):
         # TODO: Update docs to remove mention of this field.
         if "bigquery_temp_table_schema" in values:
@@ -222,16 +224,17 @@ class GEProfilingConfig(GEProfilingBaseConfig):
             del values["bigquery_temp_table_schema"]
         return values

-    @
+    @model_validator(mode="before")
+    @classmethod
     def ensure_field_level_settings_are_normalized(
-        cls
+        cls, values: Dict[str, Any]
     ) -> Dict[str, Any]:
         max_num_fields_to_profile_key = "max_number_of_fields_to_profile"
         max_num_fields_to_profile = values.get(max_num_fields_to_profile_key)

         # Disable all field-level metrics.
         if values.get("profile_table_level_only"):
-            for field_level_metric in cls.
+            for field_level_metric in cls.model_fields:
                 if field_level_metric.startswith("include_field_"):
                     if values.get(field_level_metric):
                         raise ValueError(
@@ -267,7 +270,7 @@ class GEProfilingConfig(GEProfilingBaseConfig):
         )

     def config_for_telemetry(self) -> Dict[str, Any]:
-        config_dict = self.
+        config_dict = self.model_dump()

         return {
             flag: config_dict[flag]
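Two details here are easy to miss: mode="before" validators receive the raw input dict, so deprecated keys can be rewritten or checked before field validation, and the class-level field listing moves from v1's __fields__ to v2's model_fields. A sketch with a cut-down profiling config whose fields are invented for illustration:

from typing import Any, Dict
from pydantic import BaseModel, model_validator

class ProfilingConfig(BaseModel):  # cut-down stand-in, not the real GEProfilingConfig
    profile_table_level_only: bool = False
    include_field_min_value: bool = True
    include_field_max_value: bool = True

    @model_validator(mode="before")
    @classmethod
    def normalize(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        # The raw input dict arrives here before any field validation runs.
        if values.get("profile_table_level_only"):
            for name in cls.model_fields:  # v1 spelling: cls.__fields__
                if name.startswith("include_field_"):
                    values.setdefault(name, False)
        return values

cfg = ProfilingConfig(profile_table_level_only=True)
print(cfg.include_field_min_value)  # False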
datahub/ingestion/source/grafana/grafana_api.py
CHANGED

@@ -69,7 +69,7 @@ class GrafanaAPIClient:
             if not batch:
                 break

-            folders.extend(Folder.
+            folders.extend(Folder.model_validate(folder) for folder in batch)
             page += 1
         except requests.exceptions.RequestException as e:
             self.report.report_failure(
@@ -88,7 +88,7 @@ class GrafanaAPIClient:
         try:
             response = self.session.get(f"{self.base_url}/api/dashboards/uid/{uid}")
             response.raise_for_status()
-            return Dashboard.
+            return Dashboard.model_validate(response.json())
         except requests.exceptions.RequestException as e:
             self.report.warning(
                 title="Dashboard Fetch Error",
datahub/ingestion/source/grafana/grafana_config.py
CHANGED

@@ -1,6 +1,6 @@
 from typing import Dict, Optional

-from pydantic import Field, SecretStr,
+from pydantic import Field, SecretStr, field_validator

 from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
 from datahub.configuration.source_common import (
@@ -99,6 +99,7 @@ class GrafanaSourceConfig(
         description="Map of Grafana datasource types/UIDs to platform connection configs for lineage extraction",
     )

-    @
-
+    @field_validator("url", mode="after")
+    @classmethod
+    def remove_trailing_slash(cls, v: str) -> str:
         return config_clean.remove_trailing_slashes(v)
datahub/ingestion/source/grafana/grafana_source.py
CHANGED

@@ -171,7 +171,7 @@ class GrafanaSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "GrafanaSource":
-        config = GrafanaSourceConfig.
+        config = GrafanaSourceConfig.model_validate(config_dict)
         return cls(config, ctx)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
datahub/ingestion/source/grafana/models.py
CHANGED

@@ -79,18 +79,29 @@ class Dashboard(_GrafanaBaseModel):
         for panel_data in panels_data:
             if panel_data.get("type") == "row" and "panels" in panel_data:
                 panels.extend(
-                    Panel.
+                    Panel.model_validate(p)
                     for p in panel_data["panels"]
                     if p.get("type") != "row"
                 )
             elif panel_data.get("type") != "row":
-                panels.append(Panel.
+                panels.append(Panel.model_validate(panel_data))
         return panels

     @classmethod
-    def
+    def model_validate(
+        cls,
+        obj: Any,
+        *,
+        strict: Optional[bool] = None,
+        from_attributes: Optional[bool] = None,
+        context: Optional[Any] = None,
+        by_alias: Optional[bool] = None,
+        by_name: Optional[bool] = None,
+    ) -> "Dashboard":
         """Custom parsing to handle nested panel extraction."""
-
+        # Handle both direct dashboard data and nested structure with 'dashboard' key
+        dashboard_data = obj.get("dashboard", obj)
+
         _panel_data = dashboard_data.get("panels", [])
         panels = []
         try:
@@ -113,7 +124,14 @@ class Dashboard(_GrafanaBaseModel):
         if "refresh" in dashboard_dict and isinstance(dashboard_dict["refresh"], bool):
             dashboard_dict["refresh"] = str(dashboard_dict["refresh"])

-        return super().
+        return super().model_validate(
+            dashboard_dict,
+            strict=strict,
+            from_attributes=from_attributes,
+            context=context,
+            by_alias=by_alias,
+            by_name=by_name,
+        )


 class Folder(_GrafanaBaseModel):
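Overriding model_validate on a Pydantic v2 model means reproducing its keyword-only signature and forwarding everything to super(); the by_alias/by_name parameters in the new signature above only exist on fairly recent Pydantic 2.x releases, so this change appears to assume such a release. A generic sketch of the same pattern on a hypothetical model:

from typing import Any, Optional
from pydantic import BaseModel

class Widget(BaseModel):  # hypothetical model, mirroring the Dashboard override pattern
    title: str = ""

    @classmethod
    def model_validate(
        cls,
        obj: Any,
        *,
        strict: Optional[bool] = None,
        from_attributes: Optional[bool] = None,
        context: Optional[Any] = None,
        by_alias: Optional[bool] = None,
        by_name: Optional[bool] = None,
    ) -> "Widget":
        # Unwrap a nested payload before normal validation, the way the
        # Dashboard override unwraps the "dashboard" key.
        data = obj.get("widget", obj) if isinstance(obj, dict) else obj
        return super().model_validate(
            data,
            strict=strict,
            from_attributes=from_attributes,
            context=context,
            by_alias=by_alias,
            by_name=by_name,
        )

print(Widget.model_validate({"widget": {"title": "CPU"}}).title)  # CPU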
datahub/ingestion/source/hex/api.py
CHANGED

@@ -4,7 +4,7 @@ from datetime import datetime, timezone
 from typing import Any, Dict, Generator, List, Optional, Union

 import requests
-from pydantic import BaseModel, Field, ValidationError,
+from pydantic import BaseModel, Field, ValidationError, field_validator
 from requests.adapters import HTTPAdapter
 from typing_extensions import assert_never
 from urllib3.util.retry import Retry
@@ -50,7 +50,8 @@ class HexApiProjectAnalytics(BaseModel):
         default=None, alias="publishedResultsUpdatedAt"
     )

-    @
+    @field_validator("last_viewed_at", "published_results_updated_at", mode="before")
+    @classmethod
     def parse_datetime(cls, value):
         if value is None:
             return None
@@ -167,14 +168,15 @@ class HexApiProjectApiResource(BaseModel):
     class Config:
         extra = "ignore"  # Allow extra fields in the JSON

-    @
+    @field_validator(
         "created_at",
         "last_edited_at",
         "last_published_at",
         "archived_at",
         "trashed_at",
-
+        mode="before",
     )
+    @classmethod
     def parse_datetime(cls, value):
         if value is None:
             return None
@@ -292,7 +294,7 @@ class HexApi:
         )
         response.raise_for_status()

-        api_response = HexApiProjectsListResponse.
+        api_response = HexApiProjectsListResponse.model_validate(response.json())
         logger.info(f"Fetched {len(api_response.values)} items")
         params["after"] = (
             api_response.pagination.after if api_response.pagination else None
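A single @field_validator can cover several fields at once, and mode="before" lets it normalize the raw API strings before Pydantic's own datetime parsing runs. A small sketch with invented field names, not the real Hex API models:

from datetime import datetime
from typing import Optional
from pydantic import BaseModel, field_validator

class ProjectAnalytics(BaseModel):  # invented stand-in for the Hex API models
    created_at: Optional[datetime] = None
    last_viewed_at: Optional[datetime] = None

    @field_validator("created_at", "last_viewed_at", mode="before")
    @classmethod
    def parse_datetime(cls, value):
        # Receives the raw JSON value; normalize a trailing "Z" before datetime parsing.
        if isinstance(value, str) and value.endswith("Z"):
            return value[:-1] + "+00:00"
        return value

print(ProjectAnalytics(last_viewed_at="2024-05-01T12:00:00Z").last_viewed_at)
# 2024-05-01 12:00:00+00:00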
datahub/ingestion/source/hex/hex.py
CHANGED

@@ -3,7 +3,7 @@ from dataclasses import dataclass
 from datetime import datetime, timedelta, timezone
 from typing import Any, Dict, Iterable, List, Optional

-from pydantic import Field, SecretStr,
+from pydantic import Field, SecretStr, model_validator
 from typing_extensions import assert_never

 from datahub.configuration.common import AllowDenyPattern
@@ -120,7 +120,8 @@ class HexSourceConfig(
         description="Number of items to fetch per DataHub API call.",
     )

-    @
+    @model_validator(mode="before")
+    @classmethod
     def validate_lineage_times(cls, data: Dict[str, Any]) -> Dict[str, Any]:
         # In-place update of the input dict would cause state contamination. This was discovered through test failures
         # in test_hex.py where the same dict is reused.
@@ -238,7 +239,7 @@ class HexSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict: Dict[str, Any], ctx: PipelineContext) -> "HexSource":
-        config = HexSourceConfig.
+        config = HexSourceConfig.model_validate(config_dict)
         return cls(config, ctx)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
datahub/ingestion/source/iceberg/iceberg.py
CHANGED

@@ -161,7 +161,7 @@ class IcebergSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict: Dict, ctx: PipelineContext) -> "IcebergSource":
-        config = IcebergSourceConfig.
+        config = IcebergSourceConfig.model_validate(config_dict)
         return cls(config, ctx)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:

datahub/ingestion/source/iceberg/iceberg_common.py
CHANGED

@@ -4,7 +4,7 @@ from dataclasses import dataclass, field
 from typing import Any, Dict, Optional

 from humanfriendly import format_timespan
-from pydantic import Field,
+from pydantic import Field, field_validator
 from pyiceberg.catalog import Catalog, load_catalog
 from pyiceberg.catalog.rest import RestCatalog
 from requests.adapters import HTTPAdapter
@@ -108,7 +108,8 @@ class IcebergSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin)
         default=1, description="How many threads will be processing tables"
     )

-    @
+    @field_validator("catalog", mode="before")
+    @classmethod
     def handle_deprecated_catalog_format(cls, value):
         # Once support for deprecated format is dropped, we can remove this validator.
         if (
@@ -131,7 +132,8 @@ class IcebergSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin)
         # In case the input is already the new format or is invalid
         return value

-    @
+    @field_validator("catalog", mode="after")
+    @classmethod
     def validate_catalog_size(cls, value):
         if len(value) != 1:
             raise ValueError("The catalog must contain exactly one entry.")
datahub/ingestion/source/identity/azure_ad.py
CHANGED

@@ -254,7 +254,7 @@ class AzureADSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = AzureADConfig.
+        config = AzureADConfig.model_validate(config_dict)
         return cls(config, ctx)

     def __init__(self, config: AzureADConfig, ctx: PipelineContext):
datahub/ingestion/source/identity/okta.py
CHANGED

@@ -11,7 +11,7 @@ import nest_asyncio
 from okta.client import Client as OktaClient
 from okta.exceptions import OktaAPIException
 from okta.models import Group, GroupProfile, User, UserProfile, UserStatus
-from pydantic import
+from pydantic import model_validator
 from pydantic.fields import Field

 from datahub.emitter.mcp import MetadataChangeProposalWrapper
@@ -157,21 +157,21 @@ class OktaConfig(StatefulIngestionConfigBase):
     mask_group_id: bool = True
     mask_user_id: bool = True

-    @
-    def okta_users_one_of_filter_or_search(
-        if
+    @model_validator(mode="after")
+    def okta_users_one_of_filter_or_search(self) -> "OktaConfig":
+        if self.okta_users_search and self.okta_users_filter:
             raise ValueError(
                 "Only one of okta_users_filter or okta_users_search can be set"
             )
-        return
+        return self

-    @
-    def okta_groups_one_of_filter_or_search(
-        if
+    @model_validator(mode="after")
+    def okta_groups_one_of_filter_or_search(self) -> "OktaConfig":
+        if self.okta_groups_search and self.okta_groups_filter:
             raise ValueError(
                 "Only one of okta_groups_filter or okta_groups_search can be set"
             )
-        return
+        return self


 @dataclass
@@ -288,7 +288,7 @@ class OktaSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = OktaConfig.
+        config = OktaConfig.model_validate(config_dict)
         return cls(config, ctx)

     def __init__(self, config: OktaConfig, ctx: PipelineContext):
datahub/ingestion/source/kafka/kafka.py
CHANGED

@@ -267,7 +267,7 @@ class KafkaSource(StatefulIngestionSourceBase, TestableSource):

     @classmethod
     def create(cls, config_dict: Dict, ctx: PipelineContext) -> "KafkaSource":
-        config: KafkaSourceConfig = KafkaSourceConfig.
+        config: KafkaSourceConfig = KafkaSourceConfig.model_validate(config_dict)
         return cls(config, ctx)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
datahub/ingestion/source/ldap.py
CHANGED
@@ -242,7 +242,7 @@ class LDAPSource(StatefulIngestionSourceBase):
     @classmethod
     def create(cls, config_dict: Dict[str, Any], ctx: PipelineContext) -> "LDAPSource":
         """Factory method."""
-        config = LDAPSourceConfig.
+        config = LDAPSourceConfig.model_validate(config_dict)
         return cls(ctx, config)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: