acryl-datahub 1.3.0.1rc9__py3-none-any.whl → 1.3.1.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as published to their public registries, and is provided for informational purposes only.
- {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/METADATA +2550 -2543
- {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/RECORD +263 -261
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +2 -2
- datahub/api/entities/corpgroup/corpgroup.py +11 -6
- datahub/api/entities/corpuser/corpuser.py +11 -11
- datahub/api/entities/dataproduct/dataproduct.py +47 -27
- datahub/api/entities/dataset/dataset.py +32 -21
- datahub/api/entities/external/lake_formation_external_entites.py +5 -6
- datahub/api/entities/external/unity_catalog_external_entites.py +5 -7
- datahub/api/entities/forms/forms.py +16 -14
- datahub/api/entities/structuredproperties/structuredproperties.py +23 -16
- datahub/cli/check_cli.py +2 -2
- datahub/cli/config_utils.py +3 -3
- datahub/cli/lite_cli.py +9 -7
- datahub/cli/migrate.py +4 -4
- datahub/cli/quickstart_versioning.py +3 -3
- datahub/cli/specific/group_cli.py +1 -1
- datahub/cli/specific/structuredproperties_cli.py +1 -1
- datahub/cli/specific/user_cli.py +1 -1
- datahub/configuration/common.py +14 -2
- datahub/configuration/connection_resolver.py +2 -2
- datahub/configuration/git.py +47 -30
- datahub/configuration/import_resolver.py +2 -2
- datahub/configuration/kafka.py +4 -3
- datahub/configuration/time_window_config.py +26 -26
- datahub/configuration/validate_field_deprecation.py +2 -2
- datahub/configuration/validate_field_removal.py +2 -2
- datahub/configuration/validate_field_rename.py +2 -2
- datahub/configuration/validate_multiline_string.py +2 -1
- datahub/emitter/kafka_emitter.py +3 -1
- datahub/emitter/rest_emitter.py +2 -4
- datahub/ingestion/api/decorators.py +1 -1
- datahub/ingestion/api/report.py +1 -1
- datahub/ingestion/api/sink.py +1 -1
- datahub/ingestion/api/source.py +1 -1
- datahub/ingestion/glossary/datahub_classifier.py +11 -8
- datahub/ingestion/graph/client.py +5 -1
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
- datahub/ingestion/reporting/file_reporter.py +5 -4
- datahub/ingestion/run/pipeline.py +7 -6
- datahub/ingestion/run/pipeline_config.py +12 -14
- datahub/ingestion/run/sink_callback.py +1 -1
- datahub/ingestion/sink/datahub_rest.py +6 -4
- datahub/ingestion/source/abs/config.py +19 -19
- datahub/ingestion/source/abs/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/abs/source.py +2 -2
- datahub/ingestion/source/aws/aws_common.py +1 -1
- datahub/ingestion/source/aws/glue.py +6 -4
- datahub/ingestion/source/aws/sagemaker.py +1 -1
- datahub/ingestion/source/azure/azure_common.py +8 -12
- datahub/ingestion/source/bigquery_v2/bigquery.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +43 -30
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -1
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/common/gcp_credentials_config.py +10 -10
- datahub/ingestion/source/data_lake_common/path_spec.py +85 -89
- datahub/ingestion/source/datahub/config.py +8 -8
- datahub/ingestion/source/datahub/datahub_source.py +1 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +9 -3
- datahub/ingestion/source/dbt/dbt_common.py +39 -37
- datahub/ingestion/source/dbt/dbt_core.py +10 -12
- datahub/ingestion/source/debug/datahub_debug.py +1 -1
- datahub/ingestion/source/delta_lake/config.py +6 -4
- datahub/ingestion/source/dremio/dremio_api.py +212 -78
- datahub/ingestion/source/dremio/dremio_config.py +10 -6
- datahub/ingestion/source/dremio/dremio_entities.py +55 -39
- datahub/ingestion/source/dremio/dremio_profiling.py +14 -3
- datahub/ingestion/source/dremio/dremio_source.py +24 -26
- datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
- datahub/ingestion/source/elastic_search.py +110 -32
- datahub/ingestion/source/excel/source.py +1 -1
- datahub/ingestion/source/feast.py +1 -1
- datahub/ingestion/source/file.py +5 -4
- datahub/ingestion/source/fivetran/config.py +17 -16
- datahub/ingestion/source/fivetran/fivetran.py +2 -2
- datahub/ingestion/source/gc/datahub_gc.py +1 -1
- datahub/ingestion/source/gcs/gcs_source.py +8 -10
- datahub/ingestion/source/ge_profiling_config.py +8 -5
- datahub/ingestion/source/grafana/grafana_api.py +2 -2
- datahub/ingestion/source/grafana/grafana_config.py +4 -3
- datahub/ingestion/source/grafana/grafana_source.py +1 -1
- datahub/ingestion/source/grafana/models.py +23 -5
- datahub/ingestion/source/hex/api.py +7 -5
- datahub/ingestion/source/hex/hex.py +4 -3
- datahub/ingestion/source/iceberg/iceberg.py +1 -1
- datahub/ingestion/source/iceberg/iceberg_common.py +5 -3
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +10 -10
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +7 -5
- datahub/ingestion/source/looker/looker_config.py +21 -20
- datahub/ingestion/source/looker/lookml_config.py +47 -47
- datahub/ingestion/source/metabase.py +8 -8
- datahub/ingestion/source/metadata/business_glossary.py +2 -2
- datahub/ingestion/source/metadata/lineage.py +13 -8
- datahub/ingestion/source/mlflow.py +1 -1
- datahub/ingestion/source/mode.py +6 -4
- datahub/ingestion/source/mongodb.py +4 -3
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +17 -23
- datahub/ingestion/source/openapi.py +6 -8
- datahub/ingestion/source/powerbi/config.py +33 -32
- datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +2 -2
- datahub/ingestion/source/powerbi/powerbi.py +1 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +2 -2
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +8 -6
- datahub/ingestion/source/preset.py +8 -8
- datahub/ingestion/source/pulsar.py +1 -1
- datahub/ingestion/source/qlik_sense/data_classes.py +15 -8
- datahub/ingestion/source/qlik_sense/qlik_api.py +7 -7
- datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
- datahub/ingestion/source/redshift/config.py +18 -20
- datahub/ingestion/source/redshift/redshift.py +2 -2
- datahub/ingestion/source/redshift/usage.py +23 -3
- datahub/ingestion/source/s3/config.py +83 -62
- datahub/ingestion/source/s3/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/s3/source.py +8 -5
- datahub/ingestion/source/sac/sac.py +5 -4
- datahub/ingestion/source/salesforce.py +3 -2
- datahub/ingestion/source/schema/json_schema.py +2 -2
- datahub/ingestion/source/sigma/data_classes.py +3 -2
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/sigma/sigma_api.py +7 -7
- datahub/ingestion/source/slack/slack.py +1 -1
- datahub/ingestion/source/snaplogic/snaplogic.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_assertion.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +35 -31
- datahub/ingestion/source/snowflake/snowflake_connection.py +35 -13
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_queries.py +28 -4
- datahub/ingestion/source/sql/athena.py +1 -1
- datahub/ingestion/source/sql/clickhouse.py +4 -2
- datahub/ingestion/source/sql/cockroachdb.py +1 -1
- datahub/ingestion/source/sql/druid.py +1 -1
- datahub/ingestion/source/sql/hana.py +1 -1
- datahub/ingestion/source/sql/hive.py +7 -5
- datahub/ingestion/source/sql/hive_metastore.py +1 -1
- datahub/ingestion/source/sql/mssql/source.py +13 -6
- datahub/ingestion/source/sql/mysql.py +1 -1
- datahub/ingestion/source/sql/oracle.py +17 -10
- datahub/ingestion/source/sql/postgres.py +2 -2
- datahub/ingestion/source/sql/presto.py +1 -1
- datahub/ingestion/source/sql/sql_config.py +8 -9
- datahub/ingestion/source/sql/sql_generic.py +1 -1
- datahub/ingestion/source/sql/teradata.py +1 -1
- datahub/ingestion/source/sql/trino.py +1 -1
- datahub/ingestion/source/sql/vertica.py +5 -4
- datahub/ingestion/source/sql_queries.py +174 -22
- datahub/ingestion/source/state/checkpoint.py +2 -2
- datahub/ingestion/source/state/entity_removal_state.py +2 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +55 -45
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/superset.py +9 -9
- datahub/ingestion/source/tableau/tableau.py +14 -16
- datahub/ingestion/source/unity/azure_auth_config.py +15 -0
- datahub/ingestion/source/unity/config.py +51 -34
- datahub/ingestion/source/unity/connection.py +7 -1
- datahub/ingestion/source/unity/connection_test.py +1 -1
- datahub/ingestion/source/unity/proxy.py +216 -7
- datahub/ingestion/source/unity/proxy_types.py +91 -0
- datahub/ingestion/source/unity/source.py +29 -3
- datahub/ingestion/source/usage/clickhouse_usage.py +1 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +1 -1
- datahub/ingestion/source/usage/usage_common.py +5 -3
- datahub/ingestion/source_config/csv_enricher.py +7 -6
- datahub/ingestion/source_config/operation_config.py +7 -4
- datahub/ingestion/source_config/pulsar.py +11 -15
- datahub/ingestion/transformer/add_dataset_browse_path.py +1 -1
- datahub/ingestion/transformer/add_dataset_dataproduct.py +6 -5
- datahub/ingestion/transformer/add_dataset_ownership.py +3 -3
- datahub/ingestion/transformer/add_dataset_properties.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_tags.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_terms.py +2 -2
- datahub/ingestion/transformer/add_dataset_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +1 -1
- datahub/ingestion/transformer/extract_dataset_tags.py +1 -1
- datahub/ingestion/transformer/extract_ownership_from_tags.py +1 -1
- datahub/ingestion/transformer/mark_dataset_status.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_ownership.py +1 -1
- datahub/ingestion/transformer/remove_dataset_ownership.py +1 -1
- datahub/ingestion/transformer/replace_external_url.py +2 -2
- datahub/ingestion/transformer/set_browse_path.py +1 -1
- datahub/ingestion/transformer/tags_to_terms.py +1 -1
- datahub/lite/duckdb_lite.py +1 -1
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/_internal_schema_classes.py +62 -2
- datahub/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +2 -0
- datahub/metadata/schema.avsc +271 -91
- datahub/metadata/schemas/ApplicationProperties.avsc +5 -2
- datahub/metadata/schemas/AssertionInfo.avsc +48 -5
- datahub/metadata/schemas/BusinessAttributeInfo.avsc +8 -4
- datahub/metadata/schemas/ChartInfo.avsc +12 -5
- datahub/metadata/schemas/ContainerProperties.avsc +12 -5
- datahub/metadata/schemas/CorpGroupEditableInfo.avsc +2 -1
- datahub/metadata/schemas/CorpGroupInfo.avsc +7 -3
- datahub/metadata/schemas/CorpUserInfo.avsc +5 -2
- datahub/metadata/schemas/CorpUserSettings.avsc +4 -2
- datahub/metadata/schemas/DashboardInfo.avsc +16 -4
- datahub/metadata/schemas/DataFlowInfo.avsc +11 -5
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +4 -2
- datahub/metadata/schemas/DataJobInfo.avsc +9 -4
- datahub/metadata/schemas/DataPlatformInfo.avsc +3 -1
- datahub/metadata/schemas/DataPlatformInstanceProperties.avsc +5 -2
- datahub/metadata/schemas/DataProductProperties.avsc +5 -2
- datahub/metadata/schemas/DataTypeInfo.avsc +5 -0
- datahub/metadata/schemas/DatasetKey.avsc +2 -1
- datahub/metadata/schemas/DatasetProperties.avsc +12 -5
- datahub/metadata/schemas/DomainProperties.avsc +7 -3
- datahub/metadata/schemas/EditableContainerProperties.avsc +2 -1
- datahub/metadata/schemas/EditableDashboardProperties.avsc +2 -1
- datahub/metadata/schemas/EditableDataFlowProperties.avsc +2 -1
- datahub/metadata/schemas/EditableDataJobProperties.avsc +2 -1
- datahub/metadata/schemas/EditableDatasetProperties.avsc +2 -1
- datahub/metadata/schemas/EditableERModelRelationshipProperties.avsc +2 -1
- datahub/metadata/schemas/EditableMLFeatureProperties.avsc +2 -1
- datahub/metadata/schemas/EditableMLFeatureTableProperties.avsc +2 -1
- datahub/metadata/schemas/EditableMLModelGroupProperties.avsc +2 -1
- datahub/metadata/schemas/EditableMLModelProperties.avsc +2 -1
- datahub/metadata/schemas/EditableNotebookProperties.avsc +2 -1
- datahub/metadata/schemas/EditableSchemaMetadata.avsc +5 -3
- datahub/metadata/schemas/EntityTypeInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalTags.avsc +3 -2
- datahub/metadata/schemas/GlossaryNodeInfo.avsc +3 -1
- datahub/metadata/schemas/GlossaryTermInfo.avsc +3 -1
- datahub/metadata/schemas/InputFields.avsc +3 -2
- datahub/metadata/schemas/MLFeatureKey.avsc +3 -1
- datahub/metadata/schemas/MLFeatureTableKey.avsc +3 -1
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +3 -1
- datahub/metadata/schemas/MLModelGroupKey.avsc +3 -1
- datahub/metadata/schemas/MLModelKey.avsc +3 -1
- datahub/metadata/schemas/MLModelProperties.avsc +4 -2
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +3 -1
- datahub/metadata/schemas/MetadataChangeEvent.avsc +124 -50
- datahub/metadata/schemas/NotebookInfo.avsc +5 -2
- datahub/metadata/schemas/Ownership.avsc +3 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -1
- datahub/metadata/schemas/RoleProperties.avsc +3 -1
- datahub/metadata/schemas/SchemaFieldInfo.avsc +3 -1
- datahub/metadata/schemas/SchemaMetadata.avsc +3 -2
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc +15 -4
- datahub/metadata/schemas/TagProperties.avsc +3 -1
- datahub/metadata/schemas/TestInfo.avsc +2 -1
- datahub/sdk/__init__.py +1 -0
- datahub/sdk/_all_entities.py +2 -0
- datahub/sdk/search_filters.py +68 -40
- datahub/sdk/tag.py +112 -0
- datahub/secret/datahub_secret_store.py +7 -4
- datahub/secret/file_secret_store.py +1 -1
- datahub/sql_parsing/schema_resolver.py +29 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +15 -0
- datahub/sql_parsing/sqlglot_lineage.py +5 -2
- datahub/testing/check_sql_parser_result.py +2 -2
- datahub/utilities/ingest_utils.py +1 -1
- {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/top_level.txt +0 -0

datahub/ingestion/source/dremio/dremio_source.py
CHANGED

@@ -55,7 +55,7 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
 from datahub.ingestion.source_report.ingestion_stage import (
     LINEAGE_EXTRACTION,
     METADATA_EXTRACTION,
-
+    PROFILING,
 )
 from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
     DatasetLineageTypeClass,
@@ -201,7 +201,7 @@ class DremioSource(StatefulIngestionSourceBase):
         return "dremio"

     def _build_source_map(self) -> Dict[str, DremioSourceMapEntry]:
-        dremio_sources = self.dremio_catalog.get_sources()
+        dremio_sources = list(self.dremio_catalog.get_sources())
         source_mappings_config = self.config.source_mappings or []

         source_map = build_dremio_source_map(dremio_sources, source_mappings_config)
@@ -242,9 +242,7 @@ class DremioSource(StatefulIngestionSourceBase):
         )

         # Process Datasets
-
-
-        for dataset_info in datasets:
+        for dataset_info in self.dremio_catalog.get_datasets():
             try:
                 yield from self.process_dataset(dataset_info)
                 logger.info(
@@ -258,10 +256,8 @@ class DremioSource(StatefulIngestionSourceBase):
                     exc=exc,
                 )

-        # Process Glossary Terms
-
-
-        for glossary_term in glossary_terms:
+        # Process Glossary Terms using streaming
+        for glossary_term in self.dremio_catalog.get_glossary_terms():
             try:
                 yield from self.process_glossary_term(glossary_term)
             except Exception as exc:
@@ -283,14 +279,16 @@ class DremioSource(StatefulIngestionSourceBase):
         # Profiling
         if self.config.is_profiling_enabled():
             with (
-                self.report.
+                self.report.new_stage(PROFILING),
                 ThreadPoolExecutor(
                     max_workers=self.config.profiling.max_workers
                 ) as executor,
             ):
+                # Collect datasets for profiling
+                datasets_for_profiling = list(self.dremio_catalog.get_datasets())
                 future_to_dataset = {
                     executor.submit(self.generate_profiles, dataset): dataset
-                    for dataset in
+                    for dataset in datasets_for_profiling
                 }

                 for future in as_completed(future_to_dataset):
@@ -338,10 +336,10 @@ class DremioSource(StatefulIngestionSourceBase):
             return

         dataset_urn = make_dataset_urn_with_platform_instance(
-            platform=self.get_platform(),
-            name=dataset_name,
-            platform_instance=self.config.platform_instance,
+            platform=make_data_platform_urn(self.get_platform()),
+            name=f"dremio.{dataset_name}",
             env=self.config.env,
+            platform_instance=self.config.platform_instance,
         )

         for dremio_mcp in self.dremio_aspects.populate_dataset_mcp(
@@ -421,10 +419,10 @@ class DremioSource(StatefulIngestionSourceBase):
         schema_str = ".".join(dataset_info.path)
         dataset_name = f"{schema_str}.{dataset_info.resource_name}".lower()
         dataset_urn = make_dataset_urn_with_platform_instance(
-            platform=self.get_platform(),
-            name=dataset_name,
-            platform_instance=self.config.platform_instance,
+            platform=make_data_platform_urn(self.get_platform()),
+            name=f"dremio.{dataset_name}",
             env=self.config.env,
+            platform_instance=self.config.platform_instance,
         )
         yield from self.profiler.get_workunits(dataset_info, dataset_urn)

@@ -436,10 +434,10 @@ class DremioSource(StatefulIngestionSourceBase):
         """
         upstream_urns = [
             make_dataset_urn_with_platform_instance(
-                platform=self.get_platform(),
-                name=upstream_table.lower(),
-                platform_instance=self.config.platform_instance,
+                platform=make_data_platform_urn(self.get_platform()),
+                name=f"dremio.{upstream_table.lower()}",
                 env=self.config.env,
+                platform_instance=self.config.platform_instance,
             )
             for upstream_table in parents
         ]
@@ -498,19 +496,19 @@ class DremioSource(StatefulIngestionSourceBase):
         if query.query and query.affected_dataset:
             upstream_urns = [
                 make_dataset_urn_with_platform_instance(
-                    platform=self.get_platform(),
-                    name=ds.lower(),
-                    platform_instance=self.config.platform_instance,
+                    platform=make_data_platform_urn(self.get_platform()),
+                    name=f"dremio.{ds.lower()}",
                     env=self.config.env,
+                    platform_instance=self.config.platform_instance,
                 )
                 for ds in query.queried_datasets
             ]

             downstream_urn = make_dataset_urn_with_platform_instance(
-                platform=self.get_platform(),
-                name=query.affected_dataset.lower(),
-                platform_instance=self.config.platform_instance,
+                platform=make_data_platform_urn(self.get_platform()),
+                name=f"dremio.{query.affected_dataset.lower()}",
                 env=self.config.env,
+                platform_instance=self.config.platform_instance,
             )

             # Add query to SqlParsingAggregator
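
Note: the theme of these dremio_source.py hunks is streaming from the catalog generators (get_datasets(), get_glossary_terms()) instead of pre-collected lists, with an explicit list(...) only where every item is wanted up front, as in the profiling executor fan-out. A minimal sketch of that split, using hypothetical stand-ins for the catalog and profiler:

    from concurrent.futures import ThreadPoolExecutor, as_completed
    from typing import Iterator

    def get_datasets() -> Iterator[str]:
        # Stand-in for dremio_catalog.get_datasets(): yields items lazily.
        yield from ("orders", "customers", "payments")

    def generate_profiles(name: str) -> str:
        # Stand-in for the real profiler.
        return f"profiled {name}"

    # Metadata extraction streams: each dataset is handled as it is yielded.
    for dataset in get_datasets():
        pass  # process_dataset(dataset) in the real source

    # Profiling submits all work before collecting results, so a fresh
    # generator is materialized into a list before building the future map.
    datasets_for_profiling = list(get_datasets())
    with ThreadPoolExecutor(max_workers=2) as executor:
        future_to_dataset = {
            executor.submit(generate_profiles, ds): ds
            for ds in datasets_for_profiling
        }
        for future in as_completed(future_to_dataset):
            print(future.result())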

datahub/ingestion/source/dynamodb/dynamodb.py
CHANGED

@@ -200,7 +200,7 @@ class DynamoDBSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "DynamoDBSource":
-        config = DynamoDBConfig.
+        config = DynamoDBConfig.model_validate(config_dict)
         return cls(ctx, config, "dynamodb")

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
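
Note: the DynamoDBConfig.model_validate(config_dict) call here (and the matching calls in the other sources below) is pydantic v2's replacement for v1's parse_obj. A minimal sketch of the rename, with hypothetical config fields:

    from pydantic import BaseModel

    class DynamoDBConfig(BaseModel):
        # Hypothetical fields, for illustration only.
        aws_region: str
        table_pattern: str = ".*"

    config_dict = {"aws_region": "us-west-2"}

    # pydantic v2: validate a plain dict against the model.
    config = DynamoDBConfig.model_validate(config_dict)
    # pydantic v1 spelling of the same operation (deprecated in v2):
    # config = DynamoDBConfig.parse_obj(config_dict)
    assert config.aws_region == "us-west-2"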

datahub/ingestion/source/elastic_search.py
CHANGED

@@ -8,7 +8,7 @@ from hashlib import md5
 from typing import Any, Dict, Generator, Iterable, List, Optional, Tuple, Type, Union

 from elasticsearch import Elasticsearch
-from pydantic import
+from pydantic import field_validator
 from pydantic.fields import Field

 from datahub.configuration.common import AllowDenyPattern, ConfigModel
@@ -330,7 +330,8 @@ class ElasticsearchSourceConfig(
             self.profiling.operation_config
         )

-    @
+    @field_validator("host", mode="after")
+    @classmethod
     def host_colon_port_comma(cls, host_val: str) -> str:
         for entry in host_val.split(","):
             entry = remove_protocol(entry)
@@ -382,7 +383,7 @@ class ElasticsearchSource(StatefulIngestionSourceBase):
     def create(
         cls, config_dict: Dict[str, Any], ctx: PipelineContext
     ) -> "ElasticsearchSource":
-        config = ElasticsearchSourceConfig.
+        config = ElasticsearchSourceConfig.model_validate(config_dict)
         return cls(config, ctx)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
@@ -407,12 +408,78 @@ class ElasticsearchSource(StatefulIngestionSourceBase):
         for mcp in self._get_data_stream_index_count_mcps():
             yield mcp.as_workunit()
         if self.source_config.ingest_index_templates:
-
-
+            # Fetch legacy index templates
+            legacy_templates = self.client.indices.get_template()
+            for template in legacy_templates:
                 if self.source_config.index_template_pattern.allowed(template):
                     for mcp in self._extract_mcps(template, is_index=False):
                         yield mcp.as_workunit()

+            # Fetch composable index templates (ES 7.8+ / OpenSearch)
+            try:
+                composable_templates = self.client.indices.get_index_template()
+                for template_info in composable_templates.get("index_templates", []):
+                    template = template_info.get("name")
+                    if template and self.source_config.index_template_pattern.allowed(
+                        template
+                    ):
+                        for mcp in self._extract_mcps(
+                            template, is_index=False, is_composable_template=True
+                        ):
+                            yield mcp.as_workunit()
+            except Exception as e:
+                logger.warning(f"Unable to fetch composable index templates: {e}")
+
+    def _get_template_metadata(
+        self, template_name: str, is_composable: bool
+    ) -> Dict[str, Any]:
+        """Fetch template metadata from Elasticsearch/OpenSearch."""
+        if is_composable:
+            # For composable templates (ES 7.8+ / OpenSearch)
+            raw_response = self.client.indices.get_index_template(name=template_name)
+            template_data = raw_response.get("index_templates", [{}])[0]
+            return template_data.get("index_template", {})
+        else:
+            # For legacy templates
+            raw_response = self.client.indices.get_template(name=template_name)
+            return raw_response[template_name]
+
+    def _extract_template_custom_properties(
+        self, raw_metadata: Dict[str, Any], is_composable: bool
+    ) -> Dict[str, str]:
+        """Extract custom properties from template metadata."""
+        custom_properties: Dict[str, str] = {}
+
+        # Extract aliases
+        if is_composable:
+            aliases_dict = raw_metadata.get("template", {}).get("aliases", {})
+        else:
+            aliases_dict = raw_metadata.get("aliases", {})
+        index_aliases: List[str] = list(aliases_dict.keys()) if aliases_dict else []
+        if index_aliases:
+            custom_properties["aliases"] = ",".join(index_aliases)
+
+        # Extract index_patterns
+        index_patterns: List[str] = raw_metadata.get("index_patterns", [])
+        if index_patterns:
+            custom_properties["index_patterns"] = ",".join(index_patterns)
+
+        # Extract settings
+        if is_composable:
+            index_settings: Dict[str, Any] = (
+                raw_metadata.get("template", {}).get("settings", {}).get("index", {})
+            )
+        else:
+            index_settings = raw_metadata.get("settings", {}).get("index", {})
+        num_shards: str = index_settings.get("number_of_shards", "")
+        if num_shards:
+            custom_properties["num_shards"] = num_shards
+        num_replicas: str = index_settings.get("number_of_replicas", "")
+        if num_replicas:
+            custom_properties["num_replicas"] = num_replicas
+
+        return custom_properties
+
     def _get_data_stream_index_count_mcps(
         self,
     ) -> Iterable[MetadataChangeProposalWrapper]:
@@ -434,9 +501,11 @@ class ElasticsearchSource(StatefulIngestionSourceBase):
         )

     def _extract_mcps(
-        self, index: str, is_index: bool = True
+        self, index: str, is_index: bool = True, is_composable_template: bool = False
     ) -> Iterable[MetadataChangeProposalWrapper]:
-        logger.debug(
+        logger.debug(
+            f"index='{index}', is_index={is_index}, is_composable_template={is_composable_template}"
+        )

         if is_index:
             raw_index = self.client.indices.get(index=index)
@@ -451,15 +520,20 @@ class ElasticsearchSource(StatefulIngestionSourceBase):
             # This is a duplicate, skip processing it further.
             return
         else:
-
-
+            raw_index_metadata = self._get_template_metadata(
+                index, is_composable_template
+            )
         collapsed_index_name = collapse_name(
             name=index, collapse_urns=self.source_config.collapse_urns
         )

         # 1. Construct and emit the schemaMetadata aspect
         # 1.1 Generate the schema fields from ES mappings.
-
+        # For composable templates, mappings are under 'template.mappings'
+        if is_composable_template:
+            index_mappings = raw_index_metadata.get("template", {}).get("mappings", {})
+        else:
+            index_mappings = raw_index_metadata.get("mappings", {})
         index_mappings_json_str: str = json.dumps(index_mappings)
         md5_hash = md5(index_mappings_json_str.encode()).hexdigest()
         schema_fields = list(
@@ -517,28 +591,32 @@ class ElasticsearchSource(StatefulIngestionSourceBase):
             ),
         )

-        # 4. Construct and emit properties
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # 4. Construct and emit properties
+        if is_index:
+            custom_properties: Dict[str, str] = {}
+            # Extract properties for indices
+            index_aliases: List[str] = list(
+                raw_index_metadata.get("aliases", {}).keys()
+            )
+            if index_aliases:
+                custom_properties["aliases"] = ",".join(index_aliases)
+            index_patterns: List[str] = raw_index_metadata.get("index_patterns", [])
+            if index_patterns:
+                custom_properties["index_patterns"] = ",".join(index_patterns)
+            index_settings: Dict[str, Any] = raw_index_metadata.get("settings", {}).get(
+                "index", {}
+            )
+            num_shards: str = index_settings.get("number_of_shards", "")
+            if num_shards:
+                custom_properties["num_shards"] = num_shards
+            num_replicas: str = index_settings.get("number_of_replicas", "")
+            if num_replicas:
+                custom_properties["num_replicas"] = num_replicas
+        else:
+            # Extract properties for templates
+            custom_properties = self._extract_template_custom_properties(
+                raw_index_metadata, is_composable_template
+            )

         yield MetadataChangeProposalWrapper(
             entityUrn=dataset_urn,
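
Note: the branching in _get_template_metadata and _extract_template_custom_properties exists because the legacy and composable template endpoints return differently shaped payloads. A sketch of the two shapes as the new code reads them (sample values are illustrative, not taken from this release):

    from typing import Any, Dict

    # Legacy endpoint (GET _template/<name>): response is keyed by template name.
    legacy_response: Dict[str, Any] = {
        "logs_template": {
            "index_patterns": ["logs-*"],
            "settings": {"index": {"number_of_shards": "1"}},
            "mappings": {"properties": {"message": {"type": "text"}}},
        }
    }
    legacy_meta = legacy_response["logs_template"]

    # Composable endpoint (GET _index_template/<name>): each template is wrapped
    # in an "index_templates" list, the body sits under "index_template", and
    # settings/mappings are nested one level deeper under "template".
    composable_response: Dict[str, Any] = {
        "index_templates": [
            {
                "name": "logs_template",
                "index_template": {
                    "index_patterns": ["logs-*"],
                    "template": {
                        "settings": {"index": {"number_of_shards": "1"}},
                        "mappings": {"properties": {"message": {"type": "text"}}},
                    },
                },
            }
        ]
    }
    composable_meta = composable_response["index_templates"][0]["index_template"]
    assert (
        composable_meta.get("template", {}).get("mappings", {})
        == legacy_meta["mappings"]
    )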

datahub/ingestion/source/excel/source.py
CHANGED

@@ -156,7 +156,7 @@ class ExcelSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "ExcelSource":
-        config = ExcelSourceConfig.
+        config = ExcelSourceConfig.model_validate(config_dict)
         return cls(ctx, config)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:

datahub/ingestion/source/feast.py
CHANGED

@@ -462,7 +462,7 @@ class FeastRepositorySource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = FeastRepositorySourceConfig.
+        config = FeastRepositorySourceConfig.model_validate(config_dict)
         return cls(config, ctx)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:

datahub/ingestion/source/file.py
CHANGED

@@ -9,7 +9,7 @@ from functools import partial
 from typing import Any, Iterable, Iterator, List, Optional, Tuple, Union

 import ijson
-from pydantic import
+from pydantic import field_validator
 from pydantic.fields import Field

 from datahub.configuration.common import ConfigEnum
@@ -103,7 +103,8 @@ class FileSourceConfig(StatefulIngestionConfigBase):

     stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None

-    @
+    @field_validator("file_extension", mode="after")
+    @classmethod
     def add_leading_dot_to_extension(cls, v: str) -> str:
         if v:
             if v.startswith("."):
@@ -205,7 +206,7 @@ class GenericFileSource(StatefulIngestionSourceBase, TestableSource):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = FileSourceConfig.
+        config = FileSourceConfig.model_validate(config_dict)
         return cls(ctx, config)

     def get_filenames(self) -> Iterable[FileInfo]:
@@ -358,7 +359,7 @@ class GenericFileSource(StatefulIngestionSourceBase, TestableSource):

     @staticmethod
     def test_connection(config_dict: dict) -> TestConnectionReport:
-        config = FileSourceConfig.
+        config = FileSourceConfig.model_validate(config_dict)
         exists = os.path.exists(config.path)
         if not exists:
             return TestConnectionReport(
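
Note: file.py follows the same validator migration seen throughout this release: pydantic v1's bare @validator becomes v2's @field_validator(..., mode="after") stacked on @classmethod. A self-contained sketch of the file_extension normalizer (the body is simplified; only the decorator shape is taken from the diff):

    from pydantic import BaseModel, field_validator

    class FileSourceConfig(BaseModel):
        file_extension: str = ".json"

        @field_validator("file_extension", mode="after")
        @classmethod
        def add_leading_dot_to_extension(cls, v: str) -> str:
            # Normalize "json" -> ".json"; leave dotted values unchanged.
            if v and not v.startswith("."):
                return "." + v
            return v

    print(FileSourceConfig(file_extension="json").file_extension)  # .json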

datahub/ingestion/source/fivetran/config.py
CHANGED

@@ -1,10 +1,10 @@
 import dataclasses
 import logging
 import warnings
-from typing import Dict, Optional
+from typing import Any, Dict, Optional

 import pydantic
-from pydantic import Field,
+from pydantic import Field, field_validator, model_validator
 from typing_extensions import Literal

 from datahub.configuration.common import (
@@ -98,7 +98,8 @@ class DatabricksDestinationConfig(UnityCatalogConnectionConfig):
     catalog: str = Field(description="The fivetran connector log catalog.")
     log_schema: str = Field(description="The fivetran connector log schema.")

-    @
+    @field_validator("warehouse_id", mode="after")
+    @classmethod
     def warehouse_id_should_not_be_empty(cls, warehouse_id: Optional[str]) -> str:
         if warehouse_id is None or (warehouse_id and warehouse_id.strip() == ""):
             raise ValueError("Fivetran requires warehouse_id to be set")
@@ -141,29 +142,28 @@ class FivetranLogConfig(ConfigModel):
         "destination_config", "snowflake_destination_config"
     )

-    @
-    def
-        destination_platform
-
-        if "snowflake_destination_config" not in values:
+    @model_validator(mode="after")
+    def validate_destination_platform_and_config(self) -> "FivetranLogConfig":
+        if self.destination_platform == "snowflake":
+            if self.snowflake_destination_config is None:
                 raise ValueError(
                     "If destination platform is 'snowflake', user must provide snowflake destination configuration in the recipe."
                 )
-        elif destination_platform == "bigquery":
-            if
+        elif self.destination_platform == "bigquery":
+            if self.bigquery_destination_config is None:
                 raise ValueError(
                     "If destination platform is 'bigquery', user must provide bigquery destination configuration in the recipe."
                 )
-        elif destination_platform == "databricks":
-            if
+        elif self.destination_platform == "databricks":
+            if self.databricks_destination_config is None:
                 raise ValueError(
                     "If destination platform is 'databricks', user must provide databricks destination configuration in the recipe."
                 )
         else:
             raise ValueError(
-                f"Destination platform '{destination_platform}' is not yet supported."
+                f"Destination platform '{self.destination_platform}' is not yet supported."
             )
-        return
+        return self


 @dataclasses.dataclass
@@ -267,8 +267,9 @@ class FivetranSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin
         description="Fivetran REST API configuration, used to provide wider support for connections.",
     )

-    @
-
+    @model_validator(mode="before")
+    @classmethod
+    def compat_sources_to_database(cls, values: Any) -> Any:
         if "sources_to_database" in values:
             warnings.warn(
                 "The sources_to_database field is deprecated, please use sources_to_platform_instance instead.",
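
Note: the FivetranLogConfig change shows the other half of the migration: a v1 root validator over a raw values dict becomes a v2 @model_validator(mode="after") that receives the constructed instance and returns self. A condensed sketch with the field set reduced to one platform:

    from typing import Any, Dict, Optional

    from pydantic import BaseModel, model_validator

    class FivetranLogConfig(BaseModel):
        destination_platform: str = "snowflake"
        snowflake_destination_config: Optional[Dict[str, Any]] = None

        @model_validator(mode="after")
        def validate_destination_platform_and_config(self) -> "FivetranLogConfig":
            # mode="after" runs on the validated model, so cross-field checks
            # read attributes instead of indexing into a values dict.
            if self.destination_platform == "snowflake":
                if self.snowflake_destination_config is None:
                    raise ValueError(
                        "If destination platform is 'snowflake', user must provide "
                        "snowflake destination configuration in the recipe."
                    )
            return self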

datahub/ingestion/source/fivetran/fivetran.py
CHANGED

@@ -234,12 +234,12 @@ class FivetranSource(StatefulIngestionSourceBase):
         return dict(
             **{
                 f"source.{k}": str(v)
-                for k, v in source_details.
+                for k, v in source_details.model_dump().items()
                 if v is not None and not isinstance(v, bool)
             },
             **{
                 f"destination.{k}": str(v)
-                for k, v in destination_details.
+                for k, v in destination_details.model_dump().items()
                 if v is not None and not isinstance(v, bool)
             },
         )

datahub/ingestion/source/gc/datahub_gc.py
CHANGED

@@ -127,7 +127,7 @@ class DataHubGcSource(Source):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = DataHubGcSourceConfig.
+        config = DataHubGcSourceConfig.model_validate(config_dict)
         return cls(ctx, config)

     # auto_work_unit_report is overriden to disable a couple of automation like auto status aspect, etc. which is not needed her.

datahub/ingestion/source/gcs/gcs_source.py
CHANGED

@@ -1,7 +1,7 @@
 import logging
-from typing import
+from typing import Iterable, List, Optional

-from pydantic import Field, SecretStr,
+from pydantic import Field, SecretStr, model_validator

 from datahub.configuration.common import ConfigModel
 from datahub.configuration.source_common import DatasetSourceConfigMixin
@@ -64,18 +64,16 @@ class GCSSourceConfig(

     stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None

-    @
-    def check_path_specs_and_infer_platform(
-
-    ) -> List[PathSpec]:
-        if len(path_specs) == 0:
+    @model_validator(mode="after")
+    def check_path_specs_and_infer_platform(self) -> "GCSSourceConfig":
+        if len(self.path_specs) == 0:
             raise ValueError("path_specs must not be empty")

         # Check that all path specs have the gs:// prefix.
-        if any([not is_gcs_uri(path_spec.include) for path_spec in path_specs]):
+        if any([not is_gcs_uri(path_spec.include) for path_spec in self.path_specs]):
             raise ValueError("All path_spec.include should start with gs://")

-        return
+        return self


 class GCSSourceReport(DataLakeSourceReport):
@@ -105,7 +103,7 @@ class GCSSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = GCSSourceConfig.
+        config = GCSSourceConfig.model_validate(config_dict)
         return cls(config, ctx)

     def create_equivalent_s3_config(self):

datahub/ingestion/source/ge_profiling_config.py
CHANGED

@@ -4,6 +4,7 @@ import os
 from typing import Annotated, Any, Dict, List, Optional

 import pydantic
+from pydantic import model_validator
 from pydantic.fields import Field

 from datahub.configuration.common import AllowDenyPattern, ConfigModel, SupportedSources
@@ -212,7 +213,8 @@ class GEProfilingConfig(GEProfilingBaseConfig):
         description="Whether to profile complex types like structs, arrays and maps. ",
     )

-    @
+    @model_validator(mode="before")
+    @classmethod
     def deprecate_bigquery_temp_table_schema(cls, values):
         # TODO: Update docs to remove mention of this field.
         if "bigquery_temp_table_schema" in values:
@@ -222,16 +224,17 @@ class GEProfilingConfig(GEProfilingBaseConfig):
             del values["bigquery_temp_table_schema"]
         return values

-    @
+    @model_validator(mode="before")
+    @classmethod
     def ensure_field_level_settings_are_normalized(
-        cls
+        cls, values: Dict[str, Any]
     ) -> Dict[str, Any]:
         max_num_fields_to_profile_key = "max_number_of_fields_to_profile"
         max_num_fields_to_profile = values.get(max_num_fields_to_profile_key)

         # Disable all field-level metrics.
         if values.get("profile_table_level_only"):
-            for field_level_metric in cls.
+            for field_level_metric in cls.model_fields:
                 if field_level_metric.startswith("include_field_"):
                     if values.get(field_level_metric):
                         raise ValueError(
@@ -267,7 +270,7 @@ class GEProfilingConfig(GEProfilingBaseConfig):
         )

     def config_for_telemetry(self) -> Dict[str, Any]:
-        config_dict = self.
+        config_dict = self.model_dump()

         return {
             flag: config_dict[flag]
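
Note: two more v1-to-v2 renames appear in this file: iterating cls.model_fields (v1: cls.__fields__) and serializing with self.model_dump() (v1: self.dict()). Both are keyed by field name, so the surrounding logic is unchanged. A small sketch with hypothetical profiling flags:

    from pydantic import BaseModel

    class ProfilingConfigSketch(BaseModel):
        # Hypothetical stand-ins for the real include_field_* toggles.
        include_field_min_value: bool = True
        include_field_max_value: bool = True
        profile_table_level_only: bool = False

    # model_fields maps field names to FieldInfo, so iterating yields names,
    # exactly like iterating v1's __fields__.
    field_flags = [
        name
        for name in ProfilingConfigSketch.model_fields
        if name.startswith("include_field_")
    ]

    config_dict = ProfilingConfigSketch().model_dump()  # v1: .dict()
    print(field_flags, config_dict["profile_table_level_only"])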

datahub/ingestion/source/grafana/grafana_api.py
CHANGED

@@ -69,7 +69,7 @@ class GrafanaAPIClient:
             if not batch:
                 break

-            folders.extend(Folder.
+            folders.extend(Folder.model_validate(folder) for folder in batch)
             page += 1
         except requests.exceptions.RequestException as e:
             self.report.report_failure(
@@ -88,7 +88,7 @@ class GrafanaAPIClient:
         try:
             response = self.session.get(f"{self.base_url}/api/dashboards/uid/{uid}")
             response.raise_for_status()
-            return Dashboard.
+            return Dashboard.model_validate(response.json())
         except requests.exceptions.RequestException as e:
             self.report.warning(
                 title="Dashboard Fetch Error",

datahub/ingestion/source/grafana/grafana_config.py
CHANGED

@@ -1,6 +1,6 @@
 from typing import Dict, Optional

-from pydantic import Field, SecretStr,
+from pydantic import Field, SecretStr, field_validator

 from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
 from datahub.configuration.source_common import (
@@ -99,6 +99,7 @@ class GrafanaSourceConfig(
         description="Map of Grafana datasource types/UIDs to platform connection configs for lineage extraction",
     )

-    @
-
+    @field_validator("url", mode="after")
+    @classmethod
+    def remove_trailing_slash(cls, v: str) -> str:
         return config_clean.remove_trailing_slashes(v)

datahub/ingestion/source/grafana/grafana_source.py
CHANGED

@@ -171,7 +171,7 @@ class GrafanaSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "GrafanaSource":
-        config = GrafanaSourceConfig.
+        config = GrafanaSourceConfig.model_validate(config_dict)
         return cls(config, ctx)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
|