acryl-datahub 1.3.0.1rc9__py3-none-any.whl → 1.3.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/METADATA +2550 -2543
- {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/RECORD +263 -261
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +2 -2
- datahub/api/entities/corpgroup/corpgroup.py +11 -6
- datahub/api/entities/corpuser/corpuser.py +11 -11
- datahub/api/entities/dataproduct/dataproduct.py +47 -27
- datahub/api/entities/dataset/dataset.py +32 -21
- datahub/api/entities/external/lake_formation_external_entites.py +5 -6
- datahub/api/entities/external/unity_catalog_external_entites.py +5 -7
- datahub/api/entities/forms/forms.py +16 -14
- datahub/api/entities/structuredproperties/structuredproperties.py +23 -16
- datahub/cli/check_cli.py +2 -2
- datahub/cli/config_utils.py +3 -3
- datahub/cli/lite_cli.py +9 -7
- datahub/cli/migrate.py +4 -4
- datahub/cli/quickstart_versioning.py +3 -3
- datahub/cli/specific/group_cli.py +1 -1
- datahub/cli/specific/structuredproperties_cli.py +1 -1
- datahub/cli/specific/user_cli.py +1 -1
- datahub/configuration/common.py +14 -2
- datahub/configuration/connection_resolver.py +2 -2
- datahub/configuration/git.py +47 -30
- datahub/configuration/import_resolver.py +2 -2
- datahub/configuration/kafka.py +4 -3
- datahub/configuration/time_window_config.py +26 -26
- datahub/configuration/validate_field_deprecation.py +2 -2
- datahub/configuration/validate_field_removal.py +2 -2
- datahub/configuration/validate_field_rename.py +2 -2
- datahub/configuration/validate_multiline_string.py +2 -1
- datahub/emitter/kafka_emitter.py +3 -1
- datahub/emitter/rest_emitter.py +2 -4
- datahub/ingestion/api/decorators.py +1 -1
- datahub/ingestion/api/report.py +1 -1
- datahub/ingestion/api/sink.py +1 -1
- datahub/ingestion/api/source.py +1 -1
- datahub/ingestion/glossary/datahub_classifier.py +11 -8
- datahub/ingestion/graph/client.py +5 -1
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
- datahub/ingestion/reporting/file_reporter.py +5 -4
- datahub/ingestion/run/pipeline.py +7 -6
- datahub/ingestion/run/pipeline_config.py +12 -14
- datahub/ingestion/run/sink_callback.py +1 -1
- datahub/ingestion/sink/datahub_rest.py +6 -4
- datahub/ingestion/source/abs/config.py +19 -19
- datahub/ingestion/source/abs/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/abs/source.py +2 -2
- datahub/ingestion/source/aws/aws_common.py +1 -1
- datahub/ingestion/source/aws/glue.py +6 -4
- datahub/ingestion/source/aws/sagemaker.py +1 -1
- datahub/ingestion/source/azure/azure_common.py +8 -12
- datahub/ingestion/source/bigquery_v2/bigquery.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +43 -30
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -1
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/common/gcp_credentials_config.py +10 -10
- datahub/ingestion/source/data_lake_common/path_spec.py +85 -89
- datahub/ingestion/source/datahub/config.py +8 -8
- datahub/ingestion/source/datahub/datahub_source.py +1 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +9 -3
- datahub/ingestion/source/dbt/dbt_common.py +39 -37
- datahub/ingestion/source/dbt/dbt_core.py +10 -12
- datahub/ingestion/source/debug/datahub_debug.py +1 -1
- datahub/ingestion/source/delta_lake/config.py +6 -4
- datahub/ingestion/source/dremio/dremio_api.py +212 -78
- datahub/ingestion/source/dremio/dremio_config.py +10 -6
- datahub/ingestion/source/dremio/dremio_entities.py +55 -39
- datahub/ingestion/source/dremio/dremio_profiling.py +14 -3
- datahub/ingestion/source/dremio/dremio_source.py +24 -26
- datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
- datahub/ingestion/source/elastic_search.py +110 -32
- datahub/ingestion/source/excel/source.py +1 -1
- datahub/ingestion/source/feast.py +1 -1
- datahub/ingestion/source/file.py +5 -4
- datahub/ingestion/source/fivetran/config.py +17 -16
- datahub/ingestion/source/fivetran/fivetran.py +2 -2
- datahub/ingestion/source/gc/datahub_gc.py +1 -1
- datahub/ingestion/source/gcs/gcs_source.py +8 -10
- datahub/ingestion/source/ge_profiling_config.py +8 -5
- datahub/ingestion/source/grafana/grafana_api.py +2 -2
- datahub/ingestion/source/grafana/grafana_config.py +4 -3
- datahub/ingestion/source/grafana/grafana_source.py +1 -1
- datahub/ingestion/source/grafana/models.py +23 -5
- datahub/ingestion/source/hex/api.py +7 -5
- datahub/ingestion/source/hex/hex.py +4 -3
- datahub/ingestion/source/iceberg/iceberg.py +1 -1
- datahub/ingestion/source/iceberg/iceberg_common.py +5 -3
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +10 -10
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +7 -5
- datahub/ingestion/source/looker/looker_config.py +21 -20
- datahub/ingestion/source/looker/lookml_config.py +47 -47
- datahub/ingestion/source/metabase.py +8 -8
- datahub/ingestion/source/metadata/business_glossary.py +2 -2
- datahub/ingestion/source/metadata/lineage.py +13 -8
- datahub/ingestion/source/mlflow.py +1 -1
- datahub/ingestion/source/mode.py +6 -4
- datahub/ingestion/source/mongodb.py +4 -3
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +17 -23
- datahub/ingestion/source/openapi.py +6 -8
- datahub/ingestion/source/powerbi/config.py +33 -32
- datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +2 -2
- datahub/ingestion/source/powerbi/powerbi.py +1 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +2 -2
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +8 -6
- datahub/ingestion/source/preset.py +8 -8
- datahub/ingestion/source/pulsar.py +1 -1
- datahub/ingestion/source/qlik_sense/data_classes.py +15 -8
- datahub/ingestion/source/qlik_sense/qlik_api.py +7 -7
- datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
- datahub/ingestion/source/redshift/config.py +18 -20
- datahub/ingestion/source/redshift/redshift.py +2 -2
- datahub/ingestion/source/redshift/usage.py +23 -3
- datahub/ingestion/source/s3/config.py +83 -62
- datahub/ingestion/source/s3/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/s3/source.py +8 -5
- datahub/ingestion/source/sac/sac.py +5 -4
- datahub/ingestion/source/salesforce.py +3 -2
- datahub/ingestion/source/schema/json_schema.py +2 -2
- datahub/ingestion/source/sigma/data_classes.py +3 -2
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/sigma/sigma_api.py +7 -7
- datahub/ingestion/source/slack/slack.py +1 -1
- datahub/ingestion/source/snaplogic/snaplogic.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_assertion.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +35 -31
- datahub/ingestion/source/snowflake/snowflake_connection.py +35 -13
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_queries.py +28 -4
- datahub/ingestion/source/sql/athena.py +1 -1
- datahub/ingestion/source/sql/clickhouse.py +4 -2
- datahub/ingestion/source/sql/cockroachdb.py +1 -1
- datahub/ingestion/source/sql/druid.py +1 -1
- datahub/ingestion/source/sql/hana.py +1 -1
- datahub/ingestion/source/sql/hive.py +7 -5
- datahub/ingestion/source/sql/hive_metastore.py +1 -1
- datahub/ingestion/source/sql/mssql/source.py +13 -6
- datahub/ingestion/source/sql/mysql.py +1 -1
- datahub/ingestion/source/sql/oracle.py +17 -10
- datahub/ingestion/source/sql/postgres.py +2 -2
- datahub/ingestion/source/sql/presto.py +1 -1
- datahub/ingestion/source/sql/sql_config.py +8 -9
- datahub/ingestion/source/sql/sql_generic.py +1 -1
- datahub/ingestion/source/sql/teradata.py +1 -1
- datahub/ingestion/source/sql/trino.py +1 -1
- datahub/ingestion/source/sql/vertica.py +5 -4
- datahub/ingestion/source/sql_queries.py +174 -22
- datahub/ingestion/source/state/checkpoint.py +2 -2
- datahub/ingestion/source/state/entity_removal_state.py +2 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +55 -45
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/superset.py +9 -9
- datahub/ingestion/source/tableau/tableau.py +14 -16
- datahub/ingestion/source/unity/azure_auth_config.py +15 -0
- datahub/ingestion/source/unity/config.py +51 -34
- datahub/ingestion/source/unity/connection.py +7 -1
- datahub/ingestion/source/unity/connection_test.py +1 -1
- datahub/ingestion/source/unity/proxy.py +216 -7
- datahub/ingestion/source/unity/proxy_types.py +91 -0
- datahub/ingestion/source/unity/source.py +29 -3
- datahub/ingestion/source/usage/clickhouse_usage.py +1 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +1 -1
- datahub/ingestion/source/usage/usage_common.py +5 -3
- datahub/ingestion/source_config/csv_enricher.py +7 -6
- datahub/ingestion/source_config/operation_config.py +7 -4
- datahub/ingestion/source_config/pulsar.py +11 -15
- datahub/ingestion/transformer/add_dataset_browse_path.py +1 -1
- datahub/ingestion/transformer/add_dataset_dataproduct.py +6 -5
- datahub/ingestion/transformer/add_dataset_ownership.py +3 -3
- datahub/ingestion/transformer/add_dataset_properties.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_tags.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_terms.py +2 -2
- datahub/ingestion/transformer/add_dataset_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +1 -1
- datahub/ingestion/transformer/extract_dataset_tags.py +1 -1
- datahub/ingestion/transformer/extract_ownership_from_tags.py +1 -1
- datahub/ingestion/transformer/mark_dataset_status.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_ownership.py +1 -1
- datahub/ingestion/transformer/remove_dataset_ownership.py +1 -1
- datahub/ingestion/transformer/replace_external_url.py +2 -2
- datahub/ingestion/transformer/set_browse_path.py +1 -1
- datahub/ingestion/transformer/tags_to_terms.py +1 -1
- datahub/lite/duckdb_lite.py +1 -1
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/_internal_schema_classes.py +62 -2
- datahub/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +2 -0
- datahub/metadata/schema.avsc +271 -91
- datahub/metadata/schemas/ApplicationProperties.avsc +5 -2
- datahub/metadata/schemas/AssertionInfo.avsc +48 -5
- datahub/metadata/schemas/BusinessAttributeInfo.avsc +8 -4
- datahub/metadata/schemas/ChartInfo.avsc +12 -5
- datahub/metadata/schemas/ContainerProperties.avsc +12 -5
- datahub/metadata/schemas/CorpGroupEditableInfo.avsc +2 -1
- datahub/metadata/schemas/CorpGroupInfo.avsc +7 -3
- datahub/metadata/schemas/CorpUserInfo.avsc +5 -2
- datahub/metadata/schemas/CorpUserSettings.avsc +4 -2
- datahub/metadata/schemas/DashboardInfo.avsc +16 -4
- datahub/metadata/schemas/DataFlowInfo.avsc +11 -5
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +4 -2
- datahub/metadata/schemas/DataJobInfo.avsc +9 -4
- datahub/metadata/schemas/DataPlatformInfo.avsc +3 -1
- datahub/metadata/schemas/DataPlatformInstanceProperties.avsc +5 -2
- datahub/metadata/schemas/DataProductProperties.avsc +5 -2
- datahub/metadata/schemas/DataTypeInfo.avsc +5 -0
- datahub/metadata/schemas/DatasetKey.avsc +2 -1
- datahub/metadata/schemas/DatasetProperties.avsc +12 -5
- datahub/metadata/schemas/DomainProperties.avsc +7 -3
- datahub/metadata/schemas/EditableContainerProperties.avsc +2 -1
- datahub/metadata/schemas/EditableDashboardProperties.avsc +2 -1
- datahub/metadata/schemas/EditableDataFlowProperties.avsc +2 -1
- datahub/metadata/schemas/EditableDataJobProperties.avsc +2 -1
- datahub/metadata/schemas/EditableDatasetProperties.avsc +2 -1
- datahub/metadata/schemas/EditableERModelRelationshipProperties.avsc +2 -1
- datahub/metadata/schemas/EditableMLFeatureProperties.avsc +2 -1
- datahub/metadata/schemas/EditableMLFeatureTableProperties.avsc +2 -1
- datahub/metadata/schemas/EditableMLModelGroupProperties.avsc +2 -1
- datahub/metadata/schemas/EditableMLModelProperties.avsc +2 -1
- datahub/metadata/schemas/EditableNotebookProperties.avsc +2 -1
- datahub/metadata/schemas/EditableSchemaMetadata.avsc +5 -3
- datahub/metadata/schemas/EntityTypeInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalTags.avsc +3 -2
- datahub/metadata/schemas/GlossaryNodeInfo.avsc +3 -1
- datahub/metadata/schemas/GlossaryTermInfo.avsc +3 -1
- datahub/metadata/schemas/InputFields.avsc +3 -2
- datahub/metadata/schemas/MLFeatureKey.avsc +3 -1
- datahub/metadata/schemas/MLFeatureTableKey.avsc +3 -1
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +3 -1
- datahub/metadata/schemas/MLModelGroupKey.avsc +3 -1
- datahub/metadata/schemas/MLModelKey.avsc +3 -1
- datahub/metadata/schemas/MLModelProperties.avsc +4 -2
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +3 -1
- datahub/metadata/schemas/MetadataChangeEvent.avsc +124 -50
- datahub/metadata/schemas/NotebookInfo.avsc +5 -2
- datahub/metadata/schemas/Ownership.avsc +3 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -1
- datahub/metadata/schemas/RoleProperties.avsc +3 -1
- datahub/metadata/schemas/SchemaFieldInfo.avsc +3 -1
- datahub/metadata/schemas/SchemaMetadata.avsc +3 -2
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc +15 -4
- datahub/metadata/schemas/TagProperties.avsc +3 -1
- datahub/metadata/schemas/TestInfo.avsc +2 -1
- datahub/sdk/__init__.py +1 -0
- datahub/sdk/_all_entities.py +2 -0
- datahub/sdk/search_filters.py +68 -40
- datahub/sdk/tag.py +112 -0
- datahub/secret/datahub_secret_store.py +7 -4
- datahub/secret/file_secret_store.py +1 -1
- datahub/sql_parsing/schema_resolver.py +29 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +15 -0
- datahub/sql_parsing/sqlglot_lineage.py +5 -2
- datahub/testing/check_sql_parser_result.py +2 -2
- datahub/utilities/ingest_utils.py +1 -1
- {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.3.0.1rc9.dist-info → acryl_datahub-1.3.1.1.dist-info}/top_level.txt +0 -0
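One pattern dominates the source changes listed above: a Pydantic v1-to-v2 migration across the ingestion framework. The hunks below repeat the same substitutions over and over, sketched here against a hypothetical model (not DataHub code):

```python
# Minimal sketch of the v1 -> v2 validator migration seen throughout this diff.
# `Thing` is a hypothetical model, not part of acryl-datahub.
from typing import Optional

from pydantic import BaseModel, field_validator, model_validator


class Thing(BaseModel):
    name: str
    label: Optional[str] = None

    # v1: @validator("name")
    @field_validator("name", mode="after")
    @classmethod
    def strip_name(cls, v: str) -> str:
        return v.strip()

    # v1: @root_validator -> v2: mode="after" validators run on the built model
    @model_validator(mode="after")
    def default_label(self) -> "Thing":
        if self.label is None:
            self.label = self.name
        return self


# v1: Thing.parse_obj({...}) -> v2: Thing.model_validate({...})
thing = Thing.model_validate({"name": " users ", "label": None})
assert thing.label == "users"
```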
datahub/ingestion/source/grafana/models.py
CHANGED

@@ -79,18 +79,29 @@ class Dashboard(_GrafanaBaseModel):
         for panel_data in panels_data:
             if panel_data.get("type") == "row" and "panels" in panel_data:
                 panels.extend(
-                    Panel.parse_obj(p)
+                    Panel.model_validate(p)
                     for p in panel_data["panels"]
                     if p.get("type") != "row"
                 )
             elif panel_data.get("type") != "row":
-                panels.append(Panel.parse_obj(panel_data))
+                panels.append(Panel.model_validate(panel_data))
         return panels

     @classmethod
-    def parse_obj(cls, obj: Any) -> "Dashboard":
+    def model_validate(
+        cls,
+        obj: Any,
+        *,
+        strict: Optional[bool] = None,
+        from_attributes: Optional[bool] = None,
+        context: Optional[Any] = None,
+        by_alias: Optional[bool] = None,
+        by_name: Optional[bool] = None,
+    ) -> "Dashboard":
         """Custom parsing to handle nested panel extraction."""
-        dashboard_data = obj.get("dashboard", obj)
+        # Handle both direct dashboard data and nested structure with 'dashboard' key
+        dashboard_data = obj.get("dashboard", obj)
+
         _panel_data = dashboard_data.get("panels", [])
         panels = []
         try:

@@ -113,7 +124,14 @@ class Dashboard(_GrafanaBaseModel):
         if "refresh" in dashboard_dict and isinstance(dashboard_dict["refresh"], bool):
             dashboard_dict["refresh"] = str(dashboard_dict["refresh"])

-        return super().parse_obj(dashboard_dict)
+        return super().model_validate(
+            dashboard_dict,
+            strict=strict,
+            from_attributes=from_attributes,
+            context=context,
+            by_alias=by_alias,
+            by_name=by_name,
+        )


 class Folder(_GrafanaBaseModel):
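The override keeps pydantic v2's full `model_validate` keyword signature so it stays a drop-in replacement (the `by_alias`/`by_name` keywords assume pydantic >= 2.11, matching the signature the hunk overrides). A stripped-down sketch of the same shape, with illustrative field names:

```python
# Illustrative only: a stand-in for the real Dashboard model, showing why the
# override forwards pydantic v2's full keyword signature to super().
from typing import Any, Optional

from pydantic import BaseModel


class MiniDashboard(BaseModel):
    uid: str
    title: str = ""

    @classmethod
    def model_validate(
        cls,
        obj: Any,
        *,
        strict: Optional[bool] = None,
        from_attributes: Optional[bool] = None,
        context: Optional[Any] = None,
        by_alias: Optional[bool] = None,
        by_name: Optional[bool] = None,
    ) -> "MiniDashboard":
        # Unwrap a {"dashboard": {...}} envelope if present; assumes dict input.
        data = obj.get("dashboard", obj)
        return super().model_validate(
            data,
            strict=strict,
            from_attributes=from_attributes,
            context=context,
            by_alias=by_alias,
            by_name=by_name,
        )


flat = MiniDashboard.model_validate({"uid": "abc"})
wrapped = MiniDashboard.model_validate({"dashboard": {"uid": "abc"}})
assert flat.uid == wrapped.uid == "abc"
```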
datahub/ingestion/source/hex/api.py
CHANGED

@@ -4,7 +4,7 @@ from datetime import datetime, timezone
 from typing import Any, Dict, Generator, List, Optional, Union

 import requests
-from pydantic import BaseModel, Field, ValidationError, validator
+from pydantic import BaseModel, Field, ValidationError, field_validator
 from requests.adapters import HTTPAdapter
 from typing_extensions import assert_never
 from urllib3.util.retry import Retry

@@ -50,7 +50,8 @@ class HexApiProjectAnalytics(BaseModel):
         default=None, alias="publishedResultsUpdatedAt"
     )

-    @validator("last_viewed_at", "published_results_updated_at", pre=True)
+    @field_validator("last_viewed_at", "published_results_updated_at", mode="before")
+    @classmethod
     def parse_datetime(cls, value):
         if value is None:
             return None

@@ -167,14 +168,15 @@ class HexApiProjectApiResource(BaseModel):
     class Config:
         extra = "ignore"  # Allow extra fields in the JSON

-    @validator(
+    @field_validator(
         "created_at",
         "last_edited_at",
         "last_published_at",
         "archived_at",
         "trashed_at",
-        pre=True,
+        mode="before",
     )
+    @classmethod
     def parse_datetime(cls, value):
         if value is None:
             return None

@@ -292,7 +294,7 @@ class HexApi:
         )
         response.raise_for_status()

-        api_response = HexApiProjectsListResponse.parse_obj(response.json())
+        api_response = HexApiProjectsListResponse.model_validate(response.json())
         logger.info(f"Fetched {len(api_response.values)} items")
         params["after"] = (
             api_response.pagination.after if api_response.pagination else None
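The `mode="before"` validators above run on the raw JSON value prior to type coercion, and pydantic v2 expects them to be explicit classmethods. A self-contained sketch of the pattern (model and field are hypothetical):

```python
from datetime import datetime, timezone
from typing import Optional

from pydantic import BaseModel, field_validator


class Analytics(BaseModel):
    last_viewed_at: Optional[datetime] = None

    # mode="before": receives the raw value (e.g. an ISO-8601 string) before coercion.
    @field_validator("last_viewed_at", mode="before")
    @classmethod
    def parse_datetime(cls, value):
        if value is None:
            return None
        if isinstance(value, str):
            # Accept a trailing "Z" as UTC, which fromisoformat rejects before 3.11.
            return datetime.fromisoformat(value.replace("Z", "+00:00"))
        return value


a = Analytics.model_validate({"last_viewed_at": "2024-01-01T00:00:00Z"})
assert a.last_viewed_at == datetime(2024, 1, 1, tzinfo=timezone.utc)
```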
datahub/ingestion/source/hex/hex.py
CHANGED

@@ -3,7 +3,7 @@ from dataclasses import dataclass
 from datetime import datetime, timedelta, timezone
 from typing import Any, Dict, Iterable, List, Optional

-from pydantic import Field, SecretStr, root_validator
+from pydantic import Field, SecretStr, model_validator
 from typing_extensions import assert_never

 from datahub.configuration.common import AllowDenyPattern

@@ -120,7 +120,8 @@ class HexSourceConfig(
         description="Number of items to fetch per DataHub API call.",
     )

-    @root_validator(pre=True)
+    @model_validator(mode="before")
+    @classmethod
     def validate_lineage_times(cls, data: Dict[str, Any]) -> Dict[str, Any]:
         # In-place update of the input dict would cause state contamination. This was discovered through test failures
         # in test_hex.py where the same dict is reused.

@@ -238,7 +239,7 @@ class HexSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict: Dict[str, Any], ctx: PipelineContext) -> "HexSource":
-        config = HexSourceConfig.parse_obj(config_dict)
+        config = HexSourceConfig.model_validate(config_dict)
         return cls(config, ctx)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
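The state-contamination comment in this hunk is the key detail: a `mode="before"` model validator receives the caller's dict, so it should copy before filling in values. A minimal sketch with made-up fields:

```python
from typing import Any, Dict

from pydantic import BaseModel, model_validator


class WindowConfig(BaseModel):
    start: int = 0
    end: int = 0

    @model_validator(mode="before")
    @classmethod
    def fill_end(cls, data: Dict[str, Any]) -> Dict[str, Any]:
        data = dict(data)  # shallow copy: never mutate the caller's dict
        data.setdefault("end", data.get("start", 0) + 10)
        return data


raw = {"start": 5}
cfg = WindowConfig.model_validate(raw)
assert cfg.end == 15 and "end" not in raw  # caller's input is untouched
```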
datahub/ingestion/source/iceberg/iceberg.py
CHANGED

@@ -161,7 +161,7 @@ class IcebergSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict: Dict, ctx: PipelineContext) -> "IcebergSource":
-        config = IcebergSourceConfig.parse_obj(config_dict)
+        config = IcebergSourceConfig.model_validate(config_dict)
         return cls(config, ctx)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
datahub/ingestion/source/iceberg/iceberg_common.py
CHANGED

@@ -4,7 +4,7 @@ from dataclasses import dataclass, field
 from typing import Any, Dict, Optional

 from humanfriendly import format_timespan
-from pydantic import Field, validator
+from pydantic import Field, field_validator
 from pyiceberg.catalog import Catalog, load_catalog
 from pyiceberg.catalog.rest import RestCatalog
 from requests.adapters import HTTPAdapter

@@ -108,7 +108,8 @@ class IcebergSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin)
         default=1, description="How many threads will be processing tables"
     )

-    @validator("catalog", pre=True)
+    @field_validator("catalog", mode="before")
+    @classmethod
     def handle_deprecated_catalog_format(cls, value):
         # Once support for deprecated format is dropped, we can remove this validator.
         if (

@@ -131,7 +132,8 @@ class IcebergSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin)
         # In case the input is already the new format or is invalid
         return value

-    @validator("catalog")
+    @field_validator("catalog", mode="after")
+    @classmethod
     def validate_catalog_size(cls, value):
         if len(value) != 1:
             raise ValueError("The catalog must contain exactly one entry.")
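Note how the catalog field gets two validators: a `mode="before"` upconverter for the deprecated format and a `mode="after"` invariant check on the coerced value. A sketch of that two-stage pattern, with hypothetical shapes:

```python
from typing import Dict

from pydantic import BaseModel, field_validator


class Cfg(BaseModel):
    catalog: Dict[str, Dict[str, str]]

    # Stage 1, before coercion: upconvert a deprecated flat form.
    @field_validator("catalog", mode="before")
    @classmethod
    def upconvert(cls, value):
        if isinstance(value, dict) and "name" in value and "type" in value:
            entry = {k: v for k, v in value.items() if k != "name"}
            return {value["name"]: entry}
        return value

    # Stage 2, after coercion: enforce invariants on the final value.
    @field_validator("catalog", mode="after")
    @classmethod
    def exactly_one(cls, value):
        if len(value) != 1:
            raise ValueError("The catalog must contain exactly one entry.")
        return value


cfg = Cfg.model_validate({"catalog": {"name": "prod", "type": "rest"}})
assert cfg.catalog == {"prod": {"type": "rest"}}
```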
datahub/ingestion/source/identity/azure_ad.py
CHANGED

@@ -254,7 +254,7 @@ class AzureADSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = AzureADConfig.parse_obj(config_dict)
+        config = AzureADConfig.model_validate(config_dict)
         return cls(config, ctx)

     def __init__(self, config: AzureADConfig, ctx: PipelineContext):
datahub/ingestion/source/identity/okta.py
CHANGED

@@ -11,7 +11,7 @@ import nest_asyncio
 from okta.client import Client as OktaClient
 from okta.exceptions import OktaAPIException
 from okta.models import Group, GroupProfile, User, UserProfile, UserStatus
-from pydantic import root_validator
+from pydantic import model_validator
 from pydantic.fields import Field

 from datahub.emitter.mcp import MetadataChangeProposalWrapper

@@ -157,21 +157,21 @@ class OktaConfig(StatefulIngestionConfigBase):
     mask_group_id: bool = True
     mask_user_id: bool = True

-    @root_validator(skip_on_failure=True)
-    def okta_users_one_of_filter_or_search(cls, values):
-        if values.get("okta_users_search") and values.get("okta_users_filter"):
+    @model_validator(mode="after")
+    def okta_users_one_of_filter_or_search(self) -> "OktaConfig":
+        if self.okta_users_search and self.okta_users_filter:
             raise ValueError(
                 "Only one of okta_users_filter or okta_users_search can be set"
             )
-        return values
+        return self

-    @root_validator(skip_on_failure=True)
-    def okta_groups_one_of_filter_or_search(cls, values):
-        if values.get("okta_groups_search") and values.get("okta_groups_filter"):
+    @model_validator(mode="after")
+    def okta_groups_one_of_filter_or_search(self) -> "OktaConfig":
+        if self.okta_groups_search and self.okta_groups_filter:
             raise ValueError(
                 "Only one of okta_groups_filter or okta_groups_search can be set"
             )
-        return values
+        return self


 @dataclass

@@ -288,7 +288,7 @@ class OktaSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = OktaConfig.parse_obj(config_dict)
+        config = OktaConfig.model_validate(config_dict)
         return cls(config, ctx)

     def __init__(self, config: OktaConfig, ctx: PipelineContext):
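Cross-field checks like these move from `root_validator` functions over a values dict to `model_validator(mode="after")` methods that run on the constructed instance and return `self`. A generic sketch:

```python
from typing import Optional

from pydantic import BaseModel, ValidationError, model_validator


class OktaLikeConfig(BaseModel):
    users_filter: Optional[str] = None
    users_search: Optional[str] = None

    @model_validator(mode="after")
    def one_of_filter_or_search(self) -> "OktaLikeConfig":
        if self.users_filter and self.users_search:
            raise ValueError("Only one of users_filter or users_search can be set")
        return self


try:
    OktaLikeConfig(users_filter="a", users_search="b")
except ValidationError as e:
    print(e.errors()[0]["msg"])  # "Value error, Only one of users_filter or ..."
```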
datahub/ingestion/source/kafka/kafka.py
CHANGED

@@ -267,7 +267,7 @@ class KafkaSource(StatefulIngestionSourceBase, TestableSource):

     @classmethod
     def create(cls, config_dict: Dict, ctx: PipelineContext) -> "KafkaSource":
-        config: KafkaSourceConfig = KafkaSourceConfig.parse_obj(config_dict)
+        config: KafkaSourceConfig = KafkaSourceConfig.model_validate(config_dict)
         return cls(config, ctx)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
datahub/ingestion/source/ldap.py
CHANGED

@@ -242,7 +242,7 @@ class LDAPSource(StatefulIngestionSourceBase):
     @classmethod
     def create(cls, config_dict: Dict[str, Any], ctx: PipelineContext) -> "LDAPSource":
         """Factory method."""
-        config = LDAPSourceConfig.parse_obj(config_dict)
+        config = LDAPSourceConfig.model_validate(config_dict)
         return cls(ctx, config)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
datahub/ingestion/source/looker/looker_common.py
CHANGED

@@ -28,7 +28,7 @@ from looker_sdk.sdk.api40.models import (
     User,
     WriteQuery,
 )
-from pydantic import validator
+from pydantic import field_validator

 import datahub.emitter.mce_builder as builder
 from datahub.api.entities.platformresource.platform_resource import (

@@ -202,8 +202,9 @@ class LookerViewId:
             folder_path=os.path.dirname(self.file_path),
         )

-    @validator("view_name")
-    def remove_quotes(cls, v: str) -> str:
+    @field_validator("view_name", mode="after")
+    @classmethod
+    def remove_quotes(cls, v: str) -> str:
         # Sanitize the name.
         v = v.replace('"', "").replace("`", "")
         return v

@@ -931,8 +932,9 @@ class LookerExplore:
     source_file: Optional[str] = None
     tags: List[str] = dataclasses_field(default_factory=list)

-    @validator("name")
-    def remove_quotes(cls, v: str) -> str:
+    @field_validator("name", mode="after")
+    @classmethod
+    def remove_quotes(cls, v: str) -> str:
         # Sanitize the name.
         v = v.replace('"', "").replace("`", "")
         return v
datahub/ingestion/source/looker/looker_config.py
CHANGED

@@ -1,11 +1,11 @@
 import dataclasses
 import os
 import re
-from typing import Any, ClassVar, Dict, List, Optional, Tuple, Union
+from typing import Any, ClassVar, Dict, List, Optional, Tuple, Union

 import pydantic
 from looker_sdk.sdk.api40.models import DBConnection
-from pydantic import Field, validator, root_validator
+from pydantic import Field, field_validator, model_validator

 from datahub.configuration import ConfigModel
 from datahub.configuration.common import (

@@ -198,17 +198,20 @@ class LookerConnectionDefinition(ConfigModel):
         "the top level Looker configuration",
     )

-    @validator("platform_env")
+    @field_validator("platform_env", mode="after")
+    @classmethod
     def platform_env_must_be_one_of(cls, v: Optional[str]) -> Optional[str]:
         if v is not None:
             return EnvConfigMixin.env_must_be_one_of(v)
         return v

-    @validator("platform", "default_db", "default_schema")
-    def lower_everything(cls, v: Optional[str]) -> Optional[str]:
+    @field_validator("platform", "default_db", "default_schema", mode="after")
+    @classmethod
+    def lower_everything(cls, v: Optional[str]) -> Optional[str]:
         """We lower case all strings passed in to avoid casing issues later"""
         if v is not None:
             return v.lower()
+        return v

     @classmethod
     def from_looker_connection(

@@ -326,22 +329,20 @@ class LookerDashboardSourceConfig(
         "Dashboards will only be ingested if they're allowed by both this config and dashboard_pattern.",
     )

-    @validator("external_base_url", pre=True, always=True)
+    @model_validator(mode="before")
+    @classmethod
     def external_url_defaults_to_api_config_base_url(
-        cls, v: Optional[str], values: Dict[str, Any]
-    ) -> Optional[str]:
-        return v or values.get("base_url")
-
-    @validator("extract_independent_looks", always=True)
-    def stateful_ingestion_should_be_enabled(
-        cls, v: bool, values: Dict[str, Any]
-    ) -> bool:
-        stateful_ingestion = values.get(
-            "stateful_ingestion"
-        )
-        if v is True and (
-            stateful_ingestion is None or stateful_ingestion.enabled is False
+        cls, values: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        if "external_base_url" not in values or values["external_base_url"] is None:
+            values["external_base_url"] = values.get("base_url")
+        return values
+
+    @model_validator(mode="after")
+    def stateful_ingestion_should_be_enabled(self):
+        if self.extract_independent_looks is True and (
+            self.stateful_ingestion is None or self.stateful_ingestion.enabled is False
         ):
             raise ValueError("stateful_ingestion.enabled should be set to true")

-        return values
+        return self
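The first validator here defaults `external_base_url` from `base_url` in `mode="before"`, i.e. on the raw input dict, so the fallback still goes through normal field validation. A reduced sketch:

```python
from typing import Any, Dict, Optional

from pydantic import BaseModel, model_validator


class LookerLikeConfig(BaseModel):
    base_url: str
    external_base_url: Optional[str] = None

    @model_validator(mode="before")
    @classmethod
    def default_external_url(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        # Runs before field validation, so the fallback is validated like any input.
        if values.get("external_base_url") is None:
            values = {**values, "external_base_url": values.get("base_url")}
        return values


cfg = LookerLikeConfig.model_validate({"base_url": "https://looker.example.com"})
assert cfg.external_base_url == "https://looker.example.com"
```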
datahub/ingestion/source/looker/lookml_config.py
CHANGED

@@ -1,10 +1,11 @@
 import logging
+from copy import deepcopy
 from dataclasses import dataclass, field as dataclass_field
 from datetime import timedelta
 from typing import Any, Dict, Literal, Optional, Union

 import pydantic
-from pydantic import root_validator
+from pydantic import model_validator
 from pydantic.fields import Field

 from datahub.configuration.common import AllowDenyPattern

@@ -210,75 +211,74 @@ class LookMLSourceConfig(
         "All if comments are evaluated to true for configured looker_environment value",
     )

-    @validator("connection_to_platform_map", pre=True)
-    def convert_string_to_connection_def(cls, conn_map):
-        # Previous version of config supported strings in connection map. This upconverts strings to ConnectionMap
-        for key in conn_map:
-            if isinstance(conn_map[key], str):
-                platform = conn_map[key]
-                if "." in platform:
-                    platform_db_split = conn_map[key].split(".")
-                    connection = LookerConnectionDefinition(
-                        platform=platform_db_split[0],
-                        default_db=platform_db_split[1],
-                        default_schema="",
-                    )
-                    conn_map[key] = connection
-                else:
-                    logger.warning(
-                        f"Connection map for {key} provides platform {platform} but does not provide a default "
-                        f"database name. This might result in failed resolution"
-                    )
-                    conn_map[key] = LookerConnectionDefinition(
-                        platform=platform, default_db="", default_schema=""
-                    )
-        return conn_map
+    @model_validator(mode="before")
+    @classmethod
+    def convert_string_to_connection_def(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+        values = deepcopy(values)
+        conn_map = values.get("connection_to_platform_map")
+        if conn_map:
+            # Previous version of config supported strings in connection map. This upconverts strings to ConnectionMap
+            for key in conn_map:
+                if isinstance(conn_map[key], str):
+                    platform = conn_map[key]
+                    if "." in platform:
+                        platform_db_split = conn_map[key].split(".")
+                        connection = LookerConnectionDefinition(
+                            platform=platform_db_split[0],
+                            default_db=platform_db_split[1],
+                            default_schema="",
+                        )
+                        conn_map[key] = connection
+                    else:
+                        logger.warning(
+                            f"Connection map for {key} provides platform {platform} but does not provide a default "
+                            f"database name. This might result in failed resolution"
+                        )
+                        conn_map[key] = LookerConnectionDefinition(
+                            platform=platform, default_db="", default_schema=""
+                        )
+        return values

-    @root_validator(skip_on_failure=True)
-    def check_either_connection_map_or_connection_provided(cls, values):
+    @model_validator(mode="after")
+    def check_either_connection_map_or_connection_provided(self):
         """Validate that we must either have a connection map or an api credential"""
-        if not values.get("connection_to_platform_map", {}) and not values.get(
-            "api", {}
-        ):
+        if not (self.connection_to_platform_map or {}) and not (self.api):
             raise ValueError(
                 "Neither api not connection_to_platform_map config was found. LookML source requires either api "
                 "credentials for Looker or a map of connection names to platform identifiers to work correctly"
             )
-        return values
+        return self

-    @root_validator(skip_on_failure=True)
-    def check_either_project_name_or_api_provided(cls, values):
+    @model_validator(mode="after")
+    def check_either_project_name_or_api_provided(self):
         """Validate that we must either have a project name or an api credential to fetch project names"""
-        if not values.get("project_name") and not values.get("api"):
+        if not self.project_name and not self.api:
             raise ValueError(
                 "Neither project_name not an API credential was found. LookML source requires either api credentials "
                 "for Looker or a project_name to accurately name views and models."
             )
-        return values
+        return self

-    @root_validator(skip_on_failure=True)
-    def check_api_provided_for_view_lineage(cls, values):
+    @model_validator(mode="after")
+    def check_api_provided_for_view_lineage(self):
         """Validate that we must have an api credential to use Looker API for view's column lineage"""
-        if not values.get("api") and values.get("use_api_for_view_lineage"):
+        if not self.api and self.use_api_for_view_lineage:
             raise ValueError(
                 "API credential was not found. LookML source requires api credentials "
                 "for Looker to use Looker APIs for view's column lineage extraction."
                 "Set `use_api_for_view_lineage` to False to skip using Looker APIs."
             )
-        return values
+        return self

-    @root_validator(skip_on_failure=True)
-    def check_base_folder_if_not_provided(
-        cls, values: Dict[str, Any]
-    ) -> Dict[str, Any]:
-        if values.get("base_folder") is None:
-            git_info: Optional[GitInfo] = values.get("git_info")
-            if git_info:
-                if not git_info.deploy_key:
+    @model_validator(mode="after")
+    def check_base_folder_if_not_provided(self):
+        if self.base_folder is None:
+            if self.git_info:
+                if not self.git_info.deploy_key:
                     logger.warning(
                         "git_info is provided, but no SSH key is present. If the repo is not public, we'll fail to "
                         "clone it."
                     )
             else:
                 raise ValueError("Neither base_folder nor git_info has been provided.")
-        return values
+        return self
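Beyond the decorator swap, the rewrite adds `deepcopy` so that upconverting legacy string entries cannot mutate the caller's config. A condensed sketch of the shape (the real code builds `LookerConnectionDefinition` objects; plain dicts stand in here):

```python
from copy import deepcopy
from typing import Any, Dict

from pydantic import BaseModel, model_validator


class LookMLLikeConfig(BaseModel):
    connection_to_platform_map: Dict[str, Dict[str, str]] = {}

    @model_validator(mode="before")
    @classmethod
    def upconvert_strings(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        values = deepcopy(values)  # never mutate the caller's nested dicts
        conn_map = values.get("connection_to_platform_map") or {}
        for key, entry in conn_map.items():
            if isinstance(entry, str):
                # Legacy form: "platform" or "platform.default_db"
                platform, _, default_db = entry.partition(".")
                conn_map[key] = {"platform": platform, "default_db": default_db}
        return values


raw = {"connection_to_platform_map": {"warehouse": "snowflake.analytics"}}
cfg = LookMLLikeConfig.model_validate(raw)
assert cfg.connection_to_platform_map["warehouse"]["platform"] == "snowflake"
assert raw["connection_to_platform_map"]["warehouse"] == "snowflake.analytics"
```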
datahub/ingestion/source/metabase.py
CHANGED

@@ -9,7 +9,7 @@ from typing import Dict, Iterable, List, Optional, Tuple, Union
 import dateutil.parser as dp
 import pydantic
 import requests
-from pydantic import Field, validator, root_validator
+from pydantic import Field, field_validator, model_validator
 from requests.models import HTTPError

 import datahub.emitter.mce_builder as builder

@@ -115,16 +115,16 @@ class MetabaseConfig(
     )
     stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None

-    @validator("connect_uri", "display_uri")
+    @field_validator("connect_uri", "display_uri", mode="after")
+    @classmethod
     def remove_trailing_slash(cls, v):
         return config_clean.remove_trailing_slashes(v)

-    @root_validator(skip_on_failure=True)
-    def default_display_uri_to_connect_uri(cls, values: Dict) -> Dict:
-        base = values.get("display_uri")
-        if base is None:
-            values["display_uri"] = values.get("connect_uri")
-        return values
+    @model_validator(mode="after")
+    def default_display_uri_to_connect_uri(self) -> "MetabaseConfig":
+        if self.display_uri is None:
+            self.display_uri = self.connect_uri
+        return self


 @dataclass
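As in several other sources, one `field_validator` is attached to multiple fields, keeping normalizers such as the trailing-slash cleanup in a single place. A sketch with hypothetical URLs:

```python
from pydantic import BaseModel, field_validator


class UriConfig(BaseModel):
    connect_uri: str
    display_uri: str = ""

    # One normalizer attached to both URI fields.
    @field_validator("connect_uri", "display_uri", mode="after")
    @classmethod
    def remove_trailing_slash(cls, v: str) -> str:
        return v.rstrip("/")


cfg = UriConfig(
    connect_uri="https://metabase.example.com/",
    display_uri="https://bi.example.com//",
)
assert cfg.connect_uri == "https://metabase.example.com"
assert cfg.display_uri == "https://bi.example.com"
```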
datahub/ingestion/source/metadata/business_glossary.py
CHANGED

@@ -563,7 +563,7 @@ class BusinessGlossaryFileSource(Source):

     @classmethod
     def create(cls, config_dict, ctx):
-        config = BusinessGlossarySourceConfig.parse_obj(config_dict)
+        config = BusinessGlossarySourceConfig.model_validate(config_dict)
         return cls(ctx, config)

     @classmethod

@@ -571,7 +571,7 @@ class BusinessGlossaryFileSource(Source):
         cls, file_name: Union[str, pathlib.Path]
     ) -> BusinessGlossaryConfig:
         config = load_config_file(file_name, resolve_env_vars=True)
-        glossary_cfg = BusinessGlossaryConfig.parse_obj(config)
+        glossary_cfg = BusinessGlossaryConfig.model_validate(config)
         return glossary_cfg

     def get_workunits_internal(
datahub/ingestion/source/metadata/lineage.py
CHANGED

@@ -3,7 +3,7 @@ from dataclasses import dataclass, field
 from functools import partial
 from typing import Any, Dict, Iterable, List, Optional

-from pydantic import validator
+from pydantic import field_validator
 from pydantic.fields import Field

 import datahub.metadata.schema_classes as models

@@ -51,7 +51,8 @@ class EntityConfig(EnvConfigMixin):
     platform: str
     platform_instance: Optional[str] = None

-    @validator("type")
+    @field_validator("type", mode="after")
+    @classmethod
     def type_must_be_supported(cls, v: str) -> str:
         allowed_types = ["dataset"]
         if v not in allowed_types:

@@ -60,7 +61,8 @@ class EntityConfig(EnvConfigMixin):
         )
         return v

-    @validator("name")
+    @field_validator("name", mode="after")
+    @classmethod
     def validate_name(cls, v: str) -> str:
         if v.startswith("urn:li:"):
             raise ValueError(

@@ -77,7 +79,8 @@ class FineGrainedLineageConfig(ConfigModel):
     transformOperation: Optional[str]
     confidenceScore: Optional[float] = 1.0

-    @validator("upstreamType")
+    @field_validator("upstreamType", mode="after")
+    @classmethod
     def upstream_type_must_be_supported(cls, v: str) -> str:
         allowed_types = [
             FineGrainedLineageUpstreamTypeClass.FIELD_SET,

@@ -90,7 +93,8 @@ class FineGrainedLineageConfig(ConfigModel):
         )
         return v

-    @validator("downstreamType")
+    @field_validator("downstreamType", mode="after")
+    @classmethod
     def downstream_type_must_be_supported(cls, v: str) -> str:
         allowed_types = [
             FineGrainedLineageDownstreamTypeClass.FIELD_SET,

@@ -124,7 +128,8 @@ class LineageFileSourceConfig(ConfigModel):
 class LineageConfig(VersionedConfig):
     lineage: List[EntityNodeConfig]

-    @validator("version")
+    @field_validator("version", mode="after")
+    @classmethod
     def version_must_be_1(cls, v):
         if v != "1":
             raise ValueError("Only version 1 is supported")

@@ -148,13 +153,13 @@ class LineageFileSource(Source):
     def create(
         cls, config_dict: Dict[str, Any], ctx: PipelineContext
     ) -> "LineageFileSource":
-        config = LineageFileSourceConfig.parse_obj(config_dict)
+        config = LineageFileSourceConfig.model_validate(config_dict)
         return cls(ctx, config)

     @staticmethod
     def load_lineage_config(file_name: str) -> LineageConfig:
         config = load_config_file(file_name, resolve_env_vars=True)
-        lineage_config = LineageConfig.parse_obj(config)
+        lineage_config = LineageConfig.model_validate(config)
         return lineage_config

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
|
|
@@ -892,5 +892,5 @@ class MLflowSource(StatefulIngestionSourceBase):
|
|
|
892
892
|
|
|
893
893
|
@classmethod
|
|
894
894
|
def create(cls, config_dict: dict, ctx: PipelineContext) -> "MLflowSource":
|
|
895
|
-
config = MLflowConfig.
|
|
895
|
+
config = MLflowConfig.model_validate(config_dict)
|
|
896
896
|
return cls(ctx, config)
|
datahub/ingestion/source/mode.py
CHANGED

@@ -26,7 +26,7 @@ import sqlglot
 import tenacity
 import yaml
 from liquid import Template, Undefined
-from pydantic import Field, validator
+from pydantic import Field, field_validator
 from requests.adapters import HTTPAdapter, Retry
 from requests.exceptions import ConnectionError
 from requests.models import HTTPBasicAuth, HTTPError

@@ -218,11 +218,13 @@ class ModeConfig(
         default=False, description="Exclude archived reports"
     )

-    @validator("connect_uri")
+    @field_validator("connect_uri", mode="after")
+    @classmethod
     def remove_trailing_slash(cls, v):
         return config_clean.remove_trailing_slashes(v)

-    @validator("items_per_page")
+    @field_validator("items_per_page", mode="after")
+    @classmethod
     def validate_items_per_page(cls, v):
         if 1 <= v <= DEFAULT_API_ITEMS_PER_PAGE:
             return v

@@ -1824,7 +1826,7 @@ class ModeSource(StatefulIngestionSourceBase):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "ModeSource":
-        config: ModeConfig = ModeConfig.parse_obj(config_dict)
+        config: ModeConfig = ModeConfig.model_validate(config_dict)
         return cls(ctx, config)

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: