acryl-datahub 1.3.1__py3-none-any.whl → 1.3.1.1rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of acryl-datahub might be problematic.
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/METADATA +2501 -2501
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/RECORD +193 -193
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +2 -2
- datahub/api/entities/corpgroup/corpgroup.py +11 -6
- datahub/api/entities/corpuser/corpuser.py +11 -11
- datahub/api/entities/dataproduct/dataproduct.py +47 -27
- datahub/api/entities/dataset/dataset.py +32 -21
- datahub/api/entities/external/lake_formation_external_entites.py +5 -6
- datahub/api/entities/external/unity_catalog_external_entites.py +5 -7
- datahub/api/entities/forms/forms.py +16 -14
- datahub/api/entities/structuredproperties/structuredproperties.py +23 -16
- datahub/cli/check_cli.py +2 -2
- datahub/cli/config_utils.py +3 -3
- datahub/cli/lite_cli.py +9 -7
- datahub/cli/migrate.py +4 -4
- datahub/cli/quickstart_versioning.py +3 -3
- datahub/cli/specific/group_cli.py +1 -1
- datahub/cli/specific/structuredproperties_cli.py +1 -1
- datahub/cli/specific/user_cli.py +1 -1
- datahub/configuration/common.py +14 -2
- datahub/configuration/connection_resolver.py +2 -2
- datahub/configuration/git.py +47 -30
- datahub/configuration/import_resolver.py +2 -2
- datahub/configuration/kafka.py +4 -3
- datahub/configuration/time_window_config.py +26 -26
- datahub/configuration/validate_field_deprecation.py +2 -2
- datahub/configuration/validate_field_removal.py +2 -2
- datahub/configuration/validate_field_rename.py +2 -2
- datahub/configuration/validate_multiline_string.py +2 -1
- datahub/emitter/kafka_emitter.py +3 -1
- datahub/emitter/rest_emitter.py +2 -4
- datahub/ingestion/api/decorators.py +1 -1
- datahub/ingestion/api/report.py +1 -1
- datahub/ingestion/api/sink.py +1 -1
- datahub/ingestion/api/source.py +1 -1
- datahub/ingestion/glossary/datahub_classifier.py +11 -8
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
- datahub/ingestion/reporting/file_reporter.py +5 -4
- datahub/ingestion/run/pipeline.py +6 -6
- datahub/ingestion/run/pipeline_config.py +12 -14
- datahub/ingestion/run/sink_callback.py +1 -1
- datahub/ingestion/sink/datahub_rest.py +6 -4
- datahub/ingestion/source/abs/config.py +19 -19
- datahub/ingestion/source/abs/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/abs/source.py +2 -2
- datahub/ingestion/source/aws/aws_common.py +1 -1
- datahub/ingestion/source/aws/glue.py +6 -4
- datahub/ingestion/source/aws/sagemaker.py +1 -1
- datahub/ingestion/source/azure/azure_common.py +8 -12
- datahub/ingestion/source/bigquery_v2/bigquery.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +43 -30
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -1
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/common/gcp_credentials_config.py +10 -10
- datahub/ingestion/source/data_lake_common/path_spec.py +85 -89
- datahub/ingestion/source/datahub/config.py +8 -8
- datahub/ingestion/source/datahub/datahub_source.py +1 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +9 -3
- datahub/ingestion/source/dbt/dbt_common.py +39 -37
- datahub/ingestion/source/dbt/dbt_core.py +10 -12
- datahub/ingestion/source/debug/datahub_debug.py +1 -1
- datahub/ingestion/source/delta_lake/config.py +6 -4
- datahub/ingestion/source/dremio/dremio_config.py +10 -6
- datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
- datahub/ingestion/source/elastic_search.py +4 -3
- datahub/ingestion/source/excel/source.py +1 -1
- datahub/ingestion/source/feast.py +1 -1
- datahub/ingestion/source/file.py +5 -4
- datahub/ingestion/source/fivetran/config.py +17 -16
- datahub/ingestion/source/fivetran/fivetran.py +2 -2
- datahub/ingestion/source/gc/datahub_gc.py +1 -1
- datahub/ingestion/source/gcs/gcs_source.py +8 -10
- datahub/ingestion/source/ge_profiling_config.py +8 -5
- datahub/ingestion/source/grafana/grafana_api.py +2 -2
- datahub/ingestion/source/grafana/grafana_config.py +4 -3
- datahub/ingestion/source/grafana/grafana_source.py +1 -1
- datahub/ingestion/source/grafana/models.py +23 -5
- datahub/ingestion/source/hex/api.py +7 -5
- datahub/ingestion/source/hex/hex.py +4 -3
- datahub/ingestion/source/iceberg/iceberg.py +1 -1
- datahub/ingestion/source/iceberg/iceberg_common.py +5 -3
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +10 -10
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +7 -5
- datahub/ingestion/source/looker/looker_config.py +21 -20
- datahub/ingestion/source/looker/lookml_config.py +47 -47
- datahub/ingestion/source/metabase.py +8 -8
- datahub/ingestion/source/metadata/business_glossary.py +2 -2
- datahub/ingestion/source/metadata/lineage.py +13 -8
- datahub/ingestion/source/mlflow.py +1 -1
- datahub/ingestion/source/mode.py +6 -4
- datahub/ingestion/source/mongodb.py +4 -3
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +17 -23
- datahub/ingestion/source/openapi.py +6 -8
- datahub/ingestion/source/powerbi/config.py +33 -32
- datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +2 -2
- datahub/ingestion/source/powerbi/powerbi.py +1 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +2 -2
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +8 -6
- datahub/ingestion/source/preset.py +8 -8
- datahub/ingestion/source/pulsar.py +1 -1
- datahub/ingestion/source/qlik_sense/data_classes.py +15 -8
- datahub/ingestion/source/qlik_sense/qlik_api.py +7 -7
- datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
- datahub/ingestion/source/redshift/config.py +18 -20
- datahub/ingestion/source/redshift/redshift.py +2 -2
- datahub/ingestion/source/redshift/usage.py +23 -3
- datahub/ingestion/source/s3/config.py +83 -62
- datahub/ingestion/source/s3/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/s3/source.py +8 -5
- datahub/ingestion/source/sac/sac.py +5 -4
- datahub/ingestion/source/salesforce.py +3 -2
- datahub/ingestion/source/schema/json_schema.py +2 -2
- datahub/ingestion/source/sigma/data_classes.py +3 -2
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/sigma/sigma_api.py +7 -7
- datahub/ingestion/source/slack/slack.py +1 -1
- datahub/ingestion/source/snaplogic/snaplogic.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_assertion.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +35 -31
- datahub/ingestion/source/snowflake/snowflake_connection.py +35 -13
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_queries.py +1 -1
- datahub/ingestion/source/sql/athena.py +1 -1
- datahub/ingestion/source/sql/clickhouse.py +4 -2
- datahub/ingestion/source/sql/cockroachdb.py +1 -1
- datahub/ingestion/source/sql/druid.py +1 -1
- datahub/ingestion/source/sql/hana.py +1 -1
- datahub/ingestion/source/sql/hive.py +7 -5
- datahub/ingestion/source/sql/hive_metastore.py +1 -1
- datahub/ingestion/source/sql/mssql/source.py +13 -6
- datahub/ingestion/source/sql/mysql.py +1 -1
- datahub/ingestion/source/sql/oracle.py +17 -10
- datahub/ingestion/source/sql/postgres.py +2 -2
- datahub/ingestion/source/sql/presto.py +1 -1
- datahub/ingestion/source/sql/sql_config.py +8 -9
- datahub/ingestion/source/sql/sql_generic.py +1 -1
- datahub/ingestion/source/sql/teradata.py +1 -1
- datahub/ingestion/source/sql/trino.py +1 -1
- datahub/ingestion/source/sql/vertica.py +5 -4
- datahub/ingestion/source/sql_queries.py +11 -8
- datahub/ingestion/source/state/checkpoint.py +2 -2
- datahub/ingestion/source/state/entity_removal_state.py +2 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +55 -45
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/superset.py +9 -9
- datahub/ingestion/source/tableau/tableau.py +14 -16
- datahub/ingestion/source/unity/config.py +33 -34
- datahub/ingestion/source/unity/proxy.py +203 -0
- datahub/ingestion/source/unity/proxy_types.py +91 -0
- datahub/ingestion/source/unity/source.py +27 -2
- datahub/ingestion/source/usage/clickhouse_usage.py +1 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +1 -1
- datahub/ingestion/source/usage/usage_common.py +5 -3
- datahub/ingestion/source_config/csv_enricher.py +7 -6
- datahub/ingestion/source_config/operation_config.py +7 -4
- datahub/ingestion/source_config/pulsar.py +11 -15
- datahub/ingestion/transformer/add_dataset_browse_path.py +1 -1
- datahub/ingestion/transformer/add_dataset_dataproduct.py +6 -5
- datahub/ingestion/transformer/add_dataset_ownership.py +3 -3
- datahub/ingestion/transformer/add_dataset_properties.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_tags.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_terms.py +2 -2
- datahub/ingestion/transformer/add_dataset_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +1 -1
- datahub/ingestion/transformer/extract_dataset_tags.py +1 -1
- datahub/ingestion/transformer/extract_ownership_from_tags.py +1 -1
- datahub/ingestion/transformer/mark_dataset_status.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_ownership.py +1 -1
- datahub/ingestion/transformer/remove_dataset_ownership.py +1 -1
- datahub/ingestion/transformer/replace_external_url.py +2 -2
- datahub/ingestion/transformer/set_browse_path.py +1 -1
- datahub/ingestion/transformer/tags_to_terms.py +1 -1
- datahub/lite/duckdb_lite.py +1 -1
- datahub/lite/lite_util.py +2 -2
- datahub/sdk/search_filters.py +68 -40
- datahub/secret/datahub_secret_store.py +7 -4
- datahub/secret/file_secret_store.py +1 -1
- datahub/sql_parsing/sqlglot_lineage.py +5 -2
- datahub/testing/check_sql_parser_result.py +2 -2
- datahub/utilities/ingest_utils.py +1 -1
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/top_level.txt +0 -0
datahub/ingestion/transformer/add_dataset_properties.py
CHANGED
@@ -50,7 +50,7 @@ class AddDatasetProperties(DatasetPropertiesTransformer):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetProperties":
-        config = AddDatasetPropertiesConfig.parse_obj(config_dict)
+        config = AddDatasetPropertiesConfig.model_validate(config_dict)
         return cls(config, ctx)

     @staticmethod
@@ -144,5 +144,5 @@ class SimpleAddDatasetProperties(AddDatasetProperties):
     def create(
         cls, config_dict: dict, ctx: PipelineContext
     ) -> "SimpleAddDatasetProperties":
-        config = SimpleAddDatasetPropertiesConfig.parse_obj(config_dict)
+        config = SimpleAddDatasetPropertiesConfig.model_validate(config_dict)
         return cls(config, ctx)

datahub/ingestion/transformer/add_dataset_schema_tags.py
CHANGED
@@ -38,7 +38,7 @@ class AddDatasetSchemaTags(DatasetSchemaMetadataTransformer):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetSchemaTags":
-        config = AddDatasetSchemaTagsConfig.parse_obj(config_dict)
+        config = AddDatasetSchemaTagsConfig.model_validate(config_dict)
         return cls(config, ctx)

     def extend_field(
@@ -142,5 +142,5 @@ class PatternAddDatasetSchemaTags(AddDatasetSchemaTags):
     def create(
         cls, config_dict: dict, ctx: PipelineContext
     ) -> "PatternAddDatasetSchemaTags":
-        config = PatternDatasetTagsConfig.parse_obj(config_dict)
+        config = PatternDatasetTagsConfig.model_validate(config_dict)
         return cls(config, ctx)

datahub/ingestion/transformer/add_dataset_schema_terms.py
CHANGED
@@ -39,7 +39,7 @@ class AddDatasetSchemaTerms(DatasetSchemaMetadataTransformer):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetSchemaTerms":
-        config = AddDatasetSchemaTermsConfig.parse_obj(config_dict)
+        config = AddDatasetSchemaTermsConfig.model_validate(config_dict)
         return cls(config, ctx)

     def extend_field(
@@ -162,5 +162,5 @@ class PatternAddDatasetSchemaTerms(AddDatasetSchemaTerms):
     def create(
         cls, config_dict: dict, ctx: PipelineContext
     ) -> "PatternAddDatasetSchemaTerms":
-        config = PatternDatasetTermsConfig.parse_obj(config_dict)
+        config = PatternDatasetTermsConfig.model_validate(config_dict)
         return cls(config, ctx)

datahub/ingestion/transformer/add_dataset_tags.py
CHANGED
@@ -41,7 +41,7 @@ class AddDatasetTags(DatasetTagsTransformer):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetTags":
-        config = AddDatasetTagsConfig.parse_obj(config_dict)
+        config = AddDatasetTagsConfig.model_validate(config_dict)
         return cls(config, ctx)

     def transform_aspect(
@@ -104,7 +104,7 @@ class SimpleAddDatasetTags(AddDatasetTags):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "SimpleAddDatasetTags":
-        config = SimpleDatasetTagConfig.parse_obj(config_dict)
+        config = SimpleDatasetTagConfig.model_validate(config_dict)
         return cls(config, ctx)


@@ -128,5 +128,5 @@ class PatternAddDatasetTags(AddDatasetTags):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "PatternAddDatasetTags":
-        config = PatternDatasetTagsConfig.parse_obj(config_dict)
+        config = PatternDatasetTagsConfig.model_validate(config_dict)
         return cls(config, ctx)

datahub/ingestion/transformer/add_dataset_terms.py
CHANGED
@@ -39,7 +39,7 @@ class AddDatasetTerms(DatasetTermsTransformer):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetTerms":
-        config = AddDatasetTermsConfig.parse_obj(config_dict)
+        config = AddDatasetTermsConfig.model_validate(config_dict)
         return cls(config, ctx)

     @staticmethod
@@ -120,7 +120,7 @@ class SimpleAddDatasetTerms(AddDatasetTerms):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "SimpleAddDatasetTerms":
-        config = SimpleDatasetTermsConfig.parse_obj(config_dict)
+        config = SimpleDatasetTermsConfig.model_validate(config_dict)
         return cls(config, ctx)


@@ -147,5 +147,5 @@ class PatternAddDatasetTerms(AddDatasetTerms):
     def create(
         cls, config_dict: dict, ctx: PipelineContext
     ) -> "PatternAddDatasetTerms":
-        config = PatternDatasetTermsConfig.parse_obj(config_dict)
+        config = PatternDatasetTermsConfig.model_validate(config_dict)
         return cls(config, ctx)

datahub/ingestion/transformer/dataset_domain.py
CHANGED
@@ -67,7 +67,7 @@ class AddDatasetDomain(DatasetDomainTransformer):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetDomain":
-        config = AddDatasetDomainSemanticsConfig.parse_obj(config_dict)
+        config = AddDatasetDomainSemanticsConfig.model_validate(config_dict)
         return cls(config, ctx)

     @staticmethod
@@ -208,7 +208,7 @@ class SimpleAddDatasetDomain(AddDatasetDomain):
     def create(
         cls, config_dict: dict, ctx: PipelineContext
     ) -> "SimpleAddDatasetDomain":
-        config = SimpleDatasetDomainSemanticsConfig.parse_obj(config_dict)
+        config = SimpleDatasetDomainSemanticsConfig.model_validate(config_dict)
         return cls(config, ctx)


@@ -238,5 +238,5 @@ class PatternAddDatasetDomain(AddDatasetDomain):
     def create(
         cls, config_dict: dict, ctx: PipelineContext
     ) -> "PatternAddDatasetDomain":
-        config = PatternDatasetDomainSemanticsConfig.parse_obj(config_dict)
+        config = PatternDatasetDomainSemanticsConfig.model_validate(config_dict)
         return cls(config, ctx)

datahub/ingestion/transformer/dataset_domain_based_on_tags.py
CHANGED
@@ -27,7 +27,7 @@ class DatasetTagDomainMapper(DatasetDomainTransformer):
     def create(
         cls, config_dict: dict, ctx: PipelineContext
     ) -> "DatasetTagDomainMapper":
-        config = DatasetTagDomainMapperConfig.parse_obj(config_dict)
+        config = DatasetTagDomainMapperConfig.model_validate(config_dict)
         return cls(config, ctx)

     def transform_aspect(

datahub/ingestion/transformer/extract_dataset_tags.py
CHANGED
@@ -29,7 +29,7 @@ class ExtractDatasetTags(DatasetTagsTransformer):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "ExtractDatasetTags":
-        config = ExtractDatasetTagsConfig.parse_obj(config_dict)
+        config = ExtractDatasetTagsConfig.model_validate(config_dict)
         return cls(config, ctx)

     def _get_tags_to_add(self, entity_urn: str) -> List[TagAssociationClass]:

datahub/ingestion/transformer/extract_ownership_from_tags.py
CHANGED
@@ -62,7 +62,7 @@ class ExtractOwnersFromTagsTransformer(DatasetTagsTransformer):
     def create(
         cls, config_dict: dict, ctx: PipelineContext
     ) -> "ExtractOwnersFromTagsTransformer":
-        config = ExtractOwnersFromTagsConfig.parse_obj(config_dict)
+        config = ExtractOwnersFromTagsConfig.model_validate(config_dict)
         return cls(config, ctx)

     def get_owner_urn(self, owner_str: str) -> str:

datahub/ingestion/transformer/mark_dataset_status.py
CHANGED
@@ -24,7 +24,7 @@ class MarkDatasetStatus(DatasetStatusTransformer):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "MarkDatasetStatus":
-        config = MarkDatasetStatusConfig.parse_obj(config_dict)
+        config = MarkDatasetStatusConfig.model_validate(config_dict)
         return cls(config, ctx)

     def transform_aspect(

datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py
CHANGED
@@ -38,7 +38,7 @@ class PatternCleanupDatasetUsageUser(DatasetUsageStatisticsTransformer):
     def create(
         cls, config_dict: dict, ctx: PipelineContext
     ) -> "PatternCleanupDatasetUsageUser":
-        config = PatternCleanupDatasetUsageUserConfig.parse_obj(config_dict)
+        config = PatternCleanupDatasetUsageUserConfig.model_validate(config_dict)
         return cls(config, ctx)

     def transform_aspect(

datahub/ingestion/transformer/pattern_cleanup_ownership.py
CHANGED
@@ -37,7 +37,7 @@ class PatternCleanUpOwnership(OwnershipTransformer):
     def create(
         cls, config_dict: dict, ctx: PipelineContext
     ) -> "PatternCleanUpOwnership":
-        config = PatternCleanUpOwnershipConfig.parse_obj(config_dict)
+        config = PatternCleanUpOwnershipConfig.model_validate(config_dict)
         return cls(config, ctx)

     def _get_current_owner_urns(self, entity_urn: str) -> Set[str]:

datahub/ingestion/transformer/remove_dataset_ownership.py
CHANGED
@@ -21,7 +21,7 @@ class SimpleRemoveDatasetOwnership(OwnershipTransformer):
     def create(
         cls, config_dict: dict, ctx: PipelineContext
     ) -> "SimpleRemoveDatasetOwnership":
-        config = ClearDatasetOwnershipConfig.parse_obj(config_dict)
+        config = ClearDatasetOwnershipConfig.model_validate(config_dict)
         return cls(config, ctx)

     def transform_aspect(

datahub/ingestion/transformer/replace_external_url.py
CHANGED
@@ -47,7 +47,7 @@ class ReplaceExternalUrlDataset(DatasetPropertiesTransformer, ReplaceUrl):
     def create(
         cls, config_dict: dict, ctx: PipelineContext
     ) -> "ReplaceExternalUrlDataset":
-        config = ReplaceExternalUrlConfig.parse_obj(config_dict)
+        config = ReplaceExternalUrlConfig.model_validate(config_dict)
         return cls(config, ctx)

     def transform_aspect(
@@ -97,7 +97,7 @@ class ReplaceExternalUrlContainer(ContainerPropertiesTransformer, ReplaceUrl):
     def create(
         cls, config_dict: dict, ctx: PipelineContext
     ) -> "ReplaceExternalUrlContainer":
-        config = ReplaceExternalUrlConfig.parse_obj(config_dict)
+        config = ReplaceExternalUrlConfig.model_validate(config_dict)
         return cls(config, ctx)

     def transform_aspect(

datahub/ingestion/transformer/set_browse_path.py
CHANGED
@@ -42,7 +42,7 @@ class SetBrowsePathTransformer(BaseTransformer, SingleAspectTransformer):
     def create(
         cls, config_dict: dict, ctx: PipelineContext
     ) -> "SetBrowsePathTransformer":
-        config = SetBrowsePathTransformerConfig.parse_obj(config_dict)
+        config = SetBrowsePathTransformerConfig.model_validate(config_dict)
         return cls(config, ctx)

     @staticmethod

datahub/ingestion/transformer/tags_to_terms.py
CHANGED
@@ -32,7 +32,7 @@ class TagsToTermMapper(TagsToTermTransformer):

     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "TagsToTermMapper":
-        config = TagsToTermMapperConfig.parse_obj(config_dict)
+        config = TagsToTermMapperConfig.model_validate(config_dict)
         return cls(config, ctx)

     @staticmethod
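Every transformer hunk above is the same one-line change: the pydantic v1 parser Config.parse_obj(config_dict) becomes the v2 equivalent Config.model_validate(config_dict). As a minimal standalone sketch of the two spellings (ExampleConfig is a hypothetical stand-in, not a class from acryl-datahub):

from pydantic import BaseModel


class ExampleConfig(BaseModel):
    # hypothetical stand-in for a transformer config model
    replace_existing: bool = False


raw = {"replace_existing": True}
config = ExampleConfig.model_validate(raw)  # pydantic v2 API
# config = ExampleConfig.parse_obj(raw)     # v1 spelling; deprecated under v2
assert config.replace_existing is True

model_validate accepts the same dict input and raises pydantic.ValidationError on bad data, so the create() classmethods keep their existing behavior.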
datahub/lite/duckdb_lite.py
CHANGED
@@ -42,7 +42,7 @@ logger = logging.getLogger(__name__)
 class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
     @classmethod
     def create(cls, config_dict: dict) -> "DuckDBLite":
-        config: DuckDBLiteConfig = DuckDBLiteConfig.parse_obj(config_dict)
+        config: DuckDBLiteConfig = DuckDBLiteConfig.model_validate(config_dict)
         return DuckDBLite(config)

     def __init__(self, config: DuckDBLiteConfig) -> None:
datahub/lite/lite_util.py
CHANGED
@@ -92,7 +92,7 @@ class DataHubLiteWrapper(DataHubLiteLocal):


 def get_datahub_lite(config_dict: dict, read_only: bool = False) -> "DataHubLiteLocal":
-    lite_local_config = LiteLocalConfig.parse_obj(config_dict)
+    lite_local_config = LiteLocalConfig.model_validate(config_dict)

     lite_type = lite_local_config.type
     try:
@@ -102,7 +102,7 @@ def get_datahub_lite(config_dict: dict, read_only: bool = False) -> "DataHubLite
             f"Failed to find a registered lite implementation for {lite_type}. Valid values are {[k for k in lite_registry.mapping]}"
         ) from e

-    lite_specific_config = lite_class.get_config_class().parse_obj(
+    lite_specific_config = lite_class.get_config_class().model_validate(
         lite_local_config.config
     )
    lite = lite_class(lite_specific_config)
datahub/sdk/search_filters.py
CHANGED
@@ -16,6 +16,7 @@ from typing import (
 )

 import pydantic
+from pydantic import field_validator

 from datahub.configuration.common import ConfigModel
 from datahub.configuration.pydantic_migration_helpers import (
@@ -102,7 +103,8 @@ class _EntitySubtypeFilter(_BaseFilter):
         description="The entity subtype to filter on. Can be 'Table', 'View', 'Source', etc. depending on the native platform's concepts.",
     )

-    @…
+    @field_validator("entity_subtype", mode="before")
+    @classmethod
     def validate_entity_subtype(cls, v: str) -> List[str]:
         return [v] if not isinstance(v, list) else v

@@ -141,10 +143,13 @@ class _PlatformFilter(_BaseFilter):
     platform: List[str]
     # TODO: Add validator to convert string -> list of strings

-    @…
-    …
+    @field_validator("platform", mode="before")
+    @classmethod
+    def validate_platform(cls, v):
         # Subtle - we use the constructor instead of the from_string method
         # because coercion is acceptable here.
+        if isinstance(v, list):
+            return [str(DataPlatformUrn(item)) for item in v]
         return str(DataPlatformUrn(v))

     def _build_rule(self) -> SearchFilterRule:
@@ -161,8 +166,11 @@ class _PlatformFilter(_BaseFilter):
 class _DomainFilter(_BaseFilter):
     domain: List[str]

-    @…
-    …
+    @field_validator("domain", mode="before")
+    @classmethod
+    def validate_domain(cls, v):
+        if isinstance(v, list):
+            return [str(DomainUrn.from_string(item)) for item in v]
         return str(DomainUrn.from_string(v))

     def _build_rule(self) -> SearchFilterRule:
@@ -183,8 +191,11 @@ class _ContainerFilter(_BaseFilter):
         description="If true, only entities that are direct descendants of the container will be returned.",
     )

-    @…
-    …
+    @field_validator("container", mode="before")
+    @classmethod
+    def validate_container(cls, v):
+        if isinstance(v, list):
+            return [str(ContainerUrn.from_string(item)) for item in v]
         return str(ContainerUrn.from_string(v))

     @classmethod
@@ -249,17 +260,25 @@ class _OwnerFilter(_BaseFilter):
         description="The owner to filter on. Should be user or group URNs.",
     )

-    @…
-    …
+    @field_validator("owner", mode="before")
+    @classmethod
+    def validate_owner(cls, v):
+        validated = []
+        for owner in v:
+            if not owner.startswith("urn:li:"):
+                raise ValueError(
+                    f"Owner must be a valid User or Group URN, got: {owner}"
+                )
+            _type = guess_entity_type(owner)
+            if _type == CorpUserUrn.ENTITY_TYPE:
+                validated.append(str(CorpUserUrn.from_string(owner)))
+            elif _type == CorpGroupUrn.ENTITY_TYPE:
+                validated.append(str(CorpGroupUrn.from_string(owner)))
+            else:
+                raise ValueError(
+                    f"Owner must be a valid User or Group URN, got: {owner}"
+                )
+        return validated

     def _build_rule(self) -> SearchFilterRule:
         return SearchFilterRule(
@@ -279,17 +298,21 @@ class _GlossaryTermFilter(_BaseFilter):
         description="The glossary term to filter on. Should be glossary term URNs.",
     )

-    @…
-    …
+    @field_validator("glossary_term", mode="before")
+    @classmethod
+    def validate_glossary_term(cls, v):
+        validated = []
+        for term in v:
+            if not term.startswith("urn:li:"):
+                raise ValueError(f"Glossary term must be a valid URN, got: {term}")
+            # Validate that it's a glossary term URN
+            _type = guess_entity_type(term)
+            if _type != "glossaryTerm":
+                raise ValueError(
+                    f"Glossary term must be a valid glossary term URN, got: {term}"
+                )
+            validated.append(term)
+        return validated

     def _build_rule(self) -> SearchFilterRule:
         return SearchFilterRule(
@@ -309,15 +332,19 @@ class _TagFilter(_BaseFilter):
         description="The tag to filter on. Should be tag URNs.",
     )

-    @…
-    …
+    @field_validator("tag", mode="before")
+    @classmethod
+    def validate_tag(cls, v):
+        validated = []
+        for tag in v:
+            if not tag.startswith("urn:li:"):
+                raise ValueError(f"Tag must be a valid URN, got: {tag}")
+            # Validate that it's a tag URN
+            _type = guess_entity_type(tag)
+            if _type != "tag":
+                raise ValueError(f"Tag must be a valid tag URN, got: {tag}")
+            validated.append(tag)
+        return validated

     def _build_rule(self) -> SearchFilterRule:
         return SearchFilterRule(
@@ -426,7 +453,8 @@ class _Not(_BaseFilter):

     not_: "Filter" = pydantic.Field(alias="not")

-    @…
+    @field_validator("not_", mode="after")
+    @classmethod
     def validate_not(cls, v: "Filter") -> "Filter":
         inner_filter = v.compile()
         if len(inner_filter) != 1:
@@ -571,7 +599,7 @@ def load_filters(obj: Any) -> Filter:
     if PYDANTIC_VERSION_2:
         return pydantic.TypeAdapter(Filter).validate_python(obj)  # type: ignore
     else:
-        return pydantic.parse_obj_as(Filter, obj)  # type: ignore
+        return pydantic.TypeAdapter(Filter).validate_python(obj)  # type: ignore


 # We need FilterDsl for two reasons:
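The search_filters.py hunks show the decorator side of the migration: each pydantic v1 validator is replaced by v2's field_validator stacked on an explicit @classmethod, with mode="before" taking the role of v1's pre=True, and load_filters moves from parse_obj_as to the TypeAdapter API. A minimal sketch of both patterns under pydantic v2 (_ExampleFilter is hypothetical, not the DataHub class):

from typing import List, Union

from pydantic import BaseModel, TypeAdapter, field_validator


class _ExampleFilter(BaseModel):
    # hypothetical model mirroring the coercion validators above
    platform: List[str]

    # mode="before" runs prior to type coercion, like pre=True in pydantic v1
    @field_validator("platform", mode="before")
    @classmethod
    def _coerce_to_list(cls, v: Union[str, List[str]]) -> List[str]:
        return [v] if not isinstance(v, list) else v


print(_ExampleFilter(platform="snowflake").platform)  # ['snowflake']

# TypeAdapter(...).validate_python replaces pydantic.parse_obj_as:
adapter = TypeAdapter(List[_ExampleFilter])
filters = adapter.validate_python([{"platform": ["looker", "dbt"]}])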
datahub/secret/datahub_secret_store.py
CHANGED
@@ -1,7 +1,7 @@
 import logging
 from typing import Any, Dict, List, Optional, Union

-from pydantic import BaseModel, validator
+from pydantic import BaseModel, field_validator

 from datahub.ingestion.graph.client import DataHubGraph
 from datahub.ingestion.graph.config import DatahubClientConfig
@@ -18,8 +18,11 @@ class DataHubSecretStoreConfig(BaseModel):
     class Config:
         arbitrary_types_allowed = True

-    @…
-    …
+    @field_validator("graph_client", mode="after")
+    @classmethod
+    def check_graph_connection(
+        cls, v: Optional[DataHubGraph]
+    ) -> Optional[DataHubGraph]:
         if v is not None:
             v.test_connection()
         return v
@@ -63,7 +66,7 @@ class DataHubSecretStore(SecretStore):

     @classmethod
     def create(cls, config: Any) -> "DataHubSecretStore":
-        config = DataHubSecretStoreConfig.parse_obj(config)
+        config = DataHubSecretStoreConfig.model_validate(config)
         return cls(config)

     def close(self) -> None:
datahub/sql_parsing/sqlglot_lineage.py
CHANGED
@@ -28,6 +28,7 @@ import sqlglot.optimizer.optimizer
 import sqlglot.optimizer.qualify
 import sqlglot.optimizer.qualify_columns
 import sqlglot.optimizer.unnest_subqueries
+from pydantic import field_validator

 from datahub.cli.env_utils import get_boolean_env_variable
 from datahub.ingestion.graph.client import DataHubGraph
@@ -141,7 +142,8 @@ class DownstreamColumnRef(_ParserBaseModel):
     column_type: Optional[SchemaFieldDataTypeClass] = None
     native_column_type: Optional[str] = None

-    @…
+    @field_validator("column_type", mode="before")
+    @classmethod
     def _load_column_type(
         cls, v: Optional[Union[dict, SchemaFieldDataTypeClass]]
     ) -> Optional[SchemaFieldDataTypeClass]:
@@ -215,7 +217,8 @@ class SqlParsingDebugInfo(_ParserBaseModel):
     def error(self) -> Optional[Exception]:
         return self.table_error or self.column_error

-    @…
+    @field_validator("table_error", "column_error", mode="before")
+    @classmethod
     def remove_variables_from_error(cls, v: Optional[Exception]) -> Optional[Exception]:
         if v and v.__traceback__:
             # Remove local variables from the traceback to avoid memory leaks.
datahub/testing/check_sql_parser_result.py
CHANGED
@@ -60,8 +60,8 @@ def assert_sql_result_with_resolver(
     expected = SqlParsingResult.parse_raw(expected_file.read_text())

     full_diff = deepdiff.DeepDiff(
-        expected.dict(),
-        res.dict(),
+        expected.model_dump(),
+        res.model_dump(),
         exclude_regex_paths=[
             r"root.column_lineage\[\d+\].logic",
         ],
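check_sql_parser_result.py swaps the pydantic v1 serializer .dict() for v2's .model_dump(); both return a plain dict suitable for the deepdiff comparison. In isolation (Point is a hypothetical example model, not DataHub code):

from pydantic import BaseModel


class Point(BaseModel):
    # hypothetical example model
    x: int
    y: int


p = Point(x=1, y=2)
print(p.model_dump())  # {'x': 1, 'y': 2}; pydantic v1 spelled this p.dict()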
datahub/utilities/ingest_utils.py
CHANGED
@@ -48,7 +48,7 @@ def deploy_source_vars(

     deploy_options_raw = pipeline_config.pop("deployment", None)
     if deploy_options_raw is not None:
-        deploy_options = DeployOptions.parse_obj(deploy_options_raw)
+        deploy_options = DeployOptions.model_validate(deploy_options_raw)

         if name:
             logger.info(f"Overriding deployment name {deploy_options.name} with {name}")
{acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/WHEEL
File without changes
{acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/entry_points.txt
File without changes
{acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/licenses/LICENSE
File without changes
{acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/top_level.txt
File without changes