acryl-datahub 1.3.1__py3-none-any.whl → 1.3.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/METADATA +2582 -2582
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/RECORD +203 -201
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +2 -2
- datahub/api/entities/corpgroup/corpgroup.py +11 -6
- datahub/api/entities/corpuser/corpuser.py +11 -11
- datahub/api/entities/dataproduct/dataproduct.py +47 -27
- datahub/api/entities/dataset/dataset.py +32 -21
- datahub/api/entities/external/lake_formation_external_entites.py +5 -6
- datahub/api/entities/external/unity_catalog_external_entites.py +5 -7
- datahub/api/entities/forms/forms.py +16 -14
- datahub/api/entities/structuredproperties/structuredproperties.py +23 -16
- datahub/cli/check_cli.py +2 -2
- datahub/cli/config_utils.py +3 -3
- datahub/cli/lite_cli.py +9 -7
- datahub/cli/migrate.py +4 -4
- datahub/cli/quickstart_versioning.py +3 -3
- datahub/cli/specific/group_cli.py +1 -1
- datahub/cli/specific/structuredproperties_cli.py +1 -1
- datahub/cli/specific/user_cli.py +1 -1
- datahub/configuration/common.py +14 -2
- datahub/configuration/connection_resolver.py +2 -2
- datahub/configuration/git.py +47 -30
- datahub/configuration/import_resolver.py +2 -2
- datahub/configuration/kafka.py +4 -3
- datahub/configuration/time_window_config.py +26 -26
- datahub/configuration/validate_field_deprecation.py +2 -2
- datahub/configuration/validate_field_removal.py +2 -2
- datahub/configuration/validate_field_rename.py +2 -2
- datahub/configuration/validate_multiline_string.py +2 -1
- datahub/emitter/kafka_emitter.py +3 -1
- datahub/emitter/rest_emitter.py +2 -4
- datahub/ingestion/api/decorators.py +1 -1
- datahub/ingestion/api/report.py +1 -1
- datahub/ingestion/api/sink.py +1 -1
- datahub/ingestion/api/source.py +1 -1
- datahub/ingestion/glossary/datahub_classifier.py +11 -8
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
- datahub/ingestion/reporting/file_reporter.py +5 -4
- datahub/ingestion/run/pipeline.py +6 -6
- datahub/ingestion/run/pipeline_config.py +12 -14
- datahub/ingestion/run/sink_callback.py +1 -1
- datahub/ingestion/sink/datahub_rest.py +6 -4
- datahub/ingestion/source/abs/config.py +19 -19
- datahub/ingestion/source/abs/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/abs/source.py +2 -2
- datahub/ingestion/source/aws/aws_common.py +1 -1
- datahub/ingestion/source/aws/glue.py +6 -4
- datahub/ingestion/source/aws/sagemaker.py +1 -1
- datahub/ingestion/source/azure/azure_common.py +8 -12
- datahub/ingestion/source/bigquery_v2/bigquery.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +43 -30
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -1
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/common/gcp_credentials_config.py +10 -10
- datahub/ingestion/source/data_lake_common/path_spec.py +85 -89
- datahub/ingestion/source/datahub/config.py +8 -8
- datahub/ingestion/source/datahub/datahub_source.py +1 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +9 -3
- datahub/ingestion/source/dbt/dbt_common.py +39 -37
- datahub/ingestion/source/dbt/dbt_core.py +10 -12
- datahub/ingestion/source/debug/datahub_debug.py +1 -1
- datahub/ingestion/source/delta_lake/config.py +6 -4
- datahub/ingestion/source/dremio/dremio_config.py +10 -6
- datahub/ingestion/source/dremio/dremio_source.py +15 -15
- datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
- datahub/ingestion/source/elastic_search.py +4 -3
- datahub/ingestion/source/excel/source.py +1 -1
- datahub/ingestion/source/feast.py +1 -1
- datahub/ingestion/source/file.py +5 -4
- datahub/ingestion/source/fivetran/config.py +17 -16
- datahub/ingestion/source/fivetran/fivetran.py +2 -2
- datahub/ingestion/source/gc/datahub_gc.py +1 -1
- datahub/ingestion/source/gcs/gcs_source.py +8 -10
- datahub/ingestion/source/ge_profiling_config.py +8 -5
- datahub/ingestion/source/grafana/grafana_api.py +2 -2
- datahub/ingestion/source/grafana/grafana_config.py +4 -3
- datahub/ingestion/source/grafana/grafana_source.py +1 -1
- datahub/ingestion/source/grafana/models.py +23 -5
- datahub/ingestion/source/hex/api.py +7 -5
- datahub/ingestion/source/hex/hex.py +4 -3
- datahub/ingestion/source/iceberg/iceberg.py +1 -1
- datahub/ingestion/source/iceberg/iceberg_common.py +5 -3
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +10 -10
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +7 -5
- datahub/ingestion/source/looker/looker_config.py +21 -20
- datahub/ingestion/source/looker/lookml_config.py +47 -47
- datahub/ingestion/source/metabase.py +8 -8
- datahub/ingestion/source/metadata/business_glossary.py +2 -2
- datahub/ingestion/source/metadata/lineage.py +13 -8
- datahub/ingestion/source/mlflow.py +1 -1
- datahub/ingestion/source/mode.py +6 -4
- datahub/ingestion/source/mongodb.py +4 -3
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +17 -23
- datahub/ingestion/source/openapi.py +6 -8
- datahub/ingestion/source/powerbi/config.py +33 -32
- datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +2 -2
- datahub/ingestion/source/powerbi/powerbi.py +1 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +2 -2
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +8 -6
- datahub/ingestion/source/preset.py +8 -8
- datahub/ingestion/source/pulsar.py +1 -1
- datahub/ingestion/source/qlik_sense/data_classes.py +15 -8
- datahub/ingestion/source/qlik_sense/qlik_api.py +7 -7
- datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
- datahub/ingestion/source/redshift/config.py +18 -20
- datahub/ingestion/source/redshift/redshift.py +2 -2
- datahub/ingestion/source/redshift/usage.py +23 -3
- datahub/ingestion/source/s3/config.py +83 -62
- datahub/ingestion/source/s3/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/s3/source.py +8 -5
- datahub/ingestion/source/sac/sac.py +5 -4
- datahub/ingestion/source/salesforce.py +3 -2
- datahub/ingestion/source/schema/json_schema.py +2 -2
- datahub/ingestion/source/sigma/data_classes.py +3 -2
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/sigma/sigma_api.py +7 -7
- datahub/ingestion/source/slack/slack.py +1 -1
- datahub/ingestion/source/snaplogic/snaplogic.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_assertion.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +35 -31
- datahub/ingestion/source/snowflake/snowflake_connection.py +35 -13
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_queries.py +1 -1
- datahub/ingestion/source/sql/athena.py +1 -1
- datahub/ingestion/source/sql/clickhouse.py +4 -2
- datahub/ingestion/source/sql/cockroachdb.py +1 -1
- datahub/ingestion/source/sql/druid.py +1 -1
- datahub/ingestion/source/sql/hana.py +1 -1
- datahub/ingestion/source/sql/hive.py +7 -5
- datahub/ingestion/source/sql/hive_metastore.py +1 -1
- datahub/ingestion/source/sql/mssql/source.py +13 -6
- datahub/ingestion/source/sql/mysql.py +1 -1
- datahub/ingestion/source/sql/oracle.py +17 -10
- datahub/ingestion/source/sql/postgres.py +2 -2
- datahub/ingestion/source/sql/presto.py +1 -1
- datahub/ingestion/source/sql/sql_config.py +8 -9
- datahub/ingestion/source/sql/sql_generic.py +1 -1
- datahub/ingestion/source/sql/teradata.py +1 -1
- datahub/ingestion/source/sql/trino.py +1 -1
- datahub/ingestion/source/sql/vertica.py +5 -4
- datahub/ingestion/source/sql_queries.py +11 -8
- datahub/ingestion/source/state/checkpoint.py +2 -2
- datahub/ingestion/source/state/entity_removal_state.py +2 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +55 -45
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/superset.py +9 -9
- datahub/ingestion/source/tableau/tableau.py +14 -16
- datahub/ingestion/source/unity/azure_auth_config.py +15 -0
- datahub/ingestion/source/unity/config.py +51 -34
- datahub/ingestion/source/unity/connection.py +7 -1
- datahub/ingestion/source/unity/connection_test.py +1 -1
- datahub/ingestion/source/unity/proxy.py +216 -7
- datahub/ingestion/source/unity/proxy_types.py +91 -0
- datahub/ingestion/source/unity/source.py +29 -3
- datahub/ingestion/source/usage/clickhouse_usage.py +1 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +1 -1
- datahub/ingestion/source/usage/usage_common.py +5 -3
- datahub/ingestion/source_config/csv_enricher.py +7 -6
- datahub/ingestion/source_config/operation_config.py +7 -4
- datahub/ingestion/source_config/pulsar.py +11 -15
- datahub/ingestion/transformer/add_dataset_browse_path.py +1 -1
- datahub/ingestion/transformer/add_dataset_dataproduct.py +6 -5
- datahub/ingestion/transformer/add_dataset_ownership.py +3 -3
- datahub/ingestion/transformer/add_dataset_properties.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_tags.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_terms.py +2 -2
- datahub/ingestion/transformer/add_dataset_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +1 -1
- datahub/ingestion/transformer/extract_dataset_tags.py +1 -1
- datahub/ingestion/transformer/extract_ownership_from_tags.py +1 -1
- datahub/ingestion/transformer/mark_dataset_status.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_ownership.py +1 -1
- datahub/ingestion/transformer/remove_dataset_ownership.py +1 -1
- datahub/ingestion/transformer/replace_external_url.py +2 -2
- datahub/ingestion/transformer/set_browse_path.py +1 -1
- datahub/ingestion/transformer/tags_to_terms.py +1 -1
- datahub/lite/duckdb_lite.py +1 -1
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/schema.avsc +7 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -1
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc +6 -1
- datahub/sdk/__init__.py +1 -0
- datahub/sdk/_all_entities.py +2 -0
- datahub/sdk/search_filters.py +68 -40
- datahub/sdk/tag.py +112 -0
- datahub/secret/datahub_secret_store.py +7 -4
- datahub/secret/file_secret_store.py +1 -1
- datahub/sql_parsing/sqlglot_lineage.py +5 -2
- datahub/testing/check_sql_parser_result.py +2 -2
- datahub/utilities/ingest_utils.py +1 -1
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from typing import Callable, Dict, List, Optional, Union
|
|
3
3
|
|
|
4
|
-
import
|
|
4
|
+
from pydantic import model_validator
|
|
5
5
|
|
|
6
6
|
from datahub.configuration.common import ConfigModel, KeyValuePattern
|
|
7
7
|
from datahub.configuration.import_resolver import pydantic_resolve_key
|
|
@@ -39,7 +39,7 @@ class AddDatasetDataProduct(DatasetDataproductTransformer):
|
|
|
39
39
|
|
|
40
40
|
@classmethod
|
|
41
41
|
def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetDataProduct":
|
|
42
|
-
config = AddDatasetDataProductConfig.
|
|
42
|
+
config = AddDatasetDataProductConfig.model_validate(config_dict)
|
|
43
43
|
return cls(config, ctx)
|
|
44
44
|
|
|
45
45
|
def transform_aspect(
|
|
@@ -116,7 +116,7 @@ class SimpleAddDatasetDataProduct(AddDatasetDataProduct):
|
|
|
116
116
|
def create(
|
|
117
117
|
cls, config_dict: dict, ctx: PipelineContext
|
|
118
118
|
) -> "SimpleAddDatasetDataProduct":
|
|
119
|
-
config = SimpleDatasetDataProductConfig.
|
|
119
|
+
config = SimpleDatasetDataProductConfig.model_validate(config_dict)
|
|
120
120
|
return cls(config, ctx)
|
|
121
121
|
|
|
122
122
|
|
|
@@ -124,7 +124,8 @@ class PatternDatasetDataProductConfig(ConfigModel):
|
|
|
124
124
|
dataset_to_data_product_urns_pattern: KeyValuePattern = KeyValuePattern.all()
|
|
125
125
|
is_container: bool = False
|
|
126
126
|
|
|
127
|
-
@
|
|
127
|
+
@model_validator(mode="before")
|
|
128
|
+
@classmethod
|
|
128
129
|
def validate_pattern_value(cls, values: Dict) -> Dict:
|
|
129
130
|
rules = values["dataset_to_data_product_urns_pattern"]["rules"]
|
|
130
131
|
for key, value in rules.items():
|
|
@@ -156,5 +157,5 @@ class PatternAddDatasetDataProduct(AddDatasetDataProduct):
|
|
|
156
157
|
def create(
|
|
157
158
|
cls, config_dict: dict, ctx: PipelineContext
|
|
158
159
|
) -> "PatternAddDatasetDataProduct":
|
|
159
|
-
config = PatternDatasetDataProductConfig.
|
|
160
|
+
config = PatternDatasetDataProductConfig.model_validate(config_dict)
|
|
160
161
|
return cls(config, ctx)
|
|
@@ -55,7 +55,7 @@ class AddDatasetOwnership(OwnershipTransformer):
|
|
|
55
55
|
|
|
56
56
|
@classmethod
|
|
57
57
|
def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetOwnership":
|
|
58
|
-
config = AddDatasetOwnershipConfig.
|
|
58
|
+
config = AddDatasetOwnershipConfig.model_validate(config_dict)
|
|
59
59
|
return cls(config, ctx)
|
|
60
60
|
|
|
61
61
|
@staticmethod
|
|
@@ -209,7 +209,7 @@ class SimpleAddDatasetOwnership(AddDatasetOwnership):
|
|
|
209
209
|
def create(
|
|
210
210
|
cls, config_dict: dict, ctx: PipelineContext
|
|
211
211
|
) -> "SimpleAddDatasetOwnership":
|
|
212
|
-
config = SimpleDatasetOwnershipConfig.
|
|
212
|
+
config = SimpleDatasetOwnershipConfig.model_validate(config_dict)
|
|
213
213
|
return cls(config, ctx)
|
|
214
214
|
|
|
215
215
|
|
|
@@ -247,5 +247,5 @@ class PatternAddDatasetOwnership(AddDatasetOwnership):
|
|
|
247
247
|
def create(
|
|
248
248
|
cls, config_dict: dict, ctx: PipelineContext
|
|
249
249
|
) -> "PatternAddDatasetOwnership":
|
|
250
|
-
config = PatternDatasetOwnershipConfig.
|
|
250
|
+
config = PatternDatasetOwnershipConfig.model_validate(config_dict)
|
|
251
251
|
return cls(config, ctx)
|
|
@@ -50,7 +50,7 @@ class AddDatasetProperties(DatasetPropertiesTransformer):
|
|
|
50
50
|
|
|
51
51
|
@classmethod
|
|
52
52
|
def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetProperties":
|
|
53
|
-
config = AddDatasetPropertiesConfig.
|
|
53
|
+
config = AddDatasetPropertiesConfig.model_validate(config_dict)
|
|
54
54
|
return cls(config, ctx)
|
|
55
55
|
|
|
56
56
|
@staticmethod
|
|
@@ -144,5 +144,5 @@ class SimpleAddDatasetProperties(AddDatasetProperties):
|
|
|
144
144
|
def create(
|
|
145
145
|
cls, config_dict: dict, ctx: PipelineContext
|
|
146
146
|
) -> "SimpleAddDatasetProperties":
|
|
147
|
-
config = SimpleAddDatasetPropertiesConfig.
|
|
147
|
+
config = SimpleAddDatasetPropertiesConfig.model_validate(config_dict)
|
|
148
148
|
return cls(config, ctx)
|
|
@@ -38,7 +38,7 @@ class AddDatasetSchemaTags(DatasetSchemaMetadataTransformer):
|
|
|
38
38
|
|
|
39
39
|
@classmethod
|
|
40
40
|
def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetSchemaTags":
|
|
41
|
-
config = AddDatasetSchemaTagsConfig.
|
|
41
|
+
config = AddDatasetSchemaTagsConfig.model_validate(config_dict)
|
|
42
42
|
return cls(config, ctx)
|
|
43
43
|
|
|
44
44
|
def extend_field(
|
|
@@ -142,5 +142,5 @@ class PatternAddDatasetSchemaTags(AddDatasetSchemaTags):
|
|
|
142
142
|
def create(
|
|
143
143
|
cls, config_dict: dict, ctx: PipelineContext
|
|
144
144
|
) -> "PatternAddDatasetSchemaTags":
|
|
145
|
-
config = PatternDatasetTagsConfig.
|
|
145
|
+
config = PatternDatasetTagsConfig.model_validate(config_dict)
|
|
146
146
|
return cls(config, ctx)
|
|
@@ -39,7 +39,7 @@ class AddDatasetSchemaTerms(DatasetSchemaMetadataTransformer):
|
|
|
39
39
|
|
|
40
40
|
@classmethod
|
|
41
41
|
def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetSchemaTerms":
|
|
42
|
-
config = AddDatasetSchemaTermsConfig.
|
|
42
|
+
config = AddDatasetSchemaTermsConfig.model_validate(config_dict)
|
|
43
43
|
return cls(config, ctx)
|
|
44
44
|
|
|
45
45
|
def extend_field(
|
|
@@ -162,5 +162,5 @@ class PatternAddDatasetSchemaTerms(AddDatasetSchemaTerms):
|
|
|
162
162
|
def create(
|
|
163
163
|
cls, config_dict: dict, ctx: PipelineContext
|
|
164
164
|
) -> "PatternAddDatasetSchemaTerms":
|
|
165
|
-
config = PatternDatasetTermsConfig.
|
|
165
|
+
config = PatternDatasetTermsConfig.model_validate(config_dict)
|
|
166
166
|
return cls(config, ctx)
|
|
@@ -41,7 +41,7 @@ class AddDatasetTags(DatasetTagsTransformer):
|
|
|
41
41
|
|
|
42
42
|
@classmethod
|
|
43
43
|
def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetTags":
|
|
44
|
-
config = AddDatasetTagsConfig.
|
|
44
|
+
config = AddDatasetTagsConfig.model_validate(config_dict)
|
|
45
45
|
return cls(config, ctx)
|
|
46
46
|
|
|
47
47
|
def transform_aspect(
|
|
@@ -104,7 +104,7 @@ class SimpleAddDatasetTags(AddDatasetTags):
|
|
|
104
104
|
|
|
105
105
|
@classmethod
|
|
106
106
|
def create(cls, config_dict: dict, ctx: PipelineContext) -> "SimpleAddDatasetTags":
|
|
107
|
-
config = SimpleDatasetTagConfig.
|
|
107
|
+
config = SimpleDatasetTagConfig.model_validate(config_dict)
|
|
108
108
|
return cls(config, ctx)
|
|
109
109
|
|
|
110
110
|
|
|
@@ -128,5 +128,5 @@ class PatternAddDatasetTags(AddDatasetTags):
|
|
|
128
128
|
|
|
129
129
|
@classmethod
|
|
130
130
|
def create(cls, config_dict: dict, ctx: PipelineContext) -> "PatternAddDatasetTags":
|
|
131
|
-
config = PatternDatasetTagsConfig.
|
|
131
|
+
config = PatternDatasetTagsConfig.model_validate(config_dict)
|
|
132
132
|
return cls(config, ctx)
|
|
@@ -39,7 +39,7 @@ class AddDatasetTerms(DatasetTermsTransformer):
|
|
|
39
39
|
|
|
40
40
|
@classmethod
|
|
41
41
|
def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetTerms":
|
|
42
|
-
config = AddDatasetTermsConfig.
|
|
42
|
+
config = AddDatasetTermsConfig.model_validate(config_dict)
|
|
43
43
|
return cls(config, ctx)
|
|
44
44
|
|
|
45
45
|
@staticmethod
|
|
@@ -120,7 +120,7 @@ class SimpleAddDatasetTerms(AddDatasetTerms):
|
|
|
120
120
|
|
|
121
121
|
@classmethod
|
|
122
122
|
def create(cls, config_dict: dict, ctx: PipelineContext) -> "SimpleAddDatasetTerms":
|
|
123
|
-
config = SimpleDatasetTermsConfig.
|
|
123
|
+
config = SimpleDatasetTermsConfig.model_validate(config_dict)
|
|
124
124
|
return cls(config, ctx)
|
|
125
125
|
|
|
126
126
|
|
|
@@ -147,5 +147,5 @@ class PatternAddDatasetTerms(AddDatasetTerms):
|
|
|
147
147
|
def create(
|
|
148
148
|
cls, config_dict: dict, ctx: PipelineContext
|
|
149
149
|
) -> "PatternAddDatasetTerms":
|
|
150
|
-
config = PatternDatasetTermsConfig.
|
|
150
|
+
config = PatternDatasetTermsConfig.model_validate(config_dict)
|
|
151
151
|
return cls(config, ctx)
|
|
@@ -67,7 +67,7 @@ class AddDatasetDomain(DatasetDomainTransformer):
|
|
|
67
67
|
|
|
68
68
|
@classmethod
|
|
69
69
|
def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetDomain":
|
|
70
|
-
config = AddDatasetDomainSemanticsConfig.
|
|
70
|
+
config = AddDatasetDomainSemanticsConfig.model_validate(config_dict)
|
|
71
71
|
return cls(config, ctx)
|
|
72
72
|
|
|
73
73
|
@staticmethod
|
|
@@ -208,7 +208,7 @@ class SimpleAddDatasetDomain(AddDatasetDomain):
|
|
|
208
208
|
def create(
|
|
209
209
|
cls, config_dict: dict, ctx: PipelineContext
|
|
210
210
|
) -> "SimpleAddDatasetDomain":
|
|
211
|
-
config = SimpleDatasetDomainSemanticsConfig.
|
|
211
|
+
config = SimpleDatasetDomainSemanticsConfig.model_validate(config_dict)
|
|
212
212
|
return cls(config, ctx)
|
|
213
213
|
|
|
214
214
|
|
|
@@ -238,5 +238,5 @@ class PatternAddDatasetDomain(AddDatasetDomain):
|
|
|
238
238
|
def create(
|
|
239
239
|
cls, config_dict: dict, ctx: PipelineContext
|
|
240
240
|
) -> "PatternAddDatasetDomain":
|
|
241
|
-
config = PatternDatasetDomainSemanticsConfig.
|
|
241
|
+
config = PatternDatasetDomainSemanticsConfig.model_validate(config_dict)
|
|
242
242
|
return cls(config, ctx)
|
|
@@ -27,7 +27,7 @@ class DatasetTagDomainMapper(DatasetDomainTransformer):
|
|
|
27
27
|
def create(
|
|
28
28
|
cls, config_dict: dict, ctx: PipelineContext
|
|
29
29
|
) -> "DatasetTagDomainMapper":
|
|
30
|
-
config = DatasetTagDomainMapperConfig.
|
|
30
|
+
config = DatasetTagDomainMapperConfig.model_validate(config_dict)
|
|
31
31
|
return cls(config, ctx)
|
|
32
32
|
|
|
33
33
|
def transform_aspect(
|
|
@@ -29,7 +29,7 @@ class ExtractDatasetTags(DatasetTagsTransformer):
|
|
|
29
29
|
|
|
30
30
|
@classmethod
|
|
31
31
|
def create(cls, config_dict: dict, ctx: PipelineContext) -> "ExtractDatasetTags":
|
|
32
|
-
config = ExtractDatasetTagsConfig.
|
|
32
|
+
config = ExtractDatasetTagsConfig.model_validate(config_dict)
|
|
33
33
|
return cls(config, ctx)
|
|
34
34
|
|
|
35
35
|
def _get_tags_to_add(self, entity_urn: str) -> List[TagAssociationClass]:
|
|
@@ -62,7 +62,7 @@ class ExtractOwnersFromTagsTransformer(DatasetTagsTransformer):
|
|
|
62
62
|
def create(
|
|
63
63
|
cls, config_dict: dict, ctx: PipelineContext
|
|
64
64
|
) -> "ExtractOwnersFromTagsTransformer":
|
|
65
|
-
config = ExtractOwnersFromTagsConfig.
|
|
65
|
+
config = ExtractOwnersFromTagsConfig.model_validate(config_dict)
|
|
66
66
|
return cls(config, ctx)
|
|
67
67
|
|
|
68
68
|
def get_owner_urn(self, owner_str: str) -> str:
|
|
@@ -24,7 +24,7 @@ class MarkDatasetStatus(DatasetStatusTransformer):
|
|
|
24
24
|
|
|
25
25
|
@classmethod
|
|
26
26
|
def create(cls, config_dict: dict, ctx: PipelineContext) -> "MarkDatasetStatus":
|
|
27
|
-
config = MarkDatasetStatusConfig.
|
|
27
|
+
config = MarkDatasetStatusConfig.model_validate(config_dict)
|
|
28
28
|
return cls(config, ctx)
|
|
29
29
|
|
|
30
30
|
def transform_aspect(
|
|
@@ -38,7 +38,7 @@ class PatternCleanupDatasetUsageUser(DatasetUsageStatisticsTransformer):
|
|
|
38
38
|
def create(
|
|
39
39
|
cls, config_dict: dict, ctx: PipelineContext
|
|
40
40
|
) -> "PatternCleanupDatasetUsageUser":
|
|
41
|
-
config = PatternCleanupDatasetUsageUserConfig.
|
|
41
|
+
config = PatternCleanupDatasetUsageUserConfig.model_validate(config_dict)
|
|
42
42
|
return cls(config, ctx)
|
|
43
43
|
|
|
44
44
|
def transform_aspect(
|
|
@@ -37,7 +37,7 @@ class PatternCleanUpOwnership(OwnershipTransformer):
|
|
|
37
37
|
def create(
|
|
38
38
|
cls, config_dict: dict, ctx: PipelineContext
|
|
39
39
|
) -> "PatternCleanUpOwnership":
|
|
40
|
-
config = PatternCleanUpOwnershipConfig.
|
|
40
|
+
config = PatternCleanUpOwnershipConfig.model_validate(config_dict)
|
|
41
41
|
return cls(config, ctx)
|
|
42
42
|
|
|
43
43
|
def _get_current_owner_urns(self, entity_urn: str) -> Set[str]:
|
|
@@ -21,7 +21,7 @@ class SimpleRemoveDatasetOwnership(OwnershipTransformer):
|
|
|
21
21
|
def create(
|
|
22
22
|
cls, config_dict: dict, ctx: PipelineContext
|
|
23
23
|
) -> "SimpleRemoveDatasetOwnership":
|
|
24
|
-
config = ClearDatasetOwnershipConfig.
|
|
24
|
+
config = ClearDatasetOwnershipConfig.model_validate(config_dict)
|
|
25
25
|
return cls(config, ctx)
|
|
26
26
|
|
|
27
27
|
def transform_aspect(
|
|
@@ -47,7 +47,7 @@ class ReplaceExternalUrlDataset(DatasetPropertiesTransformer, ReplaceUrl):
|
|
|
47
47
|
def create(
|
|
48
48
|
cls, config_dict: dict, ctx: PipelineContext
|
|
49
49
|
) -> "ReplaceExternalUrlDataset":
|
|
50
|
-
config = ReplaceExternalUrlConfig.
|
|
50
|
+
config = ReplaceExternalUrlConfig.model_validate(config_dict)
|
|
51
51
|
return cls(config, ctx)
|
|
52
52
|
|
|
53
53
|
def transform_aspect(
|
|
@@ -97,7 +97,7 @@ class ReplaceExternalUrlContainer(ContainerPropertiesTransformer, ReplaceUrl):
|
|
|
97
97
|
def create(
|
|
98
98
|
cls, config_dict: dict, ctx: PipelineContext
|
|
99
99
|
) -> "ReplaceExternalUrlContainer":
|
|
100
|
-
config = ReplaceExternalUrlConfig.
|
|
100
|
+
config = ReplaceExternalUrlConfig.model_validate(config_dict)
|
|
101
101
|
return cls(config, ctx)
|
|
102
102
|
|
|
103
103
|
def transform_aspect(
|
|
@@ -42,7 +42,7 @@ class SetBrowsePathTransformer(BaseTransformer, SingleAspectTransformer):
|
|
|
42
42
|
def create(
|
|
43
43
|
cls, config_dict: dict, ctx: PipelineContext
|
|
44
44
|
) -> "SetBrowsePathTransformer":
|
|
45
|
-
config = SetBrowsePathTransformerConfig.
|
|
45
|
+
config = SetBrowsePathTransformerConfig.model_validate(config_dict)
|
|
46
46
|
return cls(config, ctx)
|
|
47
47
|
|
|
48
48
|
@staticmethod
|
|
@@ -32,7 +32,7 @@ class TagsToTermMapper(TagsToTermTransformer):
|
|
|
32
32
|
|
|
33
33
|
@classmethod
|
|
34
34
|
def create(cls, config_dict: dict, ctx: PipelineContext) -> "TagsToTermMapper":
|
|
35
|
-
config = TagsToTermMapperConfig.
|
|
35
|
+
config = TagsToTermMapperConfig.model_validate(config_dict)
|
|
36
36
|
return cls(config, ctx)
|
|
37
37
|
|
|
38
38
|
@staticmethod
|
datahub/lite/duckdb_lite.py
CHANGED
|
@@ -42,7 +42,7 @@ logger = logging.getLogger(__name__)
|
|
|
42
42
|
class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
|
|
43
43
|
@classmethod
|
|
44
44
|
def create(cls, config_dict: dict) -> "DuckDBLite":
|
|
45
|
-
config: DuckDBLiteConfig = DuckDBLiteConfig.
|
|
45
|
+
config: DuckDBLiteConfig = DuckDBLiteConfig.model_validate(config_dict)
|
|
46
46
|
return DuckDBLite(config)
|
|
47
47
|
|
|
48
48
|
def __init__(self, config: DuckDBLiteConfig) -> None:
|
datahub/lite/lite_util.py
CHANGED
|
@@ -92,7 +92,7 @@ class DataHubLiteWrapper(DataHubLiteLocal):
|
|
|
92
92
|
|
|
93
93
|
|
|
94
94
|
def get_datahub_lite(config_dict: dict, read_only: bool = False) -> "DataHubLiteLocal":
|
|
95
|
-
lite_local_config = LiteLocalConfig.
|
|
95
|
+
lite_local_config = LiteLocalConfig.model_validate(config_dict)
|
|
96
96
|
|
|
97
97
|
lite_type = lite_local_config.type
|
|
98
98
|
try:
|
|
@@ -102,7 +102,7 @@ def get_datahub_lite(config_dict: dict, read_only: bool = False) -> "DataHubLite
|
|
|
102
102
|
f"Failed to find a registered lite implementation for {lite_type}. Valid values are {[k for k in lite_registry.mapping]}"
|
|
103
103
|
) from e
|
|
104
104
|
|
|
105
|
-
lite_specific_config = lite_class.get_config_class().
|
|
105
|
+
lite_specific_config = lite_class.get_config_class().model_validate(
|
|
106
106
|
lite_local_config.config
|
|
107
107
|
)
|
|
108
108
|
lite = lite_class(lite_specific_config)
|
datahub/metadata/schema.avsc
CHANGED
|
@@ -4824,7 +4824,7 @@
|
|
|
4824
4824
|
{
|
|
4825
4825
|
"Searchable": {
|
|
4826
4826
|
"fieldName": "entities",
|
|
4827
|
-
"fieldType": "
|
|
4827
|
+
"fieldType": "KEYWORD"
|
|
4828
4828
|
},
|
|
4829
4829
|
"java": {
|
|
4830
4830
|
"class": "com.linkedin.pegasus2avro.common.urn.Urn"
|
|
@@ -20303,7 +20303,12 @@
|
|
|
20303
20303
|
"doc": "The fully qualified name of the property. e.g. io.acryl.datahub.myProperty"
|
|
20304
20304
|
},
|
|
20305
20305
|
{
|
|
20306
|
-
"Searchable": {
|
|
20306
|
+
"Searchable": {
|
|
20307
|
+
"enableAutocomplete": true,
|
|
20308
|
+
"fieldType": "WORD_GRAM",
|
|
20309
|
+
"searchLabel": "entityName",
|
|
20310
|
+
"searchTier": 1
|
|
20311
|
+
},
|
|
20307
20312
|
"type": [
|
|
20308
20313
|
"null",
|
|
20309
20314
|
"string"
|
|
@@ -17,7 +17,12 @@
|
|
|
17
17
|
"doc": "The fully qualified name of the property. e.g. io.acryl.datahub.myProperty"
|
|
18
18
|
},
|
|
19
19
|
{
|
|
20
|
-
"Searchable": {
|
|
20
|
+
"Searchable": {
|
|
21
|
+
"enableAutocomplete": true,
|
|
22
|
+
"fieldType": "WORD_GRAM",
|
|
23
|
+
"searchLabel": "entityName",
|
|
24
|
+
"searchTier": 1
|
|
25
|
+
},
|
|
21
26
|
"type": [
|
|
22
27
|
"null",
|
|
23
28
|
"string"
|
datahub/sdk/__init__.py
CHANGED
|
@@ -28,6 +28,7 @@ from datahub.sdk.main_client import DataHubClient
|
|
|
28
28
|
from datahub.sdk.mlmodel import MLModel
|
|
29
29
|
from datahub.sdk.mlmodelgroup import MLModelGroup
|
|
30
30
|
from datahub.sdk.search_filters import Filter, FilterDsl
|
|
31
|
+
from datahub.sdk.tag import Tag
|
|
31
32
|
|
|
32
33
|
# We want to print out the warning if people do `from datahub.sdk import X`.
|
|
33
34
|
# But we don't want to print out warnings if they're doing a more direct
|
datahub/sdk/_all_entities.py
CHANGED
|
@@ -9,6 +9,7 @@ from datahub.sdk.dataset import Dataset
|
|
|
9
9
|
from datahub.sdk.entity import Entity
|
|
10
10
|
from datahub.sdk.mlmodel import MLModel
|
|
11
11
|
from datahub.sdk.mlmodelgroup import MLModelGroup
|
|
12
|
+
from datahub.sdk.tag import Tag
|
|
12
13
|
|
|
13
14
|
# Base entity classes that don't have circular dependencies
|
|
14
15
|
# Those that do are imported in the EntityClient where needed
|
|
@@ -22,6 +23,7 @@ ENTITY_CLASSES_LIST: List[Type[Entity]] = [
|
|
|
22
23
|
DataJob,
|
|
23
24
|
Dashboard,
|
|
24
25
|
Chart,
|
|
26
|
+
Tag,
|
|
25
27
|
]
|
|
26
28
|
|
|
27
29
|
# Create the mapping of entity types to classes
|
datahub/sdk/search_filters.py
CHANGED
|
@@ -16,6 +16,7 @@ from typing import (
|
|
|
16
16
|
)
|
|
17
17
|
|
|
18
18
|
import pydantic
|
|
19
|
+
from pydantic import field_validator
|
|
19
20
|
|
|
20
21
|
from datahub.configuration.common import ConfigModel
|
|
21
22
|
from datahub.configuration.pydantic_migration_helpers import (
|
|
@@ -102,7 +103,8 @@ class _EntitySubtypeFilter(_BaseFilter):
|
|
|
102
103
|
description="The entity subtype to filter on. Can be 'Table', 'View', 'Source', etc. depending on the native platform's concepts.",
|
|
103
104
|
)
|
|
104
105
|
|
|
105
|
-
@
|
|
106
|
+
@field_validator("entity_subtype", mode="before")
|
|
107
|
+
@classmethod
|
|
106
108
|
def validate_entity_subtype(cls, v: str) -> List[str]:
|
|
107
109
|
return [v] if not isinstance(v, list) else v
|
|
108
110
|
|
|
@@ -141,10 +143,13 @@ class _PlatformFilter(_BaseFilter):
|
|
|
141
143
|
platform: List[str]
|
|
142
144
|
# TODO: Add validator to convert string -> list of strings
|
|
143
145
|
|
|
144
|
-
@
|
|
145
|
-
|
|
146
|
+
@field_validator("platform", mode="before")
|
|
147
|
+
@classmethod
|
|
148
|
+
def validate_platform(cls, v):
|
|
146
149
|
# Subtle - we use the constructor instead of the from_string method
|
|
147
150
|
# because coercion is acceptable here.
|
|
151
|
+
if isinstance(v, list):
|
|
152
|
+
return [str(DataPlatformUrn(item)) for item in v]
|
|
148
153
|
return str(DataPlatformUrn(v))
|
|
149
154
|
|
|
150
155
|
def _build_rule(self) -> SearchFilterRule:
|
|
@@ -161,8 +166,11 @@ class _PlatformFilter(_BaseFilter):
|
|
|
161
166
|
class _DomainFilter(_BaseFilter):
|
|
162
167
|
domain: List[str]
|
|
163
168
|
|
|
164
|
-
@
|
|
165
|
-
|
|
169
|
+
@field_validator("domain", mode="before")
|
|
170
|
+
@classmethod
|
|
171
|
+
def validate_domain(cls, v):
|
|
172
|
+
if isinstance(v, list):
|
|
173
|
+
return [str(DomainUrn.from_string(item)) for item in v]
|
|
166
174
|
return str(DomainUrn.from_string(v))
|
|
167
175
|
|
|
168
176
|
def _build_rule(self) -> SearchFilterRule:
|
|
@@ -183,8 +191,11 @@ class _ContainerFilter(_BaseFilter):
|
|
|
183
191
|
description="If true, only entities that are direct descendants of the container will be returned.",
|
|
184
192
|
)
|
|
185
193
|
|
|
186
|
-
@
|
|
187
|
-
|
|
194
|
+
@field_validator("container", mode="before")
|
|
195
|
+
@classmethod
|
|
196
|
+
def validate_container(cls, v):
|
|
197
|
+
if isinstance(v, list):
|
|
198
|
+
return [str(ContainerUrn.from_string(item)) for item in v]
|
|
188
199
|
return str(ContainerUrn.from_string(v))
|
|
189
200
|
|
|
190
201
|
@classmethod
|
|
@@ -249,17 +260,25 @@ class _OwnerFilter(_BaseFilter):
|
|
|
249
260
|
description="The owner to filter on. Should be user or group URNs.",
|
|
250
261
|
)
|
|
251
262
|
|
|
252
|
-
@
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
+
@field_validator("owner", mode="before")
|
|
264
|
+
@classmethod
|
|
265
|
+
def validate_owner(cls, v):
|
|
266
|
+
validated = []
|
|
267
|
+
for owner in v:
|
|
268
|
+
if not owner.startswith("urn:li:"):
|
|
269
|
+
raise ValueError(
|
|
270
|
+
f"Owner must be a valid User or Group URN, got: {owner}"
|
|
271
|
+
)
|
|
272
|
+
_type = guess_entity_type(owner)
|
|
273
|
+
if _type == CorpUserUrn.ENTITY_TYPE:
|
|
274
|
+
validated.append(str(CorpUserUrn.from_string(owner)))
|
|
275
|
+
elif _type == CorpGroupUrn.ENTITY_TYPE:
|
|
276
|
+
validated.append(str(CorpGroupUrn.from_string(owner)))
|
|
277
|
+
else:
|
|
278
|
+
raise ValueError(
|
|
279
|
+
f"Owner must be a valid User or Group URN, got: {owner}"
|
|
280
|
+
)
|
|
281
|
+
return validated
|
|
263
282
|
|
|
264
283
|
def _build_rule(self) -> SearchFilterRule:
|
|
265
284
|
return SearchFilterRule(
|
|
@@ -279,17 +298,21 @@ class _GlossaryTermFilter(_BaseFilter):
|
|
|
279
298
|
description="The glossary term to filter on. Should be glossary term URNs.",
|
|
280
299
|
)
|
|
281
300
|
|
|
282
|
-
@
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
301
|
+
@field_validator("glossary_term", mode="before")
|
|
302
|
+
@classmethod
|
|
303
|
+
def validate_glossary_term(cls, v):
|
|
304
|
+
validated = []
|
|
305
|
+
for term in v:
|
|
306
|
+
if not term.startswith("urn:li:"):
|
|
307
|
+
raise ValueError(f"Glossary term must be a valid URN, got: {term}")
|
|
308
|
+
# Validate that it's a glossary term URN
|
|
309
|
+
_type = guess_entity_type(term)
|
|
310
|
+
if _type != "glossaryTerm":
|
|
311
|
+
raise ValueError(
|
|
312
|
+
f"Glossary term must be a valid glossary term URN, got: {term}"
|
|
313
|
+
)
|
|
314
|
+
validated.append(term)
|
|
315
|
+
return validated
|
|
293
316
|
|
|
294
317
|
def _build_rule(self) -> SearchFilterRule:
|
|
295
318
|
return SearchFilterRule(
|
|
@@ -309,15 +332,19 @@ class _TagFilter(_BaseFilter):
|
|
|
309
332
|
description="The tag to filter on. Should be tag URNs.",
|
|
310
333
|
)
|
|
311
334
|
|
|
312
|
-
@
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
335
|
+
@field_validator("tag", mode="before")
|
|
336
|
+
@classmethod
|
|
337
|
+
def validate_tag(cls, v):
|
|
338
|
+
validated = []
|
|
339
|
+
for tag in v:
|
|
340
|
+
if not tag.startswith("urn:li:"):
|
|
341
|
+
raise ValueError(f"Tag must be a valid URN, got: {tag}")
|
|
342
|
+
# Validate that it's a tag URN
|
|
343
|
+
_type = guess_entity_type(tag)
|
|
344
|
+
if _type != "tag":
|
|
345
|
+
raise ValueError(f"Tag must be a valid tag URN, got: {tag}")
|
|
346
|
+
validated.append(tag)
|
|
347
|
+
return validated
|
|
321
348
|
|
|
322
349
|
def _build_rule(self) -> SearchFilterRule:
|
|
323
350
|
return SearchFilterRule(
|
|
@@ -426,7 +453,8 @@ class _Not(_BaseFilter):
|
|
|
426
453
|
|
|
427
454
|
not_: "Filter" = pydantic.Field(alias="not")
|
|
428
455
|
|
|
429
|
-
@
|
|
456
|
+
@field_validator("not_", mode="after")
|
|
457
|
+
@classmethod
|
|
430
458
|
def validate_not(cls, v: "Filter") -> "Filter":
|
|
431
459
|
inner_filter = v.compile()
|
|
432
460
|
if len(inner_filter) != 1:
|
|
@@ -571,7 +599,7 @@ def load_filters(obj: Any) -> Filter:
|
|
|
571
599
|
if PYDANTIC_VERSION_2:
|
|
572
600
|
return pydantic.TypeAdapter(Filter).validate_python(obj) # type: ignore
|
|
573
601
|
else:
|
|
574
|
-
return pydantic.
|
|
602
|
+
return pydantic.TypeAdapter(Filter).validate_python(obj) # type: ignore
|
|
575
603
|
|
|
576
604
|
|
|
577
605
|
# We need FilterDsl for two reasons:
|