acryl-datahub: acryl_datahub-1.3.1-py3-none-any.whl → acryl_datahub-1.3.1.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/METADATA +2582 -2582
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/RECORD +203 -201
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +2 -2
- datahub/api/entities/corpgroup/corpgroup.py +11 -6
- datahub/api/entities/corpuser/corpuser.py +11 -11
- datahub/api/entities/dataproduct/dataproduct.py +47 -27
- datahub/api/entities/dataset/dataset.py +32 -21
- datahub/api/entities/external/lake_formation_external_entites.py +5 -6
- datahub/api/entities/external/unity_catalog_external_entites.py +5 -7
- datahub/api/entities/forms/forms.py +16 -14
- datahub/api/entities/structuredproperties/structuredproperties.py +23 -16
- datahub/cli/check_cli.py +2 -2
- datahub/cli/config_utils.py +3 -3
- datahub/cli/lite_cli.py +9 -7
- datahub/cli/migrate.py +4 -4
- datahub/cli/quickstart_versioning.py +3 -3
- datahub/cli/specific/group_cli.py +1 -1
- datahub/cli/specific/structuredproperties_cli.py +1 -1
- datahub/cli/specific/user_cli.py +1 -1
- datahub/configuration/common.py +14 -2
- datahub/configuration/connection_resolver.py +2 -2
- datahub/configuration/git.py +47 -30
- datahub/configuration/import_resolver.py +2 -2
- datahub/configuration/kafka.py +4 -3
- datahub/configuration/time_window_config.py +26 -26
- datahub/configuration/validate_field_deprecation.py +2 -2
- datahub/configuration/validate_field_removal.py +2 -2
- datahub/configuration/validate_field_rename.py +2 -2
- datahub/configuration/validate_multiline_string.py +2 -1
- datahub/emitter/kafka_emitter.py +3 -1
- datahub/emitter/rest_emitter.py +2 -4
- datahub/ingestion/api/decorators.py +1 -1
- datahub/ingestion/api/report.py +1 -1
- datahub/ingestion/api/sink.py +1 -1
- datahub/ingestion/api/source.py +1 -1
- datahub/ingestion/glossary/datahub_classifier.py +11 -8
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
- datahub/ingestion/reporting/file_reporter.py +5 -4
- datahub/ingestion/run/pipeline.py +6 -6
- datahub/ingestion/run/pipeline_config.py +12 -14
- datahub/ingestion/run/sink_callback.py +1 -1
- datahub/ingestion/sink/datahub_rest.py +6 -4
- datahub/ingestion/source/abs/config.py +19 -19
- datahub/ingestion/source/abs/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/abs/source.py +2 -2
- datahub/ingestion/source/aws/aws_common.py +1 -1
- datahub/ingestion/source/aws/glue.py +6 -4
- datahub/ingestion/source/aws/sagemaker.py +1 -1
- datahub/ingestion/source/azure/azure_common.py +8 -12
- datahub/ingestion/source/bigquery_v2/bigquery.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +43 -30
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -1
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/common/gcp_credentials_config.py +10 -10
- datahub/ingestion/source/data_lake_common/path_spec.py +85 -89
- datahub/ingestion/source/datahub/config.py +8 -8
- datahub/ingestion/source/datahub/datahub_source.py +1 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +9 -3
- datahub/ingestion/source/dbt/dbt_common.py +39 -37
- datahub/ingestion/source/dbt/dbt_core.py +10 -12
- datahub/ingestion/source/debug/datahub_debug.py +1 -1
- datahub/ingestion/source/delta_lake/config.py +6 -4
- datahub/ingestion/source/dremio/dremio_config.py +10 -6
- datahub/ingestion/source/dremio/dremio_source.py +15 -15
- datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
- datahub/ingestion/source/elastic_search.py +4 -3
- datahub/ingestion/source/excel/source.py +1 -1
- datahub/ingestion/source/feast.py +1 -1
- datahub/ingestion/source/file.py +5 -4
- datahub/ingestion/source/fivetran/config.py +17 -16
- datahub/ingestion/source/fivetran/fivetran.py +2 -2
- datahub/ingestion/source/gc/datahub_gc.py +1 -1
- datahub/ingestion/source/gcs/gcs_source.py +8 -10
- datahub/ingestion/source/ge_profiling_config.py +8 -5
- datahub/ingestion/source/grafana/grafana_api.py +2 -2
- datahub/ingestion/source/grafana/grafana_config.py +4 -3
- datahub/ingestion/source/grafana/grafana_source.py +1 -1
- datahub/ingestion/source/grafana/models.py +23 -5
- datahub/ingestion/source/hex/api.py +7 -5
- datahub/ingestion/source/hex/hex.py +4 -3
- datahub/ingestion/source/iceberg/iceberg.py +1 -1
- datahub/ingestion/source/iceberg/iceberg_common.py +5 -3
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +10 -10
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +7 -5
- datahub/ingestion/source/looker/looker_config.py +21 -20
- datahub/ingestion/source/looker/lookml_config.py +47 -47
- datahub/ingestion/source/metabase.py +8 -8
- datahub/ingestion/source/metadata/business_glossary.py +2 -2
- datahub/ingestion/source/metadata/lineage.py +13 -8
- datahub/ingestion/source/mlflow.py +1 -1
- datahub/ingestion/source/mode.py +6 -4
- datahub/ingestion/source/mongodb.py +4 -3
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +17 -23
- datahub/ingestion/source/openapi.py +6 -8
- datahub/ingestion/source/powerbi/config.py +33 -32
- datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +2 -2
- datahub/ingestion/source/powerbi/powerbi.py +1 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +2 -2
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +8 -6
- datahub/ingestion/source/preset.py +8 -8
- datahub/ingestion/source/pulsar.py +1 -1
- datahub/ingestion/source/qlik_sense/data_classes.py +15 -8
- datahub/ingestion/source/qlik_sense/qlik_api.py +7 -7
- datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
- datahub/ingestion/source/redshift/config.py +18 -20
- datahub/ingestion/source/redshift/redshift.py +2 -2
- datahub/ingestion/source/redshift/usage.py +23 -3
- datahub/ingestion/source/s3/config.py +83 -62
- datahub/ingestion/source/s3/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/s3/source.py +8 -5
- datahub/ingestion/source/sac/sac.py +5 -4
- datahub/ingestion/source/salesforce.py +3 -2
- datahub/ingestion/source/schema/json_schema.py +2 -2
- datahub/ingestion/source/sigma/data_classes.py +3 -2
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/sigma/sigma_api.py +7 -7
- datahub/ingestion/source/slack/slack.py +1 -1
- datahub/ingestion/source/snaplogic/snaplogic.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_assertion.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +35 -31
- datahub/ingestion/source/snowflake/snowflake_connection.py +35 -13
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_queries.py +1 -1
- datahub/ingestion/source/sql/athena.py +1 -1
- datahub/ingestion/source/sql/clickhouse.py +4 -2
- datahub/ingestion/source/sql/cockroachdb.py +1 -1
- datahub/ingestion/source/sql/druid.py +1 -1
- datahub/ingestion/source/sql/hana.py +1 -1
- datahub/ingestion/source/sql/hive.py +7 -5
- datahub/ingestion/source/sql/hive_metastore.py +1 -1
- datahub/ingestion/source/sql/mssql/source.py +13 -6
- datahub/ingestion/source/sql/mysql.py +1 -1
- datahub/ingestion/source/sql/oracle.py +17 -10
- datahub/ingestion/source/sql/postgres.py +2 -2
- datahub/ingestion/source/sql/presto.py +1 -1
- datahub/ingestion/source/sql/sql_config.py +8 -9
- datahub/ingestion/source/sql/sql_generic.py +1 -1
- datahub/ingestion/source/sql/teradata.py +1 -1
- datahub/ingestion/source/sql/trino.py +1 -1
- datahub/ingestion/source/sql/vertica.py +5 -4
- datahub/ingestion/source/sql_queries.py +11 -8
- datahub/ingestion/source/state/checkpoint.py +2 -2
- datahub/ingestion/source/state/entity_removal_state.py +2 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +55 -45
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/superset.py +9 -9
- datahub/ingestion/source/tableau/tableau.py +14 -16
- datahub/ingestion/source/unity/azure_auth_config.py +15 -0
- datahub/ingestion/source/unity/config.py +51 -34
- datahub/ingestion/source/unity/connection.py +7 -1
- datahub/ingestion/source/unity/connection_test.py +1 -1
- datahub/ingestion/source/unity/proxy.py +216 -7
- datahub/ingestion/source/unity/proxy_types.py +91 -0
- datahub/ingestion/source/unity/source.py +29 -3
- datahub/ingestion/source/usage/clickhouse_usage.py +1 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +1 -1
- datahub/ingestion/source/usage/usage_common.py +5 -3
- datahub/ingestion/source_config/csv_enricher.py +7 -6
- datahub/ingestion/source_config/operation_config.py +7 -4
- datahub/ingestion/source_config/pulsar.py +11 -15
- datahub/ingestion/transformer/add_dataset_browse_path.py +1 -1
- datahub/ingestion/transformer/add_dataset_dataproduct.py +6 -5
- datahub/ingestion/transformer/add_dataset_ownership.py +3 -3
- datahub/ingestion/transformer/add_dataset_properties.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_tags.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_terms.py +2 -2
- datahub/ingestion/transformer/add_dataset_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +1 -1
- datahub/ingestion/transformer/extract_dataset_tags.py +1 -1
- datahub/ingestion/transformer/extract_ownership_from_tags.py +1 -1
- datahub/ingestion/transformer/mark_dataset_status.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_ownership.py +1 -1
- datahub/ingestion/transformer/remove_dataset_ownership.py +1 -1
- datahub/ingestion/transformer/replace_external_url.py +2 -2
- datahub/ingestion/transformer/set_browse_path.py +1 -1
- datahub/ingestion/transformer/tags_to_terms.py +1 -1
- datahub/lite/duckdb_lite.py +1 -1
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/schema.avsc +7 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -1
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc +6 -1
- datahub/sdk/__init__.py +1 -0
- datahub/sdk/_all_entities.py +2 -0
- datahub/sdk/search_filters.py +68 -40
- datahub/sdk/tag.py +112 -0
- datahub/secret/datahub_secret_store.py +7 -4
- datahub/secret/file_secret_store.py +1 -1
- datahub/sql_parsing/sqlglot_lineage.py +5 -2
- datahub/testing/check_sql_parser_result.py +2 -2
- datahub/utilities/ingest_utils.py +1 -1
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1.dist-info}/top_level.txt +0 -0
datahub/sdk/tag.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Optional, Type
|
|
4
|
+
|
|
5
|
+
from typing_extensions import Self
|
|
6
|
+
|
|
7
|
+
import datahub.metadata.schema_classes as models
|
|
8
|
+
from datahub.metadata.urns import TagUrn, Urn
|
|
9
|
+
from datahub.sdk._shared import (
|
|
10
|
+
HasOwnership,
|
|
11
|
+
OwnersInputType,
|
|
12
|
+
)
|
|
13
|
+
from datahub.sdk.entity import Entity, ExtraAspectsType
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Tag(
|
|
17
|
+
HasOwnership,
|
|
18
|
+
Entity,
|
|
19
|
+
):
|
|
20
|
+
__slots__ = ()
|
|
21
|
+
|
|
22
|
+
@classmethod
|
|
23
|
+
def get_urn_type(cls) -> Type[TagUrn]:
|
|
24
|
+
return TagUrn
|
|
25
|
+
|
|
26
|
+
def __init__(
|
|
27
|
+
self,
|
|
28
|
+
*,
|
|
29
|
+
# Identity.
|
|
30
|
+
name: str,
|
|
31
|
+
# Tag properties.
|
|
32
|
+
display_name: Optional[str] = None,
|
|
33
|
+
description: Optional[str] = None,
|
|
34
|
+
color: Optional[str] = None,
|
|
35
|
+
# Standard aspects.
|
|
36
|
+
owners: Optional[OwnersInputType] = None,
|
|
37
|
+
extra_aspects: ExtraAspectsType = None,
|
|
38
|
+
):
|
|
39
|
+
"""Initialize a new Tag instance."""
|
|
40
|
+
urn = TagUrn(name=name)
|
|
41
|
+
super().__init__(urn)
|
|
42
|
+
self._set_extra_aspects(extra_aspects)
|
|
43
|
+
|
|
44
|
+
self._ensure_tag_props(
|
|
45
|
+
display_name=display_name or name,
|
|
46
|
+
description=description,
|
|
47
|
+
color=color,
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
if owners is not None:
|
|
51
|
+
self.set_owners(owners)
|
|
52
|
+
|
|
53
|
+
@classmethod
|
|
54
|
+
def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
|
|
55
|
+
assert isinstance(urn, TagUrn)
|
|
56
|
+
entity = cls(name=urn.name)
|
|
57
|
+
return entity._init_from_graph(current_aspects)
|
|
58
|
+
|
|
59
|
+
@property
|
|
60
|
+
def urn(self) -> TagUrn:
|
|
61
|
+
assert isinstance(self._urn, TagUrn)
|
|
62
|
+
return self._urn
|
|
63
|
+
|
|
64
|
+
def _ensure_tag_props(
|
|
65
|
+
self,
|
|
66
|
+
*,
|
|
67
|
+
display_name: Optional[str] = None,
|
|
68
|
+
description: Optional[str] = None,
|
|
69
|
+
color: Optional[str] = None,
|
|
70
|
+
) -> models.TagPropertiesClass:
|
|
71
|
+
existing_props = self._get_aspect(models.TagPropertiesClass)
|
|
72
|
+
if existing_props is not None:
|
|
73
|
+
if display_name is not None:
|
|
74
|
+
existing_props.name = display_name
|
|
75
|
+
if description is not None:
|
|
76
|
+
existing_props.description = description
|
|
77
|
+
if color is not None:
|
|
78
|
+
existing_props.colorHex = color
|
|
79
|
+
return existing_props
|
|
80
|
+
|
|
81
|
+
return self._setdefault_aspect(
|
|
82
|
+
models.TagPropertiesClass(
|
|
83
|
+
name=display_name or self.urn.name,
|
|
84
|
+
description=description,
|
|
85
|
+
colorHex=color,
|
|
86
|
+
)
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
@property
|
|
90
|
+
def name(self) -> str:
|
|
91
|
+
return self.urn.name
|
|
92
|
+
|
|
93
|
+
@property
|
|
94
|
+
def display_name(self) -> str:
|
|
95
|
+
return self._ensure_tag_props().name
|
|
96
|
+
|
|
97
|
+
def set_display_name(self, display_name: str) -> None:
|
|
98
|
+
self._ensure_tag_props(display_name=display_name)
|
|
99
|
+
|
|
100
|
+
@property
|
|
101
|
+
def description(self) -> Optional[str]:
|
|
102
|
+
return self._ensure_tag_props().description
|
|
103
|
+
|
|
104
|
+
def set_description(self, description: str) -> None:
|
|
105
|
+
self._ensure_tag_props(description=description)
|
|
106
|
+
|
|
107
|
+
@property
|
|
108
|
+
def color(self) -> Optional[str]:
|
|
109
|
+
return self._ensure_tag_props().colorHex
|
|
110
|
+
|
|
111
|
+
def set_color(self, color: str) -> None:
|
|
112
|
+
self._ensure_tag_props(color=color)
|
|
datahub/secret/datahub_secret_store.py
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from typing import Any, Dict, List, Optional, Union
|
|
3
3
|
|
|
4
|
-
from pydantic import BaseModel, validator
|
|
4
|
+
from pydantic import BaseModel, field_validator
|
|
5
5
|
|
|
6
6
|
from datahub.ingestion.graph.client import DataHubGraph
|
|
7
7
|
from datahub.ingestion.graph.config import DatahubClientConfig
|
|
@@ -18,8 +18,11 @@ class DataHubSecretStoreConfig(BaseModel):
|
|
|
18
18
|
class Config:
|
|
19
19
|
arbitrary_types_allowed = True
|
|
20
20
|
|
|
21
|
-
@
|
|
22
|
-
|
|
21
|
+
@field_validator("graph_client", mode="after")
|
|
22
|
+
@classmethod
|
|
23
|
+
def check_graph_connection(
|
|
24
|
+
cls, v: Optional[DataHubGraph]
|
|
25
|
+
) -> Optional[DataHubGraph]:
|
|
23
26
|
if v is not None:
|
|
24
27
|
v.test_connection()
|
|
25
28
|
return v
|
|
@@ -63,7 +66,7 @@ class DataHubSecretStore(SecretStore):
|
|
|
63
66
|
|
|
64
67
|
@classmethod
|
|
65
68
|
def create(cls, config: Any) -> "DataHubSecretStore":
|
|
66
|
-
config = DataHubSecretStoreConfig.parse_obj(config)
|
|
69
|
+
config = DataHubSecretStoreConfig.model_validate(config)
|
|
67
70
|
return cls(config)
|
|
68
71
|
|
|
69
72
|
def close(self) -> None:
|
|
datahub/sql_parsing/sqlglot_lineage.py
@@ -28,6 +28,7 @@ import sqlglot.optimizer.optimizer
|
|
|
28
28
|
import sqlglot.optimizer.qualify
|
|
29
29
|
import sqlglot.optimizer.qualify_columns
|
|
30
30
|
import sqlglot.optimizer.unnest_subqueries
|
|
31
|
+
from pydantic import field_validator
|
|
31
32
|
|
|
32
33
|
from datahub.cli.env_utils import get_boolean_env_variable
|
|
33
34
|
from datahub.ingestion.graph.client import DataHubGraph
|
|
@@ -141,7 +142,8 @@ class DownstreamColumnRef(_ParserBaseModel):
|
|
|
141
142
|
column_type: Optional[SchemaFieldDataTypeClass] = None
|
|
142
143
|
native_column_type: Optional[str] = None
|
|
143
144
|
|
|
144
|
-
@
|
|
145
|
+
@field_validator("column_type", mode="before")
|
|
146
|
+
@classmethod
|
|
145
147
|
def _load_column_type(
|
|
146
148
|
cls, v: Optional[Union[dict, SchemaFieldDataTypeClass]]
|
|
147
149
|
) -> Optional[SchemaFieldDataTypeClass]:
|
|
@@ -215,7 +217,8 @@ class SqlParsingDebugInfo(_ParserBaseModel):
|
|
|
215
217
|
def error(self) -> Optional[Exception]:
|
|
216
218
|
return self.table_error or self.column_error
|
|
217
219
|
|
|
218
|
-
@
|
|
220
|
+
@field_validator("table_error", "column_error", mode="before")
|
|
221
|
+
@classmethod
|
|
219
222
|
def remove_variables_from_error(cls, v: Optional[Exception]) -> Optional[Exception]:
|
|
220
223
|
if v and v.__traceback__:
|
|
221
224
|
# Remove local variables from the traceback to avoid memory leaks.
|
|
datahub/testing/check_sql_parser_result.py
@@ -60,8 +60,8 @@ def assert_sql_result_with_resolver(
|
|
|
60
60
|
expected = SqlParsingResult.parse_raw(expected_file.read_text())
|
|
61
61
|
|
|
62
62
|
full_diff = deepdiff.DeepDiff(
|
|
63
|
-
expected.dict(),
|
|
64
|
-
res.dict(),
|
|
63
|
+
expected.model_dump(),
|
|
64
|
+
res.model_dump(),
|
|
65
65
|
exclude_regex_paths=[
|
|
66
66
|
r"root.column_lineage\[\d+\].logic",
|
|
67
67
|
],
|
|
datahub/utilities/ingest_utils.py
@@ -48,7 +48,7 @@ def deploy_source_vars(
|
|
|
48
48
|
|
|
49
49
|
deploy_options_raw = pipeline_config.pop("deployment", None)
|
|
50
50
|
if deploy_options_raw is not None:
|
|
51
|
-
deploy_options = DeployOptions.
|
|
51
|
+
deploy_options = DeployOptions.model_validate(deploy_options_raw)
|
|
52
52
|
|
|
53
53
|
if name:
|
|
54
54
|
logger.info(f"Overriding deployment name {deploy_options.name} with {name}")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|