acryl-datahub 0.15.0.2rc6__py3-none-any.whl → 0.15.0.2rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.2rc6.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/METADATA +2513 -2521
- {acryl_datahub-0.15.0.2rc6.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/RECORD +168 -168
- datahub/__init__.py +1 -1
- datahub/api/entities/assertion/assertion_operator.py +3 -5
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/assertion_operator.py +3 -5
- datahub/api/entities/dataproduct/dataproduct.py +4 -4
- datahub/api/entities/dataset/dataset.py +2 -1
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/cli/cli_utils.py +1 -1
- datahub/cli/docker_cli.py +6 -6
- datahub/cli/ingest_cli.py +25 -15
- datahub/cli/lite_cli.py +2 -2
- datahub/cli/migrate.py +3 -3
- datahub/cli/specific/assertions_cli.py +3 -3
- datahub/cli/timeline_cli.py +1 -1
- datahub/configuration/common.py +1 -2
- datahub/configuration/config_loader.py +73 -50
- datahub/configuration/git.py +2 -2
- datahub/configuration/time_window_config.py +10 -5
- datahub/emitter/mce_builder.py +4 -8
- datahub/emitter/mcp_patch_builder.py +1 -2
- datahub/entrypoints.py +6 -0
- datahub/ingestion/api/incremental_lineage_helper.py +2 -8
- datahub/ingestion/api/report.py +1 -2
- datahub/ingestion/api/source_helpers.py +1 -1
- datahub/ingestion/extractor/json_schema_util.py +3 -3
- datahub/ingestion/extractor/schema_util.py +3 -5
- datahub/ingestion/fs/s3_fs.py +3 -3
- datahub/ingestion/glossary/datahub_classifier.py +6 -4
- datahub/ingestion/graph/client.py +4 -6
- datahub/ingestion/run/pipeline.py +8 -7
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/source/abs/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/abs/source.py +19 -8
- datahub/ingestion/source/aws/glue.py +11 -11
- datahub/ingestion/source/aws/s3_boto_utils.py +3 -3
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +2 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +6 -6
- datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py +8 -4
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +15 -9
- datahub/ingestion/source/bigquery_v2/lineage.py +9 -9
- datahub/ingestion/source/bigquery_v2/queries.py +1 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +3 -3
- datahub/ingestion/source/bigquery_v2/usage.py +3 -3
- datahub/ingestion/source/cassandra/cassandra.py +0 -1
- datahub/ingestion/source/cassandra/cassandra_utils.py +4 -4
- datahub/ingestion/source/confluent_schema_registry.py +6 -6
- datahub/ingestion/source/csv_enricher.py +29 -29
- datahub/ingestion/source/datahub/config.py +4 -0
- datahub/ingestion/source/datahub/datahub_database_reader.py +4 -2
- datahub/ingestion/source/dbt/dbt_cloud.py +13 -13
- datahub/ingestion/source/dbt/dbt_common.py +9 -7
- datahub/ingestion/source/dremio/dremio_api.py +4 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +3 -3
- datahub/ingestion/source/elastic_search.py +4 -4
- datahub/ingestion/source/gc/datahub_gc.py +1 -0
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +17 -5
- datahub/ingestion/source/gcs/gcs_source.py +3 -2
- datahub/ingestion/source/ge_data_profiler.py +2 -5
- datahub/ingestion/source/ge_profiling_config.py +3 -3
- datahub/ingestion/source/iceberg/iceberg.py +3 -3
- datahub/ingestion/source/identity/azure_ad.py +3 -3
- datahub/ingestion/source/identity/okta.py +3 -3
- datahub/ingestion/source/kafka/kafka.py +11 -9
- datahub/ingestion/source/kafka_connect/kafka_connect.py +2 -3
- datahub/ingestion/source/kafka_connect/sink_connectors.py +3 -3
- datahub/ingestion/source/kafka_connect/source_connectors.py +3 -3
- datahub/ingestion/source/looker/looker_common.py +19 -19
- datahub/ingestion/source/looker/looker_config.py +3 -3
- datahub/ingestion/source/looker/looker_source.py +25 -25
- datahub/ingestion/source/looker/looker_template_language.py +3 -3
- datahub/ingestion/source/looker/looker_usage.py +5 -7
- datahub/ingestion/source/looker/lookml_concept_context.py +6 -6
- datahub/ingestion/source/looker/lookml_source.py +13 -15
- datahub/ingestion/source/looker/view_upstream.py +5 -5
- datahub/ingestion/source/mlflow.py +4 -4
- datahub/ingestion/source/mode.py +5 -5
- datahub/ingestion/source/mongodb.py +6 -4
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +24 -26
- datahub/ingestion/source/openapi.py +9 -9
- datahub/ingestion/source/powerbi/config.py +12 -12
- datahub/ingestion/source/powerbi/m_query/parser.py +11 -11
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +26 -24
- datahub/ingestion/source/powerbi/m_query/resolver.py +13 -13
- datahub/ingestion/source/powerbi/powerbi.py +6 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +9 -9
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +7 -7
- datahub/ingestion/source/qlik_sense/qlik_api.py +1 -1
- datahub/ingestion/source/redshift/config.py +3 -3
- datahub/ingestion/source/redshift/redshift.py +12 -12
- datahub/ingestion/source/redshift/usage.py +8 -8
- datahub/ingestion/source/s3/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/s3/source.py +1 -1
- datahub/ingestion/source/salesforce.py +26 -25
- datahub/ingestion/source/schema/json_schema.py +1 -1
- datahub/ingestion/source/sigma/sigma.py +3 -3
- datahub/ingestion/source/sigma/sigma_api.py +12 -10
- datahub/ingestion/source/snowflake/snowflake_config.py +9 -7
- datahub/ingestion/source/snowflake/snowflake_connection.py +6 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +6 -6
- datahub/ingestion/source/snowflake/snowflake_tag.py +7 -7
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_utils.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_v2.py +13 -4
- datahub/ingestion/source/sql/athena.py +1 -3
- datahub/ingestion/source/sql/clickhouse.py +8 -14
- datahub/ingestion/source/sql/oracle.py +1 -3
- datahub/ingestion/source/sql/sql_generic_profiler.py +1 -2
- datahub/ingestion/source/sql/sql_types.py +0 -1
- datahub/ingestion/source/sql/teradata.py +16 -3
- datahub/ingestion/source/state/profiling_state_handler.py +3 -3
- datahub/ingestion/source/state/redundant_run_skip_handler.py +5 -7
- datahub/ingestion/source/state/stale_entity_removal_handler.py +3 -3
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +9 -9
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/tableau/tableau.py +245 -101
- datahub/ingestion/source/tableau/tableau_common.py +5 -2
- datahub/ingestion/source/unity/config.py +3 -1
- datahub/ingestion/source/unity/proxy.py +1 -1
- datahub/ingestion/source/unity/source.py +3 -3
- datahub/ingestion/source/unity/usage.py +3 -1
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +3 -3
- datahub/ingestion/source/usage/usage_common.py +1 -1
- datahub/ingestion/transformer/add_dataset_dataproduct.py +4 -4
- datahub/ingestion/transformer/add_dataset_properties.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +4 -4
- datahub/ingestion/transformer/extract_ownership_from_tags.py +3 -3
- datahub/ingestion/transformer/tags_to_terms.py +7 -7
- datahub/integrations/assertion/snowflake/compiler.py +10 -10
- datahub/lite/duckdb_lite.py +12 -10
- datahub/metadata/_schema_classes.py +1 -1
- datahub/metadata/schema.avsc +6 -2
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +4 -2
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -0
- datahub/secret/datahub_secrets_client.py +12 -21
- datahub/secret/secret_common.py +14 -8
- datahub/specific/aspect_helpers/custom_properties.py +1 -2
- datahub/sql_parsing/schema_resolver.py +5 -10
- datahub/sql_parsing/sql_parsing_aggregator.py +18 -16
- datahub/sql_parsing/sqlglot_lineage.py +3 -3
- datahub/sql_parsing/sqlglot_utils.py +1 -1
- datahub/telemetry/stats.py +1 -2
- datahub/testing/mcp_diff.py +1 -1
- datahub/utilities/file_backed_collections.py +10 -10
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/logging_manager.py +2 -2
- datahub/utilities/lossy_collections.py +3 -3
- datahub/utilities/mapping.py +3 -3
- datahub/utilities/memory_footprint.py +3 -2
- datahub/utilities/serialized_lru_cache.py +3 -1
- datahub/utilities/sqlalchemy_query_combiner.py +6 -6
- datahub/utilities/sqllineage_patch.py +1 -1
- datahub/utilities/stats_collections.py +3 -1
- datahub/utilities/urns/_urn_base.py +28 -5
- datahub/utilities/urns/urn_iter.py +2 -2
- {acryl_datahub-0.15.0.2rc6.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.2rc6.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.2rc6.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/top_level.txt +0 -0
|
@@ -151,9 +151,9 @@ class LossyDict(Dict[_KT, _VT], Generic[_KT, _VT]):
|
|
|
151
151
|
def as_obj(self) -> Dict[Union[_KT, str], Union[_VT, str]]:
|
|
152
152
|
base_dict: Dict[Union[_KT, str], Union[_VT, str]] = super().copy() # type: ignore
|
|
153
153
|
if self.sampled:
|
|
154
|
-
base_dict[
|
|
155
|
-
"sampled"
|
|
156
|
-
|
|
154
|
+
base_dict["sampled"] = (
|
|
155
|
+
f"{len(self.keys())} sampled of at most {self.total_key_count()} entries."
|
|
156
|
+
)
|
|
157
157
|
return base_dict
|
|
158
158
|
|
|
159
159
|
def total_key_count(self) -> int:
|
datahub/utilities/mapping.py
CHANGED
|
@@ -349,9 +349,9 @@ class OperationProcessor:
|
|
|
349
349
|
elements=[institutional_memory_element]
|
|
350
350
|
)
|
|
351
351
|
|
|
352
|
-
aspect_map[
|
|
353
|
-
|
|
354
|
-
|
|
352
|
+
aspect_map[Constants.ADD_DOC_LINK_OPERATION] = (
|
|
353
|
+
institutional_memory_aspect
|
|
354
|
+
)
|
|
355
355
|
else:
|
|
356
356
|
raise Exception(
|
|
357
357
|
f"Expected 1 item of type list for the documentation_link meta_mapping config,"
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from collections import deque
|
|
2
2
|
from itertools import chain
|
|
3
3
|
from sys import getsizeof
|
|
4
|
-
from typing import Any,
|
|
4
|
+
from typing import Any, Iterator
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
def total_size(o: Any, handlers: Any = {}) -> int:
|
|
@@ -15,7 +15,8 @@ def total_size(o: Any, handlers: Any = {}) -> int:
|
|
|
15
15
|
Based on https://github.com/ActiveState/recipe-577504-compute-mem-footprint/blob/master/recipe.py
|
|
16
16
|
"""
|
|
17
17
|
|
|
18
|
-
dict_handler:
|
|
18
|
+
def dict_handler(d: dict) -> Iterator[Any]:
|
|
19
|
+
return chain.from_iterable(d.items())
|
|
19
20
|
|
|
20
21
|
all_handlers = {
|
|
21
22
|
tuple: iter,
|
|
@@ -41,7 +41,9 @@ def serialized_lru_cache(
|
|
|
41
41
|
def wrapper(*args: _F.args, **kwargs: _F.kwargs) -> _T:
|
|
42
42
|
# We need a type ignore here because there's no way for us to require that
|
|
43
43
|
# the args and kwargs are hashable while using ParamSpec.
|
|
44
|
-
key: _Key = cachetools.keys.hashkey(
|
|
44
|
+
key: _Key = cachetools.keys.hashkey(
|
|
45
|
+
*args, **{k: v for k, v in kwargs.items() if "cache_exclude" not in k}
|
|
46
|
+
) # type: ignore
|
|
45
47
|
|
|
46
48
|
with cache_lock:
|
|
47
49
|
if key in cache:
|
|
@@ -160,12 +160,12 @@ class SQLAlchemyQueryCombiner:
|
|
|
160
160
|
_greenlets_by_thread_lock: threading.Lock = dataclasses.field(
|
|
161
161
|
default_factory=lambda: threading.Lock()
|
|
162
162
|
)
|
|
163
|
-
_queries_by_thread: Dict[
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
_greenlets_by_thread: Dict[
|
|
167
|
-
|
|
168
|
-
|
|
163
|
+
_queries_by_thread: Dict[greenlet.greenlet, Dict[str, _QueryFuture]] = (
|
|
164
|
+
dataclasses.field(default_factory=lambda: collections.defaultdict(dict))
|
|
165
|
+
)
|
|
166
|
+
_greenlets_by_thread: Dict[greenlet.greenlet, Set[greenlet.greenlet]] = (
|
|
167
|
+
dataclasses.field(default_factory=lambda: collections.defaultdict(set))
|
|
168
|
+
)
|
|
169
169
|
|
|
170
170
|
@staticmethod
|
|
171
171
|
def _generate_sql_safe_identifier() -> str:
|
|
@@ -8,7 +8,7 @@ from sqllineage.utils.constant import EdgeType
|
|
|
8
8
|
|
|
9
9
|
# Patch based on sqllineage v1.3.3
|
|
10
10
|
def end_of_query_cleanup_patch(self, holder: SubQueryLineageHolder) -> None: # type: ignore
|
|
11
|
-
for
|
|
11
|
+
for tbl in self.tables:
|
|
12
12
|
holder.add_read(tbl)
|
|
13
13
|
self.union_barriers.append((len(self.columns), len(self.tables)))
|
|
14
14
|
|
|
@@ -48,7 +48,9 @@ class TopKDict(DefaultDict[_KT, _VT]):
|
|
|
48
48
|
total_value: Union[_VT, str] = sum(trimmed_dict.values()) # type: ignore
|
|
49
49
|
except Exception:
|
|
50
50
|
total_value = ""
|
|
51
|
-
trimmed_dict[f"... top {self.top_k} of total {len(self)} entries"] =
|
|
51
|
+
trimmed_dict[f"... top {self.top_k} of total {len(self)} entries"] = ( # type: ignore
|
|
52
|
+
total_value # type: ignore
|
|
53
|
+
)
|
|
52
54
|
return trimmed_dict
|
|
53
55
|
|
|
54
56
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import functools
|
|
2
2
|
import urllib.parse
|
|
3
3
|
from abc import abstractmethod
|
|
4
|
-
from typing import ClassVar, Dict, List, Optional, Type
|
|
4
|
+
from typing import ClassVar, Dict, List, Optional, Type, Union
|
|
5
5
|
|
|
6
6
|
from deprecated import deprecated
|
|
7
7
|
from typing_extensions import Self
|
|
@@ -86,12 +86,24 @@ class Urn:
|
|
|
86
86
|
return self._entity_ids
|
|
87
87
|
|
|
88
88
|
@classmethod
|
|
89
|
-
def from_string(cls, urn_str: str) -> Self:
|
|
90
|
-
"""
|
|
91
|
-
|
|
89
|
+
def from_string(cls, urn_str: Union[str, "Urn"], /) -> Self:
|
|
90
|
+
"""Create an Urn from its string representation.
|
|
91
|
+
|
|
92
|
+
When called against the base Urn class, this method will return a more specific Urn type where possible.
|
|
93
|
+
|
|
94
|
+
>>> from datahub.metadata.urns import DatasetUrn, Urn
|
|
95
|
+
>>> urn_str = 'urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)'
|
|
96
|
+
>>> urn = Urn.from_string(urn_str)
|
|
97
|
+
>>> assert isinstance(urn, DatasetUrn)
|
|
98
|
+
|
|
99
|
+
When called against a specific Urn type (e.g. DatasetUrn.from_string), this method can
|
|
100
|
+
also be used for type narrowing.
|
|
101
|
+
|
|
102
|
+
>>> urn_str = 'urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)'
|
|
103
|
+
>>> assert DatasetUrn.from_string(urn_str)
|
|
92
104
|
|
|
93
105
|
Args:
|
|
94
|
-
urn_str: The string representation of the Urn.
|
|
106
|
+
urn_str: The string representation of the urn. Also accepts an existing Urn instance.
|
|
95
107
|
|
|
96
108
|
Returns:
|
|
97
109
|
Urn of the given string representation.
|
|
@@ -100,6 +112,17 @@ class Urn:
|
|
|
100
112
|
InvalidUrnError: If the string representation is in invalid format.
|
|
101
113
|
"""
|
|
102
114
|
|
|
115
|
+
if isinstance(urn_str, Urn):
|
|
116
|
+
if issubclass(cls, _SpecificUrn) and isinstance(urn_str, cls):
|
|
117
|
+
# Fast path - we're already the right type.
|
|
118
|
+
|
|
119
|
+
# I'm not really sure why we need a type ignore here, but mypy doesn't really
|
|
120
|
+
# understand the isinstance check above.
|
|
121
|
+
return urn_str # type: ignore
|
|
122
|
+
|
|
123
|
+
# Fall through, so that we can convert a generic Urn to a specific Urn type.
|
|
124
|
+
urn_str = urn_str.urn()
|
|
125
|
+
|
|
103
126
|
# TODO: Add handling for url encoded urns e.g. urn%3A ...
|
|
104
127
|
|
|
105
128
|
if not urn_str.startswith("urn:li:"):
|
|
@@ -21,7 +21,7 @@ def _add_prefix_to_paths(
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
def list_urns_with_path(
|
|
24
|
-
model: Union[DictWrapper, MetadataChangeProposalWrapper]
|
|
24
|
+
model: Union[DictWrapper, MetadataChangeProposalWrapper],
|
|
25
25
|
) -> List[Tuple[str, _Path]]:
|
|
26
26
|
"""List urns in the given model with their paths.
|
|
27
27
|
|
|
@@ -145,7 +145,7 @@ def lowercase_dataset_urns(
|
|
|
145
145
|
MetadataChangeEventClass,
|
|
146
146
|
MetadataChangeProposalClass,
|
|
147
147
|
MetadataChangeProposalWrapper,
|
|
148
|
-
]
|
|
148
|
+
],
|
|
149
149
|
) -> None:
|
|
150
150
|
def modify_urn(urn: str) -> str:
|
|
151
151
|
if guess_entity_type(urn) == "dataset":
|
|
File without changes
|
{acryl_datahub-0.15.0.2rc6.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|