acryl-datahub 1.2.0.3rc2__py3-none-any.whl → 1.2.0.4rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/METADATA +2522 -2522
- {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/RECORD +32 -32
- datahub/_version.py +1 -1
- datahub/api/entities/external/external_tag.py +6 -4
- datahub/api/entities/external/lake_formation_external_entites.py +50 -49
- datahub/api/entities/external/restricted_text.py +107 -182
- datahub/api/entities/external/unity_catalog_external_entites.py +51 -52
- datahub/ingestion/api/source.py +81 -7
- datahub/ingestion/autogenerated/capability_summary.json +47 -19
- datahub/ingestion/source/abs/source.py +9 -0
- datahub/ingestion/source/aws/glue.py +18 -2
- datahub/ingestion/source/aws/tag_entities.py +2 -2
- datahub/ingestion/source/datahub/datahub_source.py +8 -1
- datahub/ingestion/source/delta_lake/source.py +8 -1
- datahub/ingestion/source/dremio/dremio_source.py +19 -2
- datahub/ingestion/source/fivetran/fivetran.py +9 -3
- datahub/ingestion/source/ge_data_profiler.py +8 -0
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/mock_data/datahub_mock_data.py +26 -10
- datahub/ingestion/source/powerbi/powerbi.py +4 -1
- datahub/ingestion/source/redshift/redshift.py +1 -0
- datahub/ingestion/source/salesforce.py +8 -0
- datahub/ingestion/source/sql/hive_metastore.py +8 -0
- datahub/ingestion/source/sql/teradata.py +8 -1
- datahub/ingestion/source/sql/trino.py +9 -0
- datahub/ingestion/source/unity/tag_entities.py +3 -3
- datahub/sdk/entity_client.py +22 -7
- datahub/utilities/mapping.py +29 -2
- {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/top_level.txt +0 -0

datahub/ingestion/source/sql/trino.py
CHANGED

@@ -36,6 +36,7 @@ from datahub.ingestion.api.decorators import (
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.extractor import schema_util
 from datahub.ingestion.source.common.data_reader import DataReader
+from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
 from datahub.ingestion.source.sql.sql_common import (
     SQLAlchemySource,
     SqlWorkUnit,
@@ -249,6 +250,14 @@ class TrinoConfig(BasicSQLAlchemyConfig):
 @support_status(SupportStatus.CERTIFIED)
 @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
+@capability(
+    SourceCapability.LINEAGE_COARSE,
+    "Extract table-level lineage",
+    subtype_modifier=[
+        SourceCapabilityModifier.TABLE,
+        SourceCapabilityModifier.VIEW,
+    ],
+)
 class TrinoSource(SQLAlchemySource):
     """
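The second hunk stacks an additional @capability(...) declaration onto TrinoSource, advertising coarse (table-level) lineage scoped to the TABLE and VIEW subtypes. As a rough illustration of the mechanism only (this is not the real datahub decorator; every name below is hypothetical), a class decorator of this shape simply records declared-capability metadata on the decorated source class:

# Hypothetical sketch of a capability-style class decorator; not datahub code.
from dataclasses import dataclass
from typing import Dict, List, Optional


@dataclass
class DeclaredCapability:
    name: str
    description: str
    subtype_modifier: Optional[List[str]] = None


def declare_capability(name: str, description: str, subtype_modifier: Optional[List[str]] = None):
    # Class decorator: copy-then-update so subclasses do not share one dict.
    def wrapper(cls):
        caps: Dict[str, DeclaredCapability] = dict(getattr(cls, "_declared_capabilities", {}))
        caps[name] = DeclaredCapability(name, description, subtype_modifier)
        cls._declared_capabilities = caps
        return cls

    return wrapper


@declare_capability("LINEAGE_COARSE", "Extract table-level lineage", ["TABLE", "VIEW"])
class ExampleSource:
    pass


print(ExampleSource._declared_capabilities["LINEAGE_COARSE"].subtype_modifier)
# prints: ['TABLE', 'VIEW']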

datahub/ingestion/source/unity/tag_entities.py
CHANGED

@@ -77,13 +77,13 @@ class UnityCatalogTagPlatformResourceId(BaseModel, ExternalEntityId):
         )
         if existing_platform_resource:
             logger.info(
-                f"Found existing UnityCatalogTagPlatformResourceId for tag {tag.key.
+                f"Found existing UnityCatalogTagPlatformResourceId for tag {tag.key.raw_text}: {existing_platform_resource}"
             )
             return existing_platform_resource

         return UnityCatalogTagPlatformResourceId(
-            tag_key=tag.key.
-            tag_value=tag.value.
+            tag_key=tag.key.raw_text,
+            tag_value=tag.value.raw_text if tag.value is not None else None,
             platform_instance=platform_instance,
             exists_in_unity_catalog=exists_in_unity_catalog,
             persisted=False,
datahub/sdk/entity_client.py
CHANGED
@@ -1,11 +1,12 @@
 from __future__ import annotations

 import warnings
-from typing import TYPE_CHECKING, Union, overload
+from typing import TYPE_CHECKING, Optional, Union, overload

 import datahub.metadata.schema_classes as models
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
+from datahub.emitter.rest_emitter import EmitMode
 from datahub.errors import IngestionAttributionWarning, ItemNotFoundError, SdkUsageError
 from datahub.ingestion.graph.client import DataHubGraph
 from datahub.metadata.urns import (
@@ -133,7 +134,7 @@ class EntityClient:

         return entity

-    def create(self, entity: Entity) -> None:
+    def create(self, entity: Entity, *, emit_mode: Optional[EmitMode] = None) -> None:
         mcps = []

         if self._graph.exists(str(entity.urn)):
@@ -152,9 +153,12 @@
         )
         mcps.extend(entity.as_mcps(models.ChangeTypeClass.CREATE))

-        self._graph.emit_mcps(mcps)
+        if emit_mode:
+            self._graph.emit_mcps(mcps, emit_mode=emit_mode)
+        else:
+            self._graph.emit_mcps(mcps)

-    def upsert(self, entity: Entity) -> None:
+    def upsert(self, entity: Entity, *, emit_mode: Optional[EmitMode] = None) -> None:
         if entity._prev_aspects is None and self._graph.exists(str(entity.urn)):
             warnings.warn(
                 f"The entity {entity.urn} already exists. This operation will partially overwrite the existing entity.",
@@ -164,9 +168,17 @@
         # TODO: If there are no previous aspects but the entity exists, should we delete aspects that are not present here?

         mcps = entity.as_mcps(models.ChangeTypeClass.UPSERT)
-        self._graph.emit_mcps(mcps)
+        if emit_mode:
+            self._graph.emit_mcps(mcps, emit_mode=emit_mode)
+        else:
+            self._graph.emit_mcps(mcps)

-    def update(
+    def update(
+        self,
+        entity: Union[Entity, MetadataPatchProposal],
+        *,
+        emit_mode: Optional[EmitMode] = None,
+    ) -> None:
         if isinstance(entity, MetadataPatchProposal):
             return self._update_patch(entity)

@@ -179,7 +191,10 @@
         # -> probably add a "mode" parameter that can be "update" (e.g. if not modified) or "update_force"

         mcps = entity.as_mcps(models.ChangeTypeClass.UPSERT)
-        self._graph.emit_mcps(mcps)
+        if emit_mode:
+            self._graph.emit_mcps(mcps, emit_mode=emit_mode)
+        else:
+            self._graph.emit_mcps(mcps)

     def _update_patch(
         self, updater: MetadataPatchProposal, check_exists: bool = True
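The practical effect of these hunks is that create, upsert, and update now take a keyword-only emit_mode that is forwarded to emit_mcps when provided and fall back to the previous emit behaviour when omitted. A minimal usage sketch, assuming the SDK v2 DataHubClient/Dataset entry points and an EmitMode member name (ASYNC) that should be verified against the installed datahub.emitter.rest_emitter:

# Hedged sketch: the server URL, dataset name, and EmitMode member are assumptions.
from datahub.emitter.rest_emitter import EmitMode
from datahub.sdk import DataHubClient, Dataset

client = DataHubClient(server="http://localhost:8080", token=None)
dataset = Dataset(platform="hive", name="db.example_table")

# New in this release: an explicit emit_mode is forwarded to emit_mcps;
# leaving it out keeps the graph client's default emit behaviour.
client.entities.upsert(dataset, emit_mode=EmitMode.ASYNC)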
datahub/utilities/mapping.py
CHANGED
@@ -83,7 +83,7 @@ class Constants:
     MATCH = "match"
     USER_OWNER = "user"
     GROUP_OWNER = "group"
-    OPERAND_DATATYPE_SUPPORTED = [int, bool, str, float]
+    OPERAND_DATATYPE_SUPPORTED = [int, bool, str, float, list]
     TAG_PARTITION_KEY = "PARTITION_KEY"
     TAG_DIST_KEY = "DIST_KEY"
     TAG_SORT_KEY = "SORT_KEY"
@@ -455,7 +455,34 @@ class OperationProcessor:
         # function to check if a match clause is satisfied to a value.
         if not any(
             isinstance(raw_props_value, t) for t in Constants.OPERAND_DATATYPE_SUPPORTED
-        )
+        ):
+            return None
+
+        # Handle list values by checking if any item in the list matches
+        if isinstance(raw_props_value, list):
+            # For lists, we need to find at least one matching item
+            # Return a match with the concatenated values of all matching items
+            matching_items = []
+            for item in raw_props_value:
+                if isinstance(item, str):
+                    match = re.match(match_clause, item)
+                    if match:
+                        matching_items.append(item)
+                elif isinstance(match_clause, type(item)):
+                    match = re.match(str(match_clause), str(item))
+                    if match:
+                        matching_items.append(str(item))
+
+            if matching_items:
+                # Create a synthetic match object with all matching items joined
+                combined_value = ",".join(matching_items)
+                return re.match(
+                    ".*", combined_value
+                )  # Always matches, returns combined value
+            return None
+
+        # Handle scalar values (existing logic)
+        elif not isinstance(raw_props_value, type(match_clause)):
             return None
         elif isinstance(raw_props_value, str):
             return re.match(match_clause, raw_props_value)
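With list added to OPERAND_DATATYPE_SUPPORTED, match clauses can now be evaluated against list-valued properties: every item that matches is kept, and the matches are exposed as one comma-joined value. A small self-contained sketch of that behaviour (the helper name is hypothetical; the logic mirrors the hunk above):

import re
from typing import List, Optional


def match_list_property(match_clause: str, values: List[str]) -> Optional[str]:
    # Mirror of the list branch above: collect every item matching the clause,
    # then join the matches into a single comma-separated string.
    matching_items = [v for v in values if isinstance(v, str) and re.match(match_clause, v)]
    if not matching_items:
        return None
    # The production code wraps this in re.match(".*", combined) so callers still
    # receive a Match object; returning the string keeps the sketch short.
    return ",".join(matching_items)


# e.g. tags stored as a list in source properties:
print(match_list_property(r"pii.*", ["pii_email", "public", "pii_phone"]))
# prints: pii_email,pii_phone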