acryl-datahub 1.2.0.3rc2__py3-none-any.whl → 1.2.0.4rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic; see the registry's advisory page for more details.

Files changed (32)
  1. {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/METADATA +2522 -2522
  2. {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/RECORD +32 -32
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/external/external_tag.py +6 -4
  5. datahub/api/entities/external/lake_formation_external_entites.py +50 -49
  6. datahub/api/entities/external/restricted_text.py +107 -182
  7. datahub/api/entities/external/unity_catalog_external_entites.py +51 -52
  8. datahub/ingestion/api/source.py +81 -7
  9. datahub/ingestion/autogenerated/capability_summary.json +47 -19
  10. datahub/ingestion/source/abs/source.py +9 -0
  11. datahub/ingestion/source/aws/glue.py +18 -2
  12. datahub/ingestion/source/aws/tag_entities.py +2 -2
  13. datahub/ingestion/source/datahub/datahub_source.py +8 -1
  14. datahub/ingestion/source/delta_lake/source.py +8 -1
  15. datahub/ingestion/source/dremio/dremio_source.py +19 -2
  16. datahub/ingestion/source/fivetran/fivetran.py +9 -3
  17. datahub/ingestion/source/ge_data_profiler.py +8 -0
  18. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  19. datahub/ingestion/source/mock_data/datahub_mock_data.py +26 -10
  20. datahub/ingestion/source/powerbi/powerbi.py +4 -1
  21. datahub/ingestion/source/redshift/redshift.py +1 -0
  22. datahub/ingestion/source/salesforce.py +8 -0
  23. datahub/ingestion/source/sql/hive_metastore.py +8 -0
  24. datahub/ingestion/source/sql/teradata.py +8 -1
  25. datahub/ingestion/source/sql/trino.py +9 -0
  26. datahub/ingestion/source/unity/tag_entities.py +3 -3
  27. datahub/sdk/entity_client.py +22 -7
  28. datahub/utilities/mapping.py +29 -2
  29. {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/WHEEL +0 -0
  30. {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/entry_points.txt +0 -0
  31. {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/licenses/LICENSE +0 -0
  32. {acryl_datahub-1.2.0.3rc2.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/top_level.txt +0 -0
@@ -36,6 +36,7 @@ from datahub.ingestion.api.decorators import (
36
36
  from datahub.ingestion.api.workunit import MetadataWorkUnit
37
37
  from datahub.ingestion.extractor import schema_util
38
38
  from datahub.ingestion.source.common.data_reader import DataReader
39
+ from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
39
40
  from datahub.ingestion.source.sql.sql_common import (
40
41
  SQLAlchemySource,
41
42
  SqlWorkUnit,
@@ -249,6 +250,14 @@ class TrinoConfig(BasicSQLAlchemyConfig):
249
250
  @support_status(SupportStatus.CERTIFIED)
250
251
  @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
251
252
  @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
253
+ @capability(
254
+ SourceCapability.LINEAGE_COARSE,
255
+ "Extract table-level lineage",
256
+ subtype_modifier=[
257
+ SourceCapabilityModifier.TABLE,
258
+ SourceCapabilityModifier.VIEW,
259
+ ],
260
+ )
252
261
  class TrinoSource(SQLAlchemySource):
253
262
  """
254
263
 
@@ -77,13 +77,13 @@ class UnityCatalogTagPlatformResourceId(BaseModel, ExternalEntityId):
77
77
  )
78
78
  if existing_platform_resource:
79
79
  logger.info(
80
- f"Found existing UnityCatalogTagPlatformResourceId for tag {tag.key.original}: {existing_platform_resource}"
80
+ f"Found existing UnityCatalogTagPlatformResourceId for tag {tag.key.raw_text}: {existing_platform_resource}"
81
81
  )
82
82
  return existing_platform_resource
83
83
 
84
84
  return UnityCatalogTagPlatformResourceId(
85
- tag_key=tag.key.original,
86
- tag_value=tag.value.original if tag.value is not None else None,
85
+ tag_key=tag.key.raw_text,
86
+ tag_value=tag.value.raw_text if tag.value is not None else None,
87
87
  platform_instance=platform_instance,
88
88
  exists_in_unity_catalog=exists_in_unity_catalog,
89
89
  persisted=False,
@@ -1,11 +1,12 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import warnings
4
- from typing import TYPE_CHECKING, Union, overload
4
+ from typing import TYPE_CHECKING, Optional, Union, overload
5
5
 
6
6
  import datahub.metadata.schema_classes as models
7
7
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
8
8
  from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
9
+ from datahub.emitter.rest_emitter import EmitMode
9
10
  from datahub.errors import IngestionAttributionWarning, ItemNotFoundError, SdkUsageError
10
11
  from datahub.ingestion.graph.client import DataHubGraph
11
12
  from datahub.metadata.urns import (
@@ -133,7 +134,7 @@ class EntityClient:
133
134
 
134
135
  return entity
135
136
 
136
- def create(self, entity: Entity) -> None:
137
+ def create(self, entity: Entity, *, emit_mode: Optional[EmitMode] = None) -> None:
137
138
  mcps = []
138
139
 
139
140
  if self._graph.exists(str(entity.urn)):
@@ -152,9 +153,12 @@ class EntityClient:
152
153
  )
153
154
  mcps.extend(entity.as_mcps(models.ChangeTypeClass.CREATE))
154
155
 
155
- self._graph.emit_mcps(mcps)
156
+ if emit_mode:
157
+ self._graph.emit_mcps(mcps, emit_mode=emit_mode)
158
+ else:
159
+ self._graph.emit_mcps(mcps)
156
160
 
157
- def upsert(self, entity: Entity) -> None:
161
+ def upsert(self, entity: Entity, *, emit_mode: Optional[EmitMode] = None) -> None:
158
162
  if entity._prev_aspects is None and self._graph.exists(str(entity.urn)):
159
163
  warnings.warn(
160
164
  f"The entity {entity.urn} already exists. This operation will partially overwrite the existing entity.",
@@ -164,9 +168,17 @@ class EntityClient:
164
168
  # TODO: If there are no previous aspects but the entity exists, should we delete aspects that are not present here?
165
169
 
166
170
  mcps = entity.as_mcps(models.ChangeTypeClass.UPSERT)
167
- self._graph.emit_mcps(mcps)
171
+ if emit_mode:
172
+ self._graph.emit_mcps(mcps, emit_mode=emit_mode)
173
+ else:
174
+ self._graph.emit_mcps(mcps)
168
175
 
169
- def update(self, entity: Union[Entity, MetadataPatchProposal]) -> None:
176
+ def update(
177
+ self,
178
+ entity: Union[Entity, MetadataPatchProposal],
179
+ *,
180
+ emit_mode: Optional[EmitMode] = None,
181
+ ) -> None:
170
182
  if isinstance(entity, MetadataPatchProposal):
171
183
  return self._update_patch(entity)
172
184
 
@@ -179,7 +191,10 @@ class EntityClient:
179
191
  # -> probably add a "mode" parameter that can be "update" (e.g. if not modified) or "update_force"
180
192
 
181
193
  mcps = entity.as_mcps(models.ChangeTypeClass.UPSERT)
182
- self._graph.emit_mcps(mcps)
194
+ if emit_mode:
195
+ self._graph.emit_mcps(mcps, emit_mode=emit_mode)
196
+ else:
197
+ self._graph.emit_mcps(mcps)
183
198
 
184
199
  def _update_patch(
185
200
  self, updater: MetadataPatchProposal, check_exists: bool = True
@@ -83,7 +83,7 @@ class Constants:
83
83
  MATCH = "match"
84
84
  USER_OWNER = "user"
85
85
  GROUP_OWNER = "group"
86
- OPERAND_DATATYPE_SUPPORTED = [int, bool, str, float]
86
+ OPERAND_DATATYPE_SUPPORTED = [int, bool, str, float, list]
87
87
  TAG_PARTITION_KEY = "PARTITION_KEY"
88
88
  TAG_DIST_KEY = "DIST_KEY"
89
89
  TAG_SORT_KEY = "SORT_KEY"
@@ -455,7 +455,34 @@ class OperationProcessor:
455
455
  # function to check if a match clause is satisfied to a value.
456
456
  if not any(
457
457
  isinstance(raw_props_value, t) for t in Constants.OPERAND_DATATYPE_SUPPORTED
458
- ) or not isinstance(raw_props_value, type(match_clause)):
458
+ ):
459
+ return None
460
+
461
+ # Handle list values by checking if any item in the list matches
462
+ if isinstance(raw_props_value, list):
463
+ # For lists, we need to find at least one matching item
464
+ # Return a match with the concatenated values of all matching items
465
+ matching_items = []
466
+ for item in raw_props_value:
467
+ if isinstance(item, str):
468
+ match = re.match(match_clause, item)
469
+ if match:
470
+ matching_items.append(item)
471
+ elif isinstance(match_clause, type(item)):
472
+ match = re.match(str(match_clause), str(item))
473
+ if match:
474
+ matching_items.append(str(item))
475
+
476
+ if matching_items:
477
+ # Create a synthetic match object with all matching items joined
478
+ combined_value = ",".join(matching_items)
479
+ return re.match(
480
+ ".*", combined_value
481
+ ) # Always matches, returns combined value
482
+ return None
483
+
484
+ # Handle scalar values (existing logic)
485
+ elif not isinstance(raw_props_value, type(match_clause)):
459
486
  return None
460
487
  elif isinstance(raw_props_value, str):
461
488
  return re.match(match_clause, raw_props_value)