acryl-datahub 1.2.0.3rc1__py3-none-any.whl → 1.2.0.4rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic.
- {acryl_datahub-1.2.0.3rc1.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/METADATA +2535 -2535
- {acryl_datahub-1.2.0.3rc1.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/RECORD +38 -38
- datahub/_version.py +1 -1
- datahub/api/entities/external/external_tag.py +6 -4
- datahub/api/entities/external/lake_formation_external_entites.py +50 -49
- datahub/api/entities/external/restricted_text.py +107 -182
- datahub/api/entities/external/unity_catalog_external_entites.py +51 -52
- datahub/emitter/rest_emitter.py +18 -5
- datahub/ingestion/api/source.py +81 -7
- datahub/ingestion/autogenerated/capability_summary.json +47 -19
- datahub/ingestion/graph/client.py +19 -3
- datahub/ingestion/sink/datahub_rest.py +2 -0
- datahub/ingestion/source/abs/source.py +9 -0
- datahub/ingestion/source/aws/glue.py +18 -2
- datahub/ingestion/source/aws/tag_entities.py +2 -2
- datahub/ingestion/source/datahub/datahub_source.py +8 -1
- datahub/ingestion/source/dbt/dbt_common.py +10 -0
- datahub/ingestion/source/delta_lake/source.py +8 -1
- datahub/ingestion/source/dremio/dremio_source.py +19 -2
- datahub/ingestion/source/fivetran/fivetran.py +9 -3
- datahub/ingestion/source/ge_data_profiler.py +8 -0
- datahub/ingestion/source/hex/query_fetcher.py +1 -1
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/mock_data/datahub_mock_data.py +26 -10
- datahub/ingestion/source/powerbi/powerbi.py +4 -1
- datahub/ingestion/source/redshift/redshift.py +1 -0
- datahub/ingestion/source/salesforce.py +8 -0
- datahub/ingestion/source/sql/athena_properties_extractor.py +2 -2
- datahub/ingestion/source/sql/hive_metastore.py +8 -0
- datahub/ingestion/source/sql/teradata.py +8 -1
- datahub/ingestion/source/sql/trino.py +9 -0
- datahub/ingestion/source/unity/tag_entities.py +3 -3
- datahub/sdk/entity_client.py +22 -7
- datahub/utilities/mapping.py +29 -2
- {acryl_datahub-1.2.0.3rc1.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.3rc1.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.3rc1.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.3rc1.dist-info → acryl_datahub-1.2.0.4rc1.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/looker/looker_liquid_tag.py
CHANGED
@@ -1,5 +1,5 @@
 from functools import lru_cache
-from typing import ClassVar, Optional, TextIO
+from typing import ClassVar, Optional, TextIO, Type
 
 from liquid import Environment
 from liquid.ast import Node
@@ -20,16 +20,27 @@ class CustomTagException(Exception):
 class ConditionNode(Node):
     def __init__(self, tok: Token, sql_or_lookml_reference: str, filter_name: str):
         self.tok = tok
-
         self.sql_or_lookml_reference = sql_or_lookml_reference
-
         self.filter_name = filter_name
 
     def render_to_output(self, context: Context, buffer: TextIO) -> Optional[bool]:
         # This implementation will make sure that sql parse work correctly if looker condition tag
         # is used in lookml sql field
         buffer.write(f"{self.sql_or_lookml_reference}='dummy_value'")
+        return True
 
+
+class IncrementConditionNode(Node):
+    def __init__(self, tok: Token, sql_or_lookml_reference: str):
+        self.tok = tok
+        self.sql_or_lookml_reference = sql_or_lookml_reference
+
+    def render_to_output(self, context: Context, buffer: TextIO) -> Optional[bool]:
+        # For incrementcondition, we need to generate a condition that would be used
+        # in incremental PDT updates. This typically involves date/time comparisons.
+        # We'll render it as a date comparison with a placeholder value
+        # See details in Looker documentation for incrementcondition tag -> cloud.google.com/looker/docs/reference/param-view-increment-key
+        buffer.write(f"{self.sql_or_lookml_reference} > '2023-01-01'")
         return True
 
 
@@ -44,7 +55,6 @@ class ConditionTag(Tag):
     This class render the below tag as order.region='ap-south-1' if order_region is provided in config.liquid_variables
     as order_region: 'ap-south-1'
     {% condition order_region %} order.region {% endcondition %}
-
     """
 
     TAG_START: ClassVar[str] = "condition"
@@ -79,7 +89,48 @@ class ConditionTag(Tag):
         )
 
 
-
+class IncrementConditionTag(Tag):
+    """
+    IncrementConditionTag is the equivalent implementation of looker's custom liquid tag "incrementcondition".
+    Refer doc: https://cloud.google.com/looker/docs/incremental-pdts#using_the_incrementcondition_tag
+
+    This tag is used for incremental PDTs to determine which records should be updated.
+    It typically works with date/time fields to filter data that has changed since the last update.
+
+    Example usage in Looker:
+    {% incrementcondition created_at %} order.created_at {% endincrementcondition %}
+
+    This would generate SQL like: order.created_at > '2023-01-01 00:00:00'
+    """
+
+    TAG_START: ClassVar[str] = "incrementcondition"
+    TAG_END: ClassVar[str] = "endincrementcondition"
+    name: str = "incrementcondition"
+
+    def __init__(self, env: Environment):
+        super().__init__(env)
+        self.parser = get_parser(self.env)
+
+    def parse(self, stream: TokenStream) -> Node:
+        expect(stream, TOKEN_TAG, value=IncrementConditionTag.TAG_START)
+
+        start_token = stream.current
+
+        stream.next_token()
+        expect(stream, TOKEN_LITERAL)
+        sql_or_lookml_reference: str = stream.current.value.strip()
+
+        stream.next_token()
+        expect(stream, TOKEN_TAG, value=IncrementConditionTag.TAG_END)
+
+        return IncrementConditionNode(
+            tok=start_token,
+            sql_or_lookml_reference=sql_or_lookml_reference,
+        )
+
+
+# Updated custom_tags list to include both tags
+custom_tags: list[Type[Tag]] = [ConditionTag, IncrementConditionTag]
 
 
 @string_filter
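Side note (not part of the diff): a minimal sketch of the substitution these render_to_output implementations perform when DataHub flattens Looker liquid so the underlying SQL can be parsed. The LookML snippet and the resulting string below are illustrative assumptions, not output copied from the package.

# Illustrative only: how the condition / incrementcondition tags are rewritten
# before SQL parsing, per the render_to_output implementations above.
lookml_sql = (
    "SELECT * FROM orders WHERE "
    "{% incrementcondition created_at %} orders.created_at {% endincrementcondition %}"
)

# IncrementConditionNode writes "<reference> > '2023-01-01'", so the flattened SQL
# handed to the parser is roughly:
flattened_sql = "SELECT * FROM orders WHERE orders.created_at > '2023-01-01'"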
datahub/ingestion/source/mock_data/datahub_mock_data.py
CHANGED
@@ -13,7 +13,7 @@ from datahub.ingestion.api.decorators import (
     platform_name,
     support_status,
 )
-from datahub.ingestion.api.source import Source, SourceReport
+from datahub.ingestion.api.source import Source, SourceReport, StructuredLogCategory
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.common.subtypes import DatasetSubTypes
 from datahub.ingestion.source.mock_data.datahub_mock_data_report import (
@@ -35,6 +35,8 @@ from datahub.utilities.str_enum import StrEnum
 
 logger = logging.getLogger(__name__)
 
+PLATFORM_NAME = "fake"
+
 
 class SubTypePattern(StrEnum):
     ALTERNATING = "alternating"
@@ -137,6 +139,10 @@ class DataHubMockDataConfig(ConfigModel):
         default=0,
         description="Number of warnings to add in report for testing",
     )
+    num_info: int = Field(
+        default=0,
+        description="Number of info to add in report for testing",
+    )
 
     gen_1: LineageConfigGen1 = Field(
         default_factory=LineageConfigGen1,
@@ -144,7 +150,7 @@ class DataHubMockDataConfig(ConfigModel):
     )
 
 
-@platform_name(
+@platform_name(PLATFORM_NAME)
 @config_class(DataHubMockDataConfig)
 @support_status(SupportStatus.TESTING)
 class DataHubMockDataSource(Source):
@@ -159,6 +165,9 @@ class DataHubMockDataSource(Source):
         self.report = DataHubMockDataReport()
 
     def get_workunits(self) -> Iterable[MetadataWorkUnit]:
+        # We don't want any implicit aspects to be produced
+        # so we are not using get_workunits_internal
+
         if self.config.throw_uncaught_exceptions:
             raise Exception("This is a test exception")
 
@@ -176,10 +185,17 @@ class DataHubMockDataSource(Source):
                     message="This is test warning",
                     title="Test Warning",
                     context=f"This is test warning {i}",
+                    log_category=StructuredLogCategory.LINEAGE,
+                )
+
+        if self.config.num_info > 0:
+            for i in range(self.config.num_info):
+                self.report.info(
+                    message="This is test info",
+                    title="Test Info",
+                    context=f"This is test info {i}",
                 )
 
-        # We don't want any implicit aspects to be produced
-        # so we are not using get_workunits_internal
         if self.config.gen_1.enabled:
             for wu in self._data_gen_1():
                 if self.report.first_urn_seen is None:
@@ -309,7 +325,7 @@ class DataHubMockDataSource(Source):
             table_level, table_index, subtype_pattern, subtype_types, level_subtypes
         )
 
-        urn = make_dataset_urn(platform=
+        urn = make_dataset_urn(platform=PLATFORM_NAME, name=table_name)
         mcp = MetadataChangeProposalWrapper(
             entityUrn=urn,
             entityType="dataset",
@@ -433,7 +449,7 @@ class DataHubMockDataSource(Source):
 
     def _get_status_aspect(self, table: str) -> MetadataWorkUnit:
         urn = make_dataset_urn(
-            platform=
+            platform=PLATFORM_NAME,
             name=table,
         )
         mcp = MetadataChangeProposalWrapper(
@@ -448,7 +464,7 @@ class DataHubMockDataSource(Source):
     ) -> MetadataWorkUnit:
         mcp = MetadataChangeProposalWrapper(
             entityUrn=make_dataset_urn(
-                platform=
+                platform=PLATFORM_NAME,
                 name=downstream_table,
             ),
             entityType="dataset",
@@ -456,7 +472,7 @@ class DataHubMockDataSource(Source):
                 upstreams=[
                     UpstreamClass(
                         dataset=make_dataset_urn(
-                            platform=
+                            platform=PLATFORM_NAME,
                             name=upstream_table,
                         ),
                         type=DatasetLineageTypeClass.TRANSFORMED,
@@ -468,7 +484,7 @@ class DataHubMockDataSource(Source):
 
     def _get_profile_aspect(self, table: str) -> MetadataWorkUnit:
         urn = make_dataset_urn(
-            platform=
+            platform=PLATFORM_NAME,
             name=table,
         )
         mcp = MetadataChangeProposalWrapper(
@@ -485,7 +501,7 @@ class DataHubMockDataSource(Source):
 
     def _get_usage_aspect(self, table: str) -> MetadataWorkUnit:
         urn = make_dataset_urn(
-            platform=
+            platform=PLATFORM_NAME,
             name=table,
         )
         mcp = MetadataChangeProposalWrapper(
datahub/ingestion/source/powerbi/powerbi.py
CHANGED
@@ -1226,7 +1226,10 @@ class Mapper:
 @platform_name("PowerBI")
 @config_class(PowerBiDashboardSourceConfig)
 @support_status(SupportStatus.CERTIFIED)
-@capability(
+@capability(
+    SourceCapability.CONTAINERS,
+    "Enabled by default",
+)
 @capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
 @capability(SourceCapability.OWNERSHIP, "Enabled by default")
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
@@ -132,6 +132,7 @@ logger: logging.Logger = logging.getLogger(__name__)
     "Enabled by default",
     subtype_modifier=[
         SourceCapabilityModifier.DATABASE,
+        SourceCapabilityModifier.SCHEMA,
     ],
 )
 @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
datahub/ingestion/source/salesforce.py
CHANGED
@@ -549,6 +549,14 @@ class SalesforceApi:
     capability_name=SourceCapability.TAGS,
     description="Enabled by default",
 )
+@capability(
+    capability_name=SourceCapability.LINEAGE_COARSE,
+    description="Extract table-level lineage for Salesforce objects",
+    subtype_modifier=[
+        SourceCapabilityModifier.SALESFORCE_CUSTOM_OBJECT,
+        SourceCapabilityModifier.SALESFORCE_STANDARD_OBJECT,
+    ],
+)
 class SalesforceSource(StatefulIngestionSourceBase):
     def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None:
         super().__init__(config, ctx)
datahub/ingestion/source/sql/athena_properties_extractor.py
CHANGED
@@ -99,10 +99,10 @@ class AthenaPropertiesExtractor:
     """A class to extract properties from Athena CREATE TABLE statements."""
 
     CREATE_TABLE_REGEXP = re.compile(
-        "(CREATE TABLE[\s\n]*)(.*?)(\s*\()", re.MULTILINE | re.IGNORECASE
+        r"(CREATE TABLE[\s\n]*)(.*?)(\s*\()", re.MULTILINE | re.IGNORECASE
     )
     PARTITIONED_BY_REGEXP = re.compile(
-        "(PARTITIONED BY[\s\n]*\()((?:[^()]|\([^)]*\))*?)(\))",
+        r"(PARTITIONED BY[\s\n]*\()((?:[^()]|\([^)]*\))*?)(\))",
         re.MULTILINE | re.IGNORECASE,
     )
 
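Side note (not part of the diff): the change above only adds raw-string prefixes; without them, escapes like \s and \( inside a plain string trigger invalid-escape warnings on newer Python versions. A small self-contained sketch of the corrected pattern in use follows; the sample DDL string is made up for illustration.

import re

# Same pattern as in the diff, compiled as a raw string.
CREATE_TABLE_REGEXP = re.compile(
    r"(CREATE TABLE[\s\n]*)(.*?)(\s*\()", re.MULTILINE | re.IGNORECASE
)

ddl = "CREATE TABLE my_db.events (id string)"
match = CREATE_TABLE_REGEXP.search(ddl)
# group(2) captures the table identifier between "CREATE TABLE" and the column list.
assert match is not None and match.group(2) == "my_db.events"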
datahub/ingestion/source/sql/hive_metastore.py
CHANGED
@@ -27,6 +27,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.common.subtypes import (
     DatasetContainerSubTypes,
     DatasetSubTypes,
+    SourceCapabilityModifier,
 )
 from datahub.ingestion.source.sql.sql_common import (
     SQLAlchemySource,
@@ -168,6 +169,13 @@ class HiveMetastore(BasicSQLAlchemyConfig):
 @capability(
     SourceCapability.LINEAGE_COARSE, "View lineage is not supported", supported=False
 )
+@capability(
+    SourceCapability.CONTAINERS,
+    "Enabled by default",
+    subtype_modifier=[
+        SourceCapabilityModifier.CATALOG,
+    ],
+)
 class HiveMetastoreSource(SQLAlchemySource):
     """
     This plugin extracts the following:
datahub/ingestion/source/sql/teradata.py
CHANGED
@@ -42,6 +42,7 @@ from datahub.ingestion.api.decorators import (
 )
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.graph.client import DataHubGraph
+from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
 from datahub.ingestion.source.sql.sql_common import register_custom_type
 from datahub.ingestion.source.sql.sql_config import SQLCommonConfig
 from datahub.ingestion.source.sql.sql_report import SQLSourceReport
@@ -539,7 +540,13 @@ class TeradataConfig(BaseTeradataConfig, BaseTimeWindowConfig):
 @config_class(TeradataConfig)
 @support_status(SupportStatus.TESTING)
 @capability(SourceCapability.DOMAINS, "Enabled by default")
-@capability(
+@capability(
+    SourceCapability.CONTAINERS,
+    "Enabled by default",
+    subtype_modifier=[
+        SourceCapabilityModifier.DATABASE,
+    ],
+)
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
 @capability(
     SourceCapability.DELETION_DETECTION,
datahub/ingestion/source/sql/trino.py
CHANGED
@@ -36,6 +36,7 @@ from datahub.ingestion.api.decorators import (
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.extractor import schema_util
 from datahub.ingestion.source.common.data_reader import DataReader
+from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
 from datahub.ingestion.source.sql.sql_common import (
     SQLAlchemySource,
     SqlWorkUnit,
@@ -249,6 +250,14 @@ class TrinoConfig(BasicSQLAlchemyConfig):
 @support_status(SupportStatus.CERTIFIED)
 @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
+@capability(
+    SourceCapability.LINEAGE_COARSE,
+    "Extract table-level lineage",
+    subtype_modifier=[
+        SourceCapabilityModifier.TABLE,
+        SourceCapabilityModifier.VIEW,
+    ],
+)
 class TrinoSource(SQLAlchemySource):
     """
 
datahub/ingestion/source/unity/tag_entities.py
CHANGED
@@ -77,13 +77,13 @@ class UnityCatalogTagPlatformResourceId(BaseModel, ExternalEntityId):
         )
         if existing_platform_resource:
             logger.info(
-                f"Found existing UnityCatalogTagPlatformResourceId for tag {tag.key.
+                f"Found existing UnityCatalogTagPlatformResourceId for tag {tag.key.raw_text}: {existing_platform_resource}"
             )
             return existing_platform_resource
 
         return UnityCatalogTagPlatformResourceId(
-            tag_key=tag.key.
-            tag_value=tag.value.
+            tag_key=tag.key.raw_text,
+            tag_value=tag.value.raw_text if tag.value is not None else None,
             platform_instance=platform_instance,
             exists_in_unity_catalog=exists_in_unity_catalog,
             persisted=False,
datahub/sdk/entity_client.py
CHANGED
@@ -1,11 +1,12 @@
 from __future__ import annotations
 
 import warnings
-from typing import TYPE_CHECKING, Union, overload
+from typing import TYPE_CHECKING, Optional, Union, overload
 
 import datahub.metadata.schema_classes as models
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
+from datahub.emitter.rest_emitter import EmitMode
 from datahub.errors import IngestionAttributionWarning, ItemNotFoundError, SdkUsageError
 from datahub.ingestion.graph.client import DataHubGraph
 from datahub.metadata.urns import (
@@ -133,7 +134,7 @@ class EntityClient:
 
         return entity
 
-    def create(self, entity: Entity) -> None:
+    def create(self, entity: Entity, *, emit_mode: Optional[EmitMode] = None) -> None:
         mcps = []
 
         if self._graph.exists(str(entity.urn)):
@@ -152,9 +153,12 @@ class EntityClient:
             )
         mcps.extend(entity.as_mcps(models.ChangeTypeClass.CREATE))
 
-
+        if emit_mode:
+            self._graph.emit_mcps(mcps, emit_mode=emit_mode)
+        else:
+            self._graph.emit_mcps(mcps)
 
-    def upsert(self, entity: Entity) -> None:
+    def upsert(self, entity: Entity, *, emit_mode: Optional[EmitMode] = None) -> None:
         if entity._prev_aspects is None and self._graph.exists(str(entity.urn)):
             warnings.warn(
                 f"The entity {entity.urn} already exists. This operation will partially overwrite the existing entity.",
@@ -164,9 +168,17 @@ class EntityClient:
         # TODO: If there are no previous aspects but the entity exists, should we delete aspects that are not present here?
 
         mcps = entity.as_mcps(models.ChangeTypeClass.UPSERT)
-
+        if emit_mode:
+            self._graph.emit_mcps(mcps, emit_mode=emit_mode)
+        else:
+            self._graph.emit_mcps(mcps)
 
-    def update(
+    def update(
+        self,
+        entity: Union[Entity, MetadataPatchProposal],
+        *,
+        emit_mode: Optional[EmitMode] = None,
+    ) -> None:
         if isinstance(entity, MetadataPatchProposal):
             return self._update_patch(entity)
 
@@ -179,7 +191,10 @@ class EntityClient:
         # -> probably add a "mode" parameter that can be "update" (e.g. if not modified) or "update_force"
 
         mcps = entity.as_mcps(models.ChangeTypeClass.UPSERT)
-
+        if emit_mode:
+            self._graph.emit_mcps(mcps, emit_mode=emit_mode)
+        else:
+            self._graph.emit_mcps(mcps)
 
     def _update_patch(
         self, updater: MetadataPatchProposal, check_exists: bool = True
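Side note (not part of the diff): a minimal, hedged usage sketch of the new keyword-only emit_mode argument on EntityClient.create/upsert/update. It assumes the high-level DataHubClient SDK entry point and the SDK Dataset builder; the server URL, dataset details, and the EmitMode member chosen are placeholders, and when emit_mode is omitted the previous default emit behavior of DataHubGraph.emit_mcps is kept.

from datahub.emitter.rest_emitter import EmitMode
from datahub.sdk import DataHubClient, Dataset

# Placeholder connection details; point this at a real DataHub instance.
client = DataHubClient(server="http://localhost:8080")

dataset = Dataset(
    platform="hive",
    name="my_db.my_table",
    description="Created via the SDK",
)

# Placeholder: pick an explicit EmitMode member appropriate for your deployment.
chosen_mode = list(EmitMode)[0]
client.entities.upsert(dataset, emit_mode=chosen_mode)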
datahub/utilities/mapping.py
CHANGED
@@ -83,7 +83,7 @@ class Constants:
     MATCH = "match"
     USER_OWNER = "user"
     GROUP_OWNER = "group"
-    OPERAND_DATATYPE_SUPPORTED = [int, bool, str, float]
+    OPERAND_DATATYPE_SUPPORTED = [int, bool, str, float, list]
     TAG_PARTITION_KEY = "PARTITION_KEY"
     TAG_DIST_KEY = "DIST_KEY"
     TAG_SORT_KEY = "SORT_KEY"
@@ -455,7 +455,34 @@ class OperationProcessor:
         # function to check if a match clause is satisfied to a value.
         if not any(
             isinstance(raw_props_value, t) for t in Constants.OPERAND_DATATYPE_SUPPORTED
-        )
+        ):
+            return None
+
+        # Handle list values by checking if any item in the list matches
+        if isinstance(raw_props_value, list):
+            # For lists, we need to find at least one matching item
+            # Return a match with the concatenated values of all matching items
+            matching_items = []
+            for item in raw_props_value:
+                if isinstance(item, str):
+                    match = re.match(match_clause, item)
+                    if match:
+                        matching_items.append(item)
+                elif isinstance(match_clause, type(item)):
+                    match = re.match(str(match_clause), str(item))
+                    if match:
+                        matching_items.append(str(item))
+
+            if matching_items:
+                # Create a synthetic match object with all matching items joined
+                combined_value = ",".join(matching_items)
+                return re.match(
+                    ".*", combined_value
+                )  # Always matches, returns combined value
+            return None
+
+        # Handle scalar values (existing logic)
+        elif not isinstance(raw_props_value, type(match_clause)):
             return None
         elif isinstance(raw_props_value, str):
             return re.match(match_clause, raw_props_value)
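Side note (not part of the diff): a standalone worked example of the list handling added above, with made-up sample data. Every string item that matches the clause is collected, the matches are joined with commas, and a trivially matching re.Match over the combined string is returned so downstream code can read the combined value from group(0).

import re

raw_props_value = ["pii", "pii.email", "public"]
match_clause = "pii.*"

matching_items = [
    item
    for item in raw_props_value
    if isinstance(item, str) and re.match(match_clause, item)
]
combined = ",".join(matching_items)        # -> "pii,pii.email"
synthetic_match = re.match(".*", combined)  # always matches; carries the combined value
assert synthetic_match is not None and synthetic_match.group(0) == "pii,pii.email"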