acryl-datahub 1.2.0.4rc2__py3-none-any.whl → 1.2.0.4rc4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.2.0.4rc2.dist-info → acryl_datahub-1.2.0.4rc4.dist-info}/METADATA +2265 -2264
- {acryl_datahub-1.2.0.4rc2.dist-info → acryl_datahub-1.2.0.4rc4.dist-info}/RECORD +25 -24
- datahub/_version.py +1 -1
- datahub/cli/specific/assertions_cli.py +37 -2
- datahub/cli/specific/datacontract_cli.py +54 -4
- datahub/ingestion/api/report.py +21 -2
- datahub/ingestion/source/data_lake_common/path_spec.py +5 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +6 -3
- datahub/ingestion/source/fivetran/fivetran_log_api.py +4 -3
- datahub/ingestion/source/grafana/models.py +6 -0
- datahub/ingestion/source/tableau/tableau.py +1 -1
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/metadata/_internal_schema_classes.py +513 -513
- datahub/metadata/_urns/urn_defs.py +1684 -1684
- datahub/metadata/schema.avsc +16745 -16348
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/sdk/search_client.py +3 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataset.py +37 -59
- datahub/utilities/server_config_util.py +2 -1
- {acryl_datahub-1.2.0.4rc2.dist-info → acryl_datahub-1.2.0.4rc4.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.4rc2.dist-info → acryl_datahub-1.2.0.4rc4.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.4rc2.dist-info → acryl_datahub-1.2.0.4rc4.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.4rc2.dist-info → acryl_datahub-1.2.0.4rc4.dist-info}/top_level.txt +0 -0
|
@@ -130,6 +130,10 @@
|
|
|
130
130
|
"doc": "The optional messageId, if provided serves as a custom user-defined unique identifier for an aspect value."
|
|
131
131
|
},
|
|
132
132
|
{
|
|
133
|
+
"Searchable": {
|
|
134
|
+
"fieldType": "COUNT",
|
|
135
|
+
"hasValuesFieldName": "hasUniqueUserCount"
|
|
136
|
+
},
|
|
133
137
|
"TimeseriesField": {},
|
|
134
138
|
"type": [
|
|
135
139
|
"null",
|
|
@@ -140,6 +144,10 @@
|
|
|
140
144
|
"doc": "Unique user count"
|
|
141
145
|
},
|
|
142
146
|
{
|
|
147
|
+
"Searchable": {
|
|
148
|
+
"fieldType": "COUNT",
|
|
149
|
+
"hasValuesFieldName": "hasTotalSqlQueriesCount"
|
|
150
|
+
},
|
|
143
151
|
"TimeseriesField": {},
|
|
144
152
|
"type": [
|
|
145
153
|
"null",
|
datahub/sdk/search_client.py
CHANGED
|
@@ -112,6 +112,8 @@ class SearchClient:
|
|
|
112
112
|
self,
|
|
113
113
|
query: Optional[str] = None,
|
|
114
114
|
filter: Optional[Filter] = None,
|
|
115
|
+
*,
|
|
116
|
+
skip_cache: bool = False,
|
|
115
117
|
) -> Iterable[Urn]:
|
|
116
118
|
# TODO: Add better limit / pagination support.
|
|
117
119
|
types, compiled_filters = compile_filters(filter)
|
|
@@ -120,5 +122,6 @@ class SearchClient:
|
|
|
120
122
|
status=None,
|
|
121
123
|
extra_or_filters=compiled_filters,
|
|
122
124
|
entity_types=types,
|
|
125
|
+
skip_cache=skip_cache,
|
|
123
126
|
):
|
|
124
127
|
yield Urn.from_string(urn)
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
from abc import abstractmethod
|
|
2
|
+
from typing import List, Tuple
|
|
3
|
+
|
|
4
|
+
from typing_extensions import Self
|
|
5
|
+
|
|
6
|
+
from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath
|
|
7
|
+
from datahub.metadata.schema_classes import (
|
|
8
|
+
FineGrainedLineageClass as FineGrainedLineage,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class HasFineGrainedLineagePatch(MetadataPatchProposal):
|
|
13
|
+
@abstractmethod
|
|
14
|
+
def _fine_grained_lineage_location(self) -> Tuple[str, PatchPath]:
|
|
15
|
+
"""Return the aspect name where fine-grained lineage is stored."""
|
|
16
|
+
raise NotImplementedError("Subclasses must implement this method.")
|
|
17
|
+
|
|
18
|
+
@staticmethod
|
|
19
|
+
def _get_fine_grained_key(
|
|
20
|
+
fine_grained_lineage: FineGrainedLineage,
|
|
21
|
+
) -> Tuple[str, str, str]:
|
|
22
|
+
downstreams = fine_grained_lineage.downstreams or []
|
|
23
|
+
if len(downstreams) != 1:
|
|
24
|
+
raise TypeError("Cannot patch with more or less than one downstream.")
|
|
25
|
+
transform_op = fine_grained_lineage.transformOperation or "NONE"
|
|
26
|
+
downstream_urn = downstreams[0]
|
|
27
|
+
query_id = fine_grained_lineage.query or "NONE"
|
|
28
|
+
return transform_op, downstream_urn, query_id
|
|
29
|
+
|
|
30
|
+
def add_fine_grained_lineage(
|
|
31
|
+
self, fine_grained_lineage: FineGrainedLineage
|
|
32
|
+
) -> Self:
|
|
33
|
+
aspect_name, path = self._fine_grained_lineage_location()
|
|
34
|
+
(
|
|
35
|
+
transform_op,
|
|
36
|
+
downstream_urn,
|
|
37
|
+
query_id,
|
|
38
|
+
) = self._get_fine_grained_key(fine_grained_lineage)
|
|
39
|
+
for upstream_urn in fine_grained_lineage.upstreams or []:
|
|
40
|
+
self._add_patch(
|
|
41
|
+
aspect_name,
|
|
42
|
+
"add",
|
|
43
|
+
path=(*path, transform_op, downstream_urn, query_id, upstream_urn),
|
|
44
|
+
value={"confidenceScore": fine_grained_lineage.confidenceScore},
|
|
45
|
+
)
|
|
46
|
+
return self
|
|
47
|
+
|
|
48
|
+
def remove_fine_grained_lineage(
|
|
49
|
+
self, fine_grained_lineage: FineGrainedLineage
|
|
50
|
+
) -> Self:
|
|
51
|
+
aspect_name, path = self._fine_grained_lineage_location()
|
|
52
|
+
(
|
|
53
|
+
transform_op,
|
|
54
|
+
downstream_urn,
|
|
55
|
+
query_id,
|
|
56
|
+
) = self._get_fine_grained_key(fine_grained_lineage)
|
|
57
|
+
for upstream_urn in fine_grained_lineage.upstreams or []:
|
|
58
|
+
self._add_patch(
|
|
59
|
+
aspect_name,
|
|
60
|
+
"remove",
|
|
61
|
+
path=(*path, transform_op, downstream_urn, query_id, upstream_urn),
|
|
62
|
+
value={},
|
|
63
|
+
)
|
|
64
|
+
return self
|
|
65
|
+
|
|
66
|
+
def set_fine_grained_lineages(
|
|
67
|
+
self, fine_grained_lineages: List[FineGrainedLineage]
|
|
68
|
+
) -> Self:
|
|
69
|
+
aspect_name, path = self._fine_grained_lineage_location()
|
|
70
|
+
self._add_patch(
|
|
71
|
+
aspect_name,
|
|
72
|
+
"add",
|
|
73
|
+
path=path,
|
|
74
|
+
value=fine_grained_lineages,
|
|
75
|
+
)
|
|
76
|
+
return self
|
datahub/specific/datajob.py
CHANGED
|
@@ -1,15 +1,19 @@
|
|
|
1
|
-
from typing import List, Optional, Tuple, Union
|
|
1
|
+
from typing import List, Optional, Set, Tuple, Union
|
|
2
2
|
|
|
3
3
|
from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath
|
|
4
4
|
from datahub.metadata.schema_classes import (
|
|
5
5
|
DataJobInfoClass as DataJobInfo,
|
|
6
6
|
DataJobInputOutputClass as DataJobInputOutput,
|
|
7
7
|
EdgeClass as Edge,
|
|
8
|
+
FineGrainedLineageClass as FineGrainedLineage,
|
|
8
9
|
KafkaAuditHeaderClass,
|
|
9
10
|
SystemMetadataClass,
|
|
10
11
|
)
|
|
11
12
|
from datahub.metadata.urns import SchemaFieldUrn, Urn
|
|
12
13
|
from datahub.specific.aspect_helpers.custom_properties import HasCustomPropertiesPatch
|
|
14
|
+
from datahub.specific.aspect_helpers.fine_grained_lineage import (
|
|
15
|
+
HasFineGrainedLineagePatch,
|
|
16
|
+
)
|
|
13
17
|
from datahub.specific.aspect_helpers.ownership import HasOwnershipPatch
|
|
14
18
|
from datahub.specific.aspect_helpers.tags import HasTagsPatch
|
|
15
19
|
from datahub.specific.aspect_helpers.terms import HasTermsPatch
|
|
@@ -20,6 +24,7 @@ class DataJobPatchBuilder(
|
|
|
20
24
|
HasCustomPropertiesPatch,
|
|
21
25
|
HasTagsPatch,
|
|
22
26
|
HasTermsPatch,
|
|
27
|
+
HasFineGrainedLineagePatch,
|
|
23
28
|
MetadataPatchProposal,
|
|
24
29
|
):
|
|
25
30
|
def __init__(
|
|
@@ -40,10 +45,19 @@ class DataJobPatchBuilder(
|
|
|
40
45
|
urn, system_metadata=system_metadata, audit_header=audit_header
|
|
41
46
|
)
|
|
42
47
|
|
|
48
|
+
# Track fine-grained lineages for DataJob-specific handling
|
|
49
|
+
self._fine_grained_lineages_to_add: List[FineGrainedLineage] = []
|
|
50
|
+
self._fine_grained_lineage_keys_to_remove: Set[Tuple[str, str, str]] = set()
|
|
51
|
+
self._fine_grained_lineages_set: Optional[List[FineGrainedLineage]] = None
|
|
52
|
+
|
|
43
53
|
@classmethod
|
|
44
54
|
def _custom_properties_location(cls) -> Tuple[str, PatchPath]:
|
|
45
55
|
return DataJobInfo.ASPECT_NAME, ("customProperties",)
|
|
46
56
|
|
|
57
|
+
@classmethod
|
|
58
|
+
def _fine_grained_lineage_location(cls) -> Tuple[str, PatchPath]:
|
|
59
|
+
return DataJobInputOutput.ASPECT_NAME, ("fineGrainedLineages",)
|
|
60
|
+
|
|
47
61
|
def add_input_datajob(self, input: Union[Edge, Urn, str]) -> "DataJobPatchBuilder":
|
|
48
62
|
"""
|
|
49
63
|
Adds an input data job to the DataJobPatchBuilder.
|
datahub/specific/dataset.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import warnings
|
|
1
2
|
from typing import Generic, List, Optional, Tuple, TypeVar, Union
|
|
2
3
|
|
|
3
4
|
from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath
|
|
@@ -17,6 +18,9 @@ from datahub.metadata.schema_classes import (
|
|
|
17
18
|
)
|
|
18
19
|
from datahub.metadata.urns import DatasetUrn, TagUrn, Urn
|
|
19
20
|
from datahub.specific.aspect_helpers.custom_properties import HasCustomPropertiesPatch
|
|
21
|
+
from datahub.specific.aspect_helpers.fine_grained_lineage import (
|
|
22
|
+
HasFineGrainedLineagePatch,
|
|
23
|
+
)
|
|
20
24
|
from datahub.specific.aspect_helpers.ownership import HasOwnershipPatch
|
|
21
25
|
from datahub.specific.aspect_helpers.structured_properties import (
|
|
22
26
|
HasStructuredPropertiesPatch,
|
|
@@ -99,6 +103,7 @@ class DatasetPatchBuilder(
|
|
|
99
103
|
HasStructuredPropertiesPatch,
|
|
100
104
|
HasTagsPatch,
|
|
101
105
|
HasTermsPatch,
|
|
106
|
+
HasFineGrainedLineagePatch,
|
|
102
107
|
MetadataPatchProposal,
|
|
103
108
|
):
|
|
104
109
|
def __init__(
|
|
@@ -115,6 +120,10 @@ class DatasetPatchBuilder(
|
|
|
115
120
|
def _custom_properties_location(cls) -> Tuple[str, PatchPath]:
|
|
116
121
|
return DatasetProperties.ASPECT_NAME, ("customProperties",)
|
|
117
122
|
|
|
123
|
+
@classmethod
|
|
124
|
+
def _fine_grained_lineage_location(cls) -> Tuple[str, PatchPath]:
|
|
125
|
+
return UpstreamLineage.ASPECT_NAME, ("fineGrainedLineages",)
|
|
126
|
+
|
|
118
127
|
def add_upstream_lineage(self, upstream: Upstream) -> "DatasetPatchBuilder":
|
|
119
128
|
self._add_patch(
|
|
120
129
|
UpstreamLineage.ASPECT_NAME,
|
|
@@ -144,75 +153,44 @@ class DatasetPatchBuilder(
|
|
|
144
153
|
def add_fine_grained_upstream_lineage(
|
|
145
154
|
self, fine_grained_lineage: FineGrainedLineage
|
|
146
155
|
) -> "DatasetPatchBuilder":
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
"add",
|
|
156
|
-
path=self._build_fine_grained_path(
|
|
157
|
-
transform_op, downstream_urn, query_id, upstream_urn
|
|
158
|
-
),
|
|
159
|
-
value={"confidenceScore": fine_grained_lineage.confidenceScore},
|
|
160
|
-
)
|
|
161
|
-
return self
|
|
162
|
-
|
|
163
|
-
@staticmethod
|
|
164
|
-
def get_fine_grained_key(
|
|
165
|
-
fine_grained_lineage: FineGrainedLineage,
|
|
166
|
-
) -> Tuple[str, str, str]:
|
|
167
|
-
downstreams = fine_grained_lineage.downstreams or []
|
|
168
|
-
if len(downstreams) != 1:
|
|
169
|
-
raise TypeError("Cannot patch with more or less than one downstream.")
|
|
170
|
-
transform_op = fine_grained_lineage.transformOperation or "NONE"
|
|
171
|
-
downstream_urn = downstreams[0]
|
|
172
|
-
query_id = fine_grained_lineage.query or "NONE"
|
|
173
|
-
return transform_op, downstream_urn, query_id
|
|
174
|
-
|
|
175
|
-
@classmethod
|
|
176
|
-
def _build_fine_grained_path(
|
|
177
|
-
cls, transform_op: str, downstream_urn: str, query_id: str, upstream_urn: str
|
|
178
|
-
) -> PatchPath:
|
|
179
|
-
return (
|
|
180
|
-
"fineGrainedLineages",
|
|
181
|
-
transform_op,
|
|
182
|
-
downstream_urn,
|
|
183
|
-
query_id,
|
|
184
|
-
upstream_urn,
|
|
156
|
+
"""
|
|
157
|
+
Deprecated: Use `add_fine_grained_lineage` instead.
|
|
158
|
+
"""
|
|
159
|
+
warnings.warn(
|
|
160
|
+
"add_fine_grained_upstream_lineage() is deprecated."
|
|
161
|
+
" Use add_fine_grained_lineage() instead.",
|
|
162
|
+
DeprecationWarning,
|
|
163
|
+
stacklevel=2,
|
|
185
164
|
)
|
|
165
|
+
return self.add_fine_grained_lineage(fine_grained_lineage)
|
|
186
166
|
|
|
187
167
|
def remove_fine_grained_upstream_lineage(
|
|
188
168
|
self, fine_grained_lineage: FineGrainedLineage
|
|
189
169
|
) -> "DatasetPatchBuilder":
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
transform_op, downstream_urn, query_id, upstream_urn
|
|
201
|
-
),
|
|
202
|
-
value={},
|
|
203
|
-
)
|
|
204
|
-
return self
|
|
170
|
+
"""
|
|
171
|
+
Deprecated: Use `remove_fine_grained_lineage` instead.
|
|
172
|
+
"""
|
|
173
|
+
warnings.warn(
|
|
174
|
+
"remove_fine_grained_upstream_lineage() is deprecated."
|
|
175
|
+
" Use remove_fine_grained_lineage() instead.",
|
|
176
|
+
DeprecationWarning,
|
|
177
|
+
stacklevel=2,
|
|
178
|
+
)
|
|
179
|
+
return self.remove_fine_grained_lineage(fine_grained_lineage)
|
|
205
180
|
|
|
206
181
|
def set_fine_grained_upstream_lineages(
|
|
207
182
|
self, fine_grained_lineages: List[FineGrainedLineage]
|
|
208
183
|
) -> "DatasetPatchBuilder":
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
184
|
+
"""
|
|
185
|
+
Deprecated: Use `set_fine_grained_lineages` instead.
|
|
186
|
+
"""
|
|
187
|
+
warnings.warn(
|
|
188
|
+
"set_fine_grained_upstream_lineages() is deprecated."
|
|
189
|
+
" Use set_fine_grained_lineages() instead.",
|
|
190
|
+
DeprecationWarning,
|
|
191
|
+
stacklevel=2,
|
|
214
192
|
)
|
|
215
|
-
return self
|
|
193
|
+
return self.set_fine_grained_lineages(fine_grained_lineages)
|
|
216
194
|
|
|
217
195
|
def for_field(
|
|
218
196
|
self, field_path: str, editable: bool = True
|
|
@@ -242,7 +242,8 @@ class RestServiceConfig:
|
|
|
242
242
|
|
|
243
243
|
# Check if this is a config-based feature
|
|
244
244
|
if feature in config_based_features:
|
|
245
|
-
|
|
245
|
+
result = config_based_features[feature]()
|
|
246
|
+
return bool(result) if result is not None else False
|
|
246
247
|
|
|
247
248
|
# For environment-based features, determine requirements based on cloud vs. non-cloud
|
|
248
249
|
deployment_type = "cloud" if self.is_datahub_cloud else "core"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|