acryl-datahub 1.0.0rc8__py3-none-any.whl → 1.0.0rc9__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-1.0.0rc8.dist-info → acryl_datahub-1.0.0rc9.dist-info}/METADATA +2445 -2445
- {acryl_datahub-1.0.0rc8.dist-info → acryl_datahub-1.0.0rc9.dist-info}/RECORD +46 -42
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +731 -42
- datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
- datahub/cli/specific/dataset_cli.py +128 -14
- datahub/ingestion/graph/client.py +15 -11
- datahub/ingestion/graph/filters.py +64 -37
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/preset.py +7 -4
- datahub/ingestion/source/superset.py +158 -24
- datahub/metadata/_schema_classes.py +157 -14
- datahub/metadata/_urns/urn_defs.py +58 -58
- datahub/metadata/schema.avsc +23 -10
- datahub/metadata/schemas/CorpGroupKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +2 -1
- datahub/metadata/schemas/DataProductKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +2 -1
- datahub/metadata/schemas/MLFeatureKey.avsc +2 -1
- datahub/metadata/schemas/MLFeatureTableKey.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupKey.avsc +2 -1
- datahub/metadata/schemas/MLModelKey.avsc +2 -1
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +2 -1
- datahub/metadata/schemas/PostKey.avsc +2 -1
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/metadata/schemas/VersionProperties.avsc +18 -0
- datahub/metadata/schemas/VersionSetProperties.avsc +5 -0
- datahub/pydantic/__init__.py +0 -0
- datahub/pydantic/compat.py +58 -0
- datahub/sdk/__init__.py +1 -0
- datahub/sdk/_all_entities.py +1 -1
- datahub/sdk/_shared.py +88 -3
- datahub/sdk/container.py +7 -1
- datahub/sdk/dataset.py +7 -1
- datahub/sdk/{_entity.py → entity.py} +4 -0
- datahub/sdk/entity_client.py +1 -1
- datahub/sdk/main_client.py +7 -1
- datahub/sdk/resolver_client.py +17 -29
- datahub/sdk/search_client.py +50 -0
- datahub/sdk/search_filters.py +374 -0
- {acryl_datahub-1.0.0rc8.dist-info → acryl_datahub-1.0.0rc9.dist-info}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc8.dist-info → acryl_datahub-1.0.0rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0rc8.dist-info → acryl_datahub-1.0.0rc9.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0rc8.dist-info → acryl_datahub-1.0.0rc9.dist-info}/top_level.txt +0 -0
datahub/sdk/search_filters.py (new file)
@@ -0,0 +1,374 @@
from __future__ import annotations

import abc
from typing import (
    Any,
    List,
    Sequence,
    TypedDict,
    Union,
)

import pydantic

from datahub.configuration.common import ConfigModel
from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2
from datahub.ingestion.graph.client import entity_type_to_graphql
from datahub.ingestion.graph.filters import SearchFilterRule
from datahub.metadata.schema_classes import EntityTypeName
from datahub.metadata.urns import DataPlatformUrn, DomainUrn

_AndSearchFilterRule = TypedDict(
    "_AndSearchFilterRule", {"and": List[SearchFilterRule]}
)
_OrFilters = List[_AndSearchFilterRule]


class _BaseFilter(ConfigModel):
    class Config:
        # We can't wrap this in a TYPE_CHECKING block because the pydantic plugin
        # doesn't recognize it properly. So unfortunately we'll need to live
        # with the deprecation warning w/ pydantic v2.
        allow_population_by_field_name = True
        if PYDANTIC_VERSION_2:
            populate_by_name = True

    @abc.abstractmethod
    def compile(self) -> _OrFilters:
        pass


def _flexible_entity_type_to_graphql(entity_type: str) -> str:
    if entity_type.upper() == entity_type:
        # Assume that we were passed a graphql EntityType enum value,
        # so no conversion is needed.
        return entity_type
    return entity_type_to_graphql(entity_type)


class _EntityTypeFilter(_BaseFilter):
    entity_type: List[str] = pydantic.Field(
        description="The entity type to filter on. Can be 'dataset', 'chart', 'dashboard', 'corpuser', etc.",
    )

    def _build_rule(self) -> SearchFilterRule:
        return SearchFilterRule(
            field="_entityType",
            condition="EQUAL",
            values=[_flexible_entity_type_to_graphql(t) for t in self.entity_type],
        )

    def compile(self) -> _OrFilters:
        return [{"and": [self._build_rule()]}]


class _EntitySubtypeFilter(_BaseFilter):
    entity_type: str
    entity_subtype: str = pydantic.Field(
        description="The entity subtype to filter on. Can be 'Table', 'View', 'Source', etc. depending on the native platform's concepts.",
    )

    def compile(self) -> _OrFilters:
        rules = [
            SearchFilterRule(
                field="_entityType",
                condition="EQUAL",
                values=[_flexible_entity_type_to_graphql(self.entity_type)],
            ),
            SearchFilterRule(
                field="typeNames",
                condition="EQUAL",
                values=[self.entity_subtype],
            ),
        ]
        return [{"and": rules}]


class _PlatformFilter(_BaseFilter):
    platform: List[str]
    # TODO: Add validator to convert string -> list of strings

    @pydantic.validator("platform", each_item=True)
    def validate_platform(cls, v: str) -> str:
        # Subtle - we use the constructor instead of the from_string method
        # because coercion is acceptable here.
        return str(DataPlatformUrn(v))

    def _build_rule(self) -> SearchFilterRule:
        return SearchFilterRule(
            field="platform.keyword",
            condition="EQUAL",
            values=self.platform,
        )

    def compile(self) -> _OrFilters:
        return [{"and": [self._build_rule()]}]


class _DomainFilter(_BaseFilter):
    domain: List[str]

    @pydantic.validator("domain", each_item=True)
    def validate_domain(cls, v: str) -> str:
        return str(DomainUrn.from_string(v))

    def _build_rule(self) -> SearchFilterRule:
        return SearchFilterRule(
            field="domains",
            condition="EQUAL",
            values=self.domain,
        )

    def compile(self) -> _OrFilters:
        return [{"and": [self._build_rule()]}]


class _EnvFilter(_BaseFilter):
    # Note that not all entity types have an env (e.g. dashboards / charts).
    # If the env filter is specified, these will be excluded.
    env: List[str]

    def compile(self) -> _OrFilters:
        return [
            # For most entity types, we look at the origin field.
            {
                "and": [
                    SearchFilterRule(
                        field="origin",
                        condition="EQUAL",
                        values=self.env,
                    ),
                ]
            },
            # For containers, we now have an "env" property as of
            # https://github.com/datahub-project/datahub/pull/11214
            # Prior to this, we put "env" in the customProperties. But we're
            # not bothering with that here.
            {
                "and": [
                    SearchFilterRule(
                        field="env",
                        condition="EQUAL",
                        values=self.env,
                    ),
                ]
            },
        ]


class _CustomCondition(_BaseFilter):
    """Represents a single field condition"""

    field: str
    condition: str
    values: List[str]

    def compile(self) -> _OrFilters:
        rule = SearchFilterRule(
            field=self.field,
            condition=self.condition,
            values=self.values,
        )
        return [{"and": [rule]}]


class _And(_BaseFilter):
    """Represents an AND conjunction of filters"""

    and_: Sequence["Filter"] = pydantic.Field(alias="and")
    # TODO: Add validator to ensure that the "and" field is not empty

    def compile(self) -> _OrFilters:
        # The "and" operator must be implemented by doing a Cartesian product
        # of the OR clauses.
        # Example 1:
        # (A or B) and (C or D) ->
        # (A and C) or (A and D) or (B and C) or (B and D)
        # Example 2:
        # (A or B) and (C or D) and (E or F) ->
        # (A and C and E) or (A and C and F) or (A and D and E) or (A and D and F) or
        # (B and C and E) or (B and C and F) or (B and D and E) or (B and D and F)

        # Start with the first filter's OR clauses
        result = self.and_[0].compile()

        # For each subsequent filter
        for filter in self.and_[1:]:
            new_result = []
            # Get its OR clauses
            other_clauses = filter.compile()

            # Create Cartesian product
            for existing_clause in result:
                for other_clause in other_clauses:
                    # Merge the AND conditions from both clauses
                    new_result.append(self._merge_ands(existing_clause, other_clause))

            result = new_result

        return result

    @classmethod
    def _merge_ands(
        cls, a: _AndSearchFilterRule, b: _AndSearchFilterRule
    ) -> _AndSearchFilterRule:
        return {
            "and": [
                *a["and"],
                *b["and"],
            ]
        }


class _Or(_BaseFilter):
    """Represents an OR conjunction of filters"""

    or_: Sequence["Filter"] = pydantic.Field(alias="or")
    # TODO: Add validator to ensure that the "or" field is not empty

    def compile(self) -> _OrFilters:
        merged_filter = []
        for filter in self.or_:
            merged_filter.extend(filter.compile())
        return merged_filter


class _Not(_BaseFilter):
    """Represents a NOT filter"""

    not_: "Filter" = pydantic.Field(alias="not")

    @pydantic.validator("not_", pre=False)
    def validate_not(cls, v: "Filter") -> "Filter":
        inner_filter = v.compile()
        if len(inner_filter) != 1:
            raise ValueError(
                "Cannot negate a filter with multiple OR clauses [not yet supported]"
            )
        return v

    def compile(self) -> _OrFilters:
        # TODO: Eventually we'll want to implement a full DNF normalizer.
        # https://en.wikipedia.org/wiki/Disjunctive_normal_form#Conversion_to_DNF

        inner_filter = self.not_.compile()
        assert len(inner_filter) == 1  # validated above

        # ¬(A and B) -> (¬A) OR (¬B)
        and_filters = inner_filter[0]["and"]
        final_filters: _OrFilters = []
        for rule in and_filters:
            final_filters.append({"and": [rule.negate()]})

        return final_filters


# TODO: With pydantic 2, we can use a RootModel with a
# discriminated union to make the error messages more informative.
Filter = Union[
    _And,
    _Or,
    _Not,
    _EntityTypeFilter,
    _EntitySubtypeFilter,
    _PlatformFilter,
    _DomainFilter,
    _EnvFilter,
    _CustomCondition,
]


# Required to resolve forward references to "Filter"
if PYDANTIC_VERSION_2:
    _And.model_rebuild()  # type: ignore
    _Or.model_rebuild()  # type: ignore
    _Not.model_rebuild()  # type: ignore
else:
    _And.update_forward_refs()
    _Or.update_forward_refs()
    _Not.update_forward_refs()


def load_filters(obj: Any) -> Filter:
    if PYDANTIC_VERSION_2:
        return pydantic.TypeAdapter(Filter).validate_python(obj)  # type: ignore
    else:
        return pydantic.parse_obj_as(Filter, obj)  # type: ignore


# We need FilterDsl for two reasons:
# 1. To provide wrapper methods around lots of filters while avoid bloating the
#    yaml spec.
# 2. Pydantic models in general don't support positional arguments, making the
#    calls feel repetitive (e.g. Platform(platform=...)).
#    See https://github.com/pydantic/pydantic/issues/6792
# We also considered using dataclasses / pydantic dataclasses, but
# ultimately decided that they didn't quite suit our requirements,
# particularly with regards to the field aliases for and/or/not.
class FilterDsl:
    @staticmethod
    def and_(*args: "Filter") -> _And:
        return _And(and_=list(args))

    @staticmethod
    def or_(*args: "Filter") -> _Or:
        return _Or(or_=list(args))

    @staticmethod
    def not_(arg: "Filter") -> _Not:
        return _Not(not_=arg)

    @staticmethod
    def entity_type(
        entity_type: Union[EntityTypeName, Sequence[EntityTypeName]],
    ) -> _EntityTypeFilter:
        return _EntityTypeFilter(
            entity_type=(
                [entity_type] if isinstance(entity_type, str) else list(entity_type)
            )
        )

    @staticmethod
    def entity_subtype(entity_type: str, subtype: str) -> _EntitySubtypeFilter:
        return _EntitySubtypeFilter(
            entity_type=entity_type,
            entity_subtype=subtype,
        )

    @staticmethod
    def platform(platform: Union[str, List[str]], /) -> _PlatformFilter:
        return _PlatformFilter(
            platform=[platform] if isinstance(platform, str) else platform
        )

    # TODO: Add a platform_instance filter

    @staticmethod
    def domain(domain: Union[str, List[str]], /) -> _DomainFilter:
        return _DomainFilter(domain=[domain] if isinstance(domain, str) else domain)

    @staticmethod
    def env(env: Union[str, List[str]], /) -> _EnvFilter:
        return _EnvFilter(env=[env] if isinstance(env, str) else env)

    @staticmethod
    def has_custom_property(key: str, value: str) -> _CustomCondition:
        return _CustomCondition(
            field="customProperties",
            condition="EQUAL",
            values=[f"{key}={value}"],
        )

    # TODO: Add a soft-deletion status filter
    # TODO: add a container / browse path filter
    # TODO add shortcut for custom filters

    @staticmethod
    def custom_filter(
        field: str, condition: str, values: List[str]
    ) -> _CustomCondition:
        return _CustomCondition(
            field=field,
            condition=condition,
            values=values,
        )
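
Every filter in this module compiles into OR-of-AND form: a list of {"and": [SearchFilterRule, ...]} clauses, presumably consumed by the new datahub/sdk/search_client.py also added in this release. As a rough illustration of how the pieces fit together, here is a minimal sketch; the import path mirrors the file location in this diff, the example values ('dataset', 'snowflake', 'PROD') are assumptions, and nothing below is taken from the package's own documentation.

# Minimal sketch, not from the package docs. Assumes the module is importable
# from the path shown in this diff and that 'snowflake' / 'PROD' are valid
# platform / env values in the target deployment.
from datahub.sdk.search_filters import FilterDsl, load_filters

# Programmatic construction: PROD Snowflake datasets.
f = FilterDsl.and_(
    FilterDsl.entity_type("dataset"),
    FilterDsl.platform("snowflake"),
    FilterDsl.env("PROD"),
)

# compile() lowers the filter tree into OR-of-AND form. The env filter
# contributes two OR branches ("origin" for most entities, "env" for
# containers), so the Cartesian product in _And.compile() yields two
# {"and": [...]} clauses here.
or_clauses = f.compile()

# The same filter expressed declaratively (e.g. parsed from YAML) and
# validated through load_filters(), which works on both pydantic v1 and v2.
same_filter = load_filters(
    {
        "and": [
            {"entity_type": ["dataset"]},
            {"platform": ["snowflake"]},
            {"env": ["PROD"]},
        ]
    }
)

# Negation compiles via De Morgan: NOT(A and B) becomes (NOT A) or (NOT B),
# with each leaf rule handled by SearchFilterRule.negate().
not_snowflake = FilterDsl.not_(FilterDsl.platform("snowflake"))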