acryl-datahub 1.0.0.1rc5__py3-none-any.whl → 1.0.0.1rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/METADATA +2451 -2451
- {acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/RECORD +32 -32
- datahub/_version.py +1 -1
- datahub/cli/specific/dataset_cli.py +26 -10
- datahub/emitter/mcp_builder.py +8 -0
- datahub/emitter/rest_emitter.py +13 -5
- datahub/errors.py +4 -0
- datahub/ingestion/api/source.py +2 -1
- datahub/ingestion/api/source_helpers.py +9 -1
- datahub/ingestion/graph/client.py +20 -9
- datahub/ingestion/graph/filters.py +41 -16
- datahub/ingestion/sink/datahub_rest.py +2 -2
- datahub/ingestion/source/cassandra/cassandra.py +1 -10
- datahub/ingestion/source/common/subtypes.py +1 -0
- datahub/ingestion/source/iceberg/iceberg.py +159 -102
- datahub/ingestion/source/iceberg/iceberg_profiler.py +21 -18
- datahub/ingestion/source/powerbi/config.py +31 -4
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +111 -10
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +12 -1
- datahub/ingestion/source/sigma/config.py +3 -4
- datahub/ingestion/source/sigma/sigma.py +10 -6
- datahub/ingestion/source/sql/oracle.py +51 -4
- datahub/ingestion/source/usage/usage_common.py +0 -65
- datahub/sdk/search_client.py +81 -8
- datahub/sdk/search_filters.py +73 -11
- datahub/utilities/threaded_iterator_executor.py +16 -3
- {acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/top_level.txt +0 -0
datahub/sdk/search_filters.py
CHANGED
|
@@ -3,7 +3,10 @@ from __future__ import annotations
|
|
|
3
3
|
import abc
|
|
4
4
|
from typing import (
|
|
5
5
|
Any,
|
|
6
|
+
ClassVar,
|
|
7
|
+
Iterator,
|
|
6
8
|
List,
|
|
9
|
+
Optional,
|
|
7
10
|
Sequence,
|
|
8
11
|
TypedDict,
|
|
9
12
|
Union,
|
|
@@ -13,8 +16,13 @@ import pydantic
|
|
|
13
16
|
|
|
14
17
|
from datahub.configuration.common import ConfigModel
|
|
15
18
|
from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2
|
|
16
|
-
from datahub.ingestion.graph.client import
|
|
17
|
-
from datahub.ingestion.graph.filters import
|
|
19
|
+
from datahub.ingestion.graph.client import flexible_entity_type_to_graphql
|
|
20
|
+
from datahub.ingestion.graph.filters import (
|
|
21
|
+
FilterOperator,
|
|
22
|
+
RemovedStatusFilter,
|
|
23
|
+
SearchFilterRule,
|
|
24
|
+
_get_status_filter,
|
|
25
|
+
)
|
|
18
26
|
from datahub.metadata.schema_classes import EntityTypeName
|
|
19
27
|
from datahub.metadata.urns import DataPlatformUrn, DomainUrn
|
|
20
28
|
|
|
@@ -37,25 +45,28 @@ class _BaseFilter(ConfigModel):
|
|
|
37
45
|
def compile(self) -> _OrFilters:
|
|
38
46
|
pass
|
|
39
47
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
if entity_type.upper() == entity_type:
|
|
43
|
-
# Assume that we were passed a graphql EntityType enum value,
|
|
44
|
-
# so no conversion is needed.
|
|
45
|
-
return entity_type
|
|
46
|
-
return entity_type_to_graphql(entity_type)
|
|
48
|
+
def dfs(self) -> Iterator[_BaseFilter]:
|
|
49
|
+
yield self
|
|
47
50
|
|
|
48
51
|
|
|
49
52
|
class _EntityTypeFilter(_BaseFilter):
|
|
53
|
+
"""Filter for specific entity types.
|
|
54
|
+
|
|
55
|
+
If no entity type filter is specified, we will search all entity types in the
|
|
56
|
+
default search set, mirroring the behavior of the DataHub UI.
|
|
57
|
+
"""
|
|
58
|
+
|
|
59
|
+
ENTITY_TYPE_FIELD: ClassVar[str] = "_entityType"
|
|
60
|
+
|
|
50
61
|
entity_type: List[str] = pydantic.Field(
|
|
51
62
|
description="The entity type to filter on. Can be 'dataset', 'chart', 'dashboard', 'corpuser', etc.",
|
|
52
63
|
)
|
|
53
64
|
|
|
54
65
|
def _build_rule(self) -> SearchFilterRule:
|
|
55
66
|
return SearchFilterRule(
|
|
56
|
-
field=
|
|
67
|
+
field=self.ENTITY_TYPE_FIELD,
|
|
57
68
|
condition="EQUAL",
|
|
58
|
-
values=[
|
|
69
|
+
values=[flexible_entity_type_to_graphql(t) for t in self.entity_type],
|
|
59
70
|
)
|
|
60
71
|
|
|
61
72
|
def compile(self) -> _OrFilters:
|
|
@@ -78,6 +89,26 @@ class _EntitySubtypeFilter(_BaseFilter):
|
|
|
78
89
|
return [{"and": [self._build_rule()]}]
|
|
79
90
|
|
|
80
91
|
|
|
92
|
+
class _StatusFilter(_BaseFilter):
|
|
93
|
+
"""Filter for the status of entities during search.
|
|
94
|
+
|
|
95
|
+
If not explicitly specified, the NOT_SOFT_DELETED status filter will be applied.
|
|
96
|
+
"""
|
|
97
|
+
|
|
98
|
+
status: RemovedStatusFilter
|
|
99
|
+
|
|
100
|
+
def _build_rule(self) -> Optional[SearchFilterRule]:
|
|
101
|
+
return _get_status_filter(self.status)
|
|
102
|
+
|
|
103
|
+
def compile(self) -> _OrFilters:
|
|
104
|
+
rule = self._build_rule()
|
|
105
|
+
if rule:
|
|
106
|
+
return [{"and": [rule]}]
|
|
107
|
+
else:
|
|
108
|
+
# Our boolean algebra logic requires something here - returning [] would cause errors.
|
|
109
|
+
return FilterDsl.true().compile()
|
|
110
|
+
|
|
111
|
+
|
|
81
112
|
class _PlatformFilter(_BaseFilter):
|
|
82
113
|
platform: List[str]
|
|
83
114
|
# TODO: Add validator to convert string -> list of strings
|
|
@@ -213,6 +244,11 @@ class _And(_BaseFilter):
|
|
|
213
244
|
]
|
|
214
245
|
}
|
|
215
246
|
|
|
247
|
+
def dfs(self) -> Iterator[_BaseFilter]:
|
|
248
|
+
yield self
|
|
249
|
+
for filter in self.and_:
|
|
250
|
+
yield from filter.dfs()
|
|
251
|
+
|
|
216
252
|
|
|
217
253
|
class _Or(_BaseFilter):
|
|
218
254
|
"""Represents an OR conjunction of filters."""
|
|
@@ -226,6 +262,11 @@ class _Or(_BaseFilter):
|
|
|
226
262
|
merged_filter.extend(filter.compile())
|
|
227
263
|
return merged_filter
|
|
228
264
|
|
|
265
|
+
def dfs(self) -> Iterator[_BaseFilter]:
|
|
266
|
+
yield self
|
|
267
|
+
for filter in self.or_:
|
|
268
|
+
yield from filter.dfs()
|
|
269
|
+
|
|
229
270
|
|
|
230
271
|
class _Not(_BaseFilter):
|
|
231
272
|
"""Represents a NOT filter."""
|
|
@@ -256,6 +297,10 @@ class _Not(_BaseFilter):
|
|
|
256
297
|
|
|
257
298
|
return final_filters
|
|
258
299
|
|
|
300
|
+
def dfs(self) -> Iterator[_BaseFilter]:
|
|
301
|
+
yield self
|
|
302
|
+
yield from self.not_.dfs()
|
|
303
|
+
|
|
259
304
|
|
|
260
305
|
# TODO: With pydantic 2, we can use a RootModel with a
|
|
261
306
|
# discriminated union to make the error messages more informative.
|
|
@@ -265,6 +310,7 @@ Filter = Union[
|
|
|
265
310
|
_Not,
|
|
266
311
|
_EntityTypeFilter,
|
|
267
312
|
_EntitySubtypeFilter,
|
|
313
|
+
_StatusFilter,
|
|
268
314
|
_PlatformFilter,
|
|
269
315
|
_DomainFilter,
|
|
270
316
|
_EnvFilter,
|
|
@@ -312,6 +358,18 @@ class FilterDsl:
|
|
|
312
358
|
def not_(arg: "Filter") -> _Not:
|
|
313
359
|
return _Not(not_=arg)
|
|
314
360
|
|
|
361
|
+
@staticmethod
|
|
362
|
+
def true() -> "Filter":
|
|
363
|
+
return _CustomCondition(
|
|
364
|
+
field="urn",
|
|
365
|
+
condition="EXISTS",
|
|
366
|
+
values=[],
|
|
367
|
+
)
|
|
368
|
+
|
|
369
|
+
@staticmethod
|
|
370
|
+
def false() -> "Filter":
|
|
371
|
+
return FilterDsl.not_(FilterDsl.true())
|
|
372
|
+
|
|
315
373
|
@staticmethod
|
|
316
374
|
def entity_type(
|
|
317
375
|
entity_type: Union[EntityTypeName, Sequence[EntityTypeName]],
|
|
@@ -354,6 +412,10 @@ class FilterDsl:
|
|
|
354
412
|
values=[f"{key}={value}"],
|
|
355
413
|
)
|
|
356
414
|
|
|
415
|
+
@staticmethod
|
|
416
|
+
def soft_deleted(status: RemovedStatusFilter) -> _StatusFilter:
|
|
417
|
+
return _StatusFilter(status=status)
|
|
418
|
+
|
|
357
419
|
# TODO: Add a soft-deletion status filter
|
|
358
420
|
# TODO: add a container / browse path filter
|
|
359
421
|
# TODO add shortcut for custom filters
|
|
datahub/utilities/threaded_iterator_executor.py
CHANGED
|
@@ -1,7 +1,15 @@
|
|
|
1
1
|
import concurrent.futures
|
|
2
2
|
import contextlib
|
|
3
3
|
import queue
|
|
4
|
-
from typing import
|
|
4
|
+
from typing import (
|
|
5
|
+
Any,
|
|
6
|
+
Callable,
|
|
7
|
+
Iterable,
|
|
8
|
+
Iterator,
|
|
9
|
+
Optional,
|
|
10
|
+
Tuple,
|
|
11
|
+
TypeVar,
|
|
12
|
+
)
|
|
5
13
|
|
|
6
14
|
T = TypeVar("T")
|
|
7
15
|
|
|
@@ -18,8 +26,13 @@ class ThreadedIteratorExecutor:
|
|
|
18
26
|
worker_func: Callable[..., Iterable[T]],
|
|
19
27
|
args_list: Iterable[Tuple[Any, ...]],
|
|
20
28
|
max_workers: int,
|
|
21
|
-
|
|
22
|
-
|
|
29
|
+
max_backpressure: Optional[int] = None,
|
|
30
|
+
) -> Iterator[T]:
|
|
31
|
+
if max_backpressure is None:
|
|
32
|
+
max_backpressure = 10 * max_workers
|
|
33
|
+
assert max_backpressure >= max_workers
|
|
34
|
+
|
|
35
|
+
out_q: queue.Queue[T] = queue.Queue(maxsize=max_backpressure)
|
|
23
36
|
|
|
24
37
|
def _worker_wrapper(
|
|
25
38
|
worker_func: Callable[..., Iterable[T]], *args: Any
|
|
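{acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/WHEEL
File without changes
|
|
{acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/entry_points.txt
File without changes
|
|
{acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/licenses/LICENSE
File without changes
|
|
{acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/top_level.txt
File without changes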
|