acryl-datahub 1.0.0.1rc5__py3-none-any.whl → 1.0.0.1rc7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this version of acryl-datahub has been flagged as a potentially problematic release.
Files changed (32)
  1. {acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/METADATA +2451 -2451
  2. {acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/RECORD +32 -32
  3. datahub/_version.py +1 -1
  4. datahub/cli/specific/dataset_cli.py +26 -10
  5. datahub/emitter/mcp_builder.py +8 -0
  6. datahub/emitter/rest_emitter.py +13 -5
  7. datahub/errors.py +4 -0
  8. datahub/ingestion/api/source.py +2 -1
  9. datahub/ingestion/api/source_helpers.py +9 -1
  10. datahub/ingestion/graph/client.py +20 -9
  11. datahub/ingestion/graph/filters.py +41 -16
  12. datahub/ingestion/sink/datahub_rest.py +2 -2
  13. datahub/ingestion/source/cassandra/cassandra.py +1 -10
  14. datahub/ingestion/source/common/subtypes.py +1 -0
  15. datahub/ingestion/source/iceberg/iceberg.py +159 -102
  16. datahub/ingestion/source/iceberg/iceberg_profiler.py +21 -18
  17. datahub/ingestion/source/powerbi/config.py +31 -4
  18. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  19. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +111 -10
  20. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  21. datahub/ingestion/source/powerbi/powerbi.py +12 -1
  22. datahub/ingestion/source/sigma/config.py +3 -4
  23. datahub/ingestion/source/sigma/sigma.py +10 -6
  24. datahub/ingestion/source/sql/oracle.py +51 -4
  25. datahub/ingestion/source/usage/usage_common.py +0 -65
  26. datahub/sdk/search_client.py +81 -8
  27. datahub/sdk/search_filters.py +73 -11
  28. datahub/utilities/threaded_iterator_executor.py +16 -3
  29. {acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/WHEEL +0 -0
  30. {acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/entry_points.txt +0 -0
  31. {acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/licenses/LICENSE +0 -0
  32. {acryl_datahub-1.0.0.1rc5.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/top_level.txt +0 -0
datahub/sdk/search_filters.py

@@ -3,7 +3,10 @@ from __future__ import annotations
 import abc
 from typing import (
     Any,
+    ClassVar,
+    Iterator,
     List,
+    Optional,
     Sequence,
     TypedDict,
     Union,
@@ -13,8 +16,13 @@ import pydantic
 
 from datahub.configuration.common import ConfigModel
 from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2
-from datahub.ingestion.graph.client import entity_type_to_graphql
-from datahub.ingestion.graph.filters import FilterOperator, SearchFilterRule
+from datahub.ingestion.graph.client import flexible_entity_type_to_graphql
+from datahub.ingestion.graph.filters import (
+    FilterOperator,
+    RemovedStatusFilter,
+    SearchFilterRule,
+    _get_status_filter,
+)
 from datahub.metadata.schema_classes import EntityTypeName
 from datahub.metadata.urns import DataPlatformUrn, DomainUrn
 
@@ -37,25 +45,28 @@ class _BaseFilter(ConfigModel):
     def compile(self) -> _OrFilters:
         pass
 
-
-def _flexible_entity_type_to_graphql(entity_type: str) -> str:
-    if entity_type.upper() == entity_type:
-        # Assume that we were passed a graphql EntityType enum value,
-        # so no conversion is needed.
-        return entity_type
-    return entity_type_to_graphql(entity_type)
+    def dfs(self) -> Iterator[_BaseFilter]:
+        yield self
 
 
 class _EntityTypeFilter(_BaseFilter):
+    """Filter for specific entity types.
+
+    If no entity type filter is specified, we will search all entity types in the
+    default search set, mirroring the behavior of the DataHub UI.
+    """
+
+    ENTITY_TYPE_FIELD: ClassVar[str] = "_entityType"
+
     entity_type: List[str] = pydantic.Field(
         description="The entity type to filter on. Can be 'dataset', 'chart', 'dashboard', 'corpuser', etc.",
     )
 
     def _build_rule(self) -> SearchFilterRule:
         return SearchFilterRule(
-            field="_entityType",
+            field=self.ENTITY_TYPE_FIELD,
             condition="EQUAL",
-            values=[_flexible_entity_type_to_graphql(t) for t in self.entity_type],
+            values=[flexible_entity_type_to_graphql(t) for t in self.entity_type],
         )
 
     def compile(self) -> _OrFilters:
@@ -78,6 +89,26 @@ class _EntitySubtypeFilter(_BaseFilter):
         return [{"and": [self._build_rule()]}]
 
 
+class _StatusFilter(_BaseFilter):
+    """Filter for the status of entities during search.
+
+    If not explicitly specified, the NOT_SOFT_DELETED status filter will be applied.
+    """
+
+    status: RemovedStatusFilter
+
+    def _build_rule(self) -> Optional[SearchFilterRule]:
+        return _get_status_filter(self.status)
+
+    def compile(self) -> _OrFilters:
+        rule = self._build_rule()
+        if rule:
+            return [{"and": [rule]}]
+        else:
+            # Our boolean algebra logic requires something here - returning [] would cause errors.
+            return FilterDsl.true().compile()
+
+
 class _PlatformFilter(_BaseFilter):
     platform: List[str]
     # TODO: Add validator to convert string -> list of strings
@@ -213,6 +244,11 @@ class _And(_BaseFilter):
             ]
         }
 
+    def dfs(self) -> Iterator[_BaseFilter]:
+        yield self
+        for filter in self.and_:
+            yield from filter.dfs()
+
 
 class _Or(_BaseFilter):
     """Represents an OR conjunction of filters."""
@@ -226,6 +262,11 @@ class _Or(_BaseFilter):
             merged_filter.extend(filter.compile())
         return merged_filter
 
+    def dfs(self) -> Iterator[_BaseFilter]:
+        yield self
+        for filter in self.or_:
+            yield from filter.dfs()
+
 
 class _Not(_BaseFilter):
     """Represents a NOT filter."""
@@ -256,6 +297,10 @@ class _Not(_BaseFilter):
 
         return final_filters
 
+    def dfs(self) -> Iterator[_BaseFilter]:
+        yield self
+        yield from self.not_.dfs()
+
 
 # TODO: With pydantic 2, we can use a RootModel with a
 # discriminated union to make the error messages more informative.
@@ -265,6 +310,7 @@ Filter = Union[
     _Not,
     _EntityTypeFilter,
     _EntitySubtypeFilter,
+    _StatusFilter,
     _PlatformFilter,
     _DomainFilter,
     _EnvFilter,
@@ -312,6 +358,18 @@ class FilterDsl:
     def not_(arg: "Filter") -> _Not:
         return _Not(not_=arg)
 
+    @staticmethod
+    def true() -> "Filter":
+        return _CustomCondition(
+            field="urn",
+            condition="EXISTS",
+            values=[],
+        )
+
+    @staticmethod
+    def false() -> "Filter":
+        return FilterDsl.not_(FilterDsl.true())
+
     @staticmethod
     def entity_type(
         entity_type: Union[EntityTypeName, Sequence[EntityTypeName]],
@@ -354,6 +412,10 @@ class FilterDsl:
             values=[f"{key}={value}"],
        )
 
+    @staticmethod
+    def soft_deleted(status: RemovedStatusFilter) -> _StatusFilter:
+        return _StatusFilter(status=status)
+
     # TODO: Add a soft-deletion status filter
     # TODO: add a container / browse path filter
     # TODO add shortcut for custom filters
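
The search_filters.py changes add a dfs() traversal to every filter node, a _StatusFilter backed by RemovedStatusFilter, and always-true/false leaves on FilterDsl. A minimal usage sketch of how these pieces might fit together, assuming FilterDsl.and_ accepts filters positionally and RemovedStatusFilter has a NOT_SOFT_DELETED member (the docstring above implies it, but neither signature appears in this diff):

from datahub.ingestion.graph.filters import RemovedStatusFilter
from datahub.sdk.search_filters import FilterDsl

# Build a filter tree: datasets that are not soft-deleted.
flt = FilterDsl.and_(
    FilterDsl.entity_type("dataset"),
    FilterDsl.soft_deleted(RemovedStatusFilter.NOT_SOFT_DELETED),
)

# dfs() yields every node in the tree, so a caller can check, for
# example, whether an explicit status filter was supplied.
has_status_filter = any(type(f).__name__ == "_StatusFilter" for f in flt.dfs())

compiled = flt.compile()  # the _OrFilters structure sent to the search API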
datahub/utilities/threaded_iterator_executor.py

@@ -1,7 +1,15 @@
 import concurrent.futures
 import contextlib
 import queue
-from typing import Any, Callable, Generator, Iterable, Tuple, TypeVar
+from typing import (
+    Any,
+    Callable,
+    Iterable,
+    Iterator,
+    Optional,
+    Tuple,
+    TypeVar,
+)
 
 T = TypeVar("T")
 
@@ -18,8 +26,13 @@ class ThreadedIteratorExecutor:
         worker_func: Callable[..., Iterable[T]],
         args_list: Iterable[Tuple[Any, ...]],
         max_workers: int,
-    ) -> Generator[T, None, None]:
-        out_q: queue.Queue[T] = queue.Queue()
+        max_backpressure: Optional[int] = None,
+    ) -> Iterator[T]:
+        if max_backpressure is None:
+            max_backpressure = 10 * max_workers
+        assert max_backpressure >= max_workers
+
+        out_q: queue.Queue[T] = queue.Queue(maxsize=max_backpressure)
 
         def _worker_wrapper(
             worker_func: Callable[..., Iterable[T]], *args: Any
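
The executor change bounds the internal output queue with a new max_backpressure parameter (default 10 * max_workers): when the consumer falls behind, workers block on the queue instead of buffering results without limit. A hedged usage sketch; the worker function and its arguments are hypothetical, only the process() parameters come from the diff:

from datahub.utilities.threaded_iterator_executor import ThreadedIteratorExecutor

def fetch_rows(table: str):
    # Hypothetical worker: yields results for one table.
    for i in range(1_000):
        yield f"{table}:{i}"

rows = ThreadedIteratorExecutor.process(
    fetch_rows,
    [("tbl_a",), ("tbl_b",)],  # one args tuple per worker invocation
    max_workers=2,
    max_backpressure=20,  # at most 20 undelivered results are buffered
)
for row in rows:
    print(row)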