acryl-datahub 1.0.0.1rc6__py3-none-any.whl → 1.0.0.1rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic.
- {acryl_datahub-1.0.0.1rc6.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/METADATA +2444 -2444
- {acryl_datahub-1.0.0.1rc6.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/RECORD +16 -16
- datahub/_version.py +1 -1
- datahub/errors.py +4 -0
- datahub/ingestion/api/source.py +2 -1
- datahub/ingestion/api/source_helpers.py +9 -1
- datahub/ingestion/graph/client.py +18 -7
- datahub/ingestion/graph/filters.py +41 -16
- datahub/ingestion/source/cassandra/cassandra.py +1 -10
- datahub/sdk/search_client.py +81 -8
- datahub/sdk/search_filters.py +73 -11
- datahub/utilities/threaded_iterator_executor.py +16 -3
- {acryl_datahub-1.0.0.1rc6.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0.1rc6.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0.1rc6.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.0.0.1rc6.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/top_level.txt +0 -0
{acryl_datahub-1.0.0.1rc6.dist-info → acryl_datahub-1.0.0.1rc7.dist-info}/RECORD
CHANGED
@@ -1,9 +1,9 @@
-acryl_datahub-1.0.0.
+acryl_datahub-1.0.0.1rc7.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
 datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
 datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
-datahub/_version.py,sha256=
+datahub/_version.py,sha256=AZj-rwp4edRcZvS9Mq4fxTeV64QHFW-6zysNAtjc2qg,323
 datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
-datahub/errors.py,sha256=
+datahub/errors.py,sha256=BzKdcmYseHOt36zfjJXc17WNutFhp9Y23cU_L6cIkxc,612
 datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/_codegen/aspect.py,sha256=PJRa-Z4ouXHq3OkulfyWhwZn-fFUBDK_UPvmqaWdbWk,1063
@@ -145,8 +145,8 @@ datahub/ingestion/api/registry.py,sha256=LbdZr89465Lj7ptQRVB4vI1JR1igWABvQFj9-WX
 datahub/ingestion/api/report.py,sha256=eM_TWWz6iJNd-c_S2_4eg2qKLGYP8vSROb_TMiCwBhY,4644
 datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
 datahub/ingestion/api/sink.py,sha256=nfal7nsYY1AT2WQRjqO48uAHitpjax7TsRVzYXnqbeM,4918
-datahub/ingestion/api/source.py,sha256=
-datahub/ingestion/api/source_helpers.py,sha256=
+datahub/ingestion/api/source.py,sha256=HrQahSEBeapMDnW8S6wSEyNLLE9RCs2R6eUrVaibuuc,19349
+datahub/ingestion/api/source_helpers.py,sha256=OhgBLdpUIuqF_gl4uV8Y2csp-z97zzXeFj2I5aUypCI,20158
 datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
 datahub/ingestion/api/workunit.py,sha256=e8n8RfSjHZZm2R4ShNH0UuMtUkMjyqqM2j2t7oL74lo,6327
 datahub/ingestion/api/auto_work_units/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -171,11 +171,11 @@ datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGo
 datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
 datahub/ingestion/glossary/datahub_classifier.py,sha256=O7wm6gQT1Jf2QSKdWjJQbS5oSzJwplXzfza26Gdq5Mg,7555
 datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/graph/client.py,sha256=
+datahub/ingestion/graph/client.py,sha256=Qtjf5YrQeQzcTb0qxr6-y4MSEKSJm8f0hO6BoeRA_yI,65916
 datahub/ingestion/graph/config.py,sha256=_oha8Je7P80ZmrkZUAaRHyYbdMmTkMI5JkYjEP2Ri1Q,751
 datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
 datahub/ingestion/graph/entity_versioning.py,sha256=nrcNz0Qm6kpE6oTu_mrYUQDx14KPspBTc6R9SyFUY6c,6901
-datahub/ingestion/graph/filters.py,sha256=
+datahub/ingestion/graph/filters.py,sha256=hZ8YOQRxC0_mbAx_SLkgqyYXr0Fw3O4U2wo2UMuDHJY,8653
 datahub/ingestion/reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py,sha256=iEulcZMLBQuUfe9MAYyobMekvMcNm4dqVcS_C_2KfrI,9736
 datahub/ingestion/reporting/file_reporter.py,sha256=tiWukmMxHrTQI3rOAumsq6lRlw8T6spqpS6XBDYnrZU,1640
@@ -263,7 +263,7 @@ datahub/ingestion/source/bigquery_v2/queries.py,sha256=c1BpeQP8p8y-FOhmiQkkY2IqG
 datahub/ingestion/source/bigquery_v2/queries_extractor.py,sha256=_5cAXVU8b8T_nAPDsvN2JRd2dmM1t1J1mRylfKiPen4,19530
 datahub/ingestion/source/bigquery_v2/usage.py,sha256=A9c-ofclaRk0NSnc4IRaqJYqMPv6ecCld_TPy3V2qFs,40748
 datahub/ingestion/source/cassandra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/cassandra/cassandra.py,sha256=
+datahub/ingestion/source/cassandra/cassandra.py,sha256=lKvPP0Uahi9xw_yh9cArPPtwvAauXolaEk-6f-jhpz4,14558
 datahub/ingestion/source/cassandra/cassandra_api.py,sha256=UVGQTsk6O57Q6wrWo54bQPLtStTWhw_Fq6fgW3Bjgk8,12515
 datahub/ingestion/source/cassandra/cassandra_config.py,sha256=vIMUOzazWTGi03B51vI0-YMxaMJHUGmCxJJgd8pKhC8,3791
 datahub/ingestion/source/cassandra/cassandra_profiling.py,sha256=DkSIryZNwLei5PaKuu9fNEKxEbhIrPI-T9gaVoM87NQ,11063
@@ -906,8 +906,8 @@ datahub/sdk/entity.py,sha256=Q29AbpS58L4gD8ETwoNIwG-ouytz4c0MSSFi6-jLl_4,6742
 datahub/sdk/entity_client.py,sha256=Sxe6H6Vr_tqLJu5KW7MJfLWJ6mgh4mbsx7u7MOBpM64,5052
 datahub/sdk/main_client.py,sha256=h2MKRhR-BO0zGCMhF7z2bTncX4hagKrAYwR3wTNTtzA,3666
 datahub/sdk/resolver_client.py,sha256=nKMAZJt2tRSGfKSzoREIh43PXqjM3umLiYkYHJjo1io,3243
-datahub/sdk/search_client.py,sha256=
-datahub/sdk/search_filters.py,sha256=
+datahub/sdk/search_client.py,sha256=BJR5t7Ff2oDNOGLcSCp9YHzrGKbgOQr7T8XQKGEpucw,3437
+datahub/sdk/search_filters.py,sha256=BcMhvG5hGYAATtLPLz4WLRjKApX2oLYrrcGn-CG__ek,12901
 datahub/secret/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/secret/datahub_secret_store.py,sha256=9u9S87-15jwhj4h0EsAVIMdQLgvstKc8voQux2slxgU,2477
 datahub/secret/datahub_secrets_client.py,sha256=nDmhziKdvseJHlaDVUcAwK8Fv8maeAaG-ktZtWG2b70,1316
@@ -996,7 +996,7 @@ datahub/utilities/sqllineage_patch.py,sha256=0Buh50bmEqJFg1HFRCknCnePo1cecI4JmGx
 datahub/utilities/stats_collections.py,sha256=CxaTcrF7J6am7iX5jPhFKne535UcyDk_oreVwR013fU,1625
 datahub/utilities/str_enum.py,sha256=EsqCLPbrqyQ2YU_wt7QP-a6P5fnpIshXJ3AI8gLBlVA,474
 datahub/utilities/tee_io.py,sha256=jBrsUfTPTk9IICntfGOG0HR-Fjp8BQMde-FPQ4r3kuI,601
-datahub/utilities/threaded_iterator_executor.py,sha256=
+datahub/utilities/threaded_iterator_executor.py,sha256=6BpCE0os3d-uMYxHBilPQC-JvEBkU6JQY4bGs06JKYI,2004
 datahub/utilities/threading_timeout.py,sha256=hOzDI55E3onXblHNwGsePJUWMXo5zqaWCnoYdL2-KPM,1316
 datahub/utilities/time.py,sha256=Q7S_Zyom8C2zcl2xFbjNw6K8nZsCub5XGAB4OEmIS34,1847
 datahub/utilities/topological_sort.py,sha256=kcK5zPSR393fgItr-KSLV3bDqfJfBRS8E5kkCpPBgUY,1358
@@ -1043,8 +1043,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
 datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
 datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
 datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
-acryl_datahub-1.0.0.
-acryl_datahub-1.0.0.
-acryl_datahub-1.0.0.
-acryl_datahub-1.0.0.
-acryl_datahub-1.0.0.
+acryl_datahub-1.0.0.1rc7.dist-info/METADATA,sha256=QkeMAnAXXez9FFTnJVpxhOJMNDadF0gfVQ3uz3Fh4i8,176849
+acryl_datahub-1.0.0.1rc7.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+acryl_datahub-1.0.0.1rc7.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
+acryl_datahub-1.0.0.1rc7.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+acryl_datahub-1.0.0.1rc7.dist-info/RECORD,,
datahub/_version.py
CHANGED
datahub/errors.py
CHANGED
datahub/ingestion/api/source.py
CHANGED
@@ -51,6 +51,7 @@ from datahub.ingestion.api.source_helpers import (
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
 from datahub.metadata.schema_classes import UpstreamLineageClass
+from datahub.sdk.entity import Entity
 from datahub.utilities.lossy_collections import LossyDict, LossyList
 from datahub.utilities.type_annotations import get_class_from_annotation
 
@@ -480,7 +481,7 @@ class Source(Closeable, metaclass=ABCMeta):
 
     def get_workunits_internal(
         self,
-    ) -> Iterable[Union[MetadataWorkUnit, MetadataChangeProposalWrapper]]:
+    ) -> Iterable[Union[MetadataWorkUnit, MetadataChangeProposalWrapper, Entity]]:
         raise NotImplementedError(
             "get_workunits_internal must be implemented if get_workunits is not overriden."
        )
datahub/ingestion/api/source_helpers.py
CHANGED
@@ -35,6 +35,7 @@ from datahub.metadata.schema_classes import (
     TimeWindowSizeClass,
 )
 from datahub.metadata.urns import DatasetUrn, GlossaryTermUrn, TagUrn, Urn
+from datahub.sdk.entity import Entity
 from datahub.specific.dataset import DatasetPatchBuilder
 from datahub.telemetry import telemetry
 from datahub.utilities.urns.error import InvalidUrnError
@@ -49,7 +50,12 @@ logger = logging.getLogger(__name__)
 
 def auto_workunit(
     stream: Iterable[
-        Union[
+        Union[
+            MetadataChangeEventClass,
+            MetadataChangeProposalWrapper,
+            MetadataWorkUnit,
+            Entity,
+        ]
     ],
 ) -> Iterable[MetadataWorkUnit]:
     """Convert a stream of MCEs and MCPs to a stream of :class:`MetadataWorkUnit`s."""
@@ -62,6 +68,8 @@ def auto_workunit(
             )
         elif isinstance(item, MetadataChangeProposalWrapper):
             yield item.as_workunit()
+        elif isinstance(item, Entity):
+            yield from item.as_workunits()
         else:
             yield item
 
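Note: with the change above, auto_workunit also accepts SDK Entity objects alongside MCEs, MCPs, and work units, expanding each entity via as_workunits(). A minimal usage sketch, assuming the SDK's Dataset entity and an illustrative URN (neither is part of this diff):

from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.source_helpers import auto_workunit
from datahub.metadata.schema_classes import StatusClass
from datahub.sdk.dataset import Dataset  # assumed SDK entity class; not shown in this diff

stream = [
    # An SDK entity: auto_workunit expands it via Entity.as_workunits().
    Dataset(platform="hive", name="example_db.example_table"),
    # An MCP: converted via as_workunit(), as before.
    MetadataChangeProposalWrapper(
        entityUrn="urn:li:dataset:(urn:li:dataPlatform:hive,example_db.example_table,PROD)",
        aspect=StatusClass(removed=False),
    ),
]
workunits = list(auto_workunit(stream))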
datahub/ingestion/graph/client.py
CHANGED
@@ -49,6 +49,7 @@ from datahub.ingestion.graph.connections import (
 )
 from datahub.ingestion.graph.entity_versioning import EntityVersioningAPI
 from datahub.ingestion.graph.filters import (
+    RawSearchFilter,
     RawSearchFilterRule,
     RemovedStatusFilter,
     generate_filter,
@@ -75,10 +76,11 @@ from datahub.metadata.schema_classes import (
     SystemMetadataClass,
     TelemetryClientIdClass,
 )
+from datahub.metadata.urns import CorpUserUrn, Urn
 from datahub.telemetry.telemetry import telemetry_instance
 from datahub.utilities.perf_timer import PerfTimer
 from datahub.utilities.str_enum import StrEnum
-from datahub.utilities.urns.urn import
+from datahub.utilities.urns.urn import guess_entity_type
 
 if TYPE_CHECKING:
     from datahub.ingestion.sink.datahub_rest import (
@@ -116,7 +118,7 @@ def entity_type_to_graphql(entity_type: str) -> str:
     """Convert the entity types into GraphQL "EntityType" enum values."""
 
     # Hard-coded special cases.
-    if entity_type ==
+    if entity_type == CorpUserUrn.ENTITY_TYPE:
         return "CORP_USER"
 
     # Convert camelCase to UPPER_UNDERSCORE.
@@ -133,6 +135,14 @@ def entity_type_to_graphql(entity_type: str) -> str:
     return entity_type
 
 
+def flexible_entity_type_to_graphql(entity_type: str) -> str:
+    if entity_type.upper() == entity_type:
+        # Assume that we were passed a graphql EntityType enum value,
+        # so no conversion is needed.
+        return entity_type
+    return entity_type_to_graphql(entity_type)
+
+
 class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
     def __init__(self, config: DatahubClientConfig) -> None:
         self.config = config
@@ -805,7 +815,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
 
         :return: An iterable of (urn, schema info) tuple that match the filters.
         """
-        types = [
+        types = self._get_types(["dataset"])
 
         # Add the query default of * if no query is specified.
         query = query or "*"
@@ -873,10 +883,10 @@
         env: Optional[str] = None,
         query: Optional[str] = None,
         container: Optional[str] = None,
-        status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED,
+        status: Optional[RemovedStatusFilter] = RemovedStatusFilter.NOT_SOFT_DELETED,
         batch_size: int = 10000,
         extraFilters: Optional[List[RawSearchFilterRule]] = None,
-        extra_or_filters: Optional[
+        extra_or_filters: Optional[RawSearchFilter] = None,
     ) -> Iterable[str]:
         """Fetch all urns that match all of the given filters.
 
@@ -968,7 +978,7 @@
         status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED,
         batch_size: int = 10000,
         extra_and_filters: Optional[List[RawSearchFilterRule]] = None,
-        extra_or_filters: Optional[
+        extra_or_filters: Optional[RawSearchFilter] = None,
         extra_source_fields: Optional[List[str]] = None,
         skip_cache: bool = False,
     ) -> Iterable[dict]:
@@ -1121,7 +1131,8 @@
         )
 
         types = [
-
+            flexible_entity_type_to_graphql(entity_type)
+            for entity_type in entity_types
         ]
         return types
 
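Note: a quick sketch of how the new flexible_entity_type_to_graphql helper behaves, based on the conversion rules visible in this file (camelCase names become GraphQL enum values, already-uppercase values pass through); the concrete return values are illustrative of those rules rather than quoted from this diff.

from datahub.ingestion.graph.client import flexible_entity_type_to_graphql

flexible_entity_type_to_graphql("dataset")       # -> "DATASET"
flexible_entity_type_to_graphql("dataPlatform")  # -> "DATA_PLATFORM" (camelCase to UPPER_UNDERSCORE)
flexible_entity_type_to_graphql("corpuser")      # -> "CORP_USER" (hard-coded special case)
flexible_entity_type_to_graphql("DATASET")       # already a GraphQL enum value; returned unchanged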
datahub/ingestion/graph/filters.py
CHANGED
@@ -1,6 +1,7 @@
 import dataclasses
 import enum
-
+import warnings
+from typing import Dict, List, Literal, Optional, Union
 
 from typing_extensions import TypeAlias
 
@@ -8,9 +9,14 @@ from datahub.emitter.mce_builder import (
     make_data_platform_urn,
     make_dataplatform_instance_urn,
 )
+from datahub.errors import SearchFilterWarning
 from datahub.utilities.urns.urn import guess_entity_type
 
-RawSearchFilterRule = Dict[str,
+RawSearchFilterRule: TypeAlias = Dict[str, Union[str, bool, List[str]]]
+
+# This is a list of OR filters, each of which is a list of AND filters.
+# This can be put directly into the orFilters parameter in GraphQL.
+RawSearchFilter: TypeAlias = List[Dict[Literal["and"], List[RawSearchFilterRule]]]
 
 # Mirrors our GraphQL enum: https://datahubproject.io/docs/graphql/enums#filteroperator
 FilterOperator: TypeAlias = Literal[
@@ -39,12 +45,14 @@ class SearchFilterRule:
     negated: bool = False
 
     def to_raw(self) -> RawSearchFilterRule:
-
+        rule: RawSearchFilterRule = {
             "field": self.field,
             "condition": self.condition,
             "values": self.values,
-            "negated": self.negated,
         }
+        if self.negated:
+            rule["negated"] = True
+        return rule
 
     def negate(self) -> "SearchFilterRule":
         return SearchFilterRule(
@@ -73,10 +81,10 @@ def generate_filter(
     platform_instance: Optional[str],
     env: Optional[str],
     container: Optional[str],
-    status: RemovedStatusFilter,
+    status: Optional[RemovedStatusFilter],
     extra_filters: Optional[List[RawSearchFilterRule]],
-    extra_or_filters: Optional[
-) ->
+    extra_or_filters: Optional[RawSearchFilter] = None,
+) -> RawSearchFilter:
     """
     Generate a search filter based on the provided parameters.
     :param platform: The platform to filter by.
@@ -105,15 +113,16 @@ def generate_filter(
         and_filters.append(_get_container_filter(container).to_raw())
 
     # Status filter.
-
-
-
+    if status:
+        status_filter = _get_status_filter(status)
+        if status_filter:
+            and_filters.append(status_filter.to_raw())
 
     # Extra filters.
     if extra_filters:
         and_filters += extra_filters
 
-    or_filters:
+    or_filters: RawSearchFilter = [{"and": and_filters}]
 
     # Env filter
     if env:
@@ -127,11 +136,27 @@ def generate_filter(
 
     # Extra OR filters are distributed across the top level and lists.
     if extra_or_filters:
-
-
-        for extra_or_filter in extra_or_filters
-
-
+        new_or_filters: RawSearchFilter = []
+        for and_filter in or_filters:
+            for extra_or_filter in extra_or_filters:
+                if isinstance(extra_or_filter, dict) and "and" in extra_or_filter:
+                    new_or_filters.append(
+                        {"and": and_filter["and"] + extra_or_filter["and"]}
+                    )
+                else:
+                    # Hack for backwards compatibility.
+                    # We have some code that erroneously passed a List[RawSearchFilterRule]
+                    # instead of a List[Dict["and", List[RawSearchFilterRule]]].
+                    warnings.warn(
+                        "Passing a List[RawSearchFilterRule] to extra_or_filters is deprecated. "
+                        "Please pass a List[Dict[str, List[RawSearchFilterRule]]] instead.",
+                        SearchFilterWarning,
+                        stacklevel=3,
+                    )
+                    new_or_filters.append(
+                        {"and": and_filter["and"] + [extra_or_filter]}  # type: ignore
+                    )
+        or_filters = new_or_filters
 
     return or_filters
 
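Note: generate_filter now types its OR filters as RawSearchFilter, i.e. a list of {"and": [...]} clauses, and keeps a deprecation path for callers that still pass bare rule lists. A hedged sketch of the two shapes (the field names and URN values below are illustrative, not taken from this diff):

# Preferred shape: each element is an {"and": [...]} clause, matching RawSearchFilter.
extra_or_filters = [
    {"and": [{"field": "platform", "condition": "EQUAL", "values": ["urn:li:dataPlatform:snowflake"]}]},
    {"and": [{"field": "platform", "condition": "EQUAL", "values": ["urn:li:dataPlatform:bigquery"]}]},
]

# Deprecated shape: bare rules. Still accepted by the backwards-compatibility branch
# above, but it triggers a SearchFilterWarning.
legacy_extra_or_filters = [
    {"field": "platform", "condition": "EQUAL", "values": ["urn:li:dataPlatform:snowflake"]},
]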
datahub/ingestion/source/cassandra/cassandra.py
CHANGED
@@ -123,16 +123,7 @@ class CassandraSource(StatefulIngestionSourceBase):
             ).workunit_processor,
         ]
 
-    def get_workunits_internal(
-        self,
-    ) -> Iterable[MetadataWorkUnit]:
-        for metadata in self._get_metadata():
-            if isinstance(metadata, MetadataWorkUnit):
-                yield metadata
-            else:
-                yield from metadata.as_workunits()
-
-    def _get_metadata(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
+    def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
         if not self.cassandra_api.authenticate():
             return
         keyspaces: List[CassandraKeyspace] = self.cassandra_api.get_keyspaces()
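Note: with the widened Source.get_workunits_internal return type, connector code like the Cassandra source above can yield SDK entities directly and drop the manual wrapping loop. A minimal sketch of the same pattern in a hypothetical custom source (the class name and _load_entities helper are illustrative, and other required Source methods are omitted):

from typing import Iterable, Union

from datahub.ingestion.api.source import Source
from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.sdk.entity import Entity


class MyCustomSource(Source):
    def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
        # Yield SDK entities directly; the framework's auto_workunit step
        # converts them via Entity.as_workunits().
        for entity in self._load_entities():  # hypothetical helper
            yield entity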
datahub/sdk/search_client.py
CHANGED
@@ -2,36 +2,106 @@ from __future__ import annotations
 
 from typing import (
     TYPE_CHECKING,
-    Dict,
     Iterable,
     List,
     Optional,
+    Tuple,
+    Type,
+    TypeVar,
 )
 
-from datahub.ingestion.graph.filters import
+from datahub.ingestion.graph.filters import RawSearchFilter, RemovedStatusFilter
 from datahub.metadata.urns import Urn
-from datahub.sdk.search_filters import
+from datahub.sdk.search_filters import (
+    Filter,
+    FilterDsl,
+    _EntityTypeFilter,
+    _OrFilters,
+    _StatusFilter,
+)
 
 if TYPE_CHECKING:
     from datahub.sdk.main_client import DataHubClient
 
 
+_FilterType = TypeVar("_FilterType", bound=Filter)
+
+
+def _typed_dfs(
+    filter: Optional[_FilterType], type: Type[_FilterType]
+) -> Optional[List[_FilterType]]:
+    if filter is None:
+        return None
+
+    found: Optional[List[_FilterType]] = None
+    for f in filter.dfs():
+        if isinstance(f, type):
+            if found is None:
+                found = []
+            found.append(f)
+    return found
+
+
 def compile_filters(
     filter: Optional[Filter],
-) -> Optional[List[
+) -> Tuple[Optional[List[str]], RawSearchFilter]:
     # TODO: Not every filter type is supported for every entity type.
     # If we can detect issues with the filters at compile time, we should
     # raise an error.
 
-
-
+    existing_soft_deleted_filter = _typed_dfs(filter, _StatusFilter)
+    if existing_soft_deleted_filter is None:
+        soft_deleted_filter = FilterDsl.soft_deleted(
+            RemovedStatusFilter.NOT_SOFT_DELETED
+        )
+        if filter is None:
+            filter = soft_deleted_filter
+        else:
+            filter = FilterDsl.and_(filter, soft_deleted_filter)
+
+    # This should be safe - if filter were None coming in, then we would replace it
+    # with the soft-deleted filter.
+    assert filter is not None
 
     initial_filters = filter.compile()
-
+
+    compiled_filters: RawSearchFilter = [
         {"and": [rule.to_raw() for rule in andClause["and"]]}
         for andClause in initial_filters
     ]
 
+    entity_types = compute_entity_types(initial_filters)
+
+    return entity_types, compiled_filters
+
+
+def compute_entity_types(
+    filters: _OrFilters,
+) -> Optional[List[str]]:
+    found_filters = False
+    found_positive_filters = False
+    entity_types: List[str] = []
+    for ands in filters:
+        for clause in ands["and"]:
+            if clause.field == _EntityTypeFilter.ENTITY_TYPE_FIELD:
+                found_filters = True
+                if not clause.negated:
+                    found_positive_filters = True
+
+                entity_types.extend(clause.values)
+
+    if not found_filters:
+        # If we didn't find any filters, use None so we use the default set.
+        return None
+
+    if not found_positive_filters:
+        # If we only found negated filters, then it's probably a query like
+        # "find me all entities except for dashboards". In that case, we
+        # still want to use the default set.
+        return None
+
+    return entity_types
+
 
 class SearchClient:
     def __init__(self, client: DataHubClient):
@@ -43,8 +113,11 @@ class SearchClient:
         filter: Optional[Filter] = None,
     ) -> Iterable[Urn]:
         # TODO: Add better limit / pagination support.
+        types, compiled_filters = compile_filters(filter)
         for urn in self._client._graph.get_urns_by_filter(
             query=query,
-
+            status=None,
+            extra_or_filters=compiled_filters,
+            entity_types=types,
         ):
             yield Urn.from_string(urn)
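Note: a hedged sketch of how compile_filters above is used. It returns an (entity_types, orFilters) pair, injecting a NOT_SOFT_DELETED status filter when the caller's filter tree contains none; the result shapes in the comments are indicative, not exact output.

from datahub.sdk.search_client import compile_filters
from datahub.sdk.search_filters import FilterDsl as F

# An explicit entity-type filter: the GraphQL types are extracted from the compiled
# rules, and the default not-soft-deleted status rule is ANDed in.
types, or_filters = compile_filters(F.entity_type("dataset"))
# types ~ ["DATASET"]; or_filters ~ [{"and": [<entity type rule>, <status rule>]}]

# No filter at all: only the implicit status filter is applied, and entity types
# stay None so the server-side default search set is used.
default_types, default_filters = compile_filters(None)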
datahub/sdk/search_filters.py
CHANGED
@@ -3,7 +3,10 @@ from __future__ import annotations
 import abc
 from typing import (
     Any,
+    ClassVar,
+    Iterator,
     List,
+    Optional,
     Sequence,
     TypedDict,
     Union,
@@ -13,8 +16,13 @@ import pydantic
 
 from datahub.configuration.common import ConfigModel
 from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2
-from datahub.ingestion.graph.client import
-from datahub.ingestion.graph.filters import
+from datahub.ingestion.graph.client import flexible_entity_type_to_graphql
+from datahub.ingestion.graph.filters import (
+    FilterOperator,
+    RemovedStatusFilter,
+    SearchFilterRule,
+    _get_status_filter,
+)
 from datahub.metadata.schema_classes import EntityTypeName
 from datahub.metadata.urns import DataPlatformUrn, DomainUrn
 
@@ -37,25 +45,28 @@ class _BaseFilter(ConfigModel):
     def compile(self) -> _OrFilters:
         pass
 
-
-
-    if entity_type.upper() == entity_type:
-        # Assume that we were passed a graphql EntityType enum value,
-        # so no conversion is needed.
-        return entity_type
-    return entity_type_to_graphql(entity_type)
+    def dfs(self) -> Iterator[_BaseFilter]:
+        yield self
 
 
 class _EntityTypeFilter(_BaseFilter):
+    """Filter for specific entity types.
+
+    If no entity type filter is specified, we will search all entity types in the
+    default search set, mirroring the behavior of the DataHub UI.
+    """
+
+    ENTITY_TYPE_FIELD: ClassVar[str] = "_entityType"
+
     entity_type: List[str] = pydantic.Field(
         description="The entity type to filter on. Can be 'dataset', 'chart', 'dashboard', 'corpuser', etc.",
     )
 
     def _build_rule(self) -> SearchFilterRule:
         return SearchFilterRule(
-            field=
+            field=self.ENTITY_TYPE_FIELD,
             condition="EQUAL",
-            values=[
+            values=[flexible_entity_type_to_graphql(t) for t in self.entity_type],
         )
 
     def compile(self) -> _OrFilters:
@@ -78,6 +89,26 @@ class _EntitySubtypeFilter(_BaseFilter):
         return [{"and": [self._build_rule()]}]
 
 
+class _StatusFilter(_BaseFilter):
+    """Filter for the status of entities during search.
+
+    If not explicitly specified, the NOT_SOFT_DELETED status filter will be applied.
+    """
+
+    status: RemovedStatusFilter
+
+    def _build_rule(self) -> Optional[SearchFilterRule]:
+        return _get_status_filter(self.status)
+
+    def compile(self) -> _OrFilters:
+        rule = self._build_rule()
+        if rule:
+            return [{"and": [rule]}]
+        else:
+            # Our boolean algebra logic requires something here - returning [] would cause errors.
+            return FilterDsl.true().compile()
+
+
 class _PlatformFilter(_BaseFilter):
     platform: List[str]
     # TODO: Add validator to convert string -> list of strings
@@ -213,6 +244,11 @@ class _And(_BaseFilter):
             ]
         }
 
+    def dfs(self) -> Iterator[_BaseFilter]:
+        yield self
+        for filter in self.and_:
+            yield from filter.dfs()
+
 
 class _Or(_BaseFilter):
     """Represents an OR conjunction of filters."""
@@ -226,6 +262,11 @@ class _Or(_BaseFilter):
             merged_filter.extend(filter.compile())
         return merged_filter
 
+    def dfs(self) -> Iterator[_BaseFilter]:
+        yield self
+        for filter in self.or_:
+            yield from filter.dfs()
+
 
 class _Not(_BaseFilter):
     """Represents a NOT filter."""
@@ -256,6 +297,10 @@ class _Not(_BaseFilter):
 
         return final_filters
 
+    def dfs(self) -> Iterator[_BaseFilter]:
+        yield self
+        yield from self.not_.dfs()
+
 
 # TODO: With pydantic 2, we can use a RootModel with a
 # discriminated union to make the error messages more informative.
@@ -265,6 +310,7 @@ Filter = Union[
     _Not,
     _EntityTypeFilter,
     _EntitySubtypeFilter,
+    _StatusFilter,
     _PlatformFilter,
     _DomainFilter,
     _EnvFilter,
@@ -312,6 +358,18 @@ class FilterDsl:
     def not_(arg: "Filter") -> _Not:
         return _Not(not_=arg)
 
+    @staticmethod
+    def true() -> "Filter":
+        return _CustomCondition(
+            field="urn",
+            condition="EXISTS",
+            values=[],
+        )
+
+    @staticmethod
+    def false() -> "Filter":
+        return FilterDsl.not_(FilterDsl.true())
+
     @staticmethod
     def entity_type(
         entity_type: Union[EntityTypeName, Sequence[EntityTypeName]],
@@ -354,6 +412,10 @@ class FilterDsl:
             values=[f"{key}={value}"],
        )
 
+    @staticmethod
+    def soft_deleted(status: RemovedStatusFilter) -> _StatusFilter:
+        return _StatusFilter(status=status)
+
     # TODO: Add a soft-deletion status filter
     # TODO: add a container / browse path filter
     # TODO add shortcut for custom filters
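Note: a hedged sketch of the new FilterDsl pieces above: soft_deleted wraps the new _StatusFilter, true()/false() give always/never-matching filters, and dfs() walks a filter tree (which is how compile_filters detects an explicit status filter). Only names shown in this diff are used; the composition itself is illustrative.

from datahub.ingestion.graph.filters import RemovedStatusFilter
from datahub.sdk.search_filters import FilterDsl as F

# Combine an entity-type filter with an explicit status filter, so the SDK's
# default not-soft-deleted rule is not added on top of it.
f = F.and_(
    F.entity_type("dataset"),
    F.soft_deleted(RemovedStatusFilter.NOT_SOFT_DELETED),
)

# dfs() yields the _And node and then every nested filter, depth-first.
nodes = list(f.dfs())

# true()/false() act as identity elements when building filters programmatically.
match_all = F.true()    # compiles to an "urn EXISTS" rule
match_none = F.false()  # NOT(true())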