acryl-datahub 0.15.0.6rc2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (205)
  1. {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/METADATA +2522 -2493
  2. {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/RECORD +205 -192
  3. {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/entry_points.txt +1 -0
  5. datahub/_version.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +4 -3
  7. datahub/api/entities/dataset/dataset.py +731 -42
  8. datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
  9. datahub/cli/check_cli.py +72 -19
  10. datahub/cli/docker_cli.py +3 -3
  11. datahub/cli/iceberg_cli.py +31 -7
  12. datahub/cli/ingest_cli.py +30 -93
  13. datahub/cli/lite_cli.py +4 -2
  14. datahub/cli/specific/dataproduct_cli.py +1 -1
  15. datahub/cli/specific/dataset_cli.py +128 -14
  16. datahub/configuration/common.py +10 -2
  17. datahub/configuration/git.py +1 -3
  18. datahub/configuration/kafka.py +1 -1
  19. datahub/emitter/mce_builder.py +28 -13
  20. datahub/emitter/mcp_builder.py +4 -1
  21. datahub/emitter/response_helper.py +145 -0
  22. datahub/emitter/rest_emitter.py +323 -10
  23. datahub/ingestion/api/decorators.py +1 -1
  24. datahub/ingestion/api/source_helpers.py +4 -0
  25. datahub/ingestion/fs/s3_fs.py +2 -2
  26. datahub/ingestion/glossary/classification_mixin.py +1 -5
  27. datahub/ingestion/graph/client.py +41 -22
  28. datahub/ingestion/graph/entity_versioning.py +3 -3
  29. datahub/ingestion/graph/filters.py +64 -37
  30. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -6
  31. datahub/ingestion/run/pipeline.py +112 -148
  32. datahub/ingestion/run/sink_callback.py +77 -0
  33. datahub/ingestion/sink/datahub_rest.py +8 -0
  34. datahub/ingestion/source/abs/config.py +2 -4
  35. datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
  36. datahub/ingestion/source/bigquery_v2/bigquery_config.py +2 -46
  37. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +6 -1
  38. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +7 -4
  39. datahub/ingestion/source/cassandra/cassandra.py +152 -233
  40. datahub/ingestion/source/cassandra/cassandra_api.py +13 -5
  41. datahub/ingestion/source/common/gcp_credentials_config.py +53 -0
  42. datahub/ingestion/source/common/subtypes.py +12 -0
  43. datahub/ingestion/source/csv_enricher.py +3 -3
  44. datahub/ingestion/source/data_lake_common/path_spec.py +1 -3
  45. datahub/ingestion/source/dbt/dbt_common.py +8 -5
  46. datahub/ingestion/source/dbt/dbt_core.py +11 -9
  47. datahub/ingestion/source/dbt/dbt_tests.py +4 -8
  48. datahub/ingestion/source/delta_lake/config.py +8 -1
  49. datahub/ingestion/source/delta_lake/report.py +4 -2
  50. datahub/ingestion/source/delta_lake/source.py +20 -5
  51. datahub/ingestion/source/dremio/dremio_api.py +4 -8
  52. datahub/ingestion/source/dremio/dremio_aspects.py +3 -5
  53. datahub/ingestion/source/dynamodb/dynamodb.py +6 -0
  54. datahub/ingestion/source/elastic_search.py +26 -6
  55. datahub/ingestion/source/feast.py +27 -8
  56. datahub/ingestion/source/file.py +6 -3
  57. datahub/ingestion/source/gc/dataprocess_cleanup.py +1 -1
  58. datahub/ingestion/source/gc/execution_request_cleanup.py +2 -1
  59. datahub/ingestion/source/ge_data_profiler.py +12 -15
  60. datahub/ingestion/source/iceberg/iceberg.py +46 -12
  61. datahub/ingestion/source/iceberg/iceberg_common.py +71 -21
  62. datahub/ingestion/source/identity/okta.py +37 -7
  63. datahub/ingestion/source/kafka/kafka.py +1 -1
  64. datahub/ingestion/source/kafka_connect/common.py +2 -7
  65. datahub/ingestion/source/kafka_connect/kafka_connect.py +97 -4
  66. datahub/ingestion/source/kafka_connect/sink_connectors.py +2 -2
  67. datahub/ingestion/source/kafka_connect/source_connectors.py +6 -9
  68. datahub/ingestion/source/looker/looker_common.py +6 -5
  69. datahub/ingestion/source/looker/looker_file_loader.py +2 -2
  70. datahub/ingestion/source/looker/looker_lib_wrapper.py +2 -1
  71. datahub/ingestion/source/looker/looker_source.py +1 -1
  72. datahub/ingestion/source/looker/looker_template_language.py +4 -2
  73. datahub/ingestion/source/looker/lookml_source.py +3 -2
  74. datahub/ingestion/source/metabase.py +57 -35
  75. datahub/ingestion/source/metadata/business_glossary.py +45 -3
  76. datahub/ingestion/source/metadata/lineage.py +2 -2
  77. datahub/ingestion/source/mlflow.py +365 -35
  78. datahub/ingestion/source/mode.py +18 -8
  79. datahub/ingestion/source/neo4j/neo4j_source.py +27 -7
  80. datahub/ingestion/source/nifi.py +37 -11
  81. datahub/ingestion/source/openapi.py +1 -1
  82. datahub/ingestion/source/openapi_parser.py +49 -17
  83. datahub/ingestion/source/powerbi/m_query/parser.py +3 -2
  84. datahub/ingestion/source/powerbi/m_query/tree_function.py +2 -1
  85. datahub/ingestion/source/powerbi/powerbi.py +1 -3
  86. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -1
  87. datahub/ingestion/source/powerbi_report_server/report_server.py +26 -7
  88. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +1 -1
  89. datahub/ingestion/source/preset.py +7 -4
  90. datahub/ingestion/source/pulsar.py +3 -2
  91. datahub/ingestion/source/qlik_sense/websocket_connection.py +4 -2
  92. datahub/ingestion/source/redash.py +31 -7
  93. datahub/ingestion/source/redshift/config.py +4 -0
  94. datahub/ingestion/source/redshift/datashares.py +236 -0
  95. datahub/ingestion/source/redshift/lineage.py +6 -2
  96. datahub/ingestion/source/redshift/lineage_v2.py +24 -9
  97. datahub/ingestion/source/redshift/profile.py +1 -1
  98. datahub/ingestion/source/redshift/query.py +133 -33
  99. datahub/ingestion/source/redshift/redshift.py +46 -73
  100. datahub/ingestion/source/redshift/redshift_schema.py +186 -6
  101. datahub/ingestion/source/redshift/report.py +3 -0
  102. datahub/ingestion/source/s3/config.py +5 -5
  103. datahub/ingestion/source/s3/source.py +20 -41
  104. datahub/ingestion/source/salesforce.py +550 -275
  105. datahub/ingestion/source/schema_inference/object.py +1 -1
  106. datahub/ingestion/source/sigma/sigma.py +1 -1
  107. datahub/ingestion/source/slack/slack.py +31 -10
  108. datahub/ingestion/source/snowflake/snowflake_connection.py +2 -2
  109. datahub/ingestion/source/snowflake/snowflake_queries.py +19 -13
  110. datahub/ingestion/source/snowflake/snowflake_query.py +6 -4
  111. datahub/ingestion/source/snowflake/snowflake_schema.py +3 -4
  112. datahub/ingestion/source/snowflake/snowflake_v2.py +1 -1
  113. datahub/ingestion/source/sql/athena.py +10 -16
  114. datahub/ingestion/source/sql/druid.py +1 -5
  115. datahub/ingestion/source/sql/hive.py +15 -6
  116. datahub/ingestion/source/sql/hive_metastore.py +3 -2
  117. datahub/ingestion/source/sql/mssql/job_models.py +29 -0
  118. datahub/ingestion/source/sql/mssql/source.py +11 -5
  119. datahub/ingestion/source/sql/oracle.py +127 -63
  120. datahub/ingestion/source/sql/sql_common.py +16 -18
  121. datahub/ingestion/source/sql/sql_types.py +2 -2
  122. datahub/ingestion/source/sql/teradata.py +19 -5
  123. datahub/ingestion/source/sql/trino.py +2 -2
  124. datahub/ingestion/source/state/stale_entity_removal_handler.py +4 -8
  125. datahub/ingestion/source/superset.py +222 -62
  126. datahub/ingestion/source/tableau/tableau.py +22 -6
  127. datahub/ingestion/source/tableau/tableau_common.py +3 -2
  128. datahub/ingestion/source/unity/ge_profiler.py +2 -1
  129. datahub/ingestion/source/unity/source.py +11 -1
  130. datahub/ingestion/source/vertexai.py +697 -0
  131. datahub/ingestion/source_config/pulsar.py +3 -1
  132. datahub/ingestion/transformer/pattern_cleanup_ownership.py +25 -7
  133. datahub/lite/duckdb_lite.py +3 -10
  134. datahub/lite/lite_local.py +1 -1
  135. datahub/lite/lite_util.py +4 -3
  136. datahub/metadata/_schema_classes.py +714 -417
  137. datahub/metadata/_urns/urn_defs.py +1673 -1649
  138. datahub/metadata/com/linkedin/pegasus2avro/incident/__init__.py +4 -0
  139. datahub/metadata/schema.avsc +16438 -16603
  140. datahub/metadata/schemas/AssertionInfo.avsc +3 -1
  141. datahub/metadata/schemas/BusinessAttributeInfo.avsc +6 -2
  142. datahub/metadata/schemas/BusinessAttributes.avsc +6 -0
  143. datahub/metadata/schemas/ChartInfo.avsc +1 -0
  144. datahub/metadata/schemas/CorpGroupKey.avsc +2 -1
  145. datahub/metadata/schemas/CorpUserInfo.avsc +13 -0
  146. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  147. datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc +8 -3
  148. datahub/metadata/schemas/DataProcessInstanceInput.avsc +129 -1
  149. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +131 -3
  150. datahub/metadata/schemas/DataProcessKey.avsc +2 -1
  151. datahub/metadata/schemas/DataProductKey.avsc +2 -1
  152. datahub/metadata/schemas/DomainKey.avsc +2 -1
  153. datahub/metadata/schemas/EditableSchemaMetadata.avsc +6 -2
  154. datahub/metadata/schemas/GlossaryNodeKey.avsc +3 -1
  155. datahub/metadata/schemas/GlossaryTermKey.avsc +2 -1
  156. datahub/metadata/schemas/GlossaryTerms.avsc +3 -1
  157. datahub/metadata/schemas/IncidentInfo.avsc +130 -46
  158. datahub/metadata/schemas/InputFields.avsc +3 -1
  159. datahub/metadata/schemas/MLFeatureKey.avsc +2 -1
  160. datahub/metadata/schemas/MLFeatureTableKey.avsc +2 -1
  161. datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -1
  162. datahub/metadata/schemas/MLModelGroupKey.avsc +3 -1
  163. datahub/metadata/schemas/MLModelKey.avsc +3 -1
  164. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +2 -1
  165. datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -2
  166. datahub/metadata/schemas/PostKey.avsc +2 -1
  167. datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
  168. datahub/metadata/schemas/SchemaMetadata.avsc +3 -1
  169. datahub/metadata/schemas/StructuredPropertyDefinition.avsc +14 -0
  170. datahub/metadata/schemas/VersionProperties.avsc +18 -0
  171. datahub/metadata/schemas/VersionSetProperties.avsc +5 -0
  172. datahub/pydantic/__init__.py +0 -0
  173. datahub/pydantic/compat.py +58 -0
  174. datahub/sdk/__init__.py +30 -12
  175. datahub/sdk/_all_entities.py +1 -1
  176. datahub/sdk/_attribution.py +4 -0
  177. datahub/sdk/_shared.py +258 -16
  178. datahub/sdk/_utils.py +35 -0
  179. datahub/sdk/container.py +30 -6
  180. datahub/sdk/dataset.py +118 -20
  181. datahub/sdk/{_entity.py → entity.py} +24 -1
  182. datahub/sdk/entity_client.py +1 -1
  183. datahub/sdk/main_client.py +23 -0
  184. datahub/sdk/resolver_client.py +17 -29
  185. datahub/sdk/search_client.py +50 -0
  186. datahub/sdk/search_filters.py +374 -0
  187. datahub/specific/dataset.py +3 -4
  188. datahub/sql_parsing/_sqlglot_patch.py +2 -10
  189. datahub/sql_parsing/schema_resolver.py +1 -1
  190. datahub/sql_parsing/split_statements.py +220 -126
  191. datahub/sql_parsing/sql_parsing_common.py +7 -0
  192. datahub/sql_parsing/sqlglot_lineage.py +1 -1
  193. datahub/sql_parsing/sqlglot_utils.py +1 -4
  194. datahub/testing/check_sql_parser_result.py +5 -6
  195. datahub/testing/compare_metadata_json.py +7 -6
  196. datahub/testing/pytest_hooks.py +56 -0
  197. datahub/upgrade/upgrade.py +2 -2
  198. datahub/utilities/file_backed_collections.py +3 -14
  199. datahub/utilities/ingest_utils.py +106 -0
  200. datahub/utilities/mapping.py +1 -1
  201. datahub/utilities/memory_footprint.py +3 -2
  202. datahub/utilities/sentinels.py +22 -0
  203. datahub/utilities/unified_diff.py +5 -1
  204. {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/LICENSE +0 -0
  205. {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/top_level.txt +0 -0
datahub/sdk/search_filters.py (new file)
@@ -0,0 +1,374 @@
+ from __future__ import annotations
+
+ import abc
+ from typing import (
+     Any,
+     List,
+     Sequence,
+     TypedDict,
+     Union,
+ )
+
+ import pydantic
+
+ from datahub.configuration.common import ConfigModel
+ from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2
+ from datahub.ingestion.graph.client import entity_type_to_graphql
+ from datahub.ingestion.graph.filters import SearchFilterRule
+ from datahub.metadata.schema_classes import EntityTypeName
+ from datahub.metadata.urns import DataPlatformUrn, DomainUrn
+
+ _AndSearchFilterRule = TypedDict(
+     "_AndSearchFilterRule", {"and": List[SearchFilterRule]}
+ )
+ _OrFilters = List[_AndSearchFilterRule]
+
+
+ class _BaseFilter(ConfigModel):
+     class Config:
+         # We can't wrap this in a TYPE_CHECKING block because the pydantic plugin
+         # doesn't recognize it properly. So unfortunately we'll need to live
+         # with the deprecation warning w/ pydantic v2.
+         allow_population_by_field_name = True
+         if PYDANTIC_VERSION_2:
+             populate_by_name = True
+
+     @abc.abstractmethod
+     def compile(self) -> _OrFilters:
+         pass
+
+
+ def _flexible_entity_type_to_graphql(entity_type: str) -> str:
+     if entity_type.upper() == entity_type:
+         # Assume that we were passed a graphql EntityType enum value,
+         # so no conversion is needed.
+         return entity_type
+     return entity_type_to_graphql(entity_type)
+
+
+ class _EntityTypeFilter(_BaseFilter):
+     entity_type: List[str] = pydantic.Field(
+         description="The entity type to filter on. Can be 'dataset', 'chart', 'dashboard', 'corpuser', etc.",
+     )
+
+     def _build_rule(self) -> SearchFilterRule:
+         return SearchFilterRule(
+             field="_entityType",
+             condition="EQUAL",
+             values=[_flexible_entity_type_to_graphql(t) for t in self.entity_type],
+         )
+
+     def compile(self) -> _OrFilters:
+         return [{"and": [self._build_rule()]}]
+
+
+ class _EntitySubtypeFilter(_BaseFilter):
+     entity_type: str
+     entity_subtype: str = pydantic.Field(
+         description="The entity subtype to filter on. Can be 'Table', 'View', 'Source', etc. depending on the native platform's concepts.",
+     )
+
+     def compile(self) -> _OrFilters:
+         rules = [
+             SearchFilterRule(
+                 field="_entityType",
+                 condition="EQUAL",
+                 values=[_flexible_entity_type_to_graphql(self.entity_type)],
+             ),
+             SearchFilterRule(
+                 field="typeNames",
+                 condition="EQUAL",
+                 values=[self.entity_subtype],
+             ),
+         ]
+         return [{"and": rules}]
+
+
+ class _PlatformFilter(_BaseFilter):
+     platform: List[str]
+     # TODO: Add validator to convert string -> list of strings
+
+     @pydantic.validator("platform", each_item=True)
+     def validate_platform(cls, v: str) -> str:
+         # Subtle - we use the constructor instead of the from_string method
+         # because coercion is acceptable here.
+         return str(DataPlatformUrn(v))
+
+     def _build_rule(self) -> SearchFilterRule:
+         return SearchFilterRule(
+             field="platform.keyword",
+             condition="EQUAL",
+             values=self.platform,
+         )
+
+     def compile(self) -> _OrFilters:
+         return [{"and": [self._build_rule()]}]
+
+
+ class _DomainFilter(_BaseFilter):
+     domain: List[str]
+
+     @pydantic.validator("domain", each_item=True)
+     def validate_domain(cls, v: str) -> str:
+         return str(DomainUrn.from_string(v))
+
+     def _build_rule(self) -> SearchFilterRule:
+         return SearchFilterRule(
+             field="domains",
+             condition="EQUAL",
+             values=self.domain,
+         )
+
+     def compile(self) -> _OrFilters:
+         return [{"and": [self._build_rule()]}]
+
+
+ class _EnvFilter(_BaseFilter):
+     # Note that not all entity types have an env (e.g. dashboards / charts).
+     # If the env filter is specified, these will be excluded.
+     env: List[str]
+
+     def compile(self) -> _OrFilters:
+         return [
+             # For most entity types, we look at the origin field.
+             {
+                 "and": [
+                     SearchFilterRule(
+                         field="origin",
+                         condition="EQUAL",
+                         values=self.env,
+                     ),
+                 ]
+             },
+             # For containers, we now have an "env" property as of
+             # https://github.com/datahub-project/datahub/pull/11214
+             # Prior to this, we put "env" in the customProperties. But we're
+             # not bothering with that here.
+             {
+                 "and": [
+                     SearchFilterRule(
+                         field="env",
+                         condition="EQUAL",
+                         values=self.env,
+                     ),
+                 ]
+             },
+         ]
+
+
+ class _CustomCondition(_BaseFilter):
+     """Represents a single field condition"""
+
+     field: str
+     condition: str
+     values: List[str]
+
+     def compile(self) -> _OrFilters:
+         rule = SearchFilterRule(
+             field=self.field,
+             condition=self.condition,
+             values=self.values,
+         )
+         return [{"and": [rule]}]
+
+
+ class _And(_BaseFilter):
+     """Represents an AND conjunction of filters"""
+
+     and_: Sequence["Filter"] = pydantic.Field(alias="and")
+     # TODO: Add validator to ensure that the "and" field is not empty
+
+     def compile(self) -> _OrFilters:
+         # The "and" operator must be implemented by doing a Cartesian product
+         # of the OR clauses.
+         # Example 1:
+         # (A or B) and (C or D) ->
+         # (A and C) or (A and D) or (B and C) or (B and D)
+         # Example 2:
+         # (A or B) and (C or D) and (E or F) ->
+         # (A and C and E) or (A and C and F) or (A and D and E) or (A and D and F) or
+         # (B and C and E) or (B and C and F) or (B and D and E) or (B and D and F)
+
+         # Start with the first filter's OR clauses
+         result = self.and_[0].compile()
+
+         # For each subsequent filter
+         for filter in self.and_[1:]:
+             new_result = []
+             # Get its OR clauses
+             other_clauses = filter.compile()
+
+             # Create Cartesian product
+             for existing_clause in result:
+                 for other_clause in other_clauses:
+                     # Merge the AND conditions from both clauses
+                     new_result.append(self._merge_ands(existing_clause, other_clause))
+
+             result = new_result
+
+         return result
+
+     @classmethod
+     def _merge_ands(
+         cls, a: _AndSearchFilterRule, b: _AndSearchFilterRule
+     ) -> _AndSearchFilterRule:
+         return {
+             "and": [
+                 *a["and"],
+                 *b["and"],
+             ]
+         }
+
+
+ class _Or(_BaseFilter):
+     """Represents an OR conjunction of filters"""
+
+     or_: Sequence["Filter"] = pydantic.Field(alias="or")
+     # TODO: Add validator to ensure that the "or" field is not empty
+
+     def compile(self) -> _OrFilters:
+         merged_filter = []
+         for filter in self.or_:
+             merged_filter.extend(filter.compile())
+         return merged_filter
+
+
+ class _Not(_BaseFilter):
+     """Represents a NOT filter"""
+
+     not_: "Filter" = pydantic.Field(alias="not")
+
+     @pydantic.validator("not_", pre=False)
+     def validate_not(cls, v: "Filter") -> "Filter":
+         inner_filter = v.compile()
+         if len(inner_filter) != 1:
+             raise ValueError(
+                 "Cannot negate a filter with multiple OR clauses [not yet supported]"
+             )
+         return v
+
+     def compile(self) -> _OrFilters:
+         # TODO: Eventually we'll want to implement a full DNF normalizer.
+         # https://en.wikipedia.org/wiki/Disjunctive_normal_form#Conversion_to_DNF
+
+         inner_filter = self.not_.compile()
+         assert len(inner_filter) == 1  # validated above
+
+         # ¬(A and B) -> (¬A) OR (¬B)
+         and_filters = inner_filter[0]["and"]
+         final_filters: _OrFilters = []
+         for rule in and_filters:
+             final_filters.append({"and": [rule.negate()]})
+
+         return final_filters
+
+
+ # TODO: With pydantic 2, we can use a RootModel with a
+ # discriminated union to make the error messages more informative.
+ Filter = Union[
+     _And,
+     _Or,
+     _Not,
+     _EntityTypeFilter,
+     _EntitySubtypeFilter,
+     _PlatformFilter,
+     _DomainFilter,
+     _EnvFilter,
+     _CustomCondition,
+ ]
+
+
+ # Required to resolve forward references to "Filter"
+ if PYDANTIC_VERSION_2:
+     _And.model_rebuild()  # type: ignore
+     _Or.model_rebuild()  # type: ignore
+     _Not.model_rebuild()  # type: ignore
+ else:
+     _And.update_forward_refs()
+     _Or.update_forward_refs()
+     _Not.update_forward_refs()
+
+
+ def load_filters(obj: Any) -> Filter:
+     if PYDANTIC_VERSION_2:
+         return pydantic.TypeAdapter(Filter).validate_python(obj)  # type: ignore
+     else:
+         return pydantic.parse_obj_as(Filter, obj)  # type: ignore
+
+
+ # We need FilterDsl for two reasons:
+ # 1. To provide wrapper methods around lots of filters while avoid bloating the
+ #    yaml spec.
+ # 2. Pydantic models in general don't support positional arguments, making the
+ #    calls feel repetitive (e.g. Platform(platform=...)).
+ #    See https://github.com/pydantic/pydantic/issues/6792
+ # We also considered using dataclasses / pydantic dataclasses, but
+ # ultimately decided that they didn't quite suit our requirements,
+ # particularly with regards to the field aliases for and/or/not.
+ class FilterDsl:
+     @staticmethod
+     def and_(*args: "Filter") -> _And:
+         return _And(and_=list(args))
+
+     @staticmethod
+     def or_(*args: "Filter") -> _Or:
+         return _Or(or_=list(args))
+
+     @staticmethod
+     def not_(arg: "Filter") -> _Not:
+         return _Not(not_=arg)
+
+     @staticmethod
+     def entity_type(
+         entity_type: Union[EntityTypeName, Sequence[EntityTypeName]],
+     ) -> _EntityTypeFilter:
+         return _EntityTypeFilter(
+             entity_type=(
+                 [entity_type] if isinstance(entity_type, str) else list(entity_type)
+             )
+         )
+
+     @staticmethod
+     def entity_subtype(entity_type: str, subtype: str) -> _EntitySubtypeFilter:
+         return _EntitySubtypeFilter(
+             entity_type=entity_type,
+             entity_subtype=subtype,
+         )
+
+     @staticmethod
+     def platform(platform: Union[str, List[str]], /) -> _PlatformFilter:
+         return _PlatformFilter(
+             platform=[platform] if isinstance(platform, str) else platform
+         )
+
+     # TODO: Add a platform_instance filter
+
+     @staticmethod
+     def domain(domain: Union[str, List[str]], /) -> _DomainFilter:
+         return _DomainFilter(domain=[domain] if isinstance(domain, str) else domain)
+
+     @staticmethod
+     def env(env: Union[str, List[str]], /) -> _EnvFilter:
+         return _EnvFilter(env=[env] if isinstance(env, str) else env)
+
+     @staticmethod
+     def has_custom_property(key: str, value: str) -> _CustomCondition:
+         return _CustomCondition(
+             field="customProperties",
+             condition="EQUAL",
+             values=[f"{key}={value}"],
+         )
+
+     # TODO: Add a soft-deletion status filter
+     # TODO: add a container / browse path filter
+     # TODO add shortcut for custom filters
+
+     @staticmethod
+     def custom_filter(
+         field: str, condition: str, values: List[str]
+     ) -> _CustomCondition:
+         return _CustomCondition(
+             field=field,
+             condition=condition,
+             values=values,
+         )
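
For orientation, here is a minimal usage sketch of the filter DSL introduced above. It is not part of the diff; every class, method, and field name is taken from search_filters.py as added in 1.0.0, and the equality check at the end assumes SearchFilterRule compares by value.

from datahub.sdk.search_filters import FilterDsl as F, load_filters

# Compose programmatically: datasets or charts, on Snowflake, in PROD.
flt = F.and_(
    F.entity_type(["dataset", "chart"]),
    F.platform("snowflake"),
    F.env("PROD"),
)

# compile() lowers the filter to DNF: a list of {"and": [SearchFilterRule, ...]}
# clauses that are OR'd together. The env filter alone compiles to two OR
# clauses (the "origin" and "env" fields), so and_() takes their Cartesian
# product with the entity-type and platform rules, yielding two AND clauses.
dnf = flt.compile()

# The same filter can be loaded from plain data (e.g. a YAML spec), using the
# "and"/"or"/"not" field aliases:
same = load_filters(
    {
        "and": [
            {"entity_type": ["dataset", "chart"]},
            {"platform": ["snowflake"]},
            {"env": ["PROD"]},
        ]
    }
)
assert same.compile() == dnf  # assumes SearchFilterRule supports value equality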

datahub/specific/dataset.py
@@ -15,6 +15,7 @@ from datahub.metadata.schema_classes import (
      UpstreamClass as Upstream,
      UpstreamLineageClass as UpstreamLineage,
  )
+ from datahub.metadata.urns import DatasetUrn, TagUrn, Urn
  from datahub.specific.aspect_helpers.custom_properties import HasCustomPropertiesPatch
  from datahub.specific.aspect_helpers.ownership import HasOwnershipPatch
  from datahub.specific.aspect_helpers.structured_properties import (
@@ -22,8 +23,6 @@ from datahub.specific.aspect_helpers.structured_properties import (
  )
  from datahub.specific.aspect_helpers.tags import HasTagsPatch
  from datahub.specific.aspect_helpers.terms import HasTermsPatch
- from datahub.utilities.urns.tag_urn import TagUrn
- from datahub.utilities.urns.urn import Urn

  _Parent = TypeVar("_Parent", bound=MetadataPatchProposal)

@@ -104,12 +103,12 @@ class DatasetPatchBuilder(
  ):
      def __init__(
          self,
-         urn: str,
+         urn: Union[str, DatasetUrn],
          system_metadata: Optional[SystemMetadataClass] = None,
          audit_header: Optional[KafkaAuditHeaderClass] = None,
      ) -> None:
          super().__init__(
-             urn, system_metadata=system_metadata, audit_header=audit_header
+             str(urn), system_metadata=system_metadata, audit_header=audit_header
          )

      @classmethod
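
A short hypothetical snippet showing the effect of the DatasetPatchBuilder change above; only the urn handling comes from the hunk, the rest is illustrative.

from datahub.metadata.urns import DatasetUrn
from datahub.specific.dataset import DatasetPatchBuilder

# The builder previously required a string urn; it now also accepts a
# DatasetUrn and normalizes it with str(urn) before calling the base
# MetadataPatchProposal constructor.
urn = DatasetUrn(platform="snowflake", name="db.schema.table", env="PROD")
patch = DatasetPatchBuilder(urn)  # equivalent to DatasetPatchBuilder(str(urn))
mcps = patch.build()  # no patch operations recorded yet, so this is empty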
@@ -172,17 +172,9 @@ def _patch_lineage() -> None:
172
172
  derived_tables = [
173
173
  source.expression.parent
174
174
  for source in scope.sources.values()
175
- @@ -254,6 +257,7 @@ def to_node(
176
- if dt.comments and dt.comments[0].startswith("source: ")
177
- }
178
-
179
- + c: exp.Column
180
- for c in source_columns:
181
- table = c.table
182
- source = scope.sources.get(table)
183
175
  @@ -281,8 +285,21 @@ def to_node(
184
- # it means this column's lineage is unknown. This can happen if the definition of a source used in a query
185
- # is not passed into the `sources` map.
176
+ # is unknown. This can happen if the definition of a source used in a query is not
177
+ # passed into the `sources` map.
186
178
  source = source or exp.Placeholder()
187
179
  +
188
180
  + subfields = []

datahub/sql_parsing/schema_resolver.py
@@ -13,7 +13,7 @@ from datahub.ingestion.graph.client import DataHubGraph
  from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier
  from datahub.metadata.schema_classes import SchemaFieldClass, SchemaMetadataClass
  from datahub.metadata.urns import DataPlatformUrn
- from datahub.sql_parsing._models import _TableName as _TableName  # noqa: I250
+ from datahub.sql_parsing._models import _TableName as _TableName
  from datahub.sql_parsing.sql_parsing_common import PLATFORMS_WITH_CASE_SENSITIVE_TABLES
  from datahub.utilities.file_backed_collections import ConnectionWrapper, FileBackedDict
  from datahub.utilities.urns.field_paths import get_simple_field_path_from_v2_field_path