acryl-datahub 1.2.0.6__py3-none-any.whl → 1.2.0.7rc1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.

Potentially problematic release: this version of acryl-datahub might be problematic.

Files changed (63)
  1. {acryl_datahub-1.2.0.6.dist-info → acryl_datahub-1.2.0.7rc1.dist-info}/METADATA +2693 -2630
  2. {acryl_datahub-1.2.0.6.dist-info → acryl_datahub-1.2.0.7rc1.dist-info}/RECORD +63 -55
  3. {acryl_datahub-1.2.0.6.dist-info → acryl_datahub-1.2.0.7rc1.dist-info}/entry_points.txt +1 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/graphql/operation.py +1 -1
  6. datahub/ingestion/autogenerated/capability_summary.json +45 -5
  7. datahub/ingestion/autogenerated/lineage.json +3 -2
  8. datahub/ingestion/run/pipeline.py +1 -0
  9. datahub/ingestion/source/aws/s3_boto_utils.py +97 -5
  10. datahub/ingestion/source/common/subtypes.py +3 -0
  11. datahub/ingestion/source/data_lake_common/path_spec.py +1 -1
  12. datahub/ingestion/source/datahub/datahub_database_reader.py +19 -8
  13. datahub/ingestion/source/dbt/dbt_common.py +74 -0
  14. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  15. datahub/ingestion/source/dremio/dremio_source.py +4 -0
  16. datahub/ingestion/source/dynamodb/dynamodb.py +10 -7
  17. datahub/ingestion/source/excel/__init__.py +0 -0
  18. datahub/ingestion/source/excel/config.py +92 -0
  19. datahub/ingestion/source/excel/excel_file.py +539 -0
  20. datahub/ingestion/source/excel/profiling.py +308 -0
  21. datahub/ingestion/source/excel/report.py +49 -0
  22. datahub/ingestion/source/excel/source.py +662 -0
  23. datahub/ingestion/source/excel/util.py +18 -0
  24. datahub/ingestion/source/fivetran/fivetran_query.py +8 -1
  25. datahub/ingestion/source/openapi.py +1 -1
  26. datahub/ingestion/source/powerbi/config.py +33 -0
  27. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  28. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  29. datahub/ingestion/source/powerbi/powerbi.py +5 -0
  30. datahub/ingestion/source/s3/source.py +65 -59
  31. datahub/ingestion/source/snowflake/snowflake_config.py +10 -0
  32. datahub/ingestion/source/snowflake/snowflake_connection.py +1 -1
  33. datahub/ingestion/source/snowflake/snowflake_query.py +27 -0
  34. datahub/ingestion/source/snowflake/snowflake_report.py +1 -0
  35. datahub/ingestion/source/snowflake/snowflake_schema.py +179 -7
  36. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +21 -6
  37. datahub/ingestion/source/snowflake/snowflake_summary.py +1 -0
  38. datahub/ingestion/source/snowflake/snowflake_v2.py +4 -1
  39. datahub/ingestion/source/sql/hive_metastore.py +1 -0
  40. datahub/ingestion/source/sql_queries.py +24 -2
  41. datahub/ingestion/source/state/checkpoint.py +3 -28
  42. datahub/metadata/_internal_schema_classes.py +568 -512
  43. datahub/metadata/_urns/urn_defs.py +1748 -1748
  44. datahub/metadata/schema.avsc +18242 -18168
  45. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  46. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +9 -0
  47. datahub/metadata/schemas/InstitutionalMemory.avsc +9 -0
  48. datahub/metadata/schemas/MetadataChangeEvent.avsc +81 -45
  49. datahub/metadata/schemas/Ownership.avsc +69 -0
  50. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  51. datahub/metadata/schemas/StructuredPropertyDefinition.avsc +3 -0
  52. datahub/metadata/schemas/__init__.py +3 -3
  53. datahub/sdk/lineage_client.py +6 -26
  54. datahub/sdk/main_client.py +7 -3
  55. datahub/sdk/search_filters.py +16 -0
  56. datahub/specific/aspect_helpers/siblings.py +73 -0
  57. datahub/specific/dataset.py +2 -0
  58. datahub/sql_parsing/sql_parsing_aggregator.py +3 -0
  59. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  60. datahub/upgrade/upgrade.py +14 -2
  61. {acryl_datahub-1.2.0.6.dist-info → acryl_datahub-1.2.0.7rc1.dist-info}/WHEEL +0 -0
  62. {acryl_datahub-1.2.0.6.dist-info → acryl_datahub-1.2.0.7rc1.dist-info}/licenses/LICENSE +0 -0
  63. {acryl_datahub-1.2.0.6.dist-info → acryl_datahub-1.2.0.7rc1.dist-info}/top_level.txt +0 -0

datahub/metadata/schemas/ChartInfo.avsc
@@ -188,7 +188,8 @@
  "createdActor": "inputEdges/*/created/actor",
  "createdOn": "inputEdges/*/created/time",
  "entityTypes": [
- "dataset"
+ "dataset",
+ "chart"
  ],
  "isLineage": true,
  "name": "Consumes",

datahub/metadata/schemas/DataHubPageModuleProperties.avsc
@@ -148,6 +148,15 @@
  "name": "assetUrns",
  "Urn": "Urn",
  "urn_is_array": true
+ },
+ {
+ "type": [
+ "null",
+ "string"
+ ],
+ "name": "dynamicFilterJson",
+ "default": null,
+ "doc": "Optional dynamic filters\n\nThe stringified json representing the logical predicate built in the UI to select assets.\nThis predicate is turned into orFilters to send through graphql since graphql doesn't support\narbitrary nesting. This string is used to restore the UI for this logical predicate."
  }
  ],
  "doc": "The params required if the module is type ASSET_COLLECTION"

datahub/metadata/schemas/InstitutionalMemory.avsc
@@ -75,6 +75,15 @@
  },
  "name": "createStamp",
  "doc": "Audit stamp associated with creation of this record"
+ },
+ {
+ "type": [
+ "null",
+ "com.linkedin.pegasus2avro.common.AuditStamp"
+ ],
+ "name": "updateStamp",
+ "default": null,
+ "doc": "Audit stamp associated with updation of this record"
  }
  ],
  "doc": "Metadata corresponding to a record of institutional memory."

datahub/metadata/schemas/MetadataChangeEvent.avsc
@@ -332,7 +332,8 @@
  "createdActor": "inputEdges/*/created/actor",
  "createdOn": "inputEdges/*/created/time",
  "entityTypes": [
- "dataset"
+ "dataset",
+ "chart"
  ],
  "isLineage": true,
  "name": "Consumes",
@@ -747,6 +748,75 @@
  "name": "source",
  "default": null,
  "doc": "Source information for the ownership"
+ },
+ {
+ "Searchable": {
+ "/actor": {
+ "fieldName": "ownerAttributionActors",
+ "fieldType": "URN",
+ "queryByDefault": false
+ },
+ "/source": {
+ "fieldName": "ownerAttributionSources",
+ "fieldType": "URN",
+ "queryByDefault": false
+ },
+ "/time": {
+ "fieldName": "ownerAttributionDates",
+ "fieldType": "DATETIME",
+ "queryByDefault": false
+ }
+ },
+ "type": [
+ "null",
+ {
+ "type": "record",
+ "name": "MetadataAttribution",
+ "namespace": "com.linkedin.pegasus2avro.common",
+ "fields": [
+ {
+ "type": "long",
+ "name": "time",
+ "doc": "When this metadata was updated."
+ },
+ {
+ "java": {
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+ },
+ "type": "string",
+ "name": "actor",
+ "doc": "The entity (e.g. a member URN) responsible for applying the assocated metadata. This can\neither be a user (in case of UI edits) or the datahub system for automation.",
+ "Urn": "Urn"
+ },
+ {
+ "java": {
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+ },
+ "type": [
+ "null",
+ "string"
+ ],
+ "name": "source",
+ "default": null,
+ "doc": "The DataHub source responsible for applying the associated metadata. This will only be filled out\nwhen a DataHub source is responsible. This includes the specific metadata test urn, the automation urn.",
+ "Urn": "Urn"
+ },
+ {
+ "type": {
+ "type": "map",
+ "values": "string"
+ },
+ "name": "sourceDetail",
+ "default": {},
+ "doc": "The details associated with why this metadata was applied. For example, this could include\nthe actual regex rule, sql statement, ingestion pipeline ID, etc."
+ }
+ ],
+ "doc": "Information about who, why, and how this metadata was applied"
+ }
+ ],
+ "name": "attribution",
+ "default": null,
+ "doc": "Information about who, why, and how this metadata was applied"
  }
  ],
  "doc": "Ownership information"
@@ -883,50 +953,7 @@
  },
  "type": [
  "null",
- {
- "type": "record",
- "name": "MetadataAttribution",
- "namespace": "com.linkedin.pegasus2avro.common",
- "fields": [
- {
- "type": "long",
- "name": "time",
- "doc": "When this metadata was updated."
- },
- {
- "java": {
- "class": "com.linkedin.pegasus2avro.common.urn.Urn"
- },
- "type": "string",
- "name": "actor",
- "doc": "The entity (e.g. a member URN) responsible for applying the assocated metadata. This can\neither be a user (in case of UI edits) or the datahub system for automation.",
- "Urn": "Urn"
- },
- {
- "java": {
- "class": "com.linkedin.pegasus2avro.common.urn.Urn"
- },
- "type": [
- "null",
- "string"
- ],
- "name": "source",
- "default": null,
- "doc": "The DataHub source responsible for applying the associated metadata. This will only be filled out\nwhen a DataHub source is responsible. This includes the specific metadata test urn, the automation urn.",
- "Urn": "Urn"
- },
- {
- "type": {
- "type": "map",
- "values": "string"
- },
- "name": "sourceDetail",
- "default": {},
- "doc": "The details associated with why this metadata was applied. For example, this could include\nthe actual regex rule, sql statement, ingestion pipeline ID, etc."
- }
- ],
- "doc": "Information about who, why, and how this metadata was applied"
- }
+ "com.linkedin.pegasus2avro.common.MetadataAttribution"
  ],
  "name": "attribution",
  "default": null,
@@ -1107,6 +1134,15 @@
  "type": "com.linkedin.pegasus2avro.common.AuditStamp",
  "name": "createStamp",
  "doc": "Audit stamp associated with creation of this record"
+ },
+ {
+ "type": [
+ "null",
+ "com.linkedin.pegasus2avro.common.AuditStamp"
+ ],
+ "name": "updateStamp",
+ "default": null,
+ "doc": "Audit stamp associated with updation of this record"
  }
  ],
  "doc": "Metadata corresponding to a record of institutional memory."

datahub/metadata/schemas/Ownership.avsc
@@ -162,6 +162,75 @@
  "name": "source",
  "default": null,
  "doc": "Source information for the ownership"
+ },
+ {
+ "Searchable": {
+ "/actor": {
+ "fieldName": "ownerAttributionActors",
+ "fieldType": "URN",
+ "queryByDefault": false
+ },
+ "/source": {
+ "fieldName": "ownerAttributionSources",
+ "fieldType": "URN",
+ "queryByDefault": false
+ },
+ "/time": {
+ "fieldName": "ownerAttributionDates",
+ "fieldType": "DATETIME",
+ "queryByDefault": false
+ }
+ },
+ "type": [
+ "null",
+ {
+ "type": "record",
+ "name": "MetadataAttribution",
+ "namespace": "com.linkedin.pegasus2avro.common",
+ "fields": [
+ {
+ "type": "long",
+ "name": "time",
+ "doc": "When this metadata was updated."
+ },
+ {
+ "java": {
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+ },
+ "type": "string",
+ "name": "actor",
+ "doc": "The entity (e.g. a member URN) responsible for applying the assocated metadata. This can\neither be a user (in case of UI edits) or the datahub system for automation.",
+ "Urn": "Urn"
+ },
+ {
+ "java": {
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+ },
+ "type": [
+ "null",
+ "string"
+ ],
+ "name": "source",
+ "default": null,
+ "doc": "The DataHub source responsible for applying the associated metadata. This will only be filled out\nwhen a DataHub source is responsible. This includes the specific metadata test urn, the automation urn.",
+ "Urn": "Urn"
+ },
+ {
+ "type": {
+ "type": "map",
+ "values": "string"
+ },
+ "name": "sourceDetail",
+ "default": {},
+ "doc": "The details associated with why this metadata was applied. For example, this could include\nthe actual regex rule, sql statement, ingestion pipeline ID, etc."
+ }
+ ],
+ "doc": "Information about who, why, and how this metadata was applied"
+ }
+ ],
+ "name": "attribution",
+ "default": null,
+ "doc": "Information about who, why, and how this metadata was applied"
  }
  ],
  "doc": "Ownership information"

datahub/metadata/schemas/StructuredProperties.avsc
@@ -94,6 +94,75 @@
  "name": "lastModified",
  "default": null,
  "doc": "Audit stamp containing who last modified this relationship edge and when"
+ },
+ {
+ "Searchable": {
+ "/actor": {
+ "fieldName": "structuredPropertyAttributionActors",
+ "fieldType": "URN",
+ "queryByDefault": false
+ },
+ "/source": {
+ "fieldName": "structuredPropertyAttributionSources",
+ "fieldType": "URN",
+ "queryByDefault": false
+ },
+ "/time": {
+ "fieldName": "structuredPropertyAttributionDates",
+ "fieldType": "DATETIME",
+ "queryByDefault": false
+ }
+ },
+ "type": [
+ "null",
+ {
+ "type": "record",
+ "name": "MetadataAttribution",
+ "namespace": "com.linkedin.pegasus2avro.common",
+ "fields": [
+ {
+ "type": "long",
+ "name": "time",
+ "doc": "When this metadata was updated."
+ },
+ {
+ "java": {
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+ },
+ "type": "string",
+ "name": "actor",
+ "doc": "The entity (e.g. a member URN) responsible for applying the assocated metadata. This can\neither be a user (in case of UI edits) or the datahub system for automation.",
+ "Urn": "Urn"
+ },
+ {
+ "java": {
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+ },
+ "type": [
+ "null",
+ "string"
+ ],
+ "name": "source",
+ "default": null,
+ "doc": "The DataHub source responsible for applying the associated metadata. This will only be filled out\nwhen a DataHub source is responsible. This includes the specific metadata test urn, the automation urn.",
+ "Urn": "Urn"
+ },
+ {
+ "type": {
+ "type": "map",
+ "values": "string"
+ },
+ "name": "sourceDetail",
+ "default": {},
+ "doc": "The details associated with why this metadata was applied. For example, this could include\nthe actual regex rule, sql statement, ingestion pipeline ID, etc."
+ }
+ ],
+ "doc": "Information about who, why, and how this metadata was applied"
+ }
+ ],
+ "name": "attribution",
+ "default": null,
+ "doc": "Information about who, why, and how this metadata was applied"
  }
  ]
  }

datahub/metadata/schemas/StructuredPropertyDefinition.avsc
@@ -23,6 +23,9 @@
  "doc": "The display name of the property. This is the name that will be shown in the UI and can be used to look up the property id."
  },
  {
+ "Searchable": {
+ "fieldType": "URN"
+ },
  "UrnValidation": {
  "entityTypes": [
  "dataType"

datahub/metadata/schemas/__init__.py
@@ -15,10 +15,10 @@ import pathlib
 def _load_schema(schema_name: str) -> str:
     return (pathlib.Path(__file__).parent / f"{schema_name}.avsc").read_text()

-def getMetadataChangeProposalSchema() -> str:
-    return _load_schema("MetadataChangeProposal")
-
 def getMetadataChangeEventSchema() -> str:
     return _load_schema("MetadataChangeEvent")

+def getMetadataChangeProposalSchema() -> str:
+    return _load_schema("MetadataChangeProposal")
+
 # fmt: on

datahub/sdk/lineage_client.py
@@ -165,11 +165,7 @@ class LineageClient:
         ] = False,
         transformation_text: Optional[str] = None,
     ) -> None:
-        ...
-
-        """
-        Add dataset-to-dataset lineage with column-level mapping.
-        """
+        """Add dataset-to-dataset lineage with column-level mapping."""

     @overload
     def add_lineage(
@@ -178,11 +174,7 @@ class LineageClient:
         upstream: Union[DatajobUrnOrStr],
         downstream: DatasetUrnOrStr,
     ) -> None:
-        ...
-
-        """
-        Add dataset-to-datajob or dataset-to-mlmodel lineage.
-        """
+        """Add dataset-to-datajob or dataset-to-mlmodel lineage."""

     @overload
     def add_lineage(
@@ -191,11 +183,7 @@ class LineageClient:
         upstream: Union[DatasetUrnOrStr, DatajobUrnOrStr],
         downstream: DatajobUrnOrStr,
     ) -> None:
-        ...
-
-        """
-        Add datajob-to-dataset or datajob-to-datajob lineage.
-        """
+        """Add datajob-to-dataset or datajob-to-datajob lineage."""

     @overload
     def add_lineage(
@@ -204,11 +192,7 @@ class LineageClient:
         upstream: Union[DashboardUrnOrStr, DatasetUrnOrStr, ChartUrnOrStr],
         downstream: DashboardUrnOrStr,
     ) -> None:
-        ...
-
-        """
-        Add dashboard-to-dashboard or dashboard-to-dataset lineage.
-        """
+        """Add dashboard-to-dashboard or dashboard-to-dataset lineage."""

     @overload
     def add_lineage(
@@ -217,10 +201,7 @@ class LineageClient:
         upstream: DatasetUrnOrStr,
         downstream: ChartUrnOrStr,
     ) -> None:
-        ...
-        """
-        Add dataset-to-chart lineage.
-        """
+        """Add dataset-to-chart lineage."""

     # The actual implementation that handles all overloaded cases
     def add_lineage(
@@ -237,8 +218,7 @@ class LineageClient:
         ] = False,
         transformation_text: Optional[str] = None,
     ) -> None:
-        """
-        Add lineage between two entities.
+        """Add lineage between two entities.

         This flexible method handles different combinations of entity types:
         - dataset to dataset
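
The overload cleanup above is behavior-neutral: the stray `...` bodies followed by unreachable string literals become proper docstrings. A hedged usage sketch of the overloaded API (the server URL and dataset urns are illustrative):

```python
# Hedged sketch: dataset-to-dataset lineage via the SDK client.
from datahub.sdk.main_client import DataHubClient

client = DataHubClient(server="http://localhost:8080")
client.lineage.add_lineage(
    upstream="urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.raw_orders,PROD)",
    downstream="urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.orders,PROD)",
)
```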

datahub/sdk/main_client.py
@@ -1,6 +1,6 @@
 from __future__ import annotations

-from typing import Optional, overload
+from typing import TYPE_CHECKING, Optional, overload

 from datahub.errors import SdkUsageError
 from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
@@ -9,6 +9,9 @@ from datahub.sdk.entity_client import EntityClient
 from datahub.sdk.lineage_client import LineageClient
 from datahub.sdk.search_client import SearchClient

+if TYPE_CHECKING:
+    from datahub.sdk.resolver_client import ResolverClient
+

 class DataHubClient:
     """Main client for interacting with DataHub.
@@ -104,13 +107,14 @@ class DataHubClient:
         return EntityClient(self)

     @property
-    def resolve(self):  # type: ignore[report-untyped-call] # Not available due to circular import issues
+    def resolve(self) -> "ResolverClient":
         try:
             from acryl_datahub_cloud.sdk import (  # type: ignore[import-not-found]
                 ResolverClient,
             )
         except ImportError:
-            from datahub.sdk.resolver_client import (  # type: ignore[assignment] # If the client is not installed, use the one from the SDK
+            # If the client is not installed, use the one from the SDK.
+            from datahub.sdk.resolver_client import (  # type: ignore[assignment]
                 ResolverClient,
             )
         return ResolverClient(self)

datahub/sdk/search_filters.py
@@ -384,6 +384,21 @@ def _filter_discriminator(v: Any) -> Optional[str]:
     return None


+def _parse_and_like_filter(value: Any) -> Any:
+    # Do not parse if filter is already of type and/or/not or a custom condition
+    # also do not parse container filter if direct_descendants_only is specified
+    if (
+        isinstance(value, dict)
+        and not set(value.keys()).intersection(
+            {"and", "or", "not", "field", "condition", "direct_descendants_only"}
+        )
+        and len(value) > 1
+    ):
+        return {"and": [{k: v} for k, v in value.items()]}
+
+    return value
+
+
 if TYPE_CHECKING or not PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR:
     # The `not TYPE_CHECKING` bit is required to make the linter happy,
     # since we currently only run mypy with pydantic v1.
@@ -445,6 +460,7 @@ else:
         ],
         Discriminator(_filter_discriminator),
     ],
+    pydantic.BeforeValidator(_parse_and_like_filter),
    pydantic.BeforeValidator(_parse_json_from_string),
]
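
In effect, a plain multi-key dict filter is now interpreted as an implicit "and". A standalone sketch mirroring _parse_and_like_filter above (the function copy and sample input are for illustration only):

```python
from typing import Any


def parse_and_like_filter(value: Any) -> Any:
    """Mirror of the new validator: rewrite a multi-key dict that is not
    already an and/or/not or leaf filter into an explicit "and"."""
    if (
        isinstance(value, dict)
        and not set(value.keys()).intersection(
            {"and", "or", "not", "field", "condition", "direct_descendants_only"}
        )
        and len(value) > 1
    ):
        return {"and": [{k: v} for k, v in value.items()]}
    return value


print(parse_and_like_filter({"platform": ["snowflake"], "env": ["PROD"]}))
# {'and': [{'platform': ['snowflake']}, {'env': ['PROD']}]}
```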

datahub/specific/aspect_helpers/siblings.py
@@ -0,0 +1,73 @@
+from typing import List
+
+from typing_extensions import Self
+
+from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
+from datahub.metadata.com.linkedin.pegasus2avro.common import Siblings
+
+
+class HasSiblingsPatch(MetadataPatchProposal):
+    def add_sibling(self, sibling_urn: str, primary: bool = False) -> Self:
+        """Add a sibling relationship to the entity.
+
+        Args:
+            sibling_urn: The URN of the sibling entity to add.
+            primary: Whether this entity should be marked as primary in the relationship.
+
+        Returns:
+            The patch builder instance.
+        """
+        self._add_patch(
+            Siblings.ASPECT_NAME,
+            "add",
+            path=("siblings", sibling_urn),
+            value=sibling_urn,
+        )
+
+        # Set primary flag if specified
+        if primary:
+            self._add_patch(
+                Siblings.ASPECT_NAME,
+                "add",
+                path=("primary",),
+                value=primary,
+            )
+
+        return self
+
+    def remove_sibling(self, sibling_urn: str) -> Self:
+        """Remove a sibling relationship from the entity.
+
+        Args:
+            sibling_urn: The URN of the sibling entity to remove.
+
+        Returns:
+            The patch builder instance.
+        """
+        self._add_patch(
+            Siblings.ASPECT_NAME,
+            "remove",
+            path=("siblings", sibling_urn),
+            value={},
+        )
+        return self
+
+    def set_siblings(self, sibling_urns: List[str], primary: bool = False) -> Self:
+        """Set the complete list of siblings for the entity.
+
+        This will replace all existing siblings with the new list.
+
+        Args:
+            sibling_urns: The list of sibling URNs to set.
+            primary: Whether this entity should be marked as primary.
+
+        Returns:
+            The patch builder instance.
+        """
+        self._add_patch(
+            Siblings.ASPECT_NAME, "add", path=("siblings",), value=sibling_urns
+        )
+
+        self._add_patch(Siblings.ASPECT_NAME, "add", path=("primary",), value=primary)
+
+        return self
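
A hedged usage sketch of the new helper once it is mixed into DatasetPatchBuilder (see the dataset.py hunk below); the urns are illustrative:

```python
# Hedged sketch: linking a Snowflake table to its dbt sibling, marking the
# table as primary, and inspecting the resulting patch proposals.
from datahub.emitter.mce_builder import make_dataset_urn
from datahub.specific.dataset import DatasetPatchBuilder

patch = DatasetPatchBuilder(
    make_dataset_urn(platform="snowflake", name="db.schema.orders", env="PROD")
).add_sibling(
    make_dataset_urn(platform="dbt", name="db.schema.orders", env="PROD"),
    primary=True,
)
for mcp in patch.build():
    print(mcp.aspectName)
```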

datahub/specific/dataset.py
@@ -22,6 +22,7 @@ from datahub.specific.aspect_helpers.fine_grained_lineage import (
     HasFineGrainedLineagePatch,
 )
 from datahub.specific.aspect_helpers.ownership import HasOwnershipPatch
+from datahub.specific.aspect_helpers.siblings import HasSiblingsPatch
 from datahub.specific.aspect_helpers.structured_properties import (
     HasStructuredPropertiesPatch,
 )
@@ -104,6 +105,7 @@ class DatasetPatchBuilder(
     HasTagsPatch,
     HasTermsPatch,
     HasFineGrainedLineagePatch,
+    HasSiblingsPatch,
     MetadataPatchProposal,
 ):
     def __init__(

datahub/sql_parsing/sql_parsing_aggregator.py
@@ -634,6 +634,9 @@ class SqlParsingAggregator(Closeable):
             TableSwap,
         ],
     ) -> None:
+        """
+        This assumes that queries come in order of increasing timestamps.
+        """
         if isinstance(item, KnownQueryLineageInfo):
             self.add_known_query_lineage(item)
         elif isinstance(item, KnownLineageMapping):

datahub/sql_parsing/tool_meta_extractor.py
@@ -208,9 +208,7 @@ class ToolMetaExtractor:
         Returns:
             bool: whether QueryLog entry is that of hex.
         """
-        last_line = _get_last_line(entry.query_text)
-
-        if not last_line.startswith("-- Hex query metadata:"):
+        if "-- Hex query metadata:" not in entry.query_text:
             return False

         entry.origin = HEX_PLATFORM_URN
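
The relaxed Hex detection matches the marker anywhere in the query text instead of only on the last line. An illustrative check (the query text is made up):

```python
# The marker no longer has to be the final line of the query.
query_text = 'SELECT 1\n-- Hex query metadata: {"project_id": "abc"}\nLIMIT 10\n'

assert "-- Hex query metadata:" in query_text  # new check: matches
assert not query_text.rstrip().splitlines()[-1].startswith(
    "-- Hex query metadata:"
)  # old-style last-line check: would have missed it
```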