acryl-datahub 1.2.0.6rc1__py3-none-any.whl → 1.2.0.7rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.2.0.6rc1.dist-info → acryl_datahub-1.2.0.7rc2.dist-info}/METADATA +2659 -2578
- {acryl_datahub-1.2.0.6rc1.dist-info → acryl_datahub-1.2.0.7rc2.dist-info}/RECORD +65 -57
- {acryl_datahub-1.2.0.6rc1.dist-info → acryl_datahub-1.2.0.7rc2.dist-info}/entry_points.txt +1 -0
- datahub/_version.py +1 -1
- datahub/api/graphql/operation.py +1 -1
- datahub/ingestion/autogenerated/capability_summary.json +45 -5
- datahub/ingestion/autogenerated/lineage.json +3 -2
- datahub/ingestion/run/pipeline.py +1 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +97 -5
- datahub/ingestion/source/common/subtypes.py +3 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +1 -1
- datahub/ingestion/source/datahub/datahub_database_reader.py +19 -8
- datahub/ingestion/source/dbt/dbt_common.py +74 -0
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_source.py +4 -0
- datahub/ingestion/source/dynamodb/dynamodb.py +10 -7
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/fivetran/fivetran_query.py +8 -1
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +33 -0
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
- datahub/ingestion/source/powerbi/powerbi.py +5 -0
- datahub/ingestion/source/s3/source.py +65 -59
- datahub/ingestion/source/snowflake/constants.py +2 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +10 -0
- datahub/ingestion/source/snowflake/snowflake_connection.py +16 -5
- datahub/ingestion/source/snowflake/snowflake_query.py +27 -0
- datahub/ingestion/source/snowflake/snowflake_report.py +1 -0
- datahub/ingestion/source/snowflake/snowflake_schema.py +179 -7
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +21 -6
- datahub/ingestion/source/snowflake/snowflake_summary.py +1 -0
- datahub/ingestion/source/snowflake/snowflake_utils.py +18 -5
- datahub/ingestion/source/snowflake/snowflake_v2.py +5 -1
- datahub/ingestion/source/sql/hive_metastore.py +1 -0
- datahub/ingestion/source/sql_queries.py +24 -2
- datahub/ingestion/source/state/checkpoint.py +3 -28
- datahub/metadata/_internal_schema_classes.py +568 -512
- datahub/metadata/_urns/urn_defs.py +1748 -1748
- datahub/metadata/schema.avsc +18242 -18168
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +9 -0
- datahub/metadata/schemas/InstitutionalMemory.avsc +9 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +81 -45
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc +3 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/lineage_client.py +6 -26
- datahub/sdk/main_client.py +7 -3
- datahub/sdk/search_filters.py +16 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/dataset.py +2 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +3 -0
- datahub/sql_parsing/tool_meta_extractor.py +1 -3
- datahub/upgrade/upgrade.py +14 -2
- {acryl_datahub-1.2.0.6rc1.dist-info → acryl_datahub-1.2.0.7rc2.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.6rc1.dist-info → acryl_datahub-1.2.0.7rc2.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.6rc1.dist-info → acryl_datahub-1.2.0.7rc2.dist-info}/top_level.txt +0 -0
|
@@ -148,6 +148,15 @@
|
|
|
148
148
|
"name": "assetUrns",
|
|
149
149
|
"Urn": "Urn",
|
|
150
150
|
"urn_is_array": true
|
|
151
|
+
},
|
|
152
|
+
{
|
|
153
|
+
"type": [
|
|
154
|
+
"null",
|
|
155
|
+
"string"
|
|
156
|
+
],
|
|
157
|
+
"name": "dynamicFilterJson",
|
|
158
|
+
"default": null,
|
|
159
|
+
"doc": "Optional dynamic filters\n\nThe stringified json representing the logical predicate built in the UI to select assets.\nThis predicate is turned into orFilters to send through graphql since graphql doesn't support\narbitrary nesting. This string is used to restore the UI for this logical predicate."
|
|
151
160
|
}
|
|
152
161
|
],
|
|
153
162
|
"doc": "The params required if the module is type ASSET_COLLECTION"
|
|
@@ -75,6 +75,15 @@
|
|
|
75
75
|
},
|
|
76
76
|
"name": "createStamp",
|
|
77
77
|
"doc": "Audit stamp associated with creation of this record"
|
|
78
|
+
},
|
|
79
|
+
{
|
|
80
|
+
"type": [
|
|
81
|
+
"null",
|
|
82
|
+
"com.linkedin.pegasus2avro.common.AuditStamp"
|
|
83
|
+
],
|
|
84
|
+
"name": "updateStamp",
|
|
85
|
+
"default": null,
|
|
86
|
+
"doc": "Audit stamp associated with updation of this record"
|
|
78
87
|
}
|
|
79
88
|
],
|
|
80
89
|
"doc": "Metadata corresponding to a record of institutional memory."
|
|
@@ -332,7 +332,8 @@
|
|
|
332
332
|
"createdActor": "inputEdges/*/created/actor",
|
|
333
333
|
"createdOn": "inputEdges/*/created/time",
|
|
334
334
|
"entityTypes": [
|
|
335
|
-
"dataset"
|
|
335
|
+
"dataset",
|
|
336
|
+
"chart"
|
|
336
337
|
],
|
|
337
338
|
"isLineage": true,
|
|
338
339
|
"name": "Consumes",
|
|
@@ -747,6 +748,75 @@
|
|
|
747
748
|
"name": "source",
|
|
748
749
|
"default": null,
|
|
749
750
|
"doc": "Source information for the ownership"
|
|
751
|
+
},
|
|
752
|
+
{
|
|
753
|
+
"Searchable": {
|
|
754
|
+
"/actor": {
|
|
755
|
+
"fieldName": "ownerAttributionActors",
|
|
756
|
+
"fieldType": "URN",
|
|
757
|
+
"queryByDefault": false
|
|
758
|
+
},
|
|
759
|
+
"/source": {
|
|
760
|
+
"fieldName": "ownerAttributionSources",
|
|
761
|
+
"fieldType": "URN",
|
|
762
|
+
"queryByDefault": false
|
|
763
|
+
},
|
|
764
|
+
"/time": {
|
|
765
|
+
"fieldName": "ownerAttributionDates",
|
|
766
|
+
"fieldType": "DATETIME",
|
|
767
|
+
"queryByDefault": false
|
|
768
|
+
}
|
|
769
|
+
},
|
|
770
|
+
"type": [
|
|
771
|
+
"null",
|
|
772
|
+
{
|
|
773
|
+
"type": "record",
|
|
774
|
+
"name": "MetadataAttribution",
|
|
775
|
+
"namespace": "com.linkedin.pegasus2avro.common",
|
|
776
|
+
"fields": [
|
|
777
|
+
{
|
|
778
|
+
"type": "long",
|
|
779
|
+
"name": "time",
|
|
780
|
+
"doc": "When this metadata was updated."
|
|
781
|
+
},
|
|
782
|
+
{
|
|
783
|
+
"java": {
|
|
784
|
+
"class": "com.linkedin.pegasus2avro.common.urn.Urn"
|
|
785
|
+
},
|
|
786
|
+
"type": "string",
|
|
787
|
+
"name": "actor",
|
|
788
|
+
"doc": "The entity (e.g. a member URN) responsible for applying the assocated metadata. This can\neither be a user (in case of UI edits) or the datahub system for automation.",
|
|
789
|
+
"Urn": "Urn"
|
|
790
|
+
},
|
|
791
|
+
{
|
|
792
|
+
"java": {
|
|
793
|
+
"class": "com.linkedin.pegasus2avro.common.urn.Urn"
|
|
794
|
+
},
|
|
795
|
+
"type": [
|
|
796
|
+
"null",
|
|
797
|
+
"string"
|
|
798
|
+
],
|
|
799
|
+
"name": "source",
|
|
800
|
+
"default": null,
|
|
801
|
+
"doc": "The DataHub source responsible for applying the associated metadata. This will only be filled out\nwhen a DataHub source is responsible. This includes the specific metadata test urn, the automation urn.",
|
|
802
|
+
"Urn": "Urn"
|
|
803
|
+
},
|
|
804
|
+
{
|
|
805
|
+
"type": {
|
|
806
|
+
"type": "map",
|
|
807
|
+
"values": "string"
|
|
808
|
+
},
|
|
809
|
+
"name": "sourceDetail",
|
|
810
|
+
"default": {},
|
|
811
|
+
"doc": "The details associated with why this metadata was applied. For example, this could include\nthe actual regex rule, sql statement, ingestion pipeline ID, etc."
|
|
812
|
+
}
|
|
813
|
+
],
|
|
814
|
+
"doc": "Information about who, why, and how this metadata was applied"
|
|
815
|
+
}
|
|
816
|
+
],
|
|
817
|
+
"name": "attribution",
|
|
818
|
+
"default": null,
|
|
819
|
+
"doc": "Information about who, why, and how this metadata was applied"
|
|
750
820
|
}
|
|
751
821
|
],
|
|
752
822
|
"doc": "Ownership information"
|
|
@@ -883,50 +953,7 @@
|
|
|
883
953
|
},
|
|
884
954
|
"type": [
|
|
885
955
|
"null",
|
|
886
|
-
{
|
|
887
|
-
"type": "record",
|
|
888
|
-
"name": "MetadataAttribution",
|
|
889
|
-
"namespace": "com.linkedin.pegasus2avro.common",
|
|
890
|
-
"fields": [
|
|
891
|
-
{
|
|
892
|
-
"type": "long",
|
|
893
|
-
"name": "time",
|
|
894
|
-
"doc": "When this metadata was updated."
|
|
895
|
-
},
|
|
896
|
-
{
|
|
897
|
-
"java": {
|
|
898
|
-
"class": "com.linkedin.pegasus2avro.common.urn.Urn"
|
|
899
|
-
},
|
|
900
|
-
"type": "string",
|
|
901
|
-
"name": "actor",
|
|
902
|
-
"doc": "The entity (e.g. a member URN) responsible for applying the assocated metadata. This can\neither be a user (in case of UI edits) or the datahub system for automation.",
|
|
903
|
-
"Urn": "Urn"
|
|
904
|
-
},
|
|
905
|
-
{
|
|
906
|
-
"java": {
|
|
907
|
-
"class": "com.linkedin.pegasus2avro.common.urn.Urn"
|
|
908
|
-
},
|
|
909
|
-
"type": [
|
|
910
|
-
"null",
|
|
911
|
-
"string"
|
|
912
|
-
],
|
|
913
|
-
"name": "source",
|
|
914
|
-
"default": null,
|
|
915
|
-
"doc": "The DataHub source responsible for applying the associated metadata. This will only be filled out\nwhen a DataHub source is responsible. This includes the specific metadata test urn, the automation urn.",
|
|
916
|
-
"Urn": "Urn"
|
|
917
|
-
},
|
|
918
|
-
{
|
|
919
|
-
"type": {
|
|
920
|
-
"type": "map",
|
|
921
|
-
"values": "string"
|
|
922
|
-
},
|
|
923
|
-
"name": "sourceDetail",
|
|
924
|
-
"default": {},
|
|
925
|
-
"doc": "The details associated with why this metadata was applied. For example, this could include\nthe actual regex rule, sql statement, ingestion pipeline ID, etc."
|
|
926
|
-
}
|
|
927
|
-
],
|
|
928
|
-
"doc": "Information about who, why, and how this metadata was applied"
|
|
929
|
-
}
|
|
956
|
+
"com.linkedin.pegasus2avro.common.MetadataAttribution"
|
|
930
957
|
],
|
|
931
958
|
"name": "attribution",
|
|
932
959
|
"default": null,
|
|
@@ -1107,6 +1134,15 @@
|
|
|
1107
1134
|
"type": "com.linkedin.pegasus2avro.common.AuditStamp",
|
|
1108
1135
|
"name": "createStamp",
|
|
1109
1136
|
"doc": "Audit stamp associated with creation of this record"
|
|
1137
|
+
},
|
|
1138
|
+
{
|
|
1139
|
+
"type": [
|
|
1140
|
+
"null",
|
|
1141
|
+
"com.linkedin.pegasus2avro.common.AuditStamp"
|
|
1142
|
+
],
|
|
1143
|
+
"name": "updateStamp",
|
|
1144
|
+
"default": null,
|
|
1145
|
+
"doc": "Audit stamp associated with updation of this record"
|
|
1110
1146
|
}
|
|
1111
1147
|
],
|
|
1112
1148
|
"doc": "Metadata corresponding to a record of institutional memory."
|
|
@@ -162,6 +162,75 @@
|
|
|
162
162
|
"name": "source",
|
|
163
163
|
"default": null,
|
|
164
164
|
"doc": "Source information for the ownership"
|
|
165
|
+
},
|
|
166
|
+
{
|
|
167
|
+
"Searchable": {
|
|
168
|
+
"/actor": {
|
|
169
|
+
"fieldName": "ownerAttributionActors",
|
|
170
|
+
"fieldType": "URN",
|
|
171
|
+
"queryByDefault": false
|
|
172
|
+
},
|
|
173
|
+
"/source": {
|
|
174
|
+
"fieldName": "ownerAttributionSources",
|
|
175
|
+
"fieldType": "URN",
|
|
176
|
+
"queryByDefault": false
|
|
177
|
+
},
|
|
178
|
+
"/time": {
|
|
179
|
+
"fieldName": "ownerAttributionDates",
|
|
180
|
+
"fieldType": "DATETIME",
|
|
181
|
+
"queryByDefault": false
|
|
182
|
+
}
|
|
183
|
+
},
|
|
184
|
+
"type": [
|
|
185
|
+
"null",
|
|
186
|
+
{
|
|
187
|
+
"type": "record",
|
|
188
|
+
"name": "MetadataAttribution",
|
|
189
|
+
"namespace": "com.linkedin.pegasus2avro.common",
|
|
190
|
+
"fields": [
|
|
191
|
+
{
|
|
192
|
+
"type": "long",
|
|
193
|
+
"name": "time",
|
|
194
|
+
"doc": "When this metadata was updated."
|
|
195
|
+
},
|
|
196
|
+
{
|
|
197
|
+
"java": {
|
|
198
|
+
"class": "com.linkedin.pegasus2avro.common.urn.Urn"
|
|
199
|
+
},
|
|
200
|
+
"type": "string",
|
|
201
|
+
"name": "actor",
|
|
202
|
+
"doc": "The entity (e.g. a member URN) responsible for applying the assocated metadata. This can\neither be a user (in case of UI edits) or the datahub system for automation.",
|
|
203
|
+
"Urn": "Urn"
|
|
204
|
+
},
|
|
205
|
+
{
|
|
206
|
+
"java": {
|
|
207
|
+
"class": "com.linkedin.pegasus2avro.common.urn.Urn"
|
|
208
|
+
},
|
|
209
|
+
"type": [
|
|
210
|
+
"null",
|
|
211
|
+
"string"
|
|
212
|
+
],
|
|
213
|
+
"name": "source",
|
|
214
|
+
"default": null,
|
|
215
|
+
"doc": "The DataHub source responsible for applying the associated metadata. This will only be filled out\nwhen a DataHub source is responsible. This includes the specific metadata test urn, the automation urn.",
|
|
216
|
+
"Urn": "Urn"
|
|
217
|
+
},
|
|
218
|
+
{
|
|
219
|
+
"type": {
|
|
220
|
+
"type": "map",
|
|
221
|
+
"values": "string"
|
|
222
|
+
},
|
|
223
|
+
"name": "sourceDetail",
|
|
224
|
+
"default": {},
|
|
225
|
+
"doc": "The details associated with why this metadata was applied. For example, this could include\nthe actual regex rule, sql statement, ingestion pipeline ID, etc."
|
|
226
|
+
}
|
|
227
|
+
],
|
|
228
|
+
"doc": "Information about who, why, and how this metadata was applied"
|
|
229
|
+
}
|
|
230
|
+
],
|
|
231
|
+
"name": "attribution",
|
|
232
|
+
"default": null,
|
|
233
|
+
"doc": "Information about who, why, and how this metadata was applied"
|
|
165
234
|
}
|
|
166
235
|
],
|
|
167
236
|
"doc": "Ownership information"
|
|
@@ -94,6 +94,75 @@
|
|
|
94
94
|
"name": "lastModified",
|
|
95
95
|
"default": null,
|
|
96
96
|
"doc": "Audit stamp containing who last modified this relationship edge and when"
|
|
97
|
+
},
|
|
98
|
+
{
|
|
99
|
+
"Searchable": {
|
|
100
|
+
"/actor": {
|
|
101
|
+
"fieldName": "structuredPropertyAttributionActors",
|
|
102
|
+
"fieldType": "URN",
|
|
103
|
+
"queryByDefault": false
|
|
104
|
+
},
|
|
105
|
+
"/source": {
|
|
106
|
+
"fieldName": "structuredPropertyAttributionSources",
|
|
107
|
+
"fieldType": "URN",
|
|
108
|
+
"queryByDefault": false
|
|
109
|
+
},
|
|
110
|
+
"/time": {
|
|
111
|
+
"fieldName": "structuredPropertyAttributionDates",
|
|
112
|
+
"fieldType": "DATETIME",
|
|
113
|
+
"queryByDefault": false
|
|
114
|
+
}
|
|
115
|
+
},
|
|
116
|
+
"type": [
|
|
117
|
+
"null",
|
|
118
|
+
{
|
|
119
|
+
"type": "record",
|
|
120
|
+
"name": "MetadataAttribution",
|
|
121
|
+
"namespace": "com.linkedin.pegasus2avro.common",
|
|
122
|
+
"fields": [
|
|
123
|
+
{
|
|
124
|
+
"type": "long",
|
|
125
|
+
"name": "time",
|
|
126
|
+
"doc": "When this metadata was updated."
|
|
127
|
+
},
|
|
128
|
+
{
|
|
129
|
+
"java": {
|
|
130
|
+
"class": "com.linkedin.pegasus2avro.common.urn.Urn"
|
|
131
|
+
},
|
|
132
|
+
"type": "string",
|
|
133
|
+
"name": "actor",
|
|
134
|
+
"doc": "The entity (e.g. a member URN) responsible for applying the assocated metadata. This can\neither be a user (in case of UI edits) or the datahub system for automation.",
|
|
135
|
+
"Urn": "Urn"
|
|
136
|
+
},
|
|
137
|
+
{
|
|
138
|
+
"java": {
|
|
139
|
+
"class": "com.linkedin.pegasus2avro.common.urn.Urn"
|
|
140
|
+
},
|
|
141
|
+
"type": [
|
|
142
|
+
"null",
|
|
143
|
+
"string"
|
|
144
|
+
],
|
|
145
|
+
"name": "source",
|
|
146
|
+
"default": null,
|
|
147
|
+
"doc": "The DataHub source responsible for applying the associated metadata. This will only be filled out\nwhen a DataHub source is responsible. This includes the specific metadata test urn, the automation urn.",
|
|
148
|
+
"Urn": "Urn"
|
|
149
|
+
},
|
|
150
|
+
{
|
|
151
|
+
"type": {
|
|
152
|
+
"type": "map",
|
|
153
|
+
"values": "string"
|
|
154
|
+
},
|
|
155
|
+
"name": "sourceDetail",
|
|
156
|
+
"default": {},
|
|
157
|
+
"doc": "The details associated with why this metadata was applied. For example, this could include\nthe actual regex rule, sql statement, ingestion pipeline ID, etc."
|
|
158
|
+
}
|
|
159
|
+
],
|
|
160
|
+
"doc": "Information about who, why, and how this metadata was applied"
|
|
161
|
+
}
|
|
162
|
+
],
|
|
163
|
+
"name": "attribution",
|
|
164
|
+
"default": null,
|
|
165
|
+
"doc": "Information about who, why, and how this metadata was applied"
|
|
97
166
|
}
|
|
98
167
|
]
|
|
99
168
|
}
|
|
@@ -15,10 +15,10 @@ import pathlib
|
|
|
15
15
|
def _load_schema(schema_name: str) -> str:
|
|
16
16
|
return (pathlib.Path(__file__).parent / f"{schema_name}.avsc").read_text()
|
|
17
17
|
|
|
18
|
-
def getMetadataChangeProposalSchema() -> str:
|
|
19
|
-
return _load_schema("MetadataChangeProposal")
|
|
20
|
-
|
|
21
18
|
def getMetadataChangeEventSchema() -> str:
|
|
22
19
|
return _load_schema("MetadataChangeEvent")
|
|
23
20
|
|
|
21
|
+
def getMetadataChangeProposalSchema() -> str:
|
|
22
|
+
return _load_schema("MetadataChangeProposal")
|
|
23
|
+
|
|
24
24
|
# fmt: on
|
datahub/sdk/lineage_client.py
CHANGED
|
@@ -165,11 +165,7 @@ class LineageClient:
|
|
|
165
165
|
] = False,
|
|
166
166
|
transformation_text: Optional[str] = None,
|
|
167
167
|
) -> None:
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
"""
|
|
171
|
-
Add dataset-to-dataset lineage with column-level mapping.
|
|
172
|
-
"""
|
|
168
|
+
"""Add dataset-to-dataset lineage with column-level mapping."""
|
|
173
169
|
|
|
174
170
|
@overload
|
|
175
171
|
def add_lineage(
|
|
@@ -178,11 +174,7 @@ class LineageClient:
|
|
|
178
174
|
upstream: Union[DatajobUrnOrStr],
|
|
179
175
|
downstream: DatasetUrnOrStr,
|
|
180
176
|
) -> None:
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
"""
|
|
184
|
-
Add dataset-to-datajob or dataset-to-mlmodel lineage.
|
|
185
|
-
"""
|
|
177
|
+
"""Add dataset-to-datajob or dataset-to-mlmodel lineage."""
|
|
186
178
|
|
|
187
179
|
@overload
|
|
188
180
|
def add_lineage(
|
|
@@ -191,11 +183,7 @@ class LineageClient:
|
|
|
191
183
|
upstream: Union[DatasetUrnOrStr, DatajobUrnOrStr],
|
|
192
184
|
downstream: DatajobUrnOrStr,
|
|
193
185
|
) -> None:
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
"""
|
|
197
|
-
Add datajob-to-dataset or datajob-to-datajob lineage.
|
|
198
|
-
"""
|
|
186
|
+
"""Add datajob-to-dataset or datajob-to-datajob lineage."""
|
|
199
187
|
|
|
200
188
|
@overload
|
|
201
189
|
def add_lineage(
|
|
@@ -204,11 +192,7 @@ class LineageClient:
|
|
|
204
192
|
upstream: Union[DashboardUrnOrStr, DatasetUrnOrStr, ChartUrnOrStr],
|
|
205
193
|
downstream: DashboardUrnOrStr,
|
|
206
194
|
) -> None:
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
"""
|
|
210
|
-
Add dashboard-to-dashboard or dashboard-to-dataset lineage.
|
|
211
|
-
"""
|
|
195
|
+
"""Add dashboard-to-dashboard or dashboard-to-dataset lineage."""
|
|
212
196
|
|
|
213
197
|
@overload
|
|
214
198
|
def add_lineage(
|
|
@@ -217,10 +201,7 @@ class LineageClient:
|
|
|
217
201
|
upstream: DatasetUrnOrStr,
|
|
218
202
|
downstream: ChartUrnOrStr,
|
|
219
203
|
) -> None:
|
|
220
|
-
|
|
221
|
-
"""
|
|
222
|
-
Add dataset-to-chart lineage.
|
|
223
|
-
"""
|
|
204
|
+
"""Add dataset-to-chart lineage."""
|
|
224
205
|
|
|
225
206
|
# The actual implementation that handles all overloaded cases
|
|
226
207
|
def add_lineage(
|
|
@@ -237,8 +218,7 @@ class LineageClient:
|
|
|
237
218
|
] = False,
|
|
238
219
|
transformation_text: Optional[str] = None,
|
|
239
220
|
) -> None:
|
|
240
|
-
"""
|
|
241
|
-
Add lineage between two entities.
|
|
221
|
+
"""Add lineage between two entities.
|
|
242
222
|
|
|
243
223
|
This flexible method handles different combinations of entity types:
|
|
244
224
|
- dataset to dataset
|
datahub/sdk/main_client.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Optional, overload
|
|
3
|
+
from typing import TYPE_CHECKING, Optional, overload
|
|
4
4
|
|
|
5
5
|
from datahub.errors import SdkUsageError
|
|
6
6
|
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
@@ -9,6 +9,9 @@ from datahub.sdk.entity_client import EntityClient
|
|
|
9
9
|
from datahub.sdk.lineage_client import LineageClient
|
|
10
10
|
from datahub.sdk.search_client import SearchClient
|
|
11
11
|
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from datahub.sdk.resolver_client import ResolverClient
|
|
14
|
+
|
|
12
15
|
|
|
13
16
|
class DataHubClient:
|
|
14
17
|
"""Main client for interacting with DataHub.
|
|
@@ -104,13 +107,14 @@ class DataHubClient:
|
|
|
104
107
|
return EntityClient(self)
|
|
105
108
|
|
|
106
109
|
@property
|
|
107
|
-
def resolve(self)
|
|
110
|
+
def resolve(self) -> "ResolverClient":
|
|
108
111
|
try:
|
|
109
112
|
from acryl_datahub_cloud.sdk import ( # type: ignore[import-not-found]
|
|
110
113
|
ResolverClient,
|
|
111
114
|
)
|
|
112
115
|
except ImportError:
|
|
113
|
-
|
|
116
|
+
# If the client is not installed, use the one from the SDK.
|
|
117
|
+
from datahub.sdk.resolver_client import ( # type: ignore[assignment]
|
|
114
118
|
ResolverClient,
|
|
115
119
|
)
|
|
116
120
|
return ResolverClient(self)
|
datahub/sdk/search_filters.py
CHANGED
|
@@ -384,6 +384,21 @@ def _filter_discriminator(v: Any) -> Optional[str]:
|
|
|
384
384
|
return None
|
|
385
385
|
|
|
386
386
|
|
|
387
|
+
def _parse_and_like_filter(value: Any) -> Any:
|
|
388
|
+
# Do not parse if filter is already of type and/or/not or a custom condition
|
|
389
|
+
# also do not parse container filter if direct_descendants_only is specified
|
|
390
|
+
if (
|
|
391
|
+
isinstance(value, dict)
|
|
392
|
+
and not set(value.keys()).intersection(
|
|
393
|
+
{"and", "or", "not", "field", "condition", "direct_descendants_only"}
|
|
394
|
+
)
|
|
395
|
+
and len(value) > 1
|
|
396
|
+
):
|
|
397
|
+
return {"and": [{k: v} for k, v in value.items()]}
|
|
398
|
+
|
|
399
|
+
return value
|
|
400
|
+
|
|
401
|
+
|
|
387
402
|
if TYPE_CHECKING or not PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR:
|
|
388
403
|
# The `not TYPE_CHECKING` bit is required to make the linter happy,
|
|
389
404
|
# since we currently only run mypy with pydantic v1.
|
|
@@ -445,6 +460,7 @@ else:
|
|
|
445
460
|
],
|
|
446
461
|
Discriminator(_filter_discriminator),
|
|
447
462
|
],
|
|
463
|
+
pydantic.BeforeValidator(_parse_and_like_filter),
|
|
448
464
|
pydantic.BeforeValidator(_parse_json_from_string),
|
|
449
465
|
]
|
|
450
466
|
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from typing_extensions import Self
|
|
4
|
+
|
|
5
|
+
from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
|
|
6
|
+
from datahub.metadata.com.linkedin.pegasus2avro.common import Siblings
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class HasSiblingsPatch(MetadataPatchProposal):
|
|
10
|
+
def add_sibling(self, sibling_urn: str, primary: bool = False) -> Self:
|
|
11
|
+
"""Add a sibling relationship to the entity.
|
|
12
|
+
|
|
13
|
+
Args:
|
|
14
|
+
sibling_urn: The URN of the sibling entity to add.
|
|
15
|
+
primary: Whether this entity should be marked as primary in the relationship.
|
|
16
|
+
|
|
17
|
+
Returns:
|
|
18
|
+
The patch builder instance.
|
|
19
|
+
"""
|
|
20
|
+
self._add_patch(
|
|
21
|
+
Siblings.ASPECT_NAME,
|
|
22
|
+
"add",
|
|
23
|
+
path=("siblings", sibling_urn),
|
|
24
|
+
value=sibling_urn,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
# Set primary flag if specified
|
|
28
|
+
if primary:
|
|
29
|
+
self._add_patch(
|
|
30
|
+
Siblings.ASPECT_NAME,
|
|
31
|
+
"add",
|
|
32
|
+
path=("primary",),
|
|
33
|
+
value=primary,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
return self
|
|
37
|
+
|
|
38
|
+
def remove_sibling(self, sibling_urn: str) -> Self:
|
|
39
|
+
"""Remove a sibling relationship from the entity.
|
|
40
|
+
|
|
41
|
+
Args:
|
|
42
|
+
sibling_urn: The URN of the sibling entity to remove.
|
|
43
|
+
|
|
44
|
+
Returns:
|
|
45
|
+
The patch builder instance.
|
|
46
|
+
"""
|
|
47
|
+
self._add_patch(
|
|
48
|
+
Siblings.ASPECT_NAME,
|
|
49
|
+
"remove",
|
|
50
|
+
path=("siblings", sibling_urn),
|
|
51
|
+
value={},
|
|
52
|
+
)
|
|
53
|
+
return self
|
|
54
|
+
|
|
55
|
+
def set_siblings(self, sibling_urns: List[str], primary: bool = False) -> Self:
|
|
56
|
+
"""Set the complete list of siblings for the entity.
|
|
57
|
+
|
|
58
|
+
This will replace all existing siblings with the new list.
|
|
59
|
+
|
|
60
|
+
Args:
|
|
61
|
+
sibling_urns: The list of sibling URNs to set.
|
|
62
|
+
primary: Whether this entity should be marked as primary.
|
|
63
|
+
|
|
64
|
+
Returns:
|
|
65
|
+
The patch builder instance.
|
|
66
|
+
"""
|
|
67
|
+
self._add_patch(
|
|
68
|
+
Siblings.ASPECT_NAME, "add", path=("siblings",), value=sibling_urns
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
self._add_patch(Siblings.ASPECT_NAME, "add", path=("primary",), value=primary)
|
|
72
|
+
|
|
73
|
+
return self
|
datahub/specific/dataset.py
CHANGED
|
@@ -22,6 +22,7 @@ from datahub.specific.aspect_helpers.fine_grained_lineage import (
|
|
|
22
22
|
HasFineGrainedLineagePatch,
|
|
23
23
|
)
|
|
24
24
|
from datahub.specific.aspect_helpers.ownership import HasOwnershipPatch
|
|
25
|
+
from datahub.specific.aspect_helpers.siblings import HasSiblingsPatch
|
|
25
26
|
from datahub.specific.aspect_helpers.structured_properties import (
|
|
26
27
|
HasStructuredPropertiesPatch,
|
|
27
28
|
)
|
|
@@ -104,6 +105,7 @@ class DatasetPatchBuilder(
|
|
|
104
105
|
HasTagsPatch,
|
|
105
106
|
HasTermsPatch,
|
|
106
107
|
HasFineGrainedLineagePatch,
|
|
108
|
+
HasSiblingsPatch,
|
|
107
109
|
MetadataPatchProposal,
|
|
108
110
|
):
|
|
109
111
|
def __init__(
|
|
@@ -634,6 +634,9 @@ class SqlParsingAggregator(Closeable):
|
|
|
634
634
|
TableSwap,
|
|
635
635
|
],
|
|
636
636
|
) -> None:
|
|
637
|
+
"""
|
|
638
|
+
This assumes that queries come in order of increasing timestamps.
|
|
639
|
+
"""
|
|
637
640
|
if isinstance(item, KnownQueryLineageInfo):
|
|
638
641
|
self.add_known_query_lineage(item)
|
|
639
642
|
elif isinstance(item, KnownLineageMapping):
|
|
@@ -208,9 +208,7 @@ class ToolMetaExtractor:
|
|
|
208
208
|
Returns:
|
|
209
209
|
bool: whether QueryLog entry is that of hex.
|
|
210
210
|
"""
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
if not last_line.startswith("-- Hex query metadata:"):
|
|
211
|
+
if "-- Hex query metadata:" not in entry.query_text:
|
|
214
212
|
return False
|
|
215
213
|
|
|
216
214
|
entry.origin = HEX_PLATFORM_URN
|