acryl-datahub 0.15.0.2rc3__py3-none-any.whl → 0.15.0.2rc5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (58)
  1. {acryl_datahub-0.15.0.2rc3.dist-info → acryl_datahub-0.15.0.2rc5.dist-info}/METADATA +2460 -2460
  2. {acryl_datahub-0.15.0.2rc3.dist-info → acryl_datahub-0.15.0.2rc5.dist-info}/RECORD +58 -54
  3. datahub/__init__.py +1 -1
  4. datahub/cli/delete_cli.py +3 -3
  5. datahub/cli/migrate.py +2 -2
  6. datahub/emitter/mcp_builder.py +27 -0
  7. datahub/emitter/rest_emitter.py +1 -1
  8. datahub/ingestion/api/source.py +2 -2
  9. datahub/ingestion/graph/client.py +4 -2
  10. datahub/ingestion/source/aws/glue.py +14 -1
  11. datahub/ingestion/source/aws/s3_util.py +24 -1
  12. datahub/ingestion/source/delta_lake/source.py +0 -5
  13. datahub/ingestion/source/demo_data.py +1 -1
  14. datahub/ingestion/source/fivetran/fivetran.py +1 -6
  15. datahub/ingestion/source/gc/execution_request_cleanup.py +31 -6
  16. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +14 -1
  17. datahub/ingestion/source/iceberg/iceberg.py +10 -3
  18. datahub/ingestion/source/iceberg/iceberg_common.py +49 -9
  19. datahub/ingestion/source/iceberg/iceberg_profiler.py +3 -1
  20. datahub/ingestion/source/kafka_connect/kafka_connect.py +1 -6
  21. datahub/ingestion/source/metabase.py +1 -6
  22. datahub/ingestion/source/mlflow.py +0 -5
  23. datahub/ingestion/source/nifi.py +0 -5
  24. datahub/ingestion/source/powerbi_report_server/report_server.py +1 -1
  25. datahub/ingestion/source/redash.py +0 -5
  26. datahub/ingestion/source/redshift/redshift.py +1 -0
  27. datahub/ingestion/source/s3/source.py +10 -14
  28. datahub/ingestion/source/snowflake/snowflake_config.py +13 -0
  29. datahub/ingestion/source/snowflake/snowflake_schema.py +5 -2
  30. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +112 -20
  31. datahub/ingestion/source/snowflake/snowflake_tag.py +14 -4
  32. datahub/ingestion/source/snowflake/snowflake_v2.py +0 -6
  33. datahub/ingestion/source/sql/sql_types.py +1 -1
  34. datahub/ingestion/source/sql/sql_utils.py +5 -0
  35. datahub/ingestion/source/superset.py +1 -6
  36. datahub/ingestion/source/tableau/tableau.py +0 -6
  37. datahub/metadata/_schema_classes.py +316 -43
  38. datahub/metadata/_urns/urn_defs.py +69 -15
  39. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  40. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
  41. datahub/metadata/com/linkedin/pegasus2avro/versionset/__init__.py +17 -0
  42. datahub/metadata/schema.avsc +296 -87
  43. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  44. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  45. datahub/metadata/schemas/DatasetKey.avsc +2 -1
  46. datahub/metadata/schemas/MLFeatureProperties.avsc +51 -0
  47. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +51 -0
  48. datahub/metadata/schemas/MLModelGroupProperties.avsc +96 -23
  49. datahub/metadata/schemas/MLModelKey.avsc +2 -1
  50. datahub/metadata/schemas/MLModelProperties.avsc +96 -48
  51. datahub/metadata/schemas/MLPrimaryKeyProperties.avsc +51 -0
  52. datahub/metadata/schemas/MetadataChangeEvent.avsc +98 -71
  53. datahub/metadata/schemas/VersionProperties.avsc +216 -0
  54. datahub/metadata/schemas/VersionSetKey.avsc +26 -0
  55. datahub/metadata/schemas/VersionSetProperties.avsc +49 -0
  56. {acryl_datahub-0.15.0.2rc3.dist-info → acryl_datahub-0.15.0.2rc5.dist-info}/WHEEL +0 -0
  57. {acryl_datahub-0.15.0.2rc3.dist-info → acryl_datahub-0.15.0.2rc5.dist-info}/entry_points.txt +0 -0
  58. {acryl_datahub-0.15.0.2rc3.dist-info → acryl_datahub-0.15.0.2rc5.dist-info}/top_level.txt +0 -0
@@ -101,6 +101,57 @@
101
101
  ],
102
102
  "name": "versionTag",
103
103
  "default": null
104
+ },
105
+ {
106
+ "type": [
107
+ "null",
108
+ {
109
+ "type": "record",
110
+ "name": "MetadataAttribution",
111
+ "namespace": "com.linkedin.pegasus2avro.common",
112
+ "fields": [
113
+ {
114
+ "type": "long",
115
+ "name": "time",
116
+ "doc": "When this metadata was updated."
117
+ },
118
+ {
119
+ "java": {
120
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
121
+ },
122
+ "type": "string",
123
+ "name": "actor",
124
+ "doc": "The entity (e.g. a member URN) responsible for applying the associated metadata. This can\neither be a user (in case of UI edits) or the datahub system for automation.",
125
+ "Urn": "Urn"
126
+ },
127
+ {
128
+ "java": {
129
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
130
+ },
131
+ "type": [
132
+ "null",
133
+ "string"
134
+ ],
135
+ "name": "source",
136
+ "default": null,
137
+ "doc": "The DataHub source responsible for applying the associated metadata. This will only be filled out\nwhen a DataHub source is responsible. This includes the specific metadata test urn, the automation urn.",
138
+ "Urn": "Urn"
139
+ },
140
+ {
141
+ "type": {
142
+ "type": "map",
143
+ "values": "string"
144
+ },
145
+ "name": "sourceDetail",
146
+ "default": {},
147
+ "doc": "The details associated with why this metadata was applied. For example, this could include\nthe actual regex rule, sql statement, ingestion pipeline ID, etc."
148
+ }
149
+ ],
150
+ "doc": "Information about who, why, and how this metadata was applied"
151
+ }
152
+ ],
153
+ "name": "metadataAttribution",
154
+ "default": null
104
155
  }
105
156
  ],
106
157
  "doc": "A resource-defined string representing the resource state for the purpose of concurrency control"
@@ -74,6 +74,57 @@
74
74
  ],
75
75
  "name": "versionTag",
76
76
  "default": null
77
+ },
78
+ {
79
+ "type": [
80
+ "null",
81
+ {
82
+ "type": "record",
83
+ "name": "MetadataAttribution",
84
+ "namespace": "com.linkedin.pegasus2avro.common",
85
+ "fields": [
86
+ {
87
+ "type": "long",
88
+ "name": "time",
89
+ "doc": "When this metadata was updated."
90
+ },
91
+ {
92
+ "java": {
93
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
94
+ },
95
+ "type": "string",
96
+ "name": "actor",
97
+ "doc": "The entity (e.g. a member URN) responsible for applying the associated metadata. This can\neither be a user (in case of UI edits) or the datahub system for automation.",
98
+ "Urn": "Urn"
99
+ },
100
+ {
101
+ "java": {
102
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
103
+ },
104
+ "type": [
105
+ "null",
106
+ "string"
107
+ ],
108
+ "name": "source",
109
+ "default": null,
110
+ "doc": "The DataHub source responsible for applying the associated metadata. This will only be filled out\nwhen a DataHub source is responsible. This includes the specific metadata test urn, the automation urn.",
111
+ "Urn": "Urn"
112
+ },
113
+ {
114
+ "type": {
115
+ "type": "map",
116
+ "values": "string"
117
+ },
118
+ "name": "sourceDetail",
119
+ "default": {},
120
+ "doc": "The details associated with why this metadata was applied. For example, this could include\nthe actual regex rule, sql statement, ingestion pipeline ID, etc."
121
+ }
122
+ ],
123
+ "doc": "Information about who, why, and how this metadata was applied"
124
+ }
125
+ ],
126
+ "name": "metadataAttribution",
127
+ "default": null
77
128
  }
78
129
  ],
79
130
  "doc": "A resource-defined string representing the resource state for the purpose of concurrency control"
@@ -21,6 +21,51 @@
21
21
  "default": {},
22
22
  "doc": "Custom property bag."
23
23
  },
24
+ {
25
+ "Relationship": {
26
+ "/*": {
27
+ "entityTypes": [
28
+ "dataJob",
29
+ "dataProcessInstance"
30
+ ],
31
+ "isLineage": true,
32
+ "name": "TrainedBy"
33
+ }
34
+ },
35
+ "type": [
36
+ "null",
37
+ {
38
+ "type": "array",
39
+ "items": "string"
40
+ }
41
+ ],
42
+ "name": "trainingJobs",
43
+ "default": null,
44
+ "doc": "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect."
45
+ },
46
+ {
47
+ "Relationship": {
48
+ "/*": {
49
+ "entityTypes": [
50
+ "dataJob",
51
+ "dataProcessInstance"
52
+ ],
53
+ "isLineage": true,
54
+ "isUpstream": false,
55
+ "name": "UsedBy"
56
+ }
57
+ },
58
+ "type": [
59
+ "null",
60
+ {
61
+ "type": "array",
62
+ "items": "string"
63
+ }
64
+ ],
65
+ "name": "downstreamJobs",
66
+ "default": null,
67
+ "doc": "List of jobs or process instances (if any) that use the model or group."
68
+ },
24
69
  {
25
70
  "Searchable": {
26
71
  "boostScore": 10.0,
@@ -102,29 +147,6 @@
102
147
  "default": null,
103
148
  "doc": "Date when the MLModelGroup was last modified"
104
149
  },
105
- {
106
- "Relationship": {
107
- "/*": {
108
- "entityTypes": [
109
- "dataJob"
110
- ],
111
- "isLineage": true,
112
- "name": "TrainedBy"
113
- }
114
- },
115
- "type": [
116
- "null",
117
- {
118
- "type": "array",
119
- "items": "string"
120
- }
121
- ],
122
- "name": "trainingJobs",
123
- "default": null,
124
- "doc": "List of jobs (if any) used to train the model group. Visible in Lineage.",
125
- "Urn": "Urn",
126
- "urn_is_array": true
127
- },
128
150
  {
129
151
  "type": [
130
152
  "null",
@@ -140,6 +162,57 @@
140
162
  ],
141
163
  "name": "versionTag",
142
164
  "default": null
165
+ },
166
+ {
167
+ "type": [
168
+ "null",
169
+ {
170
+ "type": "record",
171
+ "name": "MetadataAttribution",
172
+ "namespace": "com.linkedin.pegasus2avro.common",
173
+ "fields": [
174
+ {
175
+ "type": "long",
176
+ "name": "time",
177
+ "doc": "When this metadata was updated."
178
+ },
179
+ {
180
+ "java": {
181
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
182
+ },
183
+ "type": "string",
184
+ "name": "actor",
185
+ "doc": "The entity (e.g. a member URN) responsible for applying the associated metadata. This can\neither be a user (in case of UI edits) or the datahub system for automation.",
186
+ "Urn": "Urn"
187
+ },
188
+ {
189
+ "java": {
190
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
191
+ },
192
+ "type": [
193
+ "null",
194
+ "string"
195
+ ],
196
+ "name": "source",
197
+ "default": null,
198
+ "doc": "The DataHub source responsible for applying the associated metadata. This will only be filled out\nwhen a DataHub source is responsible. This includes the specific metadata test urn, the automation urn.",
199
+ "Urn": "Urn"
200
+ },
201
+ {
202
+ "type": {
203
+ "type": "map",
204
+ "values": "string"
205
+ },
206
+ "name": "sourceDetail",
207
+ "default": {},
208
+ "doc": "The details associated with why this metadata was applied. For example, this could include\nthe actual regex rule, sql statement, ingestion pipeline ID, etc."
209
+ }
210
+ ],
211
+ "doc": "Information about who, why, and how this metadata was applied"
212
+ }
213
+ ],
214
+ "name": "metadataAttribution",
215
+ "default": null
143
216
  }
144
217
  ],
145
218
  "doc": "A resource-defined string representing the resource state for the purpose of concurrency control"
@@ -29,7 +29,8 @@
29
29
  "browsePathsV2",
30
30
  "structuredProperties",
31
31
  "forms",
32
- "testResults"
32
+ "testResults",
33
+ "versionProperties"
33
34
  ]
34
35
  },
35
36
  "name": "MLModelKey",
@@ -37,6 +37,51 @@
37
37
  "default": null,
38
38
  "doc": "URL where the reference exist"
39
39
  },
40
+ {
41
+ "Relationship": {
42
+ "/*": {
43
+ "entityTypes": [
44
+ "dataJob",
45
+ "dataProcessInstance"
46
+ ],
47
+ "isLineage": true,
48
+ "name": "TrainedBy"
49
+ }
50
+ },
51
+ "type": [
52
+ "null",
53
+ {
54
+ "type": "array",
55
+ "items": "string"
56
+ }
57
+ ],
58
+ "name": "trainingJobs",
59
+ "default": null,
60
+ "doc": "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect."
61
+ },
62
+ {
63
+ "Relationship": {
64
+ "/*": {
65
+ "entityTypes": [
66
+ "dataJob",
67
+ "dataProcessInstance"
68
+ ],
69
+ "isLineage": true,
70
+ "isUpstream": false,
71
+ "name": "UsedBy"
72
+ }
73
+ },
74
+ "type": [
75
+ "null",
76
+ {
77
+ "type": "array",
78
+ "items": "string"
79
+ }
80
+ ],
81
+ "name": "downstreamJobs",
82
+ "default": null,
83
+ "doc": "List of jobs or process instances (if any) that use the model or group."
84
+ },
40
85
  {
41
86
  "Searchable": {
42
87
  "boostScore": 10.0,
@@ -133,6 +178,57 @@
133
178
  ],
134
179
  "name": "versionTag",
135
180
  "default": null
181
+ },
182
+ {
183
+ "type": [
184
+ "null",
185
+ {
186
+ "type": "record",
187
+ "name": "MetadataAttribution",
188
+ "namespace": "com.linkedin.pegasus2avro.common",
189
+ "fields": [
190
+ {
191
+ "type": "long",
192
+ "name": "time",
193
+ "doc": "When this metadata was updated."
194
+ },
195
+ {
196
+ "java": {
197
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
198
+ },
199
+ "type": "string",
200
+ "name": "actor",
201
+ "doc": "The entity (e.g. a member URN) responsible for applying the associated metadata. This can\neither be a user (in case of UI edits) or the datahub system for automation.",
202
+ "Urn": "Urn"
203
+ },
204
+ {
205
+ "java": {
206
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
207
+ },
208
+ "type": [
209
+ "null",
210
+ "string"
211
+ ],
212
+ "name": "source",
213
+ "default": null,
214
+ "doc": "The DataHub source responsible for applying the associated metadata. This will only be filled out\nwhen a DataHub source is responsible. This includes the specific metadata test urn, the automation urn.",
215
+ "Urn": "Urn"
216
+ },
217
+ {
218
+ "type": {
219
+ "type": "map",
220
+ "values": "string"
221
+ },
222
+ "name": "sourceDetail",
223
+ "default": {},
224
+ "doc": "The details associated with why this metadata was applied. For example, this could include\nthe actual regex rule, sql statement, ingestion pipeline ID, etc."
225
+ }
226
+ ],
227
+ "doc": "Information about who, why, and how this metadata was applied"
228
+ }
229
+ ],
230
+ "name": "metadataAttribution",
231
+ "default": null
136
232
  }
137
233
  ],
138
234
  "doc": "A resource-defined string representing the resource state for the purpose of concurrency control"
@@ -346,54 +442,6 @@
346
442
  "Urn": "Urn",
347
443
  "urn_is_array": true
348
444
  },
349
- {
350
- "Relationship": {
351
- "/*": {
352
- "entityTypes": [
353
- "dataJob",
354
- "dataProcessInstance"
355
- ],
356
- "isLineage": true,
357
- "name": "TrainedBy"
358
- }
359
- },
360
- "type": [
361
- "null",
362
- {
363
- "type": "array",
364
- "items": "string"
365
- }
366
- ],
367
- "name": "trainingJobs",
368
- "default": null,
369
- "doc": "List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.",
370
- "Urn": "Urn",
371
- "urn_is_array": true
372
- },
373
- {
374
- "Relationship": {
375
- "/*": {
376
- "entityTypes": [
377
- "dataJob"
378
- ],
379
- "isLineage": true,
380
- "isUpstream": false,
381
- "name": "UsedBy"
382
- }
383
- },
384
- "type": [
385
- "null",
386
- {
387
- "type": "array",
388
- "items": "string"
389
- }
390
- ],
391
- "name": "downstreamJobs",
392
- "default": null,
393
- "doc": "List of jobs (if any) that use the model",
394
- "Urn": "Urn",
395
- "urn_is_array": true
396
- },
397
445
  {
398
446
  "Relationship": {
399
447
  "/*": {
@@ -97,6 +97,57 @@
97
97
  ],
98
98
  "name": "versionTag",
99
99
  "default": null
100
+ },
101
+ {
102
+ "type": [
103
+ "null",
104
+ {
105
+ "type": "record",
106
+ "name": "MetadataAttribution",
107
+ "namespace": "com.linkedin.pegasus2avro.common",
108
+ "fields": [
109
+ {
110
+ "type": "long",
111
+ "name": "time",
112
+ "doc": "When this metadata was updated."
113
+ },
114
+ {
115
+ "java": {
116
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
117
+ },
118
+ "type": "string",
119
+ "name": "actor",
120
+ "doc": "The entity (e.g. a member URN) responsible for applying the associated metadata. This can\neither be a user (in case of UI edits) or the datahub system for automation.",
121
+ "Urn": "Urn"
122
+ },
123
+ {
124
+ "java": {
125
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
126
+ },
127
+ "type": [
128
+ "null",
129
+ "string"
130
+ ],
131
+ "name": "source",
132
+ "default": null,
133
+ "doc": "The DataHub source responsible for applying the associated metadata. This will only be filled out\nwhen a DataHub source is responsible. This includes the specific metadata test urn, the automation urn.",
134
+ "Urn": "Urn"
135
+ },
136
+ {
137
+ "type": {
138
+ "type": "map",
139
+ "values": "string"
140
+ },
141
+ "name": "sourceDetail",
142
+ "default": {},
143
+ "doc": "The details associated with why this metadata was applied. For example, this could include\nthe actual regex rule, sql statement, ingestion pipeline ID, etc."
144
+ }
145
+ ],
146
+ "doc": "Information about who, why, and how this metadata was applied"
147
+ }
148
+ ],
149
+ "name": "metadataAttribution",
150
+ "default": null
100
151
  }
101
152
  ],
102
153
  "doc": "A resource-defined string representing the resource state for the purpose of concurrency control"