acryl-datahub 0.15.0.6rc3__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of acryl-datahub might be problematic.

Files changed (204)
  1. {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/METADATA +2552 -2523
  2. {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/RECORD +204 -191
  3. {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/entry_points.txt +1 -0
  5. datahub/_version.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +4 -3
  7. datahub/api/entities/dataset/dataset.py +731 -42
  8. datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
  9. datahub/cli/check_cli.py +72 -19
  10. datahub/cli/docker_cli.py +3 -3
  11. datahub/cli/iceberg_cli.py +1 -1
  12. datahub/cli/ingest_cli.py +30 -93
  13. datahub/cli/lite_cli.py +4 -2
  14. datahub/cli/specific/dataproduct_cli.py +1 -1
  15. datahub/cli/specific/dataset_cli.py +128 -14
  16. datahub/configuration/common.py +10 -2
  17. datahub/configuration/git.py +1 -3
  18. datahub/configuration/kafka.py +1 -1
  19. datahub/emitter/mce_builder.py +28 -13
  20. datahub/emitter/mcp_builder.py +4 -1
  21. datahub/emitter/response_helper.py +145 -0
  22. datahub/emitter/rest_emitter.py +323 -10
  23. datahub/ingestion/api/decorators.py +1 -1
  24. datahub/ingestion/api/source_helpers.py +4 -0
  25. datahub/ingestion/fs/s3_fs.py +2 -2
  26. datahub/ingestion/glossary/classification_mixin.py +1 -5
  27. datahub/ingestion/graph/client.py +41 -22
  28. datahub/ingestion/graph/entity_versioning.py +3 -3
  29. datahub/ingestion/graph/filters.py +64 -37
  30. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -6
  31. datahub/ingestion/run/pipeline.py +112 -148
  32. datahub/ingestion/run/sink_callback.py +77 -0
  33. datahub/ingestion/sink/datahub_rest.py +8 -0
  34. datahub/ingestion/source/abs/config.py +2 -4
  35. datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
  36. datahub/ingestion/source/bigquery_v2/bigquery_config.py +2 -46
  37. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +6 -1
  38. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +7 -4
  39. datahub/ingestion/source/cassandra/cassandra.py +152 -233
  40. datahub/ingestion/source/cassandra/cassandra_api.py +13 -5
  41. datahub/ingestion/source/common/gcp_credentials_config.py +53 -0
  42. datahub/ingestion/source/common/subtypes.py +12 -0
  43. datahub/ingestion/source/csv_enricher.py +3 -3
  44. datahub/ingestion/source/data_lake_common/path_spec.py +1 -3
  45. datahub/ingestion/source/dbt/dbt_common.py +3 -5
  46. datahub/ingestion/source/dbt/dbt_tests.py +4 -8
  47. datahub/ingestion/source/delta_lake/config.py +8 -1
  48. datahub/ingestion/source/delta_lake/report.py +4 -2
  49. datahub/ingestion/source/delta_lake/source.py +20 -5
  50. datahub/ingestion/source/dremio/dremio_api.py +4 -8
  51. datahub/ingestion/source/dremio/dremio_aspects.py +3 -5
  52. datahub/ingestion/source/dynamodb/dynamodb.py +1 -0
  53. datahub/ingestion/source/elastic_search.py +26 -6
  54. datahub/ingestion/source/feast.py +27 -8
  55. datahub/ingestion/source/file.py +6 -3
  56. datahub/ingestion/source/gc/dataprocess_cleanup.py +1 -1
  57. datahub/ingestion/source/gc/execution_request_cleanup.py +2 -1
  58. datahub/ingestion/source/ge_data_profiler.py +12 -15
  59. datahub/ingestion/source/iceberg/iceberg.py +46 -12
  60. datahub/ingestion/source/iceberg/iceberg_common.py +71 -21
  61. datahub/ingestion/source/identity/okta.py +37 -7
  62. datahub/ingestion/source/kafka/kafka.py +1 -1
  63. datahub/ingestion/source/kafka_connect/common.py +2 -7
  64. datahub/ingestion/source/kafka_connect/kafka_connect.py +97 -4
  65. datahub/ingestion/source/kafka_connect/sink_connectors.py +2 -2
  66. datahub/ingestion/source/kafka_connect/source_connectors.py +6 -9
  67. datahub/ingestion/source/looker/looker_common.py +3 -3
  68. datahub/ingestion/source/looker/looker_file_loader.py +2 -2
  69. datahub/ingestion/source/looker/looker_lib_wrapper.py +2 -1
  70. datahub/ingestion/source/looker/looker_source.py +1 -1
  71. datahub/ingestion/source/looker/looker_template_language.py +4 -2
  72. datahub/ingestion/source/looker/lookml_source.py +3 -2
  73. datahub/ingestion/source/metabase.py +57 -35
  74. datahub/ingestion/source/metadata/business_glossary.py +45 -3
  75. datahub/ingestion/source/metadata/lineage.py +2 -2
  76. datahub/ingestion/source/mlflow.py +365 -35
  77. datahub/ingestion/source/mode.py +18 -8
  78. datahub/ingestion/source/neo4j/neo4j_source.py +27 -7
  79. datahub/ingestion/source/nifi.py +37 -11
  80. datahub/ingestion/source/openapi.py +1 -1
  81. datahub/ingestion/source/openapi_parser.py +49 -17
  82. datahub/ingestion/source/powerbi/m_query/parser.py +3 -2
  83. datahub/ingestion/source/powerbi/m_query/tree_function.py +2 -1
  84. datahub/ingestion/source/powerbi/powerbi.py +1 -3
  85. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -1
  86. datahub/ingestion/source/powerbi_report_server/report_server.py +26 -7
  87. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +1 -1
  88. datahub/ingestion/source/preset.py +7 -4
  89. datahub/ingestion/source/pulsar.py +3 -2
  90. datahub/ingestion/source/qlik_sense/websocket_connection.py +4 -2
  91. datahub/ingestion/source/redash.py +31 -7
  92. datahub/ingestion/source/redshift/config.py +4 -0
  93. datahub/ingestion/source/redshift/datashares.py +236 -0
  94. datahub/ingestion/source/redshift/lineage.py +6 -2
  95. datahub/ingestion/source/redshift/lineage_v2.py +24 -9
  96. datahub/ingestion/source/redshift/profile.py +1 -1
  97. datahub/ingestion/source/redshift/query.py +133 -33
  98. datahub/ingestion/source/redshift/redshift.py +46 -73
  99. datahub/ingestion/source/redshift/redshift_schema.py +186 -6
  100. datahub/ingestion/source/redshift/report.py +3 -0
  101. datahub/ingestion/source/s3/config.py +5 -5
  102. datahub/ingestion/source/s3/source.py +20 -41
  103. datahub/ingestion/source/salesforce.py +550 -275
  104. datahub/ingestion/source/schema_inference/object.py +1 -1
  105. datahub/ingestion/source/sigma/sigma.py +1 -1
  106. datahub/ingestion/source/slack/slack.py +31 -10
  107. datahub/ingestion/source/snowflake/snowflake_connection.py +2 -2
  108. datahub/ingestion/source/snowflake/snowflake_queries.py +19 -13
  109. datahub/ingestion/source/snowflake/snowflake_query.py +6 -4
  110. datahub/ingestion/source/snowflake/snowflake_schema.py +3 -4
  111. datahub/ingestion/source/snowflake/snowflake_v2.py +1 -1
  112. datahub/ingestion/source/sql/athena.py +10 -16
  113. datahub/ingestion/source/sql/druid.py +1 -5
  114. datahub/ingestion/source/sql/hive.py +15 -6
  115. datahub/ingestion/source/sql/hive_metastore.py +3 -2
  116. datahub/ingestion/source/sql/mssql/job_models.py +29 -0
  117. datahub/ingestion/source/sql/mssql/source.py +11 -5
  118. datahub/ingestion/source/sql/oracle.py +127 -63
  119. datahub/ingestion/source/sql/sql_common.py +6 -12
  120. datahub/ingestion/source/sql/sql_types.py +2 -2
  121. datahub/ingestion/source/sql/teradata.py +7 -5
  122. datahub/ingestion/source/sql/trino.py +2 -2
  123. datahub/ingestion/source/state/stale_entity_removal_handler.py +4 -8
  124. datahub/ingestion/source/superset.py +222 -62
  125. datahub/ingestion/source/tableau/tableau.py +22 -6
  126. datahub/ingestion/source/tableau/tableau_common.py +3 -2
  127. datahub/ingestion/source/unity/ge_profiler.py +2 -1
  128. datahub/ingestion/source/unity/source.py +11 -1
  129. datahub/ingestion/source/vertexai.py +697 -0
  130. datahub/ingestion/source_config/pulsar.py +3 -1
  131. datahub/ingestion/transformer/pattern_cleanup_ownership.py +25 -7
  132. datahub/lite/duckdb_lite.py +3 -10
  133. datahub/lite/lite_local.py +1 -1
  134. datahub/lite/lite_util.py +4 -3
  135. datahub/metadata/_schema_classes.py +714 -417
  136. datahub/metadata/_urns/urn_defs.py +1673 -1649
  137. datahub/metadata/com/linkedin/pegasus2avro/incident/__init__.py +4 -0
  138. datahub/metadata/schema.avsc +16438 -16603
  139. datahub/metadata/schemas/AssertionInfo.avsc +3 -1
  140. datahub/metadata/schemas/BusinessAttributeInfo.avsc +6 -2
  141. datahub/metadata/schemas/BusinessAttributes.avsc +6 -0
  142. datahub/metadata/schemas/ChartInfo.avsc +1 -0
  143. datahub/metadata/schemas/CorpGroupKey.avsc +2 -1
  144. datahub/metadata/schemas/CorpUserInfo.avsc +13 -0
  145. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  146. datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc +8 -3
  147. datahub/metadata/schemas/DataProcessInstanceInput.avsc +129 -1
  148. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +131 -3
  149. datahub/metadata/schemas/DataProcessKey.avsc +2 -1
  150. datahub/metadata/schemas/DataProductKey.avsc +2 -1
  151. datahub/metadata/schemas/DomainKey.avsc +2 -1
  152. datahub/metadata/schemas/EditableSchemaMetadata.avsc +6 -2
  153. datahub/metadata/schemas/GlossaryNodeKey.avsc +3 -1
  154. datahub/metadata/schemas/GlossaryTermKey.avsc +2 -1
  155. datahub/metadata/schemas/GlossaryTerms.avsc +3 -1
  156. datahub/metadata/schemas/IncidentInfo.avsc +130 -46
  157. datahub/metadata/schemas/InputFields.avsc +3 -1
  158. datahub/metadata/schemas/MLFeatureKey.avsc +2 -1
  159. datahub/metadata/schemas/MLFeatureTableKey.avsc +2 -1
  160. datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -1
  161. datahub/metadata/schemas/MLModelGroupKey.avsc +3 -1
  162. datahub/metadata/schemas/MLModelKey.avsc +3 -1
  163. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +2 -1
  164. datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -2
  165. datahub/metadata/schemas/PostKey.avsc +2 -1
  166. datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
  167. datahub/metadata/schemas/SchemaMetadata.avsc +3 -1
  168. datahub/metadata/schemas/StructuredPropertyDefinition.avsc +14 -0
  169. datahub/metadata/schemas/VersionProperties.avsc +18 -0
  170. datahub/metadata/schemas/VersionSetProperties.avsc +5 -0
  171. datahub/pydantic/__init__.py +0 -0
  172. datahub/pydantic/compat.py +58 -0
  173. datahub/sdk/__init__.py +30 -12
  174. datahub/sdk/_all_entities.py +1 -1
  175. datahub/sdk/_attribution.py +4 -0
  176. datahub/sdk/_shared.py +251 -16
  177. datahub/sdk/_utils.py +35 -0
  178. datahub/sdk/container.py +29 -5
  179. datahub/sdk/dataset.py +118 -20
  180. datahub/sdk/{_entity.py → entity.py} +24 -1
  181. datahub/sdk/entity_client.py +1 -1
  182. datahub/sdk/main_client.py +23 -0
  183. datahub/sdk/resolver_client.py +17 -29
  184. datahub/sdk/search_client.py +50 -0
  185. datahub/sdk/search_filters.py +374 -0
  186. datahub/specific/dataset.py +3 -4
  187. datahub/sql_parsing/_sqlglot_patch.py +2 -10
  188. datahub/sql_parsing/schema_resolver.py +1 -1
  189. datahub/sql_parsing/split_statements.py +20 -13
  190. datahub/sql_parsing/sql_parsing_common.py +7 -0
  191. datahub/sql_parsing/sqlglot_lineage.py +1 -1
  192. datahub/sql_parsing/sqlglot_utils.py +1 -4
  193. datahub/testing/check_sql_parser_result.py +5 -6
  194. datahub/testing/compare_metadata_json.py +7 -6
  195. datahub/testing/pytest_hooks.py +56 -0
  196. datahub/upgrade/upgrade.py +2 -2
  197. datahub/utilities/file_backed_collections.py +3 -14
  198. datahub/utilities/ingest_utils.py +106 -0
  199. datahub/utilities/mapping.py +1 -1
  200. datahub/utilities/memory_footprint.py +3 -2
  201. datahub/utilities/sentinels.py +22 -0
  202. datahub/utilities/unified_diff.py +5 -1
  203. {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/LICENSE +0 -0
  204. {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/top_level.txt +0 -0
datahub/metadata/schemas/IncidentInfo.avsc CHANGED
@@ -108,7 +108,88 @@
  ],
  "name": "priority",
  "default": 0,
- "doc": "A numeric severity or priority for the incident. On the UI we will translate this into something easy to understand."
+ "doc": "A numeric severity or priority for the incident. On the UI we will translate this into something easy to understand.\nCurrently supported: 0 - CRITICAL, 1 - HIGH, 2 - MED, 3 - LOW\n(We probably should have modeled as an enum)"
+ },
+ {
+ "type": [
+ "null",
+ {
+ "type": "array",
+ "items": {
+ "type": "record",
+ "name": "IncidentAssignee",
+ "namespace": "com.linkedin.pegasus2avro.incident",
+ "fields": [
+ {
+ "Searchable": {
+ "addToFilters": true,
+ "fieldName": "assignees",
+ "filterNameOverride": "Assignee"
+ },
+ "java": {
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+ },
+ "type": "string",
+ "name": "actor",
+ "doc": "The user or group assigned to the incident.",
+ "Urn": "Urn"
+ },
+ {
+ "type": {
+ "type": "record",
+ "name": "AuditStamp",
+ "namespace": "com.linkedin.pegasus2avro.common",
+ "fields": [
+ {
+ "type": "long",
+ "name": "time",
+ "doc": "When did the resource/association/sub-resource move into the specific lifecycle stage represented by this AuditEvent."
+ },
+ {
+ "java": {
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+ },
+ "type": "string",
+ "name": "actor",
+ "doc": "The entity (e.g. a member URN) which will be credited for moving the resource/association/sub-resource into the specific lifecycle stage. It is also the one used to authorize the change.",
+ "Urn": "Urn"
+ },
+ {
+ "java": {
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+ },
+ "type": [
+ "null",
+ "string"
+ ],
+ "name": "impersonator",
+ "default": null,
+ "doc": "The entity (e.g. a service URN) which performs the change on behalf of the Actor and must be authorized to act as the Actor.",
+ "Urn": "Urn"
+ },
+ {
+ "type": [
+ "null",
+ "string"
+ ],
+ "name": "message",
+ "default": null,
+ "doc": "Additional context around how DataHub was informed of the particular change. For example: was the change created by an automated process, or manually."
+ }
+ ],
+ "doc": "Data captured on a resource/association/sub-resource level giving insight into when that resource/association/sub-resource moved into a particular lifecycle stage, and who acted to move it into that specific lifecycle stage."
+ },
+ "name": "assignedAt",
+ "doc": "The time & actor responsible for assiging the assignee."
+ }
+ ],
+ "doc": "The incident assignee type.\nThis is in a record so that we can add additional fields if we need to later (e.g.\nthe type of the assignee."
+ }
+ }
+ ],
+ "name": "assignees",
+ "default": null,
+ "doc": "The parties assigned with resolving the incident"
  },
  {
  "type": {
@@ -135,7 +216,38 @@
  ]
  },
  "name": "state",
- "doc": "The state of the incident"
+ "doc": "The top-level state of the incident, whether it's active or resolved."
+ },
+ {
+ "Searchable": {
+ "addToFilters": true,
+ "filterNameOverride": "Stage"
+ },
+ "type": [
+ "null",
+ {
+ "type": "enum",
+ "symbolDocs": {
+ "FIXED": "The incident is in the resolved as completed stage.",
+ "INVESTIGATION": "The incident root cause is being investigated.",
+ "NO_ACTION_REQUIRED": "The incident is in the resolved with no action required state, e.g. the\nincident was a false positive, or was expected.",
+ "TRIAGE": "The impact and priority of the incident is being actively assessed.",
+ "WORK_IN_PROGRESS": "The incident is in the remediation stage."
+ },
+ "name": "IncidentStage",
+ "namespace": "com.linkedin.pegasus2avro.incident",
+ "symbols": [
+ "TRIAGE",
+ "INVESTIGATION",
+ "WORK_IN_PROGRESS",
+ "FIXED",
+ "NO_ACTION_REQUIRED"
+ ]
+ }
+ ],
+ "name": "stage",
+ "default": null,
+ "doc": "The lifecycle stage for the incident - Null means no stage was assigned yet.\nIn the future, we may add CUSTOM here with a customStage string field for user-defined stages."
  },
  {
  "type": [
@@ -153,50 +265,7 @@
  "fieldType": "COUNT"
  }
  },
- "type": {
- "type": "record",
- "name": "AuditStamp",
- "namespace": "com.linkedin.pegasus2avro.common",
- "fields": [
- {
- "type": "long",
- "name": "time",
- "doc": "When did the resource/association/sub-resource move into the specific lifecycle stage represented by this AuditEvent."
- },
- {
- "java": {
- "class": "com.linkedin.pegasus2avro.common.urn.Urn"
- },
- "type": "string",
- "name": "actor",
- "doc": "The entity (e.g. a member URN) which will be credited for moving the resource/association/sub-resource into the specific lifecycle stage. It is also the one used to authorize the change.",
- "Urn": "Urn"
- },
- {
- "java": {
- "class": "com.linkedin.pegasus2avro.common.urn.Urn"
- },
- "type": [
- "null",
- "string"
- ],
- "name": "impersonator",
- "default": null,
- "doc": "The entity (e.g. a service URN) which performs the change on behalf of the Actor and must be authorized to act as the Actor.",
- "Urn": "Urn"
- },
- {
- "type": [
- "null",
- "string"
- ],
- "name": "message",
- "default": null,
- "doc": "Additional context around how DataHub was informed of the particular change. For example: was the change created by an automated process, or manually."
- }
- ],
- "doc": "Data captured on a resource/association/sub-resource level giving insight into when that resource/association/sub-resource moved into a particular lifecycle stage, and who acted to move it into that specific lifecycle stage."
- },
+ "type": "com.linkedin.pegasus2avro.common.AuditStamp",
  "name": "lastUpdated",
  "doc": "The time at which the request was initially created"
  }
@@ -262,6 +331,21 @@
  "default": null,
  "doc": "The source of an incident, i.e. how it was generated."
  },
+ {
+ "Searchable": {
+ "/time": {
+ "fieldName": "startedAt",
+ "fieldType": "COUNT"
+ }
+ },
+ "type": [
+ "null",
+ "long"
+ ],
+ "name": "startedAt",
+ "default": null,
+ "doc": "The time at which the incident actually started (may be before the date it was raised)."
+ },
  {
  "Searchable": {
  "/time": {
datahub/metadata/schemas/InputFields.avsc CHANGED
@@ -553,7 +553,9 @@
  "fieldName": "glossaryTerms",
  "fieldType": "URN",
  "filterNameOverride": "Glossary Term",
- "hasValuesFieldName": "hasGlossaryTerms"
+ "hasValuesFieldName": "hasGlossaryTerms",
+ "includeSystemModifiedAt": true,
+ "systemModifiedAtFieldName": "termsModifiedAt"
  },
  "java": {
  "class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"
datahub/metadata/schemas/MLFeatureKey.avsc CHANGED
@@ -19,7 +19,8 @@
  "browsePathsV2",
  "structuredProperties",
  "forms",
- "testResults"
+ "testResults",
+ "subTypes"
  ]
  },
  "name": "MLFeatureKey",
datahub/metadata/schemas/MLFeatureTableKey.avsc CHANGED
@@ -19,7 +19,8 @@
  "browsePathsV2",
  "structuredProperties",
  "forms",
- "testResults"
+ "testResults",
+ "subTypes"
  ]
  },
  "name": "MLFeatureTableKey",
datahub/metadata/schemas/MLModelDeploymentKey.avsc CHANGED
@@ -11,7 +11,8 @@
  "deprecation",
  "globalTags",
  "dataPlatformInstance",
- "testResults"
+ "testResults",
+ "container"
  ]
  },
  "name": "MLModelDeploymentKey",
datahub/metadata/schemas/MLModelGroupKey.avsc CHANGED
@@ -18,7 +18,9 @@
  "browsePathsV2",
  "structuredProperties",
  "forms",
- "testResults"
+ "testResults",
+ "subTypes",
+ "container"
  ]
  },
  "name": "MLModelGroupKey",
datahub/metadata/schemas/MLModelKey.avsc CHANGED
@@ -30,7 +30,9 @@
  "structuredProperties",
  "forms",
  "testResults",
- "versionProperties"
+ "versionProperties",
+ "subTypes",
+ "container"
  ]
  },
  "name": "MLModelKey",
datahub/metadata/schemas/MLPrimaryKeyKey.avsc CHANGED
@@ -17,7 +17,8 @@
  "dataPlatformInstance",
  "structuredProperties",
  "forms",
- "testResults"
+ "testResults",
+ "subTypes"
  ]
  },
  "name": "MLPrimaryKeyKey",
datahub/metadata/schemas/MetadataChangeEvent.avsc CHANGED
@@ -183,6 +183,7 @@
  },
  {
  "Searchable": {
+ "boostScore": 10.0,
  "enableAutocomplete": true,
  "fieldNameAliases": [
  "_entityName"
@@ -994,7 +995,9 @@
  "fieldName": "glossaryTerms",
  "fieldType": "URN",
  "filterNameOverride": "Glossary Term",
- "hasValuesFieldName": "hasGlossaryTerms"
+ "hasValuesFieldName": "hasGlossaryTerms",
+ "includeSystemModifiedAt": true,
+ "systemModifiedAtFieldName": "termsModifiedAt"
  },
  "java": {
  "class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"
@@ -1616,6 +1619,19 @@
  "name": "countryCode",
  "default": null,
  "doc": "two uppercase letters country code. e.g. US"
+ },
+ {
+ "Searchable": {
+ "fieldType": "BOOLEAN",
+ "queryByDefault": false
+ },
+ "type": [
+ "boolean",
+ "null"
+ ],
+ "name": "system",
+ "default": false,
+ "doc": "Whether the corpUser is a system user."
  }
  ],
  "doc": "Linkedin corp user information"
@@ -4644,7 +4660,9 @@
  "/terms/*/urn": {
  "boostScore": 0.5,
  "fieldName": "editedFieldGlossaryTerms",
- "fieldType": "URN"
+ "fieldType": "URN",
+ "includeSystemModifiedAt": true,
+ "systemModifiedAtFieldName": "schemaFieldTermsModifiedAt"
  }
  },
  "type": [
datahub/metadata/schemas/PostKey.avsc CHANGED
@@ -5,7 +5,8 @@
  "keyForEntity": "post",
  "entityCategory": "core",
  "entityAspects": [
- "postInfo"
+ "postInfo",
+ "subTypes"
  ]
  },
  "name": "PostKey",
datahub/metadata/schemas/SchemaFieldKey.avsc CHANGED
@@ -13,7 +13,8 @@
  "schemaFieldAliases",
  "documentation",
  "testResults",
- "deprecation"
+ "deprecation",
+ "subTypes"
  ]
  },
  "name": "SchemaFieldKey",
datahub/metadata/schemas/SchemaMetadata.avsc CHANGED
@@ -777,7 +777,9 @@
  "fieldName": "glossaryTerms",
  "fieldType": "URN",
  "filterNameOverride": "Glossary Term",
- "hasValuesFieldName": "hasGlossaryTerms"
+ "hasValuesFieldName": "hasGlossaryTerms",
+ "includeSystemModifiedAt": true,
+ "systemModifiedAtFieldName": "termsModifiedAt"
  },
  "java": {
  "class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"
datahub/metadata/schemas/StructuredPropertyDefinition.avsc CHANGED
@@ -23,6 +23,13 @@
  "doc": "The display name of the property. This is the name that will be shown in the UI and can be used to look up the property id."
  },
  {
+ "UrnValidation": {
+ "entityTypes": [
+ "dataType"
+ ],
+ "exist": true,
+ "strict": true
+ },
  "java": {
  "class": "com.linkedin.pegasus2avro.common.urn.Urn"
  },
@@ -111,6 +118,13 @@
  "fieldName": "entityTypes"
  }
  },
+ "UrnValidation": {
+ "entityTypes": [
+ "entityType"
+ ],
+ "exist": true,
+ "strict": true
+ },
  "type": {
  "type": "array",
  "items": "string"
datahub/metadata/schemas/VersionProperties.avsc CHANGED
@@ -137,6 +137,24 @@
  "name": "sortId",
  "doc": "Sort identifier that determines where a version lives in the order of the Version Set.\nWhat this looks like depends on the Version Scheme. For sort ids generated by DataHub we use an 8 character string representation."
  },
+ {
+ "type": {
+ "type": "enum",
+ "symbolDocs": {
+ "ALPHANUMERIC_GENERATED_BY_DATAHUB": "String managed by DataHub. Currently, an 8 character alphabetical string.",
+ "LEXICOGRAPHIC_STRING": "String sorted lexicographically."
+ },
+ "name": "VersioningScheme",
+ "namespace": "com.linkedin.pegasus2avro.versionset",
+ "symbols": [
+ "LEXICOGRAPHIC_STRING",
+ "ALPHANUMERIC_GENERATED_BY_DATAHUB"
+ ]
+ },
+ "name": "versioningScheme",
+ "default": "LEXICOGRAPHIC_STRING",
+ "doc": "What versioning scheme `sortId` belongs to.\nDefaults to a plain string that is lexicographically sorted."
+ },
  {
  "type": [
datahub/metadata/schemas/VersionSetProperties.avsc CHANGED
@@ -36,9 +36,14 @@
  {
  "type": {
  "type": "enum",
+ "symbolDocs": {
+ "ALPHANUMERIC_GENERATED_BY_DATAHUB": "String managed by DataHub. Currently, an 8 character alphabetical string.",
+ "LEXICOGRAPHIC_STRING": "String sorted lexicographically."
+ },
  "name": "VersioningScheme",
  "namespace": "com.linkedin.pegasus2avro.versionset",
  "symbols": [
+ "LEXICOGRAPHIC_STRING",
  "ALPHANUMERIC_GENERATED_BY_DATAHUB"
  ]
  },
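A note on the two symbols: LEXICOGRAPHIC_STRING sorts version strings as plain strings, which can be surprising for numeric-looking versions. A quick illustration (hand-written, not from the package):

# Plain lexicographic ordering, as used by LEXICOGRAPHIC_STRING.
versions = ["v2", "v10", "v9"]
print(sorted(versions))  # ['v10', 'v2', 'v9'] -- 'v10' sorts before 'v2'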
datahub/pydantic/__init__.py ADDED
File without changes (new, empty file)
datahub/pydantic/compat.py ADDED
@@ -0,0 +1,58 @@
+ import functools
+ from typing import Any, Callable, Optional, TypeVar, cast
+
+ # Define a type variable for the decorator
+ F = TypeVar("F", bound=Callable[..., Any])
+
+
+ # Check which Pydantic version is installed
+ def get_pydantic_version() -> int:
+     """Determine if Pydantic v1 or v2 is installed."""
+     try:
+         import pydantic
+
+         version = pydantic.__version__
+         return 1 if version.startswith("1.") else 2
+     except (ImportError, AttributeError):
+         # Default to v1 if we can't determine version
+         return 1
+
+
+ PYDANTIC_VERSION = get_pydantic_version()
+
+
+ # Create compatibility layer for dict-like methods
+ def compat_dict_method(v1_method: Optional[Callable] = None) -> Callable:
+     """
+     Decorator to make a dict method work with both Pydantic v1 and v2.
+
+     In v1: Uses the decorated method (typically dict)
+     In v2: Redirects to model_dump with appropriate parameter mapping
+     """
+
+     def decorator(func: F) -> F:
+         @functools.wraps(func)
+         def wrapper(self, *args, **kwargs):
+             if PYDANTIC_VERSION >= 2:
+                 # Map v1 parameters to v2 parameters
+                 # exclude -> exclude
+                 # exclude_unset -> exclude_unset
+                 # exclude_defaults -> exclude_defaults
+                 # exclude_none -> exclude_none
+                 # by_alias -> by_alias
+                 model_dump_kwargs = kwargs.copy()
+
+                 # Handle the 'exclude' parameter differently between versions
+                 exclude = kwargs.get("exclude", set())
+                 if isinstance(exclude, (set, dict)):
+                     model_dump_kwargs["exclude"] = exclude
+
+                 return self.model_dump(**model_dump_kwargs)
+             return func(self, *args, **kwargs)
+
+         return cast(F, wrapper)
+
+     # Allow use as both @compat_dict_method and @compat_dict_method()
+     if v1_method is None:
+         return decorator
+     return decorator(v1_method)
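For context, a minimal usage sketch of the compat_dict_method decorator added above. This is hand-written, not code from the package; MyModel and its fields are invented:

from pydantic import BaseModel

from datahub.pydantic.compat import compat_dict_method


class MyModel(BaseModel):
    name: str
    count: int = 0

    @compat_dict_method
    def dict(self, **kwargs):
        # Runs on Pydantic v1; on v2 the wrapper redirects to model_dump().
        return super().dict(**kwargs)


m = MyModel(name="example")
print(m.dict(exclude={"count"}))  # {'name': 'example'} on both v1 and v2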
datahub/sdk/__init__.py CHANGED
@@ -1,7 +1,7 @@
- import warnings
+ import types

  import datahub.metadata.schema_classes as models
- from datahub.errors import ExperimentalWarning, SdkUsageError
+ from datahub.errors import SdkUsageError
  from datahub.ingestion.graph.config import DatahubClientConfig
  from datahub.metadata.urns import (
      ChartUrn,
@@ -20,14 +20,32 @@ from datahub.metadata.urns import (
  from datahub.sdk.container import Container
  from datahub.sdk.dataset import Dataset
  from datahub.sdk.main_client import DataHubClient
+ from datahub.sdk.search_filters import Filter, FilterDsl

- warnings.warn(
-     "The new datahub SDK (e.g. datahub.sdk.*) is experimental. "
-     "Our typical backwards-compatibility and stability guarantees do not apply to this code. "
-     "When it's promoted to stable, the import path will change "
-     "from `from datahub.sdk import ...` to `from datahub import ...`.",
-     ExperimentalWarning,
-     stacklevel=2,
- )
- del warnings
- del ExperimentalWarning
+ # We want to print out the warning if people do `from datahub.sdk import X`.
+ # But we don't want to print out warnings if they're doing a more direct
+ # import like `from datahub.sdk.container import Container`, since that's
+ # what our internal code does.
+ _vars = {}
+ for _name, _value in list(locals().items()):
+     if not _name.startswith("_") and (
+         _name == "models" or not isinstance(_value, types.ModuleType)
+     ):
+         _vars[_name] = _value
+         del locals()[_name]
+
+
+ def __getattr__(name):
+     import warnings
+
+     from datahub.errors import ExperimentalWarning
+
+     warnings.warn(
+         "The new datahub SDK (e.g. datahub.sdk.*) is experimental. "
+         "Our typical backwards-compatibility and stability guarantees do not apply to this code. "
+         "When it's promoted to stable, the import path will change "
+         "from `from datahub.sdk import ...` to `from datahub import ...`.",
+         ExperimentalWarning,
+         stacklevel=2,
+     )
+     return _vars[name]
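The replacement relies on PEP 562 (module-level __getattr__), so the warning fires only when a name is looked up through datahub.sdk itself. A minimal standalone sketch of the pattern; the module and names here are invented for illustration:

# demo_module.py
import warnings

_vars = {"Dataset": object()}  # stand-in for symbols captured at import time


def __getattr__(name):
    # Python calls this only for names not found as normal module attributes,
    # e.g. on `from demo_module import Dataset`.
    if name not in _vars:
        raise AttributeError(name)
    warnings.warn(f"{name} is experimental", FutureWarning, stacklevel=2)
    return _vars[name]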
datahub/sdk/_all_entities.py CHANGED
@@ -1,8 +1,8 @@
  from typing import Dict, List, Type

- from datahub.sdk._entity import Entity
  from datahub.sdk.container import Container
  from datahub.sdk.dataset import Dataset
+ from datahub.sdk.entity import Entity

  # TODO: Is there a better way to declare this?
  ENTITY_CLASSES_LIST: List[Type[Entity]] = [
datahub/sdk/_attribution.py CHANGED
@@ -5,6 +5,10 @@ from typing import Iterator

  from datahub.utilities.str_enum import StrEnum

+ # TODO: This attribution setup is not the final form. I expect that once we have better
+ # backend support for attribution and attribution-oriented patch, this will become a bit
+ # more sophisticated.
+

  class KnownAttribution(StrEnum):
      INGESTION = "INGESTION"