acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (503)
  1. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
  2. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
  3. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
  5. datahub/_version.py +1 -1
  6. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  7. datahub/api/entities/assertion/assertion.py +1 -1
  8. datahub/api/entities/common/serialized_value.py +1 -1
  9. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  10. datahub/api/entities/datacontract/datacontract.py +35 -3
  11. datahub/api/entities/datajob/dataflow.py +18 -3
  12. datahub/api/entities/datajob/datajob.py +24 -4
  13. datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
  14. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  15. datahub/api/entities/dataset/dataset.py +47 -72
  16. datahub/api/entities/external/__init__.py +0 -0
  17. datahub/api/entities/external/external_entities.py +724 -0
  18. datahub/api/entities/external/external_tag.py +147 -0
  19. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  20. datahub/api/entities/external/restricted_text.py +172 -0
  21. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  22. datahub/api/entities/forms/forms.py +37 -37
  23. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  24. datahub/api/graphql/assertion.py +1 -1
  25. datahub/api/graphql/base.py +8 -6
  26. datahub/api/graphql/operation.py +14 -10
  27. datahub/cli/check_cli.py +91 -9
  28. datahub/cli/cli_utils.py +63 -0
  29. datahub/cli/config_utils.py +20 -12
  30. datahub/cli/container_cli.py +5 -0
  31. datahub/cli/delete_cli.py +133 -34
  32. datahub/cli/docker_check.py +110 -14
  33. datahub/cli/docker_cli.py +155 -231
  34. datahub/cli/exists_cli.py +2 -3
  35. datahub/cli/get_cli.py +2 -3
  36. datahub/cli/graphql_cli.py +1422 -0
  37. datahub/cli/iceberg_cli.py +11 -5
  38. datahub/cli/ingest_cli.py +25 -26
  39. datahub/cli/migrate.py +12 -9
  40. datahub/cli/migration_utils.py +4 -3
  41. datahub/cli/put_cli.py +4 -6
  42. datahub/cli/quickstart_versioning.py +53 -10
  43. datahub/cli/specific/assertions_cli.py +39 -7
  44. datahub/cli/specific/datacontract_cli.py +57 -9
  45. datahub/cli/specific/dataproduct_cli.py +12 -24
  46. datahub/cli/specific/dataset_cli.py +31 -21
  47. datahub/cli/specific/forms_cli.py +2 -5
  48. datahub/cli/specific/group_cli.py +2 -3
  49. datahub/cli/specific/structuredproperties_cli.py +5 -7
  50. datahub/cli/specific/user_cli.py +174 -4
  51. datahub/cli/state_cli.py +2 -3
  52. datahub/cli/timeline_cli.py +2 -3
  53. datahub/configuration/common.py +46 -2
  54. datahub/configuration/connection_resolver.py +5 -2
  55. datahub/configuration/env_vars.py +331 -0
  56. datahub/configuration/import_resolver.py +7 -4
  57. datahub/configuration/kafka.py +21 -1
  58. datahub/configuration/pydantic_migration_helpers.py +6 -13
  59. datahub/configuration/source_common.py +4 -3
  60. datahub/configuration/validate_field_deprecation.py +5 -2
  61. datahub/configuration/validate_field_removal.py +8 -2
  62. datahub/configuration/validate_field_rename.py +6 -5
  63. datahub/configuration/validate_multiline_string.py +5 -2
  64. datahub/emitter/mce_builder.py +12 -8
  65. datahub/emitter/mcp.py +20 -5
  66. datahub/emitter/mcp_builder.py +12 -0
  67. datahub/emitter/request_helper.py +138 -15
  68. datahub/emitter/response_helper.py +111 -19
  69. datahub/emitter/rest_emitter.py +399 -163
  70. datahub/entrypoints.py +10 -5
  71. datahub/errors.py +12 -0
  72. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
  73. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  74. datahub/ingestion/api/common.py +9 -0
  75. datahub/ingestion/api/decorators.py +15 -3
  76. datahub/ingestion/api/report.py +381 -3
  77. datahub/ingestion/api/sink.py +27 -2
  78. datahub/ingestion/api/source.py +174 -62
  79. datahub/ingestion/api/source_helpers.py +41 -3
  80. datahub/ingestion/api/source_protocols.py +23 -0
  81. datahub/ingestion/autogenerated/__init__.py +0 -0
  82. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  83. datahub/ingestion/autogenerated/lineage.json +402 -0
  84. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  85. datahub/ingestion/extractor/schema_util.py +31 -5
  86. datahub/ingestion/glossary/classification_mixin.py +9 -2
  87. datahub/ingestion/graph/client.py +492 -55
  88. datahub/ingestion/graph/config.py +18 -2
  89. datahub/ingestion/graph/filters.py +96 -32
  90. datahub/ingestion/graph/links.py +55 -0
  91. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  92. datahub/ingestion/run/pipeline.py +90 -23
  93. datahub/ingestion/run/pipeline_config.py +3 -3
  94. datahub/ingestion/sink/datahub_kafka.py +1 -0
  95. datahub/ingestion/sink/datahub_rest.py +31 -23
  96. datahub/ingestion/sink/file.py +1 -0
  97. datahub/ingestion/source/abs/config.py +1 -1
  98. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  99. datahub/ingestion/source/abs/source.py +15 -30
  100. datahub/ingestion/source/apply/datahub_apply.py +6 -5
  101. datahub/ingestion/source/aws/aws_common.py +185 -13
  102. datahub/ingestion/source/aws/glue.py +517 -244
  103. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  104. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  105. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  106. datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
  107. datahub/ingestion/source/aws/tag_entities.py +270 -0
  108. datahub/ingestion/source/azure/azure_common.py +3 -3
  109. datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
  110. datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
  111. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
  112. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
  113. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  114. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  115. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  116. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  117. datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
  118. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  119. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  120. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  121. datahub/ingestion/source/cassandra/cassandra.py +7 -18
  122. datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
  123. datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
  124. datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
  125. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  126. datahub/ingestion/source/common/data_platforms.py +23 -0
  127. datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
  128. datahub/ingestion/source/common/subtypes.py +73 -1
  129. datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
  130. datahub/ingestion/source/data_lake_common/object_store.py +732 -0
  131. datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
  132. datahub/ingestion/source/datahub/config.py +19 -5
  133. datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
  134. datahub/ingestion/source/datahub/datahub_source.py +11 -1
  135. datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
  136. datahub/ingestion/source/dbt/dbt_common.py +270 -26
  137. datahub/ingestion/source/dbt/dbt_core.py +88 -47
  138. datahub/ingestion/source/dbt/dbt_tests.py +8 -6
  139. datahub/ingestion/source/debug/__init__.py +0 -0
  140. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  141. datahub/ingestion/source/delta_lake/config.py +9 -5
  142. datahub/ingestion/source/delta_lake/source.py +8 -0
  143. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  144. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  145. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  146. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
  147. datahub/ingestion/source/dremio/dremio_entities.py +6 -5
  148. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  149. datahub/ingestion/source/dremio/dremio_source.py +228 -215
  150. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  151. datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
  152. datahub/ingestion/source/excel/__init__.py +0 -0
  153. datahub/ingestion/source/excel/config.py +92 -0
  154. datahub/ingestion/source/excel/excel_file.py +539 -0
  155. datahub/ingestion/source/excel/profiling.py +308 -0
  156. datahub/ingestion/source/excel/report.py +49 -0
  157. datahub/ingestion/source/excel/source.py +662 -0
  158. datahub/ingestion/source/excel/util.py +18 -0
  159. datahub/ingestion/source/feast.py +12 -14
  160. datahub/ingestion/source/file.py +3 -0
  161. datahub/ingestion/source/fivetran/config.py +67 -8
  162. datahub/ingestion/source/fivetran/fivetran.py +228 -43
  163. datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
  164. datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
  165. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  166. datahub/ingestion/source/fivetran/response_models.py +97 -0
  167. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  168. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
  169. datahub/ingestion/source/gcs/gcs_source.py +53 -10
  170. datahub/ingestion/source/gcs/gcs_utils.py +36 -9
  171. datahub/ingestion/source/ge_data_profiler.py +146 -33
  172. datahub/ingestion/source/ge_profiling_config.py +26 -11
  173. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  174. datahub/ingestion/source/grafana/field_utils.py +307 -0
  175. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  176. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  177. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  178. datahub/ingestion/source/grafana/lineage.py +202 -0
  179. datahub/ingestion/source/grafana/models.py +137 -0
  180. datahub/ingestion/source/grafana/report.py +90 -0
  181. datahub/ingestion/source/grafana/types.py +16 -0
  182. datahub/ingestion/source/hex/__init__.py +0 -0
  183. datahub/ingestion/source/hex/api.py +402 -0
  184. datahub/ingestion/source/hex/constants.py +8 -0
  185. datahub/ingestion/source/hex/hex.py +311 -0
  186. datahub/ingestion/source/hex/mapper.py +412 -0
  187. datahub/ingestion/source/hex/model.py +78 -0
  188. datahub/ingestion/source/hex/query_fetcher.py +307 -0
  189. datahub/ingestion/source/iceberg/iceberg.py +385 -164
  190. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  191. datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
  192. datahub/ingestion/source/identity/azure_ad.py +1 -1
  193. datahub/ingestion/source/identity/okta.py +1 -14
  194. datahub/ingestion/source/kafka/kafka.py +28 -71
  195. datahub/ingestion/source/kafka/kafka_config.py +78 -0
  196. datahub/ingestion/source/kafka_connect/common.py +2 -2
  197. datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
  198. datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
  199. datahub/ingestion/source/ldap.py +1 -1
  200. datahub/ingestion/source/looker/looker_common.py +216 -86
  201. datahub/ingestion/source/looker/looker_config.py +15 -4
  202. datahub/ingestion/source/looker/looker_constant.py +4 -0
  203. datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
  204. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  205. datahub/ingestion/source/looker/looker_source.py +539 -555
  206. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  207. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  208. datahub/ingestion/source/looker/lookml_config.py +31 -3
  209. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  210. datahub/ingestion/source/looker/lookml_source.py +103 -118
  211. datahub/ingestion/source/looker/view_upstream.py +494 -1
  212. datahub/ingestion/source/metabase.py +32 -6
  213. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  214. datahub/ingestion/source/metadata/lineage.py +11 -10
  215. datahub/ingestion/source/mlflow.py +254 -23
  216. datahub/ingestion/source/mock_data/__init__.py +0 -0
  217. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  218. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  219. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  220. datahub/ingestion/source/mode.py +359 -181
  221. datahub/ingestion/source/mongodb.py +11 -1
  222. datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
  223. datahub/ingestion/source/nifi.py +5 -5
  224. datahub/ingestion/source/openapi.py +85 -38
  225. datahub/ingestion/source/openapi_parser.py +59 -40
  226. datahub/ingestion/source/powerbi/config.py +92 -27
  227. datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
  228. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  229. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  230. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
  231. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  232. datahub/ingestion/source/powerbi/powerbi.py +66 -32
  233. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  234. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
  235. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  236. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  237. datahub/ingestion/source/preset.py +3 -3
  238. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  239. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  240. datahub/ingestion/source/redash.py +1 -1
  241. datahub/ingestion/source/redshift/config.py +15 -9
  242. datahub/ingestion/source/redshift/datashares.py +1 -1
  243. datahub/ingestion/source/redshift/lineage.py +386 -687
  244. datahub/ingestion/source/redshift/profile.py +2 -2
  245. datahub/ingestion/source/redshift/query.py +24 -20
  246. datahub/ingestion/source/redshift/redshift.py +52 -111
  247. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  248. datahub/ingestion/source/redshift/report.py +0 -2
  249. datahub/ingestion/source/redshift/usage.py +13 -11
  250. datahub/ingestion/source/s3/report.py +4 -2
  251. datahub/ingestion/source/s3/source.py +515 -244
  252. datahub/ingestion/source/sac/sac.py +3 -1
  253. datahub/ingestion/source/salesforce.py +28 -13
  254. datahub/ingestion/source/schema/json_schema.py +14 -14
  255. datahub/ingestion/source/schema_inference/object.py +22 -6
  256. datahub/ingestion/source/sigma/config.py +75 -8
  257. datahub/ingestion/source/sigma/data_classes.py +3 -0
  258. datahub/ingestion/source/sigma/sigma.py +36 -7
  259. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  260. datahub/ingestion/source/slack/slack.py +403 -140
  261. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  262. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  263. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  264. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  265. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  266. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  267. datahub/ingestion/source/snowflake/constants.py +4 -0
  268. datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
  269. datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
  270. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
  271. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  272. datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
  273. datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
  274. datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
  275. datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
  276. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
  277. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  278. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  279. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  280. datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
  281. datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
  282. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  283. datahub/ingestion/source/sql/athena.py +219 -26
  284. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  285. datahub/ingestion/source/sql/clickhouse.py +29 -9
  286. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  287. datahub/ingestion/source/sql/druid.py +9 -4
  288. datahub/ingestion/source/sql/hana.py +3 -1
  289. datahub/ingestion/source/sql/hive.py +28 -8
  290. datahub/ingestion/source/sql/hive_metastore.py +24 -25
  291. datahub/ingestion/source/sql/mariadb.py +0 -1
  292. datahub/ingestion/source/sql/mssql/job_models.py +18 -2
  293. datahub/ingestion/source/sql/mssql/source.py +376 -62
  294. datahub/ingestion/source/sql/mysql.py +154 -4
  295. datahub/ingestion/source/sql/oracle.py +62 -11
  296. datahub/ingestion/source/sql/postgres.py +142 -6
  297. datahub/ingestion/source/sql/presto.py +20 -2
  298. datahub/ingestion/source/sql/sql_common.py +281 -49
  299. datahub/ingestion/source/sql/sql_config.py +1 -34
  300. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  301. datahub/ingestion/source/sql/sql_types.py +27 -2
  302. datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
  303. datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
  304. datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
  305. datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
  306. datahub/ingestion/source/sql/teradata.py +1028 -245
  307. datahub/ingestion/source/sql/trino.py +43 -10
  308. datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
  309. datahub/ingestion/source/sql/vertica.py +14 -7
  310. datahub/ingestion/source/sql_queries.py +219 -121
  311. datahub/ingestion/source/state/checkpoint.py +8 -29
  312. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  313. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  314. datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
  315. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  316. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  317. datahub/ingestion/source/superset.py +810 -126
  318. datahub/ingestion/source/tableau/tableau.py +172 -69
  319. datahub/ingestion/source/tableau/tableau_common.py +11 -4
  320. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  321. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  322. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  323. datahub/ingestion/source/unity/config.py +161 -40
  324. datahub/ingestion/source/unity/connection.py +61 -0
  325. datahub/ingestion/source/unity/connection_test.py +1 -0
  326. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  327. datahub/ingestion/source/unity/proxy.py +794 -51
  328. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  329. datahub/ingestion/source/unity/proxy_types.py +36 -2
  330. datahub/ingestion/source/unity/report.py +15 -3
  331. datahub/ingestion/source/unity/source.py +465 -131
  332. datahub/ingestion/source/unity/tag_entities.py +197 -0
  333. datahub/ingestion/source/unity/usage.py +46 -4
  334. datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
  335. datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
  336. datahub/ingestion/source/usage/usage_common.py +4 -68
  337. datahub/ingestion/source/vertexai/__init__.py +0 -0
  338. datahub/ingestion/source/vertexai/vertexai.py +1367 -0
  339. datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
  340. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
  341. datahub/ingestion/source_config/pulsar.py +3 -1
  342. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  343. datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
  344. datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
  345. datahub/ingestion/transformer/base_transformer.py +8 -5
  346. datahub/ingestion/transformer/dataset_domain.py +1 -1
  347. datahub/ingestion/transformer/set_browse_path.py +112 -0
  348. datahub/integrations/assertion/common.py +3 -2
  349. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  350. datahub/lite/lite_util.py +2 -2
  351. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
  352. datahub/metadata/_urns/urn_defs.py +1866 -1582
  353. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  354. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  355. datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
  356. datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
  357. datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
  358. datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
  359. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  360. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  361. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  362. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
  363. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  364. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  365. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  366. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  367. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  368. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  369. datahub/metadata/schema.avsc +18404 -16617
  370. datahub/metadata/schema_classes.py +3 -3
  371. datahub/metadata/schemas/Actors.avsc +38 -1
  372. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  373. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  374. datahub/metadata/schemas/Applications.avsc +38 -0
  375. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  376. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  377. datahub/metadata/schemas/ChartKey.avsc +1 -0
  378. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  379. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  380. datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
  381. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  382. datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
  383. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  384. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  385. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  386. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  387. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  388. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  389. datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  390. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  391. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  392. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  393. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  394. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  395. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  396. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  397. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  398. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  399. datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
  400. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  401. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  402. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  403. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  404. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  405. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  406. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  407. datahub/metadata/schemas/Deprecation.avsc +2 -0
  408. datahub/metadata/schemas/DomainKey.avsc +2 -1
  409. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  410. datahub/metadata/schemas/FormInfo.avsc +5 -0
  411. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  412. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  413. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  414. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  415. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  416. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  417. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  418. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  419. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  420. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  421. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  422. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  423. datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
  424. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  425. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  426. datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
  427. datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
  428. datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
  429. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  430. datahub/metadata/schemas/Operation.avsc +21 -2
  431. datahub/metadata/schemas/Ownership.avsc +69 -0
  432. datahub/metadata/schemas/QueryProperties.avsc +24 -2
  433. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  434. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  435. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  436. datahub/metadata/schemas/Siblings.avsc +2 -0
  437. datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
  438. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  439. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  440. datahub/metadata/schemas/SystemMetadata.avsc +147 -0
  441. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  442. datahub/metadata/schemas/__init__.py +3 -3
  443. datahub/sdk/__init__.py +7 -0
  444. datahub/sdk/_all_entities.py +15 -0
  445. datahub/sdk/_shared.py +393 -10
  446. datahub/sdk/_utils.py +4 -0
  447. datahub/sdk/chart.py +386 -0
  448. datahub/sdk/container.py +7 -0
  449. datahub/sdk/dashboard.py +453 -0
  450. datahub/sdk/dataflow.py +309 -0
  451. datahub/sdk/datajob.py +367 -0
  452. datahub/sdk/dataset.py +180 -4
  453. datahub/sdk/entity.py +99 -3
  454. datahub/sdk/entity_client.py +154 -12
  455. datahub/sdk/lineage_client.py +943 -0
  456. datahub/sdk/main_client.py +83 -8
  457. datahub/sdk/mlmodel.py +383 -0
  458. datahub/sdk/mlmodelgroup.py +240 -0
  459. datahub/sdk/search_client.py +85 -8
  460. datahub/sdk/search_filters.py +393 -68
  461. datahub/secret/datahub_secret_store.py +5 -1
  462. datahub/secret/environment_secret_store.py +29 -0
  463. datahub/secret/file_secret_store.py +49 -0
  464. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  465. datahub/specific/aspect_helpers/siblings.py +73 -0
  466. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  467. datahub/specific/chart.py +1 -1
  468. datahub/specific/datajob.py +15 -1
  469. datahub/specific/dataproduct.py +4 -0
  470. datahub/specific/dataset.py +51 -59
  471. datahub/sql_parsing/_sqlglot_patch.py +1 -2
  472. datahub/sql_parsing/fingerprint_utils.py +6 -0
  473. datahub/sql_parsing/split_statements.py +30 -3
  474. datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
  475. datahub/sql_parsing/sqlglot_lineage.py +517 -44
  476. datahub/sql_parsing/sqlglot_utils.py +30 -18
  477. datahub/sql_parsing/tool_meta_extractor.py +25 -2
  478. datahub/telemetry/telemetry.py +30 -16
  479. datahub/testing/check_imports.py +1 -1
  480. datahub/testing/docker_utils.py +8 -2
  481. datahub/testing/mce_helpers.py +421 -0
  482. datahub/testing/mcp_diff.py +17 -21
  483. datahub/testing/sdk_v2_helpers.py +18 -0
  484. datahub/upgrade/upgrade.py +86 -30
  485. datahub/utilities/file_backed_collections.py +14 -15
  486. datahub/utilities/hive_schema_to_avro.py +2 -2
  487. datahub/utilities/ingest_utils.py +2 -2
  488. datahub/utilities/is_pytest.py +3 -2
  489. datahub/utilities/logging_manager.py +30 -7
  490. datahub/utilities/mapping.py +29 -2
  491. datahub/utilities/sample_data.py +5 -4
  492. datahub/utilities/server_config_util.py +298 -10
  493. datahub/utilities/sqlalchemy_query_combiner.py +6 -4
  494. datahub/utilities/stats_collections.py +4 -0
  495. datahub/utilities/threaded_iterator_executor.py +16 -3
  496. datahub/utilities/urn_encoder.py +1 -1
  497. datahub/utilities/urns/urn.py +41 -2
  498. datahub/emitter/sql_parsing_builder.py +0 -306
  499. datahub/ingestion/source/redshift/lineage_v2.py +0 -458
  500. datahub/ingestion/source/vertexai.py +0 -697
  501. datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
  502. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
  503. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
datahub/sdk/_shared.py CHANGED
@@ -1,10 +1,12 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import warnings
4
+ from abc import ABC, abstractmethod
4
5
  from datetime import datetime
5
6
  from typing import (
6
7
  TYPE_CHECKING,
7
8
  Callable,
9
+ Dict,
8
10
  List,
9
11
  Optional,
10
12
  Sequence,
@@ -14,6 +16,7 @@ from typing import (
14
16
 
15
17
  from typing_extensions import TypeAlias, assert_never
16
18
 
19
+ import datahub.emitter.mce_builder as builder
17
20
  import datahub.metadata.schema_classes as models
18
21
  from datahub.emitter.mce_builder import (
19
22
  make_ts_millis,
@@ -24,33 +27,77 @@ from datahub.emitter.mce_builder import (
24
27
  from datahub.emitter.mcp_builder import ContainerKey
25
28
  from datahub.errors import MultipleSubtypesWarning, SdkUsageError
26
29
  from datahub.metadata.urns import (
30
+ ChartUrn,
27
31
  ContainerUrn,
28
32
  CorpGroupUrn,
29
33
  CorpUserUrn,
34
+ DashboardUrn,
35
+ DataFlowUrn,
30
36
  DataJobUrn,
31
37
  DataPlatformInstanceUrn,
32
38
  DataPlatformUrn,
39
+ DataProcessInstanceUrn,
33
40
  DatasetUrn,
34
41
  DomainUrn,
35
42
  GlossaryTermUrn,
36
43
  OwnershipTypeUrn,
44
+ StructuredPropertyUrn,
37
45
  TagUrn,
38
46
  Urn,
47
+ VersionSetUrn,
39
48
  )
40
- from datahub.sdk._utils import add_list_unique, remove_list_unique
49
+ from datahub.sdk._utils import DEFAULT_ACTOR_URN, add_list_unique, remove_list_unique
41
50
  from datahub.sdk.entity import Entity
42
51
  from datahub.utilities.urns.error import InvalidUrnError
43
52
 
44
53
  if TYPE_CHECKING:
45
54
  from datahub.sdk.container import Container
46
-
47
55
  UrnOrStr: TypeAlias = Union[Urn, str]
56
+ ChartUrnOrStr: TypeAlias = Union[str, ChartUrn]
48
57
  DatasetUrnOrStr: TypeAlias = Union[str, DatasetUrn]
49
58
  DatajobUrnOrStr: TypeAlias = Union[str, DataJobUrn]
59
+ DataflowUrnOrStr: TypeAlias = Union[str, DataFlowUrn]
60
+ DashboardUrnOrStr: TypeAlias = Union[str, DashboardUrn]
61
+ DataPlatformInstanceUrnOrStr: TypeAlias = Union[str, DataPlatformInstanceUrn]
62
+ DataPlatformUrnOrStr: TypeAlias = Union[str, DataPlatformUrn]
50
63
 
51
64
  ActorUrn: TypeAlias = Union[CorpUserUrn, CorpGroupUrn]
65
+ ActorUrnOrStr: TypeAlias = Union[str, ActorUrn]
66
+ StructuredPropertyUrnOrStr: TypeAlias = Union[str, StructuredPropertyUrn]
67
+ StructuredPropertyValueType: TypeAlias = Union[str, float, int]
68
+ StructuredPropertyInputType: TypeAlias = Dict[
69
+ StructuredPropertyUrnOrStr, Sequence[StructuredPropertyValueType]
70
+ ]
71
+
72
+ TrainingMetricsInputType: TypeAlias = Union[
73
+ List[models.MLMetricClass], Dict[str, Optional[str]]
74
+ ]
75
+ HyperParamsInputType: TypeAlias = Union[
76
+ List[models.MLHyperParamClass], Dict[str, Optional[str]]
77
+ ]
78
# Training jobs are referenced by DataProcessInstance URN, given either as a
# string or as a parsed URN object. (A Union with a single member is just that
# member, so the outer Union wrapper is not needed.)
MLTrainingJobInputType: TypeAlias = Sequence[Union[str, DataProcessInstanceUrn]]
52
79
 
53
- _DEFAULT_ACTOR_URN = CorpUserUrn("__ingestion").urn()
80
+
81
def convert_training_metrics(
    metrics: TrainingMetricsInputType,
) -> List[models.MLMetricClass]:
    """Normalize training metrics into a list of ``MLMetricClass`` objects.

    Args:
        metrics: Either a list of ``MLMetricClass`` objects, which is returned
            as-is, or a ``name -> value`` mapping.

    Returns:
        One ``MLMetricClass`` per mapping entry, or the input list itself.
    """
    if isinstance(metrics, dict):
        return [
            models.MLMetricClass(
                name=name,
                # The mapping allows None values; keep them as None instead of
                # storing the literal string "None".
                value=None if value is None else str(value),
            )
            for name, value in metrics.items()
        ]
    return metrics
90
+
91
+
92
def convert_hyper_params(
    params: HyperParamsInputType,
) -> List[models.MLHyperParamClass]:
    """Normalize hyperparameters into a list of ``MLHyperParamClass`` objects.

    Args:
        params: Either a list of ``MLHyperParamClass`` objects, which is
            returned as-is, or a ``name -> value`` mapping.

    Returns:
        One ``MLHyperParamClass`` per mapping entry, or the input list itself.
    """
    if isinstance(params, dict):
        return [
            models.MLHyperParamClass(
                name=name,
                # The mapping allows None values; keep them as None instead of
                # storing the literal string "None".
                value=None if value is None else str(value),
            )
            for name, value in params.items()
        ]
    return params
54
101
 
55
102
 
56
103
  def make_time_stamp(ts: Optional[datetime]) -> Optional[models.TimeStampClass]:
@@ -65,6 +112,130 @@ def parse_time_stamp(ts: Optional[models.TimeStampClass]) -> Optional[datetime]:
65
112
  return parse_ts_millis(ts.time)
66
113
 
67
114
 
115
def _audit_time_to_datetime(
    stamp: Optional[models.AuditStampClass],
) -> Optional[datetime]:
    """Convert an audit stamp's epoch-millisecond time into a datetime.

    Returns None when the stamp is missing or its time is 0, which this module
    uses as the "not set" placeholder.
    """
    if stamp is None or stamp.time == 0:
        return None
    # Millis / 1000 gives fractional seconds, so sub-second precision is kept.
    # With no tz argument, fromtimestamp() returns a naive local-time datetime.
    return datetime.fromtimestamp(stamp.time / 1000)


def _audit_actor_or_none(
    stamp: Optional[models.AuditStampClass],
) -> Optional[str]:
    """Return the stamp's actor, or None if it is missing or the unknown user."""
    if stamp is None or stamp.actor == builder.UNKNOWN_USER:
        return None
    return stamp.actor


def _actor_to_urn_str(actor: ActorUrnOrStr) -> str:
    """Turn an actor given as a string or URN object into a URN string.

    Strings are passed through make_user_urn(); URN objects are stringified.
    """
    if isinstance(actor, str):
        return make_user_urn(actor)
    return str(actor)


def _ensure_deleted_stamp(
    audit_stamps: models.ChangeAuditStampsClass,
) -> models.AuditStampClass:
    """Return ``audit_stamps.deleted``, creating a placeholder stamp if unset."""
    if audit_stamps.deleted is None:
        audit_stamps.deleted = models.AuditStampClass(
            time=0, actor=builder.UNKNOWN_USER
        )
    return audit_stamps.deleted


class ChangeAuditStampsMixin(ABC):
    """Mixin class for managing audit stamps on entities.

    Subclasses supply the storage through ``_get_audit_stamps`` and
    ``_set_audit_stamps``; this mixin exposes the created / last-modified /
    deleted stamps as datetimes and actor URN strings. A time of 0 or an actor
    equal to ``builder.UNKNOWN_USER`` is reported as None.
    """

    __slots__ = ()

    @abstractmethod
    def _get_audit_stamps(self) -> models.ChangeAuditStampsClass:
        """Get the audit stamps from the entity properties."""
        pass

    @abstractmethod
    def _set_audit_stamps(self, audit_stamps: models.ChangeAuditStampsClass) -> None:
        """Set the audit stamps on the entity properties."""
        pass

    @property
    def last_modified(self) -> Optional[datetime]:
        """Get the last modification timestamp, or None if it was never set."""
        return _audit_time_to_datetime(self._get_audit_stamps().lastModified)

    def set_last_modified(self, last_modified: datetime) -> None:
        """Set the last modification timestamp in audit stamps."""
        audit_stamps = self._get_audit_stamps()
        audit_stamps.lastModified.time = make_ts_millis(last_modified)
        self._set_audit_stamps(audit_stamps)

    @property
    def last_modified_by(self) -> Optional[str]:
        """Get the last modification actor URN, or None if unknown."""
        return _audit_actor_or_none(self._get_audit_stamps().lastModified)

    def set_last_modified_by(self, last_modified_by: ActorUrnOrStr) -> None:
        """Set the last modification actor in audit stamps."""
        actor = _actor_to_urn_str(last_modified_by)
        audit_stamps = self._get_audit_stamps()
        audit_stamps.lastModified.actor = actor
        self._set_audit_stamps(audit_stamps)

    @property
    def created_at(self) -> Optional[datetime]:
        """Get the creation timestamp, or None if it was never set."""
        return _audit_time_to_datetime(self._get_audit_stamps().created)

    def set_created_at(self, created_at: datetime) -> None:
        """Set the creation timestamp in audit stamps."""
        audit_stamps = self._get_audit_stamps()
        audit_stamps.created.time = make_ts_millis(created_at)
        self._set_audit_stamps(audit_stamps)

    @property
    def created_by(self) -> Optional[str]:
        """Get the creation actor URN, or None if unknown."""
        return _audit_actor_or_none(self._get_audit_stamps().created)

    def set_created_by(self, created_by: ActorUrnOrStr) -> None:
        """Set the creation actor in audit stamps."""
        actor = _actor_to_urn_str(created_by)
        audit_stamps = self._get_audit_stamps()
        audit_stamps.created.actor = actor
        self._set_audit_stamps(audit_stamps)

    @property
    def deleted_on(self) -> Optional[datetime]:
        """Get the deletion timestamp, or None if there is no deletion stamp."""
        return _audit_time_to_datetime(self._get_audit_stamps().deleted)

    def set_deleted_on(self, deleted_on: datetime) -> None:
        """Set the deletion timestamp in audit stamps."""
        audit_stamps = self._get_audit_stamps()
        # The deleted stamp is optional, so create a placeholder before writing.
        _ensure_deleted_stamp(audit_stamps).time = make_ts_millis(deleted_on)
        self._set_audit_stamps(audit_stamps)

    @property
    def deleted_by(self) -> Optional[str]:
        """Get the deletion actor URN, or None if unknown or never deleted."""
        return _audit_actor_or_none(self._get_audit_stamps().deleted)

    def set_deleted_by(self, deleted_by: ActorUrnOrStr) -> None:
        """Set the deletion actor in audit stamps."""
        actor = _actor_to_urn_str(deleted_by)
        audit_stamps = self._get_audit_stamps()
        # The deleted stamp is optional, so create a placeholder before writing.
        _ensure_deleted_stamp(audit_stamps).actor = actor
        self._set_audit_stamps(audit_stamps)
237
+
238
+
68
239
  class HasPlatformInstance(Entity):
69
240
  __slots__ = ()
70
241
 
@@ -134,7 +305,7 @@ OwnerInputType: TypeAlias = Union[
134
305
  Tuple[ActorUrn, OwnershipTypeType],
135
306
  models.OwnerClass,
136
307
  ]
137
- OwnersInputType: TypeAlias = List[OwnerInputType]
308
+ OwnersInputType: TypeAlias = Sequence[OwnerInputType]
138
309
 
139
310
 
140
311
  class HasOwnership(Entity):
@@ -235,7 +406,9 @@ class HasOwnership(Entity):
235
406
  # If you pass in a ContainerKey, we can use parent_key() to build the browse path.
236
407
  # If you pass in a list of urns, we'll use that as the browse path. Any non-urn strings
237
408
  # will be treated as raw ids.
238
- ParentContainerInputType: TypeAlias = Union["Container", ContainerKey, List[UrnOrStr]]
409
+ ParentContainerInputType: TypeAlias = Union[
410
+ "Container", ContainerKey, Sequence[UrnOrStr]
411
+ ]
239
412
 
240
413
 
241
414
  class HasContainer(Entity):
@@ -295,7 +468,7 @@ class HasContainer(Entity):
295
468
  )
296
469
  for entry in parsed_path
297
470
  ]
298
- elif container is not None:
471
+ elif isinstance(container, ContainerKey):
299
472
  container_urn = container.as_urn()
300
473
 
301
474
  browse_path_reversed = [container_urn]
@@ -354,7 +527,7 @@ class HasContainer(Entity):
354
527
 
355
528
 
356
529
  TagInputType: TypeAlias = Union[str, TagUrn, models.TagAssociationClass]
357
- TagsInputType: TypeAlias = List[TagInputType]
530
+ TagsInputType: TypeAlias = Sequence[TagInputType]
358
531
 
359
532
 
360
533
  class HasTags(Entity):
@@ -409,7 +582,7 @@ class HasTags(Entity):
409
582
  TermInputType: TypeAlias = Union[
410
583
  str, GlossaryTermUrn, models.GlossaryTermAssociationClass
411
584
  ]
412
- TermsInputType: TypeAlias = List[TermInputType]
585
+ TermsInputType: TypeAlias = Sequence[TermInputType]
413
586
 
414
587
 
415
588
  class HasTerms(Entity):
@@ -441,7 +614,7 @@ class HasTerms(Entity):
441
614
  def _terms_audit_stamp(self) -> models.AuditStampClass:
442
615
  return models.AuditStampClass(
443
616
  time=0,
444
- actor=_DEFAULT_ACTOR_URN,
617
+ actor=DEFAULT_ACTOR_URN,
445
618
  )
446
619
 
447
620
  def set_terms(self, terms: TermsInputType) -> None:
@@ -529,7 +702,7 @@ class HasInstitutionalMemory(Entity):
529
702
  def _institutional_memory_audit_stamp(self) -> models.AuditStampClass:
530
703
  return models.AuditStampClass(
531
704
  time=0,
532
- actor=_DEFAULT_ACTOR_URN,
705
+ actor=DEFAULT_ACTOR_URN,
533
706
  )
534
707
 
535
708
  @classmethod
@@ -578,3 +751,213 @@ class HasInstitutionalMemory(Entity):
578
751
  self._link_key,
579
752
  self._parse_link_association_class(link),
580
753
  )
754
+
755
+
756
class HasVersion(Entity):
    """Mixin for entities that have version properties.

    Version data is stored in the ``VersionPropertiesClass`` aspect. The
    version set URN is always derived from the entity's own URN. When the
    aspect has to exist before a version was set explicitly (e.g. when adding
    an alias), a default version of "0.1.0" is used.
    """

    # Matches the other mixins in this module.
    __slots__ = ()

    def _get_version_props(self) -> Optional[models.VersionPropertiesClass]:
        """Return the version properties aspect, or None if it is not set."""
        return self._get_aspect(models.VersionPropertiesClass)

    def _version_set_urn(self) -> str:
        """Return the version set URN string derived from this entity's URN."""
        guid = builder.datahub_guid({"urn": str(self.urn)})
        return str(VersionSetUrn(id=guid, entity_type=self.urn.ENTITY_TYPE))

    def _ensure_version_props(self) -> models.VersionPropertiesClass:
        """Return the version properties aspect, creating a default one if absent."""
        version_props = self._get_version_props()
        if version_props is None:
            version_props = models.VersionPropertiesClass(
                versionSet=self._version_set_urn(),
                version=models.VersionTagClass(versionTag="0.1.0"),
                sortId="0000000.1.0",
            )
            self._set_aspect(version_props)
        return version_props

    @property
    def version(self) -> Optional[str]:
        """Return the version tag, or None if no version is set."""
        version_props = self._get_version_props()
        if version_props and version_props.version:
            return version_props.version.versionTag
        return None

    def set_version(self, version: str) -> None:
        """Set the version of the entity.

        The version set URN is (re)derived from the entity URN, and the sort id
        is the version string left-padded with zeros to 10 characters.
        """
        version_tag = models.VersionTagClass(versionTag=version)
        version_set = self._version_set_urn()
        sort_id = version.zfill(10)  # Pad with zeros for sorting

        version_props = self._get_version_props()
        if version_props is None:
            version_props = models.VersionPropertiesClass(
                version=version_tag,
                versionSet=version_set,
                sortId=sort_id,
            )
        else:
            version_props.version = version_tag
            version_props.versionSet = version_set
            version_props.sortId = sort_id

        self._set_aspect(version_props)

    @property
    def version_aliases(self) -> List[str]:
        """Return the alias tags; an empty list (never None) when there are none."""
        version_props = self._get_version_props()
        if version_props and version_props.aliases:
            return [
                alias.versionTag
                for alias in version_props.aliases
                if alias.versionTag is not None
            ]
        return []

    def set_version_aliases(self, aliases: List[str]) -> None:
        """Replace all version aliases.

        If no version properties exist yet, they are created with the default
        version first.
        """
        version_props = self._ensure_version_props()
        version_props.aliases = [
            models.VersionTagClass(versionTag=alias) for alias in aliases
        ]
        # Re-register the aspect, as the other mutators in this class do.
        self._set_aspect(version_props)

    def add_version_alias(self, alias: str) -> None:
        """Add a version alias; adding an alias that already exists is a no-op.

        Raises:
            ValueError: If ``alias`` is empty.
        """
        if not alias:
            raise ValueError("Alias cannot be empty")
        version_props = self._ensure_version_props()
        if version_props.aliases is None:
            version_props.aliases = []
        # remove_version_alias() drops every entry with a given tag, so aliases
        # behave as a set; do not store duplicates.
        if all(existing.versionTag != alias for existing in version_props.aliases):
            version_props.aliases.append(models.VersionTagClass(versionTag=alias))
        self._set_aspect(version_props)

    def remove_version_alias(self, alias: str) -> None:
        """Remove every alias whose tag equals ``alias``; no-op if none match."""
        version_props = self._get_version_props()
        if version_props and version_props.aliases:
            version_props.aliases = [
                a for a in version_props.aliases if a.versionTag != alias
            ]
            self._set_aspect(version_props)
860
+
861
+
862
class HasStructuredProperties(Entity):
    """Mixin for entities that can carry structured property assignments."""

    __slots__ = ()

    @property
    def structured_properties(
        self,
    ) -> Optional[List[models.StructuredPropertyValueAssignmentClass]]:
        """
        Structured property value assignments currently held by the entity.

        Returns:
            The list of assignments, or None when the aspect is not present.
        """
        aspect = self._get_aspect(models.StructuredPropertiesClass)
        if aspect:
            return aspect.properties
        return None

    def _ensure_structured_properties(self) -> models.StructuredPropertiesClass:
        """
        Fetch the structured properties aspect, creating it if necessary.

        Returns:
            StructuredPropertiesClass aspect
        """
        empty_aspect = models.StructuredPropertiesClass(properties=[])
        return self._setdefault_aspect(empty_aspect)

    def set_structured_property(
        self,
        property_urn: StructuredPropertyUrnOrStr,
        values: Sequence[StructuredPropertyValueType],
    ) -> None:
        """
        Set the values of a structured property, adding the assignment if absent.

        An existing assignment gets new values and a refreshed lastModified
        stamp; a new assignment gets both created and lastModified stamps.

        Args:
            property_urn: URN of the structured property
            values: Values to assign to the property
        """
        # Parsing first rejects anything that is not a structured property urn.
        urn_str = str(StructuredPropertyUrn.from_string(property_urn))

        aspect = self._ensure_structured_properties()

        match = None
        for candidate in aspect.properties:
            if candidate.propertyUrn == urn_str:
                match = candidate
                break

        now_millis = make_ts_millis(datetime.now())

        def _stamp() -> models.AuditStampClass:
            # Build a fresh stamp each time so no two fields share an object.
            return models.AuditStampClass(time=now_millis, actor=DEFAULT_ACTOR_URN)

        if not match:
            # No assignment for this property yet: create one.
            add_list_unique(
                aspect.properties,
                key=lambda assignment: assignment.propertyUrn,
                item=models.StructuredPropertyValueAssignmentClass(
                    propertyUrn=urn_str,
                    values=list(values),
                    created=_stamp(),
                    lastModified=_stamp(),
                ),
            )
        else:
            # Overwrite the values of the existing assignment in place.
            match.values = list(values)
            match.lastModified = _stamp()

        self._set_aspect(aspect)

    def remove_structured_property(
        self, property_urn: StructuredPropertyUrnOrStr
    ) -> None:
        """
        Drop the assignment for a structured property from the entity.

        Args:
            property_urn: URN of the structured property to remove
        """
        assignments = self._ensure_structured_properties().properties
        # A throwaway assignment carrying only the urn serves as the match key.
        probe = models.StructuredPropertyValueAssignmentClass(
            propertyUrn=str(property_urn), values=[]
        )
        remove_list_unique(
            assignments,
            key=lambda assignment: assignment.propertyUrn,
            item=probe,
        )
datahub/sdk/_utils.py CHANGED
@@ -1,6 +1,10 @@
1
1
  from typing import Any, Callable, List, Protocol, TypeVar
2
2
 
3
3
  from datahub.errors import ItemNotFoundError
4
+ from datahub.metadata.urns import CorpUserUrn
5
+
6
+ # TODO: Change __ingestion to _ingestion.
7
+ DEFAULT_ACTOR_URN = CorpUserUrn("__ingestion").urn()
4
8
 
5
9
 
6
10
  class _SupportsEq(Protocol):