acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (414)
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +1 -1
  7. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  8. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  9. datahub/api/entities/dataset/dataset.py +26 -23
  10. datahub/api/entities/external/__init__.py +0 -0
  11. datahub/api/entities/external/external_entities.py +724 -0
  12. datahub/api/entities/external/external_tag.py +147 -0
  13. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  14. datahub/api/entities/external/restricted_text.py +172 -0
  15. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  16. datahub/api/entities/forms/forms.py +3 -3
  17. datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
  18. datahub/api/graphql/operation.py +10 -6
  19. datahub/cli/check_cli.py +88 -7
  20. datahub/cli/cli_utils.py +63 -0
  21. datahub/cli/config_utils.py +18 -10
  22. datahub/cli/container_cli.py +5 -0
  23. datahub/cli/delete_cli.py +125 -27
  24. datahub/cli/docker_check.py +110 -14
  25. datahub/cli/docker_cli.py +153 -229
  26. datahub/cli/exists_cli.py +0 -2
  27. datahub/cli/get_cli.py +0 -2
  28. datahub/cli/graphql_cli.py +1422 -0
  29. datahub/cli/iceberg_cli.py +5 -0
  30. datahub/cli/ingest_cli.py +3 -15
  31. datahub/cli/migrate.py +2 -0
  32. datahub/cli/put_cli.py +1 -4
  33. datahub/cli/quickstart_versioning.py +53 -10
  34. datahub/cli/specific/assertions_cli.py +37 -6
  35. datahub/cli/specific/datacontract_cli.py +54 -7
  36. datahub/cli/specific/dataproduct_cli.py +2 -15
  37. datahub/cli/specific/dataset_cli.py +1 -8
  38. datahub/cli/specific/forms_cli.py +0 -4
  39. datahub/cli/specific/group_cli.py +0 -2
  40. datahub/cli/specific/structuredproperties_cli.py +1 -4
  41. datahub/cli/specific/user_cli.py +172 -3
  42. datahub/cli/state_cli.py +0 -2
  43. datahub/cli/timeline_cli.py +0 -2
  44. datahub/configuration/common.py +40 -1
  45. datahub/configuration/connection_resolver.py +5 -2
  46. datahub/configuration/env_vars.py +331 -0
  47. datahub/configuration/import_resolver.py +7 -4
  48. datahub/configuration/kafka.py +21 -1
  49. datahub/configuration/pydantic_migration_helpers.py +6 -13
  50. datahub/configuration/source_common.py +3 -2
  51. datahub/configuration/validate_field_deprecation.py +5 -2
  52. datahub/configuration/validate_field_removal.py +8 -2
  53. datahub/configuration/validate_field_rename.py +6 -5
  54. datahub/configuration/validate_multiline_string.py +5 -2
  55. datahub/emitter/mce_builder.py +8 -4
  56. datahub/emitter/rest_emitter.py +103 -30
  57. datahub/entrypoints.py +6 -3
  58. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
  59. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  60. datahub/ingestion/api/decorators.py +15 -3
  61. datahub/ingestion/api/report.py +381 -3
  62. datahub/ingestion/api/sink.py +27 -2
  63. datahub/ingestion/api/source.py +165 -58
  64. datahub/ingestion/api/source_protocols.py +23 -0
  65. datahub/ingestion/autogenerated/__init__.py +0 -0
  66. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  67. datahub/ingestion/autogenerated/lineage.json +402 -0
  68. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  69. datahub/ingestion/extractor/schema_util.py +13 -4
  70. datahub/ingestion/glossary/classification_mixin.py +5 -0
  71. datahub/ingestion/graph/client.py +330 -25
  72. datahub/ingestion/graph/config.py +3 -2
  73. datahub/ingestion/graph/filters.py +30 -11
  74. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  75. datahub/ingestion/run/pipeline.py +81 -11
  76. datahub/ingestion/run/pipeline_config.py +2 -2
  77. datahub/ingestion/sink/datahub_kafka.py +1 -0
  78. datahub/ingestion/sink/datahub_rest.py +13 -5
  79. datahub/ingestion/sink/file.py +1 -0
  80. datahub/ingestion/source/abs/config.py +1 -1
  81. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  82. datahub/ingestion/source/abs/source.py +15 -30
  83. datahub/ingestion/source/aws/aws_common.py +185 -13
  84. datahub/ingestion/source/aws/glue.py +517 -244
  85. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  86. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  87. datahub/ingestion/source/aws/tag_entities.py +270 -0
  88. datahub/ingestion/source/azure/azure_common.py +3 -3
  89. datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
  90. datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
  91. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
  92. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
  93. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  94. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  95. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  96. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  97. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  98. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  99. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  100. datahub/ingestion/source/cassandra/cassandra.py +6 -8
  101. datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
  102. datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
  103. datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
  104. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  105. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  106. datahub/ingestion/source/common/subtypes.py +53 -0
  107. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  108. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  109. datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
  110. datahub/ingestion/source/datahub/config.py +12 -9
  111. datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
  112. datahub/ingestion/source/datahub/datahub_source.py +10 -0
  113. datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
  114. datahub/ingestion/source/dbt/dbt_common.py +224 -9
  115. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  116. datahub/ingestion/source/debug/__init__.py +0 -0
  117. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  118. datahub/ingestion/source/delta_lake/config.py +9 -5
  119. datahub/ingestion/source/delta_lake/source.py +8 -0
  120. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  121. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  122. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  123. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  124. datahub/ingestion/source/dremio/dremio_source.py +132 -98
  125. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  126. datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
  127. datahub/ingestion/source/excel/__init__.py +0 -0
  128. datahub/ingestion/source/excel/config.py +92 -0
  129. datahub/ingestion/source/excel/excel_file.py +539 -0
  130. datahub/ingestion/source/excel/profiling.py +308 -0
  131. datahub/ingestion/source/excel/report.py +49 -0
  132. datahub/ingestion/source/excel/source.py +662 -0
  133. datahub/ingestion/source/excel/util.py +18 -0
  134. datahub/ingestion/source/feast.py +8 -10
  135. datahub/ingestion/source/file.py +3 -0
  136. datahub/ingestion/source/fivetran/config.py +66 -7
  137. datahub/ingestion/source/fivetran/fivetran.py +227 -43
  138. datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
  139. datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
  140. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  141. datahub/ingestion/source/fivetran/response_models.py +97 -0
  142. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  143. datahub/ingestion/source/gcs/gcs_source.py +32 -4
  144. datahub/ingestion/source/ge_data_profiler.py +108 -31
  145. datahub/ingestion/source/ge_profiling_config.py +26 -11
  146. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  147. datahub/ingestion/source/grafana/field_utils.py +307 -0
  148. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  149. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  150. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  151. datahub/ingestion/source/grafana/lineage.py +202 -0
  152. datahub/ingestion/source/grafana/models.py +137 -0
  153. datahub/ingestion/source/grafana/report.py +90 -0
  154. datahub/ingestion/source/grafana/types.py +16 -0
  155. datahub/ingestion/source/hex/api.py +28 -1
  156. datahub/ingestion/source/hex/hex.py +16 -5
  157. datahub/ingestion/source/hex/mapper.py +16 -2
  158. datahub/ingestion/source/hex/model.py +2 -0
  159. datahub/ingestion/source/hex/query_fetcher.py +1 -1
  160. datahub/ingestion/source/iceberg/iceberg.py +123 -59
  161. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  162. datahub/ingestion/source/identity/azure_ad.py +1 -1
  163. datahub/ingestion/source/identity/okta.py +1 -14
  164. datahub/ingestion/source/kafka/kafka.py +16 -0
  165. datahub/ingestion/source/kafka_connect/common.py +2 -2
  166. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  167. datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
  168. datahub/ingestion/source/looker/looker_common.py +148 -79
  169. datahub/ingestion/source/looker/looker_config.py +15 -4
  170. datahub/ingestion/source/looker/looker_constant.py +4 -0
  171. datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
  172. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  173. datahub/ingestion/source/looker/looker_source.py +503 -547
  174. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  175. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  176. datahub/ingestion/source/looker/lookml_config.py +31 -3
  177. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  178. datahub/ingestion/source/looker/lookml_source.py +96 -117
  179. datahub/ingestion/source/looker/view_upstream.py +494 -1
  180. datahub/ingestion/source/metabase.py +32 -6
  181. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  182. datahub/ingestion/source/metadata/lineage.py +9 -9
  183. datahub/ingestion/source/mlflow.py +12 -2
  184. datahub/ingestion/source/mock_data/__init__.py +0 -0
  185. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  186. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  187. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  188. datahub/ingestion/source/mode.py +26 -5
  189. datahub/ingestion/source/mongodb.py +11 -1
  190. datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
  191. datahub/ingestion/source/nifi.py +2 -2
  192. datahub/ingestion/source/openapi.py +1 -1
  193. datahub/ingestion/source/powerbi/config.py +47 -21
  194. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  195. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  196. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  197. datahub/ingestion/source/powerbi/powerbi.py +10 -6
  198. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  199. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  200. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  201. datahub/ingestion/source/preset.py +3 -3
  202. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  203. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  204. datahub/ingestion/source/redash.py +1 -1
  205. datahub/ingestion/source/redshift/config.py +15 -9
  206. datahub/ingestion/source/redshift/datashares.py +1 -1
  207. datahub/ingestion/source/redshift/lineage.py +386 -687
  208. datahub/ingestion/source/redshift/query.py +23 -19
  209. datahub/ingestion/source/redshift/redshift.py +52 -111
  210. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  211. datahub/ingestion/source/redshift/report.py +0 -2
  212. datahub/ingestion/source/redshift/usage.py +6 -5
  213. datahub/ingestion/source/s3/report.py +4 -2
  214. datahub/ingestion/source/s3/source.py +449 -248
  215. datahub/ingestion/source/sac/sac.py +3 -1
  216. datahub/ingestion/source/salesforce.py +28 -13
  217. datahub/ingestion/source/schema/json_schema.py +14 -14
  218. datahub/ingestion/source/schema_inference/object.py +22 -6
  219. datahub/ingestion/source/sigma/data_classes.py +3 -0
  220. datahub/ingestion/source/sigma/sigma.py +7 -1
  221. datahub/ingestion/source/slack/slack.py +10 -16
  222. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  223. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  224. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  225. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  226. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  227. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  228. datahub/ingestion/source/snowflake/constants.py +3 -0
  229. datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
  230. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
  231. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
  232. datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
  233. datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
  234. datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
  235. datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
  236. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
  237. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  238. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  239. datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
  240. datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
  241. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  242. datahub/ingestion/source/sql/athena.py +217 -25
  243. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  244. datahub/ingestion/source/sql/clickhouse.py +24 -8
  245. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  246. datahub/ingestion/source/sql/druid.py +2 -2
  247. datahub/ingestion/source/sql/hana.py +3 -1
  248. datahub/ingestion/source/sql/hive.py +4 -3
  249. datahub/ingestion/source/sql/hive_metastore.py +19 -20
  250. datahub/ingestion/source/sql/mariadb.py +0 -1
  251. datahub/ingestion/source/sql/mssql/job_models.py +3 -1
  252. datahub/ingestion/source/sql/mssql/source.py +336 -57
  253. datahub/ingestion/source/sql/mysql.py +154 -4
  254. datahub/ingestion/source/sql/oracle.py +5 -5
  255. datahub/ingestion/source/sql/postgres.py +142 -6
  256. datahub/ingestion/source/sql/presto.py +2 -1
  257. datahub/ingestion/source/sql/sql_common.py +281 -49
  258. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  259. datahub/ingestion/source/sql/sql_types.py +22 -0
  260. datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
  261. datahub/ingestion/source/sql/teradata.py +1028 -245
  262. datahub/ingestion/source/sql/trino.py +11 -1
  263. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  264. datahub/ingestion/source/sql/vertica.py +14 -7
  265. datahub/ingestion/source/sql_queries.py +219 -121
  266. datahub/ingestion/source/state/checkpoint.py +8 -29
  267. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  268. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  269. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  270. datahub/ingestion/source/superset.py +314 -67
  271. datahub/ingestion/source/tableau/tableau.py +135 -59
  272. datahub/ingestion/source/tableau/tableau_common.py +9 -2
  273. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  274. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  275. datahub/ingestion/source/unity/config.py +160 -40
  276. datahub/ingestion/source/unity/connection.py +61 -0
  277. datahub/ingestion/source/unity/connection_test.py +1 -0
  278. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  279. datahub/ingestion/source/unity/proxy.py +794 -51
  280. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  281. datahub/ingestion/source/unity/proxy_types.py +36 -2
  282. datahub/ingestion/source/unity/report.py +15 -3
  283. datahub/ingestion/source/unity/source.py +465 -131
  284. datahub/ingestion/source/unity/tag_entities.py +197 -0
  285. datahub/ingestion/source/unity/usage.py +46 -4
  286. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  287. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
  288. datahub/ingestion/source/usage/usage_common.py +4 -3
  289. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  290. datahub/ingestion/source_config/pulsar.py +3 -1
  291. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  292. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  293. datahub/ingestion/transformer/base_transformer.py +8 -5
  294. datahub/ingestion/transformer/set_browse_path.py +112 -0
  295. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  296. datahub/metadata/_internal_schema_classes.py +6806 -4871
  297. datahub/metadata/_urns/urn_defs.py +1767 -1539
  298. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  299. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  300. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  301. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  302. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  303. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
  304. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  305. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  306. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  307. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  308. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  309. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  310. datahub/metadata/schema.avsc +18395 -16979
  311. datahub/metadata/schemas/Actors.avsc +38 -1
  312. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  313. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  314. datahub/metadata/schemas/Applications.avsc +38 -0
  315. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  316. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  317. datahub/metadata/schemas/ChartKey.avsc +1 -0
  318. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  319. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  320. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  321. datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
  322. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  323. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  324. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  325. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  326. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  327. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  328. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  329. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  330. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  331. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  332. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  333. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  334. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  335. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  336. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  337. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  338. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  339. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  340. datahub/metadata/schemas/DomainKey.avsc +2 -1
  341. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  342. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  343. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  344. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  345. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  346. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  347. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  348. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  349. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  350. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  351. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  352. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  353. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  354. datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
  355. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  356. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  357. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  358. datahub/metadata/schemas/Operation.avsc +4 -2
  359. datahub/metadata/schemas/Ownership.avsc +69 -0
  360. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  361. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  362. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  363. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  364. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  365. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  366. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  367. datahub/sdk/__init__.py +2 -0
  368. datahub/sdk/_all_entities.py +7 -0
  369. datahub/sdk/_shared.py +249 -5
  370. datahub/sdk/chart.py +386 -0
  371. datahub/sdk/container.py +7 -0
  372. datahub/sdk/dashboard.py +453 -0
  373. datahub/sdk/dataflow.py +7 -0
  374. datahub/sdk/datajob.py +45 -13
  375. datahub/sdk/dataset.py +56 -2
  376. datahub/sdk/entity_client.py +111 -9
  377. datahub/sdk/lineage_client.py +663 -82
  378. datahub/sdk/main_client.py +50 -16
  379. datahub/sdk/mlmodel.py +120 -38
  380. datahub/sdk/mlmodelgroup.py +7 -0
  381. datahub/sdk/search_client.py +7 -3
  382. datahub/sdk/search_filters.py +304 -36
  383. datahub/secret/datahub_secret_store.py +3 -0
  384. datahub/secret/environment_secret_store.py +29 -0
  385. datahub/secret/file_secret_store.py +49 -0
  386. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  387. datahub/specific/aspect_helpers/siblings.py +73 -0
  388. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  389. datahub/specific/chart.py +1 -1
  390. datahub/specific/datajob.py +15 -1
  391. datahub/specific/dataproduct.py +4 -0
  392. datahub/specific/dataset.py +39 -59
  393. datahub/sql_parsing/split_statements.py +13 -0
  394. datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
  395. datahub/sql_parsing/sqlglot_lineage.py +196 -42
  396. datahub/sql_parsing/sqlglot_utils.py +12 -4
  397. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  398. datahub/telemetry/telemetry.py +28 -14
  399. datahub/testing/sdk_v2_helpers.py +7 -1
  400. datahub/upgrade/upgrade.py +73 -17
  401. datahub/utilities/file_backed_collections.py +8 -9
  402. datahub/utilities/is_pytest.py +3 -2
  403. datahub/utilities/logging_manager.py +22 -6
  404. datahub/utilities/mapping.py +29 -2
  405. datahub/utilities/sample_data.py +5 -4
  406. datahub/utilities/server_config_util.py +10 -1
  407. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  408. datahub/utilities/stats_collections.py +4 -0
  409. datahub/utilities/urns/urn.py +41 -2
  410. datahub/emitter/sql_parsing_builder.py +0 -306
  411. datahub/ingestion/source/redshift/lineage_v2.py +0 -466
  412. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
  413. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
  414. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
datahub/sdk/_shared.py CHANGED
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import warnings
4
+ from abc import ABC, abstractmethod
4
5
  from datetime import datetime
5
6
  from typing import (
6
7
  TYPE_CHECKING,
@@ -26,9 +27,11 @@ from datahub.emitter.mce_builder import (
26
27
  from datahub.emitter.mcp_builder import ContainerKey
27
28
  from datahub.errors import MultipleSubtypesWarning, SdkUsageError
28
29
  from datahub.metadata.urns import (
30
+ ChartUrn,
29
31
  ContainerUrn,
30
32
  CorpGroupUrn,
31
33
  CorpUserUrn,
34
+ DashboardUrn,
32
35
  DataFlowUrn,
33
36
  DataJobUrn,
34
37
  DataPlatformInstanceUrn,
@@ -38,6 +41,7 @@ from datahub.metadata.urns import (
38
41
  DomainUrn,
39
42
  GlossaryTermUrn,
40
43
  OwnershipTypeUrn,
44
+ StructuredPropertyUrn,
41
45
  TagUrn,
42
46
  Urn,
43
47
  VersionSetUrn,
@@ -49,11 +53,21 @@ from datahub.utilities.urns.error import InvalidUrnError
49
53
  if TYPE_CHECKING:
50
54
  from datahub.sdk.container import Container
51
55
  UrnOrStr: TypeAlias = Union[Urn, str]
56
+ ChartUrnOrStr: TypeAlias = Union[str, ChartUrn]
52
57
  DatasetUrnOrStr: TypeAlias = Union[str, DatasetUrn]
53
58
  DatajobUrnOrStr: TypeAlias = Union[str, DataJobUrn]
54
59
  DataflowUrnOrStr: TypeAlias = Union[str, DataFlowUrn]
60
+ DashboardUrnOrStr: TypeAlias = Union[str, DashboardUrn]
61
+ DataPlatformInstanceUrnOrStr: TypeAlias = Union[str, DataPlatformInstanceUrn]
62
+ DataPlatformUrnOrStr: TypeAlias = Union[str, DataPlatformUrn]
55
63
 
56
64
  ActorUrn: TypeAlias = Union[CorpUserUrn, CorpGroupUrn]
65
+ ActorUrnOrStr: TypeAlias = Union[str, ActorUrn]
66
+ StructuredPropertyUrnOrStr: TypeAlias = Union[str, StructuredPropertyUrn]
67
+ StructuredPropertyValueType: TypeAlias = Union[str, float, int]
68
+ StructuredPropertyInputType: TypeAlias = Dict[
69
+ StructuredPropertyUrnOrStr, Sequence[StructuredPropertyValueType]
70
+ ]
57
71
 
58
72
  TrainingMetricsInputType: TypeAlias = Union[
59
73
  List[models.MLMetricClass], Dict[str, Optional[str]]
@@ -98,6 +112,130 @@ def parse_time_stamp(ts: Optional[models.TimeStampClass]) -> Optional[datetime]:
98
112
  return parse_ts_millis(ts.time)
99
113
 
100
114
 
115
+ class ChangeAuditStampsMixin(ABC):
116
+ """Mixin class for managing audit stamps on entities."""
117
+
118
+ __slots__ = ()
119
+
120
+ @abstractmethod
121
+ def _get_audit_stamps(self) -> models.ChangeAuditStampsClass:
122
+ """Get the audit stamps from the entity properties."""
123
+ pass
124
+
125
+ @abstractmethod
126
+ def _set_audit_stamps(self, audit_stamps: models.ChangeAuditStampsClass) -> None:
127
+ """Set the audit stamps on the entity properties."""
128
+ pass
129
+
130
+ @property
131
+ def last_modified(self) -> Optional[datetime]:
132
+ """Get the last modification timestamp from audit stamps."""
133
+ audit_stamps: models.ChangeAuditStampsClass = self._get_audit_stamps()
134
+ if audit_stamps.lastModified.time == 0:
135
+ return None
136
+ return datetime.fromtimestamp(
137
+ audit_stamps.lastModified.time / 1000
138
+ ) # supports only seconds precision
139
+
140
+ def set_last_modified(self, last_modified: datetime) -> None:
141
+ """Set the last modification timestamp in audit stamps."""
142
+ audit_stamps: models.ChangeAuditStampsClass = self._get_audit_stamps()
143
+ audit_stamps.lastModified.time = make_ts_millis(last_modified)
144
+ self._set_audit_stamps(audit_stamps)
145
+
146
+ @property
147
+ def last_modified_by(self) -> Optional[str]:
148
+ """Get the last modification actor from audit stamps."""
149
+ audit_stamps: models.ChangeAuditStampsClass = self._get_audit_stamps()
150
+ if audit_stamps.lastModified.actor == builder.UNKNOWN_USER:
151
+ return None
152
+ return audit_stamps.lastModified.actor
153
+
154
+ def set_last_modified_by(self, last_modified_by: ActorUrnOrStr) -> None:
155
+ """Set the last modification actor in audit stamps."""
156
+ if isinstance(last_modified_by, str):
157
+ last_modified_by = make_user_urn(last_modified_by)
158
+ audit_stamps: models.ChangeAuditStampsClass = self._get_audit_stamps()
159
+ audit_stamps.lastModified.actor = str(last_modified_by)
160
+ self._set_audit_stamps(audit_stamps)
161
+
162
+ @property
163
+ def created_at(self) -> Optional[datetime]:
164
+ """Get the creation timestamp from audit stamps."""
165
+ audit_stamps: models.ChangeAuditStampsClass = self._get_audit_stamps()
166
+ if audit_stamps.created.time == 0:
167
+ return None
168
+ return datetime.fromtimestamp(
169
+ audit_stamps.created.time / 1000
170
+ ) # supports only seconds precision
171
+
172
+ def set_created_at(self, created_at: datetime) -> None:
173
+ """Set the creation timestamp in audit stamps."""
174
+ audit_stamps: models.ChangeAuditStampsClass = self._get_audit_stamps()
175
+ audit_stamps.created.time = make_ts_millis(created_at)
176
+ self._set_audit_stamps(audit_stamps)
177
+
178
+ @property
179
+ def created_by(self) -> Optional[ActorUrnOrStr]:
180
+ """Get the creation actor from audit stamps."""
181
+ audit_stamps: models.ChangeAuditStampsClass = self._get_audit_stamps()
182
+ if audit_stamps.created.actor == builder.UNKNOWN_USER:
183
+ return None
184
+ return audit_stamps.created.actor
185
+
186
+ def set_created_by(self, created_by: ActorUrnOrStr) -> None:
187
+ """Set the creation actor in audit stamps."""
188
+ if isinstance(created_by, str):
189
+ created_by = make_user_urn(created_by)
190
+ audit_stamps: models.ChangeAuditStampsClass = self._get_audit_stamps()
191
+ audit_stamps.created.actor = str(created_by)
192
+ self._set_audit_stamps(audit_stamps)
193
+
194
+ @property
195
+ def deleted_on(self) -> Optional[datetime]:
196
+ """Get the deletion timestamp from audit stamps."""
197
+ audit_stamps: models.ChangeAuditStampsClass = self._get_audit_stamps()
198
+ if audit_stamps.deleted is None or audit_stamps.deleted.time == 0:
199
+ return None
200
+ return datetime.fromtimestamp(
201
+ audit_stamps.deleted.time / 1000
202
+ ) # supports only seconds precision
203
+
204
+ def set_deleted_on(self, deleted_on: datetime) -> None:
205
+ """Set the deletion timestamp in audit stamps."""
206
+ audit_stamps: models.ChangeAuditStampsClass = self._get_audit_stamps()
207
+ # Default constructor sets deleted to None
208
+ if audit_stamps.deleted is None:
209
+ audit_stamps.deleted = models.AuditStampClass(
210
+ time=0, actor=builder.UNKNOWN_USER
211
+ )
212
+ audit_stamps.deleted.time = make_ts_millis(deleted_on)
213
+ self._set_audit_stamps(audit_stamps)
214
+
215
+ @property
216
+ def deleted_by(self) -> Optional[ActorUrnOrStr]:
217
+ """Get the deletion actor from audit stamps."""
218
+ audit_stamps: models.ChangeAuditStampsClass = self._get_audit_stamps()
219
+ if (
220
+ audit_stamps.deleted is None
221
+ or audit_stamps.deleted.actor == builder.UNKNOWN_USER
222
+ ):
223
+ return None
224
+ return audit_stamps.deleted.actor
225
+
226
+ def set_deleted_by(self, deleted_by: ActorUrnOrStr) -> None:
227
+ """Set the deletion actor in audit stamps."""
228
+ if isinstance(deleted_by, str):
229
+ deleted_by = make_user_urn(deleted_by)
230
+ audit_stamps: models.ChangeAuditStampsClass = self._get_audit_stamps()
231
+ if audit_stamps.deleted is None:
232
+ audit_stamps.deleted = models.AuditStampClass(
233
+ time=0, actor=builder.UNKNOWN_USER
234
+ )
235
+ audit_stamps.deleted.actor = str(deleted_by)
236
+ self._set_audit_stamps(audit_stamps)
237
+
238
+
101
239
  class HasPlatformInstance(Entity):
102
240
  __slots__ = ()
103
241
 
@@ -167,7 +305,7 @@ OwnerInputType: TypeAlias = Union[
167
305
  Tuple[ActorUrn, OwnershipTypeType],
168
306
  models.OwnerClass,
169
307
  ]
170
- OwnersInputType: TypeAlias = List[OwnerInputType]
308
+ OwnersInputType: TypeAlias = Sequence[OwnerInputType]
171
309
 
172
310
 
173
311
  class HasOwnership(Entity):
@@ -268,7 +406,9 @@ class HasOwnership(Entity):
268
406
  # If you pass in a ContainerKey, we can use parent_key() to build the browse path.
269
407
  # If you pass in a list of urns, we'll use that as the browse path. Any non-urn strings
270
408
  # will be treated as raw ids.
271
- ParentContainerInputType: TypeAlias = Union["Container", ContainerKey, List[UrnOrStr]]
409
+ ParentContainerInputType: TypeAlias = Union[
410
+ "Container", ContainerKey, Sequence[UrnOrStr]
411
+ ]
272
412
 
273
413
 
274
414
  class HasContainer(Entity):
@@ -328,7 +468,7 @@ class HasContainer(Entity):
328
468
  )
329
469
  for entry in parsed_path
330
470
  ]
331
- elif container is not None:
471
+ elif isinstance(container, ContainerKey):
332
472
  container_urn = container.as_urn()
333
473
 
334
474
  browse_path_reversed = [container_urn]
@@ -387,7 +527,7 @@ class HasContainer(Entity):
387
527
 
388
528
 
389
529
  TagInputType: TypeAlias = Union[str, TagUrn, models.TagAssociationClass]
390
- TagsInputType: TypeAlias = List[TagInputType]
530
+ TagsInputType: TypeAlias = Sequence[TagInputType]
391
531
 
392
532
 
393
533
  class HasTags(Entity):
@@ -442,7 +582,7 @@ class HasTags(Entity):
442
582
  TermInputType: TypeAlias = Union[
443
583
  str, GlossaryTermUrn, models.GlossaryTermAssociationClass
444
584
  ]
445
- TermsInputType: TypeAlias = List[TermInputType]
585
+ TermsInputType: TypeAlias = Sequence[TermInputType]
446
586
 
447
587
 
448
588
  class HasTerms(Entity):
@@ -717,3 +857,107 @@ class HasVersion(Entity):
717
857
  a for a in version_props.aliases if a.versionTag != alias
718
858
  ]
719
859
  self._set_aspect(version_props)
860
+
861
+
862
class HasStructuredProperties(Entity):
    """
    Mixin that lets an entity read, set and remove structured properties.
    """

    __slots__ = ()

    @property
    def structured_properties(
        self,
    ) -> Optional[List[models.StructuredPropertyValueAssignmentClass]]:
        """
        Structured property assignments currently attached to the entity.

        Returns:
            The ``properties`` list of the structured-properties aspect, or
            None when the entity has no such aspect.
        """
        aspect = self._get_aspect(models.StructuredPropertiesClass)
        if not aspect:
            return None
        return aspect.properties

    def _ensure_structured_properties(self) -> models.StructuredPropertiesClass:
        """
        Fetch the structured-properties aspect, registering an empty one as
        the default via ``_setdefault_aspect``.

        Returns:
            Whatever ``_setdefault_aspect`` returns for the aspect.
        """
        empty_aspect = models.StructuredPropertiesClass(properties=[])
        return self._setdefault_aspect(empty_aspect)

    def set_structured_property(
        self,
        property_urn: StructuredPropertyUrnOrStr,
        values: Sequence[StructuredPropertyValueType],
    ) -> None:
        """
        Assign ``values`` to a structured property, replacing any values the
        property already has on this entity.

        Args:
            property_urn: URN of the structured property; parsed with
                ``StructuredPropertyUrn.from_string``.
            values: Values to store; copied into a new list.
        """
        # Parsing doubles as validation of the URN.
        property_urn = StructuredPropertyUrn.from_string(property_urn)

        aspect = self._ensure_structured_properties()
        urn_text = str(property_urn)

        # Look for an assignment that already targets this property.
        current = None
        for assignment in aspect.properties:
            if assignment.propertyUrn == urn_text:
                current = assignment
                break

        now_millis = make_ts_millis(datetime.now())

        def fresh_stamp() -> models.AuditStampClass:
            # Each call builds a separate stamp object so that ``created``
            # and ``lastModified`` never share one instance.
            return models.AuditStampClass(
                time=now_millis,
                actor=DEFAULT_ACTOR_URN,
            )

        if current:
            # Overwrite in place and refresh the modification stamp.
            current.values = list(values)
            current.lastModified = fresh_stamp()
        else:
            # No assignment yet: build one and add it to the list.
            assignment_to_add = models.StructuredPropertyValueAssignmentClass(
                propertyUrn=urn_text,
                values=list(values),
                created=fresh_stamp(),
                lastModified=fresh_stamp(),
            )
            add_list_unique(
                aspect.properties,
                key=lambda entry: entry.propertyUrn,
                item=assignment_to_add,
            )

        self._set_aspect(aspect)

    def remove_structured_property(
        self, property_urn: StructuredPropertyUrnOrStr
    ) -> None:
        """
        Drop a structured property assignment from the entity.

        Args:
            property_urn: URN of the structured property to remove; used in
                its ``str()`` form, without further parsing.
        """
        # The list comes from the ensured aspect and is handed straight to
        # ``remove_list_unique``; ``_set_aspect`` is not called here.
        assignments = self._ensure_structured_properties().properties
        # Placeholder carrying only the URN, which is what the key function
        # below reads. Presumably the helper matches entries on that key.
        lookup_item = models.StructuredPropertyValueAssignmentClass(
            propertyUrn=str(property_urn), values=[]
        )
        remove_list_unique(
            assignments,
            key=lambda entry: entry.propertyUrn,
            item=lookup_item,
        )