acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (414)
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +1 -1
  7. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  8. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  9. datahub/api/entities/dataset/dataset.py +26 -23
  10. datahub/api/entities/external/__init__.py +0 -0
  11. datahub/api/entities/external/external_entities.py +724 -0
  12. datahub/api/entities/external/external_tag.py +147 -0
  13. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  14. datahub/api/entities/external/restricted_text.py +172 -0
  15. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  16. datahub/api/entities/forms/forms.py +3 -3
  17. datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
  18. datahub/api/graphql/operation.py +10 -6
  19. datahub/cli/check_cli.py +88 -7
  20. datahub/cli/cli_utils.py +63 -0
  21. datahub/cli/config_utils.py +18 -10
  22. datahub/cli/container_cli.py +5 -0
  23. datahub/cli/delete_cli.py +125 -27
  24. datahub/cli/docker_check.py +110 -14
  25. datahub/cli/docker_cli.py +153 -229
  26. datahub/cli/exists_cli.py +0 -2
  27. datahub/cli/get_cli.py +0 -2
  28. datahub/cli/graphql_cli.py +1422 -0
  29. datahub/cli/iceberg_cli.py +5 -0
  30. datahub/cli/ingest_cli.py +3 -15
  31. datahub/cli/migrate.py +2 -0
  32. datahub/cli/put_cli.py +1 -4
  33. datahub/cli/quickstart_versioning.py +53 -10
  34. datahub/cli/specific/assertions_cli.py +37 -6
  35. datahub/cli/specific/datacontract_cli.py +54 -7
  36. datahub/cli/specific/dataproduct_cli.py +2 -15
  37. datahub/cli/specific/dataset_cli.py +1 -8
  38. datahub/cli/specific/forms_cli.py +0 -4
  39. datahub/cli/specific/group_cli.py +0 -2
  40. datahub/cli/specific/structuredproperties_cli.py +1 -4
  41. datahub/cli/specific/user_cli.py +172 -3
  42. datahub/cli/state_cli.py +0 -2
  43. datahub/cli/timeline_cli.py +0 -2
  44. datahub/configuration/common.py +40 -1
  45. datahub/configuration/connection_resolver.py +5 -2
  46. datahub/configuration/env_vars.py +331 -0
  47. datahub/configuration/import_resolver.py +7 -4
  48. datahub/configuration/kafka.py +21 -1
  49. datahub/configuration/pydantic_migration_helpers.py +6 -13
  50. datahub/configuration/source_common.py +3 -2
  51. datahub/configuration/validate_field_deprecation.py +5 -2
  52. datahub/configuration/validate_field_removal.py +8 -2
  53. datahub/configuration/validate_field_rename.py +6 -5
  54. datahub/configuration/validate_multiline_string.py +5 -2
  55. datahub/emitter/mce_builder.py +8 -4
  56. datahub/emitter/rest_emitter.py +103 -30
  57. datahub/entrypoints.py +6 -3
  58. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
  59. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  60. datahub/ingestion/api/decorators.py +15 -3
  61. datahub/ingestion/api/report.py +381 -3
  62. datahub/ingestion/api/sink.py +27 -2
  63. datahub/ingestion/api/source.py +165 -58
  64. datahub/ingestion/api/source_protocols.py +23 -0
  65. datahub/ingestion/autogenerated/__init__.py +0 -0
  66. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  67. datahub/ingestion/autogenerated/lineage.json +402 -0
  68. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  69. datahub/ingestion/extractor/schema_util.py +13 -4
  70. datahub/ingestion/glossary/classification_mixin.py +5 -0
  71. datahub/ingestion/graph/client.py +330 -25
  72. datahub/ingestion/graph/config.py +3 -2
  73. datahub/ingestion/graph/filters.py +30 -11
  74. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  75. datahub/ingestion/run/pipeline.py +81 -11
  76. datahub/ingestion/run/pipeline_config.py +2 -2
  77. datahub/ingestion/sink/datahub_kafka.py +1 -0
  78. datahub/ingestion/sink/datahub_rest.py +13 -5
  79. datahub/ingestion/sink/file.py +1 -0
  80. datahub/ingestion/source/abs/config.py +1 -1
  81. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  82. datahub/ingestion/source/abs/source.py +15 -30
  83. datahub/ingestion/source/aws/aws_common.py +185 -13
  84. datahub/ingestion/source/aws/glue.py +517 -244
  85. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  86. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  87. datahub/ingestion/source/aws/tag_entities.py +270 -0
  88. datahub/ingestion/source/azure/azure_common.py +3 -3
  89. datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
  90. datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
  91. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
  92. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
  93. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  94. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  95. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  96. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  97. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  98. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  99. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  100. datahub/ingestion/source/cassandra/cassandra.py +6 -8
  101. datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
  102. datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
  103. datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
  104. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  105. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  106. datahub/ingestion/source/common/subtypes.py +53 -0
  107. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  108. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  109. datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
  110. datahub/ingestion/source/datahub/config.py +12 -9
  111. datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
  112. datahub/ingestion/source/datahub/datahub_source.py +10 -0
  113. datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
  114. datahub/ingestion/source/dbt/dbt_common.py +224 -9
  115. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  116. datahub/ingestion/source/debug/__init__.py +0 -0
  117. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  118. datahub/ingestion/source/delta_lake/config.py +9 -5
  119. datahub/ingestion/source/delta_lake/source.py +8 -0
  120. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  121. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  122. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  123. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  124. datahub/ingestion/source/dremio/dremio_source.py +132 -98
  125. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  126. datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
  127. datahub/ingestion/source/excel/__init__.py +0 -0
  128. datahub/ingestion/source/excel/config.py +92 -0
  129. datahub/ingestion/source/excel/excel_file.py +539 -0
  130. datahub/ingestion/source/excel/profiling.py +308 -0
  131. datahub/ingestion/source/excel/report.py +49 -0
  132. datahub/ingestion/source/excel/source.py +662 -0
  133. datahub/ingestion/source/excel/util.py +18 -0
  134. datahub/ingestion/source/feast.py +8 -10
  135. datahub/ingestion/source/file.py +3 -0
  136. datahub/ingestion/source/fivetran/config.py +66 -7
  137. datahub/ingestion/source/fivetran/fivetran.py +227 -43
  138. datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
  139. datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
  140. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  141. datahub/ingestion/source/fivetran/response_models.py +97 -0
  142. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  143. datahub/ingestion/source/gcs/gcs_source.py +32 -4
  144. datahub/ingestion/source/ge_data_profiler.py +108 -31
  145. datahub/ingestion/source/ge_profiling_config.py +26 -11
  146. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  147. datahub/ingestion/source/grafana/field_utils.py +307 -0
  148. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  149. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  150. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  151. datahub/ingestion/source/grafana/lineage.py +202 -0
  152. datahub/ingestion/source/grafana/models.py +137 -0
  153. datahub/ingestion/source/grafana/report.py +90 -0
  154. datahub/ingestion/source/grafana/types.py +16 -0
  155. datahub/ingestion/source/hex/api.py +28 -1
  156. datahub/ingestion/source/hex/hex.py +16 -5
  157. datahub/ingestion/source/hex/mapper.py +16 -2
  158. datahub/ingestion/source/hex/model.py +2 -0
  159. datahub/ingestion/source/hex/query_fetcher.py +1 -1
  160. datahub/ingestion/source/iceberg/iceberg.py +123 -59
  161. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  162. datahub/ingestion/source/identity/azure_ad.py +1 -1
  163. datahub/ingestion/source/identity/okta.py +1 -14
  164. datahub/ingestion/source/kafka/kafka.py +16 -0
  165. datahub/ingestion/source/kafka_connect/common.py +2 -2
  166. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  167. datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
  168. datahub/ingestion/source/looker/looker_common.py +148 -79
  169. datahub/ingestion/source/looker/looker_config.py +15 -4
  170. datahub/ingestion/source/looker/looker_constant.py +4 -0
  171. datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
  172. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  173. datahub/ingestion/source/looker/looker_source.py +503 -547
  174. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  175. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  176. datahub/ingestion/source/looker/lookml_config.py +31 -3
  177. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  178. datahub/ingestion/source/looker/lookml_source.py +96 -117
  179. datahub/ingestion/source/looker/view_upstream.py +494 -1
  180. datahub/ingestion/source/metabase.py +32 -6
  181. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  182. datahub/ingestion/source/metadata/lineage.py +9 -9
  183. datahub/ingestion/source/mlflow.py +12 -2
  184. datahub/ingestion/source/mock_data/__init__.py +0 -0
  185. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  186. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  187. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  188. datahub/ingestion/source/mode.py +26 -5
  189. datahub/ingestion/source/mongodb.py +11 -1
  190. datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
  191. datahub/ingestion/source/nifi.py +2 -2
  192. datahub/ingestion/source/openapi.py +1 -1
  193. datahub/ingestion/source/powerbi/config.py +47 -21
  194. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  195. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  196. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  197. datahub/ingestion/source/powerbi/powerbi.py +10 -6
  198. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  199. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  200. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  201. datahub/ingestion/source/preset.py +3 -3
  202. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  203. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  204. datahub/ingestion/source/redash.py +1 -1
  205. datahub/ingestion/source/redshift/config.py +15 -9
  206. datahub/ingestion/source/redshift/datashares.py +1 -1
  207. datahub/ingestion/source/redshift/lineage.py +386 -687
  208. datahub/ingestion/source/redshift/query.py +23 -19
  209. datahub/ingestion/source/redshift/redshift.py +52 -111
  210. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  211. datahub/ingestion/source/redshift/report.py +0 -2
  212. datahub/ingestion/source/redshift/usage.py +6 -5
  213. datahub/ingestion/source/s3/report.py +4 -2
  214. datahub/ingestion/source/s3/source.py +449 -248
  215. datahub/ingestion/source/sac/sac.py +3 -1
  216. datahub/ingestion/source/salesforce.py +28 -13
  217. datahub/ingestion/source/schema/json_schema.py +14 -14
  218. datahub/ingestion/source/schema_inference/object.py +22 -6
  219. datahub/ingestion/source/sigma/data_classes.py +3 -0
  220. datahub/ingestion/source/sigma/sigma.py +7 -1
  221. datahub/ingestion/source/slack/slack.py +10 -16
  222. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  223. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  224. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  225. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  226. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  227. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  228. datahub/ingestion/source/snowflake/constants.py +3 -0
  229. datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
  230. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
  231. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
  232. datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
  233. datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
  234. datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
  235. datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
  236. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
  237. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  238. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  239. datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
  240. datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
  241. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  242. datahub/ingestion/source/sql/athena.py +217 -25
  243. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  244. datahub/ingestion/source/sql/clickhouse.py +24 -8
  245. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  246. datahub/ingestion/source/sql/druid.py +2 -2
  247. datahub/ingestion/source/sql/hana.py +3 -1
  248. datahub/ingestion/source/sql/hive.py +4 -3
  249. datahub/ingestion/source/sql/hive_metastore.py +19 -20
  250. datahub/ingestion/source/sql/mariadb.py +0 -1
  251. datahub/ingestion/source/sql/mssql/job_models.py +3 -1
  252. datahub/ingestion/source/sql/mssql/source.py +336 -57
  253. datahub/ingestion/source/sql/mysql.py +154 -4
  254. datahub/ingestion/source/sql/oracle.py +5 -5
  255. datahub/ingestion/source/sql/postgres.py +142 -6
  256. datahub/ingestion/source/sql/presto.py +2 -1
  257. datahub/ingestion/source/sql/sql_common.py +281 -49
  258. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  259. datahub/ingestion/source/sql/sql_types.py +22 -0
  260. datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
  261. datahub/ingestion/source/sql/teradata.py +1028 -245
  262. datahub/ingestion/source/sql/trino.py +11 -1
  263. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  264. datahub/ingestion/source/sql/vertica.py +14 -7
  265. datahub/ingestion/source/sql_queries.py +219 -121
  266. datahub/ingestion/source/state/checkpoint.py +8 -29
  267. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  268. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  269. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  270. datahub/ingestion/source/superset.py +314 -67
  271. datahub/ingestion/source/tableau/tableau.py +135 -59
  272. datahub/ingestion/source/tableau/tableau_common.py +9 -2
  273. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  274. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  275. datahub/ingestion/source/unity/config.py +160 -40
  276. datahub/ingestion/source/unity/connection.py +61 -0
  277. datahub/ingestion/source/unity/connection_test.py +1 -0
  278. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  279. datahub/ingestion/source/unity/proxy.py +794 -51
  280. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  281. datahub/ingestion/source/unity/proxy_types.py +36 -2
  282. datahub/ingestion/source/unity/report.py +15 -3
  283. datahub/ingestion/source/unity/source.py +465 -131
  284. datahub/ingestion/source/unity/tag_entities.py +197 -0
  285. datahub/ingestion/source/unity/usage.py +46 -4
  286. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  287. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
  288. datahub/ingestion/source/usage/usage_common.py +4 -3
  289. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  290. datahub/ingestion/source_config/pulsar.py +3 -1
  291. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  292. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  293. datahub/ingestion/transformer/base_transformer.py +8 -5
  294. datahub/ingestion/transformer/set_browse_path.py +112 -0
  295. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  296. datahub/metadata/_internal_schema_classes.py +6806 -4871
  297. datahub/metadata/_urns/urn_defs.py +1767 -1539
  298. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  299. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  300. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  301. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  302. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  303. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
  304. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  305. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  306. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  307. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  308. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  309. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  310. datahub/metadata/schema.avsc +18395 -16979
  311. datahub/metadata/schemas/Actors.avsc +38 -1
  312. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  313. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  314. datahub/metadata/schemas/Applications.avsc +38 -0
  315. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  316. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  317. datahub/metadata/schemas/ChartKey.avsc +1 -0
  318. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  319. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  320. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  321. datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
  322. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  323. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  324. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  325. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  326. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  327. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  328. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  329. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  330. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  331. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  332. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  333. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  334. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  335. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  336. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  337. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  338. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  339. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  340. datahub/metadata/schemas/DomainKey.avsc +2 -1
  341. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  342. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  343. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  344. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  345. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  346. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  347. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  348. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  349. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  350. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  351. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  352. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  353. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  354. datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
  355. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  356. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  357. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  358. datahub/metadata/schemas/Operation.avsc +4 -2
  359. datahub/metadata/schemas/Ownership.avsc +69 -0
  360. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  361. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  362. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  363. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  364. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  365. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  366. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  367. datahub/sdk/__init__.py +2 -0
  368. datahub/sdk/_all_entities.py +7 -0
  369. datahub/sdk/_shared.py +249 -5
  370. datahub/sdk/chart.py +386 -0
  371. datahub/sdk/container.py +7 -0
  372. datahub/sdk/dashboard.py +453 -0
  373. datahub/sdk/dataflow.py +7 -0
  374. datahub/sdk/datajob.py +45 -13
  375. datahub/sdk/dataset.py +56 -2
  376. datahub/sdk/entity_client.py +111 -9
  377. datahub/sdk/lineage_client.py +663 -82
  378. datahub/sdk/main_client.py +50 -16
  379. datahub/sdk/mlmodel.py +120 -38
  380. datahub/sdk/mlmodelgroup.py +7 -0
  381. datahub/sdk/search_client.py +7 -3
  382. datahub/sdk/search_filters.py +304 -36
  383. datahub/secret/datahub_secret_store.py +3 -0
  384. datahub/secret/environment_secret_store.py +29 -0
  385. datahub/secret/file_secret_store.py +49 -0
  386. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  387. datahub/specific/aspect_helpers/siblings.py +73 -0
  388. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  389. datahub/specific/chart.py +1 -1
  390. datahub/specific/datajob.py +15 -1
  391. datahub/specific/dataproduct.py +4 -0
  392. datahub/specific/dataset.py +39 -59
  393. datahub/sql_parsing/split_statements.py +13 -0
  394. datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
  395. datahub/sql_parsing/sqlglot_lineage.py +196 -42
  396. datahub/sql_parsing/sqlglot_utils.py +12 -4
  397. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  398. datahub/telemetry/telemetry.py +28 -14
  399. datahub/testing/sdk_v2_helpers.py +7 -1
  400. datahub/upgrade/upgrade.py +73 -17
  401. datahub/utilities/file_backed_collections.py +8 -9
  402. datahub/utilities/is_pytest.py +3 -2
  403. datahub/utilities/logging_manager.py +22 -6
  404. datahub/utilities/mapping.py +29 -2
  405. datahub/utilities/sample_data.py +5 -4
  406. datahub/utilities/server_config_util.py +10 -1
  407. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  408. datahub/utilities/stats_collections.py +4 -0
  409. datahub/utilities/urns/urn.py +41 -2
  410. datahub/emitter/sql_parsing_builder.py +0 -306
  411. datahub/ingestion/source/redshift/lineage_v2.py +0 -466
  412. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
  413. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
  414. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
datahub/sdk/main_client.py CHANGED
@@ -1,21 +1,16 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Optional, overload
3
+ from typing import TYPE_CHECKING, Optional, overload
4
4
 
5
5
  from datahub.errors import SdkUsageError
6
6
  from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
7
7
  from datahub.ingestion.graph.config import ClientMode, DatahubClientConfig
8
8
  from datahub.sdk.entity_client import EntityClient
9
9
  from datahub.sdk.lineage_client import LineageClient
10
- from datahub.sdk.resolver_client import ResolverClient
11
10
  from datahub.sdk.search_client import SearchClient
12
11
 
13
- try:
14
- from acryl_datahub_cloud._sdk_extras import ( # type: ignore[import-not-found]
15
- AssertionsClient,
16
- )
17
- except ImportError:
18
- AssertionsClient = None
12
+ if TYPE_CHECKING:
13
+ from datahub.sdk.resolver_client import ResolverClient
19
14
 
20
15
 
21
16
  class DataHubClient:
@@ -74,7 +69,12 @@ class DataHubClient:
74
69
  self._graph.test_connection()
75
70
 
76
71
  @classmethod
77
- def from_env(cls) -> "DataHubClient":
72
+ def from_env(
73
+ cls,
74
+ *,
75
+ client_mode: ClientMode = ClientMode.SDK,
76
+ datahub_component: Optional[str] = None,
77
+ ) -> "DataHubClient":
78
78
  """Initialize a DataHubClient from the environment variables or ~/.datahubenv file.
79
79
 
80
80
  This will first check DATAHUB_GMS_URL and DATAHUB_GMS_TOKEN. If not present,
@@ -84,6 +84,10 @@ class DataHubClient:
84
84
  If you're looking to specify the server/token in code, use the
85
85
  DataHubClient(server=..., token=...) constructor instead.
86
86
 
87
+ Args:
88
+ client_mode: [internal] The client mode to use. Defaults to "SDK".
89
+ datahub_component: [internal] The DataHub component name to include in the user agent.
90
+
87
91
  Returns:
88
92
  A DataHubClient instance.
89
93
  """
@@ -91,7 +95,10 @@ class DataHubClient:
91
95
  # Inspired by the DockerClient.from_env() method.
92
96
  # TODO: This one also reads from ~/.datahubenv, so the "from_env" name might be a bit confusing.
93
97
  # That file is part of the "environment", but is not a traditional "env variable".
94
- graph = get_default_graph(ClientMode.SDK)
98
+ graph = get_default_graph(
99
+ client_mode=client_mode,
100
+ datahub_component=datahub_component,
101
+ )
95
102
 
96
103
  return cls(graph=graph)
97
104
 
@@ -100,7 +107,16 @@ class DataHubClient:
100
107
  return EntityClient(self)
101
108
 
102
109
  @property
103
- def resolve(self) -> ResolverClient:
110
+ def resolve(self) -> "ResolverClient":
111
+ try:
112
+ from acryl_datahub_cloud.sdk import ( # type: ignore[import-not-found]
113
+ ResolverClient,
114
+ )
115
+ except ImportError:
116
+ # If the client is not installed, use the one from the SDK.
117
+ from datahub.sdk.resolver_client import ( # type: ignore[assignment]
118
+ ResolverClient,
119
+ )
104
120
  return ResolverClient(self)
105
121
 
106
122
  @property
@@ -112,9 +128,27 @@ class DataHubClient:
112
128
  return LineageClient(self)
113
129
 
114
130
  @property
115
- def assertions(self) -> AssertionsClient: # type: ignore[return-value] # Type is not available if assertion_client is not installed
116
- if AssertionsClient is None:
117
- raise SdkUsageError(
118
- "AssertionsClient is not installed, please install it with `pip install acryl-datahub-cloud`"
119
- )
131
+ def assertions(self): # type: ignore[report-untyped-call] # Not available due to circular import issues
132
+ try:
133
+ from acryl_datahub_cloud.sdk import AssertionsClient
134
+ except ImportError as e:
135
+ if "acryl_datahub_cloud" in str(e):
136
+ raise SdkUsageError(
137
+ "AssertionsClient is not installed, please install it with `pip install acryl-datahub-cloud`"
138
+ ) from e
139
+ else:
140
+ raise e
120
141
  return AssertionsClient(self)
142
+
143
+ @property
144
+ def subscriptions(self): # type: ignore[report-untyped-call] # Not available due to circular import issues
145
+ try:
146
+ from acryl_datahub_cloud.sdk import SubscriptionClient
147
+ except ImportError as e:
148
+ if "acryl_datahub_cloud" in str(e):
149
+ raise SdkUsageError(
150
+ "SubscriptionClient is not installed, please install it with `pip install acryl-datahub-cloud`"
151
+ ) from e
152
+ else:
153
+ raise e
154
+ return SubscriptionClient(self)
datahub/sdk/mlmodel.py CHANGED
@@ -24,6 +24,7 @@ from datahub.sdk._shared import (
24
24
  HasInstitutionalMemory,
25
25
  HasOwnership,
26
26
  HasPlatformInstance,
27
+ HasStructuredProperties,
27
28
  HasTags,
28
29
  HasTerms,
29
30
  HasVersion,
@@ -31,6 +32,7 @@ from datahub.sdk._shared import (
31
32
  LinksInputType,
32
33
  MLTrainingJobInputType,
33
34
  OwnersInputType,
35
+ StructuredPropertyInputType,
34
36
  TagsInputType,
35
37
  TermsInputType,
36
38
  TrainingMetricsInputType,
@@ -50,6 +52,7 @@ class MLModel(
50
52
  HasTerms,
51
53
  HasDomain,
52
54
  HasVersion,
55
+ HasStructuredProperties,
53
56
  Entity,
54
57
  ):
55
58
  __slots__ = ()
@@ -82,53 +85,43 @@ class MLModel(
82
85
  model_group: Optional[Union[str, MlModelGroupUrn]] = None,
83
86
  training_jobs: Optional[MLTrainingJobInputType] = None,
84
87
  downstream_jobs: Optional[MLTrainingJobInputType] = None,
88
+ structured_properties: Optional[StructuredPropertyInputType] = None,
85
89
  extra_aspects: ExtraAspectsType = None,
86
90
  ):
87
91
  urn = MlModelUrn(platform=platform, name=id, env=env)
88
92
  super().__init__(urn)
89
93
  self._set_extra_aspects(extra_aspects)
90
-
91
94
  self._set_platform_instance(urn.platform, platform_instance)
92
-
93
95
  self._ensure_model_props()
94
96
 
95
- if version is not None:
96
- self.set_version(version)
97
- if name is not None:
98
- self.set_name(name)
99
- if aliases is not None:
100
- self.set_version_aliases(aliases)
101
- if description is not None:
102
- self.set_description(description)
103
- if training_metrics is not None:
104
- self.set_training_metrics(training_metrics)
105
- if hyper_params is not None:
106
- self.set_hyper_params(hyper_params)
107
- if external_url is not None:
108
- self.set_external_url(external_url)
109
- if custom_properties is not None:
110
- self.set_custom_properties(custom_properties)
111
- if created is not None:
112
- self.set_created(created)
113
- if last_modified is not None:
114
- self.set_last_modified(last_modified)
97
+ # Initialize properties in logical groups
98
+ self._init_basic_properties(
99
+ version=version,
100
+ name=name,
101
+ aliases=aliases,
102
+ description=description,
103
+ external_url=external_url,
104
+ custom_properties=custom_properties,
105
+ created=created,
106
+ last_modified=last_modified,
107
+ )
115
108
 
116
- if owners is not None:
117
- self.set_owners(owners)
118
- if links is not None:
119
- self.set_links(links)
120
- if tags is not None:
121
- self.set_tags(tags)
122
- if terms is not None:
123
- self.set_terms(terms)
124
- if domain is not None:
125
- self.set_domain(domain)
126
- if model_group is not None:
127
- self.set_model_group(model_group)
128
- if training_jobs is not None:
129
- self.set_training_jobs(training_jobs)
130
- if downstream_jobs is not None:
131
- self.set_downstream_jobs(downstream_jobs)
109
+ self._init_ml_specific_properties(
110
+ training_metrics=training_metrics,
111
+ hyper_params=hyper_params,
112
+ model_group=model_group,
113
+ training_jobs=training_jobs,
114
+ downstream_jobs=downstream_jobs,
115
+ )
116
+
117
+ self._init_metadata_properties(
118
+ owners=owners,
119
+ links=links,
120
+ tags=tags,
121
+ terms=terms,
122
+ domain=domain,
123
+ structured_properties=structured_properties,
124
+ )
132
125
 
133
126
  @classmethod
134
127
  def _new_from_graph(cls, urn: Urn, current_aspects: AspectBag) -> Self:
@@ -299,3 +292,92 @@ class MLModel(
299
292
  props.downstreamJobs = [
300
293
  job for job in props.downstreamJobs if job != job_str
301
294
  ]
295
+
296
+ @property
297
+ def deployments(self) -> Optional[List[str]]:
298
+ return self._ensure_model_props().deployments
299
+
300
+ def set_deployments(self, deployments: Sequence[str]) -> None:
301
+ self._ensure_model_props().deployments = list(deployments)
302
+
303
+ def add_deployment(self, deployment: str) -> None:
304
+ props = self._ensure_model_props()
305
+ if props.deployments is None:
306
+ props.deployments = []
307
+ if deployment not in props.deployments:
308
+ props.deployments.append(deployment)
309
+
310
+ def remove_deployment(self, deployment: str) -> None:
311
+ props = self._ensure_model_props()
312
+ if props.deployments is not None:
313
+ props.deployments = [d for d in props.deployments if d != deployment]
314
+
315
+ def _init_basic_properties(
316
+ self,
317
+ version: Optional[str] = None,
318
+ name: Optional[str] = None,
319
+ aliases: Optional[List[str]] = None,
320
+ description: Optional[str] = None,
321
+ external_url: Optional[str] = None,
322
+ custom_properties: Optional[Dict[str, str]] = None,
323
+ created: Optional[datetime] = None,
324
+ last_modified: Optional[datetime] = None,
325
+ ) -> None:
326
+ if version is not None:
327
+ self.set_version(version)
328
+ if name is not None:
329
+ self.set_name(name)
330
+ if aliases is not None:
331
+ self.set_version_aliases(aliases)
332
+ if description is not None:
333
+ self.set_description(description)
334
+ if external_url is not None:
335
+ self.set_external_url(external_url)
336
+ if custom_properties is not None:
337
+ self.set_custom_properties(custom_properties)
338
+ if created is not None:
339
+ self.set_created(created)
340
+ if last_modified is not None:
341
+ self.set_last_modified(last_modified)
342
+
343
+ def _init_ml_specific_properties(
344
+ self,
345
+ training_metrics: Optional[TrainingMetricsInputType] = None,
346
+ hyper_params: Optional[HyperParamsInputType] = None,
347
+ model_group: Optional[Union[str, MlModelGroupUrn]] = None,
348
+ training_jobs: Optional[MLTrainingJobInputType] = None,
349
+ downstream_jobs: Optional[MLTrainingJobInputType] = None,
350
+ ) -> None:
351
+ if training_metrics is not None:
352
+ self.set_training_metrics(training_metrics)
353
+ if hyper_params is not None:
354
+ self.set_hyper_params(hyper_params)
355
+ if model_group is not None:
356
+ self.set_model_group(model_group)
357
+ if training_jobs is not None:
358
+ self.set_training_jobs(training_jobs)
359
+ if downstream_jobs is not None:
360
+ self.set_downstream_jobs(downstream_jobs)
361
+
362
+ def _init_metadata_properties(
363
+ self,
364
+ owners: Optional[OwnersInputType] = None,
365
+ links: Optional[LinksInputType] = None,
366
+ tags: Optional[TagsInputType] = None,
367
+ terms: Optional[TermsInputType] = None,
368
+ domain: Optional[DomainInputType] = None,
369
+ structured_properties: Optional[StructuredPropertyInputType] = None,
370
+ ) -> None:
371
+ if owners is not None:
372
+ self.set_owners(owners)
373
+ if links is not None:
374
+ self.set_links(links)
375
+ if tags is not None:
376
+ self.set_tags(tags)
377
+ if terms is not None:
378
+ self.set_terms(terms)
379
+ if domain is not None:
380
+ self.set_domain(domain)
381
+ if structured_properties is not None:
382
+ for key, value in structured_properties.items():
383
+ self.set_structured_property(property_urn=key, values=value)
@@ -17,10 +17,12 @@ from datahub.sdk._shared import (
17
17
  HasInstitutionalMemory,
18
18
  HasOwnership,
19
19
  HasPlatformInstance,
20
+ HasStructuredProperties,
20
21
  HasTags,
21
22
  HasTerms,
22
23
  LinksInputType,
23
24
  OwnersInputType,
25
+ StructuredPropertyInputType,
24
26
  TagsInputType,
25
27
  TermsInputType,
26
28
  make_time_stamp,
@@ -36,6 +38,7 @@ class MLModelGroup(
36
38
  HasTags,
37
39
  HasTerms,
38
40
  HasDomain,
41
+ HasStructuredProperties,
39
42
  Entity,
40
43
  ):
41
44
  __slots__ = ()
@@ -66,6 +69,7 @@ class MLModelGroup(
66
69
  domain: Optional[DomainInputType] = None,
67
70
  training_jobs: Optional[Sequence[Union[str, DataProcessInstanceUrn]]] = None,
68
71
  downstream_jobs: Optional[Sequence[Union[str, DataProcessInstanceUrn]]] = None,
72
+ structured_properties: Optional[StructuredPropertyInputType] = None,
69
73
  extra_aspects: ExtraAspectsType = None,
70
74
  ):
71
75
  urn = MlModelGroupUrn(platform=platform, name=id, env=env)
@@ -105,6 +109,9 @@ class MLModelGroup(
105
109
  self.set_training_jobs(training_jobs)
106
110
  if downstream_jobs is not None:
107
111
  self.set_downstream_jobs(downstream_jobs)
112
+ if structured_properties is not None:
113
+ for key, value in structured_properties.items():
114
+ self.set_structured_property(property_urn=key, values=value)
108
115
 
109
116
  @classmethod
110
117
  def _new_from_graph(cls, urn: Urn, current_aspects: AspectBag) -> Self:
@@ -19,6 +19,7 @@ from datahub.sdk.search_filters import (
19
19
  _OrFilters,
20
20
  _StatusFilter,
21
21
  )
22
+ from datahub.utilities.ordered_set import OrderedSet
22
23
 
23
24
  if TYPE_CHECKING:
24
25
  from datahub.sdk.main_client import DataHubClient
@@ -80,7 +81,7 @@ def compute_entity_types(
80
81
  ) -> Optional[List[str]]:
81
82
  found_filters = False
82
83
  found_positive_filters = False
83
- entity_types: List[str] = []
84
+ entity_types: OrderedSet[str] = OrderedSet()
84
85
  for ands in filters:
85
86
  for clause in ands["and"]:
86
87
  if clause.field == _EntityTypeFilter.ENTITY_TYPE_FIELD:
@@ -88,7 +89,7 @@ def compute_entity_types(
88
89
  if not clause.negated:
89
90
  found_positive_filters = True
90
91
 
91
- entity_types.extend(clause.values)
92
+ entity_types.update(clause.values)
92
93
 
93
94
  if not found_filters:
94
95
  # If we didn't find any filters, use None so we use the default set.
@@ -100,7 +101,7 @@ def compute_entity_types(
100
101
  # still want to use the default set.
101
102
  return None
102
103
 
103
- return entity_types
104
+ return list(entity_types)
104
105
 
105
106
 
106
107
  class SearchClient:
@@ -111,6 +112,8 @@ class SearchClient:
111
112
  self,
112
113
  query: Optional[str] = None,
113
114
  filter: Optional[Filter] = None,
115
+ *,
116
+ skip_cache: bool = False,
114
117
  ) -> Iterable[Urn]:
115
118
  # TODO: Add better limit / pagination support.
116
119
  types, compiled_filters = compile_filters(filter)
@@ -119,5 +122,6 @@ class SearchClient:
119
122
  status=None,
120
123
  extra_or_filters=compiled_filters,
121
124
  entity_types=types,
125
+ skip_cache=skip_cache,
122
126
  ):
123
127
  yield Urn.from_string(urn)