acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (414)
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +1 -1
  7. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  8. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  9. datahub/api/entities/dataset/dataset.py +26 -23
  10. datahub/api/entities/external/__init__.py +0 -0
  11. datahub/api/entities/external/external_entities.py +724 -0
  12. datahub/api/entities/external/external_tag.py +147 -0
  13. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  14. datahub/api/entities/external/restricted_text.py +172 -0
  15. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  16. datahub/api/entities/forms/forms.py +3 -3
  17. datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
  18. datahub/api/graphql/operation.py +10 -6
  19. datahub/cli/check_cli.py +88 -7
  20. datahub/cli/cli_utils.py +63 -0
  21. datahub/cli/config_utils.py +18 -10
  22. datahub/cli/container_cli.py +5 -0
  23. datahub/cli/delete_cli.py +125 -27
  24. datahub/cli/docker_check.py +110 -14
  25. datahub/cli/docker_cli.py +153 -229
  26. datahub/cli/exists_cli.py +0 -2
  27. datahub/cli/get_cli.py +0 -2
  28. datahub/cli/graphql_cli.py +1422 -0
  29. datahub/cli/iceberg_cli.py +5 -0
  30. datahub/cli/ingest_cli.py +3 -15
  31. datahub/cli/migrate.py +2 -0
  32. datahub/cli/put_cli.py +1 -4
  33. datahub/cli/quickstart_versioning.py +53 -10
  34. datahub/cli/specific/assertions_cli.py +37 -6
  35. datahub/cli/specific/datacontract_cli.py +54 -7
  36. datahub/cli/specific/dataproduct_cli.py +2 -15
  37. datahub/cli/specific/dataset_cli.py +1 -8
  38. datahub/cli/specific/forms_cli.py +0 -4
  39. datahub/cli/specific/group_cli.py +0 -2
  40. datahub/cli/specific/structuredproperties_cli.py +1 -4
  41. datahub/cli/specific/user_cli.py +172 -3
  42. datahub/cli/state_cli.py +0 -2
  43. datahub/cli/timeline_cli.py +0 -2
  44. datahub/configuration/common.py +40 -1
  45. datahub/configuration/connection_resolver.py +5 -2
  46. datahub/configuration/env_vars.py +331 -0
  47. datahub/configuration/import_resolver.py +7 -4
  48. datahub/configuration/kafka.py +21 -1
  49. datahub/configuration/pydantic_migration_helpers.py +6 -13
  50. datahub/configuration/source_common.py +3 -2
  51. datahub/configuration/validate_field_deprecation.py +5 -2
  52. datahub/configuration/validate_field_removal.py +8 -2
  53. datahub/configuration/validate_field_rename.py +6 -5
  54. datahub/configuration/validate_multiline_string.py +5 -2
  55. datahub/emitter/mce_builder.py +8 -4
  56. datahub/emitter/rest_emitter.py +103 -30
  57. datahub/entrypoints.py +6 -3
  58. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
  59. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  60. datahub/ingestion/api/decorators.py +15 -3
  61. datahub/ingestion/api/report.py +381 -3
  62. datahub/ingestion/api/sink.py +27 -2
  63. datahub/ingestion/api/source.py +165 -58
  64. datahub/ingestion/api/source_protocols.py +23 -0
  65. datahub/ingestion/autogenerated/__init__.py +0 -0
  66. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  67. datahub/ingestion/autogenerated/lineage.json +402 -0
  68. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  69. datahub/ingestion/extractor/schema_util.py +13 -4
  70. datahub/ingestion/glossary/classification_mixin.py +5 -0
  71. datahub/ingestion/graph/client.py +330 -25
  72. datahub/ingestion/graph/config.py +3 -2
  73. datahub/ingestion/graph/filters.py +30 -11
  74. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  75. datahub/ingestion/run/pipeline.py +81 -11
  76. datahub/ingestion/run/pipeline_config.py +2 -2
  77. datahub/ingestion/sink/datahub_kafka.py +1 -0
  78. datahub/ingestion/sink/datahub_rest.py +13 -5
  79. datahub/ingestion/sink/file.py +1 -0
  80. datahub/ingestion/source/abs/config.py +1 -1
  81. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  82. datahub/ingestion/source/abs/source.py +15 -30
  83. datahub/ingestion/source/aws/aws_common.py +185 -13
  84. datahub/ingestion/source/aws/glue.py +517 -244
  85. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  86. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  87. datahub/ingestion/source/aws/tag_entities.py +270 -0
  88. datahub/ingestion/source/azure/azure_common.py +3 -3
  89. datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
  90. datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
  91. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
  92. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
  93. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  94. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  95. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  96. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  97. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  98. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  99. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  100. datahub/ingestion/source/cassandra/cassandra.py +6 -8
  101. datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
  102. datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
  103. datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
  104. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  105. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  106. datahub/ingestion/source/common/subtypes.py +53 -0
  107. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  108. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  109. datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
  110. datahub/ingestion/source/datahub/config.py +12 -9
  111. datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
  112. datahub/ingestion/source/datahub/datahub_source.py +10 -0
  113. datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
  114. datahub/ingestion/source/dbt/dbt_common.py +224 -9
  115. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  116. datahub/ingestion/source/debug/__init__.py +0 -0
  117. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  118. datahub/ingestion/source/delta_lake/config.py +9 -5
  119. datahub/ingestion/source/delta_lake/source.py +8 -0
  120. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  121. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  122. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  123. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  124. datahub/ingestion/source/dremio/dremio_source.py +132 -98
  125. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  126. datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
  127. datahub/ingestion/source/excel/__init__.py +0 -0
  128. datahub/ingestion/source/excel/config.py +92 -0
  129. datahub/ingestion/source/excel/excel_file.py +539 -0
  130. datahub/ingestion/source/excel/profiling.py +308 -0
  131. datahub/ingestion/source/excel/report.py +49 -0
  132. datahub/ingestion/source/excel/source.py +662 -0
  133. datahub/ingestion/source/excel/util.py +18 -0
  134. datahub/ingestion/source/feast.py +8 -10
  135. datahub/ingestion/source/file.py +3 -0
  136. datahub/ingestion/source/fivetran/config.py +66 -7
  137. datahub/ingestion/source/fivetran/fivetran.py +227 -43
  138. datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
  139. datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
  140. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  141. datahub/ingestion/source/fivetran/response_models.py +97 -0
  142. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  143. datahub/ingestion/source/gcs/gcs_source.py +32 -4
  144. datahub/ingestion/source/ge_data_profiler.py +108 -31
  145. datahub/ingestion/source/ge_profiling_config.py +26 -11
  146. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  147. datahub/ingestion/source/grafana/field_utils.py +307 -0
  148. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  149. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  150. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  151. datahub/ingestion/source/grafana/lineage.py +202 -0
  152. datahub/ingestion/source/grafana/models.py +137 -0
  153. datahub/ingestion/source/grafana/report.py +90 -0
  154. datahub/ingestion/source/grafana/types.py +16 -0
  155. datahub/ingestion/source/hex/api.py +28 -1
  156. datahub/ingestion/source/hex/hex.py +16 -5
  157. datahub/ingestion/source/hex/mapper.py +16 -2
  158. datahub/ingestion/source/hex/model.py +2 -0
  159. datahub/ingestion/source/hex/query_fetcher.py +1 -1
  160. datahub/ingestion/source/iceberg/iceberg.py +123 -59
  161. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  162. datahub/ingestion/source/identity/azure_ad.py +1 -1
  163. datahub/ingestion/source/identity/okta.py +1 -14
  164. datahub/ingestion/source/kafka/kafka.py +16 -0
  165. datahub/ingestion/source/kafka_connect/common.py +2 -2
  166. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  167. datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
  168. datahub/ingestion/source/looker/looker_common.py +148 -79
  169. datahub/ingestion/source/looker/looker_config.py +15 -4
  170. datahub/ingestion/source/looker/looker_constant.py +4 -0
  171. datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
  172. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  173. datahub/ingestion/source/looker/looker_source.py +503 -547
  174. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  175. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  176. datahub/ingestion/source/looker/lookml_config.py +31 -3
  177. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  178. datahub/ingestion/source/looker/lookml_source.py +96 -117
  179. datahub/ingestion/source/looker/view_upstream.py +494 -1
  180. datahub/ingestion/source/metabase.py +32 -6
  181. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  182. datahub/ingestion/source/metadata/lineage.py +9 -9
  183. datahub/ingestion/source/mlflow.py +12 -2
  184. datahub/ingestion/source/mock_data/__init__.py +0 -0
  185. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  186. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  187. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  188. datahub/ingestion/source/mode.py +26 -5
  189. datahub/ingestion/source/mongodb.py +11 -1
  190. datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
  191. datahub/ingestion/source/nifi.py +2 -2
  192. datahub/ingestion/source/openapi.py +1 -1
  193. datahub/ingestion/source/powerbi/config.py +47 -21
  194. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  195. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  196. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  197. datahub/ingestion/source/powerbi/powerbi.py +10 -6
  198. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  199. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  200. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  201. datahub/ingestion/source/preset.py +3 -3
  202. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  203. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  204. datahub/ingestion/source/redash.py +1 -1
  205. datahub/ingestion/source/redshift/config.py +15 -9
  206. datahub/ingestion/source/redshift/datashares.py +1 -1
  207. datahub/ingestion/source/redshift/lineage.py +386 -687
  208. datahub/ingestion/source/redshift/query.py +23 -19
  209. datahub/ingestion/source/redshift/redshift.py +52 -111
  210. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  211. datahub/ingestion/source/redshift/report.py +0 -2
  212. datahub/ingestion/source/redshift/usage.py +6 -5
  213. datahub/ingestion/source/s3/report.py +4 -2
  214. datahub/ingestion/source/s3/source.py +449 -248
  215. datahub/ingestion/source/sac/sac.py +3 -1
  216. datahub/ingestion/source/salesforce.py +28 -13
  217. datahub/ingestion/source/schema/json_schema.py +14 -14
  218. datahub/ingestion/source/schema_inference/object.py +22 -6
  219. datahub/ingestion/source/sigma/data_classes.py +3 -0
  220. datahub/ingestion/source/sigma/sigma.py +7 -1
  221. datahub/ingestion/source/slack/slack.py +10 -16
  222. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  223. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  224. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  225. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  226. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  227. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  228. datahub/ingestion/source/snowflake/constants.py +3 -0
  229. datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
  230. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
  231. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
  232. datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
  233. datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
  234. datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
  235. datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
  236. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
  237. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  238. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  239. datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
  240. datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
  241. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  242. datahub/ingestion/source/sql/athena.py +217 -25
  243. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  244. datahub/ingestion/source/sql/clickhouse.py +24 -8
  245. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  246. datahub/ingestion/source/sql/druid.py +2 -2
  247. datahub/ingestion/source/sql/hana.py +3 -1
  248. datahub/ingestion/source/sql/hive.py +4 -3
  249. datahub/ingestion/source/sql/hive_metastore.py +19 -20
  250. datahub/ingestion/source/sql/mariadb.py +0 -1
  251. datahub/ingestion/source/sql/mssql/job_models.py +3 -1
  252. datahub/ingestion/source/sql/mssql/source.py +336 -57
  253. datahub/ingestion/source/sql/mysql.py +154 -4
  254. datahub/ingestion/source/sql/oracle.py +5 -5
  255. datahub/ingestion/source/sql/postgres.py +142 -6
  256. datahub/ingestion/source/sql/presto.py +2 -1
  257. datahub/ingestion/source/sql/sql_common.py +281 -49
  258. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  259. datahub/ingestion/source/sql/sql_types.py +22 -0
  260. datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
  261. datahub/ingestion/source/sql/teradata.py +1028 -245
  262. datahub/ingestion/source/sql/trino.py +11 -1
  263. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  264. datahub/ingestion/source/sql/vertica.py +14 -7
  265. datahub/ingestion/source/sql_queries.py +219 -121
  266. datahub/ingestion/source/state/checkpoint.py +8 -29
  267. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  268. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  269. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  270. datahub/ingestion/source/superset.py +314 -67
  271. datahub/ingestion/source/tableau/tableau.py +135 -59
  272. datahub/ingestion/source/tableau/tableau_common.py +9 -2
  273. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  274. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  275. datahub/ingestion/source/unity/config.py +160 -40
  276. datahub/ingestion/source/unity/connection.py +61 -0
  277. datahub/ingestion/source/unity/connection_test.py +1 -0
  278. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  279. datahub/ingestion/source/unity/proxy.py +794 -51
  280. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  281. datahub/ingestion/source/unity/proxy_types.py +36 -2
  282. datahub/ingestion/source/unity/report.py +15 -3
  283. datahub/ingestion/source/unity/source.py +465 -131
  284. datahub/ingestion/source/unity/tag_entities.py +197 -0
  285. datahub/ingestion/source/unity/usage.py +46 -4
  286. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  287. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
  288. datahub/ingestion/source/usage/usage_common.py +4 -3
  289. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  290. datahub/ingestion/source_config/pulsar.py +3 -1
  291. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  292. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  293. datahub/ingestion/transformer/base_transformer.py +8 -5
  294. datahub/ingestion/transformer/set_browse_path.py +112 -0
  295. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  296. datahub/metadata/_internal_schema_classes.py +6806 -4871
  297. datahub/metadata/_urns/urn_defs.py +1767 -1539
  298. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  299. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  300. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  301. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  302. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  303. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
  304. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  305. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  306. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  307. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  308. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  309. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  310. datahub/metadata/schema.avsc +18395 -16979
  311. datahub/metadata/schemas/Actors.avsc +38 -1
  312. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  313. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  314. datahub/metadata/schemas/Applications.avsc +38 -0
  315. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  316. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  317. datahub/metadata/schemas/ChartKey.avsc +1 -0
  318. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  319. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  320. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  321. datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
  322. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  323. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  324. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  325. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  326. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  327. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  328. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  329. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  330. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  331. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  332. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  333. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  334. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  335. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  336. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  337. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  338. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  339. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  340. datahub/metadata/schemas/DomainKey.avsc +2 -1
  341. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  342. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  343. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  344. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  345. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  346. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  347. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  348. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  349. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  350. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  351. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  352. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  353. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  354. datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
  355. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  356. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  357. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  358. datahub/metadata/schemas/Operation.avsc +4 -2
  359. datahub/metadata/schemas/Ownership.avsc +69 -0
  360. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  361. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  362. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  363. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  364. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  365. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  366. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  367. datahub/sdk/__init__.py +2 -0
  368. datahub/sdk/_all_entities.py +7 -0
  369. datahub/sdk/_shared.py +249 -5
  370. datahub/sdk/chart.py +386 -0
  371. datahub/sdk/container.py +7 -0
  372. datahub/sdk/dashboard.py +453 -0
  373. datahub/sdk/dataflow.py +7 -0
  374. datahub/sdk/datajob.py +45 -13
  375. datahub/sdk/dataset.py +56 -2
  376. datahub/sdk/entity_client.py +111 -9
  377. datahub/sdk/lineage_client.py +663 -82
  378. datahub/sdk/main_client.py +50 -16
  379. datahub/sdk/mlmodel.py +120 -38
  380. datahub/sdk/mlmodelgroup.py +7 -0
  381. datahub/sdk/search_client.py +7 -3
  382. datahub/sdk/search_filters.py +304 -36
  383. datahub/secret/datahub_secret_store.py +3 -0
  384. datahub/secret/environment_secret_store.py +29 -0
  385. datahub/secret/file_secret_store.py +49 -0
  386. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  387. datahub/specific/aspect_helpers/siblings.py +73 -0
  388. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  389. datahub/specific/chart.py +1 -1
  390. datahub/specific/datajob.py +15 -1
  391. datahub/specific/dataproduct.py +4 -0
  392. datahub/specific/dataset.py +39 -59
  393. datahub/sql_parsing/split_statements.py +13 -0
  394. datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
  395. datahub/sql_parsing/sqlglot_lineage.py +196 -42
  396. datahub/sql_parsing/sqlglot_utils.py +12 -4
  397. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  398. datahub/telemetry/telemetry.py +28 -14
  399. datahub/testing/sdk_v2_helpers.py +7 -1
  400. datahub/upgrade/upgrade.py +73 -17
  401. datahub/utilities/file_backed_collections.py +8 -9
  402. datahub/utilities/is_pytest.py +3 -2
  403. datahub/utilities/logging_manager.py +22 -6
  404. datahub/utilities/mapping.py +29 -2
  405. datahub/utilities/sample_data.py +5 -4
  406. datahub/utilities/server_config_util.py +10 -1
  407. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  408. datahub/utilities/stats_collections.py +4 -0
  409. datahub/utilities/urns/urn.py +41 -2
  410. datahub/emitter/sql_parsing_builder.py +0 -306
  411. datahub/ingestion/source/redshift/lineage_v2.py +0 -466
  412. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
  413. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
  414. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -36,8 +36,10 @@ csv-enricher = datahub.ingestion.source.csv_enricher:CSVEnricherSource
36
36
  datahub = datahub.ingestion.source.datahub.datahub_source:DataHubSource
37
37
  datahub-apply = datahub.ingestion.source.apply.datahub_apply:DataHubApplySource
38
38
  datahub-business-glossary = datahub.ingestion.source.metadata.business_glossary:BusinessGlossaryFileSource
39
+ datahub-debug = datahub.ingestion.source.debug.datahub_debug:DataHubDebugSource
39
40
  datahub-gc = datahub.ingestion.source.gc.datahub_gc:DataHubGcSource
40
41
  datahub-lineage-file = datahub.ingestion.source.metadata.lineage:LineageFileSource
42
+ datahub-mock-data = datahub.ingestion.source.mock_data.datahub_mock_data:DataHubMockDataSource
41
43
  dbt = datahub.ingestion.source.dbt.dbt_core:DBTCoreSource
42
44
  dbt-cloud = datahub.ingestion.source.dbt.dbt_cloud:DBTCloudSource
43
45
  delta-lake = datahub.ingestion.source.delta_lake:DeltaLakeSource
@@ -46,6 +48,7 @@ dremio = datahub.ingestion.source.dremio.dremio_source:DremioSource
46
48
  druid = datahub.ingestion.source.sql.druid:DruidSource
47
49
  dynamodb = datahub.ingestion.source.dynamodb.dynamodb:DynamoDBSource
48
50
  elasticsearch = datahub.ingestion.source.elastic_search:ElasticsearchSource
51
+ excel = datahub.ingestion.source.excel.source:ExcelSource
49
52
  feast = datahub.ingestion.source.feast:FeastRepositorySource
50
53
  file = datahub.ingestion.source.file:GenericFileSource
51
54
  fivetran = datahub.ingestion.source.fivetran.fivetran:FivetranSource
@@ -91,6 +94,7 @@ sagemaker = datahub.ingestion.source.aws.sagemaker:SagemakerSource
91
94
  salesforce = datahub.ingestion.source.salesforce:SalesforceSource
92
95
  sigma = datahub.ingestion.source.sigma.sigma:SigmaSource
93
96
  slack = datahub.ingestion.source.slack.slack:SlackSource
97
+ snaplogic = datahub.ingestion.source.snaplogic.snaplogic:SnaplogicSource
94
98
  snowflake = datahub.ingestion.source.snowflake.snowflake_v2:SnowflakeV2Source
95
99
  snowflake-queries = datahub.ingestion.source.snowflake.snowflake_queries:SnowflakeQueriesSource
96
100
  snowflake-summary = datahub.ingestion.source.snowflake.snowflake_summary:SnowflakeSummarySource
@@ -127,6 +131,7 @@ pattern_cleanup_dataset_usage_user = datahub.ingestion.transformer.pattern_clean
127
131
  pattern_cleanup_ownership = datahub.ingestion.transformer.pattern_cleanup_ownership:PatternCleanUpOwnership
128
132
  replace_external_url = datahub.ingestion.transformer.replace_external_url:ReplaceExternalUrlDataset
129
133
  replace_external_url_container = datahub.ingestion.transformer.replace_external_url:ReplaceExternalUrlContainer
134
+ set_browse_path = datahub.ingestion.transformer.set_browse_path:SetBrowsePathTransformer
130
135
  set_dataset_browse_path = datahub.ingestion.transformer.add_dataset_browse_path:AddDatasetBrowsePathTransformer
131
136
  simple_add_dataset_dataproduct = datahub.ingestion.transformer.add_dataset_dataproduct:SimpleAddDatasetDataProduct
132
137
  simple_add_dataset_domain = datahub.ingestion.transformer.dataset_domain:SimpleAddDatasetDomain
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "1.1.1rc4"
3
+ __version__ = "1.3.0.1rc9"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
@@ -53,5 +53,5 @@ class BaseEntityAssertion(BaseAssertion):
53
53
  )
54
54
 
55
55
  trigger: Optional[AssertionTrigger] = v1_Field(
56
- description="The trigger schedule for assertion", alias="schedule"
56
+ default=None, description="The trigger schedule for assertion", alias="schedule"
57
57
  )
@@ -131,7 +131,7 @@ class SerializedResourceValue(BaseModel):
131
131
  elif isinstance(object, BaseModel):
132
132
  return SerializedResourceValue(
133
133
  content_type=models.SerializedValueContentTypeClass.JSON,
134
- blob=json.dumps(object.dict()).encode("utf-8"),
134
+ blob=json.dumps(object.dict(), sort_keys=True).encode("utf-8"),
135
135
  schema_type=models.SerializedValueSchemaTypeClass.JSON,
136
136
  schema_ref=object.__class__.__name__,
137
137
  )
@@ -71,7 +71,7 @@ class CorpGroup(BaseModel):
71
71
  _rename_admins_to_owners = pydantic_renamed_field("admins", "owners")
72
72
 
73
73
  @pydantic.validator("owners", "members", each_item=True)
74
- def make_urn_if_needed(v):
74
+ def make_urn_if_needed(cls, v):
75
75
  if isinstance(v, str):
76
76
  return builder.make_user_urn(v)
77
77
  return v
@@ -6,9 +6,10 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
6
6
 
7
7
  import pydantic
8
8
  from ruamel.yaml import YAML
9
+ from typing_extensions import assert_never
9
10
 
10
11
  import datahub.emitter.mce_builder as builder
11
- from datahub.configuration.common import ConfigModel
12
+ from datahub.configuration.common import ConfigModel, LaxStr
12
13
  from datahub.emitter.generic_emitter import Emitter
13
14
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
14
15
  from datahub.ingestion.graph.client import DataHubGraph
@@ -110,8 +111,9 @@ class DataProduct(ConfigModel):
110
111
  description: Optional[str] = None
111
112
  tags: Optional[List[str]] = None
112
113
  terms: Optional[List[str]] = None
113
- properties: Optional[Dict[str, str]] = None
114
+ properties: Optional[Dict[str, LaxStr]] = None
114
115
  external_url: Optional[str] = None
116
+ output_ports: Optional[List[str]] = None
115
117
  _original_yaml_dict: Optional[dict] = None
116
118
 
117
119
  @pydantic.validator("assets", each_item=True)
@@ -123,6 +125,22 @@ class DataProduct(ConfigModel):
123
125
 
124
126
  return v
125
127
 
128
+ @pydantic.validator("output_ports", each_item=True)
129
+ def output_ports_must_be_urns(cls, v: str) -> str:
130
+ try:
131
+ Urn.create_from_string(v)
132
+ except Exception as e:
133
+ raise ValueError(f"Output port {v} is not an urn: {e}") from e
134
+
135
+ return v
136
+
137
+ @pydantic.validator("output_ports", each_item=True)
138
+ def output_ports_must_be_from_asset_list(cls, v: str, values: dict) -> str:
139
+ assets = values.get("assets", [])
140
+ if v not in assets:
141
+ raise ValueError(f"Output port {v} is not in asset list")
142
+ return v
143
+
126
144
  @property
127
145
  def urn(self) -> str:
128
146
  if self.id.startswith("urn:li:dataProduct:"):
@@ -180,6 +198,7 @@ class DataProduct(ConfigModel):
180
198
  DataProductAssociationClass(
181
199
  destinationUrn=asset,
182
200
  created=self._mint_auditstamp("yaml"),
201
+ outputPort=asset in (self.output_ports or []),
183
202
  )
184
203
  for asset in self.assets
185
204
  ]
@@ -203,6 +222,7 @@ class DataProduct(ConfigModel):
203
222
  DataProductAssociationClass(
204
223
  destinationUrn=asset,
205
224
  created=self._mint_auditstamp("yaml"),
225
+ outputPort=asset in (self.output_ports or []),
206
226
  )
207
227
  for asset in self.assets or []
208
228
  ],
@@ -368,6 +388,13 @@ class DataProduct(ConfigModel):
368
388
  external_url=(
369
389
  data_product_properties.externalUrl if data_product_properties else None
370
390
  ),
391
+ output_ports=[
392
+ e.destinationUrn
393
+ for e in (data_product_properties.assets or [])
394
+ if e.outputPort
395
+ ]
396
+ if data_product_properties
397
+ else None,
371
398
  )
372
399
 
373
400
  def _patch_ownership(
@@ -414,7 +441,9 @@ class DataProduct(ConfigModel):
414
441
  "type": new_owner_type_map[owner_urn],
415
442
  }
416
443
  else:
417
- patches_drop[i] = o
444
+ patches_drop[i] = o.model_dump()
445
+ else:
446
+ assert_never(o)
418
447
 
419
448
  # Figure out what if any are new owners to add
420
449
  new_owners_to_add = {o for o in new_owner_type_map} - set(owners_matched)
@@ -27,10 +27,11 @@ from typing_extensions import TypeAlias
27
27
 
28
28
  import datahub.metadata.schema_classes as models
29
29
  from datahub.api.entities.structuredproperties.structuredproperties import AllowedTypes
30
- from datahub.configuration.common import ConfigModel
30
+ from datahub.configuration.common import ConfigModel, LaxStr
31
31
  from datahub.emitter.mce_builder import (
32
32
  make_data_platform_urn,
33
33
  make_dataset_urn,
34
+ make_domain_urn,
34
35
  make_schema_field_urn,
35
36
  make_tag_urn,
36
37
  make_term_urn,
@@ -43,6 +44,7 @@ from datahub.ingestion.graph.client import DataHubGraph
43
44
  from datahub.metadata.schema_classes import (
44
45
  AuditStampClass,
45
46
  DatasetPropertiesClass,
47
+ DomainsClass,
46
48
  GlobalTagsClass,
47
49
  GlossaryTermAssociationClass,
48
50
  GlossaryTermsClass,
@@ -134,14 +136,13 @@ class StructuredPropertiesHelper:
134
136
 
135
137
  class SchemaFieldSpecification(StrictModel):
136
138
  id: Optional[str] = None
137
- urn: Optional[str] = None
139
+ urn: Optional[str] = Field(None, validate_default=True)
138
140
  structured_properties: Optional[StructuredProperties] = None
139
141
  type: Optional[str] = None
140
142
  nativeDataType: Optional[str] = None
141
143
  jsonPath: Union[None, str] = None
142
144
  nullable: bool = False
143
145
  description: Union[None, str] = None
144
- doc: Union[None, str] = None # doc is an alias for description
145
146
  label: Optional[str] = None
146
147
  created: Optional[dict] = None
147
148
  lastModified: Optional[dict] = None
@@ -219,14 +220,14 @@ class SchemaFieldSpecification(StrictModel):
219
220
  return v
220
221
 
221
222
  @root_validator(pre=True)
222
- def sync_description_and_doc(cls, values: Dict) -> Dict:
223
- """Synchronize doc and description fields if one is provided but not the other."""
223
+ def sync_doc_into_description(cls, values: Dict) -> Dict:
224
+ """Synchronize doc into description field if doc is provided."""
224
225
  description = values.get("description")
225
- doc = values.get("doc")
226
+ doc = values.pop("doc", None)
226
227
 
227
- if description is not None and doc is None:
228
- values["doc"] = description
229
- elif doc is not None and description is None:
228
+ if doc is not None:
229
+ if description is not None:
230
+ raise ValueError("doc and description cannot both be provided")
230
231
  values["description"] = doc
231
232
 
232
233
  return values
@@ -294,10 +295,6 @@ class SchemaFieldSpecification(StrictModel):
294
295
  """Custom dict method for Pydantic v1 to handle YAML serialization properly."""
295
296
  exclude = kwargs.pop("exclude", None) or set()
296
297
 
297
- # If description and doc are identical, exclude doc from the output
298
- if self.description == self.doc and self.description is not None:
299
- exclude.add("doc")
300
-
301
298
  # if nativeDataType and type are identical, exclude nativeDataType from the output
302
299
  if self.nativeDataType == self.type and self.nativeDataType is not None:
303
300
  exclude.add("nativeDataType")
@@ -325,10 +322,6 @@ class SchemaFieldSpecification(StrictModel):
325
322
  """Custom model_dump method for Pydantic v2 to handle YAML serialization properly."""
326
323
  exclude = kwargs.pop("exclude", None) or set()
327
324
 
328
- # If description and doc are identical, exclude doc from the output
329
- if self.description == self.doc and self.description is not None:
330
- exclude.add("doc")
331
-
332
325
  # if nativeDataType and type are identical, exclude nativeDataType from the output
333
326
  if self.nativeDataType == self.type and self.nativeDataType is not None:
334
327
  exclude.add("nativeDataType")
@@ -380,12 +373,12 @@ class Dataset(StrictModel):
380
373
  id: Optional[str] = None
381
374
  platform: Optional[str] = None
382
375
  env: str = "PROD"
383
- urn: Optional[str] = None
376
+ urn: Optional[str] = Field(None, validate_default=True)
384
377
  description: Optional[str] = None
385
- name: Optional[str] = None
386
- schema_metadata: Optional[SchemaSpecification] = Field(alias="schema")
378
+ name: Optional[str] = Field(None, validate_default=True)
379
+ schema_metadata: Optional[SchemaSpecification] = Field(default=None, alias="schema")
387
380
  downstreams: Optional[List[str]] = None
388
- properties: Optional[Dict[str, str]] = None
381
+ properties: Optional[Dict[str, LaxStr]] = None
389
382
  subtype: Optional[str] = None
390
383
  subtypes: Optional[List[str]] = None
391
384
  tags: Optional[List[str]] = None
@@ -393,6 +386,7 @@ class Dataset(StrictModel):
393
386
  owners: Optional[List[Union[str, Ownership]]] = None
394
387
  structured_properties: Optional[StructuredProperties] = None
395
388
  external_url: Optional[str] = None
389
+ domains: Optional[List[str]] = None
396
390
 
397
391
  @property
398
392
  def platform_urn(self) -> str:
@@ -602,7 +596,7 @@ class Dataset(StrictModel):
602
596
  ],
603
597
  platformSchema=OtherSchemaClass(
604
598
  rawSchema=yaml.dump(
605
- self.schema_metadata.dict(
599
+ self.schema_metadata.model_dump(
606
600
  exclude_none=True, exclude_unset=True
607
601
  )
608
602
  )
@@ -735,7 +729,14 @@ class Dataset(StrictModel):
735
729
  )
736
730
  )
737
731
  yield from patch_builder.build()
738
-
732
+ if self.domains:
733
+ mcp = MetadataChangeProposalWrapper(
734
+ entityUrn=self.urn,
735
+ aspect=DomainsClass(
736
+ [make_domain_urn(domain) for domain in self.domains]
737
+ ),
738
+ )
739
+ yield mcp
739
740
  logger.info(f"Created dataset {self.urn}")
740
741
 
741
742
  @staticmethod
@@ -897,6 +898,7 @@ class Dataset(StrictModel):
897
898
  structured_properties_map[sp.propertyUrn].extend(sp.values) # type: ignore[arg-type,union-attr]
898
899
  else:
899
900
  structured_properties_map[sp.propertyUrn] = sp.values
901
+ domains: Optional[DomainsClass] = graph.get_aspect(urn, DomainsClass)
900
902
 
901
903
  if config.include_downstreams:
902
904
  related_downstreams = graph.get_related_entities(
@@ -937,6 +939,7 @@ class Dataset(StrictModel):
937
939
  structured_properties=(
938
940
  structured_properties_map if structured_properties else None
939
941
  ),
942
+ domains=[domain for domain in domains.domains] if domains else None,
940
943
  downstreams=downstreams if config.include_downstreams else None,
941
944
  )
942
945
 
File without changes