acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (414)
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +1 -1
  7. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  8. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  9. datahub/api/entities/dataset/dataset.py +26 -23
  10. datahub/api/entities/external/__init__.py +0 -0
  11. datahub/api/entities/external/external_entities.py +724 -0
  12. datahub/api/entities/external/external_tag.py +147 -0
  13. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  14. datahub/api/entities/external/restricted_text.py +172 -0
  15. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  16. datahub/api/entities/forms/forms.py +3 -3
  17. datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
  18. datahub/api/graphql/operation.py +10 -6
  19. datahub/cli/check_cli.py +88 -7
  20. datahub/cli/cli_utils.py +63 -0
  21. datahub/cli/config_utils.py +18 -10
  22. datahub/cli/container_cli.py +5 -0
  23. datahub/cli/delete_cli.py +125 -27
  24. datahub/cli/docker_check.py +110 -14
  25. datahub/cli/docker_cli.py +153 -229
  26. datahub/cli/exists_cli.py +0 -2
  27. datahub/cli/get_cli.py +0 -2
  28. datahub/cli/graphql_cli.py +1422 -0
  29. datahub/cli/iceberg_cli.py +5 -0
  30. datahub/cli/ingest_cli.py +3 -15
  31. datahub/cli/migrate.py +2 -0
  32. datahub/cli/put_cli.py +1 -4
  33. datahub/cli/quickstart_versioning.py +53 -10
  34. datahub/cli/specific/assertions_cli.py +37 -6
  35. datahub/cli/specific/datacontract_cli.py +54 -7
  36. datahub/cli/specific/dataproduct_cli.py +2 -15
  37. datahub/cli/specific/dataset_cli.py +1 -8
  38. datahub/cli/specific/forms_cli.py +0 -4
  39. datahub/cli/specific/group_cli.py +0 -2
  40. datahub/cli/specific/structuredproperties_cli.py +1 -4
  41. datahub/cli/specific/user_cli.py +172 -3
  42. datahub/cli/state_cli.py +0 -2
  43. datahub/cli/timeline_cli.py +0 -2
  44. datahub/configuration/common.py +40 -1
  45. datahub/configuration/connection_resolver.py +5 -2
  46. datahub/configuration/env_vars.py +331 -0
  47. datahub/configuration/import_resolver.py +7 -4
  48. datahub/configuration/kafka.py +21 -1
  49. datahub/configuration/pydantic_migration_helpers.py +6 -13
  50. datahub/configuration/source_common.py +3 -2
  51. datahub/configuration/validate_field_deprecation.py +5 -2
  52. datahub/configuration/validate_field_removal.py +8 -2
  53. datahub/configuration/validate_field_rename.py +6 -5
  54. datahub/configuration/validate_multiline_string.py +5 -2
  55. datahub/emitter/mce_builder.py +8 -4
  56. datahub/emitter/rest_emitter.py +103 -30
  57. datahub/entrypoints.py +6 -3
  58. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
  59. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  60. datahub/ingestion/api/decorators.py +15 -3
  61. datahub/ingestion/api/report.py +381 -3
  62. datahub/ingestion/api/sink.py +27 -2
  63. datahub/ingestion/api/source.py +165 -58
  64. datahub/ingestion/api/source_protocols.py +23 -0
  65. datahub/ingestion/autogenerated/__init__.py +0 -0
  66. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  67. datahub/ingestion/autogenerated/lineage.json +402 -0
  68. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  69. datahub/ingestion/extractor/schema_util.py +13 -4
  70. datahub/ingestion/glossary/classification_mixin.py +5 -0
  71. datahub/ingestion/graph/client.py +330 -25
  72. datahub/ingestion/graph/config.py +3 -2
  73. datahub/ingestion/graph/filters.py +30 -11
  74. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  75. datahub/ingestion/run/pipeline.py +81 -11
  76. datahub/ingestion/run/pipeline_config.py +2 -2
  77. datahub/ingestion/sink/datahub_kafka.py +1 -0
  78. datahub/ingestion/sink/datahub_rest.py +13 -5
  79. datahub/ingestion/sink/file.py +1 -0
  80. datahub/ingestion/source/abs/config.py +1 -1
  81. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  82. datahub/ingestion/source/abs/source.py +15 -30
  83. datahub/ingestion/source/aws/aws_common.py +185 -13
  84. datahub/ingestion/source/aws/glue.py +517 -244
  85. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  86. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  87. datahub/ingestion/source/aws/tag_entities.py +270 -0
  88. datahub/ingestion/source/azure/azure_common.py +3 -3
  89. datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
  90. datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
  91. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
  92. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
  93. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  94. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  95. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  96. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  97. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  98. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  99. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  100. datahub/ingestion/source/cassandra/cassandra.py +6 -8
  101. datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
  102. datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
  103. datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
  104. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  105. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  106. datahub/ingestion/source/common/subtypes.py +53 -0
  107. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  108. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  109. datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
  110. datahub/ingestion/source/datahub/config.py +12 -9
  111. datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
  112. datahub/ingestion/source/datahub/datahub_source.py +10 -0
  113. datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
  114. datahub/ingestion/source/dbt/dbt_common.py +224 -9
  115. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  116. datahub/ingestion/source/debug/__init__.py +0 -0
  117. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  118. datahub/ingestion/source/delta_lake/config.py +9 -5
  119. datahub/ingestion/source/delta_lake/source.py +8 -0
  120. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  121. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  122. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  123. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  124. datahub/ingestion/source/dremio/dremio_source.py +132 -98
  125. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  126. datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
  127. datahub/ingestion/source/excel/__init__.py +0 -0
  128. datahub/ingestion/source/excel/config.py +92 -0
  129. datahub/ingestion/source/excel/excel_file.py +539 -0
  130. datahub/ingestion/source/excel/profiling.py +308 -0
  131. datahub/ingestion/source/excel/report.py +49 -0
  132. datahub/ingestion/source/excel/source.py +662 -0
  133. datahub/ingestion/source/excel/util.py +18 -0
  134. datahub/ingestion/source/feast.py +8 -10
  135. datahub/ingestion/source/file.py +3 -0
  136. datahub/ingestion/source/fivetran/config.py +66 -7
  137. datahub/ingestion/source/fivetran/fivetran.py +227 -43
  138. datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
  139. datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
  140. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  141. datahub/ingestion/source/fivetran/response_models.py +97 -0
  142. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  143. datahub/ingestion/source/gcs/gcs_source.py +32 -4
  144. datahub/ingestion/source/ge_data_profiler.py +108 -31
  145. datahub/ingestion/source/ge_profiling_config.py +26 -11
  146. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  147. datahub/ingestion/source/grafana/field_utils.py +307 -0
  148. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  149. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  150. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  151. datahub/ingestion/source/grafana/lineage.py +202 -0
  152. datahub/ingestion/source/grafana/models.py +137 -0
  153. datahub/ingestion/source/grafana/report.py +90 -0
  154. datahub/ingestion/source/grafana/types.py +16 -0
  155. datahub/ingestion/source/hex/api.py +28 -1
  156. datahub/ingestion/source/hex/hex.py +16 -5
  157. datahub/ingestion/source/hex/mapper.py +16 -2
  158. datahub/ingestion/source/hex/model.py +2 -0
  159. datahub/ingestion/source/hex/query_fetcher.py +1 -1
  160. datahub/ingestion/source/iceberg/iceberg.py +123 -59
  161. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  162. datahub/ingestion/source/identity/azure_ad.py +1 -1
  163. datahub/ingestion/source/identity/okta.py +1 -14
  164. datahub/ingestion/source/kafka/kafka.py +16 -0
  165. datahub/ingestion/source/kafka_connect/common.py +2 -2
  166. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  167. datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
  168. datahub/ingestion/source/looker/looker_common.py +148 -79
  169. datahub/ingestion/source/looker/looker_config.py +15 -4
  170. datahub/ingestion/source/looker/looker_constant.py +4 -0
  171. datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
  172. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  173. datahub/ingestion/source/looker/looker_source.py +503 -547
  174. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  175. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  176. datahub/ingestion/source/looker/lookml_config.py +31 -3
  177. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  178. datahub/ingestion/source/looker/lookml_source.py +96 -117
  179. datahub/ingestion/source/looker/view_upstream.py +494 -1
  180. datahub/ingestion/source/metabase.py +32 -6
  181. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  182. datahub/ingestion/source/metadata/lineage.py +9 -9
  183. datahub/ingestion/source/mlflow.py +12 -2
  184. datahub/ingestion/source/mock_data/__init__.py +0 -0
  185. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  186. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  187. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  188. datahub/ingestion/source/mode.py +26 -5
  189. datahub/ingestion/source/mongodb.py +11 -1
  190. datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
  191. datahub/ingestion/source/nifi.py +2 -2
  192. datahub/ingestion/source/openapi.py +1 -1
  193. datahub/ingestion/source/powerbi/config.py +47 -21
  194. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  195. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  196. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  197. datahub/ingestion/source/powerbi/powerbi.py +10 -6
  198. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  199. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  200. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  201. datahub/ingestion/source/preset.py +3 -3
  202. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  203. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  204. datahub/ingestion/source/redash.py +1 -1
  205. datahub/ingestion/source/redshift/config.py +15 -9
  206. datahub/ingestion/source/redshift/datashares.py +1 -1
  207. datahub/ingestion/source/redshift/lineage.py +386 -687
  208. datahub/ingestion/source/redshift/query.py +23 -19
  209. datahub/ingestion/source/redshift/redshift.py +52 -111
  210. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  211. datahub/ingestion/source/redshift/report.py +0 -2
  212. datahub/ingestion/source/redshift/usage.py +6 -5
  213. datahub/ingestion/source/s3/report.py +4 -2
  214. datahub/ingestion/source/s3/source.py +449 -248
  215. datahub/ingestion/source/sac/sac.py +3 -1
  216. datahub/ingestion/source/salesforce.py +28 -13
  217. datahub/ingestion/source/schema/json_schema.py +14 -14
  218. datahub/ingestion/source/schema_inference/object.py +22 -6
  219. datahub/ingestion/source/sigma/data_classes.py +3 -0
  220. datahub/ingestion/source/sigma/sigma.py +7 -1
  221. datahub/ingestion/source/slack/slack.py +10 -16
  222. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  223. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  224. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  225. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  226. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  227. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  228. datahub/ingestion/source/snowflake/constants.py +3 -0
  229. datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
  230. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
  231. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
  232. datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
  233. datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
  234. datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
  235. datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
  236. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
  237. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  238. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  239. datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
  240. datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
  241. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  242. datahub/ingestion/source/sql/athena.py +217 -25
  243. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  244. datahub/ingestion/source/sql/clickhouse.py +24 -8
  245. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  246. datahub/ingestion/source/sql/druid.py +2 -2
  247. datahub/ingestion/source/sql/hana.py +3 -1
  248. datahub/ingestion/source/sql/hive.py +4 -3
  249. datahub/ingestion/source/sql/hive_metastore.py +19 -20
  250. datahub/ingestion/source/sql/mariadb.py +0 -1
  251. datahub/ingestion/source/sql/mssql/job_models.py +3 -1
  252. datahub/ingestion/source/sql/mssql/source.py +336 -57
  253. datahub/ingestion/source/sql/mysql.py +154 -4
  254. datahub/ingestion/source/sql/oracle.py +5 -5
  255. datahub/ingestion/source/sql/postgres.py +142 -6
  256. datahub/ingestion/source/sql/presto.py +2 -1
  257. datahub/ingestion/source/sql/sql_common.py +281 -49
  258. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  259. datahub/ingestion/source/sql/sql_types.py +22 -0
  260. datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
  261. datahub/ingestion/source/sql/teradata.py +1028 -245
  262. datahub/ingestion/source/sql/trino.py +11 -1
  263. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  264. datahub/ingestion/source/sql/vertica.py +14 -7
  265. datahub/ingestion/source/sql_queries.py +219 -121
  266. datahub/ingestion/source/state/checkpoint.py +8 -29
  267. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  268. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  269. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  270. datahub/ingestion/source/superset.py +314 -67
  271. datahub/ingestion/source/tableau/tableau.py +135 -59
  272. datahub/ingestion/source/tableau/tableau_common.py +9 -2
  273. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  274. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  275. datahub/ingestion/source/unity/config.py +160 -40
  276. datahub/ingestion/source/unity/connection.py +61 -0
  277. datahub/ingestion/source/unity/connection_test.py +1 -0
  278. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  279. datahub/ingestion/source/unity/proxy.py +794 -51
  280. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  281. datahub/ingestion/source/unity/proxy_types.py +36 -2
  282. datahub/ingestion/source/unity/report.py +15 -3
  283. datahub/ingestion/source/unity/source.py +465 -131
  284. datahub/ingestion/source/unity/tag_entities.py +197 -0
  285. datahub/ingestion/source/unity/usage.py +46 -4
  286. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  287. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
  288. datahub/ingestion/source/usage/usage_common.py +4 -3
  289. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  290. datahub/ingestion/source_config/pulsar.py +3 -1
  291. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  292. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  293. datahub/ingestion/transformer/base_transformer.py +8 -5
  294. datahub/ingestion/transformer/set_browse_path.py +112 -0
  295. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  296. datahub/metadata/_internal_schema_classes.py +6806 -4871
  297. datahub/metadata/_urns/urn_defs.py +1767 -1539
  298. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  299. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  300. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  301. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  302. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  303. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
  304. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  305. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  306. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  307. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  308. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  309. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  310. datahub/metadata/schema.avsc +18395 -16979
  311. datahub/metadata/schemas/Actors.avsc +38 -1
  312. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  313. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  314. datahub/metadata/schemas/Applications.avsc +38 -0
  315. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  316. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  317. datahub/metadata/schemas/ChartKey.avsc +1 -0
  318. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  319. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  320. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  321. datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
  322. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  323. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  324. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  325. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  326. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  327. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  328. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  329. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  330. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  331. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  332. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  333. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  334. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  335. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  336. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  337. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  338. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  339. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  340. datahub/metadata/schemas/DomainKey.avsc +2 -1
  341. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  342. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  343. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  344. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  345. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  346. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  347. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  348. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  349. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  350. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  351. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  352. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  353. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  354. datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
  355. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  356. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  357. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  358. datahub/metadata/schemas/Operation.avsc +4 -2
  359. datahub/metadata/schemas/Ownership.avsc +69 -0
  360. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  361. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  362. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  363. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  364. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  365. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  366. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  367. datahub/sdk/__init__.py +2 -0
  368. datahub/sdk/_all_entities.py +7 -0
  369. datahub/sdk/_shared.py +249 -5
  370. datahub/sdk/chart.py +386 -0
  371. datahub/sdk/container.py +7 -0
  372. datahub/sdk/dashboard.py +453 -0
  373. datahub/sdk/dataflow.py +7 -0
  374. datahub/sdk/datajob.py +45 -13
  375. datahub/sdk/dataset.py +56 -2
  376. datahub/sdk/entity_client.py +111 -9
  377. datahub/sdk/lineage_client.py +663 -82
  378. datahub/sdk/main_client.py +50 -16
  379. datahub/sdk/mlmodel.py +120 -38
  380. datahub/sdk/mlmodelgroup.py +7 -0
  381. datahub/sdk/search_client.py +7 -3
  382. datahub/sdk/search_filters.py +304 -36
  383. datahub/secret/datahub_secret_store.py +3 -0
  384. datahub/secret/environment_secret_store.py +29 -0
  385. datahub/secret/file_secret_store.py +49 -0
  386. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  387. datahub/specific/aspect_helpers/siblings.py +73 -0
  388. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  389. datahub/specific/chart.py +1 -1
  390. datahub/specific/datajob.py +15 -1
  391. datahub/specific/dataproduct.py +4 -0
  392. datahub/specific/dataset.py +39 -59
  393. datahub/sql_parsing/split_statements.py +13 -0
  394. datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
  395. datahub/sql_parsing/sqlglot_lineage.py +196 -42
  396. datahub/sql_parsing/sqlglot_utils.py +12 -4
  397. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  398. datahub/telemetry/telemetry.py +28 -14
  399. datahub/testing/sdk_v2_helpers.py +7 -1
  400. datahub/upgrade/upgrade.py +73 -17
  401. datahub/utilities/file_backed_collections.py +8 -9
  402. datahub/utilities/is_pytest.py +3 -2
  403. datahub/utilities/logging_manager.py +22 -6
  404. datahub/utilities/mapping.py +29 -2
  405. datahub/utilities/sample_data.py +5 -4
  406. datahub/utilities/server_config_util.py +10 -1
  407. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  408. datahub/utilities/stats_collections.py +4 -0
  409. datahub/utilities/urns/urn.py +41 -2
  410. datahub/emitter/sql_parsing_builder.py +0 -306
  411. datahub/ingestion/source/redshift/lineage_v2.py +0 -466
  412. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
  413. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
  414. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -18,6 +18,7 @@ from sqlalchemy.sql import sqltypes
18
18
  from sqlalchemy.types import BOOLEAN, DATE, DATETIME, INTEGER
19
19
 
20
20
  import datahub.emitter.mce_builder as builder
21
+ from datahub.configuration.common import HiddenFromDocs, LaxStr
21
22
  from datahub.configuration.source_common import DatasetLineageProviderConfigBase
22
23
  from datahub.configuration.time_window_config import BaseTimeWindowConfig
23
24
  from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
@@ -32,6 +33,7 @@ from datahub.ingestion.api.decorators import (
32
33
  support_status,
33
34
  )
34
35
  from datahub.ingestion.api.workunit import MetadataWorkUnit
36
+ from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
35
37
  from datahub.ingestion.source.sql.sql_common import (
36
38
  SqlWorkUnit,
37
39
  logger,
@@ -127,16 +129,20 @@ class ClickHouseConfig(
127
129
  ):
128
130
  # defaults
129
131
  host_port: str = Field(default="localhost:8123", description="ClickHouse host URL.")
130
- scheme: str = Field(default="clickhouse", description="", hidden_from_docs=True)
132
+ scheme: HiddenFromDocs[str] = Field(default="clickhouse")
131
133
  password: pydantic.SecretStr = Field(
132
134
  default=pydantic.SecretStr(""), description="password"
133
135
  )
134
- secure: Optional[bool] = Field(default=None, description="")
135
- protocol: Optional[str] = Field(default=None, description="")
136
+ secure: Optional[bool] = Field(
137
+ default=None, description="[deprecated] Use uri_opts instead."
138
+ )
139
+ protocol: Optional[str] = Field(
140
+ default=None, description="[deprecated] Use uri_opts instead."
141
+ )
136
142
  _deprecate_secure = pydantic_field_deprecated("secure")
137
143
  _deprecate_protocol = pydantic_field_deprecated("protocol")
138
144
 
139
- uri_opts: Dict[str, str] = Field(
145
+ uri_opts: Dict[str, LaxStr] = Field(
140
146
  default={},
141
147
  description="The part of the URI and it's used to provide additional configuration options or parameters for the database connection.",
142
148
  )
@@ -184,9 +190,9 @@ class ClickHouseConfig(
184
190
  "Initializing uri_opts from deprecated secure or protocol options"
185
191
  )
186
192
  values["uri_opts"] = {}
187
- if secure:
188
- values["uri_opts"]["secure"] = secure
189
- if protocol:
193
+ if secure is not None:
194
+ values["uri_opts"]["secure"] = str(secure)
195
+ if protocol is not None:
190
196
  values["uri_opts"]["protocol"] = protocol
191
197
  logger.debug(f"uri_opts: {uri_opts}")
192
198
  elif (secure or protocol) and uri_opts:
@@ -379,8 +385,18 @@ clickhouse_datetime_format = "%Y-%m-%d %H:%M:%S"
379
385
  @platform_name("ClickHouse")
380
386
  @config_class(ClickHouseConfig)
381
387
  @support_status(SupportStatus.CERTIFIED)
382
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
388
+ @capability(
389
+ SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
390
+ )
383
391
  @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
392
+ @capability(
393
+ SourceCapability.LINEAGE_COARSE,
394
+ "Enabled by default to get lineage for views via `include_view_lineage`",
395
+ subtype_modifier=[
396
+ SourceCapabilityModifier.VIEW,
397
+ SourceCapabilityModifier.TABLE,
398
+ ],
399
+ )
384
400
  class ClickHouseSource(TwoTierSQLAlchemySource):
385
401
  """
386
402
  This plugin extracts the following:
@@ -1,6 +1,6 @@
1
1
  from pydantic.fields import Field
2
2
 
3
- from datahub.configuration.common import AllowDenyPattern
3
+ from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
4
4
  from datahub.ingestion.api.common import PipelineContext
5
5
  from datahub.ingestion.api.decorators import (
6
6
  SourceCapability,
@@ -14,8 +14,10 @@ from datahub.ingestion.source.sql.postgres import PostgresConfig, PostgresSource
14
14
 
15
15
 
16
16
  class CockroachDBConfig(PostgresConfig):
17
- scheme = Field(default="cockroachdb+psycopg2", description="database scheme")
18
- schema_pattern = Field(
17
+ scheme: HiddenFromDocs[str] = Field(
18
+ default="cockroachdb+psycopg2", description="database scheme"
19
+ )
20
+ schema_pattern: AllowDenyPattern = Field(
19
21
  default=AllowDenyPattern(deny=["information_schema", "crdb_internal"])
20
22
  )
21
23
 
@@ -26,7 +28,6 @@ class CockroachDBConfig(PostgresConfig):
26
28
  @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
27
29
  @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
28
30
  @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
29
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
30
31
  class CockroachDBSource(PostgresSource):
31
32
  config: CockroachDBConfig
32
33
 
@@ -6,7 +6,7 @@ from pydantic.fields import Field
6
6
  from pydruid.db.sqlalchemy import DruidDialect
7
7
  from sqlalchemy.exc import ResourceClosedError
8
8
 
9
- from datahub.configuration.common import AllowDenyPattern
9
+ from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
10
10
  from datahub.ingestion.api.decorators import (
11
11
  SourceCapability,
12
12
  SupportStatus,
@@ -34,7 +34,7 @@ DruidDialect.get_table_names = get_table_names
34
34
 
35
35
  class DruidConfig(BasicSQLAlchemyConfig):
36
36
  # defaults
37
- scheme: str = "druid"
37
+ scheme: HiddenFromDocs[str] = "druid"
38
38
  schema_pattern: AllowDenyPattern = Field(
39
39
  default=AllowDenyPattern(deny=["^(lookup|sysgit|view).*"]),
40
40
  description="regex patterns for schemas to filter in ingestion.",
@@ -27,7 +27,9 @@ class HanaConfig(BasicSQLAlchemyConfig):
27
27
  @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
28
28
  @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
29
29
  @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
30
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
30
+ @capability(
31
+ SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
32
+ )
31
33
  class HanaSource(SQLAlchemySource):
32
34
  def __init__(self, config: HanaConfig, ctx: PipelineContext):
33
35
  super().__init__(config, ctx, "hana")
@@ -6,7 +6,7 @@ from enum import Enum
6
6
  from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
7
7
  from urllib.parse import urlparse
8
8
 
9
- from pydantic.class_validators import validator
9
+ from pydantic import validator
10
10
  from pydantic.fields import Field
11
11
 
12
12
  # This import verifies that the dependencies are available.
@@ -14,6 +14,7 @@ from pyhive import hive # noqa: F401
14
14
  from pyhive.sqlalchemy_hive import HiveDate, HiveDecimal, HiveDialect, HiveTimestamp
15
15
  from sqlalchemy.engine.reflection import Inspector
16
16
 
17
+ from datahub.configuration.common import HiddenFromDocs
17
18
  from datahub.emitter.mce_builder import (
18
19
  make_data_platform_urn,
19
20
  make_dataplatform_instance_urn,
@@ -651,10 +652,10 @@ HiveDialect.get_view_definition = get_view_definition_patched
651
652
 
652
653
  class HiveConfig(TwoTierSQLAlchemyConfig):
653
654
  # defaults
654
- scheme: str = Field(default="hive", hidden_from_docs=True)
655
+ scheme: HiddenFromDocs[str] = Field(default="hive")
655
656
 
656
657
  # Overriding as table location lineage is richer implementation here than with include_table_location_lineage
657
- include_table_location_lineage: bool = Field(default=False, hidden_from_docs=True)
658
+ include_table_location_lineage: HiddenFromDocs[bool] = Field(default=False)
658
659
 
659
660
  emit_storage_lineage: bool = Field(
660
661
  default=False,
@@ -1,17 +1,15 @@
1
1
  import base64
2
+ import dataclasses
2
3
  import json
3
4
  import logging
4
5
  from collections import namedtuple
5
6
  from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
6
7
 
7
- from pydantic.dataclasses import dataclass
8
- from pydantic.fields import Field
9
-
10
- # This import verifies that the dependencies are available.
8
+ from pydantic import Field
11
9
  from sqlalchemy import create_engine, text
12
10
  from sqlalchemy.engine.reflection import Inspector
13
11
 
14
- from datahub.configuration.common import AllowDenyPattern
12
+ from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
15
13
  from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance
16
14
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
17
15
  from datahub.ingestion.api.common import PipelineContext
@@ -27,6 +25,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
27
25
  from datahub.ingestion.source.common.subtypes import (
28
26
  DatasetContainerSubTypes,
29
27
  DatasetSubTypes,
28
+ SourceCapabilityModifier,
30
29
  )
31
30
  from datahub.ingestion.source.sql.sql_common import (
32
31
  SQLAlchemySource,
@@ -52,7 +51,6 @@ from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import Dataset
52
51
  from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
53
52
  from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField
54
53
  from datahub.metadata.schema_classes import (
55
- ChangeTypeClass,
56
54
  DatasetPropertiesClass,
57
55
  SubTypesClass,
58
56
  ViewPropertiesClass,
@@ -73,7 +71,7 @@ class HiveMetastoreConfigMode(StrEnum):
73
71
  trino = "trino"
74
72
 
75
73
 
76
- @dataclass
74
+ @dataclasses.dataclass
77
75
  class ViewDataset:
78
76
  dataset_name: str
79
77
  schema_name: str
@@ -99,7 +97,7 @@ class HiveMetastore(BasicSQLAlchemyConfig):
99
97
  default="localhost:3306",
100
98
  description="Host URL and port to connect to. Example: localhost:3306",
101
99
  )
102
- scheme: str = Field(default="mysql+pymysql", description="", hidden_from_docs=True)
100
+ scheme: HiddenFromDocs[str] = Field(default="mysql+pymysql")
103
101
 
104
102
  database_pattern: AllowDenyPattern = Field(
105
103
  default=AllowDenyPattern.allow_all(),
@@ -123,8 +121,8 @@ class HiveMetastore(BasicSQLAlchemyConfig):
123
121
  description="Dataset Subtype name to be 'Table' or 'View' Valid options: ['True', 'False']",
124
122
  )
125
123
 
126
- include_view_lineage: bool = Field(
127
- default=False, description="", hidden_from_docs=True
124
+ include_view_lineage: HiddenFromDocs[bool] = Field(
125
+ default=False,
128
126
  )
129
127
 
130
128
  include_catalog_name_in_ids: bool = Field(
@@ -161,12 +159,22 @@ class HiveMetastore(BasicSQLAlchemyConfig):
161
159
  @platform_name("Hive Metastore")
162
160
  @config_class(HiveMetastore)
163
161
  @support_status(SupportStatus.CERTIFIED)
164
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
162
+ @capability(
163
+ SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
164
+ )
165
165
  @capability(SourceCapability.DATA_PROFILING, "Not Supported", False)
166
166
  @capability(SourceCapability.CLASSIFICATION, "Not Supported", False)
167
167
  @capability(
168
168
  SourceCapability.LINEAGE_COARSE, "View lineage is not supported", supported=False
169
169
  )
170
+ @capability(
171
+ SourceCapability.CONTAINERS,
172
+ "Enabled by default",
173
+ subtype_modifier=[
174
+ SourceCapabilityModifier.CATALOG,
175
+ SourceCapabilityModifier.SCHEMA,
176
+ ],
177
+ )
170
178
  class HiveMetastoreSource(SQLAlchemySource):
171
179
  """
172
180
  This plugin extracts the following:
@@ -599,10 +607,7 @@ class HiveMetastoreSource(SQLAlchemySource):
599
607
  yield dpi_aspect
600
608
 
601
609
  yield MetadataChangeProposalWrapper(
602
- entityType="dataset",
603
- changeType=ChangeTypeClass.UPSERT,
604
610
  entityUrn=dataset_urn,
605
- aspectName="subTypes",
606
611
  aspect=SubTypesClass(typeNames=[self.table_subtype]),
607
612
  ).as_workunit()
608
613
 
@@ -808,10 +813,7 @@ class HiveMetastoreSource(SQLAlchemySource):
808
813
 
809
814
  # Add views subtype
810
815
  yield MetadataChangeProposalWrapper(
811
- entityType="dataset",
812
- changeType=ChangeTypeClass.UPSERT,
813
816
  entityUrn=dataset_urn,
814
- aspectName="subTypes",
815
817
  aspect=SubTypesClass(typeNames=[self.view_subtype]),
816
818
  ).as_workunit()
817
819
 
@@ -822,10 +824,7 @@ class HiveMetastoreSource(SQLAlchemySource):
822
824
  viewLogic=dataset.view_definition if dataset.view_definition else "",
823
825
  )
824
826
  yield MetadataChangeProposalWrapper(
825
- entityType="dataset",
826
- changeType=ChangeTypeClass.UPSERT,
827
827
  entityUrn=dataset_urn,
828
- aspectName="viewProperties",
829
828
  aspect=view_properties_aspect,
830
829
  ).as_workunit()
831
830
 
@@ -15,7 +15,6 @@ from datahub.ingestion.source.sql.mysql import MySQLConfig, MySQLSource
15
15
  @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
16
16
  @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
17
17
  @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
18
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
19
18
  class MariaDBSource(MySQLSource):
20
19
  def get_platform(self):
21
20
  return "mariadb"
@@ -134,7 +134,9 @@ class StoredProcedure:
134
134
 
135
135
  @property
136
136
  def escape_full_name(self) -> str:
137
- return f"[{self.db}].[{self.schema}].[{self.formatted_name}]"
137
+ return f"[{self.db}].[{self.schema}].[{self.formatted_name}]".replace(
138
+ "'", r"''"
139
+ )
138
140
 
139
141
  def to_base_procedure(self) -> BaseProcedure:
140
142
  return BaseProcedure(