acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (503)
  1. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
  2. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
  3. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
  5. datahub/_version.py +1 -1
  6. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  7. datahub/api/entities/assertion/assertion.py +1 -1
  8. datahub/api/entities/common/serialized_value.py +1 -1
  9. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  10. datahub/api/entities/datacontract/datacontract.py +35 -3
  11. datahub/api/entities/datajob/dataflow.py +18 -3
  12. datahub/api/entities/datajob/datajob.py +24 -4
  13. datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
  14. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  15. datahub/api/entities/dataset/dataset.py +47 -72
  16. datahub/api/entities/external/__init__.py +0 -0
  17. datahub/api/entities/external/external_entities.py +724 -0
  18. datahub/api/entities/external/external_tag.py +147 -0
  19. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  20. datahub/api/entities/external/restricted_text.py +172 -0
  21. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  22. datahub/api/entities/forms/forms.py +37 -37
  23. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  24. datahub/api/graphql/assertion.py +1 -1
  25. datahub/api/graphql/base.py +8 -6
  26. datahub/api/graphql/operation.py +14 -10
  27. datahub/cli/check_cli.py +91 -9
  28. datahub/cli/cli_utils.py +63 -0
  29. datahub/cli/config_utils.py +20 -12
  30. datahub/cli/container_cli.py +5 -0
  31. datahub/cli/delete_cli.py +133 -34
  32. datahub/cli/docker_check.py +110 -14
  33. datahub/cli/docker_cli.py +155 -231
  34. datahub/cli/exists_cli.py +2 -3
  35. datahub/cli/get_cli.py +2 -3
  36. datahub/cli/graphql_cli.py +1422 -0
  37. datahub/cli/iceberg_cli.py +11 -5
  38. datahub/cli/ingest_cli.py +25 -26
  39. datahub/cli/migrate.py +12 -9
  40. datahub/cli/migration_utils.py +4 -3
  41. datahub/cli/put_cli.py +4 -6
  42. datahub/cli/quickstart_versioning.py +53 -10
  43. datahub/cli/specific/assertions_cli.py +39 -7
  44. datahub/cli/specific/datacontract_cli.py +57 -9
  45. datahub/cli/specific/dataproduct_cli.py +12 -24
  46. datahub/cli/specific/dataset_cli.py +31 -21
  47. datahub/cli/specific/forms_cli.py +2 -5
  48. datahub/cli/specific/group_cli.py +2 -3
  49. datahub/cli/specific/structuredproperties_cli.py +5 -7
  50. datahub/cli/specific/user_cli.py +174 -4
  51. datahub/cli/state_cli.py +2 -3
  52. datahub/cli/timeline_cli.py +2 -3
  53. datahub/configuration/common.py +46 -2
  54. datahub/configuration/connection_resolver.py +5 -2
  55. datahub/configuration/env_vars.py +331 -0
  56. datahub/configuration/import_resolver.py +7 -4
  57. datahub/configuration/kafka.py +21 -1
  58. datahub/configuration/pydantic_migration_helpers.py +6 -13
  59. datahub/configuration/source_common.py +4 -3
  60. datahub/configuration/validate_field_deprecation.py +5 -2
  61. datahub/configuration/validate_field_removal.py +8 -2
  62. datahub/configuration/validate_field_rename.py +6 -5
  63. datahub/configuration/validate_multiline_string.py +5 -2
  64. datahub/emitter/mce_builder.py +12 -8
  65. datahub/emitter/mcp.py +20 -5
  66. datahub/emitter/mcp_builder.py +12 -0
  67. datahub/emitter/request_helper.py +138 -15
  68. datahub/emitter/response_helper.py +111 -19
  69. datahub/emitter/rest_emitter.py +399 -163
  70. datahub/entrypoints.py +10 -5
  71. datahub/errors.py +12 -0
  72. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
  73. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  74. datahub/ingestion/api/common.py +9 -0
  75. datahub/ingestion/api/decorators.py +15 -3
  76. datahub/ingestion/api/report.py +381 -3
  77. datahub/ingestion/api/sink.py +27 -2
  78. datahub/ingestion/api/source.py +174 -62
  79. datahub/ingestion/api/source_helpers.py +41 -3
  80. datahub/ingestion/api/source_protocols.py +23 -0
  81. datahub/ingestion/autogenerated/__init__.py +0 -0
  82. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  83. datahub/ingestion/autogenerated/lineage.json +402 -0
  84. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  85. datahub/ingestion/extractor/schema_util.py +31 -5
  86. datahub/ingestion/glossary/classification_mixin.py +9 -2
  87. datahub/ingestion/graph/client.py +492 -55
  88. datahub/ingestion/graph/config.py +18 -2
  89. datahub/ingestion/graph/filters.py +96 -32
  90. datahub/ingestion/graph/links.py +55 -0
  91. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  92. datahub/ingestion/run/pipeline.py +90 -23
  93. datahub/ingestion/run/pipeline_config.py +3 -3
  94. datahub/ingestion/sink/datahub_kafka.py +1 -0
  95. datahub/ingestion/sink/datahub_rest.py +31 -23
  96. datahub/ingestion/sink/file.py +1 -0
  97. datahub/ingestion/source/abs/config.py +1 -1
  98. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  99. datahub/ingestion/source/abs/source.py +15 -30
  100. datahub/ingestion/source/apply/datahub_apply.py +6 -5
  101. datahub/ingestion/source/aws/aws_common.py +185 -13
  102. datahub/ingestion/source/aws/glue.py +517 -244
  103. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  104. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  105. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  106. datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
  107. datahub/ingestion/source/aws/tag_entities.py +270 -0
  108. datahub/ingestion/source/azure/azure_common.py +3 -3
  109. datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
  110. datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
  111. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
  112. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
  113. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  114. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  115. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  116. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  117. datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
  118. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  119. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  120. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  121. datahub/ingestion/source/cassandra/cassandra.py +7 -18
  122. datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
  123. datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
  124. datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
  125. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  126. datahub/ingestion/source/common/data_platforms.py +23 -0
  127. datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
  128. datahub/ingestion/source/common/subtypes.py +73 -1
  129. datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
  130. datahub/ingestion/source/data_lake_common/object_store.py +732 -0
  131. datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
  132. datahub/ingestion/source/datahub/config.py +19 -5
  133. datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
  134. datahub/ingestion/source/datahub/datahub_source.py +11 -1
  135. datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
  136. datahub/ingestion/source/dbt/dbt_common.py +270 -26
  137. datahub/ingestion/source/dbt/dbt_core.py +88 -47
  138. datahub/ingestion/source/dbt/dbt_tests.py +8 -6
  139. datahub/ingestion/source/debug/__init__.py +0 -0
  140. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  141. datahub/ingestion/source/delta_lake/config.py +9 -5
  142. datahub/ingestion/source/delta_lake/source.py +8 -0
  143. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  144. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  145. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  146. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
  147. datahub/ingestion/source/dremio/dremio_entities.py +6 -5
  148. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  149. datahub/ingestion/source/dremio/dremio_source.py +228 -215
  150. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  151. datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
  152. datahub/ingestion/source/excel/__init__.py +0 -0
  153. datahub/ingestion/source/excel/config.py +92 -0
  154. datahub/ingestion/source/excel/excel_file.py +539 -0
  155. datahub/ingestion/source/excel/profiling.py +308 -0
  156. datahub/ingestion/source/excel/report.py +49 -0
  157. datahub/ingestion/source/excel/source.py +662 -0
  158. datahub/ingestion/source/excel/util.py +18 -0
  159. datahub/ingestion/source/feast.py +12 -14
  160. datahub/ingestion/source/file.py +3 -0
  161. datahub/ingestion/source/fivetran/config.py +67 -8
  162. datahub/ingestion/source/fivetran/fivetran.py +228 -43
  163. datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
  164. datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
  165. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  166. datahub/ingestion/source/fivetran/response_models.py +97 -0
  167. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  168. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
  169. datahub/ingestion/source/gcs/gcs_source.py +53 -10
  170. datahub/ingestion/source/gcs/gcs_utils.py +36 -9
  171. datahub/ingestion/source/ge_data_profiler.py +146 -33
  172. datahub/ingestion/source/ge_profiling_config.py +26 -11
  173. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  174. datahub/ingestion/source/grafana/field_utils.py +307 -0
  175. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  176. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  177. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  178. datahub/ingestion/source/grafana/lineage.py +202 -0
  179. datahub/ingestion/source/grafana/models.py +137 -0
  180. datahub/ingestion/source/grafana/report.py +90 -0
  181. datahub/ingestion/source/grafana/types.py +16 -0
  182. datahub/ingestion/source/hex/__init__.py +0 -0
  183. datahub/ingestion/source/hex/api.py +402 -0
  184. datahub/ingestion/source/hex/constants.py +8 -0
  185. datahub/ingestion/source/hex/hex.py +311 -0
  186. datahub/ingestion/source/hex/mapper.py +412 -0
  187. datahub/ingestion/source/hex/model.py +78 -0
  188. datahub/ingestion/source/hex/query_fetcher.py +307 -0
  189. datahub/ingestion/source/iceberg/iceberg.py +385 -164
  190. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  191. datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
  192. datahub/ingestion/source/identity/azure_ad.py +1 -1
  193. datahub/ingestion/source/identity/okta.py +1 -14
  194. datahub/ingestion/source/kafka/kafka.py +28 -71
  195. datahub/ingestion/source/kafka/kafka_config.py +78 -0
  196. datahub/ingestion/source/kafka_connect/common.py +2 -2
  197. datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
  198. datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
  199. datahub/ingestion/source/ldap.py +1 -1
  200. datahub/ingestion/source/looker/looker_common.py +216 -86
  201. datahub/ingestion/source/looker/looker_config.py +15 -4
  202. datahub/ingestion/source/looker/looker_constant.py +4 -0
  203. datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
  204. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  205. datahub/ingestion/source/looker/looker_source.py +539 -555
  206. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  207. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  208. datahub/ingestion/source/looker/lookml_config.py +31 -3
  209. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  210. datahub/ingestion/source/looker/lookml_source.py +103 -118
  211. datahub/ingestion/source/looker/view_upstream.py +494 -1
  212. datahub/ingestion/source/metabase.py +32 -6
  213. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  214. datahub/ingestion/source/metadata/lineage.py +11 -10
  215. datahub/ingestion/source/mlflow.py +254 -23
  216. datahub/ingestion/source/mock_data/__init__.py +0 -0
  217. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  218. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  219. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  220. datahub/ingestion/source/mode.py +359 -181
  221. datahub/ingestion/source/mongodb.py +11 -1
  222. datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
  223. datahub/ingestion/source/nifi.py +5 -5
  224. datahub/ingestion/source/openapi.py +85 -38
  225. datahub/ingestion/source/openapi_parser.py +59 -40
  226. datahub/ingestion/source/powerbi/config.py +92 -27
  227. datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
  228. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  229. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  230. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
  231. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  232. datahub/ingestion/source/powerbi/powerbi.py +66 -32
  233. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  234. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
  235. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  236. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  237. datahub/ingestion/source/preset.py +3 -3
  238. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  239. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  240. datahub/ingestion/source/redash.py +1 -1
  241. datahub/ingestion/source/redshift/config.py +15 -9
  242. datahub/ingestion/source/redshift/datashares.py +1 -1
  243. datahub/ingestion/source/redshift/lineage.py +386 -687
  244. datahub/ingestion/source/redshift/profile.py +2 -2
  245. datahub/ingestion/source/redshift/query.py +24 -20
  246. datahub/ingestion/source/redshift/redshift.py +52 -111
  247. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  248. datahub/ingestion/source/redshift/report.py +0 -2
  249. datahub/ingestion/source/redshift/usage.py +13 -11
  250. datahub/ingestion/source/s3/report.py +4 -2
  251. datahub/ingestion/source/s3/source.py +515 -244
  252. datahub/ingestion/source/sac/sac.py +3 -1
  253. datahub/ingestion/source/salesforce.py +28 -13
  254. datahub/ingestion/source/schema/json_schema.py +14 -14
  255. datahub/ingestion/source/schema_inference/object.py +22 -6
  256. datahub/ingestion/source/sigma/config.py +75 -8
  257. datahub/ingestion/source/sigma/data_classes.py +3 -0
  258. datahub/ingestion/source/sigma/sigma.py +36 -7
  259. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  260. datahub/ingestion/source/slack/slack.py +403 -140
  261. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  262. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  263. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  264. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  265. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  266. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  267. datahub/ingestion/source/snowflake/constants.py +4 -0
  268. datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
  269. datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
  270. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
  271. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  272. datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
  273. datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
  274. datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
  275. datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
  276. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
  277. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  278. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  279. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  280. datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
  281. datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
  282. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  283. datahub/ingestion/source/sql/athena.py +219 -26
  284. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  285. datahub/ingestion/source/sql/clickhouse.py +29 -9
  286. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  287. datahub/ingestion/source/sql/druid.py +9 -4
  288. datahub/ingestion/source/sql/hana.py +3 -1
  289. datahub/ingestion/source/sql/hive.py +28 -8
  290. datahub/ingestion/source/sql/hive_metastore.py +24 -25
  291. datahub/ingestion/source/sql/mariadb.py +0 -1
  292. datahub/ingestion/source/sql/mssql/job_models.py +18 -2
  293. datahub/ingestion/source/sql/mssql/source.py +376 -62
  294. datahub/ingestion/source/sql/mysql.py +154 -4
  295. datahub/ingestion/source/sql/oracle.py +62 -11
  296. datahub/ingestion/source/sql/postgres.py +142 -6
  297. datahub/ingestion/source/sql/presto.py +20 -2
  298. datahub/ingestion/source/sql/sql_common.py +281 -49
  299. datahub/ingestion/source/sql/sql_config.py +1 -34
  300. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  301. datahub/ingestion/source/sql/sql_types.py +27 -2
  302. datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
  303. datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
  304. datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
  305. datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
  306. datahub/ingestion/source/sql/teradata.py +1028 -245
  307. datahub/ingestion/source/sql/trino.py +43 -10
  308. datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
  309. datahub/ingestion/source/sql/vertica.py +14 -7
  310. datahub/ingestion/source/sql_queries.py +219 -121
  311. datahub/ingestion/source/state/checkpoint.py +8 -29
  312. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  313. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  314. datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
  315. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  316. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  317. datahub/ingestion/source/superset.py +810 -126
  318. datahub/ingestion/source/tableau/tableau.py +172 -69
  319. datahub/ingestion/source/tableau/tableau_common.py +11 -4
  320. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  321. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  322. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  323. datahub/ingestion/source/unity/config.py +161 -40
  324. datahub/ingestion/source/unity/connection.py +61 -0
  325. datahub/ingestion/source/unity/connection_test.py +1 -0
  326. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  327. datahub/ingestion/source/unity/proxy.py +794 -51
  328. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  329. datahub/ingestion/source/unity/proxy_types.py +36 -2
  330. datahub/ingestion/source/unity/report.py +15 -3
  331. datahub/ingestion/source/unity/source.py +465 -131
  332. datahub/ingestion/source/unity/tag_entities.py +197 -0
  333. datahub/ingestion/source/unity/usage.py +46 -4
  334. datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
  335. datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
  336. datahub/ingestion/source/usage/usage_common.py +4 -68
  337. datahub/ingestion/source/vertexai/__init__.py +0 -0
  338. datahub/ingestion/source/vertexai/vertexai.py +1367 -0
  339. datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
  340. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
  341. datahub/ingestion/source_config/pulsar.py +3 -1
  342. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  343. datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
  344. datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
  345. datahub/ingestion/transformer/base_transformer.py +8 -5
  346. datahub/ingestion/transformer/dataset_domain.py +1 -1
  347. datahub/ingestion/transformer/set_browse_path.py +112 -0
  348. datahub/integrations/assertion/common.py +3 -2
  349. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  350. datahub/lite/lite_util.py +2 -2
  351. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
  352. datahub/metadata/_urns/urn_defs.py +1866 -1582
  353. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  354. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  355. datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
  356. datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
  357. datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
  358. datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
  359. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  360. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  361. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  362. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
  363. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  364. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  365. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  366. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  367. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  368. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  369. datahub/metadata/schema.avsc +18404 -16617
  370. datahub/metadata/schema_classes.py +3 -3
  371. datahub/metadata/schemas/Actors.avsc +38 -1
  372. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  373. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  374. datahub/metadata/schemas/Applications.avsc +38 -0
  375. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  376. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  377. datahub/metadata/schemas/ChartKey.avsc +1 -0
  378. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  379. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  380. datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
  381. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  382. datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
  383. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  384. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  385. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  386. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  387. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  388. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  389. datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  390. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  391. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  392. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  393. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  394. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  395. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  396. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  397. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  398. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  399. datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
  400. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  401. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  402. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  403. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  404. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  405. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  406. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  407. datahub/metadata/schemas/Deprecation.avsc +2 -0
  408. datahub/metadata/schemas/DomainKey.avsc +2 -1
  409. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  410. datahub/metadata/schemas/FormInfo.avsc +5 -0
  411. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  412. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  413. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  414. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  415. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  416. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  417. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  418. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  419. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  420. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  421. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  422. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  423. datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
  424. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  425. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  426. datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
  427. datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
  428. datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
  429. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  430. datahub/metadata/schemas/Operation.avsc +21 -2
  431. datahub/metadata/schemas/Ownership.avsc +69 -0
  432. datahub/metadata/schemas/QueryProperties.avsc +24 -2
  433. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  434. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  435. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  436. datahub/metadata/schemas/Siblings.avsc +2 -0
  437. datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
  438. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  439. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  440. datahub/metadata/schemas/SystemMetadata.avsc +147 -0
  441. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  442. datahub/metadata/schemas/__init__.py +3 -3
  443. datahub/sdk/__init__.py +7 -0
  444. datahub/sdk/_all_entities.py +15 -0
  445. datahub/sdk/_shared.py +393 -10
  446. datahub/sdk/_utils.py +4 -0
  447. datahub/sdk/chart.py +386 -0
  448. datahub/sdk/container.py +7 -0
  449. datahub/sdk/dashboard.py +453 -0
  450. datahub/sdk/dataflow.py +309 -0
  451. datahub/sdk/datajob.py +367 -0
  452. datahub/sdk/dataset.py +180 -4
  453. datahub/sdk/entity.py +99 -3
  454. datahub/sdk/entity_client.py +154 -12
  455. datahub/sdk/lineage_client.py +943 -0
  456. datahub/sdk/main_client.py +83 -8
  457. datahub/sdk/mlmodel.py +383 -0
  458. datahub/sdk/mlmodelgroup.py +240 -0
  459. datahub/sdk/search_client.py +85 -8
  460. datahub/sdk/search_filters.py +393 -68
  461. datahub/secret/datahub_secret_store.py +5 -1
  462. datahub/secret/environment_secret_store.py +29 -0
  463. datahub/secret/file_secret_store.py +49 -0
  464. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  465. datahub/specific/aspect_helpers/siblings.py +73 -0
  466. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  467. datahub/specific/chart.py +1 -1
  468. datahub/specific/datajob.py +15 -1
  469. datahub/specific/dataproduct.py +4 -0
  470. datahub/specific/dataset.py +51 -59
  471. datahub/sql_parsing/_sqlglot_patch.py +1 -2
  472. datahub/sql_parsing/fingerprint_utils.py +6 -0
  473. datahub/sql_parsing/split_statements.py +30 -3
  474. datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
  475. datahub/sql_parsing/sqlglot_lineage.py +517 -44
  476. datahub/sql_parsing/sqlglot_utils.py +30 -18
  477. datahub/sql_parsing/tool_meta_extractor.py +25 -2
  478. datahub/telemetry/telemetry.py +30 -16
  479. datahub/testing/check_imports.py +1 -1
  480. datahub/testing/docker_utils.py +8 -2
  481. datahub/testing/mce_helpers.py +421 -0
  482. datahub/testing/mcp_diff.py +17 -21
  483. datahub/testing/sdk_v2_helpers.py +18 -0
  484. datahub/upgrade/upgrade.py +86 -30
  485. datahub/utilities/file_backed_collections.py +14 -15
  486. datahub/utilities/hive_schema_to_avro.py +2 -2
  487. datahub/utilities/ingest_utils.py +2 -2
  488. datahub/utilities/is_pytest.py +3 -2
  489. datahub/utilities/logging_manager.py +30 -7
  490. datahub/utilities/mapping.py +29 -2
  491. datahub/utilities/sample_data.py +5 -4
  492. datahub/utilities/server_config_util.py +298 -10
  493. datahub/utilities/sqlalchemy_query_combiner.py +6 -4
  494. datahub/utilities/stats_collections.py +4 -0
  495. datahub/utilities/threaded_iterator_executor.py +16 -3
  496. datahub/utilities/urn_encoder.py +1 -1
  497. datahub/utilities/urns/urn.py +41 -2
  498. datahub/emitter/sql_parsing_builder.py +0 -306
  499. datahub/ingestion/source/redshift/lineage_v2.py +0 -458
  500. datahub/ingestion/source/vertexai.py +0 -697
  501. datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
  502. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
  503. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -18,6 +18,7 @@ from sqlalchemy.sql import sqltypes
18
18
  from sqlalchemy.types import BOOLEAN, DATE, DATETIME, INTEGER
19
19
 
20
20
  import datahub.emitter.mce_builder as builder
21
+ from datahub.configuration.common import HiddenFromDocs, LaxStr
21
22
  from datahub.configuration.source_common import DatasetLineageProviderConfigBase
22
23
  from datahub.configuration.time_window_config import BaseTimeWindowConfig
23
24
  from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
@@ -32,6 +33,7 @@ from datahub.ingestion.api.decorators import (
32
33
  support_status,
33
34
  )
34
35
  from datahub.ingestion.api.workunit import MetadataWorkUnit
36
+ from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
35
37
  from datahub.ingestion.source.sql.sql_common import (
36
38
  SqlWorkUnit,
37
39
  logger,
@@ -127,16 +129,20 @@ class ClickHouseConfig(
127
129
  ):
128
130
  # defaults
129
131
  host_port: str = Field(default="localhost:8123", description="ClickHouse host URL.")
130
- scheme: str = Field(default="clickhouse", description="", hidden_from_docs=True)
132
+ scheme: HiddenFromDocs[str] = Field(default="clickhouse")
131
133
  password: pydantic.SecretStr = Field(
132
134
  default=pydantic.SecretStr(""), description="password"
133
135
  )
134
- secure: Optional[bool] = Field(default=None, description="")
135
- protocol: Optional[str] = Field(default=None, description="")
136
+ secure: Optional[bool] = Field(
137
+ default=None, description="[deprecated] Use uri_opts instead."
138
+ )
139
+ protocol: Optional[str] = Field(
140
+ default=None, description="[deprecated] Use uri_opts instead."
141
+ )
136
142
  _deprecate_secure = pydantic_field_deprecated("secure")
137
143
  _deprecate_protocol = pydantic_field_deprecated("protocol")
138
144
 
139
- uri_opts: Dict[str, str] = Field(
145
+ uri_opts: Dict[str, LaxStr] = Field(
140
146
  default={},
141
147
  description="The part of the URI and it's used to provide additional configuration options or parameters for the database connection.",
142
148
  )
@@ -145,7 +151,11 @@ class ClickHouseConfig(
145
151
  )
146
152
  include_materialized_views: Optional[bool] = Field(default=True, description="")
147
153
 
148
- def get_sql_alchemy_url(self, current_db=None):
154
+ def get_sql_alchemy_url(
155
+ self,
156
+ uri_opts: Optional[Dict[str, Any]] = None,
157
+ current_db: Optional[str] = None,
158
+ ) -> str:
149
159
  url = make_url(
150
160
  super().get_sql_alchemy_url(uri_opts=self.uri_opts, current_db=current_db)
151
161
  )
@@ -180,9 +190,9 @@ class ClickHouseConfig(
180
190
  "Initializing uri_opts from deprecated secure or protocol options"
181
191
  )
182
192
  values["uri_opts"] = {}
183
- if secure:
184
- values["uri_opts"]["secure"] = secure
185
- if protocol:
193
+ if secure is not None:
194
+ values["uri_opts"]["secure"] = str(secure)
195
+ if protocol is not None:
186
196
  values["uri_opts"]["protocol"] = protocol
187
197
  logger.debug(f"uri_opts: {uri_opts}")
188
198
  elif (secure or protocol) and uri_opts:
@@ -375,8 +385,18 @@ clickhouse_datetime_format = "%Y-%m-%d %H:%M:%S"
375
385
  @platform_name("ClickHouse")
376
386
  @config_class(ClickHouseConfig)
377
387
  @support_status(SupportStatus.CERTIFIED)
378
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
388
+ @capability(
389
+ SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
390
+ )
379
391
  @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
392
+ @capability(
393
+ SourceCapability.LINEAGE_COARSE,
394
+ "Enabled by default to get lineage for views via `include_view_lineage`",
395
+ subtype_modifier=[
396
+ SourceCapabilityModifier.VIEW,
397
+ SourceCapabilityModifier.TABLE,
398
+ ],
399
+ )
380
400
  class ClickHouseSource(TwoTierSQLAlchemySource):
381
401
  """
382
402
  This plugin extracts the following:
@@ -1,6 +1,6 @@
1
1
  from pydantic.fields import Field
2
2
 
3
- from datahub.configuration.common import AllowDenyPattern
3
+ from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
4
4
  from datahub.ingestion.api.common import PipelineContext
5
5
  from datahub.ingestion.api.decorators import (
6
6
  SourceCapability,
@@ -14,8 +14,10 @@ from datahub.ingestion.source.sql.postgres import PostgresConfig, PostgresSource
14
14
 
15
15
 
16
16
  class CockroachDBConfig(PostgresConfig):
17
- scheme = Field(default="cockroachdb+psycopg2", description="database scheme")
18
- schema_pattern = Field(
17
+ scheme: HiddenFromDocs[str] = Field(
18
+ default="cockroachdb+psycopg2", description="database scheme"
19
+ )
20
+ schema_pattern: AllowDenyPattern = Field(
19
21
  default=AllowDenyPattern(deny=["information_schema", "crdb_internal"])
20
22
  )
21
23
 
@@ -26,7 +28,6 @@ class CockroachDBConfig(PostgresConfig):
26
28
  @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
27
29
  @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
28
30
  @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
29
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
30
31
  class CockroachDBSource(PostgresSource):
31
32
  config: CockroachDBConfig
32
33
 
@@ -1,10 +1,12 @@
1
1
  # This import verifies that the dependencies are available.
2
+ from typing import Any, Dict, Optional
3
+
2
4
  import pydruid # noqa: F401
3
5
  from pydantic.fields import Field
4
6
  from pydruid.db.sqlalchemy import DruidDialect
5
7
  from sqlalchemy.exc import ResourceClosedError
6
8
 
7
- from datahub.configuration.common import AllowDenyPattern
9
+ from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
8
10
  from datahub.ingestion.api.decorators import (
9
11
  SourceCapability,
10
12
  SupportStatus,
@@ -32,14 +34,17 @@ DruidDialect.get_table_names = get_table_names
32
34
 
33
35
  class DruidConfig(BasicSQLAlchemyConfig):
34
36
  # defaults
35
- scheme: str = "druid"
37
+ scheme: HiddenFromDocs[str] = "druid"
36
38
  schema_pattern: AllowDenyPattern = Field(
37
39
  default=AllowDenyPattern(deny=["^(lookup|sysgit|view).*"]),
38
40
  description="regex patterns for schemas to filter in ingestion.",
39
41
  )
40
42
 
41
- def get_sql_alchemy_url(self):
42
- return f"{super().get_sql_alchemy_url()}/druid/v2/sql/"
43
+ def get_sql_alchemy_url(
44
+ self, uri_opts: Optional[Dict[str, Any]] = None, database: Optional[str] = None
45
+ ) -> str:
46
+ base_url = super().get_sql_alchemy_url(uri_opts=uri_opts, database=database)
47
+ return f"{base_url}/druid/v2/sql/"
43
48
 
44
49
  """
45
50
  The pydruid library already formats the table name correctly, so we do not
@@ -27,7 +27,9 @@ class HanaConfig(BasicSQLAlchemyConfig):
27
27
  @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
28
28
  @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
29
29
  @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
30
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
30
+ @capability(
31
+ SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
32
+ )
31
33
  class HanaSource(SQLAlchemySource):
32
34
  def __init__(self, config: HanaConfig, ctx: PipelineContext):
33
35
  super().__init__(config, ctx, "hana")
@@ -6,7 +6,7 @@ from enum import Enum
6
6
  from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
7
7
  from urllib.parse import urlparse
8
8
 
9
- from pydantic.class_validators import validator
9
+ from pydantic import validator
10
10
  from pydantic.fields import Field
11
11
 
12
12
  # This import verifies that the dependencies are available.
@@ -14,6 +14,7 @@ from pyhive import hive # noqa: F401
14
14
  from pyhive.sqlalchemy_hive import HiveDate, HiveDecimal, HiveDialect, HiveTimestamp
15
15
  from sqlalchemy.engine.reflection import Inspector
16
16
 
17
+ from datahub.configuration.common import HiddenFromDocs
17
18
  from datahub.emitter.mce_builder import (
18
19
  make_data_platform_urn,
19
20
  make_dataplatform_instance_urn,
@@ -139,7 +140,7 @@ class StoragePathParser:
139
140
  path = f"{parsed.netloc}/{parsed.path.lstrip('/')}"
140
141
 
141
142
  elif platform == StoragePlatform.AZURE:
142
- if scheme in ("abfs", "abfss"):
143
+ if scheme in ("abfs", "abfss", "wasbs"):
143
144
  # Format: abfss://container@account.dfs.core.windows.net/path
144
145
  container = parsed.netloc.split("@")[0]
145
146
  path = f"{container}/{parsed.path.lstrip('/')}"
@@ -153,7 +154,7 @@ class StoragePathParser:
153
154
 
154
155
  elif platform == StoragePlatform.DBFS:
155
156
  # For DBFS, use path as-is
156
- path = parsed.path.lstrip("/")
157
+ path = "/" + parsed.path.lstrip("/")
157
158
 
158
159
  elif platform == StoragePlatform.LOCAL:
159
160
  # For local files, use full path
@@ -169,7 +170,6 @@ class StoragePathParser:
169
170
  # Clean up the path
170
171
  path = path.rstrip("/") # Remove trailing slashes
171
172
  path = re.sub(r"/+", "/", path) # Normalize multiple slashes
172
- path = f"/{path}"
173
173
 
174
174
  return platform, path
175
175
 
@@ -637,8 +637,13 @@ def get_view_definition_patched(self, connection, view_name, schema=None, **kw):
637
637
  self.identifier_preparer.quote_identifier(schema),
638
638
  self.identifier_preparer.quote_identifier(view_name),
639
639
  )
640
- row = connection.execute(f"SHOW CREATE TABLE {full_table}").fetchone()
641
- return row[0]
640
+ # Hive responds to the SHOW CREATE TABLE with the full view DDL,
641
+ # including the view definition. However, for multiline view definitions,
642
+ # it returns multiple rows (of one column each), each with a part of the definition.
643
+ # Any whitespace at the beginning/end of each view definition line is lost.
644
+ rows = connection.execute(f"SHOW CREATE TABLE {full_table}").fetchall()
645
+ parts = [row[0] for row in rows]
646
+ return "\n".join(parts)
642
647
 
643
648
 
644
649
  HiveDialect.get_view_names = get_view_names_patched
@@ -647,10 +652,10 @@ HiveDialect.get_view_definition = get_view_definition_patched
647
652
 
648
653
  class HiveConfig(TwoTierSQLAlchemyConfig):
649
654
  # defaults
650
- scheme: str = Field(default="hive", hidden_from_docs=True)
655
+ scheme: HiddenFromDocs[str] = Field(default="hive")
651
656
 
652
657
  # Overriding as table location lineage is richer implementation here than with include_table_location_lineage
653
- include_table_location_lineage: bool = Field(default=False, hidden_from_docs=True)
658
+ include_table_location_lineage: HiddenFromDocs[bool] = Field(default=False)
654
659
 
655
660
  emit_storage_lineage: bool = Field(
656
661
  default=False,
@@ -862,3 +867,18 @@ class HiveSource(TwoTierSQLAlchemySource):
862
867
  return partition_column.get("column_names")
863
868
 
864
869
  return []
870
+
871
+ def get_table_properties(
872
+ self, inspector: Inspector, schema: str, table: str
873
+ ) -> Tuple[Optional[str], Dict[str, str], Optional[str]]:
874
+ (description, properties, location) = super().get_table_properties(
875
+ inspector, schema, table
876
+ )
877
+
878
+ new_properties = {}
879
+ for key, value in properties.items():
880
+ if key and key[-1] == ":":
881
+ new_properties[key[:-1]] = value
882
+ else:
883
+ new_properties[key] = value
884
+ return (description, new_properties, location)
@@ -1,17 +1,15 @@
1
1
  import base64
2
+ import dataclasses
2
3
  import json
3
4
  import logging
4
5
  from collections import namedtuple
5
6
  from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
6
7
 
7
- from pydantic.dataclasses import dataclass
8
- from pydantic.fields import Field
9
-
10
- # This import verifies that the dependencies are available.
8
+ from pydantic import Field
11
9
  from sqlalchemy import create_engine, text
12
10
  from sqlalchemy.engine.reflection import Inspector
13
11
 
14
- from datahub.configuration.common import AllowDenyPattern
12
+ from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
15
13
  from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance
16
14
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
17
15
  from datahub.ingestion.api.common import PipelineContext
@@ -27,6 +25,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
27
25
  from datahub.ingestion.source.common.subtypes import (
28
26
  DatasetContainerSubTypes,
29
27
  DatasetSubTypes,
28
+ SourceCapabilityModifier,
30
29
  )
31
30
  from datahub.ingestion.source.sql.sql_common import (
32
31
  SQLAlchemySource,
@@ -36,7 +35,6 @@ from datahub.ingestion.source.sql.sql_common import (
36
35
  from datahub.ingestion.source.sql.sql_config import (
37
36
  BasicSQLAlchemyConfig,
38
37
  SQLCommonConfig,
39
- make_sqlalchemy_uri,
40
38
  )
41
39
  from datahub.ingestion.source.sql.sql_utils import (
42
40
  add_table_to_schema_container,
@@ -46,13 +44,13 @@ from datahub.ingestion.source.sql.sql_utils import (
46
44
  gen_schema_key,
47
45
  get_domain_wu,
48
46
  )
47
+ from datahub.ingestion.source.sql.sqlalchemy_uri import make_sqlalchemy_uri
49
48
  from datahub.ingestion.source.state.stateful_ingestion_base import JobId
50
49
  from datahub.metadata.com.linkedin.pegasus2avro.common import StatusClass
51
50
  from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
52
51
  from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
53
52
  from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField
54
53
  from datahub.metadata.schema_classes import (
55
- ChangeTypeClass,
56
54
  DatasetPropertiesClass,
57
55
  SubTypesClass,
58
56
  ViewPropertiesClass,
@@ -67,13 +65,13 @@ TableKey = namedtuple("TableKey", ["schema", "table"])
67
65
 
68
66
 
69
67
  class HiveMetastoreConfigMode(StrEnum):
70
- hive: str = "hive"
71
- presto: str = "presto"
72
- presto_on_hive: str = "presto-on-hive"
73
- trino: str = "trino"
68
+ hive = "hive"
69
+ presto = "presto"
70
+ presto_on_hive = "presto-on-hive"
71
+ trino = "trino"
74
72
 
75
73
 
76
- @dataclass
74
+ @dataclasses.dataclass
77
75
  class ViewDataset:
78
76
  dataset_name: str
79
77
  schema_name: str
@@ -99,7 +97,7 @@ class HiveMetastore(BasicSQLAlchemyConfig):
99
97
  default="localhost:3306",
100
98
  description="Host URL and port to connect to. Example: localhost:3306",
101
99
  )
102
- scheme: str = Field(default="mysql+pymysql", description="", hidden_from_docs=True)
100
+ scheme: HiddenFromDocs[str] = Field(default="mysql+pymysql")
103
101
 
104
102
  database_pattern: AllowDenyPattern = Field(
105
103
  default=AllowDenyPattern.allow_all(),
@@ -123,8 +121,8 @@ class HiveMetastore(BasicSQLAlchemyConfig):
123
121
  description="Dataset Subtype name to be 'Table' or 'View' Valid options: ['True', 'False']",
124
122
  )
125
123
 
126
- include_view_lineage: bool = Field(
127
- default=False, description="", hidden_from_docs=True
124
+ include_view_lineage: HiddenFromDocs[bool] = Field(
125
+ default=False,
128
126
  )
129
127
 
130
128
  include_catalog_name_in_ids: bool = Field(
@@ -161,12 +159,22 @@ class HiveMetastore(BasicSQLAlchemyConfig):
161
159
  @platform_name("Hive Metastore")
162
160
  @config_class(HiveMetastore)
163
161
  @support_status(SupportStatus.CERTIFIED)
164
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
162
+ @capability(
163
+ SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
164
+ )
165
165
  @capability(SourceCapability.DATA_PROFILING, "Not Supported", False)
166
166
  @capability(SourceCapability.CLASSIFICATION, "Not Supported", False)
167
167
  @capability(
168
168
  SourceCapability.LINEAGE_COARSE, "View lineage is not supported", supported=False
169
169
  )
170
+ @capability(
171
+ SourceCapability.CONTAINERS,
172
+ "Enabled by default",
173
+ subtype_modifier=[
174
+ SourceCapabilityModifier.CATALOG,
175
+ SourceCapabilityModifier.SCHEMA,
176
+ ],
177
+ )
170
178
  class HiveMetastoreSource(SQLAlchemySource):
171
179
  """
172
180
  This plugin extracts the following:
@@ -599,10 +607,7 @@ class HiveMetastoreSource(SQLAlchemySource):
599
607
  yield dpi_aspect
600
608
 
601
609
  yield MetadataChangeProposalWrapper(
602
- entityType="dataset",
603
- changeType=ChangeTypeClass.UPSERT,
604
610
  entityUrn=dataset_urn,
605
- aspectName="subTypes",
606
611
  aspect=SubTypesClass(typeNames=[self.table_subtype]),
607
612
  ).as_workunit()
608
613
 
@@ -808,10 +813,7 @@ class HiveMetastoreSource(SQLAlchemySource):
808
813
 
809
814
  # Add views subtype
810
815
  yield MetadataChangeProposalWrapper(
811
- entityType="dataset",
812
- changeType=ChangeTypeClass.UPSERT,
813
816
  entityUrn=dataset_urn,
814
- aspectName="subTypes",
815
817
  aspect=SubTypesClass(typeNames=[self.view_subtype]),
816
818
  ).as_workunit()
817
819
 
@@ -822,10 +824,7 @@ class HiveMetastoreSource(SQLAlchemySource):
822
824
  viewLogic=dataset.view_definition if dataset.view_definition else "",
823
825
  )
824
826
  yield MetadataChangeProposalWrapper(
825
- entityType="dataset",
826
- changeType=ChangeTypeClass.UPSERT,
827
827
  entityUrn=dataset_urn,
828
- aspectName="viewProperties",
829
828
  aspect=view_properties_aspect,
830
829
  ).as_workunit()
831
830
 
@@ -15,7 +15,6 @@ from datahub.ingestion.source.sql.mysql import MySQLConfig, MySQLSource
15
15
  @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
16
16
  @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
17
17
  @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
18
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
19
18
  class MariaDBSource(MySQLSource):
20
19
  def get_platform(self):
21
20
  return "mariadb"
@@ -15,6 +15,7 @@ from datahub.ingestion.source.common.subtypes import (
15
15
  FlowContainerSubTypes,
16
16
  JobContainerSubTypes,
17
17
  )
18
+ from datahub.ingestion.source.sql.stored_procedures.base import BaseProcedure
18
19
  from datahub.metadata.schema_classes import (
19
20
  ContainerClass,
20
21
  DataFlowInfoClass,
@@ -133,7 +134,22 @@ class StoredProcedure:
133
134
 
134
135
  @property
135
136
  def escape_full_name(self) -> str:
136
- return f"[{self.db}].[{self.schema}].[{self.formatted_name}]"
137
+ return f"[{self.db}].[{self.schema}].[{self.formatted_name}]".replace(
138
+ "'", r"''"
139
+ )
140
+
141
+ def to_base_procedure(self) -> BaseProcedure:
142
+ return BaseProcedure(
143
+ name=self.formatted_name,
144
+ procedure_definition=self.code,
145
+ created=None,
146
+ last_altered=None,
147
+ comment=None,
148
+ argument_signature=None,
149
+ return_type=None,
150
+ language="SQL",
151
+ extra_properties=None,
152
+ )
137
153
 
138
154
 
139
155
  @dataclass
@@ -222,7 +238,7 @@ class MSSQLDataJob:
222
238
  type = (
223
239
  JobContainerSubTypes.MSSQL_JOBSTEP
224
240
  if isinstance(self.entity, JobStep)
225
- else JobContainerSubTypes.MSSQL_STORED_PROCEDURE
241
+ else JobContainerSubTypes.STORED_PROCEDURE
226
242
  )
227
243
  return SubTypesClass(
228
244
  typeNames=[type],