acryl-datahub 1.1.1rc4 → 1.3.0.1rc9 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub might be problematic (see the original registry page for more details).

Files changed (414)
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +1 -1
  7. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  8. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  9. datahub/api/entities/dataset/dataset.py +26 -23
  10. datahub/api/entities/external/__init__.py +0 -0
  11. datahub/api/entities/external/external_entities.py +724 -0
  12. datahub/api/entities/external/external_tag.py +147 -0
  13. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  14. datahub/api/entities/external/restricted_text.py +172 -0
  15. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  16. datahub/api/entities/forms/forms.py +3 -3
  17. datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
  18. datahub/api/graphql/operation.py +10 -6
  19. datahub/cli/check_cli.py +88 -7
  20. datahub/cli/cli_utils.py +63 -0
  21. datahub/cli/config_utils.py +18 -10
  22. datahub/cli/container_cli.py +5 -0
  23. datahub/cli/delete_cli.py +125 -27
  24. datahub/cli/docker_check.py +110 -14
  25. datahub/cli/docker_cli.py +153 -229
  26. datahub/cli/exists_cli.py +0 -2
  27. datahub/cli/get_cli.py +0 -2
  28. datahub/cli/graphql_cli.py +1422 -0
  29. datahub/cli/iceberg_cli.py +5 -0
  30. datahub/cli/ingest_cli.py +3 -15
  31. datahub/cli/migrate.py +2 -0
  32. datahub/cli/put_cli.py +1 -4
  33. datahub/cli/quickstart_versioning.py +53 -10
  34. datahub/cli/specific/assertions_cli.py +37 -6
  35. datahub/cli/specific/datacontract_cli.py +54 -7
  36. datahub/cli/specific/dataproduct_cli.py +2 -15
  37. datahub/cli/specific/dataset_cli.py +1 -8
  38. datahub/cli/specific/forms_cli.py +0 -4
  39. datahub/cli/specific/group_cli.py +0 -2
  40. datahub/cli/specific/structuredproperties_cli.py +1 -4
  41. datahub/cli/specific/user_cli.py +172 -3
  42. datahub/cli/state_cli.py +0 -2
  43. datahub/cli/timeline_cli.py +0 -2
  44. datahub/configuration/common.py +40 -1
  45. datahub/configuration/connection_resolver.py +5 -2
  46. datahub/configuration/env_vars.py +331 -0
  47. datahub/configuration/import_resolver.py +7 -4
  48. datahub/configuration/kafka.py +21 -1
  49. datahub/configuration/pydantic_migration_helpers.py +6 -13
  50. datahub/configuration/source_common.py +3 -2
  51. datahub/configuration/validate_field_deprecation.py +5 -2
  52. datahub/configuration/validate_field_removal.py +8 -2
  53. datahub/configuration/validate_field_rename.py +6 -5
  54. datahub/configuration/validate_multiline_string.py +5 -2
  55. datahub/emitter/mce_builder.py +8 -4
  56. datahub/emitter/rest_emitter.py +103 -30
  57. datahub/entrypoints.py +6 -3
  58. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
  59. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  60. datahub/ingestion/api/decorators.py +15 -3
  61. datahub/ingestion/api/report.py +381 -3
  62. datahub/ingestion/api/sink.py +27 -2
  63. datahub/ingestion/api/source.py +165 -58
  64. datahub/ingestion/api/source_protocols.py +23 -0
  65. datahub/ingestion/autogenerated/__init__.py +0 -0
  66. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  67. datahub/ingestion/autogenerated/lineage.json +402 -0
  68. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  69. datahub/ingestion/extractor/schema_util.py +13 -4
  70. datahub/ingestion/glossary/classification_mixin.py +5 -0
  71. datahub/ingestion/graph/client.py +330 -25
  72. datahub/ingestion/graph/config.py +3 -2
  73. datahub/ingestion/graph/filters.py +30 -11
  74. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  75. datahub/ingestion/run/pipeline.py +81 -11
  76. datahub/ingestion/run/pipeline_config.py +2 -2
  77. datahub/ingestion/sink/datahub_kafka.py +1 -0
  78. datahub/ingestion/sink/datahub_rest.py +13 -5
  79. datahub/ingestion/sink/file.py +1 -0
  80. datahub/ingestion/source/abs/config.py +1 -1
  81. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  82. datahub/ingestion/source/abs/source.py +15 -30
  83. datahub/ingestion/source/aws/aws_common.py +185 -13
  84. datahub/ingestion/source/aws/glue.py +517 -244
  85. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  86. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  87. datahub/ingestion/source/aws/tag_entities.py +270 -0
  88. datahub/ingestion/source/azure/azure_common.py +3 -3
  89. datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
  90. datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
  91. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
  92. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
  93. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  94. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  95. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  96. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  97. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  98. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  99. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  100. datahub/ingestion/source/cassandra/cassandra.py +6 -8
  101. datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
  102. datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
  103. datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
  104. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  105. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  106. datahub/ingestion/source/common/subtypes.py +53 -0
  107. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  108. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  109. datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
  110. datahub/ingestion/source/datahub/config.py +12 -9
  111. datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
  112. datahub/ingestion/source/datahub/datahub_source.py +10 -0
  113. datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
  114. datahub/ingestion/source/dbt/dbt_common.py +224 -9
  115. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  116. datahub/ingestion/source/debug/__init__.py +0 -0
  117. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  118. datahub/ingestion/source/delta_lake/config.py +9 -5
  119. datahub/ingestion/source/delta_lake/source.py +8 -0
  120. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  121. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  122. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  123. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  124. datahub/ingestion/source/dremio/dremio_source.py +132 -98
  125. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  126. datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
  127. datahub/ingestion/source/excel/__init__.py +0 -0
  128. datahub/ingestion/source/excel/config.py +92 -0
  129. datahub/ingestion/source/excel/excel_file.py +539 -0
  130. datahub/ingestion/source/excel/profiling.py +308 -0
  131. datahub/ingestion/source/excel/report.py +49 -0
  132. datahub/ingestion/source/excel/source.py +662 -0
  133. datahub/ingestion/source/excel/util.py +18 -0
  134. datahub/ingestion/source/feast.py +8 -10
  135. datahub/ingestion/source/file.py +3 -0
  136. datahub/ingestion/source/fivetran/config.py +66 -7
  137. datahub/ingestion/source/fivetran/fivetran.py +227 -43
  138. datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
  139. datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
  140. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  141. datahub/ingestion/source/fivetran/response_models.py +97 -0
  142. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  143. datahub/ingestion/source/gcs/gcs_source.py +32 -4
  144. datahub/ingestion/source/ge_data_profiler.py +108 -31
  145. datahub/ingestion/source/ge_profiling_config.py +26 -11
  146. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  147. datahub/ingestion/source/grafana/field_utils.py +307 -0
  148. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  149. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  150. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  151. datahub/ingestion/source/grafana/lineage.py +202 -0
  152. datahub/ingestion/source/grafana/models.py +137 -0
  153. datahub/ingestion/source/grafana/report.py +90 -0
  154. datahub/ingestion/source/grafana/types.py +16 -0
  155. datahub/ingestion/source/hex/api.py +28 -1
  156. datahub/ingestion/source/hex/hex.py +16 -5
  157. datahub/ingestion/source/hex/mapper.py +16 -2
  158. datahub/ingestion/source/hex/model.py +2 -0
  159. datahub/ingestion/source/hex/query_fetcher.py +1 -1
  160. datahub/ingestion/source/iceberg/iceberg.py +123 -59
  161. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  162. datahub/ingestion/source/identity/azure_ad.py +1 -1
  163. datahub/ingestion/source/identity/okta.py +1 -14
  164. datahub/ingestion/source/kafka/kafka.py +16 -0
  165. datahub/ingestion/source/kafka_connect/common.py +2 -2
  166. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  167. datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
  168. datahub/ingestion/source/looker/looker_common.py +148 -79
  169. datahub/ingestion/source/looker/looker_config.py +15 -4
  170. datahub/ingestion/source/looker/looker_constant.py +4 -0
  171. datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
  172. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  173. datahub/ingestion/source/looker/looker_source.py +503 -547
  174. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  175. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  176. datahub/ingestion/source/looker/lookml_config.py +31 -3
  177. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  178. datahub/ingestion/source/looker/lookml_source.py +96 -117
  179. datahub/ingestion/source/looker/view_upstream.py +494 -1
  180. datahub/ingestion/source/metabase.py +32 -6
  181. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  182. datahub/ingestion/source/metadata/lineage.py +9 -9
  183. datahub/ingestion/source/mlflow.py +12 -2
  184. datahub/ingestion/source/mock_data/__init__.py +0 -0
  185. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  186. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  187. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  188. datahub/ingestion/source/mode.py +26 -5
  189. datahub/ingestion/source/mongodb.py +11 -1
  190. datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
  191. datahub/ingestion/source/nifi.py +2 -2
  192. datahub/ingestion/source/openapi.py +1 -1
  193. datahub/ingestion/source/powerbi/config.py +47 -21
  194. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  195. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  196. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  197. datahub/ingestion/source/powerbi/powerbi.py +10 -6
  198. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  199. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  200. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  201. datahub/ingestion/source/preset.py +3 -3
  202. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  203. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  204. datahub/ingestion/source/redash.py +1 -1
  205. datahub/ingestion/source/redshift/config.py +15 -9
  206. datahub/ingestion/source/redshift/datashares.py +1 -1
  207. datahub/ingestion/source/redshift/lineage.py +386 -687
  208. datahub/ingestion/source/redshift/query.py +23 -19
  209. datahub/ingestion/source/redshift/redshift.py +52 -111
  210. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  211. datahub/ingestion/source/redshift/report.py +0 -2
  212. datahub/ingestion/source/redshift/usage.py +6 -5
  213. datahub/ingestion/source/s3/report.py +4 -2
  214. datahub/ingestion/source/s3/source.py +449 -248
  215. datahub/ingestion/source/sac/sac.py +3 -1
  216. datahub/ingestion/source/salesforce.py +28 -13
  217. datahub/ingestion/source/schema/json_schema.py +14 -14
  218. datahub/ingestion/source/schema_inference/object.py +22 -6
  219. datahub/ingestion/source/sigma/data_classes.py +3 -0
  220. datahub/ingestion/source/sigma/sigma.py +7 -1
  221. datahub/ingestion/source/slack/slack.py +10 -16
  222. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  223. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  224. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  225. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  226. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  227. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  228. datahub/ingestion/source/snowflake/constants.py +3 -0
  229. datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
  230. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
  231. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
  232. datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
  233. datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
  234. datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
  235. datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
  236. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
  237. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  238. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  239. datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
  240. datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
  241. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  242. datahub/ingestion/source/sql/athena.py +217 -25
  243. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  244. datahub/ingestion/source/sql/clickhouse.py +24 -8
  245. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  246. datahub/ingestion/source/sql/druid.py +2 -2
  247. datahub/ingestion/source/sql/hana.py +3 -1
  248. datahub/ingestion/source/sql/hive.py +4 -3
  249. datahub/ingestion/source/sql/hive_metastore.py +19 -20
  250. datahub/ingestion/source/sql/mariadb.py +0 -1
  251. datahub/ingestion/source/sql/mssql/job_models.py +3 -1
  252. datahub/ingestion/source/sql/mssql/source.py +336 -57
  253. datahub/ingestion/source/sql/mysql.py +154 -4
  254. datahub/ingestion/source/sql/oracle.py +5 -5
  255. datahub/ingestion/source/sql/postgres.py +142 -6
  256. datahub/ingestion/source/sql/presto.py +2 -1
  257. datahub/ingestion/source/sql/sql_common.py +281 -49
  258. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  259. datahub/ingestion/source/sql/sql_types.py +22 -0
  260. datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
  261. datahub/ingestion/source/sql/teradata.py +1028 -245
  262. datahub/ingestion/source/sql/trino.py +11 -1
  263. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  264. datahub/ingestion/source/sql/vertica.py +14 -7
  265. datahub/ingestion/source/sql_queries.py +219 -121
  266. datahub/ingestion/source/state/checkpoint.py +8 -29
  267. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  268. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  269. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  270. datahub/ingestion/source/superset.py +314 -67
  271. datahub/ingestion/source/tableau/tableau.py +135 -59
  272. datahub/ingestion/source/tableau/tableau_common.py +9 -2
  273. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  274. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  275. datahub/ingestion/source/unity/config.py +160 -40
  276. datahub/ingestion/source/unity/connection.py +61 -0
  277. datahub/ingestion/source/unity/connection_test.py +1 -0
  278. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  279. datahub/ingestion/source/unity/proxy.py +794 -51
  280. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  281. datahub/ingestion/source/unity/proxy_types.py +36 -2
  282. datahub/ingestion/source/unity/report.py +15 -3
  283. datahub/ingestion/source/unity/source.py +465 -131
  284. datahub/ingestion/source/unity/tag_entities.py +197 -0
  285. datahub/ingestion/source/unity/usage.py +46 -4
  286. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  287. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
  288. datahub/ingestion/source/usage/usage_common.py +4 -3
  289. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  290. datahub/ingestion/source_config/pulsar.py +3 -1
  291. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  292. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  293. datahub/ingestion/transformer/base_transformer.py +8 -5
  294. datahub/ingestion/transformer/set_browse_path.py +112 -0
  295. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  296. datahub/metadata/_internal_schema_classes.py +6806 -4871
  297. datahub/metadata/_urns/urn_defs.py +1767 -1539
  298. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  299. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  300. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  301. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  302. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  303. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
  304. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  305. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  306. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  307. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  308. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  309. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  310. datahub/metadata/schema.avsc +18395 -16979
  311. datahub/metadata/schemas/Actors.avsc +38 -1
  312. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  313. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  314. datahub/metadata/schemas/Applications.avsc +38 -0
  315. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  316. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  317. datahub/metadata/schemas/ChartKey.avsc +1 -0
  318. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  319. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  320. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  321. datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
  322. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  323. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  324. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  325. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  326. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  327. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  328. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  329. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  330. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  331. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  332. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  333. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  334. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  335. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  336. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  337. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  338. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  339. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  340. datahub/metadata/schemas/DomainKey.avsc +2 -1
  341. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  342. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  343. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  344. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  345. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  346. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  347. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  348. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  349. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  350. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  351. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  352. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  353. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  354. datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
  355. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  356. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  357. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  358. datahub/metadata/schemas/Operation.avsc +4 -2
  359. datahub/metadata/schemas/Ownership.avsc +69 -0
  360. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  361. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  362. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  363. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  364. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  365. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  366. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  367. datahub/sdk/__init__.py +2 -0
  368. datahub/sdk/_all_entities.py +7 -0
  369. datahub/sdk/_shared.py +249 -5
  370. datahub/sdk/chart.py +386 -0
  371. datahub/sdk/container.py +7 -0
  372. datahub/sdk/dashboard.py +453 -0
  373. datahub/sdk/dataflow.py +7 -0
  374. datahub/sdk/datajob.py +45 -13
  375. datahub/sdk/dataset.py +56 -2
  376. datahub/sdk/entity_client.py +111 -9
  377. datahub/sdk/lineage_client.py +663 -82
  378. datahub/sdk/main_client.py +50 -16
  379. datahub/sdk/mlmodel.py +120 -38
  380. datahub/sdk/mlmodelgroup.py +7 -0
  381. datahub/sdk/search_client.py +7 -3
  382. datahub/sdk/search_filters.py +304 -36
  383. datahub/secret/datahub_secret_store.py +3 -0
  384. datahub/secret/environment_secret_store.py +29 -0
  385. datahub/secret/file_secret_store.py +49 -0
  386. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  387. datahub/specific/aspect_helpers/siblings.py +73 -0
  388. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  389. datahub/specific/chart.py +1 -1
  390. datahub/specific/datajob.py +15 -1
  391. datahub/specific/dataproduct.py +4 -0
  392. datahub/specific/dataset.py +39 -59
  393. datahub/sql_parsing/split_statements.py +13 -0
  394. datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
  395. datahub/sql_parsing/sqlglot_lineage.py +196 -42
  396. datahub/sql_parsing/sqlglot_utils.py +12 -4
  397. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  398. datahub/telemetry/telemetry.py +28 -14
  399. datahub/testing/sdk_v2_helpers.py +7 -1
  400. datahub/upgrade/upgrade.py +73 -17
  401. datahub/utilities/file_backed_collections.py +8 -9
  402. datahub/utilities/is_pytest.py +3 -2
  403. datahub/utilities/logging_manager.py +22 -6
  404. datahub/utilities/mapping.py +29 -2
  405. datahub/utilities/sample_data.py +5 -4
  406. datahub/utilities/server_config_util.py +10 -1
  407. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  408. datahub/utilities/stats_collections.py +4 -0
  409. datahub/utilities/urns/urn.py +41 -2
  410. datahub/emitter/sql_parsing_builder.py +0 -306
  411. datahub/ingestion/source/redshift/lineage_v2.py +0 -466
  412. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
  413. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
  414. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -1,12 +1,18 @@
1
1
  # This import verifies that the dependencies are available.
2
+ import logging
3
+ from typing import TYPE_CHECKING, Any, List, Optional
2
4
 
3
5
  import pymysql # noqa: F401
4
6
  from pydantic.fields import Field
5
- from sqlalchemy import util
7
+ from sqlalchemy import create_engine, event, inspect, util
6
8
  from sqlalchemy.dialects.mysql import BIT, base
7
9
  from sqlalchemy.dialects.mysql.enumerated import SET
8
10
  from sqlalchemy.engine.reflection import Inspector
9
11
 
12
+ if TYPE_CHECKING:
13
+ from sqlalchemy.engine import Engine
14
+
15
+ from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
10
16
  from datahub.ingestion.api.decorators import (
11
17
  SourceCapability,
12
18
  SupportStatus,
@@ -15,16 +21,27 @@ from datahub.ingestion.api.decorators import (
15
21
  platform_name,
16
22
  support_status,
17
23
  )
24
+ from datahub.ingestion.source.aws.aws_common import (
25
+ AwsConnectionConfig,
26
+ RDSIAMTokenManager,
27
+ )
18
28
  from datahub.ingestion.source.sql.sql_common import (
19
29
  make_sqlalchemy_type,
20
30
  register_custom_type,
21
31
  )
22
32
  from datahub.ingestion.source.sql.sql_config import SQLAlchemyConnectionConfig
33
+ from datahub.ingestion.source.sql.sqlalchemy_uri import parse_host_port
34
+ from datahub.ingestion.source.sql.stored_procedures.base import (
35
+ BaseProcedure,
36
+ )
23
37
  from datahub.ingestion.source.sql.two_tier_sql_source import (
24
38
  TwoTierSQLAlchemyConfig,
25
39
  TwoTierSQLAlchemySource,
26
40
  )
27
41
  from datahub.metadata.schema_classes import BytesTypeClass
42
+ from datahub.utilities.str_enum import StrEnum
43
+
44
+ logger = logging.getLogger(__name__)
28
45
 
29
46
  SET.__repr__ = util.generic_repr # type:ignore
30
47
 
@@ -48,16 +65,49 @@ base.ischema_names["polygon"] = POLYGON
48
65
  base.ischema_names["decimal128"] = DECIMAL128
49
66
 
50
67
 
68
+ class MySQLAuthMode(StrEnum):
69
+ """Authentication mode for MySQL connection."""
70
+
71
+ PASSWORD = "PASSWORD"
72
+ AWS_IAM = "AWS_IAM"
73
+
74
+
51
75
  class MySQLConnectionConfig(SQLAlchemyConnectionConfig):
52
76
  # defaults
53
77
  host_port: str = Field(default="localhost:3306", description="MySQL host URL.")
54
- scheme: str = "mysql+pymysql"
78
+ scheme: HiddenFromDocs[str] = "mysql+pymysql"
79
+
80
+ # Authentication configuration
81
+ auth_mode: MySQLAuthMode = Field(
82
+ default=MySQLAuthMode.PASSWORD,
83
+ description="Authentication mode to use for the MySQL connection. "
84
+ "Options are 'PASSWORD' (default) for standard username/password authentication, "
85
+ "or 'AWS_IAM' for AWS RDS IAM authentication.",
86
+ )
87
+ aws_config: AwsConnectionConfig = Field(
88
+ default_factory=AwsConnectionConfig,
89
+ description="AWS configuration for RDS IAM authentication (only used when auth_mode is AWS_IAM). "
90
+ "Provides full control over AWS credentials, region, profiles, role assumption, retry logic, and proxy settings. "
91
+ "If not explicitly configured, boto3 will automatically use the default credential chain and region from "
92
+ "environment variables (AWS_DEFAULT_REGION, AWS_REGION), AWS config files (~/.aws/config), or IAM role metadata.",
93
+ )
55
94
 
56
95
 
57
96
  class MySQLConfig(MySQLConnectionConfig, TwoTierSQLAlchemyConfig):
58
97
  def get_identifier(self, *, schema: str, table: str) -> str:
59
98
  return f"{schema}.{table}"
60
99
 
100
+ include_stored_procedures: bool = Field(
101
+ default=True,
102
+ description="Include ingest of stored procedures.",
103
+ )
104
+
105
+ procedure_pattern: AllowDenyPattern = Field(
106
+ default=AllowDenyPattern.allow_all(),
107
+ description="Regex patterns for stored procedures to filter in ingestion."
108
+ "Specify regex to match the entire procedure name in database.schema.procedure_name format. e.g. to match all procedures starting with customer in Customer database and public schema, use the regex 'Customer.public.customer.*'",
109
+ )
110
+
61
111
 
62
112
  @platform_name("MySQL")
63
113
  @config_class(MySQLConfig)
@@ -65,7 +115,6 @@ class MySQLConfig(MySQLConnectionConfig, TwoTierSQLAlchemyConfig):
65
115
  @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
66
116
  @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
67
117
  @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
68
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
69
118
  class MySQLSource(TwoTierSQLAlchemySource):
70
119
  """
71
120
  This plugin extracts the following:
@@ -75,9 +124,27 @@ class MySQLSource(TwoTierSQLAlchemySource):
75
124
  Table, row, and column statistics via optional SQL profiling
76
125
  """
77
126
 
78
- def __init__(self, config, ctx):
127
+ config: MySQLConfig
128
+
129
+ def __init__(self, config: MySQLConfig, ctx: Any):
79
130
  super().__init__(config, ctx, self.get_platform())
80
131
 
132
+ self._rds_iam_token_manager: Optional[RDSIAMTokenManager] = None
133
+ if config.auth_mode == MySQLAuthMode.AWS_IAM:
134
+ hostname, port = parse_host_port(config.host_port, default_port=3306)
135
+ if port is None:
136
+ raise ValueError("Port must be specified for RDS IAM authentication")
137
+
138
+ if not config.username:
139
+ raise ValueError("username is required for RDS IAM authentication")
140
+
141
+ self._rds_iam_token_manager = RDSIAMTokenManager(
142
+ endpoint=hostname,
143
+ username=config.username,
144
+ port=port,
145
+ aws_config=config.aws_config,
146
+ )
147
+
81
148
  def get_platform(self):
82
149
  return "mysql"
83
150
 
@@ -86,6 +153,52 @@ class MySQLSource(TwoTierSQLAlchemySource):
86
153
  config = MySQLConfig.parse_obj(config_dict)
87
154
  return cls(config, ctx)
88
155
 
156
def _setup_rds_iam_event_listener(
    self, engine: "Engine", database_name: Optional[str] = None
) -> None:
    """Register a ``do_connect`` hook on ``engine`` that supplies RDS IAM tokens.

    Nothing is registered unless ``auth_mode`` is AWS_IAM and a token manager
    has been created. ``database_name`` is accepted but not used here.
    """
    if self.config.auth_mode != MySQLAuthMode.AWS_IAM:
        return
    if not self._rds_iam_token_manager:
        return

    def do_connect_listener(_dialect, _conn_rec, _cargs, cparams):
        token_manager = self._rds_iam_token_manager
        if not token_manager:
            raise RuntimeError("RDS IAM Token Manager is not initialized")
        # A token is requested from the manager on every new DBAPI connection.
        cparams["password"] = token_manager.get_token()
        # RDS IAM authentication through PyMySQL needs SSL switched on.
        # An SSL configuration supplied by the caller is kept untouched;
        # otherwise the {"ssl": True} dict acts as a truthy placeholder so
        # that PyMySQL treats SSL as enabled with its default settings.
        # See https://pymysql.readthedocs.io/en/latest/modules/connections.html#pymysql.connections.Connection
        existing_ssl = cparams.get("ssl")
        cparams["ssl"] = existing_ssl if existing_ssl else {"ssl": True}

    event.listen(engine, "do_connect", do_connect_listener)  # type: ignore[misc]
178
+
179
def get_inspectors(self):
    """Yield one inspector per database allowed by ``database_pattern``.

    When ``config.database`` is set only that database is considered;
    otherwise the schema names reported by the server are used. Each
    yielded inspector is bound to a connection that is closed once the
    consumer asks for the next item (or closes the generator).

    The bootstrap connection is released before any per-database engine is
    opened, and every engine created here is disposed when no longer needed
    so its connection pool does not linger for the rest of the run.
    """
    url = self.config.get_sql_alchemy_url()
    logger.debug(f"sql_alchemy_url={url}")

    engine = create_engine(url, **self.config.options)
    self._setup_rds_iam_event_listener(engine)

    try:
        with engine.connect() as conn:
            # An empty string is falsy, so a plain truthiness test suffices.
            if self.config.database:
                databases = [self.config.database]
            else:
                databases = inspect(conn).get_schema_names()
    finally:
        # Only the database list is needed from the bootstrap engine.
        engine.dispose()

    for db in databases:
        if not self.config.database_pattern.allowed(db):
            continue

        db_url = self.config.get_sql_alchemy_url(current_db=db)
        db_engine = create_engine(db_url, **self.config.options)
        self._setup_rds_iam_event_listener(db_engine, database_name=db)

        try:
            with db_engine.connect() as db_conn:
                yield inspect(db_conn)
        finally:
            # Runs on normal advance, on error, and when the generator is closed.
            db_engine.dispose()
201
+
89
202
  def add_profile_metadata(self, inspector: Inspector) -> None:
90
203
  if not self.config.is_profiling_enabled():
91
204
  return
@@ -96,3 +209,40 @@ class MySQLSource(TwoTierSQLAlchemySource):
96
209
  self.profile_metadata_info.dataset_name_to_storage_bytes[
97
210
  f"{row.TABLE_SCHEMA}.{row.TABLE_NAME}"
98
211
  ] = row.DATA_LENGTH
212
+
213
def get_procedures_for_schema(
    self, inspector: Inspector, schema: str, db_name: str
) -> List[BaseProcedure]:
    """Return the stored procedures defined in ``schema``.

    Reads ``information_schema.ROUTINES`` (rows with ROUTINE_TYPE 'PROCEDURE')
    over a fresh connection obtained from ``inspector.engine``.

    Args:
        inspector: Inspector whose engine is used to run the query.
        schema: Value matched against ROUTINE_SCHEMA.
        db_name: Not used by this implementation.

    Returns:
        One ``BaseProcedure`` per row, carrying name, language and
        definition; the remaining fields are set to None.
    """
    # Local import: `text` may not be imported at module level in this file.
    from sqlalchemy import text

    # A text() construct with a named bind replaces the raw SQL string and
    # its DBAPI-specific "%s" placeholder; passing plain strings to
    # Connection.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0.
    query = text(
        """
        SELECT ROUTINE_NAME AS name,
               ROUTINE_DEFINITION AS definition,
               EXTERNAL_LANGUAGE AS language
        FROM information_schema.ROUTINES
        WHERE ROUTINE_TYPE = 'PROCEDURE'
          AND ROUTINE_SCHEMA = :schema
        """
    )
    with inspector.engine.connect() as conn:
        # Fetch everything while the connection is still open.
        rows = conn.execute(query, {"schema": schema}).fetchall()

    return [
        BaseProcedure(
            name=row.name,
            language=row.language,
            argument_signature=None,
            return_type=None,
            procedure_definition=row.definition,
            created=None,
            last_altered=None,
            extra_properties=None,
            comment=None,
        )
        for row in rows
    ]
@@ -37,7 +37,7 @@ from datahub.ingestion.source.sql.sql_config import (
37
37
 
38
38
  logger = logging.getLogger(__name__)
39
39
 
40
- oracledb.version = "8.3.0"
40
+ oracledb.version = "8.3.0" # type: ignore[assignment]
41
41
  sys.modules["cx_Oracle"] = oracledb
42
42
 
43
43
  extra_oracle_types = {
@@ -110,10 +110,10 @@ class OracleConfig(BasicSQLAlchemyConfig):
110
110
  return v
111
111
 
112
112
  @pydantic.validator("data_dictionary_mode")
113
def check_data_dictionary_mode(cls, value):
    """Validate ``data_dictionary_mode``.

    Returns the value unchanged when it is ``"ALL"`` or ``"DBA"``;
    raises ``ValueError`` for anything else.
    """
    if value in ("ALL", "DBA"):
        return value
    raise ValueError("Specify one of data dictionary views mode: 'ALL', 'DBA'.")
117
117
 
118
118
  @pydantic.validator("thick_mode_lib_dir", always=True)
119
119
  def check_thick_mode_lib_dir(cls, v, values):
@@ -441,7 +441,7 @@ class OracleInspectorObjectWrapper:
441
441
  "\nac.constraint_name,"
442
442
  "\nac.constraint_type,"
443
443
  "\nacc.column_name AS local_column,"
444
- "\nac.r_table_name AS remote_table,"
444
+ "\nac.table_name AS remote_table,"
445
445
  "\nrcc.column_name AS remote_column,"
446
446
  "\nac.r_owner AS remote_owner,"
447
447
  "\nacc.position AS loc_pos,"
@@ -1,6 +1,6 @@
1
1
  import logging
2
2
  from collections import defaultdict
3
- from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
3
+ from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Union
4
4
 
5
5
  # This import verifies that the dependencies are available.
6
6
  import psycopg2 # noqa: F401
@@ -14,9 +14,12 @@ import sqlalchemy.dialects.postgresql as custom_types
14
14
  from geoalchemy2 import Geometry # noqa: F401
15
15
  from pydantic import BaseModel
16
16
  from pydantic.fields import Field
17
- from sqlalchemy import create_engine, inspect
17
+ from sqlalchemy import create_engine, event, inspect
18
18
  from sqlalchemy.engine.reflection import Inspector
19
19
 
20
+ if TYPE_CHECKING:
21
+ from sqlalchemy.engine import Engine
22
+
20
23
  from datahub.configuration.common import AllowDenyPattern
21
24
  from datahub.emitter import mce_builder
22
25
  from datahub.emitter.mcp_builder import mcps_from_mce
@@ -30,17 +33,26 @@ from datahub.ingestion.api.decorators import (
30
33
  support_status,
31
34
  )
32
35
  from datahub.ingestion.api.workunit import MetadataWorkUnit
36
+ from datahub.ingestion.source.aws.aws_common import (
37
+ AwsConnectionConfig,
38
+ RDSIAMTokenManager,
39
+ )
33
40
  from datahub.ingestion.source.sql.sql_common import (
34
41
  SQLAlchemySource,
35
42
  SqlWorkUnit,
36
43
  register_custom_type,
37
44
  )
38
45
  from datahub.ingestion.source.sql.sql_config import BasicSQLAlchemyConfig
46
+ from datahub.ingestion.source.sql.sqlalchemy_uri import parse_host_port
47
+ from datahub.ingestion.source.sql.stored_procedures.base import (
48
+ BaseProcedure,
49
+ )
39
50
  from datahub.metadata.com.linkedin.pegasus2avro.schema import (
40
51
  ArrayTypeClass,
41
52
  BytesTypeClass,
42
53
  MapTypeClass,
43
54
  )
55
+ from datahub.utilities.str_enum import StrEnum
44
56
 
45
57
  logger: logging.Logger = logging.getLogger(__name__)
46
58
 
@@ -97,12 +109,34 @@ class ViewLineageEntry(BaseModel):
97
109
  dependent_schema: str
98
110
 
99
111
 
112
class PostgresAuthMode(StrEnum):
    """Authentication modes available for a PostgreSQL connection."""

    # Standard username/password authentication.
    PASSWORD = "PASSWORD"
    # AWS RDS IAM authentication.
    AWS_IAM = "AWS_IAM"
117
+
118
+
100
119
  class BasePostgresConfig(BasicSQLAlchemyConfig):
101
120
  scheme: str = Field(default="postgresql+psycopg2", description="database scheme")
102
121
  schema_pattern: AllowDenyPattern = Field(
103
122
  default=AllowDenyPattern(deny=["information_schema"])
104
123
  )
105
124
 
125
+ # Authentication configuration
126
+ auth_mode: PostgresAuthMode = Field(
127
+ default=PostgresAuthMode.PASSWORD,
128
+ description="Authentication mode to use for the PostgreSQL connection. "
129
+ "Options are 'PASSWORD' (default) for standard username/password authentication, "
130
+ "or 'AWS_IAM' for AWS RDS IAM authentication.",
131
+ )
132
+ aws_config: AwsConnectionConfig = Field(
133
+ default_factory=AwsConnectionConfig,
134
+ description="AWS configuration for RDS IAM authentication (only used when auth_mode is AWS_IAM). "
135
+ "Provides full control over AWS credentials, region, profiles, role assumption, retry logic, and proxy settings. "
136
+ "If not explicitly configured, boto3 will automatically use the default credential chain and region from "
137
+ "environment variables (AWS_DEFAULT_REGION, AWS_REGION), AWS config files (~/.aws/config), or IAM role metadata.",
138
+ )
139
+
106
140
 
107
141
  class PostgresConfig(BasePostgresConfig):
108
142
  database_pattern: AllowDenyPattern = Field(
@@ -124,6 +158,17 @@ class PostgresConfig(BasePostgresConfig):
124
158
  ),
125
159
  )
126
160
 
161
+ include_stored_procedures: bool = Field(
162
+ default=True,
163
+ description="Include ingest of stored procedures.",
164
+ )
165
+
166
# Allow/deny regex filter for stored procedures; everything is allowed by default.
procedure_pattern: AllowDenyPattern = Field(
    default=AllowDenyPattern.allow_all(),
    description=(
        # Trailing space added: the adjacent literals previously rendered
        # as "ingestion.Specify regex ...".
        "Regex patterns for stored procedures to filter in ingestion. "
        "Specify regex to match the entire procedure name in database.schema.procedure_name format. e.g. to match all procedures starting with customer in Customer database and public schema, use the regex 'Customer.public.customer.*'"
    ),
)
171
+
127
172
 
128
173
  @platform_name("Postgres")
129
174
  @config_class(PostgresConfig)
@@ -131,12 +176,11 @@ class PostgresConfig(BasePostgresConfig):
131
176
  @capability(SourceCapability.DOMAINS, "Enabled by default")
132
177
  @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
133
178
  @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
134
- @capability(SourceCapability.LINEAGE_COARSE, "Optionally enabled via configuration")
135
179
  class PostgresSource(SQLAlchemySource):
136
180
  """
137
181
  This plugin extracts the following:
138
182
 
139
- - Metadata for databases, schemas, views, and tables
183
+ - Metadata for databases, schemas, views, tables, and stored procedures
140
184
  - Column types associated with each table
141
185
  - Also supports PostGIS extensions
142
186
  - Table, row, and column statistics via optional SQL profiling
@@ -147,6 +191,22 @@ class PostgresSource(SQLAlchemySource):
147
191
  def __init__(self, config: PostgresConfig, ctx: PipelineContext):
148
192
  super().__init__(config, ctx, self.get_platform())
149
193
 
194
+ self._rds_iam_token_manager: Optional[RDSIAMTokenManager] = None
195
+ if config.auth_mode == PostgresAuthMode.AWS_IAM:
196
+ hostname, port = parse_host_port(config.host_port, default_port=5432)
197
+ if port is None:
198
+ raise ValueError("Port must be specified for RDS IAM authentication")
199
+
200
+ if not config.username:
201
+ raise ValueError("username is required for RDS IAM authentication")
202
+
203
+ self._rds_iam_token_manager = RDSIAMTokenManager(
204
+ endpoint=hostname,
205
+ username=config.username,
206
+ port=port,
207
+ aws_config=config.aws_config,
208
+ )
209
+
150
210
  def get_platform(self):
151
211
  return "postgres"
152
212
 
@@ -155,13 +215,36 @@ class PostgresSource(SQLAlchemySource):
155
215
  config = PostgresConfig.parse_obj(config_dict)
156
216
  return cls(config, ctx)
157
217
 
218
def _setup_rds_iam_event_listener(
    self, engine: "Engine", database_name: Optional[str] = None
) -> None:
    """Register a ``do_connect`` hook on ``engine`` that supplies RDS IAM tokens.

    Nothing is registered unless ``auth_mode`` is AWS_IAM and a token manager
    has been created. ``database_name`` is accepted but not used here.
    """
    if self.config.auth_mode != PostgresAuthMode.AWS_IAM:
        return
    if not self._rds_iam_token_manager:
        return

    def do_connect_listener(_dialect, _conn_rec, _cargs, cparams):
        token_manager = self._rds_iam_token_manager
        if not token_manager:
            raise RuntimeError("RDS IAM Token Manager is not initialized")
        # A token is requested from the manager on every new DBAPI connection.
        cparams["password"] = token_manager.get_token()
        # Leave these three sslmode values alone; anything else (including
        # a missing sslmode) is replaced with "require".
        accepted_ssl_modes = ("require", "verify-ca", "verify-full")
        if cparams.get("sslmode") in accepted_ssl_modes:
            return
        cparams["sslmode"] = "require"

    event.listen(engine, "do_connect", do_connect_listener)  # type: ignore[misc]
236
+
158
237
  def get_inspectors(self) -> Iterable[Inspector]:
159
238
  # Note: get_sql_alchemy_url will choose `sqlalchemy_uri` over the passed in database
160
239
  url = self.config.get_sql_alchemy_url(
161
240
  database=self.config.database or self.config.initial_database
162
241
  )
242
+
163
243
  logger.debug(f"sql_alchemy_url={url}")
244
+
164
245
  engine = create_engine(url, **self.config.options)
246
+ self._setup_rds_iam_event_listener(engine)
247
+
165
248
  with engine.connect() as conn:
166
249
  if self.config.database or self.config.sqlalchemy_uri:
167
250
  inspector = inspect(conn)
@@ -169,14 +252,21 @@ class PostgresSource(SQLAlchemySource):
169
252
  else:
170
253
  # pg_database catalog - https://www.postgresql.org/docs/current/catalog-pg-database.html
171
254
  # exclude template databases - https://www.postgresql.org/docs/current/manage-ag-templatedbs.html
255
+ # exclude rdsadmin - AWS RDS administrative database
172
256
  databases = conn.execute(
173
- "SELECT datname from pg_database where datname not in ('template0', 'template1')"
257
+ "SELECT datname from pg_database where datname not in ('template0', 'template1', 'rdsadmin')"
174
258
  )
175
259
  for db in databases:
176
260
  if not self.config.database_pattern.allowed(db["datname"]):
177
261
  continue
262
+
178
263
  url = self.config.get_sql_alchemy_url(database=db["datname"])
179
- with create_engine(url, **self.config.options).connect() as conn:
264
+ db_engine = create_engine(url, **self.config.options)
265
+ self._setup_rds_iam_event_listener(
266
+ db_engine, database_name=db["datname"]
267
+ )
268
+
269
+ with db_engine.connect() as conn:
180
270
  inspector = inspect(conn)
181
271
  yield inspector
182
272
 
@@ -292,3 +382,49 @@ class PostgresSource(SQLAlchemySource):
292
382
  ] = row.table_size
293
383
  except Exception as e:
294
384
  logger.error(f"failed to fetch profile metadata: {e}")
385
+
386
def get_procedures_for_schema(
    self, inspector: Inspector, schema: str, db_name: str
) -> List[BaseProcedure]:
    """Return the stored procedures defined in ``schema``.

    Queries ``pg_proc`` joined with ``pg_namespace`` and ``pg_language``
    (rows with ``prokind = 'p'``) over a fresh connection obtained from
    ``inspector.engine``.

    Args:
        inspector: Inspector whose engine is used to run the query.
        schema: Namespace name matched against ``pg_namespace.nspname``.
        db_name: Not used by this implementation.

    Returns:
        One ``BaseProcedure`` per row, carrying name, language, argument
        signature, definition and comment; other fields are set to None.
    """
    # Local import: `text` is not part of this module's visible sqlalchemy imports.
    from sqlalchemy import text

    # A text() construct with a named bind replaces the raw SQL string and
    # its DBAPI-specific "%s" placeholder; passing plain strings to
    # Connection.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0.
    # NOTE(review): pg_proc.prokind presumably requires PostgreSQL 11+ —
    # confirm behaviour against older servers.
    query = text(
        """
        SELECT
            p.proname AS name,
            l.lanname AS language,
            pg_get_function_arguments(p.oid) AS arguments,
            pg_get_functiondef(p.oid) AS definition,
            obj_description(p.oid, 'pg_proc') AS comment
        FROM
            pg_proc p
        JOIN
            pg_namespace n ON n.oid = p.pronamespace
        JOIN
            pg_language l ON l.oid = p.prolang
        WHERE
            p.prokind = 'p'
            AND n.nspname = :schema;
        """
    )
    with inspector.engine.connect() as conn:
        # Fetch everything while the connection is still open.
        rows = conn.execute(query, {"schema": schema}).fetchall()

    return [
        BaseProcedure(
            name=row.name,
            language=row.language,
            argument_signature=row.arguments,
            return_type=None,
            procedure_definition=row.definition,
            created=None,
            last_altered=None,
            comment=row.comment,
            extra_properties=None,
        )
        for row in rows
    ]
@@ -8,6 +8,7 @@ from sqlalchemy import exc, sql
8
8
  from sqlalchemy.engine import reflection
9
9
  from sqlalchemy.engine.base import Engine
10
10
 
11
+ from datahub.configuration.common import HiddenFromDocs
11
12
  from datahub.ingestion.api.common import PipelineContext
12
13
  from datahub.ingestion.api.decorators import (
13
14
  SourceCapability,
@@ -87,7 +88,7 @@ PrestoDialect._get_full_table = _get_full_table
87
88
 
88
89
  class PrestoConfig(TrinoConfig):
89
90
  # defaults
90
- scheme: str = Field(default="presto", description="", hidden_from_docs=True)
91
+ scheme: HiddenFromDocs[str] = Field(default="presto")
91
92
 
92
93
 
93
94
  @platform_name("Presto", doc_order=1)