acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (503)
  1. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
  2. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
  3. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
  5. datahub/_version.py +1 -1
  6. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  7. datahub/api/entities/assertion/assertion.py +1 -1
  8. datahub/api/entities/common/serialized_value.py +1 -1
  9. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  10. datahub/api/entities/datacontract/datacontract.py +35 -3
  11. datahub/api/entities/datajob/dataflow.py +18 -3
  12. datahub/api/entities/datajob/datajob.py +24 -4
  13. datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
  14. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  15. datahub/api/entities/dataset/dataset.py +47 -72
  16. datahub/api/entities/external/__init__.py +0 -0
  17. datahub/api/entities/external/external_entities.py +724 -0
  18. datahub/api/entities/external/external_tag.py +147 -0
  19. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  20. datahub/api/entities/external/restricted_text.py +172 -0
  21. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  22. datahub/api/entities/forms/forms.py +37 -37
  23. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  24. datahub/api/graphql/assertion.py +1 -1
  25. datahub/api/graphql/base.py +8 -6
  26. datahub/api/graphql/operation.py +14 -10
  27. datahub/cli/check_cli.py +91 -9
  28. datahub/cli/cli_utils.py +63 -0
  29. datahub/cli/config_utils.py +20 -12
  30. datahub/cli/container_cli.py +5 -0
  31. datahub/cli/delete_cli.py +133 -34
  32. datahub/cli/docker_check.py +110 -14
  33. datahub/cli/docker_cli.py +155 -231
  34. datahub/cli/exists_cli.py +2 -3
  35. datahub/cli/get_cli.py +2 -3
  36. datahub/cli/graphql_cli.py +1422 -0
  37. datahub/cli/iceberg_cli.py +11 -5
  38. datahub/cli/ingest_cli.py +25 -26
  39. datahub/cli/migrate.py +12 -9
  40. datahub/cli/migration_utils.py +4 -3
  41. datahub/cli/put_cli.py +4 -6
  42. datahub/cli/quickstart_versioning.py +53 -10
  43. datahub/cli/specific/assertions_cli.py +39 -7
  44. datahub/cli/specific/datacontract_cli.py +57 -9
  45. datahub/cli/specific/dataproduct_cli.py +12 -24
  46. datahub/cli/specific/dataset_cli.py +31 -21
  47. datahub/cli/specific/forms_cli.py +2 -5
  48. datahub/cli/specific/group_cli.py +2 -3
  49. datahub/cli/specific/structuredproperties_cli.py +5 -7
  50. datahub/cli/specific/user_cli.py +174 -4
  51. datahub/cli/state_cli.py +2 -3
  52. datahub/cli/timeline_cli.py +2 -3
  53. datahub/configuration/common.py +46 -2
  54. datahub/configuration/connection_resolver.py +5 -2
  55. datahub/configuration/env_vars.py +331 -0
  56. datahub/configuration/import_resolver.py +7 -4
  57. datahub/configuration/kafka.py +21 -1
  58. datahub/configuration/pydantic_migration_helpers.py +6 -13
  59. datahub/configuration/source_common.py +4 -3
  60. datahub/configuration/validate_field_deprecation.py +5 -2
  61. datahub/configuration/validate_field_removal.py +8 -2
  62. datahub/configuration/validate_field_rename.py +6 -5
  63. datahub/configuration/validate_multiline_string.py +5 -2
  64. datahub/emitter/mce_builder.py +12 -8
  65. datahub/emitter/mcp.py +20 -5
  66. datahub/emitter/mcp_builder.py +12 -0
  67. datahub/emitter/request_helper.py +138 -15
  68. datahub/emitter/response_helper.py +111 -19
  69. datahub/emitter/rest_emitter.py +399 -163
  70. datahub/entrypoints.py +10 -5
  71. datahub/errors.py +12 -0
  72. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
  73. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  74. datahub/ingestion/api/common.py +9 -0
  75. datahub/ingestion/api/decorators.py +15 -3
  76. datahub/ingestion/api/report.py +381 -3
  77. datahub/ingestion/api/sink.py +27 -2
  78. datahub/ingestion/api/source.py +174 -62
  79. datahub/ingestion/api/source_helpers.py +41 -3
  80. datahub/ingestion/api/source_protocols.py +23 -0
  81. datahub/ingestion/autogenerated/__init__.py +0 -0
  82. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  83. datahub/ingestion/autogenerated/lineage.json +402 -0
  84. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  85. datahub/ingestion/extractor/schema_util.py +31 -5
  86. datahub/ingestion/glossary/classification_mixin.py +9 -2
  87. datahub/ingestion/graph/client.py +492 -55
  88. datahub/ingestion/graph/config.py +18 -2
  89. datahub/ingestion/graph/filters.py +96 -32
  90. datahub/ingestion/graph/links.py +55 -0
  91. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  92. datahub/ingestion/run/pipeline.py +90 -23
  93. datahub/ingestion/run/pipeline_config.py +3 -3
  94. datahub/ingestion/sink/datahub_kafka.py +1 -0
  95. datahub/ingestion/sink/datahub_rest.py +31 -23
  96. datahub/ingestion/sink/file.py +1 -0
  97. datahub/ingestion/source/abs/config.py +1 -1
  98. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  99. datahub/ingestion/source/abs/source.py +15 -30
  100. datahub/ingestion/source/apply/datahub_apply.py +6 -5
  101. datahub/ingestion/source/aws/aws_common.py +185 -13
  102. datahub/ingestion/source/aws/glue.py +517 -244
  103. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  104. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  105. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  106. datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
  107. datahub/ingestion/source/aws/tag_entities.py +270 -0
  108. datahub/ingestion/source/azure/azure_common.py +3 -3
  109. datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
  110. datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
  111. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
  112. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
  113. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  114. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  115. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  116. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  117. datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
  118. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  119. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  120. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  121. datahub/ingestion/source/cassandra/cassandra.py +7 -18
  122. datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
  123. datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
  124. datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
  125. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  126. datahub/ingestion/source/common/data_platforms.py +23 -0
  127. datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
  128. datahub/ingestion/source/common/subtypes.py +73 -1
  129. datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
  130. datahub/ingestion/source/data_lake_common/object_store.py +732 -0
  131. datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
  132. datahub/ingestion/source/datahub/config.py +19 -5
  133. datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
  134. datahub/ingestion/source/datahub/datahub_source.py +11 -1
  135. datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
  136. datahub/ingestion/source/dbt/dbt_common.py +270 -26
  137. datahub/ingestion/source/dbt/dbt_core.py +88 -47
  138. datahub/ingestion/source/dbt/dbt_tests.py +8 -6
  139. datahub/ingestion/source/debug/__init__.py +0 -0
  140. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  141. datahub/ingestion/source/delta_lake/config.py +9 -5
  142. datahub/ingestion/source/delta_lake/source.py +8 -0
  143. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  144. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  145. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  146. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
  147. datahub/ingestion/source/dremio/dremio_entities.py +6 -5
  148. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  149. datahub/ingestion/source/dremio/dremio_source.py +228 -215
  150. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  151. datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
  152. datahub/ingestion/source/excel/__init__.py +0 -0
  153. datahub/ingestion/source/excel/config.py +92 -0
  154. datahub/ingestion/source/excel/excel_file.py +539 -0
  155. datahub/ingestion/source/excel/profiling.py +308 -0
  156. datahub/ingestion/source/excel/report.py +49 -0
  157. datahub/ingestion/source/excel/source.py +662 -0
  158. datahub/ingestion/source/excel/util.py +18 -0
  159. datahub/ingestion/source/feast.py +12 -14
  160. datahub/ingestion/source/file.py +3 -0
  161. datahub/ingestion/source/fivetran/config.py +67 -8
  162. datahub/ingestion/source/fivetran/fivetran.py +228 -43
  163. datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
  164. datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
  165. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  166. datahub/ingestion/source/fivetran/response_models.py +97 -0
  167. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  168. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
  169. datahub/ingestion/source/gcs/gcs_source.py +53 -10
  170. datahub/ingestion/source/gcs/gcs_utils.py +36 -9
  171. datahub/ingestion/source/ge_data_profiler.py +146 -33
  172. datahub/ingestion/source/ge_profiling_config.py +26 -11
  173. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  174. datahub/ingestion/source/grafana/field_utils.py +307 -0
  175. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  176. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  177. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  178. datahub/ingestion/source/grafana/lineage.py +202 -0
  179. datahub/ingestion/source/grafana/models.py +137 -0
  180. datahub/ingestion/source/grafana/report.py +90 -0
  181. datahub/ingestion/source/grafana/types.py +16 -0
  182. datahub/ingestion/source/hex/__init__.py +0 -0
  183. datahub/ingestion/source/hex/api.py +402 -0
  184. datahub/ingestion/source/hex/constants.py +8 -0
  185. datahub/ingestion/source/hex/hex.py +311 -0
  186. datahub/ingestion/source/hex/mapper.py +412 -0
  187. datahub/ingestion/source/hex/model.py +78 -0
  188. datahub/ingestion/source/hex/query_fetcher.py +307 -0
  189. datahub/ingestion/source/iceberg/iceberg.py +385 -164
  190. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  191. datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
  192. datahub/ingestion/source/identity/azure_ad.py +1 -1
  193. datahub/ingestion/source/identity/okta.py +1 -14
  194. datahub/ingestion/source/kafka/kafka.py +28 -71
  195. datahub/ingestion/source/kafka/kafka_config.py +78 -0
  196. datahub/ingestion/source/kafka_connect/common.py +2 -2
  197. datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
  198. datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
  199. datahub/ingestion/source/ldap.py +1 -1
  200. datahub/ingestion/source/looker/looker_common.py +216 -86
  201. datahub/ingestion/source/looker/looker_config.py +15 -4
  202. datahub/ingestion/source/looker/looker_constant.py +4 -0
  203. datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
  204. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  205. datahub/ingestion/source/looker/looker_source.py +539 -555
  206. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  207. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  208. datahub/ingestion/source/looker/lookml_config.py +31 -3
  209. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  210. datahub/ingestion/source/looker/lookml_source.py +103 -118
  211. datahub/ingestion/source/looker/view_upstream.py +494 -1
  212. datahub/ingestion/source/metabase.py +32 -6
  213. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  214. datahub/ingestion/source/metadata/lineage.py +11 -10
  215. datahub/ingestion/source/mlflow.py +254 -23
  216. datahub/ingestion/source/mock_data/__init__.py +0 -0
  217. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  218. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  219. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  220. datahub/ingestion/source/mode.py +359 -181
  221. datahub/ingestion/source/mongodb.py +11 -1
  222. datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
  223. datahub/ingestion/source/nifi.py +5 -5
  224. datahub/ingestion/source/openapi.py +85 -38
  225. datahub/ingestion/source/openapi_parser.py +59 -40
  226. datahub/ingestion/source/powerbi/config.py +92 -27
  227. datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
  228. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  229. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  230. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
  231. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  232. datahub/ingestion/source/powerbi/powerbi.py +66 -32
  233. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  234. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
  235. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  236. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  237. datahub/ingestion/source/preset.py +3 -3
  238. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  239. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  240. datahub/ingestion/source/redash.py +1 -1
  241. datahub/ingestion/source/redshift/config.py +15 -9
  242. datahub/ingestion/source/redshift/datashares.py +1 -1
  243. datahub/ingestion/source/redshift/lineage.py +386 -687
  244. datahub/ingestion/source/redshift/profile.py +2 -2
  245. datahub/ingestion/source/redshift/query.py +24 -20
  246. datahub/ingestion/source/redshift/redshift.py +52 -111
  247. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  248. datahub/ingestion/source/redshift/report.py +0 -2
  249. datahub/ingestion/source/redshift/usage.py +13 -11
  250. datahub/ingestion/source/s3/report.py +4 -2
  251. datahub/ingestion/source/s3/source.py +515 -244
  252. datahub/ingestion/source/sac/sac.py +3 -1
  253. datahub/ingestion/source/salesforce.py +28 -13
  254. datahub/ingestion/source/schema/json_schema.py +14 -14
  255. datahub/ingestion/source/schema_inference/object.py +22 -6
  256. datahub/ingestion/source/sigma/config.py +75 -8
  257. datahub/ingestion/source/sigma/data_classes.py +3 -0
  258. datahub/ingestion/source/sigma/sigma.py +36 -7
  259. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  260. datahub/ingestion/source/slack/slack.py +403 -140
  261. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  262. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  263. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  264. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  265. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  266. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  267. datahub/ingestion/source/snowflake/constants.py +4 -0
  268. datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
  269. datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
  270. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
  271. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  272. datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
  273. datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
  274. datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
  275. datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
  276. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
  277. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  278. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  279. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  280. datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
  281. datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
  282. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  283. datahub/ingestion/source/sql/athena.py +219 -26
  284. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  285. datahub/ingestion/source/sql/clickhouse.py +29 -9
  286. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  287. datahub/ingestion/source/sql/druid.py +9 -4
  288. datahub/ingestion/source/sql/hana.py +3 -1
  289. datahub/ingestion/source/sql/hive.py +28 -8
  290. datahub/ingestion/source/sql/hive_metastore.py +24 -25
  291. datahub/ingestion/source/sql/mariadb.py +0 -1
  292. datahub/ingestion/source/sql/mssql/job_models.py +18 -2
  293. datahub/ingestion/source/sql/mssql/source.py +376 -62
  294. datahub/ingestion/source/sql/mysql.py +154 -4
  295. datahub/ingestion/source/sql/oracle.py +62 -11
  296. datahub/ingestion/source/sql/postgres.py +142 -6
  297. datahub/ingestion/source/sql/presto.py +20 -2
  298. datahub/ingestion/source/sql/sql_common.py +281 -49
  299. datahub/ingestion/source/sql/sql_config.py +1 -34
  300. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  301. datahub/ingestion/source/sql/sql_types.py +27 -2
  302. datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
  303. datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
  304. datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
  305. datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
  306. datahub/ingestion/source/sql/teradata.py +1028 -245
  307. datahub/ingestion/source/sql/trino.py +43 -10
  308. datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
  309. datahub/ingestion/source/sql/vertica.py +14 -7
  310. datahub/ingestion/source/sql_queries.py +219 -121
  311. datahub/ingestion/source/state/checkpoint.py +8 -29
  312. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  313. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  314. datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
  315. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  316. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  317. datahub/ingestion/source/superset.py +810 -126
  318. datahub/ingestion/source/tableau/tableau.py +172 -69
  319. datahub/ingestion/source/tableau/tableau_common.py +11 -4
  320. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  321. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  322. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  323. datahub/ingestion/source/unity/config.py +161 -40
  324. datahub/ingestion/source/unity/connection.py +61 -0
  325. datahub/ingestion/source/unity/connection_test.py +1 -0
  326. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  327. datahub/ingestion/source/unity/proxy.py +794 -51
  328. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  329. datahub/ingestion/source/unity/proxy_types.py +36 -2
  330. datahub/ingestion/source/unity/report.py +15 -3
  331. datahub/ingestion/source/unity/source.py +465 -131
  332. datahub/ingestion/source/unity/tag_entities.py +197 -0
  333. datahub/ingestion/source/unity/usage.py +46 -4
  334. datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
  335. datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
  336. datahub/ingestion/source/usage/usage_common.py +4 -68
  337. datahub/ingestion/source/vertexai/__init__.py +0 -0
  338. datahub/ingestion/source/vertexai/vertexai.py +1367 -0
  339. datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
  340. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
  341. datahub/ingestion/source_config/pulsar.py +3 -1
  342. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  343. datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
  344. datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
  345. datahub/ingestion/transformer/base_transformer.py +8 -5
  346. datahub/ingestion/transformer/dataset_domain.py +1 -1
  347. datahub/ingestion/transformer/set_browse_path.py +112 -0
  348. datahub/integrations/assertion/common.py +3 -2
  349. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  350. datahub/lite/lite_util.py +2 -2
  351. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
  352. datahub/metadata/_urns/urn_defs.py +1866 -1582
  353. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  354. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  355. datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
  356. datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
  357. datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
  358. datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
  359. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  360. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  361. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  362. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
  363. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  364. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  365. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  366. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  367. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  368. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  369. datahub/metadata/schema.avsc +18404 -16617
  370. datahub/metadata/schema_classes.py +3 -3
  371. datahub/metadata/schemas/Actors.avsc +38 -1
  372. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  373. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  374. datahub/metadata/schemas/Applications.avsc +38 -0
  375. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  376. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  377. datahub/metadata/schemas/ChartKey.avsc +1 -0
  378. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  379. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  380. datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
  381. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  382. datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
  383. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  384. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  385. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  386. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  387. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  388. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  389. datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  390. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  391. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  392. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  393. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  394. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  395. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  396. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  397. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  398. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  399. datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
  400. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  401. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  402. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  403. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  404. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  405. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  406. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  407. datahub/metadata/schemas/Deprecation.avsc +2 -0
  408. datahub/metadata/schemas/DomainKey.avsc +2 -1
  409. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  410. datahub/metadata/schemas/FormInfo.avsc +5 -0
  411. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  412. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  413. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  414. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  415. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  416. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  417. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  418. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  419. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  420. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  421. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  422. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  423. datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
  424. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  425. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  426. datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
  427. datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
  428. datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
  429. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  430. datahub/metadata/schemas/Operation.avsc +21 -2
  431. datahub/metadata/schemas/Ownership.avsc +69 -0
  432. datahub/metadata/schemas/QueryProperties.avsc +24 -2
  433. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  434. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  435. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  436. datahub/metadata/schemas/Siblings.avsc +2 -0
  437. datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
  438. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  439. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  440. datahub/metadata/schemas/SystemMetadata.avsc +147 -0
  441. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  442. datahub/metadata/schemas/__init__.py +3 -3
  443. datahub/sdk/__init__.py +7 -0
  444. datahub/sdk/_all_entities.py +15 -0
  445. datahub/sdk/_shared.py +393 -10
  446. datahub/sdk/_utils.py +4 -0
  447. datahub/sdk/chart.py +386 -0
  448. datahub/sdk/container.py +7 -0
  449. datahub/sdk/dashboard.py +453 -0
  450. datahub/sdk/dataflow.py +309 -0
  451. datahub/sdk/datajob.py +367 -0
  452. datahub/sdk/dataset.py +180 -4
  453. datahub/sdk/entity.py +99 -3
  454. datahub/sdk/entity_client.py +154 -12
  455. datahub/sdk/lineage_client.py +943 -0
  456. datahub/sdk/main_client.py +83 -8
  457. datahub/sdk/mlmodel.py +383 -0
  458. datahub/sdk/mlmodelgroup.py +240 -0
  459. datahub/sdk/search_client.py +85 -8
  460. datahub/sdk/search_filters.py +393 -68
  461. datahub/secret/datahub_secret_store.py +5 -1
  462. datahub/secret/environment_secret_store.py +29 -0
  463. datahub/secret/file_secret_store.py +49 -0
  464. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  465. datahub/specific/aspect_helpers/siblings.py +73 -0
  466. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  467. datahub/specific/chart.py +1 -1
  468. datahub/specific/datajob.py +15 -1
  469. datahub/specific/dataproduct.py +4 -0
  470. datahub/specific/dataset.py +51 -59
  471. datahub/sql_parsing/_sqlglot_patch.py +1 -2
  472. datahub/sql_parsing/fingerprint_utils.py +6 -0
  473. datahub/sql_parsing/split_statements.py +30 -3
  474. datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
  475. datahub/sql_parsing/sqlglot_lineage.py +517 -44
  476. datahub/sql_parsing/sqlglot_utils.py +30 -18
  477. datahub/sql_parsing/tool_meta_extractor.py +25 -2
  478. datahub/telemetry/telemetry.py +30 -16
  479. datahub/testing/check_imports.py +1 -1
  480. datahub/testing/docker_utils.py +8 -2
  481. datahub/testing/mce_helpers.py +421 -0
  482. datahub/testing/mcp_diff.py +17 -21
  483. datahub/testing/sdk_v2_helpers.py +18 -0
  484. datahub/upgrade/upgrade.py +86 -30
  485. datahub/utilities/file_backed_collections.py +14 -15
  486. datahub/utilities/hive_schema_to_avro.py +2 -2
  487. datahub/utilities/ingest_utils.py +2 -2
  488. datahub/utilities/is_pytest.py +3 -2
  489. datahub/utilities/logging_manager.py +30 -7
  490. datahub/utilities/mapping.py +29 -2
  491. datahub/utilities/sample_data.py +5 -4
  492. datahub/utilities/server_config_util.py +298 -10
  493. datahub/utilities/sqlalchemy_query_combiner.py +6 -4
  494. datahub/utilities/stats_collections.py +4 -0
  495. datahub/utilities/threaded_iterator_executor.py +16 -3
  496. datahub/utilities/urn_encoder.py +1 -1
  497. datahub/utilities/urns/urn.py +41 -2
  498. datahub/emitter/sql_parsing_builder.py +0 -306
  499. datahub/ingestion/source/redshift/lineage_v2.py +0 -458
  500. datahub/ingestion/source/vertexai.py +0 -697
  501. datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
  502. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
  503. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -1,26 +1,28 @@
1
1
  import logging
2
- import os
3
2
  from collections import defaultdict
4
3
  from dataclasses import dataclass, field
5
4
  from datetime import datetime
6
- from typing import Callable, Dict, Iterable, List, MutableMapping, Optional
5
+ from typing import Any, Callable, Dict, Iterable, List, MutableMapping, Optional, Tuple
7
6
 
7
+ from datahub.configuration.env_vars import get_snowflake_schema_parallelism
8
8
  from datahub.ingestion.api.report import SupportsAsObj
9
9
  from datahub.ingestion.source.common.subtypes import DatasetSubTypes
10
10
  from datahub.ingestion.source.snowflake.constants import SnowflakeObjectDomain
11
11
  from datahub.ingestion.source.snowflake.snowflake_connection import SnowflakeConnection
12
12
  from datahub.ingestion.source.snowflake.snowflake_query import (
13
- SHOW_VIEWS_MAX_PAGE_SIZE,
13
+ SHOW_COMMAND_MAX_PAGE_SIZE,
14
14
  SnowflakeQuery,
15
15
  )
16
+ from datahub.ingestion.source.snowflake.snowflake_report import SnowflakeV2Report
16
17
  from datahub.ingestion.source.sql.sql_generic import BaseColumn, BaseTable, BaseView
18
+ from datahub.ingestion.source.sql.stored_procedures.base import BaseProcedure
17
19
  from datahub.utilities.file_backed_collections import FileBackedDict
18
20
  from datahub.utilities.prefix_batch_builder import PrefixGroup, build_prefix_batches
19
21
  from datahub.utilities.serialized_lru_cache import serialized_lru_cache
20
22
 
21
23
  logger: logging.Logger = logging.getLogger(__name__)
22
24
 
23
- SCHEMA_PARALLELISM = int(os.getenv("DATAHUB_SNOWFLAKE_SCHEMA_PARALLELISM", 20))
25
+ SCHEMA_PARALLELISM = get_snowflake_schema_parallelism()
24
26
 
25
27
 
26
28
  @dataclass
@@ -102,6 +104,17 @@ class SnowflakeTable(BaseTable):
102
104
  return DatasetSubTypes.TABLE
103
105
 
104
106
 
107
+ @dataclass
108
+ class SnowflakeDynamicTable(SnowflakeTable):
109
+ definition: Optional[str] = (
110
+ None # SQL query that defines the dynamic table's content
111
+ )
112
+ target_lag: Optional[str] = None # Refresh frequency (e.g., "1 HOUR", "30 MINUTES")
113
+
114
+ def get_subtype(self) -> DatasetSubTypes:
115
+ return DatasetSubTypes.DYNAMIC_TABLE
116
+
117
+
105
118
  @dataclass
106
119
  class SnowflakeView(BaseView):
107
120
  materialized: bool = False
@@ -225,10 +238,17 @@ class _SnowflakeTagCache:
225
238
 
226
239
 
227
240
  class SnowflakeDataDictionary(SupportsAsObj):
228
- def __init__(self, connection: SnowflakeConnection) -> None:
241
+ def __init__(
242
+ self,
243
+ connection: SnowflakeConnection,
244
+ report: SnowflakeV2Report,
245
+ fetch_views_from_information_schema: bool = False,
246
+ ) -> None:
229
247
  self.connection = connection
248
+ self.report = report
249
+ self._fetch_views_from_information_schema = fetch_views_from_information_schema
230
250
 
231
- def as_obj(self) -> Dict[str, Dict[str, int]]:
251
+ def as_obj(self) -> Dict[str, Any]:
232
252
  # TODO: Move this into a proper report type that gets computed.
233
253
 
234
254
  # Reports how many times we reset in-memory `functools.lru_cache` caches of data,
@@ -244,7 +264,9 @@ class SnowflakeDataDictionary(SupportsAsObj):
244
264
  self.get_fk_constraints_for_schema,
245
265
  ]
246
266
 
247
- report = {}
267
+ report: Dict[str, Any] = {
268
+ "fetch_views_from_information_schema": self._fetch_views_from_information_schema,
269
+ }
248
270
  for func in lru_cache_functions:
249
271
  report[func.__name__] = func.cache_info()._asdict() # type: ignore
250
272
  return report
@@ -354,8 +376,11 @@ class SnowflakeDataDictionary(SupportsAsObj):
354
376
  if table["TABLE_SCHEMA"] not in tables:
355
377
  tables[table["TABLE_SCHEMA"]] = []
356
378
 
379
+ is_dynamic = table.get("IS_DYNAMIC", "NO").upper() == "YES"
380
+ table_cls = SnowflakeDynamicTable if is_dynamic else SnowflakeTable
381
+
357
382
  tables[table["TABLE_SCHEMA"]].append(
358
- SnowflakeTable(
383
+ table_cls(
359
384
  name=table["TABLE_NAME"],
360
385
  type=table["TABLE_TYPE"],
361
386
  created=table["CREATED"],
@@ -364,11 +389,15 @@ class SnowflakeDataDictionary(SupportsAsObj):
364
389
  rows_count=table["ROW_COUNT"],
365
390
  comment=table["COMMENT"],
366
391
  clustering_key=table["CLUSTERING_KEY"],
367
- is_dynamic=table.get("IS_DYNAMIC", "NO").upper() == "YES",
392
+ is_dynamic=is_dynamic,
368
393
  is_iceberg=table.get("IS_ICEBERG", "NO").upper() == "YES",
369
394
  is_hybrid=table.get("IS_HYBRID", "NO").upper() == "YES",
370
395
  )
371
396
  )
397
+
398
+ # Populate dynamic table definitions
399
+ self.populate_dynamic_table_definitions(tables, db_name)
400
+
372
401
  return tables
373
402
 
374
403
  def get_tables_for_schema(
@@ -381,8 +410,11 @@ class SnowflakeDataDictionary(SupportsAsObj):
381
410
  )
382
411
 
383
412
  for table in cur:
413
+ is_dynamic = table.get("IS_DYNAMIC", "NO").upper() == "YES"
414
+ table_cls = SnowflakeDynamicTable if is_dynamic else SnowflakeTable
415
+
384
416
  tables.append(
385
- SnowflakeTable(
417
+ table_cls(
386
418
  name=table["TABLE_NAME"],
387
419
  type=table["TABLE_TYPE"],
388
420
  created=table["CREATED"],
@@ -391,16 +423,31 @@ class SnowflakeDataDictionary(SupportsAsObj):
391
423
  rows_count=table["ROW_COUNT"],
392
424
  comment=table["COMMENT"],
393
425
  clustering_key=table["CLUSTERING_KEY"],
394
- is_dynamic=table.get("IS_DYNAMIC", "NO").upper() == "YES",
426
+ is_dynamic=is_dynamic,
395
427
  is_iceberg=table.get("IS_ICEBERG", "NO").upper() == "YES",
396
428
  is_hybrid=table.get("IS_HYBRID", "NO").upper() == "YES",
397
429
  )
398
430
  )
431
+
432
+ # Populate dynamic table definitions for just this schema
433
+ schema_tables = {schema_name: tables}
434
+ self.populate_dynamic_table_definitions(schema_tables, db_name)
435
+
399
436
  return tables
400
437
 
401
438
  @serialized_lru_cache(maxsize=1)
402
- def get_views_for_database(self, db_name: str) -> Dict[str, List[SnowflakeView]]:
403
- page_limit = SHOW_VIEWS_MAX_PAGE_SIZE
439
+ def get_views_for_database(
440
+ self, db_name: str
441
+ ) -> Optional[Dict[str, List[SnowflakeView]]]:
442
+ if self._fetch_views_from_information_schema:
443
+ return self._get_views_for_database_using_information_schema(db_name)
444
+ else:
445
+ return self._get_views_for_database_using_show(db_name)
446
+
447
+ def _get_views_for_database_using_show(
448
+ self, db_name: str
449
+ ) -> Dict[str, List[SnowflakeView]]:
450
+ page_limit = SHOW_COMMAND_MAX_PAGE_SIZE
404
451
 
405
452
  views: Dict[str, List[SnowflakeView]] = {}
406
453
 
@@ -430,10 +477,9 @@ class SnowflakeDataDictionary(SupportsAsObj):
430
477
  SnowflakeView(
431
478
  name=view_name,
432
479
  created=view["created_on"],
433
- # last_altered=table["last_altered"],
434
480
  comment=view["comment"],
435
481
  view_definition=view["text"],
436
- last_altered=view["created_on"],
482
+ last_altered=view["created_on"], # TODO: This is not correct.
437
483
  materialized=(
438
484
  view.get("is_materialized", "false").lower() == "true"
439
485
  ),
@@ -448,6 +494,163 @@ class SnowflakeDataDictionary(SupportsAsObj):
448
494
  )
449
495
  view_pagination_marker = view_name
450
496
 
497
+ # Because this is in a cached function, this will only log once per database.
498
+ view_counts = {schema_name: len(views[schema_name]) for schema_name in views}
499
+ logger.info(
500
+ f"Finished fetching views in {db_name}; counts by schema {view_counts}"
501
+ )
502
+ return views
503
+
504
+ def _map_view(self, db_name: str, row: Dict[str, Any]) -> Tuple[str, SnowflakeView]:
505
+ schema_name = row["VIEW_SCHEMA"]
506
+ view_definition = row.get("VIEW_DEFINITION")
507
+ fragment_view_definition = (
508
+ view_definition[:50].strip() if view_definition else None
509
+ )
510
+ logger.info(
511
+ f"Mapping view {db_name}.{schema_name}.{row['VIEW_NAME']} with view definition: {fragment_view_definition}..."
512
+ )
513
+
514
+ return schema_name, SnowflakeView(
515
+ name=row["VIEW_NAME"],
516
+ created=row["CREATED"],
517
+ comment=row["COMMENT"],
518
+ view_definition=view_definition,
519
+ last_altered=row["LAST_ALTERED"],
520
+ is_secure=(row.get("IS_SECURE", "false").lower() == "true"),
521
+ # TODO: This doesn't work for materialized views.
522
+ materialized=False,
523
+ )
524
+
525
+ def _maybe_populate_empty_view_definitions(
526
+ self,
527
+ db_name: str,
528
+ schema_name: str,
529
+ views_with_empty_definition: List[SnowflakeView],
530
+ ) -> List[SnowflakeView]:
531
+ if not views_with_empty_definition:
532
+ return []
533
+
534
+ view_names = [view.name for view in views_with_empty_definition]
535
+ batches = [
536
+ batch[0]
537
+ for batch in build_prefix_batches(
538
+ view_names, max_batch_size=1000, max_groups_in_batch=1
539
+ )
540
+ if batch
541
+ # Skip empty batch if so, also max_groups_in_batch=1 makes it safe to access batch[0]
542
+ ]
543
+
544
+ view_map: Dict[str, SnowflakeView] = {
545
+ view.name: view for view in views_with_empty_definition
546
+ }
547
+ views_found_count = 0
548
+
549
+ logger.info(
550
+ f"Fetching definitions for {len(view_map)} views in {db_name}.{schema_name} "
551
+ f"using batched 'SHOW VIEWS ... LIKE ...' queries. Found {len(batches)} batch(es)."
552
+ )
553
+
554
+ for batch_index, prefix_group in enumerate(batches):
555
+ query = f'SHOW VIEWS LIKE \'{prefix_group.prefix}%\' IN SCHEMA "{db_name}"."{schema_name}"'
556
+ logger.info(f"Processing batch {batch_index + 1}/{len(batches)}: {query}")
557
+
558
+ try:
559
+ cur = self.connection.query(query)
560
+ for row in cur:
561
+ view_name = row["name"]
562
+ if view_name in view_map:
563
+ view_definition = row.get("text")
564
+ if view_definition: # Ensure definition is not None or empty
565
+ view_map[view_name].view_definition = view_definition
566
+ views_found_count += 1
567
+ logger.debug(
568
+ f"Fetched view definition for {db_name}.{schema_name}.{view_name}"
569
+ )
570
+ # If all targeted views are found, we could theoretically break early,
571
+ # but SHOW VIEWS doesn't guarantee order, so we must process all results.
572
+ else:
573
+ logger.warning(
574
+ f"'text' field missing or empty in SHOW VIEWS result for {db_name}.{schema_name}.{view_name}"
575
+ )
576
+
577
+ except Exception as e:
578
+ logger.error(
579
+ f"Failed to execute query for batch {batch_index + 1} ('{query}') for {db_name}.{schema_name} or process its results.",
580
+ exc_info=e,
581
+ )
582
+ # Returning the original list; some views might still be missing definitions.
583
+ # This also means subsequent batches for this schema (in this call) are skipped.
584
+ return views_with_empty_definition
585
+
586
+ logger.info(
587
+ f"Finished processing 'SHOW VIEWS' batches for {db_name}.{schema_name}. "
588
+ f"Fetched definitions for {views_found_count} out of {len(view_map)} targeted views."
589
+ )
590
+
591
+ if views_found_count < len(view_map):
592
+ missing_count = len(view_map) - views_found_count
593
+ logger.warning(
594
+ f"Could not fetch definitions for {missing_count} views in {db_name}.{schema_name} after processing all batches."
595
+ )
596
+ # The SnowflakeView objects in the original list were modified in place via view_map
597
+ return views_with_empty_definition
598
+
599
+ def _get_views_for_database_using_information_schema(
600
+ self, db_name: str
601
+ ) -> Optional[Dict[str, List[SnowflakeView]]]:
602
+ try:
603
+ cur = self.connection.query(
604
+ SnowflakeQuery.get_views_for_database(db_name),
605
+ )
606
+ except Exception as e:
607
+ logger.debug(f"Failed to get all views for database {db_name}", exc_info=e)
608
+ # Error - Information schema query returned too much data. Please repeat query with more selective predicates.
609
+ return None
610
+
611
+ views: Dict[str, List[SnowflakeView]] = {}
612
+ views_with_empty_definition: Dict[str, List[SnowflakeView]] = {}
613
+
614
+ for row in cur:
615
+ schema_name, view = self._map_view(db_name, row)
616
+ if view.view_definition is None or view.view_definition == "":
617
+ views_with_empty_definition.setdefault(schema_name, []).append(view)
618
+ else:
619
+ views.setdefault(schema_name, []).append(view)
620
+
621
+ for schema_name, empty_views in views_with_empty_definition.items():
622
+ updated_views = self._maybe_populate_empty_view_definitions(
623
+ db_name, schema_name, empty_views
624
+ )
625
+ views.setdefault(schema_name, []).extend(updated_views)
626
+
627
+ return views
628
+
629
+ def get_views_for_schema_using_information_schema(
630
+ self, *, schema_name: str, db_name: str
631
+ ) -> List[SnowflakeView]:
632
+ cur = self.connection.query(
633
+ SnowflakeQuery.get_views_for_schema(
634
+ db_name=db_name, schema_name=schema_name
635
+ ),
636
+ )
637
+
638
+ views: List[SnowflakeView] = []
639
+ views_with_empty_definition: List[SnowflakeView] = []
640
+
641
+ for row in cur:
642
+ schema_name, view = self._map_view(db_name, row)
643
+ if view.view_definition is None or view.view_definition == "":
644
+ views_with_empty_definition.append(view)
645
+ else:
646
+ views.append(view)
647
+
648
+ if views_with_empty_definition:
649
+ updated_empty_views = self._maybe_populate_empty_view_definitions(
650
+ db_name, schema_name, views_with_empty_definition
651
+ )
652
+ views.extend(updated_empty_views)
653
+
451
654
  return views
452
655
 
453
656
  @serialized_lru_cache(maxsize=SCHEMA_PARALLELISM)
@@ -659,7 +862,7 @@ class SnowflakeDataDictionary(SupportsAsObj):
659
862
  def get_streams_for_database(
660
863
  self, db_name: str
661
864
  ) -> Dict[str, List[SnowflakeStream]]:
662
- page_limit = SHOW_VIEWS_MAX_PAGE_SIZE
865
+ page_limit = SHOW_COMMAND_MAX_PAGE_SIZE
663
866
 
664
867
  streams: Dict[str, List[SnowflakeStream]] = {}
665
868
 
@@ -714,3 +917,165 @@ class SnowflakeDataDictionary(SupportsAsObj):
714
917
  stream_pagination_marker = stream_name
715
918
 
716
919
  return streams
920
+
921
+ @serialized_lru_cache(maxsize=1)
922
+ def get_procedures_for_database(
923
+ self, db_name: str
924
+ ) -> Dict[str, List[BaseProcedure]]:
925
+ procedures: Dict[str, List[BaseProcedure]] = {}
926
+ cur = self.connection.query(
927
+ SnowflakeQuery.procedures_for_database(db_name),
928
+ )
929
+
930
+ for procedure in cur:
931
+ if procedure["PROCEDURE_SCHEMA"] not in procedures:
932
+ procedures[procedure["PROCEDURE_SCHEMA"]] = []
933
+
934
+ procedures[procedure["PROCEDURE_SCHEMA"]].append(
935
+ BaseProcedure(
936
+ name=procedure["PROCEDURE_NAME"],
937
+ language=procedure["PROCEDURE_LANGUAGE"],
938
+ argument_signature=procedure["ARGUMENT_SIGNATURE"],
939
+ return_type=procedure["PROCEDURE_RETURN_TYPE"],
940
+ procedure_definition=procedure["PROCEDURE_DEFINITION"],
941
+ created=procedure["CREATED"],
942
+ last_altered=procedure["LAST_ALTERED"],
943
+ comment=procedure["COMMENT"],
944
+ extra_properties=None,
945
+ )
946
+ )
947
+ return procedures
948
+
949
+ @serialized_lru_cache(maxsize=1)
950
+ def get_dynamic_table_graph_info(self, db_name: str) -> Dict[str, Dict[str, Any]]:
951
+ """Get dynamic table dependency information from information schema."""
952
+ dt_graph_info: Dict[str, Dict[str, Any]] = {}
953
+ try:
954
+ cur = self.connection.query(
955
+ SnowflakeQuery.get_dynamic_table_graph_history(db_name)
956
+ )
957
+ for row in cur:
958
+ dt_name = row["NAME"]
959
+ dt_graph_info[dt_name] = {
960
+ "inputs": row.get("INPUTS"),
961
+ "target_lag_type": row.get("TARGET_LAG_TYPE"),
962
+ "target_lag_sec": row.get("TARGET_LAG_SEC"),
963
+ "scheduling_state": row.get("SCHEDULING_STATE"),
964
+ "alter_trigger": row.get("ALTER_TRIGGER"),
965
+ }
966
+ logger.debug(
967
+ f"Successfully retrieved graph info for {len(dt_graph_info)} dynamic tables in {db_name}"
968
+ )
969
+ except Exception as e:
970
+ self.report.warning(
971
+ "Failed to get dynamic table graph history",
972
+ db_name,
973
+ exc=e,
974
+ )
975
+
976
+ return dt_graph_info
977
+
978
+ @serialized_lru_cache(maxsize=1)
979
+ def get_dynamic_tables_with_definitions(
980
+ self, db_name: str
981
+ ) -> Dict[str, List[SnowflakeDynamicTable]]:
982
+ """Get dynamic tables with their definitions using SHOW DYNAMIC TABLES."""
983
+ page_limit = SHOW_COMMAND_MAX_PAGE_SIZE
984
+ dynamic_tables: Dict[str, List[SnowflakeDynamicTable]] = {}
985
+
986
+ # Get graph/dependency information (pass db_name)
987
+ dt_graph_info = self.get_dynamic_table_graph_info(db_name)
988
+
989
+ first_iteration = True
990
+ dt_pagination_marker: Optional[str] = None
991
+
992
+ while first_iteration or dt_pagination_marker is not None:
993
+ try:
994
+ cur = self.connection.query(
995
+ SnowflakeQuery.show_dynamic_tables_for_database(
996
+ db_name,
997
+ limit=page_limit,
998
+ dynamic_table_pagination_marker=dt_pagination_marker,
999
+ )
1000
+ )
1001
+
1002
+ first_iteration = False
1003
+ dt_pagination_marker = None
1004
+ result_set_size = 0
1005
+
1006
+ for dt in cur:
1007
+ result_set_size += 1
1008
+
1009
+ dt_name = dt["name"]
1010
+ schema_name = dt["schema_name"]
1011
+
1012
+ if schema_name not in dynamic_tables:
1013
+ dynamic_tables[schema_name] = []
1014
+
1015
+ # Get definition from SHOW result
1016
+ definition = dt.get("text")
1017
+
1018
+ # Get target lag from SHOW result or graph info
1019
+ target_lag = dt.get("target_lag")
1020
+ if not target_lag and dt_graph_info:
1021
+ qualified_name = f"{db_name}.{schema_name}.{dt_name}"
1022
+ graph_info = dt_graph_info.get(qualified_name, {})
1023
+ if graph_info.get("target_lag_type") and graph_info.get(
1024
+ "target_lag_sec"
1025
+ ):
1026
+ target_lag = f"{graph_info['target_lag_sec']} {graph_info['target_lag_type']}"
1027
+
1028
+ dynamic_tables[schema_name].append(
1029
+ SnowflakeDynamicTable(
1030
+ name=dt_name,
1031
+ created=dt["created_on"],
1032
+ last_altered=dt.get("created_on"),
1033
+ size_in_bytes=dt.get("bytes", 0),
1034
+ rows_count=dt.get("rows", 0),
1035
+ comment=dt.get("comment"),
1036
+ definition=definition,
1037
+ target_lag=target_lag,
1038
+ is_dynamic=True,
1039
+ type="DYNAMIC TABLE",
1040
+ )
1041
+ )
1042
+
1043
+ if result_set_size >= page_limit:
1044
+ logger.info(
1045
+ f"Fetching next page of dynamic tables for {db_name} - after {dt_name}"
1046
+ )
1047
+ dt_pagination_marker = dt_name
1048
+
1049
+ except Exception as e:
1050
+ logger.debug(
1051
+ f"Failed to get dynamic tables for database {db_name}: {e}"
1052
+ )
1053
+ break
1054
+
1055
+ return dynamic_tables
1056
+
1057
+ def populate_dynamic_table_definitions(
1058
+ self, tables: Dict[str, List[SnowflakeTable]], db_name: str
1059
+ ) -> None:
1060
+ """Populate dynamic table definitions for tables that are marked as dynamic."""
1061
+ try:
1062
+ # Get dynamic tables with definitions from SHOW command
1063
+ dt_with_definitions = self.get_dynamic_tables_with_definitions(db_name)
1064
+
1065
+ for schema_name, table_list in tables.items():
1066
+ for table in table_list:
1067
+ if (
1068
+ isinstance(table, SnowflakeDynamicTable)
1069
+ and table.definition is None
1070
+ ):
1071
+ # Find matching dynamic table from SHOW results
1072
+ show_dt_list = dt_with_definitions.get(schema_name, [])
1073
+ for show_dt in show_dt_list:
1074
+ if show_dt.name == table.name:
1075
+ table.definition = show_dt.definition
1076
+ table.target_lag = show_dt.target_lag
1077
+ break
1078
+ except Exception as e:
1079
+ logger.debug(
1080
+ f"Failed to populate dynamic table definitions for {db_name}: {e}"
1081
+ )