acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (503)
  1. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
  2. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
  3. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
  5. datahub/_version.py +1 -1
  6. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  7. datahub/api/entities/assertion/assertion.py +1 -1
  8. datahub/api/entities/common/serialized_value.py +1 -1
  9. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  10. datahub/api/entities/datacontract/datacontract.py +35 -3
  11. datahub/api/entities/datajob/dataflow.py +18 -3
  12. datahub/api/entities/datajob/datajob.py +24 -4
  13. datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
  14. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  15. datahub/api/entities/dataset/dataset.py +47 -72
  16. datahub/api/entities/external/__init__.py +0 -0
  17. datahub/api/entities/external/external_entities.py +724 -0
  18. datahub/api/entities/external/external_tag.py +147 -0
  19. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  20. datahub/api/entities/external/restricted_text.py +172 -0
  21. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  22. datahub/api/entities/forms/forms.py +37 -37
  23. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  24. datahub/api/graphql/assertion.py +1 -1
  25. datahub/api/graphql/base.py +8 -6
  26. datahub/api/graphql/operation.py +14 -10
  27. datahub/cli/check_cli.py +91 -9
  28. datahub/cli/cli_utils.py +63 -0
  29. datahub/cli/config_utils.py +20 -12
  30. datahub/cli/container_cli.py +5 -0
  31. datahub/cli/delete_cli.py +133 -34
  32. datahub/cli/docker_check.py +110 -14
  33. datahub/cli/docker_cli.py +155 -231
  34. datahub/cli/exists_cli.py +2 -3
  35. datahub/cli/get_cli.py +2 -3
  36. datahub/cli/graphql_cli.py +1422 -0
  37. datahub/cli/iceberg_cli.py +11 -5
  38. datahub/cli/ingest_cli.py +25 -26
  39. datahub/cli/migrate.py +12 -9
  40. datahub/cli/migration_utils.py +4 -3
  41. datahub/cli/put_cli.py +4 -6
  42. datahub/cli/quickstart_versioning.py +53 -10
  43. datahub/cli/specific/assertions_cli.py +39 -7
  44. datahub/cli/specific/datacontract_cli.py +57 -9
  45. datahub/cli/specific/dataproduct_cli.py +12 -24
  46. datahub/cli/specific/dataset_cli.py +31 -21
  47. datahub/cli/specific/forms_cli.py +2 -5
  48. datahub/cli/specific/group_cli.py +2 -3
  49. datahub/cli/specific/structuredproperties_cli.py +5 -7
  50. datahub/cli/specific/user_cli.py +174 -4
  51. datahub/cli/state_cli.py +2 -3
  52. datahub/cli/timeline_cli.py +2 -3
  53. datahub/configuration/common.py +46 -2
  54. datahub/configuration/connection_resolver.py +5 -2
  55. datahub/configuration/env_vars.py +331 -0
  56. datahub/configuration/import_resolver.py +7 -4
  57. datahub/configuration/kafka.py +21 -1
  58. datahub/configuration/pydantic_migration_helpers.py +6 -13
  59. datahub/configuration/source_common.py +4 -3
  60. datahub/configuration/validate_field_deprecation.py +5 -2
  61. datahub/configuration/validate_field_removal.py +8 -2
  62. datahub/configuration/validate_field_rename.py +6 -5
  63. datahub/configuration/validate_multiline_string.py +5 -2
  64. datahub/emitter/mce_builder.py +12 -8
  65. datahub/emitter/mcp.py +20 -5
  66. datahub/emitter/mcp_builder.py +12 -0
  67. datahub/emitter/request_helper.py +138 -15
  68. datahub/emitter/response_helper.py +111 -19
  69. datahub/emitter/rest_emitter.py +399 -163
  70. datahub/entrypoints.py +10 -5
  71. datahub/errors.py +12 -0
  72. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
  73. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  74. datahub/ingestion/api/common.py +9 -0
  75. datahub/ingestion/api/decorators.py +15 -3
  76. datahub/ingestion/api/report.py +381 -3
  77. datahub/ingestion/api/sink.py +27 -2
  78. datahub/ingestion/api/source.py +174 -62
  79. datahub/ingestion/api/source_helpers.py +41 -3
  80. datahub/ingestion/api/source_protocols.py +23 -0
  81. datahub/ingestion/autogenerated/__init__.py +0 -0
  82. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  83. datahub/ingestion/autogenerated/lineage.json +402 -0
  84. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  85. datahub/ingestion/extractor/schema_util.py +31 -5
  86. datahub/ingestion/glossary/classification_mixin.py +9 -2
  87. datahub/ingestion/graph/client.py +492 -55
  88. datahub/ingestion/graph/config.py +18 -2
  89. datahub/ingestion/graph/filters.py +96 -32
  90. datahub/ingestion/graph/links.py +55 -0
  91. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  92. datahub/ingestion/run/pipeline.py +90 -23
  93. datahub/ingestion/run/pipeline_config.py +3 -3
  94. datahub/ingestion/sink/datahub_kafka.py +1 -0
  95. datahub/ingestion/sink/datahub_rest.py +31 -23
  96. datahub/ingestion/sink/file.py +1 -0
  97. datahub/ingestion/source/abs/config.py +1 -1
  98. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  99. datahub/ingestion/source/abs/source.py +15 -30
  100. datahub/ingestion/source/apply/datahub_apply.py +6 -5
  101. datahub/ingestion/source/aws/aws_common.py +185 -13
  102. datahub/ingestion/source/aws/glue.py +517 -244
  103. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  104. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  105. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  106. datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
  107. datahub/ingestion/source/aws/tag_entities.py +270 -0
  108. datahub/ingestion/source/azure/azure_common.py +3 -3
  109. datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
  110. datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
  111. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
  112. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
  113. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  114. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  115. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  116. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  117. datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
  118. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  119. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  120. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  121. datahub/ingestion/source/cassandra/cassandra.py +7 -18
  122. datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
  123. datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
  124. datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
  125. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  126. datahub/ingestion/source/common/data_platforms.py +23 -0
  127. datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
  128. datahub/ingestion/source/common/subtypes.py +73 -1
  129. datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
  130. datahub/ingestion/source/data_lake_common/object_store.py +732 -0
  131. datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
  132. datahub/ingestion/source/datahub/config.py +19 -5
  133. datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
  134. datahub/ingestion/source/datahub/datahub_source.py +11 -1
  135. datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
  136. datahub/ingestion/source/dbt/dbt_common.py +270 -26
  137. datahub/ingestion/source/dbt/dbt_core.py +88 -47
  138. datahub/ingestion/source/dbt/dbt_tests.py +8 -6
  139. datahub/ingestion/source/debug/__init__.py +0 -0
  140. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  141. datahub/ingestion/source/delta_lake/config.py +9 -5
  142. datahub/ingestion/source/delta_lake/source.py +8 -0
  143. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  144. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  145. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  146. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
  147. datahub/ingestion/source/dremio/dremio_entities.py +6 -5
  148. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  149. datahub/ingestion/source/dremio/dremio_source.py +228 -215
  150. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  151. datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
  152. datahub/ingestion/source/excel/__init__.py +0 -0
  153. datahub/ingestion/source/excel/config.py +92 -0
  154. datahub/ingestion/source/excel/excel_file.py +539 -0
  155. datahub/ingestion/source/excel/profiling.py +308 -0
  156. datahub/ingestion/source/excel/report.py +49 -0
  157. datahub/ingestion/source/excel/source.py +662 -0
  158. datahub/ingestion/source/excel/util.py +18 -0
  159. datahub/ingestion/source/feast.py +12 -14
  160. datahub/ingestion/source/file.py +3 -0
  161. datahub/ingestion/source/fivetran/config.py +67 -8
  162. datahub/ingestion/source/fivetran/fivetran.py +228 -43
  163. datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
  164. datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
  165. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  166. datahub/ingestion/source/fivetran/response_models.py +97 -0
  167. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  168. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
  169. datahub/ingestion/source/gcs/gcs_source.py +53 -10
  170. datahub/ingestion/source/gcs/gcs_utils.py +36 -9
  171. datahub/ingestion/source/ge_data_profiler.py +146 -33
  172. datahub/ingestion/source/ge_profiling_config.py +26 -11
  173. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  174. datahub/ingestion/source/grafana/field_utils.py +307 -0
  175. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  176. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  177. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  178. datahub/ingestion/source/grafana/lineage.py +202 -0
  179. datahub/ingestion/source/grafana/models.py +137 -0
  180. datahub/ingestion/source/grafana/report.py +90 -0
  181. datahub/ingestion/source/grafana/types.py +16 -0
  182. datahub/ingestion/source/hex/__init__.py +0 -0
  183. datahub/ingestion/source/hex/api.py +402 -0
  184. datahub/ingestion/source/hex/constants.py +8 -0
  185. datahub/ingestion/source/hex/hex.py +311 -0
  186. datahub/ingestion/source/hex/mapper.py +412 -0
  187. datahub/ingestion/source/hex/model.py +78 -0
  188. datahub/ingestion/source/hex/query_fetcher.py +307 -0
  189. datahub/ingestion/source/iceberg/iceberg.py +385 -164
  190. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  191. datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
  192. datahub/ingestion/source/identity/azure_ad.py +1 -1
  193. datahub/ingestion/source/identity/okta.py +1 -14
  194. datahub/ingestion/source/kafka/kafka.py +28 -71
  195. datahub/ingestion/source/kafka/kafka_config.py +78 -0
  196. datahub/ingestion/source/kafka_connect/common.py +2 -2
  197. datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
  198. datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
  199. datahub/ingestion/source/ldap.py +1 -1
  200. datahub/ingestion/source/looker/looker_common.py +216 -86
  201. datahub/ingestion/source/looker/looker_config.py +15 -4
  202. datahub/ingestion/source/looker/looker_constant.py +4 -0
  203. datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
  204. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  205. datahub/ingestion/source/looker/looker_source.py +539 -555
  206. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  207. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  208. datahub/ingestion/source/looker/lookml_config.py +31 -3
  209. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  210. datahub/ingestion/source/looker/lookml_source.py +103 -118
  211. datahub/ingestion/source/looker/view_upstream.py +494 -1
  212. datahub/ingestion/source/metabase.py +32 -6
  213. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  214. datahub/ingestion/source/metadata/lineage.py +11 -10
  215. datahub/ingestion/source/mlflow.py +254 -23
  216. datahub/ingestion/source/mock_data/__init__.py +0 -0
  217. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  218. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  219. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  220. datahub/ingestion/source/mode.py +359 -181
  221. datahub/ingestion/source/mongodb.py +11 -1
  222. datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
  223. datahub/ingestion/source/nifi.py +5 -5
  224. datahub/ingestion/source/openapi.py +85 -38
  225. datahub/ingestion/source/openapi_parser.py +59 -40
  226. datahub/ingestion/source/powerbi/config.py +92 -27
  227. datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
  228. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  229. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  230. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
  231. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  232. datahub/ingestion/source/powerbi/powerbi.py +66 -32
  233. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  234. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
  235. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  236. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  237. datahub/ingestion/source/preset.py +3 -3
  238. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  239. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  240. datahub/ingestion/source/redash.py +1 -1
  241. datahub/ingestion/source/redshift/config.py +15 -9
  242. datahub/ingestion/source/redshift/datashares.py +1 -1
  243. datahub/ingestion/source/redshift/lineage.py +386 -687
  244. datahub/ingestion/source/redshift/profile.py +2 -2
  245. datahub/ingestion/source/redshift/query.py +24 -20
  246. datahub/ingestion/source/redshift/redshift.py +52 -111
  247. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  248. datahub/ingestion/source/redshift/report.py +0 -2
  249. datahub/ingestion/source/redshift/usage.py +13 -11
  250. datahub/ingestion/source/s3/report.py +4 -2
  251. datahub/ingestion/source/s3/source.py +515 -244
  252. datahub/ingestion/source/sac/sac.py +3 -1
  253. datahub/ingestion/source/salesforce.py +28 -13
  254. datahub/ingestion/source/schema/json_schema.py +14 -14
  255. datahub/ingestion/source/schema_inference/object.py +22 -6
  256. datahub/ingestion/source/sigma/config.py +75 -8
  257. datahub/ingestion/source/sigma/data_classes.py +3 -0
  258. datahub/ingestion/source/sigma/sigma.py +36 -7
  259. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  260. datahub/ingestion/source/slack/slack.py +403 -140
  261. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  262. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  263. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  264. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  265. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  266. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  267. datahub/ingestion/source/snowflake/constants.py +4 -0
  268. datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
  269. datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
  270. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
  271. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  272. datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
  273. datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
  274. datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
  275. datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
  276. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
  277. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  278. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  279. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  280. datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
  281. datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
  282. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  283. datahub/ingestion/source/sql/athena.py +219 -26
  284. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  285. datahub/ingestion/source/sql/clickhouse.py +29 -9
  286. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  287. datahub/ingestion/source/sql/druid.py +9 -4
  288. datahub/ingestion/source/sql/hana.py +3 -1
  289. datahub/ingestion/source/sql/hive.py +28 -8
  290. datahub/ingestion/source/sql/hive_metastore.py +24 -25
  291. datahub/ingestion/source/sql/mariadb.py +0 -1
  292. datahub/ingestion/source/sql/mssql/job_models.py +18 -2
  293. datahub/ingestion/source/sql/mssql/source.py +376 -62
  294. datahub/ingestion/source/sql/mysql.py +154 -4
  295. datahub/ingestion/source/sql/oracle.py +62 -11
  296. datahub/ingestion/source/sql/postgres.py +142 -6
  297. datahub/ingestion/source/sql/presto.py +20 -2
  298. datahub/ingestion/source/sql/sql_common.py +281 -49
  299. datahub/ingestion/source/sql/sql_config.py +1 -34
  300. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  301. datahub/ingestion/source/sql/sql_types.py +27 -2
  302. datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
  303. datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
  304. datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
  305. datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
  306. datahub/ingestion/source/sql/teradata.py +1028 -245
  307. datahub/ingestion/source/sql/trino.py +43 -10
  308. datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
  309. datahub/ingestion/source/sql/vertica.py +14 -7
  310. datahub/ingestion/source/sql_queries.py +219 -121
  311. datahub/ingestion/source/state/checkpoint.py +8 -29
  312. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  313. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  314. datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
  315. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  316. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  317. datahub/ingestion/source/superset.py +810 -126
  318. datahub/ingestion/source/tableau/tableau.py +172 -69
  319. datahub/ingestion/source/tableau/tableau_common.py +11 -4
  320. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  321. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  322. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  323. datahub/ingestion/source/unity/config.py +161 -40
  324. datahub/ingestion/source/unity/connection.py +61 -0
  325. datahub/ingestion/source/unity/connection_test.py +1 -0
  326. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  327. datahub/ingestion/source/unity/proxy.py +794 -51
  328. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  329. datahub/ingestion/source/unity/proxy_types.py +36 -2
  330. datahub/ingestion/source/unity/report.py +15 -3
  331. datahub/ingestion/source/unity/source.py +465 -131
  332. datahub/ingestion/source/unity/tag_entities.py +197 -0
  333. datahub/ingestion/source/unity/usage.py +46 -4
  334. datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
  335. datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
  336. datahub/ingestion/source/usage/usage_common.py +4 -68
  337. datahub/ingestion/source/vertexai/__init__.py +0 -0
  338. datahub/ingestion/source/vertexai/vertexai.py +1367 -0
  339. datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
  340. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
  341. datahub/ingestion/source_config/pulsar.py +3 -1
  342. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  343. datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
  344. datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
  345. datahub/ingestion/transformer/base_transformer.py +8 -5
  346. datahub/ingestion/transformer/dataset_domain.py +1 -1
  347. datahub/ingestion/transformer/set_browse_path.py +112 -0
  348. datahub/integrations/assertion/common.py +3 -2
  349. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  350. datahub/lite/lite_util.py +2 -2
  351. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
  352. datahub/metadata/_urns/urn_defs.py +1866 -1582
  353. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  354. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  355. datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
  356. datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
  357. datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
  358. datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
  359. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  360. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  361. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  362. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
  363. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  364. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  365. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  366. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  367. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  368. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  369. datahub/metadata/schema.avsc +18404 -16617
  370. datahub/metadata/schema_classes.py +3 -3
  371. datahub/metadata/schemas/Actors.avsc +38 -1
  372. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  373. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  374. datahub/metadata/schemas/Applications.avsc +38 -0
  375. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  376. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  377. datahub/metadata/schemas/ChartKey.avsc +1 -0
  378. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  379. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  380. datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
  381. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  382. datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
  383. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  384. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  385. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  386. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  387. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  388. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  389. datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  390. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  391. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  392. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  393. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  394. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  395. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  396. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  397. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  398. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  399. datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
  400. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  401. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  402. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  403. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  404. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  405. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  406. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  407. datahub/metadata/schemas/Deprecation.avsc +2 -0
  408. datahub/metadata/schemas/DomainKey.avsc +2 -1
  409. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  410. datahub/metadata/schemas/FormInfo.avsc +5 -0
  411. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  412. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  413. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  414. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  415. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  416. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  417. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  418. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  419. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  420. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  421. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  422. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  423. datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
  424. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  425. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  426. datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
  427. datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
  428. datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
  429. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  430. datahub/metadata/schemas/Operation.avsc +21 -2
  431. datahub/metadata/schemas/Ownership.avsc +69 -0
  432. datahub/metadata/schemas/QueryProperties.avsc +24 -2
  433. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  434. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  435. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  436. datahub/metadata/schemas/Siblings.avsc +2 -0
  437. datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
  438. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  439. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  440. datahub/metadata/schemas/SystemMetadata.avsc +147 -0
  441. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  442. datahub/metadata/schemas/__init__.py +3 -3
  443. datahub/sdk/__init__.py +7 -0
  444. datahub/sdk/_all_entities.py +15 -0
  445. datahub/sdk/_shared.py +393 -10
  446. datahub/sdk/_utils.py +4 -0
  447. datahub/sdk/chart.py +386 -0
  448. datahub/sdk/container.py +7 -0
  449. datahub/sdk/dashboard.py +453 -0
  450. datahub/sdk/dataflow.py +309 -0
  451. datahub/sdk/datajob.py +367 -0
  452. datahub/sdk/dataset.py +180 -4
  453. datahub/sdk/entity.py +99 -3
  454. datahub/sdk/entity_client.py +154 -12
  455. datahub/sdk/lineage_client.py +943 -0
  456. datahub/sdk/main_client.py +83 -8
  457. datahub/sdk/mlmodel.py +383 -0
  458. datahub/sdk/mlmodelgroup.py +240 -0
  459. datahub/sdk/search_client.py +85 -8
  460. datahub/sdk/search_filters.py +393 -68
  461. datahub/secret/datahub_secret_store.py +5 -1
  462. datahub/secret/environment_secret_store.py +29 -0
  463. datahub/secret/file_secret_store.py +49 -0
  464. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  465. datahub/specific/aspect_helpers/siblings.py +73 -0
  466. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  467. datahub/specific/chart.py +1 -1
  468. datahub/specific/datajob.py +15 -1
  469. datahub/specific/dataproduct.py +4 -0
  470. datahub/specific/dataset.py +51 -59
  471. datahub/sql_parsing/_sqlglot_patch.py +1 -2
  472. datahub/sql_parsing/fingerprint_utils.py +6 -0
  473. datahub/sql_parsing/split_statements.py +30 -3
  474. datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
  475. datahub/sql_parsing/sqlglot_lineage.py +517 -44
  476. datahub/sql_parsing/sqlglot_utils.py +30 -18
  477. datahub/sql_parsing/tool_meta_extractor.py +25 -2
  478. datahub/telemetry/telemetry.py +30 -16
  479. datahub/testing/check_imports.py +1 -1
  480. datahub/testing/docker_utils.py +8 -2
  481. datahub/testing/mce_helpers.py +421 -0
  482. datahub/testing/mcp_diff.py +17 -21
  483. datahub/testing/sdk_v2_helpers.py +18 -0
  484. datahub/upgrade/upgrade.py +86 -30
  485. datahub/utilities/file_backed_collections.py +14 -15
  486. datahub/utilities/hive_schema_to_avro.py +2 -2
  487. datahub/utilities/ingest_utils.py +2 -2
  488. datahub/utilities/is_pytest.py +3 -2
  489. datahub/utilities/logging_manager.py +30 -7
  490. datahub/utilities/mapping.py +29 -2
  491. datahub/utilities/sample_data.py +5 -4
  492. datahub/utilities/server_config_util.py +298 -10
  493. datahub/utilities/sqlalchemy_query_combiner.py +6 -4
  494. datahub/utilities/stats_collections.py +4 -0
  495. datahub/utilities/threaded_iterator_executor.py +16 -3
  496. datahub/utilities/urn_encoder.py +1 -1
  497. datahub/utilities/urns/urn.py +41 -2
  498. datahub/emitter/sql_parsing_builder.py +0 -306
  499. datahub/ingestion/source/redshift/lineage_v2.py +0 -458
  500. datahub/ingestion/source/vertexai.py +0 -697
  501. datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
  502. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
  503. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -41,9 +41,11 @@ from datahub.ingestion.source.snowflake.snowflake_query import SnowflakeQuery
41
41
  from datahub.ingestion.source.snowflake.snowflake_report import SnowflakeV2Report
42
42
  from datahub.ingestion.source.snowflake.snowflake_schema import (
43
43
  SCHEMA_PARALLELISM,
44
+ BaseProcedure,
44
45
  SnowflakeColumn,
45
46
  SnowflakeDatabase,
46
47
  SnowflakeDataDictionary,
48
+ SnowflakeDynamicTable,
47
49
  SnowflakeFK,
48
50
  SnowflakePK,
49
51
  SnowflakeSchema,
@@ -63,17 +65,19 @@ from datahub.ingestion.source.snowflake.snowflake_utils import (
63
65
  from datahub.ingestion.source.sql.sql_utils import (
64
66
  add_table_to_schema_container,
65
67
  gen_database_container,
66
- gen_database_key,
67
68
  gen_schema_container,
68
- gen_schema_key,
69
69
  get_dataplatform_instance_aspect,
70
70
  get_domain_wu,
71
71
  )
72
+ from datahub.ingestion.source.sql.stored_procedures.base import (
73
+ generate_procedure_container_workunits,
74
+ generate_procedure_workunits,
75
+ )
72
76
  from datahub.ingestion.source_report.ingestion_stage import (
73
77
  EXTERNAL_TABLE_DDL_LINEAGE,
74
78
  LINEAGE_EXTRACTION,
75
79
  METADATA_EXTRACTION,
76
- PROFILING,
80
+ IngestionHighStage,
77
81
  )
78
82
  from datahub.metadata.com.linkedin.pegasus2avro.common import (
79
83
  GlobalTags,
@@ -162,8 +166,8 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
162
166
 
163
167
  def __init__(
164
168
  self,
165
- config: SnowflakeV2Config,
166
- report: SnowflakeV2Report,
169
+ config: SnowflakeV2Config, # FIXME: SnowflakeSummary is passing here SnowflakeSummaryConfig
170
+ report: SnowflakeV2Report, # FIXME: SnowflakeSummary is passing here SnowflakeSummaryReport
167
171
  connection: SnowflakeConnection,
168
172
  filters: SnowflakeFilter,
169
173
  identifiers: SnowflakeIdentifierBuilder,
@@ -171,6 +175,7 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
171
175
  profiler: Optional[SnowflakeProfiler],
172
176
  aggregator: Optional[SqlParsingAggregator],
173
177
  snowsight_url_builder: Optional[SnowsightUrlBuilder],
178
+ fetch_views_from_information_schema: bool = False,
174
179
  ) -> None:
175
180
  self.config: SnowflakeV2Config = config
176
181
  self.report: SnowflakeV2Report = report
@@ -179,7 +184,9 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
179
184
  self.identifiers: SnowflakeIdentifierBuilder = identifiers
180
185
 
181
186
  self.data_dictionary: SnowflakeDataDictionary = SnowflakeDataDictionary(
182
- connection=self.connection
187
+ connection=self.connection,
188
+ report=self.report,
189
+ fetch_views_from_information_schema=fetch_views_from_information_schema,
183
190
  )
184
191
  self.report.data_dictionary_cache = self.data_dictionary
185
192
 
@@ -353,7 +360,7 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
353
360
  yield from self._process_db_schemas(snowflake_db, db_tables)
354
361
 
355
362
  if self.profiler and db_tables:
356
- with self.report.new_stage(f"{snowflake_db.name}: {PROFILING}"):
363
+ with self.report.new_high_stage(IngestionHighStage.PROFILING):
357
364
  yield from self.profiler.get_workunits(snowflake_db, db_tables)
358
365
 
359
366
  def _process_db_schemas(
@@ -434,13 +441,16 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
434
441
  tables = self.fetch_tables_for_schema(
435
442
  snowflake_schema, db_name, schema_name
436
443
  )
444
+ if self.config.include_views:
445
+ views = self.fetch_views_for_schema(snowflake_schema, db_name, schema_name)
446
+
447
+ if self.config.include_tables:
437
448
  db_tables[schema_name] = tables
438
449
  yield from self._process_tables(
439
450
  tables, snowflake_schema, db_name, schema_name
440
451
  )
441
452
 
442
453
  if self.config.include_views:
443
- views = self.fetch_views_for_schema(snowflake_schema, db_name, schema_name)
444
454
  yield from self._process_views(
445
455
  views, snowflake_schema, db_name, schema_name
446
456
  )
@@ -448,10 +458,15 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
448
458
  if self.config.include_streams:
449
459
  self.report.num_get_streams_for_schema_queries += 1
450
460
  streams = self.fetch_streams_for_schema(
451
- snowflake_schema, db_name, schema_name
461
+ snowflake_schema,
462
+ db_name,
452
463
  )
453
464
  yield from self._process_streams(streams, snowflake_schema, db_name)
454
465
 
466
+ if self.config.include_procedures:
467
+ procedures = self.fetch_procedures_for_schema(snowflake_schema, db_name)
468
+ yield from self._process_procedures(procedures, snowflake_schema, db_name)
469
+
455
470
  if self.config.include_technical_schema and snowflake_schema.tags:
456
471
  yield from self._process_tags_in_schema(snowflake_schema)
457
472
 
@@ -487,6 +502,22 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
487
502
  if self.config.include_technical_schema:
488
503
  data_reader = self.make_data_reader()
489
504
  for table in tables:
505
+ # Handle dynamic table definitions for lineage
506
+ if (
507
+ isinstance(table, SnowflakeDynamicTable)
508
+ and table.definition
509
+ and self.aggregator
510
+ ):
511
+ table_identifier = self.identifiers.get_dataset_identifier(
512
+ table.name, schema_name, db_name
513
+ )
514
+ self.aggregator.add_view_definition(
515
+ view_urn=self.identifiers.gen_dataset_urn(table_identifier),
516
+ view_definition=table.definition,
517
+ default_db=db_name,
518
+ default_schema=schema_name,
519
+ )
520
+
490
521
  table_wu_generator = self._process_table(
491
522
  table, snowflake_schema, db_name
492
523
  )
@@ -536,6 +567,26 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
536
567
  for stream in streams:
537
568
  yield from self._process_stream(stream, snowflake_schema, db_name)
538
569
 
570
+ def _process_procedures(
571
+ self,
572
+ procedures: List[BaseProcedure],
573
+ snowflake_schema: SnowflakeSchema,
574
+ db_name: str,
575
+ ) -> Iterable[MetadataWorkUnit]:
576
+ if self.config.include_technical_schema:
577
+ if procedures:
578
+ yield from generate_procedure_container_workunits(
579
+ self.identifiers.gen_database_key(
580
+ db_name,
581
+ ),
582
+ self.identifiers.gen_schema_key(
583
+ db_name=db_name,
584
+ schema_name=snowflake_schema.name,
585
+ ),
586
+ )
587
+ for procedure in procedures:
588
+ yield from self._process_procedure(procedure, snowflake_schema, db_name)
589
+
539
590
  def _process_tags_in_schema(
540
591
  self, snowflake_schema: SnowflakeSchema
541
592
  ) -> Iterable[MetadataWorkUnit]:
@@ -819,13 +870,7 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
819
870
  entityUrn=dataset_urn, aspect=dataset_properties
820
871
  ).as_workunit()
821
872
 
822
- schema_container_key = gen_schema_key(
823
- db_name=self.snowflake_identifier(db_name),
824
- schema=self.snowflake_identifier(schema_name),
825
- platform=self.platform,
826
- platform_instance=self.config.platform_instance,
827
- env=self.config.env,
828
- )
873
+ schema_container_key = self.identifiers.gen_schema_key(db_name, schema_name)
829
874
 
830
875
  if self.config.extract_tags_as_structured_properties:
831
876
  yield from self.gen_column_tags_as_structured_properties(dataset_urn, table)
@@ -913,6 +958,10 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
913
958
  }
914
959
  )
915
960
 
961
+ if isinstance(table, SnowflakeDynamicTable):
962
+ if table.target_lag:
963
+ custom_properties["TARGET_LAG"] = table.target_lag
964
+
916
965
  if isinstance(table, SnowflakeView) and table.is_secure:
917
966
  custom_properties["IS_SECURE"] = "true"
918
967
 
@@ -958,7 +1007,9 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
958
1007
  schema_name,
959
1008
  db_name,
960
1009
  (
961
- SnowflakeObjectDomain.TABLE
1010
+ SnowflakeObjectDomain.DYNAMIC_TABLE
1011
+ if isinstance(table, SnowflakeTable) and table.is_dynamic
1012
+ else SnowflakeObjectDomain.TABLE
962
1013
  if isinstance(table, SnowflakeTable)
963
1014
  else SnowflakeObjectDomain.VIEW
964
1015
  ),
@@ -1094,11 +1145,8 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
1094
1145
  def gen_database_containers(
1095
1146
  self, database: SnowflakeDatabase
1096
1147
  ) -> Iterable[MetadataWorkUnit]:
1097
- database_container_key = gen_database_key(
1098
- self.snowflake_identifier(database.name),
1099
- platform=self.platform,
1100
- platform_instance=self.config.platform_instance,
1101
- env=self.config.env,
1148
+ database_container_key = self.identifiers.gen_database_key(
1149
+ database.name,
1102
1150
  )
1103
1151
 
1104
1152
  yield from gen_database_container(
@@ -1147,21 +1195,9 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
1147
1195
  def gen_schema_containers(
1148
1196
  self, schema: SnowflakeSchema, db_name: str
1149
1197
  ) -> Iterable[MetadataWorkUnit]:
1150
- schema_name = self.snowflake_identifier(schema.name)
1151
- database_container_key = gen_database_key(
1152
- database=self.snowflake_identifier(db_name),
1153
- platform=self.platform,
1154
- platform_instance=self.config.platform_instance,
1155
- env=self.config.env,
1156
- )
1198
+ database_container_key = self.identifiers.gen_database_key(db_name)
1157
1199
 
1158
- schema_container_key = gen_schema_key(
1159
- db_name=self.snowflake_identifier(db_name),
1160
- schema=schema_name,
1161
- platform=self.platform,
1162
- platform_instance=self.config.platform_instance,
1163
- env=self.config.env,
1164
- )
1200
+ schema_container_key = self.identifiers.gen_schema_key(db_name, schema.name)
1165
1201
 
1166
1202
  yield from gen_schema_container(
1167
1203
  name=schema.name,
@@ -1211,7 +1247,10 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
1211
1247
  # falling back to get tables for schema
1212
1248
  if tables is None:
1213
1249
  self.report.num_get_tables_for_schema_queries += 1
1214
- return self.data_dictionary.get_tables_for_schema(schema_name, db_name)
1250
+ return self.data_dictionary.get_tables_for_schema(
1251
+ db_name=db_name,
1252
+ schema_name=schema_name,
1253
+ )
1215
1254
 
1216
1255
  # Some schema may not have any table
1217
1256
  return tables.get(schema_name, [])
@@ -1221,8 +1260,17 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
1221
1260
  ) -> List[SnowflakeView]:
1222
1261
  views = self.data_dictionary.get_views_for_database(db_name)
1223
1262
 
1224
- # Some schema may not have any table
1225
- return views.get(schema_name, [])
1263
+ if views is not None:
1264
+ # Some schemas may not have any views
1265
+ return views.get(schema_name, [])
1266
+
1267
+ # Usually this fails when there are too many views in the schema.
1268
+ # Fall back to per-schema queries.
1269
+ self.report.num_get_views_for_schema_queries += 1
1270
+ return self.data_dictionary.get_views_for_schema_using_information_schema(
1271
+ db_name=db_name,
1272
+ schema_name=schema_name,
1273
+ )
1226
1274
 
1227
1275
  def get_columns_for_table(
1228
1276
  self, table_name: str, snowflake_schema: SnowflakeSchema, db_name: str
@@ -1290,13 +1338,13 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
1290
1338
  )
1291
1339
 
1292
1340
  def fetch_streams_for_schema(
1293
- self, snowflake_schema: SnowflakeSchema, db_name: str, schema_name: str
1341
+ self, snowflake_schema: SnowflakeSchema, db_name: str
1294
1342
  ) -> List[SnowflakeStream]:
1295
1343
  try:
1296
1344
  streams: List[SnowflakeStream] = []
1297
- for stream in self.get_streams_for_schema(schema_name, db_name):
1345
+ for stream in self.get_streams_for_schema(snowflake_schema.name, db_name):
1298
1346
  stream_identifier = self.identifiers.get_dataset_identifier(
1299
- stream.name, schema_name, db_name
1347
+ stream.name, snowflake_schema.name, db_name
1300
1348
  )
1301
1349
 
1302
1350
  self.report.report_entity_scanned(stream_identifier, "stream")
@@ -1310,16 +1358,15 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
1310
1358
  snowflake_schema.streams = [stream.name for stream in streams]
1311
1359
  return streams
1312
1360
  except Exception as e:
1313
- if isinstance(e, SnowflakePermissionError):
1314
- error_msg = f"Failed to get streams for schema {db_name}.{schema_name}. Please check permissions."
1315
- raise SnowflakePermissionError(error_msg) from e.__cause__
1316
- else:
1317
- self.structured_reporter.warning(
1318
- "Failed to get streams for schema",
1319
- f"{db_name}.{schema_name}",
1320
- exc=e,
1321
- )
1322
- return []
1361
+ self.structured_reporter.warning(
1362
+ title="Failed to get streams for schema",
1363
+ message="Please check permissions"
1364
+ if isinstance(e, SnowflakePermissionError)
1365
+ else "",
1366
+ context=f"{db_name}.{snowflake_schema.name}",
1367
+ exc=e,
1368
+ )
1369
+ return []
1323
1370
 
1324
1371
  def get_streams_for_schema(
1325
1372
  self, schema_name: str, db_name: str
@@ -1328,6 +1375,42 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
1328
1375
 
1329
1376
  return streams.get(schema_name, [])
1330
1377
 
1378
+ def fetch_procedures_for_schema(
1379
+ self, snowflake_schema: SnowflakeSchema, db_name: str
1380
+ ) -> List[BaseProcedure]:
1381
+ try:
1382
+ procedures: List[BaseProcedure] = []
1383
+ for procedure in self.get_procedures_for_schema(snowflake_schema, db_name):
1384
+ procedure_qualified_name = self.identifiers.get_dataset_identifier(
1385
+ procedure.name, snowflake_schema.name, db_name
1386
+ )
1387
+ self.report.report_entity_scanned(procedure_qualified_name, "procedure")
1388
+
1389
+ if self.filters.is_procedure_allowed(procedure_qualified_name):
1390
+ procedures.append(procedure)
1391
+ else:
1392
+ self.report.report_dropped(procedure_qualified_name)
1393
+ return procedures
1394
+ except Exception as e:
1395
+ self.structured_reporter.warning(
1396
+ title="Failed to get procedures for schema",
1397
+ message="Please check permissions"
1398
+ if isinstance(e, SnowflakePermissionError)
1399
+ else "",
1400
+ context=f"{db_name}.{snowflake_schema.name}",
1401
+ exc=e,
1402
+ )
1403
+ return []
1404
+
1405
+ def get_procedures_for_schema(
1406
+ self,
1407
+ snowflake_schema: SnowflakeSchema,
1408
+ db_name: str,
1409
+ ) -> List[BaseProcedure]:
1410
+ procedures = self.data_dictionary.get_procedures_for_database(db_name)
1411
+
1412
+ return procedures.get(snowflake_schema.name, [])
1413
+
1331
1414
  def _process_stream(
1332
1415
  self,
1333
1416
  stream: SnowflakeStream,
@@ -1350,6 +1433,34 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
1350
1433
  "Failed to get columns for stream:", stream.name, exc=e
1351
1434
  )
1352
1435
 
1436
+ def _process_procedure(
1437
+ self,
1438
+ procedure: BaseProcedure,
1439
+ snowflake_schema: SnowflakeSchema,
1440
+ db_name: str,
1441
+ ) -> Iterable[MetadataWorkUnit]:
1442
+ try:
1443
+ # TODO: For CLL, we should process procedures after all tables are processed
1444
+ yield from generate_procedure_workunits(
1445
+ procedure,
1446
+ database_key=self.identifiers.gen_database_key(
1447
+ db_name,
1448
+ ),
1449
+ schema_key=self.identifiers.gen_schema_key(
1450
+ db_name, snowflake_schema.name
1451
+ ),
1452
+ schema_resolver=(
1453
+ self.aggregator._schema_resolver if self.aggregator else None
1454
+ ),
1455
+ )
1456
+ except Exception as e:
1457
+ self.structured_reporter.warning(
1458
+ title="Failed to ingest stored procedure",
1459
+ message="",
1460
+ context=procedure.name,
1461
+ exc=e,
1462
+ )
1463
+
1353
1464
  def get_columns_for_stream(
1354
1465
  self,
1355
1466
  source_object: str, # Qualified name of source table/view
@@ -20,6 +20,7 @@ from datahub.ingestion.source.snowflake.snowflake_schema_gen import (
20
20
  SnowflakeSchemaGenerator,
21
21
  )
22
22
  from datahub.ingestion.source.snowflake.snowflake_utils import (
23
+ SnowflakeFilter,
23
24
  SnowflakeIdentifierBuilder,
24
25
  )
25
26
  from datahub.ingestion.source_report.time_window import BaseTimeWindowReport
@@ -58,7 +59,7 @@ class SnowflakeSummaryReport(SourceReport, BaseTimeWindowReport):
58
59
 
59
60
 
60
61
  @config_class(SnowflakeSummaryConfig)
61
- @support_status(SupportStatus.INCUBATING)
62
+ @support_status(SupportStatus.CERTIFIED)
62
63
  class SnowflakeSummarySource(Source):
63
64
  def __init__(self, ctx: PipelineContext, config: SnowflakeSummaryConfig):
64
65
  super().__init__(ctx)
@@ -81,6 +82,11 @@ class SnowflakeSummarySource(Source):
81
82
  profiler=None,
82
83
  aggregator=None,
83
84
  snowsight_url_builder=None,
85
+ filters=SnowflakeFilter(
86
+ filter_config=self.config,
87
+ structured_reporter=self.report,
88
+ ),
89
+ fetch_views_from_information_schema=False, # we haven't enabled this config for SnowflakeSummarySource
84
90
  )
85
91
 
86
92
  # Databases.
@@ -23,6 +23,7 @@ from datahub.metadata.com.linkedin.pegasus2avro.common import AuditStamp
23
23
  from datahub.metadata.com.linkedin.pegasus2avro.structured import (
24
24
  StructuredPropertyDefinition,
25
25
  )
26
+ from datahub.metadata.schema_classes import ChangeTypeClass
26
27
  from datahub.metadata.urns import (
27
28
  ContainerUrn,
28
29
  DatasetUrn,
@@ -81,7 +82,7 @@ class SnowflakeTagExtractor(SnowflakeCommonMixin):
81
82
  def create_structured_property_templates(self) -> Iterable[MetadataWorkUnit]:
82
83
  for tag in self.data_dictionary.get_all_tags():
83
84
  if not self.config.structured_property_pattern.allowed(
84
- tag.tag_identifier()
85
+ tag._id_prefix_as_str()
85
86
  ):
86
87
  continue
87
88
  if self.config.extract_tags_as_structured_properties:
@@ -111,6 +112,8 @@ class SnowflakeTagExtractor(SnowflakeCommonMixin):
111
112
  yield MetadataChangeProposalWrapper(
112
113
  entityUrn=urn,
113
114
  aspect=aspect,
115
+ changeType=ChangeTypeClass.CREATE,
116
+ headers={"If-None-Match": "*"},
114
117
  ).as_workunit()
115
118
 
116
119
  def _get_tags_on_object_with_propagation(
@@ -231,7 +231,10 @@ class SnowflakeUsageExtractor(SnowflakeCommonMixin, Closeable):
231
231
 
232
232
  with self.report.usage_aggregation.result_fetch_timer as fetch_timer:
233
233
  for row in results:
234
- with fetch_timer.pause(), self.report.usage_aggregation.result_skip_timer as skip_timer:
234
+ with (
235
+ fetch_timer.pause(),
236
+ self.report.usage_aggregation.result_skip_timer as skip_timer,
237
+ ):
235
238
  if results.rownumber is not None and results.rownumber % 1000 == 0:
236
239
  logger.debug(f"Processing usage row number {results.rownumber}")
237
240
  logger.debug(self.report.usage_aggregation.as_string())
@@ -255,7 +258,10 @@ class SnowflakeUsageExtractor(SnowflakeCommonMixin, Closeable):
255
258
  f"Skipping usage for {row['OBJECT_DOMAIN']} {dataset_identifier}, as table is not accessible."
256
259
  )
257
260
  continue
258
- with skip_timer.pause(), self.report.usage_aggregation.result_map_timer as map_timer:
261
+ with (
262
+ skip_timer.pause(),
263
+ self.report.usage_aggregation.result_map_timer as map_timer,
264
+ ):
259
265
  wu = self.build_usage_statistics_for_dataset(
260
266
  dataset_identifier, row
261
267
  )
@@ -3,9 +3,13 @@ from functools import cached_property
3
3
  from typing import ClassVar, List, Literal, Optional, Tuple
4
4
 
5
5
  from datahub.configuration.pattern_utils import is_schema_allowed
6
- from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance
6
+ from datahub.emitter.mce_builder import (
7
+ make_dataset_urn_with_platform_instance,
8
+ )
9
+ from datahub.emitter.mcp_builder import DatabaseKey, SchemaKey
7
10
  from datahub.ingestion.api.source import SourceReport
8
11
  from datahub.ingestion.source.snowflake.constants import (
12
+ DEFAULT_SNOWFLAKE_DOMAIN,
9
13
  SNOWFLAKE_REGION_CLOUD_REGION_MAPPING,
10
14
  SnowflakeCloudProvider,
11
15
  SnowflakeObjectDomain,
@@ -16,6 +20,7 @@ from datahub.ingestion.source.snowflake.snowflake_config import (
16
20
  SnowflakeV2Config,
17
21
  )
18
22
  from datahub.ingestion.source.snowflake.snowflake_report import SnowflakeV2Report
23
+ from datahub.ingestion.source.sql.sql_utils import gen_database_key, gen_schema_key
19
24
 
20
25
 
21
26
  class SnowflakeStructuredReportMixin(abc.ABC):
@@ -30,16 +35,21 @@ class SnowsightUrlBuilder:
30
35
  "us-east-1",
31
36
  "eu-west-1",
32
37
  "eu-central-1",
33
- "ap-southeast-1",
34
38
  "ap-southeast-2",
35
39
  ]
36
40
 
37
41
  snowsight_base_url: str
38
42
 
39
- def __init__(self, account_locator: str, region: str, privatelink: bool = False):
43
+ def __init__(
44
+ self,
45
+ account_locator: str,
46
+ region: str,
47
+ privatelink: bool = False,
48
+ snowflake_domain: str = DEFAULT_SNOWFLAKE_DOMAIN,
49
+ ):
40
50
  cloud, cloud_region_id = self.get_cloud_region_from_snowflake_region_id(region)
41
51
  self.snowsight_base_url = self.create_snowsight_base_url(
42
- account_locator, cloud_region_id, cloud, privatelink
52
+ account_locator, cloud_region_id, cloud, privatelink, snowflake_domain
43
53
  )
44
54
 
45
55
  @staticmethod
@@ -48,6 +58,7 @@ class SnowsightUrlBuilder:
48
58
  cloud_region_id: str,
49
59
  cloud: str,
50
60
  privatelink: bool = False,
61
+ snowflake_domain: str = DEFAULT_SNOWFLAKE_DOMAIN,
51
62
  ) -> str:
52
63
  if cloud:
53
64
  url_cloud_provider_suffix = f".{cloud}"
@@ -62,8 +73,14 @@ class SnowsightUrlBuilder:
62
73
  url_cloud_provider_suffix = ""
63
74
  else:
64
75
  url_cloud_provider_suffix = f".{cloud}"
65
- if privatelink:
66
- url = f"https://app.{account_locator}.{cloud_region_id}.privatelink.snowflakecomputing.com/"
76
+ # Note: Snowsight is always accessed via the public internet (app.snowflake.com)
77
+ # even for accounts using privatelink. Privatelink only applies to database connections,
78
+ # not the Snowsight web UI.
79
+ # Standard Snowsight URL format - works for most regions
80
+ # China region may use app.snowflake.cn instead of app.snowflake.com. This is not documented, just
81
+ # guessing Based on existence of snowflake.cn domain (https://domainindex.com/domains/snowflake.cn)
82
+ if snowflake_domain == "snowflakecomputing.cn":
83
+ url = f"https://app.snowflake.cn/{cloud_region_id}{url_cloud_provider_suffix}/{account_locator}/"
67
84
  else:
68
85
  url = f"https://app.snowflake.com/{cloud_region_id}{url_cloud_provider_suffix}/{account_locator}/"
69
86
  return url
@@ -73,7 +90,7 @@ class SnowsightUrlBuilder:
73
90
  region: str,
74
91
  ) -> Tuple[str, str]:
75
92
  cloud: str
76
- if region in SNOWFLAKE_REGION_CLOUD_REGION_MAPPING.keys():
93
+ if region in SNOWFLAKE_REGION_CLOUD_REGION_MAPPING:
77
94
  cloud, cloud_region_id = SNOWFLAKE_REGION_CLOUD_REGION_MAPPING[region]
78
95
  elif region.startswith(("aws_", "gcp_", "azure_")):
79
96
  # e.g. aws_us_west_2, gcp_us_central1, azure_northeurope
@@ -89,9 +106,20 @@ class SnowsightUrlBuilder:
89
106
  table_name: str,
90
107
  schema_name: str,
91
108
  db_name: str,
92
- domain: Literal[SnowflakeObjectDomain.TABLE, SnowflakeObjectDomain.VIEW],
109
+ domain: Literal[
110
+ SnowflakeObjectDomain.TABLE,
111
+ SnowflakeObjectDomain.VIEW,
112
+ SnowflakeObjectDomain.DYNAMIC_TABLE,
113
+ ],
93
114
  ) -> Optional[str]:
94
- return f"{self.snowsight_base_url}#/data/databases/{db_name}/schemas/{schema_name}/{domain}/{table_name}/"
115
+ # For dynamic tables, use the dynamic-table domain in the URL path
116
+ # Ensure only explicitly dynamic tables use dynamic-table URL path
117
+ url_domain = (
118
+ "dynamic-table"
119
+ if domain == SnowflakeObjectDomain.DYNAMIC_TABLE
120
+ else str(domain)
121
+ )
122
+ return f"{self.snowsight_base_url}#/data/databases/{db_name}/schemas/{schema_name}/{url_domain}/{table_name}/"
95
123
 
96
124
  def get_external_url_for_schema(
97
125
  self, schema_name: str, db_name: str
@@ -125,6 +153,7 @@ class SnowflakeFilter:
125
153
  SnowflakeObjectDomain.MATERIALIZED_VIEW,
126
154
  SnowflakeObjectDomain.ICEBERG_TABLE,
127
155
  SnowflakeObjectDomain.STREAM,
156
+ SnowflakeObjectDomain.DYNAMIC_TABLE,
128
157
  ):
129
158
  return False
130
159
  if _is_sys_table(dataset_name):
@@ -156,7 +185,8 @@ class SnowflakeFilter:
156
185
  return False
157
186
 
158
187
  if dataset_type.lower() in {
159
- SnowflakeObjectDomain.TABLE
188
+ SnowflakeObjectDomain.TABLE,
189
+ SnowflakeObjectDomain.DYNAMIC_TABLE,
160
190
  } and not self.filter_config.table_pattern.allowed(
161
191
  _cleanup_qualified_name(dataset_name, self.structured_reporter)
162
192
  ):
@@ -180,6 +210,9 @@ class SnowflakeFilter:
180
210
 
181
211
  return True
182
212
 
213
+ def is_procedure_allowed(self, procedure_name: str) -> bool:
214
+ return self.filter_config.procedure_pattern.allowed(procedure_name)
215
+
183
216
 
184
217
  def _combine_identifier_parts(
185
218
  *, table_name: str, schema_name: str, db_name: str
@@ -318,18 +351,30 @@ class SnowflakeIdentifierBuilder:
318
351
  user_email: Optional[str],
319
352
  ) -> str:
320
353
  if user_email:
321
- return self.snowflake_identifier(
322
- user_email
323
- if self.identifier_config.email_as_user_identifier is True
324
- else user_email.split("@")[0]
325
- )
354
+ return self.snowflake_identifier(user_email)
326
355
  return self.snowflake_identifier(
327
356
  f"{user_name}@{self.identifier_config.email_domain}"
328
- if self.identifier_config.email_as_user_identifier is True
329
- and self.identifier_config.email_domain is not None
357
+ if self.identifier_config.email_domain is not None
330
358
  else user_name
331
359
  )
332
360
 
361
+ def gen_schema_key(self, db_name: str, schema_name: str) -> SchemaKey:
362
+ return gen_schema_key(
363
+ db_name=self.snowflake_identifier(db_name),
364
+ schema=self.snowflake_identifier(schema_name),
365
+ platform=self.platform,
366
+ platform_instance=self.identifier_config.platform_instance,
367
+ env=self.identifier_config.env,
368
+ )
369
+
370
+ def gen_database_key(self, db_name: str) -> DatabaseKey:
371
+ return gen_database_key(
372
+ database=self.snowflake_identifier(db_name),
373
+ platform=self.platform,
374
+ platform_instance=self.identifier_config.platform_instance,
375
+ env=self.identifier_config.env,
376
+ )
377
+
333
378
 
334
379
  class SnowflakeCommonMixin(SnowflakeStructuredReportMixin):
335
380
  platform = "snowflake"