acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic; see the registry's advisory page for more details.

Files changed (414)
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +1 -1
  7. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  8. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  9. datahub/api/entities/dataset/dataset.py +26 -23
  10. datahub/api/entities/external/__init__.py +0 -0
  11. datahub/api/entities/external/external_entities.py +724 -0
  12. datahub/api/entities/external/external_tag.py +147 -0
  13. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  14. datahub/api/entities/external/restricted_text.py +172 -0
  15. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  16. datahub/api/entities/forms/forms.py +3 -3
  17. datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
  18. datahub/api/graphql/operation.py +10 -6
  19. datahub/cli/check_cli.py +88 -7
  20. datahub/cli/cli_utils.py +63 -0
  21. datahub/cli/config_utils.py +18 -10
  22. datahub/cli/container_cli.py +5 -0
  23. datahub/cli/delete_cli.py +125 -27
  24. datahub/cli/docker_check.py +110 -14
  25. datahub/cli/docker_cli.py +153 -229
  26. datahub/cli/exists_cli.py +0 -2
  27. datahub/cli/get_cli.py +0 -2
  28. datahub/cli/graphql_cli.py +1422 -0
  29. datahub/cli/iceberg_cli.py +5 -0
  30. datahub/cli/ingest_cli.py +3 -15
  31. datahub/cli/migrate.py +2 -0
  32. datahub/cli/put_cli.py +1 -4
  33. datahub/cli/quickstart_versioning.py +53 -10
  34. datahub/cli/specific/assertions_cli.py +37 -6
  35. datahub/cli/specific/datacontract_cli.py +54 -7
  36. datahub/cli/specific/dataproduct_cli.py +2 -15
  37. datahub/cli/specific/dataset_cli.py +1 -8
  38. datahub/cli/specific/forms_cli.py +0 -4
  39. datahub/cli/specific/group_cli.py +0 -2
  40. datahub/cli/specific/structuredproperties_cli.py +1 -4
  41. datahub/cli/specific/user_cli.py +172 -3
  42. datahub/cli/state_cli.py +0 -2
  43. datahub/cli/timeline_cli.py +0 -2
  44. datahub/configuration/common.py +40 -1
  45. datahub/configuration/connection_resolver.py +5 -2
  46. datahub/configuration/env_vars.py +331 -0
  47. datahub/configuration/import_resolver.py +7 -4
  48. datahub/configuration/kafka.py +21 -1
  49. datahub/configuration/pydantic_migration_helpers.py +6 -13
  50. datahub/configuration/source_common.py +3 -2
  51. datahub/configuration/validate_field_deprecation.py +5 -2
  52. datahub/configuration/validate_field_removal.py +8 -2
  53. datahub/configuration/validate_field_rename.py +6 -5
  54. datahub/configuration/validate_multiline_string.py +5 -2
  55. datahub/emitter/mce_builder.py +8 -4
  56. datahub/emitter/rest_emitter.py +103 -30
  57. datahub/entrypoints.py +6 -3
  58. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
  59. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  60. datahub/ingestion/api/decorators.py +15 -3
  61. datahub/ingestion/api/report.py +381 -3
  62. datahub/ingestion/api/sink.py +27 -2
  63. datahub/ingestion/api/source.py +165 -58
  64. datahub/ingestion/api/source_protocols.py +23 -0
  65. datahub/ingestion/autogenerated/__init__.py +0 -0
  66. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  67. datahub/ingestion/autogenerated/lineage.json +402 -0
  68. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  69. datahub/ingestion/extractor/schema_util.py +13 -4
  70. datahub/ingestion/glossary/classification_mixin.py +5 -0
  71. datahub/ingestion/graph/client.py +330 -25
  72. datahub/ingestion/graph/config.py +3 -2
  73. datahub/ingestion/graph/filters.py +30 -11
  74. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  75. datahub/ingestion/run/pipeline.py +81 -11
  76. datahub/ingestion/run/pipeline_config.py +2 -2
  77. datahub/ingestion/sink/datahub_kafka.py +1 -0
  78. datahub/ingestion/sink/datahub_rest.py +13 -5
  79. datahub/ingestion/sink/file.py +1 -0
  80. datahub/ingestion/source/abs/config.py +1 -1
  81. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  82. datahub/ingestion/source/abs/source.py +15 -30
  83. datahub/ingestion/source/aws/aws_common.py +185 -13
  84. datahub/ingestion/source/aws/glue.py +517 -244
  85. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  86. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  87. datahub/ingestion/source/aws/tag_entities.py +270 -0
  88. datahub/ingestion/source/azure/azure_common.py +3 -3
  89. datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
  90. datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
  91. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
  92. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
  93. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  94. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  95. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  96. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  97. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  98. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  99. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  100. datahub/ingestion/source/cassandra/cassandra.py +6 -8
  101. datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
  102. datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
  103. datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
  104. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  105. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  106. datahub/ingestion/source/common/subtypes.py +53 -0
  107. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  108. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  109. datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
  110. datahub/ingestion/source/datahub/config.py +12 -9
  111. datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
  112. datahub/ingestion/source/datahub/datahub_source.py +10 -0
  113. datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
  114. datahub/ingestion/source/dbt/dbt_common.py +224 -9
  115. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  116. datahub/ingestion/source/debug/__init__.py +0 -0
  117. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  118. datahub/ingestion/source/delta_lake/config.py +9 -5
  119. datahub/ingestion/source/delta_lake/source.py +8 -0
  120. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  121. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  122. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  123. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  124. datahub/ingestion/source/dremio/dremio_source.py +132 -98
  125. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  126. datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
  127. datahub/ingestion/source/excel/__init__.py +0 -0
  128. datahub/ingestion/source/excel/config.py +92 -0
  129. datahub/ingestion/source/excel/excel_file.py +539 -0
  130. datahub/ingestion/source/excel/profiling.py +308 -0
  131. datahub/ingestion/source/excel/report.py +49 -0
  132. datahub/ingestion/source/excel/source.py +662 -0
  133. datahub/ingestion/source/excel/util.py +18 -0
  134. datahub/ingestion/source/feast.py +8 -10
  135. datahub/ingestion/source/file.py +3 -0
  136. datahub/ingestion/source/fivetran/config.py +66 -7
  137. datahub/ingestion/source/fivetran/fivetran.py +227 -43
  138. datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
  139. datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
  140. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  141. datahub/ingestion/source/fivetran/response_models.py +97 -0
  142. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  143. datahub/ingestion/source/gcs/gcs_source.py +32 -4
  144. datahub/ingestion/source/ge_data_profiler.py +108 -31
  145. datahub/ingestion/source/ge_profiling_config.py +26 -11
  146. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  147. datahub/ingestion/source/grafana/field_utils.py +307 -0
  148. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  149. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  150. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  151. datahub/ingestion/source/grafana/lineage.py +202 -0
  152. datahub/ingestion/source/grafana/models.py +137 -0
  153. datahub/ingestion/source/grafana/report.py +90 -0
  154. datahub/ingestion/source/grafana/types.py +16 -0
  155. datahub/ingestion/source/hex/api.py +28 -1
  156. datahub/ingestion/source/hex/hex.py +16 -5
  157. datahub/ingestion/source/hex/mapper.py +16 -2
  158. datahub/ingestion/source/hex/model.py +2 -0
  159. datahub/ingestion/source/hex/query_fetcher.py +1 -1
  160. datahub/ingestion/source/iceberg/iceberg.py +123 -59
  161. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  162. datahub/ingestion/source/identity/azure_ad.py +1 -1
  163. datahub/ingestion/source/identity/okta.py +1 -14
  164. datahub/ingestion/source/kafka/kafka.py +16 -0
  165. datahub/ingestion/source/kafka_connect/common.py +2 -2
  166. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  167. datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
  168. datahub/ingestion/source/looker/looker_common.py +148 -79
  169. datahub/ingestion/source/looker/looker_config.py +15 -4
  170. datahub/ingestion/source/looker/looker_constant.py +4 -0
  171. datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
  172. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  173. datahub/ingestion/source/looker/looker_source.py +503 -547
  174. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  175. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  176. datahub/ingestion/source/looker/lookml_config.py +31 -3
  177. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  178. datahub/ingestion/source/looker/lookml_source.py +96 -117
  179. datahub/ingestion/source/looker/view_upstream.py +494 -1
  180. datahub/ingestion/source/metabase.py +32 -6
  181. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  182. datahub/ingestion/source/metadata/lineage.py +9 -9
  183. datahub/ingestion/source/mlflow.py +12 -2
  184. datahub/ingestion/source/mock_data/__init__.py +0 -0
  185. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  186. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  187. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  188. datahub/ingestion/source/mode.py +26 -5
  189. datahub/ingestion/source/mongodb.py +11 -1
  190. datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
  191. datahub/ingestion/source/nifi.py +2 -2
  192. datahub/ingestion/source/openapi.py +1 -1
  193. datahub/ingestion/source/powerbi/config.py +47 -21
  194. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  195. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  196. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  197. datahub/ingestion/source/powerbi/powerbi.py +10 -6
  198. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  199. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  200. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  201. datahub/ingestion/source/preset.py +3 -3
  202. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  203. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  204. datahub/ingestion/source/redash.py +1 -1
  205. datahub/ingestion/source/redshift/config.py +15 -9
  206. datahub/ingestion/source/redshift/datashares.py +1 -1
  207. datahub/ingestion/source/redshift/lineage.py +386 -687
  208. datahub/ingestion/source/redshift/query.py +23 -19
  209. datahub/ingestion/source/redshift/redshift.py +52 -111
  210. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  211. datahub/ingestion/source/redshift/report.py +0 -2
  212. datahub/ingestion/source/redshift/usage.py +6 -5
  213. datahub/ingestion/source/s3/report.py +4 -2
  214. datahub/ingestion/source/s3/source.py +449 -248
  215. datahub/ingestion/source/sac/sac.py +3 -1
  216. datahub/ingestion/source/salesforce.py +28 -13
  217. datahub/ingestion/source/schema/json_schema.py +14 -14
  218. datahub/ingestion/source/schema_inference/object.py +22 -6
  219. datahub/ingestion/source/sigma/data_classes.py +3 -0
  220. datahub/ingestion/source/sigma/sigma.py +7 -1
  221. datahub/ingestion/source/slack/slack.py +10 -16
  222. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  223. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  224. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  225. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  226. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  227. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  228. datahub/ingestion/source/snowflake/constants.py +3 -0
  229. datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
  230. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
  231. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
  232. datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
  233. datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
  234. datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
  235. datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
  236. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
  237. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  238. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  239. datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
  240. datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
  241. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  242. datahub/ingestion/source/sql/athena.py +217 -25
  243. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  244. datahub/ingestion/source/sql/clickhouse.py +24 -8
  245. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  246. datahub/ingestion/source/sql/druid.py +2 -2
  247. datahub/ingestion/source/sql/hana.py +3 -1
  248. datahub/ingestion/source/sql/hive.py +4 -3
  249. datahub/ingestion/source/sql/hive_metastore.py +19 -20
  250. datahub/ingestion/source/sql/mariadb.py +0 -1
  251. datahub/ingestion/source/sql/mssql/job_models.py +3 -1
  252. datahub/ingestion/source/sql/mssql/source.py +336 -57
  253. datahub/ingestion/source/sql/mysql.py +154 -4
  254. datahub/ingestion/source/sql/oracle.py +5 -5
  255. datahub/ingestion/source/sql/postgres.py +142 -6
  256. datahub/ingestion/source/sql/presto.py +2 -1
  257. datahub/ingestion/source/sql/sql_common.py +281 -49
  258. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  259. datahub/ingestion/source/sql/sql_types.py +22 -0
  260. datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
  261. datahub/ingestion/source/sql/teradata.py +1028 -245
  262. datahub/ingestion/source/sql/trino.py +11 -1
  263. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  264. datahub/ingestion/source/sql/vertica.py +14 -7
  265. datahub/ingestion/source/sql_queries.py +219 -121
  266. datahub/ingestion/source/state/checkpoint.py +8 -29
  267. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  268. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  269. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  270. datahub/ingestion/source/superset.py +314 -67
  271. datahub/ingestion/source/tableau/tableau.py +135 -59
  272. datahub/ingestion/source/tableau/tableau_common.py +9 -2
  273. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  274. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  275. datahub/ingestion/source/unity/config.py +160 -40
  276. datahub/ingestion/source/unity/connection.py +61 -0
  277. datahub/ingestion/source/unity/connection_test.py +1 -0
  278. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  279. datahub/ingestion/source/unity/proxy.py +794 -51
  280. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  281. datahub/ingestion/source/unity/proxy_types.py +36 -2
  282. datahub/ingestion/source/unity/report.py +15 -3
  283. datahub/ingestion/source/unity/source.py +465 -131
  284. datahub/ingestion/source/unity/tag_entities.py +197 -0
  285. datahub/ingestion/source/unity/usage.py +46 -4
  286. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  287. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
  288. datahub/ingestion/source/usage/usage_common.py +4 -3
  289. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  290. datahub/ingestion/source_config/pulsar.py +3 -1
  291. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  292. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  293. datahub/ingestion/transformer/base_transformer.py +8 -5
  294. datahub/ingestion/transformer/set_browse_path.py +112 -0
  295. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  296. datahub/metadata/_internal_schema_classes.py +6806 -4871
  297. datahub/metadata/_urns/urn_defs.py +1767 -1539
  298. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  299. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  300. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  301. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  302. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  303. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
  304. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  305. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  306. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  307. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  308. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  309. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  310. datahub/metadata/schema.avsc +18395 -16979
  311. datahub/metadata/schemas/Actors.avsc +38 -1
  312. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  313. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  314. datahub/metadata/schemas/Applications.avsc +38 -0
  315. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  316. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  317. datahub/metadata/schemas/ChartKey.avsc +1 -0
  318. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  319. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  320. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  321. datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
  322. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  323. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  324. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  325. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  326. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  327. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  328. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  329. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  330. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  331. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  332. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  333. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  334. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  335. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  336. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  337. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  338. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  339. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  340. datahub/metadata/schemas/DomainKey.avsc +2 -1
  341. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  342. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  343. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  344. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  345. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  346. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  347. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  348. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  349. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  350. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  351. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  352. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  353. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  354. datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
  355. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  356. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  357. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  358. datahub/metadata/schemas/Operation.avsc +4 -2
  359. datahub/metadata/schemas/Ownership.avsc +69 -0
  360. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  361. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  362. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  363. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  364. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  365. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  366. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  367. datahub/sdk/__init__.py +2 -0
  368. datahub/sdk/_all_entities.py +7 -0
  369. datahub/sdk/_shared.py +249 -5
  370. datahub/sdk/chart.py +386 -0
  371. datahub/sdk/container.py +7 -0
  372. datahub/sdk/dashboard.py +453 -0
  373. datahub/sdk/dataflow.py +7 -0
  374. datahub/sdk/datajob.py +45 -13
  375. datahub/sdk/dataset.py +56 -2
  376. datahub/sdk/entity_client.py +111 -9
  377. datahub/sdk/lineage_client.py +663 -82
  378. datahub/sdk/main_client.py +50 -16
  379. datahub/sdk/mlmodel.py +120 -38
  380. datahub/sdk/mlmodelgroup.py +7 -0
  381. datahub/sdk/search_client.py +7 -3
  382. datahub/sdk/search_filters.py +304 -36
  383. datahub/secret/datahub_secret_store.py +3 -0
  384. datahub/secret/environment_secret_store.py +29 -0
  385. datahub/secret/file_secret_store.py +49 -0
  386. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  387. datahub/specific/aspect_helpers/siblings.py +73 -0
  388. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  389. datahub/specific/chart.py +1 -1
  390. datahub/specific/datajob.py +15 -1
  391. datahub/specific/dataproduct.py +4 -0
  392. datahub/specific/dataset.py +39 -59
  393. datahub/sql_parsing/split_statements.py +13 -0
  394. datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
  395. datahub/sql_parsing/sqlglot_lineage.py +196 -42
  396. datahub/sql_parsing/sqlglot_utils.py +12 -4
  397. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  398. datahub/telemetry/telemetry.py +28 -14
  399. datahub/testing/sdk_v2_helpers.py +7 -1
  400. datahub/upgrade/upgrade.py +73 -17
  401. datahub/utilities/file_backed_collections.py +8 -9
  402. datahub/utilities/is_pytest.py +3 -2
  403. datahub/utilities/logging_manager.py +22 -6
  404. datahub/utilities/mapping.py +29 -2
  405. datahub/utilities/sample_data.py +5 -4
  406. datahub/utilities/server_config_util.py +10 -1
  407. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  408. datahub/utilities/stats_collections.py +4 -0
  409. datahub/utilities/urns/urn.py +41 -2
  410. datahub/emitter/sql_parsing_builder.py +0 -306
  411. datahub/ingestion/source/redshift/lineage_v2.py +0 -466
  412. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
  413. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
  414. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  import logging
2
2
  import re
3
3
  import urllib.parse
4
- from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
4
+ from typing import Any, Dict, Iterable, List, Optional, Tuple
5
5
 
6
6
  import pydantic
7
7
  import sqlalchemy.dialects.mssql
@@ -10,9 +10,10 @@ from sqlalchemy import create_engine, inspect
10
10
  from sqlalchemy.engine.base import Connection
11
11
  from sqlalchemy.engine.reflection import Inspector
12
12
  from sqlalchemy.exc import ProgrammingError, ResourceClosedError
13
+ from sqlalchemy.sql import quoted_name
13
14
 
14
15
  import datahub.metadata.schema_classes as models
15
- from datahub.configuration.common import AllowDenyPattern
16
+ from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
16
17
  from datahub.configuration.pattern_utils import UUID_REGEX
17
18
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
18
19
  from datahub.ingestion.api.common import PipelineContext
@@ -27,6 +28,7 @@ from datahub.ingestion.api.decorators import (
27
28
  from datahub.ingestion.api.source import StructuredLogLevel
28
29
  from datahub.ingestion.api.source_helpers import auto_workunit
29
30
  from datahub.ingestion.api.workunit import MetadataWorkUnit
31
+ from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
30
32
  from datahub.ingestion.source.sql.mssql.job_models import (
31
33
  JobStep,
32
34
  MSSQLDataFlow,
@@ -40,7 +42,6 @@ from datahub.ingestion.source.sql.mssql.job_models import (
40
42
  )
41
43
  from datahub.ingestion.source.sql.sql_common import (
42
44
  SQLAlchemySource,
43
- SqlWorkUnit,
44
45
  register_custom_type,
45
46
  )
46
47
  from datahub.ingestion.source.sql.sql_config import (
@@ -74,7 +75,7 @@ DEFAULT_TEMP_TABLES_PATTERNS = [
74
75
  class SQLServerConfig(BasicSQLAlchemyConfig):
75
76
  # defaults
76
77
  host_port: str = Field(default="localhost:1433", description="MSSQL host URL.")
77
- scheme: str = Field(default="mssql+pytds", description="", hidden_from_docs=True)
78
+ scheme: HiddenFromDocs[str] = Field(default="mssql+pytds")
78
79
 
79
80
  # TODO: rename to include_procedures ?
80
81
  include_stored_procedures: bool = Field(
@@ -130,10 +131,18 @@ class SQLServerConfig(BasicSQLAlchemyConfig):
130
131
  "match the entire table name in database.schema.table format. Defaults are to set in such a way "
131
132
  "to ignore the temporary staging tables created by known ETL tools.",
132
133
  )
134
+ quote_schemas: bool = Field(
135
+ default=False,
136
+ description="Represent a schema identifiers combined with quoting preferences. See [sqlalchemy quoted_name docs](https://docs.sqlalchemy.org/en/20/core/sqlelement.html#sqlalchemy.sql.expression.quoted_name).",
137
+ )
138
+ is_aws_rds: Optional[bool] = Field(
139
+ default=None,
140
+ description="Indicates if the SQL Server instance is running on AWS RDS. When None (default), automatic detection will be attempted using server name analysis.",
141
+ )
133
142
 
134
143
  @pydantic.validator("uri_args")
135
144
  def passwords_match(cls, v, values, **kwargs):
136
- if values["use_odbc"] and "driver" not in v:
145
+ if values["use_odbc"] and not values["sqlalchemy_uri"] and "driver" not in v:
137
146
  raise ValueError("uri_args must contain a 'driver' option")
138
147
  elif not values["use_odbc"] and v:
139
148
  raise ValueError("uri_args is not supported when ODBC is disabled")
@@ -144,22 +153,36 @@ class SQLServerConfig(BasicSQLAlchemyConfig):
144
153
  uri_opts: Optional[Dict[str, Any]] = None,
145
154
  current_db: Optional[str] = None,
146
155
  ) -> str:
156
+ current_db = current_db or self.database
157
+
147
158
  if self.use_odbc:
148
159
  # Ensure that the import is available.
149
160
  import pyodbc # noqa: F401
150
161
 
151
162
  self.scheme = "mssql+pyodbc"
152
163
 
164
+ # ODBC requires a database name, otherwise it will interpret host_port
165
+ # as a pre-defined ODBC connection name.
166
+ current_db = current_db or "master"
167
+
153
168
  uri: str = self.sqlalchemy_uri or make_sqlalchemy_uri(
154
169
  self.scheme, # type: ignore
155
170
  self.username,
156
171
  self.password.get_secret_value() if self.password else None,
157
172
  self.host_port, # type: ignore
158
- current_db if current_db else self.database,
173
+ current_db,
159
174
  uri_opts=uri_opts,
160
175
  )
161
176
  if self.use_odbc:
162
- uri = f"{uri}?{urllib.parse.urlencode(self.uri_args)}"
177
+ final_uri_args = self.uri_args.copy()
178
+ if final_uri_args and current_db:
179
+ final_uri_args.update({"database": current_db})
180
+
181
+ uri = (
182
+ f"{uri}?{urllib.parse.urlencode(final_uri_args)}"
183
+ if final_uri_args
184
+ else uri
185
+ )
163
186
  return uri
164
187
 
165
188
  @property
@@ -174,7 +197,22 @@ class SQLServerConfig(BasicSQLAlchemyConfig):
174
197
  @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
175
198
  @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
176
199
  @capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
177
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
200
+ @capability(
201
+ SourceCapability.LINEAGE_COARSE,
202
+ "Enabled by default to get lineage for stored procedures via `include_lineage` and for views via `include_view_lineage`",
203
+ subtype_modifier=[
204
+ SourceCapabilityModifier.STORED_PROCEDURE,
205
+ SourceCapabilityModifier.VIEW,
206
+ ],
207
+ )
208
+ @capability(
209
+ SourceCapability.LINEAGE_FINE,
210
+ "Enabled by default to get lineage for stored procedures via `include_lineage` and for views via `include_view_column_lineage`",
211
+ subtype_modifier=[
212
+ SourceCapabilityModifier.STORED_PROCEDURE,
213
+ SourceCapabilityModifier.VIEW,
214
+ ],
215
+ )
178
216
  class SQLServerSource(SQLAlchemySource):
179
217
  """
180
218
  This plugin extracts the following:
@@ -323,32 +361,186 @@ class SQLServerSource(SQLAlchemySource):
323
361
  try:
324
362
  yield from self.loop_jobs(inspector, self.config)
325
363
  except Exception as e:
326
- self.report.report_failure(
327
- "jobs",
328
- f"Failed to list jobs due to error {e}",
364
+ self.report.failure(
365
+ message="Failed to list jobs",
366
+ title="SQL Server Jobs Extraction",
367
+ context="Error occurred during database-level job extraction",
368
+ exc=e,
329
369
  )
330
370
 
331
- def get_schema_level_workunits(
332
- self,
333
- inspector: Inspector,
334
- schema: str,
335
- database: str,
336
- ) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:
337
- yield from super().get_schema_level_workunits(
338
- inspector=inspector,
339
- schema=schema,
340
- database=database,
341
- )
342
- if self.config.include_stored_procedures:
343
- try:
344
- yield from self.loop_stored_procedures(inspector, schema, self.config)
345
- except Exception as e:
346
- self.report.report_failure(
347
- "jobs",
348
- f"Failed to list jobs due to error {e}",
371
+ def _detect_rds_environment(self, conn: Connection) -> bool:
372
+ """
373
+ Detect if we're running in an RDS/managed environment vs on-premises.
374
+ Uses explicit configuration if provided, otherwise attempts automatic detection.
375
+ Returns True if RDS/managed, False if on-premises.
376
+ """
377
+ if self.config.is_aws_rds is not None:
378
+ logger.info(
379
+ f"Using explicit is_aws_rds configuration: {self.config.is_aws_rds}"
380
+ )
381
+ return self.config.is_aws_rds
382
+
383
+ try:
384
+ result = conn.execute("SELECT @@servername AS server_name")
385
+ server_name_row = result.fetchone()
386
+ if server_name_row:
387
+ server_name = server_name_row["server_name"].lower()
388
+
389
+ aws_indicators = ["amazon", "amzn", "amaz", "ec2", "rds.amazonaws.com"]
390
+ is_rds = any(indicator in server_name for indicator in aws_indicators)
391
+ if is_rds:
392
+ logger.info(f"AWS RDS detected based on server name: {server_name}")
393
+ else:
394
+ logger.info(
395
+ f"Non-RDS environment detected based on server name: {server_name}"
396
+ )
397
+
398
+ return is_rds
399
+ else:
400
+ logger.warning(
401
+ "Could not retrieve server name, assuming non-RDS environment"
349
402
  )
403
+ return False
404
+
405
+ except Exception as e:
406
+ logger.warning(
407
+ f"Failed to detect RDS/managed vs on-prem env, assuming non-RDS environment ({e})"
408
+ )
409
+ return False
350
410
 
351
411
  def _get_jobs(self, conn: Connection, db_name: str) -> Dict[str, Dict[str, Any]]:
412
+ """
413
+ Get job information with environment detection to choose optimal method first.
414
+ """
415
+ jobs: Dict[str, Dict[str, Any]] = {}
416
+
417
+ # Detect environment to choose optimal method first
418
+ is_rds = self._detect_rds_environment(conn)
419
+
420
+ if is_rds:
421
+ # Managed environment - try stored procedures first
422
+ try:
423
+ jobs = self._get_jobs_via_stored_procedures(conn, db_name)
424
+ logger.info(
425
+ "Successfully retrieved jobs using stored procedures (managed environment)"
426
+ )
427
+ return jobs
428
+ except Exception as sp_error:
429
+ logger.warning(
430
+ f"Failed to retrieve jobs via stored procedures in managed environment: {sp_error}"
431
+ )
432
+ # Try direct query as fallback (might work in some managed environments)
433
+ try:
434
+ jobs = self._get_jobs_via_direct_query(conn, db_name)
435
+ logger.info(
436
+ "Successfully retrieved jobs using direct query fallback in managed environment"
437
+ )
438
+ return jobs
439
+ except Exception as direct_error:
440
+ self.report.failure(
441
+ message="Failed to retrieve jobs in managed environment",
442
+ title="SQL Server Jobs Extraction",
443
+ context="Both stored procedures and direct query methods failed",
444
+ exc=direct_error,
445
+ )
446
+ else:
447
+ # On-premises environment - try direct query first (usually faster)
448
+ try:
449
+ jobs = self._get_jobs_via_direct_query(conn, db_name)
450
+ logger.info(
451
+ "Successfully retrieved jobs using direct query (on-premises environment)"
452
+ )
453
+ return jobs
454
+ except Exception as direct_error:
455
+ logger.warning(
456
+ f"Failed to retrieve jobs via direct query in on-premises environment: {direct_error}"
457
+ )
458
+ # Try stored procedures as fallback
459
+ try:
460
+ jobs = self._get_jobs_via_stored_procedures(conn, db_name)
461
+ logger.info(
462
+ "Successfully retrieved jobs using stored procedures fallback in on-premises environment"
463
+ )
464
+ return jobs
465
+ except Exception as sp_error:
466
+ self.report.failure(
467
+ message="Failed to retrieve jobs in on-premises environment",
468
+ title="SQL Server Jobs Extraction",
469
+ context="Both direct query and stored procedures methods failed",
470
+ exc=sp_error,
471
+ )
472
+
473
+ return jobs
474
+
475
+ def _get_jobs_via_stored_procedures(
476
+ self, conn: Connection, db_name: str
477
+ ) -> Dict[str, Dict[str, Any]]:
478
+ jobs: Dict[str, Dict[str, Any]] = {}
479
+
480
+ # First, get all jobs
481
+ jobs_result = conn.execute("EXEC msdb.dbo.sp_help_job")
482
+ jobs_data = {}
483
+
484
+ # SQLAlchemy 1.3 support was dropped in Sept 2023 (PR #8810)
485
+ # SQLAlchemy 1.4+ returns LegacyRow objects that don't support dictionary-style .get() method
486
+ # Use .mappings() to get MappingResult with dictionary-like rows that support .get()
487
+ for row in jobs_result.mappings():
488
+ job_id = str(row["job_id"])
489
+ jobs_data[job_id] = {
490
+ "job_id": job_id,
491
+ "name": row["name"],
492
+ "description": row.get("description", ""),
493
+ "date_created": row.get("date_created"),
494
+ "date_modified": row.get("date_modified"),
495
+ "enabled": row.get("enabled", 1),
496
+ }
497
+
498
+ # Now get job steps for each job, filtering by database
499
+ for job_id, job_info in jobs_data.items():
500
+ try:
501
+ # Get steps for this specific job
502
+ steps_result = conn.execute(
503
+ f"EXEC msdb.dbo.sp_help_jobstep @job_id = '{job_id}'"
504
+ )
505
+
506
+ job_steps = {}
507
+ # Use .mappings() for dictionary-like access (SQLAlchemy 1.4+ compatibility)
508
+ for step_row in steps_result.mappings():
509
+ # Only include steps that run against our target database
510
+ step_database = step_row.get("database_name", "")
511
+ if step_database.lower() == db_name.lower() or not step_database:
512
+ step_data = {
513
+ "job_id": job_id,
514
+ "job_name": job_info["name"],
515
+ "description": job_info["description"],
516
+ "date_created": job_info["date_created"],
517
+ "date_modified": job_info["date_modified"],
518
+ "step_id": step_row["step_id"],
519
+ "step_name": step_row["step_name"],
520
+ "subsystem": step_row.get("subsystem", ""),
521
+ "command": step_row.get("command", ""),
522
+ "database_name": step_database,
523
+ }
524
+ job_steps[step_row["step_id"]] = step_data
525
+
526
+ # Only add job if it has relevant steps
527
+ if job_steps:
528
+ jobs[job_info["name"]] = job_steps
529
+
530
+ except Exception as step_error:
531
+ logger.warning(
532
+ f"Failed to get steps for job {job_info['name']}: {step_error}"
533
+ )
534
+ continue
535
+
536
+ return jobs
537
+
538
+ def _get_jobs_via_direct_query(
539
+ self, conn: Connection, db_name: str
540
+ ) -> Dict[str, Dict[str, Any]]:
541
+ """
542
+ Original method using direct table access for on-premises SQL Server.
543
+ """
352
544
  jobs_data = conn.execute(
353
545
  f"""
354
546
  SELECT
@@ -371,6 +563,7 @@ class SQLServerSource(SQLAlchemySource):
371
563
  where database_name = '{db_name}'
372
564
  """
373
565
  )
566
+
374
567
  jobs: Dict[str, Dict[str, Any]] = {}
375
568
  for row in jobs_data:
376
569
  step_data = dict(
@@ -383,11 +576,13 @@ class SQLServerSource(SQLAlchemySource):
383
576
  step_name=row["step_name"],
384
577
  subsystem=row["subsystem"],
385
578
  command=row["command"],
579
+ database_name=row["database_name"],
386
580
  )
387
581
  if row["name"] in jobs:
388
582
  jobs[row["name"]][row["step_id"]] = step_data
389
583
  else:
390
584
  jobs[row["name"]] = {row["step_id"]: step_data}
585
+
391
586
  return jobs
392
587
 
393
588
  def loop_jobs(
@@ -397,21 +592,59 @@ class SQLServerSource(SQLAlchemySource):
397
592
  ) -> Iterable[MetadataWorkUnit]:
398
593
  """
399
594
  Loop MS SQL jobs as dataFlow-s.
400
- :return:
595
+ Now supports both managed and on-premises SQL Server.
401
596
  """
402
597
  db_name = self.get_db_name(inspector)
403
- with inspector.engine.connect() as conn:
404
- jobs = self._get_jobs(conn, db_name)
405
- for job_name, job_steps in jobs.items():
406
- job = MSSQLJob(
407
- name=job_name,
408
- env=sql_config.env,
409
- db=db_name,
410
- platform_instance=sql_config.platform_instance,
598
+
599
+ try:
600
+ with inspector.engine.connect() as conn:
601
+ jobs = self._get_jobs(conn, db_name)
602
+
603
+ if not jobs:
604
+ logger.info(f"No jobs found for database: {db_name}")
605
+ return
606
+
607
+ logger.info(f"Found {len(jobs)} jobs for database: {db_name}")
608
+
609
+ for job_name, job_steps in jobs.items():
610
+ try:
611
+ job = MSSQLJob(
612
+ name=job_name,
613
+ env=sql_config.env,
614
+ db=db_name,
615
+ platform_instance=sql_config.platform_instance,
616
+ )
617
+ data_flow = MSSQLDataFlow(entity=job)
618
+ yield from self.construct_flow_workunits(data_flow=data_flow)
619
+ yield from self.loop_job_steps(job, job_steps)
620
+
621
+ except Exception as job_error:
622
+ logger.warning(f"Failed to process job {job_name}: {job_error}")
623
+ self.report.warning(
624
+ message=f"Failed to process job {job_name}",
625
+ title="SQL Server Jobs Extraction",
626
+ context="Error occurred while processing individual job",
627
+ exc=job_error,
628
+ )
629
+ continue
630
+
631
+ except Exception as e:
632
+ error_message = f"Failed to retrieve jobs for database {db_name}: {e}"
633
+ logger.error(error_message)
634
+
635
+ # Provide specific guidance for permission issues
636
+ if "permission" in str(e).lower() or "denied" in str(e).lower():
637
+ permission_guidance = (
638
+ "For managed SQL Server services, ensure the following permissions are granted:\n"
639
+ "GRANT EXECUTE ON msdb.dbo.sp_help_job TO datahub_read;\n"
640
+ "GRANT EXECUTE ON msdb.dbo.sp_help_jobstep TO datahub_read;\n"
641
+ "For on-premises SQL Server, you may also need:\n"
642
+ "GRANT SELECT ON msdb.dbo.sysjobs TO datahub_read;\n"
643
+ "GRANT SELECT ON msdb.dbo.sysjobsteps TO datahub_read;"
411
644
  )
412
- data_flow = MSSQLDataFlow(entity=job)
413
- yield from self.construct_flow_workunits(data_flow=data_flow)
414
- yield from self.loop_job_steps(job, job_steps)
645
+ logger.info(permission_guidance)
646
+
647
+ raise e
415
648
 
416
649
  def loop_job_steps(
417
650
  self, job: MSSQLJob, job_steps: Dict[str, Any]
@@ -431,7 +664,7 @@ class SQLServerSource(SQLAlchemySource):
431
664
  self,
432
665
  inspector: Inspector,
433
666
  schema: str,
434
- sql_config: SQLServerConfig,
667
+ sql_config: SQLServerConfig, # type: ignore
435
668
  ) -> Iterable[MetadataWorkUnit]:
436
669
  """
437
670
  Loop schema data for get stored procedures as dataJob-s.
@@ -740,25 +973,29 @@ class SQLServerSource(SQLAlchemySource):
740
973
  url = self.config.get_sql_alchemy_url()
741
974
  logger.debug(f"sql_alchemy_url={url}")
742
975
  engine = create_engine(url, **self.config.options)
743
- with engine.connect() as conn:
744
- if self.config.database and self.config.database != "":
745
- inspector = inspect(conn)
746
- yield inspector
747
- else:
976
+
977
+ if (
978
+ self.config.database
979
+ and self.config.database != ""
980
+ or (self.config.sqlalchemy_uri and self.config.sqlalchemy_uri != "")
981
+ ):
982
+ inspector = inspect(engine)
983
+ yield inspector
984
+ else:
985
+ with engine.begin() as conn:
748
986
  databases = conn.execute(
749
987
  "SELECT name FROM master.sys.databases WHERE name NOT IN \
750
988
  ('master', 'model', 'msdb', 'tempdb', 'Resource', \
751
989
  'distribution' , 'reportserver', 'reportservertempdb'); "
752
- )
753
- for db in databases:
754
- if self.config.database_pattern.allowed(db["name"]):
755
- url = self.config.get_sql_alchemy_url(current_db=db["name"])
756
- with create_engine(
757
- url, **self.config.options
758
- ).connect() as conn:
759
- inspector = inspect(conn)
760
- self.current_database = db["name"]
761
- yield inspector
990
+ ).fetchall()
991
+
992
+ for db in databases:
993
+ if self.config.database_pattern.allowed(db["name"]):
994
+ url = self.config.get_sql_alchemy_url(current_db=db["name"])
995
+ engine = create_engine(url, **self.config.options)
996
+ inspector = inspect(engine)
997
+ self.current_database = db["name"]
998
+ yield inspector
762
999
 
763
1000
  def get_identifier(
764
1001
  self, *, schema: str, entity: str, inspector: Inspector, **kwargs: Any
@@ -838,3 +1075,45 @@ class SQLServerSource(SQLAlchemySource):
838
1075
  if self.config.convert_urns_to_lowercase
839
1076
  else table_ref_str
840
1077
  )
1078
+
1079
+ def get_allowed_schemas(self, inspector: Inspector, db_name: str) -> Iterable[str]:
1080
+ for schema in super().get_allowed_schemas(inspector, db_name):
1081
+ if self.config.quote_schemas:
1082
+ yield quoted_name(schema, True)
1083
+ else:
1084
+ yield schema
1085
+
1086
+ def get_db_name(self, inspector: Inspector) -> str:
1087
+ engine = inspector.engine
1088
+
1089
+ try:
1090
+ if (
1091
+ engine
1092
+ and hasattr(engine, "url")
1093
+ and hasattr(engine.url, "database")
1094
+ and engine.url.database
1095
+ ):
1096
+ return str(engine.url.database).strip('"')
1097
+
1098
+ if (
1099
+ engine
1100
+ and hasattr(engine, "url")
1101
+ and hasattr(engine.url, "query")
1102
+ and "odbc_connect" in engine.url.query
1103
+ ):
1104
+ # According to the ODBC connection keywords: https://learn.microsoft.com/en-us/sql/connect/odbc/dsn-connection-string-attribute?view=sql-server-ver17#supported-dsnconnection-string-keywords-and-connection-attributes
1105
+ database = re.search(
1106
+ r"DATABASE=([^;]*);",
1107
+ urllib.parse.unquote_plus(str(engine.url.query["odbc_connect"])),
1108
+ flags=re.IGNORECASE,
1109
+ )
1110
+
1111
+ if database and database.group(1):
1112
+ return database.group(1)
1113
+
1114
+ return ""
1115
+
1116
+ except Exception as e:
1117
+ raise RuntimeError(
1118
+ "Unable to get database name from Sqlalchemy inspector"
1119
+ ) from e