acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (503)
  1. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
  2. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
  3. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
  5. datahub/_version.py +1 -1
  6. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  7. datahub/api/entities/assertion/assertion.py +1 -1
  8. datahub/api/entities/common/serialized_value.py +1 -1
  9. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  10. datahub/api/entities/datacontract/datacontract.py +35 -3
  11. datahub/api/entities/datajob/dataflow.py +18 -3
  12. datahub/api/entities/datajob/datajob.py +24 -4
  13. datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
  14. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  15. datahub/api/entities/dataset/dataset.py +47 -72
  16. datahub/api/entities/external/__init__.py +0 -0
  17. datahub/api/entities/external/external_entities.py +724 -0
  18. datahub/api/entities/external/external_tag.py +147 -0
  19. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  20. datahub/api/entities/external/restricted_text.py +172 -0
  21. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  22. datahub/api/entities/forms/forms.py +37 -37
  23. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  24. datahub/api/graphql/assertion.py +1 -1
  25. datahub/api/graphql/base.py +8 -6
  26. datahub/api/graphql/operation.py +14 -10
  27. datahub/cli/check_cli.py +91 -9
  28. datahub/cli/cli_utils.py +63 -0
  29. datahub/cli/config_utils.py +20 -12
  30. datahub/cli/container_cli.py +5 -0
  31. datahub/cli/delete_cli.py +133 -34
  32. datahub/cli/docker_check.py +110 -14
  33. datahub/cli/docker_cli.py +155 -231
  34. datahub/cli/exists_cli.py +2 -3
  35. datahub/cli/get_cli.py +2 -3
  36. datahub/cli/graphql_cli.py +1422 -0
  37. datahub/cli/iceberg_cli.py +11 -5
  38. datahub/cli/ingest_cli.py +25 -26
  39. datahub/cli/migrate.py +12 -9
  40. datahub/cli/migration_utils.py +4 -3
  41. datahub/cli/put_cli.py +4 -6
  42. datahub/cli/quickstart_versioning.py +53 -10
  43. datahub/cli/specific/assertions_cli.py +39 -7
  44. datahub/cli/specific/datacontract_cli.py +57 -9
  45. datahub/cli/specific/dataproduct_cli.py +12 -24
  46. datahub/cli/specific/dataset_cli.py +31 -21
  47. datahub/cli/specific/forms_cli.py +2 -5
  48. datahub/cli/specific/group_cli.py +2 -3
  49. datahub/cli/specific/structuredproperties_cli.py +5 -7
  50. datahub/cli/specific/user_cli.py +174 -4
  51. datahub/cli/state_cli.py +2 -3
  52. datahub/cli/timeline_cli.py +2 -3
  53. datahub/configuration/common.py +46 -2
  54. datahub/configuration/connection_resolver.py +5 -2
  55. datahub/configuration/env_vars.py +331 -0
  56. datahub/configuration/import_resolver.py +7 -4
  57. datahub/configuration/kafka.py +21 -1
  58. datahub/configuration/pydantic_migration_helpers.py +6 -13
  59. datahub/configuration/source_common.py +4 -3
  60. datahub/configuration/validate_field_deprecation.py +5 -2
  61. datahub/configuration/validate_field_removal.py +8 -2
  62. datahub/configuration/validate_field_rename.py +6 -5
  63. datahub/configuration/validate_multiline_string.py +5 -2
  64. datahub/emitter/mce_builder.py +12 -8
  65. datahub/emitter/mcp.py +20 -5
  66. datahub/emitter/mcp_builder.py +12 -0
  67. datahub/emitter/request_helper.py +138 -15
  68. datahub/emitter/response_helper.py +111 -19
  69. datahub/emitter/rest_emitter.py +399 -163
  70. datahub/entrypoints.py +10 -5
  71. datahub/errors.py +12 -0
  72. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
  73. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  74. datahub/ingestion/api/common.py +9 -0
  75. datahub/ingestion/api/decorators.py +15 -3
  76. datahub/ingestion/api/report.py +381 -3
  77. datahub/ingestion/api/sink.py +27 -2
  78. datahub/ingestion/api/source.py +174 -62
  79. datahub/ingestion/api/source_helpers.py +41 -3
  80. datahub/ingestion/api/source_protocols.py +23 -0
  81. datahub/ingestion/autogenerated/__init__.py +0 -0
  82. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  83. datahub/ingestion/autogenerated/lineage.json +402 -0
  84. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  85. datahub/ingestion/extractor/schema_util.py +31 -5
  86. datahub/ingestion/glossary/classification_mixin.py +9 -2
  87. datahub/ingestion/graph/client.py +492 -55
  88. datahub/ingestion/graph/config.py +18 -2
  89. datahub/ingestion/graph/filters.py +96 -32
  90. datahub/ingestion/graph/links.py +55 -0
  91. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  92. datahub/ingestion/run/pipeline.py +90 -23
  93. datahub/ingestion/run/pipeline_config.py +3 -3
  94. datahub/ingestion/sink/datahub_kafka.py +1 -0
  95. datahub/ingestion/sink/datahub_rest.py +31 -23
  96. datahub/ingestion/sink/file.py +1 -0
  97. datahub/ingestion/source/abs/config.py +1 -1
  98. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  99. datahub/ingestion/source/abs/source.py +15 -30
  100. datahub/ingestion/source/apply/datahub_apply.py +6 -5
  101. datahub/ingestion/source/aws/aws_common.py +185 -13
  102. datahub/ingestion/source/aws/glue.py +517 -244
  103. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  104. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  105. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  106. datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
  107. datahub/ingestion/source/aws/tag_entities.py +270 -0
  108. datahub/ingestion/source/azure/azure_common.py +3 -3
  109. datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
  110. datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
  111. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
  112. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
  113. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  114. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  115. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  116. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  117. datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
  118. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  119. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  120. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  121. datahub/ingestion/source/cassandra/cassandra.py +7 -18
  122. datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
  123. datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
  124. datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
  125. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  126. datahub/ingestion/source/common/data_platforms.py +23 -0
  127. datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
  128. datahub/ingestion/source/common/subtypes.py +73 -1
  129. datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
  130. datahub/ingestion/source/data_lake_common/object_store.py +732 -0
  131. datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
  132. datahub/ingestion/source/datahub/config.py +19 -5
  133. datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
  134. datahub/ingestion/source/datahub/datahub_source.py +11 -1
  135. datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
  136. datahub/ingestion/source/dbt/dbt_common.py +270 -26
  137. datahub/ingestion/source/dbt/dbt_core.py +88 -47
  138. datahub/ingestion/source/dbt/dbt_tests.py +8 -6
  139. datahub/ingestion/source/debug/__init__.py +0 -0
  140. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  141. datahub/ingestion/source/delta_lake/config.py +9 -5
  142. datahub/ingestion/source/delta_lake/source.py +8 -0
  143. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  144. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  145. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  146. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
  147. datahub/ingestion/source/dremio/dremio_entities.py +6 -5
  148. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  149. datahub/ingestion/source/dremio/dremio_source.py +228 -215
  150. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  151. datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
  152. datahub/ingestion/source/excel/__init__.py +0 -0
  153. datahub/ingestion/source/excel/config.py +92 -0
  154. datahub/ingestion/source/excel/excel_file.py +539 -0
  155. datahub/ingestion/source/excel/profiling.py +308 -0
  156. datahub/ingestion/source/excel/report.py +49 -0
  157. datahub/ingestion/source/excel/source.py +662 -0
  158. datahub/ingestion/source/excel/util.py +18 -0
  159. datahub/ingestion/source/feast.py +12 -14
  160. datahub/ingestion/source/file.py +3 -0
  161. datahub/ingestion/source/fivetran/config.py +67 -8
  162. datahub/ingestion/source/fivetran/fivetran.py +228 -43
  163. datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
  164. datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
  165. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  166. datahub/ingestion/source/fivetran/response_models.py +97 -0
  167. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  168. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
  169. datahub/ingestion/source/gcs/gcs_source.py +53 -10
  170. datahub/ingestion/source/gcs/gcs_utils.py +36 -9
  171. datahub/ingestion/source/ge_data_profiler.py +146 -33
  172. datahub/ingestion/source/ge_profiling_config.py +26 -11
  173. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  174. datahub/ingestion/source/grafana/field_utils.py +307 -0
  175. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  176. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  177. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  178. datahub/ingestion/source/grafana/lineage.py +202 -0
  179. datahub/ingestion/source/grafana/models.py +137 -0
  180. datahub/ingestion/source/grafana/report.py +90 -0
  181. datahub/ingestion/source/grafana/types.py +16 -0
  182. datahub/ingestion/source/hex/__init__.py +0 -0
  183. datahub/ingestion/source/hex/api.py +402 -0
  184. datahub/ingestion/source/hex/constants.py +8 -0
  185. datahub/ingestion/source/hex/hex.py +311 -0
  186. datahub/ingestion/source/hex/mapper.py +412 -0
  187. datahub/ingestion/source/hex/model.py +78 -0
  188. datahub/ingestion/source/hex/query_fetcher.py +307 -0
  189. datahub/ingestion/source/iceberg/iceberg.py +385 -164
  190. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  191. datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
  192. datahub/ingestion/source/identity/azure_ad.py +1 -1
  193. datahub/ingestion/source/identity/okta.py +1 -14
  194. datahub/ingestion/source/kafka/kafka.py +28 -71
  195. datahub/ingestion/source/kafka/kafka_config.py +78 -0
  196. datahub/ingestion/source/kafka_connect/common.py +2 -2
  197. datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
  198. datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
  199. datahub/ingestion/source/ldap.py +1 -1
  200. datahub/ingestion/source/looker/looker_common.py +216 -86
  201. datahub/ingestion/source/looker/looker_config.py +15 -4
  202. datahub/ingestion/source/looker/looker_constant.py +4 -0
  203. datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
  204. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  205. datahub/ingestion/source/looker/looker_source.py +539 -555
  206. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  207. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  208. datahub/ingestion/source/looker/lookml_config.py +31 -3
  209. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  210. datahub/ingestion/source/looker/lookml_source.py +103 -118
  211. datahub/ingestion/source/looker/view_upstream.py +494 -1
  212. datahub/ingestion/source/metabase.py +32 -6
  213. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  214. datahub/ingestion/source/metadata/lineage.py +11 -10
  215. datahub/ingestion/source/mlflow.py +254 -23
  216. datahub/ingestion/source/mock_data/__init__.py +0 -0
  217. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  218. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  219. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  220. datahub/ingestion/source/mode.py +359 -181
  221. datahub/ingestion/source/mongodb.py +11 -1
  222. datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
  223. datahub/ingestion/source/nifi.py +5 -5
  224. datahub/ingestion/source/openapi.py +85 -38
  225. datahub/ingestion/source/openapi_parser.py +59 -40
  226. datahub/ingestion/source/powerbi/config.py +92 -27
  227. datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
  228. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  229. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  230. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
  231. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  232. datahub/ingestion/source/powerbi/powerbi.py +66 -32
  233. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  234. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
  235. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  236. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  237. datahub/ingestion/source/preset.py +3 -3
  238. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  239. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  240. datahub/ingestion/source/redash.py +1 -1
  241. datahub/ingestion/source/redshift/config.py +15 -9
  242. datahub/ingestion/source/redshift/datashares.py +1 -1
  243. datahub/ingestion/source/redshift/lineage.py +386 -687
  244. datahub/ingestion/source/redshift/profile.py +2 -2
  245. datahub/ingestion/source/redshift/query.py +24 -20
  246. datahub/ingestion/source/redshift/redshift.py +52 -111
  247. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  248. datahub/ingestion/source/redshift/report.py +0 -2
  249. datahub/ingestion/source/redshift/usage.py +13 -11
  250. datahub/ingestion/source/s3/report.py +4 -2
  251. datahub/ingestion/source/s3/source.py +515 -244
  252. datahub/ingestion/source/sac/sac.py +3 -1
  253. datahub/ingestion/source/salesforce.py +28 -13
  254. datahub/ingestion/source/schema/json_schema.py +14 -14
  255. datahub/ingestion/source/schema_inference/object.py +22 -6
  256. datahub/ingestion/source/sigma/config.py +75 -8
  257. datahub/ingestion/source/sigma/data_classes.py +3 -0
  258. datahub/ingestion/source/sigma/sigma.py +36 -7
  259. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  260. datahub/ingestion/source/slack/slack.py +403 -140
  261. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  262. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  263. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  264. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  265. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  266. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  267. datahub/ingestion/source/snowflake/constants.py +4 -0
  268. datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
  269. datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
  270. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
  271. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  272. datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
  273. datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
  274. datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
  275. datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
  276. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
  277. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  278. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  279. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  280. datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
  281. datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
  282. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  283. datahub/ingestion/source/sql/athena.py +219 -26
  284. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  285. datahub/ingestion/source/sql/clickhouse.py +29 -9
  286. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  287. datahub/ingestion/source/sql/druid.py +9 -4
  288. datahub/ingestion/source/sql/hana.py +3 -1
  289. datahub/ingestion/source/sql/hive.py +28 -8
  290. datahub/ingestion/source/sql/hive_metastore.py +24 -25
  291. datahub/ingestion/source/sql/mariadb.py +0 -1
  292. datahub/ingestion/source/sql/mssql/job_models.py +18 -2
  293. datahub/ingestion/source/sql/mssql/source.py +376 -62
  294. datahub/ingestion/source/sql/mysql.py +154 -4
  295. datahub/ingestion/source/sql/oracle.py +62 -11
  296. datahub/ingestion/source/sql/postgres.py +142 -6
  297. datahub/ingestion/source/sql/presto.py +20 -2
  298. datahub/ingestion/source/sql/sql_common.py +281 -49
  299. datahub/ingestion/source/sql/sql_config.py +1 -34
  300. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  301. datahub/ingestion/source/sql/sql_types.py +27 -2
  302. datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
  303. datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
  304. datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
  305. datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
  306. datahub/ingestion/source/sql/teradata.py +1028 -245
  307. datahub/ingestion/source/sql/trino.py +43 -10
  308. datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
  309. datahub/ingestion/source/sql/vertica.py +14 -7
  310. datahub/ingestion/source/sql_queries.py +219 -121
  311. datahub/ingestion/source/state/checkpoint.py +8 -29
  312. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  313. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  314. datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
  315. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  316. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  317. datahub/ingestion/source/superset.py +810 -126
  318. datahub/ingestion/source/tableau/tableau.py +172 -69
  319. datahub/ingestion/source/tableau/tableau_common.py +11 -4
  320. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  321. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  322. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  323. datahub/ingestion/source/unity/config.py +161 -40
  324. datahub/ingestion/source/unity/connection.py +61 -0
  325. datahub/ingestion/source/unity/connection_test.py +1 -0
  326. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  327. datahub/ingestion/source/unity/proxy.py +794 -51
  328. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  329. datahub/ingestion/source/unity/proxy_types.py +36 -2
  330. datahub/ingestion/source/unity/report.py +15 -3
  331. datahub/ingestion/source/unity/source.py +465 -131
  332. datahub/ingestion/source/unity/tag_entities.py +197 -0
  333. datahub/ingestion/source/unity/usage.py +46 -4
  334. datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
  335. datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
  336. datahub/ingestion/source/usage/usage_common.py +4 -68
  337. datahub/ingestion/source/vertexai/__init__.py +0 -0
  338. datahub/ingestion/source/vertexai/vertexai.py +1367 -0
  339. datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
  340. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
  341. datahub/ingestion/source_config/pulsar.py +3 -1
  342. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  343. datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
  344. datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
  345. datahub/ingestion/transformer/base_transformer.py +8 -5
  346. datahub/ingestion/transformer/dataset_domain.py +1 -1
  347. datahub/ingestion/transformer/set_browse_path.py +112 -0
  348. datahub/integrations/assertion/common.py +3 -2
  349. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  350. datahub/lite/lite_util.py +2 -2
  351. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
  352. datahub/metadata/_urns/urn_defs.py +1866 -1582
  353. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  354. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  355. datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
  356. datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
  357. datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
  358. datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
  359. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  360. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  361. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  362. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
  363. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  364. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  365. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  366. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  367. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  368. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  369. datahub/metadata/schema.avsc +18404 -16617
  370. datahub/metadata/schema_classes.py +3 -3
  371. datahub/metadata/schemas/Actors.avsc +38 -1
  372. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  373. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  374. datahub/metadata/schemas/Applications.avsc +38 -0
  375. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  376. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  377. datahub/metadata/schemas/ChartKey.avsc +1 -0
  378. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  379. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  380. datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
  381. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  382. datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
  383. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  384. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  385. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  386. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  387. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  388. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  389. datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  390. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  391. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  392. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  393. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  394. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  395. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  396. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  397. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  398. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  399. datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
  400. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  401. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  402. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  403. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  404. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  405. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  406. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  407. datahub/metadata/schemas/Deprecation.avsc +2 -0
  408. datahub/metadata/schemas/DomainKey.avsc +2 -1
  409. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  410. datahub/metadata/schemas/FormInfo.avsc +5 -0
  411. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  412. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  413. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  414. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  415. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  416. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  417. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  418. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  419. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  420. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  421. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  422. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  423. datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
  424. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  425. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  426. datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
  427. datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
  428. datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
  429. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  430. datahub/metadata/schemas/Operation.avsc +21 -2
  431. datahub/metadata/schemas/Ownership.avsc +69 -0
  432. datahub/metadata/schemas/QueryProperties.avsc +24 -2
  433. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  434. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  435. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  436. datahub/metadata/schemas/Siblings.avsc +2 -0
  437. datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
  438. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  439. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  440. datahub/metadata/schemas/SystemMetadata.avsc +147 -0
  441. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  442. datahub/metadata/schemas/__init__.py +3 -3
  443. datahub/sdk/__init__.py +7 -0
  444. datahub/sdk/_all_entities.py +15 -0
  445. datahub/sdk/_shared.py +393 -10
  446. datahub/sdk/_utils.py +4 -0
  447. datahub/sdk/chart.py +386 -0
  448. datahub/sdk/container.py +7 -0
  449. datahub/sdk/dashboard.py +453 -0
  450. datahub/sdk/dataflow.py +309 -0
  451. datahub/sdk/datajob.py +367 -0
  452. datahub/sdk/dataset.py +180 -4
  453. datahub/sdk/entity.py +99 -3
  454. datahub/sdk/entity_client.py +154 -12
  455. datahub/sdk/lineage_client.py +943 -0
  456. datahub/sdk/main_client.py +83 -8
  457. datahub/sdk/mlmodel.py +383 -0
  458. datahub/sdk/mlmodelgroup.py +240 -0
  459. datahub/sdk/search_client.py +85 -8
  460. datahub/sdk/search_filters.py +393 -68
  461. datahub/secret/datahub_secret_store.py +5 -1
  462. datahub/secret/environment_secret_store.py +29 -0
  463. datahub/secret/file_secret_store.py +49 -0
  464. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  465. datahub/specific/aspect_helpers/siblings.py +73 -0
  466. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  467. datahub/specific/chart.py +1 -1
  468. datahub/specific/datajob.py +15 -1
  469. datahub/specific/dataproduct.py +4 -0
  470. datahub/specific/dataset.py +51 -59
  471. datahub/sql_parsing/_sqlglot_patch.py +1 -2
  472. datahub/sql_parsing/fingerprint_utils.py +6 -0
  473. datahub/sql_parsing/split_statements.py +30 -3
  474. datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
  475. datahub/sql_parsing/sqlglot_lineage.py +517 -44
  476. datahub/sql_parsing/sqlglot_utils.py +30 -18
  477. datahub/sql_parsing/tool_meta_extractor.py +25 -2
  478. datahub/telemetry/telemetry.py +30 -16
  479. datahub/testing/check_imports.py +1 -1
  480. datahub/testing/docker_utils.py +8 -2
  481. datahub/testing/mce_helpers.py +421 -0
  482. datahub/testing/mcp_diff.py +17 -21
  483. datahub/testing/sdk_v2_helpers.py +18 -0
  484. datahub/upgrade/upgrade.py +86 -30
  485. datahub/utilities/file_backed_collections.py +14 -15
  486. datahub/utilities/hive_schema_to_avro.py +2 -2
  487. datahub/utilities/ingest_utils.py +2 -2
  488. datahub/utilities/is_pytest.py +3 -2
  489. datahub/utilities/logging_manager.py +30 -7
  490. datahub/utilities/mapping.py +29 -2
  491. datahub/utilities/sample_data.py +5 -4
  492. datahub/utilities/server_config_util.py +298 -10
  493. datahub/utilities/sqlalchemy_query_combiner.py +6 -4
  494. datahub/utilities/stats_collections.py +4 -0
  495. datahub/utilities/threaded_iterator_executor.py +16 -3
  496. datahub/utilities/urn_encoder.py +1 -1
  497. datahub/utilities/urns/urn.py +41 -2
  498. datahub/emitter/sql_parsing_builder.py +0 -306
  499. datahub/ingestion/source/redshift/lineage_v2.py +0 -458
  500. datahub/ingestion/source/vertexai.py +0 -697
  501. datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
  502. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
  503. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,9 @@ from abc import ABC, abstractmethod
3
3
  from enum import Enum
4
4
  from typing import Dict, List, Optional, Tuple, Type, cast
5
5
 
6
+ import sqlglot
6
7
  from lark import Tree
8
+ from sqlglot import ParseError, expressions as exp
7
9
 
8
10
  from datahub.configuration.source_common import PlatformDetail
9
11
  from datahub.emitter import mce_builder as builder
@@ -29,8 +31,20 @@ from datahub.ingestion.source.powerbi.m_query.data_classes import (
29
31
  Lineage,
30
32
  ReferencedTable,
31
33
  )
34
+ from datahub.ingestion.source.powerbi.m_query.odbc import (
35
+ extract_dsn,
36
+ extract_platform,
37
+ extract_server,
38
+ normalize_platform_name,
39
+ )
32
40
  from datahub.ingestion.source.powerbi.rest_api_wrapper.data_classes import Table
33
- from datahub.sql_parsing.sqlglot_lineage import SqlParsingResult
41
+ from datahub.metadata.schema_classes import SchemaFieldDataTypeClass
42
+ from datahub.sql_parsing.sqlglot_lineage import (
43
+ ColumnLineageInfo,
44
+ ColumnRef,
45
+ DownstreamColumnRef,
46
+ SqlParsingResult,
47
+ )
34
48
 
35
49
  logger = logging.getLogger(__name__)
36
50
 
@@ -149,6 +163,7 @@ class AbstractLineage(ABC):
149
163
  tree_function.token_values(arg_list)
150
164
  ),
151
165
  )
166
+ logger.debug(f"DB Details: {arguments}")
152
167
 
153
168
  if len(arguments) < 2:
154
169
  logger.debug(f"Expected minimum 2 arguments, but got {len(arguments)}")
@@ -196,15 +211,34 @@ class AbstractLineage(ABC):
196
211
 
197
212
  return None
198
213
 
214
@staticmethod
def is_sql_query(query: Optional[str]) -> bool:
    """Report whether ``query`` parses as a SQL ``SELECT`` statement.

    The text is first passed through
    ``native_sql_parser.remove_special_characters`` and then handed to
    ``sqlglot.parse_one``.

    Args:
        query: Candidate SQL text; may be ``None`` or empty.

    Returns:
        ``True`` only when parsing succeeds and the parsed root is an
        ``exp.Select``. ``False`` for empty input, for any parse failure,
        and for any other statement type.
    """
    if not query:
        return False

    cleaned = native_sql_parser.remove_special_characters(query)
    try:
        parsed = sqlglot.parse_one(cleaned)
    except (ParseError, Exception):
        # Deliberately best-effort: anything that fails to parse is simply
        # treated as "not a SQL query".
        logger.debug(f"Failed to parse query as SQL: {cleaned}")
        return False

    return isinstance(parsed, exp.Select)
225
+
199
226
  def parse_custom_sql(
200
- self, query: str, server: str, database: Optional[str], schema: Optional[str]
227
+ self,
228
+ query: str,
229
+ server: str,
230
+ database: Optional[str],
231
+ schema: Optional[str],
232
+ platform_pair: Optional[DataPlatformPair] = None,
201
233
  ) -> Lineage:
202
234
  dataplatform_tables: List[DataPlatformTable] = []
235
+ if not platform_pair:
236
+ platform_pair = self.get_platform_pair()
203
237
 
204
238
  platform_detail: PlatformDetail = (
205
239
  self.platform_instance_resolver.get_platform_instance(
206
240
  PowerBIPlatformDetail(
207
- data_platform_pair=self.get_platform_pair(),
241
+ data_platform_pair=platform_pair,
208
242
  data_platform_server=server,
209
243
  )
210
244
  )
@@ -218,7 +252,7 @@ class AbstractLineage(ABC):
218
252
  native_sql_parser.parse_custom_sql(
219
253
  ctx=self.ctx,
220
254
  query=query,
221
- platform=self.get_platform_pair().datahub_data_platform_name,
255
+ platform=platform_pair.datahub_data_platform_name,
222
256
  platform_instance=platform_detail.platform_instance,
223
257
  env=platform_detail.env,
224
258
  database=database,
@@ -245,7 +279,7 @@ class AbstractLineage(ABC):
245
279
  for urn in parsed_result.in_tables:
246
280
  dataplatform_tables.append(
247
281
  DataPlatformTable(
248
- data_platform_pair=self.get_platform_pair(),
282
+ data_platform_pair=platform_pair,
249
283
  urn=urn,
250
284
  )
251
285
  )
@@ -262,6 +296,33 @@ class AbstractLineage(ABC):
262
296
  ),
263
297
  )
264
298
 
299
def create_table_column_lineage(self, urn: str) -> List[ColumnLineageInfo]:
    """Build one column-lineage entry per column of ``self.table``.

    Each column of ``self.table`` becomes the downstream side, and its only
    upstream is the column with the lower-cased name on the dataset
    identified by ``urn``.

    Args:
        urn: URN of the upstream dataset.

    Returns:
        A list of ``ColumnLineageInfo``; empty when ``self.table.columns``
        is ``None``.
    """
    if self.table.columns is None:
        return []

    return [
        ColumnLineageInfo(
            downstream=DownstreamColumnRef(
                table=self.table.name,
                column=col.name,
                column_type=SchemaFieldDataTypeClass(type=col.datahubDataType),
                # Fall back to a placeholder when no native type is recorded.
                native_column_type=col.dataType or "UNKNOWN",
            ),
            upstreams=[ColumnRef(table=urn, column=col.name.lower())],
        )
        for col in self.table.columns
    ]
325
+
265
326
 
266
327
  class AmazonRedshiftLineage(AbstractLineage):
267
328
  def get_platform_pair(self) -> DataPlatformPair:
@@ -299,6 +360,8 @@ class AmazonRedshiftLineage(AbstractLineage):
299
360
  qualified_table_name=qualified_table_name,
300
361
  )
301
362
 
363
+ column_lineage = self.create_table_column_lineage(urn)
364
+
302
365
  return Lineage(
303
366
  upstreams=[
304
367
  DataPlatformTable(
@@ -306,7 +369,7 @@ class AmazonRedshiftLineage(AbstractLineage):
306
369
  urn=urn,
307
370
  )
308
371
  ],
309
- column_lineage=[],
372
+ column_lineage=column_lineage,
310
373
  )
311
374
 
312
375
 
@@ -364,6 +427,8 @@ class OracleLineage(AbstractLineage):
364
427
  qualified_table_name=qualified_table_name,
365
428
  )
366
429
 
430
+ column_lineage = self.create_table_column_lineage(urn)
431
+
367
432
  return Lineage(
368
433
  upstreams=[
369
434
  DataPlatformTable(
@@ -371,7 +436,7 @@ class OracleLineage(AbstractLineage):
371
436
  urn=urn,
372
437
  )
373
438
  ],
374
- column_lineage=[],
439
+ column_lineage=column_lineage,
375
440
  )
376
441
 
377
442
 
@@ -449,6 +514,8 @@ class DatabricksLineage(AbstractLineage):
449
514
  qualified_table_name=qualified_table_name,
450
515
  )
451
516
 
517
+ column_lineage = self.create_table_column_lineage(urn)
518
+
452
519
  return Lineage(
453
520
  upstreams=[
454
521
  DataPlatformTable(
@@ -456,7 +523,7 @@ class DatabricksLineage(AbstractLineage):
456
523
  urn=urn,
457
524
  )
458
525
  ],
459
- column_lineage=[],
526
+ column_lineage=column_lineage,
460
527
  )
461
528
 
462
529
  return Lineage.empty()
@@ -509,6 +576,9 @@ class TwoStepDataAccessPattern(AbstractLineage, ABC):
509
576
  server=server,
510
577
  qualified_table_name=qualified_table_name,
511
578
  )
579
+
580
+ column_lineage = self.create_table_column_lineage(urn)
581
+
512
582
  return Lineage(
513
583
  upstreams=[
514
584
  DataPlatformTable(
@@ -516,10 +586,62 @@ class TwoStepDataAccessPattern(AbstractLineage, ABC):
516
586
  urn=urn,
517
587
  )
518
588
  ],
519
- column_lineage=[],
589
+ column_lineage=column_lineage,
520
590
  )
521
591
 
522
592
 
593
class MySQLLineage(AbstractLineage):
    """Lineage handler for the MySQL data-access function."""

    def get_platform_pair(self) -> DataPlatformPair:
        """Return the platform pair registered for MySQL."""
        return SupportedDataPlatform.MYSQL.value

    def create_lineage(
        self, data_access_func_detail: DataAccessFunctionDetail
    ) -> Lineage:
        """Resolve the single upstream table referenced by the data-access call.

        The server and database are read from the function arguments. The
        table is addressed by the ``"Schema"`` and ``"Item"`` entries of the
        identifier accessor and qualified as ``<schema>.<item>``.

        Args:
            data_access_func_detail: Parsed data-access function call.

        Returns:
            A ``Lineage`` with one upstream table and its column lineage, or
            an empty ``Lineage`` when the server or the database cannot be
            extracted from the arguments.
        """
        platform_pair = self.get_platform_pair()
        logger.debug(
            f"Processing {platform_pair.powerbi_data_platform_name} data-access function detail {data_access_func_detail}"
        )

        server, db_name = self.get_db_detail_from_argument(
            data_access_func_detail.arg_list
        )
        if server is None or db_name is None:
            # Not enough connection detail to build a URN.
            return Lineage.empty()

        accessor = cast(
            IdentifierAccessor, data_access_func_detail.identifier_accessor
        )
        schema_name: str = accessor.items["Schema"]
        table_name: str = accessor.items["Item"]
        qualified_table_name: str = f"{schema_name}.{table_name}"

        logger.debug(
            f"Platform({platform_pair.datahub_data_platform_name}) qualified_table_name= {qualified_table_name}"
        )

        urn = make_urn(
            config=self.config,
            platform_instance_resolver=self.platform_instance_resolver,
            data_platform_pair=platform_pair,
            server=server,
            qualified_table_name=qualified_table_name,
        )

        upstream_table = DataPlatformTable(
            data_platform_pair=platform_pair,
            urn=urn,
        )
        return Lineage(
            upstreams=[upstream_table],
            column_lineage=self.create_table_column_lineage(urn),
        )
643
+
644
+
523
645
  class PostgresLineage(TwoStepDataAccessPattern):
524
646
  def create_lineage(
525
647
  self, data_access_func_detail: DataAccessFunctionDetail
@@ -671,6 +793,8 @@ class ThreeStepDataAccessPattern(AbstractLineage, ABC):
671
793
  qualified_table_name=qualified_table_name,
672
794
  )
673
795
 
796
+ column_lineage = self.create_table_column_lineage(urn)
797
+
674
798
  return Lineage(
675
799
  upstreams=[
676
800
  DataPlatformTable(
@@ -678,7 +802,7 @@ class ThreeStepDataAccessPattern(AbstractLineage, ABC):
678
802
  urn=urn,
679
803
  )
680
804
  ],
681
- column_lineage=[],
805
+ column_lineage=column_lineage,
682
806
  )
683
807
 
684
808
 
@@ -726,6 +850,7 @@ class NativeQueryLineage(AbstractLineage):
726
850
 
727
851
  tables: List[str] = native_sql_parser.get_tables(query)
728
852
 
853
+ column_lineage = []
729
854
  for qualified_table_name in tables:
730
855
  if len(qualified_table_name.split(".")) != 3:
731
856
  logger.debug(
@@ -748,12 +873,11 @@ class NativeQueryLineage(AbstractLineage):
748
873
  )
749
874
  )
750
875
 
876
+ column_lineage = self.create_table_column_lineage(urn)
877
+
751
878
  logger.debug(f"Generated dataplatform_tables {dataplatform_tables}")
752
879
 
753
- return Lineage(
754
- upstreams=dataplatform_tables,
755
- column_lineage=[],
756
- )
880
+ return Lineage(upstreams=dataplatform_tables, column_lineage=column_lineage)
757
881
 
758
882
  def get_db_name(self, data_access_tokens: List[str]) -> Optional[str]:
759
883
  if (
@@ -844,6 +968,211 @@ class NativeQueryLineage(AbstractLineage):
844
968
  )
845
969
 
846
970
 
971
class OdbcLineage(AbstractLineage):
    """Lineage handler for ODBC data-access functions.

    The concrete platform is derived from the ODBC connect string or, failing
    that, from the ``dsn_to_platform_name`` configuration mapping. Lineage is
    then produced either by parsing an embedded SQL query or by walking the
    identifier accessor chain of the M expression.
    """

    def get_platform_pair(self) -> DataPlatformPair:
        """Return the generic ODBC platform pair."""
        return SupportedDataPlatform.ODBC.value

    @staticmethod
    def create_platform_pair(
        data_platform: str, powerbi_platform: str
    ) -> DataPlatformPair:
        """Wrap the two platform names in a ``DataPlatformPair``."""
        return DataPlatformPair(data_platform, powerbi_platform)

    def _skip_with_warning(self, title: str, message: str, context: str) -> Lineage:
        """Report a warning through the reporter and return an empty lineage."""
        self.reporter.warning(title=title, message=message, context=context)
        return Lineage.empty()

    def create_lineage(
        self, data_access_func_detail: DataAccessFunctionDetail
    ) -> Lineage:
        """Create lineage for an ODBC data-access function call.

        Args:
            data_access_func_detail: Parsed data-access function call.

        Returns:
            The lineage from ``query_lineage`` when the second argument is a
            SQL ``SELECT``, otherwise the lineage from ``expression_lineage``.
            An empty ``Lineage`` (after a reporter warning) when the connect
            string, DSN, platform, or a required server name cannot be
            determined.
        """
        logger.debug(
            f"Processing {self.get_platform_pair().powerbi_data_platform_name} "
            f"data-access function detail {data_access_func_detail}"
        )

        connect_string, query = self.get_db_detail_from_argument(
            data_access_func_detail.arg_list
        )
        if not connect_string:
            return self._skip_with_warning(
                title="Can not extract ODBC connect string",
                message="Can not extract ODBC connect string from data access function. Skipping Lineage creation.",
                context=f"table-name={self.table.full_name}, data-access-func-detail={data_access_func_detail}",
            )

        logger.debug(f"ODBC connect string: {connect_string}")
        data_platform, powerbi_platform = extract_platform(connect_string)
        server_name = extract_server(connect_string)
        dsn = extract_dsn(connect_string)

        if not dsn:
            return self._skip_with_warning(
                title="Can not determine ODBC DSN",
                message="Can not extract DSN from ODBC connect string. Skipping Lineage creation.",
                context=f"table-name={self.table.full_name}, connect-string={connect_string}",
            )
        logger.debug(f"Extracted DSN: {dsn}")

        if not data_platform:
            # No platform in the connect string: the DSN stands in for the
            # server name and the platform is looked up from configuration.
            server_name = dsn
            if self.config.dsn_to_platform_name:
                logger.debug(f"Attempting to map DSN {dsn} to platform")
                name = self.config.dsn_to_platform_name.get(dsn)
                if name:
                    logger.debug(f"Found DSN {dsn} mapped to platform {name}")
                    data_platform, powerbi_platform = normalize_platform_name(name)

        if not data_platform or not powerbi_platform:
            return self._skip_with_warning(
                title="Can not determine ODBC platform",
                message="Can not determine platform from ODBC connect string. Skipping Lineage creation.",
                context=f"table-name={self.table.full_name}, connect-string={connect_string}",
            )

        platform_pair: DataPlatformPair = self.create_platform_pair(
            data_platform, powerbi_platform
        )

        if not server_name:
            if self.config.server_to_platform_instance:
                # A server-to-instance mapping is configured but there is no
                # server name to look up in it.
                return self._skip_with_warning(
                    title="Can not determine ODBC server name",
                    message="Can not determine server name with server_to_platform_instance mapping. Skipping Lineage creation.",
                    context=f"table-name={self.table.full_name}",
                )
            server_name = "unknown"

        if self.is_sql_query(query):
            return self.query_lineage(query, platform_pair, server_name, dsn)
        return self.expression_lineage(
            data_access_func_detail, data_platform, platform_pair, server_name
        )

    def query_lineage(
        self,
        query: Optional[str],
        platform_pair: DataPlatformPair,
        server_name: str,
        dsn: str,
    ) -> Lineage:
        """Derive lineage by parsing the SQL query passed to the ODBC call.

        The default database and schema are taken from the
        ``dsn_to_database_schema`` configuration entry for ``dsn``. That entry
        is read as ``"<database>"`` or ``"<database>.<schema>"``; a value with
        any other number of dot-separated parts leaves both unset.

        Args:
            query: SQL text to parse.
            platform_pair: Platform resolved for this ODBC source.
            server_name: Server name used for platform-instance resolution.
            dsn: DSN extracted from the connect string.

        Returns:
            The result of ``parse_custom_sql``, or an empty ``Lineage`` when
            ``query`` is empty.
        """
        if not query:
            # Callers check the query first; this guard is purely defensive.
            return self._skip_with_warning(
                title="ODBC Query is null",
                message="No SQL to parse. Skipping Lineage creation.",
                context=f"table-name={self.table.full_name}",
            )

        database = None
        schema = None
        if self.config.dsn_to_database_schema:
            value = self.config.dsn_to_database_schema.get(dsn)
            if value:
                parts = value.split(".")
                if len(parts) in (1, 2):
                    database = parts[0]
                if len(parts) == 2:
                    schema = parts[1]

        logger.debug(
            f"ODBC query processing: dsn={dsn} mapped to database={database}, schema={schema}"
        )
        result = self.parse_custom_sql(
            query=query,
            server=server_name,
            database=database,
            schema=schema,
            platform_pair=platform_pair,
        )
        logger.debug(f"ODBC query lineage generated {len(result.upstreams)} upstreams")
        return result

    def expression_lineage(
        self,
        data_access_func_detail: DataAccessFunctionDetail,
        data_platform: str,
        platform_pair: DataPlatformPair,
        server_name: str,
    ) -> Lineage:
        """Derive lineage from the identifier accessor chain.

        The chain is walked through its ``next`` links. The ``"Name"`` of each
        accessor whose ``"Kind"`` is ``"Database"``, ``"Schema"`` or
        ``"Table"`` is recorded; a later accessor of the same kind replaces an
        earlier one. The qualified name is ``database.schema.table`` when all
        three are present, or ``database.table`` when only the schema is
        missing.

        Args:
            data_access_func_detail: Parsed data-access function call.
            data_platform: Platform name, used only in log and warning text.
            platform_pair: Platform resolved for this ODBC source.
            server_name: Server name used when building the URN.

        Returns:
            A ``Lineage`` with one upstream table and its column lineage, or
            an empty ``Lineage`` when no qualified table name can be built.
        """
        names_by_kind: Dict[str, Optional[str]] = {}

        accessor: Optional[IdentifierAccessor] = (
            data_access_func_detail.identifier_accessor
        )
        while accessor:
            logger.debug(
                f"identifier = {accessor.identifier} items = {accessor.items}"
            )
            kind = accessor.items.get("Kind")
            if kind in ("Database", "Schema", "Table"):
                names_by_kind[kind] = accessor.items["Name"]
            accessor = accessor.next

        database_name = names_by_kind.get("Database")
        schema_name = names_by_kind.get("Schema")
        table_name = names_by_kind.get("Table")

        qualified_table_name = None
        if database_name is not None and table_name is not None:
            if schema_name is not None:
                qualified_table_name = f"{database_name}.{schema_name}.{table_name}"
            else:
                qualified_table_name = f"{database_name}.{table_name}"

        if not qualified_table_name:
            self.reporter.warning(
                title="Can not determine qualified table name",
                message="Can not determine qualified table name for ODBC data source. Skipping Lineage creation.",
                context=f"table-name={self.table.full_name}, data-platform={data_platform}",
            )
            logger.warning(
                f"Can not determine qualified table name for ODBC data source {data_platform} "
                f"table {self.table.full_name}."
            )
            return Lineage.empty()

        logger.debug(
            f"ODBC Platform {data_platform} found qualified table name {qualified_table_name}"
        )

        urn = make_urn(
            config=self.config,
            platform_instance_resolver=self.platform_instance_resolver,
            data_platform_pair=platform_pair,
            server=server_name,
            qualified_table_name=qualified_table_name,
        )

        upstream_table = DataPlatformTable(
            data_platform_pair=platform_pair,
            urn=urn,
        )
        return Lineage(
            upstreams=[upstream_table],
            column_lineage=self.create_table_column_lineage(urn),
        )
1174
+
1175
+
847
1176
  class SupportedPattern(Enum):
848
1177
  DATABRICKS_QUERY = (
849
1178
  DatabricksLineage,
@@ -885,11 +1214,26 @@ class SupportedPattern(Enum):
885
1214
  FunctionName.AMAZON_REDSHIFT_DATA_ACCESS,
886
1215
  )
887
1216
 
1217
+ MYSQL = (
1218
+ MySQLLineage,
1219
+ FunctionName.MYSQL_DATA_ACCESS,
1220
+ )
1221
+
888
1222
  NATIVE_QUERY = (
889
1223
  NativeQueryLineage,
890
1224
  FunctionName.NATIVE_QUERY,
891
1225
  )
892
1226
 
1227
+ ODBC = (
1228
+ OdbcLineage,
1229
+ FunctionName.ODBC_DATA_ACCESS,
1230
+ )
1231
+
1232
+ ODBC_QUERY = (
1233
+ OdbcLineage,
1234
+ FunctionName.ODBC_QUERY,
1235
+ )
1236
+
893
1237
  def handler(self) -> Type[AbstractLineage]:
894
1238
  return self.value[0]
895
1239
 
@@ -361,6 +361,9 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):
361
361
  )
362
362
 
363
363
  if output_variable is None:
364
+ logger.debug(
365
+ f"Table: {self.table.full_name}: output-variable not found in tree"
366
+ )
364
367
  self.reporter.report_warning(
365
368
  f"{self.table.full_name}-output-variable",
366
369
  "output-variable not found in table expression",
@@ -374,6 +377,9 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):
374
377
 
375
378
  # Each item is data-access function
376
379
  for f_detail in table_links:
380
+ logger.debug(
381
+ f"Processing data-access-function {f_detail.data_access_function_name}"
382
+ )
377
383
  # Get & Check if we support data-access-function available in M-Query
378
384
  supported_resolver = SupportedPattern.get_pattern_handler(
379
385
  f_detail.data_access_function_name
@@ -390,6 +396,10 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):
390
396
 
391
397
  # From supported_resolver enum get respective handler like AmazonRedshift or Snowflake or Oracle or NativeQuery and create instance of it
392
398
  # & also pass additional information that will be need to generate lineage
399
+ logger.debug(
400
+ f"Creating instance of {supported_resolver.handler().__name__} "
401
+ f"for data-access-function {f_detail.data_access_function_name}"
402
+ )
393
403
  pattern_handler: AbstractLineage = supported_resolver.handler()(
394
404
  ctx=ctx,
395
405
  table=self.table,