acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. (The original page links to more details.)

Files changed (503)
  1. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
  2. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
  3. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
  5. datahub/_version.py +1 -1
  6. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  7. datahub/api/entities/assertion/assertion.py +1 -1
  8. datahub/api/entities/common/serialized_value.py +1 -1
  9. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  10. datahub/api/entities/datacontract/datacontract.py +35 -3
  11. datahub/api/entities/datajob/dataflow.py +18 -3
  12. datahub/api/entities/datajob/datajob.py +24 -4
  13. datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
  14. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  15. datahub/api/entities/dataset/dataset.py +47 -72
  16. datahub/api/entities/external/__init__.py +0 -0
  17. datahub/api/entities/external/external_entities.py +724 -0
  18. datahub/api/entities/external/external_tag.py +147 -0
  19. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  20. datahub/api/entities/external/restricted_text.py +172 -0
  21. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  22. datahub/api/entities/forms/forms.py +37 -37
  23. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  24. datahub/api/graphql/assertion.py +1 -1
  25. datahub/api/graphql/base.py +8 -6
  26. datahub/api/graphql/operation.py +14 -10
  27. datahub/cli/check_cli.py +91 -9
  28. datahub/cli/cli_utils.py +63 -0
  29. datahub/cli/config_utils.py +20 -12
  30. datahub/cli/container_cli.py +5 -0
  31. datahub/cli/delete_cli.py +133 -34
  32. datahub/cli/docker_check.py +110 -14
  33. datahub/cli/docker_cli.py +155 -231
  34. datahub/cli/exists_cli.py +2 -3
  35. datahub/cli/get_cli.py +2 -3
  36. datahub/cli/graphql_cli.py +1422 -0
  37. datahub/cli/iceberg_cli.py +11 -5
  38. datahub/cli/ingest_cli.py +25 -26
  39. datahub/cli/migrate.py +12 -9
  40. datahub/cli/migration_utils.py +4 -3
  41. datahub/cli/put_cli.py +4 -6
  42. datahub/cli/quickstart_versioning.py +53 -10
  43. datahub/cli/specific/assertions_cli.py +39 -7
  44. datahub/cli/specific/datacontract_cli.py +57 -9
  45. datahub/cli/specific/dataproduct_cli.py +12 -24
  46. datahub/cli/specific/dataset_cli.py +31 -21
  47. datahub/cli/specific/forms_cli.py +2 -5
  48. datahub/cli/specific/group_cli.py +2 -3
  49. datahub/cli/specific/structuredproperties_cli.py +5 -7
  50. datahub/cli/specific/user_cli.py +174 -4
  51. datahub/cli/state_cli.py +2 -3
  52. datahub/cli/timeline_cli.py +2 -3
  53. datahub/configuration/common.py +46 -2
  54. datahub/configuration/connection_resolver.py +5 -2
  55. datahub/configuration/env_vars.py +331 -0
  56. datahub/configuration/import_resolver.py +7 -4
  57. datahub/configuration/kafka.py +21 -1
  58. datahub/configuration/pydantic_migration_helpers.py +6 -13
  59. datahub/configuration/source_common.py +4 -3
  60. datahub/configuration/validate_field_deprecation.py +5 -2
  61. datahub/configuration/validate_field_removal.py +8 -2
  62. datahub/configuration/validate_field_rename.py +6 -5
  63. datahub/configuration/validate_multiline_string.py +5 -2
  64. datahub/emitter/mce_builder.py +12 -8
  65. datahub/emitter/mcp.py +20 -5
  66. datahub/emitter/mcp_builder.py +12 -0
  67. datahub/emitter/request_helper.py +138 -15
  68. datahub/emitter/response_helper.py +111 -19
  69. datahub/emitter/rest_emitter.py +399 -163
  70. datahub/entrypoints.py +10 -5
  71. datahub/errors.py +12 -0
  72. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
  73. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  74. datahub/ingestion/api/common.py +9 -0
  75. datahub/ingestion/api/decorators.py +15 -3
  76. datahub/ingestion/api/report.py +381 -3
  77. datahub/ingestion/api/sink.py +27 -2
  78. datahub/ingestion/api/source.py +174 -62
  79. datahub/ingestion/api/source_helpers.py +41 -3
  80. datahub/ingestion/api/source_protocols.py +23 -0
  81. datahub/ingestion/autogenerated/__init__.py +0 -0
  82. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  83. datahub/ingestion/autogenerated/lineage.json +402 -0
  84. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  85. datahub/ingestion/extractor/schema_util.py +31 -5
  86. datahub/ingestion/glossary/classification_mixin.py +9 -2
  87. datahub/ingestion/graph/client.py +492 -55
  88. datahub/ingestion/graph/config.py +18 -2
  89. datahub/ingestion/graph/filters.py +96 -32
  90. datahub/ingestion/graph/links.py +55 -0
  91. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  92. datahub/ingestion/run/pipeline.py +90 -23
  93. datahub/ingestion/run/pipeline_config.py +3 -3
  94. datahub/ingestion/sink/datahub_kafka.py +1 -0
  95. datahub/ingestion/sink/datahub_rest.py +31 -23
  96. datahub/ingestion/sink/file.py +1 -0
  97. datahub/ingestion/source/abs/config.py +1 -1
  98. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  99. datahub/ingestion/source/abs/source.py +15 -30
  100. datahub/ingestion/source/apply/datahub_apply.py +6 -5
  101. datahub/ingestion/source/aws/aws_common.py +185 -13
  102. datahub/ingestion/source/aws/glue.py +517 -244
  103. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  104. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  105. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  106. datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
  107. datahub/ingestion/source/aws/tag_entities.py +270 -0
  108. datahub/ingestion/source/azure/azure_common.py +3 -3
  109. datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
  110. datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
  111. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
  112. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
  113. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  114. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  115. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  116. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  117. datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
  118. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  119. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  120. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  121. datahub/ingestion/source/cassandra/cassandra.py +7 -18
  122. datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
  123. datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
  124. datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
  125. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  126. datahub/ingestion/source/common/data_platforms.py +23 -0
  127. datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
  128. datahub/ingestion/source/common/subtypes.py +73 -1
  129. datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
  130. datahub/ingestion/source/data_lake_common/object_store.py +732 -0
  131. datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
  132. datahub/ingestion/source/datahub/config.py +19 -5
  133. datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
  134. datahub/ingestion/source/datahub/datahub_source.py +11 -1
  135. datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
  136. datahub/ingestion/source/dbt/dbt_common.py +270 -26
  137. datahub/ingestion/source/dbt/dbt_core.py +88 -47
  138. datahub/ingestion/source/dbt/dbt_tests.py +8 -6
  139. datahub/ingestion/source/debug/__init__.py +0 -0
  140. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  141. datahub/ingestion/source/delta_lake/config.py +9 -5
  142. datahub/ingestion/source/delta_lake/source.py +8 -0
  143. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  144. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  145. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  146. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
  147. datahub/ingestion/source/dremio/dremio_entities.py +6 -5
  148. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  149. datahub/ingestion/source/dremio/dremio_source.py +228 -215
  150. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  151. datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
  152. datahub/ingestion/source/excel/__init__.py +0 -0
  153. datahub/ingestion/source/excel/config.py +92 -0
  154. datahub/ingestion/source/excel/excel_file.py +539 -0
  155. datahub/ingestion/source/excel/profiling.py +308 -0
  156. datahub/ingestion/source/excel/report.py +49 -0
  157. datahub/ingestion/source/excel/source.py +662 -0
  158. datahub/ingestion/source/excel/util.py +18 -0
  159. datahub/ingestion/source/feast.py +12 -14
  160. datahub/ingestion/source/file.py +3 -0
  161. datahub/ingestion/source/fivetran/config.py +67 -8
  162. datahub/ingestion/source/fivetran/fivetran.py +228 -43
  163. datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
  164. datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
  165. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  166. datahub/ingestion/source/fivetran/response_models.py +97 -0
  167. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  168. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
  169. datahub/ingestion/source/gcs/gcs_source.py +53 -10
  170. datahub/ingestion/source/gcs/gcs_utils.py +36 -9
  171. datahub/ingestion/source/ge_data_profiler.py +146 -33
  172. datahub/ingestion/source/ge_profiling_config.py +26 -11
  173. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  174. datahub/ingestion/source/grafana/field_utils.py +307 -0
  175. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  176. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  177. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  178. datahub/ingestion/source/grafana/lineage.py +202 -0
  179. datahub/ingestion/source/grafana/models.py +137 -0
  180. datahub/ingestion/source/grafana/report.py +90 -0
  181. datahub/ingestion/source/grafana/types.py +16 -0
  182. datahub/ingestion/source/hex/__init__.py +0 -0
  183. datahub/ingestion/source/hex/api.py +402 -0
  184. datahub/ingestion/source/hex/constants.py +8 -0
  185. datahub/ingestion/source/hex/hex.py +311 -0
  186. datahub/ingestion/source/hex/mapper.py +412 -0
  187. datahub/ingestion/source/hex/model.py +78 -0
  188. datahub/ingestion/source/hex/query_fetcher.py +307 -0
  189. datahub/ingestion/source/iceberg/iceberg.py +385 -164
  190. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  191. datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
  192. datahub/ingestion/source/identity/azure_ad.py +1 -1
  193. datahub/ingestion/source/identity/okta.py +1 -14
  194. datahub/ingestion/source/kafka/kafka.py +28 -71
  195. datahub/ingestion/source/kafka/kafka_config.py +78 -0
  196. datahub/ingestion/source/kafka_connect/common.py +2 -2
  197. datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
  198. datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
  199. datahub/ingestion/source/ldap.py +1 -1
  200. datahub/ingestion/source/looker/looker_common.py +216 -86
  201. datahub/ingestion/source/looker/looker_config.py +15 -4
  202. datahub/ingestion/source/looker/looker_constant.py +4 -0
  203. datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
  204. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  205. datahub/ingestion/source/looker/looker_source.py +539 -555
  206. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  207. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  208. datahub/ingestion/source/looker/lookml_config.py +31 -3
  209. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  210. datahub/ingestion/source/looker/lookml_source.py +103 -118
  211. datahub/ingestion/source/looker/view_upstream.py +494 -1
  212. datahub/ingestion/source/metabase.py +32 -6
  213. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  214. datahub/ingestion/source/metadata/lineage.py +11 -10
  215. datahub/ingestion/source/mlflow.py +254 -23
  216. datahub/ingestion/source/mock_data/__init__.py +0 -0
  217. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  218. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  219. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  220. datahub/ingestion/source/mode.py +359 -181
  221. datahub/ingestion/source/mongodb.py +11 -1
  222. datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
  223. datahub/ingestion/source/nifi.py +5 -5
  224. datahub/ingestion/source/openapi.py +85 -38
  225. datahub/ingestion/source/openapi_parser.py +59 -40
  226. datahub/ingestion/source/powerbi/config.py +92 -27
  227. datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
  228. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  229. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  230. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
  231. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  232. datahub/ingestion/source/powerbi/powerbi.py +66 -32
  233. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  234. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
  235. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  236. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  237. datahub/ingestion/source/preset.py +3 -3
  238. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  239. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  240. datahub/ingestion/source/redash.py +1 -1
  241. datahub/ingestion/source/redshift/config.py +15 -9
  242. datahub/ingestion/source/redshift/datashares.py +1 -1
  243. datahub/ingestion/source/redshift/lineage.py +386 -687
  244. datahub/ingestion/source/redshift/profile.py +2 -2
  245. datahub/ingestion/source/redshift/query.py +24 -20
  246. datahub/ingestion/source/redshift/redshift.py +52 -111
  247. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  248. datahub/ingestion/source/redshift/report.py +0 -2
  249. datahub/ingestion/source/redshift/usage.py +13 -11
  250. datahub/ingestion/source/s3/report.py +4 -2
  251. datahub/ingestion/source/s3/source.py +515 -244
  252. datahub/ingestion/source/sac/sac.py +3 -1
  253. datahub/ingestion/source/salesforce.py +28 -13
  254. datahub/ingestion/source/schema/json_schema.py +14 -14
  255. datahub/ingestion/source/schema_inference/object.py +22 -6
  256. datahub/ingestion/source/sigma/config.py +75 -8
  257. datahub/ingestion/source/sigma/data_classes.py +3 -0
  258. datahub/ingestion/source/sigma/sigma.py +36 -7
  259. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  260. datahub/ingestion/source/slack/slack.py +403 -140
  261. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  262. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  263. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  264. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  265. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  266. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  267. datahub/ingestion/source/snowflake/constants.py +4 -0
  268. datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
  269. datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
  270. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
  271. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  272. datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
  273. datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
  274. datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
  275. datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
  276. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
  277. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  278. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  279. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  280. datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
  281. datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
  282. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  283. datahub/ingestion/source/sql/athena.py +219 -26
  284. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  285. datahub/ingestion/source/sql/clickhouse.py +29 -9
  286. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  287. datahub/ingestion/source/sql/druid.py +9 -4
  288. datahub/ingestion/source/sql/hana.py +3 -1
  289. datahub/ingestion/source/sql/hive.py +28 -8
  290. datahub/ingestion/source/sql/hive_metastore.py +24 -25
  291. datahub/ingestion/source/sql/mariadb.py +0 -1
  292. datahub/ingestion/source/sql/mssql/job_models.py +18 -2
  293. datahub/ingestion/source/sql/mssql/source.py +376 -62
  294. datahub/ingestion/source/sql/mysql.py +154 -4
  295. datahub/ingestion/source/sql/oracle.py +62 -11
  296. datahub/ingestion/source/sql/postgres.py +142 -6
  297. datahub/ingestion/source/sql/presto.py +20 -2
  298. datahub/ingestion/source/sql/sql_common.py +281 -49
  299. datahub/ingestion/source/sql/sql_config.py +1 -34
  300. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  301. datahub/ingestion/source/sql/sql_types.py +27 -2
  302. datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
  303. datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
  304. datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
  305. datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
  306. datahub/ingestion/source/sql/teradata.py +1028 -245
  307. datahub/ingestion/source/sql/trino.py +43 -10
  308. datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
  309. datahub/ingestion/source/sql/vertica.py +14 -7
  310. datahub/ingestion/source/sql_queries.py +219 -121
  311. datahub/ingestion/source/state/checkpoint.py +8 -29
  312. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  313. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  314. datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
  315. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  316. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  317. datahub/ingestion/source/superset.py +810 -126
  318. datahub/ingestion/source/tableau/tableau.py +172 -69
  319. datahub/ingestion/source/tableau/tableau_common.py +11 -4
  320. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  321. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  322. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  323. datahub/ingestion/source/unity/config.py +161 -40
  324. datahub/ingestion/source/unity/connection.py +61 -0
  325. datahub/ingestion/source/unity/connection_test.py +1 -0
  326. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  327. datahub/ingestion/source/unity/proxy.py +794 -51
  328. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  329. datahub/ingestion/source/unity/proxy_types.py +36 -2
  330. datahub/ingestion/source/unity/report.py +15 -3
  331. datahub/ingestion/source/unity/source.py +465 -131
  332. datahub/ingestion/source/unity/tag_entities.py +197 -0
  333. datahub/ingestion/source/unity/usage.py +46 -4
  334. datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
  335. datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
  336. datahub/ingestion/source/usage/usage_common.py +4 -68
  337. datahub/ingestion/source/vertexai/__init__.py +0 -0
  338. datahub/ingestion/source/vertexai/vertexai.py +1367 -0
  339. datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
  340. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
  341. datahub/ingestion/source_config/pulsar.py +3 -1
  342. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  343. datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
  344. datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
  345. datahub/ingestion/transformer/base_transformer.py +8 -5
  346. datahub/ingestion/transformer/dataset_domain.py +1 -1
  347. datahub/ingestion/transformer/set_browse_path.py +112 -0
  348. datahub/integrations/assertion/common.py +3 -2
  349. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  350. datahub/lite/lite_util.py +2 -2
  351. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
  352. datahub/metadata/_urns/urn_defs.py +1866 -1582
  353. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  354. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  355. datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
  356. datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
  357. datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
  358. datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
  359. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  360. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  361. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  362. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
  363. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  364. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  365. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  366. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  367. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  368. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  369. datahub/metadata/schema.avsc +18404 -16617
  370. datahub/metadata/schema_classes.py +3 -3
  371. datahub/metadata/schemas/Actors.avsc +38 -1
  372. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  373. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  374. datahub/metadata/schemas/Applications.avsc +38 -0
  375. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  376. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  377. datahub/metadata/schemas/ChartKey.avsc +1 -0
  378. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  379. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  380. datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
  381. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  382. datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
  383. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  384. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  385. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  386. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  387. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  388. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  389. datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  390. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  391. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  392. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  393. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  394. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  395. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  396. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  397. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  398. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  399. datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
  400. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  401. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  402. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  403. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  404. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  405. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  406. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  407. datahub/metadata/schemas/Deprecation.avsc +2 -0
  408. datahub/metadata/schemas/DomainKey.avsc +2 -1
  409. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  410. datahub/metadata/schemas/FormInfo.avsc +5 -0
  411. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  412. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  413. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  414. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  415. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  416. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  417. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  418. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  419. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  420. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  421. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  422. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  423. datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
  424. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  425. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  426. datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
  427. datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
  428. datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
  429. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  430. datahub/metadata/schemas/Operation.avsc +21 -2
  431. datahub/metadata/schemas/Ownership.avsc +69 -0
  432. datahub/metadata/schemas/QueryProperties.avsc +24 -2
  433. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  434. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  435. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  436. datahub/metadata/schemas/Siblings.avsc +2 -0
  437. datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
  438. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  439. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  440. datahub/metadata/schemas/SystemMetadata.avsc +147 -0
  441. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  442. datahub/metadata/schemas/__init__.py +3 -3
  443. datahub/sdk/__init__.py +7 -0
  444. datahub/sdk/_all_entities.py +15 -0
  445. datahub/sdk/_shared.py +393 -10
  446. datahub/sdk/_utils.py +4 -0
  447. datahub/sdk/chart.py +386 -0
  448. datahub/sdk/container.py +7 -0
  449. datahub/sdk/dashboard.py +453 -0
  450. datahub/sdk/dataflow.py +309 -0
  451. datahub/sdk/datajob.py +367 -0
  452. datahub/sdk/dataset.py +180 -4
  453. datahub/sdk/entity.py +99 -3
  454. datahub/sdk/entity_client.py +154 -12
  455. datahub/sdk/lineage_client.py +943 -0
  456. datahub/sdk/main_client.py +83 -8
  457. datahub/sdk/mlmodel.py +383 -0
  458. datahub/sdk/mlmodelgroup.py +240 -0
  459. datahub/sdk/search_client.py +85 -8
  460. datahub/sdk/search_filters.py +393 -68
  461. datahub/secret/datahub_secret_store.py +5 -1
  462. datahub/secret/environment_secret_store.py +29 -0
  463. datahub/secret/file_secret_store.py +49 -0
  464. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  465. datahub/specific/aspect_helpers/siblings.py +73 -0
  466. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  467. datahub/specific/chart.py +1 -1
  468. datahub/specific/datajob.py +15 -1
  469. datahub/specific/dataproduct.py +4 -0
  470. datahub/specific/dataset.py +51 -59
  471. datahub/sql_parsing/_sqlglot_patch.py +1 -2
  472. datahub/sql_parsing/fingerprint_utils.py +6 -0
  473. datahub/sql_parsing/split_statements.py +30 -3
  474. datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
  475. datahub/sql_parsing/sqlglot_lineage.py +517 -44
  476. datahub/sql_parsing/sqlglot_utils.py +30 -18
  477. datahub/sql_parsing/tool_meta_extractor.py +25 -2
  478. datahub/telemetry/telemetry.py +30 -16
  479. datahub/testing/check_imports.py +1 -1
  480. datahub/testing/docker_utils.py +8 -2
  481. datahub/testing/mce_helpers.py +421 -0
  482. datahub/testing/mcp_diff.py +17 -21
  483. datahub/testing/sdk_v2_helpers.py +18 -0
  484. datahub/upgrade/upgrade.py +86 -30
  485. datahub/utilities/file_backed_collections.py +14 -15
  486. datahub/utilities/hive_schema_to_avro.py +2 -2
  487. datahub/utilities/ingest_utils.py +2 -2
  488. datahub/utilities/is_pytest.py +3 -2
  489. datahub/utilities/logging_manager.py +30 -7
  490. datahub/utilities/mapping.py +29 -2
  491. datahub/utilities/sample_data.py +5 -4
  492. datahub/utilities/server_config_util.py +298 -10
  493. datahub/utilities/sqlalchemy_query_combiner.py +6 -4
  494. datahub/utilities/stats_collections.py +4 -0
  495. datahub/utilities/threaded_iterator_executor.py +16 -3
  496. datahub/utilities/urn_encoder.py +1 -1
  497. datahub/utilities/urns/urn.py +41 -2
  498. datahub/emitter/sql_parsing_builder.py +0 -306
  499. datahub/ingestion/source/redshift/lineage_v2.py +0 -458
  500. datahub/ingestion/source/vertexai.py +0 -697
  501. datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
  502. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
  503. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -3,6 +3,7 @@
3
3
  # Meta Data Ingestion From the Power BI Source
4
4
  #
5
5
  #########################################################
6
+ import functools
6
7
  import logging
7
8
  from datetime import datetime
8
9
  from typing import Iterable, List, Optional, Tuple, Union
@@ -24,6 +25,7 @@ from datahub.ingestion.api.decorators import (
24
25
  support_status,
25
26
  )
26
27
  from datahub.ingestion.api.incremental_lineage_helper import (
28
+ auto_incremental_lineage,
27
29
  convert_dashboard_info_to_patch,
28
30
  )
29
31
  from datahub.ingestion.api.source import (
@@ -38,6 +40,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
38
40
  from datahub.ingestion.source.common.subtypes import (
39
41
  BIAssetSubTypes,
40
42
  BIContainerSubTypes,
43
+ SourceCapabilityModifier,
41
44
  )
42
45
  from datahub.ingestion.source.powerbi.config import (
43
46
  Constant,
@@ -92,7 +95,7 @@ from datahub.metadata.schema_classes import (
92
95
  UpstreamLineageClass,
93
96
  ViewPropertiesClass,
94
97
  )
95
- from datahub.metadata.urns import ChartUrn
98
+ from datahub.metadata.urns import ChartUrn, DatasetUrn
96
99
  from datahub.sql_parsing.sqlglot_lineage import ColumnLineageInfo
97
100
  from datahub.utilities.dedup_list import deduplicate_list
98
101
  from datahub.utilities.urns.urn_iter import lowercase_dataset_urn
@@ -238,6 +241,10 @@ class Mapper:
238
241
  upstream: List[UpstreamClass] = []
239
242
  cll_lineage: List[FineGrainedLineage] = []
240
243
 
244
+ logger.debug(
245
+ f"Extracting lineage for table {table.full_name} in dataset {table.dataset.name if table.dataset else None}"
246
+ )
247
+
241
248
  upstream_lineage: List[
242
249
  datahub.ingestion.source.powerbi.m_query.data_classes.Lineage
243
250
  ] = parser.get_upstream_tables(
@@ -257,7 +264,7 @@ class Mapper:
257
264
  for upstream_dpt in lineage.upstreams:
258
265
  if (
259
266
  upstream_dpt.data_platform_pair.powerbi_data_platform_name
260
- not in self.__config.dataset_type_mapping.keys()
267
+ not in self.__config.dataset_type_mapping
261
268
  ):
262
269
  logger.debug(
263
270
  f"Skipping upstream table for {ds_urn}. The platform {upstream_dpt.data_platform_pair.powerbi_data_platform_name} is not part of dataset_type_mapping",
@@ -288,8 +295,6 @@ class Mapper:
288
295
  logger.debug(f"Dataset urn = {ds_urn} and its lineage = {upstream_lineage}")
289
296
 
290
297
  mcp = MetadataChangeProposalWrapper(
291
- entityType=Constant.DATASET,
292
- changeType=ChangeTypeClass.UPSERT,
293
298
  entityUrn=ds_urn,
294
299
  aspect=upstream_lineage_class,
295
300
  )
@@ -532,9 +537,7 @@ class Mapper:
532
537
  profile.columnCount = table.column_count
533
538
 
534
539
  mcp = MetadataChangeProposalWrapper(
535
- entityType="dataset",
536
540
  entityUrn=ds_urn,
537
- aspectName="datasetProfile",
538
541
  aspect=profile,
539
542
  )
540
543
  dataset_mcps.append(mcp)
@@ -666,6 +669,7 @@ class Mapper:
666
669
  workspace: powerbi_data_classes.Workspace,
667
670
  chart_mcps: List[MetadataChangeProposalWrapper],
668
671
  user_mcps: List[MetadataChangeProposalWrapper],
672
+ dashboard_edges: List[EdgeClass],
669
673
  ) -> List[MetadataChangeProposalWrapper]:
670
674
  """
671
675
  Map PowerBi dashboard to Datahub dashboard
@@ -695,6 +699,7 @@ class Mapper:
695
699
  lastModified=ChangeAuditStamps(),
696
700
  dashboardUrl=dashboard.webUrl,
697
701
  customProperties={**chart_custom_properties(dashboard)},
702
+ dashboards=dashboard_edges,
698
703
  )
699
704
 
700
705
  info_mcp = self.new_mcp(
@@ -788,7 +793,6 @@ class Mapper:
788
793
  guid=container_key.guid(),
789
794
  )
790
795
  mcp = MetadataChangeProposalWrapper(
791
- changeType=ChangeTypeClass.UPSERT,
792
796
  entityUrn=entity_urn,
793
797
  aspect=ContainerClass(container=f"{container_urn}"),
794
798
  )
@@ -933,7 +937,7 @@ class Mapper:
933
937
  dashboard: powerbi_data_classes.Dashboard,
934
938
  workspace: powerbi_data_classes.Workspace,
935
939
  ) -> List[EquableMetadataWorkUnit]:
936
- mcps = []
940
+ mcps: List[MetadataChangeProposalWrapper] = []
937
941
 
938
942
  logger.info(
939
943
  f"Converting dashboard={dashboard.displayName} to datahub dashboard"
@@ -945,9 +949,30 @@ class Mapper:
945
949
  )
946
950
  # Convert tiles to charts
947
951
  ds_mcps, chart_mcps = self.to_datahub_chart(dashboard.tiles, workspace)
952
+
953
+ # collect all downstream reports (dashboards)
954
+ dashboard_edges = []
955
+ for t in dashboard.tiles:
956
+ if t.report:
957
+ dashboard_urn = builder.make_dashboard_urn(
958
+ platform=self.__config.platform_name,
959
+ platform_instance=self.__config.platform_instance,
960
+ name=t.report.get_urn_part(),
961
+ )
962
+ edge = EdgeClass(
963
+ destinationUrn=dashboard_urn,
964
+ )
965
+ dashboard_edges.append(edge)
966
+
948
967
  # Lets convert dashboard to datahub dashboard
949
968
  dashboard_mcps: List[MetadataChangeProposalWrapper] = (
950
- self.to_datahub_dashboard_mcp(dashboard, workspace, chart_mcps, user_mcps)
969
+ self.to_datahub_dashboard_mcp(
970
+ dashboard=dashboard,
971
+ workspace=workspace,
972
+ chart_mcps=chart_mcps,
973
+ user_mcps=user_mcps,
974
+ dashboard_edges=dashboard_edges,
975
+ )
951
976
  )
952
977
 
953
978
  # Now add MCPs in sequence
@@ -1054,7 +1079,7 @@ class Mapper:
1054
1079
  report: powerbi_data_classes.Report,
1055
1080
  chart_mcps: List[MetadataChangeProposalWrapper],
1056
1081
  user_mcps: List[MetadataChangeProposalWrapper],
1057
- dashboard_edges: List[EdgeClass],
1082
+ dataset_edges: List[EdgeClass],
1058
1083
  ) -> List[MetadataChangeProposalWrapper]:
1059
1084
  """
1060
1085
  Map PowerBi report to Datahub dashboard
@@ -1076,7 +1101,7 @@ class Mapper:
1076
1101
  charts=chart_urn_list,
1077
1102
  lastModified=ChangeAuditStamps(),
1078
1103
  dashboardUrl=report.webUrl,
1079
- dashboards=dashboard_edges,
1104
+ datasetEdges=dataset_edges,
1080
1105
  )
1081
1106
 
1082
1107
  info_mcp = self.new_mcp(
@@ -1170,27 +1195,23 @@ class Mapper:
1170
1195
  ds_mcps = self.to_datahub_dataset(report.dataset, workspace)
1171
1196
  chart_mcps = self.pages_to_chart(report.pages, workspace, ds_mcps)
1172
1197
 
1173
- # find all dashboards with a Tile referencing this report
1174
- downstream_dashboards_edges = []
1175
- for d in workspace.dashboards.values():
1176
- if any(t.report_id == report.id for t in d.tiles):
1177
- dashboard_urn = builder.make_dashboard_urn(
1178
- platform=self.__config.platform_name,
1179
- platform_instance=self.__config.platform_instance,
1180
- name=d.get_urn_part(),
1181
- )
1182
- edge = EdgeClass(
1183
- destinationUrn=dashboard_urn,
1184
- sourceUrn=None,
1185
- created=None,
1186
- lastModified=None,
1187
- properties=None,
1188
- )
1189
- downstream_dashboards_edges.append(edge)
1198
+ # collect all upstream datasets; using a set to retain unique urns
1199
+ dataset_urns = {
1200
+ dataset.entityUrn
1201
+ for dataset in ds_mcps
1202
+ if dataset.entityType == DatasetUrn.ENTITY_TYPE and dataset.entityUrn
1203
+ }
1204
+ dataset_edges = [
1205
+ EdgeClass(destinationUrn=dataset_urn) for dataset_urn in dataset_urns
1206
+ ]
1190
1207
 
1191
1208
  # Let's convert report to datahub dashboard
1192
1209
  report_mcps = self.report_to_dashboard(
1193
- workspace, report, chart_mcps, user_mcps, downstream_dashboards_edges
1210
+ workspace=workspace,
1211
+ report=report,
1212
+ chart_mcps=chart_mcps,
1213
+ user_mcps=user_mcps,
1214
+ dataset_edges=dataset_edges,
1194
1215
  )
1195
1216
 
1196
1217
  # Now add MCPs in sequence
@@ -1206,7 +1227,14 @@ class Mapper:
1206
1227
  @platform_name("PowerBI")
1207
1228
  @config_class(PowerBiDashboardSourceConfig)
1208
1229
  @support_status(SupportStatus.CERTIFIED)
1209
- @capability(SourceCapability.CONTAINERS, "Enabled by default")
1230
+ @capability(
1231
+ SourceCapability.CONTAINERS,
1232
+ "Enabled by default",
1233
+ subtype_modifier=[
1234
+ SourceCapabilityModifier.POWERBI_WORKSPACE,
1235
+ SourceCapabilityModifier.POWERBI_DATASET,
1236
+ ],
1237
+ )
1210
1238
  @capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
1211
1239
  @capability(SourceCapability.OWNERSHIP, "Enabled by default")
1212
1240
  @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
@@ -1228,6 +1256,7 @@ class Mapper:
1228
1256
  SourceCapability.DATA_PROFILING,
1229
1257
  "Optionally enabled via configuration profiling.enabled",
1230
1258
  )
1259
+ @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
1231
1260
  class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
1232
1261
  """
1233
1262
  This plugin extracts the following:
@@ -1300,7 +1329,9 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
1300
1329
 
1301
1330
  allowed_workspaces = []
1302
1331
  for workspace in all_workspaces:
1303
- if not self.source_config.workspace_id_pattern.allowed(workspace.id):
1332
+ if not self.source_config.workspace_id_pattern.allowed(
1333
+ workspace.id
1334
+ ) or not self.source_config.workspace_name_pattern.allowed(workspace.name):
1304
1335
  self.reporter.filtered_workspace_names.append(
1305
1336
  f"{workspace.id} - {workspace.name}"
1306
1337
  )
@@ -1326,7 +1357,7 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
1326
1357
  for data_platform in SupportedDataPlatform
1327
1358
  ]
1328
1359
 
1329
- for key in self.source_config.dataset_type_mapping.keys():
1360
+ for key in self.source_config.dataset_type_mapping:
1330
1361
  if key not in powerbi_data_platforms:
1331
1362
  raise ValueError(f"PowerBI DataPlatform {key} is not supported")
1332
1363
 
@@ -1516,6 +1547,9 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
1516
1547
  else:
1517
1548
  return [
1518
1549
  *super().get_workunit_processors(),
1550
+ functools.partial(
1551
+ auto_incremental_lineage, self.source_config.incremental_lineage
1552
+ ),
1519
1553
  self.stale_entity_removal_handler.workunit_processor,
1520
1554
  ]
1521
1555
 
@@ -63,10 +63,10 @@ class SessionWithTimeout(requests.Session):
63
63
  super().__init__(*args, **kwargs)
64
64
  self.timeout = timeout
65
65
 
66
- def request(self, method, url, **kwargs):
66
+ def request(self, method, url, *args, **kwargs):
67
67
  # Set the default timeout if none is provided
68
68
  kwargs.setdefault("timeout", self.timeout)
69
- return super().request(method, url, **kwargs)
69
+ return super().request(method, url, *args, **kwargs)
70
70
 
71
71
 
72
72
  class DataResolverBase(ABC):
@@ -115,7 +115,7 @@ class PowerBiAPI:
115
115
  if scan_result is None:
116
116
  return results
117
117
 
118
- for scanned_dashboard in scan_result.get(Constant.DASHBOARDS, []):
118
+ for scanned_dashboard in scan_result.get(Constant.DASHBOARDS) or []:
119
119
  # Iterate through response and create a list of PowerBiAPI.Dashboard
120
120
  dashboard_id = scanned_dashboard.get("id")
121
121
  tags = self._parse_endorsement(
@@ -133,17 +133,17 @@ class PowerBiAPI:
133
133
  if scan_result is None:
134
134
  return results
135
135
 
136
- reports: List[dict] = scan_result.get(Constant.REPORTS, [])
136
+ reports: List[dict] = scan_result.get(Constant.REPORTS) or []
137
137
 
138
138
  for report in reports:
139
- report_id = report.get(Constant.ID, None)
139
+ report_id = report.get(Constant.ID)
140
140
  if report_id is None:
141
141
  logger.warning(
142
142
  f"Report id is none. Skipping endorsement tag for report instance {report}"
143
143
  )
144
144
  continue
145
145
  endorsements = self._parse_endorsement(
146
- report.get(Constant.ENDORSEMENT_DETAIL, None)
146
+ report.get(Constant.ENDORSEMENT_DETAIL)
147
147
  )
148
148
  results[report_id] = endorsements
149
149
 
@@ -339,7 +339,7 @@ class PowerBiAPI:
339
339
  if not endorsements:
340
340
  return []
341
341
 
342
- endorsement = endorsements.get(Constant.ENDORSEMENT, None)
342
+ endorsement = endorsements.get(Constant.ENDORSEMENT)
343
343
  if not endorsement:
344
344
  return []
345
345
 
@@ -396,7 +396,7 @@ class PowerBiAPI:
396
396
 
397
397
  if self.__config.extract_endorsements_to_tags:
398
398
  dataset_instance.tags = self._parse_endorsement(
399
- dataset_dict.get(Constant.ENDORSEMENT_DETAIL, None)
399
+ dataset_dict.get(Constant.ENDORSEMENT_DETAIL)
400
400
  )
401
401
 
402
402
  dataset_map[dataset_instance.id] = dataset_instance
@@ -407,7 +407,7 @@ class PowerBiAPI:
407
407
  else dataset_instance.id
408
408
  )
409
409
  logger.debug(f"dataset_dict = {dataset_dict}")
410
- for table in dataset_dict.get(Constant.TABLES, []):
410
+ for table in dataset_dict.get(Constant.TABLES) or []:
411
411
  expression: Optional[str] = (
412
412
  table[Constant.SOURCE][0][Constant.EXPRESSION]
413
413
  if table.get(Constant.SOURCE) is not None
@@ -430,10 +430,10 @@ class PowerBiAPI:
430
430
  column["dataType"], FIELD_TYPE_MAPPING["Null"]
431
431
  ),
432
432
  )
433
- for column in table.get("columns", [])
433
+ for column in table.get("columns") or []
434
434
  ],
435
435
  measures=[
436
- Measure(**measure) for measure in table.get("measures", [])
436
+ Measure(**measure) for measure in table.get("measures") or []
437
437
  ],
438
438
  dataset=dataset_instance,
439
439
  row_count=None,
@@ -480,7 +480,7 @@ class PowerBiAPI:
480
480
  )
481
481
  )
482
482
  if app_id is None: # In PowerBI one workspace can have one app
483
- app_id = report.get(Constant.APP_ID)
483
+ app_id = report[Constant.APP_ID]
484
484
 
485
485
  raw_app_dashboards: List[Dict] = []
486
486
  # Filter app dashboards
@@ -488,7 +488,7 @@ class PowerBiAPI:
488
488
  if dashboard.get(Constant.APP_ID):
489
489
  raw_app_dashboards.append(dashboard)
490
490
  if app_id is None: # In PowerBI, one workspace contains one app
491
- app_id = report[Constant.APP_ID]
491
+ app_id = dashboard[Constant.APP_ID]
492
492
 
493
493
  # workspace doesn't have an App. Above two loops can be avoided
494
494
  # if app_id is available at root level in workspace_metadata
@@ -673,7 +673,6 @@ class PowerBiAPI:
673
673
  fill_dashboard_tags()
674
674
  self._fill_independent_datasets(workspace=workspace)
675
675
 
676
- # flake8: noqa: C901
677
676
  def fill_workspaces(
678
677
  self, workspaces: List[Workspace], reporter: PowerBiDashboardSourceReport
679
678
  ) -> Iterable[Workspace]:
@@ -52,7 +52,6 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
52
52
  from datahub.metadata.com.linkedin.pegasus2avro.common import ChangeAuditStamps
53
53
  from datahub.metadata.schema_classes import (
54
54
  BrowsePathsClass,
55
- ChangeTypeClass,
56
55
  CorpUserInfoClass,
57
56
  CorpUserKeyClass,
58
57
  DashboardInfoClass,
@@ -243,20 +242,14 @@ class Mapper:
243
242
 
244
243
  @staticmethod
245
244
  def new_mcp(
246
- entity_type,
247
245
  entity_urn,
248
- aspect_name,
249
246
  aspect,
250
- change_type=ChangeTypeClass.UPSERT,
251
247
  ):
252
248
  """
253
249
  Create MCP
254
250
  """
255
251
  return MetadataChangeProposalWrapper(
256
- entityType=entity_type,
257
- changeType=change_type,
258
252
  entityUrn=entity_urn,
259
- aspectName=aspect_name,
260
253
  aspect=aspect,
261
254
  )
262
255
 
@@ -343,17 +336,13 @@ class Mapper:
343
336
  )
344
337
 
345
338
  info_mcp = self.new_mcp(
346
- entity_type=Constant.DASHBOARD,
347
339
  entity_urn=dashboard_urn,
348
- aspect_name=Constant.DASHBOARD_INFO,
349
340
  aspect=dashboard_info_cls,
350
341
  )
351
342
 
352
343
  # removed status mcp
353
344
  removed_status_mcp = self.new_mcp(
354
- entity_type=Constant.DASHBOARD,
355
345
  entity_urn=dashboard_urn,
356
- aspect_name=Constant.STATUS,
357
346
  aspect=StatusClass(removed=False),
358
347
  )
359
348
 
@@ -365,9 +354,7 @@ class Mapper:
365
354
 
366
355
  # Dashboard key
367
356
  dashboard_key_mcp = self.new_mcp(
368
- entity_type=Constant.DASHBOARD,
369
357
  entity_urn=dashboard_urn,
370
- aspect_name=Constant.DASHBOARD_KEY,
371
358
  aspect=dashboard_key_cls,
372
359
  )
373
360
 
@@ -378,9 +365,7 @@ class Mapper:
378
365
  ownership = OwnershipClass(owners=owners)
379
366
  # Dashboard owner MCP
380
367
  owner_mcp = self.new_mcp(
381
- entity_type=Constant.DASHBOARD,
382
368
  entity_urn=dashboard_urn,
383
- aspect_name=Constant.OWNERSHIP,
384
369
  aspect=ownership,
385
370
  )
386
371
 
@@ -396,9 +381,7 @@ class Mapper:
396
381
  ]
397
382
  )
398
383
  browse_path_mcp = self.new_mcp(
399
- entity_type=Constant.DASHBOARD,
400
384
  entity_urn=dashboard_urn,
401
- aspect_name=Constant.BROWSERPATH,
402
385
  aspect=browse_path,
403
386
  )
404
387
 
@@ -429,27 +412,21 @@ class Mapper:
429
412
  )
430
413
 
431
414
  info_mcp = self.new_mcp(
432
- entity_type=Constant.CORP_USER,
433
415
  entity_urn=user_urn,
434
- aspect_name=Constant.CORP_USER_INFO,
435
416
  aspect=user_info_instance,
436
417
  )
437
418
  user_mcps.append(info_mcp)
438
419
 
439
420
  # removed status mcp
440
421
  status_mcp = self.new_mcp(
441
- entity_type=Constant.CORP_USER,
442
422
  entity_urn=user_urn,
443
- aspect_name=Constant.STATUS,
444
423
  aspect=StatusClass(removed=False),
445
424
  )
446
425
  user_mcps.append(status_mcp)
447
426
  user_key = CorpUserKeyClass(username=user.username)
448
427
 
449
428
  user_key_mcp = self.new_mcp(
450
- entity_type=Constant.CORP_USER,
451
429
  entity_urn=user_urn,
452
- aspect_name=Constant.CORP_USER_KEY,
453
430
  aspect=user_key,
454
431
  )
455
432
  user_mcps.append(user_key_mcp)
@@ -27,10 +27,8 @@ class CatalogItem(BaseModel):
27
27
  is_favorite: bool = Field(alias="IsFavorite")
28
28
  user_info: Any = Field(None, alias="UserInfo")
29
29
  display_name: Optional[str] = Field(None, alias="DisplayName")
30
- has_data_sources: bool = Field(default=False, alias="HasDataSources")
31
- data_sources: Optional[List["DataSource"]] = Field(
32
- default_factory=list, alias="DataSources"
33
- )
30
+ has_data_sources: bool = Field(False, alias="HasDataSources")
31
+ data_sources: Optional[List["DataSource"]] = Field(None, alias="DataSources")
34
32
 
35
33
  @validator("display_name", always=True)
36
34
  def validate_diplay_name(cls, value, values):
@@ -2,7 +2,7 @@ import logging
2
2
  from typing import Dict, Optional
3
3
 
4
4
  import requests
5
- from pydantic.class_validators import root_validator, validator
5
+ from pydantic import root_validator, validator
6
6
  from pydantic.fields import Field
7
7
 
8
8
  from datahub.emitter.mce_builder import DEFAULT_ENV
@@ -69,9 +69,9 @@ class PresetConfig(SupersetConfig):
69
69
 
70
70
  @platform_name("Preset")
71
71
  @config_class(PresetConfig)
72
- @support_status(SupportStatus.TESTING)
72
+ @support_status(SupportStatus.CERTIFIED)
73
73
  @capability(
74
- SourceCapability.DELETION_DETECTION, "Optionally enabled via stateful_ingestion"
74
+ SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
75
75
  )
76
76
  class PresetSource(SupersetSource):
77
77
  """
@@ -1,8 +1,9 @@
1
+ from copy import deepcopy
1
2
  from datetime import datetime
2
3
  from enum import Enum
3
4
  from typing import Dict, List, Optional, Type, Union
4
5
 
5
- from pydantic import BaseModel, Field, root_validator
6
+ from pydantic import BaseModel, ConfigDict, Field, root_validator
6
7
 
7
8
  from datahub.emitter.mcp_builder import ContainerKey
8
9
  from datahub.ingestion.source.qlik_sense.config import QLIK_DATETIME_FORMAT, Constant
@@ -78,7 +79,11 @@ PERSONAL_SPACE_DICT = {
78
79
  }
79
80
 
80
81
 
81
- class Space(BaseModel):
82
+ class _QlikBaseModel(BaseModel):
83
+ model_config = ConfigDict(coerce_numbers_to_str=True)
84
+
85
+
86
+ class Space(_QlikBaseModel):
82
87
  id: str
83
88
  name: str
84
89
  description: str
@@ -89,6 +94,9 @@ class Space(BaseModel):
89
94
 
90
95
  @root_validator(pre=True)
91
96
  def update_values(cls, values: Dict) -> Dict:
97
+ # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
98
+ values = deepcopy(values)
99
+
92
100
  values[Constant.CREATEDAT] = datetime.strptime(
93
101
  values[Constant.CREATEDAT], QLIK_DATETIME_FORMAT
94
102
  )
@@ -98,7 +106,7 @@ class Space(BaseModel):
98
106
  return values
99
107
 
100
108
 
101
- class Item(BaseModel):
109
+ class Item(_QlikBaseModel):
102
110
  id: str
103
111
  description: str = ""
104
112
  ownerId: str
@@ -107,7 +115,7 @@ class Item(BaseModel):
107
115
  updatedAt: datetime
108
116
 
109
117
 
110
- class SchemaField(BaseModel):
118
+ class SchemaField(_QlikBaseModel):
111
119
  name: str
112
120
  dataType: Optional[str] = None
113
121
  primaryKey: Optional[bool] = None
@@ -115,6 +123,8 @@ class SchemaField(BaseModel):
115
123
 
116
124
  @root_validator(pre=True)
117
125
  def update_values(cls, values: Dict) -> Dict:
126
+ # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
127
+ values = deepcopy(values)
118
128
  values[Constant.DATATYPE] = values.get(Constant.DATATYPE, {}).get(Constant.TYPE)
119
129
  return values
120
130
 
@@ -130,6 +140,8 @@ class QlikDataset(Item):
130
140
 
131
141
  @root_validator(pre=True)
132
142
  def update_values(cls, values: Dict) -> Dict:
143
+ # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
144
+ values = deepcopy(values)
133
145
  # Update str time to datetime
134
146
  values[Constant.CREATEDAT] = datetime.strptime(
135
147
  values[Constant.CREATEDTIME], QLIK_DATETIME_FORMAT
@@ -148,13 +160,13 @@ class QlikDataset(Item):
148
160
  return values
149
161
 
150
162
 
151
- class AxisProperty(BaseModel):
163
+ class AxisProperty(_QlikBaseModel):
152
164
  Title: str = Field(alias="qFallbackTitle")
153
165
  Min: str = Field(alias="qMin")
154
166
  Max: str = Field(alias="qMax")
155
167
 
156
168
 
157
- class Chart(BaseModel):
169
+ class Chart(_QlikBaseModel):
158
170
  qId: str
159
171
  visualization: str
160
172
  title: str
@@ -164,13 +176,15 @@ class Chart(BaseModel):
164
176
 
165
177
  @root_validator(pre=True)
166
178
  def update_values(cls, values: Dict) -> Dict:
179
+ # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
180
+ values = deepcopy(values)
167
181
  values[Constant.QID] = values[Constant.QINFO][Constant.QID]
168
182
  values["qDimension"] = values[Constant.HYPERCUBE]["qDimensionInfo"]
169
183
  values["qMeasure"] = values[Constant.HYPERCUBE]["qMeasureInfo"]
170
184
  return values
171
185
 
172
186
 
173
- class Sheet(BaseModel):
187
+ class Sheet(_QlikBaseModel):
174
188
  id: str
175
189
  title: str
176
190
  description: str
@@ -181,6 +195,8 @@ class Sheet(BaseModel):
181
195
 
182
196
  @root_validator(pre=True)
183
197
  def update_values(cls, values: Dict) -> Dict:
198
+ # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
199
+ values = deepcopy(values)
184
200
  values[Constant.CREATEDAT] = datetime.strptime(
185
201
  values[Constant.CREATEDDATE], QLIK_DATETIME_FORMAT
186
202
  )
@@ -190,7 +206,7 @@ class Sheet(BaseModel):
190
206
  return values
191
207
 
192
208
 
193
- class QlikTable(BaseModel):
209
+ class QlikTable(_QlikBaseModel):
194
210
  tableName: str
195
211
  type: BoxType = Field(alias="boxType")
196
212
  tableAlias: str
@@ -206,6 +222,8 @@ class QlikTable(BaseModel):
206
222
 
207
223
  @root_validator(pre=True)
208
224
  def update_values(cls, values: Dict) -> Dict:
225
+ # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
226
+ values = deepcopy(values)
209
227
  values[Constant.DATACONNECTORID] = values[Constant.CONNECTIONINFO][Constant.ID]
210
228
  values[Constant.DATACONNECTORPLATFORM] = values[Constant.CONNECTIONINFO][
211
229
  Constant.SOURCECONNECTORID
@@ -223,6 +241,8 @@ class App(Item):
223
241
 
224
242
  @root_validator(pre=True)
225
243
  def update_values(cls, values: Dict) -> Dict:
244
+ # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
245
+ values = deepcopy(values)
226
246
  values[Constant.CREATEDAT] = datetime.strptime(
227
247
  values[Constant.CREATEDDATE], QLIK_DATETIME_FORMAT
228
248
  )
@@ -101,7 +101,7 @@ logger = logging.getLogger(__name__)
101
101
  )
102
102
  @capability(
103
103
  SourceCapability.LINEAGE_FINE,
104
- "Disabled by default. ",
104
+ "Disabled by default.",
105
105
  )
106
106
  @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
107
107
  @capability(
@@ -109,6 +109,7 @@ logger = logging.getLogger(__name__)
109
109
  "Enabled by default, configured using `ingest_owner`",
110
110
  )
111
111
  @capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
112
+ @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
112
113
  class QlikSenseSource(StatefulIngestionSourceBase, TestableSource):
113
114
  """
114
115
  This plugin extracts the following:
@@ -447,7 +447,7 @@ class RedashSource(StatefulIngestionSourceBase):
447
447
  dataset_urns = sql_parser_in_tables.in_tables
448
448
  if sql_parser_in_tables.debug_info.table_error:
449
449
  self.report.queries_problem_parsing.add(str(query_id))
450
- self.error(
450
+ self.warn(
451
451
  logger,
452
452
  "sql-parsing",
453
453
  f"exception {sql_parser_in_tables.debug_info.table_error} in parsing query-{query_id}-datasource-{data_source_id}",