acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. See the release details below for more information.

Files changed (414)
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +1 -1
  7. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  8. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  9. datahub/api/entities/dataset/dataset.py +26 -23
  10. datahub/api/entities/external/__init__.py +0 -0
  11. datahub/api/entities/external/external_entities.py +724 -0
  12. datahub/api/entities/external/external_tag.py +147 -0
  13. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  14. datahub/api/entities/external/restricted_text.py +172 -0
  15. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  16. datahub/api/entities/forms/forms.py +3 -3
  17. datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
  18. datahub/api/graphql/operation.py +10 -6
  19. datahub/cli/check_cli.py +88 -7
  20. datahub/cli/cli_utils.py +63 -0
  21. datahub/cli/config_utils.py +18 -10
  22. datahub/cli/container_cli.py +5 -0
  23. datahub/cli/delete_cli.py +125 -27
  24. datahub/cli/docker_check.py +110 -14
  25. datahub/cli/docker_cli.py +153 -229
  26. datahub/cli/exists_cli.py +0 -2
  27. datahub/cli/get_cli.py +0 -2
  28. datahub/cli/graphql_cli.py +1422 -0
  29. datahub/cli/iceberg_cli.py +5 -0
  30. datahub/cli/ingest_cli.py +3 -15
  31. datahub/cli/migrate.py +2 -0
  32. datahub/cli/put_cli.py +1 -4
  33. datahub/cli/quickstart_versioning.py +53 -10
  34. datahub/cli/specific/assertions_cli.py +37 -6
  35. datahub/cli/specific/datacontract_cli.py +54 -7
  36. datahub/cli/specific/dataproduct_cli.py +2 -15
  37. datahub/cli/specific/dataset_cli.py +1 -8
  38. datahub/cli/specific/forms_cli.py +0 -4
  39. datahub/cli/specific/group_cli.py +0 -2
  40. datahub/cli/specific/structuredproperties_cli.py +1 -4
  41. datahub/cli/specific/user_cli.py +172 -3
  42. datahub/cli/state_cli.py +0 -2
  43. datahub/cli/timeline_cli.py +0 -2
  44. datahub/configuration/common.py +40 -1
  45. datahub/configuration/connection_resolver.py +5 -2
  46. datahub/configuration/env_vars.py +331 -0
  47. datahub/configuration/import_resolver.py +7 -4
  48. datahub/configuration/kafka.py +21 -1
  49. datahub/configuration/pydantic_migration_helpers.py +6 -13
  50. datahub/configuration/source_common.py +3 -2
  51. datahub/configuration/validate_field_deprecation.py +5 -2
  52. datahub/configuration/validate_field_removal.py +8 -2
  53. datahub/configuration/validate_field_rename.py +6 -5
  54. datahub/configuration/validate_multiline_string.py +5 -2
  55. datahub/emitter/mce_builder.py +8 -4
  56. datahub/emitter/rest_emitter.py +103 -30
  57. datahub/entrypoints.py +6 -3
  58. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
  59. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  60. datahub/ingestion/api/decorators.py +15 -3
  61. datahub/ingestion/api/report.py +381 -3
  62. datahub/ingestion/api/sink.py +27 -2
  63. datahub/ingestion/api/source.py +165 -58
  64. datahub/ingestion/api/source_protocols.py +23 -0
  65. datahub/ingestion/autogenerated/__init__.py +0 -0
  66. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  67. datahub/ingestion/autogenerated/lineage.json +402 -0
  68. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  69. datahub/ingestion/extractor/schema_util.py +13 -4
  70. datahub/ingestion/glossary/classification_mixin.py +5 -0
  71. datahub/ingestion/graph/client.py +330 -25
  72. datahub/ingestion/graph/config.py +3 -2
  73. datahub/ingestion/graph/filters.py +30 -11
  74. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  75. datahub/ingestion/run/pipeline.py +81 -11
  76. datahub/ingestion/run/pipeline_config.py +2 -2
  77. datahub/ingestion/sink/datahub_kafka.py +1 -0
  78. datahub/ingestion/sink/datahub_rest.py +13 -5
  79. datahub/ingestion/sink/file.py +1 -0
  80. datahub/ingestion/source/abs/config.py +1 -1
  81. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  82. datahub/ingestion/source/abs/source.py +15 -30
  83. datahub/ingestion/source/aws/aws_common.py +185 -13
  84. datahub/ingestion/source/aws/glue.py +517 -244
  85. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  86. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  87. datahub/ingestion/source/aws/tag_entities.py +270 -0
  88. datahub/ingestion/source/azure/azure_common.py +3 -3
  89. datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
  90. datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
  91. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
  92. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
  93. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  94. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  95. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  96. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  97. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  98. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  99. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  100. datahub/ingestion/source/cassandra/cassandra.py +6 -8
  101. datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
  102. datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
  103. datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
  104. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  105. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  106. datahub/ingestion/source/common/subtypes.py +53 -0
  107. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  108. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  109. datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
  110. datahub/ingestion/source/datahub/config.py +12 -9
  111. datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
  112. datahub/ingestion/source/datahub/datahub_source.py +10 -0
  113. datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
  114. datahub/ingestion/source/dbt/dbt_common.py +224 -9
  115. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  116. datahub/ingestion/source/debug/__init__.py +0 -0
  117. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  118. datahub/ingestion/source/delta_lake/config.py +9 -5
  119. datahub/ingestion/source/delta_lake/source.py +8 -0
  120. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  121. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  122. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  123. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  124. datahub/ingestion/source/dremio/dremio_source.py +132 -98
  125. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  126. datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
  127. datahub/ingestion/source/excel/__init__.py +0 -0
  128. datahub/ingestion/source/excel/config.py +92 -0
  129. datahub/ingestion/source/excel/excel_file.py +539 -0
  130. datahub/ingestion/source/excel/profiling.py +308 -0
  131. datahub/ingestion/source/excel/report.py +49 -0
  132. datahub/ingestion/source/excel/source.py +662 -0
  133. datahub/ingestion/source/excel/util.py +18 -0
  134. datahub/ingestion/source/feast.py +8 -10
  135. datahub/ingestion/source/file.py +3 -0
  136. datahub/ingestion/source/fivetran/config.py +66 -7
  137. datahub/ingestion/source/fivetran/fivetran.py +227 -43
  138. datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
  139. datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
  140. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  141. datahub/ingestion/source/fivetran/response_models.py +97 -0
  142. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  143. datahub/ingestion/source/gcs/gcs_source.py +32 -4
  144. datahub/ingestion/source/ge_data_profiler.py +108 -31
  145. datahub/ingestion/source/ge_profiling_config.py +26 -11
  146. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  147. datahub/ingestion/source/grafana/field_utils.py +307 -0
  148. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  149. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  150. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  151. datahub/ingestion/source/grafana/lineage.py +202 -0
  152. datahub/ingestion/source/grafana/models.py +137 -0
  153. datahub/ingestion/source/grafana/report.py +90 -0
  154. datahub/ingestion/source/grafana/types.py +16 -0
  155. datahub/ingestion/source/hex/api.py +28 -1
  156. datahub/ingestion/source/hex/hex.py +16 -5
  157. datahub/ingestion/source/hex/mapper.py +16 -2
  158. datahub/ingestion/source/hex/model.py +2 -0
  159. datahub/ingestion/source/hex/query_fetcher.py +1 -1
  160. datahub/ingestion/source/iceberg/iceberg.py +123 -59
  161. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  162. datahub/ingestion/source/identity/azure_ad.py +1 -1
  163. datahub/ingestion/source/identity/okta.py +1 -14
  164. datahub/ingestion/source/kafka/kafka.py +16 -0
  165. datahub/ingestion/source/kafka_connect/common.py +2 -2
  166. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  167. datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
  168. datahub/ingestion/source/looker/looker_common.py +148 -79
  169. datahub/ingestion/source/looker/looker_config.py +15 -4
  170. datahub/ingestion/source/looker/looker_constant.py +4 -0
  171. datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
  172. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  173. datahub/ingestion/source/looker/looker_source.py +503 -547
  174. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  175. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  176. datahub/ingestion/source/looker/lookml_config.py +31 -3
  177. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  178. datahub/ingestion/source/looker/lookml_source.py +96 -117
  179. datahub/ingestion/source/looker/view_upstream.py +494 -1
  180. datahub/ingestion/source/metabase.py +32 -6
  181. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  182. datahub/ingestion/source/metadata/lineage.py +9 -9
  183. datahub/ingestion/source/mlflow.py +12 -2
  184. datahub/ingestion/source/mock_data/__init__.py +0 -0
  185. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  186. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  187. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  188. datahub/ingestion/source/mode.py +26 -5
  189. datahub/ingestion/source/mongodb.py +11 -1
  190. datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
  191. datahub/ingestion/source/nifi.py +2 -2
  192. datahub/ingestion/source/openapi.py +1 -1
  193. datahub/ingestion/source/powerbi/config.py +47 -21
  194. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  195. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  196. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  197. datahub/ingestion/source/powerbi/powerbi.py +10 -6
  198. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  199. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  200. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  201. datahub/ingestion/source/preset.py +3 -3
  202. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  203. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  204. datahub/ingestion/source/redash.py +1 -1
  205. datahub/ingestion/source/redshift/config.py +15 -9
  206. datahub/ingestion/source/redshift/datashares.py +1 -1
  207. datahub/ingestion/source/redshift/lineage.py +386 -687
  208. datahub/ingestion/source/redshift/query.py +23 -19
  209. datahub/ingestion/source/redshift/redshift.py +52 -111
  210. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  211. datahub/ingestion/source/redshift/report.py +0 -2
  212. datahub/ingestion/source/redshift/usage.py +6 -5
  213. datahub/ingestion/source/s3/report.py +4 -2
  214. datahub/ingestion/source/s3/source.py +449 -248
  215. datahub/ingestion/source/sac/sac.py +3 -1
  216. datahub/ingestion/source/salesforce.py +28 -13
  217. datahub/ingestion/source/schema/json_schema.py +14 -14
  218. datahub/ingestion/source/schema_inference/object.py +22 -6
  219. datahub/ingestion/source/sigma/data_classes.py +3 -0
  220. datahub/ingestion/source/sigma/sigma.py +7 -1
  221. datahub/ingestion/source/slack/slack.py +10 -16
  222. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  223. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  224. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  225. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  226. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  227. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  228. datahub/ingestion/source/snowflake/constants.py +3 -0
  229. datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
  230. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
  231. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
  232. datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
  233. datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
  234. datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
  235. datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
  236. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
  237. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  238. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  239. datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
  240. datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
  241. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  242. datahub/ingestion/source/sql/athena.py +217 -25
  243. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  244. datahub/ingestion/source/sql/clickhouse.py +24 -8
  245. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  246. datahub/ingestion/source/sql/druid.py +2 -2
  247. datahub/ingestion/source/sql/hana.py +3 -1
  248. datahub/ingestion/source/sql/hive.py +4 -3
  249. datahub/ingestion/source/sql/hive_metastore.py +19 -20
  250. datahub/ingestion/source/sql/mariadb.py +0 -1
  251. datahub/ingestion/source/sql/mssql/job_models.py +3 -1
  252. datahub/ingestion/source/sql/mssql/source.py +336 -57
  253. datahub/ingestion/source/sql/mysql.py +154 -4
  254. datahub/ingestion/source/sql/oracle.py +5 -5
  255. datahub/ingestion/source/sql/postgres.py +142 -6
  256. datahub/ingestion/source/sql/presto.py +2 -1
  257. datahub/ingestion/source/sql/sql_common.py +281 -49
  258. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  259. datahub/ingestion/source/sql/sql_types.py +22 -0
  260. datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
  261. datahub/ingestion/source/sql/teradata.py +1028 -245
  262. datahub/ingestion/source/sql/trino.py +11 -1
  263. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  264. datahub/ingestion/source/sql/vertica.py +14 -7
  265. datahub/ingestion/source/sql_queries.py +219 -121
  266. datahub/ingestion/source/state/checkpoint.py +8 -29
  267. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  268. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  269. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  270. datahub/ingestion/source/superset.py +314 -67
  271. datahub/ingestion/source/tableau/tableau.py +135 -59
  272. datahub/ingestion/source/tableau/tableau_common.py +9 -2
  273. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  274. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  275. datahub/ingestion/source/unity/config.py +160 -40
  276. datahub/ingestion/source/unity/connection.py +61 -0
  277. datahub/ingestion/source/unity/connection_test.py +1 -0
  278. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  279. datahub/ingestion/source/unity/proxy.py +794 -51
  280. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  281. datahub/ingestion/source/unity/proxy_types.py +36 -2
  282. datahub/ingestion/source/unity/report.py +15 -3
  283. datahub/ingestion/source/unity/source.py +465 -131
  284. datahub/ingestion/source/unity/tag_entities.py +197 -0
  285. datahub/ingestion/source/unity/usage.py +46 -4
  286. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  287. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
  288. datahub/ingestion/source/usage/usage_common.py +4 -3
  289. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  290. datahub/ingestion/source_config/pulsar.py +3 -1
  291. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  292. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  293. datahub/ingestion/transformer/base_transformer.py +8 -5
  294. datahub/ingestion/transformer/set_browse_path.py +112 -0
  295. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  296. datahub/metadata/_internal_schema_classes.py +6806 -4871
  297. datahub/metadata/_urns/urn_defs.py +1767 -1539
  298. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  299. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  300. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  301. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  302. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  303. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
  304. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  305. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  306. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  307. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  308. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  309. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  310. datahub/metadata/schema.avsc +18395 -16979
  311. datahub/metadata/schemas/Actors.avsc +38 -1
  312. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  313. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  314. datahub/metadata/schemas/Applications.avsc +38 -0
  315. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  316. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  317. datahub/metadata/schemas/ChartKey.avsc +1 -0
  318. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  319. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  320. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  321. datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
  322. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  323. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  324. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  325. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  326. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  327. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  328. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  329. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  330. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  331. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  332. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  333. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  334. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  335. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  336. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  337. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  338. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  339. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  340. datahub/metadata/schemas/DomainKey.avsc +2 -1
  341. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  342. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  343. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  344. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  345. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  346. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  347. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  348. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  349. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  350. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  351. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  352. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  353. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  354. datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
  355. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  356. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  357. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  358. datahub/metadata/schemas/Operation.avsc +4 -2
  359. datahub/metadata/schemas/Ownership.avsc +69 -0
  360. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  361. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  362. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  363. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  364. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  365. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  366. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  367. datahub/sdk/__init__.py +2 -0
  368. datahub/sdk/_all_entities.py +7 -0
  369. datahub/sdk/_shared.py +249 -5
  370. datahub/sdk/chart.py +386 -0
  371. datahub/sdk/container.py +7 -0
  372. datahub/sdk/dashboard.py +453 -0
  373. datahub/sdk/dataflow.py +7 -0
  374. datahub/sdk/datajob.py +45 -13
  375. datahub/sdk/dataset.py +56 -2
  376. datahub/sdk/entity_client.py +111 -9
  377. datahub/sdk/lineage_client.py +663 -82
  378. datahub/sdk/main_client.py +50 -16
  379. datahub/sdk/mlmodel.py +120 -38
  380. datahub/sdk/mlmodelgroup.py +7 -0
  381. datahub/sdk/search_client.py +7 -3
  382. datahub/sdk/search_filters.py +304 -36
  383. datahub/secret/datahub_secret_store.py +3 -0
  384. datahub/secret/environment_secret_store.py +29 -0
  385. datahub/secret/file_secret_store.py +49 -0
  386. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  387. datahub/specific/aspect_helpers/siblings.py +73 -0
  388. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  389. datahub/specific/chart.py +1 -1
  390. datahub/specific/datajob.py +15 -1
  391. datahub/specific/dataproduct.py +4 -0
  392. datahub/specific/dataset.py +39 -59
  393. datahub/sql_parsing/split_statements.py +13 -0
  394. datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
  395. datahub/sql_parsing/sqlglot_lineage.py +196 -42
  396. datahub/sql_parsing/sqlglot_utils.py +12 -4
  397. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  398. datahub/telemetry/telemetry.py +28 -14
  399. datahub/testing/sdk_v2_helpers.py +7 -1
  400. datahub/upgrade/upgrade.py +73 -17
  401. datahub/utilities/file_backed_collections.py +8 -9
  402. datahub/utilities/is_pytest.py +3 -2
  403. datahub/utilities/logging_manager.py +22 -6
  404. datahub/utilities/mapping.py +29 -2
  405. datahub/utilities/sample_data.py +5 -4
  406. datahub/utilities/server_config_util.py +10 -1
  407. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  408. datahub/utilities/stats_collections.py +4 -0
  409. datahub/utilities/urns/urn.py +41 -2
  410. datahub/emitter/sql_parsing_builder.py +0 -306
  411. datahub/ingestion/source/redshift/lineage_v2.py +0 -466
  412. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
  413. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
  414. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -1,13 +1,13 @@
1
1
  import functools
2
2
  import importlib.resources as pkg_resource
3
3
  import logging
4
- import os
5
4
  from typing import Dict, List, Optional
6
5
 
7
6
  import lark
8
7
  from lark import Lark, Tree
9
8
 
10
9
  import datahub.ingestion.source.powerbi.m_query.data_classes
10
+ from datahub.configuration.env_vars import get_powerbi_m_query_parse_timeout
11
11
  from datahub.ingestion.api.common import PipelineContext
12
12
  from datahub.ingestion.source.powerbi.config import (
13
13
  PowerBiDashboardSourceConfig,
@@ -25,7 +25,7 @@ from datahub.utilities.threading_timeout import TimeoutException, threading_time
25
25
 
26
26
  logger = logging.getLogger(__name__)
27
27
 
28
- _M_QUERY_PARSE_TIMEOUT = int(os.getenv("DATAHUB_POWERBI_M_QUERY_PARSE_TIMEOUT", 60))
28
+ _M_QUERY_PARSE_TIMEOUT = get_powerbi_m_query_parse_timeout()
29
29
 
30
30
 
31
31
  @functools.lru_cache(maxsize=1)
@@ -3,7 +3,9 @@ from abc import ABC, abstractmethod
3
3
  from enum import Enum
4
4
  from typing import Dict, List, Optional, Tuple, Type, cast
5
5
 
6
+ import sqlglot
6
7
  from lark import Tree
8
+ from sqlglot import ParseError, expressions as exp
7
9
 
8
10
  from datahub.configuration.source_common import PlatformDetail
9
11
  from datahub.emitter import mce_builder as builder
@@ -209,15 +211,34 @@ class AbstractLineage(ABC):
209
211
 
210
212
  return None
211
213
 
214
+ @staticmethod
215
+ def is_sql_query(query: Optional[str]) -> bool:
216
+ if not query:
217
+ return False
218
+ query = native_sql_parser.remove_special_characters(query)
219
+ try:
220
+ expression = sqlglot.parse_one(query)
221
+ return isinstance(expression, exp.Select)
222
+ except (ParseError, Exception):
223
+ logger.debug(f"Failed to parse query as SQL: {query}")
224
+ return False
225
+
212
226
  def parse_custom_sql(
213
- self, query: str, server: str, database: Optional[str], schema: Optional[str]
227
+ self,
228
+ query: str,
229
+ server: str,
230
+ database: Optional[str],
231
+ schema: Optional[str],
232
+ platform_pair: Optional[DataPlatformPair] = None,
214
233
  ) -> Lineage:
215
234
  dataplatform_tables: List[DataPlatformTable] = []
235
+ if not platform_pair:
236
+ platform_pair = self.get_platform_pair()
216
237
 
217
238
  platform_detail: PlatformDetail = (
218
239
  self.platform_instance_resolver.get_platform_instance(
219
240
  PowerBIPlatformDetail(
220
- data_platform_pair=self.get_platform_pair(),
241
+ data_platform_pair=platform_pair,
221
242
  data_platform_server=server,
222
243
  )
223
244
  )
@@ -231,7 +252,7 @@ class AbstractLineage(ABC):
231
252
  native_sql_parser.parse_custom_sql(
232
253
  ctx=self.ctx,
233
254
  query=query,
234
- platform=self.get_platform_pair().datahub_data_platform_name,
255
+ platform=platform_pair.datahub_data_platform_name,
235
256
  platform_instance=platform_detail.platform_instance,
236
257
  env=platform_detail.env,
237
258
  database=database,
@@ -258,7 +279,7 @@ class AbstractLineage(ABC):
258
279
  for urn in parsed_result.in_tables:
259
280
  dataplatform_tables.append(
260
281
  DataPlatformTable(
261
- data_platform_pair=self.get_platform_pair(),
282
+ data_platform_pair=platform_pair,
262
283
  urn=urn,
263
284
  )
264
285
  )
@@ -956,7 +977,7 @@ class OdbcLineage(AbstractLineage):
956
977
  f"data-access function detail {data_access_func_detail}"
957
978
  )
958
979
 
959
- connect_string, _ = self.get_db_detail_from_argument(
980
+ connect_string, query = self.get_db_detail_from_argument(
960
981
  data_access_func_detail.arg_list
961
982
  )
962
983
 
@@ -972,12 +993,19 @@ class OdbcLineage(AbstractLineage):
972
993
  data_platform, powerbi_platform = extract_platform(connect_string)
973
994
  server_name = extract_server(connect_string)
974
995
 
996
+ dsn = extract_dsn(connect_string)
997
+ if not dsn:
998
+ self.reporter.warning(
999
+ title="Can not determine ODBC DSN",
1000
+ message="Can not extract DSN from ODBC connect string. Skipping Lineage creation.",
1001
+ context=f"table-name={self.table.full_name}, connect-string={connect_string}",
1002
+ )
1003
+ return Lineage.empty()
1004
+ logger.debug(f"Extracted DSN: {dsn}")
1005
+
975
1006
  if not data_platform:
976
- dsn = extract_dsn(connect_string)
977
- if dsn:
978
- logger.debug(f"Extracted DSN: {dsn}")
979
- server_name = dsn
980
- if dsn and self.config.dsn_to_platform_name:
1007
+ server_name = dsn
1008
+ if self.config.dsn_to_platform_name:
981
1009
  logger.debug(f"Attempting to map DSN {dsn} to platform")
982
1010
  name = self.config.dsn_to_platform_name.get(dsn)
983
1011
  if name:
@@ -1006,6 +1034,63 @@ class OdbcLineage(AbstractLineage):
1006
1034
  elif not server_name:
1007
1035
  server_name = "unknown"
1008
1036
 
1037
+ if self.is_sql_query(query):
1038
+ return self.query_lineage(query, platform_pair, server_name, dsn)
1039
+ else:
1040
+ return self.expression_lineage(
1041
+ data_access_func_detail, data_platform, platform_pair, server_name
1042
+ )
1043
+
1044
+ def query_lineage(
1045
+ self,
1046
+ query: Optional[str],
1047
+ platform_pair: DataPlatformPair,
1048
+ server_name: str,
1049
+ dsn: str,
1050
+ ) -> Lineage:
1051
+ database = None
1052
+ schema = None
1053
+
1054
+ if not query:
1055
+ # query should never be None as it is checked before calling this function.
1056
+ # however, we need to check just in case.
1057
+ self.reporter.warning(
1058
+ title="ODBC Query is null",
1059
+ message="No SQL to parse. Skipping Lineage creation.",
1060
+ context=f"table-name={self.table.full_name}",
1061
+ )
1062
+ return Lineage.empty()
1063
+
1064
+ if self.config.dsn_to_database_schema:
1065
+ value = self.config.dsn_to_database_schema.get(dsn)
1066
+ if value:
1067
+ parts = value.split(".")
1068
+ if len(parts) == 1:
1069
+ database = parts[0]
1070
+ elif len(parts) == 2:
1071
+ database = parts[0]
1072
+ schema = parts[1]
1073
+
1074
+ logger.debug(
1075
+ f"ODBC query processing: dsn={dsn} mapped to database={database}, schema={schema}"
1076
+ )
1077
+ result = self.parse_custom_sql(
1078
+ query=query,
1079
+ server=server_name,
1080
+ database=database,
1081
+ schema=schema,
1082
+ platform_pair=platform_pair,
1083
+ )
1084
+ logger.debug(f"ODBC query lineage generated {len(result.upstreams)} upstreams")
1085
+ return result
1086
+
1087
+ def expression_lineage(
1088
+ self,
1089
+ data_access_func_detail: DataAccessFunctionDetail,
1090
+ data_platform: str,
1091
+ platform_pair: DataPlatformPair,
1092
+ server_name: str,
1093
+ ) -> Lineage:
1009
1094
  database_name = None
1010
1095
  schema_name = None
1011
1096
  table_name = None
@@ -1144,6 +1229,11 @@ class SupportedPattern(Enum):
1144
1229
  FunctionName.ODBC_DATA_ACCESS,
1145
1230
  )
1146
1231
 
1232
+ ODBC_QUERY = (
1233
+ OdbcLineage,
1234
+ FunctionName.ODBC_QUERY,
1235
+ )
1236
+
1147
1237
  def handler(self) -> Type[AbstractLineage]:
1148
1238
  return self.value[0]
1149
1239
 
@@ -40,6 +40,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
40
40
  from datahub.ingestion.source.common.subtypes import (
41
41
  BIAssetSubTypes,
42
42
  BIContainerSubTypes,
43
+ SourceCapabilityModifier,
43
44
  )
44
45
  from datahub.ingestion.source.powerbi.config import (
45
46
  Constant,
@@ -294,8 +295,6 @@ class Mapper:
294
295
  logger.debug(f"Dataset urn = {ds_urn} and its lineage = {upstream_lineage}")
295
296
 
296
297
  mcp = MetadataChangeProposalWrapper(
297
- entityType=Constant.DATASET,
298
- changeType=ChangeTypeClass.UPSERT,
299
298
  entityUrn=ds_urn,
300
299
  aspect=upstream_lineage_class,
301
300
  )
@@ -538,9 +537,7 @@ class Mapper:
538
537
  profile.columnCount = table.column_count
539
538
 
540
539
  mcp = MetadataChangeProposalWrapper(
541
- entityType="dataset",
542
540
  entityUrn=ds_urn,
543
- aspectName="datasetProfile",
544
541
  aspect=profile,
545
542
  )
546
543
  dataset_mcps.append(mcp)
@@ -796,7 +793,6 @@ class Mapper:
796
793
  guid=container_key.guid(),
797
794
  )
798
795
  mcp = MetadataChangeProposalWrapper(
799
- changeType=ChangeTypeClass.UPSERT,
800
796
  entityUrn=entity_urn,
801
797
  aspect=ContainerClass(container=f"{container_urn}"),
802
798
  )
@@ -1231,7 +1227,14 @@ class Mapper:
1231
1227
  @platform_name("PowerBI")
1232
1228
  @config_class(PowerBiDashboardSourceConfig)
1233
1229
  @support_status(SupportStatus.CERTIFIED)
1234
- @capability(SourceCapability.CONTAINERS, "Enabled by default")
1230
+ @capability(
1231
+ SourceCapability.CONTAINERS,
1232
+ "Enabled by default",
1233
+ subtype_modifier=[
1234
+ SourceCapabilityModifier.POWERBI_WORKSPACE,
1235
+ SourceCapabilityModifier.POWERBI_DATASET,
1236
+ ],
1237
+ )
1235
1238
  @capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
1236
1239
  @capability(SourceCapability.OWNERSHIP, "Enabled by default")
1237
1240
  @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
@@ -1253,6 +1256,7 @@ class Mapper:
1253
1256
  SourceCapability.DATA_PROFILING,
1254
1257
  "Optionally enabled via configuration profiling.enabled",
1255
1258
  )
1259
+ @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
1256
1260
  class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
1257
1261
  """
1258
1262
  This plugin extracts the following:
@@ -673,7 +673,6 @@ class PowerBiAPI:
673
673
  fill_dashboard_tags()
674
674
  self._fill_independent_datasets(workspace=workspace)
675
675
 
676
- # flake8: noqa: C901
677
676
  def fill_workspaces(
678
677
  self, workspaces: List[Workspace], reporter: PowerBiDashboardSourceReport
679
678
  ) -> Iterable[Workspace]:
@@ -52,7 +52,6 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
52
52
  from datahub.metadata.com.linkedin.pegasus2avro.common import ChangeAuditStamps
53
53
  from datahub.metadata.schema_classes import (
54
54
  BrowsePathsClass,
55
- ChangeTypeClass,
56
55
  CorpUserInfoClass,
57
56
  CorpUserKeyClass,
58
57
  DashboardInfoClass,
@@ -243,20 +242,14 @@ class Mapper:
243
242
 
244
243
  @staticmethod
245
244
  def new_mcp(
246
- entity_type,
247
245
  entity_urn,
248
- aspect_name,
249
246
  aspect,
250
- change_type=ChangeTypeClass.UPSERT,
251
247
  ):
252
248
  """
253
249
  Create MCP
254
250
  """
255
251
  return MetadataChangeProposalWrapper(
256
- entityType=entity_type,
257
- changeType=change_type,
258
252
  entityUrn=entity_urn,
259
- aspectName=aspect_name,
260
253
  aspect=aspect,
261
254
  )
262
255
 
@@ -343,17 +336,13 @@ class Mapper:
343
336
  )
344
337
 
345
338
  info_mcp = self.new_mcp(
346
- entity_type=Constant.DASHBOARD,
347
339
  entity_urn=dashboard_urn,
348
- aspect_name=Constant.DASHBOARD_INFO,
349
340
  aspect=dashboard_info_cls,
350
341
  )
351
342
 
352
343
  # removed status mcp
353
344
  removed_status_mcp = self.new_mcp(
354
- entity_type=Constant.DASHBOARD,
355
345
  entity_urn=dashboard_urn,
356
- aspect_name=Constant.STATUS,
357
346
  aspect=StatusClass(removed=False),
358
347
  )
359
348
 
@@ -365,9 +354,7 @@ class Mapper:
365
354
 
366
355
  # Dashboard key
367
356
  dashboard_key_mcp = self.new_mcp(
368
- entity_type=Constant.DASHBOARD,
369
357
  entity_urn=dashboard_urn,
370
- aspect_name=Constant.DASHBOARD_KEY,
371
358
  aspect=dashboard_key_cls,
372
359
  )
373
360
 
@@ -378,9 +365,7 @@ class Mapper:
378
365
  ownership = OwnershipClass(owners=owners)
379
366
  # Dashboard owner MCP
380
367
  owner_mcp = self.new_mcp(
381
- entity_type=Constant.DASHBOARD,
382
368
  entity_urn=dashboard_urn,
383
- aspect_name=Constant.OWNERSHIP,
384
369
  aspect=ownership,
385
370
  )
386
371
 
@@ -396,9 +381,7 @@ class Mapper:
396
381
  ]
397
382
  )
398
383
  browse_path_mcp = self.new_mcp(
399
- entity_type=Constant.DASHBOARD,
400
384
  entity_urn=dashboard_urn,
401
- aspect_name=Constant.BROWSERPATH,
402
385
  aspect=browse_path,
403
386
  )
404
387
 
@@ -429,27 +412,21 @@ class Mapper:
429
412
  )
430
413
 
431
414
  info_mcp = self.new_mcp(
432
- entity_type=Constant.CORP_USER,
433
415
  entity_urn=user_urn,
434
- aspect_name=Constant.CORP_USER_INFO,
435
416
  aspect=user_info_instance,
436
417
  )
437
418
  user_mcps.append(info_mcp)
438
419
 
439
420
  # removed status mcp
440
421
  status_mcp = self.new_mcp(
441
- entity_type=Constant.CORP_USER,
442
422
  entity_urn=user_urn,
443
- aspect_name=Constant.STATUS,
444
423
  aspect=StatusClass(removed=False),
445
424
  )
446
425
  user_mcps.append(status_mcp)
447
426
  user_key = CorpUserKeyClass(username=user.username)
448
427
 
449
428
  user_key_mcp = self.new_mcp(
450
- entity_type=Constant.CORP_USER,
451
429
  entity_urn=user_urn,
452
- aspect_name=Constant.CORP_USER_KEY,
453
430
  aspect=user_key,
454
431
  )
455
432
  user_mcps.append(user_key_mcp)
@@ -27,10 +27,8 @@ class CatalogItem(BaseModel):
27
27
  is_favorite: bool = Field(alias="IsFavorite")
28
28
  user_info: Any = Field(None, alias="UserInfo")
29
29
  display_name: Optional[str] = Field(None, alias="DisplayName")
30
- has_data_sources: bool = Field(default=False, alias="HasDataSources")
31
- data_sources: Optional[List["DataSource"]] = Field(
32
- default_factory=list, alias="DataSources"
33
- )
30
+ has_data_sources: bool = Field(False, alias="HasDataSources")
31
+ data_sources: Optional[List["DataSource"]] = Field(None, alias="DataSources")
34
32
 
35
33
  @validator("display_name", always=True)
36
34
  def validate_diplay_name(cls, value, values):
@@ -2,7 +2,7 @@ import logging
2
2
  from typing import Dict, Optional
3
3
 
4
4
  import requests
5
- from pydantic.class_validators import root_validator, validator
5
+ from pydantic import root_validator, validator
6
6
  from pydantic.fields import Field
7
7
 
8
8
  from datahub.emitter.mce_builder import DEFAULT_ENV
@@ -69,9 +69,9 @@ class PresetConfig(SupersetConfig):
69
69
 
70
70
  @platform_name("Preset")
71
71
  @config_class(PresetConfig)
72
- @support_status(SupportStatus.TESTING)
72
+ @support_status(SupportStatus.CERTIFIED)
73
73
  @capability(
74
- SourceCapability.DELETION_DETECTION, "Optionally enabled via stateful_ingestion"
74
+ SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
75
75
  )
76
76
  class PresetSource(SupersetSource):
77
77
  """
@@ -1,8 +1,9 @@
1
+ from copy import deepcopy
1
2
  from datetime import datetime
2
3
  from enum import Enum
3
4
  from typing import Dict, List, Optional, Type, Union
4
5
 
5
- from pydantic import BaseModel, Field, root_validator
6
+ from pydantic import BaseModel, ConfigDict, Field, root_validator
6
7
 
7
8
  from datahub.emitter.mcp_builder import ContainerKey
8
9
  from datahub.ingestion.source.qlik_sense.config import QLIK_DATETIME_FORMAT, Constant
@@ -78,7 +79,11 @@ PERSONAL_SPACE_DICT = {
78
79
  }
79
80
 
80
81
 
81
- class Space(BaseModel):
82
+ class _QlikBaseModel(BaseModel):
83
+ model_config = ConfigDict(coerce_numbers_to_str=True)
84
+
85
+
86
+ class Space(_QlikBaseModel):
82
87
  id: str
83
88
  name: str
84
89
  description: str
@@ -89,6 +94,9 @@ class Space(BaseModel):
89
94
 
90
95
  @root_validator(pre=True)
91
96
  def update_values(cls, values: Dict) -> Dict:
97
+ # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
98
+ values = deepcopy(values)
99
+
92
100
  values[Constant.CREATEDAT] = datetime.strptime(
93
101
  values[Constant.CREATEDAT], QLIK_DATETIME_FORMAT
94
102
  )
@@ -98,7 +106,7 @@ class Space(BaseModel):
98
106
  return values
99
107
 
100
108
 
101
- class Item(BaseModel):
109
+ class Item(_QlikBaseModel):
102
110
  id: str
103
111
  description: str = ""
104
112
  ownerId: str
@@ -107,7 +115,7 @@ class Item(BaseModel):
107
115
  updatedAt: datetime
108
116
 
109
117
 
110
- class SchemaField(BaseModel):
118
+ class SchemaField(_QlikBaseModel):
111
119
  name: str
112
120
  dataType: Optional[str] = None
113
121
  primaryKey: Optional[bool] = None
@@ -115,6 +123,8 @@ class SchemaField(BaseModel):
115
123
 
116
124
  @root_validator(pre=True)
117
125
  def update_values(cls, values: Dict) -> Dict:
126
+ # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
127
+ values = deepcopy(values)
118
128
  values[Constant.DATATYPE] = values.get(Constant.DATATYPE, {}).get(Constant.TYPE)
119
129
  return values
120
130
 
@@ -130,6 +140,8 @@ class QlikDataset(Item):
130
140
 
131
141
  @root_validator(pre=True)
132
142
  def update_values(cls, values: Dict) -> Dict:
143
+ # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
144
+ values = deepcopy(values)
133
145
  # Update str time to datetime
134
146
  values[Constant.CREATEDAT] = datetime.strptime(
135
147
  values[Constant.CREATEDTIME], QLIK_DATETIME_FORMAT
@@ -148,13 +160,13 @@ class QlikDataset(Item):
148
160
  return values
149
161
 
150
162
 
151
- class AxisProperty(BaseModel):
163
+ class AxisProperty(_QlikBaseModel):
152
164
  Title: str = Field(alias="qFallbackTitle")
153
165
  Min: str = Field(alias="qMin")
154
166
  Max: str = Field(alias="qMax")
155
167
 
156
168
 
157
- class Chart(BaseModel):
169
+ class Chart(_QlikBaseModel):
158
170
  qId: str
159
171
  visualization: str
160
172
  title: str
@@ -164,13 +176,15 @@ class Chart(BaseModel):
164
176
 
165
177
  @root_validator(pre=True)
166
178
  def update_values(cls, values: Dict) -> Dict:
179
+ # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
180
+ values = deepcopy(values)
167
181
  values[Constant.QID] = values[Constant.QINFO][Constant.QID]
168
182
  values["qDimension"] = values[Constant.HYPERCUBE]["qDimensionInfo"]
169
183
  values["qMeasure"] = values[Constant.HYPERCUBE]["qMeasureInfo"]
170
184
  return values
171
185
 
172
186
 
173
- class Sheet(BaseModel):
187
+ class Sheet(_QlikBaseModel):
174
188
  id: str
175
189
  title: str
176
190
  description: str
@@ -181,6 +195,8 @@ class Sheet(BaseModel):
181
195
 
182
196
  @root_validator(pre=True)
183
197
  def update_values(cls, values: Dict) -> Dict:
198
+ # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
199
+ values = deepcopy(values)
184
200
  values[Constant.CREATEDAT] = datetime.strptime(
185
201
  values[Constant.CREATEDDATE], QLIK_DATETIME_FORMAT
186
202
  )
@@ -190,7 +206,7 @@ class Sheet(BaseModel):
190
206
  return values
191
207
 
192
208
 
193
- class QlikTable(BaseModel):
209
+ class QlikTable(_QlikBaseModel):
194
210
  tableName: str
195
211
  type: BoxType = Field(alias="boxType")
196
212
  tableAlias: str
@@ -206,6 +222,8 @@ class QlikTable(BaseModel):
206
222
 
207
223
  @root_validator(pre=True)
208
224
  def update_values(cls, values: Dict) -> Dict:
225
+ # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
226
+ values = deepcopy(values)
209
227
  values[Constant.DATACONNECTORID] = values[Constant.CONNECTIONINFO][Constant.ID]
210
228
  values[Constant.DATACONNECTORPLATFORM] = values[Constant.CONNECTIONINFO][
211
229
  Constant.SOURCECONNECTORID
@@ -223,6 +241,8 @@ class App(Item):
223
241
 
224
242
  @root_validator(pre=True)
225
243
  def update_values(cls, values: Dict) -> Dict:
244
+ # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
245
+ values = deepcopy(values)
226
246
  values[Constant.CREATEDAT] = datetime.strptime(
227
247
  values[Constant.CREATEDDATE], QLIK_DATETIME_FORMAT
228
248
  )
@@ -101,7 +101,7 @@ logger = logging.getLogger(__name__)
101
101
  )
102
102
  @capability(
103
103
  SourceCapability.LINEAGE_FINE,
104
- "Disabled by default. ",
104
+ "Disabled by default.",
105
105
  )
106
106
  @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
107
107
  @capability(
@@ -109,6 +109,7 @@ logger = logging.getLogger(__name__)
109
109
  "Enabled by default, configured using `ingest_owner`",
110
110
  )
111
111
  @capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
112
+ @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
112
113
  class QlikSenseSource(StatefulIngestionSourceBase, TestableSource):
113
114
  """
114
115
  This plugin extracts the following:
@@ -447,7 +447,7 @@ class RedashSource(StatefulIngestionSourceBase):
447
447
  dataset_urns = sql_parser_in_tables.in_tables
448
448
  if sql_parser_in_tables.debug_info.table_error:
449
449
  self.report.queries_problem_parsing.add(str(query_id))
450
- self.error(
450
+ self.warn(
451
451
  logger,
452
452
  "sql-parsing",
453
453
  f"exception {sql_parser_in_tables.debug_info.table_error} in parsing query-{query_id}-datasource-{data_source_id}",
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ from copy import deepcopy
2
3
  from enum import Enum
3
4
  from typing import Any, Dict, List, Optional
4
5
 
@@ -6,9 +7,10 @@ from pydantic import root_validator
6
7
  from pydantic.fields import Field
7
8
 
8
9
  from datahub.configuration import ConfigModel
9
- from datahub.configuration.common import AllowDenyPattern
10
+ from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
10
11
  from datahub.configuration.source_common import DatasetLineageProviderConfigBase
11
12
  from datahub.configuration.validate_field_removal import pydantic_removed_field
13
+ from datahub.configuration.validate_field_rename import pydantic_renamed_field
12
14
  from datahub.ingestion.api.incremental_lineage_helper import (
13
15
  IncrementalLineageConfigMixin,
14
16
  )
@@ -94,13 +96,18 @@ class RedshiftConfig(
94
96
  # Because of this behavior, it uses dramatically fewer round trips for
95
97
  # large Redshift warehouses. As an example, see this query for the columns:
96
98
  # https://github.com/sqlalchemy-redshift/sqlalchemy-redshift/blob/60b4db04c1d26071c291aeea52f1dcb5dd8b0eb0/sqlalchemy_redshift/dialect.py#L745.
97
- scheme: str = Field(
99
+ scheme: HiddenFromDocs[str] = Field(
98
100
  default="redshift+redshift_connector",
99
101
  description="",
100
- hidden_from_docs=True,
101
102
  )
102
103
 
103
104
  _database_alias_removed = pydantic_removed_field("database_alias")
105
+ _use_lineage_v2_removed = pydantic_removed_field("use_lineage_v2")
106
+ _rename_lineage_v2_generate_queries_to_lineage_generate_queries = (
107
+ pydantic_renamed_field(
108
+ "lineage_v2_generate_queries", "lineage_generate_queries"
109
+ )
110
+ )
104
111
 
105
112
  default_schema: str = Field(
106
113
  default="public",
@@ -112,13 +119,9 @@ class RedshiftConfig(
112
119
  description="Whether target Redshift instance is serverless (alternative is provisioned cluster)",
113
120
  )
114
121
 
115
- use_lineage_v2: bool = Field(
116
- default=True,
117
- description="Whether to use the new SQL-based lineage collector.",
118
- )
119
- lineage_v2_generate_queries: bool = Field(
122
+ lineage_generate_queries: bool = Field(
120
123
  default=True,
121
- description="Whether to generate queries entities for the new SQL-based lineage collector.",
124
+ description="Whether to generate queries entities for the SQL-based lineage collector.",
122
125
  )
123
126
 
124
127
  include_table_lineage: bool = Field(
@@ -213,6 +216,9 @@ class RedshiftConfig(
213
216
 
214
217
  @root_validator(skip_on_failure=True)
215
218
  def connection_config_compatibility_set(cls, values: Dict) -> Dict:
219
+ # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
220
+ values = deepcopy(values)
221
+
216
222
  if (
217
223
  ("options" in values and "connect_args" in values["options"])
218
224
  and "extra_client_options" in values
@@ -26,7 +26,7 @@ from datahub.utilities.search_utils import LogicalOperator
26
26
 
27
27
  class OutboundSharePlatformResource(BaseModel):
28
28
  namespace: str
29
- platform_instance: Optional[str]
29
+ platform_instance: Optional[str] = None
30
30
  env: str
31
31
  source_database: str
32
32
  share_name: str