acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (414)
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +1 -1
  7. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  8. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  9. datahub/api/entities/dataset/dataset.py +26 -23
  10. datahub/api/entities/external/__init__.py +0 -0
  11. datahub/api/entities/external/external_entities.py +724 -0
  12. datahub/api/entities/external/external_tag.py +147 -0
  13. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  14. datahub/api/entities/external/restricted_text.py +172 -0
  15. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  16. datahub/api/entities/forms/forms.py +3 -3
  17. datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
  18. datahub/api/graphql/operation.py +10 -6
  19. datahub/cli/check_cli.py +88 -7
  20. datahub/cli/cli_utils.py +63 -0
  21. datahub/cli/config_utils.py +18 -10
  22. datahub/cli/container_cli.py +5 -0
  23. datahub/cli/delete_cli.py +125 -27
  24. datahub/cli/docker_check.py +110 -14
  25. datahub/cli/docker_cli.py +153 -229
  26. datahub/cli/exists_cli.py +0 -2
  27. datahub/cli/get_cli.py +0 -2
  28. datahub/cli/graphql_cli.py +1422 -0
  29. datahub/cli/iceberg_cli.py +5 -0
  30. datahub/cli/ingest_cli.py +3 -15
  31. datahub/cli/migrate.py +2 -0
  32. datahub/cli/put_cli.py +1 -4
  33. datahub/cli/quickstart_versioning.py +53 -10
  34. datahub/cli/specific/assertions_cli.py +37 -6
  35. datahub/cli/specific/datacontract_cli.py +54 -7
  36. datahub/cli/specific/dataproduct_cli.py +2 -15
  37. datahub/cli/specific/dataset_cli.py +1 -8
  38. datahub/cli/specific/forms_cli.py +0 -4
  39. datahub/cli/specific/group_cli.py +0 -2
  40. datahub/cli/specific/structuredproperties_cli.py +1 -4
  41. datahub/cli/specific/user_cli.py +172 -3
  42. datahub/cli/state_cli.py +0 -2
  43. datahub/cli/timeline_cli.py +0 -2
  44. datahub/configuration/common.py +40 -1
  45. datahub/configuration/connection_resolver.py +5 -2
  46. datahub/configuration/env_vars.py +331 -0
  47. datahub/configuration/import_resolver.py +7 -4
  48. datahub/configuration/kafka.py +21 -1
  49. datahub/configuration/pydantic_migration_helpers.py +6 -13
  50. datahub/configuration/source_common.py +3 -2
  51. datahub/configuration/validate_field_deprecation.py +5 -2
  52. datahub/configuration/validate_field_removal.py +8 -2
  53. datahub/configuration/validate_field_rename.py +6 -5
  54. datahub/configuration/validate_multiline_string.py +5 -2
  55. datahub/emitter/mce_builder.py +8 -4
  56. datahub/emitter/rest_emitter.py +103 -30
  57. datahub/entrypoints.py +6 -3
  58. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
  59. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  60. datahub/ingestion/api/decorators.py +15 -3
  61. datahub/ingestion/api/report.py +381 -3
  62. datahub/ingestion/api/sink.py +27 -2
  63. datahub/ingestion/api/source.py +165 -58
  64. datahub/ingestion/api/source_protocols.py +23 -0
  65. datahub/ingestion/autogenerated/__init__.py +0 -0
  66. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  67. datahub/ingestion/autogenerated/lineage.json +402 -0
  68. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  69. datahub/ingestion/extractor/schema_util.py +13 -4
  70. datahub/ingestion/glossary/classification_mixin.py +5 -0
  71. datahub/ingestion/graph/client.py +330 -25
  72. datahub/ingestion/graph/config.py +3 -2
  73. datahub/ingestion/graph/filters.py +30 -11
  74. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  75. datahub/ingestion/run/pipeline.py +81 -11
  76. datahub/ingestion/run/pipeline_config.py +2 -2
  77. datahub/ingestion/sink/datahub_kafka.py +1 -0
  78. datahub/ingestion/sink/datahub_rest.py +13 -5
  79. datahub/ingestion/sink/file.py +1 -0
  80. datahub/ingestion/source/abs/config.py +1 -1
  81. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  82. datahub/ingestion/source/abs/source.py +15 -30
  83. datahub/ingestion/source/aws/aws_common.py +185 -13
  84. datahub/ingestion/source/aws/glue.py +517 -244
  85. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  86. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  87. datahub/ingestion/source/aws/tag_entities.py +270 -0
  88. datahub/ingestion/source/azure/azure_common.py +3 -3
  89. datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
  90. datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
  91. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
  92. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
  93. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  94. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  95. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  96. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  97. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  98. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  99. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  100. datahub/ingestion/source/cassandra/cassandra.py +6 -8
  101. datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
  102. datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
  103. datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
  104. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  105. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  106. datahub/ingestion/source/common/subtypes.py +53 -0
  107. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  108. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  109. datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
  110. datahub/ingestion/source/datahub/config.py +12 -9
  111. datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
  112. datahub/ingestion/source/datahub/datahub_source.py +10 -0
  113. datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
  114. datahub/ingestion/source/dbt/dbt_common.py +224 -9
  115. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  116. datahub/ingestion/source/debug/__init__.py +0 -0
  117. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  118. datahub/ingestion/source/delta_lake/config.py +9 -5
  119. datahub/ingestion/source/delta_lake/source.py +8 -0
  120. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  121. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  122. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  123. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  124. datahub/ingestion/source/dremio/dremio_source.py +132 -98
  125. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  126. datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
  127. datahub/ingestion/source/excel/__init__.py +0 -0
  128. datahub/ingestion/source/excel/config.py +92 -0
  129. datahub/ingestion/source/excel/excel_file.py +539 -0
  130. datahub/ingestion/source/excel/profiling.py +308 -0
  131. datahub/ingestion/source/excel/report.py +49 -0
  132. datahub/ingestion/source/excel/source.py +662 -0
  133. datahub/ingestion/source/excel/util.py +18 -0
  134. datahub/ingestion/source/feast.py +8 -10
  135. datahub/ingestion/source/file.py +3 -0
  136. datahub/ingestion/source/fivetran/config.py +66 -7
  137. datahub/ingestion/source/fivetran/fivetran.py +227 -43
  138. datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
  139. datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
  140. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  141. datahub/ingestion/source/fivetran/response_models.py +97 -0
  142. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  143. datahub/ingestion/source/gcs/gcs_source.py +32 -4
  144. datahub/ingestion/source/ge_data_profiler.py +108 -31
  145. datahub/ingestion/source/ge_profiling_config.py +26 -11
  146. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  147. datahub/ingestion/source/grafana/field_utils.py +307 -0
  148. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  149. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  150. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  151. datahub/ingestion/source/grafana/lineage.py +202 -0
  152. datahub/ingestion/source/grafana/models.py +137 -0
  153. datahub/ingestion/source/grafana/report.py +90 -0
  154. datahub/ingestion/source/grafana/types.py +16 -0
  155. datahub/ingestion/source/hex/api.py +28 -1
  156. datahub/ingestion/source/hex/hex.py +16 -5
  157. datahub/ingestion/source/hex/mapper.py +16 -2
  158. datahub/ingestion/source/hex/model.py +2 -0
  159. datahub/ingestion/source/hex/query_fetcher.py +1 -1
  160. datahub/ingestion/source/iceberg/iceberg.py +123 -59
  161. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  162. datahub/ingestion/source/identity/azure_ad.py +1 -1
  163. datahub/ingestion/source/identity/okta.py +1 -14
  164. datahub/ingestion/source/kafka/kafka.py +16 -0
  165. datahub/ingestion/source/kafka_connect/common.py +2 -2
  166. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  167. datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
  168. datahub/ingestion/source/looker/looker_common.py +148 -79
  169. datahub/ingestion/source/looker/looker_config.py +15 -4
  170. datahub/ingestion/source/looker/looker_constant.py +4 -0
  171. datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
  172. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  173. datahub/ingestion/source/looker/looker_source.py +503 -547
  174. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  175. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  176. datahub/ingestion/source/looker/lookml_config.py +31 -3
  177. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  178. datahub/ingestion/source/looker/lookml_source.py +96 -117
  179. datahub/ingestion/source/looker/view_upstream.py +494 -1
  180. datahub/ingestion/source/metabase.py +32 -6
  181. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  182. datahub/ingestion/source/metadata/lineage.py +9 -9
  183. datahub/ingestion/source/mlflow.py +12 -2
  184. datahub/ingestion/source/mock_data/__init__.py +0 -0
  185. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  186. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  187. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  188. datahub/ingestion/source/mode.py +26 -5
  189. datahub/ingestion/source/mongodb.py +11 -1
  190. datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
  191. datahub/ingestion/source/nifi.py +2 -2
  192. datahub/ingestion/source/openapi.py +1 -1
  193. datahub/ingestion/source/powerbi/config.py +47 -21
  194. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  195. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  196. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  197. datahub/ingestion/source/powerbi/powerbi.py +10 -6
  198. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  199. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  200. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  201. datahub/ingestion/source/preset.py +3 -3
  202. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  203. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  204. datahub/ingestion/source/redash.py +1 -1
  205. datahub/ingestion/source/redshift/config.py +15 -9
  206. datahub/ingestion/source/redshift/datashares.py +1 -1
  207. datahub/ingestion/source/redshift/lineage.py +386 -687
  208. datahub/ingestion/source/redshift/query.py +23 -19
  209. datahub/ingestion/source/redshift/redshift.py +52 -111
  210. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  211. datahub/ingestion/source/redshift/report.py +0 -2
  212. datahub/ingestion/source/redshift/usage.py +6 -5
  213. datahub/ingestion/source/s3/report.py +4 -2
  214. datahub/ingestion/source/s3/source.py +449 -248
  215. datahub/ingestion/source/sac/sac.py +3 -1
  216. datahub/ingestion/source/salesforce.py +28 -13
  217. datahub/ingestion/source/schema/json_schema.py +14 -14
  218. datahub/ingestion/source/schema_inference/object.py +22 -6
  219. datahub/ingestion/source/sigma/data_classes.py +3 -0
  220. datahub/ingestion/source/sigma/sigma.py +7 -1
  221. datahub/ingestion/source/slack/slack.py +10 -16
  222. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  223. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  224. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  225. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  226. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  227. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  228. datahub/ingestion/source/snowflake/constants.py +3 -0
  229. datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
  230. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
  231. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
  232. datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
  233. datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
  234. datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
  235. datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
  236. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
  237. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  238. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  239. datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
  240. datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
  241. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  242. datahub/ingestion/source/sql/athena.py +217 -25
  243. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  244. datahub/ingestion/source/sql/clickhouse.py +24 -8
  245. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  246. datahub/ingestion/source/sql/druid.py +2 -2
  247. datahub/ingestion/source/sql/hana.py +3 -1
  248. datahub/ingestion/source/sql/hive.py +4 -3
  249. datahub/ingestion/source/sql/hive_metastore.py +19 -20
  250. datahub/ingestion/source/sql/mariadb.py +0 -1
  251. datahub/ingestion/source/sql/mssql/job_models.py +3 -1
  252. datahub/ingestion/source/sql/mssql/source.py +336 -57
  253. datahub/ingestion/source/sql/mysql.py +154 -4
  254. datahub/ingestion/source/sql/oracle.py +5 -5
  255. datahub/ingestion/source/sql/postgres.py +142 -6
  256. datahub/ingestion/source/sql/presto.py +2 -1
  257. datahub/ingestion/source/sql/sql_common.py +281 -49
  258. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  259. datahub/ingestion/source/sql/sql_types.py +22 -0
  260. datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
  261. datahub/ingestion/source/sql/teradata.py +1028 -245
  262. datahub/ingestion/source/sql/trino.py +11 -1
  263. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  264. datahub/ingestion/source/sql/vertica.py +14 -7
  265. datahub/ingestion/source/sql_queries.py +219 -121
  266. datahub/ingestion/source/state/checkpoint.py +8 -29
  267. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  268. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  269. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  270. datahub/ingestion/source/superset.py +314 -67
  271. datahub/ingestion/source/tableau/tableau.py +135 -59
  272. datahub/ingestion/source/tableau/tableau_common.py +9 -2
  273. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  274. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  275. datahub/ingestion/source/unity/config.py +160 -40
  276. datahub/ingestion/source/unity/connection.py +61 -0
  277. datahub/ingestion/source/unity/connection_test.py +1 -0
  278. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  279. datahub/ingestion/source/unity/proxy.py +794 -51
  280. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  281. datahub/ingestion/source/unity/proxy_types.py +36 -2
  282. datahub/ingestion/source/unity/report.py +15 -3
  283. datahub/ingestion/source/unity/source.py +465 -131
  284. datahub/ingestion/source/unity/tag_entities.py +197 -0
  285. datahub/ingestion/source/unity/usage.py +46 -4
  286. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  287. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
  288. datahub/ingestion/source/usage/usage_common.py +4 -3
  289. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  290. datahub/ingestion/source_config/pulsar.py +3 -1
  291. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  292. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  293. datahub/ingestion/transformer/base_transformer.py +8 -5
  294. datahub/ingestion/transformer/set_browse_path.py +112 -0
  295. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  296. datahub/metadata/_internal_schema_classes.py +6806 -4871
  297. datahub/metadata/_urns/urn_defs.py +1767 -1539
  298. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  299. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  300. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  301. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  302. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  303. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
  304. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  305. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  306. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  307. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  308. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  309. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  310. datahub/metadata/schema.avsc +18395 -16979
  311. datahub/metadata/schemas/Actors.avsc +38 -1
  312. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  313. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  314. datahub/metadata/schemas/Applications.avsc +38 -0
  315. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  316. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  317. datahub/metadata/schemas/ChartKey.avsc +1 -0
  318. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  319. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  320. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  321. datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
  322. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  323. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  324. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  325. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  326. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  327. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  328. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  329. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  330. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  331. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  332. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  333. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  334. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  335. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  336. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  337. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  338. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  339. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  340. datahub/metadata/schemas/DomainKey.avsc +2 -1
  341. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  342. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  343. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  344. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  345. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  346. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  347. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  348. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  349. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  350. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  351. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  352. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  353. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  354. datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
  355. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  356. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  357. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  358. datahub/metadata/schemas/Operation.avsc +4 -2
  359. datahub/metadata/schemas/Ownership.avsc +69 -0
  360. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  361. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  362. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  363. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  364. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  365. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  366. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  367. datahub/sdk/__init__.py +2 -0
  368. datahub/sdk/_all_entities.py +7 -0
  369. datahub/sdk/_shared.py +249 -5
  370. datahub/sdk/chart.py +386 -0
  371. datahub/sdk/container.py +7 -0
  372. datahub/sdk/dashboard.py +453 -0
  373. datahub/sdk/dataflow.py +7 -0
  374. datahub/sdk/datajob.py +45 -13
  375. datahub/sdk/dataset.py +56 -2
  376. datahub/sdk/entity_client.py +111 -9
  377. datahub/sdk/lineage_client.py +663 -82
  378. datahub/sdk/main_client.py +50 -16
  379. datahub/sdk/mlmodel.py +120 -38
  380. datahub/sdk/mlmodelgroup.py +7 -0
  381. datahub/sdk/search_client.py +7 -3
  382. datahub/sdk/search_filters.py +304 -36
  383. datahub/secret/datahub_secret_store.py +3 -0
  384. datahub/secret/environment_secret_store.py +29 -0
  385. datahub/secret/file_secret_store.py +49 -0
  386. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  387. datahub/specific/aspect_helpers/siblings.py +73 -0
  388. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  389. datahub/specific/chart.py +1 -1
  390. datahub/specific/datajob.py +15 -1
  391. datahub/specific/dataproduct.py +4 -0
  392. datahub/specific/dataset.py +39 -59
  393. datahub/sql_parsing/split_statements.py +13 -0
  394. datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
  395. datahub/sql_parsing/sqlglot_lineage.py +196 -42
  396. datahub/sql_parsing/sqlglot_utils.py +12 -4
  397. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  398. datahub/telemetry/telemetry.py +28 -14
  399. datahub/testing/sdk_v2_helpers.py +7 -1
  400. datahub/upgrade/upgrade.py +73 -17
  401. datahub/utilities/file_backed_collections.py +8 -9
  402. datahub/utilities/is_pytest.py +3 -2
  403. datahub/utilities/logging_manager.py +22 -6
  404. datahub/utilities/mapping.py +29 -2
  405. datahub/utilities/sample_data.py +5 -4
  406. datahub/utilities/server_config_util.py +10 -1
  407. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  408. datahub/utilities/stats_collections.py +4 -0
  409. datahub/utilities/urns/urn.py +41 -2
  410. datahub/emitter/sql_parsing_builder.py +0 -306
  411. datahub/ingestion/source/redshift/lineage_v2.py +0 -466
  412. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
  413. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
  414. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -3,6 +3,7 @@ import logging
3
3
  import re
4
4
  import time
5
5
  from collections import OrderedDict, defaultdict
6
+ from copy import deepcopy
6
7
  from dataclasses import dataclass, field as dataclass_field
7
8
  from datetime import datetime, timedelta, timezone
8
9
  from functools import lru_cache
@@ -80,6 +81,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
80
81
  from datahub.ingestion.source.common.subtypes import (
81
82
  BIContainerSubTypes,
82
83
  DatasetSubTypes,
84
+ SourceCapabilityModifier,
83
85
  )
84
86
  from datahub.ingestion.source.state.stale_entity_removal_handler import (
85
87
  StaleEntityRemovalHandler,
@@ -118,7 +120,6 @@ from datahub.ingestion.source.tableau.tableau_common import (
118
120
  )
119
121
  from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo
120
122
  from datahub.ingestion.source.tableau.tableau_validation import check_user_role
121
- from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport
122
123
  from datahub.metadata.com.linkedin.pegasus2avro.common import (
123
124
  AuditStamp,
124
125
  ChangeAuditStamps,
@@ -148,7 +149,6 @@ from datahub.metadata.com.linkedin.pegasus2avro.schema import (
148
149
  )
149
150
  from datahub.metadata.schema_classes import (
150
151
  BrowsePathsClass,
151
- ChangeTypeClass,
152
152
  ChartInfoClass,
153
153
  ChartUsageStatisticsClass,
154
154
  DashboardInfoClass,
@@ -474,6 +474,13 @@ class TableauPageSizeConfig(ConfigModel):
474
474
  return self.database_table_page_size or self.page_size
475
475
 
476
476
 
477
+ _IngestHiddenAssetsOptionsType = Literal["worksheet", "dashboard"]
478
+ _IngestHiddenAssetsOptions: List[_IngestHiddenAssetsOptionsType] = [
479
+ "worksheet",
480
+ "dashboard",
481
+ ]
482
+
483
+
477
484
  class TableauConfig(
478
485
  DatasetLineageProviderConfigBase,
479
486
  StatefulIngestionConfigBase,
@@ -524,10 +531,22 @@ class TableauConfig(
524
531
  default=False,
525
532
  description="Ingest Owner from source. This will override Owner info entered from UI",
526
533
  )
534
+ use_email_as_username: bool = Field(
535
+ default=False,
536
+ description="Use email address instead of username for entity owners. Requires ingest_owner to be True.",
537
+ )
527
538
  ingest_tables_external: bool = Field(
528
539
  default=False,
529
540
  description="Ingest details for tables external to (not embedded in) tableau as entities.",
530
541
  )
542
+ emit_all_published_datasources: bool = Field(
543
+ default=False,
544
+ description="Ingest all published data sources. When False (default), only ingest published data sources that belong to an ingested workbook.",
545
+ )
546
+ emit_all_embedded_datasources: bool = Field(
547
+ default=False,
548
+ description="Ingest all embedded data sources. When False (default), only ingest embedded data sources that belong to an ingested workbook.",
549
+ )
531
550
 
532
551
  env: str = Field(
533
552
  default=builder.DEFAULT_ENV,
@@ -574,13 +593,13 @@ class TableauConfig(
574
593
  )
575
594
 
576
595
  extract_lineage_from_unsupported_custom_sql_queries: bool = Field(
577
- default=False,
578
- description="[Experimental] Whether to extract lineage from unsupported custom sql queries using SQL parsing",
596
+ default=True,
597
+ description="[Experimental] Extract lineage from Custom SQL queries using DataHub's SQL parser in cases where the Tableau Catalog API fails to return lineage for the query.",
579
598
  )
580
599
 
581
600
  force_extraction_of_lineage_from_custom_sql_queries: bool = Field(
582
601
  default=False,
583
- description="[Experimental] Force extraction of lineage from custom sql queries using SQL parsing, ignoring Tableau metadata",
602
+ description="[Experimental] Force extraction of lineage from Custom SQL queries using DataHub's SQL parser, even when the Tableau Catalog API returns lineage already.",
584
603
  )
585
604
 
586
605
  sql_parsing_disable_schema_awareness: bool = Field(
@@ -613,8 +632,8 @@ class TableauConfig(
613
632
  description="Configuration settings for ingesting Tableau groups and their capabilities as custom properties.",
614
633
  )
615
634
 
616
- ingest_hidden_assets: Union[List[Literal["worksheet", "dashboard"]], bool] = Field(
617
- default=["worksheet", "dashboard"],
635
+ ingest_hidden_assets: Union[List[_IngestHiddenAssetsOptionsType], bool] = Field(
636
+ _IngestHiddenAssetsOptions,
618
637
  description=(
619
638
  "When enabled, hidden worksheets and dashboards are ingested into Datahub."
620
639
  " If a dashboard or worksheet is hidden in Tableau the luid is blank."
@@ -636,6 +655,11 @@ class TableauConfig(
636
655
  # pre = True because we want to take some decision before pydantic initialize the configuration to default values
637
656
  @root_validator(pre=True)
638
657
  def projects_backward_compatibility(cls, values: Dict) -> Dict:
658
+ # In-place update of the input dict would cause state contamination. This was discovered through test failures
659
+ # in test_hex.py where the same dict is reused.
660
+ # So a copy is performed first.
661
+ values = deepcopy(values)
662
+
639
663
  projects = values.get("projects")
640
664
  project_pattern = values.get("project_pattern")
641
665
  project_path_pattern = values.get("project_path_pattern")
@@ -647,6 +671,7 @@ class TableauConfig(
647
671
  values["project_pattern"] = AllowDenyPattern(
648
672
  allow=[f"^{prj}$" for prj in projects]
649
673
  )
674
+ values.pop("projects")
650
675
  elif (project_pattern or project_path_pattern) and projects:
651
676
  raise ValueError(
652
677
  "projects is deprecated. Please use project_path_pattern only."
@@ -658,7 +683,7 @@ class TableauConfig(
658
683
 
659
684
  return values
660
685
 
661
- @root_validator()
686
+ @root_validator(skip_on_failure=True)
662
687
  def validate_config_values(cls, values: Dict) -> Dict:
663
688
  tags_for_hidden_assets = values.get("tags_for_hidden_assets")
664
689
  ingest_tags = values.get("ingest_tags")
@@ -670,6 +695,14 @@ class TableauConfig(
670
695
  raise ValueError(
671
696
  "tags_for_hidden_assets is only allowed with ingest_tags enabled. Be aware that this will overwrite tags entered from the UI."
672
697
  )
698
+
699
+ use_email_as_username = values.get("use_email_as_username")
700
+ ingest_owner = values.get("ingest_owner")
701
+ if use_email_as_username and not ingest_owner:
702
+ raise ValueError(
703
+ "use_email_as_username requires ingest_owner to be enabled."
704
+ )
705
+
673
706
  return values
674
707
 
675
708
 
@@ -761,7 +794,6 @@ class SiteIdContentUrl:
761
794
  @dataclass
762
795
  class TableauSourceReport(
763
796
  StaleEntityRemovalSourceReport,
764
- IngestionStageReport,
765
797
  ):
766
798
  get_all_datasources_query_failed: bool = False
767
799
  num_get_datasource_query_failures: int = 0
@@ -831,6 +863,9 @@ class TableauSourceReport(
831
863
  default_factory=(lambda: defaultdict(int))
832
864
  )
833
865
 
866
+ # Owner extraction statistics
867
+ num_email_fallback_to_username: int = 0
868
+
834
869
 
835
870
  def report_user_role(report: TableauSourceReport, server: Server) -> None:
836
871
  title: str = "Insufficient Permissions"
@@ -861,16 +896,29 @@ def report_user_role(report: TableauSourceReport, server: Server) -> None:
861
896
  @platform_name("Tableau")
862
897
  @config_class(TableauConfig)
863
898
  @support_status(SupportStatus.CERTIFIED)
899
+ @capability(
900
+ SourceCapability.CONTAINERS,
901
+ "Enabled by default",
902
+ subtype_modifier=[
903
+ SourceCapabilityModifier.TABLEAU_PROJECT,
904
+ SourceCapabilityModifier.TABLEAU_SITE,
905
+ SourceCapabilityModifier.TABLEAU_WORKBOOK,
906
+ ],
907
+ )
864
908
  @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
865
909
  @capability(SourceCapability.DOMAINS, "Requires transformer", supported=False)
866
910
  @capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
867
911
  @capability(
868
912
  SourceCapability.USAGE_STATS,
869
913
  "Dashboard/Chart view counts, enabled using extract_usage_stats config",
914
+ subtype_modifier=[
915
+ SourceCapabilityModifier.DASHBOARD,
916
+ SourceCapabilityModifier.CHART,
917
+ ],
870
918
  )
871
919
  @capability(
872
920
  SourceCapability.DELETION_DETECTION,
873
- "Enabled by default when stateful ingestion is turned on.",
921
+ "Enabled by default via stateful ingestion.",
874
922
  )
875
923
  @capability(SourceCapability.OWNERSHIP, "Requires recipe configuration")
876
924
  @capability(SourceCapability.TAGS, "Requires recipe configuration")
@@ -879,6 +927,7 @@ def report_user_role(report: TableauSourceReport, server: Server) -> None:
879
927
  SourceCapability.LINEAGE_FINE,
880
928
  "Enabled by default, configure using `extract_column_level_lineage`",
881
929
  )
930
+ @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
882
931
  class TableauSource(StatefulIngestionSourceBase, TestableSource):
883
932
  platform = "tableau"
884
933
 
@@ -1162,7 +1211,7 @@ class TableauSiteSource:
1162
1211
  self.report.warning(
1163
1212
  title="Incomplete project hierarchy",
1164
1213
  message="Project details missing. Child projects will be ingested without reference to their parent project. We generally need Site Administrator Explorer permissions to extract the complete project hierarchy.",
1165
- context=f"Missing {project.parent_id}, referenced by {project.id} {project.project_name}",
1214
+ context=f"Missing {project.parent_id}, referenced by {project.id} {project.name}",
1166
1215
  )
1167
1216
  project.parent_id = None
1168
1217
 
@@ -1539,12 +1588,15 @@ class TableauSiteSource:
1539
1588
  }}""",
1540
1589
  )
1541
1590
  else:
1542
- # As of Tableau Server 2024.2, the metadata API sporadically returns a 30-second
1543
- # timeout error.
1544
- # It doesn't reliably happen, so retrying a couple of times makes sense.
1545
1591
  if all(
1592
+ # As of Tableau Server 2024.2, the metadata API sporadically returns a 30-second
1593
+ # timeout error.
1594
+ # It doesn't reliably happen, so retrying a couple of times makes sense.
1546
1595
  error.get("message")
1547
1596
  == "Execution canceled because timeout of 30000 millis was reached"
1597
+ # The Metadata API sometimes returns an 'unexpected error' message when querying
1598
+ # embeddedDatasourcesConnection, so retry a couple of times.
1599
+ or error.get("message") == "Unexpected error occurred"
1548
1600
  for error in errors
1549
1601
  ):
1550
1602
  # If every error was a retryable one (30-second timeout or "Unexpected error occurred"), we can retry.
@@ -1556,8 +1608,8 @@ class TableauSiteSource:
1556
1608
  (self.config.max_retries - retries_remaining + 1) ** 2, 60
1557
1609
  )
1558
1610
  logger.info(
1559
- f"Query {connection_type} received a 30 second timeout error - will retry in {backoff_time} seconds. "
1560
- f"Retries remaining: {retries_remaining}"
1611
+ f"Query {connection_type} received a retryable error with {retries_remaining} retries remaining, "
1612
+ f"will retry in {backoff_time} seconds: {errors}"
1561
1613
  )
1562
1614
  time.sleep(backoff_time)
1563
1615
  return self.get_connection_object_page(
@@ -2174,32 +2226,32 @@ class TableauSiteSource:
2174
2226
  else []
2175
2227
  )
2176
2228
 
2177
- # The Tableau SQL parser much worse than our sqlglot based parser,
2178
- # so relying on metadata parsed by Tableau from SQL queries can be
2179
- # less accurate. This option allows us to ignore Tableau's parser and
2180
- # only use our own.
2181
- if self.config.force_extraction_of_lineage_from_custom_sql_queries:
2182
- logger.debug("Extracting TLL & CLL from custom sql (forced)")
2229
+ tableau_table_list = csql.get(c.TABLES, [])
2230
+ if self.config.force_extraction_of_lineage_from_custom_sql_queries or (
2231
+ not tableau_table_list
2232
+ and self.config.extract_lineage_from_unsupported_custom_sql_queries
2233
+ ):
2234
+ if not tableau_table_list:
2235
+ # custom sql tables may contain unsupported sql, causing incomplete lineage
2236
+ # we extract the lineage from the raw queries
2237
+ logger.debug(
2238
+ "Parsing TLL & CLL from custom sql (tableau metadata incomplete)"
2239
+ )
2240
+ else:
2241
+ # The Tableau SQL parser is much worse than our sqlglot based parser,
2242
+ # so relying on metadata parsed by Tableau from SQL queries can be
2243
+ # less accurate. This option allows us to ignore Tableau's parser and
2244
+ # only use our own.
2245
+ logger.debug("Parsing TLL & CLL from custom sql (forced)")
2246
+
2183
2247
  yield from self._create_lineage_from_unsupported_csql(
2184
2248
  csql_urn, csql, columns
2185
2249
  )
2186
2250
  else:
2187
- tables = csql.get(c.TABLES, [])
2188
-
2189
- if tables:
2190
- # lineage from custom sql -> datasets/tables #
2191
- yield from self._create_lineage_to_upstream_tables(
2192
- csql_urn, tables, datasource
2193
- )
2194
- elif (
2195
- self.config.extract_lineage_from_unsupported_custom_sql_queries
2196
- ):
2197
- logger.debug("Extracting TLL & CLL from custom sql")
2198
- # custom sql tables may contain unsupported sql, causing incomplete lineage
2199
- # we extract the lineage from the raw queries
2200
- yield from self._create_lineage_from_unsupported_csql(
2201
- csql_urn, csql, columns
2202
- )
2251
+ # lineage from custom sql -> datasets/tables #
2252
+ yield from self._create_lineage_to_upstream_tables(
2253
+ csql_urn, tableau_table_list, datasource
2254
+ )
2203
2255
 
2204
2256
  # Schema Metadata
2205
2257
  schema_metadata = self.get_schema_metadata_for_custom_sql(columns)
@@ -2237,7 +2289,6 @@ class TableauSiteSource:
2237
2289
  yield self.get_metadata_change_event(dataset_snapshot)
2238
2290
  yield self.get_metadata_change_proposal(
2239
2291
  dataset_snapshot.urn,
2240
- aspect_name=c.SUB_TYPES,
2241
2292
  aspect=SubTypesClass(typeNames=[DatasetSubTypes.VIEW, c.CUSTOM_SQL]),
2242
2293
  )
2243
2294
 
@@ -2402,7 +2453,6 @@ class TableauSiteSource:
2402
2453
  upstream_lineage = UpstreamLineage(upstreams=upstream_tables)
2403
2454
  yield self.get_metadata_change_proposal(
2404
2455
  csql_urn,
2405
- aspect_name=c.UPSTREAM_LINEAGE,
2406
2456
  aspect=upstream_lineage,
2407
2457
  )
2408
2458
  self.report.num_tables_with_upstream_lineage += 1
@@ -2588,7 +2638,6 @@ class TableauSiteSource:
2588
2638
  )
2589
2639
  yield self.get_metadata_change_proposal(
2590
2640
  csql_urn,
2591
- aspect_name=c.UPSTREAM_LINEAGE,
2592
2641
  aspect=upstream_lineage,
2593
2642
  )
2594
2643
  self.report.num_tables_with_upstream_lineage += 1
@@ -2634,14 +2683,10 @@ class TableauSiteSource:
2634
2683
  def get_metadata_change_proposal(
2635
2684
  self,
2636
2685
  urn: str,
2637
- aspect_name: str,
2638
2686
  aspect: Union["UpstreamLineage", "SubTypesClass"],
2639
2687
  ) -> MetadataWorkUnit:
2640
2688
  return MetadataChangeProposalWrapper(
2641
- entityType=c.DATASET,
2642
- changeType=ChangeTypeClass.UPSERT,
2643
2689
  entityUrn=urn,
2644
- aspectName=aspect_name,
2645
2690
  aspect=aspect,
2646
2691
  ).as_workunit()
2647
2692
 
@@ -2698,13 +2743,12 @@ class TableauSiteSource:
2698
2743
  dataset_snapshot.aspects.append(browse_paths)
2699
2744
 
2700
2745
  # Ownership
2701
- owner = (
2702
- self._get_ownership(datasource_info[c.OWNER][c.USERNAME])
2703
- if datasource_info
2704
- and datasource_info.get(c.OWNER)
2705
- and datasource_info[c.OWNER].get(c.USERNAME)
2746
+ owner_identifier = (
2747
+ self._get_owner_identifier(datasource_info[c.OWNER])
2748
+ if datasource_info and datasource_info.get(c.OWNER)
2706
2749
  else None
2707
2750
  )
2751
+ owner = self._get_ownership(owner_identifier) if owner_identifier else None
2708
2752
  if owner is not None:
2709
2753
  dataset_snapshot.aspects.append(owner)
2710
2754
 
@@ -2749,7 +2793,6 @@ class TableauSiteSource:
2749
2793
  )
2750
2794
  yield self.get_metadata_change_proposal(
2751
2795
  datasource_urn,
2752
- aspect_name=c.UPSTREAM_LINEAGE,
2753
2796
  aspect=upstream_lineage,
2754
2797
  )
2755
2798
  self.report.num_tables_with_upstream_lineage += 1
@@ -2768,7 +2811,6 @@ class TableauSiteSource:
2768
2811
  yield self.get_metadata_change_event(dataset_snapshot)
2769
2812
  yield self.get_metadata_change_proposal(
2770
2813
  dataset_snapshot.urn,
2771
- aspect_name=c.SUB_TYPES,
2772
2814
  aspect=SubTypesClass(
2773
2815
  typeNames=(
2774
2816
  ["Embedded Data Source"]
@@ -2854,7 +2896,11 @@ class TableauSiteSource:
2854
2896
  return datasource
2855
2897
 
2856
2898
  def emit_published_datasources(self) -> Iterable[MetadataWorkUnit]:
2857
- datasource_filter = {c.ID_WITH_IN: self.datasource_ids_being_used}
2899
+ datasource_filter = (
2900
+ {}
2901
+ if self.config.emit_all_published_datasources
2902
+ else {c.ID_WITH_IN: self.datasource_ids_being_used}
2903
+ )
2858
2904
 
2859
2905
  for datasource in self.get_connection_objects(
2860
2906
  query=published_datasource_graphql_query,
@@ -3107,7 +3153,7 @@ class TableauSiteSource:
3107
3153
 
3108
3154
  creator: Optional[str] = None
3109
3155
  if workbook is not None and workbook.get(c.OWNER) is not None:
3110
- creator = workbook[c.OWNER].get(c.USERNAME)
3156
+ creator = self._get_owner_identifier(workbook[c.OWNER])
3111
3157
  created_at = sheet.get(c.CREATED_AT, datetime.now())
3112
3158
  updated_at = sheet.get(c.UPDATED_AT, datetime.now())
3113
3159
  last_modified = self.get_last_modified(creator, created_at, updated_at)
@@ -3256,7 +3302,7 @@ class TableauSiteSource:
3256
3302
 
3257
3303
  def emit_workbook_as_container(self, workbook: Dict) -> Iterable[MetadataWorkUnit]:
3258
3304
  workbook_container_key = self.gen_workbook_key(workbook[c.ID])
3259
- creator = workbook.get(c.OWNER, {}).get(c.USERNAME)
3305
+ creator = self._get_owner_identifier(workbook.get(c.OWNER, {}))
3260
3306
 
3261
3307
  owner_urn = (
3262
3308
  builder.make_user_urn(creator)
@@ -3438,7 +3484,7 @@ class TableauSiteSource:
3438
3484
 
3439
3485
  creator: Optional[str] = None
3440
3486
  if workbook is not None and workbook.get(c.OWNER) is not None:
3441
- creator = workbook[c.OWNER].get(c.USERNAME)
3487
+ creator = self._get_owner_identifier(workbook[c.OWNER])
3442
3488
  created_at = dashboard.get(c.CREATED_AT, datetime.now())
3443
3489
  updated_at = dashboard.get(c.UPDATED_AT, datetime.now())
3444
3490
  last_modified = self.get_last_modified(creator, created_at, updated_at)
@@ -3547,7 +3593,11 @@ class TableauSiteSource:
3547
3593
  return browse_paths
3548
3594
 
3549
3595
  def emit_embedded_datasources(self) -> Iterable[MetadataWorkUnit]:
3550
- datasource_filter = {c.ID_WITH_IN: self.embedded_datasource_ids_being_used}
3596
+ datasource_filter = (
3597
+ {}
3598
+ if self.config.emit_all_embedded_datasources
3599
+ else {c.ID_WITH_IN: self.embedded_datasource_ids_being_used}
3600
+ )
3551
3601
 
3552
3602
  for datasource in self.get_connection_objects(
3553
3603
  query=embedded_datasource_graphql_query,
@@ -3581,6 +3631,20 @@ class TableauSiteSource:
3581
3631
  )
3582
3632
  return last_modified
3583
3633
 
3634
+ def _get_owner_identifier(self, owner_dict: dict) -> Optional[str]:
3635
+ """Extract owner identifier (email or username) based on configuration."""
3636
+ if not owner_dict:
3637
+ return None
3638
+
3639
+ if self.config.use_email_as_username:
3640
+ email = owner_dict.get(c.EMAIL)
3641
+ if email:
3642
+ return email
3643
+ # Fall back to username if email is not available
3644
+ self.report.num_email_fallback_to_username += 1
3645
+
3646
+ return owner_dict.get(c.USERNAME)
3647
+
3584
3648
  @lru_cache(maxsize=None)
3585
3649
  def _get_ownership(self, user: str) -> Optional[OwnershipClass]:
3586
3650
  if self.config.ingest_owner and user:
@@ -3659,7 +3723,7 @@ class TableauSiteSource:
3659
3723
  container_key=project_key,
3660
3724
  name=project_.name,
3661
3725
  description=project_.description,
3662
- sub_types=[c.PROJECT],
3726
+ sub_types=[BIContainerSubTypes.TABLEAU_PROJECT],
3663
3727
  parent_container_key=parent_project_key,
3664
3728
  )
3665
3729
 
@@ -3677,7 +3741,7 @@ class TableauSiteSource:
3677
3741
  yield from gen_containers(
3678
3742
  container_key=self.gen_site_key(self.site_id),
3679
3743
  name=self.site.name or "Default",
3680
- sub_types=[c.SITE],
3744
+ sub_types=[BIContainerSubTypes.TABLEAU_SITE],
3681
3745
  )
3682
3746
 
3683
3747
  def _fetch_groups(self):
@@ -3804,3 +3868,15 @@ class TableauSiteSource:
3804
3868
  self.report.emit_upstream_tables_timer[self.site_content_url] = (
3805
3869
  timer.elapsed_seconds(digits=2)
3806
3870
  )
3871
+
3872
+ # Log owner extraction statistics if there were fallbacks
3873
+ if (
3874
+ self.config.use_email_as_username
3875
+ and self.config.ingest_owner
3876
+ and self.report.num_email_fallback_to_username > 0
3877
+ ):
3878
+ logger.info(
3879
+ f"Owner extraction summary for site '{self.site_content_url}': "
3880
+ f"{self.report.num_email_fallback_to_username} entities fell back from email to username "
3881
+ f"(email was not available)"
3882
+ )
@@ -65,6 +65,7 @@ workbook_graphql_query = """
65
65
  projectName
66
66
  owner {
67
67
  username
68
+ email
68
69
  }
69
70
  description
70
71
  uri
@@ -107,6 +108,7 @@ sheet_graphql_query = """
107
108
  luid
108
109
  owner {
109
110
  username
111
+ email
110
112
  }
111
113
  }
112
114
  datasourceFields {
@@ -185,6 +187,7 @@ dashboard_graphql_query = """
185
187
  luid
186
188
  owner {
187
189
  username
190
+ email
188
191
  }
189
192
  }
190
193
  }
@@ -268,6 +271,7 @@ embedded_datasource_graphql_query = """
268
271
  luid
269
272
  owner {
270
273
  username
274
+ email
271
275
  }
272
276
  }
273
277
  }
@@ -424,6 +428,7 @@ published_datasource_graphql_query = """
424
428
  }
425
429
  owner {
426
430
  username
431
+ email
427
432
  }
428
433
  description
429
434
  uri
@@ -579,10 +584,12 @@ def get_platform(connection_type: str) -> str:
579
584
  platform = "oracle"
580
585
  elif connection_type in ("tbio", "teradata"):
581
586
  platform = "teradata"
582
- elif connection_type in ("sqlserver"):
587
+ elif connection_type in ("sqlserver",):
583
588
  platform = "mssql"
584
- elif connection_type in ("athena"):
589
+ elif connection_type in ("athena",):
585
590
  platform = "athena"
591
+ elif connection_type in ("googlebigquery",):
592
+ platform = "bigquery"
586
593
  elif connection_type.endswith("_jdbc"):
587
594
  # e.g. convert trino_jdbc -> trino
588
595
  platform = connection_type[: -len("_jdbc")]
@@ -50,7 +50,6 @@ TABLES = "tables"
50
50
  DESCRIPTION = "description"
51
51
  SQL = "SQL"
52
52
  QUERY = "query"
53
- SUB_TYPES = "subTypes"
54
53
  VIEW = "view"
55
54
  CUSTOM_SQL = "Custom SQL"
56
55
  REMOTE_TYPE = "remoteType"
@@ -58,9 +57,9 @@ UNKNOWN = "UNKNOWN"
58
57
  PUBLISHED_DATA_SOURCE = "PublishedDatasource"
59
58
  LUID = "luid"
60
59
  EMBEDDED_DATA_SOURCE = "EmbeddedDatasource"
61
- UPSTREAM_LINEAGE = "upstreamLineage"
62
60
  OWNER = "owner"
63
61
  USERNAME = "username"
62
+ EMAIL = "email"
64
63
  HAS_EXTRACTS = "hasExtracts"
65
64
  EXTRACT_LAST_REFRESH_TIME = "extractLastRefreshTime"
66
65
  EXTRACT_LAST_INCREMENTAL_UPDATE_TIME = "extractLastIncrementalUpdateTime"
@@ -78,8 +77,6 @@ CHART = "chart"
78
77
  DASHBOARD = "dashboard"
79
78
  DASHBOARDS_CONNECTION = "dashboardsConnection"
80
79
  EMBEDDED_DATA_SOURCES_CONNECTION = "embeddedDatasourcesConnection"
81
- PROJECT = "Project"
82
- SITE = "Site"
83
80
  IS_UNSUPPORTED_CUSTOM_SQL = "isUnsupportedCustomSql"
84
81
  SITE_PERMISSION = "sitePermission"
85
82
  ROLE_SITE_ADMIN_EXPLORER = "SiteAdministratorExplorer"
@@ -1,4 +1,5 @@
1
1
  from dataclasses import dataclass
2
+ from typing import Optional
2
3
 
3
4
  from tableauserverclient import Server, UserItem
4
5
 
@@ -10,6 +11,7 @@ class UserInfo:
10
11
  user_name: str
11
12
  site_role: str
12
13
  site_id: str
14
+ email: Optional[str] = None
13
15
 
14
16
  def has_site_administrator_explorer_privileges(self):
15
17
  return self.site_role in [
@@ -34,4 +36,5 @@ class UserInfo:
34
36
  user_name=user.name,
35
37
  site_role=user.site_role,
36
38
  site_id=server.site_id,
39
+ email=user.email,
37
40
  )