acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (503)
  1. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
  2. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
  3. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
  5. datahub/_version.py +1 -1
  6. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  7. datahub/api/entities/assertion/assertion.py +1 -1
  8. datahub/api/entities/common/serialized_value.py +1 -1
  9. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  10. datahub/api/entities/datacontract/datacontract.py +35 -3
  11. datahub/api/entities/datajob/dataflow.py +18 -3
  12. datahub/api/entities/datajob/datajob.py +24 -4
  13. datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
  14. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  15. datahub/api/entities/dataset/dataset.py +47 -72
  16. datahub/api/entities/external/__init__.py +0 -0
  17. datahub/api/entities/external/external_entities.py +724 -0
  18. datahub/api/entities/external/external_tag.py +147 -0
  19. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  20. datahub/api/entities/external/restricted_text.py +172 -0
  21. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  22. datahub/api/entities/forms/forms.py +37 -37
  23. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  24. datahub/api/graphql/assertion.py +1 -1
  25. datahub/api/graphql/base.py +8 -6
  26. datahub/api/graphql/operation.py +14 -10
  27. datahub/cli/check_cli.py +91 -9
  28. datahub/cli/cli_utils.py +63 -0
  29. datahub/cli/config_utils.py +20 -12
  30. datahub/cli/container_cli.py +5 -0
  31. datahub/cli/delete_cli.py +133 -34
  32. datahub/cli/docker_check.py +110 -14
  33. datahub/cli/docker_cli.py +155 -231
  34. datahub/cli/exists_cli.py +2 -3
  35. datahub/cli/get_cli.py +2 -3
  36. datahub/cli/graphql_cli.py +1422 -0
  37. datahub/cli/iceberg_cli.py +11 -5
  38. datahub/cli/ingest_cli.py +25 -26
  39. datahub/cli/migrate.py +12 -9
  40. datahub/cli/migration_utils.py +4 -3
  41. datahub/cli/put_cli.py +4 -6
  42. datahub/cli/quickstart_versioning.py +53 -10
  43. datahub/cli/specific/assertions_cli.py +39 -7
  44. datahub/cli/specific/datacontract_cli.py +57 -9
  45. datahub/cli/specific/dataproduct_cli.py +12 -24
  46. datahub/cli/specific/dataset_cli.py +31 -21
  47. datahub/cli/specific/forms_cli.py +2 -5
  48. datahub/cli/specific/group_cli.py +2 -3
  49. datahub/cli/specific/structuredproperties_cli.py +5 -7
  50. datahub/cli/specific/user_cli.py +174 -4
  51. datahub/cli/state_cli.py +2 -3
  52. datahub/cli/timeline_cli.py +2 -3
  53. datahub/configuration/common.py +46 -2
  54. datahub/configuration/connection_resolver.py +5 -2
  55. datahub/configuration/env_vars.py +331 -0
  56. datahub/configuration/import_resolver.py +7 -4
  57. datahub/configuration/kafka.py +21 -1
  58. datahub/configuration/pydantic_migration_helpers.py +6 -13
  59. datahub/configuration/source_common.py +4 -3
  60. datahub/configuration/validate_field_deprecation.py +5 -2
  61. datahub/configuration/validate_field_removal.py +8 -2
  62. datahub/configuration/validate_field_rename.py +6 -5
  63. datahub/configuration/validate_multiline_string.py +5 -2
  64. datahub/emitter/mce_builder.py +12 -8
  65. datahub/emitter/mcp.py +20 -5
  66. datahub/emitter/mcp_builder.py +12 -0
  67. datahub/emitter/request_helper.py +138 -15
  68. datahub/emitter/response_helper.py +111 -19
  69. datahub/emitter/rest_emitter.py +399 -163
  70. datahub/entrypoints.py +10 -5
  71. datahub/errors.py +12 -0
  72. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
  73. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  74. datahub/ingestion/api/common.py +9 -0
  75. datahub/ingestion/api/decorators.py +15 -3
  76. datahub/ingestion/api/report.py +381 -3
  77. datahub/ingestion/api/sink.py +27 -2
  78. datahub/ingestion/api/source.py +174 -62
  79. datahub/ingestion/api/source_helpers.py +41 -3
  80. datahub/ingestion/api/source_protocols.py +23 -0
  81. datahub/ingestion/autogenerated/__init__.py +0 -0
  82. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  83. datahub/ingestion/autogenerated/lineage.json +402 -0
  84. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  85. datahub/ingestion/extractor/schema_util.py +31 -5
  86. datahub/ingestion/glossary/classification_mixin.py +9 -2
  87. datahub/ingestion/graph/client.py +492 -55
  88. datahub/ingestion/graph/config.py +18 -2
  89. datahub/ingestion/graph/filters.py +96 -32
  90. datahub/ingestion/graph/links.py +55 -0
  91. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  92. datahub/ingestion/run/pipeline.py +90 -23
  93. datahub/ingestion/run/pipeline_config.py +3 -3
  94. datahub/ingestion/sink/datahub_kafka.py +1 -0
  95. datahub/ingestion/sink/datahub_rest.py +31 -23
  96. datahub/ingestion/sink/file.py +1 -0
  97. datahub/ingestion/source/abs/config.py +1 -1
  98. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  99. datahub/ingestion/source/abs/source.py +15 -30
  100. datahub/ingestion/source/apply/datahub_apply.py +6 -5
  101. datahub/ingestion/source/aws/aws_common.py +185 -13
  102. datahub/ingestion/source/aws/glue.py +517 -244
  103. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  104. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  105. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  106. datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
  107. datahub/ingestion/source/aws/tag_entities.py +270 -0
  108. datahub/ingestion/source/azure/azure_common.py +3 -3
  109. datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
  110. datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
  111. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
  112. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
  113. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  114. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  115. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  116. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  117. datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
  118. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  119. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  120. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  121. datahub/ingestion/source/cassandra/cassandra.py +7 -18
  122. datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
  123. datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
  124. datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
  125. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  126. datahub/ingestion/source/common/data_platforms.py +23 -0
  127. datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
  128. datahub/ingestion/source/common/subtypes.py +73 -1
  129. datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
  130. datahub/ingestion/source/data_lake_common/object_store.py +732 -0
  131. datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
  132. datahub/ingestion/source/datahub/config.py +19 -5
  133. datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
  134. datahub/ingestion/source/datahub/datahub_source.py +11 -1
  135. datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
  136. datahub/ingestion/source/dbt/dbt_common.py +270 -26
  137. datahub/ingestion/source/dbt/dbt_core.py +88 -47
  138. datahub/ingestion/source/dbt/dbt_tests.py +8 -6
  139. datahub/ingestion/source/debug/__init__.py +0 -0
  140. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  141. datahub/ingestion/source/delta_lake/config.py +9 -5
  142. datahub/ingestion/source/delta_lake/source.py +8 -0
  143. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  144. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  145. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  146. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
  147. datahub/ingestion/source/dremio/dremio_entities.py +6 -5
  148. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  149. datahub/ingestion/source/dremio/dremio_source.py +228 -215
  150. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  151. datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
  152. datahub/ingestion/source/excel/__init__.py +0 -0
  153. datahub/ingestion/source/excel/config.py +92 -0
  154. datahub/ingestion/source/excel/excel_file.py +539 -0
  155. datahub/ingestion/source/excel/profiling.py +308 -0
  156. datahub/ingestion/source/excel/report.py +49 -0
  157. datahub/ingestion/source/excel/source.py +662 -0
  158. datahub/ingestion/source/excel/util.py +18 -0
  159. datahub/ingestion/source/feast.py +12 -14
  160. datahub/ingestion/source/file.py +3 -0
  161. datahub/ingestion/source/fivetran/config.py +67 -8
  162. datahub/ingestion/source/fivetran/fivetran.py +228 -43
  163. datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
  164. datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
  165. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  166. datahub/ingestion/source/fivetran/response_models.py +97 -0
  167. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  168. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
  169. datahub/ingestion/source/gcs/gcs_source.py +53 -10
  170. datahub/ingestion/source/gcs/gcs_utils.py +36 -9
  171. datahub/ingestion/source/ge_data_profiler.py +146 -33
  172. datahub/ingestion/source/ge_profiling_config.py +26 -11
  173. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  174. datahub/ingestion/source/grafana/field_utils.py +307 -0
  175. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  176. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  177. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  178. datahub/ingestion/source/grafana/lineage.py +202 -0
  179. datahub/ingestion/source/grafana/models.py +137 -0
  180. datahub/ingestion/source/grafana/report.py +90 -0
  181. datahub/ingestion/source/grafana/types.py +16 -0
  182. datahub/ingestion/source/hex/__init__.py +0 -0
  183. datahub/ingestion/source/hex/api.py +402 -0
  184. datahub/ingestion/source/hex/constants.py +8 -0
  185. datahub/ingestion/source/hex/hex.py +311 -0
  186. datahub/ingestion/source/hex/mapper.py +412 -0
  187. datahub/ingestion/source/hex/model.py +78 -0
  188. datahub/ingestion/source/hex/query_fetcher.py +307 -0
  189. datahub/ingestion/source/iceberg/iceberg.py +385 -164
  190. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  191. datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
  192. datahub/ingestion/source/identity/azure_ad.py +1 -1
  193. datahub/ingestion/source/identity/okta.py +1 -14
  194. datahub/ingestion/source/kafka/kafka.py +28 -71
  195. datahub/ingestion/source/kafka/kafka_config.py +78 -0
  196. datahub/ingestion/source/kafka_connect/common.py +2 -2
  197. datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
  198. datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
  199. datahub/ingestion/source/ldap.py +1 -1
  200. datahub/ingestion/source/looker/looker_common.py +216 -86
  201. datahub/ingestion/source/looker/looker_config.py +15 -4
  202. datahub/ingestion/source/looker/looker_constant.py +4 -0
  203. datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
  204. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  205. datahub/ingestion/source/looker/looker_source.py +539 -555
  206. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  207. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  208. datahub/ingestion/source/looker/lookml_config.py +31 -3
  209. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  210. datahub/ingestion/source/looker/lookml_source.py +103 -118
  211. datahub/ingestion/source/looker/view_upstream.py +494 -1
  212. datahub/ingestion/source/metabase.py +32 -6
  213. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  214. datahub/ingestion/source/metadata/lineage.py +11 -10
  215. datahub/ingestion/source/mlflow.py +254 -23
  216. datahub/ingestion/source/mock_data/__init__.py +0 -0
  217. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  218. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  219. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  220. datahub/ingestion/source/mode.py +359 -181
  221. datahub/ingestion/source/mongodb.py +11 -1
  222. datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
  223. datahub/ingestion/source/nifi.py +5 -5
  224. datahub/ingestion/source/openapi.py +85 -38
  225. datahub/ingestion/source/openapi_parser.py +59 -40
  226. datahub/ingestion/source/powerbi/config.py +92 -27
  227. datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
  228. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  229. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  230. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
  231. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  232. datahub/ingestion/source/powerbi/powerbi.py +66 -32
  233. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  234. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
  235. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  236. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  237. datahub/ingestion/source/preset.py +3 -3
  238. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  239. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  240. datahub/ingestion/source/redash.py +1 -1
  241. datahub/ingestion/source/redshift/config.py +15 -9
  242. datahub/ingestion/source/redshift/datashares.py +1 -1
  243. datahub/ingestion/source/redshift/lineage.py +386 -687
  244. datahub/ingestion/source/redshift/profile.py +2 -2
  245. datahub/ingestion/source/redshift/query.py +24 -20
  246. datahub/ingestion/source/redshift/redshift.py +52 -111
  247. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  248. datahub/ingestion/source/redshift/report.py +0 -2
  249. datahub/ingestion/source/redshift/usage.py +13 -11
  250. datahub/ingestion/source/s3/report.py +4 -2
  251. datahub/ingestion/source/s3/source.py +515 -244
  252. datahub/ingestion/source/sac/sac.py +3 -1
  253. datahub/ingestion/source/salesforce.py +28 -13
  254. datahub/ingestion/source/schema/json_schema.py +14 -14
  255. datahub/ingestion/source/schema_inference/object.py +22 -6
  256. datahub/ingestion/source/sigma/config.py +75 -8
  257. datahub/ingestion/source/sigma/data_classes.py +3 -0
  258. datahub/ingestion/source/sigma/sigma.py +36 -7
  259. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  260. datahub/ingestion/source/slack/slack.py +403 -140
  261. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  262. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  263. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  264. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  265. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  266. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  267. datahub/ingestion/source/snowflake/constants.py +4 -0
  268. datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
  269. datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
  270. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
  271. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  272. datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
  273. datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
  274. datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
  275. datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
  276. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
  277. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  278. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  279. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  280. datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
  281. datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
  282. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  283. datahub/ingestion/source/sql/athena.py +219 -26
  284. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  285. datahub/ingestion/source/sql/clickhouse.py +29 -9
  286. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  287. datahub/ingestion/source/sql/druid.py +9 -4
  288. datahub/ingestion/source/sql/hana.py +3 -1
  289. datahub/ingestion/source/sql/hive.py +28 -8
  290. datahub/ingestion/source/sql/hive_metastore.py +24 -25
  291. datahub/ingestion/source/sql/mariadb.py +0 -1
  292. datahub/ingestion/source/sql/mssql/job_models.py +18 -2
  293. datahub/ingestion/source/sql/mssql/source.py +376 -62
  294. datahub/ingestion/source/sql/mysql.py +154 -4
  295. datahub/ingestion/source/sql/oracle.py +62 -11
  296. datahub/ingestion/source/sql/postgres.py +142 -6
  297. datahub/ingestion/source/sql/presto.py +20 -2
  298. datahub/ingestion/source/sql/sql_common.py +281 -49
  299. datahub/ingestion/source/sql/sql_config.py +1 -34
  300. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  301. datahub/ingestion/source/sql/sql_types.py +27 -2
  302. datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
  303. datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
  304. datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
  305. datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
  306. datahub/ingestion/source/sql/teradata.py +1028 -245
  307. datahub/ingestion/source/sql/trino.py +43 -10
  308. datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
  309. datahub/ingestion/source/sql/vertica.py +14 -7
  310. datahub/ingestion/source/sql_queries.py +219 -121
  311. datahub/ingestion/source/state/checkpoint.py +8 -29
  312. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  313. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  314. datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
  315. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  316. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  317. datahub/ingestion/source/superset.py +810 -126
  318. datahub/ingestion/source/tableau/tableau.py +172 -69
  319. datahub/ingestion/source/tableau/tableau_common.py +11 -4
  320. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  321. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  322. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  323. datahub/ingestion/source/unity/config.py +161 -40
  324. datahub/ingestion/source/unity/connection.py +61 -0
  325. datahub/ingestion/source/unity/connection_test.py +1 -0
  326. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  327. datahub/ingestion/source/unity/proxy.py +794 -51
  328. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  329. datahub/ingestion/source/unity/proxy_types.py +36 -2
  330. datahub/ingestion/source/unity/report.py +15 -3
  331. datahub/ingestion/source/unity/source.py +465 -131
  332. datahub/ingestion/source/unity/tag_entities.py +197 -0
  333. datahub/ingestion/source/unity/usage.py +46 -4
  334. datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
  335. datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
  336. datahub/ingestion/source/usage/usage_common.py +4 -68
  337. datahub/ingestion/source/vertexai/__init__.py +0 -0
  338. datahub/ingestion/source/vertexai/vertexai.py +1367 -0
  339. datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
  340. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
  341. datahub/ingestion/source_config/pulsar.py +3 -1
  342. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  343. datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
  344. datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
  345. datahub/ingestion/transformer/base_transformer.py +8 -5
  346. datahub/ingestion/transformer/dataset_domain.py +1 -1
  347. datahub/ingestion/transformer/set_browse_path.py +112 -0
  348. datahub/integrations/assertion/common.py +3 -2
  349. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  350. datahub/lite/lite_util.py +2 -2
  351. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
  352. datahub/metadata/_urns/urn_defs.py +1866 -1582
  353. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  354. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  355. datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
  356. datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
  357. datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
  358. datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
  359. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  360. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  361. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  362. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
  363. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  364. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  365. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  366. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  367. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  368. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  369. datahub/metadata/schema.avsc +18404 -16617
  370. datahub/metadata/schema_classes.py +3 -3
  371. datahub/metadata/schemas/Actors.avsc +38 -1
  372. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  373. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  374. datahub/metadata/schemas/Applications.avsc +38 -0
  375. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  376. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  377. datahub/metadata/schemas/ChartKey.avsc +1 -0
  378. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  379. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  380. datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
  381. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  382. datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
  383. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  384. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  385. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  386. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  387. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  388. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  389. datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  390. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  391. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  392. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  393. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  394. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  395. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  396. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  397. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  398. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  399. datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
  400. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  401. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  402. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  403. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  404. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  405. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  406. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  407. datahub/metadata/schemas/Deprecation.avsc +2 -0
  408. datahub/metadata/schemas/DomainKey.avsc +2 -1
  409. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  410. datahub/metadata/schemas/FormInfo.avsc +5 -0
  411. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  412. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  413. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  414. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  415. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  416. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  417. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  418. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  419. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  420. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  421. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  422. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  423. datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
  424. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  425. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  426. datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
  427. datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
  428. datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
  429. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  430. datahub/metadata/schemas/Operation.avsc +21 -2
  431. datahub/metadata/schemas/Ownership.avsc +69 -0
  432. datahub/metadata/schemas/QueryProperties.avsc +24 -2
  433. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  434. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  435. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  436. datahub/metadata/schemas/Siblings.avsc +2 -0
  437. datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
  438. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  439. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  440. datahub/metadata/schemas/SystemMetadata.avsc +147 -0
  441. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  442. datahub/metadata/schemas/__init__.py +3 -3
  443. datahub/sdk/__init__.py +7 -0
  444. datahub/sdk/_all_entities.py +15 -0
  445. datahub/sdk/_shared.py +393 -10
  446. datahub/sdk/_utils.py +4 -0
  447. datahub/sdk/chart.py +386 -0
  448. datahub/sdk/container.py +7 -0
  449. datahub/sdk/dashboard.py +453 -0
  450. datahub/sdk/dataflow.py +309 -0
  451. datahub/sdk/datajob.py +367 -0
  452. datahub/sdk/dataset.py +180 -4
  453. datahub/sdk/entity.py +99 -3
  454. datahub/sdk/entity_client.py +154 -12
  455. datahub/sdk/lineage_client.py +943 -0
  456. datahub/sdk/main_client.py +83 -8
  457. datahub/sdk/mlmodel.py +383 -0
  458. datahub/sdk/mlmodelgroup.py +240 -0
  459. datahub/sdk/search_client.py +85 -8
  460. datahub/sdk/search_filters.py +393 -68
  461. datahub/secret/datahub_secret_store.py +5 -1
  462. datahub/secret/environment_secret_store.py +29 -0
  463. datahub/secret/file_secret_store.py +49 -0
  464. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  465. datahub/specific/aspect_helpers/siblings.py +73 -0
  466. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  467. datahub/specific/chart.py +1 -1
  468. datahub/specific/datajob.py +15 -1
  469. datahub/specific/dataproduct.py +4 -0
  470. datahub/specific/dataset.py +51 -59
  471. datahub/sql_parsing/_sqlglot_patch.py +1 -2
  472. datahub/sql_parsing/fingerprint_utils.py +6 -0
  473. datahub/sql_parsing/split_statements.py +30 -3
  474. datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
  475. datahub/sql_parsing/sqlglot_lineage.py +517 -44
  476. datahub/sql_parsing/sqlglot_utils.py +30 -18
  477. datahub/sql_parsing/tool_meta_extractor.py +25 -2
  478. datahub/telemetry/telemetry.py +30 -16
  479. datahub/testing/check_imports.py +1 -1
  480. datahub/testing/docker_utils.py +8 -2
  481. datahub/testing/mce_helpers.py +421 -0
  482. datahub/testing/mcp_diff.py +17 -21
  483. datahub/testing/sdk_v2_helpers.py +18 -0
  484. datahub/upgrade/upgrade.py +86 -30
  485. datahub/utilities/file_backed_collections.py +14 -15
  486. datahub/utilities/hive_schema_to_avro.py +2 -2
  487. datahub/utilities/ingest_utils.py +2 -2
  488. datahub/utilities/is_pytest.py +3 -2
  489. datahub/utilities/logging_manager.py +30 -7
  490. datahub/utilities/mapping.py +29 -2
  491. datahub/utilities/sample_data.py +5 -4
  492. datahub/utilities/server_config_util.py +298 -10
  493. datahub/utilities/sqlalchemy_query_combiner.py +6 -4
  494. datahub/utilities/stats_collections.py +4 -0
  495. datahub/utilities/threaded_iterator_executor.py +16 -3
  496. datahub/utilities/urn_encoder.py +1 -1
  497. datahub/utilities/urns/urn.py +41 -2
  498. datahub/emitter/sql_parsing_builder.py +0 -306
  499. datahub/ingestion/source/redshift/lineage_v2.py +0 -458
  500. datahub/ingestion/source/vertexai.py +0 -697
  501. datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
  502. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
  503. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -9,6 +9,7 @@ from sqlalchemy import create_engine
9
9
 
10
10
  from datahub.configuration.common import AllowDenyPattern, ConfigurationError
11
11
  from datahub.ingestion.source.fivetran.config import (
12
+ DISABLE_COL_LINEAGE_FOR_CONNECTOR_TYPES,
12
13
  Constant,
13
14
  FivetranLogConfig,
14
15
  FivetranSourceReport,
@@ -54,7 +55,7 @@ class FivetranLogAPI:
54
55
  snowflake_destination_config.database,
55
56
  )
56
57
  )
57
- fivetran_log_query.set_db(
58
+ fivetran_log_query.set_schema(
58
59
  snowflake_destination_config.log_schema,
59
60
  )
60
61
  fivetran_log_database = snowflake_destination_config.database
@@ -66,8 +67,26 @@ class FivetranLogAPI:
66
67
  engine = create_engine(
67
68
  bigquery_destination_config.get_sql_alchemy_url(),
68
69
  )
69
- fivetran_log_query.set_db(bigquery_destination_config.dataset)
70
- fivetran_log_database = bigquery_destination_config.dataset
70
+ fivetran_log_query.set_schema(bigquery_destination_config.dataset)
71
+
72
+ # The "database" should be the BigQuery project name.
73
+ result = engine.execute("SELECT @@project_id").fetchone()
74
+ if result is None:
75
+ raise ValueError("Failed to retrieve BigQuery project ID")
76
+ fivetran_log_database = result[0]
77
+ elif destination_platform == "databricks":
78
+ databricks_destination_config = (
79
+ self.fivetran_log_config.databricks_destination_config
80
+ )
81
+ if databricks_destination_config is not None:
82
+ engine = create_engine(
83
+ databricks_destination_config.get_sql_alchemy_url(
84
+ databricks_destination_config.catalog
85
+ ),
86
+ **databricks_destination_config.get_options(),
87
+ )
88
+ fivetran_log_query.set_schema(databricks_destination_config.log_schema)
89
+ fivetran_log_database = databricks_destination_config.catalog
71
90
  else:
72
91
  raise ConfigurationError(
73
92
  f"Destination platform '{destination_platform}' is not yet supported."
@@ -94,7 +113,11 @@ class FivetranLogAPI:
94
113
  """
95
114
  Returns dict of column lineage metadata with key as (<SOURCE_TABLE_ID>, <DESTINATION_TABLE_ID>)
96
115
  """
97
- all_column_lineage = defaultdict(list)
116
+ all_column_lineage: Dict[Tuple[str, str], List] = defaultdict(list)
117
+
118
+ if not connector_ids:
119
+ return dict(all_column_lineage)
120
+
98
121
  column_lineage_result = self._query(
99
122
  self.fivetran_log_query.get_column_lineage_query(
100
123
  connector_ids=connector_ids
@@ -112,7 +135,11 @@ class FivetranLogAPI:
112
135
  """
113
136
  Returns dict of table lineage metadata with key as 'CONNECTOR_ID'
114
137
  """
115
- connectors_table_lineage_metadata = defaultdict(list)
138
+ connectors_table_lineage_metadata: Dict[str, List] = defaultdict(list)
139
+
140
+ if not connector_ids:
141
+ return dict(connectors_table_lineage_metadata)
142
+
116
143
  table_lineage_result = self._query(
117
144
  self.fivetran_log_query.get_table_lineage_query(connector_ids=connector_ids)
118
145
  )
@@ -190,7 +217,7 @@ class FivetranLogAPI:
190
217
  jobs: List[Job] = []
191
218
  if connector_sync_log is None:
192
219
  return jobs
193
- for sync_id in connector_sync_log.keys():
220
+ for sync_id in connector_sync_log:
194
221
  if len(connector_sync_log[sync_id]) != 2:
195
222
  # If both sync-start and sync-end event log not present for this sync that means sync is still in progress
196
223
  continue
@@ -228,9 +255,15 @@ class FivetranLogAPI:
228
255
  return self._get_users().get(user_id)
229
256
 
230
257
  def _fill_connectors_lineage(self, connectors: List[Connector]) -> None:
231
- connector_ids = [connector.connector_id for connector in connectors]
232
- table_lineage_metadata = self._get_table_lineage_metadata(connector_ids)
233
- column_lineage_metadata = self._get_column_lineage_metadata(connector_ids)
258
+ # Create 2 filtered connector_ids lists - one for table lineage and one for column lineage
259
+ tll_connector_ids: List[str] = []
260
+ cll_connector_ids: List[str] = []
261
+ for connector in connectors:
262
+ tll_connector_ids.append(connector.connector_id)
263
+ if connector.connector_type not in DISABLE_COL_LINEAGE_FOR_CONNECTOR_TYPES:
264
+ cll_connector_ids.append(connector.connector_id)
265
+ table_lineage_metadata = self._get_table_lineage_metadata(tll_connector_ids)
266
+ column_lineage_metadata = self._get_column_lineage_metadata(cll_connector_ids)
234
267
  for connector in connectors:
235
268
  connector.lineage = self._extract_connector_lineage(
236
269
  table_lineage_result=table_lineage_metadata.get(connector.connector_id),
@@ -6,34 +6,56 @@ MAX_COLUMN_LINEAGE_PER_CONNECTOR = 1000
6
6
  MAX_JOBS_PER_CONNECTOR = 500
7
7
 
8
8
 
9
+ """
10
+ ------------------------------------------------------------------------------------------------------------
11
+ Fivetran Platform Connector Handling
12
+ ------------------------------------------------------------------------------------------------------------
13
+ Current Query Change Log: August 2025 (See: https://fivetran.com/docs/changelog/2025/august-2025)
14
+
15
+ All queries must be updated to match each new Fivetran Platform Connector release, if any. We expect customers
16
+ and Fivetran to keep the Platform Connector configured for DataHub with auto-sync enabled to get the latest changes.
17
+
18
+ References:
19
+ - Fivetran Release Notes: https://fivetran.com/docs/changelog (Look for "Fivetran Platform Connector")
20
+ - Latest Platform Connector Schema: https://fivetran.com/docs/logs/fivetran-platform?erdModal=open
21
+ """
22
+
23
+
9
24
  class FivetranLogQuery:
10
25
  # Note: All queries are written in Snowflake SQL.
11
26
  # They will be transpiled to the target database's SQL dialect at runtime.
12
27
 
13
28
  def __init__(self) -> None:
14
29
  # Select query db clause
15
- self.db_clause: str = ""
16
-
17
- def set_db(self, db_name: str) -> None:
18
- self.db_clause = f"{db_name}."
30
+ self.schema_clause: str = ""
19
31
 
20
32
  def use_database(self, db_name: str) -> str:
21
33
  return f"use database {db_name}"
22
34
 
35
+ def set_schema(self, schema_name: str) -> None:
36
+ """
37
+ Using Snowflake quoted identifiers convention
38
+
39
+ Add double quotes around an identifier
40
+ Use two quotes to use the double quote character inside a quoted identifier
41
+ """
42
+ schema_name = schema_name.replace('"', '""')
43
+ self.schema_clause = f'"{schema_name}".'
44
+
23
45
  def get_connectors_query(self) -> str:
24
46
  return f"""\
25
47
  SELECT
26
- connector_id,
48
+ connection_id,
27
49
  connecting_user_id,
28
50
  connector_type_id,
29
- connector_name,
51
+ connection_name,
30
52
  paused,
31
53
  sync_frequency,
32
54
  destination_id
33
- FROM {self.db_clause}connector
55
+ FROM {self.schema_clause}connection
34
56
  WHERE
35
57
  _fivetran_deleted = FALSE
36
- QUALIFY ROW_NUMBER() OVER (PARTITION BY connector_id ORDER BY _fivetran_synced DESC) = 1
58
+ QUALIFY ROW_NUMBER() OVER (PARTITION BY connection_id ORDER BY _fivetran_synced DESC) = 1
37
59
  """
38
60
 
39
61
  def get_users_query(self) -> str:
@@ -42,7 +64,7 @@ SELECT id as user_id,
42
64
  given_name,
43
65
  family_name,
44
66
  email
45
- FROM {self.db_clause}user
67
+ FROM {self.schema_clause}user
46
68
  """
47
69
 
48
70
  def get_sync_logs_query(
@@ -56,20 +78,20 @@ FROM {self.db_clause}user
56
78
  return f"""\
57
79
  WITH ranked_syncs AS (
58
80
  SELECT
59
- connector_id,
81
+ connection_id,
60
82
  sync_id,
61
83
  MAX(CASE WHEN message_event = 'sync_start' THEN time_stamp END) as start_time,
62
84
  MAX(CASE WHEN message_event = 'sync_end' THEN time_stamp END) as end_time,
63
85
  MAX(CASE WHEN message_event = 'sync_end' THEN message_data END) as end_message_data,
64
- ROW_NUMBER() OVER (PARTITION BY connector_id ORDER BY MAX(time_stamp) DESC) as rn
65
- FROM {self.db_clause}log
86
+ ROW_NUMBER() OVER (PARTITION BY connection_id ORDER BY MAX(time_stamp) DESC) as rn
87
+ FROM {self.schema_clause}log
66
88
  WHERE message_event in ('sync_start', 'sync_end')
67
89
  AND time_stamp > CURRENT_TIMESTAMP - INTERVAL '{syncs_interval} days'
68
- AND connector_id IN ({formatted_connector_ids})
69
- GROUP BY connector_id, sync_id
90
+ AND connection_id IN ({formatted_connector_ids})
91
+ GROUP BY connection_id, sync_id
70
92
  )
71
93
  SELECT
72
- connector_id,
94
+ connection_id,
73
95
  sync_id,
74
96
  start_time,
75
97
  end_time,
@@ -78,7 +100,7 @@ FROM ranked_syncs
78
100
  WHERE rn <= {MAX_JOBS_PER_CONNECTOR}
79
101
  AND start_time IS NOT NULL
80
102
  AND end_time IS NOT NULL
81
- ORDER BY connector_id, end_time DESC
103
+ ORDER BY connection_id, end_time DESC
82
104
  """
83
105
 
84
106
  def get_table_lineage_query(self, connector_ids: List[str]) -> str:
@@ -90,7 +112,7 @@ SELECT
90
112
  *
91
113
  FROM (
92
114
  SELECT
93
- stm.connector_id as connector_id,
115
+ stm.connection_id as connection_id,
94
116
  stm.id as source_table_id,
95
117
  stm.name as source_table_name,
96
118
  ssm.name as source_schema_name,
@@ -98,18 +120,18 @@ FROM (
98
120
  dtm.name as destination_table_name,
99
121
  dsm.name as destination_schema_name,
100
122
  tl.created_at as created_at,
101
- ROW_NUMBER() OVER (PARTITION BY stm.connector_id, stm.id, dtm.id ORDER BY tl.created_at DESC) as table_combo_rn
102
- FROM {self.db_clause}table_lineage as tl
103
- JOIN {self.db_clause}source_table_metadata as stm on tl.source_table_id = stm.id
104
- JOIN {self.db_clause}destination_table_metadata as dtm on tl.destination_table_id = dtm.id
105
- JOIN {self.db_clause}source_schema_metadata as ssm on stm.schema_id = ssm.id
106
- JOIN {self.db_clause}destination_schema_metadata as dsm on dtm.schema_id = dsm.id
107
- WHERE stm.connector_id IN ({formatted_connector_ids})
123
+ ROW_NUMBER() OVER (PARTITION BY stm.connection_id, stm.id, dtm.id ORDER BY tl.created_at DESC) as table_combo_rn
124
+ FROM {self.schema_clause}table_lineage as tl
125
+ JOIN {self.schema_clause}source_table as stm on tl.source_table_id = stm.id -- stm: source_table_metadata
126
+ JOIN {self.schema_clause}destination_table as dtm on tl.destination_table_id = dtm.id -- dtm: destination_table_metadata
127
+ JOIN {self.schema_clause}source_schema as ssm on stm.schema_id = ssm.id -- ssm: source_schema_metadata
128
+ JOIN {self.schema_clause}destination_schema as dsm on dtm.schema_id = dsm.id -- dsm: destination_schema_metadata
129
+ WHERE stm.connection_id IN ({formatted_connector_ids})
108
130
  )
109
131
  -- Ensure that we only get back one entry per source and destination pair.
110
132
  WHERE table_combo_rn = 1
111
- QUALIFY ROW_NUMBER() OVER (PARTITION BY connector_id ORDER BY created_at DESC) <= {MAX_TABLE_LINEAGE_PER_CONNECTOR}
112
- ORDER BY connector_id, created_at DESC
133
+ QUALIFY ROW_NUMBER() OVER (PARTITION BY connection_id ORDER BY created_at DESC) <= {MAX_TABLE_LINEAGE_PER_CONNECTOR}
134
+ ORDER BY connection_id, created_at DESC
113
135
  """
114
136
 
115
137
  def get_column_lineage_query(self, connector_ids: List[str]) -> str:
@@ -124,25 +146,25 @@ SELECT
124
146
  destination_column_name
125
147
  FROM (
126
148
  SELECT
127
- stm.connector_id as connector_id,
149
+ stm.connection_id as connection_id,
128
150
  scm.table_id as source_table_id,
129
151
  dcm.table_id as destination_table_id,
130
152
  scm.name as source_column_name,
131
153
  dcm.name as destination_column_name,
132
154
  cl.created_at as created_at,
133
- ROW_NUMBER() OVER (PARTITION BY stm.connector_id, cl.source_column_id, cl.destination_column_id ORDER BY cl.created_at DESC) as column_combo_rn
134
- FROM {self.db_clause}column_lineage as cl
135
- JOIN {self.db_clause}source_column_metadata as scm
155
+ ROW_NUMBER() OVER (PARTITION BY stm.connection_id, cl.source_column_id, cl.destination_column_id ORDER BY cl.created_at DESC) as column_combo_rn
156
+ FROM {self.schema_clause}column_lineage as cl
157
+ JOIN {self.schema_clause}source_column as scm -- scm: source_column_metadata
136
158
  ON cl.source_column_id = scm.id
137
- JOIN {self.db_clause}destination_column_metadata as dcm
159
+ JOIN {self.schema_clause}destination_column as dcm -- dcm: destination_column_metadata
138
160
  ON cl.destination_column_id = dcm.id
139
- -- Only joining source_table_metadata to get the connector_id.
140
- JOIN {self.db_clause}source_table_metadata as stm
161
+ -- Only joining source_table to get the connection_id.
162
+ JOIN {self.schema_clause}source_table as stm -- stm: source_table_metadata
141
163
  ON scm.table_id = stm.id
142
- WHERE stm.connector_id IN ({formatted_connector_ids})
164
+ WHERE stm.connection_id IN ({formatted_connector_ids})
143
165
  )
144
166
  -- Ensure that we only get back one entry per (connector, source column, destination column) pair.
145
167
  WHERE column_combo_rn = 1
146
- QUALIFY ROW_NUMBER() OVER (PARTITION BY connector_id ORDER BY created_at DESC) <= {MAX_COLUMN_LINEAGE_PER_CONNECTOR}
147
- ORDER BY connector_id, created_at DESC
168
+ QUALIFY ROW_NUMBER() OVER (PARTITION BY connection_id ORDER BY created_at DESC) <= {MAX_COLUMN_LINEAGE_PER_CONNECTOR}
169
+ ORDER BY connection_id, created_at DESC
148
170
  """
@@ -0,0 +1,65 @@
1
+ import logging
2
+
3
+ import requests
4
+ from requests.adapters import HTTPAdapter
5
+ from urllib3.util import Retry
6
+
7
+ from datahub.ingestion.source.fivetran.config import (
8
+ FivetranAPIConfig,
9
+ )
10
+ from datahub.ingestion.source.fivetran.response_models import FivetranConnectionDetails
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+ # Retry configuration constants
15
+ RETRY_MAX_TIMES = 3
16
+ RETRY_STATUS_CODES = [429, 500, 502, 503, 504]
17
+ RETRY_BACKOFF_FACTOR = 1
18
+ RETRY_ALLOWED_METHODS = ["GET"]
19
+
20
+
21
+ class FivetranAPIClient:
22
+ """Client for interacting with the Fivetran REST API."""
23
+
24
+ def __init__(self, config: FivetranAPIConfig) -> None:
25
+ self.config = config
26
+ self._session = self._create_session()
27
+
28
+ def _create_session(self) -> requests.Session:
29
+ """
30
+ Create a session with retry logic and basic authentication
31
+ """
32
+ requests_session = requests.Session()
33
+
34
+ # Configure retry strategy for transient failures
35
+ retry_strategy = Retry(
36
+ total=RETRY_MAX_TIMES,
37
+ backoff_factor=RETRY_BACKOFF_FACTOR,
38
+ status_forcelist=RETRY_STATUS_CODES,
39
+ allowed_methods=RETRY_ALLOWED_METHODS,
40
+ raise_on_status=True,
41
+ )
42
+
43
+ adapter = HTTPAdapter(max_retries=retry_strategy)
44
+ requests_session.mount("http://", adapter)
45
+ requests_session.mount("https://", adapter)
46
+
47
+ # Set up basic authentication
48
+ requests_session.auth = (self.config.api_key, self.config.api_secret)
49
+ requests_session.headers.update(
50
+ {
51
+ "Content-Type": "application/json",
52
+ "Accept": "application/json",
53
+ }
54
+ )
55
+ return requests_session
56
+
57
+ def get_connection_details_by_id(
58
+ self, connection_id: str
59
+ ) -> FivetranConnectionDetails:
60
+ """Get details for a specific connection."""
61
+ connection_details = self._session.get(
62
+ f"{self.config.base_url}/v1/connections/{connection_id}",
63
+ timeout=self.config.request_timeout_sec,
64
+ )
65
+ return FivetranConnectionDetails(**connection_details.json().get("data", {}))
@@ -0,0 +1,97 @@
1
+ import datetime
2
+ from typing import Dict, List
3
+
4
+ from pydantic import BaseModel
5
+
6
+
7
+ class FivetranConnectionWarnings(BaseModel):
8
+ code: str # Warning Code
9
+ message: str # Warning Message
10
+ details: Dict # Warning Details
11
+
12
+
13
+ class FivetranConnectionStatus(BaseModel):
14
+ setup_state: str # Setup State
15
+ schema_status: str # Schema Status
16
+ sync_state: str # Sync State
17
+ update_state: str # Update State
18
+ is_historical_sync: bool # Is Historical Sync
19
+ warnings: List[FivetranConnectionWarnings] # Warnings
20
+
21
+
22
+ class FivetranConnectionConfig(BaseModel):
23
+ # Note: Connection Config is different for different connectors
24
+ auth_type: str # Auth Type
25
+ sheet_id: str # Sheet ID - URL to the Google Sheet
26
+ named_range: str # Named Range
27
+
28
+
29
+ class FivetranConnectionSourceSyncDetails(BaseModel):
30
+ last_synced: datetime.datetime # Last Synced
31
+
32
+
33
+ class FivetranConnectionDetails(BaseModel):
34
+ """
35
+ Note: This response class only captures fields that are relevant to the Google Sheets Connector
36
+ """
37
+
38
+ id: str # Source ID
39
+ group_id: str # Destination ID
40
+ service: str # Connector Type
41
+ created_at: datetime.datetime
42
+ succeeded_at: datetime.datetime
43
+ paused: bool # Paused Status
44
+ sync_frequency: int # Sync Frequency (minutes)
45
+ status: FivetranConnectionStatus # Status
46
+ config: FivetranConnectionConfig # Connection Config
47
+ source_sync_details: FivetranConnectionSourceSyncDetails # Source Sync Details
48
+
49
+ """
50
+ # Sample Response for Google Sheets Connector
51
+ {
52
+ "code": "Success",
53
+ "data": {
54
+ "id": "dialectical_remindful",
55
+ "group_id": "empties_classification",
56
+ "service": "google_sheets",
57
+ "service_version": 1,
58
+ "schema": "fivetran_google_sheets.fivetran_google_sheets",
59
+ "connected_by": "sewn_restrained",
60
+ "created_at": "2025-10-06T17:53:01.554289Z",
61
+ "succeeded_at": "2025-10-06T22:55:45.275000Z",
62
+ "failed_at": null,
63
+ "paused": true,
64
+ "pause_after_trial": false,
65
+ "sync_frequency": 360,
66
+ "data_delay_threshold": 0,
67
+ "data_delay_sensitivity": "NORMAL",
68
+ "private_link_id": null,
69
+ "networking_method": "Directly",
70
+ "proxy_agent_id": null,
71
+ "schedule_type": "auto",
72
+ "status": {
73
+ "setup_state": "connected",
74
+ "schema_status": "ready",
75
+ "sync_state": "paused",
76
+ "update_state": "on_schedule",
77
+ "is_historical_sync": false,
78
+ "tasks": [],
79
+ "warnings": [
80
+ {
81
+ "code": "snowflake_discontinuing_password_auth",
82
+ "message": "Snowflake is discontinuing username/password authentication",
83
+ "details": {}
84
+ }
85
+ ]
86
+ },
87
+ "config": {
88
+ "auth_type": "ServiceAccount",
89
+ "sheet_id": "https://docs.google.com/spreadsheets/d/1A82PdLAE7NXLLb5JcLPKeIpKUMytXQba5Z-Ei-mbXLo/edit?gid=0#gid=0",
90
+ "named_range": "Fivetran_Test_Range"
91
+ },
92
+ "source_sync_details": {
93
+ "last_synced": "2025-10-06T22:55:27.371Z"
94
+ }
95
+ }
96
+ }
97
+ """
@@ -34,7 +34,6 @@ from datahub.ingestion.source.gc.soft_deleted_entity_cleanup import (
34
34
  SoftDeletedEntitiesCleanupConfig,
35
35
  SoftDeletedEntitiesReport,
36
36
  )
37
- from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport
38
37
 
39
38
  logger = logging.getLogger(__name__)
40
39
 
@@ -87,7 +86,6 @@ class DataHubGcSourceReport(
87
86
  DataProcessCleanupReport,
88
87
  SoftDeletedEntitiesReport,
89
88
  DatahubExecutionRequestCleanupReport,
90
- IngestionStageReport,
91
89
  ):
92
90
  expired_tokens_revoked: int = 0
93
91