acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (503) hide show
  1. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
  2. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
  3. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
  5. datahub/_version.py +1 -1
  6. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  7. datahub/api/entities/assertion/assertion.py +1 -1
  8. datahub/api/entities/common/serialized_value.py +1 -1
  9. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  10. datahub/api/entities/datacontract/datacontract.py +35 -3
  11. datahub/api/entities/datajob/dataflow.py +18 -3
  12. datahub/api/entities/datajob/datajob.py +24 -4
  13. datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
  14. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  15. datahub/api/entities/dataset/dataset.py +47 -72
  16. datahub/api/entities/external/__init__.py +0 -0
  17. datahub/api/entities/external/external_entities.py +724 -0
  18. datahub/api/entities/external/external_tag.py +147 -0
  19. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  20. datahub/api/entities/external/restricted_text.py +172 -0
  21. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  22. datahub/api/entities/forms/forms.py +37 -37
  23. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  24. datahub/api/graphql/assertion.py +1 -1
  25. datahub/api/graphql/base.py +8 -6
  26. datahub/api/graphql/operation.py +14 -10
  27. datahub/cli/check_cli.py +91 -9
  28. datahub/cli/cli_utils.py +63 -0
  29. datahub/cli/config_utils.py +20 -12
  30. datahub/cli/container_cli.py +5 -0
  31. datahub/cli/delete_cli.py +133 -34
  32. datahub/cli/docker_check.py +110 -14
  33. datahub/cli/docker_cli.py +155 -231
  34. datahub/cli/exists_cli.py +2 -3
  35. datahub/cli/get_cli.py +2 -3
  36. datahub/cli/graphql_cli.py +1422 -0
  37. datahub/cli/iceberg_cli.py +11 -5
  38. datahub/cli/ingest_cli.py +25 -26
  39. datahub/cli/migrate.py +12 -9
  40. datahub/cli/migration_utils.py +4 -3
  41. datahub/cli/put_cli.py +4 -6
  42. datahub/cli/quickstart_versioning.py +53 -10
  43. datahub/cli/specific/assertions_cli.py +39 -7
  44. datahub/cli/specific/datacontract_cli.py +57 -9
  45. datahub/cli/specific/dataproduct_cli.py +12 -24
  46. datahub/cli/specific/dataset_cli.py +31 -21
  47. datahub/cli/specific/forms_cli.py +2 -5
  48. datahub/cli/specific/group_cli.py +2 -3
  49. datahub/cli/specific/structuredproperties_cli.py +5 -7
  50. datahub/cli/specific/user_cli.py +174 -4
  51. datahub/cli/state_cli.py +2 -3
  52. datahub/cli/timeline_cli.py +2 -3
  53. datahub/configuration/common.py +46 -2
  54. datahub/configuration/connection_resolver.py +5 -2
  55. datahub/configuration/env_vars.py +331 -0
  56. datahub/configuration/import_resolver.py +7 -4
  57. datahub/configuration/kafka.py +21 -1
  58. datahub/configuration/pydantic_migration_helpers.py +6 -13
  59. datahub/configuration/source_common.py +4 -3
  60. datahub/configuration/validate_field_deprecation.py +5 -2
  61. datahub/configuration/validate_field_removal.py +8 -2
  62. datahub/configuration/validate_field_rename.py +6 -5
  63. datahub/configuration/validate_multiline_string.py +5 -2
  64. datahub/emitter/mce_builder.py +12 -8
  65. datahub/emitter/mcp.py +20 -5
  66. datahub/emitter/mcp_builder.py +12 -0
  67. datahub/emitter/request_helper.py +138 -15
  68. datahub/emitter/response_helper.py +111 -19
  69. datahub/emitter/rest_emitter.py +399 -163
  70. datahub/entrypoints.py +10 -5
  71. datahub/errors.py +12 -0
  72. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
  73. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  74. datahub/ingestion/api/common.py +9 -0
  75. datahub/ingestion/api/decorators.py +15 -3
  76. datahub/ingestion/api/report.py +381 -3
  77. datahub/ingestion/api/sink.py +27 -2
  78. datahub/ingestion/api/source.py +174 -62
  79. datahub/ingestion/api/source_helpers.py +41 -3
  80. datahub/ingestion/api/source_protocols.py +23 -0
  81. datahub/ingestion/autogenerated/__init__.py +0 -0
  82. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  83. datahub/ingestion/autogenerated/lineage.json +402 -0
  84. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  85. datahub/ingestion/extractor/schema_util.py +31 -5
  86. datahub/ingestion/glossary/classification_mixin.py +9 -2
  87. datahub/ingestion/graph/client.py +492 -55
  88. datahub/ingestion/graph/config.py +18 -2
  89. datahub/ingestion/graph/filters.py +96 -32
  90. datahub/ingestion/graph/links.py +55 -0
  91. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  92. datahub/ingestion/run/pipeline.py +90 -23
  93. datahub/ingestion/run/pipeline_config.py +3 -3
  94. datahub/ingestion/sink/datahub_kafka.py +1 -0
  95. datahub/ingestion/sink/datahub_rest.py +31 -23
  96. datahub/ingestion/sink/file.py +1 -0
  97. datahub/ingestion/source/abs/config.py +1 -1
  98. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  99. datahub/ingestion/source/abs/source.py +15 -30
  100. datahub/ingestion/source/apply/datahub_apply.py +6 -5
  101. datahub/ingestion/source/aws/aws_common.py +185 -13
  102. datahub/ingestion/source/aws/glue.py +517 -244
  103. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  104. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  105. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  106. datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
  107. datahub/ingestion/source/aws/tag_entities.py +270 -0
  108. datahub/ingestion/source/azure/azure_common.py +3 -3
  109. datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
  110. datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
  111. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
  112. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
  113. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  114. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  115. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  116. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  117. datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
  118. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  119. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  120. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  121. datahub/ingestion/source/cassandra/cassandra.py +7 -18
  122. datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
  123. datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
  124. datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
  125. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  126. datahub/ingestion/source/common/data_platforms.py +23 -0
  127. datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
  128. datahub/ingestion/source/common/subtypes.py +73 -1
  129. datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
  130. datahub/ingestion/source/data_lake_common/object_store.py +732 -0
  131. datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
  132. datahub/ingestion/source/datahub/config.py +19 -5
  133. datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
  134. datahub/ingestion/source/datahub/datahub_source.py +11 -1
  135. datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
  136. datahub/ingestion/source/dbt/dbt_common.py +270 -26
  137. datahub/ingestion/source/dbt/dbt_core.py +88 -47
  138. datahub/ingestion/source/dbt/dbt_tests.py +8 -6
  139. datahub/ingestion/source/debug/__init__.py +0 -0
  140. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  141. datahub/ingestion/source/delta_lake/config.py +9 -5
  142. datahub/ingestion/source/delta_lake/source.py +8 -0
  143. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  144. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  145. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  146. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
  147. datahub/ingestion/source/dremio/dremio_entities.py +6 -5
  148. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  149. datahub/ingestion/source/dremio/dremio_source.py +228 -215
  150. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  151. datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
  152. datahub/ingestion/source/excel/__init__.py +0 -0
  153. datahub/ingestion/source/excel/config.py +92 -0
  154. datahub/ingestion/source/excel/excel_file.py +539 -0
  155. datahub/ingestion/source/excel/profiling.py +308 -0
  156. datahub/ingestion/source/excel/report.py +49 -0
  157. datahub/ingestion/source/excel/source.py +662 -0
  158. datahub/ingestion/source/excel/util.py +18 -0
  159. datahub/ingestion/source/feast.py +12 -14
  160. datahub/ingestion/source/file.py +3 -0
  161. datahub/ingestion/source/fivetran/config.py +67 -8
  162. datahub/ingestion/source/fivetran/fivetran.py +228 -43
  163. datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
  164. datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
  165. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  166. datahub/ingestion/source/fivetran/response_models.py +97 -0
  167. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  168. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
  169. datahub/ingestion/source/gcs/gcs_source.py +53 -10
  170. datahub/ingestion/source/gcs/gcs_utils.py +36 -9
  171. datahub/ingestion/source/ge_data_profiler.py +146 -33
  172. datahub/ingestion/source/ge_profiling_config.py +26 -11
  173. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  174. datahub/ingestion/source/grafana/field_utils.py +307 -0
  175. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  176. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  177. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  178. datahub/ingestion/source/grafana/lineage.py +202 -0
  179. datahub/ingestion/source/grafana/models.py +137 -0
  180. datahub/ingestion/source/grafana/report.py +90 -0
  181. datahub/ingestion/source/grafana/types.py +16 -0
  182. datahub/ingestion/source/hex/__init__.py +0 -0
  183. datahub/ingestion/source/hex/api.py +402 -0
  184. datahub/ingestion/source/hex/constants.py +8 -0
  185. datahub/ingestion/source/hex/hex.py +311 -0
  186. datahub/ingestion/source/hex/mapper.py +412 -0
  187. datahub/ingestion/source/hex/model.py +78 -0
  188. datahub/ingestion/source/hex/query_fetcher.py +307 -0
  189. datahub/ingestion/source/iceberg/iceberg.py +385 -164
  190. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  191. datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
  192. datahub/ingestion/source/identity/azure_ad.py +1 -1
  193. datahub/ingestion/source/identity/okta.py +1 -14
  194. datahub/ingestion/source/kafka/kafka.py +28 -71
  195. datahub/ingestion/source/kafka/kafka_config.py +78 -0
  196. datahub/ingestion/source/kafka_connect/common.py +2 -2
  197. datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
  198. datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
  199. datahub/ingestion/source/ldap.py +1 -1
  200. datahub/ingestion/source/looker/looker_common.py +216 -86
  201. datahub/ingestion/source/looker/looker_config.py +15 -4
  202. datahub/ingestion/source/looker/looker_constant.py +4 -0
  203. datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
  204. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  205. datahub/ingestion/source/looker/looker_source.py +539 -555
  206. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  207. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  208. datahub/ingestion/source/looker/lookml_config.py +31 -3
  209. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  210. datahub/ingestion/source/looker/lookml_source.py +103 -118
  211. datahub/ingestion/source/looker/view_upstream.py +494 -1
  212. datahub/ingestion/source/metabase.py +32 -6
  213. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  214. datahub/ingestion/source/metadata/lineage.py +11 -10
  215. datahub/ingestion/source/mlflow.py +254 -23
  216. datahub/ingestion/source/mock_data/__init__.py +0 -0
  217. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  218. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  219. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  220. datahub/ingestion/source/mode.py +359 -181
  221. datahub/ingestion/source/mongodb.py +11 -1
  222. datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
  223. datahub/ingestion/source/nifi.py +5 -5
  224. datahub/ingestion/source/openapi.py +85 -38
  225. datahub/ingestion/source/openapi_parser.py +59 -40
  226. datahub/ingestion/source/powerbi/config.py +92 -27
  227. datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
  228. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  229. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  230. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
  231. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  232. datahub/ingestion/source/powerbi/powerbi.py +66 -32
  233. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  234. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
  235. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  236. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  237. datahub/ingestion/source/preset.py +3 -3
  238. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  239. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  240. datahub/ingestion/source/redash.py +1 -1
  241. datahub/ingestion/source/redshift/config.py +15 -9
  242. datahub/ingestion/source/redshift/datashares.py +1 -1
  243. datahub/ingestion/source/redshift/lineage.py +386 -687
  244. datahub/ingestion/source/redshift/profile.py +2 -2
  245. datahub/ingestion/source/redshift/query.py +24 -20
  246. datahub/ingestion/source/redshift/redshift.py +52 -111
  247. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  248. datahub/ingestion/source/redshift/report.py +0 -2
  249. datahub/ingestion/source/redshift/usage.py +13 -11
  250. datahub/ingestion/source/s3/report.py +4 -2
  251. datahub/ingestion/source/s3/source.py +515 -244
  252. datahub/ingestion/source/sac/sac.py +3 -1
  253. datahub/ingestion/source/salesforce.py +28 -13
  254. datahub/ingestion/source/schema/json_schema.py +14 -14
  255. datahub/ingestion/source/schema_inference/object.py +22 -6
  256. datahub/ingestion/source/sigma/config.py +75 -8
  257. datahub/ingestion/source/sigma/data_classes.py +3 -0
  258. datahub/ingestion/source/sigma/sigma.py +36 -7
  259. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  260. datahub/ingestion/source/slack/slack.py +403 -140
  261. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  262. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  263. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  264. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  265. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  266. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  267. datahub/ingestion/source/snowflake/constants.py +4 -0
  268. datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
  269. datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
  270. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
  271. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  272. datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
  273. datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
  274. datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
  275. datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
  276. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
  277. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  278. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  279. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  280. datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
  281. datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
  282. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  283. datahub/ingestion/source/sql/athena.py +219 -26
  284. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  285. datahub/ingestion/source/sql/clickhouse.py +29 -9
  286. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  287. datahub/ingestion/source/sql/druid.py +9 -4
  288. datahub/ingestion/source/sql/hana.py +3 -1
  289. datahub/ingestion/source/sql/hive.py +28 -8
  290. datahub/ingestion/source/sql/hive_metastore.py +24 -25
  291. datahub/ingestion/source/sql/mariadb.py +0 -1
  292. datahub/ingestion/source/sql/mssql/job_models.py +18 -2
  293. datahub/ingestion/source/sql/mssql/source.py +376 -62
  294. datahub/ingestion/source/sql/mysql.py +154 -4
  295. datahub/ingestion/source/sql/oracle.py +62 -11
  296. datahub/ingestion/source/sql/postgres.py +142 -6
  297. datahub/ingestion/source/sql/presto.py +20 -2
  298. datahub/ingestion/source/sql/sql_common.py +281 -49
  299. datahub/ingestion/source/sql/sql_config.py +1 -34
  300. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  301. datahub/ingestion/source/sql/sql_types.py +27 -2
  302. datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
  303. datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
  304. datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
  305. datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
  306. datahub/ingestion/source/sql/teradata.py +1028 -245
  307. datahub/ingestion/source/sql/trino.py +43 -10
  308. datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
  309. datahub/ingestion/source/sql/vertica.py +14 -7
  310. datahub/ingestion/source/sql_queries.py +219 -121
  311. datahub/ingestion/source/state/checkpoint.py +8 -29
  312. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  313. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  314. datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
  315. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  316. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  317. datahub/ingestion/source/superset.py +810 -126
  318. datahub/ingestion/source/tableau/tableau.py +172 -69
  319. datahub/ingestion/source/tableau/tableau_common.py +11 -4
  320. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  321. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  322. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  323. datahub/ingestion/source/unity/config.py +161 -40
  324. datahub/ingestion/source/unity/connection.py +61 -0
  325. datahub/ingestion/source/unity/connection_test.py +1 -0
  326. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  327. datahub/ingestion/source/unity/proxy.py +794 -51
  328. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  329. datahub/ingestion/source/unity/proxy_types.py +36 -2
  330. datahub/ingestion/source/unity/report.py +15 -3
  331. datahub/ingestion/source/unity/source.py +465 -131
  332. datahub/ingestion/source/unity/tag_entities.py +197 -0
  333. datahub/ingestion/source/unity/usage.py +46 -4
  334. datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
  335. datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
  336. datahub/ingestion/source/usage/usage_common.py +4 -68
  337. datahub/ingestion/source/vertexai/__init__.py +0 -0
  338. datahub/ingestion/source/vertexai/vertexai.py +1367 -0
  339. datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
  340. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
  341. datahub/ingestion/source_config/pulsar.py +3 -1
  342. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  343. datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
  344. datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
  345. datahub/ingestion/transformer/base_transformer.py +8 -5
  346. datahub/ingestion/transformer/dataset_domain.py +1 -1
  347. datahub/ingestion/transformer/set_browse_path.py +112 -0
  348. datahub/integrations/assertion/common.py +3 -2
  349. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  350. datahub/lite/lite_util.py +2 -2
  351. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
  352. datahub/metadata/_urns/urn_defs.py +1866 -1582
  353. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  354. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  355. datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
  356. datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
  357. datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
  358. datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
  359. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  360. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  361. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  362. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
  363. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  364. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  365. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  366. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  367. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  368. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  369. datahub/metadata/schema.avsc +18404 -16617
  370. datahub/metadata/schema_classes.py +3 -3
  371. datahub/metadata/schemas/Actors.avsc +38 -1
  372. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  373. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  374. datahub/metadata/schemas/Applications.avsc +38 -0
  375. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  376. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  377. datahub/metadata/schemas/ChartKey.avsc +1 -0
  378. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  379. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  380. datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
  381. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  382. datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
  383. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  384. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  385. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  386. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  387. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  388. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  389. datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  390. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  391. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  392. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  393. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  394. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  395. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  396. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  397. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  398. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  399. datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
  400. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  401. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  402. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  403. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  404. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  405. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  406. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  407. datahub/metadata/schemas/Deprecation.avsc +2 -0
  408. datahub/metadata/schemas/DomainKey.avsc +2 -1
  409. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  410. datahub/metadata/schemas/FormInfo.avsc +5 -0
  411. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  412. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  413. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  414. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  415. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  416. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  417. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  418. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  419. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  420. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  421. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  422. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  423. datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
  424. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  425. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  426. datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
  427. datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
  428. datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
  429. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  430. datahub/metadata/schemas/Operation.avsc +21 -2
  431. datahub/metadata/schemas/Ownership.avsc +69 -0
  432. datahub/metadata/schemas/QueryProperties.avsc +24 -2
  433. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  434. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  435. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  436. datahub/metadata/schemas/Siblings.avsc +2 -0
  437. datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
  438. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  439. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  440. datahub/metadata/schemas/SystemMetadata.avsc +147 -0
  441. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  442. datahub/metadata/schemas/__init__.py +3 -3
  443. datahub/sdk/__init__.py +7 -0
  444. datahub/sdk/_all_entities.py +15 -0
  445. datahub/sdk/_shared.py +393 -10
  446. datahub/sdk/_utils.py +4 -0
  447. datahub/sdk/chart.py +386 -0
  448. datahub/sdk/container.py +7 -0
  449. datahub/sdk/dashboard.py +453 -0
  450. datahub/sdk/dataflow.py +309 -0
  451. datahub/sdk/datajob.py +367 -0
  452. datahub/sdk/dataset.py +180 -4
  453. datahub/sdk/entity.py +99 -3
  454. datahub/sdk/entity_client.py +154 -12
  455. datahub/sdk/lineage_client.py +943 -0
  456. datahub/sdk/main_client.py +83 -8
  457. datahub/sdk/mlmodel.py +383 -0
  458. datahub/sdk/mlmodelgroup.py +240 -0
  459. datahub/sdk/search_client.py +85 -8
  460. datahub/sdk/search_filters.py +393 -68
  461. datahub/secret/datahub_secret_store.py +5 -1
  462. datahub/secret/environment_secret_store.py +29 -0
  463. datahub/secret/file_secret_store.py +49 -0
  464. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  465. datahub/specific/aspect_helpers/siblings.py +73 -0
  466. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  467. datahub/specific/chart.py +1 -1
  468. datahub/specific/datajob.py +15 -1
  469. datahub/specific/dataproduct.py +4 -0
  470. datahub/specific/dataset.py +51 -59
  471. datahub/sql_parsing/_sqlglot_patch.py +1 -2
  472. datahub/sql_parsing/fingerprint_utils.py +6 -0
  473. datahub/sql_parsing/split_statements.py +30 -3
  474. datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
  475. datahub/sql_parsing/sqlglot_lineage.py +517 -44
  476. datahub/sql_parsing/sqlglot_utils.py +30 -18
  477. datahub/sql_parsing/tool_meta_extractor.py +25 -2
  478. datahub/telemetry/telemetry.py +30 -16
  479. datahub/testing/check_imports.py +1 -1
  480. datahub/testing/docker_utils.py +8 -2
  481. datahub/testing/mce_helpers.py +421 -0
  482. datahub/testing/mcp_diff.py +17 -21
  483. datahub/testing/sdk_v2_helpers.py +18 -0
  484. datahub/upgrade/upgrade.py +86 -30
  485. datahub/utilities/file_backed_collections.py +14 -15
  486. datahub/utilities/hive_schema_to_avro.py +2 -2
  487. datahub/utilities/ingest_utils.py +2 -2
  488. datahub/utilities/is_pytest.py +3 -2
  489. datahub/utilities/logging_manager.py +30 -7
  490. datahub/utilities/mapping.py +29 -2
  491. datahub/utilities/sample_data.py +5 -4
  492. datahub/utilities/server_config_util.py +298 -10
  493. datahub/utilities/sqlalchemy_query_combiner.py +6 -4
  494. datahub/utilities/stats_collections.py +4 -0
  495. datahub/utilities/threaded_iterator_executor.py +16 -3
  496. datahub/utilities/urn_encoder.py +1 -1
  497. datahub/utilities/urns/urn.py +41 -2
  498. datahub/emitter/sql_parsing_builder.py +0 -306
  499. datahub/ingestion/source/redshift/lineage_v2.py +0 -458
  500. datahub/ingestion/source/vertexai.py +0 -697
  501. datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
  502. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
  503. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,76 @@
1
+ from abc import abstractmethod
2
+ from typing import List, Tuple
3
+
4
+ from typing_extensions import Self
5
+
6
+ from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath
7
+ from datahub.metadata.schema_classes import (
8
+ FineGrainedLineageClass as FineGrainedLineage,
9
+ )
10
+
11
+
12
class HasFineGrainedLineagePatch(MetadataPatchProposal):
    """Patch-builder mixin that emits fine-grained lineage patch operations.

    Concrete builders say where the lineage lives by implementing
    `_fine_grained_lineage_location`.
    """

    @abstractmethod
    def _fine_grained_lineage_location(self) -> Tuple[str, PatchPath]:
        """Return the (aspect name, base patch path) under which fine-grained lineage is stored."""
        raise NotImplementedError("Subclasses must implement this method.")

    @staticmethod
    def _get_fine_grained_key(
        fine_grained_lineage: FineGrainedLineage,
    ) -> Tuple[str, str, str]:
        """Build the (transform operation, downstream URN, query id) key for a lineage entry.

        A missing transform operation or query is represented by the string "NONE".

        Raises:
            TypeError: If the lineage does not have exactly one downstream.
        """
        downstream_urns = fine_grained_lineage.downstreams or []
        if len(downstream_urns) != 1:
            raise TypeError("Cannot patch with more or less than one downstream.")
        (downstream_urn,) = downstream_urns
        return (
            fine_grained_lineage.transformOperation or "NONE",
            downstream_urn,
            fine_grained_lineage.query or "NONE",
        )

    def add_fine_grained_lineage(
        self, fine_grained_lineage: FineGrainedLineage
    ) -> Self:
        """Queue one "add" patch per upstream of the given lineage entry.

        Each patch is addressed by the base path, the lineage key and the
        upstream URN, and carries the entry's confidence score.
        """
        aspect_name, base_path = self._fine_grained_lineage_location()
        lineage_key = self._get_fine_grained_key(fine_grained_lineage)
        upstream_urns = fine_grained_lineage.upstreams or []
        for upstream_urn in upstream_urns:
            patch_value = {"confidenceScore": fine_grained_lineage.confidenceScore}
            self._add_patch(
                aspect_name,
                "add",
                path=(*base_path, *lineage_key, upstream_urn),
                value=patch_value,
            )
        return self

    def remove_fine_grained_lineage(
        self, fine_grained_lineage: FineGrainedLineage
    ) -> Self:
        """Queue one "remove" patch per upstream of the given lineage entry."""
        aspect_name, base_path = self._fine_grained_lineage_location()
        lineage_key = self._get_fine_grained_key(fine_grained_lineage)
        upstream_urns = fine_grained_lineage.upstreams or []
        for upstream_urn in upstream_urns:
            self._add_patch(
                aspect_name,
                "remove",
                path=(*base_path, *lineage_key, upstream_urn),
                value={},
            )
        return self

    def set_fine_grained_lineages(
        self, fine_grained_lineages: List[FineGrainedLineage]
    ) -> Self:
        """Queue a single "add" patch at the base path carrying the full lineage list."""
        aspect_name, base_path = self._fine_grained_lineage_location()
        self._add_patch(
            aspect_name, "add", path=base_path, value=fine_grained_lineages
        )
        return self
@@ -0,0 +1,73 @@
1
+ from typing import List
2
+
3
+ from typing_extensions import Self
4
+
5
+ from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
6
+ from datahub.metadata.com.linkedin.pegasus2avro.common import Siblings
7
+
8
+
9
class HasSiblingsPatch(MetadataPatchProposal):
    """Patch-builder mixin for editing the siblings aspect of an entity."""

    def add_sibling(self, sibling_urn: str, primary: bool = False) -> Self:
        """Add a sibling relationship to the entity.

        Args:
            sibling_urn: The URN of the sibling entity to add.
            primary: Whether this entity should be marked as primary in the
                relationship. The flag is only patched when it is truthy.

        Returns:
            The patch builder instance.
        """
        aspect = Siblings.ASPECT_NAME
        self._add_patch(
            aspect, "add", path=("siblings", sibling_urn), value=sibling_urn
        )
        if not primary:
            return self

        # Only an explicit request writes the primary flag.
        self._add_patch(aspect, "add", path=("primary",), value=primary)
        return self

    def remove_sibling(self, sibling_urn: str) -> Self:
        """Remove a sibling relationship from the entity.

        Args:
            sibling_urn: The URN of the sibling entity to remove.

        Returns:
            The patch builder instance.
        """
        sibling_path = ("siblings", sibling_urn)
        self._add_patch(Siblings.ASPECT_NAME, "remove", path=sibling_path, value={})
        return self

    def set_siblings(self, sibling_urns: List[str], primary: bool = False) -> Self:
        """Set the complete list of siblings for the entity.

        This is meant to replace all existing siblings with the new list. The
        primary flag is always patched here, including when it is False.

        Args:
            sibling_urns: The list of sibling URNs to set.
            primary: Whether this entity should be marked as primary.

        Returns:
            The patch builder instance.
        """
        field_updates = (
            (("siblings",), sibling_urns),
            (("primary",), primary),
        )
        for field_path, field_value in field_updates:
            self._add_patch(
                Siblings.ASPECT_NAME, "add", path=field_path, value=field_value
            )
        return self
@@ -70,3 +70,30 @@ class HasStructuredPropertiesPatch(MetadataPatchProposal):
70
70
  ),
71
71
  )
72
72
  return self
73
+
74
def set_structured_property_manual(
    self, property: StructuredPropertyValueAssignmentClass
) -> Self:
    """Add or update a structured property from a StructuredPropertyValueAssignmentClass object.

    `remove_structured_property` is called for the property's URN first, then
    an "add" patch carrying `property` itself is queued.
    """
    property_urn = property.propertyUrn
    self.remove_structured_property(property_urn)

    target_path = ("properties", property_urn)
    self._add_patch(
        StructuredPropertiesClass.ASPECT_NAME, "add", path=target_path, value=property
    )
    return self
87
+
88
def add_structured_property_manual(
    self, property: StructuredPropertyValueAssignmentClass
) -> Self:
    """Add a structured property from a StructuredPropertyValueAssignmentClass object.

    Unlike `set_structured_property_manual`, no removal is issued beforehand.
    """
    target_path = ("properties", property.propertyUrn)
    self._add_patch(
        StructuredPropertiesClass.ASPECT_NAME, "add", path=target_path, value=property
    )
    return self
datahub/specific/chart.py CHANGED
@@ -77,7 +77,7 @@ class ChartPatchBuilder(
77
77
  ChartInfo.ASPECT_NAME,
78
78
  "add",
79
79
  path=("inputEdges", input_urn),
80
- value=input_urn,
80
+ value=input_edge,
81
81
  )
82
82
  return self
83
83
 
@@ -1,15 +1,19 @@
1
- from typing import List, Optional, Tuple, Union
1
+ from typing import List, Optional, Set, Tuple, Union
2
2
 
3
3
  from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath
4
4
  from datahub.metadata.schema_classes import (
5
5
  DataJobInfoClass as DataJobInfo,
6
6
  DataJobInputOutputClass as DataJobInputOutput,
7
7
  EdgeClass as Edge,
8
+ FineGrainedLineageClass as FineGrainedLineage,
8
9
  KafkaAuditHeaderClass,
9
10
  SystemMetadataClass,
10
11
  )
11
12
  from datahub.metadata.urns import SchemaFieldUrn, Urn
12
13
  from datahub.specific.aspect_helpers.custom_properties import HasCustomPropertiesPatch
14
+ from datahub.specific.aspect_helpers.fine_grained_lineage import (
15
+ HasFineGrainedLineagePatch,
16
+ )
13
17
  from datahub.specific.aspect_helpers.ownership import HasOwnershipPatch
14
18
  from datahub.specific.aspect_helpers.tags import HasTagsPatch
15
19
  from datahub.specific.aspect_helpers.terms import HasTermsPatch
@@ -20,6 +24,7 @@ class DataJobPatchBuilder(
20
24
  HasCustomPropertiesPatch,
21
25
  HasTagsPatch,
22
26
  HasTermsPatch,
27
+ HasFineGrainedLineagePatch,
23
28
  MetadataPatchProposal,
24
29
  ):
25
30
  def __init__(
@@ -40,10 +45,19 @@ class DataJobPatchBuilder(
40
45
  urn, system_metadata=system_metadata, audit_header=audit_header
41
46
  )
42
47
 
48
+ # Track fine-grained lineages for DataJob-specific handling
49
+ self._fine_grained_lineages_to_add: List[FineGrainedLineage] = []
50
+ self._fine_grained_lineage_keys_to_remove: Set[Tuple[str, str, str]] = set()
51
+ self._fine_grained_lineages_set: Optional[List[FineGrainedLineage]] = None
52
+
43
53
  @classmethod
44
54
  def _custom_properties_location(cls) -> Tuple[str, PatchPath]:
45
55
  return DataJobInfo.ASPECT_NAME, ("customProperties",)
46
56
 
57
@classmethod
def _fine_grained_lineage_location(cls) -> Tuple[str, PatchPath]:
    """Return the aspect name and base patch path that hold fine-grained lineage for a data job."""
    base_path: PatchPath = ("fineGrainedLineages",)
    return DataJobInputOutput.ASPECT_NAME, base_path
60
+
47
61
  def add_input_datajob(self, input: Union[Edge, Urn, str]) -> "DataJobPatchBuilder":
48
62
  """
49
63
  Adds an input data job to the DataJobPatchBuilder.
@@ -9,6 +9,9 @@ from datahub.metadata.schema_classes import (
9
9
  )
10
10
  from datahub.specific.aspect_helpers.custom_properties import HasCustomPropertiesPatch
11
11
  from datahub.specific.aspect_helpers.ownership import HasOwnershipPatch
12
+ from datahub.specific.aspect_helpers.structured_properties import (
13
+ HasStructuredPropertiesPatch,
14
+ )
12
15
  from datahub.specific.aspect_helpers.tags import HasTagsPatch
13
16
  from datahub.specific.aspect_helpers.terms import HasTermsPatch
14
17
 
@@ -16,6 +19,7 @@ from datahub.specific.aspect_helpers.terms import HasTermsPatch
16
19
  class DataProductPatchBuilder(
17
20
  HasOwnershipPatch,
18
21
  HasCustomPropertiesPatch,
22
+ HasStructuredPropertiesPatch,
19
23
  HasTagsPatch,
20
24
  HasTermsPatch,
21
25
  MetadataPatchProposal,
@@ -1,3 +1,4 @@
1
+ import warnings
1
2
  from typing import Generic, List, Optional, Tuple, TypeVar, Union
2
3
 
3
4
  from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath
@@ -17,7 +18,11 @@ from datahub.metadata.schema_classes import (
17
18
  )
18
19
  from datahub.metadata.urns import DatasetUrn, TagUrn, Urn
19
20
  from datahub.specific.aspect_helpers.custom_properties import HasCustomPropertiesPatch
21
+ from datahub.specific.aspect_helpers.fine_grained_lineage import (
22
+ HasFineGrainedLineagePatch,
23
+ )
20
24
  from datahub.specific.aspect_helpers.ownership import HasOwnershipPatch
25
+ from datahub.specific.aspect_helpers.siblings import HasSiblingsPatch
21
26
  from datahub.specific.aspect_helpers.structured_properties import (
22
27
  HasStructuredPropertiesPatch,
23
28
  )
@@ -99,6 +104,8 @@ class DatasetPatchBuilder(
99
104
  HasStructuredPropertiesPatch,
100
105
  HasTagsPatch,
101
106
  HasTermsPatch,
107
+ HasFineGrainedLineagePatch,
108
+ HasSiblingsPatch,
102
109
  MetadataPatchProposal,
103
110
  ):
104
111
  def __init__(
@@ -115,6 +122,10 @@ class DatasetPatchBuilder(
115
122
  def _custom_properties_location(cls) -> Tuple[str, PatchPath]:
116
123
  return DatasetProperties.ASPECT_NAME, ("customProperties",)
117
124
 
125
@classmethod
def _fine_grained_lineage_location(cls) -> Tuple[str, PatchPath]:
    """Return the aspect name and base patch path that hold fine-grained lineage for a dataset."""
    base_path: PatchPath = ("fineGrainedLineages",)
    return UpstreamLineage.ASPECT_NAME, base_path
128
+
118
129
  def add_upstream_lineage(self, upstream: Upstream) -> "DatasetPatchBuilder":
119
130
  self._add_patch(
120
131
  UpstreamLineage.ASPECT_NAME,
@@ -144,75 +155,44 @@ class DatasetPatchBuilder(
144
155
  def add_fine_grained_upstream_lineage(
145
156
  self, fine_grained_lineage: FineGrainedLineage
146
157
  ) -> "DatasetPatchBuilder":
147
- (
148
- transform_op,
149
- downstream_urn,
150
- query_id,
151
- ) = DatasetPatchBuilder.get_fine_grained_key(fine_grained_lineage)
152
- for upstream_urn in fine_grained_lineage.upstreams or []:
153
- self._add_patch(
154
- UpstreamLineage.ASPECT_NAME,
155
- "add",
156
- path=self._build_fine_grained_path(
157
- transform_op, downstream_urn, query_id, upstream_urn
158
- ),
159
- value={"confidenceScore": fine_grained_lineage.confidenceScore},
160
- )
161
- return self
162
-
163
- @staticmethod
164
- def get_fine_grained_key(
165
- fine_grained_lineage: FineGrainedLineage,
166
- ) -> Tuple[str, str, str]:
167
- downstreams = fine_grained_lineage.downstreams or []
168
- if len(downstreams) != 1:
169
- raise TypeError("Cannot patch with more or less than one downstream.")
170
- transform_op = fine_grained_lineage.transformOperation or "NONE"
171
- downstream_urn = downstreams[0]
172
- query_id = fine_grained_lineage.query or "NONE"
173
- return transform_op, downstream_urn, query_id
174
-
175
- @classmethod
176
- def _build_fine_grained_path(
177
- cls, transform_op: str, downstream_urn: str, query_id: str, upstream_urn: str
178
- ) -> PatchPath:
179
- return (
180
- "fineGrainedLineages",
181
- transform_op,
182
- downstream_urn,
183
- query_id,
184
- upstream_urn,
158
+ """
159
+ Deprecated: Use `add_fine_grained_lineage` instead.
160
+ """
161
+ warnings.warn(
162
+ "add_fine_grained_upstream_lineage() is deprecated."
163
+ " Use add_fine_grained_lineage() instead.",
164
+ DeprecationWarning,
165
+ stacklevel=2,
185
166
  )
167
+ return self.add_fine_grained_lineage(fine_grained_lineage)
186
168
 
187
169
  def remove_fine_grained_upstream_lineage(
188
170
  self, fine_grained_lineage: FineGrainedLineage
189
171
  ) -> "DatasetPatchBuilder":
190
- (
191
- transform_op,
192
- downstream_urn,
193
- query_id,
194
- ) = DatasetPatchBuilder.get_fine_grained_key(fine_grained_lineage)
195
- for upstream_urn in fine_grained_lineage.upstreams or []:
196
- self._add_patch(
197
- UpstreamLineage.ASPECT_NAME,
198
- "remove",
199
- path=self._build_fine_grained_path(
200
- transform_op, downstream_urn, query_id, upstream_urn
201
- ),
202
- value={},
203
- )
204
- return self
172
+ """
173
+ Deprecated: Use `remove_fine_grained_lineage` instead.
174
+ """
175
+ warnings.warn(
176
+ "remove_fine_grained_upstream_lineage() is deprecated."
177
+ " Use remove_fine_grained_lineage() instead.",
178
+ DeprecationWarning,
179
+ stacklevel=2,
180
+ )
181
+ return self.remove_fine_grained_lineage(fine_grained_lineage)
205
182
 
206
183
  def set_fine_grained_upstream_lineages(
207
184
  self, fine_grained_lineages: List[FineGrainedLineage]
208
185
  ) -> "DatasetPatchBuilder":
209
- self._add_patch(
210
- UpstreamLineage.ASPECT_NAME,
211
- "add",
212
- path=("fineGrainedLineages",),
213
- value=fine_grained_lineages,
186
+ """
187
+ Deprecated: Use `set_fine_grained_lineages` instead.
188
+ """
189
+ warnings.warn(
190
+ "set_fine_grained_upstream_lineages() is deprecated."
191
+ " Use set_fine_grained_lineages() instead.",
192
+ DeprecationWarning,
193
+ stacklevel=2,
214
194
  )
215
- return self
195
+ return self.set_fine_grained_lineages(fine_grained_lineages)
216
196
 
217
197
  def for_field(
218
198
  self, field_path: str, editable: bool = True
@@ -292,3 +272,15 @@ class DatasetPatchBuilder(
292
272
  value=timestamp,
293
273
  )
294
274
  return self
275
+
276
def set_external_url(
    self, external_url: Optional[str] = None
) -> "DatasetPatchBuilder":
    """Queue an "add" patch that sets the dataset's `externalUrl` property.

    Passing None is a no-op: no patch is queued and the builder is returned
    unchanged.
    """
    if external_url is None:
        return self

    self._add_patch(
        DatasetProperties.ASPECT_NAME,
        "add",
        path=("externalUrl",),
        value=external_url,
    )
    return self
@@ -163,8 +163,7 @@ def _patch_lineage() -> None:
163
163
  - source_columns = set(find_all_in_scope(select, exp.Column))
164
164
  + source_columns = list(find_all_in_scope(select, exp.Column))
165
165
 
166
- - # If the source is a UDTF find columns used in the UTDF to generate the table
167
- + # If the source is a UDTF find columns used in the UDTF to generate the table
166
+ # If the source is a UDTF find columns used in the UDTF to generate the table
168
167
  + source = scope.expression
169
168
  if isinstance(source, exp.UDTF):
170
169
  - source_columns |= set(source.find_all(exp.Column))
@@ -0,0 +1,6 @@
1
+ import hashlib
2
+
3
+
4
def generate_hash(text: str) -> str:
    """Return the hex-encoded SHA-256 digest of `text`, encoded as UTF-8."""
    # Once we move to Python 3.9+, we can set `usedforsecurity=False`.
    hasher = hashlib.sha256()
    hasher.update(text.encode("utf-8"))
    return hasher.hexdigest()
@@ -1,7 +1,9 @@
1
+ import logging
1
2
  import re
2
3
  from enum import Enum
3
4
  from typing import Iterator, List, Tuple
4
5
 
6
+ logger = logging.getLogger(__name__)
5
7
  SELECT_KEYWORD = "SELECT"
6
8
  CASE_KEYWORD = "CASE"
7
9
  END_KEYWORD = "END"
@@ -50,6 +52,7 @@ class ParserState(Enum):
50
52
  STRING = 2
51
53
  COMMENT = 3
52
54
  MULTILINE_COMMENT = 4
55
+ BRACKETED_IDENTIFIER = 5
53
56
 
54
57
 
55
58
  class _StatementSplitter:
@@ -120,7 +123,9 @@ class _StatementSplitter:
120
123
  # Reset current_statement-specific state.
121
124
  self.does_select_mean_new_statement = False
122
125
  if self.current_case_statements != 0:
123
- breakpoint()
126
+ logger.warning(
127
+ f"Unexpected END keyword. Current case statements: {self.current_case_statements}"
128
+ )
124
129
  self.current_case_statements = 0
125
130
 
126
131
  def process(self) -> Iterator[str]:
@@ -137,6 +142,10 @@ class _StatementSplitter:
137
142
  self.state = ParserState.STRING
138
143
  self.current_statement.append(c)
139
144
  prev_real_char = c
145
+ elif c == "[":
146
+ self.state = ParserState.BRACKETED_IDENTIFIER
147
+ self.current_statement.append(c)
148
+ prev_real_char = c
140
149
  elif c == "-" and next_char == "-":
141
150
  self.state = ParserState.COMMENT
142
151
  self.current_statement.append(c)
@@ -168,6 +177,14 @@ class _StatementSplitter:
168
177
  elif c == "'":
169
178
  self.state = ParserState.NORMAL
170
179
 
180
+ elif self.state == ParserState.BRACKETED_IDENTIFIER:
181
+ self.current_statement.append(c)
182
+ if c == "]" and next_char == "]":
183
+ self.current_statement.append(next_char)
184
+ self.i += 1
185
+ elif c == "]":
186
+ self.state = ParserState.NORMAL
187
+
171
188
  elif self.state == ParserState.COMMENT:
172
189
  self.current_statement.append(c)
173
190
  if c == "\n":
@@ -233,8 +250,10 @@ class _StatementSplitter:
233
250
  ),
234
251
  )
235
252
  if (
236
- is_force_new_statement_keyword and most_recent_real_char != ")"
237
- ): # usually we'd have a close paren that closes a CTE
253
+ is_force_new_statement_keyword
254
+ and not self._has_preceding_cte(most_recent_real_char)
255
+ and not self._is_part_of_merge_query()
256
+ ):
238
257
  # Force termination of current statement
239
258
  yield from self._yield_if_complete()
240
259
 
@@ -247,6 +266,14 @@ class _StatementSplitter:
247
266
  else:
248
267
  self.current_statement.append(c)
249
268
 
269
def _has_preceding_cte(self, most_recent_real_char: str) -> bool:
    """Report whether the most recent significant character is a closing parenthesis."""
    # usually we'd have a close paren that closes a CTE
    closing_paren = ")"
    return most_recent_real_char == closing_paren
272
+
273
def _is_part_of_merge_query(self) -> bool:
    """Report whether the statement accumulated so far ends with the keyword THEN.

    Returns:
        True when the last token of `self.current_statement` (ignoring
        trailing whitespace, case-insensitively) is the whole word `then`.
    """
    # In a merge statement we'd have `when matched then` or `when not matched then`.
    statement_so_far = "".join(self.current_statement)
    # Require a word boundary so identifiers that merely end in "then"
    # (e.g. a table named `strengthen`) are not mistaken for the keyword.
    return re.search(r"\bthen\s*$", statement_so_far, re.IGNORECASE) is not None
276
+
250
277
 
251
278
  def split_statements(sql: str) -> Iterator[str]:
252
279
  """