acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (414)
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +1 -1
  7. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  8. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  9. datahub/api/entities/dataset/dataset.py +26 -23
  10. datahub/api/entities/external/__init__.py +0 -0
  11. datahub/api/entities/external/external_entities.py +724 -0
  12. datahub/api/entities/external/external_tag.py +147 -0
  13. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  14. datahub/api/entities/external/restricted_text.py +172 -0
  15. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  16. datahub/api/entities/forms/forms.py +3 -3
  17. datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
  18. datahub/api/graphql/operation.py +10 -6
  19. datahub/cli/check_cli.py +88 -7
  20. datahub/cli/cli_utils.py +63 -0
  21. datahub/cli/config_utils.py +18 -10
  22. datahub/cli/container_cli.py +5 -0
  23. datahub/cli/delete_cli.py +125 -27
  24. datahub/cli/docker_check.py +110 -14
  25. datahub/cli/docker_cli.py +153 -229
  26. datahub/cli/exists_cli.py +0 -2
  27. datahub/cli/get_cli.py +0 -2
  28. datahub/cli/graphql_cli.py +1422 -0
  29. datahub/cli/iceberg_cli.py +5 -0
  30. datahub/cli/ingest_cli.py +3 -15
  31. datahub/cli/migrate.py +2 -0
  32. datahub/cli/put_cli.py +1 -4
  33. datahub/cli/quickstart_versioning.py +53 -10
  34. datahub/cli/specific/assertions_cli.py +37 -6
  35. datahub/cli/specific/datacontract_cli.py +54 -7
  36. datahub/cli/specific/dataproduct_cli.py +2 -15
  37. datahub/cli/specific/dataset_cli.py +1 -8
  38. datahub/cli/specific/forms_cli.py +0 -4
  39. datahub/cli/specific/group_cli.py +0 -2
  40. datahub/cli/specific/structuredproperties_cli.py +1 -4
  41. datahub/cli/specific/user_cli.py +172 -3
  42. datahub/cli/state_cli.py +0 -2
  43. datahub/cli/timeline_cli.py +0 -2
  44. datahub/configuration/common.py +40 -1
  45. datahub/configuration/connection_resolver.py +5 -2
  46. datahub/configuration/env_vars.py +331 -0
  47. datahub/configuration/import_resolver.py +7 -4
  48. datahub/configuration/kafka.py +21 -1
  49. datahub/configuration/pydantic_migration_helpers.py +6 -13
  50. datahub/configuration/source_common.py +3 -2
  51. datahub/configuration/validate_field_deprecation.py +5 -2
  52. datahub/configuration/validate_field_removal.py +8 -2
  53. datahub/configuration/validate_field_rename.py +6 -5
  54. datahub/configuration/validate_multiline_string.py +5 -2
  55. datahub/emitter/mce_builder.py +8 -4
  56. datahub/emitter/rest_emitter.py +103 -30
  57. datahub/entrypoints.py +6 -3
  58. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
  59. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  60. datahub/ingestion/api/decorators.py +15 -3
  61. datahub/ingestion/api/report.py +381 -3
  62. datahub/ingestion/api/sink.py +27 -2
  63. datahub/ingestion/api/source.py +165 -58
  64. datahub/ingestion/api/source_protocols.py +23 -0
  65. datahub/ingestion/autogenerated/__init__.py +0 -0
  66. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  67. datahub/ingestion/autogenerated/lineage.json +402 -0
  68. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  69. datahub/ingestion/extractor/schema_util.py +13 -4
  70. datahub/ingestion/glossary/classification_mixin.py +5 -0
  71. datahub/ingestion/graph/client.py +330 -25
  72. datahub/ingestion/graph/config.py +3 -2
  73. datahub/ingestion/graph/filters.py +30 -11
  74. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  75. datahub/ingestion/run/pipeline.py +81 -11
  76. datahub/ingestion/run/pipeline_config.py +2 -2
  77. datahub/ingestion/sink/datahub_kafka.py +1 -0
  78. datahub/ingestion/sink/datahub_rest.py +13 -5
  79. datahub/ingestion/sink/file.py +1 -0
  80. datahub/ingestion/source/abs/config.py +1 -1
  81. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  82. datahub/ingestion/source/abs/source.py +15 -30
  83. datahub/ingestion/source/aws/aws_common.py +185 -13
  84. datahub/ingestion/source/aws/glue.py +517 -244
  85. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  86. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  87. datahub/ingestion/source/aws/tag_entities.py +270 -0
  88. datahub/ingestion/source/azure/azure_common.py +3 -3
  89. datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
  90. datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
  91. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
  92. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
  93. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  94. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  95. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  96. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  97. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  98. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  99. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  100. datahub/ingestion/source/cassandra/cassandra.py +6 -8
  101. datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
  102. datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
  103. datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
  104. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  105. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  106. datahub/ingestion/source/common/subtypes.py +53 -0
  107. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  108. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  109. datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
  110. datahub/ingestion/source/datahub/config.py +12 -9
  111. datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
  112. datahub/ingestion/source/datahub/datahub_source.py +10 -0
  113. datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
  114. datahub/ingestion/source/dbt/dbt_common.py +224 -9
  115. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  116. datahub/ingestion/source/debug/__init__.py +0 -0
  117. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  118. datahub/ingestion/source/delta_lake/config.py +9 -5
  119. datahub/ingestion/source/delta_lake/source.py +8 -0
  120. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  121. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  122. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  123. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  124. datahub/ingestion/source/dremio/dremio_source.py +132 -98
  125. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  126. datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
  127. datahub/ingestion/source/excel/__init__.py +0 -0
  128. datahub/ingestion/source/excel/config.py +92 -0
  129. datahub/ingestion/source/excel/excel_file.py +539 -0
  130. datahub/ingestion/source/excel/profiling.py +308 -0
  131. datahub/ingestion/source/excel/report.py +49 -0
  132. datahub/ingestion/source/excel/source.py +662 -0
  133. datahub/ingestion/source/excel/util.py +18 -0
  134. datahub/ingestion/source/feast.py +8 -10
  135. datahub/ingestion/source/file.py +3 -0
  136. datahub/ingestion/source/fivetran/config.py +66 -7
  137. datahub/ingestion/source/fivetran/fivetran.py +227 -43
  138. datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
  139. datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
  140. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  141. datahub/ingestion/source/fivetran/response_models.py +97 -0
  142. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  143. datahub/ingestion/source/gcs/gcs_source.py +32 -4
  144. datahub/ingestion/source/ge_data_profiler.py +108 -31
  145. datahub/ingestion/source/ge_profiling_config.py +26 -11
  146. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  147. datahub/ingestion/source/grafana/field_utils.py +307 -0
  148. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  149. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  150. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  151. datahub/ingestion/source/grafana/lineage.py +202 -0
  152. datahub/ingestion/source/grafana/models.py +137 -0
  153. datahub/ingestion/source/grafana/report.py +90 -0
  154. datahub/ingestion/source/grafana/types.py +16 -0
  155. datahub/ingestion/source/hex/api.py +28 -1
  156. datahub/ingestion/source/hex/hex.py +16 -5
  157. datahub/ingestion/source/hex/mapper.py +16 -2
  158. datahub/ingestion/source/hex/model.py +2 -0
  159. datahub/ingestion/source/hex/query_fetcher.py +1 -1
  160. datahub/ingestion/source/iceberg/iceberg.py +123 -59
  161. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  162. datahub/ingestion/source/identity/azure_ad.py +1 -1
  163. datahub/ingestion/source/identity/okta.py +1 -14
  164. datahub/ingestion/source/kafka/kafka.py +16 -0
  165. datahub/ingestion/source/kafka_connect/common.py +2 -2
  166. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  167. datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
  168. datahub/ingestion/source/looker/looker_common.py +148 -79
  169. datahub/ingestion/source/looker/looker_config.py +15 -4
  170. datahub/ingestion/source/looker/looker_constant.py +4 -0
  171. datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
  172. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  173. datahub/ingestion/source/looker/looker_source.py +503 -547
  174. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  175. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  176. datahub/ingestion/source/looker/lookml_config.py +31 -3
  177. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  178. datahub/ingestion/source/looker/lookml_source.py +96 -117
  179. datahub/ingestion/source/looker/view_upstream.py +494 -1
  180. datahub/ingestion/source/metabase.py +32 -6
  181. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  182. datahub/ingestion/source/metadata/lineage.py +9 -9
  183. datahub/ingestion/source/mlflow.py +12 -2
  184. datahub/ingestion/source/mock_data/__init__.py +0 -0
  185. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  186. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  187. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  188. datahub/ingestion/source/mode.py +26 -5
  189. datahub/ingestion/source/mongodb.py +11 -1
  190. datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
  191. datahub/ingestion/source/nifi.py +2 -2
  192. datahub/ingestion/source/openapi.py +1 -1
  193. datahub/ingestion/source/powerbi/config.py +47 -21
  194. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  195. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  196. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  197. datahub/ingestion/source/powerbi/powerbi.py +10 -6
  198. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  199. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  200. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  201. datahub/ingestion/source/preset.py +3 -3
  202. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  203. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  204. datahub/ingestion/source/redash.py +1 -1
  205. datahub/ingestion/source/redshift/config.py +15 -9
  206. datahub/ingestion/source/redshift/datashares.py +1 -1
  207. datahub/ingestion/source/redshift/lineage.py +386 -687
  208. datahub/ingestion/source/redshift/query.py +23 -19
  209. datahub/ingestion/source/redshift/redshift.py +52 -111
  210. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  211. datahub/ingestion/source/redshift/report.py +0 -2
  212. datahub/ingestion/source/redshift/usage.py +6 -5
  213. datahub/ingestion/source/s3/report.py +4 -2
  214. datahub/ingestion/source/s3/source.py +449 -248
  215. datahub/ingestion/source/sac/sac.py +3 -1
  216. datahub/ingestion/source/salesforce.py +28 -13
  217. datahub/ingestion/source/schema/json_schema.py +14 -14
  218. datahub/ingestion/source/schema_inference/object.py +22 -6
  219. datahub/ingestion/source/sigma/data_classes.py +3 -0
  220. datahub/ingestion/source/sigma/sigma.py +7 -1
  221. datahub/ingestion/source/slack/slack.py +10 -16
  222. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  223. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  224. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  225. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  226. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  227. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  228. datahub/ingestion/source/snowflake/constants.py +3 -0
  229. datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
  230. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
  231. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
  232. datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
  233. datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
  234. datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
  235. datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
  236. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
  237. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  238. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  239. datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
  240. datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
  241. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  242. datahub/ingestion/source/sql/athena.py +217 -25
  243. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  244. datahub/ingestion/source/sql/clickhouse.py +24 -8
  245. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  246. datahub/ingestion/source/sql/druid.py +2 -2
  247. datahub/ingestion/source/sql/hana.py +3 -1
  248. datahub/ingestion/source/sql/hive.py +4 -3
  249. datahub/ingestion/source/sql/hive_metastore.py +19 -20
  250. datahub/ingestion/source/sql/mariadb.py +0 -1
  251. datahub/ingestion/source/sql/mssql/job_models.py +3 -1
  252. datahub/ingestion/source/sql/mssql/source.py +336 -57
  253. datahub/ingestion/source/sql/mysql.py +154 -4
  254. datahub/ingestion/source/sql/oracle.py +5 -5
  255. datahub/ingestion/source/sql/postgres.py +142 -6
  256. datahub/ingestion/source/sql/presto.py +2 -1
  257. datahub/ingestion/source/sql/sql_common.py +281 -49
  258. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  259. datahub/ingestion/source/sql/sql_types.py +22 -0
  260. datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
  261. datahub/ingestion/source/sql/teradata.py +1028 -245
  262. datahub/ingestion/source/sql/trino.py +11 -1
  263. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  264. datahub/ingestion/source/sql/vertica.py +14 -7
  265. datahub/ingestion/source/sql_queries.py +219 -121
  266. datahub/ingestion/source/state/checkpoint.py +8 -29
  267. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  268. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  269. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  270. datahub/ingestion/source/superset.py +314 -67
  271. datahub/ingestion/source/tableau/tableau.py +135 -59
  272. datahub/ingestion/source/tableau/tableau_common.py +9 -2
  273. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  274. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  275. datahub/ingestion/source/unity/config.py +160 -40
  276. datahub/ingestion/source/unity/connection.py +61 -0
  277. datahub/ingestion/source/unity/connection_test.py +1 -0
  278. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  279. datahub/ingestion/source/unity/proxy.py +794 -51
  280. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  281. datahub/ingestion/source/unity/proxy_types.py +36 -2
  282. datahub/ingestion/source/unity/report.py +15 -3
  283. datahub/ingestion/source/unity/source.py +465 -131
  284. datahub/ingestion/source/unity/tag_entities.py +197 -0
  285. datahub/ingestion/source/unity/usage.py +46 -4
  286. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  287. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
  288. datahub/ingestion/source/usage/usage_common.py +4 -3
  289. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  290. datahub/ingestion/source_config/pulsar.py +3 -1
  291. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  292. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  293. datahub/ingestion/transformer/base_transformer.py +8 -5
  294. datahub/ingestion/transformer/set_browse_path.py +112 -0
  295. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  296. datahub/metadata/_internal_schema_classes.py +6806 -4871
  297. datahub/metadata/_urns/urn_defs.py +1767 -1539
  298. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  299. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  300. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  301. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  302. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  303. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
  304. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  305. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  306. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  307. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  308. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  309. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  310. datahub/metadata/schema.avsc +18395 -16979
  311. datahub/metadata/schemas/Actors.avsc +38 -1
  312. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  313. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  314. datahub/metadata/schemas/Applications.avsc +38 -0
  315. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  316. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  317. datahub/metadata/schemas/ChartKey.avsc +1 -0
  318. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  319. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  320. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  321. datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
  322. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  323. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  324. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  325. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  326. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  327. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  328. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  329. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  330. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  331. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  332. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  333. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  334. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  335. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  336. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  337. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  338. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  339. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  340. datahub/metadata/schemas/DomainKey.avsc +2 -1
  341. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  342. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  343. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  344. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  345. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  346. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  347. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  348. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  349. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  350. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  351. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  352. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  353. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  354. datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
  355. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  356. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  357. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  358. datahub/metadata/schemas/Operation.avsc +4 -2
  359. datahub/metadata/schemas/Ownership.avsc +69 -0
  360. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  361. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  362. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  363. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  364. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  365. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  366. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  367. datahub/sdk/__init__.py +2 -0
  368. datahub/sdk/_all_entities.py +7 -0
  369. datahub/sdk/_shared.py +249 -5
  370. datahub/sdk/chart.py +386 -0
  371. datahub/sdk/container.py +7 -0
  372. datahub/sdk/dashboard.py +453 -0
  373. datahub/sdk/dataflow.py +7 -0
  374. datahub/sdk/datajob.py +45 -13
  375. datahub/sdk/dataset.py +56 -2
  376. datahub/sdk/entity_client.py +111 -9
  377. datahub/sdk/lineage_client.py +663 -82
  378. datahub/sdk/main_client.py +50 -16
  379. datahub/sdk/mlmodel.py +120 -38
  380. datahub/sdk/mlmodelgroup.py +7 -0
  381. datahub/sdk/search_client.py +7 -3
  382. datahub/sdk/search_filters.py +304 -36
  383. datahub/secret/datahub_secret_store.py +3 -0
  384. datahub/secret/environment_secret_store.py +29 -0
  385. datahub/secret/file_secret_store.py +49 -0
  386. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  387. datahub/specific/aspect_helpers/siblings.py +73 -0
  388. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  389. datahub/specific/chart.py +1 -1
  390. datahub/specific/datajob.py +15 -1
  391. datahub/specific/dataproduct.py +4 -0
  392. datahub/specific/dataset.py +39 -59
  393. datahub/sql_parsing/split_statements.py +13 -0
  394. datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
  395. datahub/sql_parsing/sqlglot_lineage.py +196 -42
  396. datahub/sql_parsing/sqlglot_utils.py +12 -4
  397. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  398. datahub/telemetry/telemetry.py +28 -14
  399. datahub/testing/sdk_v2_helpers.py +7 -1
  400. datahub/upgrade/upgrade.py +73 -17
  401. datahub/utilities/file_backed_collections.py +8 -9
  402. datahub/utilities/is_pytest.py +3 -2
  403. datahub/utilities/logging_manager.py +22 -6
  404. datahub/utilities/mapping.py +29 -2
  405. datahub/utilities/sample_data.py +5 -4
  406. datahub/utilities/server_config_util.py +10 -1
  407. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  408. datahub/utilities/stats_collections.py +4 -0
  409. datahub/utilities/urns/urn.py +41 -2
  410. datahub/emitter/sql_parsing_builder.py +0 -306
  411. datahub/ingestion/source/redshift/lineage_v2.py +0 -466
  412. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
  413. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
  414. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -9,6 +9,8 @@ class SnowflakeCloudProvider(StrEnum):
9
9
 
10
10
  SNOWFLAKE_DEFAULT_CLOUD = SnowflakeCloudProvider.AWS
11
11
 
12
+ DEFAULT_SNOWFLAKE_DOMAIN = "snowflakecomputing.com"
13
+
12
14
 
13
15
  class SnowflakeEdition(StrEnum):
14
16
  STANDARD = "Standard"
@@ -55,6 +57,7 @@ class SnowflakeObjectDomain(StrEnum):
55
57
  ICEBERG_TABLE = "iceberg table"
56
58
  STREAM = "stream"
57
59
  PROCEDURE = "procedure"
60
+ DYNAMIC_TABLE = "dynamic table"
58
61
 
59
62
 
60
63
  GENERIC_PERMISSION_ERROR_KEY = "permission-error"
@@ -1,12 +1,13 @@
1
1
  import logging
2
2
  from collections import defaultdict
3
3
  from dataclasses import dataclass
4
+ from enum import Enum
4
5
  from typing import Dict, List, Optional, Set
5
6
 
6
7
  import pydantic
7
8
  from pydantic import Field, root_validator, validator
8
9
 
9
- from datahub.configuration.common import AllowDenyPattern, ConfigModel
10
+ from datahub.configuration.common import AllowDenyPattern, ConfigModel, HiddenFromDocs
10
11
  from datahub.configuration.pattern_utils import UUID_REGEX
11
12
  from datahub.configuration.source_common import (
12
13
  EnvConfigMixin,
@@ -30,6 +31,7 @@ from datahub.ingestion.source.sql.sql_config import SQLCommonConfig, SQLFilterCo
30
31
  from datahub.ingestion.source.state.stateful_ingestion_base import (
31
32
  StatefulLineageConfigMixin,
32
33
  StatefulProfilingConfigMixin,
34
+ StatefulTimeWindowConfigMixin,
33
35
  StatefulUsageConfigMixin,
34
36
  )
35
37
  from datahub.ingestion.source.usage.usage_common import BaseUsageConfig
@@ -49,9 +51,15 @@ DEFAULT_TEMP_TABLES_PATTERNS = [
49
51
  rf".*\.SEGMENT_{UUID_REGEX}", # segment
50
52
  rf".*\.STAGING_.*_{UUID_REGEX}", # stitch
51
53
  r".*\.(GE_TMP_|GE_TEMP_|GX_TEMP_)[0-9A-F]{8}", # great expectations
54
+ r".*\.SNOWPARK_TEMP_TABLE_.+", # snowpark
52
55
  ]
53
56
 
54
57
 
58
+ class QueryDedupStrategyType(Enum):
59
+ STANDARD = "STANDARD"
60
+ NONE = "NONE"
61
+
62
+
55
63
  class TagOption(StrEnum):
56
64
  with_lineage = "with_lineage"
57
65
  without_lineage = "without_lineage"
@@ -60,13 +68,10 @@ class TagOption(StrEnum):
60
68
 
61
69
  @dataclass(frozen=True)
62
70
  class DatabaseId:
63
- database: str = Field(
64
- description="Database created from share in consumer account."
65
- )
66
- platform_instance: Optional[str] = Field(
67
- default=None,
68
- description="Platform instance of consumer snowflake account.",
69
- )
71
+ # Database created from share in consumer account
72
+ database: str
73
+ # Platform instance of consumer snowflake account
74
+ platform_instance: Optional[str] = None
70
75
 
71
76
 
72
77
  class SnowflakeShareConfig(ConfigModel):
@@ -154,14 +159,11 @@ class SnowflakeIdentifierConfig(
154
159
 
155
160
  email_domain: Optional[str] = pydantic.Field(
156
161
  default=None,
157
- description="Email domain of your organization so users can be displayed on UI appropriately.",
162
+ description="Email domain of your organization so users can be displayed on UI appropriately. This is used only if we cannot infer email ID.",
158
163
  )
159
164
 
160
- email_as_user_identifier: bool = Field(
161
- default=True,
162
- description="Format user urns as an email, if the snowflake user's email is set. If `email_domain` is "
163
- "provided, generates email addresses for snowflake users with unset emails, based on their "
164
- "username.",
165
+ _email_as_user_identifier = pydantic_removed_field(
166
+ "email_as_user_identifier",
165
167
  )
166
168
 
167
169
 
@@ -198,6 +200,7 @@ class SnowflakeV2Config(
198
200
  SnowflakeUsageConfig,
199
201
  StatefulLineageConfigMixin,
200
202
  StatefulUsageConfigMixin,
203
+ StatefulTimeWindowConfigMixin,
201
204
  StatefulProfilingConfigMixin,
202
205
  ClassificationSourceConfigMixin,
203
206
  IncrementalPropertiesConfigMixin,
@@ -212,6 +215,16 @@ class SnowflakeV2Config(
212
215
  description="If enabled, populates the ingested views' definitions.",
213
216
  )
214
217
 
218
+ fetch_views_from_information_schema: bool = Field(
219
+ default=False,
220
+ description="If enabled, uses information_schema.views to fetch view definitions instead of SHOW VIEWS command. "
221
+ "This alternative method can be more reliable for databases with large numbers of views (> 10K views), as the "
222
+ "SHOW VIEWS approach has proven unreliable and can lead to missing views in such scenarios. However, this method "
223
+ "requires OWNERSHIP privileges on views to retrieve their definitions. For views without ownership permissions "
224
+ "(where VIEW_DEFINITION is null/empty), the system will automatically fall back to using batched SHOW VIEWS queries "
225
+ "to populate the missing definitions.",
226
+ )
227
+
215
228
  include_technical_schema: bool = Field(
216
229
  default=True,
217
230
  description="If enabled, populates the snowflake technical schema and descriptions.",
@@ -232,7 +245,7 @@ class SnowflakeV2Config(
232
245
  )
233
246
 
234
247
  use_queries_v2: bool = Field(
235
- default=False,
248
+ default=True,
236
249
  description="If enabled, uses the new queries extractor to extract queries from snowflake.",
237
250
  )
238
251
  include_queries: bool = Field(
@@ -250,6 +263,11 @@ class SnowflakeV2Config(
250
263
  "This is useful if you have a large number of schemas and want to avoid bulk fetching the schema for each table/view.",
251
264
  )
252
265
 
266
+ query_dedup_strategy: QueryDedupStrategyType = Field(
267
+ default=QueryDedupStrategyType.STANDARD,
268
+ description=f"Experimental: Choose the strategy for query deduplication (default value is appropriate for most use-cases; make sure you understand performance implications before changing it). Allowed values are: {', '.join([s.name for s in QueryDedupStrategyType])}",
269
+ )
270
+
253
271
  _check_role_grants_removed = pydantic_removed_field("check_role_grants")
254
272
  _provision_role_removed = pydantic_removed_field("provision_role")
255
273
 
@@ -263,10 +281,11 @@ class SnowflakeV2Config(
263
281
  description="If enabled along with `extract_tags`, extracts snowflake's key-value tags as DataHub structured properties instead of DataHub tags.",
264
282
  )
265
283
 
266
- structured_properties_template_cache_invalidation_interval: int = Field(
267
- hidden_from_docs=True,
268
- default=60,
269
- description="Interval in seconds to invalidate the structured properties template cache.",
284
+ structured_properties_template_cache_invalidation_interval: HiddenFromDocs[int] = (
285
+ Field(
286
+ default=60,
287
+ description="Interval in seconds to invalidate the structured properties template cache.",
288
+ )
270
289
  )
271
290
 
272
291
  include_external_url: bool = Field(
@@ -315,7 +334,7 @@ class SnowflakeV2Config(
315
334
  "to ignore the temporary staging tables created by known ETL tools.",
316
335
  )
317
336
 
318
- rename_upstreams_deny_pattern_to_temporary_table_pattern = pydantic_renamed_field(
337
+ rename_upstreams_deny_pattern_to_temporary_table_pattern = pydantic_renamed_field( # type: ignore[pydantic-field]
319
338
  "upstreams_deny_pattern", "temporary_tables_pattern"
320
339
  )
321
340
 
@@ -333,8 +352,7 @@ class SnowflakeV2Config(
333
352
  )
334
353
 
335
354
  # Allows empty containers to be ingested before datasets are added, avoiding permission errors
336
- warn_no_datasets: bool = Field(
337
- hidden_from_docs=True,
355
+ warn_no_datasets: HiddenFromDocs[bool] = Field(
338
356
  default=False,
339
357
  description="If True, warns when no datasets are found during ingestion. If False, ingestion fails when no datasets are found.",
340
358
  )
@@ -347,11 +365,32 @@ class SnowflakeV2Config(
347
365
 
348
366
  pushdown_deny_usernames: List[str] = Field(
349
367
  default=[],
350
- description="List of snowflake usernames which will not be considered for lineage/usage/queries extraction. "
368
+ description="List of snowflake usernames (SQL LIKE patterns, e.g., 'SERVICE_%', '%_PROD', 'TEST_USER') which will NOT be considered for lineage/usage/queries extraction. "
351
369
  "This is primarily useful for improving performance by filtering out users with extremely high query volumes. "
352
370
  "Only applicable if `use_queries_v2` is enabled.",
353
371
  )
354
372
 
373
+ pushdown_allow_usernames: List[str] = Field(
374
+ default=[],
375
+ description="List of snowflake usernames (SQL LIKE patterns, e.g., 'ANALYST_%', '%_USER', 'MAIN_ACCOUNT') which WILL be considered for lineage/usage/queries extraction. "
376
+ "This is primarily useful for improving performance by filtering in only specific users. "
377
+ "Only applicable if `use_queries_v2` is enabled. If not specified, all users not in deny list are included.",
378
+ )
379
+
380
+ push_down_database_pattern_access_history: bool = Field(
381
+ default=False,
382
+ description="If enabled, pushes down database pattern filtering to the access_history table for improved performance. "
383
+ "This filters on the accessed objects in access_history.",
384
+ )
385
+
386
+ additional_database_names_allowlist: List[str] = Field(
387
+ default=[],
388
+ description="Additional database names (no pattern matching) to be included in the access_history filter. "
389
+ "Only applies if push_down_database_pattern_access_history=True. "
390
+ "These databases will be included in the filter being pushed down regardless of database_pattern settings."
391
+ "This may be required in the case of _eg_ temporary tables being created in a different database than the ones in the database_name patterns.",
392
+ )
393
+
355
394
  @validator("convert_urns_to_lowercase")
356
395
  def validate_convert_urns_to_lowercase(cls, v):
357
396
  if not v:
@@ -440,6 +479,20 @@ class SnowflakeV2Config(
440
479
 
441
480
  return shares
442
481
 
482
+ @root_validator(pre=False, skip_on_failure=True)
483
+ def validate_queries_v2_stateful_ingestion(cls, values: Dict) -> Dict:
484
+ if values.get("use_queries_v2"):
485
+ if values.get("enable_stateful_lineage_ingestion") or values.get(
486
+ "enable_stateful_usage_ingestion"
487
+ ):
488
+ logger.warning(
489
+ "enable_stateful_lineage_ingestion and enable_stateful_usage_ingestion are deprecated "
490
+ "when using use_queries_v2=True. These configs only work with the legacy (non-queries v2) extraction path. "
491
+ "For queries v2, use enable_stateful_time_window instead to enable stateful ingestion "
492
+ "for the unified time window extraction (lineage + usage + operations + queries)."
493
+ )
494
+ return values
495
+
443
496
  def outbounds(self) -> Dict[str, Set[DatabaseId]]:
444
497
  """
445
498
  Returns mapping of
@@ -15,13 +15,19 @@ from snowflake.connector.network import (
15
15
  OAUTH_AUTHENTICATOR,
16
16
  )
17
17
 
18
- from datahub.configuration.common import ConfigModel, ConfigurationError, MetaError
18
+ from datahub.configuration.common import (
19
+ ConfigModel,
20
+ ConfigurationError,
21
+ HiddenFromDocs,
22
+ MetaError,
23
+ )
19
24
  from datahub.configuration.connection_resolver import auto_connection_resolver
20
25
  from datahub.configuration.validate_field_rename import pydantic_renamed_field
21
26
  from datahub.ingestion.api.closeable import Closeable
22
27
  from datahub.ingestion.source.snowflake.constants import (
23
28
  CLIENT_PREFETCH_THREADS,
24
29
  CLIENT_SESSION_KEEP_ALIVE,
30
+ DEFAULT_SNOWFLAKE_DOMAIN,
25
31
  )
26
32
  from datahub.ingestion.source.snowflake.oauth_config import (
27
33
  OAuthConfiguration,
@@ -47,8 +53,6 @@ _VALID_AUTH_TYPES: Dict[str, str] = {
47
53
  "OAUTH_AUTHENTICATOR_TOKEN": OAUTH_AUTHENTICATOR,
48
54
  }
49
55
 
50
- _SNOWFLAKE_HOST_SUFFIX = ".snowflakecomputing.com"
51
-
52
56
 
53
57
  class SnowflakePermissionError(MetaError):
54
58
  """A permission error has happened"""
@@ -64,7 +68,7 @@ class SnowflakeConnectionConfig(ConfigModel):
64
68
  description="Any options specified here will be passed to [SQLAlchemy.create_engine](https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine) as kwargs.",
65
69
  )
66
70
 
67
- scheme: str = "snowflake"
71
+ scheme: HiddenFromDocs[str] = "snowflake"
68
72
  username: Optional[str] = pydantic.Field(
69
73
  default=None, description="Snowflake username."
70
74
  )
@@ -110,18 +114,25 @@ class SnowflakeConnectionConfig(ConfigModel):
110
114
  default=None,
111
115
  description="OAuth token from external identity provider. Not recommended for most use cases because it will not be able to refresh once expired.",
112
116
  )
117
+ snowflake_domain: str = pydantic.Field(
118
+ default=DEFAULT_SNOWFLAKE_DOMAIN,
119
+ description="Snowflake domain. Use 'snowflakecomputing.com' for most regions or 'snowflakecomputing.cn' for China (cn-northwest-1) region.",
120
+ )
113
121
 
114
122
  def get_account(self) -> str:
115
123
  assert self.account_id
116
124
  return self.account_id
117
125
 
118
- rename_host_port_to_account_id = pydantic_renamed_field("host_port", "account_id")
126
+ rename_host_port_to_account_id = pydantic_renamed_field("host_port", "account_id") # type: ignore[pydantic-field]
119
127
 
120
128
  @pydantic.validator("account_id")
121
- def validate_account_id(cls, account_id: str) -> str:
129
+ def validate_account_id(cls, account_id: str, values: Dict) -> str:
122
130
  account_id = remove_protocol(account_id)
123
131
  account_id = remove_trailing_slashes(account_id)
124
- account_id = remove_suffix(account_id, _SNOWFLAKE_HOST_SUFFIX)
132
+ # Get the domain from config, fallback to default
133
+ domain = values.get("snowflake_domain", DEFAULT_SNOWFLAKE_DOMAIN)
134
+ snowflake_host_suffix = f".{domain}"
135
+ account_id = remove_suffix(account_id, snowflake_host_suffix)
125
136
  return account_id
126
137
 
127
138
  @pydantic.validator("authentication_type", always=True)
@@ -311,6 +322,7 @@ class SnowflakeConnectionConfig(ConfigModel):
311
322
  warehouse=self.warehouse,
312
323
  authenticator=_VALID_AUTH_TYPES.get(self.authentication_type),
313
324
  application=_APPLICATION_NAME,
325
+ host=f"{self.account_id}.{self.snowflake_domain}",
314
326
  **connect_args,
315
327
  )
316
328
 
@@ -324,6 +336,7 @@ class SnowflakeConnectionConfig(ConfigModel):
324
336
  role=self.role,
325
337
  authenticator=_VALID_AUTH_TYPES.get(self.authentication_type),
326
338
  application=_APPLICATION_NAME,
339
+ host=f"{self.account_id}.{self.snowflake_domain}",
327
340
  **connect_args,
328
341
  )
329
342
 
@@ -337,6 +350,7 @@ class SnowflakeConnectionConfig(ConfigModel):
337
350
  warehouse=self.warehouse,
338
351
  role=self.role,
339
352
  application=_APPLICATION_NAME,
353
+ host=f"{self.account_id}.{self.snowflake_domain}",
340
354
  **connect_args,
341
355
  )
342
356
  elif self.authentication_type == "OAUTH_AUTHENTICATOR_TOKEN":
@@ -348,6 +362,7 @@ class SnowflakeConnectionConfig(ConfigModel):
348
362
  warehouse=self.warehouse,
349
363
  role=self.role,
350
364
  application=_APPLICATION_NAME,
365
+ host=f"{self.account_id}.{self.snowflake_domain}",
351
366
  **connect_args,
352
367
  )
353
368
  elif self.authentication_type == "OAUTH_AUTHENTICATOR":
@@ -363,6 +378,7 @@ class SnowflakeConnectionConfig(ConfigModel):
363
378
  role=self.role,
364
379
  authenticator=_VALID_AUTH_TYPES.get(self.authentication_type),
365
380
  application=_APPLICATION_NAME,
381
+ host=f"{self.account_id}.{self.snowflake_domain}",
366
382
  **connect_args,
367
383
  )
368
384
  else:
@@ -408,7 +424,7 @@ class SnowflakeConnection(Closeable):
408
424
  # We often run multiple queries in parallel across multiple threads,
409
425
  # so we need to number them to help with log readability.
410
426
  query_num = self.get_query_no()
411
- logger.info(f"Query #{query_num}: {query}", stacklevel=2)
427
+ logger.info(f"Query #{query_num}: {query.rstrip()}", stacklevel=2)
412
428
  resp = self._connection.cursor(DictCursor).execute(query)
413
429
  if resp is not None and resp.rowcount is not None:
414
430
  logger.info(
@@ -2,7 +2,17 @@ import json
2
2
  import logging
3
3
  from dataclasses import dataclass
4
4
  from datetime import datetime
5
- from typing import Any, Collection, Iterable, List, Optional, Set, Tuple, Type
5
+ from typing import (
6
+ TYPE_CHECKING,
7
+ Any,
8
+ Collection,
9
+ Iterable,
10
+ List,
11
+ Optional,
12
+ Set,
13
+ Tuple,
14
+ Type,
15
+ )
6
16
 
7
17
  from pydantic import BaseModel, Field, validator
8
18
 
@@ -44,6 +54,9 @@ from datahub.sql_parsing.sqlglot_utils import get_query_fingerprint
44
54
  from datahub.utilities.perf_timer import PerfTimer
45
55
  from datahub.utilities.time import ts_millis_to_datetime
46
56
 
57
+ if TYPE_CHECKING:
58
+ from pydantic.deprecated.class_validators import V1Validator
59
+
47
60
  logger: logging.Logger = logging.getLogger(__name__)
48
61
 
49
62
  EXTERNAL_LINEAGE = "external_lineage"
@@ -51,7 +64,7 @@ TABLE_LINEAGE = "table_lineage"
51
64
  VIEW_LINEAGE = "view_lineage"
52
65
 
53
66
 
54
- def pydantic_parse_json(field: str) -> classmethod:
67
+ def pydantic_parse_json(field: str) -> "V1Validator":
55
68
  def _parse_from_json(cls: Type, v: Any) -> dict:
56
69
  if isinstance(v, str):
57
70
  return json.loads(v)
@@ -72,7 +85,7 @@ class ColumnUpstreamJob(BaseModel):
72
85
 
73
86
 
74
87
  class ColumnUpstreamLineage(BaseModel):
75
- column_name: Optional[str]
88
+ column_name: Optional[str] = None
76
89
  upstreams: List[ColumnUpstreamJob] = Field(default_factory=list)
77
90
 
78
91
 
@@ -91,9 +104,9 @@ class Query(BaseModel):
91
104
  class UpstreamLineageEdge(BaseModel):
92
105
  DOWNSTREAM_TABLE_NAME: str
93
106
  DOWNSTREAM_TABLE_DOMAIN: str
94
- UPSTREAM_TABLES: Optional[List[UpstreamTableNode]]
95
- UPSTREAM_COLUMNS: Optional[List[ColumnUpstreamLineage]]
96
- QUERIES: Optional[List[Query]]
107
+ UPSTREAM_TABLES: Optional[List[UpstreamTableNode]] = None
108
+ UPSTREAM_COLUMNS: Optional[List[ColumnUpstreamLineage]] = None
109
+ QUERIES: Optional[List[Query]] = None
97
110
 
98
111
  _json_upstream_tables = pydantic_parse_json("UPSTREAM_TABLES")
99
112
  _json_upstream_columns = pydantic_parse_json("UPSTREAM_COLUMNS")