acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (503)
  1. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
  2. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
  3. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
  5. datahub/_version.py +1 -1
  6. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  7. datahub/api/entities/assertion/assertion.py +1 -1
  8. datahub/api/entities/common/serialized_value.py +1 -1
  9. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  10. datahub/api/entities/datacontract/datacontract.py +35 -3
  11. datahub/api/entities/datajob/dataflow.py +18 -3
  12. datahub/api/entities/datajob/datajob.py +24 -4
  13. datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
  14. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  15. datahub/api/entities/dataset/dataset.py +47 -72
  16. datahub/api/entities/external/__init__.py +0 -0
  17. datahub/api/entities/external/external_entities.py +724 -0
  18. datahub/api/entities/external/external_tag.py +147 -0
  19. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  20. datahub/api/entities/external/restricted_text.py +172 -0
  21. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  22. datahub/api/entities/forms/forms.py +37 -37
  23. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  24. datahub/api/graphql/assertion.py +1 -1
  25. datahub/api/graphql/base.py +8 -6
  26. datahub/api/graphql/operation.py +14 -10
  27. datahub/cli/check_cli.py +91 -9
  28. datahub/cli/cli_utils.py +63 -0
  29. datahub/cli/config_utils.py +20 -12
  30. datahub/cli/container_cli.py +5 -0
  31. datahub/cli/delete_cli.py +133 -34
  32. datahub/cli/docker_check.py +110 -14
  33. datahub/cli/docker_cli.py +155 -231
  34. datahub/cli/exists_cli.py +2 -3
  35. datahub/cli/get_cli.py +2 -3
  36. datahub/cli/graphql_cli.py +1422 -0
  37. datahub/cli/iceberg_cli.py +11 -5
  38. datahub/cli/ingest_cli.py +25 -26
  39. datahub/cli/migrate.py +12 -9
  40. datahub/cli/migration_utils.py +4 -3
  41. datahub/cli/put_cli.py +4 -6
  42. datahub/cli/quickstart_versioning.py +53 -10
  43. datahub/cli/specific/assertions_cli.py +39 -7
  44. datahub/cli/specific/datacontract_cli.py +57 -9
  45. datahub/cli/specific/dataproduct_cli.py +12 -24
  46. datahub/cli/specific/dataset_cli.py +31 -21
  47. datahub/cli/specific/forms_cli.py +2 -5
  48. datahub/cli/specific/group_cli.py +2 -3
  49. datahub/cli/specific/structuredproperties_cli.py +5 -7
  50. datahub/cli/specific/user_cli.py +174 -4
  51. datahub/cli/state_cli.py +2 -3
  52. datahub/cli/timeline_cli.py +2 -3
  53. datahub/configuration/common.py +46 -2
  54. datahub/configuration/connection_resolver.py +5 -2
  55. datahub/configuration/env_vars.py +331 -0
  56. datahub/configuration/import_resolver.py +7 -4
  57. datahub/configuration/kafka.py +21 -1
  58. datahub/configuration/pydantic_migration_helpers.py +6 -13
  59. datahub/configuration/source_common.py +4 -3
  60. datahub/configuration/validate_field_deprecation.py +5 -2
  61. datahub/configuration/validate_field_removal.py +8 -2
  62. datahub/configuration/validate_field_rename.py +6 -5
  63. datahub/configuration/validate_multiline_string.py +5 -2
  64. datahub/emitter/mce_builder.py +12 -8
  65. datahub/emitter/mcp.py +20 -5
  66. datahub/emitter/mcp_builder.py +12 -0
  67. datahub/emitter/request_helper.py +138 -15
  68. datahub/emitter/response_helper.py +111 -19
  69. datahub/emitter/rest_emitter.py +399 -163
  70. datahub/entrypoints.py +10 -5
  71. datahub/errors.py +12 -0
  72. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
  73. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  74. datahub/ingestion/api/common.py +9 -0
  75. datahub/ingestion/api/decorators.py +15 -3
  76. datahub/ingestion/api/report.py +381 -3
  77. datahub/ingestion/api/sink.py +27 -2
  78. datahub/ingestion/api/source.py +174 -62
  79. datahub/ingestion/api/source_helpers.py +41 -3
  80. datahub/ingestion/api/source_protocols.py +23 -0
  81. datahub/ingestion/autogenerated/__init__.py +0 -0
  82. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  83. datahub/ingestion/autogenerated/lineage.json +402 -0
  84. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  85. datahub/ingestion/extractor/schema_util.py +31 -5
  86. datahub/ingestion/glossary/classification_mixin.py +9 -2
  87. datahub/ingestion/graph/client.py +492 -55
  88. datahub/ingestion/graph/config.py +18 -2
  89. datahub/ingestion/graph/filters.py +96 -32
  90. datahub/ingestion/graph/links.py +55 -0
  91. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  92. datahub/ingestion/run/pipeline.py +90 -23
  93. datahub/ingestion/run/pipeline_config.py +3 -3
  94. datahub/ingestion/sink/datahub_kafka.py +1 -0
  95. datahub/ingestion/sink/datahub_rest.py +31 -23
  96. datahub/ingestion/sink/file.py +1 -0
  97. datahub/ingestion/source/abs/config.py +1 -1
  98. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  99. datahub/ingestion/source/abs/source.py +15 -30
  100. datahub/ingestion/source/apply/datahub_apply.py +6 -5
  101. datahub/ingestion/source/aws/aws_common.py +185 -13
  102. datahub/ingestion/source/aws/glue.py +517 -244
  103. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  104. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  105. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  106. datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
  107. datahub/ingestion/source/aws/tag_entities.py +270 -0
  108. datahub/ingestion/source/azure/azure_common.py +3 -3
  109. datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
  110. datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
  111. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
  112. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
  113. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  114. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  115. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  116. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  117. datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
  118. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  119. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  120. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  121. datahub/ingestion/source/cassandra/cassandra.py +7 -18
  122. datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
  123. datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
  124. datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
  125. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  126. datahub/ingestion/source/common/data_platforms.py +23 -0
  127. datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
  128. datahub/ingestion/source/common/subtypes.py +73 -1
  129. datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
  130. datahub/ingestion/source/data_lake_common/object_store.py +732 -0
  131. datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
  132. datahub/ingestion/source/datahub/config.py +19 -5
  133. datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
  134. datahub/ingestion/source/datahub/datahub_source.py +11 -1
  135. datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
  136. datahub/ingestion/source/dbt/dbt_common.py +270 -26
  137. datahub/ingestion/source/dbt/dbt_core.py +88 -47
  138. datahub/ingestion/source/dbt/dbt_tests.py +8 -6
  139. datahub/ingestion/source/debug/__init__.py +0 -0
  140. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  141. datahub/ingestion/source/delta_lake/config.py +9 -5
  142. datahub/ingestion/source/delta_lake/source.py +8 -0
  143. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  144. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  145. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  146. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
  147. datahub/ingestion/source/dremio/dremio_entities.py +6 -5
  148. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  149. datahub/ingestion/source/dremio/dremio_source.py +228 -215
  150. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  151. datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
  152. datahub/ingestion/source/excel/__init__.py +0 -0
  153. datahub/ingestion/source/excel/config.py +92 -0
  154. datahub/ingestion/source/excel/excel_file.py +539 -0
  155. datahub/ingestion/source/excel/profiling.py +308 -0
  156. datahub/ingestion/source/excel/report.py +49 -0
  157. datahub/ingestion/source/excel/source.py +662 -0
  158. datahub/ingestion/source/excel/util.py +18 -0
  159. datahub/ingestion/source/feast.py +12 -14
  160. datahub/ingestion/source/file.py +3 -0
  161. datahub/ingestion/source/fivetran/config.py +67 -8
  162. datahub/ingestion/source/fivetran/fivetran.py +228 -43
  163. datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
  164. datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
  165. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  166. datahub/ingestion/source/fivetran/response_models.py +97 -0
  167. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  168. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
  169. datahub/ingestion/source/gcs/gcs_source.py +53 -10
  170. datahub/ingestion/source/gcs/gcs_utils.py +36 -9
  171. datahub/ingestion/source/ge_data_profiler.py +146 -33
  172. datahub/ingestion/source/ge_profiling_config.py +26 -11
  173. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  174. datahub/ingestion/source/grafana/field_utils.py +307 -0
  175. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  176. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  177. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  178. datahub/ingestion/source/grafana/lineage.py +202 -0
  179. datahub/ingestion/source/grafana/models.py +137 -0
  180. datahub/ingestion/source/grafana/report.py +90 -0
  181. datahub/ingestion/source/grafana/types.py +16 -0
  182. datahub/ingestion/source/hex/__init__.py +0 -0
  183. datahub/ingestion/source/hex/api.py +402 -0
  184. datahub/ingestion/source/hex/constants.py +8 -0
  185. datahub/ingestion/source/hex/hex.py +311 -0
  186. datahub/ingestion/source/hex/mapper.py +412 -0
  187. datahub/ingestion/source/hex/model.py +78 -0
  188. datahub/ingestion/source/hex/query_fetcher.py +307 -0
  189. datahub/ingestion/source/iceberg/iceberg.py +385 -164
  190. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  191. datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
  192. datahub/ingestion/source/identity/azure_ad.py +1 -1
  193. datahub/ingestion/source/identity/okta.py +1 -14
  194. datahub/ingestion/source/kafka/kafka.py +28 -71
  195. datahub/ingestion/source/kafka/kafka_config.py +78 -0
  196. datahub/ingestion/source/kafka_connect/common.py +2 -2
  197. datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
  198. datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
  199. datahub/ingestion/source/ldap.py +1 -1
  200. datahub/ingestion/source/looker/looker_common.py +216 -86
  201. datahub/ingestion/source/looker/looker_config.py +15 -4
  202. datahub/ingestion/source/looker/looker_constant.py +4 -0
  203. datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
  204. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  205. datahub/ingestion/source/looker/looker_source.py +539 -555
  206. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  207. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  208. datahub/ingestion/source/looker/lookml_config.py +31 -3
  209. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  210. datahub/ingestion/source/looker/lookml_source.py +103 -118
  211. datahub/ingestion/source/looker/view_upstream.py +494 -1
  212. datahub/ingestion/source/metabase.py +32 -6
  213. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  214. datahub/ingestion/source/metadata/lineage.py +11 -10
  215. datahub/ingestion/source/mlflow.py +254 -23
  216. datahub/ingestion/source/mock_data/__init__.py +0 -0
  217. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  218. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  219. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  220. datahub/ingestion/source/mode.py +359 -181
  221. datahub/ingestion/source/mongodb.py +11 -1
  222. datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
  223. datahub/ingestion/source/nifi.py +5 -5
  224. datahub/ingestion/source/openapi.py +85 -38
  225. datahub/ingestion/source/openapi_parser.py +59 -40
  226. datahub/ingestion/source/powerbi/config.py +92 -27
  227. datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
  228. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  229. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  230. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
  231. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  232. datahub/ingestion/source/powerbi/powerbi.py +66 -32
  233. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  234. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
  235. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  236. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  237. datahub/ingestion/source/preset.py +3 -3
  238. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  239. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  240. datahub/ingestion/source/redash.py +1 -1
  241. datahub/ingestion/source/redshift/config.py +15 -9
  242. datahub/ingestion/source/redshift/datashares.py +1 -1
  243. datahub/ingestion/source/redshift/lineage.py +386 -687
  244. datahub/ingestion/source/redshift/profile.py +2 -2
  245. datahub/ingestion/source/redshift/query.py +24 -20
  246. datahub/ingestion/source/redshift/redshift.py +52 -111
  247. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  248. datahub/ingestion/source/redshift/report.py +0 -2
  249. datahub/ingestion/source/redshift/usage.py +13 -11
  250. datahub/ingestion/source/s3/report.py +4 -2
  251. datahub/ingestion/source/s3/source.py +515 -244
  252. datahub/ingestion/source/sac/sac.py +3 -1
  253. datahub/ingestion/source/salesforce.py +28 -13
  254. datahub/ingestion/source/schema/json_schema.py +14 -14
  255. datahub/ingestion/source/schema_inference/object.py +22 -6
  256. datahub/ingestion/source/sigma/config.py +75 -8
  257. datahub/ingestion/source/sigma/data_classes.py +3 -0
  258. datahub/ingestion/source/sigma/sigma.py +36 -7
  259. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  260. datahub/ingestion/source/slack/slack.py +403 -140
  261. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  262. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  263. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  264. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  265. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  266. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  267. datahub/ingestion/source/snowflake/constants.py +4 -0
  268. datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
  269. datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
  270. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
  271. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  272. datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
  273. datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
  274. datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
  275. datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
  276. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
  277. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  278. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  279. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  280. datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
  281. datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
  282. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  283. datahub/ingestion/source/sql/athena.py +219 -26
  284. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  285. datahub/ingestion/source/sql/clickhouse.py +29 -9
  286. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  287. datahub/ingestion/source/sql/druid.py +9 -4
  288. datahub/ingestion/source/sql/hana.py +3 -1
  289. datahub/ingestion/source/sql/hive.py +28 -8
  290. datahub/ingestion/source/sql/hive_metastore.py +24 -25
  291. datahub/ingestion/source/sql/mariadb.py +0 -1
  292. datahub/ingestion/source/sql/mssql/job_models.py +18 -2
  293. datahub/ingestion/source/sql/mssql/source.py +376 -62
  294. datahub/ingestion/source/sql/mysql.py +154 -4
  295. datahub/ingestion/source/sql/oracle.py +62 -11
  296. datahub/ingestion/source/sql/postgres.py +142 -6
  297. datahub/ingestion/source/sql/presto.py +20 -2
  298. datahub/ingestion/source/sql/sql_common.py +281 -49
  299. datahub/ingestion/source/sql/sql_config.py +1 -34
  300. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  301. datahub/ingestion/source/sql/sql_types.py +27 -2
  302. datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
  303. datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
  304. datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
  305. datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
  306. datahub/ingestion/source/sql/teradata.py +1028 -245
  307. datahub/ingestion/source/sql/trino.py +43 -10
  308. datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
  309. datahub/ingestion/source/sql/vertica.py +14 -7
  310. datahub/ingestion/source/sql_queries.py +219 -121
  311. datahub/ingestion/source/state/checkpoint.py +8 -29
  312. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  313. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  314. datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
  315. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  316. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  317. datahub/ingestion/source/superset.py +810 -126
  318. datahub/ingestion/source/tableau/tableau.py +172 -69
  319. datahub/ingestion/source/tableau/tableau_common.py +11 -4
  320. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  321. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  322. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  323. datahub/ingestion/source/unity/config.py +161 -40
  324. datahub/ingestion/source/unity/connection.py +61 -0
  325. datahub/ingestion/source/unity/connection_test.py +1 -0
  326. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  327. datahub/ingestion/source/unity/proxy.py +794 -51
  328. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  329. datahub/ingestion/source/unity/proxy_types.py +36 -2
  330. datahub/ingestion/source/unity/report.py +15 -3
  331. datahub/ingestion/source/unity/source.py +465 -131
  332. datahub/ingestion/source/unity/tag_entities.py +197 -0
  333. datahub/ingestion/source/unity/usage.py +46 -4
  334. datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
  335. datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
  336. datahub/ingestion/source/usage/usage_common.py +4 -68
  337. datahub/ingestion/source/vertexai/__init__.py +0 -0
  338. datahub/ingestion/source/vertexai/vertexai.py +1367 -0
  339. datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
  340. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
  341. datahub/ingestion/source_config/pulsar.py +3 -1
  342. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  343. datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
  344. datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
  345. datahub/ingestion/transformer/base_transformer.py +8 -5
  346. datahub/ingestion/transformer/dataset_domain.py +1 -1
  347. datahub/ingestion/transformer/set_browse_path.py +112 -0
  348. datahub/integrations/assertion/common.py +3 -2
  349. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  350. datahub/lite/lite_util.py +2 -2
  351. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
  352. datahub/metadata/_urns/urn_defs.py +1866 -1582
  353. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  354. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  355. datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
  356. datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
  357. datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
  358. datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
  359. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  360. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  361. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  362. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
  363. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  364. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  365. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  366. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  367. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  368. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  369. datahub/metadata/schema.avsc +18404 -16617
  370. datahub/metadata/schema_classes.py +3 -3
  371. datahub/metadata/schemas/Actors.avsc +38 -1
  372. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  373. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  374. datahub/metadata/schemas/Applications.avsc +38 -0
  375. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  376. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  377. datahub/metadata/schemas/ChartKey.avsc +1 -0
  378. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  379. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  380. datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
  381. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  382. datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
  383. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  384. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  385. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  386. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  387. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  388. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  389. datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  390. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  391. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  392. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  393. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  394. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  395. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  396. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  397. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  398. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  399. datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
  400. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  401. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  402. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  403. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  404. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  405. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  406. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  407. datahub/metadata/schemas/Deprecation.avsc +2 -0
  408. datahub/metadata/schemas/DomainKey.avsc +2 -1
  409. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  410. datahub/metadata/schemas/FormInfo.avsc +5 -0
  411. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  412. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  413. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  414. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  415. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  416. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  417. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  418. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  419. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  420. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  421. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  422. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  423. datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
  424. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  425. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  426. datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
  427. datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
  428. datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
  429. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  430. datahub/metadata/schemas/Operation.avsc +21 -2
  431. datahub/metadata/schemas/Ownership.avsc +69 -0
  432. datahub/metadata/schemas/QueryProperties.avsc +24 -2
  433. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  434. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  435. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  436. datahub/metadata/schemas/Siblings.avsc +2 -0
  437. datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
  438. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  439. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  440. datahub/metadata/schemas/SystemMetadata.avsc +147 -0
  441. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  442. datahub/metadata/schemas/__init__.py +3 -3
  443. datahub/sdk/__init__.py +7 -0
  444. datahub/sdk/_all_entities.py +15 -0
  445. datahub/sdk/_shared.py +393 -10
  446. datahub/sdk/_utils.py +4 -0
  447. datahub/sdk/chart.py +386 -0
  448. datahub/sdk/container.py +7 -0
  449. datahub/sdk/dashboard.py +453 -0
  450. datahub/sdk/dataflow.py +309 -0
  451. datahub/sdk/datajob.py +367 -0
  452. datahub/sdk/dataset.py +180 -4
  453. datahub/sdk/entity.py +99 -3
  454. datahub/sdk/entity_client.py +154 -12
  455. datahub/sdk/lineage_client.py +943 -0
  456. datahub/sdk/main_client.py +83 -8
  457. datahub/sdk/mlmodel.py +383 -0
  458. datahub/sdk/mlmodelgroup.py +240 -0
  459. datahub/sdk/search_client.py +85 -8
  460. datahub/sdk/search_filters.py +393 -68
  461. datahub/secret/datahub_secret_store.py +5 -1
  462. datahub/secret/environment_secret_store.py +29 -0
  463. datahub/secret/file_secret_store.py +49 -0
  464. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  465. datahub/specific/aspect_helpers/siblings.py +73 -0
  466. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  467. datahub/specific/chart.py +1 -1
  468. datahub/specific/datajob.py +15 -1
  469. datahub/specific/dataproduct.py +4 -0
  470. datahub/specific/dataset.py +51 -59
  471. datahub/sql_parsing/_sqlglot_patch.py +1 -2
  472. datahub/sql_parsing/fingerprint_utils.py +6 -0
  473. datahub/sql_parsing/split_statements.py +30 -3
  474. datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
  475. datahub/sql_parsing/sqlglot_lineage.py +517 -44
  476. datahub/sql_parsing/sqlglot_utils.py +30 -18
  477. datahub/sql_parsing/tool_meta_extractor.py +25 -2
  478. datahub/telemetry/telemetry.py +30 -16
  479. datahub/testing/check_imports.py +1 -1
  480. datahub/testing/docker_utils.py +8 -2
  481. datahub/testing/mce_helpers.py +421 -0
  482. datahub/testing/mcp_diff.py +17 -21
  483. datahub/testing/sdk_v2_helpers.py +18 -0
  484. datahub/upgrade/upgrade.py +86 -30
  485. datahub/utilities/file_backed_collections.py +14 -15
  486. datahub/utilities/hive_schema_to_avro.py +2 -2
  487. datahub/utilities/ingest_utils.py +2 -2
  488. datahub/utilities/is_pytest.py +3 -2
  489. datahub/utilities/logging_manager.py +30 -7
  490. datahub/utilities/mapping.py +29 -2
  491. datahub/utilities/sample_data.py +5 -4
  492. datahub/utilities/server_config_util.py +298 -10
  493. datahub/utilities/sqlalchemy_query_combiner.py +6 -4
  494. datahub/utilities/stats_collections.py +4 -0
  495. datahub/utilities/threaded_iterator_executor.py +16 -3
  496. datahub/utilities/urn_encoder.py +1 -1
  497. datahub/utilities/urns/urn.py +41 -2
  498. datahub/emitter/sql_parsing_builder.py +0 -306
  499. datahub/ingestion/source/redshift/lineage_v2.py +0 -458
  500. datahub/ingestion/source/vertexai.py +0 -697
  501. datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
  502. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
  503. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,15 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import abc
4
+ import json
4
5
  from typing import (
6
+ TYPE_CHECKING,
7
+ Annotated,
5
8
  Any,
9
+ ClassVar,
10
+ Iterator,
6
11
  List,
12
+ Optional,
7
13
  Sequence,
8
14
  TypedDict,
9
15
  Union,
@@ -12,11 +18,26 @@ from typing import (
12
18
  import pydantic
13
19
 
14
20
  from datahub.configuration.common import ConfigModel
15
- from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2
16
- from datahub.ingestion.graph.client import entity_type_to_graphql
17
- from datahub.ingestion.graph.filters import SearchFilterRule
21
+ from datahub.configuration.pydantic_migration_helpers import (
22
+ PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR,
23
+ PYDANTIC_VERSION_2,
24
+ )
25
+ from datahub.ingestion.graph.client import flexible_entity_type_to_graphql
26
+ from datahub.ingestion.graph.filters import (
27
+ FilterOperator,
28
+ RemovedStatusFilter,
29
+ SearchFilterRule,
30
+ _get_status_filter,
31
+ )
18
32
  from datahub.metadata.schema_classes import EntityTypeName
19
- from datahub.metadata.urns import DataPlatformUrn, DomainUrn
33
+ from datahub.metadata.urns import (
34
+ ContainerUrn,
35
+ CorpGroupUrn,
36
+ CorpUserUrn,
37
+ DataPlatformUrn,
38
+ DomainUrn,
39
+ )
40
+ from datahub.utilities.urns.urn import guess_entity_type
20
41
 
21
42
  _AndSearchFilterRule = TypedDict(
22
43
  "_AndSearchFilterRule", {"and": List[SearchFilterRule]}
@@ -25,37 +46,51 @@ _OrFilters = List[_AndSearchFilterRule]
25
46
 
26
47
 
27
48
  class _BaseFilter(ConfigModel):
28
- class Config:
29
- # We can't wrap this in a TYPE_CHECKING block because the pydantic plugin
30
- # doesn't recognize it properly. So unfortunately we'll need to live
31
- # with the deprecation warning w/ pydantic v2.
32
- allow_population_by_field_name = True
33
- if PYDANTIC_VERSION_2:
34
- populate_by_name = True
49
+ model_config = pydantic.ConfigDict(populate_by_name=True)
35
50
 
36
51
  @abc.abstractmethod
37
- def compile(self) -> _OrFilters:
38
- pass
52
+ def compile(self) -> _OrFilters: ...
53
+
54
+ def dfs(self) -> Iterator[_BaseFilter]:
55
+ yield self
39
56
 
57
+ @classmethod
58
+ def _field_discriminator(cls) -> str:
59
+ if cls is _BaseFilter:
60
+ raise ValueError("Cannot get discriminator for _BaseFilter")
61
+ if PYDANTIC_VERSION_2:
62
+ fields: dict = cls.model_fields # type: ignore
63
+ else:
64
+ fields = cls.__fields__ # type: ignore
40
65
 
41
- def _flexible_entity_type_to_graphql(entity_type: str) -> str:
42
- if entity_type.upper() == entity_type:
43
- # Assume that we were passed a graphql EntityType enum value,
44
- # so no conversion is needed.
45
- return entity_type
46
- return entity_type_to_graphql(entity_type)
66
+ # Assumes that there's only one field name per filter.
67
+ # If that's not the case, this method should be overridden.
68
+ if len(fields.keys()) != 1:
69
+ raise ValueError(
70
+ f"Found multiple fields that could be the discriminator for this filter: {list(fields.keys())}"
71
+ )
72
+ name, field = next(iter(fields.items()))
73
+ return field.alias or name # type: ignore
47
74
 
48
75
 
49
76
  class _EntityTypeFilter(_BaseFilter):
77
+ """Filter for specific entity types.
78
+
79
+ If no entity type filter is specified, we will search all entity types in the
80
+ default search set, mirroring the behavior of the DataHub UI.
81
+ """
82
+
83
+ ENTITY_TYPE_FIELD: ClassVar[str] = "_entityType"
84
+
50
85
  entity_type: List[str] = pydantic.Field(
51
- description="The entity type to filter on. Can be 'dataset', 'chart', 'dashboard', 'corpuser', etc.",
86
+ description="The entity type to filter on. Can be 'dataset', 'chart', 'dashboard', 'corpuser', 'dataProduct', etc.",
52
87
  )
53
88
 
54
89
  def _build_rule(self) -> SearchFilterRule:
55
90
  return SearchFilterRule(
56
- field="_entityType",
91
+ field=self.ENTITY_TYPE_FIELD,
57
92
  condition="EQUAL",
58
- values=[_flexible_entity_type_to_graphql(t) for t in self.entity_type],
93
+ values=[flexible_entity_type_to_graphql(t) for t in self.entity_type],
59
94
  )
60
95
 
61
96
  def compile(self) -> _OrFilters:
@@ -63,25 +98,43 @@ class _EntityTypeFilter(_BaseFilter):
63
98
 
64
99
 
65
100
  class _EntitySubtypeFilter(_BaseFilter):
66
- entity_type: str
67
- entity_subtype: str = pydantic.Field(
101
+ entity_subtype: List[str] = pydantic.Field(
68
102
  description="The entity subtype to filter on. Can be 'Table', 'View', 'Source', etc. depending on the native platform's concepts.",
69
103
  )
70
104
 
105
+ @pydantic.validator("entity_subtype", pre=True)
106
+ def validate_entity_subtype(cls, v: str) -> List[str]:
107
+ return [v] if not isinstance(v, list) else v
108
+
109
+ def _build_rule(self) -> SearchFilterRule:
110
+ return SearchFilterRule(
111
+ field="typeNames",
112
+ condition="EQUAL",
113
+ values=self.entity_subtype,
114
+ )
115
+
71
116
  def compile(self) -> _OrFilters:
72
- rules = [
73
- SearchFilterRule(
74
- field="_entityType",
75
- condition="EQUAL",
76
- values=[_flexible_entity_type_to_graphql(self.entity_type)],
77
- ),
78
- SearchFilterRule(
79
- field="typeNames",
80
- condition="EQUAL",
81
- values=[self.entity_subtype],
82
- ),
83
- ]
84
- return [{"and": rules}]
117
+ return [{"and": [self._build_rule()]}]
118
+
119
+
120
class _StatusFilter(_BaseFilter):
    """Filter for the status of entities during search.

    If not explicitly specified, the NOT_SOFT_DELETED status filter will be applied.
    """

    status: RemovedStatusFilter

    def _build_rule(self) -> Optional[SearchFilterRule]:
        # Delegates to the shared helper; the Optional return type means
        # some status values may produce no rule at all.
        return _get_status_filter(self.status)

    def compile(self) -> _OrFilters:
        status_rule = self._build_rule()
        if not status_rule:
            # Our boolean algebra logic requires something here - returning [] would cause errors.
            return FilterDsl.true().compile()
        return [{"and": [status_rule]}]
85
138
 
86
139
 
87
140
  class _PlatformFilter(_BaseFilter):
@@ -123,6 +176,39 @@ class _DomainFilter(_BaseFilter):
123
176
  return [{"and": [self._build_rule()]}]
124
177
 
125
178
 
179
# Filter on one or more container URNs, either as the direct parent
# ("container" field) or anywhere in the browse path ("browsePathV2" field).
class _ContainerFilter(_BaseFilter):
    container: List[str]
    direct_descendants_only: bool = pydantic.Field(
        default=False,
        description="If true, only entities that are direct descendants of the container will be returned.",
    )

    @pydantic.validator("container", each_item=True)
    def validate_container(cls, v: str) -> str:
        # Round-trip through ContainerUrn so that invalid URNs are rejected
        # and the stored value is the URN's string form.
        parsed_urn = ContainerUrn.from_string(v)
        return str(parsed_urn)

    @classmethod
    def _field_discriminator(cls) -> str:
        # This model has two fields, so the discriminator is named explicitly.
        return "container"

    def _build_rule(self) -> SearchFilterRule:
        if self.direct_descendants_only:
            field_name, operator = "container", "EQUAL"
        else:
            field_name, operator = "browsePathV2", "CONTAIN"
        return SearchFilterRule(
            field=field_name,
            condition=operator,
            values=self.container,
        )

    def compile(self) -> _OrFilters:
        container_rule = self._build_rule()
        return [{"and": [container_rule]}]
210
+
211
+
126
212
  class _EnvFilter(_BaseFilter):
127
213
  # Note that not all entity types have an env (e.g. dashboards / charts).
128
214
  # If the env filter is specified, these will be excluded.
@@ -156,11 +242,99 @@ class _EnvFilter(_BaseFilter):
156
242
  ]
157
243
 
158
244
 
245
class _OwnerFilter(_BaseFilter):
    """Filter for entities owned by specific users or groups."""

    owner: List[str] = pydantic.Field(
        description="The owner to filter on. Should be user or group URNs.",
    )

    @pydantic.validator("owner", each_item=True)
    def validate_owner(cls, v: str) -> str:
        # Only corp user and corp group URNs are accepted; the value is
        # round-tripped through the matching URN class before being stored.
        if v.startswith("urn:li:"):
            owner_type = guess_entity_type(v)
            for urn_cls in (CorpUserUrn, CorpGroupUrn):
                if owner_type == urn_cls.ENTITY_TYPE:
                    return str(urn_cls.from_string(v))
        raise ValueError(f"Owner must be a valid User or Group URN, got: {v}")

    def _build_rule(self) -> SearchFilterRule:
        owner_rule = SearchFilterRule(
            field="owners",
            condition="EQUAL",
            values=self.owner,
        )
        return owner_rule

    def compile(self) -> _OrFilters:
        return [{"and": [self._build_rule()]}]
273
+
274
+
275
class _GlossaryTermFilter(_BaseFilter):
    """Filter for entities associated with specific glossary terms."""

    glossary_term: List[str] = pydantic.Field(
        description="The glossary term to filter on. Should be glossary term URNs.",
    )

    @pydantic.validator("glossary_term", each_item=True)
    def validate_glossary_term(cls, v: str) -> str:
        # First reject anything that is not a URN at all, then anything
        # whose entity type is not a glossary term.
        looks_like_urn = v.startswith("urn:li:")
        if not looks_like_urn:
            raise ValueError(f"Glossary term must be a valid URN, got: {v}")
        if guess_entity_type(v) != "glossaryTerm":
            raise ValueError(
                f"Glossary term must be a valid glossary term URN, got: {v}"
            )
        return v

    def _build_rule(self) -> SearchFilterRule:
        term_rule = SearchFilterRule(
            field="glossaryTerms",
            condition="EQUAL",
            values=self.glossary_term,
        )
        return term_rule

    def compile(self) -> _OrFilters:
        return [{"and": [self._build_rule()]}]
303
+
304
+
305
class _TagFilter(_BaseFilter):
    """Filter for entities associated with specific tags."""

    tag: List[str] = pydantic.Field(
        description="The tag to filter on. Should be tag URNs.",
    )

    @pydantic.validator("tag", each_item=True)
    def validate_tag(cls, v: str) -> str:
        # First reject anything that is not a URN at all, then anything
        # whose entity type is not a tag.
        looks_like_urn = v.startswith("urn:li:")
        if not looks_like_urn:
            raise ValueError(f"Tag must be a valid URN, got: {v}")
        if guess_entity_type(v) != "tag":
            raise ValueError(f"Tag must be a valid tag URN, got: {v}")
        return v

    def _build_rule(self) -> SearchFilterRule:
        tag_rule = SearchFilterRule(
            field="tags",
            condition="EQUAL",
            values=self.tag,
        )
        return tag_rule

    def compile(self) -> _OrFilters:
        return [{"and": [self._build_rule()]}]
331
+
332
+
159
333
  class _CustomCondition(_BaseFilter):
160
- """Represents a single field condition"""
334
+ """Represents a single field condition."""
161
335
 
162
336
  field: str
163
- condition: str
337
+ condition: FilterOperator
164
338
  values: List[str]
165
339
 
166
340
  def compile(self) -> _OrFilters:
@@ -171,9 +345,13 @@ class _CustomCondition(_BaseFilter):
171
345
  )
172
346
  return [{"and": [rule]}]
173
347
 
348
+ @classmethod
349
+ def _field_discriminator(cls) -> str:
350
+ return "_custom"
351
+
174
352
 
175
353
  class _And(_BaseFilter):
176
- """Represents an AND conjunction of filters"""
354
+ """Represents an AND conjunction of filters."""
177
355
 
178
356
  and_: Sequence["Filter"] = pydantic.Field(alias="and")
179
357
  # TODO: Add validator to ensure that the "and" field is not empty
@@ -219,9 +397,14 @@ class _And(_BaseFilter):
219
397
  ]
220
398
  }
221
399
 
400
+ def dfs(self) -> Iterator[_BaseFilter]:
401
+ yield self
402
+ for filter in self.and_:
403
+ yield from filter.dfs()
404
+
222
405
 
223
406
  class _Or(_BaseFilter):
224
- """Represents an OR conjunction of filters"""
407
+ """Represents an OR conjunction of filters."""
225
408
 
226
409
  or_: Sequence["Filter"] = pydantic.Field(alias="or")
227
410
  # TODO: Add validator to ensure that the "or" field is not empty
@@ -232,9 +415,14 @@ class _Or(_BaseFilter):
232
415
  merged_filter.extend(filter.compile())
233
416
  return merged_filter
234
417
 
418
+ def dfs(self) -> Iterator[_BaseFilter]:
419
+ yield self
420
+ for filter in self.or_:
421
+ yield from filter.dfs()
422
+
235
423
 
236
424
  class _Not(_BaseFilter):
237
- """Represents a NOT filter"""
425
+ """Represents a NOT filter."""
238
426
 
239
427
  not_: "Filter" = pydantic.Field(alias="not")
240
428
 
@@ -262,31 +450,121 @@ class _Not(_BaseFilter):
262
450
 
263
451
  return final_filters
264
452
 
453
+ def dfs(self) -> Iterator[_BaseFilter]:
454
+ yield self
455
+ yield from self.not_.dfs()
265
456
 
266
- # TODO: With pydantic 2, we can use a RootModel with a
267
- # discriminated union to make the error messages more informative.
268
- Filter = Union[
269
- _And,
270
- _Or,
271
- _Not,
272
- _EntityTypeFilter,
273
- _EntitySubtypeFilter,
274
- _PlatformFilter,
275
- _DomainFilter,
276
- _EnvFilter,
277
- _CustomCondition,
278
- ]
279
457
 
458
def _filter_discriminator(v: Any) -> Optional[str]:
    """Pick the discriminator tag for a filter value.

    Filter instances report their own discriminator. For raw dicts, a
    single-key dict is tagged by that key; a dict with a "container" key is
    tagged as a container filter; a dict with both "field" and "condition"
    keys is tagged as a custom condition. Anything else yields None.
    """
    if isinstance(v, _BaseFilter):
        return v._field_discriminator()

    if isinstance(v, dict):
        key_names = tuple(v.keys())
        if len(key_names) == 1:
            return key_names[0]

        key_set = set(key_names)
        if "container" in key_set:
            return _ContainerFilter._field_discriminator()
        if {"field", "condition"} <= key_set:
            return _CustomCondition._field_discriminator()

    return None
474
+
475
+
476
+ def _parse_and_like_filter(value: Any) -> Any:
477
+ # Do not parse if filter is already of type and/or/not or a custom condition
478
+ # also do not parse container filter if direct_descendants_only is specified
479
+ if (
480
+ isinstance(value, dict)
481
+ and not set(value.keys()).intersection(
482
+ {"and", "or", "not", "field", "condition", "direct_descendants_only"}
483
+ )
484
+ and len(value) > 1
485
+ ):
486
+ return {"and": [{k: v} for k, v in value.items()]}
487
+
488
+ return value
489
+
490
+
491
+ if TYPE_CHECKING or not PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR:
492
+ # The `not TYPE_CHECKING` bit is required to make the linter happy,
493
+ # since we currently only run mypy with pydantic v1.
494
+ Filter = Union[
495
+ _And,
496
+ _Or,
497
+ _Not,
498
+ _EntityTypeFilter,
499
+ _EntitySubtypeFilter,
500
+ _StatusFilter,
501
+ _PlatformFilter,
502
+ _DomainFilter,
503
+ _ContainerFilter,
504
+ _EnvFilter,
505
+ _OwnerFilter,
506
+ _GlossaryTermFilter,
507
+ _TagFilter,
508
+ _CustomCondition,
509
+ ]
280
510
 
281
- # Required to resolve forward references to "Filter"
282
- if PYDANTIC_VERSION_2:
283
- _And.model_rebuild() # type: ignore
284
- _Or.model_rebuild() # type: ignore
285
- _Not.model_rebuild() # type: ignore
286
- else:
287
511
  _And.update_forward_refs()
288
512
  _Or.update_forward_refs()
289
513
  _Not.update_forward_refs()
514
+ else:
515
+ from pydantic import Discriminator, Tag
516
+
517
+ def _parse_json_from_string(value: Any) -> Any:
518
+ if isinstance(value, str):
519
+ try:
520
+ return json.loads(value)
521
+ except json.JSONDecodeError:
522
+ return value
523
+ else:
524
+ return value
525
+
526
+ # TODO: Once we're fully on pydantic 2, we can use a RootModel here.
527
+ # That way we'd be able to attach methods to the Filter type.
528
+ # e.g. replace load_filters(...) with Filter.load(...)
529
+ Filter = Annotated[
530
+ Annotated[
531
+ Union[
532
+ Annotated[_And, Tag(_And._field_discriminator())],
533
+ Annotated[_Or, Tag(_Or._field_discriminator())],
534
+ Annotated[_Not, Tag(_Not._field_discriminator())],
535
+ Annotated[
536
+ _EntityTypeFilter, Tag(_EntityTypeFilter._field_discriminator())
537
+ ],
538
+ Annotated[
539
+ _EntitySubtypeFilter,
540
+ Tag(_EntitySubtypeFilter._field_discriminator()),
541
+ ],
542
+ Annotated[_StatusFilter, Tag(_StatusFilter._field_discriminator())],
543
+ Annotated[_PlatformFilter, Tag(_PlatformFilter._field_discriminator())],
544
+ Annotated[_DomainFilter, Tag(_DomainFilter._field_discriminator())],
545
+ Annotated[
546
+ _ContainerFilter, Tag(_ContainerFilter._field_discriminator())
547
+ ],
548
+ Annotated[_EnvFilter, Tag(_EnvFilter._field_discriminator())],
549
+ Annotated[_OwnerFilter, Tag(_OwnerFilter._field_discriminator())],
550
+ Annotated[
551
+ _GlossaryTermFilter, Tag(_GlossaryTermFilter._field_discriminator())
552
+ ],
553
+ Annotated[_TagFilter, Tag(_TagFilter._field_discriminator())],
554
+ Annotated[
555
+ _CustomCondition, Tag(_CustomCondition._field_discriminator())
556
+ ],
557
+ ],
558
+ Discriminator(_filter_discriminator),
559
+ ],
560
+ pydantic.BeforeValidator(_parse_and_like_filter),
561
+ pydantic.BeforeValidator(_parse_json_from_string),
562
+ ]
563
+
564
+ # Required to resolve forward references to "Filter"
565
+ _And.model_rebuild() # type: ignore
566
+ _Or.model_rebuild() # type: ignore
567
+ _Not.model_rebuild() # type: ignore
290
568
 
291
569
 
292
570
  def load_filters(obj: Any) -> Filter:
@@ -318,6 +596,18 @@ class FilterDsl:
318
596
  def not_(arg: "Filter") -> _Not:
319
597
  return _Not(not_=arg)
320
598
 
599
@staticmethod
def true() -> "Filter":
    """Return a custom condition that checks that the ``urn`` field exists.

    Judging by its name, this is meant as a match-everything filter.
    """
    urn_exists = _CustomCondition(
        field="urn",
        condition="EXISTS",
        values=[],
    )
    return urn_exists
606
+
607
@staticmethod
def false() -> "Filter":
    """Return the negation of ``FilterDsl.true()``."""
    match_all = FilterDsl.true()
    return FilterDsl.not_(match_all)
610
+
321
611
  @staticmethod
322
612
  def entity_type(
323
613
  entity_type: Union[EntityTypeName, Sequence[EntityTypeName]],
@@ -329,14 +619,15 @@ class FilterDsl:
329
619
  )
330
620
 
331
621
  @staticmethod
332
- def entity_subtype(entity_type: str, subtype: str) -> _EntitySubtypeFilter:
622
+ def entity_subtype(
623
+ entity_subtype: Union[str, Sequence[str]],
624
+ ) -> _EntitySubtypeFilter:
333
625
  return _EntitySubtypeFilter(
334
- entity_type=entity_type,
335
- entity_subtype=subtype,
626
+ entity_subtype=entity_subtype,
336
627
  )
337
628
 
338
629
  @staticmethod
339
- def platform(platform: Union[str, List[str]], /) -> _PlatformFilter:
630
+ def platform(platform: Union[str, Sequence[str]], /) -> _PlatformFilter:
340
631
  return _PlatformFilter(
341
632
  platform=[platform] if isinstance(platform, str) else platform
342
633
  )
@@ -344,13 +635,43 @@ class FilterDsl:
344
635
  # TODO: Add a platform_instance filter
345
636
 
346
637
  @staticmethod
347
- def domain(domain: Union[str, List[str]], /) -> _DomainFilter:
638
+ def domain(domain: Union[str, Sequence[str]], /) -> _DomainFilter:
348
639
  return _DomainFilter(domain=[domain] if isinstance(domain, str) else domain)
349
640
 
350
641
  @staticmethod
351
- def env(env: Union[str, List[str]], /) -> _EnvFilter:
642
def container(
    container: Union[str, Sequence[str]],
    /,
    *,
    direct_descendants_only: bool = False,
) -> _ContainerFilter:
    """Build a container filter from one container URN or a sequence of them.

    A single string is wrapped in a list; ``direct_descendants_only`` is
    passed through unchanged.
    """
    if isinstance(container, str):
        container = [container]
    return _ContainerFilter(
        container=container,
        direct_descendants_only=direct_descendants_only,
    )
652
+
653
+ @staticmethod
654
+ def env(env: Union[str, Sequence[str]], /) -> _EnvFilter:
352
655
  return _EnvFilter(env=[env] if isinstance(env, str) else env)
353
656
 
657
@staticmethod
def owner(owner: Union[str, Sequence[str]], /) -> _OwnerFilter:
    """Build an owner filter from one owner URN or a sequence of them."""
    if isinstance(owner, str):
        owner = [owner]
    return _OwnerFilter(owner=owner)
660
+
661
@staticmethod
def glossary_term(
    glossary_term: Union[str, Sequence[str]], /
) -> _GlossaryTermFilter:
    """Build a glossary-term filter from one term URN or a sequence of them."""
    if isinstance(glossary_term, str):
        glossary_term = [glossary_term]
    return _GlossaryTermFilter(glossary_term=glossary_term)
670
+
671
@staticmethod
def tag(tag: Union[str, Sequence[str]], /) -> _TagFilter:
    """Build a tag filter from one tag URN or a sequence of them."""
    if isinstance(tag, str):
        tag = [tag]
    return _TagFilter(tag=tag)
674
+
354
675
  @staticmethod
355
676
  def has_custom_property(key: str, value: str) -> _CustomCondition:
356
677
  return _CustomCondition(
@@ -359,13 +680,17 @@ class FilterDsl:
359
680
  values=[f"{key}={value}"],
360
681
  )
361
682
 
683
@staticmethod
def soft_deleted(status: RemovedStatusFilter) -> _StatusFilter:
    """Build a status filter for the given removed-status value."""
    status_filter = _StatusFilter(status=status)
    return status_filter
686
+
362
687
  # TODO: Add a soft-deletion status filter
363
688
  # TODO: add a container / browse path filter
364
689
  # TODO add shortcut for custom filters
365
690
 
366
691
  @staticmethod
367
692
  def custom_filter(
368
- field: str, condition: str, values: List[str]
693
+ field: str, condition: FilterOperator, values: Sequence[str]
369
694
  ) -> _CustomCondition:
370
695
  return _CustomCondition(
371
696
  field=field,
@@ -3,7 +3,8 @@ from typing import Any, Dict, List, Optional, Union
3
3
 
4
4
  from pydantic import BaseModel, validator
5
5
 
6
- from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
6
+ from datahub.ingestion.graph.client import DataHubGraph
7
+ from datahub.ingestion.graph.config import DatahubClientConfig
7
8
  from datahub.secret.datahub_secrets_client import DataHubSecretsClient
8
9
  from datahub.secret.secret_store import SecretStore
9
10
 
@@ -64,3 +65,6 @@ class DataHubSecretStore(SecretStore):
64
65
  def create(cls, config: Any) -> "DataHubSecretStore":
65
66
  config = DataHubSecretStoreConfig.parse_obj(config)
66
67
  return cls(config)
68
+
69
def close(self) -> None:
    """Close the graph object held by this store's client."""
    self.client.graph.close()
@@ -0,0 +1,29 @@
1
+ import os
2
+ from typing import Dict, List, Union
3
+
4
+ from datahub.secret.secret_store import SecretStore
5
+
6
+
7
class EnvironmentSecretStore(SecretStore):
    """Simple SecretStore implementation that fetches Secret values from the local environment."""

    def __init__(self, config):
        # The config argument is accepted but not used: there is nothing to
        # configure for environment lookups.
        pass

    def close(self) -> None:
        """Do nothing; this store holds no resources."""
        return None

    def get_secret_values(self, secret_names: List[str]) -> Dict[str, Union[str, None]]:
        """Map each requested name to its environment value, or None if unset."""
        return {name: os.environ.get(name) for name in secret_names}

    def get_secret_value(self, secret_name: str) -> Union[str, None]:
        """Return the environment value for *secret_name*, or None if unset."""
        return os.environ.get(secret_name)

    def get_id(self) -> str:
        """Return the identifier of this store type."""
        return "env"

    @classmethod
    def create(cls, config: Dict) -> "EnvironmentSecretStore":
        """Build a store instance from *config*."""
        return cls(config)
@@ -0,0 +1,49 @@
1
+ import logging
2
+ import os
3
+ from typing import Any, Dict, List, Union
4
+
5
+ from pydantic import BaseModel
6
+
7
+ from datahub.secret.secret_store import SecretStore
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
# Settings for the file-backed secret store.
class FileSecretStoreConfig(BaseModel):
    # Directory containing one file per secret; the file name is the secret name.
    basedir: str = "/mnt/secrets"
    # Maximum number of characters returned for a secret value; longer
    # values are truncated to this length by FileSecretStore.
    max_length: int = 1024768
15
+
16
+
17
class FileSecretStore(SecretStore):
    """Simple SecretStore implementation that fetches Secret values from local files.

    Each secret lives in a file named after the secret inside
    ``config.basedir``.
    """

    def __init__(self, config: FileSecretStoreConfig):
        self.config = config

    def get_secret_values(self, secret_names: List[str]) -> Dict[str, Union[str, None]]:
        """Map each requested name to its secret value, or None if unavailable."""
        return {name: self.get_secret_value(name) for name in secret_names}

    def _resolve_secret_path(self, secret_name: str) -> Union[str, None]:
        """Return the path for *secret_name*, or None if it escapes ``basedir``.

        The check is purely lexical (no symlink resolution), so secret files
        that are symlinks inside ``basedir`` keep working.
        """
        base_dir = os.path.abspath(self.config.basedir)
        candidate = os.path.abspath(os.path.join(base_dir, secret_name))
        try:
            contained = os.path.commonpath([base_dir, candidate]) == base_dir
        except ValueError:
            # Raised e.g. when the two paths are on different drives.
            contained = False
        return candidate if contained else None

    def get_secret_value(self, secret_name: str) -> Union[str, None]:
        """Read the secret stored in ``<basedir>/<secret_name>``.

        Returns the file content with trailing whitespace stripped, truncated
        to ``config.max_length`` characters (a warning is logged when
        truncation happens). Returns None when the file does not exist or
        when the name would resolve outside ``basedir``.
        """
        secret_path = self._resolve_secret_path(secret_name)
        if secret_path is None:
            # A name such as "../x" or an absolute path must not be able to
            # read arbitrary files on the host.
            logger.warning(
                "Secret name %r resolves outside of %s and will be ignored.",
                secret_name,
                self.config.basedir,
            )
            return None

        if not os.path.exists(secret_path):
            return None

        max_length = self.config.max_length
        try:
            with open(secret_path, "r") as f:
                # Read one extra character so over-long values can be detected.
                secret_value = f.read(max_length + 1)
        except FileNotFoundError:
            # The file was removed between the existence check and the open.
            return None

        if len(secret_value) > max_length:
            logger.warning(
                "Secret %s is longer than %s and will be truncated.",
                secret_name,
                max_length,
            )
        return secret_value[:max_length].rstrip()

    def get_id(self) -> str:
        """Return the identifier of this store type."""
        return "file"

    def close(self) -> None:
        """Do nothing; this store holds no open resources."""
        return

    @classmethod
    def create(cls, config: Any) -> "FileSecretStore":
        """Validate *config* into a FileSecretStoreConfig and build a store."""
        config = FileSecretStoreConfig.parse_obj(config)
        return cls(config)