acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (414)
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +1 -1
  7. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  8. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  9. datahub/api/entities/dataset/dataset.py +26 -23
  10. datahub/api/entities/external/__init__.py +0 -0
  11. datahub/api/entities/external/external_entities.py +724 -0
  12. datahub/api/entities/external/external_tag.py +147 -0
  13. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  14. datahub/api/entities/external/restricted_text.py +172 -0
  15. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  16. datahub/api/entities/forms/forms.py +3 -3
  17. datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
  18. datahub/api/graphql/operation.py +10 -6
  19. datahub/cli/check_cli.py +88 -7
  20. datahub/cli/cli_utils.py +63 -0
  21. datahub/cli/config_utils.py +18 -10
  22. datahub/cli/container_cli.py +5 -0
  23. datahub/cli/delete_cli.py +125 -27
  24. datahub/cli/docker_check.py +110 -14
  25. datahub/cli/docker_cli.py +153 -229
  26. datahub/cli/exists_cli.py +0 -2
  27. datahub/cli/get_cli.py +0 -2
  28. datahub/cli/graphql_cli.py +1422 -0
  29. datahub/cli/iceberg_cli.py +5 -0
  30. datahub/cli/ingest_cli.py +3 -15
  31. datahub/cli/migrate.py +2 -0
  32. datahub/cli/put_cli.py +1 -4
  33. datahub/cli/quickstart_versioning.py +53 -10
  34. datahub/cli/specific/assertions_cli.py +37 -6
  35. datahub/cli/specific/datacontract_cli.py +54 -7
  36. datahub/cli/specific/dataproduct_cli.py +2 -15
  37. datahub/cli/specific/dataset_cli.py +1 -8
  38. datahub/cli/specific/forms_cli.py +0 -4
  39. datahub/cli/specific/group_cli.py +0 -2
  40. datahub/cli/specific/structuredproperties_cli.py +1 -4
  41. datahub/cli/specific/user_cli.py +172 -3
  42. datahub/cli/state_cli.py +0 -2
  43. datahub/cli/timeline_cli.py +0 -2
  44. datahub/configuration/common.py +40 -1
  45. datahub/configuration/connection_resolver.py +5 -2
  46. datahub/configuration/env_vars.py +331 -0
  47. datahub/configuration/import_resolver.py +7 -4
  48. datahub/configuration/kafka.py +21 -1
  49. datahub/configuration/pydantic_migration_helpers.py +6 -13
  50. datahub/configuration/source_common.py +3 -2
  51. datahub/configuration/validate_field_deprecation.py +5 -2
  52. datahub/configuration/validate_field_removal.py +8 -2
  53. datahub/configuration/validate_field_rename.py +6 -5
  54. datahub/configuration/validate_multiline_string.py +5 -2
  55. datahub/emitter/mce_builder.py +8 -4
  56. datahub/emitter/rest_emitter.py +103 -30
  57. datahub/entrypoints.py +6 -3
  58. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
  59. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  60. datahub/ingestion/api/decorators.py +15 -3
  61. datahub/ingestion/api/report.py +381 -3
  62. datahub/ingestion/api/sink.py +27 -2
  63. datahub/ingestion/api/source.py +165 -58
  64. datahub/ingestion/api/source_protocols.py +23 -0
  65. datahub/ingestion/autogenerated/__init__.py +0 -0
  66. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  67. datahub/ingestion/autogenerated/lineage.json +402 -0
  68. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  69. datahub/ingestion/extractor/schema_util.py +13 -4
  70. datahub/ingestion/glossary/classification_mixin.py +5 -0
  71. datahub/ingestion/graph/client.py +330 -25
  72. datahub/ingestion/graph/config.py +3 -2
  73. datahub/ingestion/graph/filters.py +30 -11
  74. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  75. datahub/ingestion/run/pipeline.py +81 -11
  76. datahub/ingestion/run/pipeline_config.py +2 -2
  77. datahub/ingestion/sink/datahub_kafka.py +1 -0
  78. datahub/ingestion/sink/datahub_rest.py +13 -5
  79. datahub/ingestion/sink/file.py +1 -0
  80. datahub/ingestion/source/abs/config.py +1 -1
  81. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  82. datahub/ingestion/source/abs/source.py +15 -30
  83. datahub/ingestion/source/aws/aws_common.py +185 -13
  84. datahub/ingestion/source/aws/glue.py +517 -244
  85. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  86. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  87. datahub/ingestion/source/aws/tag_entities.py +270 -0
  88. datahub/ingestion/source/azure/azure_common.py +3 -3
  89. datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
  90. datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
  91. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
  92. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
  93. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  94. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  95. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  96. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  97. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  98. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  99. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  100. datahub/ingestion/source/cassandra/cassandra.py +6 -8
  101. datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
  102. datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
  103. datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
  104. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  105. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  106. datahub/ingestion/source/common/subtypes.py +53 -0
  107. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  108. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  109. datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
  110. datahub/ingestion/source/datahub/config.py +12 -9
  111. datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
  112. datahub/ingestion/source/datahub/datahub_source.py +10 -0
  113. datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
  114. datahub/ingestion/source/dbt/dbt_common.py +224 -9
  115. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  116. datahub/ingestion/source/debug/__init__.py +0 -0
  117. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  118. datahub/ingestion/source/delta_lake/config.py +9 -5
  119. datahub/ingestion/source/delta_lake/source.py +8 -0
  120. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  121. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  122. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  123. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  124. datahub/ingestion/source/dremio/dremio_source.py +132 -98
  125. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  126. datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
  127. datahub/ingestion/source/excel/__init__.py +0 -0
  128. datahub/ingestion/source/excel/config.py +92 -0
  129. datahub/ingestion/source/excel/excel_file.py +539 -0
  130. datahub/ingestion/source/excel/profiling.py +308 -0
  131. datahub/ingestion/source/excel/report.py +49 -0
  132. datahub/ingestion/source/excel/source.py +662 -0
  133. datahub/ingestion/source/excel/util.py +18 -0
  134. datahub/ingestion/source/feast.py +8 -10
  135. datahub/ingestion/source/file.py +3 -0
  136. datahub/ingestion/source/fivetran/config.py +66 -7
  137. datahub/ingestion/source/fivetran/fivetran.py +227 -43
  138. datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
  139. datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
  140. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  141. datahub/ingestion/source/fivetran/response_models.py +97 -0
  142. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  143. datahub/ingestion/source/gcs/gcs_source.py +32 -4
  144. datahub/ingestion/source/ge_data_profiler.py +108 -31
  145. datahub/ingestion/source/ge_profiling_config.py +26 -11
  146. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  147. datahub/ingestion/source/grafana/field_utils.py +307 -0
  148. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  149. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  150. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  151. datahub/ingestion/source/grafana/lineage.py +202 -0
  152. datahub/ingestion/source/grafana/models.py +137 -0
  153. datahub/ingestion/source/grafana/report.py +90 -0
  154. datahub/ingestion/source/grafana/types.py +16 -0
  155. datahub/ingestion/source/hex/api.py +28 -1
  156. datahub/ingestion/source/hex/hex.py +16 -5
  157. datahub/ingestion/source/hex/mapper.py +16 -2
  158. datahub/ingestion/source/hex/model.py +2 -0
  159. datahub/ingestion/source/hex/query_fetcher.py +1 -1
  160. datahub/ingestion/source/iceberg/iceberg.py +123 -59
  161. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  162. datahub/ingestion/source/identity/azure_ad.py +1 -1
  163. datahub/ingestion/source/identity/okta.py +1 -14
  164. datahub/ingestion/source/kafka/kafka.py +16 -0
  165. datahub/ingestion/source/kafka_connect/common.py +2 -2
  166. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  167. datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
  168. datahub/ingestion/source/looker/looker_common.py +148 -79
  169. datahub/ingestion/source/looker/looker_config.py +15 -4
  170. datahub/ingestion/source/looker/looker_constant.py +4 -0
  171. datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
  172. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  173. datahub/ingestion/source/looker/looker_source.py +503 -547
  174. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  175. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  176. datahub/ingestion/source/looker/lookml_config.py +31 -3
  177. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  178. datahub/ingestion/source/looker/lookml_source.py +96 -117
  179. datahub/ingestion/source/looker/view_upstream.py +494 -1
  180. datahub/ingestion/source/metabase.py +32 -6
  181. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  182. datahub/ingestion/source/metadata/lineage.py +9 -9
  183. datahub/ingestion/source/mlflow.py +12 -2
  184. datahub/ingestion/source/mock_data/__init__.py +0 -0
  185. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  186. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  187. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  188. datahub/ingestion/source/mode.py +26 -5
  189. datahub/ingestion/source/mongodb.py +11 -1
  190. datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
  191. datahub/ingestion/source/nifi.py +2 -2
  192. datahub/ingestion/source/openapi.py +1 -1
  193. datahub/ingestion/source/powerbi/config.py +47 -21
  194. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  195. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  196. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  197. datahub/ingestion/source/powerbi/powerbi.py +10 -6
  198. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  199. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  200. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  201. datahub/ingestion/source/preset.py +3 -3
  202. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  203. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  204. datahub/ingestion/source/redash.py +1 -1
  205. datahub/ingestion/source/redshift/config.py +15 -9
  206. datahub/ingestion/source/redshift/datashares.py +1 -1
  207. datahub/ingestion/source/redshift/lineage.py +386 -687
  208. datahub/ingestion/source/redshift/query.py +23 -19
  209. datahub/ingestion/source/redshift/redshift.py +52 -111
  210. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  211. datahub/ingestion/source/redshift/report.py +0 -2
  212. datahub/ingestion/source/redshift/usage.py +6 -5
  213. datahub/ingestion/source/s3/report.py +4 -2
  214. datahub/ingestion/source/s3/source.py +449 -248
  215. datahub/ingestion/source/sac/sac.py +3 -1
  216. datahub/ingestion/source/salesforce.py +28 -13
  217. datahub/ingestion/source/schema/json_schema.py +14 -14
  218. datahub/ingestion/source/schema_inference/object.py +22 -6
  219. datahub/ingestion/source/sigma/data_classes.py +3 -0
  220. datahub/ingestion/source/sigma/sigma.py +7 -1
  221. datahub/ingestion/source/slack/slack.py +10 -16
  222. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  223. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  224. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  225. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  226. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  227. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  228. datahub/ingestion/source/snowflake/constants.py +3 -0
  229. datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
  230. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
  231. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
  232. datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
  233. datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
  234. datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
  235. datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
  236. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
  237. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  238. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  239. datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
  240. datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
  241. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  242. datahub/ingestion/source/sql/athena.py +217 -25
  243. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  244. datahub/ingestion/source/sql/clickhouse.py +24 -8
  245. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  246. datahub/ingestion/source/sql/druid.py +2 -2
  247. datahub/ingestion/source/sql/hana.py +3 -1
  248. datahub/ingestion/source/sql/hive.py +4 -3
  249. datahub/ingestion/source/sql/hive_metastore.py +19 -20
  250. datahub/ingestion/source/sql/mariadb.py +0 -1
  251. datahub/ingestion/source/sql/mssql/job_models.py +3 -1
  252. datahub/ingestion/source/sql/mssql/source.py +336 -57
  253. datahub/ingestion/source/sql/mysql.py +154 -4
  254. datahub/ingestion/source/sql/oracle.py +5 -5
  255. datahub/ingestion/source/sql/postgres.py +142 -6
  256. datahub/ingestion/source/sql/presto.py +2 -1
  257. datahub/ingestion/source/sql/sql_common.py +281 -49
  258. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  259. datahub/ingestion/source/sql/sql_types.py +22 -0
  260. datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
  261. datahub/ingestion/source/sql/teradata.py +1028 -245
  262. datahub/ingestion/source/sql/trino.py +11 -1
  263. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  264. datahub/ingestion/source/sql/vertica.py +14 -7
  265. datahub/ingestion/source/sql_queries.py +219 -121
  266. datahub/ingestion/source/state/checkpoint.py +8 -29
  267. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  268. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  269. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  270. datahub/ingestion/source/superset.py +314 -67
  271. datahub/ingestion/source/tableau/tableau.py +135 -59
  272. datahub/ingestion/source/tableau/tableau_common.py +9 -2
  273. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  274. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  275. datahub/ingestion/source/unity/config.py +160 -40
  276. datahub/ingestion/source/unity/connection.py +61 -0
  277. datahub/ingestion/source/unity/connection_test.py +1 -0
  278. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  279. datahub/ingestion/source/unity/proxy.py +794 -51
  280. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  281. datahub/ingestion/source/unity/proxy_types.py +36 -2
  282. datahub/ingestion/source/unity/report.py +15 -3
  283. datahub/ingestion/source/unity/source.py +465 -131
  284. datahub/ingestion/source/unity/tag_entities.py +197 -0
  285. datahub/ingestion/source/unity/usage.py +46 -4
  286. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  287. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
  288. datahub/ingestion/source/usage/usage_common.py +4 -3
  289. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  290. datahub/ingestion/source_config/pulsar.py +3 -1
  291. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  292. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  293. datahub/ingestion/transformer/base_transformer.py +8 -5
  294. datahub/ingestion/transformer/set_browse_path.py +112 -0
  295. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  296. datahub/metadata/_internal_schema_classes.py +6806 -4871
  297. datahub/metadata/_urns/urn_defs.py +1767 -1539
  298. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  299. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  300. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  301. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  302. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  303. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
  304. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  305. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  306. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  307. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  308. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  309. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  310. datahub/metadata/schema.avsc +18395 -16979
  311. datahub/metadata/schemas/Actors.avsc +38 -1
  312. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  313. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  314. datahub/metadata/schemas/Applications.avsc +38 -0
  315. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  316. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  317. datahub/metadata/schemas/ChartKey.avsc +1 -0
  318. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  319. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  320. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  321. datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
  322. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  323. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  324. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  325. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  326. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  327. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  328. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  329. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  330. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  331. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  332. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  333. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  334. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  335. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  336. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  337. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  338. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  339. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  340. datahub/metadata/schemas/DomainKey.avsc +2 -1
  341. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  342. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  343. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  344. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  345. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  346. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  347. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  348. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  349. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  350. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  351. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  352. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  353. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  354. datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
  355. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  356. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  357. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  358. datahub/metadata/schemas/Operation.avsc +4 -2
  359. datahub/metadata/schemas/Ownership.avsc +69 -0
  360. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  361. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  362. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  363. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  364. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  365. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  366. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  367. datahub/sdk/__init__.py +2 -0
  368. datahub/sdk/_all_entities.py +7 -0
  369. datahub/sdk/_shared.py +249 -5
  370. datahub/sdk/chart.py +386 -0
  371. datahub/sdk/container.py +7 -0
  372. datahub/sdk/dashboard.py +453 -0
  373. datahub/sdk/dataflow.py +7 -0
  374. datahub/sdk/datajob.py +45 -13
  375. datahub/sdk/dataset.py +56 -2
  376. datahub/sdk/entity_client.py +111 -9
  377. datahub/sdk/lineage_client.py +663 -82
  378. datahub/sdk/main_client.py +50 -16
  379. datahub/sdk/mlmodel.py +120 -38
  380. datahub/sdk/mlmodelgroup.py +7 -0
  381. datahub/sdk/search_client.py +7 -3
  382. datahub/sdk/search_filters.py +304 -36
  383. datahub/secret/datahub_secret_store.py +3 -0
  384. datahub/secret/environment_secret_store.py +29 -0
  385. datahub/secret/file_secret_store.py +49 -0
  386. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  387. datahub/specific/aspect_helpers/siblings.py +73 -0
  388. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  389. datahub/specific/chart.py +1 -1
  390. datahub/specific/datajob.py +15 -1
  391. datahub/specific/dataproduct.py +4 -0
  392. datahub/specific/dataset.py +39 -59
  393. datahub/sql_parsing/split_statements.py +13 -0
  394. datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
  395. datahub/sql_parsing/sqlglot_lineage.py +196 -42
  396. datahub/sql_parsing/sqlglot_utils.py +12 -4
  397. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  398. datahub/telemetry/telemetry.py +28 -14
  399. datahub/testing/sdk_v2_helpers.py +7 -1
  400. datahub/upgrade/upgrade.py +73 -17
  401. datahub/utilities/file_backed_collections.py +8 -9
  402. datahub/utilities/is_pytest.py +3 -2
  403. datahub/utilities/logging_manager.py +22 -6
  404. datahub/utilities/mapping.py +29 -2
  405. datahub/utilities/sample_data.py +5 -4
  406. datahub/utilities/server_config_util.py +10 -1
  407. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  408. datahub/utilities/stats_collections.py +4 -0
  409. datahub/utilities/urns/urn.py +41 -2
  410. datahub/emitter/sql_parsing_builder.py +0 -306
  411. datahub/ingestion/source/redshift/lineage_v2.py +0 -466
  412. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
  413. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
  414. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import abc
4
+ import json
4
5
  from typing import (
6
+ TYPE_CHECKING,
7
+ Annotated,
5
8
  Any,
6
9
  ClassVar,
7
10
  Iterator,
@@ -15,7 +18,10 @@ from typing import (
15
18
  import pydantic
16
19
 
17
20
  from datahub.configuration.common import ConfigModel
18
- from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2
21
+ from datahub.configuration.pydantic_migration_helpers import (
22
+ PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR,
23
+ PYDANTIC_VERSION_2,
24
+ )
19
25
  from datahub.ingestion.graph.client import flexible_entity_type_to_graphql
20
26
  from datahub.ingestion.graph.filters import (
21
27
  FilterOperator,
@@ -24,7 +30,14 @@ from datahub.ingestion.graph.filters import (
24
30
  _get_status_filter,
25
31
  )
26
32
  from datahub.metadata.schema_classes import EntityTypeName
27
- from datahub.metadata.urns import DataPlatformUrn, DomainUrn
33
+ from datahub.metadata.urns import (
34
+ ContainerUrn,
35
+ CorpGroupUrn,
36
+ CorpUserUrn,
37
+ DataPlatformUrn,
38
+ DomainUrn,
39
+ )
40
+ from datahub.utilities.urns.urn import guess_entity_type
28
41
 
29
42
  _AndSearchFilterRule = TypedDict(
30
43
  "_AndSearchFilterRule", {"and": List[SearchFilterRule]}
@@ -33,21 +46,32 @@ _OrFilters = List[_AndSearchFilterRule]
33
46
 
34
47
 
35
48
  class _BaseFilter(ConfigModel):
36
- class Config:
37
- # We can't wrap this in a TYPE_CHECKING block because the pydantic plugin
38
- # doesn't recognize it properly. So unfortunately we'll need to live
39
- # with the deprecation warning w/ pydantic v2.
40
- allow_population_by_field_name = True
41
- if PYDANTIC_VERSION_2:
42
- populate_by_name = True
49
+ model_config = pydantic.ConfigDict(populate_by_name=True)
43
50
 
44
51
  @abc.abstractmethod
45
- def compile(self) -> _OrFilters:
46
- pass
52
+ def compile(self) -> _OrFilters: ...
47
53
 
48
54
  def dfs(self) -> Iterator[_BaseFilter]:
49
55
  yield self
50
56
 
57
+ @classmethod
58
+ def _field_discriminator(cls) -> str:
59
+ if cls is _BaseFilter:
60
+ raise ValueError("Cannot get discriminator for _BaseFilter")
61
+ if PYDANTIC_VERSION_2:
62
+ fields: dict = cls.model_fields # type: ignore
63
+ else:
64
+ fields = cls.__fields__ # type: ignore
65
+
66
+ # Assumes that there's only one field name per filter.
67
+ # If that's not the case, this method should be overridden.
68
+ if len(fields.keys()) != 1:
69
+ raise ValueError(
70
+ f"Found multiple fields that could be the discriminator for this filter: {list(fields.keys())}"
71
+ )
72
+ name, field = next(iter(fields.items()))
73
+ return field.alias or name # type: ignore
74
+
51
75
 
52
76
  class _EntityTypeFilter(_BaseFilter):
53
77
  """Filter for specific entity types.
@@ -59,7 +83,7 @@ class _EntityTypeFilter(_BaseFilter):
59
83
  ENTITY_TYPE_FIELD: ClassVar[str] = "_entityType"
60
84
 
61
85
  entity_type: List[str] = pydantic.Field(
62
- description="The entity type to filter on. Can be 'dataset', 'chart', 'dashboard', 'corpuser', etc.",
86
+ description="The entity type to filter on. Can be 'dataset', 'chart', 'dashboard', 'corpuser', 'dataProduct', etc.",
63
87
  )
64
88
 
65
89
  def _build_rule(self) -> SearchFilterRule:
@@ -74,15 +98,19 @@ class _EntityTypeFilter(_BaseFilter):
74
98
 
75
99
 
76
100
  class _EntitySubtypeFilter(_BaseFilter):
77
- entity_subtype: str = pydantic.Field(
101
+ entity_subtype: List[str] = pydantic.Field(
78
102
  description="The entity subtype to filter on. Can be 'Table', 'View', 'Source', etc. depending on the native platform's concepts.",
79
103
  )
80
104
 
105
+ @pydantic.validator("entity_subtype", pre=True)
106
+ def validate_entity_subtype(cls, v: str) -> List[str]:
107
+ return [v] if not isinstance(v, list) else v
108
+
81
109
  def _build_rule(self) -> SearchFilterRule:
82
110
  return SearchFilterRule(
83
111
  field="typeNames",
84
112
  condition="EQUAL",
85
- values=[self.entity_subtype],
113
+ values=self.entity_subtype,
86
114
  )
87
115
 
88
116
  def compile(self) -> _OrFilters:
@@ -148,6 +176,39 @@ class _DomainFilter(_BaseFilter):
148
176
  return [{"and": [self._build_rule()]}]
149
177
 
150
178
 
179
+ class _ContainerFilter(_BaseFilter):
180
+ container: List[str]
181
+ direct_descendants_only: bool = pydantic.Field(
182
+ default=False,
183
+ description="If true, only entities that are direct descendants of the container will be returned.",
184
+ )
185
+
186
+ @pydantic.validator("container", each_item=True)
187
+ def validate_container(cls, v: str) -> str:
188
+ return str(ContainerUrn.from_string(v))
189
+
190
+ @classmethod
191
+ def _field_discriminator(cls) -> str:
192
+ return "container"
193
+
194
+ def _build_rule(self) -> SearchFilterRule:
195
+ if self.direct_descendants_only:
196
+ return SearchFilterRule(
197
+ field="container",
198
+ condition="EQUAL",
199
+ values=self.container,
200
+ )
201
+ else:
202
+ return SearchFilterRule(
203
+ field="browsePathV2",
204
+ condition="CONTAIN",
205
+ values=self.container,
206
+ )
207
+
208
+ def compile(self) -> _OrFilters:
209
+ return [{"and": [self._build_rule()]}]
210
+
211
+
151
212
  class _EnvFilter(_BaseFilter):
152
213
  # Note that not all entity types have an env (e.g. dashboards / charts).
153
214
  # If the env filter is specified, these will be excluded.
@@ -181,6 +242,94 @@ class _EnvFilter(_BaseFilter):
181
242
  ]
182
243
 
183
244
 
245
class _OwnerFilter(_BaseFilter):
    """Filter for entities owned by specific users or groups."""

    owner: List[str] = pydantic.Field(
        description="The owner to filter on. Should be user or group URNs.",
    )

    @pydantic.validator("owner", each_item=True)
    def validate_owner(cls, v: str) -> str:
        """Accept only corp-user or corp-group URNs and return them normalized.

        Raises:
            ValueError: If ``v`` is not a URN, or is a URN of any other
                entity type.
        """
        if not v.startswith("urn:li:"):
            raise ValueError(f"Owner must be a valid User or Group URN, got: {v}")

        _type = guess_entity_type(v)
        if _type == CorpUserUrn.ENTITY_TYPE:
            return str(CorpUserUrn.from_string(v))
        if _type == CorpGroupUrn.ENTITY_TYPE:
            return str(CorpGroupUrn.from_string(v))

        raise ValueError(f"Owner must be a valid User or Group URN, got: {v}")

    def _build_rule(self) -> SearchFilterRule:
        """Build an ``EQUAL`` rule on the ``owners`` field."""
        return SearchFilterRule(
            field="owners",
            condition="EQUAL",
            values=self.owner,
        )

    def compile(self) -> _OrFilters:
        """Compile to a single OR-branch containing one AND-ed rule."""
        return [{"and": [self._build_rule()]}]
273
+
274
+
275
class _GlossaryTermFilter(_BaseFilter):
    """Filter for entities associated with specific glossary terms."""

    glossary_term: List[str] = pydantic.Field(
        description="The glossary term to filter on. Should be glossary term URNs.",
    )

    @pydantic.validator("glossary_term", each_item=True)
    def validate_glossary_term(cls, v: str) -> str:
        """Accept only glossary term URNs; the value is returned unchanged.

        Raises:
            ValueError: If ``v`` is not a URN or its entity type is not
                ``glossaryTerm``.
        """
        if not v.startswith("urn:li:"):
            raise ValueError(f"Glossary term must be a valid URN, got: {v}")

        # Validate that it's a glossary term URN
        if guess_entity_type(v) != "glossaryTerm":
            raise ValueError(
                f"Glossary term must be a valid glossary term URN, got: {v}"
            )
        return v

    def _build_rule(self) -> SearchFilterRule:
        """Build an ``EQUAL`` rule on the ``glossaryTerms`` field."""
        return SearchFilterRule(
            field="glossaryTerms",
            condition="EQUAL",
            values=self.glossary_term,
        )

    def compile(self) -> _OrFilters:
        """Compile to a single OR-branch containing one AND-ed rule."""
        return [{"and": [self._build_rule()]}]
303
+
304
+
305
class _TagFilter(_BaseFilter):
    """Filter for entities associated with specific tags."""

    tag: List[str] = pydantic.Field(
        description="The tag to filter on. Should be tag URNs.",
    )

    @pydantic.validator("tag", each_item=True)
    def validate_tag(cls, v: str) -> str:
        """Accept only tag URNs; the value is returned unchanged.

        Raises:
            ValueError: If ``v`` is not a URN or its entity type is not ``tag``.
        """
        if not v.startswith("urn:li:"):
            raise ValueError(f"Tag must be a valid URN, got: {v}")

        # Validate that it's a tag URN
        if guess_entity_type(v) != "tag":
            raise ValueError(f"Tag must be a valid tag URN, got: {v}")
        return v

    def _build_rule(self) -> SearchFilterRule:
        """Build an ``EQUAL`` rule on the ``tags`` field."""
        return SearchFilterRule(
            field="tags",
            condition="EQUAL",
            values=self.tag,
        )

    def compile(self) -> _OrFilters:
        """Compile to a single OR-branch containing one AND-ed rule."""
        return [{"and": [self._build_rule()]}]
331
+
332
+
184
333
  class _CustomCondition(_BaseFilter):
185
334
  """Represents a single field condition."""
186
335
 
@@ -196,6 +345,10 @@ class _CustomCondition(_BaseFilter):
196
345
  )
197
346
  return [{"and": [rule]}]
198
347
 
348
@classmethod
def _field_discriminator(cls) -> str:
    """Return the tag that identifies a custom condition in the ``Filter`` union."""
    return "_custom"
351
+
199
352
 
200
353
  class _And(_BaseFilter):
201
354
  """Represents an AND conjunction of filters."""
@@ -302,31 +455,116 @@ class _Not(_BaseFilter):
302
455
  yield from self.not_.dfs()
303
456
 
304
457
 
305
- # TODO: With pydantic 2, we can use a RootModel with a
306
- # discriminated union to make the error messages more informative.
307
- Filter = Union[
308
- _And,
309
- _Or,
310
- _Not,
311
- _EntityTypeFilter,
312
- _EntitySubtypeFilter,
313
- _StatusFilter,
314
- _PlatformFilter,
315
- _DomainFilter,
316
- _EnvFilter,
317
- _CustomCondition,
318
- ]
319
-
320
-
321
- # Required to resolve forward references to "Filter"
322
- if PYDANTIC_VERSION_2:
323
- _And.model_rebuild() # type: ignore
324
- _Or.model_rebuild() # type: ignore
325
- _Not.model_rebuild() # type: ignore
326
- else:
458
def _filter_discriminator(v: Any) -> Optional[str]:
    """Pick the union tag for a raw or already-parsed filter value.

    Args:
        v: Either a ``_BaseFilter`` instance or the raw input being validated.

    Returns:
        The discriminator tag, or ``None`` when no tag can be determined
        (non-dict input, or a multi-key dict that is neither a container
        filter nor a custom condition).
    """
    if isinstance(v, _BaseFilter):
        return v._field_discriminator()

    if not isinstance(v, dict):
        return None

    keys = list(v.keys())
    if len(keys) == 1:
        # Single-key dicts are tagged by their only key.
        return keys[0]
    if "container" in v:
        # Container filters may carry extra keys next to "container".
        return _ContainerFilter._field_discriminator()
    if "field" in v and "condition" in v:
        return _CustomCondition._field_discriminator()

    return None
474
+
475
+
476
def _parse_and_like_filter(value: Any) -> Any:
    """Expand a multi-key filter dict into an explicit ``and`` filter.

    A dict such as ``{"platform": ..., "env": ...}`` becomes
    ``{"and": [{"platform": ...}, {"env": ...}]}``. Everything else is
    returned unchanged.

    Args:
        value: The raw filter input.

    Returns:
        The expanded ``{"and": [...]}`` dict, or ``value`` itself when no
        expansion applies.
    """
    # Do not parse if filter is already of type and/or/not or a custom condition
    # also do not parse container filter if direct_descendants_only is specified
    passthrough_keys = {
        "and",
        "or",
        "not",
        "field",
        "condition",
        "direct_descendants_only",
    }
    if (
        isinstance(value, dict)
        and len(value) > 1
        and passthrough_keys.isdisjoint(value.keys())
    ):
        return {"and": [{k: v} for k, v in value.items()]}

    return value
489
+
490
+
491
if TYPE_CHECKING or not PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR:
    # The `TYPE_CHECKING` bit is required to make the linter happy,
    # since we currently only run mypy with pydantic v1.
    Filter = Union[
        _And,
        _Or,
        _Not,
        _EntityTypeFilter,
        _EntitySubtypeFilter,
        _StatusFilter,
        _PlatformFilter,
        _DomainFilter,
        _ContainerFilter,
        _EnvFilter,
        _OwnerFilter,
        _GlossaryTermFilter,
        _TagFilter,
        _CustomCondition,
    ]

    _And.update_forward_refs()
    _Or.update_forward_refs()
    _Not.update_forward_refs()
else:
    from pydantic import Discriminator, Tag

    def _parse_json_from_string(value: Any) -> Any:
        """Decode ``value`` as JSON if it is a string; otherwise return it as is.

        Strings that are not valid JSON are returned unchanged.
        """
        if isinstance(value, str):
            try:
                return json.loads(value)
            except json.JSONDecodeError:
                return value
        else:
            return value

    # TODO: Once we're fully on pydantic 2, we can use a RootModel here.
    # That way we'd be able to attach methods to the Filter type.
    # e.g. replace load_filters(...) with Filter.load(...)
    Filter = Annotated[
        Annotated[
            Union[
                Annotated[_And, Tag(_And._field_discriminator())],
                Annotated[_Or, Tag(_Or._field_discriminator())],
                Annotated[_Not, Tag(_Not._field_discriminator())],
                Annotated[
                    _EntityTypeFilter, Tag(_EntityTypeFilter._field_discriminator())
                ],
                Annotated[
                    _EntitySubtypeFilter,
                    Tag(_EntitySubtypeFilter._field_discriminator()),
                ],
                Annotated[_StatusFilter, Tag(_StatusFilter._field_discriminator())],
                Annotated[_PlatformFilter, Tag(_PlatformFilter._field_discriminator())],
                Annotated[_DomainFilter, Tag(_DomainFilter._field_discriminator())],
                Annotated[
                    _ContainerFilter, Tag(_ContainerFilter._field_discriminator())
                ],
                Annotated[_EnvFilter, Tag(_EnvFilter._field_discriminator())],
                Annotated[_OwnerFilter, Tag(_OwnerFilter._field_discriminator())],
                Annotated[
                    _GlossaryTermFilter, Tag(_GlossaryTermFilter._field_discriminator())
                ],
                Annotated[_TagFilter, Tag(_TagFilter._field_discriminator())],
                Annotated[
                    _CustomCondition, Tag(_CustomCondition._field_discriminator())
                ],
            ],
            Discriminator(_filter_discriminator),
        ],
        # pydantic runs "before" validators from last to first, so string
        # input is JSON-decoded before the and-like expansion sees it.
        pydantic.BeforeValidator(_parse_and_like_filter),
        pydantic.BeforeValidator(_parse_json_from_string),
    ]

    # Required to resolve forward references to "Filter"
    _And.model_rebuild()  # type: ignore
    _Or.model_rebuild()  # type: ignore
    _Not.model_rebuild()  # type: ignore
330
568
 
331
569
 
332
570
  def load_filters(obj: Any) -> Filter:
@@ -400,10 +638,40 @@ class FilterDsl:
400
638
  def domain(domain: Union[str, Sequence[str]], /) -> _DomainFilter:
401
639
  return _DomainFilter(domain=[domain] if isinstance(domain, str) else domain)
402
640
 
641
@staticmethod
def container(
    container: Union[str, Sequence[str]],
    /,
    *,
    direct_descendants_only: bool = False,
) -> _ContainerFilter:
    """Build a ``_ContainerFilter`` from one container URN or a sequence of them.

    A single string is wrapped in a one-element list;
    ``direct_descendants_only`` is passed through unchanged.
    """
    containers = [container] if isinstance(container, str) else container
    return _ContainerFilter(
        container=containers,
        direct_descendants_only=direct_descendants_only,
    )
652
+
403
653
@staticmethod
def env(env: Union[str, Sequence[str]], /) -> _EnvFilter:
    """Build an ``_EnvFilter``; a single string is wrapped in a one-element list."""
    envs = [env] if isinstance(env, str) else env
    return _EnvFilter(env=envs)
406
656
 
657
@staticmethod
def owner(owner: Union[str, Sequence[str]], /) -> _OwnerFilter:
    """Build an ``_OwnerFilter``; a single string is wrapped in a one-element list."""
    owners = [owner] if isinstance(owner, str) else owner
    return _OwnerFilter(owner=owners)
660
+
661
@staticmethod
def glossary_term(
    glossary_term: Union[str, Sequence[str]], /
) -> _GlossaryTermFilter:
    """Build a ``_GlossaryTermFilter``; a single string is wrapped in a one-element list."""
    terms = [glossary_term] if isinstance(glossary_term, str) else glossary_term
    return _GlossaryTermFilter(glossary_term=terms)
670
+
671
@staticmethod
def tag(tag: Union[str, Sequence[str]], /) -> _TagFilter:
    """Build a ``_TagFilter``; a single string is wrapped in a one-element list."""
    tags = [tag] if isinstance(tag, str) else tag
    return _TagFilter(tag=tags)
674
+
407
675
  @staticmethod
408
676
  def has_custom_property(key: str, value: str) -> _CustomCondition:
409
677
  return _CustomCondition(
@@ -65,3 +65,6 @@ class DataHubSecretStore(SecretStore):
65
65
  def create(cls, config: Any) -> "DataHubSecretStore":
66
66
  config = DataHubSecretStoreConfig.parse_obj(config)
67
67
  return cls(config)
68
+
69
def close(self) -> None:
    """Close the graph held by this store's client."""
    self.client.graph.close()
@@ -0,0 +1,29 @@
1
+ import os
2
+ from typing import Dict, List, Union
3
+
4
+ from datahub.secret.secret_store import SecretStore
5
+
6
+
7
# Simple SecretStore implementation that fetches Secret values from the local environment.
class EnvironmentSecretStore(SecretStore):
    """Secret store backed by the process environment variables."""

    def __init__(self, config):
        # The config is accepted for interface parity with other stores but
        # is not used.
        pass

    def close(self) -> None:
        """No-op: there is nothing to release."""
        return

    def get_secret_values(self, secret_names: List[str]) -> Dict[str, Union[str, None]]:
        """Look up each name with ``os.getenv``; unset variables map to ``None``."""
        return {secret_name: os.getenv(secret_name) for secret_name in secret_names}

    def get_secret_value(self, secret_name: str) -> Union[str, None]:
        """Return the environment variable ``secret_name``, or ``None`` if unset."""
        return os.getenv(secret_name)

    def get_id(self) -> str:
        """Return the identifier of this store, ``"env"``."""
        return "env"

    @classmethod
    def create(cls, config: Dict) -> "EnvironmentSecretStore":
        """Construct the store from ``config``."""
        return cls(config)
@@ -0,0 +1,49 @@
1
+ import logging
2
+ import os
3
+ from typing import Any, Dict, List, Union
4
+
5
+ from pydantic import BaseModel
6
+
7
+ from datahub.secret.secret_store import SecretStore
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
class FileSecretStoreConfig(BaseModel):
    """Configuration for ``FileSecretStore``."""

    # Directory that holds one file per secret; the file name is the secret name.
    basedir: str = "/mnt/secrets"
    # Maximum number of characters read from a secret file; longer values are
    # truncated by FileSecretStore.
    # NOTE(review): 1024768 is not 1 MiB (1048576) - confirm the intended limit.
    max_length: int = 1024768
15
+
16
+
17
# Simple SecretStore implementation that fetches Secret values from the local files.
class FileSecretStore(SecretStore):
    """Secret store that reads each secret from a file under ``config.basedir``."""

    def __init__(self, config: FileSecretStoreConfig):
        self.config = config

    def get_secret_values(self, secret_names: List[str]) -> Dict[str, Union[str, None]]:
        """Look up several secrets; names that cannot be read map to ``None``."""
        return {
            secret_name: self.get_secret_value(secret_name)
            for secret_name in secret_names
        }

    def _resolve_secret_path(self, secret_name: str) -> Union[str, None]:
        """Return the path for ``secret_name``, or ``None`` if it escapes ``basedir``.

        The check is lexical (no symlink resolution), so secret files that
        are symlinks inside ``basedir`` keep working.
        """
        base_dir = os.path.abspath(self.config.basedir)
        # os.path.join discards base_dir when secret_name is absolute, and
        # ".." segments can climb out of it; normalize before comparing.
        candidate = os.path.abspath(os.path.join(base_dir, secret_name))
        try:
            inside = os.path.commonpath([base_dir, candidate]) == base_dir
        except ValueError:
            # Raised e.g. for paths on different drives; treat as outside.
            inside = False
        return candidate if inside else None

    def get_secret_value(self, secret_name: str) -> Union[str, None]:
        """Read the secret stored in the file named ``secret_name``.

        Returns:
            The file content with trailing whitespace stripped, truncated to
            ``config.max_length`` characters, or ``None`` when the name
            points outside ``basedir`` or no regular file exists for it.
        """
        secret_path = self._resolve_secret_path(secret_name)
        if secret_path is None:
            logger.warning(
                "Secret name %r resolves outside of %s and will be ignored.",
                secret_name,
                self.config.basedir,
            )
            return None

        # isfile (rather than exists) so a directory is reported as "no
        # secret" instead of failing inside open().
        if not os.path.isfile(secret_path):
            return None

        with open(secret_path, "r") as f:
            # Read one extra character so over-long secrets can be detected.
            secret_value = f.read(self.config.max_length + 1)

        if len(secret_value) > self.config.max_length:
            logger.warning(
                f"Secret {secret_name} is longer than {self.config.max_length} and will be truncated."
            )
        return secret_value[: self.config.max_length].rstrip()

    def get_id(self) -> str:
        """Return the identifier of this store, ``"file"``."""
        return "file"

    def close(self) -> None:
        """No-op: files are closed as soon as they are read."""
        return

    @classmethod
    def create(cls, config: Any) -> "FileSecretStore":
        """Parse ``config`` into a ``FileSecretStoreConfig`` and build the store."""
        config = FileSecretStoreConfig.parse_obj(config)
        return cls(config)
@@ -0,0 +1,76 @@
1
+ from abc import abstractmethod
2
+ from typing import List, Tuple
3
+
4
+ from typing_extensions import Self
5
+
6
+ from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath
7
+ from datahub.metadata.schema_classes import (
8
+ FineGrainedLineageClass as FineGrainedLineage,
9
+ )
10
+
11
+
12
class HasFineGrainedLineagePatch(MetadataPatchProposal):
    """Mixin that adds fine-grained lineage operations to a patch builder."""

    @abstractmethod
    def _fine_grained_lineage_location(self) -> Tuple[str, PatchPath]:
        """Return the aspect name and the patch path prefix where fine-grained lineage is stored."""
        raise NotImplementedError("Subclasses must implement this method.")

    @staticmethod
    def _get_fine_grained_key(
        fine_grained_lineage: FineGrainedLineage,
    ) -> Tuple[str, str, str]:
        """Derive the ``(transform_op, downstream_urn, query_id)`` key of a lineage entry.

        A missing transform operation or query is represented as ``"NONE"``.

        Raises:
            TypeError: If the entry does not have exactly one downstream.
        """
        downstreams = fine_grained_lineage.downstreams or []
        if len(downstreams) != 1:
            raise TypeError("Cannot patch with more or less than one downstream.")
        transform_op = fine_grained_lineage.transformOperation or "NONE"
        downstream_urn = downstreams[0]
        query_id = fine_grained_lineage.query or "NONE"
        return transform_op, downstream_urn, query_id

    def add_fine_grained_lineage(
        self, fine_grained_lineage: FineGrainedLineage
    ) -> Self:
        """Queue one "add" patch per upstream of ``fine_grained_lineage``.

        Each patch carries the entry's ``confidenceScore`` as its value.

        Returns:
            The patch builder instance.
        """
        aspect_name, path = self._fine_grained_lineage_location()
        (
            transform_op,
            downstream_urn,
            query_id,
        ) = self._get_fine_grained_key(fine_grained_lineage)
        for upstream_urn in fine_grained_lineage.upstreams or []:
            self._add_patch(
                aspect_name,
                "add",
                path=(*path, transform_op, downstream_urn, query_id, upstream_urn),
                value={"confidenceScore": fine_grained_lineage.confidenceScore},
            )
        return self

    def remove_fine_grained_lineage(
        self, fine_grained_lineage: FineGrainedLineage
    ) -> Self:
        """Queue one "remove" patch per upstream of ``fine_grained_lineage``.

        Returns:
            The patch builder instance.
        """
        aspect_name, path = self._fine_grained_lineage_location()
        (
            transform_op,
            downstream_urn,
            query_id,
        ) = self._get_fine_grained_key(fine_grained_lineage)
        for upstream_urn in fine_grained_lineage.upstreams or []:
            self._add_patch(
                aspect_name,
                "remove",
                path=(*path, transform_op, downstream_urn, query_id, upstream_urn),
                value={},
            )
        return self

    def set_fine_grained_lineages(
        self, fine_grained_lineages: List[FineGrainedLineage]
    ) -> Self:
        """Queue a single "add" patch at the lineage path with the whole list as its value.

        Returns:
            The patch builder instance.
        """
        aspect_name, path = self._fine_grained_lineage_location()
        self._add_patch(
            aspect_name,
            "add",
            path=path,
            value=fine_grained_lineages,
        )
        return self
@@ -0,0 +1,73 @@
1
+ from typing import List
2
+
3
+ from typing_extensions import Self
4
+
5
+ from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
6
+ from datahub.metadata.com.linkedin.pegasus2avro.common import Siblings
7
+
8
+
9
class HasSiblingsPatch(MetadataPatchProposal):
    """Mixin that adds sibling-relationship operations to a patch builder."""

    def add_sibling(self, sibling_urn: str, primary: bool = False) -> Self:
        """Add a sibling relationship to the entity.

        Args:
            sibling_urn: The URN of the sibling entity to add.
            primary: Whether this entity should be marked as primary in the relationship.

        Returns:
            The patch builder instance.
        """
        self._add_patch(
            Siblings.ASPECT_NAME,
            "add",
            path=("siblings", sibling_urn),
            value=sibling_urn,
        )

        # Set primary flag if specified
        if primary:
            self._add_patch(
                Siblings.ASPECT_NAME,
                "add",
                path=("primary",),
                value=primary,
            )

        return self

    def remove_sibling(self, sibling_urn: str) -> Self:
        """Remove a sibling relationship from the entity.

        Args:
            sibling_urn: The URN of the sibling entity to remove.

        Returns:
            The patch builder instance.
        """
        self._add_patch(
            Siblings.ASPECT_NAME,
            "remove",
            path=("siblings", sibling_urn),
            value={},
        )
        return self

    def set_siblings(self, sibling_urns: List[str], primary: bool = False) -> Self:
        """Set the complete list of siblings for the entity.

        This will replace all existing siblings with the new list.

        Args:
            sibling_urns: The list of sibling URNs to set.
            primary: Whether this entity should be marked as primary.

        Returns:
            The patch builder instance.
        """
        self._add_patch(
            Siblings.ASPECT_NAME, "add", path=("siblings",), value=sibling_urns
        )

        # Unlike add_sibling, the primary flag is always written here, even
        # when it is False.
        self._add_patch(Siblings.ASPECT_NAME, "add", path=("primary",), value=primary)

        return self