acryl-datahub 0.15.0.1rc16__py3-none-any.whl → 0.15.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (214) hide show
  1. {acryl_datahub-0.15.0.1rc16.dist-info → acryl_datahub-0.15.0.2.dist-info}/METADATA +2462 -2460
  2. {acryl_datahub-0.15.0.1rc16.dist-info → acryl_datahub-0.15.0.2.dist-info}/RECORD +214 -210
  3. {acryl_datahub-0.15.0.1rc16.dist-info → acryl_datahub-0.15.0.2.dist-info}/WHEEL +1 -1
  4. datahub/__init__.py +1 -1
  5. datahub/api/entities/assertion/assertion_operator.py +3 -5
  6. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  7. datahub/api/entities/datacontract/assertion_operator.py +3 -5
  8. datahub/api/entities/dataproduct/dataproduct.py +4 -4
  9. datahub/api/entities/dataset/dataset.py +2 -1
  10. datahub/api/entities/structuredproperties/structuredproperties.py +18 -7
  11. datahub/cli/cli_utils.py +13 -2
  12. datahub/cli/delete_cli.py +3 -3
  13. datahub/cli/docker_cli.py +6 -6
  14. datahub/cli/ingest_cli.py +25 -15
  15. datahub/cli/lite_cli.py +2 -2
  16. datahub/cli/migrate.py +5 -5
  17. datahub/cli/specific/assertions_cli.py +3 -3
  18. datahub/cli/specific/structuredproperties_cli.py +84 -0
  19. datahub/cli/timeline_cli.py +1 -1
  20. datahub/configuration/common.py +1 -2
  21. datahub/configuration/config_loader.py +73 -50
  22. datahub/configuration/git.py +2 -2
  23. datahub/configuration/time_window_config.py +10 -5
  24. datahub/emitter/mce_builder.py +4 -8
  25. datahub/emitter/mcp_builder.py +27 -0
  26. datahub/emitter/mcp_patch_builder.py +1 -2
  27. datahub/emitter/rest_emitter.py +141 -93
  28. datahub/entrypoints.py +6 -0
  29. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +5 -3
  30. datahub/ingestion/api/incremental_lineage_helper.py +2 -8
  31. datahub/ingestion/api/report.py +1 -2
  32. datahub/ingestion/api/source.py +8 -2
  33. datahub/ingestion/api/source_helpers.py +1 -1
  34. datahub/ingestion/extractor/json_schema_util.py +3 -3
  35. datahub/ingestion/extractor/schema_util.py +3 -5
  36. datahub/ingestion/fs/s3_fs.py +3 -3
  37. datahub/ingestion/glossary/classifier.py +2 -3
  38. datahub/ingestion/glossary/datahub_classifier.py +6 -4
  39. datahub/ingestion/graph/client.py +22 -19
  40. datahub/ingestion/graph/config.py +1 -1
  41. datahub/ingestion/run/pipeline.py +8 -7
  42. datahub/ingestion/run/pipeline_config.py +3 -3
  43. datahub/ingestion/source/abs/datalake_profiler_config.py +3 -3
  44. datahub/ingestion/source/abs/source.py +19 -8
  45. datahub/ingestion/source/aws/glue.py +77 -47
  46. datahub/ingestion/source/aws/s3_boto_utils.py +3 -3
  47. datahub/ingestion/source/aws/s3_util.py +24 -1
  48. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  49. datahub/ingestion/source/aws/sagemaker_processors/models.py +2 -2
  50. datahub/ingestion/source/bigquery_v2/bigquery.py +34 -34
  51. datahub/ingestion/source/bigquery_v2/bigquery_audit.py +3 -3
  52. datahub/ingestion/source/bigquery_v2/bigquery_config.py +14 -6
  53. datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py +8 -4
  54. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -3
  55. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +22 -16
  56. datahub/ingestion/source/bigquery_v2/lineage.py +16 -16
  57. datahub/ingestion/source/bigquery_v2/queries.py +1 -3
  58. datahub/ingestion/source/bigquery_v2/queries_extractor.py +3 -3
  59. datahub/ingestion/source/bigquery_v2/usage.py +60 -60
  60. datahub/ingestion/source/cassandra/cassandra.py +0 -1
  61. datahub/ingestion/source/cassandra/cassandra_profiling.py +24 -24
  62. datahub/ingestion/source/cassandra/cassandra_utils.py +4 -7
  63. datahub/ingestion/source/confluent_schema_registry.py +6 -6
  64. datahub/ingestion/source/csv_enricher.py +29 -29
  65. datahub/ingestion/source/datahub/config.py +20 -0
  66. datahub/ingestion/source/datahub/datahub_database_reader.py +7 -19
  67. datahub/ingestion/source/datahub/datahub_source.py +13 -3
  68. datahub/ingestion/source/dbt/dbt_cloud.py +13 -13
  69. datahub/ingestion/source/dbt/dbt_common.py +9 -7
  70. datahub/ingestion/source/delta_lake/source.py +0 -5
  71. datahub/ingestion/source/demo_data.py +1 -1
  72. datahub/ingestion/source/dremio/dremio_api.py +4 -4
  73. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +3 -3
  74. datahub/ingestion/source/dremio/dremio_reporting.py +0 -3
  75. datahub/ingestion/source/dremio/dremio_source.py +2 -2
  76. datahub/ingestion/source/elastic_search.py +4 -4
  77. datahub/ingestion/source/fivetran/fivetran.py +1 -6
  78. datahub/ingestion/source/gc/datahub_gc.py +11 -14
  79. datahub/ingestion/source/gc/execution_request_cleanup.py +31 -6
  80. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +48 -15
  81. datahub/ingestion/source/gcs/gcs_source.py +3 -2
  82. datahub/ingestion/source/ge_data_profiler.py +2 -5
  83. datahub/ingestion/source/ge_profiling_config.py +3 -3
  84. datahub/ingestion/source/iceberg/iceberg.py +13 -6
  85. datahub/ingestion/source/iceberg/iceberg_common.py +49 -9
  86. datahub/ingestion/source/iceberg/iceberg_profiler.py +3 -1
  87. datahub/ingestion/source/identity/azure_ad.py +3 -3
  88. datahub/ingestion/source/identity/okta.py +3 -3
  89. datahub/ingestion/source/kafka/kafka.py +11 -9
  90. datahub/ingestion/source/kafka_connect/kafka_connect.py +3 -9
  91. datahub/ingestion/source/kafka_connect/sink_connectors.py +3 -3
  92. datahub/ingestion/source/kafka_connect/source_connectors.py +3 -3
  93. datahub/ingestion/source/looker/looker_common.py +19 -19
  94. datahub/ingestion/source/looker/looker_config.py +11 -6
  95. datahub/ingestion/source/looker/looker_source.py +25 -25
  96. datahub/ingestion/source/looker/looker_template_language.py +3 -3
  97. datahub/ingestion/source/looker/looker_usage.py +5 -7
  98. datahub/ingestion/source/looker/lookml_concept_context.py +6 -6
  99. datahub/ingestion/source/looker/lookml_source.py +13 -15
  100. datahub/ingestion/source/looker/view_upstream.py +5 -5
  101. datahub/ingestion/source/metabase.py +1 -6
  102. datahub/ingestion/source/mlflow.py +4 -9
  103. datahub/ingestion/source/mode.py +5 -5
  104. datahub/ingestion/source/mongodb.py +6 -4
  105. datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
  106. datahub/ingestion/source/nifi.py +24 -31
  107. datahub/ingestion/source/openapi.py +9 -9
  108. datahub/ingestion/source/powerbi/config.py +12 -12
  109. datahub/ingestion/source/powerbi/m_query/parser.py +11 -11
  110. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +26 -24
  111. datahub/ingestion/source/powerbi/m_query/resolver.py +13 -13
  112. datahub/ingestion/source/powerbi/powerbi.py +6 -6
  113. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +9 -9
  114. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +7 -7
  115. datahub/ingestion/source/powerbi_report_server/report_server.py +1 -1
  116. datahub/ingestion/source/qlik_sense/qlik_api.py +1 -1
  117. datahub/ingestion/source/redash.py +0 -5
  118. datahub/ingestion/source/redshift/config.py +3 -3
  119. datahub/ingestion/source/redshift/redshift.py +45 -46
  120. datahub/ingestion/source/redshift/usage.py +33 -33
  121. datahub/ingestion/source/s3/datalake_profiler_config.py +3 -3
  122. datahub/ingestion/source/s3/source.py +11 -15
  123. datahub/ingestion/source/salesforce.py +26 -25
  124. datahub/ingestion/source/schema/json_schema.py +1 -1
  125. datahub/ingestion/source/sigma/sigma.py +3 -3
  126. datahub/ingestion/source/sigma/sigma_api.py +12 -10
  127. datahub/ingestion/source/snowflake/snowflake_config.py +30 -7
  128. datahub/ingestion/source/snowflake/snowflake_connection.py +6 -6
  129. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +4 -0
  130. datahub/ingestion/source/snowflake/snowflake_queries.py +2 -2
  131. datahub/ingestion/source/snowflake/snowflake_report.py +0 -3
  132. datahub/ingestion/source/snowflake/snowflake_schema.py +8 -5
  133. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +136 -42
  134. datahub/ingestion/source/snowflake/snowflake_tag.py +21 -11
  135. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +49 -50
  136. datahub/ingestion/source/snowflake/snowflake_utils.py +1 -2
  137. datahub/ingestion/source/snowflake/snowflake_v2.py +51 -47
  138. datahub/ingestion/source/sql/athena.py +1 -3
  139. datahub/ingestion/source/sql/clickhouse.py +8 -14
  140. datahub/ingestion/source/sql/oracle.py +1 -3
  141. datahub/ingestion/source/sql/sql_generic_profiler.py +1 -2
  142. datahub/ingestion/source/sql/sql_types.py +1 -2
  143. datahub/ingestion/source/sql/sql_utils.py +5 -0
  144. datahub/ingestion/source/sql/teradata.py +18 -5
  145. datahub/ingestion/source/state/profiling_state_handler.py +3 -3
  146. datahub/ingestion/source/state/redundant_run_skip_handler.py +5 -7
  147. datahub/ingestion/source/state/stale_entity_removal_handler.py +3 -3
  148. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +9 -9
  149. datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
  150. datahub/ingestion/source/superset.py +1 -6
  151. datahub/ingestion/source/tableau/tableau.py +343 -117
  152. datahub/ingestion/source/tableau/tableau_common.py +5 -2
  153. datahub/ingestion/source/unity/config.py +3 -1
  154. datahub/ingestion/source/unity/proxy.py +1 -1
  155. datahub/ingestion/source/unity/source.py +74 -78
  156. datahub/ingestion/source/unity/usage.py +3 -1
  157. datahub/ingestion/source/usage/clickhouse_usage.py +4 -4
  158. datahub/ingestion/source/usage/starburst_trino_usage.py +3 -3
  159. datahub/ingestion/source/usage/usage_common.py +1 -1
  160. datahub/ingestion/source_report/ingestion_stage.py +24 -20
  161. datahub/ingestion/transformer/add_dataset_dataproduct.py +4 -4
  162. datahub/ingestion/transformer/add_dataset_properties.py +3 -3
  163. datahub/ingestion/transformer/add_dataset_schema_tags.py +3 -3
  164. datahub/ingestion/transformer/add_dataset_schema_terms.py +3 -3
  165. datahub/ingestion/transformer/dataset_domain_based_on_tags.py +4 -4
  166. datahub/ingestion/transformer/extract_ownership_from_tags.py +3 -3
  167. datahub/ingestion/transformer/tags_to_terms.py +7 -7
  168. datahub/integrations/assertion/snowflake/compiler.py +10 -10
  169. datahub/lite/duckdb_lite.py +12 -10
  170. datahub/metadata/_schema_classes.py +317 -44
  171. datahub/metadata/_urns/urn_defs.py +69 -15
  172. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  173. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
  174. datahub/metadata/com/linkedin/pegasus2avro/versionset/__init__.py +17 -0
  175. datahub/metadata/schema.avsc +302 -89
  176. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  177. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  178. datahub/metadata/schemas/DataProcessInstanceInput.avsc +4 -2
  179. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -0
  180. datahub/metadata/schemas/DatasetKey.avsc +2 -1
  181. datahub/metadata/schemas/MLFeatureProperties.avsc +51 -0
  182. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +51 -0
  183. datahub/metadata/schemas/MLModelGroupProperties.avsc +96 -23
  184. datahub/metadata/schemas/MLModelKey.avsc +2 -1
  185. datahub/metadata/schemas/MLModelProperties.avsc +96 -48
  186. datahub/metadata/schemas/MLPrimaryKeyProperties.avsc +51 -0
  187. datahub/metadata/schemas/MetadataChangeEvent.avsc +98 -71
  188. datahub/metadata/schemas/VersionProperties.avsc +216 -0
  189. datahub/metadata/schemas/VersionSetKey.avsc +26 -0
  190. datahub/metadata/schemas/VersionSetProperties.avsc +49 -0
  191. datahub/secret/datahub_secrets_client.py +12 -21
  192. datahub/secret/secret_common.py +14 -8
  193. datahub/specific/aspect_helpers/custom_properties.py +1 -2
  194. datahub/sql_parsing/schema_resolver.py +5 -10
  195. datahub/sql_parsing/sql_parsing_aggregator.py +26 -21
  196. datahub/sql_parsing/sqlglot_lineage.py +3 -3
  197. datahub/sql_parsing/sqlglot_utils.py +1 -1
  198. datahub/telemetry/stats.py +1 -2
  199. datahub/testing/mcp_diff.py +1 -1
  200. datahub/utilities/file_backed_collections.py +11 -11
  201. datahub/utilities/hive_schema_to_avro.py +2 -2
  202. datahub/utilities/logging_manager.py +2 -2
  203. datahub/utilities/lossy_collections.py +3 -3
  204. datahub/utilities/mapping.py +3 -3
  205. datahub/utilities/memory_footprint.py +3 -2
  206. datahub/utilities/perf_timer.py +11 -6
  207. datahub/utilities/serialized_lru_cache.py +3 -1
  208. datahub/utilities/sqlalchemy_query_combiner.py +6 -6
  209. datahub/utilities/sqllineage_patch.py +1 -1
  210. datahub/utilities/stats_collections.py +3 -1
  211. datahub/utilities/urns/_urn_base.py +28 -5
  212. datahub/utilities/urns/urn_iter.py +2 -2
  213. {acryl_datahub-0.15.0.1rc16.dist-info → acryl_datahub-0.15.0.2.dist-info}/entry_points.txt +0 -0
  214. {acryl_datahub-0.15.0.1rc16.dist-info → acryl_datahub-0.15.0.2.dist-info}/top_level.txt +0 -0
@@ -5109,6 +5109,51 @@
5109
5109
  "default": null,
5110
5110
  "doc": "URL where the reference exists"
5111
5111
  },
5112
+ {
5113
+ "Relationship": {
5114
+ "/*": {
5115
+ "entityTypes": [
5116
+ "dataJob",
5117
+ "dataProcessInstance"
5118
+ ],
5119
+ "isLineage": true,
5120
+ "name": "TrainedBy"
5121
+ }
5122
+ },
5123
+ "type": [
5124
+ "null",
5125
+ {
5126
+ "type": "array",
5127
+ "items": "string"
5128
+ }
5129
+ ],
5130
+ "name": "trainingJobs",
5131
+ "default": null,
5132
+ "doc": "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect."
5133
+ },
5134
+ {
5135
+ "Relationship": {
5136
+ "/*": {
5137
+ "entityTypes": [
5138
+ "dataJob",
5139
+ "dataProcessInstance"
5140
+ ],
5141
+ "isLineage": true,
5142
+ "isUpstream": false,
5143
+ "name": "UsedBy"
5144
+ }
5145
+ },
5146
+ "type": [
5147
+ "null",
5148
+ {
5149
+ "type": "array",
5150
+ "items": "string"
5151
+ }
5152
+ ],
5153
+ "name": "downstreamJobs",
5154
+ "default": null,
5155
+ "doc": "List of jobs or process instances (if any) that use the model or group."
5156
+ },
5112
5157
  {
5113
5158
  "Searchable": {
5114
5159
  "boostScore": 10.0,
@@ -5180,6 +5225,14 @@
5180
5225
  ],
5181
5226
  "name": "versionTag",
5182
5227
  "default": null
5228
+ },
5229
+ {
5230
+ "type": [
5231
+ "null",
5232
+ "com.linkedin.pegasus2avro.common.MetadataAttribution"
5233
+ ],
5234
+ "name": "metadataAttribution",
5235
+ "default": null
5183
5236
  }
5184
5237
  ],
5185
5238
  "doc": "A resource-defined string representing the resource state for the purpose of concurrency control"
@@ -5393,54 +5446,6 @@
5393
5446
  "Urn": "Urn",
5394
5447
  "urn_is_array": true
5395
5448
  },
5396
- {
5397
- "Relationship": {
5398
- "/*": {
5399
- "entityTypes": [
5400
- "dataJob",
5401
- "dataProcessInstance"
5402
- ],
5403
- "isLineage": true,
5404
- "name": "TrainedBy"
5405
- }
5406
- },
5407
- "type": [
5408
- "null",
5409
- {
5410
- "type": "array",
5411
- "items": "string"
5412
- }
5413
- ],
5414
- "name": "trainingJobs",
5415
- "default": null,
5416
- "doc": "List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.",
5417
- "Urn": "Urn",
5418
- "urn_is_array": true
5419
- },
5420
- {
5421
- "Relationship": {
5422
- "/*": {
5423
- "entityTypes": [
5424
- "dataJob"
5425
- ],
5426
- "isLineage": true,
5427
- "isUpstream": false,
5428
- "name": "UsedBy"
5429
- }
5430
- },
5431
- "type": [
5432
- "null",
5433
- {
5434
- "type": "array",
5435
- "items": "string"
5436
- }
5437
- ],
5438
- "name": "downstreamJobs",
5439
- "default": null,
5440
- "doc": "List of jobs (if any) that use the model",
5441
- "Urn": "Urn",
5442
- "urn_is_array": true
5443
- },
5444
5449
  {
5445
5450
  "Relationship": {
5446
5451
  "/*": {
@@ -6839,6 +6844,51 @@
6839
6844
  "default": {},
6840
6845
  "doc": "Custom property bag."
6841
6846
  },
6847
+ {
6848
+ "Relationship": {
6849
+ "/*": {
6850
+ "entityTypes": [
6851
+ "dataJob",
6852
+ "dataProcessInstance"
6853
+ ],
6854
+ "isLineage": true,
6855
+ "name": "TrainedBy"
6856
+ }
6857
+ },
6858
+ "type": [
6859
+ "null",
6860
+ {
6861
+ "type": "array",
6862
+ "items": "string"
6863
+ }
6864
+ ],
6865
+ "name": "trainingJobs",
6866
+ "default": null,
6867
+ "doc": "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect."
6868
+ },
6869
+ {
6870
+ "Relationship": {
6871
+ "/*": {
6872
+ "entityTypes": [
6873
+ "dataJob",
6874
+ "dataProcessInstance"
6875
+ ],
6876
+ "isLineage": true,
6877
+ "isUpstream": false,
6878
+ "name": "UsedBy"
6879
+ }
6880
+ },
6881
+ "type": [
6882
+ "null",
6883
+ {
6884
+ "type": "array",
6885
+ "items": "string"
6886
+ }
6887
+ ],
6888
+ "name": "downstreamJobs",
6889
+ "default": null,
6890
+ "doc": "List of jobs or process instances (if any) that use the model or group."
6891
+ },
6842
6892
  {
6843
6893
  "Searchable": {
6844
6894
  "boostScore": 10.0,
@@ -6895,29 +6945,6 @@
6895
6945
  "default": null,
6896
6946
  "doc": "Date when the MLModelGroup was last modified"
6897
6947
  },
6898
- {
6899
- "Relationship": {
6900
- "/*": {
6901
- "entityTypes": [
6902
- "dataJob"
6903
- ],
6904
- "isLineage": true,
6905
- "name": "TrainedBy"
6906
- }
6907
- },
6908
- "type": [
6909
- "null",
6910
- {
6911
- "type": "array",
6912
- "items": "string"
6913
- }
6914
- ],
6915
- "name": "trainingJobs",
6916
- "default": null,
6917
- "doc": "List of jobs (if any) used to train the model group. Visible in Lineage.",
6918
- "Urn": "Urn",
6919
- "urn_is_array": true
6920
- },
6921
6948
  {
6922
6949
  "type": [
6923
6950
  "null",
@@ -0,0 +1,216 @@
1
+ {
2
+ "type": "record",
3
+ "Aspect": {
4
+ "name": "versionProperties"
5
+ },
6
+ "name": "VersionProperties",
7
+ "namespace": "com.linkedin.pegasus2avro.common",
8
+ "fields": [
9
+ {
10
+ "Relationship": {
11
+ "entityTypes": [
12
+ "versionSet"
13
+ ],
14
+ "name": "VersionOf"
15
+ },
16
+ "Searchable": {
17
+ "queryByDefault": false
18
+ },
19
+ "java": {
20
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
21
+ },
22
+ "type": "string",
23
+ "name": "versionSet",
24
+ "doc": "The linked Version Set entity that ties multiple versioned assets together",
25
+ "Urn": "Urn",
26
+ "entityTypes": [
27
+ "versionSet"
28
+ ]
29
+ },
30
+ {
31
+ "Searchable": {
32
+ "/versionTag": {
33
+ "fieldName": "version",
34
+ "queryByDefault": false
35
+ }
36
+ },
37
+ "type": {
38
+ "type": "record",
39
+ "name": "VersionTag",
40
+ "namespace": "com.linkedin.pegasus2avro.common",
41
+ "fields": [
42
+ {
43
+ "type": [
44
+ "null",
45
+ "string"
46
+ ],
47
+ "name": "versionTag",
48
+ "default": null
49
+ },
50
+ {
51
+ "type": [
52
+ "null",
53
+ {
54
+ "type": "record",
55
+ "name": "MetadataAttribution",
56
+ "namespace": "com.linkedin.pegasus2avro.common",
57
+ "fields": [
58
+ {
59
+ "type": "long",
60
+ "name": "time",
61
+ "doc": "When this metadata was updated."
62
+ },
63
+ {
64
+ "java": {
65
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
66
+ },
67
+ "type": "string",
68
+ "name": "actor",
69
+ "doc": "The entity (e.g. a member URN) responsible for applying the associated metadata. This can\neither be a user (in case of UI edits) or the datahub system for automation.",
70
+ "Urn": "Urn"
71
+ },
72
+ {
73
+ "java": {
74
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
75
+ },
76
+ "type": [
77
+ "null",
78
+ "string"
79
+ ],
80
+ "name": "source",
81
+ "default": null,
82
+ "doc": "The DataHub source responsible for applying the associated metadata. This will only be filled out\nwhen a DataHub source is responsible. This includes the specific metadata test urn, the automation urn.",
83
+ "Urn": "Urn"
84
+ },
85
+ {
86
+ "type": {
87
+ "type": "map",
88
+ "values": "string"
89
+ },
90
+ "name": "sourceDetail",
91
+ "default": {},
92
+ "doc": "The details associated with why this metadata was applied. For example, this could include\nthe actual regex rule, sql statement, ingestion pipeline ID, etc."
93
+ }
94
+ ],
95
+ "doc": "Information about who, why, and how this metadata was applied"
96
+ }
97
+ ],
98
+ "name": "metadataAttribution",
99
+ "default": null
100
+ }
101
+ ],
102
+ "doc": "A resource-defined string representing the resource state for the purpose of concurrency control"
103
+ },
104
+ "name": "version",
105
+ "doc": "Label for this versioned asset, is unique within a version set"
106
+ },
107
+ {
108
+ "Searchable": {
109
+ "/*/versionTag": {
110
+ "fieldName": "aliases",
111
+ "queryByDefault": false
112
+ }
113
+ },
114
+ "type": {
115
+ "type": "array",
116
+ "items": "com.linkedin.pegasus2avro.common.VersionTag"
117
+ },
118
+ "name": "aliases",
119
+ "default": [],
120
+ "doc": "Associated aliases for this versioned asset"
121
+ },
122
+ {
123
+ "type": [
124
+ "null",
125
+ "string"
126
+ ],
127
+ "name": "comment",
128
+ "default": null,
129
+ "doc": "Comment documenting what this version was created for, changes, or represents"
130
+ },
131
+ {
132
+ "Searchable": {
133
+ "fieldName": "versionSortId",
134
+ "queryByDefault": false
135
+ },
136
+ "type": "string",
137
+ "name": "sortId",
138
+ "doc": "Sort identifier that determines where a version lives in the order of the Version Set.\nWhat this looks like depends on the Version Scheme. For sort ids generated by DataHub we use an 8 character string representation."
139
+ },
140
+ {
141
+ "type": [
142
+ "null",
143
+ {
144
+ "type": "record",
145
+ "name": "AuditStamp",
146
+ "namespace": "com.linkedin.pegasus2avro.common",
147
+ "fields": [
148
+ {
149
+ "type": "long",
150
+ "name": "time",
151
+ "doc": "When did the resource/association/sub-resource move into the specific lifecycle stage represented by this AuditEvent."
152
+ },
153
+ {
154
+ "java": {
155
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
156
+ },
157
+ "type": "string",
158
+ "name": "actor",
159
+ "doc": "The entity (e.g. a member URN) which will be credited for moving the resource/association/sub-resource into the specific lifecycle stage. It is also the one used to authorize the change.",
160
+ "Urn": "Urn"
161
+ },
162
+ {
163
+ "java": {
164
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
165
+ },
166
+ "type": [
167
+ "null",
168
+ "string"
169
+ ],
170
+ "name": "impersonator",
171
+ "default": null,
172
+ "doc": "The entity (e.g. a service URN) which performs the change on behalf of the Actor and must be authorized to act as the Actor.",
173
+ "Urn": "Urn"
174
+ },
175
+ {
176
+ "type": [
177
+ "null",
178
+ "string"
179
+ ],
180
+ "name": "message",
181
+ "default": null,
182
+ "doc": "Additional context around how DataHub was informed of the particular change. For example: was the change created by an automated process, or manually."
183
+ }
184
+ ],
185
+ "doc": "Data captured on a resource/association/sub-resource level giving insight into when that resource/association/sub-resource moved into a particular lifecycle stage, and who acted to move it into that specific lifecycle stage."
186
+ }
187
+ ],
188
+ "name": "sourceCreatedTimestamp",
189
+ "default": null,
190
+ "doc": "Timestamp reflecting when this asset version was created in the source system."
191
+ },
192
+ {
193
+ "type": [
194
+ "null",
195
+ "com.linkedin.pegasus2avro.common.AuditStamp"
196
+ ],
197
+ "name": "metadataCreatedTimestamp",
198
+ "default": null,
199
+ "doc": "Timestamp reflecting when the metadata for this version was created in DataHub"
200
+ },
201
+ {
202
+ "Searchable": {
203
+ "fieldType": "BOOLEAN",
204
+ "queryByDefault": false
205
+ },
206
+ "type": [
207
+ "null",
208
+ "boolean"
209
+ ],
210
+ "name": "isLatest",
211
+ "default": null,
212
+ "doc": "Marks whether this version is currently the latest. Set by a side effect and should not be modified by API."
213
+ }
214
+ ],
215
+ "doc": "Properties about a versioned asset i.e. dataset, ML Model, etc."
216
+ }
@@ -0,0 +1,26 @@
1
+ {
2
+ "type": "record",
3
+ "Aspect": {
4
+ "name": "versionSetKey",
5
+ "keyForEntity": "versionSet",
6
+ "entityCategory": "core",
7
+ "entityAspects": [
8
+ "versionSetProperties"
9
+ ]
10
+ },
11
+ "name": "VersionSetKey",
12
+ "namespace": "com.linkedin.pegasus2avro.metadata.key",
13
+ "fields": [
14
+ {
15
+ "type": "string",
16
+ "name": "id",
17
+ "doc": "ID of the Version Set, generated from platform + asset id / name"
18
+ },
19
+ {
20
+ "type": "string",
21
+ "name": "entityType",
22
+ "doc": "Type of entities included in version set, limits to a single entity type between linked versioned entities"
23
+ }
24
+ ],
25
+ "doc": "Key for a Version Set entity"
26
+ }
@@ -0,0 +1,49 @@
1
+ {
2
+ "type": "record",
3
+ "Aspect": {
4
+ "name": "versionSetProperties"
5
+ },
6
+ "name": "VersionSetProperties",
7
+ "namespace": "com.linkedin.pegasus2avro.versionset",
8
+ "fields": [
9
+ {
10
+ "Searchable": {
11
+ "/*": {
12
+ "fieldType": "TEXT",
13
+ "queryByDefault": true
14
+ }
15
+ },
16
+ "type": {
17
+ "type": "map",
18
+ "values": "string"
19
+ },
20
+ "name": "customProperties",
21
+ "default": {},
22
+ "doc": "Custom property bag."
23
+ },
24
+ {
25
+ "Searchable": {
26
+ "queryByDefault": "false"
27
+ },
28
+ "java": {
29
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
30
+ },
31
+ "type": "string",
32
+ "name": "latest",
33
+ "doc": "The latest versioned entity linked to in this version set",
34
+ "Urn": "Urn"
35
+ },
36
+ {
37
+ "type": {
38
+ "type": "enum",
39
+ "name": "VersioningScheme",
40
+ "namespace": "com.linkedin.pegasus2avro.versionset",
41
+ "symbols": [
42
+ "ALPHANUMERIC_GENERATED_BY_DATAHUB"
43
+ ]
44
+ },
45
+ "name": "versioningScheme",
46
+ "doc": "What versioning scheme is being utilized for the versioned entities sort criterion. Static once set"
47
+ }
48
+ ]
49
+ }
@@ -11,34 +11,25 @@ class DataHubSecretsClient:
11
11
  def __init__(self, graph: DataHubGraph):
12
12
  self.graph = graph
13
13
 
14
+ def _cleanup_secret_name(self, secret_names: List[str]) -> List[str]:
15
+ """Remove empty strings from the list of secret names."""
16
+ return [secret_name for secret_name in secret_names if secret_name]
17
+
14
18
  def get_secret_values(self, secret_names: List[str]) -> Dict[str, Optional[str]]:
15
19
  if len(secret_names) == 0:
16
20
  return {}
17
21
 
18
- request_json = {
19
- "query": """query getSecretValues($input: GetSecretValuesInput!) {\n
20
- getSecretValues(input: $input) {\n
21
- name\n
22
- value\n
23
- }\n
22
+ res_data = self.graph.execute_graphql(
23
+ query="""query getSecretValues($input: GetSecretValuesInput!) {
24
+ getSecretValues(input: $input) {
25
+ name
26
+ value
27
+ }
24
28
  }""",
25
- "variables": {"input": {"secrets": secret_names}},
26
- }
27
- # TODO: Use graph.execute_graphql() instead.
28
-
29
- # Fetch secrets using GraphQL API f
30
- response = self.graph._session.post(
31
- f"{self.graph.config.server}/api/graphql", json=request_json
29
+ variables={"input": {"secrets": self._cleanup_secret_name(secret_names)}},
32
30
  )
33
- response.raise_for_status()
34
-
35
- # Verify response
36
- res_data = response.json()
37
- if "errors" in res_data:
38
- raise Exception("Failed to retrieve secrets from DataHub.")
39
-
40
31
  # Convert list of name, value secret pairs into a dict and return
41
- secret_value_list = res_data["data"]["getSecretValues"]
32
+ secret_value_list = res_data["getSecretValues"]
42
33
  secret_value_dict = dict()
43
34
  for secret_value in secret_value_list:
44
35
  secret_value_dict[secret_value["name"]] = secret_value["value"]
@@ -2,10 +2,7 @@ import json
2
2
  import logging
3
3
  from typing import List
4
4
 
5
- from datahub.configuration.config_loader import (
6
- list_referenced_env_variables,
7
- resolve_env_variables,
8
- )
5
+ from datahub.configuration.config_loader import EnvResolver
9
6
  from datahub.secret.secret_store import SecretStore
10
7
 
11
8
  logger = logging.getLogger(__name__)
@@ -42,18 +39,27 @@ def resolve_secrets(secret_names: List[str], secret_stores: List[SecretStore]) -
42
39
  return final_secret_values
43
40
 
44
41
 
45
- def resolve_recipe(recipe: str, secret_stores: List[SecretStore]) -> dict:
42
+ def resolve_recipe(
43
+ recipe: str, secret_stores: List[SecretStore], strict_env_syntax: bool = True
44
+ ) -> dict:
45
+ # Note: the default for `strict_env_syntax` is normally False, but here we override
46
+ # it to be true. Particularly when fetching secrets from external secret stores, we
47
+ # want to be more careful about not over-fetching secrets.
48
+
46
49
  json_recipe_raw = json.loads(recipe)
47
50
 
48
51
  # 1. Extract all secrets needing resolved.
49
- secrets_to_resolve = list_referenced_env_variables(json_recipe_raw)
52
+ secrets_to_resolve = EnvResolver.list_referenced_variables(
53
+ json_recipe_raw, strict_env_syntax=strict_env_syntax
54
+ )
50
55
 
51
56
  # 2. Resolve secret values
52
57
  secret_values_dict = resolve_secrets(list(secrets_to_resolve), secret_stores)
53
58
 
54
59
  # 3. Substitute secrets into recipe file
55
- json_recipe_resolved = resolve_env_variables(
56
- json_recipe_raw, environ=secret_values_dict
60
+ resolver = EnvResolver(
61
+ environ=secret_values_dict, strict_env_syntax=strict_env_syntax
57
62
  )
63
+ json_recipe_resolved = resolver.resolve(json_recipe_raw)
58
64
 
59
65
  return json_recipe_resolved
@@ -9,8 +9,7 @@ from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath
9
9
  class HasCustomPropertiesPatch(MetadataPatchProposal):
10
10
  @classmethod
11
11
  @abstractmethod
12
- def _custom_properties_location(self) -> Tuple[str, PatchPath]:
13
- ...
12
+ def _custom_properties_location(self) -> Tuple[str, PatchPath]: ...
14
13
 
15
14
  def add_custom_property(self, key: str, value: str) -> Self:
16
15
  """Add a custom property to the entity.
@@ -33,14 +33,11 @@ class GraphQLSchemaMetadata(TypedDict):
33
33
 
34
34
  class SchemaResolverInterface(Protocol):
35
35
  @property
36
- def platform(self) -> str:
37
- ...
36
+ def platform(self) -> str: ...
38
37
 
39
- def includes_temp_tables(self) -> bool:
40
- ...
38
+ def includes_temp_tables(self) -> bool: ...
41
39
 
42
- def resolve_table(self, table: _TableName) -> Tuple[str, Optional[SchemaInfo]]:
43
- ...
40
+ def resolve_table(self, table: _TableName) -> Tuple[str, Optional[SchemaInfo]]: ...
44
41
 
45
42
  def __hash__(self) -> int:
46
43
  # Mainly to make lru_cache happy in methods that accept a schema resolver.
@@ -232,8 +229,7 @@ class SchemaResolver(Closeable, SchemaResolverInterface):
232
229
  return {
233
230
  get_simple_field_path_from_v2_field_path(field["fieldPath"]): (
234
231
  # The actual types are more of a "nice to have".
235
- field["nativeDataType"]
236
- or "str"
232
+ field["nativeDataType"] or "str"
237
233
  )
238
234
  for field in schema["fields"]
239
235
  # TODO: We can't generate lineage to columns nested within structs yet.
@@ -289,8 +285,7 @@ def _convert_schema_field_list_to_info(
289
285
  return {
290
286
  get_simple_field_path_from_v2_field_path(col.fieldPath): (
291
287
  # The actual types are more of a "nice to have".
292
- col.nativeDataType
293
- or "str"
288
+ col.nativeDataType or "str"
294
289
  )
295
290
  for col in schema_fields
296
291
  # TODO: We can't generate lineage to columns nested within structs yet.