acryl-datahub 0.15.0.1rc17__py3-none-any.whl → 0.15.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this release of acryl-datahub has been flagged as potentially problematic.
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/METADATA +2440 -2438
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/RECORD +211 -207
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/WHEEL +1 -1
- datahub/__init__.py +1 -1
- datahub/api/entities/assertion/assertion_operator.py +3 -5
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/assertion_operator.py +3 -5
- datahub/api/entities/dataproduct/dataproduct.py +4 -4
- datahub/api/entities/dataset/dataset.py +2 -1
- datahub/api/entities/structuredproperties/structuredproperties.py +18 -7
- datahub/cli/cli_utils.py +13 -2
- datahub/cli/delete_cli.py +3 -3
- datahub/cli/docker_cli.py +6 -6
- datahub/cli/ingest_cli.py +25 -15
- datahub/cli/lite_cli.py +2 -2
- datahub/cli/migrate.py +5 -5
- datahub/cli/specific/assertions_cli.py +3 -3
- datahub/cli/specific/structuredproperties_cli.py +84 -0
- datahub/cli/timeline_cli.py +1 -1
- datahub/configuration/common.py +1 -2
- datahub/configuration/config_loader.py +73 -50
- datahub/configuration/git.py +2 -2
- datahub/configuration/time_window_config.py +10 -5
- datahub/emitter/mce_builder.py +4 -8
- datahub/emitter/mcp_builder.py +27 -0
- datahub/emitter/mcp_patch_builder.py +1 -2
- datahub/emitter/rest_emitter.py +126 -85
- datahub/entrypoints.py +6 -0
- datahub/ingestion/api/incremental_lineage_helper.py +2 -8
- datahub/ingestion/api/report.py +1 -2
- datahub/ingestion/api/source.py +4 -2
- datahub/ingestion/api/source_helpers.py +1 -1
- datahub/ingestion/extractor/json_schema_util.py +3 -3
- datahub/ingestion/extractor/schema_util.py +3 -5
- datahub/ingestion/fs/s3_fs.py +3 -3
- datahub/ingestion/glossary/datahub_classifier.py +6 -4
- datahub/ingestion/graph/client.py +22 -19
- datahub/ingestion/graph/config.py +1 -1
- datahub/ingestion/run/pipeline.py +8 -7
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/source/abs/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/abs/source.py +19 -8
- datahub/ingestion/source/aws/glue.py +77 -47
- datahub/ingestion/source/aws/s3_boto_utils.py +3 -3
- datahub/ingestion/source/aws/s3_util.py +24 -1
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +2 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +34 -34
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +14 -6
- datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py +8 -4
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -3
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +22 -16
- datahub/ingestion/source/bigquery_v2/lineage.py +16 -16
- datahub/ingestion/source/bigquery_v2/queries.py +1 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +3 -3
- datahub/ingestion/source/bigquery_v2/usage.py +60 -60
- datahub/ingestion/source/cassandra/cassandra.py +0 -1
- datahub/ingestion/source/cassandra/cassandra_profiling.py +24 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +4 -7
- datahub/ingestion/source/confluent_schema_registry.py +6 -6
- datahub/ingestion/source/csv_enricher.py +29 -29
- datahub/ingestion/source/datahub/config.py +10 -0
- datahub/ingestion/source/datahub/datahub_database_reader.py +4 -2
- datahub/ingestion/source/datahub/datahub_source.py +12 -2
- datahub/ingestion/source/dbt/dbt_cloud.py +13 -13
- datahub/ingestion/source/dbt/dbt_common.py +9 -7
- datahub/ingestion/source/delta_lake/source.py +0 -5
- datahub/ingestion/source/demo_data.py +1 -1
- datahub/ingestion/source/dremio/dremio_api.py +4 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +3 -3
- datahub/ingestion/source/dremio/dremio_reporting.py +0 -3
- datahub/ingestion/source/dremio/dremio_source.py +2 -2
- datahub/ingestion/source/elastic_search.py +4 -4
- datahub/ingestion/source/fivetran/fivetran.py +1 -6
- datahub/ingestion/source/gc/datahub_gc.py +11 -14
- datahub/ingestion/source/gc/execution_request_cleanup.py +31 -6
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +48 -15
- datahub/ingestion/source/gcs/gcs_source.py +3 -2
- datahub/ingestion/source/ge_data_profiler.py +2 -5
- datahub/ingestion/source/ge_profiling_config.py +3 -3
- datahub/ingestion/source/iceberg/iceberg.py +13 -6
- datahub/ingestion/source/iceberg/iceberg_common.py +49 -9
- datahub/ingestion/source/iceberg/iceberg_profiler.py +3 -1
- datahub/ingestion/source/identity/azure_ad.py +3 -3
- datahub/ingestion/source/identity/okta.py +3 -3
- datahub/ingestion/source/kafka/kafka.py +11 -9
- datahub/ingestion/source/kafka_connect/kafka_connect.py +3 -9
- datahub/ingestion/source/kafka_connect/sink_connectors.py +3 -3
- datahub/ingestion/source/kafka_connect/source_connectors.py +3 -3
- datahub/ingestion/source/looker/looker_common.py +19 -19
- datahub/ingestion/source/looker/looker_config.py +11 -6
- datahub/ingestion/source/looker/looker_source.py +25 -25
- datahub/ingestion/source/looker/looker_template_language.py +3 -3
- datahub/ingestion/source/looker/looker_usage.py +5 -7
- datahub/ingestion/source/looker/lookml_concept_context.py +6 -6
- datahub/ingestion/source/looker/lookml_source.py +13 -15
- datahub/ingestion/source/looker/view_upstream.py +5 -5
- datahub/ingestion/source/metabase.py +1 -6
- datahub/ingestion/source/mlflow.py +4 -9
- datahub/ingestion/source/mode.py +5 -5
- datahub/ingestion/source/mongodb.py +6 -4
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +24 -31
- datahub/ingestion/source/openapi.py +9 -9
- datahub/ingestion/source/powerbi/config.py +12 -12
- datahub/ingestion/source/powerbi/m_query/parser.py +11 -11
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +26 -24
- datahub/ingestion/source/powerbi/m_query/resolver.py +13 -13
- datahub/ingestion/source/powerbi/powerbi.py +6 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +9 -9
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +7 -7
- datahub/ingestion/source/powerbi_report_server/report_server.py +1 -1
- datahub/ingestion/source/qlik_sense/qlik_api.py +1 -1
- datahub/ingestion/source/redash.py +0 -5
- datahub/ingestion/source/redshift/config.py +3 -3
- datahub/ingestion/source/redshift/redshift.py +45 -46
- datahub/ingestion/source/redshift/usage.py +33 -33
- datahub/ingestion/source/s3/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/s3/source.py +11 -15
- datahub/ingestion/source/salesforce.py +26 -25
- datahub/ingestion/source/schema/json_schema.py +1 -1
- datahub/ingestion/source/sigma/sigma.py +3 -3
- datahub/ingestion/source/sigma/sigma_api.py +12 -10
- datahub/ingestion/source/snowflake/snowflake_config.py +30 -7
- datahub/ingestion/source/snowflake/snowflake_connection.py +6 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_report.py +0 -3
- datahub/ingestion/source/snowflake/snowflake_schema.py +8 -5
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +136 -42
- datahub/ingestion/source/snowflake/snowflake_tag.py +21 -11
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +49 -50
- datahub/ingestion/source/snowflake/snowflake_utils.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_v2.py +51 -47
- datahub/ingestion/source/sql/athena.py +1 -3
- datahub/ingestion/source/sql/clickhouse.py +8 -14
- datahub/ingestion/source/sql/oracle.py +1 -3
- datahub/ingestion/source/sql/sql_generic_profiler.py +1 -2
- datahub/ingestion/source/sql/sql_types.py +1 -2
- datahub/ingestion/source/sql/sql_utils.py +5 -0
- datahub/ingestion/source/sql/teradata.py +18 -5
- datahub/ingestion/source/state/profiling_state_handler.py +3 -3
- datahub/ingestion/source/state/redundant_run_skip_handler.py +5 -7
- datahub/ingestion/source/state/stale_entity_removal_handler.py +3 -3
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +9 -9
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/superset.py +1 -6
- datahub/ingestion/source/tableau/tableau.py +343 -117
- datahub/ingestion/source/tableau/tableau_common.py +5 -2
- datahub/ingestion/source/unity/config.py +3 -1
- datahub/ingestion/source/unity/proxy.py +1 -1
- datahub/ingestion/source/unity/source.py +74 -74
- datahub/ingestion/source/unity/usage.py +3 -1
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +3 -3
- datahub/ingestion/source/usage/usage_common.py +1 -1
- datahub/ingestion/source_report/ingestion_stage.py +24 -20
- datahub/ingestion/transformer/add_dataset_dataproduct.py +4 -4
- datahub/ingestion/transformer/add_dataset_properties.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +4 -4
- datahub/ingestion/transformer/extract_ownership_from_tags.py +3 -3
- datahub/ingestion/transformer/tags_to_terms.py +7 -7
- datahub/integrations/assertion/snowflake/compiler.py +10 -10
- datahub/lite/duckdb_lite.py +12 -10
- datahub/metadata/_schema_classes.py +317 -44
- datahub/metadata/_urns/urn_defs.py +69 -15
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/versionset/__init__.py +17 -0
- datahub/metadata/schema.avsc +302 -89
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +4 -2
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -0
- datahub/metadata/schemas/DatasetKey.avsc +2 -1
- datahub/metadata/schemas/MLFeatureProperties.avsc +51 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +51 -0
- datahub/metadata/schemas/MLModelGroupProperties.avsc +96 -23
- datahub/metadata/schemas/MLModelKey.avsc +2 -1
- datahub/metadata/schemas/MLModelProperties.avsc +96 -48
- datahub/metadata/schemas/MLPrimaryKeyProperties.avsc +51 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +98 -71
- datahub/metadata/schemas/VersionProperties.avsc +216 -0
- datahub/metadata/schemas/VersionSetKey.avsc +26 -0
- datahub/metadata/schemas/VersionSetProperties.avsc +49 -0
- datahub/secret/datahub_secrets_client.py +12 -21
- datahub/secret/secret_common.py +14 -8
- datahub/specific/aspect_helpers/custom_properties.py +1 -2
- datahub/sql_parsing/schema_resolver.py +5 -10
- datahub/sql_parsing/sql_parsing_aggregator.py +18 -16
- datahub/sql_parsing/sqlglot_lineage.py +3 -3
- datahub/sql_parsing/sqlglot_utils.py +1 -1
- datahub/telemetry/stats.py +1 -2
- datahub/testing/mcp_diff.py +1 -1
- datahub/utilities/file_backed_collections.py +11 -11
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/logging_manager.py +2 -2
- datahub/utilities/lossy_collections.py +3 -3
- datahub/utilities/mapping.py +3 -3
- datahub/utilities/memory_footprint.py +3 -2
- datahub/utilities/perf_timer.py +11 -6
- datahub/utilities/serialized_lru_cache.py +3 -1
- datahub/utilities/sqlalchemy_query_combiner.py +6 -6
- datahub/utilities/sqllineage_patch.py +1 -1
- datahub/utilities/stats_collections.py +3 -1
- datahub/utilities/urns/_urn_base.py +28 -5
- datahub/utilities/urns/urn_iter.py +2 -2
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/top_level.txt +0 -0
datahub/metadata/schema.avsc
CHANGED
@@ -5109,6 +5109,51 @@
       "default": null,
       "doc": "URL where the reference exist"
     },
+    {
+      "Relationship": {
+        "/*": {
+          "entityTypes": [
+            "dataJob",
+            "dataProcessInstance"
+          ],
+          "isLineage": true,
+          "name": "TrainedBy"
+        }
+      },
+      "type": [
+        "null",
+        {
+          "type": "array",
+          "items": "string"
+        }
+      ],
+      "name": "trainingJobs",
+      "default": null,
+      "doc": "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect."
+    },
+    {
+      "Relationship": {
+        "/*": {
+          "entityTypes": [
+            "dataJob",
+            "dataProcessInstance"
+          ],
+          "isLineage": true,
+          "isUpstream": false,
+          "name": "UsedBy"
+        }
+      },
+      "type": [
+        "null",
+        {
+          "type": "array",
+          "items": "string"
+        }
+      ],
+      "name": "downstreamJobs",
+      "default": null,
+      "doc": "List of jobs or process instances (if any) that use the model or group."
+    },
     {
       "Searchable": {
         "boostScore": 10.0,
@@ -5180,6 +5225,14 @@
       ],
       "name": "versionTag",
       "default": null
+    },
+    {
+      "type": [
+        "null",
+        "com.linkedin.pegasus2avro.common.MetadataAttribution"
+      ],
+      "name": "metadataAttribution",
+      "default": null
     }
   ],
   "doc": "A resource-defined string representing the resource state for the purpose of concurrency control"
@@ -5393,54 +5446,6 @@
       "Urn": "Urn",
       "urn_is_array": true
     },
-    {
-      "Relationship": {
-        "/*": {
-          "entityTypes": [
-            "dataJob",
-            "dataProcessInstance"
-          ],
-          "isLineage": true,
-          "name": "TrainedBy"
-        }
-      },
-      "type": [
-        "null",
-        {
-          "type": "array",
-          "items": "string"
-        }
-      ],
-      "name": "trainingJobs",
-      "default": null,
-      "doc": "List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.",
-      "Urn": "Urn",
-      "urn_is_array": true
-    },
-    {
-      "Relationship": {
-        "/*": {
-          "entityTypes": [
-            "dataJob"
-          ],
-          "isLineage": true,
-          "isUpstream": false,
-          "name": "UsedBy"
-        }
-      },
-      "type": [
-        "null",
-        {
-          "type": "array",
-          "items": "string"
-        }
-      ],
-      "name": "downstreamJobs",
-      "default": null,
-      "doc": "List of jobs (if any) that use the model",
-      "Urn": "Urn",
-      "urn_is_array": true
-    },
     {
       "Relationship": {
         "/*": {
@@ -6839,6 +6844,51 @@
       "default": {},
       "doc": "Custom property bag."
     },
+    {
+      "Relationship": {
+        "/*": {
+          "entityTypes": [
+            "dataJob",
+            "dataProcessInstance"
+          ],
+          "isLineage": true,
+          "name": "TrainedBy"
+        }
+      },
+      "type": [
+        "null",
+        {
+          "type": "array",
+          "items": "string"
+        }
+      ],
+      "name": "trainingJobs",
+      "default": null,
+      "doc": "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect."
+    },
+    {
+      "Relationship": {
+        "/*": {
+          "entityTypes": [
+            "dataJob",
+            "dataProcessInstance"
+          ],
+          "isLineage": true,
+          "isUpstream": false,
+          "name": "UsedBy"
+        }
+      },
+      "type": [
+        "null",
+        {
+          "type": "array",
+          "items": "string"
+        }
+      ],
+      "name": "downstreamJobs",
+      "default": null,
+      "doc": "List of jobs or process instances (if any) that use the model or group."
+    },
     {
       "Searchable": {
         "boostScore": 10.0,
@@ -6895,29 +6945,6 @@
       "default": null,
       "doc": "Date when the MLModelGroup was last modified"
     },
-    {
-      "Relationship": {
-        "/*": {
-          "entityTypes": [
-            "dataJob"
-          ],
-          "isLineage": true,
-          "name": "TrainedBy"
-        }
-      },
-      "type": [
-        "null",
-        {
-          "type": "array",
-          "items": "string"
-        }
-      ],
-      "name": "trainingJobs",
-      "default": null,
-      "doc": "List of jobs (if any) used to train the model group. Visible in Lineage.",
-      "Urn": "Urn",
-      "urn_is_array": true
-    },
     {
       "type": [
         "null",
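Taken together, these schema.avsc hunks consolidate the `trainingJobs`/`downstreamJobs` relationship fields so that both `MLModelProperties` and `MLModelGroupProperties` accept `dataProcessInstance` URNs (not just `dataJob`) as `TrainedBy`/`UsedBy` lineage edges. A minimal sketch of emitting the updated aspect from Python follows, assuming the generated `MLModelPropertiesClass` mirrors these schema fields; the model and run URNs are placeholders:

# Sketch only: emit trainingJobs lineage for an ML model. URNs are hypothetical
# placeholders; field names follow the schema.avsc definitions shown above.
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DatahubRestEmitter
from datahub.metadata.schema_classes import MLModelPropertiesClass

emitter = DatahubRestEmitter(gms_server="http://localhost:8080")

mcp = MetadataChangeProposalWrapper(
    entityUrn="urn:li:mlModel:(urn:li:dataPlatform:mlflow,my_model,PROD)",
    aspect=MLModelPropertiesClass(
        # As of this release, a dataProcessInstance (training run) is a valid
        # TrainedBy edge, not just a dataJob.
        trainingJobs=["urn:li:dataProcessInstance:abc123"],
    ),
)
emitter.emit(mcp)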
datahub/metadata/schemas/VersionProperties.avsc
ADDED
@@ -0,0 +1,216 @@
+{
+  "type": "record",
+  "Aspect": {
+    "name": "versionProperties"
+  },
+  "name": "VersionProperties",
+  "namespace": "com.linkedin.pegasus2avro.common",
+  "fields": [
+    {
+      "Relationship": {
+        "entityTypes": [
+          "versionSet"
+        ],
+        "name": "VersionOf"
+      },
+      "Searchable": {
+        "queryByDefault": false
+      },
+      "java": {
+        "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+      },
+      "type": "string",
+      "name": "versionSet",
+      "doc": "The linked Version Set entity that ties multiple versioned assets together",
+      "Urn": "Urn",
+      "entityTypes": [
+        "versionSet"
+      ]
+    },
+    {
+      "Searchable": {
+        "/versionTag": {
+          "fieldName": "version",
+          "queryByDefault": false
+        }
+      },
+      "type": {
+        "type": "record",
+        "name": "VersionTag",
+        "namespace": "com.linkedin.pegasus2avro.common",
+        "fields": [
+          {
+            "type": [
+              "null",
+              "string"
+            ],
+            "name": "versionTag",
+            "default": null
+          },
+          {
+            "type": [
+              "null",
+              {
+                "type": "record",
+                "name": "MetadataAttribution",
+                "namespace": "com.linkedin.pegasus2avro.common",
+                "fields": [
+                  {
+                    "type": "long",
+                    "name": "time",
+                    "doc": "When this metadata was updated."
+                  },
+                  {
+                    "java": {
+                      "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+                    },
+                    "type": "string",
+                    "name": "actor",
+                    "doc": "The entity (e.g. a member URN) responsible for applying the assocated metadata. This can\neither be a user (in case of UI edits) or the datahub system for automation.",
+                    "Urn": "Urn"
+                  },
+                  {
+                    "java": {
+                      "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+                    },
+                    "type": [
+                      "null",
+                      "string"
+                    ],
+                    "name": "source",
+                    "default": null,
+                    "doc": "The DataHub source responsible for applying the associated metadata. This will only be filled out\nwhen a DataHub source is responsible. This includes the specific metadata test urn, the automation urn.",
+                    "Urn": "Urn"
+                  },
+                  {
+                    "type": {
+                      "type": "map",
+                      "values": "string"
+                    },
+                    "name": "sourceDetail",
+                    "default": {},
+                    "doc": "The details associated with why this metadata was applied. For example, this could include\nthe actual regex rule, sql statement, ingestion pipeline ID, etc."
+                  }
+                ],
+                "doc": "Information about who, why, and how this metadata was applied"
+              }
+            ],
+            "name": "metadataAttribution",
+            "default": null
+          }
+        ],
+        "doc": "A resource-defined string representing the resource state for the purpose of concurrency control"
+      },
+      "name": "version",
+      "doc": "Label for this versioned asset, is unique within a version set"
+    },
+    {
+      "Searchable": {
+        "/*/versionTag": {
+          "fieldName": "aliases",
+          "queryByDefault": false
+        }
+      },
+      "type": {
+        "type": "array",
+        "items": "com.linkedin.pegasus2avro.common.VersionTag"
+      },
+      "name": "aliases",
+      "default": [],
+      "doc": "Associated aliases for this versioned asset"
+    },
+    {
+      "type": [
+        "null",
+        "string"
+      ],
+      "name": "comment",
+      "default": null,
+      "doc": "Comment documenting what this version was created for, changes, or represents"
+    },
+    {
+      "Searchable": {
+        "fieldName": "versionSortId",
+        "queryByDefault": false
+      },
+      "type": "string",
+      "name": "sortId",
+      "doc": "Sort identifier that determines where a version lives in the order of the Version Set.\nWhat this looks like depends on the Version Scheme. For sort ids generated by DataHub we use an 8 character string representation."
+    },
+    {
+      "type": [
+        "null",
+        {
+          "type": "record",
+          "name": "AuditStamp",
+          "namespace": "com.linkedin.pegasus2avro.common",
+          "fields": [
+            {
+              "type": "long",
+              "name": "time",
+              "doc": "When did the resource/association/sub-resource move into the specific lifecycle stage represented by this AuditEvent."
+            },
+            {
+              "java": {
+                "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+              },
+              "type": "string",
+              "name": "actor",
+              "doc": "The entity (e.g. a member URN) which will be credited for moving the resource/association/sub-resource into the specific lifecycle stage. It is also the one used to authorize the change.",
+              "Urn": "Urn"
+            },
+            {
+              "java": {
+                "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+              },
+              "type": [
+                "null",
+                "string"
+              ],
+              "name": "impersonator",
+              "default": null,
+              "doc": "The entity (e.g. a service URN) which performs the change on behalf of the Actor and must be authorized to act as the Actor.",
+              "Urn": "Urn"
+            },
+            {
+              "type": [
+                "null",
+                "string"
+              ],
+              "name": "message",
+              "default": null,
+              "doc": "Additional context around how DataHub was informed of the particular change. For example: was the change created by an automated process, or manually."
+            }
+          ],
+          "doc": "Data captured on a resource/association/sub-resource level giving insight into when that resource/association/sub-resource moved into a particular lifecycle stage, and who acted to move it into that specific lifecycle stage."
+        }
+      ],
+      "name": "sourceCreatedTimestamp",
+      "default": null,
+      "doc": "Timestamp reflecting when this asset version was created in the source system."
+    },
+    {
+      "type": [
+        "null",
+        "com.linkedin.pegasus2avro.common.AuditStamp"
+      ],
+      "name": "metadataCreatedTimestamp",
+      "default": null,
+      "doc": "Timestamp reflecting when the metadata for this version was created in DataHub"
+    },
+    {
+      "Searchable": {
+        "fieldType": "BOOLEAN",
+        "queryByDefault": false
+      },
+      "type": [
+        "null",
+        "boolean"
+      ],
+      "name": "isLatest",
+      "default": null,
+      "doc": "Marks whether this version is currently the latest. Set by a side effect and should not be modified by API."
+    }
+  ],
+  "doc": "Properties about a versioned asset i.e. dataset, ML Model, etc."
+}
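For orientation, here is a hedged sketch of what constructing this new aspect might look like from Python, assuming codegen produces `VersionPropertiesClass` and `VersionTagClass` with fields matching the schema above; the version-set URN format and sort id are illustrative guesses:

# Illustrative only: attach versionProperties to a versioned asset.
# VersionPropertiesClass is assumed to be the generated class for the aspect
# above; the versionSet URN format below is a guess.
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.schema_classes import VersionPropertiesClass, VersionTagClass

mcp = MetadataChangeProposalWrapper(
    entityUrn="urn:li:mlModel:(urn:li:dataPlatform:mlflow,my_model,PROD)",
    aspect=VersionPropertiesClass(
        versionSet="urn:li:versionSet:(my_model,mlModel)",  # hypothetical id format
        version=VersionTagClass(versionTag="v2"),
        sortId="AAAAAAAB",  # 8-character DataHub-generated sort id, per the doc above
        aliases=[VersionTagClass(versionTag="latest")],
        comment="Retrained on January data",
    ),
)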
datahub/metadata/schemas/VersionSetKey.avsc
ADDED
@@ -0,0 +1,26 @@
+{
+  "type": "record",
+  "Aspect": {
+    "name": "versionSetKey",
+    "keyForEntity": "versionSet",
+    "entityCategory": "core",
+    "entityAspects": [
+      "versionSetProperties"
+    ]
+  },
+  "name": "VersionSetKey",
+  "namespace": "com.linkedin.pegasus2avro.metadata.key",
+  "fields": [
+    {
+      "type": "string",
+      "name": "id",
+      "doc": "ID of the Version Set, generated from platform + asset id / name"
+    },
+    {
+      "type": "string",
+      "name": "entityType",
+      "doc": "Type of entities included in version set, limits to a single entity type between linked versioned entities"
+    }
+  ],
+  "doc": "Key for a Version Set entity"
+}
datahub/metadata/schemas/VersionSetProperties.avsc
ADDED
@@ -0,0 +1,49 @@
+{
+  "type": "record",
+  "Aspect": {
+    "name": "versionSetProperties"
+  },
+  "name": "VersionSetProperties",
+  "namespace": "com.linkedin.pegasus2avro.versionset",
+  "fields": [
+    {
+      "Searchable": {
+        "/*": {
+          "fieldType": "TEXT",
+          "queryByDefault": true
+        }
+      },
+      "type": {
+        "type": "map",
+        "values": "string"
+      },
+      "name": "customProperties",
+      "default": {},
+      "doc": "Custom property bag."
+    },
+    {
+      "Searchable": {
+        "queryByDefault": "false"
+      },
+      "java": {
+        "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+      },
+      "type": "string",
+      "name": "latest",
+      "doc": "The latest versioned entity linked to in this version set",
+      "Urn": "Urn"
+    },
+    {
+      "type": {
+        "type": "enum",
+        "name": "VersioningScheme",
+        "namespace": "com.linkedin.pegasus2avro.versionset",
+        "symbols": [
+          "ALPHANUMERIC_GENERATED_BY_DATAHUB"
+        ]
+      },
+      "name": "versioningScheme",
+      "doc": "What versioning scheme is being utilized for the versioned entities sort criterion. Static once set"
+    }
+  ]
+}
datahub/secret/datahub_secrets_client.py
CHANGED
@@ -11,34 +11,25 @@ class DataHubSecretsClient:
     def __init__(self, graph: DataHubGraph):
        self.graph = graph

+    def _cleanup_secret_name(self, secret_names: List[str]) -> List[str]:
+        """Remove empty strings from the list of secret names."""
+        return [secret_name for secret_name in secret_names if secret_name]
+
     def get_secret_values(self, secret_names: List[str]) -> Dict[str, Optional[str]]:
         if len(secret_names) == 0:
             return {}

-        request_json = {
-            "query": """query getSecretValues($input: GetSecretValuesInput!) {
-                getSecretValues(input: $input) {
-                    name
-                    value
-                }
+        res_data = self.graph.execute_graphql(
+            query="""query getSecretValues($input: GetSecretValuesInput!) {
+                getSecretValues(input: $input) {
+                    name
+                    value
+                }
             }""",
-            "variables": {"input": {"secrets": secret_names}},
-        }
-        # TODO: Use graph.execute_graphql() instead.
-
-        # Fetch secrets using GraphQL API f
-        response = self.graph._session.post(
-            f"{self.graph.config.server}/api/graphql", json=request_json
+            variables={"input": {"secrets": self._cleanup_secret_name(secret_names)}},
         )
-        response.raise_for_status()
-
-        # Verify response
-        res_data = response.json()
-        if "errors" in res_data:
-            raise Exception("Failed to retrieve secrets from DataHub.")
-
         # Convert list of name, value secret pairs into a dict and return
-        secret_value_list = res_data["data"]["getSecretValues"]
+        secret_value_list = res_data["getSecretValues"]
         secret_value_dict = dict()
         for secret_value in secret_value_list:
             secret_value_dict[secret_value["name"]] = secret_value["value"]
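The net effect of this refactor: secret fetching now goes through the authenticated `DataHubGraph.execute_graphql` helper (which handles error checking and unwraps the GraphQL `data` envelope) instead of a hand-rolled `_session.post`, and empty secret names are filtered out first. A usage sketch, with placeholder server and token values:

# Usage sketch with placeholder connection details.
from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
from datahub.secret.datahub_secrets_client import DataHubSecretsClient

graph = DataHubGraph(DatahubClientConfig(server="http://localhost:8080", token="<token>"))
client = DataHubSecretsClient(graph)

# Empty strings are now dropped by _cleanup_secret_name before the GraphQL call.
values = client.get_secret_values(["MY_DB_PASSWORD", ""])
print(values)  # e.g. {"MY_DB_PASSWORD": "..."}; a missing secret maps to None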
datahub/secret/secret_common.py
CHANGED
@@ -2,10 +2,7 @@ import json
 import logging
 from typing import List

-from datahub.configuration.config_loader import (
-    list_referenced_env_variables,
-    resolve_env_variables,
-)
+from datahub.configuration.config_loader import EnvResolver
 from datahub.secret.secret_store import SecretStore

 logger = logging.getLogger(__name__)
@@ -42,18 +39,27 @@ def resolve_secrets(secret_names: List[str], secret_stores: List[SecretStore]) -
     return final_secret_values


-def resolve_recipe(recipe: str, secret_stores: List[SecretStore]) -> dict:
+def resolve_recipe(
+    recipe: str, secret_stores: List[SecretStore], strict_env_syntax: bool = True
+) -> dict:
+    # Note: the default for `strict_env_syntax` is normally False, but here we override
+    # it to be true. Particularly when fetching secrets from external secret stores, we
+    # want to be more careful about not over-fetching secrets.
+
     json_recipe_raw = json.loads(recipe)

     # 1. Extract all secrets needing resolved.
-    secrets_to_resolve = list_referenced_env_variables(json_recipe_raw)
+    secrets_to_resolve = EnvResolver.list_referenced_variables(
+        json_recipe_raw, strict_env_syntax=strict_env_syntax
+    )

     # 2. Resolve secret values
     secret_values_dict = resolve_secrets(list(secrets_to_resolve), secret_stores)

     # 3. Substitute secrets into recipe file
-    json_recipe_resolved = resolve_env_variables(
-        json_recipe_raw, environ=secret_values_dict
+    resolver = EnvResolver(
+        environ=secret_values_dict, strict_env_syntax=strict_env_syntax
     )
+    json_recipe_resolved = resolver.resolve(json_recipe_raw)

     return json_recipe_resolved
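The `EnvResolver` calls above are the entire new surface area `resolve_recipe` depends on. A small behavior sketch follows, assuming `strict_env_syntax=True` restricts matching to explicit `${VAR}` references (an inference from the over-fetching comment in the diff, not a documented guarantee):

# Behavior sketch based on the calls visible in the diff; the exact matching
# rules of strict_env_syntax are an assumption inferred from the code comment.
from datahub.configuration.config_loader import EnvResolver

recipe = {"source": {"config": {"password": "${MY_DB_PASSWORD}"}}}

# 1. Find which variables the recipe references.
referenced = EnvResolver.list_referenced_variables(recipe, strict_env_syntax=True)
print(sorted(referenced))  # expected: ['MY_DB_PASSWORD']

# 2. Substitute values from an explicit dict rather than os.environ.
resolver = EnvResolver(environ={"MY_DB_PASSWORD": "hunter2"}, strict_env_syntax=True)
resolved = resolver.resolve(recipe)
print(resolved["source"]["config"]["password"])  # expected: hunter2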
datahub/specific/aspect_helpers/custom_properties.py
CHANGED
@@ -9,8 +9,7 @@ from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath
 class HasCustomPropertiesPatch(MetadataPatchProposal):
     @classmethod
     @abstractmethod
-    def _custom_properties_location(self) -> Tuple[str, PatchPath]:
-        ...
+    def _custom_properties_location(self) -> Tuple[str, PatchPath]: ...

     def add_custom_property(self, key: str, value: str) -> Self:
         """Add a custom property to the entity.
datahub/sql_parsing/schema_resolver.py
CHANGED
@@ -33,14 +33,11 @@ class GraphQLSchemaMetadata(TypedDict):

 class SchemaResolverInterface(Protocol):
     @property
-    def platform(self) -> str:
-        ...
+    def platform(self) -> str: ...

-    def includes_temp_tables(self) -> bool:
-        ...
+    def includes_temp_tables(self) -> bool: ...

-    def resolve_table(self, table: _TableName) -> Tuple[str, Optional[SchemaInfo]]:
-        ...
+    def resolve_table(self, table: _TableName) -> Tuple[str, Optional[SchemaInfo]]: ...

     def __hash__(self) -> int:
         # Mainly to make lru_cache happy in methods that accept a schema resolver.
@@ -232,8 +229,7 @@ class SchemaResolver(Closeable, SchemaResolverInterface):
         return {
             get_simple_field_path_from_v2_field_path(field["fieldPath"]): (
                 # The actual types are more of a "nice to have".
-                field["nativeDataType"]
-                or "str"
+                field["nativeDataType"] or "str"
             )
             for field in schema["fields"]
             # TODO: We can't generate lineage to columns nested within structs yet.
@@ -289,8 +285,7 @@ def _convert_schema_field_list_to_info(
         return {
             get_simple_field_path_from_v2_field_path(col.fieldPath): (
                 # The actual types are more of a "nice to have".
-                col.nativeDataType
-                or "str"
+                col.nativeDataType or "str"
             )
             for col in schema_fields
             # TODO: We can't generate lineage to columns nested within structs yet.