acryl-datahub 0.15.0.1rc17__py3-none-any.whl → 0.15.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this release of acryl-datahub has been flagged as potentially problematic.
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/METADATA +2440 -2438
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/RECORD +211 -207
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/WHEEL +1 -1
- datahub/__init__.py +1 -1
- datahub/api/entities/assertion/assertion_operator.py +3 -5
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/assertion_operator.py +3 -5
- datahub/api/entities/dataproduct/dataproduct.py +4 -4
- datahub/api/entities/dataset/dataset.py +2 -1
- datahub/api/entities/structuredproperties/structuredproperties.py +18 -7
- datahub/cli/cli_utils.py +13 -2
- datahub/cli/delete_cli.py +3 -3
- datahub/cli/docker_cli.py +6 -6
- datahub/cli/ingest_cli.py +25 -15
- datahub/cli/lite_cli.py +2 -2
- datahub/cli/migrate.py +5 -5
- datahub/cli/specific/assertions_cli.py +3 -3
- datahub/cli/specific/structuredproperties_cli.py +84 -0
- datahub/cli/timeline_cli.py +1 -1
- datahub/configuration/common.py +1 -2
- datahub/configuration/config_loader.py +73 -50
- datahub/configuration/git.py +2 -2
- datahub/configuration/time_window_config.py +10 -5
- datahub/emitter/mce_builder.py +4 -8
- datahub/emitter/mcp_builder.py +27 -0
- datahub/emitter/mcp_patch_builder.py +1 -2
- datahub/emitter/rest_emitter.py +126 -85
- datahub/entrypoints.py +6 -0
- datahub/ingestion/api/incremental_lineage_helper.py +2 -8
- datahub/ingestion/api/report.py +1 -2
- datahub/ingestion/api/source.py +4 -2
- datahub/ingestion/api/source_helpers.py +1 -1
- datahub/ingestion/extractor/json_schema_util.py +3 -3
- datahub/ingestion/extractor/schema_util.py +3 -5
- datahub/ingestion/fs/s3_fs.py +3 -3
- datahub/ingestion/glossary/datahub_classifier.py +6 -4
- datahub/ingestion/graph/client.py +22 -19
- datahub/ingestion/graph/config.py +1 -1
- datahub/ingestion/run/pipeline.py +8 -7
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/source/abs/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/abs/source.py +19 -8
- datahub/ingestion/source/aws/glue.py +77 -47
- datahub/ingestion/source/aws/s3_boto_utils.py +3 -3
- datahub/ingestion/source/aws/s3_util.py +24 -1
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +2 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +34 -34
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +14 -6
- datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py +8 -4
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -3
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +22 -16
- datahub/ingestion/source/bigquery_v2/lineage.py +16 -16
- datahub/ingestion/source/bigquery_v2/queries.py +1 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +3 -3
- datahub/ingestion/source/bigquery_v2/usage.py +60 -60
- datahub/ingestion/source/cassandra/cassandra.py +0 -1
- datahub/ingestion/source/cassandra/cassandra_profiling.py +24 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +4 -7
- datahub/ingestion/source/confluent_schema_registry.py +6 -6
- datahub/ingestion/source/csv_enricher.py +29 -29
- datahub/ingestion/source/datahub/config.py +10 -0
- datahub/ingestion/source/datahub/datahub_database_reader.py +4 -2
- datahub/ingestion/source/datahub/datahub_source.py +12 -2
- datahub/ingestion/source/dbt/dbt_cloud.py +13 -13
- datahub/ingestion/source/dbt/dbt_common.py +9 -7
- datahub/ingestion/source/delta_lake/source.py +0 -5
- datahub/ingestion/source/demo_data.py +1 -1
- datahub/ingestion/source/dremio/dremio_api.py +4 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +3 -3
- datahub/ingestion/source/dremio/dremio_reporting.py +0 -3
- datahub/ingestion/source/dremio/dremio_source.py +2 -2
- datahub/ingestion/source/elastic_search.py +4 -4
- datahub/ingestion/source/fivetran/fivetran.py +1 -6
- datahub/ingestion/source/gc/datahub_gc.py +11 -14
- datahub/ingestion/source/gc/execution_request_cleanup.py +31 -6
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +48 -15
- datahub/ingestion/source/gcs/gcs_source.py +3 -2
- datahub/ingestion/source/ge_data_profiler.py +2 -5
- datahub/ingestion/source/ge_profiling_config.py +3 -3
- datahub/ingestion/source/iceberg/iceberg.py +13 -6
- datahub/ingestion/source/iceberg/iceberg_common.py +49 -9
- datahub/ingestion/source/iceberg/iceberg_profiler.py +3 -1
- datahub/ingestion/source/identity/azure_ad.py +3 -3
- datahub/ingestion/source/identity/okta.py +3 -3
- datahub/ingestion/source/kafka/kafka.py +11 -9
- datahub/ingestion/source/kafka_connect/kafka_connect.py +3 -9
- datahub/ingestion/source/kafka_connect/sink_connectors.py +3 -3
- datahub/ingestion/source/kafka_connect/source_connectors.py +3 -3
- datahub/ingestion/source/looker/looker_common.py +19 -19
- datahub/ingestion/source/looker/looker_config.py +11 -6
- datahub/ingestion/source/looker/looker_source.py +25 -25
- datahub/ingestion/source/looker/looker_template_language.py +3 -3
- datahub/ingestion/source/looker/looker_usage.py +5 -7
- datahub/ingestion/source/looker/lookml_concept_context.py +6 -6
- datahub/ingestion/source/looker/lookml_source.py +13 -15
- datahub/ingestion/source/looker/view_upstream.py +5 -5
- datahub/ingestion/source/metabase.py +1 -6
- datahub/ingestion/source/mlflow.py +4 -9
- datahub/ingestion/source/mode.py +5 -5
- datahub/ingestion/source/mongodb.py +6 -4
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +24 -31
- datahub/ingestion/source/openapi.py +9 -9
- datahub/ingestion/source/powerbi/config.py +12 -12
- datahub/ingestion/source/powerbi/m_query/parser.py +11 -11
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +26 -24
- datahub/ingestion/source/powerbi/m_query/resolver.py +13 -13
- datahub/ingestion/source/powerbi/powerbi.py +6 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +9 -9
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +7 -7
- datahub/ingestion/source/powerbi_report_server/report_server.py +1 -1
- datahub/ingestion/source/qlik_sense/qlik_api.py +1 -1
- datahub/ingestion/source/redash.py +0 -5
- datahub/ingestion/source/redshift/config.py +3 -3
- datahub/ingestion/source/redshift/redshift.py +45 -46
- datahub/ingestion/source/redshift/usage.py +33 -33
- datahub/ingestion/source/s3/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/s3/source.py +11 -15
- datahub/ingestion/source/salesforce.py +26 -25
- datahub/ingestion/source/schema/json_schema.py +1 -1
- datahub/ingestion/source/sigma/sigma.py +3 -3
- datahub/ingestion/source/sigma/sigma_api.py +12 -10
- datahub/ingestion/source/snowflake/snowflake_config.py +30 -7
- datahub/ingestion/source/snowflake/snowflake_connection.py +6 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_report.py +0 -3
- datahub/ingestion/source/snowflake/snowflake_schema.py +8 -5
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +136 -42
- datahub/ingestion/source/snowflake/snowflake_tag.py +21 -11
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +49 -50
- datahub/ingestion/source/snowflake/snowflake_utils.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_v2.py +51 -47
- datahub/ingestion/source/sql/athena.py +1 -3
- datahub/ingestion/source/sql/clickhouse.py +8 -14
- datahub/ingestion/source/sql/oracle.py +1 -3
- datahub/ingestion/source/sql/sql_generic_profiler.py +1 -2
- datahub/ingestion/source/sql/sql_types.py +1 -2
- datahub/ingestion/source/sql/sql_utils.py +5 -0
- datahub/ingestion/source/sql/teradata.py +18 -5
- datahub/ingestion/source/state/profiling_state_handler.py +3 -3
- datahub/ingestion/source/state/redundant_run_skip_handler.py +5 -7
- datahub/ingestion/source/state/stale_entity_removal_handler.py +3 -3
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +9 -9
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/superset.py +1 -6
- datahub/ingestion/source/tableau/tableau.py +343 -117
- datahub/ingestion/source/tableau/tableau_common.py +5 -2
- datahub/ingestion/source/unity/config.py +3 -1
- datahub/ingestion/source/unity/proxy.py +1 -1
- datahub/ingestion/source/unity/source.py +74 -74
- datahub/ingestion/source/unity/usage.py +3 -1
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +3 -3
- datahub/ingestion/source/usage/usage_common.py +1 -1
- datahub/ingestion/source_report/ingestion_stage.py +24 -20
- datahub/ingestion/transformer/add_dataset_dataproduct.py +4 -4
- datahub/ingestion/transformer/add_dataset_properties.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +4 -4
- datahub/ingestion/transformer/extract_ownership_from_tags.py +3 -3
- datahub/ingestion/transformer/tags_to_terms.py +7 -7
- datahub/integrations/assertion/snowflake/compiler.py +10 -10
- datahub/lite/duckdb_lite.py +12 -10
- datahub/metadata/_schema_classes.py +317 -44
- datahub/metadata/_urns/urn_defs.py +69 -15
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/versionset/__init__.py +17 -0
- datahub/metadata/schema.avsc +302 -89
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +4 -2
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -0
- datahub/metadata/schemas/DatasetKey.avsc +2 -1
- datahub/metadata/schemas/MLFeatureProperties.avsc +51 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +51 -0
- datahub/metadata/schemas/MLModelGroupProperties.avsc +96 -23
- datahub/metadata/schemas/MLModelKey.avsc +2 -1
- datahub/metadata/schemas/MLModelProperties.avsc +96 -48
- datahub/metadata/schemas/MLPrimaryKeyProperties.avsc +51 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +98 -71
- datahub/metadata/schemas/VersionProperties.avsc +216 -0
- datahub/metadata/schemas/VersionSetKey.avsc +26 -0
- datahub/metadata/schemas/VersionSetProperties.avsc +49 -0
- datahub/secret/datahub_secrets_client.py +12 -21
- datahub/secret/secret_common.py +14 -8
- datahub/specific/aspect_helpers/custom_properties.py +1 -2
- datahub/sql_parsing/schema_resolver.py +5 -10
- datahub/sql_parsing/sql_parsing_aggregator.py +18 -16
- datahub/sql_parsing/sqlglot_lineage.py +3 -3
- datahub/sql_parsing/sqlglot_utils.py +1 -1
- datahub/telemetry/stats.py +1 -2
- datahub/testing/mcp_diff.py +1 -1
- datahub/utilities/file_backed_collections.py +11 -11
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/logging_manager.py +2 -2
- datahub/utilities/lossy_collections.py +3 -3
- datahub/utilities/mapping.py +3 -3
- datahub/utilities/memory_footprint.py +3 -2
- datahub/utilities/perf_timer.py +11 -6
- datahub/utilities/serialized_lru_cache.py +3 -1
- datahub/utilities/sqlalchemy_query_combiner.py +6 -6
- datahub/utilities/sqllineage_patch.py +1 -1
- datahub/utilities/stats_collections.py +3 -1
- datahub/utilities/urns/_urn_base.py +28 -5
- datahub/utilities/urns/urn_iter.py +2 -2
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/top_level.txt +0 -0
datahub/metadata/schema.avsc
CHANGED
@@ -5109,6 +5109,51 @@
       "default": null,
       "doc": "URL where the reference exist"
     },
+    {
+      "Relationship": {
+        "/*": {
+          "entityTypes": [
+            "dataJob",
+            "dataProcessInstance"
+          ],
+          "isLineage": true,
+          "name": "TrainedBy"
+        }
+      },
+      "type": [
+        "null",
+        {
+          "type": "array",
+          "items": "string"
+        }
+      ],
+      "name": "trainingJobs",
+      "default": null,
+      "doc": "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect."
+    },
+    {
+      "Relationship": {
+        "/*": {
+          "entityTypes": [
+            "dataJob",
+            "dataProcessInstance"
+          ],
+          "isLineage": true,
+          "isUpstream": false,
+          "name": "UsedBy"
+        }
+      },
+      "type": [
+        "null",
+        {
+          "type": "array",
+          "items": "string"
+        }
+      ],
+      "name": "downstreamJobs",
+      "default": null,
+      "doc": "List of jobs or process instances (if any) that use the model or group."
+    },
     {
       "Searchable": {
         "boostScore": 10.0,
@@ -5180,6 +5225,14 @@
       ],
       "name": "versionTag",
       "default": null
+    },
+    {
+      "type": [
+        "null",
+        "com.linkedin.pegasus2avro.common.MetadataAttribution"
+      ],
+      "name": "metadataAttribution",
+      "default": null
     }
   ],
   "doc": "A resource-defined string representing the resource state for the purpose of concurrency control"
@@ -5393,54 +5446,6 @@
       "Urn": "Urn",
       "urn_is_array": true
     },
-    {
-      "Relationship": {
-        "/*": {
-          "entityTypes": [
-            "dataJob",
-            "dataProcessInstance"
-          ],
-          "isLineage": true,
-          "name": "TrainedBy"
-        }
-      },
-      "type": [
-        "null",
-        {
-          "type": "array",
-          "items": "string"
-        }
-      ],
-      "name": "trainingJobs",
-      "default": null,
-      "doc": "List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.",
-      "Urn": "Urn",
-      "urn_is_array": true
-    },
-    {
-      "Relationship": {
-        "/*": {
-          "entityTypes": [
-            "dataJob"
-          ],
-          "isLineage": true,
-          "isUpstream": false,
-          "name": "UsedBy"
-        }
-      },
-      "type": [
-        "null",
-        {
-          "type": "array",
-          "items": "string"
-        }
-      ],
-      "name": "downstreamJobs",
-      "default": null,
-      "doc": "List of jobs (if any) that use the model",
-      "Urn": "Urn",
-      "urn_is_array": true
-    },
     {
       "Relationship": {
         "/*": {
@@ -6839,6 +6844,51 @@
       "default": {},
       "doc": "Custom property bag."
     },
+    {
+      "Relationship": {
+        "/*": {
+          "entityTypes": [
+            "dataJob",
+            "dataProcessInstance"
+          ],
+          "isLineage": true,
+          "name": "TrainedBy"
+        }
+      },
+      "type": [
+        "null",
+        {
+          "type": "array",
+          "items": "string"
+        }
+      ],
+      "name": "trainingJobs",
+      "default": null,
+      "doc": "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect."
+    },
+    {
+      "Relationship": {
+        "/*": {
+          "entityTypes": [
+            "dataJob",
+            "dataProcessInstance"
+          ],
+          "isLineage": true,
+          "isUpstream": false,
+          "name": "UsedBy"
+        }
+      },
+      "type": [
+        "null",
+        {
+          "type": "array",
+          "items": "string"
+        }
+      ],
+      "name": "downstreamJobs",
+      "default": null,
+      "doc": "List of jobs or process instances (if any) that use the model or group."
+    },
     {
       "Searchable": {
         "boostScore": 10.0,
@@ -6895,29 +6945,6 @@
       "default": null,
       "doc": "Date when the MLModelGroup was last modified"
     },
-    {
-      "Relationship": {
-        "/*": {
-          "entityTypes": [
-            "dataJob"
-          ],
-          "isLineage": true,
-          "name": "TrainedBy"
-        }
-      },
-      "type": [
-        "null",
-        {
-          "type": "array",
-          "items": "string"
-        }
-      ],
-      "name": "trainingJobs",
-      "default": null,
-      "doc": "List of jobs (if any) used to train the model group. Visible in Lineage.",
-      "Urn": "Urn",
-      "urn_is_array": true
-    },
     {
       "type": [
         "null",
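Taken together, these schema.avsc hunks consolidate the `trainingJobs`/`downstreamJobs` relationship fields so that both `MLModelProperties` and `MLModelGroupProperties` accept `dataProcessInstance` URNs (not just `dataJob`) as `TrainedBy`/`UsedBy` lineage edges. A minimal sketch of emitting the updated aspect from Python follows, assuming the generated `MLModelPropertiesClass` mirrors these schema fields; the model and run URNs are placeholders:

# Sketch only: emit trainingJobs lineage for an ML model. URNs are hypothetical
# placeholders; field names follow the schema.avsc definitions shown above.
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DatahubRestEmitter
from datahub.metadata.schema_classes import MLModelPropertiesClass

emitter = DatahubRestEmitter(gms_server="http://localhost:8080")

mcp = MetadataChangeProposalWrapper(
    entityUrn="urn:li:mlModel:(urn:li:dataPlatform:mlflow,my_model,PROD)",
    aspect=MLModelPropertiesClass(
        # As of this release, a dataProcessInstance (training run) is a valid
        # TrainedBy edge, not just a dataJob.
        trainingJobs=["urn:li:dataProcessInstance:abc123"],
    ),
)
emitter.emit(mcp)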
datahub/metadata/schemas/VersionProperties.avsc
ADDED
@@ -0,0 +1,216 @@
+{
+  "type": "record",
+  "Aspect": {
+    "name": "versionProperties"
+  },
+  "name": "VersionProperties",
+  "namespace": "com.linkedin.pegasus2avro.common",
+  "fields": [
+    {
+      "Relationship": {
+        "entityTypes": [
+          "versionSet"
+        ],
+        "name": "VersionOf"
+      },
+      "Searchable": {
+        "queryByDefault": false
+      },
+      "java": {
+        "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+      },
+      "type": "string",
+      "name": "versionSet",
+      "doc": "The linked Version Set entity that ties multiple versioned assets together",
+      "Urn": "Urn",
+      "entityTypes": [
+        "versionSet"
+      ]
+    },
+    {
+      "Searchable": {
+        "/versionTag": {
+          "fieldName": "version",
+          "queryByDefault": false
+        }
+      },
+      "type": {
+        "type": "record",
+        "name": "VersionTag",
+        "namespace": "com.linkedin.pegasus2avro.common",
+        "fields": [
+          {
+            "type": [
+              "null",
+              "string"
+            ],
+            "name": "versionTag",
+            "default": null
+          },
+          {
+            "type": [
+              "null",
+              {
+                "type": "record",
+                "name": "MetadataAttribution",
+                "namespace": "com.linkedin.pegasus2avro.common",
+                "fields": [
+                  {
+                    "type": "long",
+                    "name": "time",
+                    "doc": "When this metadata was updated."
+                  },
+                  {
+                    "java": {
+                      "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+                    },
+                    "type": "string",
+                    "name": "actor",
+                    "doc": "The entity (e.g. a member URN) responsible for applying the assocated metadata. This can\neither be a user (in case of UI edits) or the datahub system for automation.",
+                    "Urn": "Urn"
+                  },
+                  {
+                    "java": {
+                      "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+                    },
+                    "type": [
+                      "null",
+                      "string"
+                    ],
+                    "name": "source",
+                    "default": null,
+                    "doc": "The DataHub source responsible for applying the associated metadata. This will only be filled out\nwhen a DataHub source is responsible. This includes the specific metadata test urn, the automation urn.",
+                    "Urn": "Urn"
+                  },
+                  {
+                    "type": {
+                      "type": "map",
+                      "values": "string"
+                    },
+                    "name": "sourceDetail",
+                    "default": {},
+                    "doc": "The details associated with why this metadata was applied. For example, this could include\nthe actual regex rule, sql statement, ingestion pipeline ID, etc."
+                  }
+                ],
+                "doc": "Information about who, why, and how this metadata was applied"
+              }
+            ],
+            "name": "metadataAttribution",
+            "default": null
+          }
+        ],
+        "doc": "A resource-defined string representing the resource state for the purpose of concurrency control"
+      },
+      "name": "version",
+      "doc": "Label for this versioned asset, is unique within a version set"
+    },
+    {
+      "Searchable": {
+        "/*/versionTag": {
+          "fieldName": "aliases",
+          "queryByDefault": false
+        }
+      },
+      "type": {
+        "type": "array",
+        "items": "com.linkedin.pegasus2avro.common.VersionTag"
+      },
+      "name": "aliases",
+      "default": [],
+      "doc": "Associated aliases for this versioned asset"
+    },
+    {
+      "type": [
+        "null",
+        "string"
+      ],
+      "name": "comment",
+      "default": null,
+      "doc": "Comment documenting what this version was created for, changes, or represents"
+    },
+    {
+      "Searchable": {
+        "fieldName": "versionSortId",
+        "queryByDefault": false
+      },
+      "type": "string",
+      "name": "sortId",
+      "doc": "Sort identifier that determines where a version lives in the order of the Version Set.\nWhat this looks like depends on the Version Scheme. For sort ids generated by DataHub we use an 8 character string representation."
+    },
+    {
+      "type": [
+        "null",
+        {
+          "type": "record",
+          "name": "AuditStamp",
+          "namespace": "com.linkedin.pegasus2avro.common",
+          "fields": [
+            {
+              "type": "long",
+              "name": "time",
+              "doc": "When did the resource/association/sub-resource move into the specific lifecycle stage represented by this AuditEvent."
+            },
+            {
+              "java": {
+                "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+              },
+              "type": "string",
+              "name": "actor",
+              "doc": "The entity (e.g. a member URN) which will be credited for moving the resource/association/sub-resource into the specific lifecycle stage. It is also the one used to authorize the change.",
+              "Urn": "Urn"
+            },
+            {
+              "java": {
+                "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+              },
+              "type": [
+                "null",
+                "string"
+              ],
+              "name": "impersonator",
+              "default": null,
+              "doc": "The entity (e.g. a service URN) which performs the change on behalf of the Actor and must be authorized to act as the Actor.",
+              "Urn": "Urn"
+            },
+            {
+              "type": [
+                "null",
+                "string"
+              ],
+              "name": "message",
+              "default": null,
+              "doc": "Additional context around how DataHub was informed of the particular change. For example: was the change created by an automated process, or manually."
+            }
+          ],
+          "doc": "Data captured on a resource/association/sub-resource level giving insight into when that resource/association/sub-resource moved into a particular lifecycle stage, and who acted to move it into that specific lifecycle stage."
+        }
+      ],
+      "name": "sourceCreatedTimestamp",
+      "default": null,
+      "doc": "Timestamp reflecting when this asset version was created in the source system."
+    },
+    {
+      "type": [
+        "null",
+        "com.linkedin.pegasus2avro.common.AuditStamp"
+      ],
+      "name": "metadataCreatedTimestamp",
+      "default": null,
+      "doc": "Timestamp reflecting when the metadata for this version was created in DataHub"
+    },
+    {
+      "Searchable": {
+        "fieldType": "BOOLEAN",
+        "queryByDefault": false
+      },
+      "type": [
+        "null",
+        "boolean"
+      ],
+      "name": "isLatest",
+      "default": null,
+      "doc": "Marks whether this version is currently the latest. Set by a side effect and should not be modified by API."
+    }
+  ],
+  "doc": "Properties about a versioned asset i.e. dataset, ML Model, etc."
+}
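For orientation, here is a hedged sketch of what constructing this new aspect might look like from Python, assuming codegen produces `VersionPropertiesClass` and `VersionTagClass` with fields matching the schema above; the version-set URN format and sort id are illustrative guesses:

# Illustrative only: attach versionProperties to a versioned asset.
# VersionPropertiesClass is assumed to be the generated class for the aspect
# above; the versionSet URN format below is a guess.
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.schema_classes import VersionPropertiesClass, VersionTagClass

mcp = MetadataChangeProposalWrapper(
    entityUrn="urn:li:mlModel:(urn:li:dataPlatform:mlflow,my_model,PROD)",
    aspect=VersionPropertiesClass(
        versionSet="urn:li:versionSet:(my_model,mlModel)",  # hypothetical id format
        version=VersionTagClass(versionTag="v2"),
        sortId="AAAAAAAB",  # 8-character DataHub-generated sort id, per the doc above
        aliases=[VersionTagClass(versionTag="latest")],
        comment="Retrained on January data",
    ),
)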
datahub/metadata/schemas/VersionSetKey.avsc
ADDED
@@ -0,0 +1,26 @@
+{
+  "type": "record",
+  "Aspect": {
+    "name": "versionSetKey",
+    "keyForEntity": "versionSet",
+    "entityCategory": "core",
+    "entityAspects": [
+      "versionSetProperties"
+    ]
+  },
+  "name": "VersionSetKey",
+  "namespace": "com.linkedin.pegasus2avro.metadata.key",
+  "fields": [
+    {
+      "type": "string",
+      "name": "id",
+      "doc": "ID of the Version Set, generated from platform + asset id / name"
+    },
+    {
+      "type": "string",
+      "name": "entityType",
+      "doc": "Type of entities included in version set, limits to a single entity type between linked versioned entities"
+    }
+  ],
+  "doc": "Key for a Version Set entity"
+}
datahub/metadata/schemas/VersionSetProperties.avsc
ADDED
@@ -0,0 +1,49 @@
+{
+  "type": "record",
+  "Aspect": {
+    "name": "versionSetProperties"
+  },
+  "name": "VersionSetProperties",
+  "namespace": "com.linkedin.pegasus2avro.versionset",
+  "fields": [
+    {
+      "Searchable": {
+        "/*": {
+          "fieldType": "TEXT",
+          "queryByDefault": true
+        }
+      },
+      "type": {
+        "type": "map",
+        "values": "string"
+      },
+      "name": "customProperties",
+      "default": {},
+      "doc": "Custom property bag."
+    },
+    {
+      "Searchable": {
+        "queryByDefault": "false"
+      },
+      "java": {
+        "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+      },
+      "type": "string",
+      "name": "latest",
+      "doc": "The latest versioned entity linked to in this version set",
+      "Urn": "Urn"
+    },
+    {
+      "type": {
+        "type": "enum",
+        "name": "VersioningScheme",
+        "namespace": "com.linkedin.pegasus2avro.versionset",
+        "symbols": [
+          "ALPHANUMERIC_GENERATED_BY_DATAHUB"
+        ]
+      },
+      "name": "versioningScheme",
+      "doc": "What versioning scheme is being utilized for the versioned entities sort criterion. Static once set"
+    }
+  ]
+}
datahub/secret/datahub_secrets_client.py
CHANGED
@@ -11,34 +11,25 @@ class DataHubSecretsClient:
     def __init__(self, graph: DataHubGraph):
        self.graph = graph

+    def _cleanup_secret_name(self, secret_names: List[str]) -> List[str]:
+        """Remove empty strings from the list of secret names."""
+        return [secret_name for secret_name in secret_names if secret_name]
+
     def get_secret_values(self, secret_names: List[str]) -> Dict[str, Optional[str]]:
         if len(secret_names) == 0:
             return {}

-        request_json = {
-            "query": """query getSecretValues($input: GetSecretValuesInput!) {
-                getSecretValues(input: $input) {
-                    name
-                    value
-                }
+        res_data = self.graph.execute_graphql(
+            query="""query getSecretValues($input: GetSecretValuesInput!) {
+                getSecretValues(input: $input) {
+                    name
+                    value
+                }
             }""",
-            "variables": {"input": {"secrets": secret_names}},
-        }
-        # TODO: Use graph.execute_graphql() instead.
-
-        # Fetch secrets using GraphQL API f
-        response = self.graph._session.post(
-            f"{self.graph.config.server}/api/graphql", json=request_json
+            variables={"input": {"secrets": self._cleanup_secret_name(secret_names)}},
         )
-        response.raise_for_status()
-
-        # Verify response
-        res_data = response.json()
-        if "errors" in res_data:
-            raise Exception("Failed to retrieve secrets from DataHub.")
-
         # Convert list of name, value secret pairs into a dict and return
-        secret_value_list = res_data["data"]["getSecretValues"]
+        secret_value_list = res_data["getSecretValues"]
         secret_value_dict = dict()
         for secret_value in secret_value_list:
             secret_value_dict[secret_value["name"]] = secret_value["value"]
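The net effect of this refactor: secret fetching now goes through the authenticated `DataHubGraph.execute_graphql` helper (which handles error checking and unwraps the GraphQL `data` envelope) instead of a hand-rolled `_session.post`, and empty secret names are filtered out first. A usage sketch, with placeholder server and token values:

# Usage sketch with placeholder connection details.
from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
from datahub.secret.datahub_secrets_client import DataHubSecretsClient

graph = DataHubGraph(DatahubClientConfig(server="http://localhost:8080", token="<token>"))
client = DataHubSecretsClient(graph)

# Empty strings are now dropped by _cleanup_secret_name before the GraphQL call.
values = client.get_secret_values(["MY_DB_PASSWORD", ""])
print(values)  # e.g. {"MY_DB_PASSWORD": "..."}; a missing secret maps to None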
datahub/secret/secret_common.py
CHANGED
@@ -2,10 +2,7 @@ import json
 import logging
 from typing import List

-from datahub.configuration.config_loader import (
-    list_referenced_env_variables,
-    resolve_env_variables,
-)
+from datahub.configuration.config_loader import EnvResolver
 from datahub.secret.secret_store import SecretStore

 logger = logging.getLogger(__name__)
@@ -42,18 +39,27 @@ def resolve_secrets(secret_names: List[str], secret_stores: List[SecretStore]) -
     return final_secret_values


-def resolve_recipe(recipe: str, secret_stores: List[SecretStore]) -> dict:
+def resolve_recipe(
+    recipe: str, secret_stores: List[SecretStore], strict_env_syntax: bool = True
+) -> dict:
+    # Note: the default for `strict_env_syntax` is normally False, but here we override
+    # it to be true. Particularly when fetching secrets from external secret stores, we
+    # want to be more careful about not over-fetching secrets.
+
     json_recipe_raw = json.loads(recipe)

     # 1. Extract all secrets needing resolved.
-    secrets_to_resolve = list_referenced_env_variables(json_recipe_raw)
+    secrets_to_resolve = EnvResolver.list_referenced_variables(
+        json_recipe_raw, strict_env_syntax=strict_env_syntax
+    )

     # 2. Resolve secret values
     secret_values_dict = resolve_secrets(list(secrets_to_resolve), secret_stores)

     # 3. Substitute secrets into recipe file
-    json_recipe_resolved = resolve_env_variables(
-        json_recipe_raw, environ=secret_values_dict
+    resolver = EnvResolver(
+        environ=secret_values_dict, strict_env_syntax=strict_env_syntax
     )
+    json_recipe_resolved = resolver.resolve(json_recipe_raw)

     return json_recipe_resolved
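The `EnvResolver` calls above are the entire new surface area `resolve_recipe` depends on. A small behavior sketch follows, assuming `strict_env_syntax=True` restricts matching to explicit `${VAR}` references (an inference from the over-fetching comment in the diff, not a documented guarantee):

# Behavior sketch based on the calls visible in the diff; the exact matching
# rules of strict_env_syntax are an assumption inferred from the code comment.
from datahub.configuration.config_loader import EnvResolver

recipe = {"source": {"config": {"password": "${MY_DB_PASSWORD}"}}}

# 1. Find which variables the recipe references.
referenced = EnvResolver.list_referenced_variables(recipe, strict_env_syntax=True)
print(sorted(referenced))  # expected: ['MY_DB_PASSWORD']

# 2. Substitute values from an explicit dict rather than os.environ.
resolver = EnvResolver(environ={"MY_DB_PASSWORD": "hunter2"}, strict_env_syntax=True)
resolved = resolver.resolve(recipe)
print(resolved["source"]["config"]["password"])  # expected: hunter2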
datahub/specific/aspect_helpers/custom_properties.py
CHANGED
@@ -9,8 +9,7 @@ from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath
 class HasCustomPropertiesPatch(MetadataPatchProposal):
     @classmethod
     @abstractmethod
-    def _custom_properties_location(self) -> Tuple[str, PatchPath]:
-        ...
+    def _custom_properties_location(self) -> Tuple[str, PatchPath]: ...

     def add_custom_property(self, key: str, value: str) -> Self:
         """Add a custom property to the entity.
datahub/sql_parsing/schema_resolver.py
CHANGED
@@ -33,14 +33,11 @@ class GraphQLSchemaMetadata(TypedDict):

 class SchemaResolverInterface(Protocol):
     @property
-    def platform(self) -> str:
-        ...
+    def platform(self) -> str: ...

-    def includes_temp_tables(self) -> bool:
-        ...
+    def includes_temp_tables(self) -> bool: ...

-    def resolve_table(self, table: _TableName) -> Tuple[str, Optional[SchemaInfo]]:
-        ...
+    def resolve_table(self, table: _TableName) -> Tuple[str, Optional[SchemaInfo]]: ...

     def __hash__(self) -> int:
         # Mainly to make lru_cache happy in methods that accept a schema resolver.
@@ -232,8 +229,7 @@ class SchemaResolver(Closeable, SchemaResolverInterface):
         return {
             get_simple_field_path_from_v2_field_path(field["fieldPath"]): (
                 # The actual types are more of a "nice to have".
-                field["nativeDataType"]
-                or "str"
+                field["nativeDataType"] or "str"
             )
             for field in schema["fields"]
             # TODO: We can't generate lineage to columns nested within structs yet.
@@ -289,8 +285,7 @@ def _convert_schema_field_list_to_info(
         return {
             get_simple_field_path_from_v2_field_path(col.fieldPath): (
                 # The actual types are more of a "nice to have".
-                col.nativeDataType
-                or "str"
+                col.nativeDataType or "str"
             )
             for col in schema_fields
             # TODO: We can't generate lineage to columns nested within structs yet.