acryl-datahub 0.15.0.2rc6__py3-none-any.whl → 0.15.0.2rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub has been flagged as potentially problematic; see the registry listing for details.
- {acryl_datahub-0.15.0.2rc6.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/METADATA +2513 -2521
- {acryl_datahub-0.15.0.2rc6.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/RECORD +168 -168
- datahub/__init__.py +1 -1
- datahub/api/entities/assertion/assertion_operator.py +3 -5
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/assertion_operator.py +3 -5
- datahub/api/entities/dataproduct/dataproduct.py +4 -4
- datahub/api/entities/dataset/dataset.py +2 -1
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/cli/cli_utils.py +1 -1
- datahub/cli/docker_cli.py +6 -6
- datahub/cli/ingest_cli.py +25 -15
- datahub/cli/lite_cli.py +2 -2
- datahub/cli/migrate.py +3 -3
- datahub/cli/specific/assertions_cli.py +3 -3
- datahub/cli/timeline_cli.py +1 -1
- datahub/configuration/common.py +1 -2
- datahub/configuration/config_loader.py +73 -50
- datahub/configuration/git.py +2 -2
- datahub/configuration/time_window_config.py +10 -5
- datahub/emitter/mce_builder.py +4 -8
- datahub/emitter/mcp_patch_builder.py +1 -2
- datahub/entrypoints.py +6 -0
- datahub/ingestion/api/incremental_lineage_helper.py +2 -8
- datahub/ingestion/api/report.py +1 -2
- datahub/ingestion/api/source_helpers.py +1 -1
- datahub/ingestion/extractor/json_schema_util.py +3 -3
- datahub/ingestion/extractor/schema_util.py +3 -5
- datahub/ingestion/fs/s3_fs.py +3 -3
- datahub/ingestion/glossary/datahub_classifier.py +6 -4
- datahub/ingestion/graph/client.py +4 -6
- datahub/ingestion/run/pipeline.py +8 -7
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/source/abs/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/abs/source.py +19 -8
- datahub/ingestion/source/aws/glue.py +11 -11
- datahub/ingestion/source/aws/s3_boto_utils.py +3 -3
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +2 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +6 -6
- datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py +8 -4
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +15 -9
- datahub/ingestion/source/bigquery_v2/lineage.py +9 -9
- datahub/ingestion/source/bigquery_v2/queries.py +1 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +3 -3
- datahub/ingestion/source/bigquery_v2/usage.py +3 -3
- datahub/ingestion/source/cassandra/cassandra.py +0 -1
- datahub/ingestion/source/cassandra/cassandra_utils.py +4 -4
- datahub/ingestion/source/confluent_schema_registry.py +6 -6
- datahub/ingestion/source/csv_enricher.py +29 -29
- datahub/ingestion/source/datahub/config.py +4 -0
- datahub/ingestion/source/datahub/datahub_database_reader.py +4 -2
- datahub/ingestion/source/dbt/dbt_cloud.py +13 -13
- datahub/ingestion/source/dbt/dbt_common.py +9 -7
- datahub/ingestion/source/dremio/dremio_api.py +4 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +3 -3
- datahub/ingestion/source/elastic_search.py +4 -4
- datahub/ingestion/source/gc/datahub_gc.py +1 -0
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +17 -5
- datahub/ingestion/source/gcs/gcs_source.py +3 -2
- datahub/ingestion/source/ge_data_profiler.py +2 -5
- datahub/ingestion/source/ge_profiling_config.py +3 -3
- datahub/ingestion/source/iceberg/iceberg.py +3 -3
- datahub/ingestion/source/identity/azure_ad.py +3 -3
- datahub/ingestion/source/identity/okta.py +3 -3
- datahub/ingestion/source/kafka/kafka.py +11 -9
- datahub/ingestion/source/kafka_connect/kafka_connect.py +2 -3
- datahub/ingestion/source/kafka_connect/sink_connectors.py +3 -3
- datahub/ingestion/source/kafka_connect/source_connectors.py +3 -3
- datahub/ingestion/source/looker/looker_common.py +19 -19
- datahub/ingestion/source/looker/looker_config.py +3 -3
- datahub/ingestion/source/looker/looker_source.py +25 -25
- datahub/ingestion/source/looker/looker_template_language.py +3 -3
- datahub/ingestion/source/looker/looker_usage.py +5 -7
- datahub/ingestion/source/looker/lookml_concept_context.py +6 -6
- datahub/ingestion/source/looker/lookml_source.py +13 -15
- datahub/ingestion/source/looker/view_upstream.py +5 -5
- datahub/ingestion/source/mlflow.py +4 -4
- datahub/ingestion/source/mode.py +5 -5
- datahub/ingestion/source/mongodb.py +6 -4
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +24 -26
- datahub/ingestion/source/openapi.py +9 -9
- datahub/ingestion/source/powerbi/config.py +12 -12
- datahub/ingestion/source/powerbi/m_query/parser.py +11 -11
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +26 -24
- datahub/ingestion/source/powerbi/m_query/resolver.py +13 -13
- datahub/ingestion/source/powerbi/powerbi.py +6 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +9 -9
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +7 -7
- datahub/ingestion/source/qlik_sense/qlik_api.py +1 -1
- datahub/ingestion/source/redshift/config.py +3 -3
- datahub/ingestion/source/redshift/redshift.py +12 -12
- datahub/ingestion/source/redshift/usage.py +8 -8
- datahub/ingestion/source/s3/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/s3/source.py +1 -1
- datahub/ingestion/source/salesforce.py +26 -25
- datahub/ingestion/source/schema/json_schema.py +1 -1
- datahub/ingestion/source/sigma/sigma.py +3 -3
- datahub/ingestion/source/sigma/sigma_api.py +12 -10
- datahub/ingestion/source/snowflake/snowflake_config.py +9 -7
- datahub/ingestion/source/snowflake/snowflake_connection.py +6 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +6 -6
- datahub/ingestion/source/snowflake/snowflake_tag.py +7 -7
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_utils.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_v2.py +13 -4
- datahub/ingestion/source/sql/athena.py +1 -3
- datahub/ingestion/source/sql/clickhouse.py +8 -14
- datahub/ingestion/source/sql/oracle.py +1 -3
- datahub/ingestion/source/sql/sql_generic_profiler.py +1 -2
- datahub/ingestion/source/sql/sql_types.py +0 -1
- datahub/ingestion/source/sql/teradata.py +16 -3
- datahub/ingestion/source/state/profiling_state_handler.py +3 -3
- datahub/ingestion/source/state/redundant_run_skip_handler.py +5 -7
- datahub/ingestion/source/state/stale_entity_removal_handler.py +3 -3
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +9 -9
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/tableau/tableau.py +245 -101
- datahub/ingestion/source/tableau/tableau_common.py +5 -2
- datahub/ingestion/source/unity/config.py +3 -1
- datahub/ingestion/source/unity/proxy.py +1 -1
- datahub/ingestion/source/unity/source.py +3 -3
- datahub/ingestion/source/unity/usage.py +3 -1
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +3 -3
- datahub/ingestion/source/usage/usage_common.py +1 -1
- datahub/ingestion/transformer/add_dataset_dataproduct.py +4 -4
- datahub/ingestion/transformer/add_dataset_properties.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +4 -4
- datahub/ingestion/transformer/extract_ownership_from_tags.py +3 -3
- datahub/ingestion/transformer/tags_to_terms.py +7 -7
- datahub/integrations/assertion/snowflake/compiler.py +10 -10
- datahub/lite/duckdb_lite.py +12 -10
- datahub/metadata/_schema_classes.py +1 -1
- datahub/metadata/schema.avsc +6 -2
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +4 -2
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -0
- datahub/secret/datahub_secrets_client.py +12 -21
- datahub/secret/secret_common.py +14 -8
- datahub/specific/aspect_helpers/custom_properties.py +1 -2
- datahub/sql_parsing/schema_resolver.py +5 -10
- datahub/sql_parsing/sql_parsing_aggregator.py +18 -16
- datahub/sql_parsing/sqlglot_lineage.py +3 -3
- datahub/sql_parsing/sqlglot_utils.py +1 -1
- datahub/telemetry/stats.py +1 -2
- datahub/testing/mcp_diff.py +1 -1
- datahub/utilities/file_backed_collections.py +10 -10
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/logging_manager.py +2 -2
- datahub/utilities/lossy_collections.py +3 -3
- datahub/utilities/mapping.py +3 -3
- datahub/utilities/memory_footprint.py +3 -2
- datahub/utilities/serialized_lru_cache.py +3 -1
- datahub/utilities/sqlalchemy_query_combiner.py +6 -6
- datahub/utilities/sqllineage_patch.py +1 -1
- datahub/utilities/stats_collections.py +3 -1
- datahub/utilities/urns/_urn_base.py +28 -5
- datahub/utilities/urns/urn_iter.py +2 -2
- {acryl_datahub-0.15.0.2rc6.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.2rc6.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.2rc6.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/usage/usage_common.py
CHANGED

@@ -89,7 +89,7 @@ def make_usage_workunit(
     top_sql_queries: Optional[List[str]] = None
     if query_freq is not None:
         if top_n_queries < len(query_freq):
-            logger.warn(
+            logger.warning(
                 f"Top N query limit exceeded on {str(resource)}. Max number of queries {top_n_queries} < {len(query_freq)}. Truncating top queries to {top_n_queries}."
             )
            query_freq = query_freq[0:top_n_queries]
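The only change here swaps logger.warn for logger.warning. This is a safe mechanical fix: warn() has been a deprecated alias of warning() in the standard library's logging module since Python 3.3, and calling it emits a DeprecationWarning. A standalone illustration (not from the diff):

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    logger.warning("preferred spelling")  # supported API
    logger.warn("deprecated spelling")    # alias; emits DeprecationWarning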
datahub/ingestion/transformer/add_dataset_dataproduct.py
CHANGED

@@ -80,10 +80,10 @@ class AddDatasetDataProduct(DatasetDataproductTransformer):
                 ).add_asset(container_urn)
                 data_products_container[data_product_urn] = container_product
             else:
-                data_products_container[
-                    data_product_urn
-                ] = data_products_container[data_product_urn].add_asset(
-                    container_urn
+                data_products_container[data_product_urn] = (
+                    data_products_container[data_product_urn].add_asset(
+                        container_urn
+                    )
                 )
 
         mcps: List[
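Most hunks in this release follow the same mechanical pattern as the one above: instead of splitting a subscript or an assert across lines, the right-hand side (or the assert message) is wrapped in parentheses. This matches black's 2024 stable style; the diff itself does not name the formatter, so treat that attribution as an inference. A self-contained before/after sketch with hypothetical names:

    def compute(x: int) -> int:  # stand-in for a call long enough to force wrapping
        return x * 2

    results = {}
    some_long_key_name = "example"

    # Old wrapping: the subscript on the left-hand side was split.
    results[
        some_long_key_name
    ] = compute(21)

    # New wrapping: the subscript stays intact and the right-hand side is
    # parenthesized instead. Both forms are semantically identical.
    results[some_long_key_name] = (
        compute(21)
    )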
datahub/ingestion/transformer/add_dataset_properties.py
CHANGED

@@ -61,9 +61,9 @@ class AddDatasetProperties(DatasetPropertiesTransformer):
     ) -> Optional[DatasetPropertiesClass]:
         assert dataset_properties_aspect
 
-        server_dataset_properties_aspect: Optional[
-            DatasetPropertiesClass
-        ] = graph.get_dataset_properties(entity_urn)
+        server_dataset_properties_aspect: Optional[DatasetPropertiesClass] = (
+            graph.get_dataset_properties(entity_urn)
+        )
         # No need to take any action if server properties is None or there is not customProperties in server properties
         if (
             server_dataset_properties_aspect is None
datahub/ingestion/transformer/add_dataset_schema_tags.py
CHANGED

@@ -89,9 +89,9 @@ class AddDatasetSchemaTags(DatasetSchemaMetadataTransformer):
         server_field_map: dict = {}
         if self.config.semantics == TransformerSemantics.PATCH:
             assert self.ctx.graph
-            server_schema_metadata_aspect: Optional[
-                SchemaMetadataClass
-            ] = self.ctx.graph.get_schema_metadata(entity_urn=entity_urn)
+            server_schema_metadata_aspect: Optional[SchemaMetadataClass] = (
+                self.ctx.graph.get_schema_metadata(entity_urn=entity_urn)
+            )
             if server_schema_metadata_aspect is not None:
                 if not schema_metadata_aspect:
                     schema_metadata_aspect = server_schema_metadata_aspect
datahub/ingestion/transformer/add_dataset_schema_terms.py
CHANGED

@@ -108,9 +108,9 @@ class AddDatasetSchemaTerms(DatasetSchemaMetadataTransformer):
         ] = {}  # Map to cache server field objects, where fieldPath is key
         if self.config.semantics == TransformerSemantics.PATCH:
             assert self.ctx.graph
-            server_schema_metadata_aspect: Optional[
-                SchemaMetadataClass
-            ] = self.ctx.graph.get_schema_metadata(entity_urn=entity_urn)
+            server_schema_metadata_aspect: Optional[SchemaMetadataClass] = (
+                self.ctx.graph.get_schema_metadata(entity_urn=entity_urn)
+            )
             if server_schema_metadata_aspect is not None:
                 if not schema_metadata_aspect:
                     schema_metadata_aspect = server_schema_metadata_aspect
datahub/ingestion/transformer/dataset_domain_based_on_tags.py
CHANGED

@@ -60,10 +60,10 @@ class DatasetTagDomainMapper(DatasetDomainTransformer):
             domain_aspect.domains.extend(mapped_domains.domains)
             if self.config.semantics == TransformerSemantics.PATCH:
                 # Try merging with server-side domains
-                patch_domain_aspect: Optional[
-                    DomainsClass
-                ] = AddDatasetDomain._merge_with_server_domains(
-                    self.ctx.graph, entity_urn, domain_aspect
+                patch_domain_aspect: Optional[DomainsClass] = (
+                    AddDatasetDomain._merge_with_server_domains(
+                        self.ctx.graph, entity_urn, domain_aspect
+                    )
                 )
                 return cast(Optional[Aspect], patch_domain_aspect)
             return cast(Optional[Aspect], domain_aspect)
datahub/ingestion/transformer/extract_ownership_from_tags.py
CHANGED

@@ -141,9 +141,9 @@ class ExtractOwnersFromTagsTransformer(DatasetTagsTransformer):
             else:
                 owner_type = get_owner_type(self.config.owner_type)
                 if owner_type == OwnershipTypeClass.CUSTOM:
-                    assert (
-                        self.config.owner_type_urn is not None
-                    ), "owner_type_urn must be set if owner_type is CUSTOM"
+                    assert self.config.owner_type_urn is not None, (
+                        "owner_type_urn must be set if owner_type is CUSTOM"
+                    )
 
                 owners.append(
                     OwnerClass(
datahub/ingestion/transformer/tags_to_terms.py
CHANGED

@@ -92,9 +92,9 @@ class TagsToTermMapper(TagsToTermTransformer):
         in_global_tags_aspect: Optional[GlobalTagsClass] = self.ctx.graph.get_tags(
             entity_urn
         )
-        in_schema_metadata_aspect: Optional[
-            SchemaMetadataClass
-        ] = self.ctx.graph.get_schema_metadata(entity_urn)
+        in_schema_metadata_aspect: Optional[SchemaMetadataClass] = (
+            self.ctx.graph.get_schema_metadata(entity_urn)
+        )
 
         if in_global_tags_aspect is None and in_schema_metadata_aspect is None:
             return cast(Aspect, in_glossary_terms)
@@ -134,10 +134,10 @@ class TagsToTermMapper(TagsToTermTransformer):
         )
 
         if self.config.semantics == TransformerSemantics.PATCH:
-            patch_glossary_terms: Optional[
-                GlossaryTermsClass
-            ] = TagsToTermMapper._merge_with_server_glossary_terms(
-                self.ctx.graph, entity_urn, out_glossary_terms
+            patch_glossary_terms: Optional[GlossaryTermsClass] = (
+                TagsToTermMapper._merge_with_server_glossary_terms(
+                    self.ctx.graph, entity_urn, out_glossary_terms
+                )
             )
             return cast(Optional[Aspect], patch_glossary_terms)
         else:
datahub/integrations/assertion/snowflake/compiler.py
CHANGED

@@ -61,17 +61,17 @@ class SnowflakeAssertionCompiler(AssertionCompiler):
     def create(
         cls, output_dir: str, extras: Dict[str, str]
     ) -> "SnowflakeAssertionCompiler":
-        assert os.path.exists(
-            output_dir
-        ), f"Specified location {output_dir} does not exist."
+        assert os.path.exists(output_dir), (
+            f"Specified location {output_dir} does not exist."
+        )
 
-        assert os.path.isdir(
-            output_dir
-        ), f"Specified location {output_dir} is not a folder."
+        assert os.path.isdir(output_dir), (
+            f"Specified location {output_dir} is not a folder."
+        )
 
-        assert any(
-            x.upper() == DMF_SCHEMA_PROPERTY_KEY for x in extras
-        ), "Must specify value for DMF schema using -x DMF_SCHEMA=<db.schema>"
+        assert any(x.upper() == DMF_SCHEMA_PROPERTY_KEY for x in extras), (
+            "Must specify value for DMF schema using -x DMF_SCHEMA=<db.schema>"
+        )
 
         return SnowflakeAssertionCompiler(output_dir, extras)
 
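The rewritten asserts keep the whole condition on the assert line and parenthesize only the message. One reason this style is careful about parenthesis placement: wrapping the condition and the message together in a single pair of parentheses creates a two-element tuple, which is always truthy, so the assertion can never fail. A standalone illustration:

    value = None

    # New style: only the message is parenthesized; the assert works as intended.
    try:
        assert value is not None, (
            "value must be set"
        )
    except AssertionError as e:
        print(e)  # -> value must be set

    # Pitfall avoided: this asserts a non-empty tuple and silently always passes
    # (recent Python versions emit a SyntaxWarning here).
    assert (value is not None, "this message never fires")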
@@ -232,6 +232,6 @@ def get_dmf_schedule(trigger: AssertionTrigger) -> str:
     elif isinstance(trigger.trigger, CronTrigger):
         return f"USING CRON {trigger.trigger.cron} {trigger.trigger.timezone}"
     elif isinstance(trigger.trigger, IntervalTrigger):
-        return f"{trigger.trigger.interval.seconds/60} MIN"
+        return f"{trigger.trigger.interval.seconds / 60} MIN"
     else:
         raise ValueError(f"Unsupported trigger type {type(trigger.trigger)}")
datahub/lite/duckdb_lite.py
CHANGED
@@ -163,9 +163,9 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
 
         if "properties" not in writeable_dict["systemMetadata"]:
             writeable_dict["systemMetadata"]["properties"] = {}
-        writeable_dict["systemMetadata"]["properties"][
-            "sysVersion"
-        ] = new_version
+        writeable_dict["systemMetadata"]["properties"]["sysVersion"] = (
+            new_version
+        )
         if needs_write:
             self.duckdb_client.execute(
                 query="INSERT INTO metadata_aspect_v2 VALUES (?, ?, ?, ?, ?, ?)",
@@ -208,9 +208,9 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
                 "lastObserved": writeable.systemMetadata.lastObserved
             }
         else:
-            system_metadata[
-                "lastObserved"
-            ] = writeable.systemMetadata.lastObserved
+            system_metadata["lastObserved"] = (
+                writeable.systemMetadata.lastObserved
+            )
         self.duckdb_client.execute(
             query="UPDATE metadata_aspect_v2 SET system_metadata = ? WHERE urn = ? AND aspect_name = ? AND version = 0",
             parameters=[
@@ -497,9 +497,9 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
             aspect_name = r[1]
             aspect_payload = json.loads(r[2])
             if typed:
-                assert (
-                    aspect_name in ASPECT_MAP
-                ), f"Missing aspect name {aspect_name} in the registry"
+                assert aspect_name in ASPECT_MAP, (
+                    f"Missing aspect name {aspect_name} in the registry"
+                )
                 try:
                     aspect_payload = ASPECT_MAP[aspect_name].from_obj(
                         post_json_transform(aspect_payload)
@@ -531,7 +531,9 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
         for r in results.fetchall():
             urn = r[0]
             aspect_name = r[1]
-            aspect_metadata = ASPECT_MAP[aspect_name].from_obj(post_json_transform(json.loads(r[2])))  # type: ignore
+            aspect_metadata = ASPECT_MAP[aspect_name].from_obj(
+                post_json_transform(json.loads(r[2]))
+            )  # type: ignore
             system_metadata = SystemMetadataClass.from_obj(json.loads(r[3]))
             mcp = MetadataChangeProposalWrapper(
                 entityUrn=urn,
datahub/metadata/_schema_classes.py
CHANGED

@@ -9096,7 +9096,7 @@ class DataProcessInstanceInputClass(_Aspect):
 
     @property
     def inputs(self) -> List[str]:
-        """Input datasets to be consumed"""
+        """Input assets consumed"""
         return self._inner_dict.get('inputs')  # type: ignore
 
     @inputs.setter
datahub/metadata/schema.avsc
CHANGED
@@ -12699,8 +12699,10 @@
       "Relationship": {
         "/*": {
           "entityTypes": [
-            "dataset"
+            "dataset",
+            "mlModel"
           ],
+          "isLineage": true,
           "name": "Consumes"
         }
       },
@@ -12720,7 +12722,7 @@
         "items": "string"
       },
       "name": "inputs",
-      "doc": "Input datasets to be consumed"
+      "doc": "Input assets consumed"
     }
   ],
   "doc": "Information about the inputs datasets of a Data process"
@@ -12883,6 +12885,8 @@
             "dataset",
             "mlModel"
           ],
+          "isLineage": true,
+          "isUpstream": false,
           "name": "Produces"
         }
       },
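Taken together, the schema.avsc hunks widen the Consumes and Produces relationships from datasets alone to mlModel entities as well, and mark both edges as lineage. Assembled from the hunks above, the Produces annotation now reads as follows (the isUpstream: false flag plausibly indicates the edge points downstream, from the process instance to the produced asset; that interpretation is ours, not stated in the diff):

    "Relationship": {
      "/*": {
        "entityTypes": [
          "dataset",
          "mlModel"
        ],
        "isLineage": true,
        "isUpstream": false,
        "name": "Produces"
      }
    }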
datahub/metadata/schemas/DataProcessInstanceInput.avsc
CHANGED

@@ -10,8 +10,10 @@
       "Relationship": {
         "/*": {
           "entityTypes": [
-            "dataset"
+            "dataset",
+            "mlModel"
           ],
+          "isLineage": true,
           "name": "Consumes"
         }
       },
@@ -29,7 +31,7 @@
         "items": "string"
       },
       "name": "inputs",
-      "doc": "Input datasets to be consumed",
+      "doc": "Input assets consumed",
       "Urn": "Urn",
       "urn_is_array": true
     }
datahub/secret/datahub_secrets_client.py
CHANGED

@@ -11,34 +11,25 @@ class DataHubSecretsClient:
     def __init__(self, graph: DataHubGraph):
         self.graph = graph
 
+    def _cleanup_secret_name(self, secret_names: List[str]) -> List[str]:
+        """Remove empty strings from the list of secret names."""
+        return [secret_name for secret_name in secret_names if secret_name]
+
     def get_secret_values(self, secret_names: List[str]) -> Dict[str, Optional[str]]:
         if len(secret_names) == 0:
             return {}
 
-        request_json = {
-            "query": """query getSecretValues($input: GetSecretValuesInput!) {
-                getSecretValues(input: $input) {
-                    name
-                    value
-                }
+        res_data = self.graph.execute_graphql(
+            query="""query getSecretValues($input: GetSecretValuesInput!) {
+                getSecretValues(input: $input) {
+                    name
+                    value
+                }
             }""",
-            "variables": {"input": {"secrets": secret_names}},
-        }
-        # TODO: Use graph.execute_graphql() instead.
-
-        # Fetch secrets using GraphQL API f
-        response = self.graph._session.post(
-            f"{self.graph.config.server}/api/graphql", json=request_json
+            variables={"input": {"secrets": self._cleanup_secret_name(secret_names)}},
         )
-        response.raise_for_status()
-
-        # Verify response
-        res_data = response.json()
-        if "errors" in res_data:
-            raise Exception("Failed to retrieve secrets from DataHub.")
-
         # Convert list of name, value secret pairs into a dict and return
-        secret_value_list = res_data["data"]["getSecretValues"]
+        secret_value_list = res_data["getSecretValues"]
         secret_value_dict = dict()
         for secret_value in secret_value_list:
             secret_value_dict[secret_value["name"]] = secret_value["value"]
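The rewrite above drops the hand-rolled POST to /api/graphql in favor of the client's execute_graphql helper, which (in this client) handles error checking and unwraps the top-level data envelope, hence res_data["getSecretValues"] where the old code read res_data["data"]["getSecretValues"]. A minimal usage sketch, assuming a reachable DataHub instance (the server URL and secret name are placeholders):

    from datahub.ingestion.graph.client import DataHubGraph, DatahubClientConfig

    graph = DataHubGraph(DatahubClientConfig(server="http://localhost:8080"))
    res = graph.execute_graphql(
        query="""query getSecretValues($input: GetSecretValuesInput!) {
            getSecretValues(input: $input) { name value }
        }""",
        variables={"input": {"secrets": ["MY_SECRET"]}},  # placeholder secret name
    )
    print(res["getSecretValues"])  # list of {name, value} pairs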
datahub/secret/secret_common.py
CHANGED
@@ -2,10 +2,7 @@ import json
 import logging
 from typing import List
 
-from datahub.configuration.config_loader import (
-    list_referenced_env_variables,
-    resolve_env_variables,
-)
+from datahub.configuration.config_loader import EnvResolver
 from datahub.secret.secret_store import SecretStore
 
 logger = logging.getLogger(__name__)
@@ -42,18 +39,27 @@ def resolve_secrets(secret_names: List[str], secret_stores: List[SecretStore]) -
     return final_secret_values
 
 
-def resolve_recipe(recipe: str, secret_stores: List[SecretStore]) -> dict:
+def resolve_recipe(
+    recipe: str, secret_stores: List[SecretStore], strict_env_syntax: bool = True
+) -> dict:
+    # Note: the default for `strict_env_syntax` is normally False, but here we override
+    # it to be true. Particularly when fetching secrets from external secret stores, we
+    # want to be more careful about not over-fetching secrets.
+
     json_recipe_raw = json.loads(recipe)
 
     # 1. Extract all secrets needing resolved.
-    secrets_to_resolve = list_referenced_env_variables(json_recipe_raw)
+    secrets_to_resolve = EnvResolver.list_referenced_variables(
+        json_recipe_raw, strict_env_syntax=strict_env_syntax
+    )
 
     # 2. Resolve secret values
     secret_values_dict = resolve_secrets(list(secrets_to_resolve), secret_stores)
 
     # 3. Substitute secrets into recipe file
-    json_recipe_resolved = resolve_env_variables(
-        json_recipe_raw, environ=secret_values_dict
+    resolver = EnvResolver(
+        environ=secret_values_dict, strict_env_syntax=strict_env_syntax
     )
+    json_recipe_resolved = resolver.resolve(json_recipe_raw)
 
     return json_recipe_resolved
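resolve_recipe now routes everything through the EnvResolver API from config_loader: enumerate the variable references in the recipe, fetch only those names from the secret stores, then substitute. A condensed sketch of the same three steps, with a made-up recipe and dummy secret values (the EnvResolver calls mirror the ones in the hunk above; the reading that strict mode limits matching to explicit ${VAR} references is our inference from the flag name and the diff's comment):

    import json

    from datahub.configuration.config_loader import EnvResolver

    recipe = json.loads('{"sink": {"config": {"token": "${DATAHUB_TOKEN}"}}}')

    # 1. Find referenced variables without resolving them.
    referenced = EnvResolver.list_referenced_variables(recipe, strict_env_syntax=True)

    # 2. Stand-in for fetching exactly these names from a secret store.
    secret_values = {name: "dummy-value" for name in referenced}

    # 3. Substitute, drawing only from the fetched values rather than os.environ.
    resolver = EnvResolver(environ=secret_values, strict_env_syntax=True)
    resolved = resolver.resolve(recipe)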
datahub/specific/aspect_helpers/custom_properties.py
CHANGED

@@ -9,8 +9,7 @@ from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath
 class HasCustomPropertiesPatch(MetadataPatchProposal):
     @classmethod
     @abstractmethod
-    def _custom_properties_location(self) -> Tuple[str, PatchPath]:
-        ...
+    def _custom_properties_location(self) -> Tuple[str, PatchPath]: ...
 
     def add_custom_property(self, key: str, value: str) -> Self:
         """Add a custom property to the entity.
datahub/sql_parsing/schema_resolver.py
CHANGED

@@ -33,14 +33,11 @@ class GraphQLSchemaMetadata(TypedDict):
 
 class SchemaResolverInterface(Protocol):
     @property
-    def platform(self) -> str:
-        ...
+    def platform(self) -> str: ...
 
-    def includes_temp_tables(self) -> bool:
-        ...
+    def includes_temp_tables(self) -> bool: ...
 
-    def resolve_table(self, table: _TableName) -> Tuple[str, Optional[SchemaInfo]]:
-        ...
+    def resolve_table(self, table: _TableName) -> Tuple[str, Optional[SchemaInfo]]: ...
 
     def __hash__(self) -> int:
         # Mainly to make lru_cache happy in methods that accept a schema resolver.
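The ellipsis bodies collapsed onto the def line are purely cosmetic: these are stub declarations on a typing.Protocol, which specifies a structural interface and carries no behavior. A standalone refresher on the pattern (names are hypothetical, for illustration only):

    from typing import Protocol

    class SupportsClose(Protocol):
        def close(self) -> None: ...  # declaration only; no implementation

    class File:
        def close(self) -> None:
            print("closed")

    def shutdown(resource: SupportsClose) -> None:
        resource.close()

    shutdown(File())  # File matches structurally; no inheritance required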
@@ -232,8 +229,7 @@ class SchemaResolver(Closeable, SchemaResolverInterface):
         return {
             get_simple_field_path_from_v2_field_path(field["fieldPath"]): (
                 # The actual types are more of a "nice to have".
-                field["nativeDataType"]
-                or "str"
+                field["nativeDataType"] or "str"
             )
             for field in schema["fields"]
             # TODO: We can't generate lineage to columns nested within structs yet.
@@ -289,8 +285,7 @@ def _convert_schema_field_list_to_info(
     return {
         get_simple_field_path_from_v2_field_path(col.fieldPath): (
             # The actual types are more of a "nice to have".
-            col.nativeDataType
-            or "str"
+            col.nativeDataType or "str"
         )
         for col in schema_fields
         # TODO: We can't generate lineage to columns nested within structs yet.
datahub/sql_parsing/sql_parsing_aggregator.py
CHANGED

@@ -284,6 +284,7 @@ class SqlAggregatorReport(Report):
 
     # Queries.
     num_queries_entities_generated: int = 0
+    num_queries_used_in_lineage: Optional[int] = None
     num_queries_skipped_due_to_filters: int = 0
 
     # Usage-related.
@@ -681,10 +682,10 @@ class SqlParsingAggregator(Closeable):
         query_id = self._known_lineage_query_id()
 
         # Generate CLL if schema of downstream is known
-        column_lineage: List[
-            ColumnLineageInfo
-        ] = self._generate_identity_column_lineage(
-            upstream_urn=upstream_urn, downstream_urn=downstream_urn
+        column_lineage: List[ColumnLineageInfo] = (
+            self._generate_identity_column_lineage(
+                upstream_urn=upstream_urn, downstream_urn=downstream_urn
+            )
         )
 
         # Register the query.
@@ -1043,9 +1044,9 @@ class SqlParsingAggregator(Closeable):
         temp_table_schemas: Dict[str, Optional[List[models.SchemaFieldClass]]] = {}
         for temp_table_urn, query_ids in self._temp_lineage_map[session_id].items():
             for query_id in query_ids:
-                temp_table_schemas[
-                    temp_table_urn
-                ] = self._inferred_temp_schemas.get(query_id)
+                temp_table_schemas[temp_table_urn] = (
+                    self._inferred_temp_schemas.get(query_id)
+                )
             if temp_table_schemas:
                 break
 
@@ -1072,9 +1073,9 @@ class SqlParsingAggregator(Closeable):
             schema_resolver=self._schema_resolver,
         )
         if parsed.debug_info.error:
-            self.report.views_parse_failures[
-                view_urn
-            ] = f"{parsed.debug_info.error} on query: {view_definition.view_definition[:100]}"
+            self.report.views_parse_failures[view_urn] = (
+                f"{parsed.debug_info.error} on query: {view_definition.view_definition[:100]}"
+            )
         if parsed.debug_info.table_error:
             self.report.num_views_failed += 1
             return  # we can't do anything with this query
@@ -1200,6 +1201,7 @@ class SqlParsingAggregator(Closeable):
         queries_generated: Set[QueryId] = set()
 
         yield from self._gen_lineage_mcps(queries_generated)
+        self.report.num_queries_used_in_lineage = len(queries_generated)
         yield from self._gen_usage_statistics_mcps()
         yield from self._gen_operation_mcps(queries_generated)
         yield from self._gen_remaining_queries(queries_generated)
@@ -1581,9 +1583,9 @@ class SqlParsingAggregator(Closeable):
                     temp_query_lineage_info
                 )
             else:
-                temp_upstream_queries[
-                    upstream
-                ] = temp_query_lineage_info
+                temp_upstream_queries[upstream] = (
+                    temp_query_lineage_info
+                )
 
         # Compute merged upstreams.
         new_upstreams = OrderedSet[UrnStr]()
@@ -1663,9 +1665,9 @@ class SqlParsingAggregator(Closeable):
         composed_of_queries_truncated: LossyList[str] = LossyList()
         for query_id in composed_of_queries:
             composed_of_queries_truncated.append(query_id)
-        self.report.queries_with_temp_upstreams[
-            composite_query_id
-        ] = composed_of_queries_truncated
+        self.report.queries_with_temp_upstreams[composite_query_id] = (
+            composed_of_queries_truncated
+        )
 
         merged_query_text = ";\n\n".join(
             [q.formatted_query_string for q in ordered_queries]
datahub/sql_parsing/sqlglot_lineage.py
CHANGED

@@ -442,9 +442,9 @@ def _create_table_ddl_cll(
 ) -> List[_ColumnLineageInfo]:
     column_lineage: List[_ColumnLineageInfo] = []
 
-    assert (
-        output_table is not None
-    ), "output_table must be set for create DDL statements"
+    assert output_table is not None, (
+        "output_table must be set for create DDL statements"
+    )
 
     create_schema: sqlglot.exp.Schema = statement.this
     sqlglot_columns = create_schema.expressions
datahub/sql_parsing/sqlglot_utils.py
CHANGED

@@ -404,7 +404,7 @@ def detach_ctes(
         if new_statement == statement:
             if iteration > 1:
                 logger.debug(
-                    f"Required {iteration+1} iterations to detach and eliminate all CTEs"
+                    f"Required {iteration + 1} iterations to detach and eliminate all CTEs"
                 )
             break
         statement = new_statement
datahub/telemetry/stats.py
CHANGED
datahub/testing/mcp_diff.py
CHANGED
@@ -246,7 +246,7 @@ class MCPDiff:
         for urn in self.aspect_changes.keys() - self.urns_added - self.urns_removed:
             aspect_map = self.aspect_changes[urn]
             s.append(f"Urn changed, {urn}:")
-            for aspect_name, aspect_diffs in aspect_map.items():
+            for aspect_diffs in aspect_map.values():
                 for i, ga in aspect_diffs.aspects_added.items():
                     s.append(self.report_aspect(ga, i, "added"))
                 if verbose:
datahub/utilities/file_backed_collections.py
CHANGED

@@ -224,9 +224,9 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
     _use_sqlite_on_conflict: bool = field(repr=False, default=True)
 
     def __post_init__(self) -> None:
-        assert (
-            self.cache_eviction_batch_size > 0
-        ), "cache_eviction_batch_size must be positive"
+        assert self.cache_eviction_batch_size > 0, (
+            "cache_eviction_batch_size must be positive"
+        )
 
         for reserved_column in ("key", "value", "rowid"):
             if reserved_column in self.extra_columns:
@@ -261,7 +261,7 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
                 rowid INTEGER PRIMARY KEY AUTOINCREMENT,
                 key TEXT UNIQUE,
                 value BLOB
-                {
+                {"".join(f", {column_name} BLOB" for column_name in self.extra_columns.keys())}
             )"""
         )
 
@@ -316,12 +316,12 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
                 f"""INSERT INTO {self.tablename} (
                     key,
                     value
-                    {
+                    {"".join(f", {column_name}" for column_name in self.extra_columns.keys())}
                 )
-                VALUES ({
+                VALUES ({", ".join(["?"] * (2 + len(self.extra_columns)))})
                 ON CONFLICT (key) DO UPDATE SET
                     value = excluded.value
-                    {
+                    {"".join(f", {column_name} = excluded.{column_name}" for column_name in self.extra_columns.keys())}
                 """,
                 items_to_write,
             )
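For reference, the f-string template above renders to an ordinary SQLite upsert. With a table named data and a single hypothetical extra column last_modified, it produces roughly:

    INSERT INTO data (
        key,
        value
        , last_modified
    )
    VALUES (?, ?, ?)
    ON CONFLICT (key) DO UPDATE SET
        value = excluded.value
        , last_modified = excluded.last_modified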
@@ -332,16 +332,16 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
                     f"""INSERT INTO {self.tablename} (
                         key,
                         value
-                        {
+                        {"".join(f", {column_name}" for column_name in self.extra_columns.keys())}
                     )
-                    VALUES ({
+                    VALUES ({", ".join(["?"] * (2 + len(self.extra_columns)))})""",
                     item,
                 )
             except sqlite3.IntegrityError:
                 self._conn.execute(
                     f"""UPDATE {self.tablename} SET
                         value = ?
-                        {
+                        {"".join(f", {column_name} = ?" for column_name in self.extra_columns.keys())}
                     WHERE key = ?""",
                     (*item[1:], item[0]),
                 )
datahub/utilities/hive_schema_to_avro.py
CHANGED

@@ -142,10 +142,10 @@ class HiveColumnToAvroConverter:
             fields.append({"name": field_name, "type": field_type})
 
         if kwargs.get("ustruct_seqn") is not None:
-            struct_name = f
+            struct_name = f"__structn_{kwargs['ustruct_seqn']}_{str(uuid.uuid4()).replace('-', '')}"
 
         else:
-            struct_name = f
+            struct_name = f"__struct_{str(uuid.uuid4()).replace('-', '')}"
         return {
             "type": "record",
             "name": struct_name,
datahub/utilities/logging_manager.py
CHANGED

@@ -130,9 +130,9 @@ class _ColorLogFormatter(logging.Formatter):
         # Mimic our default format, but with color.
         message_fg = self.MESSAGE_COLORS.get(record.levelname)
         return (
-            f
+            f"{click.style(f'[{self.formatTime(record, self.datefmt)}]', fg='green', dim=True)} "
             f"{click.style(f'{record.levelname:8}', fg=message_fg)} "
-            f
+            f"{click.style(f'{{{record.name}:{record.lineno}}}', fg='blue', dim=True)} - "
             f"{click.style(record.getMessage(), fg=message_fg)}"
         )
 