acryl-datahub 0.15.0.2rc7__py3-none-any.whl → 0.15.0.3rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3rc1.dist-info}/METADATA +2378 -2380
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3rc1.dist-info}/RECORD +161 -161
- datahub/__init__.py +1 -1
- datahub/api/entities/assertion/assertion_operator.py +3 -5
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/assertion_operator.py +3 -5
- datahub/api/entities/dataproduct/dataproduct.py +4 -4
- datahub/api/entities/dataset/dataset.py +2 -1
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/cli/cli_utils.py +1 -1
- datahub/cli/delete_cli.py +16 -2
- datahub/cli/docker_cli.py +6 -6
- datahub/cli/lite_cli.py +2 -2
- datahub/cli/migrate.py +3 -3
- datahub/cli/specific/assertions_cli.py +3 -3
- datahub/cli/timeline_cli.py +1 -1
- datahub/configuration/common.py +1 -2
- datahub/configuration/config_loader.py +73 -50
- datahub/configuration/git.py +2 -2
- datahub/configuration/time_window_config.py +10 -5
- datahub/emitter/mce_builder.py +4 -8
- datahub/emitter/mcp_patch_builder.py +1 -2
- datahub/ingestion/api/incremental_lineage_helper.py +2 -8
- datahub/ingestion/api/report.py +1 -2
- datahub/ingestion/api/source_helpers.py +1 -1
- datahub/ingestion/extractor/json_schema_util.py +3 -3
- datahub/ingestion/extractor/schema_util.py +3 -5
- datahub/ingestion/fs/s3_fs.py +3 -3
- datahub/ingestion/glossary/datahub_classifier.py +6 -4
- datahub/ingestion/graph/client.py +4 -6
- datahub/ingestion/run/pipeline.py +8 -7
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/source/abs/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/abs/source.py +19 -8
- datahub/ingestion/source/aws/glue.py +11 -11
- datahub/ingestion/source/aws/s3_boto_utils.py +3 -3
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +2 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +6 -6
- datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py +8 -4
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +15 -9
- datahub/ingestion/source/bigquery_v2/lineage.py +9 -9
- datahub/ingestion/source/bigquery_v2/queries.py +1 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +3 -3
- datahub/ingestion/source/bigquery_v2/usage.py +3 -3
- datahub/ingestion/source/cassandra/cassandra.py +0 -1
- datahub/ingestion/source/cassandra/cassandra_utils.py +4 -4
- datahub/ingestion/source/confluent_schema_registry.py +6 -6
- datahub/ingestion/source/csv_enricher.py +29 -29
- datahub/ingestion/source/datahub/datahub_database_reader.py +4 -2
- datahub/ingestion/source/dbt/dbt_cloud.py +13 -13
- datahub/ingestion/source/dbt/dbt_common.py +9 -7
- datahub/ingestion/source/dremio/dremio_api.py +4 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +3 -3
- datahub/ingestion/source/elastic_search.py +4 -4
- datahub/ingestion/source/fivetran/config.py +4 -0
- datahub/ingestion/source/fivetran/fivetran.py +15 -5
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +3 -3
- datahub/ingestion/source/gcs/gcs_source.py +5 -3
- datahub/ingestion/source/ge_data_profiler.py +4 -5
- datahub/ingestion/source/ge_profiling_config.py +3 -3
- datahub/ingestion/source/iceberg/iceberg.py +3 -3
- datahub/ingestion/source/identity/azure_ad.py +3 -3
- datahub/ingestion/source/identity/okta.py +3 -3
- datahub/ingestion/source/kafka/kafka.py +11 -9
- datahub/ingestion/source/kafka_connect/kafka_connect.py +2 -3
- datahub/ingestion/source/kafka_connect/sink_connectors.py +3 -3
- datahub/ingestion/source/kafka_connect/source_connectors.py +3 -3
- datahub/ingestion/source/looker/looker_common.py +19 -19
- datahub/ingestion/source/looker/looker_config.py +3 -3
- datahub/ingestion/source/looker/looker_source.py +25 -25
- datahub/ingestion/source/looker/looker_template_language.py +3 -3
- datahub/ingestion/source/looker/looker_usage.py +5 -7
- datahub/ingestion/source/looker/lookml_concept_context.py +6 -6
- datahub/ingestion/source/looker/lookml_source.py +13 -15
- datahub/ingestion/source/looker/view_upstream.py +5 -5
- datahub/ingestion/source/mlflow.py +4 -4
- datahub/ingestion/source/mongodb.py +6 -4
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +24 -26
- datahub/ingestion/source/openapi.py +9 -9
- datahub/ingestion/source/powerbi/config.py +12 -12
- datahub/ingestion/source/powerbi/m_query/parser.py +11 -11
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +26 -24
- datahub/ingestion/source/powerbi/m_query/resolver.py +13 -13
- datahub/ingestion/source/powerbi/powerbi.py +6 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +9 -9
- datahub/ingestion/source/qlik_sense/qlik_api.py +1 -1
- datahub/ingestion/source/redshift/config.py +3 -3
- datahub/ingestion/source/redshift/query.py +77 -47
- datahub/ingestion/source/redshift/redshift.py +12 -12
- datahub/ingestion/source/redshift/usage.py +8 -8
- datahub/ingestion/source/s3/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/s3/source.py +1 -1
- datahub/ingestion/source/salesforce.py +26 -25
- datahub/ingestion/source/schema/json_schema.py +1 -1
- datahub/ingestion/source/sigma/sigma.py +3 -3
- datahub/ingestion/source/sigma/sigma_api.py +12 -10
- datahub/ingestion/source/snowflake/snowflake_config.py +9 -7
- datahub/ingestion/source/snowflake/snowflake_connection.py +6 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +6 -6
- datahub/ingestion/source/snowflake/snowflake_tag.py +7 -7
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_utils.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_v2.py +13 -4
- datahub/ingestion/source/sql/athena.py +1 -3
- datahub/ingestion/source/sql/clickhouse.py +8 -14
- datahub/ingestion/source/sql/oracle.py +1 -3
- datahub/ingestion/source/sql/sql_generic_profiler.py +1 -2
- datahub/ingestion/source/sql/teradata.py +16 -3
- datahub/ingestion/source/state/profiling_state_handler.py +3 -3
- datahub/ingestion/source/state/redundant_run_skip_handler.py +5 -7
- datahub/ingestion/source/state/stale_entity_removal_handler.py +3 -3
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +9 -9
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/tableau/tableau.py +48 -49
- datahub/ingestion/source/unity/config.py +3 -1
- datahub/ingestion/source/unity/proxy.py +1 -1
- datahub/ingestion/source/unity/source.py +3 -3
- datahub/ingestion/source/unity/usage.py +3 -1
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +3 -3
- datahub/ingestion/source/usage/usage_common.py +1 -1
- datahub/ingestion/transformer/add_dataset_dataproduct.py +4 -4
- datahub/ingestion/transformer/add_dataset_properties.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +4 -4
- datahub/ingestion/transformer/extract_ownership_from_tags.py +3 -3
- datahub/ingestion/transformer/tags_to_terms.py +7 -7
- datahub/integrations/assertion/snowflake/compiler.py +10 -10
- datahub/lite/duckdb_lite.py +12 -10
- datahub/metadata/_schema_classes.py +1 -1
- datahub/metadata/schema.avsc +6 -2
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +4 -2
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -0
- datahub/secret/secret_common.py +14 -8
- datahub/specific/aspect_helpers/custom_properties.py +1 -2
- datahub/sql_parsing/schema_resolver.py +5 -10
- datahub/sql_parsing/sql_parsing_aggregator.py +16 -16
- datahub/sql_parsing/sqlglot_lineage.py +5 -4
- datahub/sql_parsing/sqlglot_utils.py +3 -2
- datahub/telemetry/stats.py +1 -2
- datahub/testing/mcp_diff.py +1 -1
- datahub/utilities/file_backed_collections.py +10 -10
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/logging_manager.py +2 -2
- datahub/utilities/lossy_collections.py +3 -3
- datahub/utilities/mapping.py +3 -3
- datahub/utilities/serialized_lru_cache.py +3 -1
- datahub/utilities/sqlalchemy_query_combiner.py +6 -6
- datahub/utilities/sqllineage_patch.py +1 -1
- datahub/utilities/stats_collections.py +3 -1
- datahub/utilities/urns/urn_iter.py +2 -2
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3rc1.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3rc1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3rc1.dist-info}/top_level.txt +0 -0
datahub/__init__.py
CHANGED
datahub/api/entities/assertion/assertion_operator.py
CHANGED
@@ -20,15 +20,13 @@ class Operator(Protocol):
 
     operator: str
 
-    def id(self) -> str:
-        ...
+    def id(self) -> str: ...
 
-    def generate_parameters(self) -> AssertionStdParametersClass:
-        ...
+    def generate_parameters(self) -> AssertionStdParametersClass: ...
 
 
 def _generate_assertion_std_parameter(
-    value: Union[str, int, float, list]
+    value: Union[str, int, float, list],
 ) -> AssertionStdParameterClass:
     if isinstance(value, str):
         return AssertionStdParameterClass(
datahub/api/entities/datacontract/assertion_operator.py
CHANGED
@@ -19,15 +19,13 @@ class Operator(Protocol):
 
     operator: str
 
-    def id(self) -> str:
-        ...
+    def id(self) -> str: ...
 
-    def generate_parameters(self) -> AssertionStdParametersClass:
-        ...
+    def generate_parameters(self) -> AssertionStdParametersClass: ...
 
 
 def _generate_assertion_std_parameter(
-    value: Union[str, int, float]
+    value: Union[str, int, float],
 ) -> AssertionStdParameterClass:
     if isinstance(value, str):
         return AssertionStdParameterClass(
datahub/api/entities/dataproduct/dataproduct.py
CHANGED
@@ -321,9 +321,9 @@ class DataProduct(ConfigModel):
 
     @classmethod
     def from_datahub(cls, graph: DataHubGraph, id: str) -> DataProduct:
-        data_product_properties: Optional[
-            DataProductPropertiesClass
-        ] = graph.get_aspect(id, DataProductPropertiesClass)
+        data_product_properties: Optional[DataProductPropertiesClass] = (
+            graph.get_aspect(id, DataProductPropertiesClass)
+        )
         domains: Optional[DomainsClass] = graph.get_aspect(id, DomainsClass)
         assert domains, "Data Product must have an associated domain. Found none."
         owners: Optional[OwnershipClass] = graph.get_aspect(id, OwnershipClass)
@@ -438,7 +438,7 @@ class DataProduct(ConfigModel):
         for replace_index, replace_value in patches_replace.items():
             list_to_manipulate[replace_index] = replace_value
 
-        for
+        for drop_value in patches_drop.values():
             list_to_manipulate.remove(drop_value)
 
         for add_value in patches_add:
datahub/api/entities/dataset/dataset.py
CHANGED
@@ -266,7 +266,8 @@ class Dataset(BaseModel):
         if self.schema_metadata.fields:
             for field in self.schema_metadata.fields:
                 field_urn = field.urn or make_schema_field_urn(
-                    self.urn,
+                    self.urn,  # type: ignore[arg-type]
+                    field.id,  # type: ignore[arg-type]
                 )
                 assert field_urn.startswith("urn:li:schemaField:")
 
datahub/api/entities/structuredproperties/structuredproperties.py
CHANGED
@@ -118,9 +118,9 @@ class StructuredProperties(ConfigModel):
         id = StructuredPropertyUrn.from_string(self.urn).id
         if self.qualified_name is not None:
             # ensure that qualified name and ID match
-            assert (
-                self.qualified_name == id
-            ), "ID in the urn and the qualified_name must match"
+            assert self.qualified_name == id, (
+                "ID in the urn and the qualified_name must match"
+            )
         return id
 
     @validator("urn", pre=True, always=True)
@@ -184,9 +184,9 @@ class StructuredProperties(ConfigModel):
 
     @classmethod
     def from_datahub(cls, graph: DataHubGraph, urn: str) -> "StructuredProperties":
-        structured_property: Optional[
-            StructuredPropertyDefinitionClass
-        ] = graph.get_aspect(urn, StructuredPropertyDefinitionClass)
+        structured_property: Optional[StructuredPropertyDefinitionClass] = (
+            graph.get_aspect(urn, StructuredPropertyDefinitionClass)
+        )
         if structured_property is None:
             raise Exception(
                 "StructuredPropertyDefinition aspect is None. Unable to create structured property."
datahub/cli/cli_utils.py
CHANGED
@@ -412,7 +412,7 @@ def generate_access_token(
 def ensure_has_system_metadata(
     event: Union[
         MetadataChangeProposal, MetadataChangeProposalWrapper, MetadataChangeEvent
-    ]
+    ],
 ) -> None:
     if event.systemMetadata is None:
         event.systemMetadata = SystemMetadataClass()
datahub/cli/delete_cli.py
CHANGED
@@ -265,6 +265,11 @@ def undo_by_filter(
     type=str,
     help="Urn of the entity to delete, for single entity deletion",
 )
+@click.option(
+    "--urn-file",
+    required=False,
+    help="Path of file with urns (one per line) to be deleted",
+)
 @click.option(
     "-a",
     "--aspect",
@@ -353,6 +358,7 @@ def undo_by_filter(
 @telemetry.with_telemetry()
 def by_filter(
     urn: Optional[str],
+    urn_file: Optional[str],
     aspect: Optional[str],
     force: bool,
     soft: bool,
@@ -373,6 +379,7 @@ def by_filter(
     # Validate the cli arguments.
     _validate_user_urn_and_filters(
         urn=urn,
+        urn_file=urn_file,
         entity_type=entity_type,
         platform=platform,
         env=env,
@@ -429,6 +436,12 @@ def by_filter(
                 batch_size=batch_size,
             )
         )
+    elif urn_file:
+        with open(urn_file, "r") as r:
+            urns = []
+            for line in r.readlines():
+                urn = line.strip().strip('"')
+                urns.append(urn)
     else:
         urns = list(
             graph.get_urns_by_filter(
@@ -537,6 +550,7 @@ def _delete_urns_parallel(
 
 def _validate_user_urn_and_filters(
     urn: Optional[str],
+    urn_file: Optional[str],
     entity_type: Optional[str],
     platform: Optional[str],
     env: Optional[str],
@@ -549,9 +563,9 @@ def _validate_user_urn_and_filters(
         raise click.UsageError(
             "You cannot provide both an urn and a filter rule (entity-type / platform / env / query)."
         )
-    elif not urn and not (entity_type or platform or env or query):
+    elif not urn and not urn_file and not (entity_type or platform or env or query):
         raise click.UsageError(
-            "You must provide either an urn or at least one filter (entity-type / platform / env / query) in order to delete entities."
+            "You must provide either an urn or urn_file or at least one filter (entity-type / platform / env / query) in order to delete entities."
         )
     elif query:
         logger.warning(
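The new --urn-file option reads one urn per line, stripping surrounding whitespace and double quotes, and feeds the resulting list into the same deletion flow as a filter-based selection. A hypothetical invocation, assuming the by_filter function shown above is the one behind the `datahub delete` command and using an illustrative file name:

    datahub delete --urn-file urns_to_delete.txt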
datahub/cli/docker_cli.py
CHANGED
@@ -296,9 +296,9 @@ def _restore(
     restore_indices: Optional[bool],
     primary_restore_file: Optional[str],
 ) -> int:
-    assert (
-        restore_primary or restore_indices
-    ), "Either restore_primary or restore_indices must be set"
+    assert restore_primary or restore_indices, (
+        "Either restore_primary or restore_indices must be set"
+    )
     msg = "datahub> "
     if restore_primary:
         msg += f"Will restore primary database from {primary_restore_file}. "
@@ -314,9 +314,9 @@ def _restore(
         assert primary_restore_file
         resolved_restore_file = os.path.expanduser(primary_restore_file)
         logger.info(f"Restoring primary db from backup at {resolved_restore_file}")
-        assert os.path.exists(
-            resolved_restore_file
-        ), f"File {resolved_restore_file} does not exist"
+        assert os.path.exists(resolved_restore_file), (
+            f"File {resolved_restore_file} does not exist"
+        )
         with open(resolved_restore_file) as fp:
             result = subprocess.run(
                 [
datahub/cli/lite_cli.py
CHANGED
@@ -176,7 +176,7 @@ def get(
         )
     )
     end_time = time.time()
-    logger.debug(f"Time taken: {int((end_time - start_time)*1000.0)} millis")
+    logger.debug(f"Time taken: {int((end_time - start_time) * 1000.0)} millis")
 
 
 @lite.command()
@@ -228,7 +228,7 @@ def ls(path: Optional[str]) -> None:
     try:
         browseables = lite.ls(path)
         end_time = time.time()
-        logger.debug(f"Time taken: {int((end_time - start_time)*1000.0)} millis")
+        logger.debug(f"Time taken: {int((end_time - start_time) * 1000.0)} millis")
         auto_complete: List[AutoComplete] = [
             b.auto_complete for b in browseables if b.auto_complete is not None
         ]
datahub/cli/migrate.py
CHANGED
@@ -426,9 +426,9 @@ def batch_get_ids(
             entities_yielded += 1
             log.debug(f"yielding {x}")
             yield x
-        assert (
-            entities_yielded == num_entities
-        ), "Did not delete all entities, try running this command again!"
+        assert entities_yielded == num_entities, (
+            "Did not delete all entities, try running this command again!"
+        )
     else:
         log.error(f"Failed to execute batch get with {str(response.content)}")
         response.raise_for_status()
@@ -136,9 +136,9 @@ def extras_list_to_dict(extras: List[str]) -> Dict[str, str]:
     extra_properties: Dict[str, str] = dict()
     for x in extras:
         parts = x.split("=")
-        assert (
-            len(parts) == 2
-        ), f"Invalid value for extras {x}, should be in format key=value"
+        assert len(parts) == 2, (
+            f"Invalid value for extras {x}, should be in format key=value"
+        )
         extra_properties[parts[0]] = parts[1]
     return extra_properties
 
datahub/cli/timeline_cli.py
CHANGED
@@ -50,7 +50,7 @@ def pretty_id(id: Optional[str]) -> str:
     if id.startswith("urn:li:dataset"):
         dataset_key = dataset_urn_to_key(id)
         if dataset_key:
-            return f"{click.style('dataset', fg='cyan')}:{click.style(dataset_key.platform[len('urn:li:dataPlatform:'):], fg='white')}:{click.style(dataset_key.name, fg='white')}"
+            return f"{click.style('dataset', fg='cyan')}:{click.style(dataset_key.platform[len('urn:li:dataPlatform:') :], fg='white')}:{click.style(dataset_key.name, fg='white')}"
     # failed to prettify, return original
     return id
 
datahub/configuration/common.py
CHANGED
@@ -200,8 +200,7 @@ class IgnorableError(MetaError):
 
 @runtime_checkable
 class ExceptionWithProps(Protocol):
-    def get_telemetry_props(self) -> Dict[str, Any]:
-        ...
+    def get_telemetry_props(self) -> Dict[str, Any]: ...
 
 
 def should_show_stack_trace(exc: Exception) -> bool:
datahub/configuration/config_loader.py
CHANGED
@@ -19,64 +19,87 @@ from datahub.configuration.yaml import YamlConfigurationMechanism
 Environ = Mapping[str, str]
 
 
-def _resolve_element(element: str, environ: Environ) -> str:
-    if re.search(r"(\$\{).+(\})", element):
-        return expand(element, nounset=True, environ=environ)
-    elif element.startswith("$"):
-        try:
-            return expand(element, nounset=True, environ=environ)
-        except UnboundVariable:
-            return element
-    else:
-        return element
-
-
-def _resolve_list(ele_list: list, environ: Environ) -> list:
-    new_v: list = []
-    for ele in ele_list:
-        if isinstance(ele, str):
-            new_v.append(_resolve_element(ele, environ=environ))
-        elif isinstance(ele, list):
-            new_v.append(_resolve_list(ele, environ=environ))
-        elif isinstance(ele, dict):
-            new_v.append(resolve_env_variables(ele, environ=environ))
-        else:
-            new_v.append(ele)
-    return new_v
-
-
 def resolve_env_variables(config: dict, environ: Environ) -> dict:
-    new_dict: Dict[Any, Any] = {}
-    for k, v in config.items():
-        if isinstance(v, dict):
-            new_dict[k] = resolve_env_variables(v, environ=environ)
-        elif isinstance(v, list):
-            new_dict[k] = _resolve_list(v, environ=environ)
-        elif isinstance(v, str):
-            new_dict[k] = _resolve_element(v, environ=environ)
-        else:
-            new_dict[k] = v
-    return new_dict
+    # TODO: This is kept around for backwards compatibility.
+    return EnvResolver(environ).resolve(config)
 
 
 def list_referenced_env_variables(config: dict) -> Set[str]:
-    # This is
-    mock.get.side_effect = mock_get_env
+    # TODO: This is kept around for backwards compatibility.
+    return EnvResolver(environ=os.environ).list_referenced_variables(config)
+
+
+class EnvResolver:
+    def __init__(self, environ: Environ, strict_env_syntax: bool = False):
+        self.environ = environ
+        self.strict_env_syntax = strict_env_syntax
+
+    def resolve(self, config: dict) -> dict:
+        return self._resolve_dict(config)
+
+    @classmethod
+    def list_referenced_variables(
+        cls,
+        config: dict,
+        strict_env_syntax: bool = False,
+    ) -> Set[str]:
+        # This is a bit of a hack, but expandvars does a bunch of escaping
+        # and other logic that we don't want to duplicate here.
+
+        vars = set()
+
+        def mock_get_env(key: str, default: Optional[str] = None) -> str:
+            vars.add(key)
+            if default is not None:
+                return default
+            return "mocked_value"
+
+        mock = unittest.mock.MagicMock()
+        mock.get.side_effect = mock_get_env
+
+        resolver = EnvResolver(environ=mock, strict_env_syntax=strict_env_syntax)
+        resolver._resolve_dict(config)
+
+        return vars
+
+    def _resolve_element(self, element: str) -> str:
+        if re.search(r"(\$\{).+(\})", element):
+            return expand(element, nounset=True, environ=self.environ)
+        elif not self.strict_env_syntax and element.startswith("$"):
+            try:
+                return expand(element, nounset=True, environ=self.environ)
+            except UnboundVariable:
+                # TODO: This fallback is kept around for backwards compatibility, but
+                # doesn't make a ton of sense from first principles.
+                return element
+        else:
+            return element
+
+    def _resolve_list(self, ele_list: list) -> list:
+        new_v: list = []
+        for ele in ele_list:
+            if isinstance(ele, str):
+                new_v.append(self._resolve_element(ele))
+            elif isinstance(ele, list):
+                new_v.append(self._resolve_list(ele))
+            elif isinstance(ele, dict):
+                new_v.append(self._resolve_dict(ele))
+            else:
+                new_v.append(ele)
+        return new_v
+
+    def _resolve_dict(self, config: dict) -> dict:
+        new_dict: Dict[Any, Any] = {}
+        for k, v in config.items():
+            if isinstance(v, dict):
+                new_dict[k] = self._resolve_dict(v)
+            elif isinstance(v, list):
+                new_dict[k] = self._resolve_list(v)
+            elif isinstance(v, str):
+                new_dict[k] = self._resolve_element(v)
+            else:
+                new_dict[k] = v
+        return new_dict
 
 
 WRITE_TO_FILE_DIRECTIVE_PREFIX = "__DATAHUB_TO_FILE_"
@@ -159,7 +182,7 @@ def load_config_file(
 
     config = raw_config.copy()
    if resolve_env_vars:
-        config =
+        config = EnvResolver(environ=os.environ).resolve(config)
     if process_directives:
         config = _process_directives(config)
 
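The env-variable handling previously spread across module-level helpers is now folded into the EnvResolver class above, with an opt-in strict_env_syntax flag that limits expansion to ${VAR}-style references. A small usage sketch, assuming EnvResolver is imported from datahub.configuration.config_loader (the module this hunk modifies); the recipe fragment and variable name are illustrative only:

    import os

    from datahub.configuration.config_loader import EnvResolver

    # Illustrative recipe fragment; DATAHUB_GMS_URL is an assumed variable name.
    config = {"sink": {"config": {"server": "${DATAHUB_GMS_URL}"}}}

    # Walks nested dicts and lists, expanding env references from the given mapping.
    resolved = EnvResolver(environ=os.environ).resolve(config)

    # Reports which variables a config refers to, without requiring them to be set.
    referenced = EnvResolver.list_referenced_variables(config)  # {"DATAHUB_GMS_URL"}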
datahub/configuration/git.py
CHANGED
@@ -121,9 +121,9 @@ class GitInfo(GitReference):
 
         repo: str = values["repo"]
         if repo.startswith(_GITHUB_PREFIX):
-            return f"git@github.com:{repo[len(_GITHUB_PREFIX):]}.git"
+            return f"git@github.com:{repo[len(_GITHUB_PREFIX) :]}.git"
         elif repo.startswith(_GITLAB_PREFIX):
-            return f"git@gitlab.com:{repo[len(_GITLAB_PREFIX):]}.git"
+            return f"git@gitlab.com:{repo[len(_GITLAB_PREFIX) :]}.git"
         else:
             raise ValueError(
                 "Unable to infer repo_ssh_locator from repo. Please set repo_ssh_locator manually."
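The change above is formatting only, but for orientation: the validator derives an SSH locator by stripping the hosting prefix from the configured repo. A minimal standalone sketch of that transformation, assuming _GITHUB_PREFIX is the plain https URL prefix (the real constant lives in datahub/configuration/git.py and may differ):

    # Hypothetical standalone sketch; not the library's actual helper.
    _GITHUB_PREFIX = "https://github.com/"  # assumption about the constant's value

    def to_ssh_locator(repo: str) -> str:
        # "https://github.com/acryldata/datahub" -> "git@github.com:acryldata/datahub.git"
        return f"git@github.com:{repo[len(_GITHUB_PREFIX) :]}.git"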
datahub/configuration/time_window_config.py
CHANGED
@@ -47,7 +47,10 @@ class BaseTimeWindowConfig(ConfigModel):
         default_factory=lambda: datetime.now(tz=timezone.utc),
         description="Latest date of lineage/usage to consider. Default: Current time in UTC",
     )
-    start_time: datetime = Field(
+    start_time: datetime = Field(
+        default=None,
+        description="Earliest date of lineage/usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`). You can also specify relative time with respect to end_time such as '-7 days' Or '-7d'.",
+    )  # type: ignore
 
     @pydantic.validator("start_time", pre=True, always=True)
     def default_start_time(
@@ -63,12 +66,14 @@ class BaseTimeWindowConfig(ConfigModel):
             # This is where start_time str is resolved to datetime
             try:
                 delta = parse_relative_timespan(v)
-                assert delta < timedelta(
-                    0
-                ), "Relative start time should start with minus sign (-) e.g. '-2 days'."
+                assert delta < timedelta(0), (
+                    "Relative start time should start with minus sign (-) e.g. '-2 days'."
+                )
                 assert abs(delta) >= get_bucket_duration_delta(
                     values["bucket_duration"]
-                ), "Relative start time should be in terms of configured bucket duration. e.g '-2 days' or '-2 hours'."
+                ), (
+                    "Relative start time should be in terms of configured bucket duration. e.g '-2 days' or '-2 hours'."
+                )
 
                 # The end_time's default value is not yet populated, in which case
                 # we can just manually generate it here.
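The field declaration and validator above accept either an absolute start_time or a relative offset from end_time. A hedged sketch of the intended behaviour, assuming BaseTimeWindowConfig can be built from a plain dict with pydantic's parse_obj and that the default bucket duration is no coarser than the offset used:

    from datahub.configuration.time_window_config import BaseTimeWindowConfig

    # "-2 days" is resolved relative to end_time by the validator shown above;
    # the class and field names come from the hunk, the values are illustrative.
    config = BaseTimeWindowConfig.parse_obj({"start_time": "-2 days"})
    assert config.start_time < config.end_time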
datahub/emitter/mce_builder.py
CHANGED
@@ -88,13 +88,11 @@ def get_sys_time() -> int:
 
 
 @overload
-def make_ts_millis(ts: None) -> None:
-    ...
+def make_ts_millis(ts: None) -> None: ...
 
 
 @overload
-def make_ts_millis(ts: datetime) -> int:
-    ...
+def make_ts_millis(ts: datetime) -> int: ...
 
 
 def make_ts_millis(ts: Optional[datetime]) -> Optional[int]:
@@ -105,13 +103,11 @@ def make_ts_millis(ts: Optional[datetime]) -> Optional[int]:
 
 
 @overload
-def parse_ts_millis(ts: float) -> datetime:
-    ...
+def parse_ts_millis(ts: float) -> datetime: ...
 
 
 @overload
-def parse_ts_millis(ts: None) -> None:
-    ...
+def parse_ts_millis(ts: None) -> None: ...
 
 
 def parse_ts_millis(ts: Optional[float]) -> Optional[datetime]:
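These overloads only collapse the `...` bodies onto one line; the helpers themselves are unchanged. A brief usage sketch of the timestamp round-trip, assuming both functions are importable from datahub.emitter.mce_builder (the module this hunk modifies):

    from datetime import datetime, timezone

    from datahub.emitter.mce_builder import make_ts_millis, parse_ts_millis

    millis = make_ts_millis(datetime.now(tz=timezone.utc))  # datetime -> int epoch millis
    restored = parse_ts_millis(millis)  # int epoch millis -> datetime
    assert make_ts_millis(None) is None and parse_ts_millis(None) is None  # None passes through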
datahub/ingestion/api/incremental_lineage_helper.py
CHANGED
@@ -55,15 +55,9 @@ def convert_chart_info_to_patch(
         aspect.externalUrl
     ).set_type(aspect.type).set_title(aspect.title).set_access(
         aspect.access
-    ).set_last_modified(
-        aspect.lastModified
-    ).set_last_refreshed(
+    ).set_last_modified(aspect.lastModified).set_last_refreshed(
         aspect.lastRefreshed
-    ).set_description(
-        aspect.description
-    ).add_inputs(
-        aspect.inputs
-    )
+    ).set_description(aspect.description).add_inputs(aspect.inputs)
 
     values = patch_builder.build()
     if values:
datahub/ingestion/api/report.py
CHANGED
datahub/ingestion/api/source_helpers.py
CHANGED
@@ -48,7 +48,7 @@ logger = logging.getLogger(__name__)
 
 
 def auto_workunit(
-    stream: Iterable[Union[MetadataChangeEventClass, MetadataChangeProposalWrapper]]
+    stream: Iterable[Union[MetadataChangeEventClass, MetadataChangeProposalWrapper]],
 ) -> Iterable[MetadataWorkUnit]:
     """Convert a stream of MCEs and MCPs to a stream of :class:`MetadataWorkUnit`s."""
 
datahub/ingestion/extractor/json_schema_util.py
CHANGED
@@ -131,9 +131,9 @@ class FieldPath:
         for i, schema_type in enumerate(p.schema_types):
             if schema_type == schema_str:
                 # return the corresponding type for the schema that's a match
-                assert (
-                    len(p.type) > i
-                ), f"p.type({len(p.type)})) and p.schema_types({len(p.schema_types)}) should have the same length"
+                assert len(p.type) > i, (
+                    f"p.type({len(p.type)})) and p.schema_types({len(p.schema_types)}) should have the same length"
+                )
                 return p.type[i]
         return None
 
datahub/ingestion/extractor/schema_util.py
CHANGED
@@ -263,15 +263,13 @@ class AvroToMceSchemaConverter:
     @overload
     def _get_underlying_type_if_option_as_union(
         schema: SchemaOrField, default: SchemaOrField
-    ) -> SchemaOrField:
-        ...
+    ) -> SchemaOrField: ...
 
     @staticmethod
     @overload
     def _get_underlying_type_if_option_as_union(
         schema: SchemaOrField, default: Optional[SchemaOrField] = None
-    ) -> Optional[SchemaOrField]:
-        ...
+    ) -> Optional[SchemaOrField]: ...
 
     @staticmethod
     def _get_underlying_type_if_option_as_union(
@@ -386,7 +384,7 @@ class AvroToMceSchemaConverter:
 
         if "deprecated" in merged_props:
             description = (
-                f
+                f'<span style="color:red">DEPRECATED: {merged_props["deprecated"]}</span>\n'
                 + description
                 if description
                 else ""
datahub/ingestion/fs/s3_fs.py
CHANGED
@@ -17,9 +17,9 @@ def parse_s3_path(path: str) -> "S3Path":
 
 def assert_ok_status(s3_response):
     is_ok = s3_response["ResponseMetadata"]["HTTPStatusCode"] == 200
-    assert (
-        is_ok
-    ), f"Failed to fetch S3 object, error message: {s3_response['Error']['Message']}"
+    assert is_ok, (
+        f"Failed to fetch S3 object, error message: {s3_response['Error']['Message']}"
+    )
 
 
 @dataclass
datahub/ingestion/glossary/datahub_classifier.py
CHANGED
@@ -148,9 +148,9 @@ class DataHubClassifierConfig(ConfigModel):
                 weight,
             ) in custom_infotype_config.Prediction_Factors_and_Weights.dict().items():
                 if weight > 0:
-                    assert (
-                        getattr(custom_infotype_config, factor) is not None
-                    ), f"Missing Configuration for Prediction Factor {factor} for Custom Info Type {custom_infotype}"
+                    assert getattr(custom_infotype_config, factor) is not None, (
+                        f"Missing Configuration for Prediction Factor {factor} for Custom Info Type {custom_infotype}"
+                    )
 
             # Custom infotype supports only regex based prediction for column values
             if custom_infotype_config.Prediction_Factors_and_Weights.Values > 0:
@@ -158,7 +158,9 @@ class DataHubClassifierConfig(ConfigModel):
                 assert (
                     custom_infotype_config.Values.prediction_type
                     == ValuePredictionType.REGEX
-                ), f"Invalid Prediction Type for Values for Custom Info Type {custom_infotype}. Only `regex` is supported."
+                ), (
+                    f"Invalid Prediction Type for Values for Custom Info Type {custom_infotype}. Only `regex` is supported."
+                )
 
         return info_types_config
 