acryl-datahub 0.15.0.2rc7__py3-none-any.whl → 0.15.0.3rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3rc1.dist-info}/METADATA +2378 -2380
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3rc1.dist-info}/RECORD +161 -161
- datahub/__init__.py +1 -1
- datahub/api/entities/assertion/assertion_operator.py +3 -5
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/assertion_operator.py +3 -5
- datahub/api/entities/dataproduct/dataproduct.py +4 -4
- datahub/api/entities/dataset/dataset.py +2 -1
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/cli/cli_utils.py +1 -1
- datahub/cli/delete_cli.py +16 -2
- datahub/cli/docker_cli.py +6 -6
- datahub/cli/lite_cli.py +2 -2
- datahub/cli/migrate.py +3 -3
- datahub/cli/specific/assertions_cli.py +3 -3
- datahub/cli/timeline_cli.py +1 -1
- datahub/configuration/common.py +1 -2
- datahub/configuration/config_loader.py +73 -50
- datahub/configuration/git.py +2 -2
- datahub/configuration/time_window_config.py +10 -5
- datahub/emitter/mce_builder.py +4 -8
- datahub/emitter/mcp_patch_builder.py +1 -2
- datahub/ingestion/api/incremental_lineage_helper.py +2 -8
- datahub/ingestion/api/report.py +1 -2
- datahub/ingestion/api/source_helpers.py +1 -1
- datahub/ingestion/extractor/json_schema_util.py +3 -3
- datahub/ingestion/extractor/schema_util.py +3 -5
- datahub/ingestion/fs/s3_fs.py +3 -3
- datahub/ingestion/glossary/datahub_classifier.py +6 -4
- datahub/ingestion/graph/client.py +4 -6
- datahub/ingestion/run/pipeline.py +8 -7
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/source/abs/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/abs/source.py +19 -8
- datahub/ingestion/source/aws/glue.py +11 -11
- datahub/ingestion/source/aws/s3_boto_utils.py +3 -3
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +2 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +6 -6
- datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py +8 -4
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +15 -9
- datahub/ingestion/source/bigquery_v2/lineage.py +9 -9
- datahub/ingestion/source/bigquery_v2/queries.py +1 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +3 -3
- datahub/ingestion/source/bigquery_v2/usage.py +3 -3
- datahub/ingestion/source/cassandra/cassandra.py +0 -1
- datahub/ingestion/source/cassandra/cassandra_utils.py +4 -4
- datahub/ingestion/source/confluent_schema_registry.py +6 -6
- datahub/ingestion/source/csv_enricher.py +29 -29
- datahub/ingestion/source/datahub/datahub_database_reader.py +4 -2
- datahub/ingestion/source/dbt/dbt_cloud.py +13 -13
- datahub/ingestion/source/dbt/dbt_common.py +9 -7
- datahub/ingestion/source/dremio/dremio_api.py +4 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +3 -3
- datahub/ingestion/source/elastic_search.py +4 -4
- datahub/ingestion/source/fivetran/config.py +4 -0
- datahub/ingestion/source/fivetran/fivetran.py +15 -5
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +3 -3
- datahub/ingestion/source/gcs/gcs_source.py +5 -3
- datahub/ingestion/source/ge_data_profiler.py +4 -5
- datahub/ingestion/source/ge_profiling_config.py +3 -3
- datahub/ingestion/source/iceberg/iceberg.py +3 -3
- datahub/ingestion/source/identity/azure_ad.py +3 -3
- datahub/ingestion/source/identity/okta.py +3 -3
- datahub/ingestion/source/kafka/kafka.py +11 -9
- datahub/ingestion/source/kafka_connect/kafka_connect.py +2 -3
- datahub/ingestion/source/kafka_connect/sink_connectors.py +3 -3
- datahub/ingestion/source/kafka_connect/source_connectors.py +3 -3
- datahub/ingestion/source/looker/looker_common.py +19 -19
- datahub/ingestion/source/looker/looker_config.py +3 -3
- datahub/ingestion/source/looker/looker_source.py +25 -25
- datahub/ingestion/source/looker/looker_template_language.py +3 -3
- datahub/ingestion/source/looker/looker_usage.py +5 -7
- datahub/ingestion/source/looker/lookml_concept_context.py +6 -6
- datahub/ingestion/source/looker/lookml_source.py +13 -15
- datahub/ingestion/source/looker/view_upstream.py +5 -5
- datahub/ingestion/source/mlflow.py +4 -4
- datahub/ingestion/source/mongodb.py +6 -4
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +24 -26
- datahub/ingestion/source/openapi.py +9 -9
- datahub/ingestion/source/powerbi/config.py +12 -12
- datahub/ingestion/source/powerbi/m_query/parser.py +11 -11
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +26 -24
- datahub/ingestion/source/powerbi/m_query/resolver.py +13 -13
- datahub/ingestion/source/powerbi/powerbi.py +6 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +9 -9
- datahub/ingestion/source/qlik_sense/qlik_api.py +1 -1
- datahub/ingestion/source/redshift/config.py +3 -3
- datahub/ingestion/source/redshift/query.py +77 -47
- datahub/ingestion/source/redshift/redshift.py +12 -12
- datahub/ingestion/source/redshift/usage.py +8 -8
- datahub/ingestion/source/s3/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/s3/source.py +1 -1
- datahub/ingestion/source/salesforce.py +26 -25
- datahub/ingestion/source/schema/json_schema.py +1 -1
- datahub/ingestion/source/sigma/sigma.py +3 -3
- datahub/ingestion/source/sigma/sigma_api.py +12 -10
- datahub/ingestion/source/snowflake/snowflake_config.py +9 -7
- datahub/ingestion/source/snowflake/snowflake_connection.py +6 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +6 -6
- datahub/ingestion/source/snowflake/snowflake_tag.py +7 -7
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_utils.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_v2.py +13 -4
- datahub/ingestion/source/sql/athena.py +1 -3
- datahub/ingestion/source/sql/clickhouse.py +8 -14
- datahub/ingestion/source/sql/oracle.py +1 -3
- datahub/ingestion/source/sql/sql_generic_profiler.py +1 -2
- datahub/ingestion/source/sql/teradata.py +16 -3
- datahub/ingestion/source/state/profiling_state_handler.py +3 -3
- datahub/ingestion/source/state/redundant_run_skip_handler.py +5 -7
- datahub/ingestion/source/state/stale_entity_removal_handler.py +3 -3
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +9 -9
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/tableau/tableau.py +48 -49
- datahub/ingestion/source/unity/config.py +3 -1
- datahub/ingestion/source/unity/proxy.py +1 -1
- datahub/ingestion/source/unity/source.py +3 -3
- datahub/ingestion/source/unity/usage.py +3 -1
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +3 -3
- datahub/ingestion/source/usage/usage_common.py +1 -1
- datahub/ingestion/transformer/add_dataset_dataproduct.py +4 -4
- datahub/ingestion/transformer/add_dataset_properties.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +4 -4
- datahub/ingestion/transformer/extract_ownership_from_tags.py +3 -3
- datahub/ingestion/transformer/tags_to_terms.py +7 -7
- datahub/integrations/assertion/snowflake/compiler.py +10 -10
- datahub/lite/duckdb_lite.py +12 -10
- datahub/metadata/_schema_classes.py +1 -1
- datahub/metadata/schema.avsc +6 -2
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +4 -2
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -0
- datahub/secret/secret_common.py +14 -8
- datahub/specific/aspect_helpers/custom_properties.py +1 -2
- datahub/sql_parsing/schema_resolver.py +5 -10
- datahub/sql_parsing/sql_parsing_aggregator.py +16 -16
- datahub/sql_parsing/sqlglot_lineage.py +5 -4
- datahub/sql_parsing/sqlglot_utils.py +3 -2
- datahub/telemetry/stats.py +1 -2
- datahub/testing/mcp_diff.py +1 -1
- datahub/utilities/file_backed_collections.py +10 -10
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/logging_manager.py +2 -2
- datahub/utilities/lossy_collections.py +3 -3
- datahub/utilities/mapping.py +3 -3
- datahub/utilities/serialized_lru_cache.py +3 -1
- datahub/utilities/sqlalchemy_query_combiner.py +6 -6
- datahub/utilities/sqllineage_patch.py +1 -1
- datahub/utilities/stats_collections.py +3 -1
- datahub/utilities/urns/urn_iter.py +2 -2
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3rc1.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3rc1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3rc1.dist-info}/top_level.txt +0 -0
@@ -38,16 +38,16 @@ def merge_parent_and_child_fields(
     # Create a map field-name vs field
     child_field_map: dict = {}
     for field in child_fields:
-        assert (
-            NAME in field
-        ), "A lookml view must have a name field"  # name is required field of lookml field array
+        assert NAME in field, (
+            "A lookml view must have a name field"
+        )  # name is required field of lookml field array

        child_field_map[field[NAME]] = field

     for field in parent_fields:
-        assert (
-            NAME in field
-        ), "A lookml view must have a name field"  # name is required field of lookml field array
+        assert NAME in field, (
+            "A lookml view must have a name field"
+        )  # name is required field of lookml field array

        if field[NAME] in child_field_map:
            # Fields defined in the child view take higher precedence.
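Most of the hunks in this diff, starting with the one above, are the same mechanical restyle of multi-line assert statements: the condition moves back onto the `assert` line and only the message is wrapped in parentheses. A minimal, self-contained sketch of the before/after shapes (the `field` dictionary here is illustrative, not data from this package):

```python
# Illustrative stand-in for the lookml field check shown above.
NAME = "name"
field = {"name": "dim_user", "type": "string"}

# Old layout: the condition is parenthesized and the message trails the closing paren.
assert (
    NAME in field
), "A lookml view must have a name field"

# New layout: the condition stays on the assert line; the message is parenthesized.
assert NAME in field, (
    "A lookml view must have a name field"
)
```

Both layouts raise the same `AssertionError` with the same message when the key is missing; only the formatting changes.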
@@ -482,14 +482,14 @@ class LookMLSource(StatefulIngestionSourceBase):
         if self.source_config.project_name is not None:
             return self.source_config.project_name

-        assert (
-            self.looker_client is not None
-        ), "Failed to find a configured Looker API client"
+        assert self.looker_client is not None, (
+            "Failed to find a configured Looker API client"
+        )
         try:
             model = self.looker_client.lookml_model(model_name, fields="project_name")
-            assert (
-                model.project_name is not None
-            ), f"Failed to find a project name for model {model_name}"
+            assert model.project_name is not None, (
+                f"Failed to find a project name for model {model_name}"
+            )
             return model.project_name
         except SDKError:
             raise ValueError(
@@ -541,9 +541,9 @@ class LookMLSource(StatefulIngestionSourceBase):
             self.reporter.git_clone_latency = datetime.now() - start_time
             self.source_config.base_folder = checkout_dir.resolve()

-            self.base_projects_folder[
-                BASE_PROJECT_NAME
-            ] = self.source_config.base_folder
+            self.base_projects_folder[BASE_PROJECT_NAME] = (
+                self.source_config.base_folder
+            )

         visited_projects: Set[str] = set()

@@ -641,9 +641,9 @@ class LookMLSource(StatefulIngestionSourceBase):
                     repo_url=remote_project.url,
                 )

-                self.base_projects_folder[
-                    remote_project.name
-                ] = p_checkout_dir.resolve()
+                self.base_projects_folder[remote_project.name] = (
+                    p_checkout_dir.resolve()
+                )
                 repo = p_cloner.get_last_repo_cloned()
                 assert repo
                 remote_git_info = GitInfo(
@@ -930,9 +930,7 @@ class LookMLSource(StatefulIngestionSourceBase):
                 logger.warning(
                     f"view {maybe_looker_view.id.view_name} from model {model_name}, connection {model.connection} was previously processed via model {prev_model_name}, connection {prev_model_connection} and will likely lead to incorrect lineage to the underlying tables"
                 )
-                if (
-                    not self.source_config.emit_reachable_views_only
-                ):
+                if not self.source_config.emit_reachable_views_only:
                     logger.warning(
                         "Consider enabling the `emit_reachable_views_only` flag to handle this case."
                     )
@@ -484,11 +484,11 @@ class NativeDerivedViewUpstream(AbstractViewUpstream):
         )

     def __get_upstream_dataset_urn(self) -> List[str]:
-        current_view_id: Optional[
-            LookerViewId
-        ] = self.looker_view_id_cache.get_looker_view_id(
-            view_name=self.view_context.name(),
-            base_folder_path=self.view_context.base_folder_path,
+        current_view_id: Optional[LookerViewId] = (
+            self.looker_view_id_cache.get_looker_view_id(
+                view_name=self.view_context.name(),
+                base_folder_path=self.view_context.base_folder_path,
+            )
         )

         # Current view will always be present in cache. assert will silence the lint
@@ -172,10 +172,10 @@ class MLflowSource(Source):
         """
         Get all Registered Models in MLflow Model Registry.
         """
-        registered_models: Iterable[
-            RegisteredModel
-        ] = self._traverse_mlflow_search_func(
-            search_func=self.client.search_registered_models,
+        registered_models: Iterable[RegisteredModel] = (
+            self._traverse_mlflow_search_func(
+                search_func=self.client.search_registered_models,
+            )
         )
         return registered_models

@@ -288,7 +288,9 @@ class MongoDBSource(StatefulIngestionSourceBase):

         # See https://pymongo.readthedocs.io/en/stable/examples/datetimes.html#handling-out-of-range-datetimes
         self.mongo_client = MongoClient(
-            self.config.connect_uri,
+            self.config.connect_uri,
+            datetime_conversion="DATETIME_AUTO",
+            **options,  # type: ignore
         )

         # This cheaply tests the connection. For details, see
@@ -470,9 +472,9 @@ class MongoDBSource(StatefulIngestionSourceBase):
             )
             # Add this information to the custom properties so user can know they are looking at downsampled schema
             dataset_properties.customProperties["schema.downsampled"] = "True"
-            dataset_properties.customProperties[
-                "schema.totalFields"
-            ] = f"{collection_schema_size}"
+            dataset_properties.customProperties["schema.totalFields"] = (
+                f"{collection_schema_size}"
+            )

         logger.debug(f"Size of collection fields = {len(collection_fields)}")
         # append each schema field (sort so output is consistent)
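The first MongoDB hunk above passes `datetime_conversion="DATETIME_AUTO"` so that BSON datetimes outside `datetime.datetime`'s range can still be decoded. A minimal sketch of the same client construction, assuming pymongo 4.3 or newer; the URI and the `options` dict are placeholders, not values from this package:

```python
from pymongo import MongoClient

# Placeholder connection options; the real source builds these from its config.
options = {"serverSelectionTimeoutMS": 5000}

# DATETIME_AUTO decodes out-of-range BSON datetimes as bson.datetime_ms.DatetimeMS
# instead of failing; in-range values still come back as datetime.datetime.
mongo_client = MongoClient(
    "mongodb://localhost:27017",  # placeholder URI
    datetime_conversion="DATETIME_AUTO",
    **options,
)
```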
@@ -286,7 +286,7 @@ class Neo4jSource(Source):
         df = self.get_neo4j_metadata(
             "CALL apoc.meta.schema() YIELD value UNWIND keys(value) AS key RETURN key, value[key] AS value;"
         )
-        for index, row in df.iterrows():
+        for _, row in df.iterrows():
             try:
                 yield MetadataWorkUnit(
                     id=row["key"],
datahub/ingestion/source/nifi.py CHANGED
@@ -184,9 +184,9 @@ class NifiSourceConfig(EnvConfigMixin):

     @validator("site_url")
     def validator_site_url(cls, site_url: str) -> str:
-        assert site_url.startswith(
-            ("http://", "https://")
-        ), "site_url must start with http:// or https://"
+        assert site_url.startswith(("http://", "https://")), (
+            "site_url must start with http:// or https://"
+        )

         if not site_url.endswith("/"):
             site_url = site_url + "/"
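The validator change above is again only a restyle, but the check it formats is a useful pattern: `str.startswith` accepts a tuple of prefixes, so a single call covers both schemes. A minimal sketch on a stripped-down, hypothetical config model, assuming the pydantic v1-style `validator` API used by this source:

```python
from pydantic import BaseModel, validator


class ExampleNifiConfig(BaseModel):
    # Hypothetical reduced model; only the field needed for the check is shown.
    site_url: str

    @validator("site_url")
    def validator_site_url(cls, site_url: str) -> str:
        # str.startswith accepts a tuple, so one call checks both schemes.
        assert site_url.startswith(("http://", "https://")), (
            "site_url must start with http:// or https://"
        )
        if not site_url.endswith("/"):
            site_url = site_url + "/"
        return site_url


print(ExampleNifiConfig(site_url="https://nifi.example.com").site_url)
# https://nifi.example.com/
```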
@@ -487,9 +487,7 @@ class NifiSource(Source):
     def get_report(self) -> SourceReport:
         return self.report

-    def update_flow(
-        self, pg_flow_dto: Dict, recursion_level: int = 0
-    ) -> None:  # noqa: C901
+    def update_flow(self, pg_flow_dto: Dict, recursion_level: int = 0) -> None:  # noqa: C901
         """
         Update self.nifi_flow with contents of the input process group `pg_flow_dto`
         """
@@ -548,16 +546,16 @@ class NifiSource(Source):
         for inputPort in flow_dto.get("inputPorts", []):
             component = inputPort.get("component")
             if inputPort.get("allowRemoteAccess"):
-                self.nifi_flow.remotely_accessible_ports[
-                    component.get("id")
-                ] = NifiComponent(
-                    component.get("id"),
-                    component.get("name"),
-                    component.get("type"),
-                    component.get("parentGroupId"),
-                    NifiType.INPUT_PORT,
-                    comments=component.get("comments"),
-                    status=component.get("status", {}).get("runStatus"),
+                self.nifi_flow.remotely_accessible_ports[component.get("id")] = (
+                    NifiComponent(
+                        component.get("id"),
+                        component.get("name"),
+                        component.get("type"),
+                        component.get("parentGroupId"),
+                        NifiType.INPUT_PORT,
+                        comments=component.get("comments"),
+                        status=component.get("status", {}).get("runStatus"),
+                    )
                 )
                 logger.debug(f"Adding remotely accessible port {component.get('id')}")
             else:
@@ -576,16 +574,16 @@ class NifiSource(Source):
         for outputPort in flow_dto.get("outputPorts", []):
             component = outputPort.get("component")
             if outputPort.get("allowRemoteAccess"):
-                self.nifi_flow.remotely_accessible_ports[
-                    component.get("id")
-                ] = NifiComponent(
-                    component.get("id"),
-                    component.get("name"),
-                    component.get("type"),
-                    component.get("parentGroupId"),
-                    NifiType.OUTPUT_PORT,
-                    comments=component.get("comments"),
-                    status=component.get("status", {}).get("runStatus"),
+                self.nifi_flow.remotely_accessible_ports[component.get("id")] = (
+                    NifiComponent(
+                        component.get("id"),
+                        component.get("name"),
+                        component.get("type"),
+                        component.get("parentGroupId"),
+                        NifiType.OUTPUT_PORT,
+                        comments=component.get("comments"),
+                        status=component.get("status", {}).get("runStatus"),
+                    )
                 )
                 logger.debug(f"Adding remotely accessible port {component.get('id')}")
             else:
@@ -101,16 +101,16 @@ class OpenApiConfig(ConfigModel):
             # details there once, and then use that session for all requests.
             self.token = f"Bearer {self.bearer_token}"
         else:
-            assert (
-                "url_complement" in self.get_token.keys()
-            ), "When 'request_type' is set to 'get', an url_complement is needed for the request."
+            assert "url_complement" in self.get_token.keys(), (
+                "When 'request_type' is set to 'get', an url_complement is needed for the request."
+            )
             if self.get_token["request_type"] == "get":
-                assert (
-                    "{username}" in self.get_token["url_complement"]
-                ), "we expect the keyword {username} to be present in the url"
-                assert (
-                    "{password}" in self.get_token["url_complement"]
-                ), "we expect the keyword {password} to be present in the url"
+                assert "{username}" in self.get_token["url_complement"], (
+                    "we expect the keyword {username} to be present in the url"
+                )
+                assert "{password}" in self.get_token["url_complement"], (
+                    "we expect the keyword {password} to be present in the url"
+                )
                 url4req = self.get_token["url_complement"].replace(
                     "{username}", self.username
                 )
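The asserts above guard a simple templating step: the configured `url_complement` must contain `{username}` and `{password}` placeholders, which are substituted before the token request is issued. A self-contained sketch of that logic; the `get_token` values below are made up for illustration, not a real endpoint:

```python
# Hypothetical config values; the real ones come from the OpenApiConfig recipe.
get_token = {
    "request_type": "get",
    "url_complement": "auth/token?user={username}&pwd={password}",
}
username, password = "alice", "s3cret"

assert "{username}" in get_token["url_complement"], (
    "we expect the keyword {username} to be present in the url"
)
assert "{password}" in get_token["url_complement"], (
    "we expect the keyword {password} to be present in the url"
)

url4req = get_token["url_complement"].replace("{username}", username)
url4req = url4req.replace("{password}", password)
print(url4req)  # auth/token?user=alice&pwd=s3cret
```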
@@ -225,9 +225,9 @@ class PowerBiDashboardSourceReport(StaleEntityRemovalSourceReport):
 def default_for_dataset_type_mapping() -> Dict[str, str]:
     dict_: dict = {}
     for item in SupportedDataPlatform:
-        dict_[
-            item.value.powerbi_data_platform_name
-        ] = item.value.datahub_data_platform_name
+        dict_[item.value.powerbi_data_platform_name] = (
+            item.value.datahub_data_platform_name
+        )

     return dict_

@@ -303,15 +303,15 @@ class PowerBiDashboardSourceConfig(
     # Dataset type mapping PowerBI support many type of data-sources. Here user needs to define what type of PowerBI
     # DataSource needs to be mapped to corresponding DataHub Platform DataSource. For example, PowerBI `Snowflake` is
     # mapped to DataHub `snowflake` PowerBI `PostgreSQL` is mapped to DataHub `postgres` and so on.
-    dataset_type_mapping: Union[
-        Dict[str, str], Dict[str, PlatformDetail]
-    ] = pydantic.Field(
-        default_factory=default_for_dataset_type_mapping,
-        description="[deprecated] Use server_to_platform_instance instead. Mapping of PowerBI datasource type to "
-        "DataHub supported datasources."
-        "You can configured platform instance for dataset lineage. "
-        "See Quickstart Recipe for mapping",
-        hidden_from_docs=True,
+    dataset_type_mapping: Union[Dict[str, str], Dict[str, PlatformDetail]] = (
+        pydantic.Field(
+            default_factory=default_for_dataset_type_mapping,
+            description="[deprecated] Use server_to_platform_instance instead. Mapping of PowerBI datasource type to "
+            "DataHub supported datasources."
+            "You can configured platform instance for dataset lineage. "
+            "See Quickstart Recipe for mapping",
+            hidden_from_docs=True,
+        )
     )
     # PowerBI datasource's server to platform instance mapping
     server_to_platform_instance: Dict[
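The `dataset_type_mapping` field above keeps its behavior: a `pydantic.Field` whose default is built lazily by `default_for_dataset_type_mapping`. A simplified sketch of the `default_factory` pattern; the two platform names are illustrative rather than the full `SupportedDataPlatform` enum, and the DataHub-specific `hidden_from_docs` extra is omitted:

```python
from typing import Dict

import pydantic


def default_for_dataset_type_mapping() -> Dict[str, str]:
    # The real source iterates SupportedDataPlatform; this is a hard-coded stand-in.
    return {"Snowflake": "snowflake", "PostgreSQL": "postgres"}


class ExampleConfig(pydantic.BaseModel):
    dataset_type_mapping: Dict[str, str] = pydantic.Field(
        default_factory=default_for_dataset_type_mapping,
        description="[deprecated] Use server_to_platform_instance instead.",
    )


print(ExampleConfig().dataset_type_mapping)
# {'Snowflake': 'snowflake', 'PostgreSQL': 'postgres'}
```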
@@ -128,17 +128,17 @@ def get_upstream_tables(
         reporter.m_query_parse_successes += 1

         try:
-            lineage: List[
-                datahub.ingestion.source.powerbi.m_query.data_classes.Lineage
-            ] = resolver.MQueryResolver(
-                table=table,
-                parse_tree=parse_tree,
-                reporter=reporter,
-                parameters=parameters,
-            ).resolve_to_lineage(
-                ctx=ctx,
-                config=config,
-                platform_instance_resolver=platform_instance_resolver,
+            lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = (
+                resolver.MQueryResolver(
+                    table=table,
+                    parse_tree=parse_tree,
+                    reporter=reporter,
+                    parameters=parameters,
+                ).resolve_to_lineage(
+                    ctx=ctx,
+                    config=config,
+                    platform_instance_resolver=platform_instance_resolver,
+                )
             )

             if lineage:
@@ -170,8 +170,7 @@ class AbstractLineage(ABC):
         logger.debug(f"Processing arguments {arguments}")

         if (
-            len(arguments)
-            >= 4  # [0] is warehouse FQDN.
+            len(arguments) >= 4  # [0] is warehouse FQDN.
             # [1] is endpoint, we are not using it.
             # [2] is "Catalog" key
             # [3] is catalog's value
@@ -215,16 +214,16 @@ class AbstractLineage(ABC):
             native_sql_parser.remove_special_characters(query)
         )

-        parsed_result: Optional[
-            "SqlParsingResult"
-        ] = native_sql_parser.parse_custom_sql(
-            ctx=self.ctx,
-            query=query,
-            platform=self.get_platform_pair().datahub_data_platform_name,
-            platform_instance=platform_detail.platform_instance,
-            env=platform_detail.env,
-            database=database,
-            schema=schema,
+        parsed_result: Optional["SqlParsingResult"] = (
+            native_sql_parser.parse_custom_sql(
+                ctx=self.ctx,
+                query=query,
+                platform=self.get_platform_pair().datahub_data_platform_name,
+                platform_instance=platform_detail.platform_instance,
+                env=platform_detail.env,
+                database=database,
+                schema=schema,
+            )
         )

         if parsed_result is None:
@@ -410,9 +409,9 @@ class DatabricksLineage(AbstractLineage):
             f"Processing Databrick data-access function detail {data_access_func_detail}"
         )
         table_detail: Dict[str, str] = {}
-        temp_accessor: Optional[
-            IdentifierAccessor
-        ] = data_access_func_detail.identifier_accessor
+        temp_accessor: Optional[IdentifierAccessor] = (
+            data_access_func_detail.identifier_accessor
+        )

         while temp_accessor:
             # Condition to handle databricks M-query pattern where table, schema and database all are present in
@@ -647,11 +646,13 @@ class ThreeStepDataAccessPattern(AbstractLineage, ABC):
         db_name: str = data_access_func_detail.identifier_accessor.items["Name"]  # type: ignore
         # Second is schema name
         schema_name: str = cast(
-            IdentifierAccessor, data_access_func_detail.identifier_accessor.next  # type: ignore
+            IdentifierAccessor,
+            data_access_func_detail.identifier_accessor.next,  # type: ignore
         ).items["Name"]
         # Third is table name
         table_name: str = cast(
-            IdentifierAccessor, data_access_func_detail.identifier_accessor.next.next  # type: ignore
+            IdentifierAccessor,
+            data_access_func_detail.identifier_accessor.next.next,  # type: ignore
         ).items["Name"]

         qualified_table_name: str = f"{db_name}.{schema_name}.{table_name}"
@@ -768,10 +769,13 @@ class NativeQueryLineage(AbstractLineage):
         ):  # database name is explicitly set
             return database

-        return get_next_item(  # database name is set in Name argument
-            data_access_tokens, "Name"
-        ) or get_next_item(  # If both above arguments are not available, then try Catalog
-            data_access_tokens, "Catalog"
+        return (
+            get_next_item(  # database name is set in Name argument
+                data_access_tokens, "Name"
+            )
+            or get_next_item(  # If both above arguments are not available, then try Catalog
+                data_access_tokens, "Catalog"
+            )
         )

     def create_lineage(
@@ -819,9 +823,7 @@ class NativeQueryLineage(AbstractLineage):
             values=tree_function.remove_whitespaces_from_list(
                 tree_function.token_values(flat_argument_list[1])
             ),
-        )[
-            0
-        ]  # Remove any whitespaces and double quotes character
+        )[0]  # Remove any whitespaces and double quotes character

         server = tree_function.strip_char_from_list([data_access_tokens[2]])[0]
@@ -188,9 +188,9 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):
         # - The inner function Table.TransformColumnTypes takes #"Removed Columns1"
         #   (a table reference) as its first argument
         # - Its result is then passed as the first argument to Table.SplitColumn
-        second_invoke_expression: Optional[
-            Tree
-        ] = tree_function.first_invoke_expression_func(first_argument)
+        second_invoke_expression: Optional[Tree] = (
+            tree_function.first_invoke_expression_func(first_argument)
+        )
         if second_invoke_expression:
             # 1. The First argument is function call
             # 2. That function's first argument references next table variable
@@ -304,14 +304,14 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):
             logger.debug(v_statement.pretty())
             return None

-        invoke_expression: Optional[
-            Tree
-        ] = tree_function.first_invoke_expression_func(rh_tree)
+        invoke_expression: Optional[Tree] = (
+            tree_function.first_invoke_expression_func(rh_tree)
+        )

         if invoke_expression is not None:
-            result: Union[
-                DataAccessFunctionDetail, List[str], None
-            ] = self._process_invoke_expression(invoke_expression)
+            result: Union[DataAccessFunctionDetail, List[str], None] = (
+                self._process_invoke_expression(invoke_expression)
+            )
             if result is None:
                 return None  # No need to process some un-expected grammar found while processing invoke_expression
             if isinstance(result, DataAccessFunctionDetail):
@@ -368,9 +368,9 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):
             return lineage

         # Parse M-Query and use output_variable as root of tree and create instance of DataAccessFunctionDetail
-        table_links: List[
-            DataAccessFunctionDetail
-        ] = self.create_data_access_functional_detail(output_variable)
+        table_links: List[DataAccessFunctionDetail] = (
+            self.create_data_access_functional_detail(output_variable)
+        )

         # Each item is data-access function
         for f_detail in table_links:
@@ -390,7 +390,7 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):

             # From supported_resolver enum get respective handler like AmazonRedshift or Snowflake or Oracle or NativeQuery and create instance of it
             # & also pass additional information that will be need to generate lineage
-            pattern_handler:
+            pattern_handler: AbstractLineage = supported_resolver.handler()(
                 ctx=ctx,
                 table=self.table,
                 config=config,
@@ -945,9 +945,9 @@ class Mapper:
         # Convert tiles to charts
         ds_mcps, chart_mcps = self.to_datahub_chart(dashboard.tiles, workspace)
         # Lets convert dashboard to datahub dashboard
-        dashboard_mcps: List[
-            MetadataChangeProposalWrapper
-        ] = self.to_datahub_dashboard_mcp(dashboard, workspace, chart_mcps, user_mcps)
+        dashboard_mcps: List[MetadataChangeProposalWrapper] = (
+            self.to_datahub_dashboard_mcp(dashboard, workspace, chart_mcps, user_mcps)
+        )

         # Now add MCPs in sequence
         mcps.extend(ds_mcps)
@@ -1472,9 +1472,9 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
     def _get_dashboard_patch_work_unit(
         self, work_unit: MetadataWorkUnit
     ) -> Optional[MetadataWorkUnit]:
-        dashboard_info_aspect: Optional[
-            DashboardInfoClass
-        ] = work_unit.get_aspect_of_type(DashboardInfoClass)
+        dashboard_info_aspect: Optional[DashboardInfoClass] = (
+            work_unit.get_aspect_of_type(DashboardInfoClass)
+        )

         if dashboard_info_aspect and self.source_config.patch_metadata:
             return convert_dashboard_info_to_patch(
@@ -425,9 +425,9 @@ class DataResolverBase(ABC):

             response.raise_for_status()

-            assert (
-                Constant.VALUE in response.json()
-            ), "'value' key is not present in paginated response"
+            assert Constant.VALUE in response.json(), (
+                "'value' key is not present in paginated response"
+            )

             if not response.json()[Constant.VALUE]:  # if it is an empty list then break
                 break
@@ -447,13 +447,13 @@ class DataResolverBase(ABC):
         if raw_app is None:
             return None

-        assert (
-            Constant.ID in raw_app
-        ), f"{Constant.ID} is required field not present in server response"
+        assert Constant.ID in raw_app, (
+            f"{Constant.ID} is required field not present in server response"
+        )

-        assert (
-            Constant.NAME in raw_app
-        ), f"{Constant.NAME} is required field not present in server response"
+        assert Constant.NAME in raw_app, (
+            f"{Constant.NAME} is required field not present in server response"
+        )

         return App(
             id=raw_app[Constant.ID],
@@ -156,7 +156,7 @@ class QlikAPI:
                 )
                 if chart:
                     if not chart.title:
-                        chart.title = f"Object {i+1} of Sheet '{sheet.title}'"
+                        chart.title = f"Object {i + 1} of Sheet '{sheet.title}'"
                     sheet.charts.append(chart)
             websocket_connection.handle.pop()
         return sheet
@@ -178,9 +178,9 @@ class RedshiftConfig(
     @root_validator(pre=True)
     def check_email_is_set_on_usage(cls, values):
         if values.get("include_usage_statistics"):
-            assert (
-                "email_domain" in values and values["email_domain"]
-            ), "email_domain needs to be set if usage is enabled"
+            assert "email_domain" in values and values["email_domain"], (
+                "email_domain needs to be set if usage is enabled"
+            )
         return values

     @root_validator(skip_on_failure=True)
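The Redshift hunk is the same assert restyle inside a cross-field check: when usage statistics are enabled, `email_domain` must be set. A minimal sketch of that validation on a stripped-down, hypothetical model, assuming the pydantic v1-style `root_validator` API used in this file:

```python
from typing import Optional

from pydantic import BaseModel, root_validator


class ExampleRedshiftConfig(BaseModel):
    # Hypothetical reduced model; the real RedshiftConfig has many more fields.
    include_usage_statistics: bool = False
    email_domain: Optional[str] = None

    @root_validator(pre=True)
    def check_email_is_set_on_usage(cls, values):
        if values.get("include_usage_statistics"):
            assert "email_domain" in values and values["email_domain"], (
                "email_domain needs to be set if usage is enabled"
            )
        return values


ExampleRedshiftConfig(include_usage_statistics=False)  # ok: usage disabled
# ExampleRedshiftConfig(include_usage_statistics=True)  # would raise a ValidationError
```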