PyPI - ebi-eva-common-pyutils - Versions diffs - 0.7.4__py3-none-any.whl → 0.8.0__py3-none-any.whl - Mend

ebi-eva-common-pyutils 0.7.4__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

ebi_eva_common_pyutils/assembly_utils.py CHANGED Viewed

@@ -14,6 +14,7 @@
 import http
 import requests
+from requests import HTTPError
 from ebi_eva_common_pyutils.assembly import NCBIAssembly
 from ebi_eva_common_pyutils.ena_utils import download_xml_from_ena
@@ -33,7 +34,11 @@ def is_patch_assembly(assembly_accession: str) -> bool:
     Check if a given assembly is a patch assembly
     Please see: https://www.ncbi.nlm.nih.gov/grc/help/patches/
     """
-    xml_root = download_xml_from_ena(f'https://www.ebi.ac.uk/ena/browser/api/xml/{assembly_accession}')
+    try:
+        xml_root = download_xml_from_ena(f'https://www.ebi.ac.uk/ena/browser/api/xml/{assembly_accession}')
+    except HTTPError as e:
+        logger.warning(f'Failed to download assembly {assembly_accession} from ENA: {str(e)}')
+        return False
     xml_assembly = xml_root.xpath("//ASSEMBLY_ATTRIBUTE[TAG='count-patches']/VALUE")
     if len(xml_assembly) == 0:
         return False

{ebi_eva_common_pyutils-0.7.4.dist-info → ebi_eva_common_pyutils-0.8.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: ebi_eva_common_pyutils
-Version: 0.7.4
+Version: 0.8.0
 Summary: EBI EVA - Common Python Utilities
 Home-page: https://github.com/EBIVariation/eva-common-pyutils
 License: Apache

{ebi_eva_common_pyutils-0.7.4.dist-info → ebi_eva_common_pyutils-0.8.0.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 ebi_eva_common_pyutils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ebi_eva_common_pyutils/assembly_utils.py,sha256=CklyCGlCjlFp0e9pugg6kSsh5L0xfCe2qPvA2eLVtn0,4187
+ebi_eva_common_pyutils/assembly_utils.py,sha256=hpOxiZTxHJ-yRexuZ2yUVYUhZTkF3ee3dq4S2HJhgc8,4374
 ebi_eva_common_pyutils/biosamples_communicators.py,sha256=ZkemchAYGrHwqbGviJN5X80nYFizDNVTwUX3c_5PZcM,7799
 ebi_eva_common_pyutils/command_utils.py,sha256=PtelWWqcC0eOwIVesjwBw3F9KaXRzEE_uAUJhQFZ4l8,2340
 ebi_eva_common_pyutils/common_utils.py,sha256=ty_glvfRa3VGhnpAht4qtVkNNmv-IYfVtO958mY-BaA,1192
@@ -22,20 +22,18 @@ ebi_eva_common_pyutils/taxonomy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
 ebi_eva_common_pyutils/taxonomy/taxonomy.py,sha256=aXmRQ3NAaJotwmmOA2-u2XtcUT6iih-0_e-3QOxynoA,2578
 ebi_eva_common_pyutils/variation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ebi_eva_common_pyutils/variation/contig_utils.py,sha256=kMNEW_P2yPnd8Xx1tep19hy5ee7ojxz6ZOO1grTQsRQ,5230
-ebi_eva_common_pyutils-0.7.4.data/scripts/archive_directory.py,sha256=0lWJ0ju_AB2ni7lMnJXPFx6U2OdTGbe-WoQs-4BfKOM,4976
-ebi_eva_internal_pyutils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ebi_eva_common_pyutils-0.8.0.data/scripts/archive_directory.py,sha256=0lWJ0ju_AB2ni7lMnJXPFx6U2OdTGbe-WoQs-4BfKOM,4976
+ebi_eva_internal_pyutils/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 ebi_eva_internal_pyutils/archive_directory.py,sha256=IxVEfh_gaCiT652k0Q_-58fonRusy1yzXu7BCO8yVLo,4989
 ebi_eva_internal_pyutils/config_utils.py,sha256=EGRC5rsmU_ug7OY9-t1UW1XZXRsauSyZB9xPcBux8ts,7909
 ebi_eva_internal_pyutils/metadata_utils.py,sha256=t9PcXZdbfjDBP04GJenC4bxm2nOLd8oI_MP9eNe9IBQ,15221
 ebi_eva_internal_pyutils/mongo_utils.py,sha256=YxKHtb5ygDiGLOtEiiAMFCP2ow6FL9Kq0K5R0mWNdXY,3575
 ebi_eva_internal_pyutils/pg_utils.py,sha256=FUQVwiX_7F2-4sSzoaCVX2me0zAqR8nGIj6NW5d304A,4398
 ebi_eva_internal_pyutils/spring_properties.py,sha256=Tn207DmZehFt7oExseNsXFAnsxr7bX9yiGl4t9mpGVA,15165
-ebi_eva_internal_pyutils/mongodb/__init__.py,sha256=0oyTlkYZCV7udlPl09Zl-sDyE3c97QZMMTEFIa6uYIw,76
-ebi_eva_internal_pyutils/mongodb/mongo_database.py,sha256=kesaJaaxYFeF_uYZBgL8tbufGKUXll7bXb4WlOS9vKM,9596
-ebi_eva_internal_pyutils/nextflow/__init__.py,sha256=OOiJS8jZOz98q0t77NNog7aI_fFrVxi4kGmiSskuAqM,122
-ebi_eva_internal_pyutils/nextflow/nextflow_pipeline.py,sha256=ew623hhK8jmFLQjJwLZbgBmW9RTiJBEULVqHfIUv_dc,10114
-ebi_eva_common_pyutils-0.7.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-ebi_eva_common_pyutils-0.7.4.dist-info/METADATA,sha256=-dqz8UYrpmrdcGIKdkgV4m8slivOqND7OM-__q9Fs0s,1022
-ebi_eva_common_pyutils-0.7.4.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
-ebi_eva_common_pyutils-0.7.4.dist-info/top_level.txt,sha256=sXoiqiGU8vlMQpFWDlKrekxhlusk06AhkOH3kSvDT6c,48
-ebi_eva_common_pyutils-0.7.4.dist-info/RECORD,,
+ebi_eva_internal_pyutils/mongodb/__init__.py,sha256=cH89mspotx2u8XxvpaDjjLCaSQqE8-8cCd11s2LMvpg,74
+ebi_eva_internal_pyutils/mongodb/mongo_database.py,sha256=P6_PR9_KICxafypM1hESxkOJI52T098ynNUML2FzJac,9668
+ebi_eva_common_pyutils-0.8.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ebi_eva_common_pyutils-0.8.0.dist-info/METADATA,sha256=HI0gRr-e_clv_BenWgJWkHVIBSR5l4uq3i-iMarOjhk,1022
+ebi_eva_common_pyutils-0.8.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
+ebi_eva_common_pyutils-0.8.0.dist-info/top_level.txt,sha256=sXoiqiGU8vlMQpFWDlKrekxhlusk06AhkOH3kSvDT6c,48
+ebi_eva_common_pyutils-0.8.0.dist-info/RECORD,,

ebi_eva_internal_pyutils/__init__.py CHANGED Viewed

	@@ -0,0 +1 @@
1	+

ebi_eva_internal_pyutils/mongodb/__init__.py CHANGED Viewed

	@@ -1,3 +1 @@
1 1	from ebi_eva_internal_pyutils.mongodb.mongo_database import MongoDatabase
2	-
3	-

ebi_eva_internal_pyutils/mongodb/mongo_database.py CHANGED Viewed

@@ -101,6 +101,7 @@ class MongoDatabase(AppLogger):
                 self.mongo_handle[self.db_name][collection_name].create_index(index_keys, name=name, **index_info)
     def enable_sharding(self):
+        # From mongodb 6.0 all database have sharding enable by default
         self.mongo_handle.admin.command({"enableSharding": self.db_name})
     def shard_collections(self, collections_shard_key_map, collections_to_shard):

ebi_eva_internal_pyutils/nextflow/__init__.py DELETED Viewed

	@@ -1 +0,0 @@
1	- from ebi_eva_internal_pyutils.nextflow.nextflow_pipeline import LinearNextFlowPipeline, NextFlowPipeline, NextFlowProcess

ebi_eva_internal_pyutils/nextflow/nextflow_pipeline.py DELETED Viewed

@@ -1,195 +0,0 @@
-# Copyright 2021 EMBL - European Bioinformatics Institute
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Rationale for a Nextflow pipeline abstraction
-# ---------------------------------------------
-# Dynamic pipeline generation
-# Abstraction to represent process dependencies
-# Unit testability of individual steps without scattering logic between Python and Nextflow
-# Ability to combine pipelines
-import networkx as nx
-import os
-from typing import List, Dict, Union
-from ebi_eva_common_pyutils.logger import AppLogger
-from ebi_eva_common_pyutils.command_utils import run_command_with_output
-class NextFlowProcess:
-    def __init__(self, process_name: str, command_to_run: str, process_directives: Dict[str, str] = None) -> None:
-        """
-        Create a Nextflow process
-        :rtype: None
-        :param process_name: Name of the process - should be a valid identifier - ex: p1_merge
-        :type process_name: str
-        :param command_to_run: Command to be run - ex: bash -c "echo p1"
-        :type command_to_run: str
-        :param process_directives: Additional process directives - ex: {"memory": "4GB", "executor": "lsf"}
-        :type process_directives: dict
-        """
-        if not process_name.isidentifier():
-            raise ValueError(f"{process_name} is not a valid Nextflow process name")
-        self.process_name = process_name
-        self.success_flag = f"{self.process_name}_success"
-        self.command_to_run = command_to_run
-        self.process_directives = process_directives if process_directives else dict()
-class NextFlowPipeline(AppLogger):
-    def __init__(self, process_dependency_map: Dict[NextFlowProcess, List[NextFlowProcess]] = None) -> None:
-        """
-        Create a Nextflow pipeline with a process dependency map
-        :param process_dependency_map: Map of Nextflow processes and their corresponding dependencies
-        - ex: {p3 : [p2], p2: [p1]}  where p1, p2 and p3 are Nextflow processes that should be executed sequentially
-        """
-        # Modeling the dependency map as a DiGraph (Directed graph) is advantageous
-        # in ordering/combining flows and detecting cycles
-        self.process_dependency_map = nx.ordered.DiGraph()
-        if process_dependency_map:
-            self.add_dependencies(process_dependency_map)
-    def add_dependencies(self, process_dependency_map: Dict[NextFlowProcess, List[NextFlowProcess]]):
-        for process, dependencies in process_dependency_map.items():
-            if dependencies:
-                for dependency in dependencies:
-                    self.add_process_dependency(process, dependency)
-            else:
-                self.add_process_dependency(process, None)
-    def add_process_dependency(self, process: NextFlowProcess, dependency: Union[NextFlowProcess, None]):
-        if dependency:
-            self.process_dependency_map.add_edge(process, dependency)
-            if not nx.dag.is_directed_acyclic_graph(self.process_dependency_map):
-                raise ValueError(f"Cycles found in pipeline when adding process {process.process_name} "
-                                 f"and its dependency {dependency.process_name}")
-        else:
-            # If no dependency is specified, the process will just be a single node in the DAG
-            self.process_dependency_map.add_node(process)
-    def _write_to_pipeline_file(self, workflow_file_path: str):
-        with open(workflow_file_path, "a") as pipeline_file_handle:
-            pipeline_file_handle.write(self.__str__() + "\n")
-    def run_pipeline(self, workflow_file_path: str, nextflow_binary_path: str = 'nextflow',
-                     nextflow_config_path: str = None, working_dir: str = ".", resume: bool = False,
-                     other_args: dict = None):
-        # Remove pipeline file if it already exists
-        if os.path.exists(workflow_file_path):
-            os.remove(workflow_file_path)
-        self._write_to_pipeline_file(workflow_file_path)
-        workflow_command = f"cd {working_dir} && {nextflow_binary_path} run {workflow_file_path}"
-        workflow_command += f" -c {nextflow_config_path}" if nextflow_config_path else ""
-        workflow_command += f" -with-report {workflow_file_path}.report.html"
-        workflow_command += f" -with-dag {workflow_file_path}.dag.png"
-        workflow_command += " -resume" if resume else ""
-        workflow_command += " ".join([f" -{arg} {val}" for arg, val in other_args.items()]) if other_args else ""
-        run_command_with_output(f"Running pipeline {workflow_file_path}...", workflow_command)
-    @staticmethod
-    def join_pipelines(main_pipeline: 'NextFlowPipeline', dependent_pipeline: 'NextFlowPipeline',
-                       with_dependencies: bool = True) -> 'NextFlowPipeline':
-        """
-        Join two pipelines with or without dependencies
-        With Dependencies it returns a new pipeline where:
-            1) root processes are those of the main pipeline.
-            2) final processes are those of the dependent pipeline and
-            3) every root process of the dependent pipeline depends on the final processes of the main pipeline.
-        Without Dependencies it returns a new pipeline where:
-            1) the two pipeline are left independent
-            2) Only shared dependencies
-            3) every root process of the dependent pipeline depends on the final processes of the main pipeline.
-        """
-        joined_pipeline = NextFlowPipeline()
-        # Aggregate dependency maps of both pipelines
-        joined_pipeline.process_dependency_map = nx.compose(main_pipeline.process_dependency_map,
-                                                            dependent_pipeline.process_dependency_map)
-        if with_dependencies:
-            for final_process_in_main_pipeline in main_pipeline._get_final_processes():
-                for root_process_in_dependent_pipeline in dependent_pipeline._get_root_processes():
-                    joined_pipeline.add_process_dependency(root_process_in_dependent_pipeline,
-                                                           final_process_in_main_pipeline)
-        return joined_pipeline
-    def _get_root_processes(self) -> List[NextFlowProcess]:
-        # Root processes are those which have no dependencies
-        # See https://stackoverflow.com/a/62948641
-        roots = []
-        for component in nx.weakly_connected_components(self.process_dependency_map):
-            subgraph = self.process_dependency_map.subgraph(component)
-            roots.extend([n for n, d in subgraph.out_degree() if d == 0])
-        return roots
-    def _get_final_processes(self) -> List[NextFlowProcess]:
-        # Final processes are those which have no other processes depending on them
-        # See https://stackoverflow.com/a/62948641
-        roots = []
-        for component in nx.weakly_connected_components(self.process_dependency_map):
-            subgraph = self.process_dependency_map.subgraph(component)
-            roots.extend([n for n, d in subgraph.in_degree() if d == 0])
-        return roots
-    @staticmethod
-    def _get_process_repr(process: NextFlowProcess, dependencies: List[NextFlowProcess]) -> str:
-        process_directives_str = "\n".join([f"{key}='{value}'" for key, value in process.process_directives.items()])
-        input_dependencies = "val flag from true"
-        if dependencies:
-            input_dependencies = "\n".join([f"val {dependency.success_flag} from {dependency.success_flag}"
-                                            for dependency in dependencies])
-        return "\n".join(map(str.strip, f"""
-                    process {process.process_name} {{
-                    {process_directives_str}
-                    input:
-                    {input_dependencies}
-                    output:
-                    val true into {process.success_flag}
-                    script:
-                    \"\"\"
-                    {process.command_to_run}
-                    \"\"\"
-                    }}""".split("\n")))
-    def __str__(self):
-        # Order the list of nodes based on the dependency
-        # See https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.traversal.depth_first_search.dfs_postorder_nodes.html?highlight=dfs_postorder_nodes#networkx.algorithms.traversal.depth_first_search.dfs_postorder_nodes
-        ordered_list_of_processes_to_run = list(nx.dfs_postorder_nodes(self.process_dependency_map))
-        # Get a Nextflow pipeline representation of each process and its dependencies
-        return "\n\n".join([NextFlowPipeline._get_process_repr(process, list(self.process_dependency_map[process]))
-                            for process in ordered_list_of_processes_to_run])
-class LinearNextFlowPipeline(NextFlowPipeline):
-    """
-    Simple linear pipeline that supports resumption
-    """
-    previous_process: NextFlowProcess = None
-    def __init__(self, process_list: List[NextFlowProcess] = None):
-        dependency_map = {}
-        if process_list:
-            for index, process in enumerate(process_list):
-                dependency_map[process] = [] if index == 0 else [process_list[index - 1]]
-        super().__init__(dependency_map)
-    def add_process(self, process_name, command_to_run):
-        current_process = NextFlowProcess(process_name=process_name, command_to_run=command_to_run)
-        self._add_new_process(current_process)
-    def _add_new_process(self, current_process):
-        super().add_process_dependency(current_process, self.previous_process)
-        self.previous_process = current_process

{ebi_eva_common_pyutils-0.7.4.data → ebi_eva_common_pyutils-0.8.0.data}/scripts/archive_directory.py RENAMED Viewed

File without changes

{ebi_eva_common_pyutils-0.7.4.dist-info → ebi_eva_common_pyutils-0.8.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{ebi_eva_common_pyutils-0.7.4.dist-info → ebi_eva_common_pyutils-0.8.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{ebi_eva_common_pyutils-0.7.4.dist-info → ebi_eva_common_pyutils-0.8.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

ebi-eva-common-pyutils 0.7.4__py3-none-any.whl → 0.8.0__py3-none-any.whl

ebi-eva-common-pyutils 0.7.4__py3-none-any.whl → 0.8.0__py3-none-any.whl