ebi-eva-common-pyutils 0.6.15__1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. ebi_eva_common_pyutils/__init__.py +0 -0
  2. ebi_eva_common_pyutils/assembly/__init__.py +1 -0
  3. ebi_eva_common_pyutils/assembly/assembly.py +69 -0
  4. ebi_eva_common_pyutils/assembly_utils.py +91 -0
  5. ebi_eva_common_pyutils/biosamples_communicators.py +186 -0
  6. ebi_eva_common_pyutils/command_utils.py +54 -0
  7. ebi_eva_common_pyutils/common_utils.py +30 -0
  8. ebi_eva_common_pyutils/config.py +152 -0
  9. ebi_eva_common_pyutils/contig_alias/__init__.py +0 -0
  10. ebi_eva_common_pyutils/contig_alias/contig_alias.py +115 -0
  11. ebi_eva_common_pyutils/ena_utils.py +35 -0
  12. ebi_eva_common_pyutils/file_utils.py +31 -0
  13. ebi_eva_common_pyutils/logger.py +150 -0
  14. ebi_eva_common_pyutils/ncbi_utils.py +117 -0
  15. ebi_eva_common_pyutils/network_utils.py +64 -0
  16. ebi_eva_common_pyutils/reference/__init__.py +2 -0
  17. ebi_eva_common_pyutils/reference/assembly.py +247 -0
  18. ebi_eva_common_pyutils/reference/sequence.py +101 -0
  19. ebi_eva_common_pyutils/taxonomy/__init__.py +0 -0
  20. ebi_eva_common_pyutils/taxonomy/taxonomy.py +60 -0
  21. ebi_eva_common_pyutils/variation/__init__.py +0 -0
  22. ebi_eva_common_pyutils/variation/contig_utils.py +113 -0
  23. ebi_eva_common_pyutils-0.6.15.data/scripts/archive_directory.py +114 -0
  24. ebi_eva_common_pyutils-0.6.15.dist-info/LICENSE +201 -0
  25. ebi_eva_common_pyutils-0.6.15.dist-info/METADATA +23 -0
  26. ebi_eva_common_pyutils-0.6.15.dist-info/RECORD +39 -0
  27. ebi_eva_common_pyutils-0.6.15.dist-info/WHEEL +5 -0
  28. ebi_eva_common_pyutils-0.6.15.dist-info/top_level.txt +2 -0
  29. ebi_eva_internal_pyutils/__init__.py +0 -0
  30. ebi_eva_internal_pyutils/archive_directory.py +114 -0
  31. ebi_eva_internal_pyutils/config_utils.py +188 -0
  32. ebi_eva_internal_pyutils/metadata_utils.py +288 -0
  33. ebi_eva_internal_pyutils/mongo_utils.py +71 -0
  34. ebi_eva_internal_pyutils/mongodb/__init__.py +3 -0
  35. ebi_eva_internal_pyutils/mongodb/mongo_database.py +170 -0
  36. ebi_eva_internal_pyutils/nextflow/__init__.py +1 -0
  37. ebi_eva_internal_pyutils/nextflow/nextflow_pipeline.py +195 -0
  38. ebi_eva_internal_pyutils/pg_utils.py +107 -0
  39. ebi_eva_internal_pyutils/spring_properties.py +294 -0
@@ -0,0 +1,115 @@
1
+ # Copyright 2022 EMBL - European Bioinformatics Institute
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import os
15
+
16
+ from ebi_eva_common_pyutils.logger import AppLogger
17
+ import requests
18
+ from retry import retry
19
+
20
+
21
class InternalServerError(Exception):
    """Signals that the contig alias service answered with an HTTP 500 status.

    The admin methods raise it so that their ``retry`` decorator can attempt the call again.
    """
23
+
24
+
25
# Default base URL of the contig alias web service. ContigAliasClient falls back to it when neither a
# base_url argument nor the CONTING_ALIAS_URL environment variable is provided.
# NOTE(review): "CONTING" looks like a misspelling of "CONTIG", but the name is public - confirm before renaming.
CONTING_ALIAS_URL = 'https://www.ebi.ac.uk/eva/webservices/contig-alias'
26
+
27
+
28
+ # TODO add the get methods
29
class ContigAliasClient(AppLogger):
    """
    Python client for interfacing with the contig alias service.
    Authentication is required if using admin endpoints.
    """

    def __init__(self, base_url=None, username=None, password=None, default_page_size=1000):
        """
        :param base_url: Root URL of the service. When empty, the CONTING_ALIAS_URL environment variable is used,
                         then the module-level CONTING_ALIAS_URL default.
        :param username: Admin user name, only needed by insert_assembly and delete_assembly
        :param password: Admin password, only needed by insert_assembly and delete_assembly
        :param default_page_size: Number of entries requested per page by the GET methods
        """
        if base_url:
            self.base_url = base_url
        else:
            self.base_url = os.environ.get('CONTING_ALIAS_URL') or CONTING_ALIAS_URL
        # Used for get method
        self.default_page_size = default_page_size
        # Only required for admin endpoints
        self.username = username
        self.password = password

    def _url_for(self, sub_url):
        """Build the full URL of an endpoint, leaving exactly one '/' between the base URL and sub_url."""
        # URLs always use '/', whereas os.path.join follows the path separator of the local platform
        return f"{self.base_url.rstrip('/')}/{sub_url}"

    def check_auth(self):
        """Raise ValueError unless both username and password have been provided."""
        if self.username is None or self.password is None:
            raise ValueError('Need admin username and password for this method')

    @retry(InternalServerError, tries=3, delay=2, backoff=1.5, jitter=(1, 3))
    def insert_assembly(self, assembly):
        """
        Ask the service to add an assembly (admin endpoint, PUT).

        A 409 response (assembly already present) is only logged as a warning.
        :param assembly: Assembly accession to insert
        :raises ValueError: if the credentials are missing
        :raises InternalServerError: on a 500 response; this is the exception the retry decorator reacts to
        :raises requests.HTTPError: on any other error status
        """
        self.check_auth()
        full_url = self._url_for(f'v1/admin/assemblies/{assembly}')

        response = requests.put(full_url, auth=(self.username, self.password))
        if response.status_code == 200:
            self.info(f'Assembly accession {assembly} successfully added to Contig-Alias DB')
        elif response.status_code == 409:
            self.warning(f'Assembly accession {assembly} already exists in Contig-Alias DB. Response: {response.text}')
        elif response.status_code == 500:
            self.error(f'Could not save Assembly accession {assembly} to Contig-Alias DB. Error: {response.text}')
            raise InternalServerError
        else:
            self.error(f'Could not save Assembly accession {assembly} to Contig-Alias DB. Error: {response.text}')
            response.raise_for_status()

    @retry(InternalServerError, tries=3, delay=2, backoff=1.5, jitter=(1, 3))
    def delete_assembly(self, assembly):
        """
        Ask the service to delete an assembly (admin endpoint, DELETE).

        Statuses other than 200 and 500 are logged as errors but do not raise.
        :param assembly: Assembly accession to delete
        :raises ValueError: if the credentials are missing
        :raises InternalServerError: on a 500 response; this is the exception the retry decorator reacts to
        """
        self.check_auth()
        full_url = self._url_for(f'v1/admin/assemblies/{assembly}')

        response = requests.delete(full_url, auth=(self.username, self.password))
        if response.status_code == 200:
            self.info(f'Assembly accession {assembly} successfully deleted from Contig-Alias DB')
        elif response.status_code == 500:
            self.error(f'Assembly accession {assembly} could not be deleted. Response: {response.text}')
            raise InternalServerError
        else:
            self.error(f'Assembly accession {assembly} could not be deleted. Response: {response.text}')

    @retry(tries=3, delay=2, backoff=1.2, jitter=(1, 3))
    def _get_page_for_contig_alias_url(self, sub_url, page=0):
        """
        Query the contig alias service and return the decoded JSON of one page.

        :param sub_url: Endpoint path relative to the base URL
        :param page: Zero-based page number to request
        :raises requests.HTTPError: on an error status
        """
        url = f'{self._url_for(sub_url)}?page={page}&size={self.default_page_size}'
        response = requests.get(url, headers={'accept': 'application/json'})
        response.raise_for_status()
        return response.json()

    def _depaginate_iter(self, sub_url, entity_to_retrieve):
        """
        Generator over the entities of every page of a paginated endpoint.

        :param sub_url: Endpoint path relative to the base URL
        :param entity_to_retrieve: Key under '_embedded' that holds the list of entities
        """
        page = 0
        while True:
            response_json = self._get_page_for_contig_alias_url(sub_url, page=page)
            yield from response_json.get('_embedded', {}).get(entity_to_retrieve, [])
            # A missing '_links' section is treated like a missing 'next' link: there is nothing more to fetch
            if 'next' not in response_json.get('_links', {}):
                break
            page += 1

    def assembly_contig_iter(self, assembly_accession):
        """Generator that provides the contigs in the assembly requested."""
        sub_url = f'v1/assemblies/{assembly_accession}/chromosomes'
        return self._depaginate_iter(sub_url, 'chromosomeEntities')

    def assembly(self, assembly_accession):
        """
        Provide the description of the requested assembly.

        :raises IndexError: if the response contains no assembly entity
        """
        sub_url = f'v1/assemblies/{assembly_accession}'
        response_json = self._get_page_for_contig_alias_url(sub_url)
        return response_json.get('_embedded', {}).get('assemblyEntities', [])[0]

    def contig_iter(self, insdc_accession):
        """Generator that provides the chromosome entities returned for the INSDC (genbank) accession requested."""
        sub_url = f'v1/chromosomes/genbank/{insdc_accession}'
        return self._depaginate_iter(sub_url, 'chromosomeEntities')
@@ -0,0 +1,35 @@
1
+ import requests
2
+ from lxml import etree
3
+ from retry import retry
4
+
5
+
6
@retry(tries=3, delay=2, backoff=1.2, jitter=(1, 3))
def download_xml_from_ena(ena_url) -> etree.XML:
    """Fetch ``ena_url`` and return the root element of the parsed XML.

    Request failures, including non-2xx statuses, are not handled here: they propagate
    so that the ``retry`` decorator can attempt the download again.
    """
    response = requests.get(ena_url)
    # Turn error statuses into exceptions
    response.raise_for_status()
    xml_bytes = bytes(response.text, encoding='utf-8')
    return etree.XML(xml_bytes)
16
+
17
+
18
def get_assembly_name_and_taxonomy_id(assembly_accession):
    """Look up an assembly in ENA and return ``(assembly_name, taxonomy_id)``.

    The name is read from the ``alias`` attribute of the first ASSEMBLY element and the
    taxonomy id, converted to int, from its first TAXON/TAXON_ID element.

    :raises ValueError: if ENA returns no ASSEMBLY element for the accession
    """
    ena_url = f'https://www.ebi.ac.uk/ena/browser/api/xml/{assembly_accession}'
    assemblies = download_xml_from_ena(ena_url).xpath('/ASSEMBLY_SET/ASSEMBLY')
    if not assemblies:
        raise ValueError(f'Assembly {assembly_accession} not found in ENA')
    first_assembly = assemblies[0]
    taxon_id_node = first_assembly.xpath('TAXON/TAXON_ID')[0]
    return first_assembly.get('alias'), int(taxon_id_node.text)
26
+
27
+
28
def get_scientific_name_and_common_name(taxonomy_id):
    """Look up a taxonomy in ENA and return ``(scientific_name, common_name)``.

    Both values are attributes of the first taxon element; the common name is None
    when the attribute is absent.

    :raises ValueError: if ENA returns no taxon element for the id
    """
    ena_url = f'https://www.ebi.ac.uk/ena/browser/api/xml/{taxonomy_id}'
    taxons = download_xml_from_ena(ena_url).xpath('/TAXON_SET/taxon')
    if not taxons:
        raise ValueError(f'Taxonomy {taxonomy_id} not found in ENA')
    first_taxon = taxons[0]
    return first_taxon.get('scientificName'), first_taxon.get('commonName')
@@ -0,0 +1,31 @@
1
+ # Copyright 2020 EMBL - European Bioinformatics Institute
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from enum import Enum
17
+ from ebi_eva_common_pyutils.command_utils import run_command_with_output
18
+
19
+
20
class FileDiffOption(Enum):
    """Kind of comparison that ``file_diff`` performs between its two input files."""

    # Entries of the first file that are not in the second one
    NOT_IN = 1
    # Entries present in both files
    COMMON = 2
23
+
24
+
25
def file_diff(file1_path: str, file2_path: str, diff_option: FileDiffOption, output_file_path: str):
    """
    Compare two files with the ``comm`` command and write the selected lines to output_file_path.

    :param file1_path: Path to the first input file
    :param file2_path: Path to the second input file
    :param diff_option: FileDiffOption.NOT_IN keeps the lines of file1 that are not in file2 (comm -23),
                        FileDiffOption.COMMON keeps the lines present in both files (comm -12).
                        Any other value results in no command being run.
    :param output_file_path: Path of the file that receives the output of comm
    """
    # Local import: shlex is not imported at the top of this module
    from shlex import quote

    if diff_option == FileDiffOption.NOT_IN:
        description = "Finding entries in {0} not in {1}".format(file1_path, file2_path)
        comm_flags = "-23"
    elif diff_option == FileDiffOption.COMMON:
        description = "Finding entries common to {0} and {1}".format(file1_path, file2_path)
        comm_flags = "-12"
    else:
        return

    # The command is a shell string (it relies on '>' redirection), so every path is quoted to keep
    # spaces and shell metacharacters from splitting or altering it. Plain paths are left unchanged by quote().
    command = "comm {0} {1} {2} > {3}".format(comm_flags, quote(file1_path), quote(file2_path),
                                             quote(output_file_path))
    run_command_with_output(description, command)
@@ -0,0 +1,150 @@
1
+ # Copyright 2020 EMBL - European Bioinformatics Institute
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import logging
15
+ import logging.config
16
+ import logging.handlers
17
+ from sys import stdout, stderr
18
+ from cached_property import cached_property
19
+
20
+
21
class LoggingConfiguration:
    """
    All in one registry of the loggers, handlers and formatter used in the stack. By default it starts from the
    loggers that already exist, and it keeps track of the loggers and handlers that are added through it.
    """

    default_fmt = '[%(asctime)s][%(name)s][%(levelname)s] %(message)s'
    default_datefmt = '%Y-%b-%d %H:%M:%S'

    def __init__(self, use_existing_logger=True, log_level=logging.INFO):
        """
        :param bool use_existing_logger: Whether to register the loggers already known to the logging module
        :param int log_level: Level applied whenever no explicit level is given
        """
        self._log_level = log_level
        self.blank_formatter = logging.Formatter()
        self.handlers = set()
        self.loggers = {}
        if use_existing_logger:
            # retrieve all third party loggers, skipping the entries that are not Logger instances
            self.loggers = {
                logger_name: candidate
                for logger_name, candidate in logging.root.manager.loggerDict.items()
                if isinstance(candidate, logging.Logger)
            }

    @cached_property
    def formatter(self):
        """Formatter applied to the handlers; the default one until set_formatter replaces it."""
        return self.default_formatter

    @cached_property
    def default_formatter(self):
        """Formatter built from default_fmt and default_datefmt."""
        return logging.Formatter(fmt=self.default_fmt, datefmt=self.default_datefmt)

    def get_logger(self, name, level=logging.NOTSET):
        """
        Return a logging.Logger object with the level set and the registered handlers added.
        :param name: Name to assign to the logger (usually __name__)
        :param int level: Log level to assign to the logger; the configuration level is used when it is NOTSET
        """
        if name not in self.loggers:
            self.loggers[name] = logging.getLogger(name)
        logger = self.loggers[name]

        logger.setLevel(level or self._log_level)
        for registered_handler in self.handlers:
            logger.addHandler(registered_handler)
        return logger

    def add_handler(self, handler, level=logging.NOTSET):
        """
        Register a created handler: set its level and formatter, then attach it to all known loggers
        :param logging.Handler handler:
        :param int level: Log level to assign to the handler; the configuration level is used when it is NOTSET
        """
        handler.setLevel(level or self._log_level)
        handler.setFormatter(self.formatter)
        for registered_logger in self.loggers.values():
            registered_logger.addHandler(handler)
        self.handlers.add(handler)

    def add_stdout_handler(self, level=None):
        """Register a handler that writes to stdout."""
        stream_handler = logging.StreamHandler(stdout)
        self.add_handler(stream_handler, level=level or self._log_level)

    def add_stderr_handler(self, level=None):
        """Register a handler that writes to stderr."""
        stream_handler = logging.StreamHandler(stderr)
        self.add_handler(stream_handler, level=level or self._log_level)

    def add_file_handler(self, filename, level=None):
        """Register a handler that writes to the file ``filename``."""
        file_handler = logging.FileHandler(filename=filename)
        self.add_handler(file_handler, level=level or self._log_level)

    def set_log_level(self, level):
        """Apply ``level`` to every registered handler and logger and make it the new default."""
        self._log_level = level
        for registered_handler in self.handlers:
            registered_handler.setLevel(level)
        for registered_logger in self.loggers.values():
            registered_logger.setLevel(level)

    def set_formatter(self, formatter):
        """
        Set all handlers to use formatter
        :param logging.Formatter formatter:
        """
        # Writing to the instance dict overrides the value cached by the `formatter` property
        self.__dict__['formatter'] = formatter
        for registered_handler in self.handlers:
            registered_handler.setFormatter(self.formatter)

    def reset(self):
        """Remove all handlers from the known loggers and forget the registered handlers"""
        for registered_logger in self.loggers.values():
            # Iterate over a copy because removeHandler mutates the logger's handler list
            for attached_handler in list(registered_logger.handlers):
                registered_logger.removeHandler(attached_handler)
        self.handlers.clear()
117
+
118
+
119
# A logging configuration singleton that will be the only source of loggers.
# It is created when this module is imported, with the constructor defaults
# (existing loggers are registered, default level is logging.INFO).
logging_config = LoggingConfiguration()
121
+
122
+
123
class AppLogger:
    """
    Mixin class for logging. An object subclassing this can log using its class name. Contains a
    logging.Logger object and exposes its log methods.

    Every method forwards its positional and keyword arguments (e.g. exc_info, stack_info, extra)
    to the matching logging.Logger method.
    """
    log_cfg = logging_config

    def log(self, level, msg, *args, **kwargs):
        """Log ``msg`` at the given numeric ``level``."""
        self._logger.log(level, msg, *args, **kwargs)

    def debug(self, msg, *args, **kwargs):
        """Log ``msg`` at DEBUG level."""
        self._logger.debug(msg, *args, **kwargs)

    def info(self, msg, *args, **kwargs):
        """Log ``msg`` at INFO level."""
        self._logger.info(msg, *args, **kwargs)

    def warning(self, msg, *args, **kwargs):
        """Log ``msg`` at WARNING level."""
        self._logger.warning(msg, *args, **kwargs)

    def error(self, msg, *args, **kwargs):
        """Log ``msg`` at ERROR level."""
        self._logger.error(msg, *args, **kwargs)

    def critical(self, msg, *args, **kwargs):
        """Log ``msg`` at CRITICAL level."""
        self._logger.critical(msg, *args, **kwargs)

    @cached_property
    def _logger(self):
        """Logger named after the concrete class, obtained once from the shared logging configuration."""
        return self.log_cfg.get_logger(self.__class__.__name__)
@@ -0,0 +1,117 @@
1
+ import re
2
+
3
+ import requests
4
+ from retry import retry
5
+
6
+ from ebi_eva_common_pyutils.logger import logging_config as log_cfg
7
+
8
+
9
# Module-level logger obtained from the shared logging configuration
logger = log_cfg.get_logger(__name__)

# NCBI E-utilities endpoints used by the functions of this module
eutils_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
esearch_url = eutils_url + 'esearch.fcgi'
esummary_url = eutils_url + 'esummary.fcgi'
efetch_url = eutils_url + 'efetch.fcgi'
# Ensembl REST endpoint for assembly information
# NOTE(review): uses plain http and is not referenced by the functions visible in this module - confirm it is needed
ensembl_url = 'http://rest.ensembl.org/info/assembly'
16
+
17
+
18
@retry(tries=3, delay=2, backoff=1.2, jitter=(1, 3))
def get_ncbi_assembly_dicts_from_term(term, api_key=None):
    """
    Return the NCBI assembly summaries matching a search term as a list of dictionaries.

    The term is first searched (quoted) in the Assembly database with ESearch, then the summaries of
    the ids found are retrieved with ESummary.

    :param term: Text to search for, typically an assembly accession or name
    :param api_key: Optional NCBI API key added to both requests
    :return: List of summary dictionaries, empty when the search finds nothing
    :raises requests.HTTPError: if one of the requests returns an error status
    """
    payload = {'db': 'Assembly', 'term': '"{}"'.format(term), 'retmode': 'JSON'}
    if api_key:
        payload['api_key'] = api_key
    req = requests.get(esearch_url, params=payload)
    req.raise_for_status()
    data = req.json()
    if not data:
        return []

    assembly_id_list = data.get('esearchresult').get('idlist')
    if not assembly_id_list:
        # Nothing was found: do not send an ESummary request with an empty id list
        return []

    payload = {'db': 'Assembly', 'id': ','.join(assembly_id_list), 'retmode': 'JSON'}
    if api_key:
        payload['api_key'] = api_key
    req = requests.get(esummary_url, params=payload)
    req.raise_for_status()
    result = req.json().get('result', {})
    # 'uids' lists the keys under which each summary is stored in 'result'
    return [result.get(assembly_id) for assembly_id in result.get('uids', [])]
39
+
40
+
41
@retry(tries=3, delay=2, backoff=1.2, jitter=(1, 3))
def get_ncbi_taxonomy_dicts_from_term(term, api_key=None):
    """
    Return the NCBI taxonomy summaries matching a search term as a list of dictionaries.

    The term is searched (quoted) in the Taxonomy database with ESearch and the ids found are handed
    to get_ncbi_taxonomy_dicts_from_ids.

    :param term: Text to search for
    :param api_key: Optional NCBI API key, used for the search and for the retrieval of the summaries
    :return: List of summary dictionaries, empty when the search finds nothing
    :raises requests.HTTPError: if the search request returns an error status
    """
    payload = {'db': 'Taxonomy', 'term': '"{}"'.format(term), 'retmode': 'JSON'}
    if api_key:
        payload['api_key'] = api_key
    req = requests.get(esearch_url, params=payload)
    req.raise_for_status()
    data = req.json()
    if not data:
        return []

    taxonomy_ids = data.get('esearchresult').get('idlist')
    if not taxonomy_ids:
        # Nothing was found: there is no summary to retrieve
        return []
    # The API key has to be forwarded, otherwise the second request is sent without it
    return get_ncbi_taxonomy_dicts_from_ids(taxonomy_ids, api_key=api_key)
54
+
55
+
56
@retry(tries=3, delay=2, backoff=1.2, jitter=(1, 3))
def get_ncbi_taxonomy_dicts_from_ids(taxonomy_ids, api_key=None):
    """
    Return the NCBI taxonomy summaries of a list of taxonomy ids as a list of dictionaries.

    :param taxonomy_ids: Iterable of taxonomy ids, given as strings or integers
    :param api_key: Optional NCBI API key added to the request
    :return: List of summary dictionaries, empty when no id is provided
    :raises requests.HTTPError: if the request returns an error status
    """
    # str() lets callers pass integer ids; ids that are already strings are unaffected
    id_strings = [str(taxonomy_id) for taxonomy_id in taxonomy_ids]
    if not id_strings:
        # Do not send an ESummary request with an empty id list
        return []

    payload = {'db': 'Taxonomy', 'id': ','.join(id_strings), 'retmode': 'JSON'}
    if api_key:
        payload['api_key'] = api_key
    req = requests.get(esummary_url, params=payload)
    req.raise_for_status()
    result = req.json().get('result', {})
    # 'uids' lists the keys under which each summary is stored in 'result'
    return [result.get(taxonomy_id) for taxonomy_id in result.get('uids', [])]
70
+
71
+
72
def get_ncbi_assembly_name_from_term(term, api_key=None):
    """
    Resolve the single NCBI assembly name matching a search term.

    When the search returns several distinct names, only the assemblies that have the term as one of
    their synonyms or as their accession are kept before checking again.

    :param term: Text to search for, typically an assembly accession
    :param api_key: Optional NCBI API key
    :return: The 'assemblyname' value of the matching assembly
    :raises ValueError: if the number of distinct names left is not exactly one
    """
    candidates = get_ncbi_assembly_dicts_from_term(term, api_key=api_key)
    names = {candidate.get('assemblyname') for candidate in candidates}
    if len(names) > 1:
        # Only keep the ones that have the term as a synonym or accession and check again
        names = {
            candidate.get('assemblyname')
            for candidate in candidates
            if term in candidate['synonym'].values() or term == candidate['assemblyaccession']
        }
    if len(names) != 1:
        found = ",".join(str(name) for name in names)
        raise ValueError(f'Cannot resolve assembly name for assembly {term} in NCBI. '
                         f'Found {found}')
    # Exactly one name is left at this point
    return names.pop()
83
+
84
+
85
def retrieve_species_scientific_name_from_tax_id_ncbi(taxid, api_key=None):
    """
    Fetch a taxonomy record with EFetch and return the content of its first ScientificName tag.

    A warning is logged when the first Rank tag is missing or is neither 'species' nor 'subspecies'.

    :param taxid: Taxonomy id to look up
    :param api_key: Optional NCBI API key added to the request
    :return: The scientific name, or None when the response has no ScientificName tag
    """
    params = {'db': 'Taxonomy', 'id': taxid}
    if api_key:
        params['api_key'] = api_key
    response_text = requests.get(efetch_url, params=params).text

    rank_match = re.search('<Rank>(.+?)</Rank>', response_text, re.MULTILINE)
    rank = rank_match.group(1) if rank_match else None
    if rank not in ['species', 'subspecies']:
        logger.warning('Taxonomy id %s does not point to a species', taxid)

    name_match = re.search('<ScientificName>(.+?)</ScientificName>', response_text, re.MULTILINE)
    return name_match.group(1) if name_match else None
99
+
100
+
101
def get_species_name_from_ncbi(assembly_acc, api_key=None):
    """
    Return the scientific name of the species of an assembly, lower-cased and with spaces replaced by '_'.

    :param assembly_acc: Assembly accession; it must match the 'assemblyaccession' or the genbank synonym
                         of the assemblies found in NCBI
    :param api_key: Optional NCBI API key
    :raises ValueError: if no species or more than one species is found for the assembly, or if the
                        scientific name of the species cannot be retrieved
    """
    # We first need to search for the species associated with the assembly
    assembly_dicts = get_ncbi_assembly_dicts_from_term(assembly_acc, api_key=api_key)
    taxids = {
        assembly_dict.get('taxid')
        for assembly_dict in assembly_dicts
        if assembly_dict.get('assemblyaccession') == assembly_acc
        or assembly_dict.get('synonym', {}).get('genbank') == assembly_acc
    }

    # This is a search so it could retrieve no result or multiple results
    if not taxids:
        raise ValueError(f'No species found for {assembly_acc}. '
                         f'Cannot resolve single species for assembly {assembly_acc} in NCBI.')
    if len(taxids) > 1:
        raise ValueError(f'Multiple species found for {assembly_acc}. '
                         f'Cannot resolve single species for assembly {assembly_acc} in NCBI.')

    taxonomy_id = taxids.pop()

    scientific_name = retrieve_species_scientific_name_from_tax_id_ncbi(taxonomy_id, api_key=api_key)
    if scientific_name is None:
        # The lookup returns None when the record has no scientific name; fail with a clear message
        raise ValueError(f'Cannot retrieve the scientific name of taxonomy {taxonomy_id} '
                         f'for assembly {assembly_acc} in NCBI.')
    return scientific_name.replace(' ', '_').lower()
@@ -0,0 +1,64 @@
1
+ # Copyright 2020 EMBL - European Bioinformatics Institute
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import time
15
+
16
+ import requests
17
+ import subprocess
18
+ from retry import retry
19
+
20
+ from ebi_eva_common_pyutils.logger import logging_config as log_cfg
21
+
22
# Module-level logger obtained from the shared logging configuration; json_request also passes it to retry
logger = log_cfg.get_logger(__name__)
23
+
24
+
25
def is_port_in_use(port):
    """Return True when a TCP connection to ``port`` on localhost succeeds, False otherwise."""
    import socket
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    with probe:
        # connect_ex reports failures through its return code instead of raising; 0 means connected
        connect_status = probe.connect_ex(('localhost', port))
    return connect_status == 0
29
+
30
+
31
def get_available_local_port(try_starting_with_port):
    """
    Return the first port that is not in use among the 20 ports starting at try_starting_with_port.

    An error is logged and None is returned when all of them are in use.
    """
    for offset in range(0, 20):
        port_to_try = try_starting_with_port + offset
        logger.info("Attempting to forward remote mongo port to local port {0}...".format(port_to_try))
        if not is_port_in_use(port_to_try):
            return port_to_try
        logger.info("Port {0} already in use...".format(port_to_try))
    logger.error("Could not forward to any local port!")
    return None
40
+
41
+
42
def forward_remote_port_to_local_port(remote_host: str, remote_port: int, local_port: int) -> int:
    """
    Start an ``ssh -N -L`` process that forwards remote_port of remote_host to local_port.

    :return: The pid of the ssh process, which is left running
    :raises subprocess.CalledProcessError: if the ssh process has already exited 5 seconds after its start
    """
    port_forward_command = f'ssh -N -L{local_port}:localhost:{remote_port} {remote_host}'
    logger.info("Forwarding port to local port using command: " + port_forward_command)
    proc = subprocess.Popen(port_forward_command.split(" "))
    # Give ssh some time to either establish the tunnel or fail
    time.sleep(5)
    if proc.poll() is None:
        # Still running
        return proc.pid
    # The process already completed which means it most likely crashed
    logger.error(f'Port Forwarding {remote_host}:{remote_port} -> {local_port} failed!')
    raise subprocess.CalledProcessError(proc.returncode, proc.args)
54
+
55
+
56
@retry(exceptions=(ConnectionError, requests.RequestException), logger=logger,
       tries=4, delay=2, backoff=1.2, jitter=(1, 3))
def json_request(url: str, payload: dict = None, method=requests.get) -> dict:
    """
    Send a request and return the decoded JSON body.

    :param url: URL to send the request to
    :param payload: Passed to ``method`` as its ``data`` argument
    :param method: Callable that performs the request, requests.get by default
    :return: The response body parsed as JSON
    :raises requests.HTTPError: if the response has an error status
    """
    response = method(url, data=payload)
    response.raise_for_status()
    return response.json()
64
+
@@ -0,0 +1,2 @@
1
+ from ebi_eva_common_pyutils.reference.assembly import NCBIAssembly
2
+ from ebi_eva_common_pyutils.reference.sequence import NCBISequence