ebi-eva-common-pyutils 0.6.0__py3-none-any.whl → 0.6.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,6 +16,7 @@ import http
16
16
  import requests
17
17
 
18
18
  from ebi_eva_common_pyutils.assembly import NCBIAssembly
19
+ from ebi_eva_common_pyutils.ena_utils import download_xml_from_ena
19
20
  from ebi_eva_common_pyutils.logger import logging_config as log_cfg
20
21
 
21
22
  EUTILS_URL = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
@@ -27,6 +28,18 @@ EFETCH_URL = EUTILS_URL + 'efetch.fcgi'
27
28
  logger = log_cfg.get_logger(__name__)
28
29
 
29
30
 
31
def is_patch_assembly(assembly_accession: str) -> bool:
    """
    Tell whether the given assembly accession refers to a patch assembly.

    The XML record of the accession is downloaded from the ENA browser API and its
    'count-patches' assembly attribute is inspected: the assembly is reported as a patch
    assembly when that attribute is present and its value is greater than zero.
    Please see: https://www.ncbi.nlm.nih.gov/grc/help/patches/
    """
    ena_record_url = f'https://www.ebi.ac.uk/ena/browser/api/xml/{assembly_accession}'
    patch_count_nodes = download_xml_from_ena(ena_record_url).xpath(
        "//ASSEMBLY_ATTRIBUTE[TAG='count-patches']/VALUE"
    )
    if patch_count_nodes:
        # Only the first matching attribute value is taken into account
        return int(patch_count_nodes[0].text) > 0
    # No 'count-patches' attribute at all: not a patch assembly
    return False
41
+
42
+
30
43
  def retrieve_genbank_assembly_accessions_from_ncbi(assembly_txt):
31
44
  """
32
45
  Attempt to find any assembly GenBank accession based on a free text search.
@@ -11,7 +11,7 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
-
14
+ import logging
15
15
  import subprocess
16
16
 
17
17
  from ebi_eva_common_pyutils.logger import logging_config as log_cfg
@@ -20,11 +20,12 @@ logger = log_cfg.get_logger(__name__)
20
20
 
21
21
 
22
22
  def run_command_with_output(command_description, command, return_process_output=False,
23
- log_error_stream_to_output=False):
23
+ log_error_stream_to_output=False, stdout_log_level=logging.INFO,
24
+ stderr_log_level=logging.ERROR):
24
25
  process_output = ""
25
26
 
26
- logger.info("Starting process: " + command_description)
27
- logger.info("Running command: " + command)
27
+ logger.log(stdout_log_level, "Starting process: " + command_description)
28
+ logger.log(stdout_log_level, "Running command: " + command)
28
29
 
29
30
  stdout = subprocess.PIPE
30
31
  # Some lame utilities like mongodump and mongorestore output non-error messages to error stream
@@ -35,18 +36,18 @@ def run_command_with_output(command_description, command, return_process_output=
35
36
  shell=True) as process:
36
37
  for line in iter(process.stdout.readline, ''):
37
38
  line = str(line).rstrip()
38
- logger.info(line)
39
+ logger.log(stdout_log_level, line)
39
40
  if return_process_output:
40
41
  process_output += line + "\n"
41
42
  if not log_error_stream_to_output:
42
43
  for line in iter(process.stderr.readline, ''):
43
44
  line = str(line).rstrip()
44
- logger.error(line)
45
+ logger.log(stderr_log_level, line)
45
46
  if process.returncode != 0:
46
47
  logger.error(command_description + " failed! Refer to the error messages for details.")
47
48
  raise subprocess.CalledProcessError(process.returncode, process.args)
48
49
  else:
49
- logger.info(command_description + " - completed successfully")
50
+ logger.log(stdout_log_level, command_description + " - completed successfully")
50
51
  if return_process_output:
51
52
  return process_output
52
53
 
@@ -73,3 +73,80 @@ cfg = Configuration()
73
73
  """
74
74
  Provides a singleton that can be used as a central place for configuration.
75
75
  """
76
+
77
+
78
+
79
+
80
class WritableConfig(Configuration):
    """Configuration object that allows writes to the config file"""

    def __init__(self, *search_path, version=None):
        """
        :param search_path: candidate locations of the config file, forwarded to the parent constructor
        :param version: optional version, stored under the 'version' key when an empty config starts
                        being filled in (see _set_version)
        """
        super().__init__(*search_path)
        self.version = version

    def load_config_file(self, *search_path):
        """
        Load the config file through the parent implementation, tolerating a missing file.

        When the parent raises FileNotFoundError, the first entry of the search path is recorded as
        the config file so that a later write() can create it.
        """
        try:
            super().load_config_file(*search_path)
        except FileNotFoundError:
            if not search_path:
                # Nothing to fall back on: surface the original error instead of an IndexError
                raise
            # Expected if it's the first time we are creating the config file.
            # In that case the first search path is set to be the config file.
            self.config_file = search_path[0]

    def backup(self):
        """
        Rename the config file by adding a '.1' at the end. If the '.1' file exists it is moved to '.2'
        first, and so on, so that existing backups are shifted rather than overwritten.
        Does nothing when the config file does not exist.
        """
        if not os.path.isfile(self.config_file):
            return
        file_name = self.config_file

        # Find the first backup suffix that is not in use yet
        suffix = 1
        while os.path.exists(f'{file_name}.{suffix}'):
            suffix += 1

        # Shift the existing backups up by one, starting from the highest so nothing is clobbered
        for i in range(suffix, 1, -1):
            os.rename(f'{file_name}.{i - 1}', f'{file_name}.{i}')
        os.rename(file_name, f'{file_name}.1')

    def write(self):
        """
        Dump the content as YAML to the config file.

        Nothing is written when no config file is set, when the content is empty, or when the
        directory that should hold the file does not exist.
        """
        if not (self.config_file and self.content):
            return
        # Resolve the directory from the absolute path: for a bare file name os.path.dirname()
        # returns '' which os.path.isdir() rejects, and the write would be silently skipped.
        parent_dir = os.path.dirname(os.path.abspath(self.config_file))
        if os.path.isdir(parent_dir):
            with open(self.config_file, 'w') as open_config:
                yaml.safe_dump(self.content, open_config)

    def set(self, *path, value):
        """
        Set a value at the nested location described by path, creating intermediate dictionaries
        as needed, e.g. config.set('a', 'b', value=1) results in {'a': {'b': 1}}.
        """
        self._set_version()
        top_level = self.content
        for p in path[:-1]:
            if p not in top_level:
                top_level[p] = {}
            top_level = top_level[p]
        top_level[path[-1]] = value

    def pop(self, *path, default=None):
        """Recursive dictionary pop with default"""
        top_level = self.content
        for p in path[:-1]:
            if p not in top_level:
                # An intermediate level is missing so there is nothing to remove
                return default
            top_level = top_level[p]
        return top_level.pop(path[-1], default)

    def is_empty(self):
        """Return True when the configuration holds no content."""
        return not self.content

    def clear(self):
        """Replace the content with a new empty dictionary."""
        self.content = {}

    def _set_version(self):
        # If we're starting to fill in an empty config, set the version if available
        if self.is_empty() and self.version:
            self.content['version'] = self.version

    def __contains__(self, item):
        """Allow membership tests on the top-level keys, e.g. 'this' in config."""
        return item in self.content

    def __setitem__(self, item, value):
        """Allow dict-style write access, e.g. config['this']='that'."""
        self._set_version()
        self.content[item] = value
@@ -4,7 +4,7 @@ from retry import retry
4
4
 
5
5
 
6
6
  @retry(tries=3, delay=2, backoff=1.2, jitter=(1, 3))
7
- def download_xml_from_ena(ena_url):
7
+ def download_xml_from_ena(ena_url) -> etree.XML:
8
8
  """Download and parse XML from ENA"""
9
9
  try: # catches any kind of request error, including non-20X status code
10
10
  response = requests.get(ena_url)
@@ -127,6 +127,9 @@ class AppLogger:
127
127
  """
128
128
  log_cfg = logging_config
129
129
 
130
def log(self, level, msg, *args, **kwargs):
    """Log msg at the given level through the underlying logger, forwarding any extra arguments as is."""
    emit = self._logger.log
    emit(level, msg, *args, **kwargs)
132
+
130
133
  def debug(self, msg, *args):
131
134
  self._logger.debug(msg, *args)
132
135
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ebi-eva-common-pyutils
3
- Version: 0.6.0
3
+ Version: 0.6.4
4
4
  Summary: EBI EVA - Common Python Utilities
5
5
  Home-page: https://github.com/EBIVariation/eva-common-pyutils
6
6
  License: Apache
@@ -1,11 +1,11 @@
1
1
  ebi_eva_common_pyutils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- ebi_eva_common_pyutils/assembly_utils.py,sha256=idvmNDKJHrzMhl0DZjmozXlWNVQWEW1BSslhFHxXatI,3482
3
- ebi_eva_common_pyutils/command_utils.py,sha256=PhXSS1LPSGaExkwwYJqjpG0agRB2dbOehbm1aAPNSqg,2151
2
+ ebi_eva_common_pyutils/assembly_utils.py,sha256=Lc1bDf0xmi-AF6eyM7pWMgsjnxZmpVHvF3E5aEiKBEI,4018
3
+ ebi_eva_common_pyutils/command_utils.py,sha256=PtelWWqcC0eOwIVesjwBw3F9KaXRzEE_uAUJhQFZ4l8,2340
4
4
  ebi_eva_common_pyutils/common_utils.py,sha256=ty_glvfRa3VGhnpAht4qtVkNNmv-IYfVtO958mY-BaA,1192
5
- ebi_eva_common_pyutils/config.py,sha256=5argukdpNFw0MlSSNws-IFEH8QnKXJzm3nnRtuN4hLE,2242
6
- ebi_eva_common_pyutils/ena_utils.py,sha256=3XBBLwQi00AZGzW0-x2qNi1KFaAPDxsr5GDmiBg9gbc,1452
5
+ ebi_eva_common_pyutils/config.py,sha256=PtD2SgHf96kk21OA9tVIjEgsDXEFuAU-INy_kfQdoPw,4828
6
+ ebi_eva_common_pyutils/ena_utils.py,sha256=S2MmnWQ_9MJjlkaQY_by1-GGbTyi8SKp8XRcpjWnpZs,1465
7
7
  ebi_eva_common_pyutils/file_utils.py,sha256=eIlQKSVKkEjMNX7emrDzaQyQdGvQdb64gnfEhb6uYsE,1375
8
- ebi_eva_common_pyutils/logger.py,sha256=KjnxquAZxJWlD2YvploiHD11JYzWZwdQidHx7Fy6wI4,4990
8
+ ebi_eva_common_pyutils/logger.py,sha256=hT20ktN_oGeB_5ofVfd_aGXB6xYOe0Y5b3EVRYCuFb8,5093
9
9
  ebi_eva_common_pyutils/ncbi_utils.py,sha256=sh9E_oDhDtliihJ0-FZuDd7paeidW5GfqRr5vBTo2k4,4859
10
10
  ebi_eva_common_pyutils/network_utils.py,sha256=_Qf5oNONA4pUzvxfdk25G-dKuWe44aC_Hewvdp-VZyU,2285
11
11
  ebi_eva_common_pyutils/assembly/__init__.py,sha256=KSWPwBY5nZj00odxWFntk8Sqg_rw273xH8S5D6Jo-T4,67
@@ -19,20 +19,20 @@ ebi_eva_common_pyutils/taxonomy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
19
19
  ebi_eva_common_pyutils/taxonomy/taxonomy.py,sha256=p3XV4g3y0hEjyeZ4PwgN7Q3Et9G515ctQkSIo1kdDbU,2259
20
20
  ebi_eva_common_pyutils/variation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
21
  ebi_eva_common_pyutils/variation/contig_utils.py,sha256=kMNEW_P2yPnd8Xx1tep19hy5ee7ojxz6ZOO1grTQsRQ,5230
22
- ebi_eva_common_pyutils-0.6.0.data/scripts/archive_directory.py,sha256=0lWJ0ju_AB2ni7lMnJXPFx6U2OdTGbe-WoQs-4BfKOM,4976
22
+ ebi_eva_common_pyutils-0.6.4.data/scripts/archive_directory.py,sha256=0lWJ0ju_AB2ni7lMnJXPFx6U2OdTGbe-WoQs-4BfKOM,4976
23
23
  ebi_eva_internal_pyutils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
24
  ebi_eva_internal_pyutils/archive_directory.py,sha256=IxVEfh_gaCiT652k0Q_-58fonRusy1yzXu7BCO8yVLo,4989
25
25
  ebi_eva_internal_pyutils/config_utils.py,sha256=EGRC5rsmU_ug7OY9-t1UW1XZXRsauSyZB9xPcBux8ts,7909
26
- ebi_eva_internal_pyutils/metadata_utils.py,sha256=sW7mzFooeXcmxMth5EiBhES-7Keh6Ooe4XeCubLWfJ4,14795
26
+ ebi_eva_internal_pyutils/metadata_utils.py,sha256=NQVD0_-S1EBb0SnlKX0w31g4Me_8VGXYJTDlMhzje9w,15090
27
27
  ebi_eva_internal_pyutils/mongo_utils.py,sha256=YxKHtb5ygDiGLOtEiiAMFCP2ow6FL9Kq0K5R0mWNdXY,3575
28
28
  ebi_eva_internal_pyutils/pg_utils.py,sha256=FUQVwiX_7F2-4sSzoaCVX2me0zAqR8nGIj6NW5d304A,4398
29
29
  ebi_eva_internal_pyutils/spring_properties.py,sha256=WjPozWtXbAZGNqlgvY6GHps2KFB1rY9OaTs46obW3pM,15265
30
30
  ebi_eva_internal_pyutils/mongodb/__init__.py,sha256=0oyTlkYZCV7udlPl09Zl-sDyE3c97QZMMTEFIa6uYIw,76
31
- ebi_eva_internal_pyutils/mongodb/mongo_database.py,sha256=im0HgBgG_9E1nq7fQcNl3Ogc8oka6DI97SgsfeboRm0,9676
31
+ ebi_eva_internal_pyutils/mongodb/mongo_database.py,sha256=kesaJaaxYFeF_uYZBgL8tbufGKUXll7bXb4WlOS9vKM,9596
32
32
  ebi_eva_internal_pyutils/nextflow/__init__.py,sha256=OOiJS8jZOz98q0t77NNog7aI_fFrVxi4kGmiSskuAqM,122
33
33
  ebi_eva_internal_pyutils/nextflow/nextflow_pipeline.py,sha256=ew623hhK8jmFLQjJwLZbgBmW9RTiJBEULVqHfIUv_dc,10114
34
- ebi_eva_common_pyutils-0.6.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
35
- ebi_eva_common_pyutils-0.6.0.dist-info/METADATA,sha256=cW4V0_SM_bvGOMTS6hHp7FmCSr6EVBZkLoHLSPPAXRc,824
36
- ebi_eva_common_pyutils-0.6.0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
37
- ebi_eva_common_pyutils-0.6.0.dist-info/top_level.txt,sha256=sXoiqiGU8vlMQpFWDlKrekxhlusk06AhkOH3kSvDT6c,48
38
- ebi_eva_common_pyutils-0.6.0.dist-info/RECORD,,
34
+ ebi_eva_common_pyutils-0.6.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
35
+ ebi_eva_common_pyutils-0.6.4.dist-info/METADATA,sha256=z3BrpkHtxmrrusChh4Oawx8m6tAeRLZpXLlN8pnu3ic,824
36
+ ebi_eva_common_pyutils-0.6.4.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
37
+ ebi_eva_common_pyutils-0.6.4.dist-info/top_level.txt,sha256=sXoiqiGU8vlMQpFWDlKrekxhlusk06AhkOH3kSvDT6c,48
38
+ ebi_eva_common_pyutils-0.6.4.dist-info/RECORD,,
@@ -17,6 +17,7 @@ from urllib.parse import urlsplit
17
17
 
18
18
  import psycopg2
19
19
 
20
+ from ebi_eva_common_pyutils.assembly_utils import is_patch_assembly
20
21
  from ebi_eva_internal_pyutils.config_utils import get_metadata_creds_for_profile
21
22
  from ebi_eva_common_pyutils.ena_utils import get_scientific_name_and_common_name
22
23
  from ebi_eva_common_pyutils.logger import logging_config
@@ -120,6 +121,9 @@ def get_assembly_code(metadata_connection_handle, assembly):
120
121
  assembly_code = get_assembly_code_from_metadata(metadata_connection_handle, assembly)
121
122
  if not assembly_code:
122
123
  assembly_name = get_ncbi_assembly_name_from_term(assembly)
124
+ # If the assembly is a patch assembly (e.g. GRCh37.p8), drop the trailing patch suffix, i.e. just return grch37
125
+ if is_patch_assembly(assembly):
126
+ assembly_name = re.sub('\\.p[0-9]+$', '', assembly_name.lower())
123
127
  assembly_code = re.sub('[^0-9a-zA-Z]+', '', assembly_name.lower())
124
128
  return assembly_code
125
129
 
@@ -111,9 +111,7 @@ class MongoDatabase(AppLogger):
111
111
  shard_collection_command = f'sh.shardCollection(' \
112
112
  f'"{self.db_name}.{collection_name}", ' \
113
113
  f'{shard_key_repr}, {str(shard_key_uniqueness_flag).lower()})'
114
- sharding_command = f"mongo --host {self.uri} " \
115
- f"--eval " \
116
- f"'{shard_collection_command}' "
114
+ sharding_command = f"mongosh --eval '{shard_collection_command}' {self.uri} "
117
115
  sharding_command += self._get_optional_secrets_file_stdin()
118
116
  run_command_with_output(f"Sharding collection {collection_name} in the database {self.uri_with_db_name} "
119
117
  f"with key {shard_key_repr}...", sharding_command,