ebi-eva-common-pyutils 0.6.0__py3-none-any.whl → 0.6.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ebi_eva_common_pyutils/assembly_utils.py +13 -0
- ebi_eva_common_pyutils/command_utils.py +8 -7
- ebi_eva_common_pyutils/config.py +77 -0
- ebi_eva_common_pyutils/ena_utils.py +1 -1
- ebi_eva_common_pyutils/logger.py +3 -0
- {ebi_eva_common_pyutils-0.6.0.dist-info → ebi_eva_common_pyutils-0.6.4.dist-info}/METADATA +1 -1
- {ebi_eva_common_pyutils-0.6.0.dist-info → ebi_eva_common_pyutils-0.6.4.dist-info}/RECORD +13 -13
- ebi_eva_internal_pyutils/metadata_utils.py +4 -0
- ebi_eva_internal_pyutils/mongodb/mongo_database.py +1 -3
- {ebi_eva_common_pyutils-0.6.0.data → ebi_eva_common_pyutils-0.6.4.data}/scripts/archive_directory.py +0 -0
- {ebi_eva_common_pyutils-0.6.0.dist-info → ebi_eva_common_pyutils-0.6.4.dist-info}/LICENSE +0 -0
- {ebi_eva_common_pyutils-0.6.0.dist-info → ebi_eva_common_pyutils-0.6.4.dist-info}/WHEEL +0 -0
- {ebi_eva_common_pyutils-0.6.0.dist-info → ebi_eva_common_pyutils-0.6.4.dist-info}/top_level.txt +0 -0
|
@@ -16,6 +16,7 @@ import http
|
|
|
16
16
|
import requests
|
|
17
17
|
|
|
18
18
|
from ebi_eva_common_pyutils.assembly import NCBIAssembly
|
|
19
|
+
from ebi_eva_common_pyutils.ena_utils import download_xml_from_ena
|
|
19
20
|
from ebi_eva_common_pyutils.logger import logging_config as log_cfg
|
|
20
21
|
|
|
21
22
|
EUTILS_URL = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
|
|
@@ -27,6 +28,18 @@ EFETCH_URL = EUTILS_URL + 'efetch.fcgi'
|
|
|
27
28
|
logger = log_cfg.get_logger(__name__)
|
|
28
29
|
|
|
29
30
|
|
|
31
|
+
def is_patch_assembly(assembly_accession: str) -> bool:
    """
    Tell whether the given assembly accession refers to a patch assembly.

    The assembly's XML record is downloaded from the ENA browser API and the VALUE of its
    'count-patches' ASSEMBLY_ATTRIBUTE is inspected: a count above zero means a patch assembly.
    When the record carries no such attribute the assembly is reported as not being a patch.
    Please see: https://www.ncbi.nlm.nih.gov/grc/help/patches/

    :param assembly_accession: accession appended to the ENA browser XML endpoint
    :return: True if the record reports more than zero patches, False otherwise
    """
    ena_record = download_xml_from_ena(f'https://www.ebi.ac.uk/ena/browser/api/xml/{assembly_accession}')
    patch_counts = ena_record.xpath("//ASSEMBLY_ATTRIBUTE[TAG='count-patches']/VALUE")
    if patch_counts:
        # Only the first matching VALUE element is considered
        return int(patch_counts[0].text) > 0
    return False
|
|
41
|
+
|
|
42
|
+
|
|
30
43
|
def retrieve_genbank_assembly_accessions_from_ncbi(assembly_txt):
|
|
31
44
|
"""
|
|
32
45
|
Attempt to find any assembly GenBank accession based on a free text search.
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
|
|
14
|
+
import logging
|
|
15
15
|
import subprocess
|
|
16
16
|
|
|
17
17
|
from ebi_eva_common_pyutils.logger import logging_config as log_cfg
|
|
@@ -20,11 +20,12 @@ logger = log_cfg.get_logger(__name__)
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
def run_command_with_output(command_description, command, return_process_output=False,
|
|
23
|
-
log_error_stream_to_output=False
|
|
23
|
+
log_error_stream_to_output=False, stdout_log_level=logging.INFO,
|
|
24
|
+
stderr_log_level=logging.ERROR):
|
|
24
25
|
process_output = ""
|
|
25
26
|
|
|
26
|
-
logger.
|
|
27
|
-
logger.
|
|
27
|
+
logger.log(stdout_log_level, "Starting process: " + command_description)
|
|
28
|
+
logger.log(stdout_log_level, "Running command: " + command)
|
|
28
29
|
|
|
29
30
|
stdout = subprocess.PIPE
|
|
30
31
|
# Some lame utilities like mongodump and mongorestore output non-error messages to error stream
|
|
@@ -35,18 +36,18 @@ def run_command_with_output(command_description, command, return_process_output=
|
|
|
35
36
|
shell=True) as process:
|
|
36
37
|
for line in iter(process.stdout.readline, ''):
|
|
37
38
|
line = str(line).rstrip()
|
|
38
|
-
logger.
|
|
39
|
+
logger.log(stdout_log_level, line)
|
|
39
40
|
if return_process_output:
|
|
40
41
|
process_output += line + "\n"
|
|
41
42
|
if not log_error_stream_to_output:
|
|
42
43
|
for line in iter(process.stderr.readline, ''):
|
|
43
44
|
line = str(line).rstrip()
|
|
44
|
-
logger.
|
|
45
|
+
logger.log(stderr_log_level, line)
|
|
45
46
|
if process.returncode != 0:
|
|
46
47
|
logger.error(command_description + " failed! Refer to the error messages for details.")
|
|
47
48
|
raise subprocess.CalledProcessError(process.returncode, process.args)
|
|
48
49
|
else:
|
|
49
|
-
logger.
|
|
50
|
+
logger.log(stdout_log_level, command_description + " - completed successfully")
|
|
50
51
|
if return_process_output:
|
|
51
52
|
return process_output
|
|
52
53
|
|
ebi_eva_common_pyutils/config.py
CHANGED
|
@@ -73,3 +73,80 @@ cfg = Configuration()
|
|
|
73
73
|
"""
|
|
74
74
|
Provides a singleton that can be used as a central place for configuration.
|
|
75
75
|
"""
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class WritableConfig(Configuration):
    """Configuration object that allows writes to the config file.

    On top of the read access inherited from Configuration, it can modify nested keys
    (``set``/``pop``/``config[key] = value``), rotate backups of the config file (``backup``)
    and dump the content back to disk as YAML (``write``).
    """

    def __init__(self, *search_path, version=None):
        """
        :param search_path: candidate config file paths, forwarded to the parent constructor
        :param version: optional version stored under the 'version' key when an empty
                        config starts being populated (see ``_set_version``)
        """
        super().__init__(*search_path)
        self.version = version

    def load_config_file(self, *search_path):
        """
        Load the config through the parent implementation; if that raises FileNotFoundError,
        remember the first search path as the file that will be created later on.
        """
        try:
            super().load_config_file(*search_path)
        except FileNotFoundError:
            # expected if it's the first time we are creating the config file
            # In that case the first search path is set to be the config file
            self.config_file = search_path[0]

    def backup(self):
        """
        Rename the config file by adding a '.1' at the end. If the '.1' file exists it is moved to '.2' and so on.
        Nothing happens when the config file does not exist.
        """
        if os.path.isfile(self.config_file):
            file_name = self.config_file
            # Find the first backup suffix that is not in use yet
            suffix = 1
            backup_name = f'{file_name}.{suffix}'
            while os.path.exists(backup_name):
                suffix += 1
                backup_name = f'{file_name}.{suffix}'

            # Shift existing backups up by one, starting from the oldest so that nothing gets overwritten
            for i in range(suffix, 1, -1):
                os.rename(f'{file_name}.{i - 1}', f'{file_name}.{i}')
            os.rename(file_name, file_name + '.1')

    def write(self):
        """
        Dump the content as YAML to the config file.

        Nothing is written when no config file is set, when the content is empty or when the
        directory that should hold the config file does not exist.
        """
        if not (self.config_file and self.content):
            return
        # os.path.dirname returns '' for a bare file name: fall back to the current directory so that
        # a config file located in the working directory is written instead of being silently skipped
        config_dir = os.path.dirname(self.config_file) or os.curdir
        if os.path.isdir(config_dir):
            with open(self.config_file, 'w') as open_config:
                yaml.safe_dump(self.content, open_config)

    def set(self, *path, value):
        """
        Set ``value`` at the nested location described by ``path``, creating the intermediate
        dictionaries that are missing, e.g. ``config.set('a', 'b', value=1)``.
        """
        self._set_version()
        top_level = self.content
        for p in path[:-1]:
            if p not in top_level:
                top_level[p] = {}
            top_level = top_level[p]
        top_level[path[-1]] = value

    def pop(self, *path, default=None):
        """Recursive dictionary pop with default: ``default`` is returned when any key along ``path`` is missing"""
        top_level = self.content
        for p in path[:-1]:
            if p not in top_level:
                return default
            top_level = top_level[p]
        return top_level.pop(path[-1], default)

    def is_empty(self):
        """Return True when the configuration holds no content"""
        return not self.content

    def clear(self):
        """Replace the content with an empty dictionary (the file on disk is not touched)"""
        self.content = {}

    def _set_version(self):
        # If we're starting to fill in an empty config, set the version if available
        if self.is_empty() and self.version:
            self.content['version'] = self.version

    def __contains__(self, item):
        """Allow ``item in config`` checks against the top-level keys"""
        return item in self.content

    def __setitem__(self, item, value):
        """Allow dict-style write access, e.g. config['this']='that'."""
        self._set_version()
        self.content[item] = value
|
|
@@ -4,7 +4,7 @@ from retry import retry
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
@retry(tries=3, delay=2, backoff=1.2, jitter=(1, 3))
|
|
7
|
-
def download_xml_from_ena(ena_url):
|
|
7
|
+
def download_xml_from_ena(ena_url) -> etree.XML:
|
|
8
8
|
"""Download and parse XML from ENA"""
|
|
9
9
|
try: # catches any kind of request error, including non-20X status code
|
|
10
10
|
response = requests.get(ena_url)
|
ebi_eva_common_pyutils/logger.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
ebi_eva_common_pyutils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
ebi_eva_common_pyutils/assembly_utils.py,sha256=
|
|
3
|
-
ebi_eva_common_pyutils/command_utils.py,sha256=
|
|
2
|
+
ebi_eva_common_pyutils/assembly_utils.py,sha256=Lc1bDf0xmi-AF6eyM7pWMgsjnxZmpVHvF3E5aEiKBEI,4018
|
|
3
|
+
ebi_eva_common_pyutils/command_utils.py,sha256=PtelWWqcC0eOwIVesjwBw3F9KaXRzEE_uAUJhQFZ4l8,2340
|
|
4
4
|
ebi_eva_common_pyutils/common_utils.py,sha256=ty_glvfRa3VGhnpAht4qtVkNNmv-IYfVtO958mY-BaA,1192
|
|
5
|
-
ebi_eva_common_pyutils/config.py,sha256=
|
|
6
|
-
ebi_eva_common_pyutils/ena_utils.py,sha256=
|
|
5
|
+
ebi_eva_common_pyutils/config.py,sha256=PtD2SgHf96kk21OA9tVIjEgsDXEFuAU-INy_kfQdoPw,4828
|
|
6
|
+
ebi_eva_common_pyutils/ena_utils.py,sha256=S2MmnWQ_9MJjlkaQY_by1-GGbTyi8SKp8XRcpjWnpZs,1465
|
|
7
7
|
ebi_eva_common_pyutils/file_utils.py,sha256=eIlQKSVKkEjMNX7emrDzaQyQdGvQdb64gnfEhb6uYsE,1375
|
|
8
|
-
ebi_eva_common_pyutils/logger.py,sha256=
|
|
8
|
+
ebi_eva_common_pyutils/logger.py,sha256=hT20ktN_oGeB_5ofVfd_aGXB6xYOe0Y5b3EVRYCuFb8,5093
|
|
9
9
|
ebi_eva_common_pyutils/ncbi_utils.py,sha256=sh9E_oDhDtliihJ0-FZuDd7paeidW5GfqRr5vBTo2k4,4859
|
|
10
10
|
ebi_eva_common_pyutils/network_utils.py,sha256=_Qf5oNONA4pUzvxfdk25G-dKuWe44aC_Hewvdp-VZyU,2285
|
|
11
11
|
ebi_eva_common_pyutils/assembly/__init__.py,sha256=KSWPwBY5nZj00odxWFntk8Sqg_rw273xH8S5D6Jo-T4,67
|
|
@@ -19,20 +19,20 @@ ebi_eva_common_pyutils/taxonomy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
|
|
|
19
19
|
ebi_eva_common_pyutils/taxonomy/taxonomy.py,sha256=p3XV4g3y0hEjyeZ4PwgN7Q3Et9G515ctQkSIo1kdDbU,2259
|
|
20
20
|
ebi_eva_common_pyutils/variation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
21
|
ebi_eva_common_pyutils/variation/contig_utils.py,sha256=kMNEW_P2yPnd8Xx1tep19hy5ee7ojxz6ZOO1grTQsRQ,5230
|
|
22
|
-
ebi_eva_common_pyutils-0.6.
|
|
22
|
+
ebi_eva_common_pyutils-0.6.4.data/scripts/archive_directory.py,sha256=0lWJ0ju_AB2ni7lMnJXPFx6U2OdTGbe-WoQs-4BfKOM,4976
|
|
23
23
|
ebi_eva_internal_pyutils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
24
|
ebi_eva_internal_pyutils/archive_directory.py,sha256=IxVEfh_gaCiT652k0Q_-58fonRusy1yzXu7BCO8yVLo,4989
|
|
25
25
|
ebi_eva_internal_pyutils/config_utils.py,sha256=EGRC5rsmU_ug7OY9-t1UW1XZXRsauSyZB9xPcBux8ts,7909
|
|
26
|
-
ebi_eva_internal_pyutils/metadata_utils.py,sha256=
|
|
26
|
+
ebi_eva_internal_pyutils/metadata_utils.py,sha256=NQVD0_-S1EBb0SnlKX0w31g4Me_8VGXYJTDlMhzje9w,15090
|
|
27
27
|
ebi_eva_internal_pyutils/mongo_utils.py,sha256=YxKHtb5ygDiGLOtEiiAMFCP2ow6FL9Kq0K5R0mWNdXY,3575
|
|
28
28
|
ebi_eva_internal_pyutils/pg_utils.py,sha256=FUQVwiX_7F2-4sSzoaCVX2me0zAqR8nGIj6NW5d304A,4398
|
|
29
29
|
ebi_eva_internal_pyutils/spring_properties.py,sha256=WjPozWtXbAZGNqlgvY6GHps2KFB1rY9OaTs46obW3pM,15265
|
|
30
30
|
ebi_eva_internal_pyutils/mongodb/__init__.py,sha256=0oyTlkYZCV7udlPl09Zl-sDyE3c97QZMMTEFIa6uYIw,76
|
|
31
|
-
ebi_eva_internal_pyutils/mongodb/mongo_database.py,sha256=
|
|
31
|
+
ebi_eva_internal_pyutils/mongodb/mongo_database.py,sha256=kesaJaaxYFeF_uYZBgL8tbufGKUXll7bXb4WlOS9vKM,9596
|
|
32
32
|
ebi_eva_internal_pyutils/nextflow/__init__.py,sha256=OOiJS8jZOz98q0t77NNog7aI_fFrVxi4kGmiSskuAqM,122
|
|
33
33
|
ebi_eva_internal_pyutils/nextflow/nextflow_pipeline.py,sha256=ew623hhK8jmFLQjJwLZbgBmW9RTiJBEULVqHfIUv_dc,10114
|
|
34
|
-
ebi_eva_common_pyutils-0.6.
|
|
35
|
-
ebi_eva_common_pyutils-0.6.
|
|
36
|
-
ebi_eva_common_pyutils-0.6.
|
|
37
|
-
ebi_eva_common_pyutils-0.6.
|
|
38
|
-
ebi_eva_common_pyutils-0.6.
|
|
34
|
+
ebi_eva_common_pyutils-0.6.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
35
|
+
ebi_eva_common_pyutils-0.6.4.dist-info/METADATA,sha256=z3BrpkHtxmrrusChh4Oawx8m6tAeRLZpXLlN8pnu3ic,824
|
|
36
|
+
ebi_eva_common_pyutils-0.6.4.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
37
|
+
ebi_eva_common_pyutils-0.6.4.dist-info/top_level.txt,sha256=sXoiqiGU8vlMQpFWDlKrekxhlusk06AhkOH3kSvDT6c,48
|
|
38
|
+
ebi_eva_common_pyutils-0.6.4.dist-info/RECORD,,
|
|
@@ -17,6 +17,7 @@ from urllib.parse import urlsplit
|
|
|
17
17
|
|
|
18
18
|
import psycopg2
|
|
19
19
|
|
|
20
|
+
from ebi_eva_common_pyutils.assembly_utils import is_patch_assembly
|
|
20
21
|
from ebi_eva_internal_pyutils.config_utils import get_metadata_creds_for_profile
|
|
21
22
|
from ebi_eva_common_pyutils.ena_utils import get_scientific_name_and_common_name
|
|
22
23
|
from ebi_eva_common_pyutils.logger import logging_config
|
|
@@ -120,6 +121,9 @@ def get_assembly_code(metadata_connection_handle, assembly):
|
|
|
120
121
|
assembly_code = get_assembly_code_from_metadata(metadata_connection_handle, assembly)
|
|
121
122
|
if not assembly_code:
|
|
122
123
|
assembly_name = get_ncbi_assembly_name_from_term(assembly)
|
|
124
|
+
# If the assembly is a patch assembly (e.g. GRCh37.p8), drop the trailing patch suffix so that the code is just grch37
|
|
125
|
+
if is_patch_assembly(assembly):
|
|
126
|
+
assembly_name = re.sub('\\.p[0-9]+$', '', assembly_name.lower())
|
|
123
127
|
assembly_code = re.sub('[^0-9a-zA-Z]+', '', assembly_name.lower())
|
|
124
128
|
return assembly_code
|
|
125
129
|
|
|
@@ -111,9 +111,7 @@ class MongoDatabase(AppLogger):
|
|
|
111
111
|
shard_collection_command = f'sh.shardCollection(' \
|
|
112
112
|
f'"{self.db_name}.{collection_name}", ' \
|
|
113
113
|
f'{shard_key_repr}, {str(shard_key_uniqueness_flag).lower()})'
|
|
114
|
-
sharding_command = f"
|
|
115
|
-
f"--eval " \
|
|
116
|
-
f"'{shard_collection_command}' "
|
|
114
|
+
sharding_command = f"mongosh --eval '{shard_collection_command}' {self.uri} "
|
|
117
115
|
sharding_command += self._get_optional_secrets_file_stdin()
|
|
118
116
|
run_command_with_output(f"Sharding collection {collection_name} in the database {self.uri_with_db_name} "
|
|
119
117
|
f"with key {shard_key_repr}...", sharding_command,
|
{ebi_eva_common_pyutils-0.6.0.data → ebi_eva_common_pyutils-0.6.4.data}/scripts/archive_directory.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ebi_eva_common_pyutils-0.6.0.dist-info → ebi_eva_common_pyutils-0.6.4.dist-info}/top_level.txt
RENAMED
|
File without changes
|