ebi-eva-common-pyutils 0.6.4__py3-none-any.whl → 0.6.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -40,16 +40,20 @@ def is_patch_assembly(assembly_accession: str) -> bool:
40
40
  return int(xml_assembly[0].text) > 0
41
41
 
42
42
 
43
- def retrieve_genbank_assembly_accessions_from_ncbi(assembly_txt):
43
+ def retrieve_genbank_assembly_accessions_from_ncbi(assembly_txt, api_key=None):
44
44
  """
45
45
  Attempt to find any assembly genebank accession base on a free text search.
46
46
  """
47
47
  assembly_accessions = set()
48
48
  payload = {'db': 'Assembly', 'term': '"{}"'.format(assembly_txt), 'retmode': 'JSON'}
49
+ if api_key:
50
+ payload['api_key'] = api_key
49
51
  data = requests.get(ESEARCH_URL, params=payload).json()
50
52
  if data and data.get('esearchresult', {}).get('idlist'):
51
53
  assembly_id_list = data.get('esearchresult').get('idlist')
52
54
  payload = {'db': 'Assembly', 'id': ','.join(assembly_id_list), 'retmode': 'JSON'}
55
+ if api_key:
56
+ payload['api_key'] = api_key
53
57
  summary_list = requests.get(ESUMMARY_URL, params=payload).json()
54
58
  for assembly_id in summary_list.get('result', {}).get('uids', []):
55
59
  assembly_info = summary_list.get('result').get(assembly_id)
@@ -60,8 +64,8 @@ def retrieve_genbank_assembly_accessions_from_ncbi(assembly_txt):
60
64
  return list(assembly_accessions)
61
65
 
62
66
 
63
- def retrieve_genbank_equivalent_for_GCF_accession(assembly_accession):
64
- genbank_synonyms = retrieve_genbank_assembly_accessions_from_ncbi(assembly_accession)
67
+ def retrieve_genbank_equivalent_for_GCF_accession(assembly_accession, ncbi_api_key=None):
68
+ genbank_synonyms = retrieve_genbank_assembly_accessions_from_ncbi(assembly_accession, api_key=ncbi_api_key)
65
69
  if len(genbank_synonyms) != 1:
66
70
  raise ValueError('%s Genbank synonyms found for assembly %s ' % (len(genbank_synonyms), assembly_accession))
67
71
  return genbank_synonyms.pop()
@@ -69,8 +69,8 @@ def get_ncbi_taxonomy_dicts_from_ids(taxonomy_ids, api_key=None):
69
69
  return taxonomy_dicts
70
70
 
71
71
 
72
- def get_ncbi_assembly_name_from_term(term):
73
- assembl_dicts = get_ncbi_assembly_dicts_from_term(term)
72
+ def get_ncbi_assembly_name_from_term(term, api_key=None):
73
+ assembl_dicts = get_ncbi_assembly_dicts_from_term(term, api_key=api_key)
74
74
  assembly_names = set([d.get('assemblyname') for d in assembl_dicts])
75
75
  if len(assembly_names) > 1:
76
76
  # Only keep the one that have the assembly accession as a synonymous and check again
@@ -82,8 +82,10 @@ def get_ncbi_assembly_name_from_term(term):
82
82
  return assembly_names.pop() if assembly_names else None
83
83
 
84
84
 
85
- def retrieve_species_scientific_name_from_tax_id_ncbi(taxid):
85
+ def retrieve_species_scientific_name_from_tax_id_ncbi(taxid, api_key=None):
86
86
  payload = {'db': 'Taxonomy', 'id': taxid}
87
+ if api_key:
88
+ payload['api_key'] = api_key
87
89
  r = requests.get(efetch_url, params=payload)
88
90
  match = re.search('<Rank>(.+?)</Rank>', r.text, re.MULTILINE)
89
91
  rank = None
@@ -96,9 +98,9 @@ def retrieve_species_scientific_name_from_tax_id_ncbi(taxid):
96
98
  return match.group(1)
97
99
 
98
100
 
99
- def get_species_name_from_ncbi(assembly_acc):
101
+ def get_species_name_from_ncbi(assembly_acc, api_key=None):
100
102
  # We first need to search for the species associated with the assembly
101
- assembly_dicts = get_ncbi_assembly_dicts_from_term(assembly_acc)
103
+ assembly_dicts = get_ncbi_assembly_dicts_from_term(assembly_acc, api_key=api_key)
102
104
  taxids = set([assembly_dict.get('taxid')
103
105
  for assembly_dict in assembly_dicts
104
106
  if assembly_dict.get('assemblyaccession') == assembly_acc or
@@ -111,5 +113,5 @@ def get_species_name_from_ncbi(assembly_acc):
111
113
 
112
114
  taxonomy_id = taxids.pop()
113
115
 
114
- scientific_name = retrieve_species_scientific_name_from_tax_id_ncbi(taxonomy_id)
116
+ scientific_name = retrieve_species_scientific_name_from_tax_id_ncbi(taxonomy_id, api_key=api_key)
115
117
  return scientific_name.replace(' ', '_').lower()
@@ -11,7 +11,7 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
-
14
+ import time
15
15
 
16
16
  import requests
17
17
  import subprocess
@@ -43,6 +43,13 @@ def forward_remote_port_to_local_port(remote_host: str, remote_port: int, local_
43
43
  port_forward_command = 'ssh -N -L{0}:localhost:{1} {2}'.format(local_port, remote_port, remote_host)
44
44
  logger.info("Forwarding port to local port using command: " + port_forward_command)
45
45
  proc = subprocess.Popen(port_forward_command.split(" "))
46
+ time.sleep(5)
47
+ # Ensure that the process is still running
48
+ poll = proc.poll()
49
+ if poll is not None:
50
+ # The process already completed which mean it most likely crashed
51
+ logger.error(f'Port Forwarding {remote_host}:{remote_port} -> {local_port} failed!')
52
+ raise subprocess.CalledProcessError(proc.returncode, proc.args)
46
53
  return proc.pid
47
54
 
48
55
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ebi-eva-common-pyutils
3
- Version: 0.6.4
3
+ Version: 0.6.6
4
4
  Summary: EBI EVA - Common Python Utilities
5
5
  Home-page: https://github.com/EBIVariation/eva-common-pyutils
6
6
  License: Apache
@@ -1,13 +1,13 @@
1
1
  ebi_eva_common_pyutils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- ebi_eva_common_pyutils/assembly_utils.py,sha256=Lc1bDf0xmi-AF6eyM7pWMgsjnxZmpVHvF3E5aEiKBEI,4018
2
+ ebi_eva_common_pyutils/assembly_utils.py,sha256=CklyCGlCjlFp0e9pugg6kSsh5L0xfCe2qPvA2eLVtn0,4187
3
3
  ebi_eva_common_pyutils/command_utils.py,sha256=PtelWWqcC0eOwIVesjwBw3F9KaXRzEE_uAUJhQFZ4l8,2340
4
4
  ebi_eva_common_pyutils/common_utils.py,sha256=ty_glvfRa3VGhnpAht4qtVkNNmv-IYfVtO958mY-BaA,1192
5
5
  ebi_eva_common_pyutils/config.py,sha256=PtD2SgHf96kk21OA9tVIjEgsDXEFuAU-INy_kfQdoPw,4828
6
6
  ebi_eva_common_pyutils/ena_utils.py,sha256=S2MmnWQ_9MJjlkaQY_by1-GGbTyi8SKp8XRcpjWnpZs,1465
7
7
  ebi_eva_common_pyutils/file_utils.py,sha256=eIlQKSVKkEjMNX7emrDzaQyQdGvQdb64gnfEhb6uYsE,1375
8
8
  ebi_eva_common_pyutils/logger.py,sha256=hT20ktN_oGeB_5ofVfd_aGXB6xYOe0Y5b3EVRYCuFb8,5093
9
- ebi_eva_common_pyutils/ncbi_utils.py,sha256=sh9E_oDhDtliihJ0-FZuDd7paeidW5GfqRr5vBTo2k4,4859
10
- ebi_eva_common_pyutils/network_utils.py,sha256=_Qf5oNONA4pUzvxfdk25G-dKuWe44aC_Hewvdp-VZyU,2285
9
+ ebi_eva_common_pyutils/ncbi_utils.py,sha256=AKuNKv4ue7l-6rwuDDGubWEHGuscXWfv-Gg6o_x1hT0,5005
10
+ ebi_eva_common_pyutils/network_utils.py,sha256=iJjs5PPzT1V4CceZnCHOTs711AmpwlDo59wb1XHdTak,2648
11
11
  ebi_eva_common_pyutils/assembly/__init__.py,sha256=KSWPwBY5nZj00odxWFntk8Sqg_rw273xH8S5D6Jo-T4,67
12
12
  ebi_eva_common_pyutils/assembly/assembly.py,sha256=IEmleROX4ZchPyhINKCuMmET_Ih1Jg4ok-opAKY6Z9A,3142
13
13
  ebi_eva_common_pyutils/contig_alias/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -19,11 +19,11 @@ ebi_eva_common_pyutils/taxonomy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
19
19
  ebi_eva_common_pyutils/taxonomy/taxonomy.py,sha256=p3XV4g3y0hEjyeZ4PwgN7Q3Et9G515ctQkSIo1kdDbU,2259
20
20
  ebi_eva_common_pyutils/variation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
21
  ebi_eva_common_pyutils/variation/contig_utils.py,sha256=kMNEW_P2yPnd8Xx1tep19hy5ee7ojxz6ZOO1grTQsRQ,5230
22
- ebi_eva_common_pyutils-0.6.4.data/scripts/archive_directory.py,sha256=0lWJ0ju_AB2ni7lMnJXPFx6U2OdTGbe-WoQs-4BfKOM,4976
22
+ ebi_eva_common_pyutils-0.6.6.data/scripts/archive_directory.py,sha256=0lWJ0ju_AB2ni7lMnJXPFx6U2OdTGbe-WoQs-4BfKOM,4976
23
23
  ebi_eva_internal_pyutils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
24
  ebi_eva_internal_pyutils/archive_directory.py,sha256=IxVEfh_gaCiT652k0Q_-58fonRusy1yzXu7BCO8yVLo,4989
25
25
  ebi_eva_internal_pyutils/config_utils.py,sha256=EGRC5rsmU_ug7OY9-t1UW1XZXRsauSyZB9xPcBux8ts,7909
26
- ebi_eva_internal_pyutils/metadata_utils.py,sha256=NQVD0_-S1EBb0SnlKX0w31g4Me_8VGXYJTDlMhzje9w,15090
26
+ ebi_eva_internal_pyutils/metadata_utils.py,sha256=t9PcXZdbfjDBP04GJenC4bxm2nOLd8oI_MP9eNe9IBQ,15221
27
27
  ebi_eva_internal_pyutils/mongo_utils.py,sha256=YxKHtb5ygDiGLOtEiiAMFCP2ow6FL9Kq0K5R0mWNdXY,3575
28
28
  ebi_eva_internal_pyutils/pg_utils.py,sha256=FUQVwiX_7F2-4sSzoaCVX2me0zAqR8nGIj6NW5d304A,4398
29
29
  ebi_eva_internal_pyutils/spring_properties.py,sha256=WjPozWtXbAZGNqlgvY6GHps2KFB1rY9OaTs46obW3pM,15265
@@ -31,8 +31,8 @@ ebi_eva_internal_pyutils/mongodb/__init__.py,sha256=0oyTlkYZCV7udlPl09Zl-sDyE3c9
31
31
  ebi_eva_internal_pyutils/mongodb/mongo_database.py,sha256=kesaJaaxYFeF_uYZBgL8tbufGKUXll7bXb4WlOS9vKM,9596
32
32
  ebi_eva_internal_pyutils/nextflow/__init__.py,sha256=OOiJS8jZOz98q0t77NNog7aI_fFrVxi4kGmiSskuAqM,122
33
33
  ebi_eva_internal_pyutils/nextflow/nextflow_pipeline.py,sha256=ew623hhK8jmFLQjJwLZbgBmW9RTiJBEULVqHfIUv_dc,10114
34
- ebi_eva_common_pyutils-0.6.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
35
- ebi_eva_common_pyutils-0.6.4.dist-info/METADATA,sha256=z3BrpkHtxmrrusChh4Oawx8m6tAeRLZpXLlN8pnu3ic,824
36
- ebi_eva_common_pyutils-0.6.4.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
37
- ebi_eva_common_pyutils-0.6.4.dist-info/top_level.txt,sha256=sXoiqiGU8vlMQpFWDlKrekxhlusk06AhkOH3kSvDT6c,48
38
- ebi_eva_common_pyutils-0.6.4.dist-info/RECORD,,
34
+ ebi_eva_common_pyutils-0.6.6.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
35
+ ebi_eva_common_pyutils-0.6.6.dist-info/METADATA,sha256=w_We0B6eam8FRWLiTjedqj7rCvQCSf8w16KsC45XMlI,824
36
+ ebi_eva_common_pyutils-0.6.6.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
37
+ ebi_eva_common_pyutils-0.6.6.dist-info/top_level.txt,sha256=sXoiqiGU8vlMQpFWDlKrekxhlusk06AhkOH3kSvDT6c,48
38
+ ebi_eva_common_pyutils-0.6.6.dist-info/RECORD,,
@@ -117,10 +117,10 @@ def resolve_existing_variant_warehouse_db_name(metadata_connection_handle, assem
117
117
  get_variant_warehouse_db_name_from_assembly_and_taxonomy = resolve_existing_variant_warehouse_db_name
118
118
 
119
119
 
120
- def get_assembly_code(metadata_connection_handle, assembly):
120
+ def get_assembly_code(metadata_connection_handle, assembly, ncbi_api_key=None):
121
121
  assembly_code = get_assembly_code_from_metadata(metadata_connection_handle, assembly)
122
122
  if not assembly_code:
123
- assembly_name = get_ncbi_assembly_name_from_term(assembly)
123
+ assembly_name = get_ncbi_assembly_name_from_term(assembly, api_key=ncbi_api_key)
124
124
  # If the assembly is a patch assembly ex: GRCh37.p8, drop the trailing patch i.e., just return grch37
125
125
  if is_patch_assembly(assembly):
126
126
  assembly_name = re.sub('\\.p[0-9]+$', '', assembly_name.lower())
@@ -141,18 +141,18 @@ def get_taxonomy_code(metadata_connection_handle, taxonomy):
141
141
  return taxonomy_code
142
142
 
143
143
 
144
- def resolve_variant_warehouse_db_name(metadata_connection_handle, assembly, taxonomy):
144
+ def resolve_variant_warehouse_db_name(metadata_connection_handle, assembly, taxonomy, ncbi_api_key=None):
145
145
  """
146
146
  Retrieve the database name for this taxonomy/assembly pair whether it exists or not.
147
147
  It will use existing taxonomy code or assembly code if available in the metadata database.
148
148
  """
149
149
  taxonomy_code = get_taxonomy_code(metadata_connection_handle, taxonomy)
150
- assembly_code = get_assembly_code(metadata_connection_handle, assembly)
150
+ assembly_code = get_assembly_code(metadata_connection_handle, assembly, ncbi_api_key=ncbi_api_key)
151
151
  return build_variant_warehouse_database_name(taxonomy_code, assembly_code)
152
152
 
153
153
 
154
154
  def insert_new_assembly_and_taxonomy(metadata_connection_handle, assembly_accession, taxonomy_id, eva_species_name=None,
155
- in_accessioning=True):
155
+ in_accessioning=True, ncbi_api_key=None):
156
156
  """
157
157
  This script adds new assemblies and taxonomies to EVAPRO.
158
158
  You can also add the assembly with a different taxonomy if you provide the
@@ -166,11 +166,10 @@ def insert_new_assembly_and_taxonomy(metadata_connection_handle, assembly_access
166
166
  Not required if the taxonomy exists or ENA has a common name available.
167
167
  :param in_accessioning: Flag that this assembly is in the accessioning data store.
168
168
  """
169
- assembly_name = get_ncbi_assembly_name_from_term(assembly_accession)
170
-
171
169
  # check if assembly is already in EVAPRO, adding it if not
172
170
  assembly_set_id = get_assembly_set_from_metadata(metadata_connection_handle, taxonomy_id, assembly_accession)
173
171
  if assembly_set_id is None:
172
+ assembly_name = get_ncbi_assembly_name_from_term(assembly_accession, api_key=ncbi_api_key)
174
173
  ensure_taxonomy_is_in_evapro(metadata_connection_handle, taxonomy_id, eva_species_name)
175
174
  assembly_code = get_assembly_code(metadata_connection_handle, assembly_accession)
176
175
  insert_assembly_in_evapro(metadata_connection_handle, taxonomy_id, assembly_accession, assembly_name, assembly_code)