ebi-eva-common-pyutils 0.6.5__tar.gz → 0.6.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/CHANGELOG.md +6 -0
  2. {ebi_eva_common_pyutils-0.6.5/ebi_eva_common_pyutils.egg-info → ebi_eva_common_pyutils-0.6.6}/PKG-INFO +2 -10
  3. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_common_pyutils/assembly_utils.py +7 -3
  4. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_common_pyutils/ncbi_utils.py +8 -6
  5. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6/ebi_eva_common_pyutils.egg-info}/PKG-INFO +3 -11
  6. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_common_pyutils.egg-info/SOURCES.txt +1 -3
  7. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_internal_pyutils/metadata_utils.py +6 -7
  8. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/setup.py +1 -1
  9. ebi_eva_common_pyutils-0.6.5/tests/test_common.py +0 -22
  10. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/LICENSE +0 -0
  11. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/MANIFEST.in +0 -0
  12. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/README.md +0 -0
  13. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_common_pyutils/__init__.py +0 -0
  14. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_common_pyutils/assembly/__init__.py +0 -0
  15. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_common_pyutils/assembly/assembly.py +0 -0
  16. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_common_pyutils/command_utils.py +0 -0
  17. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_common_pyutils/common_utils.py +0 -0
  18. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_common_pyutils/config.py +0 -0
  19. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_common_pyutils/contig_alias/__init__.py +0 -0
  20. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_common_pyutils/contig_alias/contig_alias.py +0 -0
  21. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_common_pyutils/ena_utils.py +0 -0
  22. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_common_pyutils/file_utils.py +0 -0
  23. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_common_pyutils/logger.py +0 -0
  24. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_common_pyutils/network_utils.py +0 -0
  25. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_common_pyutils/reference/__init__.py +0 -0
  26. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_common_pyutils/reference/assembly.py +0 -0
  27. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_common_pyutils/reference/sequence.py +0 -0
  28. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_common_pyutils/taxonomy/__init__.py +0 -0
  29. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_common_pyutils/taxonomy/taxonomy.py +0 -0
  30. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_common_pyutils/variation/__init__.py +0 -0
  31. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_common_pyutils/variation/contig_utils.py +0 -0
  32. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_common_pyutils.egg-info/dependency_links.txt +0 -0
  33. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_common_pyutils.egg-info/requires.txt +0 -0
  34. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_common_pyutils.egg-info/top_level.txt +0 -0
  35. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_internal_pyutils/__init__.py +0 -0
  36. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_internal_pyutils/archive_directory.py +0 -0
  37. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_internal_pyutils/config_utils.py +0 -0
  38. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_internal_pyutils/mongo_utils.py +0 -0
  39. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_internal_pyutils/mongodb/__init__.py +0 -0
  40. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_internal_pyutils/mongodb/mongo_database.py +0 -0
  41. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_internal_pyutils/nextflow/__init__.py +0 -0
  42. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_internal_pyutils/nextflow/nextflow_pipeline.py +0 -0
  43. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_internal_pyutils/pg_utils.py +0 -0
  44. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/ebi_eva_internal_pyutils/spring_properties.py +0 -0
  45. {ebi_eva_common_pyutils-0.6.5 → ebi_eva_common_pyutils-0.6.6}/setup.cfg +0 -0
@@ -2,6 +2,12 @@
2
2
  Changelog for ebi_eva_common_pyutils
3
3
  ===========================
4
4
 
5
+ ## 0.6.6 (2024-04-15)
6
+ ---------------------
7
+
8
+ - Allow NCBI functions to use API key
9
+
10
+
5
11
  ## 0.6.5 (2024-04-02)
6
12
  ---------------------
7
13
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ebi_eva_common_pyutils
3
- Version: 0.6.5
3
+ Version: 0.6.6
4
4
  Summary: EBI EVA - Common Python Utilities
5
5
  Home-page: https://github.com/EBIVariation/eva-common-pyutils
6
6
  License: Apache
@@ -10,13 +10,5 @@ Classifier: Intended Audience :: Developers
10
10
  Classifier: Topic :: Software Development :: Build Tools
11
11
  Classifier: License :: OSI Approved :: Apache Software License
12
12
  Classifier: Programming Language :: Python :: 3
13
- License-File: LICENSE
14
- Requires-Dist: requests
15
- Requires-Dist: lxml
16
- Requires-Dist: pyyaml
17
- Requires-Dist: cached-property
18
- Requires-Dist: retry
19
13
  Provides-Extra: eva-internal
20
- Requires-Dist: psycopg2-binary; extra == "eva-internal"
21
- Requires-Dist: pymongo; extra == "eva-internal"
22
- Requires-Dist: networkx<=2.5; extra == "eva-internal"
14
+ License-File: LICENSE
@@ -40,16 +40,20 @@ def is_patch_assembly(assembly_accession: str) -> bool:
40
40
  return int(xml_assembly[0].text) > 0
41
41
 
42
42
 
43
- def retrieve_genbank_assembly_accessions_from_ncbi(assembly_txt):
43
+ def retrieve_genbank_assembly_accessions_from_ncbi(assembly_txt, api_key=None):
44
44
  """
45
45
  Attempt to find any assembly GenBank accession based on a free text search.
46
46
  """
47
47
  assembly_accessions = set()
48
48
  payload = {'db': 'Assembly', 'term': '"{}"'.format(assembly_txt), 'retmode': 'JSON'}
49
+ if api_key:
50
+ payload['api_key'] = api_key
49
51
  data = requests.get(ESEARCH_URL, params=payload).json()
50
52
  if data and data.get('esearchresult', {}).get('idlist'):
51
53
  assembly_id_list = data.get('esearchresult').get('idlist')
52
54
  payload = {'db': 'Assembly', 'id': ','.join(assembly_id_list), 'retmode': 'JSON'}
55
+ if api_key:
56
+ payload['api_key'] = api_key
53
57
  summary_list = requests.get(ESUMMARY_URL, params=payload).json()
54
58
  for assembly_id in summary_list.get('result', {}).get('uids', []):
55
59
  assembly_info = summary_list.get('result').get(assembly_id)
@@ -60,8 +64,8 @@ def retrieve_genbank_assembly_accessions_from_ncbi(assembly_txt):
60
64
  return list(assembly_accessions)
61
65
 
62
66
 
63
- def retrieve_genbank_equivalent_for_GCF_accession(assembly_accession):
64
- genbank_synonyms = retrieve_genbank_assembly_accessions_from_ncbi(assembly_accession)
67
+ def retrieve_genbank_equivalent_for_GCF_accession(assembly_accession, ncbi_api_key=None):
68
+ genbank_synonyms = retrieve_genbank_assembly_accessions_from_ncbi(assembly_accession, api_key=ncbi_api_key)
65
69
  if len(genbank_synonyms) != 1:
66
70
  raise ValueError('%s Genbank synonyms found for assembly %s ' % (len(genbank_synonyms), assembly_accession))
67
71
  return genbank_synonyms.pop()
@@ -69,8 +69,8 @@ def get_ncbi_taxonomy_dicts_from_ids(taxonomy_ids, api_key=None):
69
69
  return taxonomy_dicts
70
70
 
71
71
 
72
- def get_ncbi_assembly_name_from_term(term):
73
- assembl_dicts = get_ncbi_assembly_dicts_from_term(term)
72
+ def get_ncbi_assembly_name_from_term(term, api_key=None):
73
+ assembl_dicts = get_ncbi_assembly_dicts_from_term(term, api_key=api_key)
74
74
  assembly_names = set([d.get('assemblyname') for d in assembl_dicts])
75
75
  if len(assembly_names) > 1:
76
76
  # Only keep the ones that have the assembly accession as a synonym and check again
@@ -82,8 +82,10 @@ def get_ncbi_assembly_name_from_term(term):
82
82
  return assembly_names.pop() if assembly_names else None
83
83
 
84
84
 
85
- def retrieve_species_scientific_name_from_tax_id_ncbi(taxid):
85
+ def retrieve_species_scientific_name_from_tax_id_ncbi(taxid, api_key=None):
86
86
  payload = {'db': 'Taxonomy', 'id': taxid}
87
+ if api_key:
88
+ payload['api_key'] = api_key
87
89
  r = requests.get(efetch_url, params=payload)
88
90
  match = re.search('<Rank>(.+?)</Rank>', r.text, re.MULTILINE)
89
91
  rank = None
@@ -96,9 +98,9 @@ def retrieve_species_scientific_name_from_tax_id_ncbi(taxid):
96
98
  return match.group(1)
97
99
 
98
100
 
99
- def get_species_name_from_ncbi(assembly_acc):
101
+ def get_species_name_from_ncbi(assembly_acc, api_key=None):
100
102
  # We first need to search for the species associated with the assembly
101
- assembly_dicts = get_ncbi_assembly_dicts_from_term(assembly_acc)
103
+ assembly_dicts = get_ncbi_assembly_dicts_from_term(assembly_acc, api_key=api_key)
102
104
  taxids = set([assembly_dict.get('taxid')
103
105
  for assembly_dict in assembly_dicts
104
106
  if assembly_dict.get('assemblyaccession') == assembly_acc or
@@ -111,5 +113,5 @@ def get_species_name_from_ncbi(assembly_acc):
111
113
 
112
114
  taxonomy_id = taxids.pop()
113
115
 
114
- scientific_name = retrieve_species_scientific_name_from_tax_id_ncbi(taxonomy_id)
116
+ scientific_name = retrieve_species_scientific_name_from_tax_id_ncbi(taxonomy_id, api_key=api_key)
115
117
  return scientific_name.replace(' ', '_').lower()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
- Name: ebi_eva_common_pyutils
3
- Version: 0.6.5
2
+ Name: ebi-eva-common-pyutils
3
+ Version: 0.6.6
4
4
  Summary: EBI EVA - Common Python Utilities
5
5
  Home-page: https://github.com/EBIVariation/eva-common-pyutils
6
6
  License: Apache
@@ -10,13 +10,5 @@ Classifier: Intended Audience :: Developers
10
10
  Classifier: Topic :: Software Development :: Build Tools
11
11
  Classifier: License :: OSI Approved :: Apache Software License
12
12
  Classifier: Programming Language :: Python :: 3
13
- License-File: LICENSE
14
- Requires-Dist: requests
15
- Requires-Dist: lxml
16
- Requires-Dist: pyyaml
17
- Requires-Dist: cached-property
18
- Requires-Dist: retry
19
13
  Provides-Extra: eva-internal
20
- Requires-Dist: psycopg2-binary; extra == "eva-internal"
21
- Requires-Dist: pymongo; extra == "eva-internal"
22
- Requires-Dist: networkx<=2.5; extra == "eva-internal"
14
+ License-File: LICENSE
@@ -4,7 +4,6 @@ MANIFEST.in
4
4
  README.md
5
5
  setup.cfg
6
6
  setup.py
7
- /tmp/ebi_eva_common_pyutils-v0.6.5-999yjogj/gitclone/ebi_eva_internal_pyutils/archive_directory.py
8
7
  ebi_eva_common_pyutils/__init__.py
9
8
  ebi_eva_common_pyutils/assembly_utils.py
10
9
  ebi_eva_common_pyutils/command_utils.py
@@ -41,5 +40,4 @@ ebi_eva_internal_pyutils/spring_properties.py
41
40
  ebi_eva_internal_pyutils/mongodb/__init__.py
42
41
  ebi_eva_internal_pyutils/mongodb/mongo_database.py
43
42
  ebi_eva_internal_pyutils/nextflow/__init__.py
44
- ebi_eva_internal_pyutils/nextflow/nextflow_pipeline.py
45
- tests/test_common.py
43
+ ebi_eva_internal_pyutils/nextflow/nextflow_pipeline.py
@@ -117,10 +117,10 @@ def resolve_existing_variant_warehouse_db_name(metadata_connection_handle, assem
117
117
  get_variant_warehouse_db_name_from_assembly_and_taxonomy = resolve_existing_variant_warehouse_db_name
118
118
 
119
119
 
120
- def get_assembly_code(metadata_connection_handle, assembly):
120
+ def get_assembly_code(metadata_connection_handle, assembly, ncbi_api_key=None):
121
121
  assembly_code = get_assembly_code_from_metadata(metadata_connection_handle, assembly)
122
122
  if not assembly_code:
123
- assembly_name = get_ncbi_assembly_name_from_term(assembly)
123
+ assembly_name = get_ncbi_assembly_name_from_term(assembly, api_key=ncbi_api_key)
124
124
  # If the assembly is a patch assembly ex: GRCh37.p8, drop the trailing patch i.e., just return grch37
125
125
  if is_patch_assembly(assembly):
126
126
  assembly_name = re.sub('\\.p[0-9]+$', '', assembly_name.lower())
@@ -141,18 +141,18 @@ def get_taxonomy_code(metadata_connection_handle, taxonomy):
141
141
  return taxonomy_code
142
142
 
143
143
 
144
- def resolve_variant_warehouse_db_name(metadata_connection_handle, assembly, taxonomy):
144
+ def resolve_variant_warehouse_db_name(metadata_connection_handle, assembly, taxonomy, ncbi_api_key=None):
145
145
  """
146
146
  Retrieve the database name for this taxonomy/assembly pair whether it exists or not.
147
147
  It will use existing taxonomy code or assembly code if available in the metadata database.
148
148
  """
149
149
  taxonomy_code = get_taxonomy_code(metadata_connection_handle, taxonomy)
150
- assembly_code = get_assembly_code(metadata_connection_handle, assembly)
150
+ assembly_code = get_assembly_code(metadata_connection_handle, assembly, ncbi_api_key=ncbi_api_key)
151
151
  return build_variant_warehouse_database_name(taxonomy_code, assembly_code)
152
152
 
153
153
 
154
154
  def insert_new_assembly_and_taxonomy(metadata_connection_handle, assembly_accession, taxonomy_id, eva_species_name=None,
155
- in_accessioning=True):
155
+ in_accessioning=True, ncbi_api_key=None):
156
156
  """
157
157
  This script adds new assemblies and taxonomies to EVAPRO.
158
158
  You can also add the assembly with a different taxonomy if you provide the
@@ -166,11 +166,10 @@ def insert_new_assembly_and_taxonomy(metadata_connection_handle, assembly_access
166
166
  Not required if the taxonomy exists or ENA has a common name available.
167
167
  :param in_accessioning: Flag that this assembly is in the accessioning data store.
168
168
  """
169
- assembly_name = get_ncbi_assembly_name_from_term(assembly_accession)
170
-
171
169
  # check if assembly is already in EVAPRO, adding it if not
172
170
  assembly_set_id = get_assembly_set_from_metadata(metadata_connection_handle, taxonomy_id, assembly_accession)
173
171
  if assembly_set_id is None:
172
+ assembly_name = get_ncbi_assembly_name_from_term(assembly_accession, api_key=ncbi_api_key)
174
173
  ensure_taxonomy_is_in_evapro(metadata_connection_handle, taxonomy_id, eva_species_name)
175
174
  assembly_code = get_assembly_code(metadata_connection_handle, assembly_accession)
176
175
  insert_assembly_in_evapro(metadata_connection_handle, taxonomy_id, assembly_accession, assembly_name, assembly_code)
@@ -7,7 +7,7 @@ setup(
7
7
  name='ebi_eva_common_pyutils',
8
8
  scripts=[os.path.join(os.path.dirname(__file__), 'ebi_eva_internal_pyutils', 'archive_directory.py')],
9
9
  packages=find_packages(),
10
- version='0.6.5',
10
+ version='0.6.6',
11
11
  license='Apache',
12
12
  description='EBI EVA - Common Python Utilities',
13
13
  url='https://github.com/EBIVariation/eva-common-pyutils',
@@ -1,22 +0,0 @@
1
- import os
2
- from unittest import TestCase
3
-
4
- from ebi_eva_common_pyutils.common_utils import merge_two_dicts, pretty_print
5
-
6
-
7
- class TestCommon(TestCase):
8
-
9
- resources_folder = os.path.join(os.path.dirname(__file__), 'resources')
10
-
11
-
12
- class TestCommonUtils(TestCase):
13
-
14
- def test_merge_two_dicts(self):
15
- d1 = {'a': 1, 'b': 2, 'c': 3}
16
- d2 = {'d': 4, 'a': 5, 'e': 6}
17
- assert merge_two_dicts(d1, d2) == {'a': 5, 'b': 2, 'c': 3, 'd': 4, 'e': 6}
18
- assert merge_two_dicts(d2, d1) == {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 6}
19
-
20
- def test_pretty_print(self):
21
- pretty_print(['Header 1', 'Long Header 2'],
22
- [['row1 cell 1', 'row1 cell 2'], ['row2 cell 1', 'Super long row2 cell 2']])