protein-quest 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- protein_quest/__version__.py +1 -1
- protein_quest/alphafold/fetch.py +265 -78
- protein_quest/cli.py +80 -47
- protein_quest/mcp_server.py +3 -3
- protein_quest/uniprot.py +53 -18
- protein_quest/utils.py +15 -3
- {protein_quest-0.7.0.dist-info → protein_quest-0.8.0.dist-info}/METADATA +23 -2
- {protein_quest-0.7.0.dist-info → protein_quest-0.8.0.dist-info}/RECORD +11 -11
- {protein_quest-0.7.0.dist-info → protein_quest-0.8.0.dist-info}/WHEEL +0 -0
- {protein_quest-0.7.0.dist-info → protein_quest-0.8.0.dist-info}/entry_points.txt +0 -0
- {protein_quest-0.7.0.dist-info → protein_quest-0.8.0.dist-info}/licenses/LICENSE +0 -0
protein_quest/__version__.py
CHANGED
````diff
@@ -1,2 +1,2 @@
-__version__ = "0.7.0"
+__version__ = "0.8.0"
 """The version of the package."""
````
protein_quest/alphafold/fetch.py
CHANGED
````diff
@@ -29,14 +29,19 @@ DownloadableFormat = Literal[
     "amAnnotations",
     "amAnnotationsHg19",
     "amAnnotationsHg38",
-    "…
-    "…
+    "msa",
+    "plddtDoc",
 ]
 """Types of formats that can be downloaded from the AlphaFold web service."""
 
 downloadable_formats: set[DownloadableFormat] = set(get_args(DownloadableFormat))
 """Set of formats that can be downloaded from the AlphaFold web service."""
 
+UrlFileNamePair = tuple[URL, str]
+"""A tuple of a URL and a filename."""
+UrlFileNamePairsOfFormats = dict[DownloadableFormat, UrlFileNamePair]
+"""A mapping of DownloadableFormat to UrlFileNamePair."""
+
 
 def _camel_to_snake_case(name: str) -> str:
     """Convert a camelCase string to snake_case."""
@@ -51,7 +56,7 @@ class AlphaFoldEntry:
     """
 
     uniprot_accession: str
-    summary: EntrySummary
+    summary: EntrySummary | None = None
     summary_file: Path | None = None
     bcif_file: Path | None = None
     cif_file: Path | None = None
@@ -105,6 +110,35 @@ class AlphaFoldEntry:
         """
         return sum(1 for attr in vars(self) if attr.endswith("_file") and getattr(self, attr) is not None)
 
+    def relative_to(self, session_dir: Path) -> "AlphaFoldEntry":
+        """Convert paths in an AlphaFoldEntry to be relative to the session directory.
+
+        Args:
+            entry: An AlphaFoldEntry instance with absolute paths.
+            session_dir: The session directory to which the paths should be made relative.
+
+        Returns:
+            An AlphaFoldEntry instance with paths relative to the session directory.
+        """
+        return AlphaFoldEntry(
+            uniprot_accession=self.uniprot_accession,
+            summary=self.summary,
+            summary_file=self.summary_file.relative_to(session_dir) if self.summary_file else None,
+            bcif_file=self.bcif_file.relative_to(session_dir) if self.bcif_file else None,
+            cif_file=self.cif_file.relative_to(session_dir) if self.cif_file else None,
+            pdb_file=self.pdb_file.relative_to(session_dir) if self.pdb_file else None,
+            pae_doc_file=self.pae_doc_file.relative_to(session_dir) if self.pae_doc_file else None,
+            am_annotations_file=self.am_annotations_file.relative_to(session_dir) if self.am_annotations_file else None,
+            am_annotations_hg19_file=(
+                self.am_annotations_hg19_file.relative_to(session_dir) if self.am_annotations_hg19_file else None
+            ),
+            am_annotations_hg38_file=(
+                self.am_annotations_hg38_file.relative_to(session_dir) if self.am_annotations_hg38_file else None
+            ),
+            msa_file=self.msa_file.relative_to(session_dir) if self.msa_file else None,
+            plddt_doc_file=self.plddt_doc_file.relative_to(session_dir) if self.plddt_doc_file else None,
+        )
+
 
 async def fetch_summary(
     qualifier: str, session: RetryClient, semaphore: Semaphore, save_dir: Path | None, cacher: Cacher
@@ -170,32 +204,16 @@ async def fetch_summaries(
         yield qualifier, summary
 
 
-async def fetch_many_async(
+async def _fetch_many_async_with_summary(
     uniprot_accessions: Iterable[str],
     save_dir: Path,
-    what: set[DownloadableFormat],
+    formats: set[DownloadableFormat],
     max_parallel_downloads: int = 5,
     cacher: Cacher | None = None,
     gzip_files: bool = False,
     all_isoforms: bool = False,
 ) -> AsyncGenerator[AlphaFoldEntry]:
-    """…
-    [AlphaFold Protein Structure Database](https://alphafold.ebi.ac.uk/).
-
-    Args:
-        uniprot_accessions: A set of Uniprot accessions to fetch.
-        save_dir: The directory to save the fetched files to.
-        what: A set of formats to download.
-        max_parallel_downloads: The maximum number of parallel downloads.
-        cacher: A cacher to use for caching the fetched files. Only used if summary is in what set.
-        gzip_files: Whether to gzip the downloaded files.
-        all_isoforms: Whether to yield all isoforms of each uniprot entry.
-            When False then yields only the canonical sequence of uniprot entry.
-
-    Yields:
-        A dataclass containing the summary, pdb file, and pae file.
-    """
-    save_dir_for_summaries = save_dir if "summary" in what and save_dir is not None else None
+    save_dir_for_summaries = save_dir if "summary" in formats else None
 
     summaries = [
         s
@@ -206,7 +224,7 @@ async def fetch_many_async(
         # O60481 is canonical and O60481-2 is isoform, so we skip the isoform
         if all_isoforms or s[0] == s[1].uniprotAccession
     ]
-    files = files_to_download(…
+    files = files_to_download(formats, summaries, gzip_files)
 
     await retrieve_files(
         files,
@@ -223,45 +241,45 @@ async def fetch_many_async(
             uniprot_accession=uniprot_accession,
             summary=summary,
             summary_file=save_dir / f"{uniprot_accession}.json" if save_dir_for_summaries is not None else None,
-            bcif_file=save_dir / (summary.bcifUrl.name + gzext) if "bcif" in what else None,
-            cif_file=save_dir / (summary.cifUrl.name + gzext) if "cif" in what else None,
-            pdb_file=save_dir / (summary.pdbUrl.name + gzext) if "pdb" in what else None,
-            pae_doc_file=save_dir / (summary.paeDocUrl.name + gzext) if "paeDoc" in what else None,
+            bcif_file=save_dir / (summary.bcifUrl.name + gzext) if "bcif" in formats else None,
+            cif_file=save_dir / (summary.cifUrl.name + gzext) if "cif" in formats else None,
+            pdb_file=save_dir / (summary.pdbUrl.name + gzext) if "pdb" in formats else None,
+            pae_doc_file=save_dir / (summary.paeDocUrl.name + gzext) if "paeDoc" in formats else None,
             am_annotations_file=(
                 save_dir / (summary.amAnnotationsUrl.name + gzext)
-                if "amAnnotations" in what
+                if "amAnnotations" in formats and summary.amAnnotationsUrl
                 else None
             ),
             am_annotations_hg19_file=(
                 save_dir / (summary.amAnnotationsHg19Url.name + gzext)
-                if "amAnnotationsHg19" in what
+                if "amAnnotationsHg19" in formats and summary.amAnnotationsHg19Url
                 else None
             ),
             am_annotations_hg38_file=(
                 save_dir / (summary.amAnnotationsHg38Url.name + gzext)
-                if "amAnnotationsHg38" in what
+                if "amAnnotationsHg38" in formats and summary.amAnnotationsHg38Url
                 else None
             ),
-            msa_file=(save_dir / (summary.msaUrl.name + gzext) if "…
+            msa_file=(save_dir / (summary.msaUrl.name + gzext) if "msa" in formats and summary.msaUrl else None),
             plddt_doc_file=(
-                save_dir / (summary.plddtDocUrl.name + gzext) if "…
+                save_dir / (summary.plddtDocUrl.name + gzext) if "plddtDoc" in formats and summary.plddtDocUrl else None
             ),
         )
 
 
 def files_to_download(
-    …
-) -> set[…
-    if not (set(…
+    formats: set[DownloadableFormat], summaries: Iterable[tuple[str, EntrySummary]], gzip_files: bool
+) -> set[UrlFileNamePair]:
+    if not (set(formats) <= downloadable_formats):
         msg = (
-            f"Invalid format(s) specified: {set(…
+            f"Invalid format(s) specified: {set(formats) - downloadable_formats}. "
             f"Valid formats are: {downloadable_formats}"
         )
         raise ValueError(msg)
 
-    url_filename_pairs: set[…
+    url_filename_pairs: set[UrlFileNamePair] = set()
     for _, summary in summaries:
-        for fmt in …
+        for fmt in formats:
             if fmt == "summary":
                 # summary is handled already in fetch_summary
                 continue
@@ -275,26 +293,224 @@ def files_to_download(
     return url_filename_pairs
 
 
+async def fetch_alphafold_db_version() -> str:
+    """Fetch the current version of the AlphaFold database.
+
+    Returns:
+        The current version of the AlphaFold database as a string. For example: "6".
+    """
+    url = "https://ftp.ebi.ac.uk/pub/databases/alphafold/accession_ids.csv"
+    headers = {"Range": "bytes=0-200"}
+    logger.debug(f"Detecting AlphaFold DB version from head of {url}")
+    async with friendly_session() as session, session.get(url, headers=headers) as response:
+        response.raise_for_status()
+        raw = await response.content.read(200)
+    text = raw.decode("utf-8")
+    first_line = text.splitlines()[1]
+    version = first_line.split(",")[-1]
+    logger.debug(f"Found current AlphaFold DB version is '{version}'")
+    return version
+
+
+def _files_for_alphafold_entry(
+    uniprot_accession: str,
+    formats: set[DownloadableFormat],
+    db_version: str,
+    gzip_files: bool,
+) -> UrlFileNamePairsOfFormats:
+    templates: dict[DownloadableFormat, URL] = {
+        "bcif": URL(f"https://alphafold.ebi.ac.uk/files/AF-{uniprot_accession}-F1-model_v{db_version}.bcif"),
+        "cif": URL(f"https://alphafold.ebi.ac.uk/files/AF-{uniprot_accession}-F1-model_v{db_version}.cif"),
+        "pdb": URL(f"https://alphafold.ebi.ac.uk/files/AF-{uniprot_accession}-F1-model_v{db_version}.pdb"),
+        "paeDoc": URL(
+            f"https://alphafold.ebi.ac.uk/files/AF-{uniprot_accession}-F1-predicted_aligned_error_v{db_version}.json"
+        ),
+        "amAnnotations": URL(f"https://alphafold.ebi.ac.uk/files/AF-{uniprot_accession}-F1-aa-substitutions.csv"),
+        "amAnnotationsHg19": URL(f"https://alphafold.ebi.ac.uk/files/AF-{uniprot_accession}-F1-hg19.csv"),
+        "amAnnotationsHg38": URL(f"https://alphafold.ebi.ac.uk/files/AF-{uniprot_accession}-F1-hg38.csv"),
+        "msa": URL(f"https://alphafold.ebi.ac.uk/files/msa/AF-{uniprot_accession}-F1-msa_v{db_version}.a3m"),
+        "plddtDoc": URL(f"https://alphafold.ebi.ac.uk/files/AF-{uniprot_accession}-F1-confidence_v{db_version}.json"),
+    }
+    url_filename_pairs = {}
+    for fmt in formats:
+        if fmt == "summary":
+            # Summaries are downloaded separately as its using API instead of static files
+            continue
+        if fmt not in templates:
+            logger.warning(f"No URL template found for format '{fmt}'. Skipping.")
+            continue
+        url = templates[cast("DownloadableFormat", fmt)]
+        fn = url.name
+        if gzip_files:
+            fn += ".gz"
+        url_filename_pair = (url, fn)
+        url_filename_pairs[fmt] = url_filename_pair
+    return url_filename_pairs
+
+
+def files_for_alphafold_entries(
+    uniprot_accessions: Iterable[str],
+    formats: set[DownloadableFormat],
+    db_version: str,
+    gzip_files: bool,
+) -> dict[str, UrlFileNamePairsOfFormats]:
+    """Get the files to download for multiple AlphaFold entries.
+
+    Args:
+        uniprot_accessions: A set of Uniprot accessions.
+        formats: A set of formats to download.
+        db_version: The version of the AlphaFold database to use.
+        gzip_files: Whether to download gzipped files. Otherwise downloads uncompressed files.
+
+    Returns:
+        A mapping of Uniprot accession to a mapping of DownloadableFormat to UrlFileNamePair.
+    """
+    return {
+        uniprot_accession: _files_for_alphafold_entry(
+            uniprot_accession, formats=formats, db_version=db_version, gzip_files=gzip_files
+        )
+        for uniprot_accession in uniprot_accessions
+    }
+
+
+async def _fetch_many_async_without_summary(
+    uniprot_accessions: Iterable[str],
+    save_dir: Path,
+    formats: set[DownloadableFormat],
+    db_version: str | None = None,
+    max_parallel_downloads: int = 5,
+    cacher: Cacher | None = None,
+    gzip_files: bool = False,
+) -> AsyncGenerator[AlphaFoldEntry]:
+    if db_version is None:
+        db_version = await fetch_alphafold_db_version()
+    nested_files = files_for_alphafold_entries(
+        uniprot_accessions, formats=formats, db_version=db_version, gzip_files=gzip_files
+    )
+    files: set[UrlFileNamePair] = set()
+    for uniprot_accession in uniprot_accessions:
+        files.update(nested_files[uniprot_accession].values())
+
+    retrieved_files = await retrieve_files(
+        files,
+        save_dir,
+        desc="Downloading AlphaFold files",
+        max_parallel_downloads=max_parallel_downloads,
+        cacher=cacher,
+        gzip_files=gzip_files,
+        raise_for_not_found=False,
+    )
+
+    retrieved_files_set = set(retrieved_files)
+    for uniprot_accession in uniprot_accessions:
+        entry = AlphaFoldEntry(
+            uniprot_accession=uniprot_accession,
+        )
+
+        for af_format, url_filename_pair in nested_files[uniprot_accession].items():
+            _, filename = url_filename_pair
+            filepath = save_dir / filename
+            if filepath in retrieved_files_set:
+                attr = AlphaFoldEntry.format2attr(af_format)
+                setattr(entry, attr, filepath)
+            # else: File was not found (404) during download, so we leave the attribute as None
+
+        yield entry
+
+
+def fetch_many_async(
+    uniprot_accessions: Iterable[str],
+    save_dir: Path,
+    formats: set[DownloadableFormat],
+    db_version: str | None = None,
+    max_parallel_downloads: int = 5,
+    cacher: Cacher | None = None,
+    gzip_files: bool = False,
+    all_isoforms: bool = False,
+) -> AsyncGenerator[AlphaFoldEntry]:
+    """Asynchronously fetches summaries and/or files from
+    [AlphaFold Protein Structure Database](https://alphafold.ebi.ac.uk/).
+
+    Args:
+        uniprot_accessions: A set of Uniprot accessions to fetch.
+        save_dir: The directory to save the fetched files to.
+        formats: A set of formats to download.
+            If `summary` is in the set then summaries will be fetched using the API endpoint.
+            and later the other files will be downloaded using static file URLs.
+            If `summary` is not in the set then all files will be downloaded using static file
+            URLs only.
+        db_version: The version of the AlphaFold database to use. If None, the latest version will be used.
+        max_parallel_downloads: The maximum number of parallel downloads.
+        cacher: A cacher to use for caching the fetched files.
+        gzip_files: Whether to gzip the downloaded files.
+            Summaries are never gzipped.
+        all_isoforms: Whether to yield all isoforms of each uniprot entry.
+            When False then yields only the canonical sequence per uniprot entry.
+
+    Yields:
+        A dataclass containing the summary, pdb file, and pae file.
+
+    Raises:
+        ValueError: If 'formats' set is empty.
+        ValueError: If all_isoforms is True and 'summary' is not in 'formats' set.
+    """
+    if len(formats) == 0:
+        msg = "At least one format must be specified. The 'formats' argument is empty."
+        raise ValueError(msg)
+    if "summary" in formats:
+        if db_version is not None:
+            logger.warning("db_version is ignored when 'summary' is in 'formats' set. Always uses latest version.")
+        return _fetch_many_async_with_summary(
+            uniprot_accessions,
+            save_dir,
+            formats,
+            max_parallel_downloads=max_parallel_downloads,
+            cacher=cacher,
+            gzip_files=gzip_files,
+            all_isoforms=all_isoforms,
+        )
+    if all_isoforms:
+        msg = "Cannot fetch all isoforms when 'summary' is not in 'formats' set."
+        raise ValueError(msg)
+    return _fetch_many_async_without_summary(
+        uniprot_accessions,
+        save_dir,
+        formats,
+        db_version=db_version,
+        max_parallel_downloads=max_parallel_downloads,
+        cacher=cacher,
+        gzip_files=gzip_files,
+    )
+
+
 def fetch_many(
-    …
+    uniprot_accessions: Iterable[str],
     save_dir: Path,
-    …
+    formats: set[DownloadableFormat],
+    db_version: str | None = None,
     max_parallel_downloads: int = 5,
     cacher: Cacher | None = None,
     gzip_files: bool = False,
     all_isoforms: bool = False,
 ) -> list[AlphaFoldEntry]:
-    """Synchronously fetches summaries and …
+    """Synchronously fetches summaries and/or files like cif from AlphaFold Protein Structure Database.
 
     Args:
-        …
+        uniprot_accessions: A set of Uniprot accessions to fetch.
         save_dir: The directory to save the fetched files to.
-        …
+        formats: A set of formats to download.
+            If `summary` is in the set then summaries will be fetched using the API endpoint.
+            and later the other files will be downloaded using static file URLs.
+            If `summary` is not in the set then all files will be downloaded using static file
+            URLs only.
+            Excluding 'summary' is much faster as it avoids slow API calls.
+        db_version: The version of the AlphaFold database to use. If None, the latest version will be used.
         max_parallel_downloads: The maximum number of parallel downloads.
-        cacher: A cacher to use for caching the fetched files. …
+        cacher: A cacher to use for caching the fetched files.
         gzip_files: Whether to gzip the downloaded files.
-            …
-        …
+            Summaries are never gzipped.
+        all_isoforms: Whether to yield all isoforms of each uniprot entry.
+            When False then yields only the canonical sequence per uniprot entry.
 
     Returns:
         A list of AlphaFoldEntry dataclasses containing the summary, pdb file, and pae file.
@@ -304,9 +520,10 @@ def fetch_many(
         return [
             entry
            async for entry in fetch_many_async(
-                …
+                uniprot_accessions,
                 save_dir,
-                …
+                formats,
+                db_version=db_version,
                 max_parallel_downloads=max_parallel_downloads,
                 cacher=cacher,
                 gzip_files=gzip_files,
@@ -315,33 +532,3 @@ def fetch_many(
         ]
 
     return run_async(gather_entries())
-
-
-def relative_to(entry: AlphaFoldEntry, session_dir: Path) -> AlphaFoldEntry:
-    """Convert paths in an AlphaFoldEntry to be relative to the session directory.
-
-    Args:
-        entry: An AlphaFoldEntry instance with absolute paths.
-        session_dir: The session directory to which the paths should be made relative.
-
-    Returns:
-        An AlphaFoldEntry instance with paths relative to the session directory.
-    """
-    return AlphaFoldEntry(
-        uniprot_accession=entry.uniprot_accession,
-        summary=entry.summary,
-        summary_file=entry.summary_file.relative_to(session_dir) if entry.summary_file else None,
-        bcif_file=entry.bcif_file.relative_to(session_dir) if entry.bcif_file else None,
-        cif_file=entry.cif_file.relative_to(session_dir) if entry.cif_file else None,
-        pdb_file=entry.pdb_file.relative_to(session_dir) if entry.pdb_file else None,
-        pae_doc_file=entry.pae_doc_file.relative_to(session_dir) if entry.pae_doc_file else None,
-        am_annotations_file=entry.am_annotations_file.relative_to(session_dir) if entry.am_annotations_file else None,
-        am_annotations_hg19_file=(
-            entry.am_annotations_hg19_file.relative_to(session_dir) if entry.am_annotations_hg19_file else None
-        ),
-        am_annotations_hg38_file=(
-            entry.am_annotations_hg38_file.relative_to(session_dir) if entry.am_annotations_hg38_file else None
-        ),
-        msa_file=entry.msa_file.relative_to(session_dir) if entry.msa_file else None,
-        plddt_doc_file=entry.plddt_doc_file.relative_to(session_dir) if entry.plddt_doc_file else None,
-    )
````
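Taken together, the reworked `fetch_many`/`fetch_many_async` entry points now accept a `formats` set and an optional `db_version`. A minimal usage sketch based on the signatures above; the accessions and directory name are illustrative, and protein-quest 0.8.0 plus network access are assumed:

```python
from pathlib import Path

from protein_quest.alphafold.fetch import fetch_many

# Omitting "summary" from formats downloads everything via static file URLs,
# which the docstring above notes is much faster than the summary API route.
entries = fetch_many(
    ["P05067", "O60481"],  # example UniProt accessions
    Path("downloads-af"),
    formats={"cif", "plddtDoc"},
    db_version="6",  # pin the AlphaFold DB version; None resolves the latest
)
for entry in entries:
    print(entry.uniprot_accession, entry.cif_file, entry.plddt_doc_file)
```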
protein_quest/cli.py
CHANGED
````diff
@@ -13,6 +13,7 @@ from io import BytesIO, TextIOWrapper
 from pathlib import Path
 from textwrap import dedent
 
+import shtab
 from cattrs import structure
 from rich.console import Console
 from rich.logging import RichHandler
@@ -81,7 +82,7 @@ def _add_search_uniprot_parser(subparsers: argparse._SubParsersAction):
         "output",
         type=argparse.FileType("w", encoding="UTF-8"),
         help="Output text file for UniProt accessions (one per line). Use `-` for stdout.",
-    )
+    ).complete = shtab.FILE
     parser.add_argument("--taxon-id", type=str, help="NCBI Taxon ID, e.g. 9606 for Homo Sapiens")
     parser.add_argument(
         "--reviewed",
@@ -124,7 +125,7 @@ def _add_search_pdbe_parser(subparsers: argparse._SubParsersAction):
         "uniprot_accessions",
         type=argparse.FileType("r", encoding="UTF-8"),
         help="Text file with UniProt accessions (one per line). Use `-` for stdin.",
-    )
+    ).complete = shtab.FILE
     parser.add_argument(
         "output_csv",
         type=argparse.FileType("w", encoding="UTF-8"),
@@ -136,7 +137,7 @@ def _add_search_pdbe_parser(subparsers: argparse._SubParsersAction):
             and `chain_length` is the length of the chain, for example `100`.
             Use `-` for stdout.
             """),
-    )
+    ).complete = shtab.FILE
     parser.add_argument(
         "--limit", type=int, default=10_000, help="Maximum number of PDB uniprot accessions combinations to return"
     )
@@ -150,6 +151,15 @@ def _add_search_pdbe_parser(subparsers: argparse._SubParsersAction):
         type=int,
         help="Maximum number of residues allowed in chain mapped to the UniProt accession.",
     )
+    parser.add_argument(
+        "--keep-invalid",
+        action="store_true",
+        help=dedent("""\
+            Keep PDB results when chain length could not be determined.
+            If not given, such results are dropped.
+            Only applies if min/max residues arguments are set.
+            """),
+    )
     parser.add_argument("--timeout", type=int, default=1_800, help="Maximum seconds to wait for query to complete")
 
 
@@ -165,12 +175,12 @@ def _add_search_alphafold_parser(subparsers: argparse._SubParsersAction):
         "uniprot_accessions",
         type=argparse.FileType("r", encoding="UTF-8"),
         help="Text file with UniProt accessions (one per line). Use `-` for stdin.",
-    )
+    ).complete = shtab.FILE
     parser.add_argument(
         "output_csv",
         type=argparse.FileType("w", encoding="UTF-8"),
         help="Output CSV with AlphaFold IDs per UniProt accession. Use `-` for stdout.",
-    )
+    ).complete = shtab.FILE
     parser.add_argument("--min-sequence-length", type=int, help="Minimum length of the canonical sequence.")
     parser.add_argument("--max-sequence-length", type=int, help="Maximum length of the canonical sequence.")
     parser.add_argument(
@@ -194,12 +204,12 @@ def _add_search_emdb_parser(subparsers: argparse._SubParsersAction):
         "uniprot_accs",
         type=argparse.FileType("r", encoding="UTF-8"),
         help="Text file with UniProt accessions (one per line). Use `-` for stdin.",
-    )
+    ).complete = shtab.FILE
     parser.add_argument(
         "output_csv",
         type=argparse.FileType("w", encoding="UTF-8"),
         help="Output CSV with EMDB IDs per UniProt accession. Use `-` for stdout.",
-    )
+    ).complete = shtab.FILE
     parser.add_argument("--limit", type=int, default=10_000, help="Maximum number of EMDB entry identifiers to return")
     parser.add_argument("--timeout", type=int, default=1_800, help="Maximum seconds to wait for query to complete")
 
@@ -222,7 +232,7 @@ def _add_search_go_parser(subparsers: argparse._SubParsersAction):
         "output_csv",
         type=argparse.FileType("w", encoding="UTF-8"),
         help="Output CSV with GO term results. Use `-` for stdout.",
-    )
+    ).complete = shtab.FILE
     parser.add_argument("--limit", type=int, default=100, help="Maximum number of GO term results to return")
 
 
@@ -244,7 +254,7 @@ def _add_search_taxonomy_parser(subparser: argparse._SubParsersAction):
         "output_csv",
         type=argparse.FileType("w", encoding="UTF-8"),
         help="Output CSV with taxonomy results. Use `-` for stdout.",
-    )
+    ).complete = shtab.FILE
     parser.add_argument(
         "--field",
         type=str,
@@ -285,7 +295,7 @@ def _add_search_interaction_partners_parser(subparsers: argparse._SubParsersActi
         "output_csv",
         type=argparse.FileType("w", encoding="UTF-8"),
         help="Output CSV with interaction partners per UniProt accession. Use `-` for stdout.",
-    )
+    ).complete = shtab.FILE
     parser.add_argument(
         "--limit", type=int, default=10_000, help="Maximum number of interaction partner uniprot accessions to return"
     )
@@ -316,12 +326,12 @@ def _add_search_complexes_parser(subparsers: argparse._SubParsersAction):
         "uniprot_accessions",
         type=argparse.FileType("r", encoding="UTF-8"),
         help="Text file with UniProt accessions (one per line) as query for searching complexes. Use `-` for stdin.",
-    )
+    ).complete = shtab.FILE
     parser.add_argument(
         "output_csv",
         type=argparse.FileType("w", encoding="UTF-8"),
         help="Output CSV file with complex results. Use `-` for stdout.",
-    )
+    ).complete = shtab.FILE
     parser.add_argument("--limit", type=int, default=100, help="Maximum number of complex results to return")
     parser.add_argument("--timeout", type=int, default=1_800, help="Maximum seconds to wait for query to complete")
 
@@ -354,12 +364,12 @@ def _add_search_uniprot_details_parser(subparsers: argparse._SubParsersAction):
         "uniprot_accessions",
         type=argparse.FileType("r", encoding="UTF-8"),
         help="Text file with UniProt accessions (one per line). Use `-` for stdin.",
-    )
+    ).complete = shtab.FILE
     parser.add_argument(
         "output_csv",
         type=argparse.FileType("w", encoding="UTF-8"),
         help="Output CSV with UniProt details. Use `-` for stdout.",
-    )
+    ).complete = shtab.FILE
     parser.add_argument("--timeout", type=int, default=1_800, help="Maximum seconds to wait for query to complete")
     parser.add_argument("--batch-size", type=int, default=1_000, help="Number of accessions to query per batch")
 
@@ -387,12 +397,13 @@ def _add_cacher_arguments(parser: argparse.ArgumentParser):
         action="store_true",
         help="Disable caching of files to central location.",
     )
-    parser.add_argument(
+    cache_dir_action = parser.add_argument(
         "--cache-dir",
         type=Path,
         default=user_cache_root_dir(),
         help="Directory to use as cache for files.",
     )
+    cache_dir_action.complete = shtab.DIRECTORY  # type: ignore[missing-attribute]
     _add_copy_method_arguments(parser)
 
 
@@ -411,8 +422,10 @@ def _add_retrieve_pdbe_parser(subparsers: argparse._SubParsersAction):
         "pdbe_csv",
         type=argparse.FileType("r", encoding="UTF-8"),
         help="CSV file with `pdb_id` column. Other columns are ignored. Use `-` for stdin.",
-    )
-    parser.add_argument(…
+    ).complete = shtab.FILE
+    parser.add_argument(
+        "output_dir", type=Path, help="Directory to store downloaded PDBe mmCIF files"
+    ).complete = shtab.DIRECTORY
     parser.add_argument(
         "--max-parallel-downloads",
         type=int,
@@ -434,15 +447,22 @@ def _add_retrieve_alphafold_parser(subparsers: argparse._SubParsersAction):
         "alphafold_csv",
         type=argparse.FileType("r", encoding="UTF-8"),
         help="CSV file with `af_id` column. Other columns are ignored. Use `-` for stdin.",
-    )
-    parser.add_argument("output_dir", type=Path, help="Directory to store downloaded AlphaFold files")
+    ).complete = shtab.FILE
     parser.add_argument(
-        "…
+        "output_dir", type=Path, help="Directory to store downloaded AlphaFold files"
+    ).complete = shtab.DIRECTORY
+    parser.add_argument(
+        "--format",
         type=str,
         action="append",
         choices=sorted(downloadable_formats),
         help=dedent("""AlphaFold formats to retrieve. Can be specified multiple times.
-            Default is '…
+            Default is 'cif'."""),
+    )
+    parser.add_argument(
+        "--db-version",
+        type=str,
+        help="AlphaFold database version to use. If not given, the latest version is used. For example '6'.",
     )
     parser.add_argument(
         "--gzip-files",
@@ -481,8 +501,10 @@ def _add_retrieve_emdb_parser(subparsers: argparse._SubParsersAction):
         "emdb_csv",
         type=argparse.FileType("r", encoding="UTF-8"),
         help="CSV file with `emdb_id` column. Other columns are ignored. Use `-` for stdin.",
-    )
-    parser.add_argument(…
+    ).complete = shtab.FILE
+    parser.add_argument(
+        "output_dir", type=Path, help="Directory to store downloaded EMDB volume files"
+    ).complete = shtab.DIRECTORY
     _add_cacher_arguments(parser)
 
 
@@ -496,8 +518,12 @@ def _add_filter_confidence_parser(subparsers: argparse._SubParsersAction):
             Passed files are written with residues below threshold removed."""),
         formatter_class=ArgumentDefaultsRichHelpFormatter,
     )
-    parser.add_argument(…
-    …
+    parser.add_argument(
+        "input_dir", type=Path, help="Directory with AlphaFold mmcif/PDB files"
+    ).complete = shtab.DIRECTORY
+    parser.add_argument(
+        "output_dir", type=Path, help="Directory to write filtered mmcif/PDB files"
+    ).complete = shtab.DIRECTORY
     parser.add_argument("--confidence-threshold", type=float, default=70, help="pLDDT confidence threshold (0-100)")
     parser.add_argument(
         "--min-residues", type=int, default=0, help="Minimum number of high-confidence residues a structure should have"
@@ -515,7 +541,7 @@ def _add_filter_confidence_parser(subparsers: argparse._SubParsersAction):
             Write filter statistics to file.
             In CSV format with `<input_file>,<residue_count>,<passed>,<output_file>` columns.
             Use `-` for stdout."""),
-    )
+    ).complete = shtab.FILE
     _add_copy_method_arguments(parser)
 
 
@@ -535,7 +561,7 @@ def _add_filter_chain_parser(subparsers: argparse._SubParsersAction):
         "chains",
         type=argparse.FileType("r", encoding="UTF-8"),
         help="CSV file with `pdb_id` and `chain` columns. Other columns are ignored.",
-    )
+    ).complete = shtab.FILE
     parser.add_argument(
         "input_dir",
         type=Path,
@@ -543,13 +569,13 @@ def _add_filter_chain_parser(subparsers: argparse._SubParsersAction):
            Directory with PDB/mmCIF files.
            Expected filenames are `{pdb_id}.cif.gz`, `{pdb_id}.cif`, `{pdb_id}.pdb.gz` or `{pdb_id}.pdb`.
            """),
-    )
+    ).complete = shtab.DIRECTORY
     parser.add_argument(
         "output_dir",
         type=Path,
         help=dedent("""\
            Directory to write the single-chain PDB/mmCIF files. Output files are in same format as input files."""),
-    )
+    ).complete = shtab.DIRECTORY
     parser.add_argument(
         "--scheduler-address",
         help=dedent("""Address of the Dask scheduler to connect to.
@@ -569,14 +595,16 @@ def _add_filter_residue_parser(subparsers: argparse._SubParsersAction):
             """),
         formatter_class=ArgumentDefaultsRichHelpFormatter,
     )
-    parser.add_argument(…
+    parser.add_argument(
+        "input_dir", type=Path, help="Directory with PDB/mmCIF files (e.g., from 'filter chain')"
+    ).complete = shtab.DIRECTORY
     parser.add_argument(
         "output_dir",
         type=Path,
         help=dedent("""\
             Directory to write filtered PDB/mmCIF files. Files are copied without modification.
             """),
-    )
+    ).complete = shtab.DIRECTORY
     parser.add_argument("--min-residues", type=int, default=0, help="Min residues in chain A")
     parser.add_argument("--max-residues", type=int, default=10_000_000, help="Max residues in chain A")
     parser.add_argument(
@@ -586,7 +614,7 @@ def _add_filter_residue_parser(subparsers: argparse._SubParsersAction):
             Write filter statistics to file.
             In CSV format with `<input_file>,<residue_count>,<passed>,<output_file>` columns.
             Use `-` for stdout."""),
-    )
+    ).complete = shtab.FILE
     _add_copy_method_arguments(parser)
 
 
@@ -598,14 +626,16 @@ def _add_filter_ss_parser(subparsers: argparse._SubParsersAction):
         description="Filter PDB/mmCIF files by secondary structure",
         formatter_class=ArgumentDefaultsRichHelpFormatter,
     )
-    parser.add_argument(…
+    parser.add_argument(
+        "input_dir", type=Path, help="Directory with PDB/mmCIF files (e.g., from 'filter chain')"
+    ).complete = shtab.DIRECTORY
     parser.add_argument(
         "output_dir",
         type=Path,
         help=dedent("""\
             Directory to write filtered PDB/mmCIF files. Files are copied without modification.
             """),
-    )
+    ).complete = shtab.DIRECTORY
     parser.add_argument("--abs-min-helix-residues", type=int, help="Min residues in helices")
     parser.add_argument("--abs-max-helix-residues", type=int, help="Max residues in helices")
     parser.add_argument("--abs-min-sheet-residues", type=int, help="Min residues in sheets")
@@ -623,7 +653,7 @@ def _add_filter_ss_parser(subparsers: argparse._SubParsersAction):
             <helix_ratio>,<sheet_ratio>,<passed>,<output_file>`.
             Use `-` for stdout.
             """),
-    )
+    ).complete = shtab.FILE
     _add_copy_method_arguments(parser)
 
 
@@ -687,12 +717,12 @@ def _add_convert_uniprot_parser(subparsers: argparse._SubParsersAction):
         "input_dir",
         type=Path,
         help=f"Directory with structure files. Supported extensions are {valid_structure_file_extensions}",
-    )
+    ).complete = shtab.DIRECTORY
     parser.add_argument(
         "output",
         type=argparse.FileType("wt", encoding="UTF-8"),
         help="Output text file with UniProt accessions (one per line). Use '-' for stdout.",
-    )
+    ).complete = shtab.FILE
     parser.add_argument(
         "--grouped",
         action="store_true",
@@ -712,14 +742,14 @@ def _add_convert_structures_parser(subparsers: argparse._SubParsersAction):
         "input_dir",
         type=Path,
         help=f"Directory with structure files. Supported extensions are {valid_structure_file_extensions}",
-    )
+    ).complete = shtab.DIRECTORY
     parser.add_argument(
         "--output-dir",
         type=Path,
         help=dedent("""\
             Directory to write converted structure files. If not given, files are written to `input_dir`.
             """),
-    )
+    ).complete = shtab.DIRECTORY
     parser.add_argument(
         "--format",
         type=str,
@@ -768,6 +798,7 @@ def make_parser() -> argparse.ArgumentParser:
     )
     parser.add_argument("--log-level", default="WARNING", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"])
     parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
+    shtab.add_argument_to(parser, ["--print-completion"])
 
     subparsers = parser.add_subparsers(dest="command", required=True)
 
@@ -825,6 +856,7 @@ def _handle_search_pdbe(args):
     output_csv = args.output_csv
     min_residues = converter.structure(args.min_residues, PositiveInt | None)  # pyright: ignore[reportArgumentType]
     max_residues = converter.structure(args.max_residues, PositiveInt | None)  # pyright: ignore[reportArgumentType]
+    keep_invalid = args.keep_invalid
 
     accs = set(_read_lines(uniprot_accessions))
     rprint(f"Finding PDB entries for {len(accs)} uniprot accessions")
@@ -833,7 +865,7 @@ def _handle_search_pdbe(args):
     raw_nr_results = len(results)
     raw_total_pdbs = sum([len(v) for v in results.values()])
     if min_residues or max_residues:
-        results = filter_pdb_results_on_chain_length(results, min_residues, max_residues)
+        results = filter_pdb_results_on_chain_length(results, min_residues, max_residues, keep_invalid=keep_invalid)
     total_pdbs = sum([len(v) for v in results.values()])
     rprint(f"Before filtering found {raw_total_pdbs} PDB entries for {raw_nr_results} uniprot accessions.")
     rprint(
@@ -976,25 +1008,26 @@ def _handle_retrieve_pdbe(args: argparse.Namespace):
 
 def _handle_retrieve_alphafold(args):
     download_dir = args.output_dir
-    …
+    raw_formats = args.format
     alphafold_csv = args.alphafold_csv
     max_parallel_downloads = args.max_parallel_downloads
     cacher = _initialize_cacher(args)
     gzip_files = args.gzip_files
     all_isoforms = args.all_isoforms
+    db_version = args.db_version
 
-    if …
-        …
+    if raw_formats is None:
+        raw_formats = {"cif"}
 
     # TODO besides `uniprot_accession,af_id\n` csv also allow headless single column format
-    # …
     af_ids = _read_column_from_csv(alphafold_csv, "af_id")
-    …
-    rprint(f"Retrieving {len(af_ids)} AlphaFold entries with formats {…
+    formats: set[DownloadableFormat] = structure(raw_formats, set[DownloadableFormat])
+    rprint(f"Retrieving {len(af_ids)} AlphaFold entries with formats {formats}")
     afs = af_fetch(
         af_ids,
         download_dir,
-        …
+        formats=formats,
+        db_version=db_version,
         max_parallel_downloads=max_parallel_downloads,
         cacher=cacher,
         gzip_files=gzip_files,
````
protein_quest/mcp_server.py
CHANGED
````diff
@@ -167,7 +167,7 @@ mcp.tool(search4macromolecular_complexes, name="search_macromolecular_complexes"
 
 @mcp.tool
 def fetch_alphafold_structures(uniprot_accs: set[str], save_dir: Path) -> list[AlphaFoldEntry]:
-    """Fetch the AlphaFold …
+    """Fetch the AlphaFold mmCIF file for given UniProt accessions.
 
     Args:
         uniprot_accs: A set of UniProt accessions.
@@ -176,8 +176,8 @@ def fetch_alphafold_structures(uniprot_accs: set[str], save_dir: Path) -> list[A
     Returns:
         A list of AlphaFold entries.
     """
-    …
-    return alphafold_fetch(uniprot_accs, save_dir, …
+    formats: set[DownloadableFormat] = {"cif"}
+    return alphafold_fetch(uniprot_accs, save_dir, formats)
 
 
 @mcp.tool
````
protein_quest/uniprot.py
CHANGED
````diff
@@ -93,6 +93,14 @@ def _chain_length_from_uniprot_chains(uniprot_chains: str) -> int:
     return total_length
 
 
+class PdbChainLengthError(ValueError):
+    """Raised when a UniProt chain description does not yield a chain length."""
+
+    def __init__(self, pdb_id: str, uniprot_chains: str):
+        msg = f"Could not determine chain length of '{pdb_id}' from '{uniprot_chains}'"
+        super().__init__(msg)
+
+
 @dataclass(frozen=True)
 class PdbResult:
     """Result of a PDB search in UniProtKB.
@@ -117,7 +125,10 @@ class PdbResult:
     @cached_property
     def chain_length(self) -> int:
         """The length of the chain from the UniProt chains aka self.uniprot_chains."""
-        return _chain_length_from_uniprot_chains(self.uniprot_chains)
+        try:
+            return _chain_length_from_uniprot_chains(self.uniprot_chains)
+        except ValueError as e:
+            raise PdbChainLengthError(self.id, self.uniprot_chains) from e
 
 
 type PdbResults = dict[str, set[PdbResult]]
@@ -128,6 +139,7 @@ def filter_pdb_results_on_chain_length(
     pdb_results: PdbResults,
     min_residues: int | None,
     max_residues: int | None,
+    keep_invalid: bool = False,
 ) -> PdbResults:
     """Filter PDB results based on chain length.
 
@@ -137,6 +149,9 @@ def filter_pdb_results_on_chain_length(
             If None, no minimum is applied.
         max_residues: Maximum number of residues allowed in chain mapped to the UniProt accession.
             If None, no maximum is applied.
+        keep_invalid: If True, PDB results with invalid chain length (could not be determined) are kept.
+            If False, PDB results with invalid chain length are filtered out.
+            Warnings are logged when length can not be determined.
 
     Returns:
         Filtered dictionary with protein IDs as keys and sets of PDB results as values.
@@ -149,12 +164,26 @@ def filter_pdb_results_on_chain_length(
         raise ValueError(msg)
     results: PdbResults = {}
     for uniprot_accession, pdb_entries in pdb_results.items():
-        filtered_pdb_entries = …
-        …
+        filtered_pdb_entries = set()
+        for pdb_entry in pdb_entries:
+            try:
+                if (min_residues is None or pdb_entry.chain_length >= min_residues) and (
+                    max_residues is None or pdb_entry.chain_length <= max_residues
+                ):
+                    filtered_pdb_entries.add(pdb_entry)
+            except PdbChainLengthError:
+                if keep_invalid:
+                    logger.warning(
+                        f"Could not determine chain length of '{pdb_entry.id}' from '{pdb_entry.uniprot_chains}' "
+                        f"belonging to uniprot accession '{uniprot_accession}', "
+                        "for completeness not filtering it out"
+                    )
+                    filtered_pdb_entries.add(pdb_entry)
+                else:
+                    logger.warning(
+                        f"Filtering out PDB entry '{pdb_entry.id}' belonging to uniprot accession "
+                        f"'{uniprot_accession}' due to invalid chain length from '{pdb_entry.uniprot_chains}'"
+                    )
         if filtered_pdb_entries:
             # Only include uniprot_accession if there are any pdb entries left after filtering
             results[uniprot_accession] = filtered_pdb_entries
@@ -337,13 +366,13 @@ def _build_sparql_query_sequence_length_filter(min_length: int | None = None, ma
     # - http://purl.uniprot.org/isoforms/P42284-2 is ok
     # - http://purl.uniprot.org/isoforms/P42284-1 is not ok, because it is based on P42284-2
     # - http://purl.uniprot.org/isoforms/Q7KQZ4-1 is not ok, because it is from another uniprot entry
-    # TODO use same approach as in retrieve_uniprot_details function
     header = dedent("""\
         ?protein up:sequence ?isoform .
-        …
-        …
-        …
-        …
+        ?isoform a up:Simple_Sequence .
+        BIND (IRI(STRBEFORE(REPLACE(
+            STR(?isoform), "http://purl.uniprot.org/isoforms/", "http://purl.uniprot.org/uniprot/"
+        ), "-")) AS ?ac_of_isoform)
+        FILTER (?protein = ?ac_of_isoform)
         ?isoform rdf:value ?sequence .
         BIND (STRLEN(?sequence) AS ?seq_length)
     """)
@@ -875,8 +904,10 @@ def map_uniprot_accessions2uniprot_details(
         ?protein up:sequence ?isoform .
         ?isoform a up:Simple_Sequence .
         ?isoform rdf:value ?sequence .
-        BIND (STRBEFORE(…
-        …
+        BIND (IRI(STRBEFORE(REPLACE(
+            STR(?isoform), "http://purl.uniprot.org/isoforms/", "http://purl.uniprot.org/uniprot/"
+        ), "-")) AS ?ac_of_isoform)
+        FILTER(?ac_of_isoform = ?protein)
     }
     ```
 
@@ -898,17 +929,20 @@ def map_uniprot_accessions2uniprot_details(
         (STRLEN(?sequence) AS ?seq_length)
     """)
     where_clause = dedent("""
-        ?protein a up:Protein .
         ?protein up:mnemonic ?uniprot_id .
         ?protein up:organism ?organism .
         ?organism up:scientificName ?taxon_name .
         ?protein up:reviewed ?reviewed .
+        OPTIONAL {
         ?protein up:recommendedName/up:fullName ?protein_name .
+        }
         ?protein up:sequence ?isoform .
         ?isoform a up:Simple_Sequence .
         ?isoform rdf:value ?sequence .
-        BIND (STRBEFORE(…
-        …
+        BIND (IRI(STRBEFORE(REPLACE(
+            STR(?isoform), "http://purl.uniprot.org/isoforms/", "http://purl.uniprot.org/uniprot/"
+        ), "-")) AS ?ac_of_isoform)
+        FILTER(?ac_of_isoform = ?protein)
     """)
     total = len(uniprot_accessions)
     with tqdm(
@@ -927,12 +961,13 @@ def map_uniprot_accessions2uniprot_details(
             timeout=timeout,
         )
         for raw_result in raw_results:
+            protein_name = raw_result.get("protein_name", {}).get("value", "")
             result = UniprotDetails(
                 uniprot_accession=raw_result["uniprot_accession"]["value"],
                 uniprot_id=raw_result["uniprot_id"]["value"],
                 sequence_length=int(raw_result["seq_length"]["value"]),
                 reviewed=raw_result["reviewed"]["value"] == "true",
-                protein_name=raw_result["protein_name"]["value"],
+                protein_name=protein_name,
                 taxon_id=int(raw_result["taxon_id"]["value"]),
                 taxon_name=raw_result["taxon_name"]["value"],
             )
````
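A short sketch of the new `keep_invalid` and `PdbChainLengthError` behaviour; `results` is assumed to be a `PdbResults` mapping obtained from an earlier PDBe search:

```python
from protein_quest.uniprot import PdbChainLengthError, filter_pdb_results_on_chain_length

# Entries whose chain length cannot be parsed now raise PdbChainLengthError
# internally; keep_invalid=True retains them instead of silently dropping them.
filtered = filter_pdb_results_on_chain_length(results, min_residues=100, max_residues=300, keep_invalid=True)

# The error can also be caught directly when reading PdbResult.chain_length:
for pdb_entries in filtered.values():
    for pdb_entry in pdb_entries:
        try:
            print(pdb_entry.id, pdb_entry.chain_length)
        except PdbChainLengthError:
            print(pdb_entry.id, "chain length unknown")
```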
protein_quest/utils.py
CHANGED
````diff
@@ -266,6 +266,7 @@ async def retrieve_files(
     cacher: Cacher | None = None,
     chunk_size: int = 524288,  # 512 KiB
     gzip_files: bool = False,
+    raise_for_not_found: bool = True,
 ) -> list[Path]:
     """Retrieve files from a list of URLs and save them to a directory.
 
@@ -279,6 +280,9 @@ async def retrieve_files(
         cacher: An optional cacher to use for caching files.
         chunk_size: The size of each chunk to read from the response.
         gzip_files: Whether to gzip the downloaded files.
+            This requires the server can send gzip encoded content.
+        raise_for_not_found: Whether to raise an error for HTTP 404 errors.
+            If false then function does not returns Path for which url gave HTTP 404 error and logs as debug message.
 
     Returns:
         A list of paths to the downloaded files.
@@ -295,11 +299,12 @@ async def retrieve_files(
             cacher=cacher,
             chunk_size=chunk_size,
             gzip_files=gzip_files,
+            raise_for_not_found=raise_for_not_found,
         )
         for url, filename in urls
     ]
-    …
-    return …
+    raw_files: list[Path | None] = await tqdm.gather(*tasks, desc=desc)
+    return [f for f in raw_files if f is not None]
 
 
 class InvalidContentEncodingError(aiohttp.ClientResponseError):
@@ -314,7 +319,8 @@ async def _retrieve_file(
     cacher: Cacher | None = None,
     chunk_size: int = 524288,  # 512 KiB
     gzip_files: bool = False,
-) -> Path:
+    raise_for_not_found=True,
+) -> Path | None:
     """Retrieve a single file from a URL and save it to a specified path.
 
     Args:
@@ -325,6 +331,9 @@ async def _retrieve_file(
         cacher: An optional cacher to use for caching files.
         chunk_size: The size of each chunk to read from the response.
         gzip_files: Whether to gzip the downloaded file.
+            This requires the server can send gzip encoded content.
+        raise_for_not_found: Whether to raise an error for HTTP 404 errors.
+            If false then function returns None on HTTP 404 errors and logs as debug message.
 
     Returns:
         The path to the saved file.
@@ -348,6 +357,9 @@ async def _retrieve_file(
         semaphore,
         session.get(url, headers=headers, auto_decompress=auto_decompress) as resp,
     ):
+        if not raise_for_not_found and resp.status == 404:
+            logger.debug(f"File not found at {url}, skipping download.")
+            return None
         resp.raise_for_status()
         if gzip_files and resp.headers.get("Content-Encoding") != "gzip":
             msg = f"Server did not send gzip encoded content for {url}, can not save as gzipped file."
````
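A sketch of the tolerant download mode added here, assuming `yarl.URL`/filename pairs as `fetch.py` builds them; the URLs below are illustrative only:

```python
import asyncio
from pathlib import Path

from yarl import URL

from protein_quest.utils import retrieve_files

urls = [
    (URL("https://alphafold.ebi.ac.uk/files/AF-P05067-F1-model_v6.cif"), "AF-P05067-F1-model_v6.cif"),
    (URL("https://alphafold.ebi.ac.uk/files/AF-XXXXXX-F1-model_v6.cif"), "missing.cif"),  # hypothetical 404
]

# With raise_for_not_found=False a 404 is logged at debug level and skipped;
# the returned list only contains files that were actually written.
paths = asyncio.run(retrieve_files(urls, Path("downloads"), desc="Downloading", raise_for_not_found=False))
```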
{protein_quest-0.7.0.dist-info → protein_quest-0.8.0.dist-info}/METADATA
CHANGED

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: protein_quest
-Version: 0.7.0
+Version: 0.8.0
 Summary: Search/retrieve/filter proteins and protein structures
 Project-URL: Homepage, https://github.com/haddocking/protein-quest
 Project-URL: Issues, https://github.com/haddocking/protein-quest/issues
@@ -21,6 +21,7 @@ Requires-Dist: platformdirs>=4.3.8
 Requires-Dist: psutil>=7.0.0
 Requires-Dist: rich-argparse>=1.7.1
 Requires-Dist: rich>=14.0.0
+Requires-Dist: shtab>=1.7.2
 Requires-Dist: sparqlwrapper>=2.0.0
 Requires-Dist: tqdm>=4.67.1
 Requires-Dist: yarl>=1.20.1
@@ -154,7 +155,7 @@ protein-quest retrieve pdbe pdbe.csv downloads-pdbe/
 protein-quest retrieve alphafold alphafold.csv downloads-af/
 ```
 
-For each entry downloads the …
+For each entry downloads the cif file.
 
 ### To retrieve EMDB volume files
 
@@ -299,6 +300,26 @@ protein-quest mcp
 
 The mcp server contains an prompt template to search/retrieve/filter candidate structures.
 
+## Shell autocompletion
+
+The `protein-quest` command line tool supports shell autocompletion using [shtab](https://shtab.readthedocs.io/).
+
+Initialize for bash shell with:
+
+```shell
+mkdir -p ~/.local/share/bash-completion/completions
+protein-quest --print-completion bash > ~/.local/share/bash-completion/completions/protein-quest
+```
+
+Initialize for zsh shell with:
+
+```shell
+mkdir -p ~/.local/share/zsh/site-functions
+protein-quest --print-completion zsh > ~/.local/share/zsh/site-functions/_protein-quest
+fpath=("$HOME/.local/share/zsh/site-functions" $fpath)
+autoload -Uz compinit && compinit
+```
+
 ## Contributing
 
 For development information and contribution guidelines, please see [CONTRIBUTING.md](CONTRIBUTING.md).
````
{protein_quest-0.7.0.dist-info → protein_quest-0.8.0.dist-info}/RECORD
CHANGED

````diff
@@ -1,27 +1,27 @@
 protein_quest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-protein_quest/__version__.py,sha256=…
-protein_quest/cli.py,sha256=…
+protein_quest/__version__.py,sha256=z22DsH46rJUgc917FJyc2z9XDmdScvBS92-z4i4GZ98,56
+protein_quest/cli.py,sha256=bE0Xq93LjdMnDoHeRIDUXUU79LyWICnhX8B3m2Lk8ZE,57264
 protein_quest/converter.py,sha256=Y-Oxf7lDNbEicL6GS-IpNWDwaAiHgIgs5bFAcEHCKdQ,1441
 protein_quest/emdb.py,sha256=641c6RwNYnu-0GBFyCFBiI58fNc0jMkd0ZZ9MW9-Jmc,1501
 protein_quest/filters.py,sha256=Xr-cJTtbNjHKuzmXLBf7yZfqKf_U3RTivcVbr620LVU,5225
 protein_quest/go.py,sha256=lZNEcw8nTc9wpV3cl4y2FG9Lsj8wsXQ6zemmAQs_DWE,5650
 protein_quest/io.py,sha256=ngV_HU2HIQFO-bP2xQj_fhgv0MYjW4puqz_9CxGpBv8,13017
-protein_quest/mcp_server.py,sha256=…
+protein_quest/mcp_server.py,sha256=oHbNjN-Lctc2mY-sjEuo82yRsp1bBsHo2Ag5MwsWx8k,8547
 protein_quest/parallel.py,sha256=ZJrLO1t2HXs4EeNctytvBTyROPBq-4-gLf35PiolHf0,3468
 protein_quest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 protein_quest/ss.py,sha256=4ZGIHfjTlodYTXqGUKhMnGbgaStYOGaWg2oYrWIjdgo,10118
 protein_quest/structure.py,sha256=QozElPz0kbPB_HW-J1WxArTT5e-1vRyBJoBSfHnwoRM,8117
 protein_quest/taxonomy.py,sha256=4mKv8zll4mX02Ow8CTvyqMJE2KJZvcq3QlTjjjLOJJk,5072
-protein_quest/uniprot.py,sha256=…
-protein_quest/utils.py,sha256=…
+protein_quest/uniprot.py,sha256=kV1lOZ_ugcF-LUff9hvmJPaGwA_uaHPJCL_3DLBIvSE,36798
+protein_quest/utils.py,sha256=5Ncdid-dslggy-Ti1yhOHwdAM7Bxpyia7Re-xDkc2P0,19909
 protein_quest/alphafold/__init__.py,sha256=Ktasi5BRp71wO7-PpOGDpIRRtBEefs8knIdlKQeLQpk,51
 protein_quest/alphafold/confidence.py,sha256=mVAYTIzdbR8xBjRiUzA0at8wJq9vpfEQWPz5cJefLKs,6766
 protein_quest/alphafold/entry_summary.py,sha256=Qhnw75RXFaoOU332g7axg_jYbbdZbUpsGPUOwPNDSeU,2114
-protein_quest/alphafold/fetch.py,sha256=…
+protein_quest/alphafold/fetch.py,sha256=eKCQHkAMko-d36VvRHLCllLxuAXBdbBUhUONOSCPsds,21970
 protein_quest/pdbe/__init__.py,sha256=eNNHtN60NAGea7gvRkIzkoTXsYPK99s-ldIcKWYO6So,61
 protein_quest/pdbe/fetch.py,sha256=e8CHWDX2QzWnVLmYXCfNrscw1UcN1lI9Uz6Z5HmEOEQ,2510
-protein_quest-0.7.0.dist-info/METADATA,sha256=…
-protein_quest-0.7.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-protein_quest-0.7.0.dist-info/entry_points.txt,sha256=f1RtOxv9TFBO3w01EMEuFXBTMsqKsQcKlkxmj9zE-0g,57
-protein_quest-0.7.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-protein_quest-0.7.0.dist-info/RECORD,,
+protein_quest-0.8.0.dist-info/METADATA,sha256=jotRxaLadElgixAW72Axk8qL8wAvzl-cq26mYJBy9zc,11335
+protein_quest-0.8.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+protein_quest-0.8.0.dist-info/entry_points.txt,sha256=f1RtOxv9TFBO3w01EMEuFXBTMsqKsQcKlkxmj9zE-0g,57
+protein_quest-0.8.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+protein_quest-0.8.0.dist-info/RECORD,,
````
{protein_quest-0.7.0.dist-info → protein_quest-0.8.0.dist-info}/WHEEL
File without changes

{protein_quest-0.7.0.dist-info → protein_quest-0.8.0.dist-info}/entry_points.txt
File without changes

{protein_quest-0.7.0.dist-info → protein_quest-0.8.0.dist-info}/licenses/LICENSE
File without changes