protein-quest 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of protein-quest might be problematic. Click here for more details.
- protein_quest/__version__.py +1 -1
- protein_quest/cli.py +139 -1
- protein_quest/converter.py +1 -0
- protein_quest/mcp_server.py +10 -1
- protein_quest/ss.py +20 -0
- protein_quest/uniprot.py +157 -4
- {protein_quest-0.3.2.dist-info → protein_quest-0.4.0.dist-info}/METADATA +33 -3
- {protein_quest-0.3.2.dist-info → protein_quest-0.4.0.dist-info}/RECORD +11 -11
- {protein_quest-0.3.2.dist-info → protein_quest-0.4.0.dist-info}/WHEEL +0 -0
- {protein_quest-0.3.2.dist-info → protein_quest-0.4.0.dist-info}/entry_points.txt +0 -0
- {protein_quest-0.3.2.dist-info → protein_quest-0.4.0.dist-info}/licenses/LICENSE +0 -0
protein_quest/__version__.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
__version__ = "0.
|
|
1
|
+
__version__ = "0.4.0"
|
|
2
2
|
"""The version of the package."""
|
protein_quest/cli.py
CHANGED
|
@@ -15,6 +15,7 @@ from textwrap import dedent
|
|
|
15
15
|
from cattrs import structure
|
|
16
16
|
from rich import print as rprint
|
|
17
17
|
from rich.logging import RichHandler
|
|
18
|
+
from rich.markdown import Markdown
|
|
18
19
|
from rich.panel import Panel
|
|
19
20
|
from rich_argparse import ArgumentDefaultsRichHelpFormatter
|
|
20
21
|
from tqdm.rich import tqdm
|
|
@@ -31,7 +32,17 @@ from protein_quest.pdbe import fetch as pdbe_fetch
|
|
|
31
32
|
from protein_quest.pdbe.io import glob_structure_files, locate_structure_file
|
|
32
33
|
from protein_quest.ss import SecondaryStructureFilterQuery, filter_files_on_secondary_structure
|
|
33
34
|
from protein_quest.taxonomy import SearchField, _write_taxonomy_csv, search_fields, search_taxon
|
|
34
|
-
from protein_quest.uniprot import
|
|
35
|
+
from protein_quest.uniprot import (
|
|
36
|
+
ComplexPortalEntry,
|
|
37
|
+
PdbResult,
|
|
38
|
+
Query,
|
|
39
|
+
search4af,
|
|
40
|
+
search4emdb,
|
|
41
|
+
search4interaction_partners,
|
|
42
|
+
search4macromolecular_complexes,
|
|
43
|
+
search4pdb,
|
|
44
|
+
search4uniprot,
|
|
45
|
+
)
|
|
35
46
|
from protein_quest.utils import CopyMethod, copy_methods, copyfile
|
|
36
47
|
|
|
37
48
|
logger = logging.getLogger(__name__)
|
|
@@ -211,6 +222,73 @@ def _add_search_taxonomy_parser(subparser: argparse._SubParsersAction):
|
|
|
211
222
|
parser.add_argument("--limit", type=int, default=100, help="Maximum number of results to return")
|
|
212
223
|
|
|
213
224
|
|
|
225
|
+
def _add_search_interaction_partners_parser(subparsers: argparse._SubParsersAction):
    """Register the `interaction-partners` subcommand.

    The subcommand takes one UniProt accession and an output file as
    positionals, and accepts the optional flags `--exclude` (repeatable),
    `--limit` and `--timeout`.

    Args:
        subparsers: Sub-parsers action the new subcommand is attached to.
    """
    long_description = dedent("""\
        Search for interaction partners of given UniProt accession
        in the Uniprot SPARQL endpoint and Complex Portal.
    """)
    cmd = subparsers.add_parser(
        "interaction-partners",
        help="Search for interaction partners of given UniProt accession",
        description=long_description,
        formatter_class=ArgumentDefaultsRichHelpFormatter,
    )
    # Positional: the protein whose partners are looked up.
    cmd.add_argument("uniprot_acc", type=str, help="UniProt accession (for example P12345).")
    # action="append" collects every occurrence of --exclude into a list (None when absent).
    cmd.add_argument(
        "--exclude",
        type=str,
        action="append",
        help="UniProt accessions to exclude from the results. For example already known interaction partners.",
    )
    # Positional: argparse opens the output file for writing (or stdout for `-`).
    cmd.add_argument(
        "output_csv",
        type=argparse.FileType("w", encoding="UTF-8"),
        help="Output CSV with interaction partners per UniProt accession. Use `-` for stdout.",
    )
    cmd.add_argument(
        "--limit", type=int, default=10_000, help="Maximum number of interaction partner uniprot accessions to return"
    )
    cmd.add_argument("--timeout", type=int, default=1_800, help="Maximum seconds to wait for query to complete")
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def _add_search_complexes_parser(subparsers: argparse._SubParsersAction):
    """Register the `complexes` subcommand.

    The subcommand reads UniProt accessions from a text file, and writes the
    complexes found for them to a CSV file. Its help description is passed to
    argparse as a rich `Markdown` object so the column list renders as a list.

    Args:
        subparsers: Sub-parsers action the new subcommand is attached to.
    """
    markdown_help = dedent("""\
        Search for complexes in the Complex Portal.
        https://www.ebi.ac.uk/complexportal/

        The output CSV file has the following columns:

        - query_protein: UniProt accession used as query
        - complex_id: Complex Portal identifier
        - complex_url: URL to the Complex Portal entry
        - complex_title: Title of the complex
        - members: Semicolon-separated list of UniProt accessions of complex members
    """)
    cmd = subparsers.add_parser(
        "complexes",
        help="Search for complexes in the Complex Portal",
        description=Markdown(markdown_help, style="argparse.text"),  # type: ignore using rich formatter makes this OK
        formatter_class=ArgumentDefaultsRichHelpFormatter,
    )
    # Positional: argparse opens the accession list for reading (or stdin for `-`).
    cmd.add_argument(
        "uniprot_accs",
        type=argparse.FileType("r", encoding="UTF-8"),
        help="Text file with UniProt accessions (one per line) as query for searching complexes. Use `-` for stdin.",
    )
    # Positional: argparse opens the output file for writing (or stdout for `-`).
    cmd.add_argument(
        "output_csv",
        type=argparse.FileType("w", encoding="UTF-8"),
        help="Output CSV file with complex results. Use `-` for stdout.",
    )
    cmd.add_argument("--limit", type=int, default=100, help="Maximum number of complex results to return")
    cmd.add_argument("--timeout", type=int, default=1_800, help="Maximum seconds to wait for query to complete")
|
|
290
|
+
|
|
291
|
+
|
|
214
292
|
def _add_retrieve_pdbe_parser(subparsers: argparse._SubParsersAction):
|
|
215
293
|
"""Add retrieve pdbe subcommand parser."""
|
|
216
294
|
parser = subparsers.add_parser(
|
|
@@ -458,6 +536,8 @@ def _add_search_subcommands(subparsers: argparse._SubParsersAction):
|
|
|
458
536
|
_add_search_emdb_parser(subsubparsers)
|
|
459
537
|
_add_search_go_parser(subsubparsers)
|
|
460
538
|
_add_search_taxonomy_parser(subsubparsers)
|
|
539
|
+
_add_search_interaction_partners_parser(subsubparsers)
|
|
540
|
+
_add_search_complexes_parser(subsubparsers)
|
|
461
541
|
|
|
462
542
|
|
|
463
543
|
def _add_retrieve_subcommands(subparsers: argparse._SubParsersAction):
|
|
@@ -636,6 +716,32 @@ def _handle_search_taxonomy(args):
|
|
|
636
716
|
_write_taxonomy_csv(results, output_csv)
|
|
637
717
|
|
|
638
718
|
|
|
719
|
+
def _handle_search_interaction_partners(args: argparse.Namespace):
    """Run the `search interaction-partners` subcommand.

    Looks up the interaction partners of `args.uniprot_acc` and writes the
    partner accessions (the keys of the search result) to `args.output_csv`.

    Args:
        args: Parsed command line arguments; reads `uniprot_acc`, `exclude`,
            `limit`, `timeout` and `output_csv`.
    """
    query_acc: str = args.uniprot_acc
    # `--exclude` is absent (None) unless given at least once.
    skipped: set[str] = set(args.exclude or ())
    max_results: int = args.limit
    max_seconds: int = args.timeout
    sink: TextIOWrapper = args.output_csv

    rprint(f"Searching for interaction partners of '{query_acc}'")
    partners = search4interaction_partners(query_acc, excludes=skipped, limit=max_results, timeout=max_seconds)
    # The summary is printed before the actual write happens.
    rprint(f"Found {len(partners)} interaction partners, written to {sink.name}")
    _write_lines(sink, partners.keys())
|
|
730
|
+
|
|
731
|
+
|
|
732
|
+
def _handle_search_complexes(args: argparse.Namespace):
    """Run the `search complexes` subcommand.

    Reads UniProt accessions from `args.uniprot_accs`, searches the complexes
    they belong to, and writes the results as CSV to `args.output_csv`.

    Args:
        args: Parsed command line arguments; reads `uniprot_accs`, `limit`,
            `timeout` and `output_csv`.
    """
    accessions_file = args.uniprot_accs
    max_results = args.limit
    max_seconds = args.timeout
    csv_sink = args.output_csv

    query_accs = _read_lines(accessions_file)
    rprint(f"Finding complexes for {len(query_accs)} uniprot accessions")
    found = search4macromolecular_complexes(query_accs, limit=max_results, timeout=max_seconds)
    # The summary is printed before the actual write happens.
    rprint(f"Found {len(found)} complexes, written to {csv_sink.name}")
    _write_complexes_csv(found, csv_sink)
|
|
743
|
+
|
|
744
|
+
|
|
639
745
|
def _handle_retrieve_pdbe(args):
|
|
640
746
|
pdbe_csv = args.pdbe_csv
|
|
641
747
|
output_dir = args.output_dir
|
|
@@ -875,6 +981,8 @@ HANDLERS: dict[tuple[str, str | None], Callable] = {
|
|
|
875
981
|
("search", "emdb"): _handle_search_emdb,
|
|
876
982
|
("search", "go"): _handle_search_go,
|
|
877
983
|
("search", "taxonomy"): _handle_search_taxonomy,
|
|
984
|
+
("search", "interaction-partners"): _handle_search_interaction_partners,
|
|
985
|
+
("search", "complexes"): _handle_search_complexes,
|
|
878
986
|
("retrieve", "pdbe"): _handle_retrieve_pdbe,
|
|
879
987
|
("retrieve", "alphafold"): _handle_retrieve_alphafold,
|
|
880
988
|
("retrieve", "emdb"): _handle_retrieve_emdb,
|
|
@@ -937,3 +1045,33 @@ def _iter_csv_rows(file: TextIOWrapper) -> Generator[dict[str, str]]:
|
|
|
937
1045
|
|
|
938
1046
|
def _read_column_from_csv(file: TextIOWrapper, column: str) -> set[str]:
|
|
939
1047
|
return {row[column] for row in _iter_csv_rows(file)}
|
|
1048
|
+
|
|
1049
|
+
|
|
1050
|
+
def _write_complexes_csv(complexes: list[ComplexPortalEntry], output_csv: TextIOWrapper) -> None:
|
|
1051
|
+
"""Write ComplexPortal information to a CSV file.
|
|
1052
|
+
|
|
1053
|
+
Args:
|
|
1054
|
+
complexes: List of ComplexPortalEntry objects.
|
|
1055
|
+
output_csv: TextIOWrapper to write the CSV data to.
|
|
1056
|
+
"""
|
|
1057
|
+
writer = csv.writer(output_csv)
|
|
1058
|
+
writer.writerow(
|
|
1059
|
+
[
|
|
1060
|
+
"query_protein",
|
|
1061
|
+
"complex_id",
|
|
1062
|
+
"complex_url",
|
|
1063
|
+
"complex_title",
|
|
1064
|
+
"members",
|
|
1065
|
+
]
|
|
1066
|
+
)
|
|
1067
|
+
for entry in complexes:
|
|
1068
|
+
members_str = ";".join(sorted(entry.members))
|
|
1069
|
+
writer.writerow(
|
|
1070
|
+
[
|
|
1071
|
+
entry.query_protein,
|
|
1072
|
+
entry.complex_id,
|
|
1073
|
+
entry.complex_url,
|
|
1074
|
+
entry.complex_title,
|
|
1075
|
+
members_str,
|
|
1076
|
+
]
|
|
1077
|
+
)
|
protein_quest/converter.py
CHANGED
|
@@ -13,6 +13,7 @@ type PositiveInt = int
|
|
|
13
13
|
converter = make_converter()
|
|
14
14
|
"""cattrs converter to read JSON document or dict to Python objects."""
|
|
15
15
|
converter.register_structure_hook(URL, lambda v, _: URL(v))
|
|
16
|
+
converter.register_unstructure_hook(URL, lambda u: str(u))
|
|
16
17
|
|
|
17
18
|
|
|
18
19
|
@converter.register_structure_hook
|
protein_quest/mcp_server.py
CHANGED
|
@@ -48,7 +48,15 @@ from protein_quest.pdbe.fetch import fetch as pdbe_fetch
|
|
|
48
48
|
from protein_quest.pdbe.io import glob_structure_files, nr_residues_in_chain, write_single_chain_pdb_file
|
|
49
49
|
from protein_quest.ss import filter_file_on_secondary_structure
|
|
50
50
|
from protein_quest.taxonomy import search_taxon
|
|
51
|
-
from protein_quest.uniprot import
|
|
51
|
+
from protein_quest.uniprot import (
|
|
52
|
+
PdbResult,
|
|
53
|
+
Query,
|
|
54
|
+
search4af,
|
|
55
|
+
search4emdb,
|
|
56
|
+
search4macromolecular_complexes,
|
|
57
|
+
search4pdb,
|
|
58
|
+
search4uniprot,
|
|
59
|
+
)
|
|
52
60
|
|
|
53
61
|
mcp = FastMCP("protein-quest")
|
|
54
62
|
|
|
@@ -137,6 +145,7 @@ def search_alphafolds(
|
|
|
137
145
|
|
|
138
146
|
|
|
139
147
|
mcp.tool(search4emdb, name="search_emdb")
|
|
148
|
+
mcp.tool(search4macromolecular_complexes, name="search_macromolecular_complexes")
|
|
140
149
|
|
|
141
150
|
|
|
142
151
|
@mcp.tool
|
protein_quest/ss.py
CHANGED
|
@@ -111,6 +111,26 @@ class SecondaryStructureFilterQuery:
|
|
|
111
111
|
ratio_min_sheet_residues: Ratio | None = None
|
|
112
112
|
ratio_max_sheet_residues: Ratio | None = None
|
|
113
113
|
|
|
114
|
+
def is_actionable(self) -> bool:
    """Tell whether at least one secondary structure filter is set.

    A filter counts as set when its value is not None, so a threshold of 0
    still makes the query actionable.

    Returns:
        True if any of the filters are set, False otherwise.
    """
    thresholds = (
        self.abs_min_helix_residues,
        self.abs_max_helix_residues,
        self.abs_min_sheet_residues,
        self.abs_max_sheet_residues,
        self.ratio_min_helix_residues,
        self.ratio_max_helix_residues,
        self.ratio_min_sheet_residues,
        self.ratio_max_sheet_residues,
    )
    return any(value is not None for value in thresholds)
|
|
133
|
+
|
|
114
134
|
|
|
115
135
|
def _check_range(min_val, max_val, label):
|
|
116
136
|
if min_val is not None and max_val is not None and min_val >= max_val:
|
protein_quest/uniprot.py
CHANGED
|
@@ -201,7 +201,7 @@ def _build_sparql_generic_query(select_clause: str, where_clause: str, limit: in
|
|
|
201
201
|
""")
|
|
202
202
|
|
|
203
203
|
|
|
204
|
-
def
|
|
204
|
+
def _build_sparql_generic_by_uniprot_accessions_query(
|
|
205
205
|
uniprot_accs: Iterable[str], select_clause: str, where_clause: str, limit: int = 10_000, groupby_clause=""
|
|
206
206
|
) -> str:
|
|
207
207
|
values = " ".join(f'("{ac}")' for ac in uniprot_accs)
|
|
@@ -269,7 +269,7 @@ def _build_sparql_query_pdb(uniprot_accs: Iterable[str], limit=10_000) -> str:
|
|
|
269
269
|
""")
|
|
270
270
|
|
|
271
271
|
groupby_clause = "?protein ?pdb_db ?pdb_method ?pdb_resolution"
|
|
272
|
-
return
|
|
272
|
+
return _build_sparql_generic_by_uniprot_accessions_query(
|
|
273
273
|
uniprot_accs, select_clause, where_clause, limit, groupby_clause
|
|
274
274
|
)
|
|
275
275
|
|
|
@@ -284,7 +284,7 @@ def _build_sparql_query_af(uniprot_accs: Iterable[str], limit=10_000) -> str:
|
|
|
284
284
|
?protein rdfs:seeAlso ?af_db .
|
|
285
285
|
?af_db up:database <http://purl.uniprot.org/database/AlphaFoldDB> .
|
|
286
286
|
""")
|
|
287
|
-
return
|
|
287
|
+
return _build_sparql_generic_by_uniprot_accessions_query(uniprot_accs, select_clause, dedent(where_clause), limit)
|
|
288
288
|
|
|
289
289
|
|
|
290
290
|
def _build_sparql_query_emdb(uniprot_accs: Iterable[str], limit=10_000) -> str:
|
|
@@ -297,7 +297,7 @@ def _build_sparql_query_emdb(uniprot_accs: Iterable[str], limit=10_000) -> str:
|
|
|
297
297
|
?protein rdfs:seeAlso ?emdb_db .
|
|
298
298
|
?emdb_db up:database <http://purl.uniprot.org/database/EMDB> .
|
|
299
299
|
""")
|
|
300
|
-
return
|
|
300
|
+
return _build_sparql_generic_by_uniprot_accessions_query(uniprot_accs, select_clause, dedent(where_clause), limit)
|
|
301
301
|
|
|
302
302
|
|
|
303
303
|
def _execute_sparql_search(
|
|
@@ -509,3 +509,156 @@ def search4emdb(uniprot_accs: Iterable[str], limit: int = 10_000, timeout: int =
|
|
|
509
509
|
)
|
|
510
510
|
limit_check("Search for EMDB entries on uniprot", limit, len(raw_results))
|
|
511
511
|
return _flatten_results_emdb(raw_results)
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
def _build_complex_sparql_query(uniprot_accs: Iterable[str], limit: int) -> str:
    """Build the SPARQL query that finds ComplexPortal entries for UniProt accessions.

    Only the SELECT, WHERE and GROUP BY fragments are defined here; the final
    query text is assembled by `_build_sparql_generic_by_uniprot_accessions_query`.

    The fragments select, per query protein and ComplexPortal cross-reference,
    the optional comment of the cross-reference and a comma separated
    concatenation of all proteins that reference the same complex:

    ```sparql
    SELECT
        ?protein ?cp_db ?cp_comment
        (GROUP_CONCAT(DISTINCT ?member; separator=",") AS ?complex_members)
    WHERE {
        # ... binding of ?protein to the given accessions is added by the generic builder ...
        ?protein a up:Protein ;
            rdfs:seeAlso ?cp_db .
        ?cp_db up:database <http://purl.uniprot.org/database/ComplexPortal> .
        OPTIONAL { ?cp_db rdfs:comment ?cp_comment . }
        ?member a up:Protein ;
            rdfs:seeAlso ?cp_db .
    }
    GROUP BY ?protein ?cp_db ?cp_comment
    ```

    Args:
        uniprot_accs: UniProt accessions to look up.
        limit: Forwarded to the generic query builder as its limit.

    Returns:
        The SPARQL query text.
    """
    projection = dedent("""\
        ?protein ?cp_db ?cp_comment
        (GROUP_CONCAT(DISTINCT ?member; separator=",") AS ?complex_members)
    """)
    graph_pattern = dedent("""
        # --- Complex Info ---
        ?protein a up:Protein ;
        rdfs:seeAlso ?cp_db .
        ?cp_db up:database <http://purl.uniprot.org/database/ComplexPortal> .
        OPTIONAL { ?cp_db rdfs:comment ?cp_comment . }
        # All member proteins of the same ComplexPortal complex
        ?member a up:Protein ;
        rdfs:seeAlso ?cp_db .
    """)
    # Every non-aggregated variable of the projection has to be grouped on.
    grouping = dedent("""
        ?protein ?cp_db ?cp_comment
    """)
    return _build_sparql_generic_by_uniprot_accessions_query(
        uniprot_accs, projection, graph_pattern, limit, groupby_clause=grouping
    )
|
|
571
|
+
|
|
572
|
+
|
|
573
|
+
@dataclass(frozen=True)
class ComplexPortalEntry:
    """One ComplexPortal complex found for a query protein.

    Attributes:
        query_protein: UniProt accession that was used to find the complex.
        complex_id: ComplexPortal identifier (for example "CPX-1234").
        complex_url: URL of the complex on the ComplexPortal website.
        complex_title: Title of the complex.
        members: UniProt accessions of the proteins that are part of the complex.
    """

    # NOTE: frozen=True only blocks re-assignment of attributes. `members` is a
    # mutable set, and because of it hash(entry) raises TypeError.
    query_protein: str
    complex_id: str
    complex_url: str
    complex_title: str
    members: set[str]
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
def _flatten_results_complex(raw_results) -> list[ComplexPortalEntry]:
    """Convert raw SPARQL result bindings into ComplexPortalEntry objects.

    Each binding must have `protein`, `cp_db` and `complex_members` items with
    a `value`; `cp_comment` is optional and defaults to an empty title.
    Identifiers are taken from the part after the last `/` of each value.

    Args:
        raw_results: Iterable of SPARQL JSON style bindings.

    Returns:
        One entry per binding, in the same order.
    """

    def last_segment(iri: str) -> str:
        # "http://.../P12345" -> "P12345"; a value without "/" is returned as is.
        return iri.rsplit("/", 1)[-1]

    entries = []
    for binding in raw_results:
        accession = last_segment(binding["protein"]["value"])
        cpx_id = last_segment(binding["cp_db"]["value"])
        title = binding.get("cp_comment", {}).get("value", "")
        # Members arrive as one comma separated string of protein IRIs.
        member_iris = binding["complex_members"]["value"].split(",")
        entries.append(
            ComplexPortalEntry(
                query_protein=accession,
                complex_id=cpx_id,
                complex_url=f"https://www.ebi.ac.uk/complexportal/complex/{cpx_id}",
                complex_title=title,
                members={last_segment(iri) for iri in member_iris},
            )
        )
    return entries
|
|
610
|
+
|
|
611
|
+
|
|
612
|
+
def search4macromolecular_complexes(
    uniprot_accs: Iterable[str], limit: int = 10_000, timeout: int = 1_800
) -> list[ComplexPortalEntry]:
    """Find the macromolecular complexes that the given UniProtKB accessions are part of.

    Uses the cross-references to the https://www.ebi.ac.uk/complexportal/ database
    that are available in the Uniprot SPARQL endpoint.

    Args:
        uniprot_accs: UniProt accessions.
        limit: Maximum number of results to return.
        timeout: Timeout for the SPARQL query in seconds.

    Returns:
        List of ComplexPortalEntry objects.
    """
    query = _build_complex_sparql_query(uniprot_accs, limit)
    logger.info("Executing SPARQL query for macromolecular complexes: %s", query)
    bindings = _execute_sparql_search(sparql_query=query, timeout=timeout)
    # Compare the number of raw rows against the requested limit.
    limit_check("Search for complexes", limit, len(bindings))
    return _flatten_results_complex(bindings)
|
|
635
|
+
|
|
636
|
+
|
|
637
|
+
def search4interaction_partners(
    uniprot_acc: str, excludes: set[str] | None = None, limit: int = 10_000, timeout: int = 1_800
) -> dict[str, set[str]]:
    """Search for interaction partners of a given UniProt accession using ComplexPortal database references.

    Every other member of a complex that contains `uniprot_acc` is reported as a partner.

    Args:
        uniprot_acc: UniProt accession to search interaction partners for.
        excludes: Set of UniProt accessions to exclude from the results.
            For example already known interaction partners.
            If None then no complex members are excluded.
        limit: Maximum number of complexes to retrieve for `uniprot_acc`.
            It is passed on to the complex search, so it caps the complexes
            that are inspected, not the number of partners in the result.
        timeout: Timeout for the SPARQL query in seconds.

    Returns:
        Dictionary with UniProt accessions of interaction partners as keys and sets of ComplexPortal entry IDs
        in which the interaction occurs as values.
    """
    ucomplexes = search4macromolecular_complexes([uniprot_acc], limit=limit, timeout=timeout)
    excluded = excludes if excludes is not None else set()
    hits: dict[str, set[str]] = {}
    for ucomplex in ucomplexes:
        for member in ucomplex.members:
            # The query protein itself is a member of each of its complexes; it is not its own partner.
            if member == uniprot_acc or member in excluded:
                continue
            hits.setdefault(member, set()).add(ucomplex.complex_id)
    return hits
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: protein_quest
|
|
3
|
-
Version: 0.3.2
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Search/retrieve/filter proteins and protein structures
|
|
5
5
|
Project-URL: Homepage, https://github.com/haddocking/protein-quest
|
|
6
6
|
Project-URL: Issues, https://github.com/haddocking/protein-quest/issues
|
|
@@ -56,12 +56,14 @@ graph TB;
|
|
|
56
56
|
searchuniprot --> |uniprot_accessions|searchpdbe[/Search PDBe/]
|
|
57
57
|
searchuniprot --> |uniprot_accessions|searchaf[/Search Alphafold/]
|
|
58
58
|
searchuniprot -. uniprot_accessions .-> searchemdb[/Search EMDB/]
|
|
59
|
+
searchintactionpartners[/Search interaction partners/] -.-x |uniprot_accessions|searchuniprot
|
|
60
|
+
searchcomplexes[/Search complexes/]
|
|
59
61
|
searchpdbe -->|pdb_ids|fetchpdbe[Retrieve PDBe]
|
|
60
62
|
searchaf --> |uniprot_accessions|fetchad(Retrieve AlphaFold)
|
|
61
63
|
searchemdb -. emdb_ids .->fetchemdb[Retrieve EMDB]
|
|
62
|
-
fetchpdbe -->|
|
|
64
|
+
fetchpdbe -->|mmcif_files| chainfilter{{Filter on chain of uniprot}}
|
|
63
65
|
chainfilter --> |mmcif_files| residuefilter{{Filter on chain length}}
|
|
64
|
-
fetchad -->|
|
|
66
|
+
fetchad -->|mmcif_files| confidencefilter{{Filter out low confidence}}
|
|
65
67
|
confidencefilter --> |mmcif_files| ssfilter{{Filter on secondary structure}}
|
|
66
68
|
residuefilter --> |mmcif_files| ssfilter
|
|
67
69
|
classDef dashedBorder stroke-dasharray: 5 5;
|
|
@@ -69,6 +71,8 @@ graph TB;
|
|
|
69
71
|
taxonomy:::dashedBorder
|
|
70
72
|
searchemdb:::dashedBorder
|
|
71
73
|
fetchemdb:::dashedBorder
|
|
74
|
+
searchintactionpartners:::dashedBorder
|
|
75
|
+
searchcomplexes:::dashedBorder
|
|
72
76
|
```
|
|
73
77
|
|
|
74
78
|
(Dotted nodes and edges are side-quests.)
|
|
@@ -204,6 +208,32 @@ You can use following command to search for a Gene Ontology (GO) term.
|
|
|
204
208
|
protein-quest search go --limit 5 --aspect cellular_component apoptosome -
|
|
205
209
|
```
|
|
206
210
|
|
|
211
|
+
### Search for interaction partners
|
|
212
|
+
|
|
213
|
+
Use https://www.ebi.ac.uk/complexportal to find interaction partners of a given UniProt accession.
|
|
214
|
+
|
|
215
|
+
```shell
|
|
216
|
+
protein-quest search interaction-partners Q05471 interaction-partners-of-Q05471.txt
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
The `interaction-partners-of-Q05471.txt` file contains uniprot accessions (one per line).
|
|
220
|
+
|
|
221
|
+
### Search for complexes
|
|
222
|
+
|
|
223
|
+
Given UniProt accessions, search for macromolecular complexes at https://www.ebi.ac.uk/complexportal
|
|
224
|
+
and return the complex entries and their members.
|
|
225
|
+
|
|
226
|
+
```shell
|
|
227
|
+
echo Q05471 | protein-quest search complexes - complexes.csv
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
The `complexes.csv` file looks like:
|
|
231
|
+
|
|
232
|
+
```csv
|
|
233
|
+
query_protein,complex_id,complex_url,complex_title,members
|
|
234
|
+
Q05471,CPX-2122,https://www.ebi.ac.uk/complexportal/complex/CPX-2122,Swr1 chromatin remodelling complex,P31376;P35817;P38326;P53201;P53930;P60010;P80428;Q03388;Q03433;Q03940;Q05471;Q06707;Q12464;Q12509
|
|
235
|
+
```
|
|
236
|
+
|
|
207
237
|
## Model Context Protocol (MCP) server
|
|
208
238
|
|
|
209
239
|
Protein quest can also help LLMs like Claude Sonnet 4 by providing a [set of tools](https://modelcontextprotocol.io/docs/learn/server-concepts#tools-ai-actions) for protein structures.
|
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
protein_quest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
protein_quest/__version__.py,sha256=
|
|
3
|
-
protein_quest/cli.py,sha256=
|
|
4
|
-
protein_quest/converter.py,sha256=
|
|
2
|
+
protein_quest/__version__.py,sha256=je7v2gXyxr6yRVCFAS0wS-iABSLJOuCb-IPR-x90UAU,56
|
|
3
|
+
protein_quest/cli.py,sha256=9Cgvn5BXbrAloIU2KCiFxLxJSyAoa2RLdmuB0HGsUJM,43078
|
|
4
|
+
protein_quest/converter.py,sha256=Y-Oxf7lDNbEicL6GS-IpNWDwaAiHgIgs5bFAcEHCKdQ,1441
|
|
5
5
|
protein_quest/emdb.py,sha256=QEeU0VJQ4lLM-o5yAU3QZlrtzDZNgnC5fCjlqPtTyAY,1370
|
|
6
6
|
protein_quest/filters.py,sha256=-gasSXR4g5SzYSYbkfcDwR-tm2KCAhCMdpIVJrUPR1w,5224
|
|
7
7
|
protein_quest/go.py,sha256=lZNEcw8nTc9wpV3cl4y2FG9Lsj8wsXQ6zemmAQs_DWE,5650
|
|
8
|
-
protein_quest/mcp_server.py,sha256=
|
|
8
|
+
protein_quest/mcp_server.py,sha256=CXw5rTStunXdAVQ3BWPXy19zmgQGwV5uPcWlN1HF9do,7389
|
|
9
9
|
protein_quest/parallel.py,sha256=ZJrLO1t2HXs4EeNctytvBTyROPBq-4-gLf35PiolHf0,3468
|
|
10
10
|
protein_quest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
|
-
protein_quest/ss.py,sha256=
|
|
11
|
+
protein_quest/ss.py,sha256=qOr0aMycNAtZmXXvhCN-KZH3Qp4EejnBcE6fsFgCrmY,10343
|
|
12
12
|
protein_quest/taxonomy.py,sha256=4mKv8zll4mX02Ow8CTvyqMJE2KJZvcq3QlTjjjLOJJk,5072
|
|
13
|
-
protein_quest/uniprot.py,sha256=
|
|
13
|
+
protein_quest/uniprot.py,sha256=DIwQYzWZREZ7SGhkJT4Ozgl36pdz47FNfZ1QoEgEaXE,24239
|
|
14
14
|
protein_quest/utils.py,sha256=z4PPPcog6nvPhA93DWVf7stv5uJ4h_2BP5owdhoO5mo,5626
|
|
15
15
|
protein_quest/alphafold/__init__.py,sha256=Ktasi5BRp71wO7-PpOGDpIRRtBEefs8knIdlKQeLQpk,51
|
|
16
16
|
protein_quest/alphafold/confidence.py,sha256=pYIuwYdkuPuHLagcX1dSvSyZ_84xboRLfHUxkEoc4MY,6766
|
|
@@ -19,8 +19,8 @@ protein_quest/alphafold/fetch.py,sha256=iFHORaO-2NvPwmpm33tfOFUcSJx8mBGwMXxwc4bR
|
|
|
19
19
|
protein_quest/pdbe/__init__.py,sha256=eNNHtN60NAGea7gvRkIzkoTXsYPK99s-ldIcKWYO6So,61
|
|
20
20
|
protein_quest/pdbe/fetch.py,sha256=tlCrWoaOrwxnQFrf-PnimUUa6lmtHwwysS51efYsBcA,2379
|
|
21
21
|
protein_quest/pdbe/io.py,sha256=iGLvmsD-eEYnrgZDYfkGWIDCzwDRRD5dwqB480talCs,10037
|
|
22
|
-
protein_quest-0.
|
|
23
|
-
protein_quest-0.
|
|
24
|
-
protein_quest-0.
|
|
25
|
-
protein_quest-0.
|
|
26
|
-
protein_quest-0.
|
|
22
|
+
protein_quest-0.4.0.dist-info/METADATA,sha256=y5DAnM4mhSincjslsvQZ4zk1QcMysGmnsBltK_Vz4MQ,8842
|
|
23
|
+
protein_quest-0.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
24
|
+
protein_quest-0.4.0.dist-info/entry_points.txt,sha256=f1RtOxv9TFBO3w01EMEuFXBTMsqKsQcKlkxmj9zE-0g,57
|
|
25
|
+
protein_quest-0.4.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
26
|
+
protein_quest-0.4.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|