protein-quest 0.10.1__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2 +1,2 @@
1
- __version__ = "0.10.1"
1
+ __version__ = "1.1.0"
2
2
  """The version of the package."""
protein_quest/cli.py CHANGED
@@ -8,6 +8,7 @@ import os
8
8
  import sys
9
9
  from collections.abc import Callable, Generator, Iterable, Sequence
10
10
  from contextlib import suppress
11
+ from functools import lru_cache
11
12
  from importlib.util import find_spec
12
13
  from io import BytesIO, TextIOWrapper
13
14
  from pathlib import Path
@@ -20,6 +21,7 @@ from rich.logging import RichHandler
20
21
  from rich.markdown import Markdown
21
22
  from rich.panel import Panel
22
23
  from rich_argparse import ArgumentDefaultsRichHelpFormatter
24
+ from rocrate_action_recorder import recorded_argparse
23
25
  from tqdm.rich import tqdm
24
26
 
25
27
  from protein_quest.__version__ import __version__
@@ -797,12 +799,18 @@ def _add_mcp_command(subparsers: argparse._SubParsersAction):
797
799
  parser.add_argument("--port", default=8000, type=int, help="Port to bind the server to")
798
800
 
799
801
 
802
+ @lru_cache(maxsize=1)
800
803
  def make_parser() -> argparse.ArgumentParser:
801
804
  parser = argparse.ArgumentParser(
802
805
  description="Protein Quest CLI", prog="protein-quest", formatter_class=ArgumentDefaultsRichHelpFormatter
803
806
  )
804
807
  parser.add_argument("--log-level", default="WARNING", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"])
805
808
  parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
809
+ parser.add_argument(
810
+ "--prov",
811
+ action="store_true",
812
+ help="Whether to write provenance information about the command execution to ro-crate-metadata.json file.",
813
+ )
806
814
  shtab.add_argument_to(parser, ["--print-completion"])
807
815
 
808
816
  subparsers = parser.add_subparsers(dest="command", required=True)
@@ -824,7 +832,26 @@ def _name_of(file: TextIOWrapper | BytesIO) -> str:
824
832
  return "<stdout>"
825
833
 
826
834
 
827
- def _handle_search_uniprot(args):
835
+ def prov(
836
+ input_dirs: list[str] | None = None,
837
+ output_dirs: list[str] | None = None,
838
+ input_files: list[str] | None = None,
839
+ output_files: list[str] | None = None,
840
+ ):
841
+ """Decorator to record provenance for protein-quest commands."""
842
+ return recorded_argparse(
843
+ parser=make_parser(),
844
+ input_dirs=input_dirs,
845
+ output_dirs=output_dirs,
846
+ input_files=input_files,
847
+ output_files=output_files,
848
+ enabled_argument="prov",
849
+ dataset_license="CC BY 4.0",
850
+ )
851
+
852
+
853
+ @prov(output_files=["output"])
854
+ def _handle_search_uniprot(args: argparse.Namespace):
828
855
  taxon_id = args.taxon_id
829
856
  reviewed = args.reviewed
830
857
  subcellular_location_uniprot = args.subcellular_location_uniprot
@@ -854,7 +881,8 @@ def _handle_search_uniprot(args):
854
881
  _write_lines(output_file, sorted(accs))
855
882
 
856
883
 
857
- def _handle_search_pdbe(args):
884
+ @prov(input_files=["uniprot_accessions"], output_files=["output_csv"])
885
+ def _handle_search_pdbe(args: argparse.Namespace):
858
886
  uniprot_accessions = args.uniprot_accessions
859
887
  limit = args.limit
860
888
  timeout = args.timeout
@@ -884,6 +912,7 @@ def _handle_search_pdbe(args):
884
912
  rprint(f"Written to {_name_of(output_csv)}")
885
913
 
886
914
 
915
+ @prov(input_files=["uniprot_accessions"], output_files=["output_csv"])
887
916
  def _handle_search_alphafold(args):
888
917
  uniprot_accessions = args.uniprot_accessions
889
918
  min_sequence_length = converter.structure(args.min_sequence_length, PositiveInt | None) # pyright: ignore[reportArgumentType]
@@ -905,6 +934,7 @@ def _handle_search_alphafold(args):
905
934
  _write_dict_of_sets2csv(output_csv, results, "af_id")
906
935
 
907
936
 
937
+ @prov(input_files=["uniprot_accessions"], output_files=["output_csv"])
908
938
  def _handle_search_emdb(args):
909
939
  uniprot_accessions = args.uniprot_accessions
910
940
  limit = args.limit
@@ -919,6 +949,7 @@ def _handle_search_emdb(args):
919
949
  _write_dict_of_sets2csv(output_csv, results, "emdb_id")
920
950
 
921
951
 
952
+ @prov(output_files=["output_csv"])
922
953
  def _handle_search_go(args):
923
954
  term = structure(args.term, str)
924
955
  aspect: Aspect | None = args.aspect
@@ -934,6 +965,7 @@ def _handle_search_go(args):
934
965
  write_go_terms_to_csv(results, output_csv)
935
966
 
936
967
 
968
+ @prov(output_files=["output_csv"])
937
969
  def _handle_search_taxonomy(args):
938
970
  query: str = args.query
939
971
  field: SearchField | None = args.field
@@ -949,6 +981,7 @@ def _handle_search_taxonomy(args):
949
981
  _write_taxonomy_csv(results, output_csv)
950
982
 
951
983
 
984
+ @prov(input_files=["uniprot_accession"], output_files=["output_csv"])
952
985
  def _handle_search_interaction_partners(args: argparse.Namespace):
953
986
  uniprot_accession: str = args.uniprot_accession
954
987
  excludes: set[str] = set(args.exclude) if args.exclude else set()
@@ -962,6 +995,7 @@ def _handle_search_interaction_partners(args: argparse.Namespace):
962
995
  _write_lines(output_csv, results.keys())
963
996
 
964
997
 
998
+ @prov(input_files=["uniprot_accessions"], output_files=["output_csv"])
965
999
  def _handle_search_complexes(args: argparse.Namespace):
966
1000
  uniprot_accessions = args.uniprot_accessions
967
1001
  limit = args.limit
@@ -975,6 +1009,7 @@ def _handle_search_complexes(args: argparse.Namespace):
975
1009
  _write_complexes_csv(results, output_csv)
976
1010
 
977
1011
 
1012
+ @prov(input_files=["uniprot_accessions"], output_files=["output_csv"])
978
1013
  def _handle_search_uniprot_details(args: argparse.Namespace):
979
1014
  uniprot_accessions = args.uniprot_accessions
980
1015
  timeout = args.timeout
@@ -997,6 +1032,7 @@ def _initialize_cacher(args: argparse.Namespace) -> Cacher:
997
1032
  )
998
1033
 
999
1034
 
1035
+ @prov(input_files=["pdbe_csv"], output_dirs=["output_dir"])
1000
1036
  def _handle_retrieve_pdbe(args: argparse.Namespace):
1001
1037
  pdbe_csv = args.pdbe_csv
1002
1038
  output_dir = args.output_dir
@@ -1011,6 +1047,7 @@ def _handle_retrieve_pdbe(args: argparse.Namespace):
1011
1047
  rprint(f"Retrieved {len(result)} PDBe entries")
1012
1048
 
1013
1049
 
1050
+ @prov(input_files=["alphafold_csv"], output_dirs=["output_dir"])
1014
1051
  def _handle_retrieve_alphafold(args):
1015
1052
  download_dir = args.output_dir
1016
1053
  raw_formats = args.format
@@ -1042,6 +1079,7 @@ def _handle_retrieve_alphafold(args):
1042
1079
  rprint(f"Retrieved {total_nr_files} AlphaFold files and {len(afs)} summaries, written to {download_dir}")
1043
1080
 
1044
1081
 
1082
+ @prov(input_files=["emdb_csv"], output_dirs=["output_dir"])
1045
1083
  def _handle_retrieve_emdb(args):
1046
1084
  emdb_csv = args.emdb_csv
1047
1085
  output_dir = args.output_dir
@@ -1053,6 +1091,7 @@ def _handle_retrieve_emdb(args):
1053
1091
  rprint(f"Retrieved {len(result)} EMDB entries")
1054
1092
 
1055
1093
 
1094
+ @prov(input_dirs=["input_dir"], output_dirs=["output_dir"], output_files=["write_stats"])
1056
1095
  def _handle_filter_confidence(args: argparse.Namespace):
1057
1096
  # we are repeating types here and in add_argument call
1058
1097
  # TODO replace argparse with modern alternative like cyclopts
@@ -1097,6 +1136,7 @@ def _handle_filter_confidence(args: argparse.Namespace):
1097
1136
  rprint(f"Statistics written to {_name_of(stats_file)}")
1098
1137
 
1099
1138
 
1139
+ @prov(input_dirs=["input_dir"], output_dirs=["output_dir"], output_files=["write_stats"])
1100
1140
  def _handle_filter_chain(args):
1101
1141
  input_dir = args.input_dir
1102
1142
  output_dir = structure(args.output_dir, Path)
@@ -1140,6 +1180,7 @@ def _handle_filter_chain(args):
1140
1180
  rprint(f"[red]Discarding {result.input_file} ({result.discard_reason})[/red]")
1141
1181
 
1142
1182
 
1183
+ @prov(input_dirs=["input_dir"], output_dirs=["output_dir"], output_files=["write_stats"])
1143
1184
  def _handle_filter_residue(args):
1144
1185
  input_dir = structure(args.input_dir, Path)
1145
1186
  output_dir = structure(args.output_dir, Path)
@@ -1169,6 +1210,7 @@ def _handle_filter_residue(args):
1169
1210
  rprint(f"Statistics written to {_name_of(stats_file)}")
1170
1211
 
1171
1212
 
1213
+ @prov(input_dirs=["input_dir"], output_dirs=["output_dir"], output_files=["write_stats"])
1172
1214
  def _handle_filter_ss(args):
1173
1215
  input_dir = structure(args.input_dir, Path)
1174
1216
  output_dir = structure(args.output_dir, Path)
@@ -1236,7 +1278,7 @@ def _handle_mcp(args):
1236
1278
  msg = "Unable to start MCP server, please install `protein-quest[mcp]`."
1237
1279
  raise ImportError(msg)
1238
1280
 
1239
- from protein_quest.mcp_server import mcp # noqa: PLC0415
1281
+ from protein_quest.mcp_server import mcp # noqa: PLC0415 fastmcp is an extra dependency
1240
1282
 
1241
1283
  if args.transport == "stdio":
1242
1284
  mcp.run(transport=args.transport)
@@ -1244,6 +1286,7 @@ def _handle_mcp(args):
1244
1286
  mcp.run(transport=args.transport, host=args.host, port=args.port)
1245
1287
 
1246
1288
 
1289
+ @prov(input_dirs=["input_dir"], output_files=["output"])
1247
1290
  def _handle_convert_uniprot(args):
1248
1291
  input_dir = structure(args.input_dir, Path)
1249
1292
  output_file: TextIOWrapper = args.output
@@ -1264,6 +1307,7 @@ def _handle_convert_uniprot(args):
1264
1307
  _write_lines(output_file, sorted(uniprot_accessions))
1265
1308
 
1266
1309
 
1310
+ @prov(input_dirs=["input_dir"], output_dirs=["output_dir"])
1267
1311
  def _handle_convert_structures(args):
1268
1312
  input_dir = structure(args.input_dir, Path)
1269
1313
  output_dir = input_dir if args.output_dir is None else structure(args.output_dir, Path)
@@ -13,7 +13,7 @@ type PositiveInt = int
13
13
  converter = make_converter()
14
14
  """cattrs converter to read JSON document or dict to Python objects."""
15
15
  converter.register_structure_hook(URL, lambda v, _: URL(v))
16
- converter.register_unstructure_hook(URL, lambda u: str(u))
16
+ converter.register_unstructure_hook(URL, str)
17
17
 
18
18
 
19
19
  @converter.register_structure_hook
@@ -7,8 +7,8 @@ Can be run with:
7
7
  fastmcp dev src/protein_quest/mcp_server.py
8
8
  # or from inspector
9
9
  npx @modelcontextprotocol/inspector
10
- # tranport type: stdio
11
- # comand: protein-quest
10
+ # transport type: stdio
11
+ # command: protein-quest
12
12
  # arguments: mcp
13
13
 
14
14
  # or with server and inspector
protein_quest/parallel.py CHANGED
@@ -86,12 +86,15 @@ def _configure_cpu_dask_scheduler(nproc: int, name: str) -> LocalCluster:
86
86
  return LocalCluster(name=name, threads_per_worker=1, n_workers=n_workers)
87
87
 
88
88
 
89
- # Generic type parameters used across helpers
90
- P = ParamSpec("P")
89
+ class MyProgressBar(ProgressBar):
90
+ """Show progress of Dask computations.
91
91
 
92
+ Copy of distributed.diagnostics.progressbar.TextProgressBar that:
92
93
 
93
- class _StderrTextProgressBar(ProgressBar):
94
- """Copy of distributed.diagnostics.progressbar.TextProgressBar that prints to stderr instead of stdout."""
94
+ - prints to stderr instead of stdout
95
+ - Can have its interval (in seconds) set with `TQDM_MININTERVAL` environment variable
96
+
97
+ """
95
98
 
96
99
  __loop: IOLoop | None = None
97
100
 
@@ -107,6 +110,11 @@ class _StderrTextProgressBar(ProgressBar):
107
110
  **kwargs, # noqa: ARG002
108
111
  ):
109
112
  self._loop_runner = loop_runner = LoopRunner(loop=loop)
113
+ if interval == "100ms":
114
+ interval_env = os.getenv("TQDM_MININTERVAL")
115
+ if interval_env is not None:
116
+ interval = interval_env + "s"
117
+
110
118
  super().__init__(keys, scheduler, interval, complete)
111
119
  self.width = width
112
120
 
@@ -144,6 +152,10 @@ class _StderrTextProgressBar(ProgressBar):
144
152
  sys.stderr.flush()
145
153
 
146
154
 
155
+ # Generic type parameters used across helpers
156
+ P = ParamSpec("P")
157
+
158
+
147
159
  def dask_map_with_progress[T, R, **P](
148
160
  client: Client,
149
161
  func: Callable[Concatenate[T, P], R],
@@ -154,6 +166,10 @@ def dask_map_with_progress[T, R, **P](
154
166
  """
155
167
  Wrapper for map, progress, and gather of Dask that returns a correctly typed list.
156
168
 
169
+ Environment variables:
170
+ - Set interval (in seconds) of progress updates with `TQDM_MININTERVAL`
171
+ - Disabled by setting `TQDM_DISABLE` to any value
172
+
157
173
  Args:
158
174
  client: Dask client.
159
175
  func: Function to map; first parameter comes from ``iterable`` and any
@@ -169,6 +185,7 @@ def dask_map_with_progress[T, R, **P](
169
185
  if client.dashboard_link:
170
186
  logger.info(f"Follow progress on dask dashboard at: {client.dashboard_link}")
171
187
  futures = client.map(func, iterable, *args, **kwargs)
172
- _StderrTextProgressBar(futures)
188
+ if not os.getenv("TQDM_DISABLE"):
189
+ MyProgressBar(futures)
173
190
  results = client.gather(futures)
174
191
  return cast("list[R]", results)
protein_quest/uniprot.py CHANGED
@@ -332,7 +332,7 @@ def _build_sparql_generic_by_uniprot_accessions_query(
332
332
 
333
333
  def _build_sparql_query_uniprot(query: Query, limit=10_000) -> str:
334
334
  dynamic_triples = _query2dynamic_sparql_triples(query)
335
- # TODO add usefull columns that have 1:1 mapping to protein
335
+ # TODO add useful columns that have 1:1 mapping to protein
336
336
  # like uniprot_id with `?protein up:mnemonic ?mnemonic .`
337
337
  # and sequence, take care to take first isoform
338
338
  # ?protein up:sequence ?isoform .
@@ -1,12 +1,27 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: protein_quest
3
- Version: 0.10.1
3
+ Version: 1.1.0
4
4
  Summary: Search/retrieve/filter proteins and protein structures
5
5
  Project-URL: Homepage, https://github.com/haddocking/protein-quest
6
6
  Project-URL: Issues, https://github.com/haddocking/protein-quest/issues
7
7
  Project-URL: Documentation, https://www.bonvinlab.org/protein-quest/
8
8
  Project-URL: Source, https://github.com/haddocking/protein-quest
9
9
  License-File: LICENSE
10
+ Keywords: alphafold,mmcif,pdb,protein,protein structure,uniprot
11
+ Classifier: Development Status :: 5 - Production/Stable
12
+ Classifier: Environment :: Console
13
+ Classifier: Framework :: AsyncIO
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: License :: OSI Approved :: Apache Software License
16
+ Classifier: Natural Language :: English
17
+ Classifier: Operating System :: MacOS
18
+ Classifier: Operating System :: POSIX
19
+ Classifier: Operating System :: POSIX :: Linux
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Programming Language :: Python :: 3.14
22
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
23
+ Classifier: Topic :: Scientific/Engineering :: Chemistry
24
+ Classifier: Typing :: Typed
10
25
  Requires-Python: >=3.13
11
26
  Requires-Dist: aiofiles>=24.1.0
12
27
  Requires-Dist: aiohttp-retry>=2.9.1
@@ -21,6 +36,7 @@ Requires-Dist: platformdirs>=4.3.8
21
36
  Requires-Dist: psutil>=7.0.0
22
37
  Requires-Dist: rich-argparse>=1.7.1
23
38
  Requires-Dist: rich>=14.0.0
39
+ Requires-Dist: rocrate-action-recorder>=0.2.0
24
40
  Requires-Dist: shtab>=1.7.2
25
41
  Requires-Dist: sparqlwrapper>=2.0.0
26
42
  Requires-Dist: tqdm>=4.67.1
@@ -38,19 +54,22 @@ Description-Content-Type: text/markdown
38
54
  [![bio.tools](https://img.shields.io/badge/bio.tools-protein--quest-009fdf.svg)](https://bio.tools/protein-quest)
39
55
  [![PyPI](https://img.shields.io/pypi/v/protein-quest)](https://pypi.org/project/protein-quest/)
40
56
  [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.16941288.svg)](https://doi.org/10.5281/zenodo.16941288)
57
+ [![Poster DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.17910832.svg)](https://doi.org/10.5281/zenodo.17910832)
41
58
  [![Codacy Badge](https://app.codacy.com/project/badge/Coverage/7a3f3f1fe64640d583a5e50fe7ba828e)](https://app.codacy.com/gh/haddocking/protein-quest/coverage?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_coverage)
42
59
  [![FAIR checklist badge](https://fairsoftwarechecklist.net/badge.svg)](https://fairsoftwarechecklist.net/v0.2?f=31&a=32113&i=32121&r=133)
43
60
  [![fair-software.eu](https://img.shields.io/badge/fair--software.eu-%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F-green)](https://fair-software.eu)
44
61
  [![Copy/paste detector](https://raw.githubusercontent.com/kucherenko/jscpd/refs/tags/v3.5.10/assets/jscpd-badge.svg?sanitize=true)](https://github.com/kucherenko/jscpd/)
45
62
 
46
-
47
63
  Python package to search/retrieve/filter proteins and protein structures.
48
64
 
49
65
  It uses
50
66
 
51
- - [Uniprot Sparql endpoint](https://sparql.uniprot.org/) to search for proteins and their measured or predicted 3D structures.
52
- - [Uniprot taxonomy](https://www.uniprot.org/taxonomy?query=*) to search for taxonomy.
53
- - [QuickGO](https://www.ebi.ac.uk/QuickGO/api/index.html) to search for Gene Ontology terms.
67
+ - [Uniprot Sparql endpoint](https://sparql.uniprot.org/) to search for proteins
68
+ and their measured or predicted 3D structures.
69
+ - [Uniprot taxonomy](https://www.uniprot.org/taxonomy?query=*) to search for
70
+ taxonomy.
71
+ - [QuickGO](https://www.ebi.ac.uk/QuickGO/api/index.html) to search for Gene
72
+ Ontology terms.
54
73
  - [gemmi](https://project-gemmi.github.io/) to work with macromolecular models.
55
74
  - [dask-distributed](https://docs.dask.org/en/latest/) to compute in parallel.
56
75
 
@@ -101,18 +120,24 @@ pip install protein-quest
101
120
  ```
102
121
 
103
122
  Or to use the latest development version:
104
- ```
123
+
124
+ ```shell
105
125
  pip install git+https://github.com/haddocking/protein-quest.git
106
126
  ```
107
127
 
108
128
  ## Usage
109
129
 
110
- The main entry point is the `protein-quest` command line tool which has multiple subcommands to perform actions.
130
+ The main entry point is the `protein-quest` command line tool which has multiple
131
+ subcommands to perform actions.
111
132
 
112
- To use programmaticly, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/protein_quest/).
133
+ To use programmatically, see the
134
+ [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and
135
+ [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/protein_quest/).
113
136
 
114
- While downloading or copying files it uses a global cache (located at `~/.cache/protein-quest`) and hardlinks to save disk space and improve speed.
115
- This behavior can be customized with the `--no-cache`, `--cache-dir`, and `--copy-method` command line arguments.
137
+ While downloading or copying files it uses a global cache (located at
138
+ `~/.cache/protein-quest`) and hardlinks to save disk space and improve speed.
139
+ This behavior can be customized with the `--no-cache`, `--cache-dir`, and
140
+ `--copy-method` command line arguments.
116
141
 
117
142
  ### Search Uniprot accessions
118
143
 
@@ -126,7 +151,9 @@ protein-quest search uniprot \
126
151
  --limit 100 \
127
152
  uniprot_accs.txt
128
153
  ```
129
- ([GO:0005634](https://www.ebi.ac.uk/QuickGO/term/GO:0005634) is "Nucleus" and [GO:0003677](https://www.ebi.ac.uk/QuickGO/term/GO:0003677) is "DNA binding")
154
+
155
+ ([GO:0005634](https://www.ebi.ac.uk/QuickGO/term/GO:0005634) is "Nucleus" and
156
+ [GO:0003677](https://www.ebi.ac.uk/QuickGO/term/GO:0003677) is "DNA binding")
130
157
 
131
158
  ### Search for PDBe structures of uniprot accessions
132
159
 
@@ -134,7 +161,8 @@ protein-quest search uniprot \
134
161
  protein-quest search pdbe uniprot_accs.txt pdbe.csv
135
162
  ```
136
163
 
137
- `pdbe.csv` file is written containing the the PDB id and chain of each uniprot accession.
164
+ `pdbe.csv` file is written containing the PDB id and chain of each uniprot
165
+ accession.
138
166
 
139
167
  ### Search for Alphafold structures of uniprot accessions
140
168
 
@@ -170,8 +198,8 @@ protein-quest retrieve emdb emdbs.csv downloads-emdb/
170
198
 
171
199
  ### To filter AlphaFold structures on confidence
172
200
 
173
- Filter AlphaFoldDB structures based on confidence (pLDDT).
174
- Keeps entries with requested number of residues which have a confidence score above the threshold.
201
+ Filter AlphaFoldDB structures based on confidence (pLDDT). Keeps entries with
202
+ requested number of residues which have a confidence score above the threshold.
175
203
  Also writes pdb files with only those residues.
176
204
 
177
205
  ```shell
@@ -184,7 +212,8 @@ protein-quest filter confidence \
184
212
 
185
213
  ### To filter PDBe files on chain of uniprot accession
186
214
 
187
- Make PDBe files smaller by only keeping first chain of found uniprot entry and renaming to chain A.
215
+ Make PDBe files smaller by only keeping first chain of found uniprot entry and
216
+ renaming to chain A.
188
217
 
189
218
  ```shell
190
219
  protein-quest filter chain \
@@ -203,7 +232,10 @@ protein-quest filter residue \
203
232
 
204
233
  ### To filter on secondary structure
205
234
 
206
- To filter on structure being mostly alpha helices and have no beta sheets. See the following [notebook](https://www.bonvinlab.org/protein-detective/SSE_elements.html) to determine the ratio of secondary structure elements.
235
+ To filter on structures that are mostly alpha helices and have no beta sheets. See
236
+ the following
237
+ [notebook](https://www.bonvinlab.org/protein-detective/SSE_elements.html) to
238
+ determine the ratio of secondary structure elements.
207
239
 
208
240
  ```shell
209
241
  protein-quest filter secondary-structure \
@@ -221,8 +253,10 @@ protein-quest search taxonomy "Homo sapiens" -
221
253
 
222
254
  ### Search Gene Ontology (GO)
223
255
 
224
- You might not know what the identifier of a [Gene Ontology](https://geneontology.org/) term is at `protein-quest search uniprot`.
225
- You can use following command to search for a Gene Ontology (GO) term.
256
+ You might not know what the identifier of a
257
+ [Gene Ontology](https://geneontology.org/) term is at
258
+ `protein-quest search uniprot`. You can use the following command to search for a
259
+ Gene Ontology (GO) term.
226
260
 
227
261
  ```shell
228
262
  protein-quest search go --limit 5 --aspect cellular_component apoptosome -
@@ -230,18 +264,21 @@ protein-quest search go --limit 5 --aspect cellular_component apoptosome -
230
264
 
231
265
  ### Search for interaction partners
232
266
 
233
- Use https://www.ebi.ac.uk/complexportal to find interaction partners of given UniProt accession.
267
+ Use <https://www.ebi.ac.uk/complexportal> to find interaction partners of given
268
+ UniProt accession.
234
269
 
235
270
  ```shell
236
271
  protein-quest search interaction-partners Q05471 interaction-partners-of-Q05471.txt
237
272
  ```
238
273
 
239
- The `interaction-partners-of-Q05471.txt` file contains uniprot accessions (one per line).
274
+ The `interaction-partners-of-Q05471.txt` file contains uniprot accessions (one
275
+ per line).
240
276
 
241
277
  ### Search for complexes
242
278
 
243
- Given Uniprot accessions search for macromolecular complexes at https://www.ebi.ac.uk/complexportal
244
- and return the complex entries and their members.
279
+ Given Uniprot accessions search for macromolecular complexes at
280
+ <https://www.ebi.ac.uk/complexportal> and return the complex entries and their
281
+ members.
245
282
 
246
283
  ```shell
247
284
  echo Q05471 | protein-quest search complexes - complexes.csv
@@ -256,7 +293,8 @@ Q05471,CPX-2122,https://www.ebi.ac.uk/complexportal/complex/CPX-2122,Swr1 chroma
256
293
 
257
294
  ### Search for UniProt details
258
295
 
259
- To get details (like protein name, sequence length, organism) for a list of UniProt accessions.
296
+ To get details (like protein name, sequence length, organism) for a list of
297
+ UniProt accessions.
260
298
 
261
299
  ```shell
262
300
  protein-quest search uniprot-details uniprot_accs.txt uniprot_details.csv
@@ -271,7 +309,8 @@ A0A087WUV0,ZN892_HUMAN,522,True,Zinc finger protein 892,9606,Homo sapiens
271
309
 
272
310
  ### Convert structure files to .cif format
273
311
 
274
- Some tools (for example [powerfit](https://github.com/haddocking/powerfit)) only work with `.cif` files and not `*.cif.gz` or `*.bcif` files.
312
+ Some tools (for example [powerfit](https://github.com/haddocking/powerfit)) only
313
+ work with `.cif` files and not `*.cif.gz` or `*.bcif` files.
275
314
 
276
315
  ```shell
277
316
  protein-quest convert structures --format cif --output-dir ./filtered-cif ./filtered-ss
@@ -279,15 +318,25 @@ protein-quest convert structures --format cif --output-dir ./filtered-cif ./filt
279
318
 
280
319
  ### Convert structure files to UniProt accessions
281
320
 
282
- After running some filters you might want to know which UniProt accessions are still present in the filtered structures.
321
+ After running some filters you might want to know which UniProt accessions are
322
+ still present in the filtered structures.
283
323
 
284
324
  ```shell
285
325
  protein-quest convert uniprot ./filtered-ss uniprot_accs.filtered.txt
286
326
  ```
287
327
 
288
- ## Model Context Protocol (MCP) server
328
+ ## Provenance
329
+
330
+ You can use `protein-quest --prov ...` to store provenance information of your
331
+ CLI invocations in a
332
+ [Research Object crate](https://www.researchobject.org/ro-crate/) file called
333
+ ro-crate-metadata.json.
334
+
335
+ ## Model Context Protocol (MCP) server
289
336
 
290
- Protein quest can also help LLMs like Claude Sonnet 4 by providing a [set of tools](https://modelcontextprotocol.io/docs/learn/server-concepts#tools-ai-actions) for protein structures.
337
+ Protein quest can also help LLMs like Claude Sonnet 4 by providing a
338
+ [set of tools](https://modelcontextprotocol.io/docs/learn/server-concepts#tools-ai-actions)
339
+ for protein structures.
291
340
 
292
341
  ![Protein Quest MCP workflow](https://github.com/haddocking/protein-quest/raw/main/docs/protein-quest-mcp.png)
293
342
 
@@ -303,11 +352,13 @@ The server can be started with:
303
352
  protein-quest mcp
304
353
  ```
305
354
 
306
- The mcp server contains an prompt template to search/retrieve/filter candidate structures.
355
+ The mcp server contains a prompt template to search/retrieve/filter candidate
356
+ structures.
307
357
 
308
358
  ## Shell autocompletion
309
359
 
310
- The `protein-quest` command line tool supports shell autocompletion using [shtab](https://docs.iterative.ai/shtab).
360
+ The `protein-quest` command line tool supports shell autocompletion using
361
+ [shtab](https://docs.iterative.ai/shtab).
311
362
 
312
363
  Initialize for bash shell with:
313
364
 
@@ -327,4 +378,5 @@ autoload -Uz compinit && compinit
327
378
 
328
379
  ## Contributing
329
380
 
330
- For development information and contribution guidelines, please see [CONTRIBUTING.md](CONTRIBUTING.md).
381
+ For development information and contribution guidelines, please see
382
+ [CONTRIBUTING.md](CONTRIBUTING.md).
@@ -1,18 +1,18 @@
1
1
  protein_quest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- protein_quest/__version__.py,sha256=qCmLtq4uktEgR1D3vZFBaO_0UsYFvPWt7gmxqgWwku0,57
3
- protein_quest/cli.py,sha256=aWqdAeseUm7s8UGmrPFNfJPW6W83RmpJAsEy4sZscQY,57506
4
- protein_quest/converter.py,sha256=Y-Oxf7lDNbEicL6GS-IpNWDwaAiHgIgs5bFAcEHCKdQ,1441
2
+ protein_quest/__version__.py,sha256=1-Y-bSMxz0yut5o_jEVM46EG2KW008II37JW_koD3Oc,56
3
+ protein_quest/cli.py,sha256=2_bEP7gGYxvxaqlcwEaiJc9i6hf_HtMj_xucNeaOqv4,59587
4
+ protein_quest/converter.py,sha256=Qk-hIyp-YGUK4vvOZlES3BktZsK14-ShgBvVyo9Wjh8,1428
5
5
  protein_quest/emdb.py,sha256=641c6RwNYnu-0GBFyCFBiI58fNc0jMkd0ZZ9MW9-Jmc,1501
6
6
  protein_quest/filters.py,sha256=em1FYD7Y9z98ZSaJGYCv1VCGRADLbat8FfSOlNJNAJM,5663
7
7
  protein_quest/go.py,sha256=lZNEcw8nTc9wpV3cl4y2FG9Lsj8wsXQ6zemmAQs_DWE,5650
8
8
  protein_quest/io.py,sha256=ngV_HU2HIQFO-bP2xQj_fhgv0MYjW4puqz_9CxGpBv8,13017
9
- protein_quest/mcp_server.py,sha256=ZmEs18crS_Ce1-b_PM4m5kmS5C8lLlcrgpocTt7GVrg,8551
10
- protein_quest/parallel.py,sha256=uf26nD5l1Gp4Z5AFgb0K3vNBUlzvfFh8NSDbGzePSr0,5856
9
+ protein_quest/mcp_server.py,sha256=N22DT8g6i1EXI2bunpPppLbwsGkBBOdKpmtTuooXuOk,8553
10
+ protein_quest/parallel.py,sha256=hmwjv-KeiC7qSs5xApAvh3ZKkJ9HDW5zmr1zuwOzFpg,6367
11
11
  protein_quest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  protein_quest/ss.py,sha256=4ZGIHfjTlodYTXqGUKhMnGbgaStYOGaWg2oYrWIjdgo,10118
13
13
  protein_quest/structure.py,sha256=3TdzrXbGpmnskp3gjwVevwD1tfhKfAUPOHWi9ViaheM,9101
14
14
  protein_quest/taxonomy.py,sha256=4mKv8zll4mX02Ow8CTvyqMJE2KJZvcq3QlTjjjLOJJk,5072
15
- protein_quest/uniprot.py,sha256=kV1lOZ_ugcF-LUff9hvmJPaGwA_uaHPJCL_3DLBIvSE,36798
15
+ protein_quest/uniprot.py,sha256=1tqAQqnQIH7OV0dhjWv8TJIIrY6sXgrfFvlf-OieP1s,36797
16
16
  protein_quest/utils.py,sha256=5Ncdid-dslggy-Ti1yhOHwdAM7Bxpyia7Re-xDkc2P0,19909
17
17
  protein_quest/alphafold/__init__.py,sha256=Ktasi5BRp71wO7-PpOGDpIRRtBEefs8knIdlKQeLQpk,51
18
18
  protein_quest/alphafold/confidence.py,sha256=UtS2MJEReaZ1kTXbQf8Vrc9gzGjAOiGLYs4glqN-1do,8098
@@ -20,8 +20,8 @@ protein_quest/alphafold/entry_summary.py,sha256=Qhnw75RXFaoOU332g7axg_jYbbdZbUps
20
20
  protein_quest/alphafold/fetch.py,sha256=D-RWKWo5kWpCko_LNT_sslzrpeR3HX9nu5F4MUOFRtI,21979
21
21
  protein_quest/pdbe/__init__.py,sha256=eNNHtN60NAGea7gvRkIzkoTXsYPK99s-ldIcKWYO6So,61
22
22
  protein_quest/pdbe/fetch.py,sha256=e8CHWDX2QzWnVLmYXCfNrscw1UcN1lI9Uz6Z5HmEOEQ,2510
23
- protein_quest-0.10.1.dist-info/METADATA,sha256=Mz2JLKRAqBvcbMsr1I1rdeYlJK8lTUXCX3AwBpSywxI,11939
24
- protein_quest-0.10.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
25
- protein_quest-0.10.1.dist-info/entry_points.txt,sha256=f1RtOxv9TFBO3w01EMEuFXBTMsqKsQcKlkxmj9zE-0g,57
26
- protein_quest-0.10.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
27
- protein_quest-0.10.1.dist-info/RECORD,,
23
+ protein_quest-1.1.0.dist-info/METADATA,sha256=BnrYu853g1P2RJom3E13vTWsumEurdo1XwdEJ2b7wJE,13045
24
+ protein_quest-1.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
25
+ protein_quest-1.1.0.dist-info/entry_points.txt,sha256=f1RtOxv9TFBO3w01EMEuFXBTMsqKsQcKlkxmj9zE-0g,57
26
+ protein_quest-1.1.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
27
+ protein_quest-1.1.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.27.0
2
+ Generator: hatchling 1.28.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any