alphapulldown-input-parser 0.4.0__tar.gz → 0.5.0__tar.gz

This diff shows the changes between two publicly released versions of this package, as they appear in the public registry they were released to. It is provided for informational purposes only.
Files changed (14)
  1. {alphapulldown_input_parser-0.4.0/src/alphapulldown_input_parser.egg-info → alphapulldown_input_parser-0.5.0}/PKG-INFO +17 -6
  2. alphapulldown_input_parser-0.5.0/README.md +48 -0
  3. {alphapulldown_input_parser-0.4.0 → alphapulldown_input_parser-0.5.0}/pyproject.toml +1 -1
  4. {alphapulldown_input_parser-0.4.0 → alphapulldown_input_parser-0.5.0}/src/alphapulldown_input_parser/__init__.py +2 -0
  5. {alphapulldown_input_parser-0.4.0 → alphapulldown_input_parser-0.5.0}/src/alphapulldown_input_parser/parser.py +33 -0
  6. {alphapulldown_input_parser-0.4.0 → alphapulldown_input_parser-0.5.0/src/alphapulldown_input_parser.egg-info}/PKG-INFO +17 -6
  7. {alphapulldown_input_parser-0.4.0 → alphapulldown_input_parser-0.5.0}/test/test_parser.py +60 -0
  8. alphapulldown_input_parser-0.4.0/README.md +0 -37
  9. {alphapulldown_input_parser-0.4.0 → alphapulldown_input_parser-0.5.0}/LICENSE +0 -0
  10. {alphapulldown_input_parser-0.4.0 → alphapulldown_input_parser-0.5.0}/setup.cfg +0 -0
  11. {alphapulldown_input_parser-0.4.0 → alphapulldown_input_parser-0.5.0}/src/alphapulldown_input_parser.egg-info/SOURCES.txt +0 -0
  12. {alphapulldown_input_parser-0.4.0 → alphapulldown_input_parser-0.5.0}/src/alphapulldown_input_parser.egg-info/dependency_links.txt +0 -0
  13. {alphapulldown_input_parser-0.4.0 → alphapulldown_input_parser-0.5.0}/src/alphapulldown_input_parser.egg-info/requires.txt +0 -0
  14. {alphapulldown_input_parser-0.4.0 → alphapulldown_input_parser-0.5.0}/src/alphapulldown_input_parser.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alphapulldown-input-parser
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: Fold specification parser for AlphaPulldown
5
5
  Author-email: Kosinski Lab <alphapulldown@embl-hamburg.de>
6
6
  License: MIT
@@ -39,26 +39,37 @@ or, for local development:
39
39
  pip install -e /path/to/alphapulldown-input-parser
40
40
  ```
41
41
 
42
- The package exposes two helpers:
42
+ The package exposes three helpers:
43
43
 
44
44
  * `parse_fold(...)` – mirrors the historical AlphaPulldown helper and performs
45
45
  feature existence checks.
46
46
  * `expand_fold_specification(...)` – expands a single fold string without
47
47
  raising if features are missing.
48
+ * `parse_fold_chains(spec, protein_delimiter="+")` – **pure-syntactic** parse
49
+ of a single spec into `(chain_name, copies, regions)` triples, with no
50
+ filesystem access or feature lookup. Useful for resource sizing or input
51
+ validation before features exist on disk.
48
52
 
49
53
  The parser is dependency-free and works across AlphaPulldown, the Snakemake
50
54
  pipeline, or any other tooling that consumes the same fold syntax.
51
55
 
52
56
  As of `0.4.0`, AF3 JSON feature files support the same copy/range suffixes as
53
- classic AlphaPulldown feature pickles. For example:
57
+ classic AlphaPulldown feature pickles, including discontinuous regions and copy
58
+ counts. For example:
54
59
 
55
60
  ```python
56
61
  parse_fold(
57
- ["P01258_af3_input.json:1-100:2"],
62
+ [
63
+ "P01258_af3_input.json:1-100",
64
+ "P01258_af3_input.json:1-100:150-200",
65
+ "P01258_af3_input.json:2:1-100:150-200+P01579_af3_input.json",
66
+ ],
58
67
  features_directory=["/path/to/features"],
59
68
  protein_delimiter="+",
60
69
  )
61
70
  ```
62
71
 
63
- This expands to two folding entries for the same AF3 JSON feature file, each
64
- restricted to residues `1-100`.
72
+ AlphaPulldown and AlphaPulldownSnakemake can then preserve those AF3 JSON
73
+ regions during input preparation. For the AlphaFold 3 backend, discontinuous
74
+ regions are expanded into separate cropped chains rather than one continuous
75
+ polymer chain.
@@ -0,0 +1,48 @@
1
+ # alphapulldown-input-parser
2
+
3
+ Reusable parser for AlphaPulldown-style fold specifications. Install it with:
4
+
5
+ ```bash
6
+ pip install "alphapulldown-input-parser>=0.5.0"
7
+ ```
8
+
9
+ or, for local development:
10
+
11
+ ```bash
12
+ pip install -e /path/to/alphapulldown-input-parser
13
+ ```
14
+
15
+ The package exposes three helpers:
16
+
17
+ * `parse_fold(...)` – mirrors the historical AlphaPulldown helper and performs
18
+ feature existence checks.
19
+ * `expand_fold_specification(...)` – expands a single fold string without
20
+ raising if features are missing.
21
+ * `parse_fold_chains(spec, protein_delimiter="+")` – **pure-syntactic** parse
22
+ of a single spec into `(chain_name, copies, regions)` triples, with no
23
+ filesystem access or feature lookup. Useful for resource sizing or input
24
+ validation before features exist on disk.
25
+
26
+ The parser is dependency-free and works across AlphaPulldown, the Snakemake
27
+ pipeline, or any other tooling that consumes the same fold syntax.
28
+
29
+ As of `0.4.0`, AF3 JSON feature files support the same copy/range suffixes as
30
+ classic AlphaPulldown feature pickles, including discontinuous regions and copy
31
+ counts. For example:
32
+
33
+ ```python
34
+ parse_fold(
35
+ [
36
+ "P01258_af3_input.json:1-100",
37
+ "P01258_af3_input.json:1-100:150-200",
38
+ "P01258_af3_input.json:2:1-100:150-200+P01579_af3_input.json",
39
+ ],
40
+ features_directory=["/path/to/features"],
41
+ protein_delimiter="+",
42
+ )
43
+ ```
44
+
45
+ AlphaPulldown and AlphaPulldownSnakemake can then preserve those AF3 JSON
46
+ regions during input preparation. For the AlphaFold 3 backend, discontinuous
47
+ regions are expanded into separate cropped chains rather than one continuous
48
+ polymer chain.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "alphapulldown-input-parser"
7
- version = "0.4.0"
7
+ version = "0.5.0"
8
8
  description = "Fold specification parser for AlphaPulldown"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -8,11 +8,13 @@ from .parser import (
8
8
  expand_fold_specification,
9
9
  generate_fold_specifications,
10
10
  parse_fold,
11
+ parse_fold_chains,
11
12
  )
12
13
 
13
14
  __all__ = [
14
15
  "expand_fold_specification",
15
16
  "parse_fold",
17
+ "parse_fold_chains",
16
18
  "FormatError",
17
19
  "FeatureIndex",
18
20
  "Region",
@@ -276,6 +276,39 @@ def _parse_regions(region_tokens: Sequence[str], spec: str) -> RegionSelection:
276
276
  return RegionSelection(regions=tuple(regions))
277
277
 
278
278
 
279
+ def parse_fold_chains(
280
+ spec: str,
281
+ protein_delimiter: str = "+",
282
+ ) -> List[Tuple[str, int, RegionSelection]]:
283
+ """Parse a single fold spec into ``(chain_name, copies, regions)`` triples.
284
+
285
+ Pure-syntactic parse — no filesystem access, no feature-index lookup —
286
+ intended for tools that need the chain composition of a spec *before* the
287
+ corresponding features exist on disk (e.g. for resource sizing or input
288
+ validation). Follows the AlphaPulldown ``name[:copies][:region...]``
289
+ convention; the chain ``name`` is returned **unchanged** (no path or
290
+ extension stripping; the caller can normalise if needed).
291
+
292
+ Example:
293
+ >>> parse_fold_chains("protA:2:1-100+protB", "+")
294
+ [('protA', 2, RegionSelection(regions=(Region(start=1, end=100),))),
295
+ ('protB', 1, RegionSelection(regions=None))]
296
+ """
297
+ chains: List[Tuple[str, int, RegionSelection]] = []
298
+ for raw_pf in str(spec).split(protein_delimiter):
299
+ pf = raw_pf.strip()
300
+ if not pf:
301
+ continue
302
+ tokens = [token.strip() for token in pf.split(":") if token.strip()]
303
+ if not tokens:
304
+ continue
305
+ name = tokens[0]
306
+ copies, region_tokens = _extract_copy_and_regions(tokens, spec)
307
+ regions = _parse_regions(region_tokens, spec)
308
+ chains.append((name, copies, regions))
309
+ return chains
310
+
311
+
279
312
  # ---------------------------------------------------------------------------
280
313
  # Expansion logic
281
314
  # ---------------------------------------------------------------------------
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alphapulldown-input-parser
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: Fold specification parser for AlphaPulldown
5
5
  Author-email: Kosinski Lab <alphapulldown@embl-hamburg.de>
6
6
  License: MIT
@@ -39,26 +39,37 @@ or, for local development:
39
39
  pip install -e /path/to/alphapulldown-input-parser
40
40
  ```
41
41
 
42
- The package exposes two helpers:
42
+ The package exposes three helpers:
43
43
 
44
44
  * `parse_fold(...)` – mirrors the historical AlphaPulldown helper and performs
45
45
  feature existence checks.
46
46
  * `expand_fold_specification(...)` – expands a single fold string without
47
47
  raising if features are missing.
48
+ * `parse_fold_chains(spec, protein_delimiter="+")` – **pure-syntactic** parse
49
+ of a single spec into `(chain_name, copies, regions)` triples, with no
50
+ filesystem access or feature lookup. Useful for resource sizing or input
51
+ validation before features exist on disk.
48
52
 
49
53
  The parser is dependency-free and works across AlphaPulldown, the Snakemake
50
54
  pipeline, or any other tooling that consumes the same fold syntax.
51
55
 
52
56
  As of `0.4.0`, AF3 JSON feature files support the same copy/range suffixes as
53
- classic AlphaPulldown feature pickles. For example:
57
+ classic AlphaPulldown feature pickles, including discontinuous regions and copy
58
+ counts. For example:
54
59
 
55
60
  ```python
56
61
  parse_fold(
57
- ["P01258_af3_input.json:1-100:2"],
62
+ [
63
+ "P01258_af3_input.json:1-100",
64
+ "P01258_af3_input.json:1-100:150-200",
65
+ "P01258_af3_input.json:2:1-100:150-200+P01579_af3_input.json",
66
+ ],
58
67
  features_directory=["/path/to/features"],
59
68
  protein_delimiter="+",
60
69
  )
61
70
  ```
62
71
 
63
- This expands to two folding entries for the same AF3 JSON feature file, each
64
- restricted to residues `1-100`.
72
+ AlphaPulldown and AlphaPulldownSnakemake can then preserve those AF3 JSON
73
+ regions during input preparation. For the AlphaFold 3 backend, discontinuous
74
+ regions are expanded into separate cropped chains rather than one continuous
75
+ polymer chain.
@@ -18,6 +18,7 @@ from alphapulldown_input_parser import (
18
18
  RegionSelection,
19
19
  generate_fold_specifications,
20
20
  parse_fold,
21
+ parse_fold_chains,
21
22
  )
22
23
 
23
24
 
@@ -339,3 +340,62 @@ def test_generate_fold_specifications_writes_to_disk(tmp_path: Path) -> None:
339
340
 
340
341
  assert result == ["p1+p2"]
341
342
  assert output_path.read_text(encoding="utf-8") == "p1+p2\n"
343
+
344
+
345
+ # ---------------------------------------------------------------------------
346
+ # parse_fold_chains
347
+ # ---------------------------------------------------------------------------
348
+
349
+
350
+ def test_parse_fold_chains_basic_heteromer() -> None:
351
+ assert parse_fold_chains("A+B") == [
352
+ ("A", 1, RegionSelection.all()),
353
+ ("B", 1, RegionSelection.all()),
354
+ ]
355
+
356
+
357
+ def test_parse_fold_chains_copies() -> None:
358
+ # copy number as the second token (canonical form)
359
+ assert parse_fold_chains("A:2") == [("A", 2, RegionSelection.all())]
360
+ # copy + region: name:copies:region
361
+ assert parse_fold_chains("A:2:1-100") == [
362
+ ("A", 2, RegionSelection(regions=(Region(start=1, end=100),))),
363
+ ]
364
+
365
+
366
+ def test_parse_fold_chains_region_without_copies() -> None:
367
+ # A region alone (not a bare integer) implies a single copy
368
+ assert parse_fold_chains("A:1-100") == [
369
+ ("A", 1, RegionSelection(regions=(Region(start=1, end=100),))),
370
+ ]
371
+
372
+
373
+ def test_parse_fold_chains_multiple_regions_and_copies() -> None:
374
+ chains = parse_fold_chains("A:2:1-100:200-300+B")
375
+ assert chains[0][0] == "A"
376
+ assert chains[0][1] == 2
377
+ assert chains[0][2] == RegionSelection(
378
+ regions=(Region(start=1, end=100), Region(start=200, end=300))
379
+ )
380
+ assert chains[1] == ("B", 1, RegionSelection.all())
381
+
382
+
383
+ def test_parse_fold_chains_preserves_paths_and_json_names() -> None:
384
+ """Names are returned verbatim — no extension or path stripping."""
385
+ chains = parse_fold_chains("/path/to/protA_af3_input.json:2+protB.fasta")
386
+ assert chains[0][0] == "/path/to/protA_af3_input.json"
387
+ assert chains[0][1] == 2
388
+ assert chains[1][0] == "protB.fasta"
389
+ assert chains[1][1] == 1
390
+
391
+
392
+ def test_parse_fold_chains_custom_delimiter_and_whitespace() -> None:
393
+ assert parse_fold_chains(" A , B ", protein_delimiter=",") == [
394
+ ("A", 1, RegionSelection.all()),
395
+ ("B", 1, RegionSelection.all()),
396
+ ]
397
+ # empty tokens are skipped
398
+ assert parse_fold_chains("A++B") == [
399
+ ("A", 1, RegionSelection.all()),
400
+ ("B", 1, RegionSelection.all()),
401
+ ]
@@ -1,37 +0,0 @@
1
- # alphapulldown-input-parser
2
-
3
- Reusable parser for AlphaPulldown-style fold specifications. Install it with:
4
-
5
- ```bash
6
- pip install "alphapulldown-input-parser>=0.4.0"
7
- ```
8
-
9
- or, for local development:
10
-
11
- ```bash
12
- pip install -e /path/to/alphapulldown-input-parser
13
- ```
14
-
15
- The package exposes two helpers:
16
-
17
- * `parse_fold(...)` – mirrors the historical AlphaPulldown helper and performs
18
- feature existence checks.
19
- * `expand_fold_specification(...)` – expands a single fold string without
20
- raising if features are missing.
21
-
22
- The parser is dependency-free and works across AlphaPulldown, the Snakemake
23
- pipeline, or any other tooling that consumes the same fold syntax.
24
-
25
- As of `0.4.0`, AF3 JSON feature files support the same copy/range suffixes as
26
- classic AlphaPulldown feature pickles. For example:
27
-
28
- ```python
29
- parse_fold(
30
- ["P01258_af3_input.json:1-100:2"],
31
- features_directory=["/path/to/features"],
32
- protein_delimiter="+",
33
- )
34
- ```
35
-
36
- This expands to two folding entries for the same AF3 JSON feature file, each
37
- restricted to residues `1-100`.