alphapulldown-input-parser 0.3.0__tar.gz → 0.5.0__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (14)
  1. {alphapulldown_input_parser-0.3.0/src/alphapulldown_input_parser.egg-info → alphapulldown_input_parser-0.5.0}/PKG-INFO +28 -3
  2. alphapulldown_input_parser-0.5.0/README.md +48 -0
  3. {alphapulldown_input_parser-0.3.0 → alphapulldown_input_parser-0.5.0}/pyproject.toml +1 -1
  4. {alphapulldown_input_parser-0.3.0 → alphapulldown_input_parser-0.5.0}/src/alphapulldown_input_parser/__init__.py +2 -0
  5. {alphapulldown_input_parser-0.3.0 → alphapulldown_input_parser-0.5.0}/src/alphapulldown_input_parser/parser.py +42 -26
  6. {alphapulldown_input_parser-0.3.0 → alphapulldown_input_parser-0.5.0/src/alphapulldown_input_parser.egg-info}/PKG-INFO +28 -3
  7. {alphapulldown_input_parser-0.3.0 → alphapulldown_input_parser-0.5.0}/test/test_parser.py +92 -2
  8. alphapulldown_input_parser-0.3.0/README.md +0 -23
  9. {alphapulldown_input_parser-0.3.0 → alphapulldown_input_parser-0.5.0}/LICENSE +0 -0
  10. {alphapulldown_input_parser-0.3.0 → alphapulldown_input_parser-0.5.0}/setup.cfg +0 -0
  11. {alphapulldown_input_parser-0.3.0 → alphapulldown_input_parser-0.5.0}/src/alphapulldown_input_parser.egg-info/SOURCES.txt +0 -0
  12. {alphapulldown_input_parser-0.3.0 → alphapulldown_input_parser-0.5.0}/src/alphapulldown_input_parser.egg-info/dependency_links.txt +0 -0
  13. {alphapulldown_input_parser-0.3.0 → alphapulldown_input_parser-0.5.0}/src/alphapulldown_input_parser.egg-info/requires.txt +0 -0
  14. {alphapulldown_input_parser-0.3.0 → alphapulldown_input_parser-0.5.0}/src/alphapulldown_input_parser.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alphapulldown-input-parser
3
- Version: 0.3.0
3
+ Version: 0.5.0
4
4
  Summary: Fold specification parser for AlphaPulldown
5
5
  Author-email: Kosinski Lab <alphapulldown@embl-hamburg.de>
6
6
  License: MIT
@@ -30,7 +30,7 @@ Dynamic: license-file
30
30
  Reusable parser for AlphaPulldown-style fold specifications. Install it with:
31
31
 
32
32
  ```bash
33
- pip install alphapulldown-input-parser
33
+ pip install "alphapulldown-input-parser>=0.4.0"
34
34
  ```
35
35
 
36
36
  or, for local development:
@@ -39,12 +39,37 @@ or, for local development:
39
39
  pip install -e /path/to/alphapulldown-input-parser
40
40
  ```
41
41
 
42
- The package exposes two helpers:
42
+ The package exposes three helpers:
43
43
 
44
44
  * `parse_fold(...)` – mirrors the historical AlphaPulldown helper and performs
45
45
  feature existence checks.
46
46
  * `expand_fold_specification(...)` – expands a single fold string without
47
47
  raising if features are missing.
48
+ * `parse_fold_chains(spec, protein_delimiter="+")` – **pure-syntactic** parse
49
+ of a single spec into `(chain_name, copies, regions)` triples, with no
50
+ filesystem access or feature lookup. Useful for resource sizing or input
51
+ validation before features exist on disk.
48
52
 
49
53
  The parser is dependency-free and works across AlphaPulldown, the Snakemake
50
54
  pipeline, or any other tooling that consumes the same fold syntax.
55
+
56
+ As of `0.4.0`, AF3 JSON feature files support the same copy/range suffixes as
57
+ classic AlphaPulldown feature pickles, including discontinuous regions and copy
58
+ counts. For example:
59
+
60
+ ```python
61
+ parse_fold(
62
+ [
63
+ "P01258_af3_input.json:1-100",
64
+ "P01258_af3_input.json:1-100:150-200",
65
+ "P01258_af3_input.json:2:1-100:150-200+P01579_af3_input.json",
66
+ ],
67
+ features_directory=["/path/to/features"],
68
+ protein_delimiter="+",
69
+ )
70
+ ```
71
+
72
+ AlphaPulldown and AlphaPulldownSnakemake can then preserve those AF3 JSON
73
+ regions during input preparation. For the AlphaFold 3 backend, discontinuous
74
+ regions are expanded into separate cropped chains rather than one continuous
75
+ polymer chain.
@@ -0,0 +1,48 @@
1
+ # alphapulldown-input-parser
2
+
3
+ Reusable parser for AlphaPulldown-style fold specifications. Install it with:
4
+
5
+ ```bash
6
+ pip install "alphapulldown-input-parser>=0.4.0"
7
+ ```
8
+
9
+ or, for local development:
10
+
11
+ ```bash
12
+ pip install -e /path/to/alphapulldown-input-parser
13
+ ```
14
+
15
+ The package exposes three helpers:
16
+
17
+ * `parse_fold(...)` – mirrors the historical AlphaPulldown helper and performs
18
+ feature existence checks.
19
+ * `expand_fold_specification(...)` – expands a single fold string without
20
+ raising if features are missing.
21
+ * `parse_fold_chains(spec, protein_delimiter="+")` – **pure-syntactic** parse
22
+ of a single spec into `(chain_name, copies, regions)` triples, with no
23
+ filesystem access or feature lookup. Useful for resource sizing or input
24
+ validation before features exist on disk.
25
+
26
+ The parser is dependency-free and works across AlphaPulldown, the Snakemake
27
+ pipeline, or any other tooling that consumes the same fold syntax.
28
+
29
+ As of `0.4.0`, AF3 JSON feature files support the same copy/range suffixes as
30
+ classic AlphaPulldown feature pickles, including discontinuous regions and copy
31
+ counts. For example:
32
+
33
+ ```python
34
+ parse_fold(
35
+ [
36
+ "P01258_af3_input.json:1-100",
37
+ "P01258_af3_input.json:1-100:150-200",
38
+ "P01258_af3_input.json:2:1-100:150-200+P01579_af3_input.json",
39
+ ],
40
+ features_directory=["/path/to/features"],
41
+ protein_delimiter="+",
42
+ )
43
+ ```
44
+
45
+ AlphaPulldown and AlphaPulldownSnakemake can then preserve those AF3 JSON
46
+ regions during input preparation. For the AlphaFold 3 backend, discontinuous
47
+ regions are expanded into separate cropped chains rather than one continuous
48
+ polymer chain.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "alphapulldown-input-parser"
7
- version = "0.3.0"
7
+ version = "0.5.0"
8
8
  description = "Fold specification parser for AlphaPulldown"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -8,11 +8,13 @@ from .parser import (
8
8
  expand_fold_specification,
9
9
  generate_fold_specifications,
10
10
  parse_fold,
11
+ parse_fold_chains,
11
12
  )
12
13
 
13
14
  __all__ = [
14
15
  "expand_fold_specification",
15
16
  "parse_fold",
17
+ "parse_fold_chains",
16
18
  "FormatError",
17
19
  "FeatureIndex",
18
20
  "Region",
@@ -72,7 +72,8 @@ class RegionSelection:
72
72
  return cls(regions=None)
73
73
 
74
74
 
75
- # Either {"json_input": "/path/to.json"} or {"CHAIN_A": RegionSelection(...)}
75
+ # Either {"json_input": "/path/to.json", "regions": RegionSelection(...)?}
76
+ # or {"CHAIN_A": RegionSelection(...)}
76
77
  FoldEntry = Dict[str, Union[str, RegionSelection]]
77
78
 
78
79
 
@@ -275,6 +276,39 @@ def _parse_regions(region_tokens: Sequence[str], spec: str) -> RegionSelection:
275
276
  return RegionSelection(regions=tuple(regions))
276
277
 
277
278
 
279
+ def parse_fold_chains(
280
+ spec: str,
281
+ protein_delimiter: str = "+",
282
+ ) -> List[Tuple[str, int, RegionSelection]]:
283
+ """Parse a single fold spec into ``(chain_name, copies, regions)`` triples.
284
+
285
+ Pure-syntactic parse — no filesystem access, no feature-index lookup —
286
+ intended for tools that need the chain composition of a spec *before* the
287
+ corresponding features exist on disk (e.g. for resource sizing or input
288
+ validation). Follows the AlphaPulldown ``name[:copies][:region...]``
289
+ convention; the chain ``name`` is returned **unchanged** (no path or
290
+ extension stripping; the caller can normalise if needed).
291
+
292
+ Example:
293
+ >>> parse_fold_chains("protA:2:1-100+protB", "+")
294
+ [('protA', 2, RegionSelection(regions=(Region(start=1, end=100),))),
295
+ ('protB', 1, RegionSelection(regions=None))]
296
+ """
297
+ chains: List[Tuple[str, int, RegionSelection]] = []
298
+ for raw_pf in str(spec).split(protein_delimiter):
299
+ pf = raw_pf.strip()
300
+ if not pf:
301
+ continue
302
+ tokens = [token.strip() for token in pf.split(":") if token.strip()]
303
+ if not tokens:
304
+ continue
305
+ name = tokens[0]
306
+ copies, region_tokens = _extract_copy_and_regions(tokens, spec)
307
+ regions = _parse_regions(region_tokens, spec)
308
+ chains.append((name, copies, regions))
309
+ return chains
310
+
311
+
278
312
  # ---------------------------------------------------------------------------
279
313
  # Expansion logic
280
314
  # ---------------------------------------------------------------------------
@@ -312,39 +346,21 @@ def expand_fold_specification(
312
346
  tokens = [token.strip() for token in pf.split(":")]
313
347
  base_token = tokens[0] if tokens else ""
314
348
 
315
- # JSON inputs: support optional copy number, but no ranges.
349
+ # JSON inputs: support optional copy number and region ranges.
316
350
  if base_token.endswith(".json"):
317
351
  path_pf = Path(base_token)
318
352
  json_path: Optional[str] = None
319
353
  for json_key in (path_pf.name, path_pf.stem):
320
354
  json_path = index.json_path(json_key)
321
355
  if json_path:
322
- # Handle optional copy number for JSON inputs.
323
- if len(tokens) == 1:
324
- copies = 1
325
- else:
326
- extra_tokens = tokens[1:]
327
- # Ranges (e.g. "1-10") are not supported for JSON feature files.
328
- if any("-" in tok for tok in extra_tokens):
329
- _format_error(
330
- spec,
331
- msg="Region ranges are not supported for JSON feature files.",
332
- )
333
- if len(extra_tokens) != 1:
334
- _format_error(
335
- spec,
336
- msg="JSON feature files support only an optional copy number.",
337
- )
338
- try:
339
- copies = int(extra_tokens[0])
340
- except ValueError:
341
- _format_error(
342
- spec,
343
- msg="Copy number for JSON feature file must be an integer.",
344
- )
356
+ copies, region_tokens = _extract_copy_and_regions(tokens, spec)
357
+ regions = _parse_regions(region_tokens, spec)
345
358
 
346
359
  for _ in range(copies):
347
- formatted_folds.append({"json_input": json_path})
360
+ json_entry: FoldEntry = {"json_input": json_path}
361
+ if not regions.is_all:
362
+ json_entry["regions"] = regions
363
+ formatted_folds.append(json_entry)
348
364
  break
349
365
  if json_path:
350
366
  continue
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alphapulldown-input-parser
3
- Version: 0.3.0
3
+ Version: 0.5.0
4
4
  Summary: Fold specification parser for AlphaPulldown
5
5
  Author-email: Kosinski Lab <alphapulldown@embl-hamburg.de>
6
6
  License: MIT
@@ -30,7 +30,7 @@ Dynamic: license-file
30
30
  Reusable parser for AlphaPulldown-style fold specifications. Install it with:
31
31
 
32
32
  ```bash
33
- pip install alphapulldown-input-parser
33
+ pip install "alphapulldown-input-parser>=0.4.0"
34
34
  ```
35
35
 
36
36
  or, for local development:
@@ -39,12 +39,37 @@ or, for local development:
39
39
  pip install -e /path/to/alphapulldown-input-parser
40
40
  ```
41
41
 
42
- The package exposes two helpers:
42
+ The package exposes three helpers:
43
43
 
44
44
  * `parse_fold(...)` – mirrors the historical AlphaPulldown helper and performs
45
45
  feature existence checks.
46
46
  * `expand_fold_specification(...)` – expands a single fold string without
47
47
  raising if features are missing.
48
+ * `parse_fold_chains(spec, protein_delimiter="+")` – **pure-syntactic** parse
49
+ of a single spec into `(chain_name, copies, regions)` triples, with no
50
+ filesystem access or feature lookup. Useful for resource sizing or input
51
+ validation before features exist on disk.
48
52
 
49
53
  The parser is dependency-free and works across AlphaPulldown, the Snakemake
50
54
  pipeline, or any other tooling that consumes the same fold syntax.
55
+
56
+ As of `0.4.0`, AF3 JSON feature files support the same copy/range suffixes as
57
+ classic AlphaPulldown feature pickles, including discontinuous regions and copy
58
+ counts. For example:
59
+
60
+ ```python
61
+ parse_fold(
62
+ [
63
+ "P01258_af3_input.json:1-100",
64
+ "P01258_af3_input.json:1-100:150-200",
65
+ "P01258_af3_input.json:2:1-100:150-200+P01579_af3_input.json",
66
+ ],
67
+ features_directory=["/path/to/features"],
68
+ protein_delimiter="+",
69
+ )
70
+ ```
71
+
72
+ AlphaPulldown and AlphaPulldownSnakemake can then preserve those AF3 JSON
73
+ regions during input preparation. For the AlphaFold 3 backend, discontinuous
74
+ regions are expanded into separate cropped chains rather than one continuous
75
+ polymer chain.
@@ -18,6 +18,7 @@ from alphapulldown_input_parser import (
18
18
  RegionSelection,
19
19
  generate_fold_specifications,
20
20
  parse_fold,
21
+ parse_fold_chains,
21
22
  )
22
23
 
23
24
 
@@ -232,10 +233,40 @@ def patch_feature_index(monkeypatch):
232
233
  ["dir1"],
233
234
  "+",
234
235
  {"pkl": {}, "json": {"protein1.json": "dir1/protein1.json"}},
236
+ [[{"json_input": "dir1/protein1.json", "regions": selection_ranges((1, 10))}]],
235
237
  None,
236
- FormatError,
237
238
  None,
238
- id="json_with_range_not_supported",
239
+ id="json_with_range",
240
+ ),
241
+ pytest.param(
242
+ ["protein1.json:2:1-10:20-30"],
243
+ ["dir1"],
244
+ "+",
245
+ {"pkl": {}, "json": {"protein1.json": "dir1/protein1.json"}},
246
+ [
247
+ [
248
+ {"json_input": "dir1/protein1.json", "regions": selection_ranges((1, 10), (20, 30))},
249
+ {"json_input": "dir1/protein1.json", "regions": selection_ranges((1, 10), (20, 30))},
250
+ ]
251
+ ],
252
+ None,
253
+ None,
254
+ id="json_with_copy_and_regions",
255
+ ),
256
+ pytest.param(
257
+ ["protein1.json:1-10:20-30:2"],
258
+ ["dir1"],
259
+ "+",
260
+ {"pkl": {}, "json": {"protein1.json": "dir1/protein1.json"}},
261
+ [
262
+ [
263
+ {"json_input": "dir1/protein1.json", "regions": selection_ranges((1, 10), (20, 30))},
264
+ {"json_input": "dir1/protein1.json", "regions": selection_ranges((1, 10), (20, 30))},
265
+ ]
266
+ ],
267
+ None,
268
+ None,
269
+ id="json_with_regions_and_copy",
239
270
  ),
240
271
  ],
241
272
  )
@@ -309,3 +340,62 @@ def test_generate_fold_specifications_writes_to_disk(tmp_path: Path) -> None:
309
340
 
310
341
  assert result == ["p1+p2"]
311
342
  assert output_path.read_text(encoding="utf-8") == "p1+p2\n"
343
+
344
+
345
+ # ---------------------------------------------------------------------------
346
+ # parse_fold_chains
347
+ # ---------------------------------------------------------------------------
348
+
349
+
350
+ def test_parse_fold_chains_basic_heteromer() -> None:
351
+ assert parse_fold_chains("A+B") == [
352
+ ("A", 1, RegionSelection.all()),
353
+ ("B", 1, RegionSelection.all()),
354
+ ]
355
+
356
+
357
+ def test_parse_fold_chains_copies() -> None:
358
+ # copy number as the second token (canonical form)
359
+ assert parse_fold_chains("A:2") == [("A", 2, RegionSelection.all())]
360
+ # copy + region: name:copies:region
361
+ assert parse_fold_chains("A:2:1-100") == [
362
+ ("A", 2, RegionSelection(regions=(Region(start=1, end=100),))),
363
+ ]
364
+
365
+
366
+ def test_parse_fold_chains_region_without_copies() -> None:
367
+ # A region alone (not a bare integer) implies a single copy
368
+ assert parse_fold_chains("A:1-100") == [
369
+ ("A", 1, RegionSelection(regions=(Region(start=1, end=100),))),
370
+ ]
371
+
372
+
373
+ def test_parse_fold_chains_multiple_regions_and_copies() -> None:
374
+ chains = parse_fold_chains("A:2:1-100:200-300+B")
375
+ assert chains[0][0] == "A"
376
+ assert chains[0][1] == 2
377
+ assert chains[0][2] == RegionSelection(
378
+ regions=(Region(start=1, end=100), Region(start=200, end=300))
379
+ )
380
+ assert chains[1] == ("B", 1, RegionSelection.all())
381
+
382
+
383
+ def test_parse_fold_chains_preserves_paths_and_json_names() -> None:
384
+ """Names are returned verbatim — no extension or path stripping."""
385
+ chains = parse_fold_chains("/path/to/protA_af3_input.json:2+protB.fasta")
386
+ assert chains[0][0] == "/path/to/protA_af3_input.json"
387
+ assert chains[0][1] == 2
388
+ assert chains[1][0] == "protB.fasta"
389
+ assert chains[1][1] == 1
390
+
391
+
392
+ def test_parse_fold_chains_custom_delimiter_and_whitespace() -> None:
393
+ assert parse_fold_chains(" A , B ", protein_delimiter=",") == [
394
+ ("A", 1, RegionSelection.all()),
395
+ ("B", 1, RegionSelection.all()),
396
+ ]
397
+ # empty tokens are skipped
398
+ assert parse_fold_chains("A++B") == [
399
+ ("A", 1, RegionSelection.all()),
400
+ ("B", 1, RegionSelection.all()),
401
+ ]
@@ -1,23 +0,0 @@
1
- # alphapulldown-input-parser
2
-
3
- Reusable parser for AlphaPulldown-style fold specifications. Install it with:
4
-
5
- ```bash
6
- pip install alphapulldown-input-parser
7
- ```
8
-
9
- or, for local development:
10
-
11
- ```bash
12
- pip install -e /path/to/alphapulldown-input-parser
13
- ```
14
-
15
- The package exposes two helpers:
16
-
17
- * `parse_fold(...)` – mirrors the historical AlphaPulldown helper and performs
18
- feature existence checks.
19
- * `expand_fold_specification(...)` – expands a single fold string without
20
- raising if features are missing.
21
-
22
- The parser is dependency-free and works across AlphaPulldown, the Snakemake
23
- pipeline, or any other tooling that consumes the same fold syntax.