factorforge-cds 3.1.6__tar.gz → 3.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {factorforge_cds-3.1.6/src/factorforge_cds.egg-info → factorforge_cds-3.1.8}/PKG-INFO +8 -7
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/README.md +6 -5
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/pyproject.toml +3 -2
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/__init__.py +1 -1
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/analysis/feasibility.py +8 -4
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/analysis/metrics.py +35 -8
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/data/ntabacum_codons.json +1 -1
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/engines/__init__.py +1 -1
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/engines/profile/__init__.py +1 -1
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/engines/profile/exporter.py +25 -2
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/engines/profile/optimizer.py +1 -1
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/engines/profile/rules/reverse_translator.py +34 -10
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/engines/profile/rules/rule_engine.py +25 -21
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/engines/profile/scoring.py +77 -27
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/utils/sequence_validator.py +2 -1
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/utils/validation.py +3 -1
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8/src/factorforge_cds.egg-info}/PKG-INFO +8 -7
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/tests/test_sequence_validator.py +2 -1
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/LICENSE +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/setup.cfg +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/__main__.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/analysis/__init__.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/cli/__init__.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/cli/legacy_cli.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/cli/main.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/core/interfaces/__init__.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/core/interfaces/exporter.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/core/interfaces/optimizer.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/core/interfaces/validator.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/data/nbenthamiana_codons.json +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/data/nbenthamiana_golden_set.json +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/data/templates/high_expression.json +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/data/templates/standard_expression.json +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/data/wolffia_globosa_codons.json +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/database.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/engines/profile/codon_table_builder.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/engines/profile/construct_builder.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/engines/profile/pipeline.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/engines/profile/rules/__init__.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/engines/profile/rules/domesticator.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/engines/profile/scoring_ml.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/engines/profile/utils.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/engines/profile/validator.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/engines/registry.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/schemas/__init__.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/schemas/design_package.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/schemas/design_package.schema.json +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/utils/__init__.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/utils/construct_id.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/utils/exceptions.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/utils/restriction_sites.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/validation/__init__.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/validation/cli.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/validation/package_generator.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge_cds.egg-info/SOURCES.txt +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge_cds.egg-info/dependency_links.txt +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge_cds.egg-info/entry_points.txt +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge_cds.egg-info/requires.txt +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge_cds.egg-info/top_level.txt +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/tests/test_database.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/tests/test_legacy_cli.py +0 -0
- {factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/tests/test_restriction_sites.py +0 -0
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: factorforge-cds
|
|
3
|
-
Version: 3.1.6
|
|
3
|
+
Version: 3.1.8
|
|
4
4
|
Summary: FactorForge - open-source constraint-based CDS design engine by Eijex.
|
|
5
5
|
Author-email: Eijex <eijex.lab@gmail.com>
|
|
6
6
|
License-Expression: AGPL-3.0-only
|
|
7
|
-
Project-URL: Homepage, https://factorforge-cds.vercel.app
|
|
7
|
+
Project-URL: Homepage, https://factorforge.eijex.com
|
|
8
8
|
Project-URL: Repository, https://github.com/eijex/factorforge-cds
|
|
9
9
|
Project-URL: Issues, https://github.com/eijex/factorforge-cds/issues
|
|
10
10
|
Keywords: codon optimization,CDS design,synthetic biology,bioinformatics,Nicotiana benthamiana,Nicotiana tabacum,Tobacco BY-2,constraint optimization,dynamic programming
|
|
@@ -39,7 +39,7 @@ Dynamic: license-file
|
|
|
39
39
|
[](https://github.com/eijex/factorforge-cds/actions/workflows/ci.yml)
|
|
40
40
|
[](https://codecov.io/gh/eijex/factorforge-cds)
|
|
41
41
|
[](https://doi.org/10.5281/zenodo.20407331)
|
|
42
|
-
[](https://factorforge.eijex.com)
|
|
43
43
|
|
|
44
44
|
FactorForge optimizes protein sequences into host-compatible CDS by maximizing CAI, controlling GC content, eliminating PolyA signals, and producing MoClo/Golden Gate-ready constructs. Supports *N. benthamiana* (agroinfiltration) and Tobacco BY-2 (`--host by2`, bioreactor/cGMP workflows).
|
|
45
45
|
|
|
@@ -54,7 +54,7 @@ pip install factorforge-cds
|
|
|
54
54
|
factorforge optimize my_protein.fasta -o output.fasta
|
|
55
55
|
```
|
|
56
56
|
|
|
57
|
-
Or use the **[web app](https://factorforge-cds.vercel.app)** — no installation required.
|
|
57
|
+
Or use the **[web app](https://factorforge.eijex.com)** — no installation required.
|
|
58
58
|
|
|
59
59
|
---
|
|
60
60
|
|
|
@@ -62,9 +62,10 @@ Or use the **[web app](https://factorforge-cds.vercel.app)** — no installation
|
|
|
62
62
|
|
|
63
63
|
| Method | Description | Link |
|
|
64
64
|
|--------|-------------|------|
|
|
65
|
-
| **Web App** | No installation, demo & light use | [factorforge-cds.vercel.app](https://factorforge-cds.vercel.app) |
|
|
65
|
+
| **Web App** | No installation, demo & light use | [factorforge.eijex.com](https://factorforge.eijex.com) |
|
|
66
66
|
| **CLI / Python** | Local use, batch processing, data privacy | `pip install factorforge-cds` |
|
|
67
67
|
| **Docker** | Full web interface locally | `docker pull ghcr.io/eijex/factorforge-cds:latest` |
|
|
68
|
+
| **Eijex MCP** | AI agent access (Claude Code, Cursor) | [mcp.eijex.com](https://mcp.eijex.com) |
|
|
68
69
|
|
|
69
70
|
---
|
|
70
71
|
|
|
@@ -106,7 +107,7 @@ FactorForge predictions are **in-silico only** and have not been experimentally
|
|
|
106
107
|
## Citing
|
|
107
108
|
|
|
108
109
|
```
|
|
109
|
-
FactorForge v3.1.
|
|
110
|
+
FactorForge v3.1.8 (2026). Open-source constraint-based CDS design engine.
|
|
110
111
|
Eijex. https://github.com/eijex/factorforge-cds
|
|
111
112
|
```
|
|
112
113
|
|
|
@@ -136,4 +137,4 @@ GNU Affero General Public License v3.0 — see [LICENSE](LICENSE).
|
|
|
136
137
|
- **Wet-lab Results** — [Submit via Google Form](https://docs.google.com/forms/d/e/1FAIpQLSeSx-wYvF6YwHhSPdLMl-L44frCugdm25X_eDz50OaqTD66qA/viewform?usp=header) (recommended) or [GitHub Issue](https://github.com/eijex/factorforge-cds/issues/new?template=wet_lab_result.yml)
|
|
137
138
|
- **GitHub Issues** — bugs, features: [github.com/eijex/factorforge-cds/issues](https://github.com/eijex/factorforge-cds/issues)
|
|
138
139
|
- **Email** — eijex.lab@gmail.com
|
|
139
|
-
- **Web** — [factorforge-cds.vercel.app](https://factorforge-cds.vercel.app)
|
|
140
|
+
- **Web** — [factorforge.eijex.com](https://factorforge.eijex.com)
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
[](https://github.com/eijex/factorforge-cds/actions/workflows/ci.yml)
|
|
9
9
|
[](https://codecov.io/gh/eijex/factorforge-cds)
|
|
10
10
|
[](https://doi.org/10.5281/zenodo.20407331)
|
|
11
|
-
[](https://factorforge.eijex.com)
|
|
12
12
|
|
|
13
13
|
FactorForge optimizes protein sequences into host-compatible CDS by maximizing CAI, controlling GC content, eliminating PolyA signals, and producing MoClo/Golden Gate-ready constructs. Supports *N. benthamiana* (agroinfiltration) and Tobacco BY-2 (`--host by2`, bioreactor/cGMP workflows).
|
|
14
14
|
|
|
@@ -23,7 +23,7 @@ pip install factorforge-cds
|
|
|
23
23
|
factorforge optimize my_protein.fasta -o output.fasta
|
|
24
24
|
```
|
|
25
25
|
|
|
26
|
-
Or use the **[web app](https://factorforge-cds.vercel.app)** — no installation required.
|
|
26
|
+
Or use the **[web app](https://factorforge.eijex.com)** — no installation required.
|
|
27
27
|
|
|
28
28
|
---
|
|
29
29
|
|
|
@@ -31,9 +31,10 @@ Or use the **[web app](https://factorforge-cds.vercel.app)** — no installation
|
|
|
31
31
|
|
|
32
32
|
| Method | Description | Link |
|
|
33
33
|
|--------|-------------|------|
|
|
34
|
-
| **Web App** | No installation, demo & light use | [factorforge-cds.vercel.app](https://factorforge-cds.vercel.app) |
|
|
34
|
+
| **Web App** | No installation, demo & light use | [factorforge.eijex.com](https://factorforge.eijex.com) |
|
|
35
35
|
| **CLI / Python** | Local use, batch processing, data privacy | `pip install factorforge-cds` |
|
|
36
36
|
| **Docker** | Full web interface locally | `docker pull ghcr.io/eijex/factorforge-cds:latest` |
|
|
37
|
+
| **Eijex MCP** | AI agent access (Claude Code, Cursor) | [mcp.eijex.com](https://mcp.eijex.com) |
|
|
37
38
|
|
|
38
39
|
---
|
|
39
40
|
|
|
@@ -75,7 +76,7 @@ FactorForge predictions are **in-silico only** and have not been experimentally
|
|
|
75
76
|
## Citing
|
|
76
77
|
|
|
77
78
|
```
|
|
78
|
-
FactorForge v3.1.
|
|
79
|
+
FactorForge v3.1.8 (2026). Open-source constraint-based CDS design engine.
|
|
79
80
|
Eijex. https://github.com/eijex/factorforge-cds
|
|
80
81
|
```
|
|
81
82
|
|
|
@@ -105,4 +106,4 @@ GNU Affero General Public License v3.0 — see [LICENSE](LICENSE).
|
|
|
105
106
|
- **Wet-lab Results** — [Submit via Google Form](https://docs.google.com/forms/d/e/1FAIpQLSeSx-wYvF6YwHhSPdLMl-L44frCugdm25X_eDz50OaqTD66qA/viewform?usp=header) (recommended) or [GitHub Issue](https://github.com/eijex/factorforge-cds/issues/new?template=wet_lab_result.yml)
|
|
106
107
|
- **GitHub Issues** — bugs, features: [github.com/eijex/factorforge-cds/issues](https://github.com/eijex/factorforge-cds/issues)
|
|
107
108
|
- **Email** — eijex.lab@gmail.com
|
|
108
|
-
- **Web** — [factorforge-cds.vercel.app](https://factorforge-cds.vercel.app)
|
|
109
|
+
- **Web** — [factorforge.eijex.com](https://factorforge.eijex.com)
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "factorforge-cds"
|
|
7
|
-
version = "3.1.6"
|
|
7
|
+
version = "3.1.8"
|
|
8
8
|
description = "FactorForge - open-source constraint-based CDS design engine by Eijex."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "AGPL-3.0-only"
|
|
@@ -39,7 +39,7 @@ ml = [
|
|
|
39
39
|
]
|
|
40
40
|
|
|
41
41
|
[project.urls]
|
|
42
|
-
Homepage = "https://factorforge-cds.vercel.app"
|
|
42
|
+
Homepage = "https://factorforge.eijex.com"
|
|
43
43
|
Repository = "https://github.com/eijex/factorforge-cds"
|
|
44
44
|
Issues = "https://github.com/eijex/factorforge-cds/issues"
|
|
45
45
|
|
|
@@ -56,6 +56,7 @@ where = ["src"]
|
|
|
56
56
|
[tool.pytest.ini_options]
|
|
57
57
|
testpaths = ["tests"]
|
|
58
58
|
norecursedirs = ["archive"]
|
|
59
|
+
pythonpath = ["src"]
|
|
59
60
|
|
|
60
61
|
[tool.ruff]
|
|
61
62
|
line-length = 100
|
|
@@ -88,9 +88,9 @@ def _reconstruct_sequence(
|
|
|
88
88
|
def analyze_feasibility(
|
|
89
89
|
protein_sequence: str,
|
|
90
90
|
codon_weights: dict[str, float],
|
|
91
|
-
target_cai: float = 0.
|
|
92
|
-
target_gc_low: float =
|
|
93
|
-
target_gc_high: float =
|
|
91
|
+
target_cai: float = 0.82,
|
|
92
|
+
target_gc_low: float = 55.0,
|
|
93
|
+
target_gc_high: float = 65.0,
|
|
94
94
|
gc_ranges: list[tuple[float, float]] | None = None,
|
|
95
95
|
) -> dict[str, Any]:
|
|
96
96
|
"""Compute exact CAI/GC feasibility over synonymous codon choices.
|
|
@@ -98,12 +98,16 @@ def analyze_feasibility(
|
|
|
98
98
|
The dynamic program keeps the best log-CAI sequence for each reachable
|
|
99
99
|
global GC count. This is exact for global GC and CAI under the supplied
|
|
100
100
|
codon weights.
|
|
101
|
+
|
|
102
|
+
Defaults calibrated to nbenthamiana profile engine output distribution
|
|
103
|
+
(analysis 004, n=49): avg CAI=0.76, avg GC=60.1% (range 55-71%).
|
|
104
|
+
target_cai=0.82 aligns with industry practice (>0.8) and is achievable.
|
|
101
105
|
"""
|
|
102
106
|
protein = "".join(protein_sequence.upper().split()).rstrip("*")
|
|
103
107
|
if not protein:
|
|
104
108
|
raise ValueError("protein_sequence must not be empty")
|
|
105
109
|
|
|
106
|
-
ranges = gc_ranges or [(
|
|
110
|
+
ranges = gc_ranges or [(55.0, 65.0), (50.0, 65.0), (40.0, 65.0)]
|
|
107
111
|
normalized_ranges = [
|
|
108
112
|
(_normalize_gc_bound(low), _normalize_gc_bound(high)) for low, high in ranges
|
|
109
113
|
]
|
|
@@ -11,6 +11,12 @@ from typing import Any
|
|
|
11
11
|
|
|
12
12
|
from factorforge.engines.profile.utils import get_data_path
|
|
13
13
|
|
|
14
|
+
# Homopolymer thresholds — two distinct concerns, intentionally different values.
|
|
15
|
+
# Expression stability: AT-rich runs ≥6 nt can resemble instability elements (ARE).
|
|
16
|
+
# Synthesis/manufacturing: runs ≥8 nt are flagged by gene synthesis vendors as difficult.
|
|
17
|
+
HOMOPOLYMER_EXPRESSION_WARN_NT = 6
|
|
18
|
+
HOMOPOLYMER_SYNTHESIS_WARN_NT = 8
|
|
19
|
+
|
|
14
20
|
|
|
15
21
|
STANDARD_GENETIC_CODE: dict[str, str] = {
|
|
16
22
|
"TTT": "F",
|
|
@@ -286,8 +292,19 @@ def codon_usage_profile(sequence: str) -> dict[str, dict[str, float | int | str]
|
|
|
286
292
|
return profile
|
|
287
293
|
|
|
288
294
|
|
|
289
|
-
def detect_homopolymers(sequence: str, max_run: int = 6) -> list[dict[str, Any]]:
|
|
290
|
-
|
|
295
|
+
def detect_homopolymers(
|
|
296
|
+
sequence: str,
|
|
297
|
+
max_run: int = HOMOPOLYMER_EXPRESSION_WARN_NT,
|
|
298
|
+
) -> list[dict[str, Any]]:
|
|
299
|
+
"""Detect homopolymer runs for expression stability evaluation.
|
|
300
|
+
|
|
301
|
+
Uses HOMOPOLYMER_EXPRESSION_WARN_NT (default 6 nt) — AT-rich runs of this
|
|
302
|
+
length can resemble AU-rich instability elements (ARE) and affect mRNA
|
|
303
|
+
stability in plant expression systems.
|
|
304
|
+
|
|
305
|
+
For synthesis/manufacturing risk, see RuleEngine.scan_homopolymers()
|
|
306
|
+
which uses HOMOPOLYMER_SYNTHESIS_WARN_NT (8 nt).
|
|
307
|
+
"""
|
|
291
308
|
if max_run <= 1:
|
|
292
309
|
raise ValueError("max_run must be > 1")
|
|
293
310
|
|
|
@@ -303,17 +320,27 @@ def detect_homopolymers(sequence: str, max_run: int = 6) -> list[dict[str, Any]]
|
|
|
303
320
|
continue
|
|
304
321
|
run_length = index - run_start
|
|
305
322
|
if run_length >= max_run:
|
|
306
|
-
findings.append(
|
|
307
|
-
|
|
308
|
-
|
|
323
|
+
findings.append({
|
|
324
|
+
"start": run_start,
|
|
325
|
+
"end": index,
|
|
326
|
+
"base": run_base,
|
|
327
|
+
"length": run_length,
|
|
328
|
+
"context": "expression_stability",
|
|
329
|
+
"threshold_nt": max_run,
|
|
330
|
+
})
|
|
309
331
|
run_base = base
|
|
310
332
|
run_start = index
|
|
311
333
|
|
|
312
334
|
run_length = len(seq) - run_start
|
|
313
335
|
if run_length >= max_run:
|
|
314
|
-
findings.append(
|
|
315
|
-
|
|
316
|
-
|
|
336
|
+
findings.append({
|
|
337
|
+
"start": run_start,
|
|
338
|
+
"end": len(seq),
|
|
339
|
+
"base": run_base,
|
|
340
|
+
"length": run_length,
|
|
341
|
+
"context": "expression_stability",
|
|
342
|
+
"threshold_nt": max_run,
|
|
343
|
+
})
|
|
317
344
|
return findings
|
|
318
345
|
|
|
319
346
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"organism": "Nicotiana tabacum",
|
|
3
3
|
"source": "Kazusa Codon Usage Database (species 4097); 1,534 CDS, 609,684 codons, GenBank release 160 (2007)",
|
|
4
|
-
"description": "N. tabacum codon usage for BY-2 suspension culture optimization. Kazusa 2007 data
|
|
4
|
+
"description": "N. tabacum codon usage for BY-2 suspension culture optimization. Kazusa 2007 data. N. tabacum and N. benthamiana share highly similar codon preferences within the Nicotiana genus. Experimental host profile — not wet-lab validated for BY-2 expression.",
|
|
5
5
|
"codons": {
|
|
6
6
|
"TTT": {
|
|
7
7
|
"aa": "F",
|
|
@@ -6,8 +6,10 @@ GenBank and FASTA export module (P0-5)
|
|
|
6
6
|
from __future__ import annotations
|
|
7
7
|
|
|
8
8
|
import hashlib
|
|
9
|
+
import json
|
|
9
10
|
from datetime import datetime
|
|
10
11
|
from io import StringIO
|
|
12
|
+
from pathlib import Path
|
|
11
13
|
from typing import Any
|
|
12
14
|
|
|
13
15
|
|
|
@@ -25,6 +27,26 @@ class SequenceExporter:
|
|
|
25
27
|
"""Initialize"""
|
|
26
28
|
pass
|
|
27
29
|
|
|
30
|
+
def host_species(self, metadata: dict[str, Any]) -> str:
|
|
31
|
+
"""Resolve host species from feature_registry.json when possible."""
|
|
32
|
+
if metadata.get("organism"):
|
|
33
|
+
return str(metadata["organism"])
|
|
34
|
+
|
|
35
|
+
host = str(
|
|
36
|
+
metadata.get("host_profile") or metadata.get("host") or "nbenthamiana"
|
|
37
|
+
).strip().lower()
|
|
38
|
+
host_aliases = {"ntabacum": "by2"}
|
|
39
|
+
registry_key = host_aliases.get(host, host)
|
|
40
|
+
registry_path = Path(__file__).resolve().parents[4] / "scripts" / "feature_registry.json"
|
|
41
|
+
|
|
42
|
+
try:
|
|
43
|
+
registry = json.loads(registry_path.read_text(encoding="utf-8"))
|
|
44
|
+
except (OSError, json.JSONDecodeError):
|
|
45
|
+
registry = {}
|
|
46
|
+
|
|
47
|
+
species = registry.get("hosts", {}).get(registry_key, {}).get("species")
|
|
48
|
+
return str(species or "Nicotiana benthamiana")
|
|
49
|
+
|
|
28
50
|
def generate_run_id(self, sequence: str, params: dict[str, Any]) -> str:
|
|
29
51
|
"""
|
|
30
52
|
Generate a reproducible run_id
|
|
@@ -69,6 +91,7 @@ class SequenceExporter:
|
|
|
69
91
|
"run_id": "abc12345",
|
|
70
92
|
"timestamp": "2026-01-22T12:00:00",
|
|
71
93
|
"organism": "Nicotiana benthamiana",
|
|
94
|
+
"host_profile": "nbenthamiana",
|
|
72
95
|
"gene_name": "GFP",
|
|
73
96
|
"violations_fixed": [...],
|
|
74
97
|
"warnings": [...]
|
|
@@ -99,7 +122,7 @@ class SequenceExporter:
|
|
|
99
122
|
run_id = metadata.get("run_id", self.generate_run_id(sequence, metadata))
|
|
100
123
|
timestamp = metadata.get("timestamp", datetime.now().strftime("%Y%m%d"))
|
|
101
124
|
gene_name = metadata.get("gene_name", "optimized_gene")
|
|
102
|
-
organism =
|
|
125
|
+
organism = self.host_species(metadata)
|
|
103
126
|
|
|
104
127
|
# Build locus ID
|
|
105
128
|
locus_id = f"PFORM_{run_id}_{timestamp}"
|
|
@@ -355,7 +378,7 @@ class SequenceExporter:
|
|
|
355
378
|
"--- Optimization Settings ---",
|
|
356
379
|
f"Profile: {metadata.get('profile', 'N/A')}",
|
|
357
380
|
f"Assembly Standard: {metadata.get('assembly_standard', 'None')}",
|
|
358
|
-
f"Organism: {
|
|
381
|
+
f"Organism: {self.host_species(metadata)}",
|
|
359
382
|
"",
|
|
360
383
|
]
|
|
361
384
|
|
|
@@ -15,7 +15,7 @@ from enum import Enum
|
|
|
15
15
|
from pathlib import Path
|
|
16
16
|
from typing import Any, cast
|
|
17
17
|
|
|
18
|
-
from factorforge.engines.profile.scoring import calculate_composite_score
|
|
18
|
+
from factorforge.engines.profile.scoring import GC_OPT_MID, calculate_composite_score
|
|
19
19
|
from factorforge.engines.profile.utils import (
|
|
20
20
|
build_aa_to_codons_map,
|
|
21
21
|
calculate_gc,
|
|
@@ -440,14 +440,22 @@ class ReverseTranslator:
|
|
|
440
440
|
return "".join(dna_seq)
|
|
441
441
|
|
|
442
442
|
def _gc_target_translate(self, protein_seq: str, **kwargs: Any) -> str:
|
|
443
|
-
"""
|
|
444
|
-
|
|
443
|
+
"""GC-Target profile: drive global GC toward a configurable target.
|
|
444
|
+
|
|
445
|
+
Targets the caller-supplied ``target_gc`` if provided, otherwise the
|
|
446
|
+
host-profile GC midpoint (GC_OPT_MID = 60% for N. benthamiana). To target
|
|
447
|
+
a lower GC (e.g. for specific vector requirements), pass target_gc explicitly.
|
|
445
448
|
|
|
446
449
|
- GC constraint first
|
|
447
450
|
- CAI may be sacrificed
|
|
448
451
|
- Balance local window GC (50 bp)
|
|
452
|
+
|
|
453
|
+
TODO: GC_OPT_MID is currently a single N. benthamiana-calibrated constant.
|
|
454
|
+
When per-host GC profiles are added, source the default from the active host.
|
|
449
455
|
"""
|
|
450
|
-
target_gc = kwargs.get("target_gc"
|
|
456
|
+
target_gc = kwargs.get("target_gc")
|
|
457
|
+
if target_gc is None:
|
|
458
|
+
target_gc = GC_OPT_MID
|
|
451
459
|
|
|
452
460
|
dna_seq: list[str] = []
|
|
453
461
|
|
|
@@ -477,11 +485,22 @@ class ReverseTranslator:
|
|
|
477
485
|
return "".join(dna_seq)
|
|
478
486
|
|
|
479
487
|
def _assembly_friendly_translate(self, protein_seq: str, **kwargs: Any) -> str:
|
|
480
|
-
"""
|
|
481
|
-
|
|
488
|
+
"""Translate for Golden Gate / MoClo assembly compatibility.
|
|
489
|
+
|
|
490
|
+
Strategy:
|
|
491
|
+
- Starts from balanced codon selection (preferred_ratio=0.6)
|
|
492
|
+
- Retries up to max_attempts times until no BsaI/BpiI Type IIS
|
|
493
|
+
restriction sites remain in the CDS (forward + reverse complement)
|
|
494
|
+
- CAI trade-offs are accepted to achieve site-free sequences
|
|
482
495
|
|
|
483
|
-
|
|
484
|
-
-
|
|
496
|
+
Current scope:
|
|
497
|
+
- Supported: BsaI/BpiI site avoidance via stochastic retry
|
|
498
|
+
- Not yet implemented: local GC window uniformity scoring,
|
|
499
|
+
repeat-pattern penalties, synthesis vendor constraint profiles
|
|
500
|
+
|
|
501
|
+
Args:
|
|
502
|
+
protein_seq: Amino acid sequence.
|
|
503
|
+
max_attempts: Retry limit for site removal (default: 10).
|
|
485
504
|
"""
|
|
486
505
|
max_attempts = kwargs.get("max_attempts", 10)
|
|
487
506
|
if max_attempts < 1:
|
|
@@ -529,13 +548,18 @@ class ReverseTranslator:
|
|
|
529
548
|
return self._apply_nterminal_ramp(dna_seq, protein_seq, ramp_codons=ramp_codons)
|
|
530
549
|
|
|
531
550
|
def _apply_nterminal_ramp(self, dna_seq: str, protein_seq: str, ramp_codons: int = 50) -> str:
|
|
532
|
-
"""
|
|
533
|
-
Apply N-terminal codon ramp for co-translational folding.
|
|
551
|
+
"""Apply N-terminal codon ramp for co-translational folding.
|
|
534
552
|
|
|
535
553
|
Replaces the first `ramp_codons` codons with lower-frequency synonymous
|
|
536
554
|
codons (bottom 50% by frequency) to slow the ribosome at the N-terminus.
|
|
537
555
|
Single-codon amino acids (Met, Trp) are left unchanged.
|
|
538
556
|
|
|
557
|
+
TODO: ramp profile is currently not in VALID_PROFILES (not publicly accessible).
|
|
558
|
+
Before re-enabling, revisit ramp_codons=50:
|
|
559
|
+
- Literature suggests 10–30 codons (Tuller et al. 2010, Chu et al. 2014).
|
|
560
|
+
- For short proteins, 50 codons can cover the entire CDS.
|
|
561
|
+
- Consider: ramp_len = min(30, max(10, int(protein_length * 0.15)))
|
|
562
|
+
|
|
539
563
|
Args:
|
|
540
564
|
dna_seq: Full-length DNA sequence.
|
|
541
565
|
protein_seq: Original protein sequence (same length as dna_seq/3).
|
{factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/engines/profile/rules/rule_engine.py
RENAMED
|
@@ -11,6 +11,7 @@ import math
|
|
|
11
11
|
import re
|
|
12
12
|
from typing import Any
|
|
13
13
|
|
|
14
|
+
from factorforge.analysis.metrics import HOMOPOLYMER_SYNTHESIS_WARN_NT
|
|
14
15
|
from factorforge.engines.profile.rules.reverse_translator import ReverseTranslator
|
|
15
16
|
from factorforge.engines.profile.utils import (
|
|
16
17
|
build_aa_to_codons_map,
|
|
@@ -246,24 +247,27 @@ class RuleEngine:
|
|
|
246
247
|
|
|
247
248
|
return violations
|
|
248
249
|
|
|
249
|
-
def scan_homopolymers(
|
|
250
|
-
|
|
251
|
-
|
|
250
|
+
def scan_homopolymers(
|
|
251
|
+
self, seq: str, min_length: int = HOMOPOLYMER_SYNTHESIS_WARN_NT
|
|
252
|
+
) -> list[dict[str, Any]]:
|
|
253
|
+
"""Detect homopolymer runs for synthesis/manufacturing risk evaluation.
|
|
252
254
|
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
255
|
+
Uses HOMOPOLYMER_SYNTHESIS_WARN_NT (default 8 nt) — the threshold at
|
|
256
|
+
which gene synthesis vendors flag homopolymers as difficult to synthesize
|
|
257
|
+
with high fidelity.
|
|
256
258
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
+
For expression stability risk (≥6 nt), see
|
|
260
|
+
factorforge.analysis.metrics.detect_homopolymers() which uses
|
|
261
|
+
HOMOPOLYMER_EXPRESSION_WARN_NT.
|
|
259
262
|
|
|
260
|
-
|
|
261
|
-
|
|
263
|
+
Args:
|
|
264
|
+
seq: DNA sequence
|
|
265
|
+
min_length: Minimum run length to flag (default: HOMOPOLYMER_SYNTHESIS_WARN_NT)
|
|
262
266
|
|
|
263
267
|
Examples:
|
|
264
268
|
>>> engine = RuleEngine()
|
|
265
269
|
>>> engine.scan_homopolymers("AAAAAAAA", min_length=8)
|
|
266
|
-
[{'type': 'homopolymer', ...}]
|
|
270
|
+
[{'type': 'homopolymer', 'context': 'synthesis', ...}]
|
|
267
271
|
"""
|
|
268
272
|
violations: list[dict[str, Any]] = []
|
|
269
273
|
|
|
@@ -280,16 +284,16 @@ class RuleEngine:
|
|
|
280
284
|
while idx + actual_length < len(seq) and seq[idx + actual_length] == base:
|
|
281
285
|
actual_length += 1
|
|
282
286
|
|
|
283
|
-
violations.append(
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
)
|
|
287
|
+
violations.append({
|
|
288
|
+
"type": "homopolymer",
|
|
289
|
+
"context": "synthesis",
|
|
290
|
+
"threshold_nt": min_length,
|
|
291
|
+
"base": base,
|
|
292
|
+
"position": idx,
|
|
293
|
+
"length": actual_length,
|
|
294
|
+
"sequence": base * actual_length,
|
|
295
|
+
"severity": "high" if actual_length >= 10 else "medium",
|
|
296
|
+
})
|
|
293
297
|
pos = idx + actual_length
|
|
294
298
|
|
|
295
299
|
return violations
|
|
@@ -11,13 +11,16 @@ from typing import Any
|
|
|
11
11
|
|
|
12
12
|
logger = logging.getLogger(__name__)
|
|
13
13
|
|
|
14
|
-
#
|
|
14
|
+
# GC band for N. benthamiana codon-optimized sequences.
|
|
15
15
|
# Benchmark (analysis 004, n=49): balanced profile output average GC% = 60.1%
|
|
16
16
|
# (range 55-71%). The genome-wide average (~42%) reflects all genes, not the
|
|
17
17
|
# high-expression codon table which exhibits 3rd-position GC bias.
|
|
18
|
+
# These constants define the acceptable band — sequences within [GC_OPT_MIN, GC_OPT_MAX]
|
|
19
|
+
# receive full GC score; outside the band the score decays linearly.
|
|
18
20
|
GC_OPT_MIN = 55.0
|
|
19
21
|
GC_OPT_MAX = 65.0
|
|
20
|
-
GC_OPT_MID = 60.0
|
|
22
|
+
GC_OPT_MID = 60.0 # kept for gc_target point-scoring and viral_delivery centering
|
|
23
|
+
GC_DECAY_WIDTH = 20.0 # percentage points outside band before score reaches 0.0
|
|
21
24
|
|
|
22
25
|
# ViennaRNA availability cache
|
|
23
26
|
_vienna_available: bool | None = None
|
|
@@ -32,7 +35,11 @@ class ScoringConfig:
|
|
|
32
35
|
w_mfe: float = 0.2
|
|
33
36
|
w_dinuc: float = 0.0 # CpG/TpA dinucleotide penalty (opt-in, default off)
|
|
34
37
|
w_syncodonlm: float = 0.0 # SynCodonLM quality score (opt-in, default off)
|
|
35
|
-
gc_opt: float = GC_OPT_MID
|
|
38
|
+
gc_opt: float = GC_OPT_MID # no longer used by calculate_composite_score (superseded by
|
|
39
|
+
# gc_min/gc_max band); retained for external API compatibility
|
|
40
|
+
gc_min: float = GC_OPT_MIN # acceptable band lower boundary
|
|
41
|
+
gc_max: float = GC_OPT_MAX # acceptable band upper boundary
|
|
42
|
+
gc_decay_width: float = GC_DECAY_WIDTH # % points outside band before score → 0
|
|
36
43
|
use_mfe: bool = True
|
|
37
44
|
|
|
38
45
|
def __post_init__(self) -> None:
|
|
@@ -61,22 +68,28 @@ class ScoringConfig:
|
|
|
61
68
|
|
|
62
69
|
# Pre-defined scoring configs per optimization profile
|
|
63
70
|
PROFILE_SCORING_CONFIGS: dict[str, ScoringConfig] = {
|
|
64
|
-
"balanced": ScoringConfig(w_cai=0.5, w_gc=0.3, w_mfe=0.2
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
"
|
|
69
|
-
#
|
|
70
|
-
#
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
71
|
+
"balanced": ScoringConfig(w_cai=0.5, w_gc=0.3, w_mfe=0.2),
|
|
72
|
+
# high_cai: CAI 1.0 mimics naturally high-expression N. benthamiana genes (golden_set).
|
|
73
|
+
# For very long or structurally complex proteins under extreme agroinfiltration overexpression,
|
|
74
|
+
# consider the ramp profile instead (N-terminal codon deoptimization reduces ribosome stalling).
|
|
75
|
+
"high_cai": ScoringConfig(w_cai=0.8, w_gc=0.1, w_mfe=0.1),
|
|
76
|
+
# gc_target: gc_min/gc_max are overridden dynamically from target_gc kwarg in
|
|
77
|
+
# calculate_composite_score — the config defaults here are not used for band scoring.
|
|
78
|
+
"gc_target": ScoringConfig(w_cai=0.1, w_gc=0.7, w_mfe=0.2),
|
|
79
|
+
# assembly_friendly: CAI pressure reduced vs balanced; GC/MFE weights raised to
|
|
80
|
+
# reflect Type IIS restriction-site avoidance priority (Golden Gate / MoClo).
|
|
81
|
+
# w_gc scores GC band compliance (global GC%), NOT local GC uniformity.
|
|
82
|
+
# Window-level GC variance and repeat-pattern penalties are not yet implemented.
|
|
83
|
+
"assembly_friendly": ScoringConfig(w_cai=0.3, w_gc=0.4, w_mfe=0.3),
|
|
84
|
+
"ramp": ScoringConfig(w_cai=0.4, w_gc=0.3, w_mfe=0.3),
|
|
85
|
+
# TRV viral-delivery profile — GC band centered on TRV genome composition (~47.5%).
|
|
86
|
+
# MFE weighted at 0.30 (Peccoud et al. 2024, PMC11718241).
|
|
87
|
+
"viral_delivery": ScoringConfig(
|
|
88
|
+
w_cai=0.35, w_gc=0.35, w_mfe=0.30,
|
|
89
|
+
gc_opt=47.5, gc_min=37.5, gc_max=57.5,
|
|
90
|
+
use_mfe=True,
|
|
79
91
|
),
|
|
92
|
+
"ml_enhanced": ScoringConfig(w_cai=0.35, w_gc=0.25, w_mfe=0.15, w_syncodonlm=0.25),
|
|
80
93
|
}
|
|
81
94
|
|
|
82
95
|
|
|
@@ -144,6 +157,36 @@ def normalize_mfe(mfe: float, seq_length: int) -> float:
|
|
|
144
157
|
return 1.0 + (clamped / 0.5)
|
|
145
158
|
|
|
146
159
|
|
|
160
|
+
def gc_band_score(
|
|
161
|
+
gc: float,
|
|
162
|
+
gc_min: float,
|
|
163
|
+
gc_max: float,
|
|
164
|
+
decay_width: float = GC_DECAY_WIDTH,
|
|
165
|
+
) -> float:
|
|
166
|
+
"""Score GC content against an acceptable band.
|
|
167
|
+
|
|
168
|
+
Returns 1.0 inside [gc_min, gc_max]; linearly decays to 0.0 after
|
|
169
|
+
decay_width percentage points outside the band.
|
|
170
|
+
|
|
171
|
+
Args:
|
|
172
|
+
gc: GC content percentage (0-100).
|
|
173
|
+
gc_min: Lower boundary of acceptable band.
|
|
174
|
+
gc_max: Upper boundary of acceptable band.
|
|
175
|
+
decay_width: Percentage points outside band before score reaches 0.0.
|
|
176
|
+
|
|
177
|
+
Examples:
|
|
178
|
+
gc_min=55, gc_max=65, decay_width=20:
|
|
179
|
+
gc=60 → 1.00 (inside band)
|
|
180
|
+
gc=70 → 0.75 (5 pts above gc_max)
|
|
181
|
+
gc=80 → 0.25 (15 pts above gc_max)
|
|
182
|
+
gc=85 → 0.00 (20 pts above gc_max)
|
|
183
|
+
"""
|
|
184
|
+
if gc_min <= gc <= gc_max:
|
|
185
|
+
return 1.0
|
|
186
|
+
distance = (gc_min - gc) if gc < gc_min else (gc - gc_max)
|
|
187
|
+
return max(0.0, 1.0 - distance / decay_width)
|
|
188
|
+
|
|
189
|
+
|
|
147
190
|
def calculate_dinucleotide_score(sequence: str) -> float:
|
|
148
191
|
"""Calculate a dinucleotide avoidance score (0-1, higher = fewer CpG/TpA).
|
|
149
192
|
|
|
@@ -179,19 +222,23 @@ def calculate_composite_score(
|
|
|
179
222
|
profile: str | None = None,
|
|
180
223
|
**kwargs: Any,
|
|
181
224
|
) -> float:
|
|
182
|
-
"""
|
|
183
|
-
Calculate multidimensional composite score.
|
|
225
|
+
"""Calculate multidimensional composite score.
|
|
184
226
|
|
|
185
|
-
S = w1*CAI + w2*
|
|
227
|
+
S = w1*CAI + w2*gc_band_score + w3*MFE_norm
|
|
186
228
|
+ w4*dinuc_score + w5*SynCodonLM_score
|
|
187
229
|
|
|
230
|
+
GC scoring uses a band function: sequences inside [gc_min, gc_max] receive
|
|
231
|
+
full score (1.0); outside the band the score decays linearly to 0.0 over
|
|
232
|
+
gc_decay_width percentage points. For gc_target profile, the band is
|
|
233
|
+
centred on the caller-supplied target_gc (±5 pp).
|
|
234
|
+
|
|
188
235
|
Args:
|
|
189
236
|
cai: Codon Adaptation Index (0-1).
|
|
190
237
|
gc: GC content percentage (0-100).
|
|
191
238
|
sequence: DNA sequence for optional MFE, dinucleotide, and SynCodonLM calculation.
|
|
192
239
|
config: Explicit ScoringConfig. Overrides profile.
|
|
193
240
|
profile: Profile name for preset config lookup.
|
|
194
|
-
**kwargs:
|
|
241
|
+
**kwargs: target_gc (float) — point target for gc_target profile.
|
|
195
242
|
|
|
196
243
|
Returns:
|
|
197
244
|
Composite score (0-1).
|
|
@@ -203,14 +250,17 @@ def calculate_composite_score(
|
|
|
203
250
|
if config is None:
|
|
204
251
|
config = PROFILE_SCORING_CONFIGS["balanced"]
|
|
205
252
|
|
|
206
|
-
# Allow target_gc override for gc_target profile
|
|
207
|
-
gc_opt = float(kwargs.get("target_gc", config.gc_opt))
|
|
208
|
-
|
|
209
253
|
# Component 1: CAI (already 0-1)
|
|
210
254
|
cai_score = max(0.0, min(1.0, cai))
|
|
211
255
|
|
|
212
|
-
# Component 2: GC
|
|
213
|
-
|
|
256
|
+
# Component 2: GC band scoring
|
|
257
|
+
# gc_target profile: caller supplies target_gc; use a ±5 pp band around it.
|
|
258
|
+
# All other profiles: use the band defined in ScoringConfig (gc_min/gc_max).
|
|
259
|
+
if "target_gc" in kwargs:
|
|
260
|
+
tgt = float(kwargs["target_gc"])
|
|
261
|
+
gc_score = gc_band_score(gc, tgt - 5.0, tgt + 5.0, config.gc_decay_width)
|
|
262
|
+
else:
|
|
263
|
+
gc_score = gc_band_score(gc, config.gc_min, config.gc_max, config.gc_decay_width)
|
|
214
264
|
|
|
215
265
|
# Component 3: MFE (optional)
|
|
216
266
|
mfe_score = 0.5 # neutral default
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from typing import Literal, Tuple
|
|
4
4
|
|
|
5
|
-
from factorforge.analysis.metrics import detect_invalid_codons, translate_dna
|
|
5
|
+
from factorforge.analysis.metrics import amino_acid_identity, detect_invalid_codons, translate_dna
|
|
6
6
|
|
|
7
7
|
from .exceptions import SequenceValidationError
|
|
8
8
|
|
|
@@ -186,4 +186,5 @@ def validate_cds_output(input_protein: str, dna_sequence: str) -> dict[str, obje
|
|
|
186
186
|
return {
|
|
187
187
|
"passed": not errors,
|
|
188
188
|
"errors": errors,
|
|
189
|
+
"aa_identity": amino_acid_identity(expected, seq),
|
|
189
190
|
}
|
|
@@ -5,6 +5,7 @@ from __future__ import annotations
|
|
|
5
5
|
from typing import Any
|
|
6
6
|
|
|
7
7
|
from factorforge.analysis.metrics import (
|
|
8
|
+
HOMOPOLYMER_EXPRESSION_WARN_NT,
|
|
8
9
|
amino_acid_identity,
|
|
9
10
|
calculate_first_region_gc,
|
|
10
11
|
calculate_gc,
|
|
@@ -24,7 +25,8 @@ DEFAULT_CONFIG: dict[str, Any] = {
|
|
|
24
25
|
"gc_window_step": 30,
|
|
25
26
|
"forbidden_motifs": [],
|
|
26
27
|
"fail_forbidden_motifs": False,
|
|
27
|
-
|
|
28
|
+
# Expression-stability threshold (see HOMOPOLYMER_EXPRESSION_WARN_NT in metrics).
|
|
29
|
+
"homopolymer_max_run": HOMOPOLYMER_EXPRESSION_WARN_NT,
|
|
28
30
|
}
|
|
29
31
|
|
|
30
32
|
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: factorforge-cds
|
|
3
|
-
Version: 3.1.6
|
|
3
|
+
Version: 3.1.8
|
|
4
4
|
Summary: FactorForge - open-source constraint-based CDS design engine by Eijex.
|
|
5
5
|
Author-email: Eijex <eijex.lab@gmail.com>
|
|
6
6
|
License-Expression: AGPL-3.0-only
|
|
7
|
-
Project-URL: Homepage, https://factorforge-cds.vercel.app
|
|
7
|
+
Project-URL: Homepage, https://factorforge.eijex.com
|
|
8
8
|
Project-URL: Repository, https://github.com/eijex/factorforge-cds
|
|
9
9
|
Project-URL: Issues, https://github.com/eijex/factorforge-cds/issues
|
|
10
10
|
Keywords: codon optimization,CDS design,synthetic biology,bioinformatics,Nicotiana benthamiana,Nicotiana tabacum,Tobacco BY-2,constraint optimization,dynamic programming
|
|
@@ -39,7 +39,7 @@ Dynamic: license-file
|
|
|
39
39
|
[](https://github.com/eijex/factorforge-cds/actions/workflows/ci.yml)
|
|
40
40
|
[](https://codecov.io/gh/eijex/factorforge-cds)
|
|
41
41
|
[](https://doi.org/10.5281/zenodo.20407331)
|
|
42
|
-
[](https://factorforge.eijex.com)
|
|
43
43
|
|
|
44
44
|
FactorForge optimizes protein sequences into host-compatible CDS by maximizing CAI, controlling GC content, eliminating PolyA signals, and producing MoClo/Golden Gate-ready constructs. Supports *N. benthamiana* (agroinfiltration) and Tobacco BY-2 (`--host by2`, bioreactor/cGMP workflows).
|
|
45
45
|
|
|
@@ -54,7 +54,7 @@ pip install factorforge-cds
|
|
|
54
54
|
factorforge optimize my_protein.fasta -o output.fasta
|
|
55
55
|
```
|
|
56
56
|
|
|
57
|
-
Or use the **[web app](https://factorforge-cds.vercel.app)** — no installation required.
|
|
57
|
+
Or use the **[web app](https://factorforge.eijex.com)** — no installation required.
|
|
58
58
|
|
|
59
59
|
---
|
|
60
60
|
|
|
@@ -62,9 +62,10 @@ Or use the **[web app](https://factorforge-cds.vercel.app)** — no installation
|
|
|
62
62
|
|
|
63
63
|
| Method | Description | Link |
|
|
64
64
|
|--------|-------------|------|
|
|
65
|
-
| **Web App** | No installation, demo & light use | [factorforge-cds.vercel.app](https://factorforge-cds.vercel.app) |
|
|
65
|
+
| **Web App** | No installation, demo & light use | [factorforge.eijex.com](https://factorforge.eijex.com) |
|
|
66
66
|
| **CLI / Python** | Local use, batch processing, data privacy | `pip install factorforge-cds` |
|
|
67
67
|
| **Docker** | Full web interface locally | `docker pull ghcr.io/eijex/factorforge-cds:latest` |
|
|
68
|
+
| **Eijex MCP** | AI agent access (Claude Code, Cursor) | [mcp.eijex.com](https://mcp.eijex.com) |
|
|
68
69
|
|
|
69
70
|
---
|
|
70
71
|
|
|
@@ -106,7 +107,7 @@ FactorForge predictions are **in-silico only** and have not been experimentally
|
|
|
106
107
|
## Citing
|
|
107
108
|
|
|
108
109
|
```
|
|
109
|
-
FactorForge v3.1.6 (2026). Open-source constraint-based CDS design engine.
|
|
110
|
+
FactorForge v3.1.8 (2026). Open-source constraint-based CDS design engine.
|
|
110
111
|
Eijex. https://github.com/eijex/factorforge-cds
|
|
111
112
|
```
|
|
112
113
|
|
|
@@ -136,4 +137,4 @@ GNU Affero General Public License v3.0 — see [LICENSE](LICENSE).
|
|
|
136
137
|
- **Wet-lab Results** — [Submit via Google Form](https://docs.google.com/forms/d/e/1FAIpQLSeSx-wYvF6YwHhSPdLMl-L44frCugdm25X_eDz50OaqTD66qA/viewform?usp=header) (recommended) or [GitHub Issue](https://github.com/eijex/factorforge-cds/issues/new?template=wet_lab_result.yml)
|
|
137
138
|
- **GitHub Issues** — bugs, features: [github.com/eijex/factorforge-cds/issues](https://github.com/eijex/factorforge-cds/issues)
|
|
138
139
|
- **Email** — eijex.lab@gmail.com
|
|
139
|
-
- **Web** — [factorforge-cds.vercel.app](https://factorforge-cds.vercel.app)
|
|
140
|
+
- **Web** — [factorforge.eijex.com](https://factorforge.eijex.com)
|
|
@@ -66,7 +66,7 @@ def test_validate_protein_sequence():
|
|
|
66
66
|
def test_validate_cds_output_passes_normal_cds():
|
|
67
67
|
result = validate_cds_output("MAF", "ATGGCTTTC")
|
|
68
68
|
|
|
69
|
-
assert result == {"passed": True, "errors": []}
|
|
69
|
+
assert result == {"passed": True, "errors": [], "aa_identity": 1.0}
|
|
70
70
|
|
|
71
71
|
|
|
72
72
|
def test_validate_cds_output_fails_internal_stop():
|
|
@@ -80,6 +80,7 @@ def test_validate_cds_output_fails_aa_mismatch():
|
|
|
80
80
|
result = validate_cds_output("MAF", "ATGGCTTAC")
|
|
81
81
|
|
|
82
82
|
assert result["passed"] is False
|
|
83
|
+
assert result["aa_identity"] == pytest.approx(2 / 3)
|
|
83
84
|
assert any(error.startswith("aa_mismatch") for error in result["errors"])
|
|
84
85
|
|
|
85
86
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/core/interfaces/optimizer.py
RENAMED
|
File without changes
|
{factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/core/interfaces/validator.py
RENAMED
|
File without changes
|
{factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/data/nbenthamiana_codons.json
RENAMED
|
File without changes
|
{factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/data/nbenthamiana_golden_set.json
RENAMED
|
File without changes
|
{factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/data/templates/high_expression.json
RENAMED
|
File without changes
|
|
File without changes
|
{factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/data/wolffia_globosa_codons.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/engines/profile/construct_builder.py
RENAMED
|
File without changes
|
|
File without changes
|
{factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/engines/profile/rules/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/engines/profile/scoring_ml.py
RENAMED
|
File without changes
|
|
File without changes
|
{factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/engines/profile/validator.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/schemas/design_package.schema.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge/validation/package_generator.py
RENAMED
|
File without changes
|
|
File without changes
|
{factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge_cds.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{factorforge_cds-3.1.6 → factorforge_cds-3.1.8}/src/factorforge_cds.egg-info/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|