factorforge-cds 3.1.9__tar.gz → 3.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/PKG-INFO +13 -33
- factorforge_cds-3.2.0/README.md +88 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/pyproject.toml +2 -1
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/__init__.py +1 -1
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/analysis/feasibility.py +15 -6
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/__init__.py +1 -1
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/__init__.py +1 -1
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/optimizer.py +3 -2
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/rules/domesticator.py +12 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/rules/reverse_translator.py +4 -1
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/validator.py +12 -3
- factorforge_cds-3.2.0/src/factorforge/io/__init__.py +14 -0
- factorforge_cds-3.2.0/src/factorforge/io/fasta.py +132 -0
- factorforge_cds-3.2.0/src/factorforge/io/validation.py +48 -0
- factorforge_cds-3.2.0/src/factorforge/registry/__init__.py +0 -0
- factorforge_cds-3.2.0/src/factorforge/registry/registry_loader.py +18 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/schemas/design_package.py +18 -1
- factorforge_cds-3.2.0/src/factorforge/schemas/design_package.schema.json +337 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge_cds.egg-info/PKG-INFO +13 -33
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge_cds.egg-info/SOURCES.txt +25 -1
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge_cds.egg-info/requires.txt +1 -0
- factorforge_cds-3.2.0/tests/test_baselines.py +32 -0
- factorforge_cds-3.2.0/tests/test_benchmark_codon_table_metadata.py +142 -0
- factorforge_cds-3.2.0/tests/test_benchmark_regression.py +120 -0
- factorforge_cds-3.2.0/tests/test_benchmark_scoring.py +15 -0
- factorforge_cds-3.2.0/tests/test_benchmark_smoke.py +20 -0
- factorforge_cds-3.2.0/tests/test_cai.py +13 -0
- factorforge_cds-3.2.0/tests/test_codon_table_manifest.py +105 -0
- factorforge_cds-3.2.0/tests/test_design_package_schema.py +44 -0
- factorforge_cds-3.2.0/tests/test_design_package_semantics.py +38 -0
- factorforge_cds-3.2.0/tests/test_design_package_serialization.py +31 -0
- factorforge_cds-3.2.0/tests/test_fasta_io.py +67 -0
- factorforge_cds-3.2.0/tests/test_gc_content.py +13 -0
- factorforge_cds-3.2.0/tests/test_host_profile_metadata.py +63 -0
- factorforge_cds-3.2.0/tests/test_iupac_validation.py +39 -0
- factorforge_cds-3.2.0/tests/test_no_raw_sequence_logging.py +45 -0
- factorforge_cds-3.2.0/tests/test_openbio_missing_metric_contract.py +36 -0
- factorforge_cds-3.2.0/tests/test_parameter_registry.py +77 -0
- factorforge_cds-3.2.0/tests/test_registry_production_sync.py +76 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/tests/test_restriction_sites.py +17 -0
- factorforge_cds-3.2.0/tests/test_translation_integrity.py +19 -0
- factorforge_cds-3.1.9/README.md +0 -109
- factorforge_cds-3.1.9/src/factorforge/schemas/design_package.schema.json +0 -373
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/LICENSE +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/setup.cfg +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/__main__.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/analysis/__init__.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/analysis/metrics.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/cli/__init__.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/cli/legacy_cli.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/cli/main.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/core/interfaces/__init__.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/core/interfaces/exporter.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/core/interfaces/optimizer.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/core/interfaces/validator.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/data/nbenthamiana_codons.json +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/data/nbenthamiana_golden_set.json +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/data/ntabacum_codons.json +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/data/templates/high_expression.json +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/data/templates/standard_expression.json +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/data/wolffia_globosa_codons.json +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/database.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/codon_table_builder.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/construct_builder.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/exporter.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/pipeline.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/rules/__init__.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/rules/rule_engine.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/scoring.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/scoring_ml.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/utils.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/registry.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/schemas/__init__.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/utils/__init__.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/utils/construct_id.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/utils/exceptions.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/utils/restriction_sites.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/utils/sequence_validator.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/utils/validation.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/validation/__init__.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/validation/cli.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/validation/package_generator.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge_cds.egg-info/dependency_links.txt +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge_cds.egg-info/entry_points.txt +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge_cds.egg-info/top_level.txt +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/tests/test_database.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/tests/test_legacy_cli.py +0 -0
- {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/tests/test_sequence_validator.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: factorforge-cds
|
|
3
|
-
Version: 3.1.9
|
|
3
|
+
Version: 3.2.0
|
|
4
4
|
Summary: FactorForge - open-source constraint-based CDS design engine by Eijex.
|
|
5
5
|
Author-email: Eijex <eijex.lab@gmail.com>
|
|
6
6
|
License-Expression: AGPL-3.0-only
|
|
@@ -20,6 +20,7 @@ Requires-Dist: requests>=2.31
|
|
|
20
20
|
Requires-Dist: click>=8.0
|
|
21
21
|
Requires-Dist: pydantic>=2.0
|
|
22
22
|
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: jsonschema>=4.0; extra == "dev"
|
|
23
24
|
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
24
25
|
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
25
26
|
Requires-Dist: ruff>=0.1; extra == "dev"
|
|
@@ -31,7 +32,7 @@ Dynamic: license-file
|
|
|
31
32
|
|
|
32
33
|
# FactorForge
|
|
33
34
|
|
|
34
|
-
**Open-source constraint-based CDS design engine for
|
|
35
|
+
**Open-source constraint-based CDS design engine for sequence-level CDS design, with primary support for *Nicotiana benthamiana* (Tobacco BY-2: experimental).**
|
|
35
36
|
|
|
36
37
|
[](LICENSE)
|
|
37
38
|
[](https://www.python.org/)
|
|
@@ -41,7 +42,7 @@ Dynamic: license-file
|
|
|
41
42
|
[](https://doi.org/10.5281/zenodo.20407331)
|
|
42
43
|
[](https://factorforge.eijex.com)
|
|
43
44
|
|
|
44
|
-
FactorForge
|
|
45
|
+
FactorForge performs profile-guided CDS design with CAI/GC metrics, PolyA-signal screening, and Golden Gate/MoClo-aware checks. Primary support: *N. benthamiana* (agroinfiltration). Experimental host context: Tobacco BY-2 (`--host by2`).
|
|
45
46
|
|
|
46
47
|
**→ [Full Documentation](https://eijex.github.io/factorforge-cds/)**
|
|
47
48
|
|
|
@@ -65,7 +66,7 @@ Or use the **[web app](https://factorforge.eijex.com)** — no installation requ
|
|
|
65
66
|
| **Web App** | No installation, demo & light use | [factorforge.eijex.com](https://factorforge.eijex.com) |
|
|
66
67
|
| **CLI / Python** | Local use, batch processing, data privacy | `pip install factorforge-cds` |
|
|
67
68
|
| **Docker** | Full web interface locally | `docker pull ghcr.io/eijex/factorforge-cds:latest` |
|
|
68
|
-
| **Eijex MCP** |
|
|
69
|
+
| **Eijex MCP** | MCP-compatible agent access | [mcp.eijex.com](https://mcp.eijex.com) |
|
|
69
70
|
|
|
70
71
|
---
|
|
71
72
|
|
|
@@ -82,59 +83,38 @@ and are not imported by the installed package or exposed as supported engines.
|
|
|
82
83
|
|
|
83
84
|
---
|
|
84
85
|
|
|
85
|
-
## Development History
|
|
86
|
-
|
|
87
|
-
FactorForge has gone through several implementation generations before the current public release:
|
|
88
|
-
|
|
89
|
-
| Generation | Status | Description |
|
|
90
|
-
|-----------|--------|-------------|
|
|
91
|
-
| **v1** — NBent_OptiCodon | Internal | Thesis-derived codon optimization baseline for *N. benthamiana* |
|
|
92
|
-
| **v2** — Rule-Based Engine | Internal → Production | Deterministic, constraint-aware design engine; became the foundation for the public release |
|
|
93
|
-
| **v3-alpha** — ML Prototype | Archived | ML-based design attempt; performance was insufficient for production use; preserved under `archive/v3-ml-prototype/` |
|
|
94
|
-
| **v3.0+** — Current release | Public | Open-source release of the matured v2 engine under `factorforge.engines.profile` |
|
|
95
|
-
| **v3.7+** — ML Engine | Planned | ML-based design as `--engine ml`; added once sufficient wet-lab data is available |
|
|
96
|
-
|
|
97
|
-
The `archive/` directory preserves all three earlier tracks for provenance. None are installed or exposed by the current package.
|
|
98
|
-
|
|
99
|
-
---
|
|
100
|
-
|
|
101
86
|
## ⚠️ Validation Status
|
|
102
87
|
|
|
103
|
-
FactorForge
|
|
88
|
+
FactorForge outputs are **in-silico only** and have not been experimentally validated in wet-lab conditions. See [Validation](https://eijex.github.io/factorforge-cds/validation/) and [VALIDATION.md](VALIDATION.md).
|
|
104
89
|
|
|
105
90
|
---
|
|
106
91
|
|
|
107
92
|
## Citing
|
|
108
93
|
|
|
109
94
|
```
|
|
110
|
-
FactorForge v3.
|
|
95
|
+
FactorForge v3.2.0 (2026). Open-source constraint-based CDS design engine.
|
|
111
96
|
Eijex. https://github.com/eijex/factorforge-cds
|
|
112
97
|
```
|
|
113
98
|
|
|
114
|
-
*A citable publication is in preparation.*
|
|
115
|
-
|
|
116
99
|
---
|
|
117
100
|
|
|
118
|
-
##
|
|
101
|
+
## Maintainer
|
|
119
102
|
|
|
120
|
-
|
|
121
|
-
|--|------|------|
|
|
122
|
-
| 👤 | Mun-Kyu Kim ([@eijex](https://github.com/eijex)) | Author & maintainer |
|
|
123
|
-
| 🤖 | Claude (Anthropic) | Design, analysis, planning |
|
|
124
|
-
| 🤖 | Codex (OpenAI) | Implementation |
|
|
103
|
+
Mun-Kyu Kim ([@eijex](https://github.com/eijex))
|
|
125
104
|
|
|
126
105
|
## License
|
|
127
106
|
|
|
128
107
|
GNU Affero General Public License v3.0 — see [LICENSE](LICENSE).
|
|
129
108
|
|
|
130
|
-
**Disclaimer:** FactorForge is provided for research purposes only.
|
|
109
|
+
**Disclaimer:** FactorForge is provided for research purposes only. Outputs are computational and have not been experimentally validated.
|
|
131
110
|
|
|
132
111
|
---
|
|
133
112
|
|
|
134
113
|
## Get in Touch
|
|
135
114
|
|
|
136
115
|
- **Docs** — [eijex.github.io/factorforge-cds](https://eijex.github.io/factorforge-cds/)
|
|
137
|
-
- **Wet-lab Results** —
|
|
116
|
+
- **Wet-lab Results** — Public-safe validation summaries are welcome. Do not submit raw sequences, confidential construct details, internal batch IDs, patient data, private contact information, exact process parameters, or confidential partner/customer data. See [VALIDATION.md](VALIDATION.md) before submitting.
|
|
138
117
|
- **GitHub Issues** — bugs, features: [github.com/eijex/factorforge-cds/issues](https://github.com/eijex/factorforge-cds/issues)
|
|
139
118
|
- **Email** — eijex.lab@gmail.com
|
|
140
|
-
- **
|
|
119
|
+
- **FactorForge** — [factorforge.eijex.com](https://factorforge.eijex.com)
|
|
120
|
+
- **Lab** — [www.eijex.com](https://www.eijex.com)
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# FactorForge
|
|
2
|
+
|
|
3
|
+
**Open-source constraint-based CDS design engine for sequence-level CDS design, with primary support for *Nicotiana benthamiana* (Tobacco BY-2: experimental).**
|
|
4
|
+
|
|
5
|
+
[](LICENSE)
|
|
6
|
+
[](https://www.python.org/)
|
|
7
|
+
[](https://pypi.org/project/factorforge-cds/)
|
|
8
|
+
[](https://github.com/eijex/factorforge-cds/actions/workflows/ci.yml)
|
|
9
|
+
[](https://codecov.io/gh/eijex/factorforge-cds)
|
|
10
|
+
[](https://doi.org/10.5281/zenodo.20407331)
|
|
11
|
+
[](https://factorforge.eijex.com)
|
|
12
|
+
|
|
13
|
+
FactorForge performs profile-guided CDS design with CAI/GC metrics, PolyA-signal screening, and Golden Gate/MoClo-aware checks. Primary support: *N. benthamiana* (agroinfiltration). Experimental host context: Tobacco BY-2 (`--host by2`).
|
|
14
|
+
|
|
15
|
+
**→ [Full Documentation](https://eijex.github.io/factorforge-cds/)**
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## Quick Start
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
pip install factorforge-cds
|
|
23
|
+
factorforge optimize my_protein.fasta -o output.fasta
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Or use the **[web app](https://factorforge.eijex.com)** — no installation required.
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Access Options
|
|
31
|
+
|
|
32
|
+
| Method | Description | Link |
|
|
33
|
+
|--------|-------------|------|
|
|
34
|
+
| **Web App** | No installation, demo & light use | [factorforge.eijex.com](https://factorforge.eijex.com) |
|
|
35
|
+
| **CLI / Python** | Local use, batch processing, data privacy | `pip install factorforge-cds` |
|
|
36
|
+
| **Docker** | Full web interface locally | `docker pull ghcr.io/eijex/factorforge-cds:latest` |
|
|
37
|
+
| **Eijex MCP** | MCP-compatible agent access | [mcp.eijex.com](https://mcp.eijex.com) |
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
## Repository Structure
|
|
42
|
+
|
|
43
|
+
The supported production engine is the deterministic profile engine under:
|
|
44
|
+
|
|
45
|
+
```text
|
|
46
|
+
src/factorforge/engines/profile/
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Historical implementation tracks are preserved under `archive/` for provenance
|
|
50
|
+
and are not imported by the installed package or exposed as supported engines.
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## ⚠️ Validation Status
|
|
55
|
+
|
|
56
|
+
FactorForge outputs are **in-silico only** and have not been experimentally validated in wet-lab conditions. See [Validation](https://eijex.github.io/factorforge-cds/validation/) and [VALIDATION.md](VALIDATION.md).
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## Citing
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
FactorForge v3.2.0 (2026). Open-source constraint-based CDS design engine.
|
|
64
|
+
Eijex. https://github.com/eijex/factorforge-cds
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
## Maintainer
|
|
70
|
+
|
|
71
|
+
Mun-Kyu Kim ([@eijex](https://github.com/eijex))
|
|
72
|
+
|
|
73
|
+
## License
|
|
74
|
+
|
|
75
|
+
GNU Affero General Public License v3.0 — see [LICENSE](LICENSE).
|
|
76
|
+
|
|
77
|
+
**Disclaimer:** FactorForge is provided for research purposes only. Outputs are computational and have not been experimentally validated.
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## Get in Touch
|
|
82
|
+
|
|
83
|
+
- **Docs** — [eijex.github.io/factorforge-cds](https://eijex.github.io/factorforge-cds/)
|
|
84
|
+
- **Wet-lab Results** — Public-safe validation summaries are welcome. Do not submit raw sequences, confidential construct details, internal batch IDs, patient data, private contact information, exact process parameters, or confidential partner/customer data. See [VALIDATION.md](VALIDATION.md) before submitting.
|
|
85
|
+
- **GitHub Issues** — bugs, features: [github.com/eijex/factorforge-cds/issues](https://github.com/eijex/factorforge-cds/issues)
|
|
86
|
+
- **Email** — eijex.lab@gmail.com
|
|
87
|
+
- **FactorForge** — [factorforge.eijex.com](https://factorforge.eijex.com)
|
|
88
|
+
- **Lab** — [www.eijex.com](https://www.eijex.com)
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "factorforge-cds"
|
|
7
|
-
version = "3.1.9"
|
|
7
|
+
version = "3.2.0"
|
|
8
8
|
description = "FactorForge - open-source constraint-based CDS design engine by Eijex."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "AGPL-3.0-only"
|
|
@@ -28,6 +28,7 @@ dependencies = [
|
|
|
28
28
|
|
|
29
29
|
[project.optional-dependencies]
|
|
30
30
|
dev = [
|
|
31
|
+
"jsonschema>=4.0",
|
|
31
32
|
"pytest>=7.0",
|
|
32
33
|
"pytest-cov>=4.0",
|
|
33
34
|
"ruff>=0.1",
|
|
@@ -14,6 +14,16 @@ from factorforge.analysis.metrics import (
|
|
|
14
14
|
)
|
|
15
15
|
|
|
16
16
|
|
|
17
|
+
# Defaults calibrated to nbenthamiana profile engine output distribution
|
|
18
|
+
# (analysis 004, n=49): avg CAI=0.76, avg GC=60.1% (range 55-71%).
|
|
19
|
+
# DEFAULT_CAI_TARGET=0.82 aligns with industry practice (>0.8) and is achievable.
|
|
20
|
+
# Exported as named constants so tests/test_registry_production_sync.py can
|
|
21
|
+
# strictly compare them against the registry (single source of truth).
|
|
22
|
+
DEFAULT_CAI_TARGET: float = 0.82
|
|
23
|
+
DEFAULT_GC_LOW: float = 55.0
|
|
24
|
+
DEFAULT_GC_HIGH: float = 65.0
|
|
25
|
+
|
|
26
|
+
|
|
17
27
|
AA_TO_CODONS: dict[str, list[str]] = {}
|
|
18
28
|
for _codon, _aa in STANDARD_GENETIC_CODE.items():
|
|
19
29
|
if _aa == "*":
|
|
@@ -88,9 +98,9 @@ def _reconstruct_sequence(
|
|
|
88
98
|
def analyze_feasibility(
|
|
89
99
|
protein_sequence: str,
|
|
90
100
|
codon_weights: dict[str, float],
|
|
91
|
-
target_cai: float = 0.82,
|
|
92
|
-
target_gc_low: float =
|
|
93
|
-
target_gc_high: float =
|
|
101
|
+
target_cai: float = DEFAULT_CAI_TARGET,
|
|
102
|
+
target_gc_low: float = DEFAULT_GC_LOW,
|
|
103
|
+
target_gc_high: float = DEFAULT_GC_HIGH,
|
|
94
104
|
gc_ranges: list[tuple[float, float]] | None = None,
|
|
95
105
|
) -> dict[str, Any]:
|
|
96
106
|
"""Compute exact CAI/GC feasibility over synonymous codon choices.
|
|
@@ -99,9 +109,8 @@ def analyze_feasibility(
|
|
|
99
109
|
global GC count. This is exact for global GC and CAI under the supplied
|
|
100
110
|
codon weights.
|
|
101
111
|
|
|
102
|
-
|
|
103
|
-
(analysis 004, n=49)
|
|
104
|
-
target_cai=0.82 aligns with industry practice (>0.8) and is achievable.
|
|
112
|
+
See module-level DEFAULT_CAI_TARGET / DEFAULT_GC_LOW / DEFAULT_GC_HIGH for
|
|
113
|
+
the calibration rationale (analysis 004, n=49).
|
|
105
114
|
"""
|
|
106
115
|
protein = "".join(protein_sequence.upper().split()).rstrip("*")
|
|
107
116
|
if not protein:
|
{factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/optimizer.py
RENAMED
|
@@ -17,7 +17,7 @@ class RuleBasedOptimizer(OptimizerEngine):
|
|
|
17
17
|
"""Profile-based rule optimization engine."""
|
|
18
18
|
|
|
19
19
|
name = "Profile-based"
|
|
20
|
-
version = "3.1.9"
|
|
20
|
+
version = "3.2.0"
|
|
21
21
|
|
|
22
22
|
def __init__(self) -> None:
|
|
23
23
|
self.validator = InputValidator()
|
|
@@ -30,6 +30,7 @@ class RuleBasedOptimizer(OptimizerEngine):
|
|
|
30
30
|
sequence: str,
|
|
31
31
|
profile: str | None = "balanced",
|
|
32
32
|
host: str = "nbenthamiana",
|
|
33
|
+
seed: int | None = None,
|
|
33
34
|
**kwargs: Any,
|
|
34
35
|
) -> OptimizationResult:
|
|
35
36
|
"""
|
|
@@ -91,7 +92,7 @@ class RuleBasedOptimizer(OptimizerEngine):
|
|
|
91
92
|
candidates = [{"sequence": optimized_dna, "cai": cai, "gc": gc, "score": score}]
|
|
92
93
|
else:
|
|
93
94
|
candidates = translator.generate_candidates(
|
|
94
|
-
processed_seq, profile=opt_profile, n=1
|
|
95
|
+
processed_seq, profile=opt_profile, n=1, seed=seed
|
|
95
96
|
)
|
|
96
97
|
if not candidates:
|
|
97
98
|
raise ValueError("No candidates generated for input sequence.")
|
|
@@ -20,6 +20,18 @@ class Domesticator:
|
|
|
20
20
|
- BioBricks (EcoRI, XbaI, SpeI, PstI)
|
|
21
21
|
"""
|
|
22
22
|
|
|
23
|
+
# Canonical Golden Gate Type IIS enzyme set, exported as GOLDEN_GATE_ENZYMES
|
|
24
|
+
# so tests/test_registry_production_sync.py::test_type_iis_sync can strictly
|
|
25
|
+
# compare it against the registry (single source of truth) instead of warning.
|
|
26
|
+
#
|
|
27
|
+
# BpiI and BbsI share the same GAAGAC Type IIS recognition/cut behavior in
|
|
28
|
+
# FactorForge's Golden Gate scanning context. The existing FactorForge
|
|
29
|
+
# production code and documentation consistently use BpiI as the canonical
|
|
30
|
+
# label; BbsI is a common synonym/vendor naming convention for the same
|
|
31
|
+
# scanning target. This is a naming normalization, not a biological
|
|
32
|
+
# threshold change. Order matches the registry value for stable comparison.
|
|
33
|
+
GOLDEN_GATE_ENZYMES: tuple[str, ...] = ("BsaI", "BpiI", "BsmBI")
|
|
34
|
+
|
|
23
35
|
# Assembly standard definitions
|
|
24
36
|
ASSEMBLY_STANDARDS: dict[str, dict[str, Any]] = {
|
|
25
37
|
"golden_gate": {
|
|
@@ -671,6 +671,7 @@ class ReverseTranslator:
|
|
|
671
671
|
protein_seq: str,
|
|
672
672
|
profile: OptimizationProfile = OptimizationProfile.BALANCED,
|
|
673
673
|
n: int = 5,
|
|
674
|
+
seed: int | None = None,
|
|
674
675
|
**kwargs: Any,
|
|
675
676
|
) -> list[dict[str, Any]]:
|
|
676
677
|
"""
|
|
@@ -697,6 +698,9 @@ class ReverseTranslator:
|
|
|
697
698
|
if n < 1:
|
|
698
699
|
raise ValueError("n must be >= 1")
|
|
699
700
|
|
|
701
|
+
# Seed before any candidate generation (covers both n=1 fast path and n>1).
|
|
702
|
+
random.seed(seed if seed is not None else secrets.randbits(32))
|
|
703
|
+
|
|
700
704
|
def _build_candidate() -> dict[str, Any]:
|
|
701
705
|
dna_seq = self.reverse_translate(protein_seq, profile, **kwargs)
|
|
702
706
|
cai = self.calculate_cai(dna_seq)
|
|
@@ -720,7 +724,6 @@ class ReverseTranslator:
|
|
|
720
724
|
|
|
721
725
|
candidates: list[dict[str, Any]] = []
|
|
722
726
|
last_error: Exception | None = None
|
|
723
|
-
random.seed(secrets.randbits(32))
|
|
724
727
|
|
|
725
728
|
for attempt in range(n):
|
|
726
729
|
try:
|
{factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/validator.py
RENAMED
|
@@ -90,14 +90,23 @@ class InputValidator:
|
|
|
90
90
|
# Analyze character set
|
|
91
91
|
unique_chars = set(clean_seq)
|
|
92
92
|
|
|
93
|
-
#
|
|
94
|
-
if unique_chars <=
|
|
93
|
+
# 1. Pure ATGC → unambiguously DNA
|
|
94
|
+
if unique_chars <= self.DNA_BASES:
|
|
95
95
|
return SequenceType.DNA
|
|
96
96
|
|
|
97
|
-
# Protein
|
|
97
|
+
# 2. Protein check BEFORE ambiguous DNA.
|
|
98
|
+
# IUPAC ambiguous codes (N/R/Y/S/W/K/M/B/D/H/V) overlap with amino acid
|
|
99
|
+
# single-letter codes. When a sequence contains only overlapping characters,
|
|
100
|
+
# protein interpretation takes priority — the optimizer's primary input is
|
|
101
|
+
# protein → CDS. Users passing ambiguous DNA for re-domestication should
|
|
102
|
+
# use FASTA format with a header line.
|
|
98
103
|
if unique_chars <= (self.STANDARD_AA | set(self.AMBIGUOUS_AA.keys())):
|
|
99
104
|
return SequenceType.PROTEIN
|
|
100
105
|
|
|
106
|
+
# 3. DNA with IUPAC ambiguous bases (only reached if non-protein chars present)
|
|
107
|
+
if unique_chars <= (self.DNA_BASES | self.AMBIGUOUS_DNA):
|
|
108
|
+
return SequenceType.DNA
|
|
109
|
+
|
|
101
110
|
return SequenceType.UNKNOWN
|
|
102
111
|
|
|
103
112
|
def validate(self, sequence: str, auto_fix: bool = False) -> dict[str, Any]:
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Privacy-aware native sequence I/O helpers."""
|
|
2
|
+
|
|
3
|
+
from .fasta import FastaRecord, format_fasta, parse_fasta, read_fasta, write_fasta
|
|
4
|
+
from .validation import SequenceValidationError, validate_sequence
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"FastaRecord",
|
|
8
|
+
"SequenceValidationError",
|
|
9
|
+
"format_fasta",
|
|
10
|
+
"parse_fasta",
|
|
11
|
+
"read_fasta",
|
|
12
|
+
"validate_sequence",
|
|
13
|
+
"write_fasta",
|
|
14
|
+
]
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""Small native FASTA reader/writer with privacy-safe output headers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Iterable, Mapping
|
|
9
|
+
|
|
10
|
+
from .validation import validate_sequence
|
|
11
|
+
|
|
12
|
+
HEADER_ALLOWLIST = ("engine", "host_profile", "profile", "sequence_hash")
|
|
13
|
+
BLOCKED_HEADER_TERMS = (
|
|
14
|
+
"plantform",
|
|
15
|
+
"confidential",
|
|
16
|
+
"private",
|
|
17
|
+
"secret",
|
|
18
|
+
"partner",
|
|
19
|
+
"yield",
|
|
20
|
+
"wet-lab",
|
|
21
|
+
"wet_lab",
|
|
22
|
+
"clinical",
|
|
23
|
+
)
|
|
24
|
+
RAW_SEQUENCE_PATTERN = re.compile(r"[ACGTRYSWKMBDHVN]{20,}", re.IGNORECASE)
|
|
25
|
+
SAFE_HEADER_VALUE = re.compile(r"^[A-Za-z0-9_.:@/+ -]+$")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass(frozen=True)
|
|
29
|
+
class FastaRecord:
|
|
30
|
+
identifier: str
|
|
31
|
+
sequence: str
|
|
32
|
+
metadata: Mapping[str, str] = field(default_factory=dict)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _validate_header_value(value: str) -> str:
|
|
36
|
+
normalized = str(value).strip()
|
|
37
|
+
lowered = normalized.lower()
|
|
38
|
+
if not normalized or len(normalized) > 120:
|
|
39
|
+
raise ValueError("FASTA header values must contain 1-120 characters")
|
|
40
|
+
if any(term in lowered for term in BLOCKED_HEADER_TERMS):
|
|
41
|
+
raise ValueError("FASTA header contains blocked private or claim-related metadata")
|
|
42
|
+
if RAW_SEQUENCE_PATTERN.search(normalized):
|
|
43
|
+
raise ValueError("FASTA header must not contain a raw sequence")
|
|
44
|
+
if not SAFE_HEADER_VALUE.fullmatch(normalized):
|
|
45
|
+
raise ValueError("FASTA header contains unsupported characters")
|
|
46
|
+
return normalized
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def build_fasta_header(identifier: str, metadata: Mapping[str, object] | None = None) -> str:
|
|
50
|
+
"""Build an allowlist-only public FASTA header."""
|
|
51
|
+
parts = [_validate_header_value(identifier)]
|
|
52
|
+
for key in HEADER_ALLOWLIST:
|
|
53
|
+
if metadata is None or key not in metadata or metadata[key] is None:
|
|
54
|
+
continue
|
|
55
|
+
value = _validate_header_value(str(metadata[key]))
|
|
56
|
+
parts.append(f"{key}={value}")
|
|
57
|
+
return " ".join(parts)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def parse_fasta(text: str, validation_mode: str | None = None) -> list[FastaRecord]:
|
|
61
|
+
"""Parse FASTA text, optionally validating each sequence alphabet."""
|
|
62
|
+
records: list[FastaRecord] = []
|
|
63
|
+
identifier: str | None = None
|
|
64
|
+
sequence_lines: list[str] = []
|
|
65
|
+
|
|
66
|
+
def append_record() -> None:
|
|
67
|
+
if identifier is None:
|
|
68
|
+
return
|
|
69
|
+
sequence = "".join(sequence_lines)
|
|
70
|
+
if validation_mode is not None:
|
|
71
|
+
sequence = validate_sequence(sequence, validation_mode)
|
|
72
|
+
records.append(FastaRecord(identifier=identifier, sequence=sequence))
|
|
73
|
+
|
|
74
|
+
for line_no, raw_line in enumerate(text.splitlines(), start=1):
|
|
75
|
+
line = raw_line.strip()
|
|
76
|
+
if not line or line.startswith(";"):
|
|
77
|
+
continue
|
|
78
|
+
if line.startswith(">"):
|
|
79
|
+
append_record()
|
|
80
|
+
identifier = line[1:].strip()
|
|
81
|
+
if not identifier:
|
|
82
|
+
raise ValueError(f"FASTA header at line {line_no} is empty")
|
|
83
|
+
sequence_lines = []
|
|
84
|
+
elif identifier is None:
|
|
85
|
+
raise ValueError(f"FASTA sequence found before first header at line {line_no}")
|
|
86
|
+
else:
|
|
87
|
+
sequence_lines.append(line)
|
|
88
|
+
|
|
89
|
+
append_record()
|
|
90
|
+
if not records:
|
|
91
|
+
raise ValueError("FASTA input contains no records")
|
|
92
|
+
return records
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def format_fasta(
|
|
96
|
+
records: Iterable[FastaRecord],
|
|
97
|
+
*,
|
|
98
|
+
validation_mode: str = "dna_strict",
|
|
99
|
+
line_width: int = 60,
|
|
100
|
+
) -> str:
|
|
101
|
+
"""Serialize records using privacy-safe headers and validated sequences."""
|
|
102
|
+
if line_width < 1:
|
|
103
|
+
raise ValueError("line_width must be positive")
|
|
104
|
+
|
|
105
|
+
lines: list[str] = []
|
|
106
|
+
for record in records:
|
|
107
|
+
header = build_fasta_header(record.identifier, record.metadata)
|
|
108
|
+
sequence = validate_sequence(record.sequence, validation_mode)
|
|
109
|
+
lines.append(f">{header}")
|
|
110
|
+
lines.extend(sequence[i : i + line_width] for i in range(0, len(sequence), line_width))
|
|
111
|
+
if not lines:
|
|
112
|
+
raise ValueError("At least one FASTA record is required")
|
|
113
|
+
return "\n".join(lines) + "\n"
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def read_fasta(path: str | Path, validation_mode: str | None = None) -> list[FastaRecord]:
|
|
117
|
+
return parse_fasta(Path(path).read_text(encoding="utf-8"), validation_mode)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def write_fasta(
|
|
121
|
+
path: str | Path,
|
|
122
|
+
records: Iterable[FastaRecord],
|
|
123
|
+
*,
|
|
124
|
+
validation_mode: str = "dna_strict",
|
|
125
|
+
line_width: int = 60,
|
|
126
|
+
) -> Path:
|
|
127
|
+
output_path = Path(path)
|
|
128
|
+
output_path.write_text(
|
|
129
|
+
format_fasta(records, validation_mode=validation_mode, line_width=line_width),
|
|
130
|
+
encoding="utf-8",
|
|
131
|
+
)
|
|
132
|
+
return output_path
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Explicit DNA/protein alphabet validation without raw-sequence leakage."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import re
|
|
7
|
+
from typing import Final
|
|
8
|
+
|
|
9
|
+
VALIDATION_ALPHABETS: Final[dict[str, frozenset[str]]] = {
|
|
10
|
+
"dna_strict": frozenset("ACGT"),
|
|
11
|
+
"dna_iupac": frozenset("ACGTRYSWKMBDHVN"),
|
|
12
|
+
"protein_strict": frozenset("ACDEFGHIKLMNPQRSTVWY"),
|
|
13
|
+
"protein_extended": frozenset("ACDEFGHIKLMNPQRSTVWYXBZUO*"),
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class SequenceValidationError(ValueError):
|
|
18
|
+
"""Raised when a sequence does not satisfy its explicit alphabet contract."""
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _fingerprint(sequence: str) -> str:
|
|
22
|
+
return hashlib.sha256(sequence.encode("utf-8")).hexdigest()[:12]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def validate_sequence(sequence: str, mode: str = "dna_strict") -> str:
|
|
26
|
+
"""Normalize and validate a sequence for the requested alphabet mode.
|
|
27
|
+
|
|
28
|
+
Whitespace is removed and letters are uppercased. Error messages include a
|
|
29
|
+
short preview and fingerprint, never the complete input sequence.
|
|
30
|
+
"""
|
|
31
|
+
if mode not in VALIDATION_ALPHABETS:
|
|
32
|
+
choices = ", ".join(sorted(VALIDATION_ALPHABETS))
|
|
33
|
+
raise ValueError(f"Unknown validation mode {mode!r}; expected one of: {choices}")
|
|
34
|
+
if not isinstance(sequence, str):
|
|
35
|
+
raise TypeError("sequence must be a string")
|
|
36
|
+
|
|
37
|
+
normalized = re.sub(r"\s+", "", sequence).upper()
|
|
38
|
+
if not normalized:
|
|
39
|
+
raise SequenceValidationError("Sequence is empty after whitespace normalization")
|
|
40
|
+
|
|
41
|
+
invalid = sorted(set(normalized) - VALIDATION_ALPHABETS[mode])
|
|
42
|
+
if invalid:
|
|
43
|
+
preview = normalized[:8] + ("[truncated]" if len(normalized) > 8 else "")
|
|
44
|
+
raise SequenceValidationError(
|
|
45
|
+
f"Invalid symbols for {mode}: {invalid}; preview={preview!r}; "
|
|
46
|
+
f"length={len(normalized)}; sha256-prefix={_fingerprint(normalized)}"
|
|
47
|
+
)
|
|
48
|
+
return normalized
|
|
File without changes
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Load and resolve the factorforge parameter registry (package source of truth)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
import yaml
|
|
5
|
+
|
|
6
|
+
REGISTRY_PATH = Path(__file__).resolve().parent / "current_parameter_registry.yaml"
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def load_registry() -> dict:
|
|
10
|
+
return yaml.safe_load(REGISTRY_PATH.read_text(encoding="utf-8"))
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def resolve_ref(registry: dict, dotted: str):
|
|
14
|
+
"""Resolve a dotted path (e.g. 'parameters.optimization.cai_target') into a value."""
|
|
15
|
+
node = registry
|
|
16
|
+
for part in dotted.split("."):
|
|
17
|
+
node = node[part]
|
|
18
|
+
return node
|
|
@@ -1,4 +1,21 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Internal API response format for FactorForge CDS artifacts.
|
|
2
|
+
|
|
3
|
+
This module defines the *internal* Pydantic model used by the optimize API
|
|
4
|
+
handler (``api.optimize.handler``). It is NOT the public Open Bio Design
|
|
5
|
+
Package contract.
|
|
6
|
+
|
|
7
|
+
Public contract:
|
|
8
|
+
``src/factorforge/schemas/design_package.schema.json`` (JSON Schema Draft
|
|
9
|
+
2020-12) is the public output specification. It defines the canonical field
|
|
10
|
+
set (``design_id``, ``claim_boundary``, ``evidence``, etc.) and is tested
|
|
11
|
+
by ``tests/test_design_package_schema.py`` and related files.
|
|
12
|
+
|
|
13
|
+
Separation rationale:
|
|
14
|
+
The internal model uses API-convenient field names (``construct_id``,
|
|
15
|
+
``cds_design``) with ``extra="allow"`` for handler flexibility. The public
|
|
16
|
+
schema enforces claim boundary fields and must be validated with jsonschema
|
|
17
|
+
before any output is published or shared externally.
|
|
18
|
+
"""
|
|
2
19
|
|
|
3
20
|
from typing import Any, Optional
|
|
4
21
|
|