factorforge-cds 3.1.8__tar.gz → 3.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/PKG-INFO +13 -33
  2. factorforge_cds-3.2.0/README.md +88 -0
  3. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/pyproject.toml +2 -1
  4. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/__init__.py +1 -1
  5. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/analysis/feasibility.py +15 -6
  6. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/analysis/metrics.py +26 -0
  7. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/engines/__init__.py +1 -1
  8. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/__init__.py +1 -1
  9. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/optimizer.py +8 -3
  10. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/pipeline.py +38 -2
  11. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/rules/domesticator.py +12 -0
  12. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/rules/reverse_translator.py +6 -3
  13. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/rules/rule_engine.py +10 -3
  14. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/scoring.py +64 -23
  15. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/validator.py +12 -3
  16. factorforge_cds-3.2.0/src/factorforge/io/__init__.py +14 -0
  17. factorforge_cds-3.2.0/src/factorforge/io/fasta.py +132 -0
  18. factorforge_cds-3.2.0/src/factorforge/io/validation.py +48 -0
  19. factorforge_cds-3.2.0/src/factorforge/registry/__init__.py +0 -0
  20. factorforge_cds-3.2.0/src/factorforge/registry/registry_loader.py +18 -0
  21. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/schemas/design_package.py +18 -1
  22. factorforge_cds-3.2.0/src/factorforge/schemas/design_package.schema.json +337 -0
  23. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge_cds.egg-info/PKG-INFO +13 -33
  24. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge_cds.egg-info/SOURCES.txt +25 -1
  25. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge_cds.egg-info/requires.txt +1 -0
  26. factorforge_cds-3.2.0/tests/test_baselines.py +32 -0
  27. factorforge_cds-3.2.0/tests/test_benchmark_codon_table_metadata.py +142 -0
  28. factorforge_cds-3.2.0/tests/test_benchmark_regression.py +120 -0
  29. factorforge_cds-3.2.0/tests/test_benchmark_scoring.py +15 -0
  30. factorforge_cds-3.2.0/tests/test_benchmark_smoke.py +20 -0
  31. factorforge_cds-3.2.0/tests/test_cai.py +13 -0
  32. factorforge_cds-3.2.0/tests/test_codon_table_manifest.py +105 -0
  33. factorforge_cds-3.2.0/tests/test_design_package_schema.py +44 -0
  34. factorforge_cds-3.2.0/tests/test_design_package_semantics.py +38 -0
  35. factorforge_cds-3.2.0/tests/test_design_package_serialization.py +31 -0
  36. factorforge_cds-3.2.0/tests/test_fasta_io.py +67 -0
  37. factorforge_cds-3.2.0/tests/test_gc_content.py +13 -0
  38. factorforge_cds-3.2.0/tests/test_host_profile_metadata.py +63 -0
  39. factorforge_cds-3.2.0/tests/test_iupac_validation.py +39 -0
  40. factorforge_cds-3.2.0/tests/test_no_raw_sequence_logging.py +45 -0
  41. factorforge_cds-3.2.0/tests/test_openbio_missing_metric_contract.py +36 -0
  42. factorforge_cds-3.2.0/tests/test_parameter_registry.py +77 -0
  43. factorforge_cds-3.2.0/tests/test_registry_production_sync.py +76 -0
  44. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/tests/test_restriction_sites.py +17 -0
  45. factorforge_cds-3.2.0/tests/test_translation_integrity.py +19 -0
  46. factorforge_cds-3.1.8/README.md +0 -109
  47. factorforge_cds-3.1.8/src/factorforge/schemas/design_package.schema.json +0 -373
  48. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/LICENSE +0 -0
  49. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/setup.cfg +0 -0
  50. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/__main__.py +0 -0
  51. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/analysis/__init__.py +0 -0
  52. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/cli/__init__.py +0 -0
  53. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/cli/legacy_cli.py +0 -0
  54. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/cli/main.py +0 -0
  55. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/core/interfaces/__init__.py +0 -0
  56. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/core/interfaces/exporter.py +0 -0
  57. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/core/interfaces/optimizer.py +0 -0
  58. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/core/interfaces/validator.py +0 -0
  59. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/data/nbenthamiana_codons.json +0 -0
  60. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/data/nbenthamiana_golden_set.json +0 -0
  61. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/data/ntabacum_codons.json +0 -0
  62. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/data/templates/high_expression.json +0 -0
  63. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/data/templates/standard_expression.json +0 -0
  64. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/data/wolffia_globosa_codons.json +0 -0
  65. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/database.py +0 -0
  66. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/codon_table_builder.py +0 -0
  67. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/construct_builder.py +0 -0
  68. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/exporter.py +0 -0
  69. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/rules/__init__.py +0 -0
  70. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/scoring_ml.py +0 -0
  71. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/utils.py +0 -0
  72. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/engines/registry.py +0 -0
  73. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/schemas/__init__.py +0 -0
  74. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/utils/__init__.py +0 -0
  75. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/utils/construct_id.py +0 -0
  76. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/utils/exceptions.py +0 -0
  77. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/utils/restriction_sites.py +0 -0
  78. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/utils/sequence_validator.py +0 -0
  79. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/utils/validation.py +0 -0
  80. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/validation/__init__.py +0 -0
  81. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/validation/cli.py +0 -0
  82. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge/validation/package_generator.py +0 -0
  83. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge_cds.egg-info/dependency_links.txt +0 -0
  84. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge_cds.egg-info/entry_points.txt +0 -0
  85. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/src/factorforge_cds.egg-info/top_level.txt +0 -0
  86. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/tests/test_database.py +0 -0
  87. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/tests/test_legacy_cli.py +0 -0
  88. {factorforge_cds-3.1.8 → factorforge_cds-3.2.0}/tests/test_sequence_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: factorforge-cds
3
- Version: 3.1.8
3
+ Version: 3.2.0
4
4
  Summary: FactorForge - open-source constraint-based CDS design engine by Eijex.
5
5
  Author-email: Eijex <eijex.lab@gmail.com>
6
6
  License-Expression: AGPL-3.0-only
@@ -20,6 +20,7 @@ Requires-Dist: requests>=2.31
20
20
  Requires-Dist: click>=8.0
21
21
  Requires-Dist: pydantic>=2.0
22
22
  Provides-Extra: dev
23
+ Requires-Dist: jsonschema>=4.0; extra == "dev"
23
24
  Requires-Dist: pytest>=7.0; extra == "dev"
24
25
  Requires-Dist: pytest-cov>=4.0; extra == "dev"
25
26
  Requires-Dist: ruff>=0.1; extra == "dev"
@@ -31,7 +32,7 @@ Dynamic: license-file
31
32
 
32
33
  # FactorForge
33
34
 
34
- **Open-source constraint-based CDS design engine for plant expression workflows, with initial focus on *Nicotiana benthamiana* and Tobacco BY-2.**
35
+ **Open-source constraint-based CDS design engine for sequence-level CDS design, with primary support for *Nicotiana benthamiana* (Tobacco BY-2: experimental).**
35
36
 
36
37
  [![License](https://img.shields.io/badge/license-AGPL--3.0-blue.svg)](LICENSE)
37
38
  [![Python](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/)
@@ -41,7 +42,7 @@ Dynamic: license-file
41
42
  [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.20407331.svg)](https://doi.org/10.5281/zenodo.20407331)
42
43
  [![Web App](https://img.shields.io/badge/web-factorforge.eijex.com-brightgreen.svg)](https://factorforge.eijex.com)
43
44
 
44
- FactorForge optimizes protein sequences into host-compatible CDS by maximizing CAI, controlling GC content, eliminating PolyA signals, and producing MoClo/Golden Gate-ready constructs. Supports *N. benthamiana* (agroinfiltration) and Tobacco BY-2 (`--host by2`, bioreactor/cGMP workflows).
45
+ FactorForge performs profile-guided CDS design with CAI/GC metrics, PolyA-signal screening, and Golden Gate/MoClo-aware checks. Primary support: *N. benthamiana* (agroinfiltration). Experimental host context: Tobacco BY-2 (`--host by2`).
45
46
 
46
47
  **→ [Full Documentation](https://eijex.github.io/factorforge-cds/)**
47
48
 
@@ -65,7 +66,7 @@ Or use the **[web app](https://factorforge.eijex.com)** — no installation requ
65
66
  | **Web App** | No installation, demo & light use | [factorforge.eijex.com](https://factorforge.eijex.com) |
66
67
  | **CLI / Python** | Local use, batch processing, data privacy | `pip install factorforge-cds` |
67
68
  | **Docker** | Full web interface locally | `docker pull ghcr.io/eijex/factorforge-cds:latest` |
68
- | **Eijex MCP** | AI agent access (Claude Code, Cursor) | [mcp.eijex.com](https://mcp.eijex.com) |
69
+ | **Eijex MCP** | MCP-compatible agent access | [mcp.eijex.com](https://mcp.eijex.com) |
69
70
 
70
71
  ---
71
72
 
@@ -82,59 +83,38 @@ and are not imported by the installed package or exposed as supported engines.
82
83
 
83
84
  ---
84
85
 
85
- ## Development History
86
-
87
- FactorForge has gone through several implementation generations before the current public release:
88
-
89
- | Generation | Status | Description |
90
- |-----------|--------|-------------|
91
- | **v1** — NBent_OptiCodon | Internal | Thesis-derived codon optimization baseline for *N. benthamiana* |
92
- | **v2** — Rule-Based Engine | Internal → Production | Deterministic, constraint-aware design engine; became the foundation for the public release |
93
- | **v3-alpha** — ML Prototype | Archived | ML-based design attempt; performance was insufficient for production use; preserved under `archive/v3-ml-prototype/` |
94
- | **v3.0+** — Current release | Public | Open-source release of the matured v2 engine under `factorforge.engines.profile` |
95
- | **v3.7+** — ML Engine | Planned | ML-based design as `--engine ml`; added once sufficient wet-lab data is available |
96
-
97
- The `archive/` directory preserves all three earlier tracks for provenance. None are installed or exposed by the current package.
98
-
99
- ---
100
-
101
86
  ## ⚠️ Validation Status
102
87
 
103
- FactorForge predictions are **in-silico only** and have not been experimentally validated in wet-lab conditions. See [Validation](https://eijex.github.io/factorforge-cds/validation/) and [VALIDATION.md](VALIDATION.md).
88
+ FactorForge outputs are **in-silico only** and have not been experimentally validated in wet-lab conditions. See [Validation](https://eijex.github.io/factorforge-cds/validation/) and [VALIDATION.md](VALIDATION.md).
104
89
 
105
90
  ---
106
91
 
107
92
  ## Citing
108
93
 
109
94
  ```
110
- FactorForge v3.1.8 (2026). Open-source constraint-based CDS design engine.
95
+ FactorForge v3.2.0 (2026). Open-source constraint-based CDS design engine.
111
96
  Eijex. https://github.com/eijex/factorforge-cds
112
97
  ```
113
98
 
114
- *A citable publication is in preparation.*
115
-
116
99
  ---
117
100
 
118
- ## Contributors
101
+ ## Maintainer
119
102
 
120
- | | Name | Role |
121
- |--|------|------|
122
- | 👤 | Mun-Kyu Kim ([@eijex](https://github.com/eijex)) | Author & maintainer |
123
- | 🤖 | Claude (Anthropic) | Design, analysis, planning |
124
- | 🤖 | Codex (OpenAI) | Implementation |
103
+ Mun-Kyu Kim ([@eijex](https://github.com/eijex))
125
104
 
126
105
  ## License
127
106
 
128
107
  GNU Affero General Public License v3.0 — see [LICENSE](LICENSE).
129
108
 
130
- **Disclaimer:** FactorForge is provided for research purposes only. Predictions are computational and have not been experimentally validated.
109
+ **Disclaimer:** FactorForge is provided for research purposes only. Outputs are computational and have not been experimentally validated.
131
110
 
132
111
  ---
133
112
 
134
113
  ## Get in Touch
135
114
 
136
115
  - **Docs** — [eijex.github.io/factorforge-cds](https://eijex.github.io/factorforge-cds/)
137
- - **Wet-lab Results** — [Submit via Google Form](https://docs.google.com/forms/d/e/1FAIpQLSeSx-wYvF6YwHhSPdLMl-L44frCugdm25X_eDz50OaqTD66qA/viewform?usp=header) (recommended) or [GitHub Issue](https://github.com/eijex/factorforge-cds/issues/new?template=wet_lab_result.yml)
116
+ - **Wet-lab Results** — Public-safe validation summaries are welcome. Do not submit raw sequences, confidential construct details, internal batch IDs, patient data, private contact information, exact process parameters, or confidential partner/customer data. See [VALIDATION.md](VALIDATION.md) before submitting.
138
117
  - **GitHub Issues** — bugs, features: [github.com/eijex/factorforge-cds/issues](https://github.com/eijex/factorforge-cds/issues)
139
118
  - **Email** — eijex.lab@gmail.com
140
- - **Web** — [factorforge.eijex.com](https://factorforge.eijex.com)
119
+ - **FactorForge** — [factorforge.eijex.com](https://factorforge.eijex.com)
120
+ - **Lab** — [www.eijex.com](https://www.eijex.com)
@@ -0,0 +1,88 @@
1
+ # FactorForge
2
+
3
+ **Open-source constraint-based CDS design engine for sequence-level CDS design, with primary support for *Nicotiana benthamiana* (Tobacco BY-2: experimental).**
4
+
5
+ [![License](https://img.shields.io/badge/license-AGPL--3.0-blue.svg)](LICENSE)
6
+ [![Python](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/)
7
+ [![PyPI](https://img.shields.io/pypi/v/factorforge-cds.svg)](https://pypi.org/project/factorforge-cds/)
8
+ [![CI](https://github.com/eijex/factorforge-cds/actions/workflows/ci.yml/badge.svg)](https://github.com/eijex/factorforge-cds/actions/workflows/ci.yml)
9
+ [![codecov](https://codecov.io/gh/eijex/factorforge-cds/branch/main/graph/badge.svg)](https://codecov.io/gh/eijex/factorforge-cds)
10
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.20407331.svg)](https://doi.org/10.5281/zenodo.20407331)
11
+ [![Web App](https://img.shields.io/badge/web-factorforge.eijex.com-brightgreen.svg)](https://factorforge.eijex.com)
12
+
13
+ FactorForge performs profile-guided CDS design with CAI/GC metrics, PolyA-signal screening, and Golden Gate/MoClo-aware checks. Primary support: *N. benthamiana* (agroinfiltration). Experimental host context: Tobacco BY-2 (`--host by2`).
14
+
15
+ **→ [Full Documentation](https://eijex.github.io/factorforge-cds/)**
16
+
17
+ ---
18
+
19
+ ## Quick Start
20
+
21
+ ```bash
22
+ pip install factorforge-cds
23
+ factorforge optimize my_protein.fasta -o output.fasta
24
+ ```
25
+
26
+ Or use the **[web app](https://factorforge.eijex.com)** — no installation required.
27
+
28
+ ---
29
+
30
+ ## Access Options
31
+
32
+ | Method | Description | Link |
33
+ |--------|-------------|------|
34
+ | **Web App** | No installation, demo & light use | [factorforge.eijex.com](https://factorforge.eijex.com) |
35
+ | **CLI / Python** | Local use, batch processing, data privacy | `pip install factorforge-cds` |
36
+ | **Docker** | Full web interface locally | `docker pull ghcr.io/eijex/factorforge-cds:latest` |
37
+ | **Eijex MCP** | MCP-compatible agent access | [mcp.eijex.com](https://mcp.eijex.com) |
38
+
39
+ ---
40
+
41
+ ## Repository Structure
42
+
43
+ The supported production engine is the deterministic profile engine under:
44
+
45
+ ```text
46
+ src/factorforge/engines/profile/
47
+ ```
48
+
49
+ Historical implementation tracks are preserved under `archive/` for provenance
50
+ and are not imported by the installed package or exposed as supported engines.
51
+
52
+ ---
53
+
54
+ ## ⚠️ Validation Status
55
+
56
+ FactorForge outputs are **in-silico only** and have not been experimentally validated in wet-lab conditions. See [Validation](https://eijex.github.io/factorforge-cds/validation/) and [VALIDATION.md](VALIDATION.md).
57
+
58
+ ---
59
+
60
+ ## Citing
61
+
62
+ ```
63
+ FactorForge v3.2.0 (2026). Open-source constraint-based CDS design engine.
64
+ Eijex. https://github.com/eijex/factorforge-cds
65
+ ```
66
+
67
+ ---
68
+
69
+ ## Maintainer
70
+
71
+ Mun-Kyu Kim ([@eijex](https://github.com/eijex))
72
+
73
+ ## License
74
+
75
+ GNU Affero General Public License v3.0 — see [LICENSE](LICENSE).
76
+
77
+ **Disclaimer:** FactorForge is provided for research purposes only. Outputs are computational and have not been experimentally validated.
78
+
79
+ ---
80
+
81
+ ## Get in Touch
82
+
83
+ - **Docs** — [eijex.github.io/factorforge-cds](https://eijex.github.io/factorforge-cds/)
84
+ - **Wet-lab Results** — Public-safe validation summaries are welcome. Do not submit raw sequences, confidential construct details, internal batch IDs, patient data, private contact information, exact process parameters, or confidential partner/customer data. See [VALIDATION.md](VALIDATION.md) before submitting.
85
+ - **GitHub Issues** — bugs, features: [github.com/eijex/factorforge-cds/issues](https://github.com/eijex/factorforge-cds/issues)
86
+ - **Email** — eijex.lab@gmail.com
87
+ - **FactorForge** — [factorforge.eijex.com](https://factorforge.eijex.com)
88
+ - **Lab** — [www.eijex.com](https://www.eijex.com)
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "factorforge-cds"
7
- version = "3.1.8"
7
+ version = "3.2.0"
8
8
  description = "FactorForge - open-source constraint-based CDS design engine by Eijex."
9
9
  readme = "README.md"
10
10
  license = "AGPL-3.0-only"
@@ -28,6 +28,7 @@ dependencies = [
28
28
 
29
29
  [project.optional-dependencies]
30
30
  dev = [
31
+ "jsonschema>=4.0",
31
32
  "pytest>=7.0",
32
33
  "pytest-cov>=4.0",
33
34
  "ruff>=0.1",
@@ -4,7 +4,7 @@ FactorForge - Codon Optimization Platform
4
4
  profile: constraint-aware rule/profile engine
5
5
  """
6
6
 
7
- __version__ = "3.1.8"
7
+ __version__ = "3.2.0"
8
8
  __author__ = "Eijex"
9
9
 
10
10
  # Auto-register engines (safe when running from source tree)
@@ -14,6 +14,16 @@ from factorforge.analysis.metrics import (
14
14
  )
15
15
 
16
16
 
17
+ # Defaults calibrated to nbenthamiana profile engine output distribution
18
+ # (analysis 004, n=49): avg CAI=0.76, avg GC=60.1% (range 55-71%).
19
+ # DEFAULT_CAI_TARGET=0.82 aligns with industry practice (>0.8) and is achievable.
20
+ # Exported as named constants so tests/test_registry_production_sync.py can
21
+ # strictly compare them against the registry (single source of truth).
22
+ DEFAULT_CAI_TARGET: float = 0.82
23
+ DEFAULT_GC_LOW: float = 55.0
24
+ DEFAULT_GC_HIGH: float = 65.0
25
+
26
+
17
27
  AA_TO_CODONS: dict[str, list[str]] = {}
18
28
  for _codon, _aa in STANDARD_GENETIC_CODE.items():
19
29
  if _aa == "*":
@@ -88,9 +98,9 @@ def _reconstruct_sequence(
88
98
  def analyze_feasibility(
89
99
  protein_sequence: str,
90
100
  codon_weights: dict[str, float],
91
- target_cai: float = 0.82,
92
- target_gc_low: float = 55.0,
93
- target_gc_high: float = 65.0,
101
+ target_cai: float = DEFAULT_CAI_TARGET,
102
+ target_gc_low: float = DEFAULT_GC_LOW,
103
+ target_gc_high: float = DEFAULT_GC_HIGH,
94
104
  gc_ranges: list[tuple[float, float]] | None = None,
95
105
  ) -> dict[str, Any]:
96
106
  """Compute exact CAI/GC feasibility over synonymous codon choices.
@@ -99,9 +109,8 @@ def analyze_feasibility(
99
109
  global GC count. This is exact for global GC and CAI under the supplied
100
110
  codon weights.
101
111
 
102
- Defaults calibrated to nbenthamiana profile engine output distribution
103
- (analysis 004, n=49): avg CAI=0.76, avg GC=60.1% (range 55-71%).
104
- target_cai=0.82 aligns with industry practice (>0.8) and is achievable.
112
+ See module-level DEFAULT_CAI_TARGET / DEFAULT_GC_LOW / DEFAULT_GC_HIGH for
113
+ the calibration rationale (analysis 004, n=49).
105
114
  """
106
115
  protein = "".join(protein_sequence.upper().split()).rstrip("*")
107
116
  if not protein:
@@ -277,6 +277,32 @@ def calculate_cai(sequence: str, codon_weights: dict[str, float]) -> float:
277
277
  return math.exp(log_sum / count) if count else 0.0
278
278
 
279
279
 
280
+ def calculate_dinucleotide_score(
281
+ sequence: str,
282
+ cpg_weight: float = 0.0,
283
+ tpa_weight: float = 1.0,
284
+ ) -> float:
285
+ """Score dinucleotide avoidance.
286
+
287
+ Plant default: CpG inactive (cpg_weight=0.0), only TpA is penalized.
288
+ Mammalian opt-in: set cpg_weight=1.0 and tpa_weight=1.0 to penalize both.
289
+ """
290
+ from factorforge.engines.profile.utils import calculate_dinucleotide_ratio
291
+
292
+ if len(sequence) < 6:
293
+ return 1.0
294
+
295
+ total_weight = cpg_weight + tpa_weight
296
+ if total_weight == 0:
297
+ return 1.0
298
+
299
+ cpg_ratio = calculate_dinucleotide_ratio(sequence, "CG")
300
+ tpa_ratio = calculate_dinucleotide_ratio(sequence, "TA")
301
+ cpg_score = max(0.0, 1.0 - cpg_ratio / 2.0)
302
+ tpa_score = max(0.0, 1.0 - tpa_ratio / 2.0)
303
+ return (cpg_weight * cpg_score + tpa_weight * tpa_score) / total_weight
304
+
305
+
280
306
  def codon_usage_profile(sequence: str) -> dict[str, dict[str, float | int | str]]:
281
307
  """Return codon counts and frequencies for a DNA sequence."""
282
308
  codons = _codons(sequence)
@@ -13,7 +13,7 @@ def register_builtin_engines() -> None:
13
13
  "profile",
14
14
  RuleBasedOptimizer,
15
15
  metadata={
16
- "version": "3.1.8",
16
+ "version": "3.2.0",
17
17
  "engine_type": "profile_rule_based",
18
18
  "role": "stable_profile_engine",
19
19
  "stable": True,
@@ -5,7 +5,7 @@ Production system (2026)
5
5
  Plant-specific rule-based optimization
6
6
  """
7
7
 
8
- __version__ = "3.1.8"
8
+ __version__ = "3.2.0"
9
9
 
10
10
  from .optimizer import RuleBasedOptimizer
11
11
  from .pipeline import OptimizationPipeline
@@ -9,7 +9,7 @@ from factorforge.core.interfaces import OptimizationResult, OptimizerEngine
9
9
  from .exporter import SequenceExporter
10
10
  from .rules.reverse_translator import OptimizationProfile, ReverseTranslator
11
11
  from .rules.rule_engine import RuleEngine
12
- from .scoring import calculate_composite_score
12
+ from .scoring import calculate_composite_score, compute_mfe_evidence
13
13
  from .validator import InputValidator
14
14
 
15
15
 
@@ -17,7 +17,7 @@ class RuleBasedOptimizer(OptimizerEngine):
17
17
  """Profile-based rule optimization engine."""
18
18
 
19
19
  name = "Profile-based"
20
- version = "3.1.8"
20
+ version = "3.2.0"
21
21
 
22
22
  def __init__(self) -> None:
23
23
  self.validator = InputValidator()
@@ -30,6 +30,7 @@ class RuleBasedOptimizer(OptimizerEngine):
30
30
  sequence: str,
31
31
  profile: str | None = "balanced",
32
32
  host: str = "nbenthamiana",
33
+ seed: int | None = None,
33
34
  **kwargs: Any,
34
35
  ) -> OptimizationResult:
35
36
  """
@@ -91,7 +92,7 @@ class RuleBasedOptimizer(OptimizerEngine):
91
92
  candidates = [{"sequence": optimized_dna, "cai": cai, "gc": gc, "score": score}]
92
93
  else:
93
94
  candidates = translator.generate_candidates(
94
- processed_seq, profile=opt_profile, n=1
95
+ processed_seq, profile=opt_profile, n=1, seed=seed
95
96
  )
96
97
  if not candidates:
97
98
  raise ValueError("No candidates generated for input sequence.")
@@ -117,6 +118,10 @@ class RuleBasedOptimizer(OptimizerEngine):
117
118
  "score": candidates[0]["score"],
118
119
  "violations": sum(len(v) for v in scan_results.values()),
119
120
  }
121
+ # MFE provenance: expose whether MFE was actually computed so downstream
122
+ # artifacts (API response, Design Package) never report an uncomputed
123
+ # MFE as a misleading 0.0 (016 audit). Score value is unchanged.
124
+ metrics.update(compute_mfe_evidence(optimized_dna, profile=profile_value))
120
125
 
121
126
  return OptimizationResult(
122
127
  sequence=optimized_dna,
@@ -18,9 +18,14 @@ from factorforge.engines.profile.rules.reverse_translator import (
18
18
  ReverseTranslator,
19
19
  )
20
20
  from factorforge.engines.profile.rules.rule_engine import RuleEngine
21
- from factorforge.engines.profile.scoring import calculate_composite_score
21
+ from factorforge.engines.profile.scoring import (
22
+ calculate_composite_score,
23
+ compute_mfe_evidence,
24
+ )
22
25
  from factorforge.engines.profile.validator import InputValidator
26
+ from factorforge.analysis.metrics import translate_dna
23
27
  from factorforge.utils.construct_id import generate_construct_id
28
+ from factorforge.utils.sequence_validator import validate_cds_output
24
29
 
25
30
  logger = logging.getLogger(__name__)
26
31
 
@@ -48,7 +53,15 @@ class PipelineResult:
48
53
  "optimization_profile": self.metadata.get("profile", ""),
49
54
  "cai_score": round(metrics.get("cai", 0.0), 4),
50
55
  "gc_content_pct": round(metrics.get("gc", 0.0), 2),
51
- "mfe_kcal_mol": round(metrics.get("mfe", 0.0), 2),
56
+ # MFE provenance (016 audit): None when not computed (e.g. ViennaRNA
57
+ # unavailable) — never report an uncomputed MFE as a misleading 0.0.
58
+ "mfe_kcal_mol": (
59
+ round(metrics["mfe_kcal_mol"], 2)
60
+ if metrics.get("mfe_kcal_mol") is not None
61
+ else None
62
+ ),
63
+ "mfe_status": metrics.get("mfe_status", "not_computed"),
64
+ "mfe_used": metrics.get("mfe_used", False),
52
65
  "polya_signal_count": len(scan.get("polya", [])),
53
66
  "domestication_edits": len(dom.get("removed_sites", [])),
54
67
  "sequence_length_aa": len(self.sequence) // 3,
@@ -175,6 +188,7 @@ class OptimizationPipeline:
175
188
 
176
189
  if seq_type == "dna":
177
190
  optimized_dna = processed
191
+ expected_protein = translate_dna(processed).rstrip("*")
178
192
  cai = translator.calculate_cai(optimized_dna)
179
193
  gc = translator.calculate_gc_content(optimized_dna)
180
194
  score = calculate_composite_score(
@@ -182,6 +196,7 @@ class OptimizationPipeline:
182
196
  )
183
197
  candidate_metrics = {"cai": cai, "gc": gc, "score": score}
184
198
  else:
199
+ expected_protein = processed.rstrip("*")
185
200
  logger.debug(f"Generating candidates with profile: {opt_profile.value}")
186
201
  candidates = translator.generate_candidates(processed, profile=opt_profile, n=1)
187
202
  if not candidates:
@@ -251,7 +266,20 @@ class OptimizationPipeline:
251
266
 
252
267
  assembly_standard = kwargs.get("assembly_standard", "golden_gate")
253
268
  domestication = domesticator.domesticate(optimized_dna, standard=assembly_standard)
269
+ if not domestication.get("success", False):
270
+ unfixable = domestication.get("unfixable", [])
271
+ error = domestication.get("error")
272
+ detail = error or f"unfixable restriction sites: {unfixable}"
273
+ raise ValueError(f"Domestication failed for {assembly_standard}: {detail}")
274
+
254
275
  domesticated_sequence = domestication.get("domesticated_seq", optimized_dna)
276
+ final_validation = validate_cds_output(expected_protein, domesticated_sequence)
277
+ if not final_validation["passed"]:
278
+ raise ValueError(
279
+ "Final CDS validation failed: "
280
+ f"{final_validation['errors']} "
281
+ f"(aa_identity={final_validation['aa_identity']:.4f})"
282
+ )
255
283
 
256
284
  template_name = construct_template or self.construct_template
257
285
  if template_name:
@@ -269,6 +297,13 @@ class OptimizationPipeline:
269
297
  construct_record = None
270
298
  final_sequence = domesticated_sequence
271
299
 
300
+ # MFE provenance for the final output sequence (016 audit): record
301
+ # whether MFE was computed so export_features / Design Package never
302
+ # report an uncomputed MFE as 0.0.
303
+ candidate_metrics.update(
304
+ compute_mfe_evidence(domesticated_sequence, profile=effective_profile)
305
+ )
306
+
272
307
  metadata: dict[str, Any] = {
273
308
  "construct_id": generate_construct_id(),
274
309
  "profile": effective_profile,
@@ -278,6 +313,7 @@ class OptimizationPipeline:
278
313
  "validation": val_result,
279
314
  "scan_results": scan_results,
280
315
  "domestication": domestication,
316
+ "final_validation": final_validation,
281
317
  "metrics": candidate_metrics,
282
318
  "scan_mode": scan_mode,
283
319
  }
@@ -20,6 +20,18 @@ class Domesticator:
20
20
  - BioBricks (EcoRI, XbaI, SpeI, PstI)
21
21
  """
22
22
 
23
+ # Canonical Golden Gate Type IIS enzyme set, exported as GOLDEN_GATE_ENZYMES
24
+ # so tests/test_registry_production_sync.py::test_type_iis_sync can strictly
25
+ # compare it against the registry (single source of truth) instead of warning.
26
+ #
27
+ # BpiI and BbsI share the same GAAGAC Type IIS recognition/cut behavior in
28
+ # FactorForge's Golden Gate scanning context. The existing FactorForge
29
+ # production code and documentation consistently use BpiI as the canonical
30
+ # label; BbsI is a common synonym/vendor naming convention for the same
31
+ # scanning target. This is a naming normalization, not a biological
32
+ # threshold change. Order matches the registry value for stable comparison.
33
+ GOLDEN_GATE_ENZYMES: tuple[str, ...] = ("BsaI", "BpiI", "BsmBI")
34
+
23
35
  # Assembly standard definitions
24
36
  ASSEMBLY_STANDARDS: dict[str, dict[str, Any]] = {
25
37
  "golden_gate": {
@@ -551,8 +551,8 @@ class ReverseTranslator:
551
551
  """Apply N-terminal codon ramp for co-translational folding.
552
552
 
553
553
  Replaces the first `ramp_codons` codons with lower-frequency synonymous
554
- codons (bottom 50% by frequency) to slow the ribosome at the N-terminus.
555
- Single-codon amino acids (Met, Trp) are left unchanged.
554
+ codons (bottom 25% by frequency; cutoff = 3*len//4) to slow the ribosome
555
+ at the N-terminus. Single-codon amino acids (Met, Trp) are left unchanged.
556
556
 
557
557
  TODO: ramp profile is currently not in VALID_PROFILES (not publicly accessible).
558
558
  Before re-enabling, revisit ramp_codons=50:
@@ -671,6 +671,7 @@ class ReverseTranslator:
671
671
  protein_seq: str,
672
672
  profile: OptimizationProfile = OptimizationProfile.BALANCED,
673
673
  n: int = 5,
674
+ seed: int | None = None,
674
675
  **kwargs: Any,
675
676
  ) -> list[dict[str, Any]]:
676
677
  """
@@ -697,6 +698,9 @@ class ReverseTranslator:
697
698
  if n < 1:
698
699
  raise ValueError("n must be >= 1")
699
700
 
701
+ # Seed the module-level RNG before any candidate generation (covers both n=1 fast path and n>1).
702
+ random.seed(seed if seed is not None else secrets.randbits(32))
703
+
700
704
  def _build_candidate() -> dict[str, Any]:
701
705
  dna_seq = self.reverse_translate(protein_seq, profile, **kwargs)
702
706
  cai = self.calculate_cai(dna_seq)
@@ -720,7 +724,6 @@ class ReverseTranslator:
720
724
 
721
725
  candidates: list[dict[str, Any]] = []
722
726
  last_error: Exception | None = None
723
- random.seed(secrets.randbits(32))
724
727
 
725
728
  for attempt in range(n):
726
729
  try:
@@ -354,13 +354,20 @@ class RuleEngine:
354
354
  max_gc: float = 75,
355
355
  ) -> list[dict[str, Any]]:
356
356
  """
357
- Detect extreme GC regions
357
+ Detect extreme GC regions in a sliding local window.
358
+
359
+ This is a LOCAL synthesis/extreme-window guard (default 25-75% over a
360
+ 50 bp window), NOT the global GC target. Global GC is governed separately
361
+ by the scoring band (GC_OPT_MIN/MAX, ~55-65%) and the API/DP gc_min/gc_max
362
+ constraints. The wide 25-75% band intentionally flags only synthesis-hostile
363
+ local windows; narrowing it toward the global optimum would raise false
364
+ positives against the engine's own output distribution (analysis 004: 55-71%).
358
365
 
359
366
  Args:
360
367
  seq: DNA sequence
361
368
  window: Window size (bp)
362
- min_gc: Minimum GC% threshold
363
- max_gc: Maximum GC% threshold
369
+ min_gc: Minimum local GC% threshold (synthesis guard, not global target)
370
+ max_gc: Maximum local GC% threshold (synthesis guard, not global target)
364
371
 
365
372
  Returns:
366
373
  List of violations
@@ -34,6 +34,8 @@ class ScoringConfig:
34
34
  w_gc: float = 0.3
35
35
  w_mfe: float = 0.2
36
36
  w_dinuc: float = 0.0 # CpG/TpA dinucleotide penalty (opt-in, default off)
37
+ cpg_weight: float = 0.0 # plant default: CpG inactive
38
+ tpa_weight: float = 1.0 # plant default: TpA active
37
39
  w_syncodonlm: float = 0.0 # SynCodonLM quality score (opt-in, default off)
38
40
  gc_opt: float = GC_OPT_MID # no longer used by calculate_composite_score (superseded by
39
41
  # gc_min/gc_max band); retained for external API compatibility
@@ -187,31 +189,19 @@ def gc_band_score(
187
189
  return max(0.0, 1.0 - distance / decay_width)
188
190
 
189
191
 
190
- def calculate_dinucleotide_score(sequence: str) -> float:
191
- """Calculate a dinucleotide avoidance score (0-1, higher = fewer CpG/TpA).
192
-
193
- Combines CpG and TpA observed/expected ratios. A sequence with no CpG
194
- and no TpA scores 1.0; high density scores toward 0.0.
195
-
196
- Args:
197
- sequence: DNA sequence.
192
+ def calculate_dinucleotide_score(
193
+ sequence: str,
194
+ cpg_weight: float = 0.0,
195
+ tpa_weight: float = 1.0,
196
+ ) -> float:
197
+ """Score dinucleotide avoidance.
198
198
 
199
- Returns:
200
- Dinucleotide avoidance score (0-1).
199
+ Plant default: CpG inactive (cpg_weight=0.0), only TpA is penalized.
200
+ Mammalian opt-in: set cpg_weight=1.0 and tpa_weight=1.0 to penalize both.
201
201
  """
202
- from factorforge.engines.profile.utils import calculate_dinucleotide_ratio
202
+ from factorforge.analysis.metrics import calculate_dinucleotide_score as _score
203
203
 
204
- if len(sequence) < 6:
205
- return 1.0
206
-
207
- cpg_ratio = calculate_dinucleotide_ratio(sequence, "CG")
208
- tpa_ratio = calculate_dinucleotide_ratio(sequence, "TA")
209
-
210
- # Score: 1.0 when ratio=0, 0.0 when ratio>=2.0
211
- cpg_score = max(0.0, 1.0 - cpg_ratio / 2.0)
212
- tpa_score = max(0.0, 1.0 - tpa_ratio / 2.0)
213
-
214
- return (cpg_score + tpa_score) / 2.0
204
+ return _score(sequence, cpg_weight=cpg_weight, tpa_weight=tpa_weight)
215
205
 
216
206
 
217
207
  def calculate_composite_score(
@@ -279,7 +269,11 @@ def calculate_composite_score(
279
269
  dinuc_score = 0.5 # neutral default
280
270
  actual_w_dinuc = config.w_dinuc
281
271
  if actual_w_dinuc > 0 and sequence is not None:
282
- dinuc_score = calculate_dinucleotide_score(sequence)
272
+ dinuc_score = calculate_dinucleotide_score(
273
+ sequence,
274
+ cpg_weight=config.cpg_weight,
275
+ tpa_weight=config.tpa_weight,
276
+ )
283
277
  elif actual_w_dinuc > 0:
284
278
  actual_w_dinuc = 0.0 # Cannot compute without sequence
285
279
 
@@ -308,3 +302,50 @@ def calculate_composite_score(
308
302
  )
309
303
 
310
304
  return round(score, 3)
305
+
306
+
307
+ def compute_mfe_evidence(
308
+ sequence: str | None,
309
+ config: ScoringConfig | None = None,
310
+ profile: str | None = None,
311
+ ) -> dict[str, Any]:
312
+ """Return MFE provenance metadata for a scored sequence.
313
+
314
+ Mirrors the MFE branch of ``calculate_composite_score`` WITHOUT changing the
315
+ score. Its purpose is honesty: when MFE is not computed (e.g. ViennaRNA is
316
+ unavailable in the deployment, as on Vercel), callers must be able to tell
317
+ that ``mfe_kcal_mol`` is absent rather than a genuine 0.0.
318
+
319
+ Returns a dict with:
320
+ mfe_kcal_mol: float | None (None when not computed)
321
+ mfe_status: "computed" | "not_computed"
322
+ mfe_used: bool (whether MFE contributed to the score; True only when an MFE value was computed)
323
+ mfe_warning: str | None (reason when not used)
324
+ score_components: {cai_used, gc_used, mfe_used} (cai_used and gc_used are always True)
325
+ """
326
+ if config is None:
327
+ profile_name = (profile or "balanced").lower()
328
+ config = PROFILE_SCORING_CONFIGS.get(profile_name) or PROFILE_SCORING_CONFIGS["balanced"]
329
+
330
+ mfe_value: float | None = None
331
+ reason: str | None = None
332
+
333
+ if not config.use_mfe:
334
+ reason = "MFE scoring is disabled for this profile."
335
+ elif sequence is None:
336
+ reason = "MFE was not computed because no sequence was provided."
337
+ elif not _check_vienna_available():
338
+ reason = "MFE was not computed because ViennaRNA is unavailable in this environment."
339
+ else:
340
+ mfe_value = calculate_mfe(sequence)
341
+ if mfe_value is None:
342
+ reason = "MFE computation failed for this sequence."
343
+
344
+ mfe_used = mfe_value is not None
345
+ return {
346
+ "mfe_kcal_mol": round(mfe_value, 2) if mfe_used else None,
347
+ "mfe_status": "computed" if mfe_used else "not_computed",
348
+ "mfe_used": mfe_used,
349
+ "mfe_warning": None if mfe_used else reason,
350
+ "score_components": {"cai_used": True, "gc_used": True, "mfe_used": mfe_used},
351
+ }