factorforge-cds 3.1.9__tar.gz → 3.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/PKG-INFO +13 -33
  2. factorforge_cds-3.2.0/README.md +88 -0
  3. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/pyproject.toml +2 -1
  4. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/__init__.py +1 -1
  5. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/analysis/feasibility.py +15 -6
  6. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/__init__.py +1 -1
  7. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/__init__.py +1 -1
  8. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/optimizer.py +3 -2
  9. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/rules/domesticator.py +12 -0
  10. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/rules/reverse_translator.py +4 -1
  11. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/validator.py +12 -3
  12. factorforge_cds-3.2.0/src/factorforge/io/__init__.py +14 -0
  13. factorforge_cds-3.2.0/src/factorforge/io/fasta.py +132 -0
  14. factorforge_cds-3.2.0/src/factorforge/io/validation.py +48 -0
  15. factorforge_cds-3.2.0/src/factorforge/registry/__init__.py +0 -0
  16. factorforge_cds-3.2.0/src/factorforge/registry/registry_loader.py +18 -0
  17. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/schemas/design_package.py +18 -1
  18. factorforge_cds-3.2.0/src/factorforge/schemas/design_package.schema.json +337 -0
  19. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge_cds.egg-info/PKG-INFO +13 -33
  20. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge_cds.egg-info/SOURCES.txt +25 -1
  21. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge_cds.egg-info/requires.txt +1 -0
  22. factorforge_cds-3.2.0/tests/test_baselines.py +32 -0
  23. factorforge_cds-3.2.0/tests/test_benchmark_codon_table_metadata.py +142 -0
  24. factorforge_cds-3.2.0/tests/test_benchmark_regression.py +120 -0
  25. factorforge_cds-3.2.0/tests/test_benchmark_scoring.py +15 -0
  26. factorforge_cds-3.2.0/tests/test_benchmark_smoke.py +20 -0
  27. factorforge_cds-3.2.0/tests/test_cai.py +13 -0
  28. factorforge_cds-3.2.0/tests/test_codon_table_manifest.py +105 -0
  29. factorforge_cds-3.2.0/tests/test_design_package_schema.py +44 -0
  30. factorforge_cds-3.2.0/tests/test_design_package_semantics.py +38 -0
  31. factorforge_cds-3.2.0/tests/test_design_package_serialization.py +31 -0
  32. factorforge_cds-3.2.0/tests/test_fasta_io.py +67 -0
  33. factorforge_cds-3.2.0/tests/test_gc_content.py +13 -0
  34. factorforge_cds-3.2.0/tests/test_host_profile_metadata.py +63 -0
  35. factorforge_cds-3.2.0/tests/test_iupac_validation.py +39 -0
  36. factorforge_cds-3.2.0/tests/test_no_raw_sequence_logging.py +45 -0
  37. factorforge_cds-3.2.0/tests/test_openbio_missing_metric_contract.py +36 -0
  38. factorforge_cds-3.2.0/tests/test_parameter_registry.py +77 -0
  39. factorforge_cds-3.2.0/tests/test_registry_production_sync.py +76 -0
  40. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/tests/test_restriction_sites.py +17 -0
  41. factorforge_cds-3.2.0/tests/test_translation_integrity.py +19 -0
  42. factorforge_cds-3.1.9/README.md +0 -109
  43. factorforge_cds-3.1.9/src/factorforge/schemas/design_package.schema.json +0 -373
  44. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/LICENSE +0 -0
  45. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/setup.cfg +0 -0
  46. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/__main__.py +0 -0
  47. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/analysis/__init__.py +0 -0
  48. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/analysis/metrics.py +0 -0
  49. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/cli/__init__.py +0 -0
  50. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/cli/legacy_cli.py +0 -0
  51. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/cli/main.py +0 -0
  52. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/core/interfaces/__init__.py +0 -0
  53. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/core/interfaces/exporter.py +0 -0
  54. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/core/interfaces/optimizer.py +0 -0
  55. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/core/interfaces/validator.py +0 -0
  56. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/data/nbenthamiana_codons.json +0 -0
  57. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/data/nbenthamiana_golden_set.json +0 -0
  58. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/data/ntabacum_codons.json +0 -0
  59. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/data/templates/high_expression.json +0 -0
  60. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/data/templates/standard_expression.json +0 -0
  61. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/data/wolffia_globosa_codons.json +0 -0
  62. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/database.py +0 -0
  63. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/codon_table_builder.py +0 -0
  64. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/construct_builder.py +0 -0
  65. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/exporter.py +0 -0
  66. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/pipeline.py +0 -0
  67. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/rules/__init__.py +0 -0
  68. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/rules/rule_engine.py +0 -0
  69. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/scoring.py +0 -0
  70. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/scoring_ml.py +0 -0
  71. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/profile/utils.py +0 -0
  72. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/engines/registry.py +0 -0
  73. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/schemas/__init__.py +0 -0
  74. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/utils/__init__.py +0 -0
  75. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/utils/construct_id.py +0 -0
  76. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/utils/exceptions.py +0 -0
  77. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/utils/restriction_sites.py +0 -0
  78. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/utils/sequence_validator.py +0 -0
  79. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/utils/validation.py +0 -0
  80. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/validation/__init__.py +0 -0
  81. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/validation/cli.py +0 -0
  82. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge/validation/package_generator.py +0 -0
  83. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge_cds.egg-info/dependency_links.txt +0 -0
  84. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge_cds.egg-info/entry_points.txt +0 -0
  85. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/src/factorforge_cds.egg-info/top_level.txt +0 -0
  86. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/tests/test_database.py +0 -0
  87. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/tests/test_legacy_cli.py +0 -0
  88. {factorforge_cds-3.1.9 → factorforge_cds-3.2.0}/tests/test_sequence_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: factorforge-cds
3
- Version: 3.1.9
3
+ Version: 3.2.0
4
4
  Summary: FactorForge - open-source constraint-based CDS design engine by Eijex.
5
5
  Author-email: Eijex <eijex.lab@gmail.com>
6
6
  License-Expression: AGPL-3.0-only
@@ -20,6 +20,7 @@ Requires-Dist: requests>=2.31
20
20
  Requires-Dist: click>=8.0
21
21
  Requires-Dist: pydantic>=2.0
22
22
  Provides-Extra: dev
23
+ Requires-Dist: jsonschema>=4.0; extra == "dev"
23
24
  Requires-Dist: pytest>=7.0; extra == "dev"
24
25
  Requires-Dist: pytest-cov>=4.0; extra == "dev"
25
26
  Requires-Dist: ruff>=0.1; extra == "dev"
@@ -31,7 +32,7 @@ Dynamic: license-file
31
32
 
32
33
  # FactorForge
33
34
 
34
- **Open-source constraint-based CDS design engine for plant expression workflows, with initial focus on *Nicotiana benthamiana* and Tobacco BY-2.**
35
+ **Open-source constraint-based CDS design engine for sequence-level CDS design, with primary support for *Nicotiana benthamiana* (Tobacco BY-2: experimental).**
35
36
 
36
37
  [![License](https://img.shields.io/badge/license-AGPL--3.0-blue.svg)](LICENSE)
37
38
  [![Python](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/)
@@ -41,7 +42,7 @@ Dynamic: license-file
41
42
  [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.20407331.svg)](https://doi.org/10.5281/zenodo.20407331)
42
43
  [![Web App](https://img.shields.io/badge/web-factorforge.eijex.com-brightgreen.svg)](https://factorforge.eijex.com)
43
44
 
44
- FactorForge optimizes protein sequences into host-compatible CDS by maximizing CAI, controlling GC content, eliminating PolyA signals, and producing MoClo/Golden Gate-ready constructs. Supports *N. benthamiana* (agroinfiltration) and Tobacco BY-2 (`--host by2`, bioreactor/cGMP workflows).
45
+ FactorForge performs profile-guided CDS design with CAI/GC metrics, PolyA-signal screening, and Golden Gate/MoClo-aware checks. Primary support: *N. benthamiana* (agroinfiltration). Experimental host context: Tobacco BY-2 (`--host by2`).
45
46
 
46
47
  **→ [Full Documentation](https://eijex.github.io/factorforge-cds/)**
47
48
 
@@ -65,7 +66,7 @@ Or use the **[web app](https://factorforge.eijex.com)** — no installation requ
65
66
  | **Web App** | No installation, demo & light use | [factorforge.eijex.com](https://factorforge.eijex.com) |
66
67
  | **CLI / Python** | Local use, batch processing, data privacy | `pip install factorforge-cds` |
67
68
  | **Docker** | Full web interface locally | `docker pull ghcr.io/eijex/factorforge-cds:latest` |
68
- | **Eijex MCP** | AI agent access (Claude Code, Cursor) | [mcp.eijex.com](https://mcp.eijex.com) |
69
+ | **Eijex MCP** | MCP-compatible agent access | [mcp.eijex.com](https://mcp.eijex.com) |
69
70
 
70
71
  ---
71
72
 
@@ -82,59 +83,38 @@ and are not imported by the installed package or exposed as supported engines.
82
83
 
83
84
  ---
84
85
 
85
- ## Development History
86
-
87
- FactorForge has gone through several implementation generations before the current public release:
88
-
89
- | Generation | Status | Description |
90
- |-----------|--------|-------------|
91
- | **v1** — NBent_OptiCodon | Internal | Thesis-derived codon optimization baseline for *N. benthamiana* |
92
- | **v2** — Rule-Based Engine | Internal → Production | Deterministic, constraint-aware design engine; became the foundation for the public release |
93
- | **v3-alpha** — ML Prototype | Archived | ML-based design attempt; performance was insufficient for production use; preserved under `archive/v3-ml-prototype/` |
94
- | **v3.0+** — Current release | Public | Open-source release of the matured v2 engine under `factorforge.engines.profile` |
95
- | **v3.7+** — ML Engine | Planned | ML-based design as `--engine ml`; added once sufficient wet-lab data is available |
96
-
97
- The `archive/` directory preserves all three earlier tracks for provenance. None are installed or exposed by the current package.
98
-
99
- ---
100
-
101
86
  ## ⚠️ Validation Status
102
87
 
103
- FactorForge predictions are **in-silico only** and have not been experimentally validated in wet-lab conditions. See [Validation](https://eijex.github.io/factorforge-cds/validation/) and [VALIDATION.md](VALIDATION.md).
88
+ FactorForge outputs are **in-silico only** and have not been experimentally validated in wet-lab conditions. See [Validation](https://eijex.github.io/factorforge-cds/validation/) and [VALIDATION.md](VALIDATION.md).
104
89
 
105
90
  ---
106
91
 
107
92
  ## Citing
108
93
 
109
94
  ```
110
- FactorForge v3.1.9 (2026). Open-source constraint-based CDS design engine.
95
+ FactorForge v3.2.0 (2026). Open-source constraint-based CDS design engine.
111
96
  Eijex. https://github.com/eijex/factorforge-cds
112
97
  ```
113
98
 
114
- *A citable publication is in preparation.*
115
-
116
99
  ---
117
100
 
118
- ## Contributors
101
+ ## Maintainer
119
102
 
120
- | | Name | Role |
121
- |--|------|------|
122
- | 👤 | Mun-Kyu Kim ([@eijex](https://github.com/eijex)) | Author & maintainer |
123
- | 🤖 | Claude (Anthropic) | Design, analysis, planning |
124
- | 🤖 | Codex (OpenAI) | Implementation |
103
+ Mun-Kyu Kim ([@eijex](https://github.com/eijex))
125
104
 
126
105
  ## License
127
106
 
128
107
  GNU Affero General Public License v3.0 — see [LICENSE](LICENSE).
129
108
 
130
- **Disclaimer:** FactorForge is provided for research purposes only. Predictions are computational and have not been experimentally validated.
109
+ **Disclaimer:** FactorForge is provided for research purposes only. Outputs are computational and have not been experimentally validated.
131
110
 
132
111
  ---
133
112
 
134
113
  ## Get in Touch
135
114
 
136
115
  - **Docs** — [eijex.github.io/factorforge-cds](https://eijex.github.io/factorforge-cds/)
137
- - **Wet-lab Results** — [Submit via Google Form](https://docs.google.com/forms/d/e/1FAIpQLSeSx-wYvF6YwHhSPdLMl-L44frCugdm25X_eDz50OaqTD66qA/viewform?usp=header) (recommended) or [GitHub Issue](https://github.com/eijex/factorforge-cds/issues/new?template=wet_lab_result.yml)
116
+ - **Wet-lab Results** — Public-safe validation summaries are welcome. Do not submit raw sequences, confidential construct details, internal batch IDs, patient data, private contact information, exact process parameters, or confidential partner/customer data. See [VALIDATION.md](VALIDATION.md) before submitting.
138
117
  - **GitHub Issues** — bugs, features: [github.com/eijex/factorforge-cds/issues](https://github.com/eijex/factorforge-cds/issues)
139
118
  - **Email** — eijex.lab@gmail.com
140
- - **Web** — [factorforge.eijex.com](https://factorforge.eijex.com)
119
+ - **FactorForge** — [factorforge.eijex.com](https://factorforge.eijex.com)
120
+ - **Lab** — [www.eijex.com](https://www.eijex.com)
@@ -0,0 +1,88 @@
1
+ # FactorForge
2
+
3
+ **Open-source constraint-based CDS design engine for sequence-level CDS design, with primary support for *Nicotiana benthamiana* (Tobacco BY-2: experimental).**
4
+
5
+ [![License](https://img.shields.io/badge/license-AGPL--3.0-blue.svg)](LICENSE)
6
+ [![Python](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/)
7
+ [![PyPI](https://img.shields.io/pypi/v/factorforge-cds.svg)](https://pypi.org/project/factorforge-cds/)
8
+ [![CI](https://github.com/eijex/factorforge-cds/actions/workflows/ci.yml/badge.svg)](https://github.com/eijex/factorforge-cds/actions/workflows/ci.yml)
9
+ [![codecov](https://codecov.io/gh/eijex/factorforge-cds/branch/main/graph/badge.svg)](https://codecov.io/gh/eijex/factorforge-cds)
10
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.20407331.svg)](https://doi.org/10.5281/zenodo.20407331)
11
+ [![Web App](https://img.shields.io/badge/web-factorforge.eijex.com-brightgreen.svg)](https://factorforge.eijex.com)
12
+
13
+ FactorForge performs profile-guided CDS design with CAI/GC metrics, PolyA-signal screening, and Golden Gate/MoClo-aware checks. Primary support: *N. benthamiana* (agroinfiltration). Experimental host context: Tobacco BY-2 (`--host by2`).
14
+
15
+ **→ [Full Documentation](https://eijex.github.io/factorforge-cds/)**
16
+
17
+ ---
18
+
19
+ ## Quick Start
20
+
21
+ ```bash
22
+ pip install factorforge-cds
23
+ factorforge optimize my_protein.fasta -o output.fasta
24
+ ```
25
+
26
+ Or use the **[web app](https://factorforge.eijex.com)** — no installation required.
27
+
28
+ ---
29
+
30
+ ## Access Options
31
+
32
+ | Method | Description | Link |
33
+ |--------|-------------|------|
34
+ | **Web App** | No installation, demo & light use | [factorforge.eijex.com](https://factorforge.eijex.com) |
35
+ | **CLI / Python** | Local use, batch processing, data privacy | `pip install factorforge-cds` |
36
+ | **Docker** | Full web interface locally | `docker pull ghcr.io/eijex/factorforge-cds:latest` |
37
+ | **Eijex MCP** | MCP-compatible agent access | [mcp.eijex.com](https://mcp.eijex.com) |
38
+
39
+ ---
40
+
41
+ ## Repository Structure
42
+
43
+ The supported production engine is the deterministic profile engine under:
44
+
45
+ ```text
46
+ src/factorforge/engines/profile/
47
+ ```
48
+
49
+ Historical implementation tracks are preserved under `archive/` for provenance
50
+ and are not imported by the installed package or exposed as supported engines.
51
+
52
+ ---
53
+
54
+ ## ⚠️ Validation Status
55
+
56
+ FactorForge outputs are **in-silico only** and have not been experimentally validated in wet-lab conditions. See [Validation](https://eijex.github.io/factorforge-cds/validation/) and [VALIDATION.md](VALIDATION.md).
57
+
58
+ ---
59
+
60
+ ## Citing
61
+
62
+ ```
63
+ FactorForge v3.2.0 (2026). Open-source constraint-based CDS design engine.
64
+ Eijex. https://github.com/eijex/factorforge-cds
65
+ ```
66
+
67
+ ---
68
+
69
+ ## Maintainer
70
+
71
+ Mun-Kyu Kim ([@eijex](https://github.com/eijex))
72
+
73
+ ## License
74
+
75
+ GNU Affero General Public License v3.0 — see [LICENSE](LICENSE).
76
+
77
+ **Disclaimer:** FactorForge is provided for research purposes only. Outputs are computational and have not been experimentally validated.
78
+
79
+ ---
80
+
81
+ ## Get in Touch
82
+
83
+ - **Docs** — [eijex.github.io/factorforge-cds](https://eijex.github.io/factorforge-cds/)
84
+ - **Wet-lab Results** — Public-safe validation summaries are welcome. Do not submit raw sequences, confidential construct details, internal batch IDs, patient data, private contact information, exact process parameters, or confidential partner/customer data. See [VALIDATION.md](VALIDATION.md) before submitting.
85
+ - **GitHub Issues** — bugs, features: [github.com/eijex/factorforge-cds/issues](https://github.com/eijex/factorforge-cds/issues)
86
+ - **Email** — eijex.lab@gmail.com
87
+ - **FactorForge** — [factorforge.eijex.com](https://factorforge.eijex.com)
88
+ - **Lab** — [www.eijex.com](https://www.eijex.com)
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "factorforge-cds"
7
- version = "3.1.9"
7
+ version = "3.2.0"
8
8
  description = "FactorForge - open-source constraint-based CDS design engine by Eijex."
9
9
  readme = "README.md"
10
10
  license = "AGPL-3.0-only"
@@ -28,6 +28,7 @@ dependencies = [
28
28
 
29
29
  [project.optional-dependencies]
30
30
  dev = [
31
+ "jsonschema>=4.0",
31
32
  "pytest>=7.0",
32
33
  "pytest-cov>=4.0",
33
34
  "ruff>=0.1",
@@ -4,7 +4,7 @@ FactorForge - Codon Optimization Platform
4
4
  profile: constraint-aware rule/profile engine
5
5
  """
6
6
 
7
- __version__ = "3.1.9"
7
+ __version__ = "3.2.0"
8
8
  __author__ = "Eijex"
9
9
 
10
10
  # Auto-register engines (safe when running from source tree)
@@ -14,6 +14,16 @@ from factorforge.analysis.metrics import (
14
14
  )
15
15
 
16
16
 
17
+ # Defaults calibrated to nbenthamiana profile engine output distribution
18
+ # (analysis 004, n=49): avg CAI=0.76, avg GC=60.1% (range 55-71%).
19
+ # DEFAULT_CAI_TARGET=0.82 aligns with industry practice (>0.8) and is achievable.
20
+ # Exported as named constants so tests/test_registry_production_sync.py can
21
+ # strictly compare them against the registry (single source of truth).
22
+ DEFAULT_CAI_TARGET: float = 0.82
23
+ DEFAULT_GC_LOW: float = 55.0
24
+ DEFAULT_GC_HIGH: float = 65.0
25
+
26
+
17
27
  AA_TO_CODONS: dict[str, list[str]] = {}
18
28
  for _codon, _aa in STANDARD_GENETIC_CODE.items():
19
29
  if _aa == "*":
@@ -88,9 +98,9 @@ def _reconstruct_sequence(
88
98
  def analyze_feasibility(
89
99
  protein_sequence: str,
90
100
  codon_weights: dict[str, float],
91
- target_cai: float = 0.82,
92
- target_gc_low: float = 55.0,
93
- target_gc_high: float = 65.0,
101
+ target_cai: float = DEFAULT_CAI_TARGET,
102
+ target_gc_low: float = DEFAULT_GC_LOW,
103
+ target_gc_high: float = DEFAULT_GC_HIGH,
94
104
  gc_ranges: list[tuple[float, float]] | None = None,
95
105
  ) -> dict[str, Any]:
96
106
  """Compute exact CAI/GC feasibility over synonymous codon choices.
@@ -99,9 +109,8 @@ def analyze_feasibility(
99
109
  global GC count. This is exact for global GC and CAI under the supplied
100
110
  codon weights.
101
111
 
102
- Defaults calibrated to nbenthamiana profile engine output distribution
103
- (analysis 004, n=49): avg CAI=0.76, avg GC=60.1% (range 55-71%).
104
- target_cai=0.82 aligns with industry practice (>0.8) and is achievable.
112
+ See module-level DEFAULT_CAI_TARGET / DEFAULT_GC_LOW / DEFAULT_GC_HIGH for
113
+ the calibration rationale (analysis 004, n=49).
105
114
  """
106
115
  protein = "".join(protein_sequence.upper().split()).rstrip("*")
107
116
  if not protein:
@@ -13,7 +13,7 @@ def register_builtin_engines() -> None:
13
13
  "profile",
14
14
  RuleBasedOptimizer,
15
15
  metadata={
16
- "version": "3.1.9",
16
+ "version": "3.2.0",
17
17
  "engine_type": "profile_rule_based",
18
18
  "role": "stable_profile_engine",
19
19
  "stable": True,
@@ -5,7 +5,7 @@ Production system (2026)
5
5
  Plant-specific rule-based optimization
6
6
  """
7
7
 
8
- __version__ = "3.1.9"
8
+ __version__ = "3.2.0"
9
9
 
10
10
  from .optimizer import RuleBasedOptimizer
11
11
  from .pipeline import OptimizationPipeline
@@ -17,7 +17,7 @@ class RuleBasedOptimizer(OptimizerEngine):
17
17
  """Profile-based rule optimization engine."""
18
18
 
19
19
  name = "Profile-based"
20
- version = "3.1.9"
20
+ version = "3.2.0"
21
21
 
22
22
  def __init__(self) -> None:
23
23
  self.validator = InputValidator()
@@ -30,6 +30,7 @@ class RuleBasedOptimizer(OptimizerEngine):
30
30
  sequence: str,
31
31
  profile: str | None = "balanced",
32
32
  host: str = "nbenthamiana",
33
+ seed: int | None = None,
33
34
  **kwargs: Any,
34
35
  ) -> OptimizationResult:
35
36
  """
@@ -91,7 +92,7 @@ class RuleBasedOptimizer(OptimizerEngine):
91
92
  candidates = [{"sequence": optimized_dna, "cai": cai, "gc": gc, "score": score}]
92
93
  else:
93
94
  candidates = translator.generate_candidates(
94
- processed_seq, profile=opt_profile, n=1
95
+ processed_seq, profile=opt_profile, n=1, seed=seed
95
96
  )
96
97
  if not candidates:
97
98
  raise ValueError("No candidates generated for input sequence.")
@@ -20,6 +20,18 @@ class Domesticator:
20
20
  - BioBricks (EcoRI, XbaI, SpeI, PstI)
21
21
  """
22
22
 
23
+ # Canonical Golden Gate Type IIS enzyme set, exported as GOLDEN_GATE_ENZYMES
24
+ # so tests/test_registry_production_sync.py::test_type_iis_sync can strictly
25
+ # compare it against the registry (single source of truth) instead of warning.
26
+ #
27
+ # BpiI and BbsI share the same GAAGAC Type IIS recognition/cut behavior in
28
+ # FactorForge's Golden Gate scanning context. The existing FactorForge
29
+ # production code and documentation consistently use BpiI as the canonical
30
+ # label; BbsI is a common synonym/vendor naming convention for the same
31
+ # scanning target. This is a naming normalization, not a biological
32
+ # threshold change. Order matches the registry value for stable comparison.
33
+ GOLDEN_GATE_ENZYMES: tuple[str, ...] = ("BsaI", "BpiI", "BsmBI")
34
+
23
35
  # Assembly standard definitions
24
36
  ASSEMBLY_STANDARDS: dict[str, dict[str, Any]] = {
25
37
  "golden_gate": {
@@ -671,6 +671,7 @@ class ReverseTranslator:
671
671
  protein_seq: str,
672
672
  profile: OptimizationProfile = OptimizationProfile.BALANCED,
673
673
  n: int = 5,
674
+ seed: int | None = None,
674
675
  **kwargs: Any,
675
676
  ) -> list[dict[str, Any]]:
676
677
  """
@@ -697,6 +698,9 @@ class ReverseTranslator:
697
698
  if n < 1:
698
699
  raise ValueError("n must be >= 1")
699
700
 
701
+ # Seed before any candidate generation (covers both n=1 fast path and n>1).
702
+ random.seed(seed if seed is not None else secrets.randbits(32))
703
+
700
704
  def _build_candidate() -> dict[str, Any]:
701
705
  dna_seq = self.reverse_translate(protein_seq, profile, **kwargs)
702
706
  cai = self.calculate_cai(dna_seq)
@@ -720,7 +724,6 @@ class ReverseTranslator:
720
724
 
721
725
  candidates: list[dict[str, Any]] = []
722
726
  last_error: Exception | None = None
723
- random.seed(secrets.randbits(32))
724
727
 
725
728
  for attempt in range(n):
726
729
  try:
@@ -90,14 +90,23 @@ class InputValidator:
90
90
  # Analyze character set
91
91
  unique_chars = set(clean_seq)
92
92
 
93
- # DNA: only ATGC or ATGC + IUPAC codes
94
- if unique_chars <= (self.DNA_BASES | self.AMBIGUOUS_DNA):
93
+ # 1. Pure ATGC is unambiguously DNA
94
+ if unique_chars <= self.DNA_BASES:
95
95
  return SequenceType.DNA
96
96
 
97
- # Protein: amino acid characters
97
+ # 2. Protein check BEFORE ambiguous DNA.
98
+ # IUPAC ambiguous codes (N/R/Y/S/W/K/M/B/D/H/V) overlap with amino acid
99
+ # single-letter codes. When a sequence contains only overlapping characters,
100
+ # protein interpretation takes priority — the optimizer's primary input is
101
+ # protein → CDS. Users passing ambiguous DNA for re-domestication should
102
+ # use FASTA format with a header line.
98
103
  if unique_chars <= (self.STANDARD_AA | set(self.AMBIGUOUS_AA.keys())):
99
104
  return SequenceType.PROTEIN
100
105
 
106
+ # 3. DNA with IUPAC ambiguous bases (only reached if non-protein chars present)
107
+ if unique_chars <= (self.DNA_BASES | self.AMBIGUOUS_DNA):
108
+ return SequenceType.DNA
109
+
101
110
  return SequenceType.UNKNOWN
102
111
 
103
112
  def validate(self, sequence: str, auto_fix: bool = False) -> dict[str, Any]:
@@ -0,0 +1,14 @@
1
+ """Privacy-aware native sequence I/O helpers."""
2
+
3
+ from .fasta import FastaRecord, format_fasta, parse_fasta, read_fasta, write_fasta
4
+ from .validation import SequenceValidationError, validate_sequence
5
+
6
+ __all__ = [
7
+ "FastaRecord",
8
+ "SequenceValidationError",
9
+ "format_fasta",
10
+ "parse_fasta",
11
+ "read_fasta",
12
+ "validate_sequence",
13
+ "write_fasta",
14
+ ]
@@ -0,0 +1,132 @@
1
+ """Small native FASTA reader/writer with privacy-safe output headers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from dataclasses import dataclass, field
7
+ from pathlib import Path
8
+ from typing import Iterable, Mapping
9
+
10
+ from .validation import validate_sequence
11
+
12
# Metadata keys that may be written into an output FASTA header; the tuple
# order is the order in which build_fasta_header emits them.
HEADER_ALLOWLIST = ("engine", "host_profile", "profile", "sequence_hash")

# Substrings that cause a header value to be rejected. They are matched
# against the lower-cased value, so the check is case-insensitive.
BLOCKED_HEADER_TERMS = (
    "plantform", "confidential", "private",
    "secret", "partner", "yield",
    "wet-lab", "wet_lab", "clinical",
)

# A run of 20 or more IUPAC nucleotide letters (either case) is treated as a
# raw sequence embedded in a header.
RAW_SEQUENCE_PATTERN = re.compile(r"[ACGTRYSWKMBDHVN]{20,}", re.IGNORECASE)

# The only characters a header value may be composed of.
SAFE_HEADER_VALUE = re.compile(r"^[A-Za-z0-9_.:@/+ -]+$")
26
+
27
+
28
@dataclass(frozen=True)
class FastaRecord:
    """One FASTA entry: identifier, sequence and optional header metadata.

    Instances are immutable. ``metadata`` takes part in equality but is left
    out of the generated ``__hash__``: its default value is a ``dict``, which
    is unhashable, so hashing a record would otherwise raise ``TypeError``.
    Equal records still hash equally because the hash uses a subset of the
    compared fields.
    """

    # Header identifier text.
    identifier: str
    # Sequence symbols for this entry.
    sequence: str
    # Optional key/value annotations for the header line.
    metadata: Mapping[str, str] = field(default_factory=dict, hash=False)
33
+
34
+
35
def _validate_header_value(value: str) -> str:
    """Return *value* stripped of surrounding whitespace if it is header-safe.

    Raises:
        ValueError: if the stripped text is empty or longer than 120
            characters, contains a blocked term (case-insensitive), contains
            something that looks like a raw nucleotide sequence, or uses a
            character outside the permitted set.
    """
    text = str(value).strip()
    folded = text.lower()

    if not text or len(text) > 120:
        raise ValueError("FASTA header values must contain 1-120 characters")

    for term in BLOCKED_HEADER_TERMS:
        if term in folded:
            raise ValueError("FASTA header contains blocked private or claim-related metadata")

    if RAW_SEQUENCE_PATTERN.search(text) is not None:
        raise ValueError("FASTA header must not contain a raw sequence")

    if SAFE_HEADER_VALUE.fullmatch(text) is None:
        raise ValueError("FASTA header contains unsupported characters")

    return text
47
+
48
+
49
def build_fasta_header(identifier: str, metadata: Mapping[str, object] | None = None) -> str:
    """Build an allowlist-only public FASTA header.

    The validated identifier comes first, followed by ``key=value`` tokens for
    each ``HEADER_ALLOWLIST`` key that is present in *metadata* with a
    non-``None`` value. Keys outside the allowlist are ignored. Tokens are
    joined with single spaces; the leading ``>`` is not included.

    Raises:
        ValueError: if the identifier or any emitted value fails
            ``_validate_header_value``.
    """
    tokens = [_validate_header_value(identifier)]
    if metadata is not None:
        for key in HEADER_ALLOWLIST:
            if key not in metadata:
                continue
            raw = metadata[key]
            if raw is None:
                continue
            checked = _validate_header_value(str(raw))
            tokens.append(f"{key}={checked}")
    return " ".join(tokens)
58
+
59
+
60
def parse_fasta(text: str, validation_mode: str | None = None) -> list[FastaRecord]:
    """Parse FASTA text, optionally validating each sequence alphabet.

    Blank lines and lines starting with ``;`` are skipped. The stripped text
    after ``>`` becomes the record identifier as-is; no metadata is parsed out
    of it. Sequence lines are stripped and concatenated. When
    *validation_mode* is given, each sequence is replaced by the value
    returned from ``validate_sequence(sequence, validation_mode)``.

    A single leading byte-order mark (U+FEFF) is discarded before parsing.

    Raises:
        ValueError: on an empty header, sequence data before the first header,
            or input with no records; also whatever ``validate_sequence``
            raises when validation is requested.
    """
    # str.strip() does not treat U+FEFF as whitespace, so a BOM left in front
    # of the first ">" would otherwise be reported as sequence data appearing
    # before any header.
    if text.startswith("\ufeff"):
        text = text[1:]

    records: list[FastaRecord] = []
    identifier: str | None = None
    sequence_lines: list[str] = []

    def append_record() -> None:
        # Flush the record currently being accumulated, if there is one.
        if identifier is None:
            return
        sequence = "".join(sequence_lines)
        if validation_mode is not None:
            sequence = validate_sequence(sequence, validation_mode)
        records.append(FastaRecord(identifier=identifier, sequence=sequence))

    for line_no, raw_line in enumerate(text.splitlines(), start=1):
        line = raw_line.strip()
        if not line or line.startswith(";"):
            continue
        if line.startswith(">"):
            # A new header closes the previous record.
            append_record()
            identifier = line[1:].strip()
            if not identifier:
                raise ValueError(f"FASTA header at line {line_no} is empty")
            sequence_lines = []
        elif identifier is None:
            raise ValueError(f"FASTA sequence found before first header at line {line_no}")
        else:
            sequence_lines.append(line)

    # Flush the final record, which has no following header to trigger it.
    append_record()
    if not records:
        raise ValueError("FASTA input contains no records")
    return records
93
+
94
+
95
def format_fasta(
    records: Iterable[FastaRecord],
    *,
    validation_mode: str = "dna_strict",
    line_width: int = 60,
) -> str:
    """Serialize records using privacy-safe headers and validated sequences.

    Each record becomes a ``>`` header line built by ``build_fasta_header``
    followed by its validated sequence wrapped at *line_width* symbols per
    line. The returned text ends with a newline.

    Raises:
        ValueError: if *line_width* is below 1, if *records* yields nothing,
            or if a header or sequence fails validation.
    """
    if line_width < 1:
        raise ValueError("line_width must be positive")

    output: list[str] = []
    for entry in records:
        # Header is built before the sequence is validated.
        header = build_fasta_header(entry.identifier, entry.metadata)
        residues = validate_sequence(entry.sequence, validation_mode)
        output.append(">" + header)
        start = 0
        total = len(residues)
        while start < total:
            output.append(residues[start : start + line_width])
            start += line_width

    if not output:
        raise ValueError("At least one FASTA record is required")
    return "".join(f"{line}\n" for line in output)
114
+
115
+
116
def read_fasta(path: str | Path, validation_mode: str | None = None) -> list[FastaRecord]:
    """Read the UTF-8 text file at *path* and parse it with ``parse_fasta``."""
    content = Path(path).read_text(encoding="utf-8")
    return parse_fasta(content, validation_mode)
118
+
119
+
120
def write_fasta(
    path: str | Path,
    records: Iterable[FastaRecord],
    *,
    validation_mode: str = "dna_strict",
    line_width: int = 60,
) -> Path:
    """Write *records* to *path* as UTF-8 FASTA text and return the path.

    The text is produced by ``format_fasta`` before the file is written, so a
    validation failure raises without touching the destination.
    """
    destination = Path(path)
    rendered = format_fasta(
        records,
        validation_mode=validation_mode,
        line_width=line_width,
    )
    destination.write_text(rendered, encoding="utf-8")
    return destination
@@ -0,0 +1,48 @@
1
+ """Explicit DNA/protein alphabet validation without raw-sequence leakage."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import re
7
+ from typing import Final
8
+
9
# Permitted symbols for each supported validation mode.
VALIDATION_ALPHABETS: Final[dict[str, frozenset[str]]] = {
    "dna_strict": frozenset("ACGT"),
    "dna_iupac": frozenset("ACGTRYSWKMBDHVN"),
    "protein_strict": frozenset("ACDEFGHIKLMNPQRSTVWY"),
    "protein_extended": frozenset("ACDEFGHIKLMNPQRSTVWYXBZUO*"),
}

# Number of leading symbols echoed in an error message for long sequences.
_PREVIEW_LENGTH = 8


class SequenceValidationError(ValueError):
    """Raised when a sequence does not satisfy its explicit alphabet contract."""


def _fingerprint(sequence: str) -> str:
    """Return the first 12 hex digits of the SHA-256 of *sequence* (UTF-8)."""
    return hashlib.sha256(sequence.encode("utf-8")).hexdigest()[:12]


def validate_sequence(sequence: str, mode: str = "dna_strict") -> str:
    """Normalize and validate a sequence for the requested alphabet mode.

    Whitespace is removed and letters are uppercased. Error messages include a
    short preview and fingerprint, never the complete input sequence: when the
    normalized sequence is no longer than the preview window, the preview is
    replaced by ``[redacted]`` so the whole input is not echoed back.

    Args:
        sequence: Raw sequence text.
        mode: A key of ``VALIDATION_ALPHABETS``.

    Returns:
        The normalized (whitespace-free, uppercased) sequence.

    Raises:
        ValueError: if *mode* is not a known validation mode.
        TypeError: if *sequence* is not a string.
        SequenceValidationError: if the sequence is empty after normalization
            or contains symbols outside the mode's alphabet.
    """
    if mode not in VALIDATION_ALPHABETS:
        choices = ", ".join(sorted(VALIDATION_ALPHABETS))
        raise ValueError(f"Unknown validation mode {mode!r}; expected one of: {choices}")
    if not isinstance(sequence, str):
        raise TypeError("sequence must be a string")

    normalized = re.sub(r"\s+", "", sequence).upper()
    if not normalized:
        raise SequenceValidationError("Sequence is empty after whitespace normalization")

    invalid = sorted(set(normalized) - VALIDATION_ALPHABETS[mode])
    if invalid:
        if len(normalized) > _PREVIEW_LENGTH:
            preview = normalized[:_PREVIEW_LENGTH] + "[truncated]"
        else:
            # A "preview" of a sequence this short would be the full sequence.
            preview = "[redacted]"
        raise SequenceValidationError(
            f"Invalid symbols for {mode}: {invalid}; preview={preview!r}; "
            f"length={len(normalized)}; sha256-prefix={_fingerprint(normalized)}"
        )
    return normalized
@@ -0,0 +1,18 @@
1
+ """Load and resolve the factorforge parameter registry (package source of truth)."""
2
+ from __future__ import annotations
3
+ from pathlib import Path
4
+ import yaml
5
+
6
+ REGISTRY_PATH = Path(__file__).resolve().parent / "current_parameter_registry.yaml"
7
+
8
+
9
def load_registry() -> dict:
    """Parse the YAML file at ``REGISTRY_PATH`` and return its top-level mapping.

    Raises:
        OSError: if the registry file cannot be read.
        ValueError: if the document's top level is not a mapping. An empty
            file parses to ``None`` and a scalar or list document is not a
            registry either, so both are rejected here rather than returned
            under a ``dict`` annotation.
    """
    data = yaml.safe_load(REGISTRY_PATH.read_text(encoding="utf-8"))
    if not isinstance(data, dict):
        raise ValueError(
            f"Parameter registry {REGISTRY_PATH.name} must contain a top-level "
            f"mapping, got {type(data).__name__}"
        )
    return data
11
+
12
+
13
def resolve_ref(registry: dict, dotted: str):
    """Resolve a dotted path (e.g. 'parameters.optimization.cai_target') into a value.

    Each ``.``-separated segment is used as a key into the current node,
    starting from *registry*.

    Raises:
        KeyError: if a segment is missing; the message names the full
            reference and the prefix at which the lookup failed.
        TypeError: if an intermediate value cannot be indexed by a string key
            (for example a number, string or list reached before the path
            is exhausted).
    """
    node = registry
    walked: list[str] = []
    for part in dotted.split("."):
        walked.append(part)
        try:
            node = node[part]
        except KeyError:
            # Re-raise with the whole path; the bare segment alone is rarely
            # enough to locate the bad reference.
            raise KeyError(
                f"Registry reference {dotted!r} not found: missing {'.'.join(walked)!r}"
            ) from None
        except TypeError as exc:
            # The failed assignment leaves `node` at the value that could
            # not be indexed.
            raise TypeError(
                f"Registry reference {dotted!r} cannot descend into "
                f"{type(node).__name__} at {'.'.join(walked)!r}"
            ) from exc
    return node
@@ -1,4 +1,21 @@
1
- """Shared design package schema for FactorForge CDS artifacts."""
1
+ """Internal API response format for FactorForge CDS artifacts.
2
+
3
+ This module defines the *internal* Pydantic model used by the optimize API
4
+ handler (``api.optimize.handler``). It is NOT the public Open Bio Design
5
+ Package contract.
6
+
7
+ Public contract:
8
+ ``src/factorforge/schemas/design_package.schema.json`` (JSON Schema Draft
9
+ 2020-12) is the public output specification. It defines the canonical field
10
+ set (``design_id``, ``claim_boundary``, ``evidence``, etc.) and is tested
11
+ by ``tests/test_design_package_schema.py`` and related files.
12
+
13
+ Separation rationale:
14
+ The internal model uses API-convenient field names (``construct_id``,
15
+ ``cds_design``) with ``extra="allow"`` for handler flexibility. The public
16
+ schema enforces claim boundary fields and must be validated with jsonschema
17
+ before any output is published or shared externally.
18
+ """
2
19
 
3
20
  from typing import Any, Optional
4
21