factorforge-cds 3.0.0__tar.gz → 3.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/PKG-INFO +81 -20
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/README.md +75 -16
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/pyproject.toml +54 -52
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/__init__.py +3 -4
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/cli/main.py +2 -2
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/__init__.py +1 -1
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/v2/__init__.py +1 -1
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/v2/optimizer.py +1 -1
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/v2/pipeline.py +21 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/v2/rules/reverse_translator.py +3 -3
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/v2/rules/rule_engine.py +264 -46
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/v2/scoring.py +7 -4
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/v3/__init__.py +1 -1
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/v3/pipeline.py +1 -1
- factorforge_cds-3.1.0/src/factorforge/utils/restriction_sites.py +370 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge_cds.egg-info/PKG-INFO +81 -20
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge_cds.egg-info/SOURCES.txt +2 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge_cds.egg-info/requires.txt +2 -0
- factorforge_cds-3.1.0/tests/test_restriction_sites.py +196 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/LICENSE +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/setup.cfg +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/__main__.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/cli/__init__.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/cli/legacy_cli.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/core/interfaces/__init__.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/core/interfaces/exporter.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/core/interfaces/optimizer.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/core/interfaces/validator.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/database.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/ml/__init__.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/ml/plant_optimizer.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/registry.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/v1_archived/__init__.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/v2/codon_table_builder.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/v2/construct_builder.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/v2/exporter.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/v2/rules/__init__.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/v2/rules/domesticator.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/v2/utils.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/v2/validator.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/v3/explain.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/v3/inference/__init__.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/v3/inference/constrained_decoder.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/v3/inference/v2_adapter.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/v3/metrics.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/v3/modeling_bart_decoder.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/v3/synonym_mask.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/engines/v3/tokenizer.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/ml/__init__.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/ml/feasibility.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/ml/metrics.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/utils/__init__.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/utils/construct_id.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/utils/exceptions.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/utils/sequence_validator.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge/utils/validation.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge_cds.egg-info/dependency_links.txt +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge_cds.egg-info/entry_points.txt +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/src/factorforge_cds.egg-info/top_level.txt +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/tests/test_compare_v1_v2.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/tests/test_database.py +0 -0
- {factorforge_cds-3.0.0 → factorforge_cds-3.1.0}/tests/test_sequence_validator.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: factorforge-cds
|
|
3
|
-
Version: 3.0.0
|
|
3
|
+
Version: 3.1.0
|
|
4
4
|
Summary: FactorForge — open-source constraint-based CDS design engine by Eijex.
|
|
5
5
|
Author-email: Eijex <eijex.lab@gmail.com>
|
|
6
6
|
License: Apache License
|
|
@@ -205,9 +205,9 @@ License: Apache License
|
|
|
205
205
|
See the License for the specific language governing permissions and
|
|
206
206
|
limitations under the License.
|
|
207
207
|
|
|
208
|
-
Project-URL: Homepage, https://factorforge.vercel.app
|
|
209
|
-
Project-URL: Repository, https://github.com/eijex/factorforge
|
|
210
|
-
Project-URL: Issues, https://github.com/eijex/factorforge/issues
|
|
208
|
+
Project-URL: Homepage, https://factorforge-cds.vercel.app
|
|
209
|
+
Project-URL: Repository, https://github.com/eijex/factorforge-cds
|
|
210
|
+
Project-URL: Issues, https://github.com/eijex/factorforge-cds/issues
|
|
211
211
|
Keywords: codon optimization,CDS design,synthetic biology,bioinformatics,Nicotiana benthamiana,constraint optimization,dynamic programming
|
|
212
212
|
Classifier: Development Status :: 4 - Beta
|
|
213
213
|
Classifier: Intended Audience :: Science/Research
|
|
@@ -219,6 +219,7 @@ Description-Content-Type: text/markdown
|
|
|
219
219
|
License-File: LICENSE
|
|
220
220
|
Requires-Dist: biopython>=1.81
|
|
221
221
|
Requires-Dist: requests>=2.31
|
|
222
|
+
Requires-Dist: click>=8.0
|
|
222
223
|
Provides-Extra: ml
|
|
223
224
|
Requires-Dist: torch>=2.0; extra == "ml"
|
|
224
225
|
Requires-Dist: transformers>=4.35; extra == "ml"
|
|
@@ -227,6 +228,7 @@ Requires-Dist: pyyaml>=6.0; extra == "ml"
|
|
|
227
228
|
Provides-Extra: dev
|
|
228
229
|
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
229
230
|
Requires-Dist: ruff>=0.1; extra == "dev"
|
|
231
|
+
Requires-Dist: pyyaml>=6.0; extra == "dev"
|
|
230
232
|
Dynamic: license-file
|
|
231
233
|
|
|
232
234
|
# FactorForge
|
|
@@ -235,8 +237,8 @@ Dynamic: license-file
|
|
|
235
237
|
|
|
236
238
|
[](LICENSE)
|
|
237
239
|
[](https://www.python.org/)
|
|
238
|
-
[](https://factorforge.vercel.app)
|
|
240
|
+
[](https://github.com/eijex/factorforge-cds/releases)
|
|
241
|
+
[](https://factorforge-cds.vercel.app)
|
|
240
242
|
|
|
241
243
|
FactorForge optimizes protein sequences into *N. benthamiana*-compatible CDS by maximizing CAI, controlling GC content, eliminating PolyA signals, and producing MoClo/Golden Gate-ready constructs.
|
|
242
244
|
|
|
@@ -266,8 +268,8 @@ print(result.metadata) # CAI, GC%, scan results, domestication edits
|
|
|
266
268
|
|
|
267
269
|
| Method | Description | Link |
|
|
268
270
|
|--------|-------------|------|
|
|
269
|
-
| **Web App** | No installation, demo & light use | [factorforge.vercel.app](https://factorforge.vercel.app) |
|
|
270
|
-
| **CLI / Python** | Local use, batch processing, data privacy | `pip install factorforge` |
|
|
271
|
+
| **Web App** | No installation, demo & light use | [factorforge-cds.vercel.app](https://factorforge-cds.vercel.app) |
|
|
272
|
+
| **CLI / Python** | Local use, batch processing, data privacy | `pip install factorforge-cds` |
|
|
271
273
|
| **Notebooks** | Training & experimentation on Colab / Kaggle | See [notebooks/](notebooks/) |
|
|
272
274
|
|
|
273
275
|
---
|
|
@@ -287,12 +289,17 @@ Protein sequence (FASTA or plain text)
|
|
|
287
289
|
▼
|
|
288
290
|
2. Rule Scan
|
|
289
291
|
Detects PolyA signals, homopolymers,
|
|
290
|
-
|
|
292
|
+
CpG/TpA dinucleotide hotspots,
|
|
293
|
+
repeat sequences, rare codon runs,
|
|
294
|
+
forbidden restriction sites
|
|
291
295
|
│
|
|
292
296
|
▼
|
|
293
297
|
3. Domestication
|
|
294
298
|
Removes Golden Gate / MoClo-incompatible
|
|
295
299
|
BsaI / BsmBI recognition sites via silent edits
|
|
300
|
+
Optional custom restriction sites can be removed
|
|
301
|
+
by synonymous substitution when feasible
|
|
302
|
+
CpG/TpA reduction uses a CAI-budgeted balanced mode
|
|
296
303
|
│
|
|
297
304
|
▼
|
|
298
305
|
4. Output
|
|
@@ -343,20 +350,74 @@ Benchmarked on *N. benthamiana* codon usage table (v2 engine, 3,876 sequences):
|
|
|
343
350
|
pip install factorforge-cds
|
|
344
351
|
```
|
|
345
352
|
|
|
346
|
-
|
|
353
|
+
Experimental ML research modules are available separately:
|
|
347
354
|
|
|
348
355
|
```bash
|
|
349
356
|
pip install "factorforge-cds[ml]"
|
|
350
357
|
```
|
|
351
358
|
|
|
359
|
+
> These modules (ESM2 + BART decoder) are not part of the stable v3.1.0 default optimizer.
|
|
360
|
+
> The default v3.1.0 engine is the constraint-based DP feasibility engine.
|
|
361
|
+
|
|
352
362
|
For development:
|
|
353
363
|
|
|
354
364
|
```bash
|
|
355
|
-
git clone https://github.com/eijex/factorforge.git
|
|
365
|
+
git clone https://github.com/eijex/factorforge-cds.git
|
|
356
366
|
cd factorforge-cds
|
|
357
367
|
pip install -e ".[dev]"
|
|
358
368
|
```
|
|
359
369
|
|
|
370
|
+
### Docker (local web app)
|
|
371
|
+
|
|
372
|
+
Run the full web interface locally — no data leaves your machine:
|
|
373
|
+
|
|
374
|
+
```bash
|
|
375
|
+
docker pull ghcr.io/eijex/factorforge-cds:latest
|
|
376
|
+
docker run -p 8080:8080 ghcr.io/eijex/factorforge-cds:latest
|
|
377
|
+
```
|
|
378
|
+
|
|
379
|
+
Then open [http://localhost:8080](http://localhost:8080).
|
|
380
|
+
|
|
381
|
+
Or build from source:
|
|
382
|
+
|
|
383
|
+
```bash
|
|
384
|
+
git clone https://github.com/eijex/factorforge-cds.git
|
|
385
|
+
cd factorforge-cds
|
|
386
|
+
docker build -t factorforge-cds .
|
|
387
|
+
docker run -p 8080:8080 factorforge-cds
|
|
388
|
+
```
|
|
389
|
+
|
|
390
|
+
### Updating
|
|
391
|
+
|
|
392
|
+
**PyPI (pip install):**
|
|
393
|
+
|
|
394
|
+
```bash
|
|
395
|
+
pip install --upgrade factorforge-cds
|
|
396
|
+
```
|
|
397
|
+
|
|
398
|
+
**Docker:**
|
|
399
|
+
|
|
400
|
+
```bash
|
|
401
|
+
docker pull ghcr.io/eijex/factorforge-cds:latest
|
|
402
|
+
```
|
|
403
|
+
|
|
404
|
+
**Git clone / local development:**
|
|
405
|
+
|
|
406
|
+
```bash
|
|
407
|
+
git pull origin main
|
|
408
|
+
pip install -e ".[dev]"
|
|
409
|
+
```
|
|
410
|
+
|
|
411
|
+
To check your installed version:
|
|
412
|
+
|
|
413
|
+
```bash
|
|
414
|
+
pip show factorforge-cds
|
|
415
|
+
# or
|
|
416
|
+
factorforge --version
|
|
417
|
+
```
|
|
418
|
+
|
|
419
|
+
Release notes for each version are in [CHANGELOG.md](CHANGELOG.md).
|
|
420
|
+
|
|
360
421
|
---
|
|
361
422
|
|
|
362
423
|
## CLI Reference
|
|
@@ -399,8 +460,8 @@ Each optimized sequence includes:
|
|
|
399
460
|
- **Optimized CDS** — synonymous codon replacements only, AA identity 100%
|
|
400
461
|
- **CAI score** — codon adaptation index for *N. benthamiana*
|
|
401
462
|
- **GC content** — global and first-region
|
|
402
|
-
- **Scan report** — PolyA signals detected/fixed, homopolymers, restriction sites
|
|
403
|
-
- **Domestication report** — BsaI/BsmBI sites removed, edit count
|
|
463
|
+
- **Scan report** — PolyA signals detected/fixed, CpG/TpA hotspots, homopolymers, rare codon runs, restriction sites
|
|
464
|
+
- **Domestication report** — BsaI/BsmBI and optional custom restriction sites removed, edit count
|
|
404
465
|
- **Construct ID** — reproducible hash for tracking
|
|
405
466
|
|
|
406
467
|
---
|
|
@@ -417,7 +478,7 @@ FactorForge in your experiments, we'd love to hear from you:
|
|
|
417
478
|
- How did CAI / GC% correlate with actual expression levels?
|
|
418
479
|
- Any unexpected results?
|
|
419
480
|
|
|
420
|
-
**Share your results** → [GitHub Issues](https://github.com/eijex/factorforge/issues)
|
|
481
|
+
**Share your results** → [GitHub Issues](https://github.com/eijex/factorforge-cds/issues)
|
|
421
482
|
or email: eijex.lab@gmail.com
|
|
422
483
|
|
|
423
484
|
Validated results will be credited in [VALIDATION.md](VALIDATION.md) and future releases.
|
|
@@ -432,8 +493,8 @@ This project was built using the following tools and platforms:
|
|
|
432
493
|
|------|------|
|
|
433
494
|
| [Claude](https://claude.ai) / [Claude Code](https://claude.ai/code) (Anthropic) | Architecture design, domain analysis, code review |
|
|
434
495
|
| [Codex](https://github.com/openai/codex) (OpenAI) | Code generation and implementation |
|
|
435
|
-
| [Google Colab](https://colab.research.google.com) | ML training
|
|
436
|
-
| [Kaggle](https://www.kaggle.com) | ML training
|
|
496
|
+
| [Google Colab](https://colab.research.google.com) | ML training experiments |
|
|
497
|
+
| [Kaggle](https://www.kaggle.com) | ML training experiments |
|
|
437
498
|
| [ESM2](https://github.com/facebookresearch/esm) (Meta) | Protein language model (encoder) |
|
|
438
499
|
| [PyTorch](https://pytorch.org) | ML framework |
|
|
439
500
|
| [Conda](https://docs.conda.io) / [Miniconda](https://docs.anaconda.com/miniconda/) | Environment management |
|
|
@@ -449,8 +510,8 @@ This project was built using the following tools and platforms:
|
|
|
449
510
|
If you use FactorForge in your research, please cite:
|
|
450
511
|
|
|
451
512
|
```
|
|
452
|
-
FactorForge v3.
|
|
453
|
-
Eijex. https://github.com/eijex/factorforge
|
|
513
|
+
FactorForge v3.1.0 (2026). Open-source constraint-based CDS design engine.
|
|
514
|
+
Eijex. https://github.com/eijex/factorforge-cds
|
|
454
515
|
```
|
|
455
516
|
|
|
456
517
|
*A citable publication is in preparation. Until then, please cite the GitHub repository.*
|
|
@@ -470,6 +531,6 @@ own discretion.
|
|
|
470
531
|
|
|
471
532
|
## Get in Touch
|
|
472
533
|
|
|
473
|
-
- **GitHub Issues** — bug reports, feature requests, wet-lab results: [github.com/eijex/factorforge/issues](https://github.com/eijex/factorforge/issues)
|
|
534
|
+
- **GitHub Issues** — bug reports, feature requests, wet-lab results: [github.com/eijex/factorforge-cds/issues](https://github.com/eijex/factorforge-cds/issues)
|
|
474
535
|
- **Email** — collaborations, feedback, questions: eijex.lab@gmail.com
|
|
475
|
-
- **Web** — [factorforge.vercel.app](https://factorforge.vercel.app)
|
|
536
|
+
- **Web** — [factorforge-cds.vercel.app](https://factorforge-cds.vercel.app)
|
|
@@ -4,8 +4,8 @@
|
|
|
4
4
|
|
|
5
5
|
[](LICENSE)
|
|
6
6
|
[](https://www.python.org/)
|
|
7
|
-
[](https://factorforge.vercel.app)
|
|
7
|
+
[](https://github.com/eijex/factorforge-cds/releases)
|
|
8
|
+
[](https://factorforge-cds.vercel.app)
|
|
9
9
|
|
|
10
10
|
FactorForge optimizes protein sequences into *N. benthamiana*-compatible CDS by maximizing CAI, controlling GC content, eliminating PolyA signals, and producing MoClo/Golden Gate-ready constructs.
|
|
11
11
|
|
|
@@ -35,8 +35,8 @@ print(result.metadata) # CAI, GC%, scan results, domestication edits
|
|
|
35
35
|
|
|
36
36
|
| Method | Description | Link |
|
|
37
37
|
|--------|-------------|------|
|
|
38
|
-
| **Web App** | No installation, demo & light use | [factorforge.vercel.app](https://factorforge.vercel.app) |
|
|
39
|
-
| **CLI / Python** | Local use, batch processing, data privacy | `pip install factorforge` |
|
|
38
|
+
| **Web App** | No installation, demo & light use | [factorforge-cds.vercel.app](https://factorforge-cds.vercel.app) |
|
|
39
|
+
| **CLI / Python** | Local use, batch processing, data privacy | `pip install factorforge-cds` |
|
|
40
40
|
| **Notebooks** | Training & experimentation on Colab / Kaggle | See [notebooks/](notebooks/) |
|
|
41
41
|
|
|
42
42
|
---
|
|
@@ -56,12 +56,17 @@ Protein sequence (FASTA or plain text)
|
|
|
56
56
|
▼
|
|
57
57
|
2. Rule Scan
|
|
58
58
|
Detects PolyA signals, homopolymers,
|
|
59
|
-
|
|
59
|
+
CpG/TpA dinucleotide hotspots,
|
|
60
|
+
repeat sequences, rare codon runs,
|
|
61
|
+
forbidden restriction sites
|
|
60
62
|
│
|
|
61
63
|
▼
|
|
62
64
|
3. Domestication
|
|
63
65
|
Removes Golden Gate / MoClo-incompatible
|
|
64
66
|
BsaI / BsmBI recognition sites via silent edits
|
|
67
|
+
Optional custom restriction sites can be removed
|
|
68
|
+
by synonymous substitution when feasible
|
|
69
|
+
CpG/TpA reduction uses a CAI-budgeted balanced mode
|
|
65
70
|
│
|
|
66
71
|
▼
|
|
67
72
|
4. Output
|
|
@@ -112,20 +117,74 @@ Benchmarked on *N. benthamiana* codon usage table (v2 engine, 3,876 sequences):
|
|
|
112
117
|
pip install factorforge-cds
|
|
113
118
|
```
|
|
114
119
|
|
|
115
|
-
|
|
120
|
+
Experimental ML research modules are available separately:
|
|
116
121
|
|
|
117
122
|
```bash
|
|
118
123
|
pip install "factorforge-cds[ml]"
|
|
119
124
|
```
|
|
120
125
|
|
|
126
|
+
> These modules (ESM2 + BART decoder) are not part of the stable v3.1.0 default optimizer.
|
|
127
|
+
> The default v3.1.0 engine is the constraint-based DP feasibility engine.
|
|
128
|
+
|
|
121
129
|
For development:
|
|
122
130
|
|
|
123
131
|
```bash
|
|
124
|
-
git clone https://github.com/eijex/factorforge.git
|
|
132
|
+
git clone https://github.com/eijex/factorforge-cds.git
|
|
125
133
|
cd factorforge-cds
|
|
126
134
|
pip install -e ".[dev]"
|
|
127
135
|
```
|
|
128
136
|
|
|
137
|
+
### Docker (local web app)
|
|
138
|
+
|
|
139
|
+
Run the full web interface locally — no data leaves your machine:
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
docker pull ghcr.io/eijex/factorforge-cds:latest
|
|
143
|
+
docker run -p 8080:8080 ghcr.io/eijex/factorforge-cds:latest
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
Then open [http://localhost:8080](http://localhost:8080).
|
|
147
|
+
|
|
148
|
+
Or build from source:
|
|
149
|
+
|
|
150
|
+
```bash
|
|
151
|
+
git clone https://github.com/eijex/factorforge-cds.git
|
|
152
|
+
cd factorforge-cds
|
|
153
|
+
docker build -t factorforge-cds .
|
|
154
|
+
docker run -p 8080:8080 factorforge-cds
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Updating
|
|
158
|
+
|
|
159
|
+
**PyPI (pip install):**
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
pip install --upgrade factorforge-cds
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
**Docker:**
|
|
166
|
+
|
|
167
|
+
```bash
|
|
168
|
+
docker pull ghcr.io/eijex/factorforge-cds:latest
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
**Git clone / local development:**
|
|
172
|
+
|
|
173
|
+
```bash
|
|
174
|
+
git pull origin main
|
|
175
|
+
pip install -e ".[dev]"
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
To check your installed version:
|
|
179
|
+
|
|
180
|
+
```bash
|
|
181
|
+
pip show factorforge-cds
|
|
182
|
+
# or
|
|
183
|
+
factorforge --version
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
Release notes for each version are in [CHANGELOG.md](CHANGELOG.md).
|
|
187
|
+
|
|
129
188
|
---
|
|
130
189
|
|
|
131
190
|
## CLI Reference
|
|
@@ -168,8 +227,8 @@ Each optimized sequence includes:
|
|
|
168
227
|
- **Optimized CDS** — synonymous codon replacements only, AA identity 100%
|
|
169
228
|
- **CAI score** — codon adaptation index for *N. benthamiana*
|
|
170
229
|
- **GC content** — global and first-region
|
|
171
|
-
- **Scan report** — PolyA signals detected/fixed, homopolymers, restriction sites
|
|
172
|
-
- **Domestication report** — BsaI/BsmBI sites removed, edit count
|
|
230
|
+
- **Scan report** — PolyA signals detected/fixed, CpG/TpA hotspots, homopolymers, rare codon runs, restriction sites
|
|
231
|
+
- **Domestication report** — BsaI/BsmBI and optional custom restriction sites removed, edit count
|
|
173
232
|
- **Construct ID** — reproducible hash for tracking
|
|
174
233
|
|
|
175
234
|
---
|
|
@@ -186,7 +245,7 @@ FactorForge in your experiments, we'd love to hear from you:
|
|
|
186
245
|
- How did CAI / GC% correlate with actual expression levels?
|
|
187
246
|
- Any unexpected results?
|
|
188
247
|
|
|
189
|
-
**Share your results** → [GitHub Issues](https://github.com/eijex/factorforge/issues)
|
|
248
|
+
**Share your results** → [GitHub Issues](https://github.com/eijex/factorforge-cds/issues)
|
|
190
249
|
or email: eijex.lab@gmail.com
|
|
191
250
|
|
|
192
251
|
Validated results will be credited in [VALIDATION.md](VALIDATION.md) and future releases.
|
|
@@ -201,8 +260,8 @@ This project was built using the following tools and platforms:
|
|
|
201
260
|
|------|------|
|
|
202
261
|
| [Claude](https://claude.ai) / [Claude Code](https://claude.ai/code) (Anthropic) | Architecture design, domain analysis, code review |
|
|
203
262
|
| [Codex](https://github.com/openai/codex) (OpenAI) | Code generation and implementation |
|
|
204
|
-
| [Google Colab](https://colab.research.google.com) | ML training
|
|
205
|
-
| [Kaggle](https://www.kaggle.com) | ML training
|
|
263
|
+
| [Google Colab](https://colab.research.google.com) | ML training experiments |
|
|
264
|
+
| [Kaggle](https://www.kaggle.com) | ML training experiments |
|
|
206
265
|
| [ESM2](https://github.com/facebookresearch/esm) (Meta) | Protein language model (encoder) |
|
|
207
266
|
| [PyTorch](https://pytorch.org) | ML framework |
|
|
208
267
|
| [Conda](https://docs.conda.io) / [Miniconda](https://docs.anaconda.com/miniconda/) | Environment management |
|
|
@@ -218,8 +277,8 @@ This project was built using the following tools and platforms:
|
|
|
218
277
|
If you use FactorForge in your research, please cite:
|
|
219
278
|
|
|
220
279
|
```
|
|
221
|
-
FactorForge v3.
|
|
222
|
-
Eijex. https://github.com/eijex/factorforge
|
|
280
|
+
FactorForge v3.1.0 (2026). Open-source constraint-based CDS design engine.
|
|
281
|
+
Eijex. https://github.com/eijex/factorforge-cds
|
|
223
282
|
```
|
|
224
283
|
|
|
225
284
|
*A citable publication is in preparation. Until then, please cite the GitHub repository.*
|
|
@@ -239,6 +298,6 @@ own discretion.
|
|
|
239
298
|
|
|
240
299
|
## Get in Touch
|
|
241
300
|
|
|
242
|
-
- **GitHub Issues** — bug reports, feature requests, wet-lab results: [github.com/eijex/factorforge/issues](https://github.com/eijex/factorforge/issues)
|
|
301
|
+
- **GitHub Issues** — bug reports, feature requests, wet-lab results: [github.com/eijex/factorforge-cds/issues](https://github.com/eijex/factorforge-cds/issues)
|
|
243
302
|
- **Email** — collaborations, feedback, questions: eijex.lab@gmail.com
|
|
244
|
-
- **Web** — [factorforge.vercel.app](https://factorforge.vercel.app)
|
|
303
|
+
- **Web** — [factorforge-cds.vercel.app](https://factorforge-cds.vercel.app)
|
|
@@ -1,52 +1,54 @@
|
|
|
1
|
-
[build-system]
|
|
2
|
-
requires = ["setuptools>=68", "wheel"]
|
|
3
|
-
build-backend = "setuptools.build_meta"
|
|
4
|
-
|
|
5
|
-
[project]
|
|
6
|
-
name = "factorforge-cds"
|
|
7
|
-
version = "3.0.0"
|
|
8
|
-
description = "FactorForge — open-source constraint-based CDS design engine by Eijex."
|
|
9
|
-
readme = "README.md"
|
|
10
|
-
license = { file = "LICENSE" }
|
|
11
|
-
requires-python = ">=3.10"
|
|
12
|
-
authors = [{ name = "Eijex", email = "eijex.lab@gmail.com" }]
|
|
13
|
-
keywords = ["codon optimization", "CDS design", "synthetic biology", "bioinformatics", "Nicotiana benthamiana", "constraint optimization", "dynamic programming"]
|
|
14
|
-
classifiers = [
|
|
15
|
-
"Development Status :: 4 - Beta",
|
|
16
|
-
"Intended Audience :: Science/Research",
|
|
17
|
-
"License :: OSI Approved :: Apache Software License",
|
|
18
|
-
"Programming Language :: Python :: 3",
|
|
19
|
-
"Topic :: Scientific/Engineering :: Bio-Informatics",
|
|
20
|
-
]
|
|
21
|
-
|
|
22
|
-
dependencies = [
|
|
23
|
-
"biopython>=1.81",
|
|
24
|
-
"requests>=2.31",
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
"
|
|
31
|
-
"
|
|
32
|
-
"
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
"
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
[
|
|
51
|
-
|
|
52
|
-
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "factorforge-cds"
|
|
7
|
+
version = "3.1.0"
|
|
8
|
+
description = "FactorForge — open-source constraint-based CDS design engine by Eijex."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { file = "LICENSE" }
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [{ name = "Eijex", email = "eijex.lab@gmail.com" }]
|
|
13
|
+
keywords = ["codon optimization", "CDS design", "synthetic biology", "bioinformatics", "Nicotiana benthamiana", "constraint optimization", "dynamic programming"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Intended Audience :: Science/Research",
|
|
17
|
+
"License :: OSI Approved :: Apache Software License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Topic :: Scientific/Engineering :: Bio-Informatics",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
dependencies = [
|
|
23
|
+
"biopython>=1.81",
|
|
24
|
+
"requests>=2.31",
|
|
25
|
+
"click>=8.0",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.optional-dependencies]
|
|
29
|
+
ml = [
|
|
30
|
+
"torch>=2.0",
|
|
31
|
+
"transformers>=4.35",
|
|
32
|
+
"fair-esm>=2.0",
|
|
33
|
+
"pyyaml>=6.0",
|
|
34
|
+
]
|
|
35
|
+
dev = [
|
|
36
|
+
"pytest>=7.0",
|
|
37
|
+
"ruff>=0.1",
|
|
38
|
+
"pyyaml>=6.0",
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
[project.urls]
|
|
42
|
+
Homepage = "https://factorforge-cds.vercel.app"
|
|
43
|
+
Repository = "https://github.com/eijex/factorforge-cds"
|
|
44
|
+
Issues = "https://github.com/eijex/factorforge-cds/issues"
|
|
45
|
+
|
|
46
|
+
[project.scripts]
|
|
47
|
+
factorforge = "factorforge.cli.main:main"
|
|
48
|
+
|
|
49
|
+
[tool.setuptools.packages.find]
|
|
50
|
+
where = ["src"]
|
|
51
|
+
|
|
52
|
+
[tool.ruff]
|
|
53
|
+
line-length = 100
|
|
54
|
+
target-version = "py310"
|
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
"""
|
|
2
2
|
FactorForge - Codon Optimization Platform
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
v3: ML engine / v3-alpha (ESM2 + BART, in development)
|
|
4
|
+
v2: Rule-based engine (Production)
|
|
5
|
+
v3: ML engine (ESM2 + BART)
|
|
7
6
|
"""
|
|
8
7
|
|
|
9
|
-
__version__ = "3.0.0"
|
|
8
|
+
__version__ = "3.1.0"
|
|
10
9
|
__author__ = "Eijex"
|
|
11
10
|
|
|
12
11
|
# Auto-register engines (safe when running from source tree)
|
|
@@ -113,8 +113,8 @@ def list_engines():
|
|
|
113
113
|
type=click.Choice(["feasibility_best", "gc_target", "high_cai"], case_sensitive=False),
|
|
114
114
|
help="DP objective",
|
|
115
115
|
)
|
|
116
|
-
@click.option("--gc-min", type=float, default=
|
|
117
|
-
@click.option("--gc-max", type=float, default=
|
|
116
|
+
@click.option("--gc-min", type=float, default=55.0, help="Minimum target GC percentage")
|
|
117
|
+
@click.option("--gc-max", type=float, default=65.0, help="Maximum target GC percentage")
|
|
118
118
|
@click.option("--template", "construct_template", help="Construct template name")
|
|
119
119
|
@click.option("--output", "-o", help="Output file")
|
|
120
120
|
@click.option("--format", "output_format", default="fasta", help="Output format (fasta, genbank)")
|
|
@@ -199,6 +199,27 @@ class OptimizationPipeline:
|
|
|
199
199
|
f"Remaining: {polya_fix.get('remaining_violations', '?')}"
|
|
200
200
|
)
|
|
201
201
|
|
|
202
|
+
# Dinucleotide reduction pass (CpG/TpA greedy synonymous fix)
|
|
203
|
+
if self.rule_engine.scan_dinucleotides(optimized_dna):
|
|
204
|
+
dinu_fix = self.rule_engine.fix_dinucleotides(optimized_dna, mode="balanced")
|
|
205
|
+
if dinu_fix["success"]:
|
|
206
|
+
optimized_dna = dinu_fix["modified_seq"]
|
|
207
|
+
candidate_metrics["cai"] = round(self.translator.calculate_cai(optimized_dna), 4)
|
|
208
|
+
candidate_metrics["gc"] = self.translator.calculate_gc_content(optimized_dna)
|
|
209
|
+
candidate_metrics["score"] = calculate_composite_score(
|
|
210
|
+
cai=candidate_metrics["cai"],
|
|
211
|
+
gc=candidate_metrics["gc"],
|
|
212
|
+
sequence=optimized_dna,
|
|
213
|
+
profile=effective_profile,
|
|
214
|
+
)
|
|
215
|
+
logger.info(
|
|
216
|
+
f"Dinucleotide reduction [{dinu_fix['mode']}]: "
|
|
217
|
+
f"{dinu_fix['initial_count']} -> "
|
|
218
|
+
f"{dinu_fix['final_count']} ({dinu_fix['reduction_pct']}% reduction, "
|
|
219
|
+
f"CAI {dinu_fix['cai_before']} -> {dinu_fix['cai_after']}, "
|
|
220
|
+
f"{dinu_fix['rounds']} round(s))"
|
|
221
|
+
)
|
|
222
|
+
|
|
202
223
|
logger.debug("Scanning for final rule violations")
|
|
203
224
|
scan_mode = str(kwargs.get("scan_mode", "full"))
|
|
204
225
|
scan_include = kwargs.get("scan_include")
|
|
@@ -370,10 +370,10 @@ class ReverseTranslator:
|
|
|
370
370
|
Balanced profile: CAI first, GC balanced
|
|
371
371
|
|
|
372
372
|
- Preferred codon ratio: 70%
|
|
373
|
-
- Target GC:
|
|
373
|
+
- Target GC: 55-65% (benchmark analysis 004: avg output 60.1%)
|
|
374
374
|
"""
|
|
375
|
-
target_gc_min = kwargs.get("target_gc_min",
|
|
376
|
-
target_gc_max = kwargs.get("target_gc_max",
|
|
375
|
+
target_gc_min = kwargs.get("target_gc_min", 55)
|
|
376
|
+
target_gc_max = kwargs.get("target_gc_max", 65)
|
|
377
377
|
preferred_ratio = kwargs.get("preferred_ratio", 0.7)
|
|
378
378
|
max_attempts = kwargs.get("max_gc_attempts", 10)
|
|
379
379
|
if max_attempts < 1:
|