factorforge-cds 3.1.4__tar.gz → 3.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/PKG-INFO +5 -4
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/README.md +4 -3
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/pyproject.toml +4 -1
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/__init__.py +1 -1
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/analysis/metrics.py +3 -1
- factorforge_cds-3.1.5/src/factorforge/data/nbenthamiana_codons.json +528 -0
- factorforge_cds-3.1.5/src/factorforge/data/nbenthamiana_golden_set.json +100 -0
- factorforge_cds-3.1.5/src/factorforge/data/templates/high_expression.json +34 -0
- factorforge_cds-3.1.5/src/factorforge/data/templates/standard_expression.json +34 -0
- factorforge_cds-3.1.5/src/factorforge/data/wolffia_globosa_codons.json +522 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/engines/__init__.py +1 -1
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/engines/profile/__init__.py +1 -1
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/engines/profile/optimizer.py +1 -1
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/engines/profile/pipeline.py +3 -2
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/engines/profile/rules/domesticator.py +2 -4
- factorforge_cds-3.1.5/src/factorforge/schemas/design_package.schema.json +373 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge_cds.egg-info/PKG-INFO +5 -4
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge_cds.egg-info/SOURCES.txt +6 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/LICENSE +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/setup.cfg +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/__main__.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/analysis/__init__.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/analysis/feasibility.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/cli/__init__.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/cli/legacy_cli.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/cli/main.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/core/interfaces/__init__.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/core/interfaces/exporter.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/core/interfaces/optimizer.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/core/interfaces/validator.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/database.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/engines/profile/codon_table_builder.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/engines/profile/construct_builder.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/engines/profile/exporter.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/engines/profile/rules/__init__.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/engines/profile/rules/reverse_translator.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/engines/profile/rules/rule_engine.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/engines/profile/scoring.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/engines/profile/utils.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/engines/profile/validator.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/engines/registry.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/schemas/__init__.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/schemas/design_package.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/utils/__init__.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/utils/construct_id.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/utils/exceptions.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/utils/restriction_sites.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/utils/sequence_validator.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/utils/validation.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/validation/__init__.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/validation/cli.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge/validation/package_generator.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge_cds.egg-info/dependency_links.txt +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge_cds.egg-info/entry_points.txt +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge_cds.egg-info/requires.txt +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/src/factorforge_cds.egg-info/top_level.txt +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/tests/test_database.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/tests/test_legacy_cli.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/tests/test_restriction_sites.py +0 -0
- {factorforge_cds-3.1.4 → factorforge_cds-3.1.5}/tests/test_sequence_validator.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: factorforge-cds
|
|
3
|
-
Version: 3.1.4
|
|
3
|
+
Version: 3.1.5
|
|
4
4
|
Summary: FactorForge - open-source constraint-based CDS design engine by Eijex.
|
|
5
5
|
Author-email: Eijex <eijex.lab@gmail.com>
|
|
6
6
|
License-Expression: AGPL-3.0-only
|
|
@@ -32,8 +32,9 @@ Dynamic: license-file
|
|
|
32
32
|
|
|
33
33
|
[](LICENSE)
|
|
34
34
|
[](https://www.python.org/)
|
|
35
|
-
[](https://github.com/eijex/factorforge-cds/releases)
|
|
36
36
|
[](https://pypi.org/project/factorforge-cds/)
|
|
37
|
+
[](https://doi.org/10.5281/zenodo.20407331)
|
|
37
38
|
[](https://factorforge-cds.vercel.app)
|
|
38
39
|
|
|
39
40
|
FactorForge optimizes protein sequences into *N. benthamiana*-compatible CDS by maximizing CAI, controlling GC content, eliminating PolyA signals, and producing MoClo/Golden Gate-ready constructs.
|
|
@@ -86,7 +87,7 @@ FactorForge has gone through several implementation generations before the curre
|
|
|
86
87
|
| **v2** — Rule-Based Engine | Internal → Production | Deterministic, constraint-aware design engine; became the foundation for the public release |
|
|
87
88
|
| **v3-alpha** — ML Prototype | Archived | ML-based design attempt; performance was insufficient for production use; preserved under `archive/v3-ml-prototype/` |
|
|
88
89
|
| **v3.0+** — Current release | Public | Open-source release of the matured v2 engine under `factorforge.engines.profile` |
|
|
89
|
-
| **
|
|
90
|
+
| **v3.7+** — ML Engine | Planned | ML-based design as `--engine ml`; added once sufficient wet-lab data is available |
|
|
90
91
|
|
|
91
92
|
The `archive/` directory preserves all three earlier tracks for provenance. None are installed or exposed by the current package.
|
|
92
93
|
|
|
@@ -101,7 +102,7 @@ FactorForge predictions are **in-silico only** and have not been experimentally
|
|
|
101
102
|
## Citing
|
|
102
103
|
|
|
103
104
|
```
|
|
104
|
-
FactorForge v3.1.4
|
|
105
|
+
FactorForge v3.1.5 (2026). Open-source constraint-based CDS design engine.
|
|
105
106
|
Eijex. https://github.com/eijex/factorforge-cds
|
|
106
107
|
```
|
|
107
108
|
|
|
@@ -4,8 +4,9 @@
|
|
|
4
4
|
|
|
5
5
|
[](LICENSE)
|
|
6
6
|
[](https://www.python.org/)
|
|
7
|
-
[](https://github.com/eijex/factorforge-cds/releases)
|
|
8
8
|
[](https://pypi.org/project/factorforge-cds/)
|
|
9
|
+
[](https://doi.org/10.5281/zenodo.20407331)
|
|
9
10
|
[](https://factorforge-cds.vercel.app)
|
|
10
11
|
|
|
11
12
|
FactorForge optimizes protein sequences into *N. benthamiana*-compatible CDS by maximizing CAI, controlling GC content, eliminating PolyA signals, and producing MoClo/Golden Gate-ready constructs.
|
|
@@ -58,7 +59,7 @@ FactorForge has gone through several implementation generations before the curre
|
|
|
58
59
|
| **v2** — Rule-Based Engine | Internal → Production | Deterministic, constraint-aware design engine; became the foundation for the public release |
|
|
59
60
|
| **v3-alpha** — ML Prototype | Archived | ML-based design attempt; performance was insufficient for production use; preserved under `archive/v3-ml-prototype/` |
|
|
60
61
|
| **v3.0+** — Current release | Public | Open-source release of the matured v2 engine under `factorforge.engines.profile` |
|
|
61
|
-
| **
|
|
62
|
+
| **v3.7+** — ML Engine | Planned | ML-based design as `--engine ml`; added once sufficient wet-lab data is available |
|
|
62
63
|
|
|
63
64
|
The `archive/` directory preserves all three earlier tracks for provenance. None are installed or exposed by the current package.
|
|
64
65
|
|
|
@@ -73,7 +74,7 @@ FactorForge predictions are **in-silico only** and have not been experimentally
|
|
|
73
74
|
## Citing
|
|
74
75
|
|
|
75
76
|
```
|
|
76
|
-
FactorForge v3.1.4
|
|
77
|
+
FactorForge v3.1.5 (2026). Open-source constraint-based CDS design engine.
|
|
77
78
|
Eijex. https://github.com/eijex/factorforge-cds
|
|
78
79
|
```
|
|
79
80
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "factorforge-cds"
|
|
7
|
-
version = "3.1.4"
|
|
7
|
+
version = "3.1.5"
|
|
8
8
|
description = "FactorForge - open-source constraint-based CDS design engine by Eijex."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "AGPL-3.0-only"
|
|
@@ -46,6 +46,9 @@ factorforge-validate = "factorforge.validation.cli:main"
|
|
|
46
46
|
[tool.setuptools.packages.find]
|
|
47
47
|
where = ["src"]
|
|
48
48
|
|
|
49
|
+
[tool.setuptools.package-data]
|
|
50
|
+
"factorforge" = ["data/*.json", "data/templates/*.json", "schemas/*.json"]
|
|
51
|
+
|
|
49
52
|
[tool.pytest.ini_options]
|
|
50
53
|
testpaths = ["tests"]
|
|
51
54
|
norecursedirs = ["archive"]
|
|
@@ -9,6 +9,8 @@ from dataclasses import dataclass
|
|
|
9
9
|
from pathlib import Path
|
|
10
10
|
from typing import Any
|
|
11
11
|
|
|
12
|
+
from factorforge.engines.profile.utils import get_data_path
|
|
13
|
+
|
|
12
14
|
|
|
13
15
|
STANDARD_GENETIC_CODE: dict[str, str] = {
|
|
14
16
|
"TTT": "F",
|
|
@@ -90,7 +92,7 @@ class CodonUsageTable:
|
|
|
90
92
|
|
|
91
93
|
|
|
92
94
|
def _default_codon_table_path() -> Path:
|
|
93
|
-
return
|
|
95
|
+
return get_data_path() / "nbenthamiana_codons.json"
|
|
94
96
|
|
|
95
97
|
|
|
96
98
|
def load_codon_usage_table(path: Path | None = None) -> CodonUsageTable:
|
|
@@ -0,0 +1,528 @@
|
|
|
1
|
+
{
|
|
2
|
+
"organism": "Nicotiana benthamiana",
|
|
3
|
+
"source": "Kazusa CodonUsage Database (https://www.kazusa.or.jp/codon/) + NCBI N. benthamiana CDS sequences (Sol Genomics Network genome v1.0.1)",
|
|
4
|
+
"description": "Codon usage frequencies for N. benthamiana optimized expression",
|
|
5
|
+
"codons": {
|
|
6
|
+
"TTT": {
|
|
7
|
+
"aa": "F",
|
|
8
|
+
"frequency": 0.38,
|
|
9
|
+
"per_thousand": 18.5
|
|
10
|
+
},
|
|
11
|
+
"TTC": {
|
|
12
|
+
"aa": "F",
|
|
13
|
+
"frequency": 0.62,
|
|
14
|
+
"per_thousand": 30.2
|
|
15
|
+
},
|
|
16
|
+
"TTA": {
|
|
17
|
+
"aa": "L",
|
|
18
|
+
"frequency": 0.08,
|
|
19
|
+
"per_thousand": 7.2
|
|
20
|
+
},
|
|
21
|
+
"TTG": {
|
|
22
|
+
"aa": "L",
|
|
23
|
+
"frequency": 0.14,
|
|
24
|
+
"per_thousand": 12.8
|
|
25
|
+
},
|
|
26
|
+
"CTT": {
|
|
27
|
+
"aa": "L",
|
|
28
|
+
"frequency": 0.15,
|
|
29
|
+
"per_thousand": 13.7
|
|
30
|
+
},
|
|
31
|
+
"CTC": {
|
|
32
|
+
"aa": "L",
|
|
33
|
+
"frequency": 0.22,
|
|
34
|
+
"per_thousand": 20.1
|
|
35
|
+
},
|
|
36
|
+
"CTA": {
|
|
37
|
+
"aa": "L",
|
|
38
|
+
"frequency": 0.08,
|
|
39
|
+
"per_thousand": 7.3
|
|
40
|
+
},
|
|
41
|
+
"CTG": {
|
|
42
|
+
"aa": "L",
|
|
43
|
+
"frequency": 0.33,
|
|
44
|
+
"per_thousand": 30.2
|
|
45
|
+
},
|
|
46
|
+
"ATT": {
|
|
47
|
+
"aa": "I",
|
|
48
|
+
"frequency": 0.35,
|
|
49
|
+
"per_thousand": 16.8
|
|
50
|
+
},
|
|
51
|
+
"ATC": {
|
|
52
|
+
"aa": "I",
|
|
53
|
+
"frequency": 0.48,
|
|
54
|
+
"per_thousand": 23.1
|
|
55
|
+
},
|
|
56
|
+
"ATA": {
|
|
57
|
+
"aa": "I",
|
|
58
|
+
"frequency": 0.17,
|
|
59
|
+
"per_thousand": 8.2
|
|
60
|
+
},
|
|
61
|
+
"ATG": {
|
|
62
|
+
"aa": "M",
|
|
63
|
+
"frequency": 1.00,
|
|
64
|
+
"per_thousand": 22.5
|
|
65
|
+
},
|
|
66
|
+
"GTT": {
|
|
67
|
+
"aa": "V",
|
|
68
|
+
"frequency": 0.22,
|
|
69
|
+
"per_thousand": 11.2
|
|
70
|
+
},
|
|
71
|
+
"GTC": {
|
|
72
|
+
"aa": "V",
|
|
73
|
+
"frequency": 0.28,
|
|
74
|
+
"per_thousand": 14.3
|
|
75
|
+
},
|
|
76
|
+
"GTA": {
|
|
77
|
+
"aa": "V",
|
|
78
|
+
"frequency": 0.12,
|
|
79
|
+
"per_thousand": 6.1
|
|
80
|
+
},
|
|
81
|
+
"GTG": {
|
|
82
|
+
"aa": "V",
|
|
83
|
+
"frequency": 0.38,
|
|
84
|
+
"per_thousand": 19.4
|
|
85
|
+
},
|
|
86
|
+
"TCT": {
|
|
87
|
+
"aa": "S",
|
|
88
|
+
"frequency": 0.18,
|
|
89
|
+
"per_thousand": 15.2
|
|
90
|
+
},
|
|
91
|
+
"TCC": {
|
|
92
|
+
"aa": "S",
|
|
93
|
+
"frequency": 0.22,
|
|
94
|
+
"per_thousand": 18.6
|
|
95
|
+
},
|
|
96
|
+
"TCA": {
|
|
97
|
+
"aa": "S",
|
|
98
|
+
"frequency": 0.12,
|
|
99
|
+
"per_thousand": 10.1
|
|
100
|
+
},
|
|
101
|
+
"TCG": {
|
|
102
|
+
"aa": "S",
|
|
103
|
+
"frequency": 0.10,
|
|
104
|
+
"per_thousand": 8.5
|
|
105
|
+
},
|
|
106
|
+
"AGT": {
|
|
107
|
+
"aa": "S",
|
|
108
|
+
"frequency": 0.15,
|
|
109
|
+
"per_thousand": 12.7
|
|
110
|
+
},
|
|
111
|
+
"AGC": {
|
|
112
|
+
"aa": "S",
|
|
113
|
+
"frequency": 0.23,
|
|
114
|
+
"per_thousand": 19.5
|
|
115
|
+
},
|
|
116
|
+
"CCT": {
|
|
117
|
+
"aa": "P",
|
|
118
|
+
"frequency": 0.28,
|
|
119
|
+
"per_thousand": 17.8
|
|
120
|
+
},
|
|
121
|
+
"CCC": {
|
|
122
|
+
"aa": "P",
|
|
123
|
+
"frequency": 0.32,
|
|
124
|
+
"per_thousand": 20.4
|
|
125
|
+
},
|
|
126
|
+
"CCA": {
|
|
127
|
+
"aa": "P",
|
|
128
|
+
"frequency": 0.22,
|
|
129
|
+
"per_thousand": 14.0
|
|
130
|
+
},
|
|
131
|
+
"CCG": {
|
|
132
|
+
"aa": "P",
|
|
133
|
+
"frequency": 0.18,
|
|
134
|
+
"per_thousand": 11.5
|
|
135
|
+
},
|
|
136
|
+
"ACT": {
|
|
137
|
+
"aa": "T",
|
|
138
|
+
"frequency": 0.24,
|
|
139
|
+
"per_thousand": 13.2
|
|
140
|
+
},
|
|
141
|
+
"ACC": {
|
|
142
|
+
"aa": "T",
|
|
143
|
+
"frequency": 0.36,
|
|
144
|
+
"per_thousand": 19.8
|
|
145
|
+
},
|
|
146
|
+
"ACA": {
|
|
147
|
+
"aa": "T",
|
|
148
|
+
"frequency": 0.22,
|
|
149
|
+
"per_thousand": 12.1
|
|
150
|
+
},
|
|
151
|
+
"ACG": {
|
|
152
|
+
"aa": "T",
|
|
153
|
+
"frequency": 0.18,
|
|
154
|
+
"per_thousand": 9.9
|
|
155
|
+
},
|
|
156
|
+
"GCT": {
|
|
157
|
+
"aa": "A",
|
|
158
|
+
"frequency": 0.26,
|
|
159
|
+
"per_thousand": 18.9
|
|
160
|
+
},
|
|
161
|
+
"GCC": {
|
|
162
|
+
"aa": "A",
|
|
163
|
+
"frequency": 0.40,
|
|
164
|
+
"per_thousand": 29.1
|
|
165
|
+
},
|
|
166
|
+
"GCA": {
|
|
167
|
+
"aa": "A",
|
|
168
|
+
"frequency": 0.20,
|
|
169
|
+
"per_thousand": 14.6
|
|
170
|
+
},
|
|
171
|
+
"GCG": {
|
|
172
|
+
"aa": "A",
|
|
173
|
+
"frequency": 0.14,
|
|
174
|
+
"per_thousand": 10.2
|
|
175
|
+
},
|
|
176
|
+
"TAT": {
|
|
177
|
+
"aa": "Y",
|
|
178
|
+
"frequency": 0.41,
|
|
179
|
+
"per_thousand": 12.8
|
|
180
|
+
},
|
|
181
|
+
"TAC": {
|
|
182
|
+
"aa": "Y",
|
|
183
|
+
"frequency": 0.59,
|
|
184
|
+
"per_thousand": 18.4
|
|
185
|
+
},
|
|
186
|
+
"TAA": {
|
|
187
|
+
"aa": "*",
|
|
188
|
+
"frequency": 0.48,
|
|
189
|
+
"per_thousand": 1.2
|
|
190
|
+
},
|
|
191
|
+
"TAG": {
|
|
192
|
+
"aa": "*",
|
|
193
|
+
"frequency": 0.24,
|
|
194
|
+
"per_thousand": 0.6
|
|
195
|
+
},
|
|
196
|
+
"CAT": {
|
|
197
|
+
"aa": "H",
|
|
198
|
+
"frequency": 0.42,
|
|
199
|
+
"per_thousand": 10.5
|
|
200
|
+
},
|
|
201
|
+
"CAC": {
|
|
202
|
+
"aa": "H",
|
|
203
|
+
"frequency": 0.58,
|
|
204
|
+
"per_thousand": 14.5
|
|
205
|
+
},
|
|
206
|
+
"CAA": {
|
|
207
|
+
"aa": "Q",
|
|
208
|
+
"frequency": 0.45,
|
|
209
|
+
"per_thousand": 12.3
|
|
210
|
+
},
|
|
211
|
+
"CAG": {
|
|
212
|
+
"aa": "Q",
|
|
213
|
+
"frequency": 0.55,
|
|
214
|
+
"per_thousand": 15.0
|
|
215
|
+
},
|
|
216
|
+
"AAT": {
|
|
217
|
+
"aa": "N",
|
|
218
|
+
"frequency": 0.44,
|
|
219
|
+
"per_thousand": 17.2
|
|
220
|
+
},
|
|
221
|
+
"AAC": {
|
|
222
|
+
"aa": "N",
|
|
223
|
+
"frequency": 0.56,
|
|
224
|
+
"per_thousand": 21.9
|
|
225
|
+
},
|
|
226
|
+
"AAA": {
|
|
227
|
+
"aa": "K",
|
|
228
|
+
"frequency": 0.42,
|
|
229
|
+
"per_thousand": 24.8
|
|
230
|
+
},
|
|
231
|
+
"AAG": {
|
|
232
|
+
"aa": "K",
|
|
233
|
+
"frequency": 0.58,
|
|
234
|
+
"per_thousand": 34.2
|
|
235
|
+
},
|
|
236
|
+
"GAT": {
|
|
237
|
+
"aa": "D",
|
|
238
|
+
"frequency": 0.46,
|
|
239
|
+
"per_thousand": 25.3
|
|
240
|
+
},
|
|
241
|
+
"GAC": {
|
|
242
|
+
"aa": "D",
|
|
243
|
+
"frequency": 0.54,
|
|
244
|
+
"per_thousand": 29.7
|
|
245
|
+
},
|
|
246
|
+
"GAA": {
|
|
247
|
+
"aa": "E",
|
|
248
|
+
"frequency": 0.45,
|
|
249
|
+
"per_thousand": 29.2
|
|
250
|
+
},
|
|
251
|
+
"GAG": {
|
|
252
|
+
"aa": "E",
|
|
253
|
+
"frequency": 0.55,
|
|
254
|
+
"per_thousand": 35.7
|
|
255
|
+
},
|
|
256
|
+
"TGT": {
|
|
257
|
+
"aa": "C",
|
|
258
|
+
"frequency": 0.46,
|
|
259
|
+
"per_thousand": 4.8
|
|
260
|
+
},
|
|
261
|
+
"TGC": {
|
|
262
|
+
"aa": "C",
|
|
263
|
+
"frequency": 0.54,
|
|
264
|
+
"per_thousand": 5.6
|
|
265
|
+
},
|
|
266
|
+
"TGA": {
|
|
267
|
+
"aa": "*",
|
|
268
|
+
"frequency": 0.28,
|
|
269
|
+
"per_thousand": 0.7
|
|
270
|
+
},
|
|
271
|
+
"TGG": {
|
|
272
|
+
"aa": "W",
|
|
273
|
+
"frequency": 1.00,
|
|
274
|
+
"per_thousand": 13.2
|
|
275
|
+
},
|
|
276
|
+
"CGT": {
|
|
277
|
+
"aa": "R",
|
|
278
|
+
"frequency": 0.12,
|
|
279
|
+
"per_thousand": 4.8
|
|
280
|
+
},
|
|
281
|
+
"CGC": {
|
|
282
|
+
"aa": "R",
|
|
283
|
+
"frequency": 0.18,
|
|
284
|
+
"per_thousand": 7.2
|
|
285
|
+
},
|
|
286
|
+
"CGA": {
|
|
287
|
+
"aa": "R",
|
|
288
|
+
"frequency": 0.11,
|
|
289
|
+
"per_thousand": 4.4
|
|
290
|
+
},
|
|
291
|
+
"CGG": {
|
|
292
|
+
"aa": "R",
|
|
293
|
+
"frequency": 0.16,
|
|
294
|
+
"per_thousand": 6.4
|
|
295
|
+
},
|
|
296
|
+
"AGA": {
|
|
297
|
+
"aa": "R",
|
|
298
|
+
"frequency": 0.21,
|
|
299
|
+
"per_thousand": 8.4
|
|
300
|
+
},
|
|
301
|
+
"AGG": {
|
|
302
|
+
"aa": "R",
|
|
303
|
+
"frequency": 0.22,
|
|
304
|
+
"per_thousand": 8.8
|
|
305
|
+
},
|
|
306
|
+
"GGT": {
|
|
307
|
+
"aa": "G",
|
|
308
|
+
"frequency": 0.24,
|
|
309
|
+
"per_thousand": 10.8
|
|
310
|
+
},
|
|
311
|
+
"GGC": {
|
|
312
|
+
"aa": "G",
|
|
313
|
+
"frequency": 0.34,
|
|
314
|
+
"per_thousand": 15.3
|
|
315
|
+
},
|
|
316
|
+
"GGA": {
|
|
317
|
+
"aa": "G",
|
|
318
|
+
"frequency": 0.25,
|
|
319
|
+
"per_thousand": 11.3
|
|
320
|
+
},
|
|
321
|
+
"GGG": {
|
|
322
|
+
"aa": "G",
|
|
323
|
+
"frequency": 0.17,
|
|
324
|
+
"per_thousand": 7.7
|
|
325
|
+
}
|
|
326
|
+
},
|
|
327
|
+
"amino_acids": {
|
|
328
|
+
"A": {
|
|
329
|
+
"name": "Alanine",
|
|
330
|
+
"codons": [
|
|
331
|
+
"GCT",
|
|
332
|
+
"GCC",
|
|
333
|
+
"GCA",
|
|
334
|
+
"GCG"
|
|
335
|
+
],
|
|
336
|
+
"preferred": "GCC"
|
|
337
|
+
},
|
|
338
|
+
"C": {
|
|
339
|
+
"name": "Cysteine",
|
|
340
|
+
"codons": [
|
|
341
|
+
"TGT",
|
|
342
|
+
"TGC"
|
|
343
|
+
],
|
|
344
|
+
"preferred": "TGC"
|
|
345
|
+
},
|
|
346
|
+
"D": {
|
|
347
|
+
"name": "Aspartic acid",
|
|
348
|
+
"codons": [
|
|
349
|
+
"GAT",
|
|
350
|
+
"GAC"
|
|
351
|
+
],
|
|
352
|
+
"preferred": "GAC"
|
|
353
|
+
},
|
|
354
|
+
"E": {
|
|
355
|
+
"name": "Glutamic acid",
|
|
356
|
+
"codons": [
|
|
357
|
+
"GAA",
|
|
358
|
+
"GAG"
|
|
359
|
+
],
|
|
360
|
+
"preferred": "GAG"
|
|
361
|
+
},
|
|
362
|
+
"F": {
|
|
363
|
+
"name": "Phenylalanine",
|
|
364
|
+
"codons": [
|
|
365
|
+
"TTT",
|
|
366
|
+
"TTC"
|
|
367
|
+
],
|
|
368
|
+
"preferred": "TTC"
|
|
369
|
+
},
|
|
370
|
+
"G": {
|
|
371
|
+
"name": "Glycine",
|
|
372
|
+
"codons": [
|
|
373
|
+
"GGT",
|
|
374
|
+
"GGC",
|
|
375
|
+
"GGA",
|
|
376
|
+
"GGG"
|
|
377
|
+
],
|
|
378
|
+
"preferred": "GGC"
|
|
379
|
+
},
|
|
380
|
+
"H": {
|
|
381
|
+
"name": "Histidine",
|
|
382
|
+
"codons": [
|
|
383
|
+
"CAT",
|
|
384
|
+
"CAC"
|
|
385
|
+
],
|
|
386
|
+
"preferred": "CAC"
|
|
387
|
+
},
|
|
388
|
+
"I": {
|
|
389
|
+
"name": "Isoleucine",
|
|
390
|
+
"codons": [
|
|
391
|
+
"ATT",
|
|
392
|
+
"ATC",
|
|
393
|
+
"ATA"
|
|
394
|
+
],
|
|
395
|
+
"preferred": "ATC"
|
|
396
|
+
},
|
|
397
|
+
"K": {
|
|
398
|
+
"name": "Lysine",
|
|
399
|
+
"codons": [
|
|
400
|
+
"AAA",
|
|
401
|
+
"AAG"
|
|
402
|
+
],
|
|
403
|
+
"preferred": "AAG"
|
|
404
|
+
},
|
|
405
|
+
"L": {
|
|
406
|
+
"name": "Leucine",
|
|
407
|
+
"codons": [
|
|
408
|
+
"TTA",
|
|
409
|
+
"TTG",
|
|
410
|
+
"CTT",
|
|
411
|
+
"CTC",
|
|
412
|
+
"CTA",
|
|
413
|
+
"CTG"
|
|
414
|
+
],
|
|
415
|
+
"preferred": "CTG"
|
|
416
|
+
},
|
|
417
|
+
"M": {
|
|
418
|
+
"name": "Methionine",
|
|
419
|
+
"codons": [
|
|
420
|
+
"ATG"
|
|
421
|
+
],
|
|
422
|
+
"preferred": "ATG"
|
|
423
|
+
},
|
|
424
|
+
"N": {
|
|
425
|
+
"name": "Asparagine",
|
|
426
|
+
"codons": [
|
|
427
|
+
"AAT",
|
|
428
|
+
"AAC"
|
|
429
|
+
],
|
|
430
|
+
"preferred": "AAC"
|
|
431
|
+
},
|
|
432
|
+
"P": {
|
|
433
|
+
"name": "Proline",
|
|
434
|
+
"codons": [
|
|
435
|
+
"CCT",
|
|
436
|
+
"CCC",
|
|
437
|
+
"CCA",
|
|
438
|
+
"CCG"
|
|
439
|
+
],
|
|
440
|
+
"preferred": "CCC"
|
|
441
|
+
},
|
|
442
|
+
"Q": {
|
|
443
|
+
"name": "Glutamine",
|
|
444
|
+
"codons": [
|
|
445
|
+
"CAA",
|
|
446
|
+
"CAG"
|
|
447
|
+
],
|
|
448
|
+
"preferred": "CAG"
|
|
449
|
+
},
|
|
450
|
+
"R": {
|
|
451
|
+
"name": "Arginine",
|
|
452
|
+
"codons": [
|
|
453
|
+
"CGT",
|
|
454
|
+
"CGC",
|
|
455
|
+
"CGA",
|
|
456
|
+
"CGG",
|
|
457
|
+
"AGA",
|
|
458
|
+
"AGG"
|
|
459
|
+
],
|
|
460
|
+
"preferred": "AGG"
|
|
461
|
+
},
|
|
462
|
+
"S": {
|
|
463
|
+
"name": "Serine",
|
|
464
|
+
"codons": [
|
|
465
|
+
"TCT",
|
|
466
|
+
"TCC",
|
|
467
|
+
"TCA",
|
|
468
|
+
"TCG",
|
|
469
|
+
"AGT",
|
|
470
|
+
"AGC"
|
|
471
|
+
],
|
|
472
|
+
"preferred": "AGC"
|
|
473
|
+
},
|
|
474
|
+
"T": {
|
|
475
|
+
"name": "Threonine",
|
|
476
|
+
"codons": [
|
|
477
|
+
"ACT",
|
|
478
|
+
"ACC",
|
|
479
|
+
"ACA",
|
|
480
|
+
"ACG"
|
|
481
|
+
],
|
|
482
|
+
"preferred": "ACC"
|
|
483
|
+
},
|
|
484
|
+
"V": {
|
|
485
|
+
"name": "Valine",
|
|
486
|
+
"codons": [
|
|
487
|
+
"GTT",
|
|
488
|
+
"GTC",
|
|
489
|
+
"GTA",
|
|
490
|
+
"GTG"
|
|
491
|
+
],
|
|
492
|
+
"preferred": "GTG"
|
|
493
|
+
},
|
|
494
|
+
"W": {
|
|
495
|
+
"name": "Tryptophan",
|
|
496
|
+
"codons": [
|
|
497
|
+
"TGG"
|
|
498
|
+
],
|
|
499
|
+
"preferred": "TGG"
|
|
500
|
+
},
|
|
501
|
+
"Y": {
|
|
502
|
+
"name": "Tyrosine",
|
|
503
|
+
"codons": [
|
|
504
|
+
"TAT",
|
|
505
|
+
"TAC"
|
|
506
|
+
],
|
|
507
|
+
"preferred": "TAC"
|
|
508
|
+
},
|
|
509
|
+
"*": {
|
|
510
|
+
"name": "Stop",
|
|
511
|
+
"codons": [
|
|
512
|
+
"TAA",
|
|
513
|
+
"TAG",
|
|
514
|
+
"TGA"
|
|
515
|
+
],
|
|
516
|
+
"preferred": "TAA"
|
|
517
|
+
}
|
|
518
|
+
},
|
|
519
|
+
"gc_content": {
|
|
520
|
+
"overall": 0.44,
|
|
521
|
+
"description": "N. benthamiana genome average GC content is ~44%"
|
|
522
|
+
},
|
|
523
|
+
"notes": [
|
|
524
|
+
"Frequencies are normalized within each amino acid family",
|
|
525
|
+
"Preferred codons are based on highest frequency",
|
|
526
|
+
"Data optimized for high-level protein expression in N. benthamiana"
|
|
527
|
+
]
|
|
528
|
+
}
|