@aleph-ai/tinyaleph 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +187 -2
- package/backends/bioinformatics/binding.js +503 -0
- package/backends/bioinformatics/dna-computing.js +664 -0
- package/backends/bioinformatics/encoding.js +339 -0
- package/backends/bioinformatics/folding.js +454 -0
- package/backends/bioinformatics/genetic-code.js +269 -0
- package/backends/bioinformatics/index.js +522 -0
- package/backends/bioinformatics/transcription.js +221 -0
- package/backends/bioinformatics/translation.js +264 -0
- package/backends/index.js +25 -1
- package/core/compound.js +532 -0
- package/core/hilbert.js +454 -1
- package/core/index.js +106 -12
- package/core/inference.js +605 -0
- package/core/resonance.js +245 -616
- package/core/symbols/archetypes.js +478 -0
- package/core/symbols/base.js +302 -0
- package/core/symbols/elements.js +487 -0
- package/core/symbols/hieroglyphs.js +303 -0
- package/core/symbols/iching.js +471 -0
- package/core/symbols/index.js +77 -0
- package/core/symbols/tarot.js +211 -0
- package/core/symbols.js +22 -0
- package/docs/design/BIOINFORMATICS_BACKEND_DESIGN.md +493 -0
- package/docs/guide/06-symbolic-ai.md +370 -0
- package/docs/guide/README.md +2 -1
- package/docs/reference/05-symbolic-ai.md +570 -0
- package/docs/reference/06-bioinformatics.md +546 -0
- package/docs/reference/README.md +32 -2
- package/docs/theory/11-prgraph-memory.md +559 -0
- package/docs/theory/12-resonant-attention.md +661 -0
- package/modular.js +33 -1
- package/package.json +1 -1
- package/physics/index.js +16 -0
- package/physics/kuramoto-coupled-ladder.js +603 -0
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Genetic Code - Codon to Amino Acid Translation
|
|
3
|
+
*
|
|
4
|
+
* The universal genetic code mapping 64 codons to 20 amino acids + stop.
|
|
5
|
+
* Also includes alternative genetic codes for mitochondria and other organisms.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// ============================================================================
|
|
9
|
+
// Standard Genetic Code
|
|
10
|
+
// ============================================================================
|
|
11
|
+
|
|
12
|
+
/**
 * Standard ("universal") genetic code.
 *
 * Lookup table from each of the 64 RNA codons (alphabet U, C, A, G) to a
 * single-letter amino-acid code. The three stop codons map to '*'.
 *
 * Layout: one row per pair of leading bases; within a row the third base
 * runs U, C, A, G. Key order is first base, then second base, then third.
 */
const STANDARD_GENETIC_CODE = {
    // First base U
    UUU: 'F', UUC: 'F', UUA: 'L', UUG: 'L', // Phe, Phe, Leu, Leu
    UCU: 'S', UCC: 'S', UCA: 'S', UCG: 'S', // Ser
    UAU: 'Y', UAC: 'Y', UAA: '*', UAG: '*', // Tyr, Tyr, stop (ochre), stop (amber)
    UGU: 'C', UGC: 'C', UGA: '*', UGG: 'W', // Cys, Cys, stop (opal), Trp

    // First base C
    CUU: 'L', CUC: 'L', CUA: 'L', CUG: 'L', // Leu
    CCU: 'P', CCC: 'P', CCA: 'P', CCG: 'P', // Pro
    CAU: 'H', CAC: 'H', CAA: 'Q', CAG: 'Q', // His, His, Gln, Gln
    CGU: 'R', CGC: 'R', CGA: 'R', CGG: 'R', // Arg

    // First base A
    AUU: 'I', AUC: 'I', AUA: 'I', AUG: 'M', // Ile, Ile, Ile, Met (start)
    ACU: 'T', ACC: 'T', ACA: 'T', ACG: 'T', // Thr
    AAU: 'N', AAC: 'N', AAA: 'K', AAG: 'K', // Asn, Asn, Lys, Lys
    AGU: 'S', AGC: 'S', AGA: 'R', AGG: 'R', // Ser, Ser, Arg, Arg

    // First base G
    GUU: 'V', GUC: 'V', GUA: 'V', GUG: 'V', // Val
    GCU: 'A', GCC: 'A', GCA: 'A', GCG: 'A', // Ala
    GAU: 'D', GAC: 'D', GAA: 'E', GAG: 'E', // Asp, Asp, Glu, Glu
    GGU: 'G', GGC: 'G', GGA: 'G', GGG: 'G', // Gly
};
|
|
74
|
+
|
|
75
|
+
/**
 * Vertebrate mitochondrial genetic code.
 *
 * A copy of STANDARD_GENETIC_CODE with four codons reassigned:
 *   UGA        -> W (tryptophan, was stop)
 *   AGA, AGG   -> * (stop, was arginine)
 *   AUA        -> M (methionine, was isoleucine)
 */
const VERTEBRATE_MITOCHONDRIAL_CODE = {
    ...STANDARD_GENETIC_CODE,
    UGA: 'W',
    AGA: '*', AGG: '*',
    AUA: 'M',
};
|
|
86
|
+
|
|
87
|
+
/**
 * Yeast mitochondrial genetic code.
 *
 * A copy of STANDARD_GENETIC_CODE with these codons reassigned:
 *   UGA                 -> W (tryptophan, was stop)
 *   CUU, CUC, CUA, CUG  -> T (threonine, was leucine)
 *   AUA                 -> M (methionine, was isoleucine)
 */
const YEAST_MITOCHONDRIAL_CODE = {
    ...STANDARD_GENETIC_CODE,
    UGA: 'W',
    CUU: 'T',
    CUC: 'T',
    CUA: 'T',
    CUG: 'T',
    AUA: 'M',
};
|
|
96
|
+
|
|
97
|
+
// ============================================================================
|
|
98
|
+
// Codon Properties
|
|
99
|
+
// ============================================================================
|
|
100
|
+
|
|
101
|
+
/**
 * Start (initiation) codons, keyed by code-type name.
 *
 * The keys are the strings accepted as `codeType` by isStartCodon; each value
 * lists the RNA codons treated as start codons under that code type.
 */
const START_CODONS = {
    standard: [
        'AUG',
    ],
    vertebrate_mitochondrial: [
        'AUG',
        'AUA',
        'AUU',
    ],
    bacterial: [
        'AUG',
        'GUG',
        'UUG',
    ],
};
|
|
109
|
+
|
|
110
|
+
/**
 * Stop (termination) codons, keyed by code-type name.
 *
 * The key set mirrors START_CODONS so the same code-type string can be used
 * to index both tables. The `bacterial` entry lists the same three stop
 * codons as `standard`: the bacterial code type differs from the standard
 * one only in its start codons.
 */
const STOP_CODONS = {
    standard: ['UAA', 'UAG', 'UGA'],
    vertebrate_mitochondrial: ['UAA', 'UAG', 'AGA', 'AGG'],
    bacterial: ['UAA', 'UAG', 'UGA'],
};
|
|
117
|
+
|
|
118
|
+
/**
 * Codon usage table for E. coli.
 *
 * Maps each of the 64 RNA codons to a usage value between 0 and 1. The values
 * of codons that encode the same amino acid add up to roughly 1, so each
 * entry reads as the share of that amino acid (or of stop signals) written
 * with that codon; single-codon amino acids (AUG, UGG) are therefore 1.00.
 *
 * Rows are grouped by the first two bases; the third base runs U, C, A, G.
 */
const CODON_USAGE_ECOLI = {
    // UU-, UC-, UA-, UG-
    UUU: 0.58, UUC: 0.42, UUA: 0.14, UUG: 0.13,
    UCU: 0.17, UCC: 0.15, UCA: 0.14, UCG: 0.14,
    UAU: 0.59, UAC: 0.41, UAA: 0.61, UAG: 0.09,
    UGU: 0.46, UGC: 0.54, UGA: 0.30, UGG: 1.00,

    // CU-, CC-, CA-, CG-
    CUU: 0.12, CUC: 0.10, CUA: 0.04, CUG: 0.47,
    CCU: 0.18, CCC: 0.13, CCA: 0.20, CCG: 0.49,
    CAU: 0.57, CAC: 0.43, CAA: 0.34, CAG: 0.66,
    CGU: 0.36, CGC: 0.36, CGA: 0.07, CGG: 0.11,

    // AU-, AC-, AA-, AG-
    AUU: 0.49, AUC: 0.39, AUA: 0.11, AUG: 1.00,
    ACU: 0.19, ACC: 0.40, ACA: 0.17, ACG: 0.25,
    AAU: 0.49, AAC: 0.51, AAA: 0.74, AAG: 0.26,
    AGU: 0.16, AGC: 0.25, AGA: 0.07, AGG: 0.04,

    // GU-, GC-, GA-, GG-
    GUU: 0.28, GUC: 0.20, GUA: 0.15, GUG: 0.37,
    GCU: 0.18, GCC: 0.26, GCA: 0.23, GCG: 0.33,
    GAU: 0.63, GAC: 0.37, GAA: 0.68, GAG: 0.32,
    GGU: 0.35, GGC: 0.37, GGA: 0.13, GGG: 0.15,
};
|
|
140
|
+
|
|
141
|
+
// ============================================================================
|
|
142
|
+
// Codon Functions
|
|
143
|
+
// ============================================================================
|
|
144
|
+
|
|
145
|
+
/**
 * Translate one codon into its single-letter amino-acid code.
 *
 * The codon is upper-cased and every T is rewritten to U before the lookup,
 * so lower-case and DNA-style input are accepted.
 *
 * @param {string} codon - Codon to translate, e.g. 'AUG' or 'atg'.
 * @param {Object<string, string>} [geneticCode=STANDARD_GENETIC_CODE] - Codon -> amino-acid table.
 * @returns {string} The table entry for the codon, or '?' when the table has
 *   no (truthy) entry for it.
 */
function translateCodon(codon, geneticCode = STANDARD_GENETIC_CODE) {
    const rnaCodon = codon.toUpperCase().replace(/T/g, 'U');
    const aminoAcid = geneticCode[rnaCodon];
    return aminoAcid ? aminoAcid : '?';
}
|
|
152
|
+
|
|
153
|
+
/**
 * List every codon that a genetic code assigns to an amino acid (its
 * degenerate codon set).
 *
 * @param {string} aa - Single-letter amino-acid code; matched case-insensitively
 *   by upper-casing it. Pass '*' to list the stop codons.
 * @param {Object<string, string>} [geneticCode=STANDARD_GENETIC_CODE] - Codon -> amino-acid table.
 * @returns {string[]} Matching codons in the table's own key order; empty when
 *   nothing in the table maps to `aa`.
 */
function getCodonsForAminoAcid(aa, geneticCode = STANDARD_GENETIC_CODE) {
    // Normalise once up front rather than once per table entry.
    const target = aa.toUpperCase();

    return Object.entries(geneticCode)
        .filter(([, aminoAcid]) => aminoAcid === target)
        .map(([codon]) => codon);
}
|
|
161
|
+
|
|
162
|
+
/**
 * Count how many codons a genetic code assigns to an amino acid.
 *
 * @param {string} aa - Single-letter amino-acid code (forwarded to getCodonsForAminoAcid).
 * @param {Object<string, string>} [geneticCode=STANDARD_GENETIC_CODE] - Codon -> amino-acid table.
 * @returns {number} Length of the codon list returned by getCodonsForAminoAcid.
 */
function getCodonDegeneracy(aa, geneticCode = STANDARD_GENETIC_CODE) {
    const synonymous = getCodonsForAminoAcid(aa, geneticCode);
    return synonymous.length;
}
|
|
168
|
+
|
|
169
|
+
/**
 * Check whether a codon is a start codon for a given code type.
 *
 * The codon is upper-cased and T is rewritten to U before the check, so
 * lower-case and DNA-style input are accepted.
 *
 * @param {string} codon - Codon to test, e.g. 'AUG' or 'atg'.
 * @param {string} [codeType='standard'] - Key into START_CODONS.
 * @returns {boolean} True when START_CODONS lists the codon under `codeType`;
 *   false otherwise, including when `codeType` is not a known key.
 */
function isStartCodon(codon, codeType = 'standard') {
    const normalized = codon.toUpperCase().replace(/T/g, 'U');
    const startCodons = START_CODONS[codeType];

    // Only real codon lists count. A codeType such as 'constructor' or
    // 'toString' resolves to an inherited Object.prototype member, which is
    // not nullish and has no `includes` method, so it must be rejected here
    // instead of being called.
    return Array.isArray(startCodons) && startCodons.includes(normalized);
}
|
|
176
|
+
|
|
177
|
+
/**
 * Check whether a codon is a stop codon under a genetic code.
 *
 * The codon is upper-cased and T is rewritten to U before the lookup.
 *
 * @param {string} codon - Codon to test, e.g. 'UAA' or 'taa'.
 * @param {Object<string, string>} [geneticCode=STANDARD_GENETIC_CODE] - Codon -> amino-acid table.
 * @returns {boolean} True when the table maps the codon to '*'.
 */
function isStopCodon(codon, geneticCode = STANDARD_GENETIC_CODE) {
    const rnaCodon = codon.toUpperCase().replace(/T/g, 'U');
    const product = geneticCode[rnaCodon];
    return product === '*';
}
|
|
184
|
+
|
|
185
|
+
/**
 * Calculate the GC content of a sequence.
 *
 * Counts the characters G and C (case-insensitively) and divides by the total
 * number of characters in the sequence; every character counts towards the
 * total, whatever it is.
 *
 * @param {string} sequence - Nucleotide sequence (DNA or RNA).
 * @returns {number} Fraction of characters that are G or C, in the range 0-1.
 *   An empty sequence yields 0.
 */
function calculateGCContent(sequence) {
    const clean = sequence.toUpperCase();

    // Without this guard an empty sequence would evaluate 0 / 0 and return NaN.
    if (clean.length === 0) return 0;

    const gc = (clean.match(/[GC]/g) || []).length;
    return gc / clean.length;
}
|
|
193
|
+
|
|
194
|
+
/**
 * Calculate a codon adaptation index (CAI) style score for a sequence.
 *
 * The sequence is upper-cased, T is rewritten to U, and it is read in frame
 * from position 0 as consecutive three-letter codons; a trailing partial
 * codon is ignored. The result is the geometric mean of the usage-table
 * values of those codons. Codons with no positive table value are skipped
 * and do not count towards the mean.
 *
 * NOTE(review): the classical CAI weights each codon by its usage relative to
 * the most-used synonymous codon. This function uses the table values as-is;
 * confirm that is intended for tables holding per-amino-acid fractions.
 *
 * @param {string} sequence - Coding sequence (DNA or RNA).
 * @param {Object<string, number>} [usageTable=CODON_USAGE_ECOLI] - Codon -> usage value.
 * @returns {number} Geometric mean of the scored codons' table values, or 0
 *   when no codon could be scored.
 */
function calculateCAI(sequence, usageTable = CODON_USAGE_ECOLI) {
    const rna = sequence.toUpperCase().replace(/T/g, 'U');

    // Accumulate in log space: mean of logs, exponentiated at the end.
    let logTotal = 0;
    let scored = 0;

    for (let start = 0; start + 3 <= rna.length; start += 3) {
        const weight = usageTable[rna.slice(start, start + 3)];
        if (weight > 0) {
            logTotal += Math.log(weight);
            scored += 1;
        }
    }

    if (scored === 0) {
        return 0;
    }
    return Math.exp(logTotal / scored);
}
|
|
219
|
+
|
|
220
|
+
/**
 * List the other codons that encode the same amino acid as a given codon
 * (its possible synonymous substitutions).
 *
 * @param {string} codon - Codon to look up; case-insensitive, T is treated as U.
 * @param {Object<string, string>} [geneticCode=STANDARD_GENETIC_CODE] - Codon -> amino-acid table.
 * @returns {string[]} Codons for the same amino acid, excluding the input
 *   codon itself. Empty when the codon is unknown ('?') or a stop codon ('*').
 */
function getSynonymousCodons(codon, geneticCode = STANDARD_GENETIC_CODE) {
    const aminoAcid = translateCodon(codon, geneticCode);
    const hasAlternatives = aminoAcid !== '?' && aminoAcid !== '*';
    if (!hasAlternatives) {
        return [];
    }

    // Same normalisation as translateCodon, so the input can be recognised
    // among the returned table keys.
    const ownCodon = codon.toUpperCase().replace(/T/g, 'U');
    const family = getCodonsForAminoAcid(aminoAcid, geneticCode);
    return family.filter((candidate) => candidate !== ownCodon);
}
|
|
230
|
+
|
|
231
|
+
/**
 * Classify the effect of replacing one codon with another.
 *
 * Both codons are upper-cased with T rewritten to U, then looked up in the
 * genetic code. The checks run in this order:
 *   'unknown'     - either codon has no entry in the table
 *   'synonymous'  - both codons give the same table entry
 *   'nonsense'    - the mutated codon is a stop codon ('*')
 *   'readthrough' - the original codon was a stop codon
 *   'missense'    - anything else (one amino acid replaced by another)
 *
 * @param {string} originalCodon - Codon before the mutation.
 * @param {string} mutatedCodon - Codon after the mutation.
 * @param {Object<string, string>} [geneticCode=STANDARD_GENETIC_CODE] - Codon -> amino-acid table.
 * @returns {string} One of the five labels listed above.
 */
function classifyMutation(originalCodon, mutatedCodon, geneticCode = STANDARD_GENETIC_CODE) {
    const [before, after] = [originalCodon, mutatedCodon]
        .map((codon) => codon.toUpperCase().replace(/T/g, 'U'))
        .map((rnaCodon) => geneticCode[rnaCodon]);

    if (!(before && after)) {
        return 'unknown';
    }
    if (before === after) {
        return 'synonymous';
    }
    if (after === '*') {
        return 'nonsense';
    }
    return before === '*' ? 'readthrough' : 'missense';
}
|
|
247
|
+
|
|
248
|
+
module.exports = {
|
|
249
|
+
// Genetic codes
|
|
250
|
+
STANDARD_GENETIC_CODE,
|
|
251
|
+
VERTEBRATE_MITOCHONDRIAL_CODE,
|
|
252
|
+
YEAST_MITOCHONDRIAL_CODE,
|
|
253
|
+
|
|
254
|
+
// Codon lists
|
|
255
|
+
START_CODONS,
|
|
256
|
+
STOP_CODONS,
|
|
257
|
+
CODON_USAGE_ECOLI,
|
|
258
|
+
|
|
259
|
+
// Functions
|
|
260
|
+
translateCodon,
|
|
261
|
+
getCodonsForAminoAcid,
|
|
262
|
+
getCodonDegeneracy,
|
|
263
|
+
isStartCodon,
|
|
264
|
+
isStopCodon,
|
|
265
|
+
calculateGCContent,
|
|
266
|
+
calculateCAI,
|
|
267
|
+
getSynonymousCodons,
|
|
268
|
+
classifyMutation,
|
|
269
|
+
};
|