@aleph-ai/tinyaleph 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +187 -2
- package/backends/bioinformatics/binding.js +503 -0
- package/backends/bioinformatics/dna-computing.js +664 -0
- package/backends/bioinformatics/encoding.js +339 -0
- package/backends/bioinformatics/folding.js +454 -0
- package/backends/bioinformatics/genetic-code.js +269 -0
- package/backends/bioinformatics/index.js +522 -0
- package/backends/bioinformatics/transcription.js +221 -0
- package/backends/bioinformatics/translation.js +264 -0
- package/backends/index.js +25 -1
- package/core/compound.js +532 -0
- package/core/hilbert.js +454 -1
- package/core/index.js +106 -12
- package/core/inference.js +605 -0
- package/core/resonance.js +245 -616
- package/core/symbols/archetypes.js +478 -0
- package/core/symbols/base.js +302 -0
- package/core/symbols/elements.js +487 -0
- package/core/symbols/hieroglyphs.js +303 -0
- package/core/symbols/iching.js +471 -0
- package/core/symbols/index.js +77 -0
- package/core/symbols/tarot.js +211 -0
- package/core/symbols.js +22 -0
- package/docs/design/BIOINFORMATICS_BACKEND_DESIGN.md +493 -0
- package/docs/guide/06-symbolic-ai.md +370 -0
- package/docs/guide/README.md +2 -1
- package/docs/reference/05-symbolic-ai.md +570 -0
- package/docs/reference/06-bioinformatics.md +546 -0
- package/docs/reference/README.md +32 -2
- package/docs/theory/11-prgraph-memory.md +559 -0
- package/docs/theory/12-resonant-attention.md +661 -0
- package/modular.js +33 -1
- package/package.json +1 -1
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bioinformatics Prime Encoding
|
|
3
|
+
*
|
|
4
|
+
* Maps biological molecules to prime numbers following the tinyaleph paradigm.
|
|
5
|
+
*
|
|
6
|
+
* Design principles:
|
|
7
|
+
* - Nucleotides use small primes (2, 3, 5, 7, 11)
|
|
8
|
+
* - Base pairing has prime product symmetry
|
|
9
|
+
* Amino acids map to primes 23-107 (109 marks the stop signal, 113 an unknown residue)
|
|
10
|
+
* - Hydrophobicity correlates with prime magnitude
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
// ============================================================================
|
|
14
|
+
// Nucleotide Prime Basis
|
|
15
|
+
// ============================================================================
|
|
16
|
+
|
|
17
|
+
/**
 * Number assigned to each nucleotide letter.
 *
 * - Purines (A, G) take the larger values 7 and 11.
 * - DNA pyrimidines (T, C) take 2 and 3; uracil (U) takes 5.
 * - Watson-Crick pair products: A*T = 14, G*C = 33.
 * - N (unknown) is 1, which is not a prime; it serves as the
 *   multiplicative identity.
 */
const NUCLEOTIDE_PRIMES = {
  A: 7, // adenine (purine)
  T: 2, // thymine (pyrimidine), partner of A
  G: 11, // guanine (purine)
  C: 3, // cytosine (pyrimidine), partner of G
  U: 5, // uracil, stands in for thymine in RNA
  N: 1, // unknown nucleotide
};
|
|
31
|
+
|
|
32
|
+
/**
 * Inverse lookup: nucleotide number back to its letter.
 * Keys are listed in ascending order, which is also the order in which
 * JavaScript enumerates integer-like keys.
 */
const PRIME_TO_NUCLEOTIDE = {
  1: 'N',
  2: 'T',
  3: 'C',
  5: 'U',
  7: 'A',
  11: 'G',
};
|
|
43
|
+
|
|
44
|
+
/**
 * Letter-level DNA complement table (A<->T, G<->C). N maps to itself.
 * There is no entry for U.
 */
const DNA_COMPLEMENTS = {
  A: 'T',
  T: 'A',
  G: 'C',
  C: 'G',
  N: 'N',
};
|
|
50
|
+
|
|
51
|
+
/**
 * Complement table expressed on nucleotide numbers instead of letters.
 * Uracil (5) complements to adenine (7); adenine itself complements to
 * thymine (2), so the U entry is one-directional.
 */
const PRIME_COMPLEMENTS = {
  1: 1, // N -> N
  2: 7, // T -> A
  3: 11, // C -> G
  5: 7, // U -> A (RNA)
  7: 2, // A -> T
  11: 3, // G -> C
};
|
|
62
|
+
|
|
63
|
+
// ============================================================================
|
|
64
|
+
// Amino Acid Prime Basis
|
|
65
|
+
// ============================================================================
|
|
66
|
+
|
|
67
|
+
/**
 * One-letter amino acid code to prime.
 *
 * Layout of the table:
 * - 23-43:   nonpolar, aliphatic residues
 * - 47-59:   aromatic residues
 * - 61-79:   polar, uncharged residues
 * - 83-97:   positively charged (basic) residues
 * - 101-103: negatively charged (acidic) residues
 * - 107:     proline
 * - 109:     stop signal ('*')
 * - 113:     unknown residue ('X')
 */
const AMINO_ACID_PRIMES = {
  // Nonpolar, aliphatic
  G: 23, // glycine
  A: 29, // alanine
  V: 31, // valine
  L: 37, // leucine
  I: 41, // isoleucine
  M: 43, // methionine (sulfur-containing)

  // Aromatic
  F: 47, // phenylalanine
  W: 53, // tryptophan
  Y: 59, // tyrosine

  // Polar, uncharged
  S: 61, // serine
  T: 67, // threonine
  C: 71, // cysteine
  N: 73, // asparagine
  Q: 79, // glutamine

  // Positively charged (basic)
  K: 83, // lysine
  R: 89, // arginine
  H: 97, // histidine

  // Negatively charged (acidic)
  D: 101, // aspartic acid
  E: 103, // glutamic acid

  // Special case
  P: 107, // proline (cyclic)

  // Markers that are not residues
  '*': 109, // stop signal
  X: 113, // unknown amino acid
};
|
|
114
|
+
|
|
115
|
+
/**
 * Inverse lookup: prime back to the one-letter amino acid code.
 * Built once at module load by inverting AMINO_ACID_PRIMES.
 */
const PRIME_TO_AMINO_ACID = {};
for (const [letter, prime] of Object.entries(AMINO_ACID_PRIMES)) {
  PRIME_TO_AMINO_ACID[prime] = letter;
}
|
|
121
|
+
|
|
122
|
+
/**
 * Per-residue physical properties, keyed by one-letter code.
 *
 * Each entry carries `hydrophobicity`, `charge`, `mass` and `volume`.
 * Only the 20 standard residues are present; there is no entry for the
 * stop marker '*' or the unknown marker 'X'.
 *
 * NOTE(review): units are not stated in this file. The masses look like
 * residue masses in daltons and the hydrophobicity values look like the
 * Kyte-Doolittle scale - confirm. On that scale glycine is -0.4, not 0.0;
 * verify that the 0.0 below is intentional.
 */
const AMINO_ACID_PROPERTIES = {
  G: { hydrophobicity: 0.0, charge: 0, mass: 57.05, volume: 60.1 },
  A: { hydrophobicity: 1.8, charge: 0, mass: 71.08, volume: 88.6 },
  V: { hydrophobicity: 4.2, charge: 0, mass: 99.13, volume: 140.0 },
  L: { hydrophobicity: 3.8, charge: 0, mass: 113.16, volume: 166.7 },
  I: { hydrophobicity: 4.5, charge: 0, mass: 113.16, volume: 166.7 },
  M: { hydrophobicity: 1.9, charge: 0, mass: 131.20, volume: 162.9 },
  F: { hydrophobicity: 2.8, charge: 0, mass: 147.18, volume: 189.9 },
  W: { hydrophobicity: -0.9, charge: 0, mass: 186.21, volume: 227.8 },
  Y: { hydrophobicity: -1.3, charge: 0, mass: 163.18, volume: 193.6 },
  S: { hydrophobicity: -0.8, charge: 0, mass: 87.08, volume: 89.0 },
  T: { hydrophobicity: -0.7, charge: 0, mass: 101.11, volume: 116.1 },
  C: { hydrophobicity: 2.5, charge: 0, mass: 103.14, volume: 108.5 },
  N: { hydrophobicity: -3.5, charge: 0, mass: 114.10, volume: 114.1 },
  Q: { hydrophobicity: -3.5, charge: 0, mass: 128.13, volume: 143.8 },
  K: { hydrophobicity: -3.9, charge: 1, mass: 128.17, volume: 168.6 },
  R: { hydrophobicity: -4.5, charge: 1, mass: 156.19, volume: 173.4 },
  H: { hydrophobicity: -3.2, charge: 0.5, mass: 137.14, volume: 153.2 },
  D: { hydrophobicity: -3.5, charge: -1, mass: 115.09, volume: 111.1 },
  E: { hydrophobicity: -3.5, charge: -1, mass: 129.12, volume: 138.4 },
  P: { hydrophobicity: -1.6, charge: 0, mass: 97.12, volume: 112.7 },
};
|
|
147
|
+
|
|
148
|
+
// ============================================================================
|
|
149
|
+
// Codon Encoding
|
|
150
|
+
// ============================================================================
|
|
151
|
+
|
|
152
|
+
/**
 * Pack a three-letter codon into a single integer signature.
 *
 * Each letter is looked up (case-insensitively) in NUCLEOTIDE_PRIMES and
 * the three values are combined positionally: first + second * 100 +
 * third * 10000. The result is a signature, not itself a prime.
 *
 * @param {string} codon - Exactly three nucleotide letters.
 * @returns {number} The positional signature.
 * @throws {Error} If the codon is not three long or holds an unknown letter.
 */
function encodeCodon(codon) {
  if (codon.length !== 3) {
    throw new Error(`Codon must be 3 nucleotides, got: ${codon}`);
  }

  const values = [0, 1, 2].map((i) => NUCLEOTIDE_PRIMES[codon[i].toUpperCase()]);
  if (values.some((value) => value === undefined)) {
    throw new Error(`Invalid codon: ${codon}`);
  }

  const [first, second, third] = values;
  return first + second * 100 + third * 10000;
}
|
|
171
|
+
|
|
172
|
+
/**
|
|
173
|
+
* Decode a codon prime signature back to nucleotides
|
|
174
|
+
*/
|
|
175
|
+
function decodeCodon(signature) {
|
|
176
|
+
const n3 = Math.floor(signature / 10000);
|
|
177
|
+
const remainder = signature % 10000;
|
|
178
|
+
const n2 = Math.floor(remainder / 100);
|
|
179
|
+
const n1 = remainder % 100;
|
|
180
|
+
|
|
181
|
+
return (PRIME_TO_NUCLEOTIDE[n1] || '?') +
|
|
182
|
+
(PRIME_TO_NUCLEOTIDE[n2] || '?') +
|
|
183
|
+
(PRIME_TO_NUCLEOTIDE[n3] || '?');
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
// ============================================================================
|
|
187
|
+
// Encoding Functions
|
|
188
|
+
// ============================================================================
|
|
189
|
+
|
|
190
|
+
/**
 * Convert a DNA string into an array of nucleotide numbers.
 *
 * The input is upper-cased first. Any character with no entry in
 * NUCLEOTIDE_PRIMES is skipped silently, so the output may be shorter than
 * the input. Because the table also holds U, uracil is accepted here too.
 *
 * @param {string} sequence - Nucleotide letters, any case.
 * @returns {number[]} One number per recognised letter, in input order.
 */
function encodeDNA(sequence) {
  const encoded = [];
  for (const letter of sequence.toUpperCase().split('')) {
    const value = NUCLEOTIDE_PRIMES[letter];
    if (value !== undefined) {
      encoded.push(value);
    }
  }
  return encoded;
}
|
|
198
|
+
|
|
199
|
+
/**
 * Turn an array of nucleotide numbers back into a letter string.
 * Values with no entry in PRIME_TO_NUCLEOTIDE become '?'.
 *
 * @param {number[]} primes - Encoded nucleotides.
 * @returns {string} One character per input element.
 */
function decodeDNA(primes) {
  const letters = primes.map((value) => {
    const letter = PRIME_TO_NUCLEOTIDE[value];
    return letter || '?';
  });
  return letters.join('');
}
|
|
205
|
+
|
|
206
|
+
/**
 * Convert an RNA string into an array of nucleotide numbers.
 *
 * The input is upper-cased first. Only the RNA alphabet A, U, G, C plus the
 * unknown marker N is encoded; every other character (including T) is
 * skipped silently.
 *
 * N is kept so that an unknown base still occupies its position. Dropping
 * it would shift every later base and break the codon frame. Keeping it also
 * agrees with the rest of this module, where N has a table value, is allowed
 * in RNA by detectSequenceType, and is decoded by decodeRNA.
 *
 * @param {string} sequence - RNA letters, any case.
 * @returns {number[]} One number per recognised letter, in input order.
 */
function encodeRNA(sequence) {
  const RNA_ALPHABET = 'AUGCN';
  return sequence.toUpperCase().split('')
    .filter((letter) => RNA_ALPHABET.includes(letter))
    .map((letter) => NUCLEOTIDE_PRIMES[letter]);
}
|
|
214
|
+
|
|
215
|
+
/**
 * Turn an array of nucleotide numbers into an RNA letter string.
 *
 * The thymine value 2 is rendered as 'U', so an array encoded from DNA
 * reads as RNA. Values with no entry in PRIME_TO_NUCLEOTIDE become '?'.
 *
 * @param {number[]} primes - Encoded nucleotides.
 * @returns {string} One character per input element.
 */
function decodeRNA(primes) {
  return primes
    .map((value) => (value === 2 ? 'U' : PRIME_TO_NUCLEOTIDE[value] || '?'))
    .join('');
}
|
|
224
|
+
|
|
225
|
+
/**
 * Convert a protein string (one-letter codes) into an array of primes.
 *
 * The input is upper-cased first. Characters with no entry in
 * AMINO_ACID_PRIMES are skipped silently.
 *
 * @param {string} sequence - One-letter amino acid codes, any case.
 * @returns {number[]} One prime per recognised character, in input order.
 */
function encodeProtein(sequence) {
  const encoded = [];
  for (const letter of sequence.toUpperCase().split('')) {
    const prime = AMINO_ACID_PRIMES[letter];
    if (prime !== undefined) {
      encoded.push(prime);
    }
  }
  return encoded;
}
|
|
233
|
+
|
|
234
|
+
/**
 * Turn an array of amino acid primes back into one-letter codes.
 * Values with no entry in PRIME_TO_AMINO_ACID become '?'.
 *
 * @param {number[]} primes - Encoded residues.
 * @returns {string} One character per input element.
 */
function decodeProtein(primes) {
  const letters = primes.map((value) => {
    const letter = PRIME_TO_AMINO_ACID[value];
    return letter || '?';
  });
  return letters.join('');
}
|
|
240
|
+
|
|
241
|
+
/**
 * Guess what kind of sequence a string holds.
 *
 * Checks run in a fixed order and the first match wins:
 *   1. 'FASTA'   - the trimmed input starts with '>'
 *   2. 'DNA'     - only A, T, G, C, N
 *   3. 'RNA'     - only A, U, G, C, N and at least one U
 *   4. 'PROTEIN' - only the one-letter codes ACDEFGHIKLMNPQRSTVWY, X or '*'
 *   5. 'UNKNOWN' - anything else, including the empty string
 *
 * Whitespace and '>' are stripped and the text is upper-cased before the
 * alphabet checks. A string that fits several alphabets (e.g. "ACG") is
 * reported as the earliest match.
 *
 * @param {string} sequence - Raw sequence text.
 * @returns {'FASTA'|'DNA'|'RNA'|'PROTEIN'|'UNKNOWN'} The detected type.
 */
function detectSequenceType(sequence) {
  const clean = sequence.replace(/[\s\n>]/g, '').toUpperCase();

  if (sequence.trim().startsWith('>')) {
    return 'FASTA';
  }

  // Ordered rule table; the first rule whose predicate holds decides.
  const rules = [
    ['DNA', () => /^[ATGCN]+$/.test(clean)],
    ['RNA', () => clean.includes('U') && /^[AUGCN]+$/.test(clean)],
    ['PROTEIN', () => /^[ACDEFGHIKLMNPQRSTVWYX\*]+$/.test(clean)],
  ];
  const hit = rules.find(([, matches]) => matches());

  return hit ? hit[0] : 'UNKNOWN';
}
|
|
262
|
+
|
|
263
|
+
/**
 * Parse FASTA text into a list of records.
 *
 * A line whose first non-whitespace character is '>' starts a new record;
 * the rest of that line (trimmed) becomes the header. Every following line
 * up to the next header is appended to the record's sequence with all
 * whitespace removed. Lines before the first header are ignored.
 *
 * Lines are trimmed before the header test so that indented input, which
 * detectSequenceType already reports as FASTA, is parsed rather than
 * dropped. Line endings may be "\n", "\r\n" or a bare "\r".
 *
 * @param {string} input - FASTA-formatted text.
 * @returns {{header: string, sequence: string}[]} Records in input order;
 *   empty when the input has no header line.
 */
function parseFASTA(input) {
  const entries = [];
  let current = null;

  for (const rawLine of input.split(/\r\n|\r|\n/)) {
    const line = rawLine.trim();

    if (line.startsWith('>')) {
      current = { header: line.slice(1).trim(), sequence: '' };
      entries.push(current);
    } else if (current) {
      // Remove internal spacing too, not just the ends of the line.
      current.sequence += line.replace(/\s+/g, '');
    }
  }

  return entries;
}
|
|
283
|
+
|
|
284
|
+
/**
 * Look up the property record for a one-letter amino acid code.
 *
 * @param {string} aa - One-letter code, any case.
 * @returns {?{hydrophobicity: number, charge: number, mass: number, volume: number}}
 *   The entry from AMINO_ACID_PROPERTIES, or null when there is none.
 */
function getAminoAcidProperties(aa) {
  const record = AMINO_ACID_PROPERTIES[aa.toUpperCase()];
  return record ? record : null;
}
|
|
290
|
+
|
|
291
|
+
/**
 * Resolve an amino acid prime to the residue's charge.
 *
 * @param {number} prime - Value from AMINO_ACID_PRIMES.
 * @returns {number} The `charge` field of the residue, or 0 when the prime
 *   is unknown or the residue has no property record (e.g. stop, unknown).
 */
function getChargeFromPrime(prime) {
  const residue = PRIME_TO_AMINO_ACID[prime];
  if (!residue) {
    return 0;
  }

  const record = AMINO_ACID_PROPERTIES[residue];
  if (record == null) {
    return 0;
  }
  return record.charge || 0;
}
|
|
299
|
+
|
|
300
|
+
/**
 * Resolve an amino acid prime to the residue's hydrophobicity.
 *
 * @param {number} prime - Value from AMINO_ACID_PRIMES.
 * @returns {number} The `hydrophobicity` field of the residue, or 0 when
 *   the prime is unknown or the residue has no property record.
 */
function getHydrophobicityFromPrime(prime) {
  const residue = PRIME_TO_AMINO_ACID[prime];
  if (!residue) {
    return 0;
  }

  const record = AMINO_ACID_PROPERTIES[residue];
  if (record == null) {
    return 0;
  }
  return record.hydrophobicity || 0;
}
|
|
308
|
+
|
|
309
|
+
module.exports = {
|
|
310
|
+
// Nucleotide mappings
|
|
311
|
+
NUCLEOTIDE_PRIMES,
|
|
312
|
+
PRIME_TO_NUCLEOTIDE,
|
|
313
|
+
DNA_COMPLEMENTS,
|
|
314
|
+
PRIME_COMPLEMENTS,
|
|
315
|
+
|
|
316
|
+
// Amino acid mappings
|
|
317
|
+
AMINO_ACID_PRIMES,
|
|
318
|
+
PRIME_TO_AMINO_ACID,
|
|
319
|
+
AMINO_ACID_PROPERTIES,
|
|
320
|
+
|
|
321
|
+
// Codon functions
|
|
322
|
+
encodeCodon,
|
|
323
|
+
decodeCodon,
|
|
324
|
+
|
|
325
|
+
// Sequence encoding/decoding
|
|
326
|
+
encodeDNA,
|
|
327
|
+
decodeDNA,
|
|
328
|
+
encodeRNA,
|
|
329
|
+
decodeRNA,
|
|
330
|
+
encodeProtein,
|
|
331
|
+
decodeProtein,
|
|
332
|
+
|
|
333
|
+
// Utilities
|
|
334
|
+
detectSequenceType,
|
|
335
|
+
parseFASTA,
|
|
336
|
+
getAminoAcidProperties,
|
|
337
|
+
getChargeFromPrime,
|
|
338
|
+
getHydrophobicityFromPrime,
|
|
339
|
+
};
|