@aleph-ai/tinyaleph 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,221 @@
1
+ /**
2
+ * Transcription Operator - DNA → RNA
3
+ *
4
+ * Implements the first step of the Central Dogma:
5
+ * DNA is transcribed into messenger RNA.
6
+ *
7
+ * In prime space: T(2) → U(5) substitution
8
+ */
9
+
10
+ const { NUCLEOTIDE_PRIMES, PRIME_COMPLEMENTS, PRIME_TO_NUCLEOTIDE } = require('./encoding');
11
+
12
/**
 * TranscriptionOperator
 *
 * Transforms DNA prime sequences to RNA prime sequences.
 * Models the biological process of transcription.
 *
 * Prime codes used directly by this class: T = 2, U = 5, A = 7.
 */
class TranscriptionOperator {
    /**
     * @param {object} [options] - Default options merged into every transcribe() call
     * @param {boolean} [options.senseStrand=true] - If true, read the given strand
     *   as-is (same as mRNA except T→U); if false, read its complement.
     */
    constructor(options = {}) {
        this.options = {
            senseStrand: true,
            ...options
        };
    }

    /**
     * Basic transcription: T → U substitution
     * @param {number[]} dnaPrimes - DNA sequence as prime array
     * @returns {number[]} RNA sequence as prime array (new array)
     */
    apply(dnaPrimes) {
        return dnaPrimes.map(p => (p === 2 ? 5 : p)); // T(2) → U(5)
    }

    /**
     * Full transcription with promoter recognition.
     *
     * @param {number[]} dnaPrimes - DNA sequence as prime array
     * @param {object} [options] - Per-call overrides of the constructor options
     * @param {boolean} [options.force] - Transcribe even when no promoter is found
     * @param {boolean} [options.senseStrand] - See constructor
     * @param {number} [options.startPos] - Explicit start index (skips promoter offset)
     * @param {number} [options.endPos] - Explicit end index (skips terminator search)
     * @returns {object} On success: { success, rna, startPosition, endPosition,
     *   length, promoterPosition }. On failure: { success: false, error, dnaPrimes }.
     */
    transcribe(dnaPrimes, options = {}) {
        const opts = { ...this.options, ...options };

        // Find promoter sequence (TATA box analog). The search always runs so that
        // `force` only suppresses the failure; it never fakes a promoter at index 0.
        const promoterPattern = [2, 7, 2, 7]; // TATA in primes
        const promoterPos = this.findPattern(dnaPrimes, promoterPattern);

        if (promoterPos === -1 && !opts.force) {
            return {
                success: false,
                error: 'No promoter sequence found',
                dnaPrimes
            };
        }

        // Determine which strand to transcribe
        const template = opts.senseStrand ? dnaPrimes : this.complement(dnaPrimes);

        // Transcription start: explicit, else 8 positions past the promoter start,
        // else (forced, no promoter) the beginning of the sequence.
        let startPos;
        if (opts.startPos !== undefined) {
            startPos = opts.startPos;
        } else if (promoterPos !== -1) {
            startPos = Math.min(promoterPos + 8, dnaPrimes.length);
        } else {
            startPos = 0;
        }

        // Transcription end: explicit, else just past the terminator region, else
        // the end of the template. findTerminator() may point past the end of the
        // sequence, so clamp to keep the reported endPosition a real index.
        let endPos;
        if (opts.endPos !== undefined) {
            endPos = opts.endPos;
        } else {
            const terminatorEnd = this.findTerminator(template, startPos);
            endPos = terminatorEnd === null
                ? template.length
                : Math.min(terminatorEnd, template.length);
        }

        // Transcribe the region
        const rnaPrimes = this.apply(template.slice(startPos, endPos));

        return {
            success: true,
            rna: rnaPrimes,
            startPosition: startPos,
            endPosition: endPos,
            length: rnaPrimes.length,
            promoterPosition: promoterPos
        };
    }

    /**
     * Get complementary strand (template strand from sense strand).
     * Primes without an entry in PRIME_COMPLEMENTS are passed through unchanged.
     * @param {number[]} primes
     * @returns {number[]} New array of complemented primes
     */
    complement(primes) {
        return primes.map(p => PRIME_COMPLEMENTS[p] || p);
    }

    /**
     * Get reverse complement
     * @param {number[]} primes
     * @returns {number[]} New array; the input is not mutated
     */
    reverseComplement(primes) {
        // complement() returns a fresh array, so the in-place reverse is safe.
        return this.complement(primes).reverse();
    }

    /**
     * Find the first occurrence of a pattern in a sequence.
     * @param {number[]} sequence
     * @param {number[]} pattern
     * @param {number} [startFrom=0] - Index at which the search begins
     * @returns {number} Index of the first match at or after startFrom, or -1
     */
    findPattern(sequence, pattern, startFrom = 0) {
        const lastStart = sequence.length - pattern.length;
        for (let i = startFrom; i <= lastStart; i++) {
            if (pattern.every((value, j) => sequence[i + j] === value)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Find all occurrences of pattern (overlapping matches included)
     * @param {number[]} sequence
     * @param {number[]} pattern
     * @returns {number[]} Ascending match indices
     */
    findAllPatterns(sequence, pattern) {
        const positions = [];
        let hit = this.findPattern(sequence, pattern, 0);
        while (hit !== -1) {
            positions.push(hit);
            hit = this.findPattern(sequence, pattern, hit + 1);
        }
        return positions;
    }

    /**
     * Find terminator sequence.
     * Looks for poly-A signal analog (AATAAA → 7,7,2,7,7,7 in primes).
     * @param {number[]} sequence
     * @param {number} [startFrom=0] - Index at which the search begins
     * @returns {number|null} Index 20 positions past the end of the signal
     *   (may exceed sequence.length), or null when no signal is found
     */
    findTerminator(sequence, startFrom = 0) {
        const polyASignal = [7, 7, 2, 7, 7, 7]; // AATAAA
        const hit = this.findPattern(sequence, polyASignal, startFrom);
        // Include poly-A tail region after the signal
        return hit === -1 ? null : hit + polyASignal.length + 20;
    }

    /**
     * Calculate entropy change from transcription.
     * Modelled as a small positive constant per nucleotide.
     * @param {number[]} dnaPrimes
     * @returns {number} 0.01 × sequence length
     */
    entropyDelta(dnaPrimes) {
        return 0.01 * dnaPrimes.length;
    }

    /**
     * Add 5' cap (methylated G)
     * @param {number[]} rnaPrimes
     * @returns {number[]} New array with the cap marker prepended
     */
    addFivePrimeCap(rnaPrimes) {
        // 7-methylguanosine cap marker. NOTE: 111 = 3 × 37 is not a prime; it is
        // kept as-is because it is the value existing callers already see.
        const cap = [111];
        return [...cap, ...rnaPrimes];
    }

    /**
     * Add 3' poly-A tail
     * @param {number[]} rnaPrimes
     * @param {number} [length=200] - Number of A (7) entries to append
     * @returns {number[]} New array with the tail appended
     */
    addPolyATail(rnaPrimes, length = 200) {
        const tail = Array(length).fill(7); // Poly-A
        return [...rnaPrimes, ...tail];
    }

    /**
     * Full mRNA processing: splice + cap + poly-A.
     *
     * @param {number[]} rnaPrimes - Pre-mRNA
     * @param {object} [options]
     * @param {boolean} [options.addCap] - Pass false to skip the 5' cap
     * @param {Array<{start: number, end: number}>} [options.introns] - Intron
     *   ranges, indexed against rnaPrimes as passed in
     * @param {boolean} [options.addPolyA] - Pass false to skip the poly-A tail
     * @param {number} [options.polyALength=200] - Tail length (0 is honoured)
     * @returns {number[]} Processed mRNA (new array)
     */
    processmRNA(rnaPrimes, options = {}) {
        let processed = [...rnaPrimes];

        // Splice first: intron coordinates refer to the input sequence, and
        // prepending the cap beforehand would shift every index by one.
        if (options.introns && options.introns.length > 0) {
            processed = this.splice(processed, options.introns);
        }

        // Add 5' cap
        if (options.addCap !== false) {
            processed = this.addFivePrimeCap(processed);
        }

        // Add poly-A tail
        if (options.addPolyA !== false) {
            const tailLength = typeof options.polyALength === 'number'
                ? options.polyALength
                : 200;
            processed = this.addPolyATail(processed, tailLength);
        }

        return processed;
    }

    /**
     * Splice out introns
     * @param {number[]} rnaPrimes - Pre-mRNA
     * @param {Array<{start: number, end: number}>} introns - Intron positions
     *   (start inclusive, end exclusive)
     * @returns {number[]} New array with the intron ranges removed
     */
    splice(rnaPrimes, introns) {
        // Remove from the highest start downwards so earlier removals do not
        // shift the indices of introns still to be processed.
        const sortedIntrons = [...introns].sort((a, b) => b.start - a.start);

        let spliced = [...rnaPrimes];
        for (const intron of sortedIntrons) {
            spliced = [
                ...spliced.slice(0, intron.start),
                ...spliced.slice(intron.end)
            ];
        }

        return spliced;
    }
}
220
+
221
+ module.exports = { TranscriptionOperator };
@@ -0,0 +1,264 @@
1
+ /**
2
+ * Translation Operator - RNA → Protein
3
+ *
4
+ * Implements the second step of the Central Dogma:
5
+ * mRNA is translated into protein by ribosomes.
6
+ *
7
+ * Key insight: Translation is an entropy-reducing transform
8
+ * (64 codons → 21 amino acids = significant information compression)
9
+ */
10
+
11
+ const { NUCLEOTIDE_PRIMES, AMINO_ACID_PRIMES, PRIME_TO_NUCLEOTIDE } = require('./encoding');
12
+ const { STANDARD_GENETIC_CODE, isStartCodon, isStopCodon, translateCodon } = require('./genetic-code');
13
+
14
/**
 * TranslationOperator
 *
 * Transforms RNA prime sequences to protein prime sequences.
 * Models ribosome-mediated translation.
 *
 * Prime codes used directly by this class: A = 7, U = 5, G = 11, C = 3.
 */
class TranslationOperator {
    /**
     * @param {object} [geneticCode=STANDARD_GENETIC_CODE] - Map from codon string
     *   (e.g. 'AUG') to amino-acid letter; '*' marks a stop.
     */
    constructor(geneticCode = STANDARD_GENETIC_CODE) {
        this.geneticCode = geneticCode;

        // Prime-based start codon: AUG → [7, 5, 11]
        this.startCodonPrimes = [7, 5, 11];

        // Prime-based stop codons
        this.stopCodonPrimes = [
            [5, 7, 7],  // UAA (Ochre)
            [5, 7, 11], // UAG (Amber)
            [5, 11, 7]  // UGA (Opal)
        ];
    }

    /**
     * Translate RNA primes to protein primes
     * @param {number[]} rnaPrimes - RNA sequence as prime array
     * @param {object} [options] - Translation options
     * @param {number} [options.start] - Index of the first codon; when omitted the
     *   first start codon is searched for
     * @param {boolean} [options.force] - When no start codon exists, translate from
     *   index 0 instead of failing
     * @returns {object} On success: { success, protein, length, startPosition,
     *   stopPosition (-1 if no stop codon was reached), readingFrame }.
     *   On failure: { success: false, error, protein: [], length: 0 }.
     */
    apply(rnaPrimes, options = {}) {
        // Find start codon
        const startPos = options.start !== undefined
            ? options.start
            : this.findStartCodon(rnaPrimes);

        if (startPos === -1 && !options.force) {
            return {
                success: false,
                error: 'No start codon (AUG) found',
                protein: [],
                length: 0
            };
        }

        const start = startPos === -1 ? 0 : startPos;
        const aminoAcids = [];

        // Translate codon by codon; a trailing partial codon is ignored
        let stoppedAt = -1;
        for (let i = start; i <= rnaPrimes.length - 3; i += 3) {
            const codon = rnaPrimes.slice(i, i + 3);

            // Check for stop codon
            if (this.isStopCodonPrimes(codon)) {
                stoppedAt = i;
                break;
            }

            // Untranslatable codons are skipped without breaking the frame.
            // Loose comparison rejects both null and undefined.
            const aa = this.translateCodonPrimes(codon);
            if (aa != null) {
                aminoAcids.push(aa);
            }
        }

        return {
            success: true,
            protein: aminoAcids,
            length: aminoAcids.length,
            startPosition: start,
            stopPosition: stoppedAt,
            readingFrame: start % 3
        };
    }

    /**
     * Translate a single codon (3 primes) to amino acid prime
     * @param {number[]} codonPrimes
     * @returns {number|null} Amino-acid prime, or null when the codon is not
     *   exactly three known nucleotide primes, is a stop ('*'), is absent from
     *   the genetic code, or maps to a letter with no entry in AMINO_ACID_PRIMES
     */
    translateCodonPrimes(codonPrimes) {
        if (codonPrimes.length !== 3) return null;

        // Convert primes to nucleotide string
        const nucMap = { 7: 'A', 5: 'U', 11: 'G', 3: 'C' };
        const codonStr = codonPrimes.map(p => nucMap[p] || '?').join('');

        if (codonStr.includes('?')) return null;

        const aa = this.geneticCode[codonStr];
        if (!aa || aa === '*') return null;

        // Normalise a missing table entry to null so callers never receive
        // undefined in place of a prime.
        const prime = AMINO_ACID_PRIMES[aa];
        return prime === undefined ? null : prime;
    }

    /**
     * Find start codon (AUG) position
     * @param {number[]} rnaPrimes
     * @returns {number} Index of the first start codon, or -1
     */
    findStartCodon(rnaPrimes) {
        const [first, second, third] = this.startCodonPrimes;
        for (let i = 0; i <= rnaPrimes.length - 3; i++) {
            if (rnaPrimes[i] === first &&
                rnaPrimes[i + 1] === second &&
                rnaPrimes[i + 2] === third) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Find all start codons (for alternative translation initiation)
     * @param {number[]} rnaPrimes
     * @returns {number[]} Ascending indices of every start codon, in any frame
     */
    findAllStartCodons(rnaPrimes) {
        const [first, second, third] = this.startCodonPrimes;
        const positions = [];
        for (let i = 0; i <= rnaPrimes.length - 3; i++) {
            if (rnaPrimes[i] === first &&
                rnaPrimes[i + 1] === second &&
                rnaPrimes[i + 2] === third) {
                positions.push(i);
            }
        }
        return positions;
    }

    /**
     * Check if codon primes represent a stop codon
     * @param {number[]} codonPrimes
     * @returns {boolean}
     */
    isStopCodonPrimes(codonPrimes) {
        return this.stopCodonPrimes.some(
            stop => stop.every((prime, k) => codonPrimes[k] === prime)
        );
    }

    /**
     * Calculate entropy reduction from translation.
     * Translation compresses 64 states to 21 states per codon.
     * @param {number[]} rnaPrimes
     * @returns {number} Negative value: -(whole codons) × log2(64 / 21)
     */
    entropyDelta(rnaPrimes) {
        const codons = Math.floor(rnaPrimes.length / 3);
        // Entropy reduction: log2(64) - log2(21) ≈ 1.6 bits per codon
        return -codons * Math.log2(64 / 21);
    }

    /**
     * Translate all three reading frames.
     *
     * Each frame is translated from its own offset. Searching for a start codon
     * here would make every frame jump to the same AUG and return the same protein.
     * @param {number[]} rnaPrimes
     * @returns {object[]} Three apply() results, each with an added `frame`
     *   (0, 1, 2); positions in each result are relative to rnaPrimes.slice(frame)
     */
    translateAllFrames(rnaPrimes) {
        return [0, 1, 2].map(frame => ({
            frame,
            ...this.apply(rnaPrimes.slice(frame), { start: 0 })
        }));
    }

    /**
     * Find longest open reading frame (ORF).
     *
     * Every start codon is tried once. A read that runs to the end of the
     * sequence without a stop codon still counts. On ties the earliest start wins.
     * @param {number[]} rnaPrimes
     * @returns {object|null} The apply() result of the longest translation plus
     *   `frame` (start index mod 3) and `absoluteStart`, or null when the
     *   sequence has no start codon
     */
    findLongestORF(rnaPrimes) {
        let longestORF = null;

        for (const start of this.findAllStartCodons(rnaPrimes)) {
            const result = this.apply(rnaPrimes, { start });
            if (!longestORF || result.length > longestORF.length) {
                longestORF = {
                    ...result,
                    frame: start % 3,
                    absoluteStart: start
                };
            }
        }

        return longestORF;
    }

    /**
     * Calculate molecular weight of protein
     * @param {number[]} proteinPrimes - Amino-acid primes; unknown primes add 0
     * @returns {number} Average mass in Da; 0 for an empty protein
     */
    calculateMolecularWeight(proteinPrimes) {
        // Average residue masses, i.e. amino-acid mass with one water already removed
        const massMap = {
            23: 57.05,   // G
            29: 71.08,   // A
            31: 99.13,   // V
            37: 113.16,  // L
            41: 113.16,  // I
            43: 131.20,  // M
            47: 147.18,  // F
            53: 186.21,  // W
            59: 163.18,  // Y
            61: 87.08,   // S
            67: 101.11,  // T
            71: 103.14,  // C
            73: 114.10,  // N
            79: 128.13,  // Q
            83: 128.17,  // K
            89: 156.19,  // R
            97: 137.14,  // H
            101: 115.09, // D
            103: 129.12, // E
            107: 97.12   // P
        };

        if (proteinPrimes.length === 0) return 0;

        const residueMass = proteinPrimes.reduce((sum, p) => sum + (massMap[p] || 0), 0);

        // Peptide-bond water loss is already accounted for in the residue masses;
        // the intact chain only regains one water for its free N- and C-termini.
        return residueMass + 18.015;
    }

    /**
     * Calculate isoelectric point (pI) estimate.
     *
     * Simple heuristic based on charge balance only. A real pI calculation
     * requires pKa values and Henderson-Hasselbalch.
     * @param {number[]} proteinPrimes
     * @returns {number} 7.0 when balanced, above 8 for net positive, below 6 for
     *   net negative
     */
    calculateIsoelectricPoint(proteinPrimes) {
        let positive = 0; // K, R, H
        let negative = 0; // D, E

        for (const p of proteinPrimes) {
            if (p === 83 || p === 89 || p === 97) positive++;
            if (p === 101 || p === 103) negative++;
        }

        const netCharge = positive - negative;

        if (netCharge > 0) return 8.0 + Math.log(netCharge + 1);
        if (netCharge < 0) return 6.0 - Math.log(-netCharge + 1);
        return 7.0;
    }

    /**
     * Check for signal peptide (N-terminal hydrophobic region)
     * @param {number[]} proteinPrimes
     * @param {number} [windowSize=15] - Number of N-terminal residues inspected
     * @returns {boolean} True when at least 60% of the window is in the
     *   23-43 prime range; false when the protein is shorter than the window
     */
    hasSignalPeptide(proteinPrimes, windowSize = 15) {
        if (proteinPrimes.length < windowSize) return false;

        // Primes 23-43 (G, A, V, L, I, M per the mass table) are treated as hydrophobic
        const nTerminal = proteinPrimes.slice(0, windowSize);
        const hydrophobicCount = nTerminal.filter(p => p >= 23 && p <= 43).length;

        return hydrophobicCount >= windowSize * 0.6;
    }
}
263
+
264
+ module.exports = { TranslationOperator };
package/backends/index.js CHANGED
@@ -6,10 +6,34 @@ const { Backend } = require('./interface');
6
6
  const { SemanticBackend } = require('./semantic');
7
7
  const { CryptographicBackend } = require('./cryptographic');
8
8
  const { ScientificBackend } = require('./scientific');
9
+ const bioinformatics = require('./bioinformatics');
9
10
 
10
11
  module.exports = {
11
12
  Backend,
12
13
  SemanticBackend,
13
14
  CryptographicBackend,
14
- ScientificBackend
15
+ ScientificBackend,
16
+
17
+ // Bioinformatics backend
18
+ BioinformaticsBackend: bioinformatics.BioinformaticsBackend,
19
+
20
+ // Bioinformatics operators
21
+ TranscriptionOperator: bioinformatics.TranscriptionOperator,
22
+ TranslationOperator: bioinformatics.TranslationOperator,
23
+ FoldingTransform: bioinformatics.FoldingTransform,
24
+ BindingAffinityCalculator: bioinformatics.BindingAffinityCalculator,
25
+ MolecularDocker: bioinformatics.MolecularDocker,
26
+
27
+ // DNA Computing
28
+ DNAStrand: bioinformatics.DNAStrand,
29
+ DNADuplex: bioinformatics.DNADuplex,
30
+ ANDGate: bioinformatics.ANDGate,
31
+ ORGate: bioinformatics.ORGate,
32
+ NOTGate: bioinformatics.NOTGate,
33
+ NANDGate: bioinformatics.NANDGate,
34
+ DNACircuit: bioinformatics.DNACircuit,
35
+ StrandDisplacementReaction: bioinformatics.StrandDisplacementReaction,
36
+
37
+ // Full bioinformatics module
38
+ bioinformatics
15
39
  };