@ruvector/rvdna 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.d.ts +95 -0
- package/index.js +175 -0
- package/package.json +58 -0
package/index.d.ts
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @ruvector/rvdna — AI-native genomic analysis and the .rvdna file format.
|
|
3
|
+
*
|
|
4
|
+
* Provides variant calling, protein translation, k-mer vector search,
|
|
5
|
+
* and the compact .rvdna binary format via Rust NAPI-RS bindings.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
 * Encode a DNA string to 2-bit packed bytes (4 bases per byte).
 *
 * Codes: A=00, C=01, G=10, T=11. Within each byte the first base occupies the
 * two most significant bits; a final partial byte is zero-padded in its low
 * bits.
 *
 * Ambiguous bases (N) map to A. The pure-JS fallback additionally stores any
 * other unrecognized character as A, so the encoding is lossy for anything
 * outside A/C/G/T. The packed form does not record the number of bases; keep
 * the original length if the sequence must be restored with `decode2bit`.
 *
 * @param sequence - DNA sequence to pack.
 * @returns The packed bytes (`Math.ceil(sequence.length / 4)` bytes in the
 *          pure-JS fallback).
 */
export function encode2bit(sequence: string): Buffer;
|
|
13
|
+
|
|
14
|
+
/**
 * Decode 2-bit packed bytes back to a DNA string.
 *
 * Each byte holds four bases, first base in the two most significant bits,
 * using 00=A, 01=C, 10=G, 11=T. Padding bits in the last byte look exactly
 * like A, which is why the base count has to be supplied by the caller.
 *
 * @param buffer - The 2-bit packed buffer
 * @param length - Number of bases to decode
 * @returns The decoded sequence, consisting only of A, C, G and T.
 */
export function decode2bit(buffer: Buffer, length: number): string;
|
|
20
|
+
|
|
21
|
+
/**
 * Translate a DNA string to a protein amino acid string.
 * Uses the standard genetic code. Stops at the first stop codon.
 *
 * In the pure-JS fallback, codons are read from offset 0 in steps of three,
 * one or two trailing bases that do not form a full codon are ignored,
 * matching is case-insensitive, and any codon not in the table (for example
 * one containing N) is emitted as `X`. The stop codon itself is not part of
 * the result.
 *
 * @param sequence - DNA sequence, read in the forward frame starting at index 0.
 * @returns One-letter amino acid codes up to, but excluding, the first stop codon.
 */
export function translateDna(sequence: string): string;
|
|
26
|
+
|
|
27
|
+
/**
 * Compute cosine similarity between two numeric arrays.
 * Returns a value between -1 and 1.
 *
 * Both arrays are expected to have the same length. The pure-JS fallback
 * returns 0 when either array has zero magnitude (including empty arrays),
 * because the cosine is undefined in that case.
 *
 * @param a - First vector.
 * @param b - Second vector, same length as `a`.
 * @returns `dot(a, b) / (|a| * |b|)`, or 0 for a zero-magnitude input.
 */
export function cosineSimilarity(a: number[], b: number[]): number;
|
|
32
|
+
|
|
33
|
+
/**
 * Tuning options for `fastaToRvdna`.
 *
 * Every field is optional. The JS wrapper substitutes the default for any
 * falsy value, so passing 0 is the same as omitting the field.
 */
export interface RvdnaOptions {
  /** K-mer size (default: 11) */
  k?: number;
  /** Vector dimensions (default: 512) */
  dims?: number;
  /** Block size in bases (default: 500) */
  blockSize?: number;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Convert a FASTA sequence string to .rvdna binary format.
 * Requires native bindings; there is no pure-JS fallback.
 *
 * @param sequence - Sequence text handed to the native encoder.
 * @param options - Optional k-mer size, vector dimensions and block size;
 *                  omitted fields use the defaults documented on `RvdnaOptions`.
 * @returns A Buffer with the complete .rvdna file contents.
 * @throws Error if the native bindings are not available
 *         (see `isNativeAvailable`).
 */
export function fastaToRvdna(sequence: string, options?: RvdnaOptions): Buffer;
|
|
47
|
+
|
|
48
|
+
/** One pre-computed k-mer vector block stored in a .rvdna file. */
export interface RvdnaKmerVector {
  /** K-mer size used for this block */
  k: number;
  /** Vector dimensionality (presumably equal to `vector.length` — confirm against the native reader) */
  dimensions: number;
  /** Start of the covered region, in bases (presumably 0-based — confirm) */
  startPos: number;
  /** Length of the covered region, in bases */
  regionLen: number;
  /** The vector values */
  vector: Float32Array;
}

/** One variant record stored in a .rvdna file. */
export interface RvdnaVariant {
  /** Position of the variant in the sequence */
  position: number;
  /** Reference allele */
  refAllele: string;
  /** Alternate allele */
  altAllele: string;
  /**
   * Three genotype likelihoods.
   * NOTE(review): the order (e.g. hom-ref, het, hom-alt) is not visible from
   * this declaration — confirm against the native implementation.
   */
  likelihoods: [number, number, number];
  /** Variant quality score */
  quality: number;
}

/** Size statistics reported for a .rvdna file. */
export interface RvdnaStats {
  /** Total size (presumably in bytes — confirm) */
  totalSize: number;
  /** Average number of bits used per base */
  bitsPerBase: number;
  /** Compression ratio reported by the reader */
  compressionRatio: number;
}

/** Parsed contents of a .rvdna file, as returned by `readRvdna`. */
export interface RvdnaFile {
  /** Format version */
  version: number;
  /** Sequence length in bases */
  sequenceLength: number;
  /** Decoded DNA sequence */
  sequence: string;
  /** Pre-computed k-mer vector blocks */
  kmerVectors: RvdnaKmerVector[];
  /** Variant positions and genotype likelihoods (null when the section is absent) */
  variants: RvdnaVariant[] | null;
  /** Metadata key-value pairs (null when the section is absent) */
  metadata: Record<string, unknown> | null;
  /** File statistics */
  stats: RvdnaStats;
}
|
|
80
|
+
|
|
81
|
+
/**
 * Read a .rvdna file from a Buffer. Returns parsed sections.
 * Requires native bindings; there is no pure-JS fallback.
 *
 * @param buffer - Complete contents of a .rvdna file.
 * @returns The parsed header fields, sequence, k-mer vectors, variants,
 *          metadata and statistics.
 * @throws Error if the native bindings are not available
 *         (see `isNativeAvailable`).
 */
export function readRvdna(buffer: Buffer): RvdnaFile;
|
|
86
|
+
|
|
87
|
+
/**
 * Check if native bindings are available for the current platform.
 *
 * Without native bindings, `encode2bit`, `decode2bit`, `translateDna` and
 * `cosineSimilarity` run on pure-JS fallbacks, while `fastaToRvdna` and
 * `readRvdna` throw.
 *
 * @returns `true` if the native module was loaded when the package was
 *          first required, `false` otherwise.
 */
export function isNativeAvailable(): boolean;
|
|
91
|
+
|
|
92
|
+
/**
 * Direct access to the native NAPI-RS module (null if not available).
 *
 * The value is captured once, when the package is first loaded. Members are
 * exposed untyped: this declaration does not describe their signatures, so
 * callers are responsible for passing the arguments the native side expects.
 */
export const native: Record<string, Function> | null;
|
package/index.js
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
// Host operating system and CPU architecture, as reported by Node.
const platform = process.platform;
const arch = process.arch;

// Lookup table: process.platform -> process.arch -> name of the npm package
// that ships the native binary for that combination.
const platformMap = {
  linux: {
    x64: '@ruvector/rvdna-linux-x64-gnu',
    arm64: '@ruvector/rvdna-linux-arm64-gnu',
  },
  darwin: {
    x64: '@ruvector/rvdna-darwin-x64',
    arm64: '@ruvector/rvdna-darwin-arm64',
  },
  win32: {
    x64: '@ruvector/rvdna-win32-x64-msvc',
  },
};
|
|
17
|
+
|
|
18
|
+
/**
 * Load the native binding package that matches the current platform and
 * architecture.
 *
 * @returns {object} Whatever the platform-specific package exports.
 * @throws {Error} If no package is mapped for this platform/architecture.
 * @throws {Error} If the mapped package cannot be found; the original
 *   MODULE_NOT_FOUND error is attached as `cause` so the require stack is
 *   not lost.
 * @throws Any other error raised while loading the package is rethrown as-is.
 */
function loadNativeModule() {
  const platformPackage = platformMap[platform]?.[arch];

  if (!platformPackage) {
    throw new Error(
      `Unsupported platform: ${platform}-${arch}\n` +
      `@ruvector/rvdna native bindings are available for:\n` +
      `- Linux (x64, ARM64)\n` +
      `- macOS (x64, ARM64)\n` +
      `- Windows (x64)\n\n` +
      `For other platforms, use the WASM build: npm install @ruvector/rvdna-wasm`
    );
  }

  try {
    return require(platformPackage);
  } catch (error) {
    // `?.` guards against a thrown null/undefined, which would otherwise turn
    // into an unrelated TypeError here.
    if (error?.code === 'MODULE_NOT_FOUND') {
      throw new Error(
        `Native module not found for ${platform}-${arch}\n` +
        `Please install: npm install ${platformPackage}\n` +
        `Or reinstall @ruvector/rvdna to get optional dependencies`,
        // Keep the underlying error: it names the module that was actually
        // missing, which may be a dependency of the platform package.
        { cause: error }
      );
    }
    throw error;
  }
}
|
|
45
|
+
|
|
46
|
+
// Attempt to load the native binding once, at module load time. On any
// failure `nativeModule` stays null and the public API below uses its pure-JS
// shims for the basic operations.
let nativeModule = null;
try {
  nativeModule = loadNativeModule();
} catch {
  // Native bindings not available — deliberately ignored so the package
  // still loads; the error is not reported.
}
|
|
54
|
+
|
|
55
|
+
// -------------------------------------------------------------------
|
|
56
|
+
// Public API — wraps native bindings or provides JS fallbacks
|
|
57
|
+
// -------------------------------------------------------------------
|
|
58
|
+
|
|
59
|
+
/**
 * Encode a DNA string to 2-bit packed bytes (4 bases per byte).
 * A=00, C=01, G=10, T=11. Returns a Buffer.
 *
 * Delegates to the native binding when it provides `encode2bit`; otherwise
 * uses the JS fallback below. In the fallback, bases are matched
 * case-insensitively, and N or any other unrecognized character is stored
 * as A (00).
 *
 * @param {string} sequence - DNA sequence to pack.
 * @returns {Buffer} `Math.ceil(sequence.length / 4)` bytes; the first base of
 *   each group of four sits in the two most significant bits, and unused low
 *   bits of the final byte are zero.
 */
function encode2bit(sequence) {
  if (nativeModule?.encode2bit) return nativeModule.encode2bit(sequence);

  // JS fallback
  const codes = { A: 0, C: 1, G: 2, T: 3 };
  const len = sequence.length;
  const buf = Buffer.alloc(Math.ceil(len / 4)); // zero-filled, so A/unknown need no write
  for (let i = 0; i < len; i++) {
    // Upper-case before the lookup so soft-masked (lowercase) bases keep
    // their identity instead of collapsing to A.
    const code = codes[sequence[i].toUpperCase()] ?? 0;
    // Base i lives in byte i/4, at bit offset 6, 4, 2 or 0.
    buf[i >> 2] |= code << (6 - (i & 3) * 2);
  }
  return buf;
}
|
|
77
|
+
|
|
78
|
+
/**
 * Decode 2-bit packed bytes back to a DNA string.
 *
 * Delegates to the native binding when it provides `decode2bit`; otherwise
 * uses the JS fallback below (00=A, 01=C, 10=G, 11=T, first base in the two
 * most significant bits of each byte).
 *
 * @param {Buffer} buffer - The 2-bit packed bytes.
 * @param {number} length - Number of bases to decode.
 * @returns {string} The decoded sequence over A, C, G, T.
 * @throws {RangeError} (JS fallback) if `length` asks for more bases than the
 *   buffer holds; previously the missing bytes were silently decoded as `A`.
 */
function decode2bit(buffer, length) {
  if (nativeModule?.decode2bit) return nativeModule.decode2bit(buffer, length);

  const capacity = buffer.length * 4;
  if (length > capacity) {
    throw new RangeError(
      `decode2bit: requested ${length} bases but the buffer holds only ${capacity}`
    );
  }

  const bases = 'ACGT';
  const out = [];
  for (let i = 0; i < length; i++) {
    // Base i lives in byte i/4, at bit offset 6, 4, 2 or 0.
    const bitOff = 6 - (i & 3) * 2;
    out.push(bases[(buffer[i >> 2] >> bitOff) & 3]);
  }
  return out.join('');
}
|
|
93
|
+
|
|
94
|
+
/**
 * Translate a DNA string to a protein amino acid string.
 *
 * Uses the native binding when it provides `translateDna`. The JS fallback
 * reads codons from offset 0 in steps of three using the standard genetic
 * code, ignores one or two trailing bases, emits `X` for any codon that is
 * not made of A/C/G/T (case-insensitive), and stops before the first stop
 * codon.
 *
 * @param {string} sequence - DNA sequence.
 * @returns {string} One-letter amino acid codes.
 */
function translateDna(sequence) {
  if (nativeModule?.translateDna) return nativeModule.translateDna(sequence);

  // JS fallback — standard genetic code in the conventional compact layout:
  // amino acid for codon b1b2b3 is at index 16*pos(b1) + 4*pos(b2) + pos(b3),
  // where pos() is the base's position in "TCAG".
  const baseOrder = 'TCAG';
  const aminoAcids = 'FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG';

  const residues = [];
  let start = 0;
  while (start + 2 < sequence.length) {
    const codon = sequence.slice(start, start + 3).toUpperCase();

    let residue = 'X'; // placeholder for anything that is not a known codon
    if (codon.length === 3) {
      const first = baseOrder.indexOf(codon[0]);
      const second = baseOrder.indexOf(codon[1]);
      const third = baseOrder.indexOf(codon[2]);
      if (first >= 0 && second >= 0 && third >= 0) {
        residue = aminoAcids[first * 16 + second * 4 + third];
      }
    }

    if (residue === '*') break; // stop codon: end translation, do not emit it
    residues.push(residue);
    start += 3;
  }
  return residues.join('');
}
|
|
120
|
+
|
|
121
|
+
/**
 * Compute cosine similarity between two numeric arrays.
 *
 * Delegates to the native binding when it provides `cosineSimilarity`;
 * otherwise uses the JS fallback below.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector; must have the same length as `a`.
 * @returns {number} `dot(a, b) / (|a| * |b|)` clamped to [-1, 1], or 0 when
 *   either vector has zero magnitude (the cosine is undefined there).
 * @throws {RangeError} (JS fallback) if the two arrays differ in length;
 *   previously this produced NaN or silently ignored the extra elements.
 */
function cosineSimilarity(a, b) {
  if (nativeModule?.cosineSimilarity) return nativeModule.cosineSimilarity(a, b);

  if (a.length !== b.length) {
    throw new RangeError(
      `cosineSimilarity: vectors must have the same length (got ${a.length} and ${b.length})`
    );
  }

  let dot = 0, magA = 0, magB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    magA += a[i] * a[i];
    magB += b[i] * b[i];
  }
  magA = Math.sqrt(magA);
  magB = Math.sqrt(magB);
  if (!(magA && magB)) return 0;

  // Floating-point rounding can push the quotient a hair outside [-1, 1]
  // (e.g. 1.0000000000000002 for identical vectors); clamp to the documented range.
  return Math.min(1, Math.max(-1, dot / (magA * magB)));
}
|
|
137
|
+
|
|
138
|
+
/**
 * Convert a FASTA sequence string to .rvdna binary format.
 * Returns a Buffer with the complete .rvdna file contents.
 *
 * There is no JS fallback: the work is done entirely by the native binding,
 * which receives `(sequence, k, dims, blockSize)` as positional arguments.
 *
 * @param {string} sequence - Sequence text handed to the native encoder.
 * @param {{k?: number, dims?: number, blockSize?: number} | null} [options]
 *   Optional settings. Missing or falsy fields (including 0) fall back to
 *   k = 11, dims = 512, blockSize = 500. `null` is treated like an empty
 *   options object.
 * @returns {Buffer} The value returned by the native `fastaToRvdna`.
 * @throws {Error} If the native binding is not available.
 */
function fastaToRvdna(sequence, options = {}) {
  if (!nativeModule?.fastaToRvdna) {
    throw new Error('fastaToRvdna requires native bindings. Install the platform-specific package.');
  }

  // The default parameter only covers `undefined`; `?? {}` also accepts null.
  const { k, dims, blockSize } = options ?? {};
  return nativeModule.fastaToRvdna(sequence, k || 11, dims || 512, blockSize || 500);
}
|
|
148
|
+
|
|
149
|
+
/**
 * Read a .rvdna file from a Buffer. Returns parsed sections.
 *
 * There is no JS fallback; parsing is done by the native binding.
 *
 * @param {Buffer} buffer - Contents of a .rvdna file.
 * @returns {object} The value returned by the native `readRvdna`.
 * @throws {Error} If the native binding is not available.
 */
function readRvdna(buffer) {
  const hasNativeReader = Boolean(nativeModule?.readRvdna);
  if (!hasNativeReader) {
    throw new Error('readRvdna requires native bindings. Install the platform-specific package.');
  }
  return nativeModule.readRvdna(buffer);
}
|
|
156
|
+
|
|
157
|
+
/**
 * Check if native bindings are available.
 *
 * @returns {boolean} `false` only when the module-level `nativeModule` is
 *   null, i.e. loading the native binding failed at startup.
 */
function isNativeAvailable() {
  const loaded = nativeModule !== null;
  return loaded;
}
|
|
163
|
+
|
|
164
|
+
module.exports = {
|
|
165
|
+
encode2bit,
|
|
166
|
+
decode2bit,
|
|
167
|
+
translateDna,
|
|
168
|
+
cosineSimilarity,
|
|
169
|
+
fastaToRvdna,
|
|
170
|
+
readRvdna,
|
|
171
|
+
isNativeAvailable,
|
|
172
|
+
|
|
173
|
+
// Re-export native module for advanced use
|
|
174
|
+
native: nativeModule,
|
|
175
|
+
};
|
package/package.json
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@ruvector/rvdna",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "rvDNA — AI-native genomic analysis and the .rvdna file format. Variant calling, protein prediction, and HNSW vector search powered by Rust via NAPI-RS.",
|
|
5
|
+
"main": "index.js",
|
|
6
|
+
"types": "index.d.ts",
|
|
7
|
+
"author": "rUv <info@ruv.io> (https://ruv.io)",
|
|
8
|
+
"homepage": "https://github.com/ruvnet/ruvector/tree/main/examples/dna",
|
|
9
|
+
"repository": {
|
|
10
|
+
"type": "git",
|
|
11
|
+
"url": "https://github.com/ruvnet/ruvector.git",
|
|
12
|
+
"directory": "npm/packages/rvdna"
|
|
13
|
+
},
|
|
14
|
+
"bugs": {
|
|
15
|
+
"url": "https://github.com/ruvnet/ruvector/issues"
|
|
16
|
+
},
|
|
17
|
+
"license": "MIT",
|
|
18
|
+
"engines": {
|
|
19
|
+
"node": ">=18.0.0"
|
|
20
|
+
},
|
|
21
|
+
"files": [
|
|
22
|
+
"index.js",
|
|
23
|
+
"index.d.ts",
|
|
24
|
+
"README.md"
|
|
25
|
+
],
|
|
26
|
+
"scripts": {
|
|
27
|
+
"build:napi": "napi build --platform --release --cargo-cwd ../../../examples/dna",
|
|
28
|
+
"test": "node test.js"
|
|
29
|
+
},
|
|
30
|
+
"devDependencies": {
|
|
31
|
+
"@napi-rs/cli": "^2.18.0"
|
|
32
|
+
},
|
|
33
|
+
"optionalDependencies": {
|
|
34
|
+
"@ruvector/rvdna-linux-x64-gnu": "0.1.0",
|
|
35
|
+
"@ruvector/rvdna-linux-arm64-gnu": "0.1.0",
|
|
36
|
+
"@ruvector/rvdna-darwin-x64": "0.1.0",
|
|
37
|
+
"@ruvector/rvdna-darwin-arm64": "0.1.0",
|
|
38
|
+
"@ruvector/rvdna-win32-x64-msvc": "0.1.0"
|
|
39
|
+
},
|
|
40
|
+
"publishConfig": {
|
|
41
|
+
"access": "public"
|
|
42
|
+
},
|
|
43
|
+
"keywords": [
|
|
44
|
+
"genomics",
|
|
45
|
+
"bioinformatics",
|
|
46
|
+
"dna",
|
|
47
|
+
"rvdna",
|
|
48
|
+
"variant-calling",
|
|
49
|
+
"protein",
|
|
50
|
+
"hnsw",
|
|
51
|
+
"vector-search",
|
|
52
|
+
"napi",
|
|
53
|
+
"rust",
|
|
54
|
+
"wasm",
|
|
55
|
+
"ai",
|
|
56
|
+
"machine-learning"
|
|
57
|
+
]
|
|
58
|
+
}
|