@holoscript/holoembed 6.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/index.cjs +580 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +321 -0
- package/dist/index.d.ts +321 -0
- package/dist/index.js +567 -0
- package/dist/index.js.map +1 -0
- package/package.json +70 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,567 @@
|
|
|
1
|
+
import { ensureNodeWebGpu } from '@holoscript/snn-webgpu';
|
|
2
|
+
|
|
3
|
+
// src/charTrigram.ts
|
|
4
|
+
function camelSplit(s) {
|
|
5
|
+
return s.replace(/([a-z])([A-Z])/g, "$1 $2").replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2").replace(/[^a-zA-Z0-9]+/g, " ").trim().toLowerCase();
|
|
6
|
+
}
|
|
7
|
+
/**
 * Write a normalized character-trigram histogram of `text` into
 * `vec[offset .. offset + bins)`.
 *
 * The text is first normalized with `camelSplit`. Every window of three
 * consecutive characters that contains no space is hashed (FNV-style
 * xor/multiply over the three char codes) and counted in bucket
 * `hash % bins`. Counts are divided by the number of counted trigrams, so
 * the written slice sums to ~1.
 *
 * `vec` is left untouched when the normalized text is shorter than three
 * characters or when no space-free trigram exists.
 *
 * NOTE: the multiply is done in double precision and can exceed 2^53, so
 * the low bits are not those of a canonical 32-bit FNV-1a. The arithmetic
 * is kept exactly as-is so bucket assignments stay stable.
 *
 * @param {string} text - Text to histogram.
 * @param {Float32Array|number[]} vec - Destination vector (mutated).
 * @param {number} offset - Index of the first bucket inside `vec`.
 * @param {number} bins - Number of buckets.
 * @returns {void}
 */
function trigramHistogram(text, vec, offset, bins) {
  const SPACE_CODE = 32;
  const normalized = camelSplit(text);
  if (normalized.length < 3) return;

  const bucketCounts = new Float32Array(bins);
  let counted = 0;

  for (let start = 0; start + 3 <= normalized.length; start += 1) {
    const codes = [
      normalized.charCodeAt(start),
      normalized.charCodeAt(start + 1),
      normalized.charCodeAt(start + 2)
    ];
    // Trigrams never span a word boundary.
    if (codes.includes(SPACE_CODE)) continue;

    let hash = 2166136261;
    for (const code of codes) {
      hash ^= code;
      hash = (hash * 16777619) >>> 0;
    }
    bucketCounts[hash % bins] += 1;
    counted += 1;
  }

  if (counted === 0) return;
  for (let bucket = 0; bucket < bins; bucket += 1) {
    vec[offset + bucket] = bucketCounts[bucket] / counted;
  }
}
|
|
33
|
+
/**
 * Hash a string to an unsigned 32-bit integer using an FNV-1a style
 * xor/multiply over its UTF-16 code units.
 *
 * NOTE: the multiply is done in double precision before truncation, so
 * results are deterministic but not guaranteed to match a canonical 32-bit
 * FNV-1a. The arithmetic must stay as-is to keep existing hashes stable.
 *
 * @param {string} s - Input string.
 * @returns {number} Integer in [0, 2^32). The empty string yields 2166136261.
 */
function hashString(s) {
  const OFFSET_BASIS = 2166136261;
  const PRIME = 16777619;

  let hash = OFFSET_BASIS;
  let position = 0;
  while (position < s.length) {
    hash ^= s.charCodeAt(position);
    hash = (hash * PRIME) >>> 0;
    position += 1;
  }
  return hash;
}
|
|
41
|
+
/**
 * Expand one hash into `count` pseudo-random values in [0, 1] and write
 * them to `vec[offset .. offset + count)`.
 *
 * Uses a 32-bit linear congruential generator
 * (`state = state * 1664525 + 1013904223 mod 2^32`) seeded with `hash`.
 * Each new state is divided by 2^32 - 1.
 *
 * @param {number} hash - Seed (expected to be an unsigned 32-bit integer).
 * @param {Float32Array|number[]} vec - Destination vector (mutated).
 * @param {number} offset - Index of the first element to write.
 * @param {number} count - Number of elements to write.
 * @returns {void}
 */
function spreadHash(hash, vec, offset, count) {
  const MULTIPLIER = 1664525;
  const INCREMENT = 1013904223;
  const UINT32_MAX = 4294967295;

  let lcgState = hash;
  const end = offset + count;
  for (let slot = offset; slot < end; slot += 1) {
    lcgState = (lcgState * MULTIPLIER + INCREMENT) >>> 0;
    vec[slot] = lcgState / UINT32_MAX;
  }
}
|
|
48
|
+
/**
 * Scale `vec` in place to unit Euclidean length.
 *
 * A vector whose norm is not strictly positive (all zeros, or NaN) is left
 * unchanged.
 *
 * @param {Float32Array|number[]} vec - Vector to normalize (mutated).
 * @returns {void}
 */
function l2Normalize(vec) {
  let sumOfSquares = 0;
  for (let idx = 0; idx < vec.length; idx += 1) {
    const component = vec[idx];
    sumOfSquares += component * component;
  }

  const magnitude = Math.sqrt(sumOfSquares);
  if (!(magnitude > 0)) return;

  for (let idx = 0; idx < vec.length; idx += 1) {
    vec[idx] = vec[idx] / magnitude;
  }
}
|
|
56
|
+
// WGSL compute shader simulating a population of independent leaky
// integrate-and-fire (LIF) neurons, one invocation per neuron.
//
// Bindings (group 0):
//   0: uniform LIFParams — eight 4-byte scalars (32 bytes)
//   1: read-only storage `currents` — one input value per neuron
//   2: read-write storage `rates`   — one output spike rate per neuron
//
// Each invocation Euler-integrates its neuron for `timeSteps` steps, resets
// the membrane to `vReset` whenever it reaches `vThreshold`, and writes
// spikes / timeSteps. Invocations with index >= neuronCount return early.
var LIF_POPULATION_WGSL = (
/* wgsl */
`
struct LIFParams {
tau : f32, // membrane time constant (ms). Default 20.0
vThreshold: f32, // spike threshold (mV). Default -55.0
vReset : f32, // post-spike reset (mV). Default -75.0
vRest : f32, // resting potential (mV). Default -65.0
dt : f32, // timestep (ms). Default 1.0
timeSteps : u32, // number of LIF iterations. Default 50
neuronCount: u32, // number of neurons (= input dims)
currentScale: f32, // scale factor for current injection. Default 240.0
};

@group(0) @binding(0) var<uniform> params : LIFParams;
@group(0) @binding(1) var<storage, read> currents: array<f32>; // [neuronCount] in [0,1]
@group(0) @binding(2) var<storage, read_write> rates: array<f32>; // [neuronCount] out

@compute @workgroup_size(64)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
let i = gid.x;
if (i >= params.neuronCount) { return; }

let I_ext = currents[i] * params.currentScale; // scale to mV-equivalent current
var V = params.vRest;
var spikes: u32 = 0u;

for (var t = 0u; t < params.timeSteps; t++) {
// Euler LIF: dV/dt = (vRest - V + I_ext) / tau
V += params.dt * (params.vRest - V + I_ext) / params.tau;
if (V >= params.vThreshold) {
V = params.vReset;
spikes += 1u;
}
}

// Spike rate: fraction of timesteps with a spike
rates[i] = f32(spikes) / f32(params.timeSteps);
}
`
);
|
|
97
|
+
// Default LIF neuron parameters. Values match the defaults documented in the
// LIFParams struct of the WGSL shader. `timeSteps` is not part of this
// object; it is resolved separately (default 50).
var DEFAULT_LIF = {
  // Membrane time constant (ms).
  tau: 20,
  // Spike threshold (mV).
  vThreshold: -55,
  // Post-spike reset potential (mV).
  vReset: -75,
  // Resting potential (mV); also the initial membrane voltage.
  vRest: -65,
  // Euler integration timestep (ms).
  dt: 1,
  // Multiplier applied to each input value to obtain the injected current.
  currentScale: 240
};
|
|
105
|
+
/**
 * CPU reference implementation of the LIF population encoder.
 *
 * Each input value drives one independent leaky integrate-and-fire neuron.
 * The neuron starts at `vRest`, is Euler-integrated for `timeSteps` steps,
 * and is reset to `vReset` whenever its voltage reaches `vThreshold`.
 * The output is the fraction of steps on which the neuron spiked.
 *
 * @param {ArrayLike<number>} histogram - Input values, one per neuron.
 *   Missing (nullish) entries are treated as 0.
 * @param {object} [options]
 * @param {number} [options.timeSteps=50] - Simulation steps (normalized by
 *   `normalizeTimeSteps`).
 * @param {object} [options.lifParams] - Overrides for the default LIF
 *   parameters.
 * @returns {Float32Array} Spike rates, same length as `histogram`.
 */
function encodeLifPopulationCpu(histogram, options = {}) {
  const stepCount = normalizeTimeSteps(options.timeSteps ?? 50);
  const neuron = resolveLifPopulationParams(options.lifParams);
  const neuronTotal = histogram.length;
  const spikeRates = new Float32Array(neuronTotal);

  for (let index = 0; index < neuronTotal; index += 1) {
    const drive = (histogram[index] ?? 0) * neuron.currentScale;
    let membrane = neuron.vRest;
    let spikeCount = 0;

    let step = 0;
    while (step < stepCount) {
      const delta = neuron.dt * (neuron.vRest - membrane + drive) / neuron.tau;
      membrane = membrane + delta;
      if (membrane >= neuron.vThreshold) {
        membrane = neuron.vReset;
        spikeCount += 1;
      }
      step += 1;
    }

    spikeRates[index] = spikeCount / stepCount;
  }

  return spikeRates;
}
|
|
125
|
+
/**
 * Merge caller-supplied LIF parameters over the defaults.
 *
 * Fields that are explicitly `undefined` or `null` are ignored, so they do
 * not clobber a default. A plain object spread would copy them through, and
 * the simulation would then compute with NaN or divide by zero. Any other
 * supplied value, including extra keys, is copied as-is.
 *
 * @param {object|null} [lifParams={}] - Partial parameter overrides.
 * @returns {object} A fresh object holding the resolved parameters.
 */
function resolveLifPopulationParams(lifParams = {}) {
  const resolved = { ...DEFAULT_LIF };
  for (const [key, value] of Object.entries(lifParams ?? {})) {
    if (value !== undefined && value !== null) {
      resolved[key] = value;
    }
  }
  return resolved;
}
|
|
128
|
+
/**
 * Coerce a requested timestep count to a positive integer.
 *
 * @param {*} value - Requested number of steps.
 * @returns {number} `Math.floor(value)` when `value` is a finite number
 *   >= 1; otherwise 1 (non-numbers, NaN, ±Infinity, and values below 1).
 */
function normalizeTimeSteps(value) {
  const isUsable = Number.isFinite(value) && value >= 1;
  return isUsable ? Math.floor(value) : 1;
}
|
|
132
|
+
/**
 * WebGPU-backed LIF population encoder.
 *
 * After a successful `initialize()`, `encode()` / `encodeBatch()` run the
 * LIF shader on the GPU and return spike-rate vectors. When the GPU path is
 * not active, both methods return their input unchanged (passthrough).
 */
var SnnAccelerator = class {
  constructor() {
    this._available = false;
    this._timeSteps = 50;
    this._lif = { ...DEFAULT_LIF };
  }
  // ── Public ─────────────────────────────────────────────────────────────
  /** Whether the GPU path is active. In Node, requires the `webgpu` binding (auto-activated via ensureNodeWebGpu). */
  get available() {
    return this._available;
  }
  /**
   * Initialize the accelerator.
   * Detects WebGPU, compiles the LIF shader, allocates the uniform buffer.
   * Safe to call multiple times; no-op after first successful init.
   *
   * Never rejects because of a GPU setup failure: any error while
   * activating or probing WebGPU leaves the accelerator in passthrough mode
   * with all partially-created GPU resources released.
   *
   * @param {object} [opts]
   * @param {number} [opts.snnTimesteps=50] - LIF simulation steps.
   * @param {boolean} [opts.enableSnn=true] - Pass `false` to skip GPU setup.
   * @param {object} [lifParams] - Overrides for the default LIF parameters.
   * @returns {Promise<void>}
   */
  async initialize(opts = {}, lifParams = {}) {
    if (this._available) return;
    this._timeSteps = normalizeTimeSteps(opts.snnTimesteps ?? 50);
    this._lif = resolveLifPopulationParams(lifParams);
    if (opts.enableSnn === false) return;
    try {
      // Inside the try so that a failing Node binding degrades to
      // passthrough instead of rejecting initialize().
      await ensureNodeWebGpu();
      const gpu = globalThis.navigator?.gpu ?? globalThis.GPU;
      if (!gpu) return;
      const adapter = await gpu.requestAdapter();
      if (!adapter) return;
      this._device = await adapter.requestDevice();
      await this._compilePipeline();
      this._available = true;
    } catch {
      // Deliberate best-effort: release whatever was created (device,
      // params buffer) and fall back to passthrough.
      this.dispose();
    }
  }
  /**
   * Encode one histogram through the LIF population.
   *
   * @param {Float32Array} histogram - Input values, one per neuron.
   * @returns {Promise<Float32Array>} Spike rates of the same length. If the
   *   GPU is unavailable, the input histogram itself is returned unchanged.
   */
  async encode(histogram) {
    if (!this._available || !this._device) {
      return histogram;
    }
    return this._gpuEncode(histogram);
  }
  /**
   * Encode a batch of histograms. Amortizes GPU round-trip cost.
   *
   * Equal-length histograms are fused into a single dispatch; mixed lengths
   * fall back to one dispatch per histogram.
   *
   * @param {Float32Array[]} histograms
   * @returns {Promise<Float32Array[]>} One spike-rate vector per input. If
   *   the GPU is unavailable, the input array itself is returned unchanged.
   */
  async encodeBatch(histograms) {
    if (!this._available || !this._device) {
      return histograms;
    }
    if (histograms.length === 0) return [];
    const n = histograms[0].length;
    const uniform = histograms.every((h) => h.length === n);
    if (uniform) return this._gpuEncodeBatch(histograms, n);
    return Promise.all(histograms.map((h) => this._gpuEncode(h)));
  }
  /** Release GPU resources and return to passthrough mode. */
  dispose() {
    this._paramsBuffer?.destroy();
    this._device?.destroy();
    this._device = void 0;
    this._pipeline = void 0;
    this._paramsBuffer = void 0;
    this._available = false;
  }
  // ── Private ──────────────────────────────────────────────────────────────
  /**
   * Compile the LIF compute pipeline and upload the static uniform fields.
   *
   * Uniform layout (32 bytes, 4 bytes per slot):
   *   [0] tau  [1] vThreshold  [2] vReset  [3] vRest  [4] dt
   *   [5] timeSteps (u32)  [6] neuronCount (u32)  [7] currentScale
   * Slot 6 is left at 0 here and rewritten before every dispatch.
   */
  async _compilePipeline() {
    const device = this._device;
    const module = device.createShaderModule({ code: LIF_POPULATION_WGSL });
    this._pipeline = await device.createComputePipelineAsync({
      layout: "auto",
      compute: { module, entryPoint: "main" }
    });
    this._paramsBuffer = device.createBuffer({
      size: 32,
      // 8 × f32/u32
      usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
    });
    const paramData = new ArrayBuffer(32);
    const f32 = new Float32Array(paramData);
    const u32 = new Uint32Array(paramData);
    f32[0] = this._lif.tau;
    f32[1] = this._lif.vThreshold;
    f32[2] = this._lif.vReset;
    f32[3] = this._lif.vRest;
    f32[4] = this._lif.dt;
    u32[5] = this._timeSteps;
    f32[7] = this._lif.currentScale;
    device.queue.writeBuffer(this._paramsBuffer, 0, paramData);
  }
  /**
   * Run the LIF shader over `total` input values and read the rates back.
   *
   * Everything up to `queue.submit` runs without an intervening `await`, so
   * the `neuronCount` write and the dispatch that consumes it are enqueued
   * back to back even when several encodes are started concurrently.
   *
   * Per-dispatch buffers are destroyed in a `finally`, so a failed map or
   * validation error does not leak GPU memory.
   *
   * @param {number} total - Number of neurons (input values) to process.
   * @param {(mapped: Float32Array) => void} fillInput - Writes the `total`
   *   input values into the mapped input buffer.
   * @returns {Promise<Float32Array>} Copy of the `total` output rates.
   */
  async _runLif(total, fillInput) {
    // Zero-sized buffers cannot be bound to the pipeline; nothing to compute.
    if (total === 0) return new Float32Array(0);
    const device = this._device;
    const pipeline = this._pipeline;
    const paramsBuffer = this._paramsBuffer;
    const byteLength = total * 4;
    const transientBuffers = [];
    try {
      // Byte offset 24 = uniform slot 6 (neuronCount).
      device.queue.writeBuffer(paramsBuffer, 24, new Uint32Array([total]));
      const inputBuffer = device.createBuffer({
        size: byteLength,
        usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
        mappedAtCreation: true
      });
      transientBuffers.push(inputBuffer);
      fillInput(new Float32Array(inputBuffer.getMappedRange()));
      inputBuffer.unmap();
      const outputBuffer = device.createBuffer({
        size: byteLength,
        usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
      });
      transientBuffers.push(outputBuffer);
      const stagingBuffer = device.createBuffer({
        size: byteLength,
        usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST
      });
      transientBuffers.push(stagingBuffer);
      const bindGroup = device.createBindGroup({
        layout: pipeline.getBindGroupLayout(0),
        entries: [
          { binding: 0, resource: { buffer: paramsBuffer } },
          { binding: 1, resource: { buffer: inputBuffer } },
          { binding: 2, resource: { buffer: outputBuffer } }
        ]
      });
      const encoder = device.createCommandEncoder();
      const pass = encoder.beginComputePass();
      pass.setPipeline(pipeline);
      pass.setBindGroup(0, bindGroup);
      // The shader declares @workgroup_size(64).
      pass.dispatchWorkgroups(Math.ceil(total / 64));
      pass.end();
      encoder.copyBufferToBuffer(outputBuffer, 0, stagingBuffer, 0, byteLength);
      device.queue.submit([encoder.finish()]);
      await stagingBuffer.mapAsync(GPUMapMode.READ);
      // slice(0) copies out of the mapping before it is released.
      const result = new Float32Array(stagingBuffer.getMappedRange().slice(0));
      stagingBuffer.unmap();
      return result;
    } finally {
      for (const buffer of transientBuffers) buffer.destroy();
    }
  }
  /**
   * Encode a single histogram on the GPU.
   *
   * @param {Float32Array} histogram
   * @returns {Promise<Float32Array>} Spike rates, same length as the input.
   */
  async _gpuEncode(histogram) {
    return this._runLif(histogram.length, (mapped) => mapped.set(histogram));
  }
  /**
   * Fused batch encode: M histograms of equal length N processed in a SINGLE
   * dispatch + single readback. Because the LIF update is per-element independent,
   * the existing shader runs unchanged over a flattened (M*N) array; each thread
   * encodes one (histogram, neuron) pair. This removes the M x buffer-create and
   * M x mapAsync round-trips of the per-item path.
   *
   * @param {Float32Array[]} histograms - M histograms, each of length `n`.
   * @param {number} n - Common histogram length.
   * @returns {Promise<Float32Array[]>} M spike-rate vectors of length `n`.
   */
  async _gpuEncodeBatch(histograms, n) {
    const m = histograms.length;
    const all = await this._runLif(m * n, (mapped) => {
      for (let j = 0; j < m; j++) mapped.set(histograms[j], j * n);
    });
    const out = new Array(m);
    for (let j = 0; j < m; j++) out[j] = all.slice(j * n, j * n + n);
    return out;
  }
};
|
|
332
|
+
|
|
333
|
+
// src/types.ts
|
|
334
|
+
// Total embedding length: STRUCTURAL_DIM + SUBWORD_BLOCKS * SUBWORD_BINS
// (384 + 3 * 128 = 768).
var HOLOEMBED_DIM = 768;
// Length of the leading structural section; subword blocks start at this index.
var STRUCTURAL_DIM = 384;
// Number of buckets in each character-trigram histogram block.
var SUBWORD_BINS = 128;
// Number of consecutive trigram blocks that follow the structural section.
var SUBWORD_BLOCKS = 3;
|
|
338
|
+
|
|
339
|
+
// src/HoloEmbedEncoder.ts
|
|
340
|
+
// Path fragments tested (substring match) against a symbol's file path.
// Order matters: entry i drives embedding dimension i, so reordering or
// inserting entries changes the meaning of dimensions 0..5.
var KNOWN_PACKAGES = [
  "packages/core",
  "packages/mcp-server",
  "packages/plugins",
  "packages/absorb-service",
  "packages/studio",
  "packages/r3f-renderer"
];
|
|
348
|
+
/**
 * Deterministic 768-dim embedding encoder for code symbols and text.
 *
 * Vector layout:
 *   [0, 384)    structural features (path flags, hashes, graph metrics)
 *   [384, 512)  trigram block 1: name / type / signature (or raw text)
 *   [512, 640)  trigram block 2: doc comment
 *   [640, 768)  trigram block 3: event names
 *
 * The trigram blocks hold raw histograms on the CPU path and LIF spike
 * rates on the GPU (SNN) path.
 */
var HoloEmbedEncoder = class {
  constructor() {
    this._accel = new SnnAccelerator();
    this._snnEnabled = false;
  }
  /**
   * Initialize the encoder.
   * Attempts SNN-WebGPU setup if `enableSnn` is true (default).
   * Falls back to CPU encoding if GPU is unavailable.
   *
   * @param {object} [opts] - Forwarded to `SnnAccelerator.initialize`.
   * @returns {Promise<void>}
   */
  async initialize(opts = {}) {
    await this._accel.initialize(opts);
    this._snnEnabled = this._accel.available;
  }
  /** Whether the GPU SNN path is active. */
  get snnActive() {
    return this._snnEnabled;
  }
  /**
   * Encode a symbol to a 768-dim L2-normalized Float32Array.
   *
   * Always synchronous and CPU-only (raw trigram histograms).
   *
   * @param {object} sym - Symbol record; `filePath`, `name` and `type` are
   *   read unconditionally, the other fields are optional.
   * @param {object} [graph] - Optional graph metrics (`fanIn`, `fanOut`,
   *   `emitCount`, `listenCount`, `eventNames`).
   * @returns {Float32Array}
   */
  encode(sym, graph = {}) {
    const vec = new Float32Array(HOLOEMBED_DIM);
    this._fillStructural(vec, sym, graph);
    this._fillSubword(vec, sym, graph);
    l2Normalize(vec);
    return vec;
  }
  /**
   * Async encode — uses SNN population coding for the trigram blocks when GPU is active.
   * Identical to `encode()` when GPU is unavailable.
   *
   * @param {object} sym
   * @param {object} [graph]
   * @returns {Promise<Float32Array>}
   */
  async encodeAsync(sym, graph = {}) {
    const vec = new Float32Array(HOLOEMBED_DIM);
    this._fillStructural(vec, sym, graph);
    if (this._snnEnabled) {
      await this._fillSubwordSnn(vec, sym, graph);
    } else {
      this._fillSubword(vec, sym, graph);
    }
    l2Normalize(vec);
    return vec;
  }
  /**
   * Encode a plain text string to a 768-dim vector.
   * For use with NL queries — fills the structural section and the first
   * trigram block; the other two trigram blocks stay zero.
   *
   * @param {string} text
   * @returns {Float32Array}
   */
  encodeText(text) {
    const vec = new Float32Array(HOLOEMBED_DIM);
    this._fillStructuralFromText(vec, text);
    trigramHistogram(text, vec, STRUCTURAL_DIM, SUBWORD_BINS);
    l2Normalize(vec);
    return vec;
  }
  /**
   * Batch-encode text strings. Amortizes SNN GPU round-trip when GPU is active.
   *
   * With the GPU inactive this is `texts.map(encodeText)`. With the GPU
   * active the first trigram block holds LIF spike rates instead of the raw
   * histogram.
   *
   * @param {string[]} texts
   * @returns {Promise<Float32Array[]>}
   */
  async encodeTexts(texts) {
    if (!this._snnEnabled) {
      return texts.map((t) => this.encodeText(t));
    }
    const histograms = texts.map((t) => {
      const hist = new Float32Array(SUBWORD_BINS);
      trigramHistogram(t, hist, 0, SUBWORD_BINS);
      return hist;
    });
    const snnRates = await this._accel.encodeBatch(histograms);
    return texts.map((t, i) => {
      const vec = new Float32Array(HOLOEMBED_DIM);
      this._fillStructuralFromText(vec, t);
      vec.set(snnRates[i], STRUCTURAL_DIM);
      l2Normalize(vec);
      return vec;
    });
  }
  /** Release GPU resources. Afterwards `snnActive` is false and encoding uses the CPU path. */
  dispose() {
    this._accel.dispose();
    this._snnEnabled = false;
  }
  // ── Private: structural base ──────────────────────────────────────────────
  /**
   * Fill dims 0..127 from a forward-slash-normalized file path:
   *   0..5    one flag per KNOWN_PACKAGES entry
   *   6       test file flag
   *   7..9    /traits/, /adapters/, /providers/ flags
   *   10      directory depth, saturating at 8 separators
   *   11..127 pseudo-random spread of the path hash
   */
  _fillPathFeatures(vec, fp) {
    for (let i = 0; i < KNOWN_PACKAGES.length; i++) {
      vec[i] = fp.includes(KNOWN_PACKAGES[i]) ? 1 : 0;
    }
    vec[6] = fp.includes("__tests__") || fp.includes(".test.") || fp.includes(".spec.") ? 1 : 0;
    vec[7] = fp.includes("/traits/") ? 1 : 0;
    vec[8] = fp.includes("/adapters/") ? 1 : 0;
    vec[9] = fp.includes("/providers/") ? 1 : 0;
    const depth = (fp.match(/\//g) ?? []).length;
    vec[10] = Math.min(depth / 8, 1);
    spreadHash(hashString(fp), vec, 11, 117);
  }
  /**
   * Fill the structural section [0, 384) for a symbol:
   *   0..127   path features
   *   128..134 symbol attributes (type, visibility, exported, param count,
   *            has ':', line count, has owner)
   *   135..191 spread of hash(signature ?? name)
   *   192..193 fan-in / fan-out, saturating at 20
   *   194..255 spread of hash(docComment)
   *   256..259 emit / listen flags and counts (counts saturate at 5)
   *   260..319 spread of hash(sorted event names joined by '|')
   *   320..383 spread of hash(`type:name:filePath:line`)
   */
  _fillStructural(vec, sym, graph) {
    this._fillPathFeatures(vec, sym.filePath.replace(/\\/g, "/"));
    vec[128] = symbolTypeScore(sym.type);
    vec[129] = visibilityScore(sym.visibility);
    vec[130] = sym.isExported ? 1 : 0;
    vec[131] = sym.signature ? Math.min(countParams(sym.signature) / 10, 1) : 0;
    vec[132] = sym.signature?.includes(":") ? 1 : 0;
    vec[133] = Math.min((sym.lineCount ?? 0) / 200, 1);
    vec[134] = sym.owner ? 1 : 0;
    spreadHash(hashString(sym.signature ?? sym.name), vec, 135, 57);
    vec[192] = Math.min((graph.fanIn ?? 0) / 20, 1);
    vec[193] = Math.min((graph.fanOut ?? 0) / 20, 1);
    spreadHash(hashString(sym.docComment ?? ""), vec, 194, 62);
    vec[256] = (graph.emitCount ?? 0) > 0 ? 1 : 0;
    vec[257] = (graph.listenCount ?? 0) > 0 ? 1 : 0;
    vec[258] = Math.min((graph.emitCount ?? 0) / 5, 1);
    vec[259] = Math.min((graph.listenCount ?? 0) / 5, 1);
    // Sort a copy: Array.prototype.sort works in place and would otherwise
    // reorder the caller's eventNames array.
    const eventKey = [...(graph.eventNames ?? [])].sort().join("|");
    spreadHash(hashString(eventKey), vec, 260, 60);
    const contentKey = `${sym.type}:${sym.name}:${sym.filePath}:${sym.line}`;
    spreadHash(hashString(contentKey), vec, 320, 64);
  }
  /**
   * Fill the structural section for free text. A path is taken from the
   * first `file: <path>` mention, if any (empty path otherwise).
   *   0..127   path features
   *   128..191 spread of hash(text)
   *   192..319 left at zero
   *   320..383 spread of hash(text + path)
   */
  _fillStructuralFromText(vec, text) {
    const fileMatch = /file:\s*(\S+)/i.exec(text);
    const fp = (fileMatch?.[1] ?? "").replace(/\\/g, "/");
    this._fillPathFeatures(vec, fp);
    spreadHash(hashString(text), vec, 128, 64);
    spreadHash(hashString(text + fp), vec, 320, 64);
  }
  // ── Private: subword blocks ───────────────────────────────────────────────
  /** Source strings for the three trigram blocks, in block order. */
  _subwordSources(sym, graph) {
    return [
      `${sym.name} ${sym.type} ${sym.signature ?? ""}`,
      sym.docComment ?? "",
      camelSplit((graph.eventNames ?? []).join(" "))
    ];
  }
  // ── Private: subword blocks (CPU) ─────────────────────────────────────────
  /** Write the three raw trigram histograms straight into `vec`. */
  _fillSubword(vec, sym, graph) {
    const sources = this._subwordSources(sym, graph);
    for (let block = 0; block < sources.length; block++) {
      trigramHistogram(sources[block], vec, STRUCTURAL_DIM + block * SUBWORD_BINS, SUBWORD_BINS);
    }
  }
  // ── Private: subword blocks (GPU SNN) ────────────────────────────────────
  /**
   * Build the three trigram histograms, pass them through the accelerator
   * in one batch, and write the results into `vec`.
   */
  async _fillSubwordSnn(vec, sym, graph) {
    const histograms = this._subwordSources(sym, graph).map((source) => {
      const hist = new Float32Array(SUBWORD_BINS);
      trigramHistogram(source, hist, 0, SUBWORD_BINS);
      return hist;
    });
    const rates = await this._accel.encodeBatch(histograms);
    for (let block = 0; block < rates.length; block++) {
      vec.set(rates[block], STRUCTURAL_DIM + block * SUBWORD_BINS);
    }
  }
};
|
|
520
|
+
// Fixed score per known symbol kind; anything else scores 0.1.
const SYMBOL_TYPE_SCORES = new Map([
  ["function", 1],
  ["method", 0.9],
  ["class", 0.7],
  ["interface", 0.5],
  ["type_alias", 0.4],
  ["enum", 0.3],
  ["constant", 0.2],
  ["field", 0.15]
]);

/**
 * Map a symbol kind to its fixed numeric feature value.
 *
 * @param {string} type - Symbol kind, e.g. "function" or "class".
 * @returns {number} The score for a known kind, or 0.1 for any other value.
 */
function symbolTypeScore(type) {
  const score = SYMBOL_TYPE_SCORES.get(type);
  return score === undefined ? 0.1 : score;
}
|
|
542
|
+
// Fixed score per known visibility modifier; anything else scores 0.8.
const VISIBILITY_SCORES = new Map([
  ["public", 1],
  ["protected", 0.5],
  ["internal", 0.3],
  ["private", 0]
]);

/**
 * Map a visibility modifier to its fixed numeric feature value.
 *
 * @param {string} [v] - "public", "protected", "internal" or "private".
 * @returns {number} The score for a known modifier, or 0.8 for any other
 *   value (including `undefined`).
 */
function visibilityScore(v) {
  const score = VISIBILITY_SCORES.get(v);
  return score === undefined ? 0.8 : score;
}
|
|
556
|
+
/**
 * Count the parameters in a function signature.
 *
 * The parameter list is the first `(`…`)` group that opens outside any
 * other bracket. Only commas directly inside that group separate
 * parameters; commas nested in generics (`Map<K, V>`), object or tuple
 * types, and callback parameter lists are ignored. A parenthesized return
 * type after the parameter list is not counted. Empty segments (for
 * example a trailing comma) are not counted.
 *
 * String literals in default values are not parsed, so a comma inside one
 * still splits. If no balanced parameter group is found, the count falls
 * back to a plain comma split between the first `(` and the last `)`.
 *
 * @param {string} signature - Signature text, e.g. "foo(a: number, b)".
 * @returns {number} Number of parameters; 0 when there are none or no
 *   parameter list is present.
 */
function countParams(signature) {
  const OPENERS = "([{<";
  const CLOSERS = ")]}>";
  const stack = [];
  let inParams = false;
  let count = 0;
  let segmentHasContent = false;

  for (let i = 0; i < signature.length; i++) {
    const ch = signature[i];

    if (OPENERS.includes(ch)) {
      if (inParams) {
        segmentHasContent = true;
      } else if (ch === "(" && stack.length === 0) {
        inParams = true;
      }
      stack.push(ch);
      continue;
    }

    const closerIndex = CLOSERS.indexOf(ch);
    // The '>' of an arrow '=>' does not close a generic argument list.
    const isArrowHead = ch === ">" && signature[i - 1] === "=";
    if (closerIndex >= 0 && !isArrowHead) {
      if (ch !== ">") {
        // A '<' still open here was a comparison, not a generic bracket.
        while (stack.length > 0 && stack[stack.length - 1] === "<") stack.pop();
      }
      if (stack.length > 0 && stack[stack.length - 1] === OPENERS[closerIndex]) {
        stack.pop();
        if (inParams && stack.length === 0) {
          return segmentHasContent ? count + 1 : count;
        }
        continue;
      }
      // Unmatched closer: treat it as an ordinary character below.
    }

    if (!inParams) continue;
    if (ch === "," && stack.length === 1) {
      if (segmentHasContent) count++;
      segmentHasContent = false;
    } else if (ch.trim() !== "") {
      segmentHasContent = true;
    }
  }

  // No balanced parameter group found: keep the legacy best-effort count.
  const s = signature.indexOf("(");
  const e = signature.lastIndexOf(")");
  if (s < 0 || e <= s) return 0;
  const inner = signature.slice(s + 1, e).trim();
  return inner ? inner.split(",").length : 0;
}
|
|
564
|
+
|
|
565
|
+
export { HOLOEMBED_DIM, HoloEmbedEncoder, STRUCTURAL_DIM, SUBWORD_BINS, SUBWORD_BLOCKS, SnnAccelerator, camelSplit, encodeLifPopulationCpu, hashString, l2Normalize, spreadHash, trigramHistogram };
|
|
566
|
+
//# sourceMappingURL=index.js.map
|
|
567
|
+
//# sourceMappingURL=index.js.map
|