@holoscript/holoembed 6.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/index.cjs +580 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +321 -0
- package/dist/index.d.ts +321 -0
- package/dist/index.js +567 -0
- package/dist/index.js.map +1 -0
- package/package.json +70 -0
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HoloEmbed — shared types
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* Minimal symbol descriptor needed for encoding.
|
|
6
|
+
* Intentionally independent of `@holoscript/absorb-service` types so this
|
|
7
|
+
* package can be used without absorb-service as a dependency.
|
|
8
|
+
*/
|
|
9
|
+
interface SymbolInput {
|
|
10
|
+
/** Symbol identifier (camelCase, PascalCase, snake_case). */
|
|
11
|
+
name: string;
|
|
12
|
+
/** Symbol kind: function | method | class | interface | type_alias | enum | constant | field */
|
|
13
|
+
type: string;
|
|
14
|
+
/** Absolute or root-relative file path. */
|
|
15
|
+
filePath: string;
|
|
16
|
+
/** 1-based line number. */
|
|
17
|
+
line: number;
|
|
18
|
+
/** 0-based column. */
|
|
19
|
+
column?: number;
|
|
20
|
+
/** Signature string, e.g. "function foo(x: string): void" */
|
|
21
|
+
signature?: string;
|
|
22
|
+
/** JSDoc / block-comment extracted from source. */
|
|
23
|
+
docComment?: string;
|
|
24
|
+
/** Whether the symbol is exported. */
|
|
25
|
+
isExported?: boolean;
|
|
26
|
+
/** Visibility: 'public' | 'protected' | 'private' | 'internal' */
|
|
27
|
+
visibility?: string;
|
|
28
|
+
/** Owner class/interface name (for methods/fields). */
|
|
29
|
+
owner?: string;
|
|
30
|
+
/** Lines of code. */
|
|
31
|
+
lineCount?: number;
|
|
32
|
+
}
|
|
33
|
+
/**
|
|
34
|
+
* Optional graph-topology features passed alongside a SymbolInput.
|
|
35
|
+
* When provided, these fill the structural call-graph and event-chain dims.
|
|
36
|
+
*/
|
|
37
|
+
interface GraphEnrichment {
|
|
38
|
+
/** Number of callers of this symbol. */
|
|
39
|
+
fanIn?: number;
|
|
40
|
+
/** Number of callees from this symbol. */
|
|
41
|
+
fanOut?: number;
|
|
42
|
+
/** Number of event emissions. */
|
|
43
|
+
emitCount?: number;
|
|
44
|
+
/** Number of event subscriptions. */
|
|
45
|
+
listenCount?: number;
|
|
46
|
+
/** Names of events emitted or listened to. */
|
|
47
|
+
eventNames?: string[];
|
|
48
|
+
}
|
|
49
|
+
interface EncoderOptions {
|
|
50
|
+
/**
|
|
51
|
+
* Whether to attempt SNN-WebGPU acceleration.
|
|
52
|
+
* Defaults to true; gracefully falls back to CPU if WebGPU is unavailable.
|
|
53
|
+
*/
|
|
54
|
+
enableSnn?: boolean;
|
|
55
|
+
/**
|
|
56
|
+
* Number of LIF timesteps for SNN population coding.
|
|
57
|
+
* Higher values → richer spike-rate patterns, slower GPU batch.
|
|
58
|
+
* Default: 50 (50ms simulated at dt=1ms).
|
|
59
|
+
*/
|
|
60
|
+
snnTimesteps?: number;
|
|
61
|
+
}
|
|
62
|
+
/** Dimensionality of the HoloEmbed output vector. */
|
|
63
|
+
declare const HOLOEMBED_DIM = 768;
|
|
64
|
+
/** Dimensionality of the structural base (dims 0-383). */
|
|
65
|
+
declare const STRUCTURAL_DIM = 384;
|
|
66
|
+
/** Number of bins per char-trigram subword block (dims 384-511, 512-639, 640-767). */
|
|
67
|
+
declare const SUBWORD_BINS = 128;
|
|
68
|
+
/** Number of subword blocks. */
|
|
69
|
+
declare const SUBWORD_BLOCKS = 3;
|
|
70
|
+
|
|
71
|
+
/**
|
|
72
|
+
* HoloEmbedEncoder — main 768-dim encoder
|
|
73
|
+
*
|
|
74
|
+
* Combines:
|
|
75
|
+
* Dims 0–383: structural base (file topology, call-graph, event-chain)
|
|
76
|
+
* Dims 384–511: char-trigram histogram of name + type + signature (128 bins)
|
|
77
|
+
* Dims 512–639: char-trigram histogram of docComment (128 bins)
|
|
78
|
+
* Dims 640–767: char-trigram histogram of event names (128 bins)
|
|
79
|
+
*
|
|
80
|
+
* Optional SNN-WebGPU population coding (Phase 2 acceleration):
|
|
81
|
+
* Each trigram block is passed through a 128-neuron LIF population.
|
|
82
|
+
* Output → spike-rate vector (sparse, threshold-coded) instead of raw histogram.
|
|
83
|
+
* Falls back to raw histogram when GPU is unavailable.
|
|
84
|
+
*
|
|
85
|
+
* ## Canonical use
|
|
86
|
+
*
|
|
87
|
+
* ```ts
|
|
88
|
+
* import { HoloEmbedEncoder } from '@holoscript/holoembed';
|
|
89
|
+
*
|
|
90
|
+
* const encoder = new HoloEmbedEncoder();
|
|
91
|
+
* await encoder.initialize({ enableSnn: true }); // falls back to CPU if GPU is unavailable
|
|
92
|
+
*
|
|
93
|
+
* const vec = encoder.encode(sym, { fanIn: 3, eventNames: ['pillar:spike'] });
|
|
94
|
+
* const textVec = encoder.encodeText('pillar slice emitter'); // for NL queries
|
|
95
|
+
* ```
|
|
96
|
+
*
|
|
97
|
+
* ## EmbeddingProvider integration
|
|
98
|
+
*
|
|
99
|
+
* See `HoloEmbedProvider` in `@holoscript/absorb-service` — thin wrapper that
|
|
100
|
+
* delegates to this encoder and satisfies the `EmbeddingProvider` interface.
|
|
101
|
+
*/
|
|
102
|
+
|
|
103
|
+
declare class HoloEmbedEncoder {
|
|
104
|
+
private _accel;
|
|
105
|
+
private _snnEnabled;
|
|
106
|
+
/**
|
|
107
|
+
* Initialize the encoder.
|
|
108
|
+
* Attempts SNN-WebGPU setup if `enableSnn` is true (default).
|
|
109
|
+
* Falls back to CPU encoding if GPU is unavailable — always safe to await.
|
|
110
|
+
*/
|
|
111
|
+
initialize(opts?: EncoderOptions): Promise<void>;
|
|
112
|
+
/** Whether the GPU SNN path is active. */
|
|
113
|
+
get snnActive(): boolean;
|
|
114
|
+
/**
|
|
115
|
+
* Encode a symbol to a 768-dim L2-normalized Float32Array.
|
|
116
|
+
*
|
|
117
|
+
* Always synchronous: returns the vector directly, not a Promise. For SNN population coding on the GPU, use `encodeAsync()`.
|
|
118
|
+
* Returns a Float32Array that can be passed to cosine-similarity search.
|
|
119
|
+
*/
|
|
120
|
+
encode(sym: SymbolInput, graph?: GraphEnrichment): Float32Array;
|
|
121
|
+
/**
|
|
122
|
+
* Async encode — uses SNN population coding for the trigram blocks when GPU is active.
|
|
123
|
+
* Identical to `encode()` when GPU is unavailable.
|
|
124
|
+
*/
|
|
125
|
+
encodeAsync(sym: SymbolInput, graph?: GraphEnrichment): Promise<Float32Array>;
|
|
126
|
+
/**
|
|
127
|
+
* Encode a plain text string to a 768-dim vector.
|
|
128
|
+
* For use with NL queries — fills the structural + name/sig trigram blocks.
|
|
129
|
+
*/
|
|
130
|
+
encodeText(text: string): Float32Array;
|
|
131
|
+
/**
|
|
132
|
+
* Batch-encode text strings. Amortizes SNN GPU round-trip when GPU is active.
|
|
133
|
+
*/
|
|
134
|
+
encodeTexts(texts: string[]): Promise<Float32Array[]>;
|
|
135
|
+
/** Release GPU resources. */
|
|
136
|
+
dispose(): void;
|
|
137
|
+
private _fillStructural;
|
|
138
|
+
private _fillStructuralFromText;
|
|
139
|
+
private _fillSubword;
|
|
140
|
+
private _fillSubwordSnn;
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
/**
|
|
144
|
+
* SnnAccelerator — SNN-WebGPU population coding for HoloEmbed
|
|
145
|
+
*
|
|
146
|
+
* Replaces the static char-trigram histogram blocks with a richer
|
|
147
|
+
* Leaky Integrate-and-Fire population code:
|
|
148
|
+
*
|
|
149
|
+
* Input: 128-dim normalized trigram histogram h[0..127]
|
|
150
|
+
* Process: inject h[i] as synaptic current into LIF neuron i
|
|
151
|
+
* simulate T timesteps at dt=1ms (default T=50)
|
|
152
|
+
* Output: 128-dim spike-rate vector r[i] = spikes_i / T
|
|
153
|
+
*
|
|
154
|
+
* ## Why SNN population coding enriches embeddings
|
|
155
|
+
*
|
|
156
|
+
* The raw trigram histogram encodes frequency linearly: h[i] = count_i / total.
|
|
157
|
+
* The LIF population code adds a nonlinear threshold transformation:
|
|
158
|
+
* - High-frequency trigrams (large h[i]) → many spikes → high r[i]
|
|
159
|
+
* - Rare trigrams (small h[i]) → sub-threshold → r[i] ≈ 0
|
|
160
|
+
*
|
|
161
|
+
* This sparse representation has better cosine similarity properties than
|
|
162
|
+
* a dense histogram: two symbols with the same DOMINANT trigrams score high
|
|
163
|
+
* even if their rare-trigram distribution differs. Matches the biological
|
|
164
|
+
* precedent in Paper 33 (brain white-matter routing).
|
|
165
|
+
*
|
|
166
|
+
* ## WebGPU availability
|
|
167
|
+
*
|
|
168
|
+
* WebGPU is available in:
|
|
169
|
+
* - Chrome 113+ / Edge 113+ (desktop)
|
|
170
|
+
* - Node.js with the `webgpu` npm binding installed (auto-activated via ensureNodeWebGpu)
|
|
171
|
+
*
|
|
172
|
+
* In CI / Node.js without WebGPU, the accelerator reports `available = false`
|
|
173
|
+
* and returns the input histogram unchanged (CPU passthrough). The API contract is
|
|
174
|
+
* the same, but the output is then the raw histogram rather than LIF spike rates.
|
|
175
|
+
*
|
|
176
|
+
* ## WGSL shader
|
|
177
|
+
*
|
|
178
|
+
* See `LIF_POPULATION_WGSL` below. One dispatch per symbol block.
|
|
179
|
+
* Workgroup size 64 → 128 neurons fit in 2 workgroups.
|
|
180
|
+
*/
|
|
181
|
+
|
|
182
|
+
interface LIFPopulationParams {
|
|
183
|
+
tau?: number;
|
|
184
|
+
vThreshold?: number;
|
|
185
|
+
vReset?: number;
|
|
186
|
+
vRest?: number;
|
|
187
|
+
dt?: number;
|
|
188
|
+
/** Scale factor applied to normalized trigram histogram values to produce mV-equivalent injection. */
|
|
189
|
+
currentScale?: number;
|
|
190
|
+
}
|
|
191
|
+
interface LIFPopulationCpuOptions {
|
|
192
|
+
/** Number of LIF timesteps. Matches EncoderOptions.snnTimesteps. */
|
|
193
|
+
timeSteps?: number;
|
|
194
|
+
/** Optional LIF parameter overrides. */
|
|
195
|
+
lifParams?: LIFPopulationParams;
|
|
196
|
+
}
|
|
197
|
+
/**
|
|
198
|
+
* CPU reference implementation of the LIF population coder.
|
|
199
|
+
*
|
|
200
|
+
* Production fallback remains identity passthrough so no CPU-only runtime pays
|
|
201
|
+
* for LIF simulation accidentally. Tests and benchmarks use this function as
|
|
202
|
+
* the apples-to-apples reference for the WebGPU shader.
|
|
203
|
+
*/
|
|
204
|
+
declare function encodeLifPopulationCpu(histogram: Float32Array, options?: LIFPopulationCpuOptions): Float32Array;
|
|
205
|
+
/**
|
|
206
|
+
* SNN-WebGPU population coder for HoloEmbed trigram blocks.
|
|
207
|
+
*
|
|
208
|
+
* Usage:
|
|
209
|
+
* ```ts
|
|
210
|
+
* const accel = new SnnAccelerator();
|
|
211
|
+
* await accel.initialize({ enableSnn: true });
|
|
212
|
+
*
|
|
213
|
+
* // In embedding loop:
|
|
214
|
+
* const spikeRates = await accel.encode(trigramHistogram);
|
|
215
|
+
* ```
|
|
216
|
+
*/
|
|
217
|
+
declare class SnnAccelerator {
|
|
218
|
+
private _available;
|
|
219
|
+
private _timeSteps;
|
|
220
|
+
private _lif;
|
|
221
|
+
private _device?;
|
|
222
|
+
private _pipeline?;
|
|
223
|
+
private _paramsBuffer?;
|
|
224
|
+
/** Whether the GPU path is active. In Node, requires the `webgpu` binding (auto-activated via ensureNodeWebGpu). */
|
|
225
|
+
get available(): boolean;
|
|
226
|
+
/**
|
|
227
|
+
* Initialize the accelerator.
|
|
228
|
+
* Detects WebGPU, compiles the LIF shader, allocates uniform buffers.
|
|
229
|
+
* Safe to call multiple times; no-op after first successful init.
|
|
230
|
+
*/
|
|
231
|
+
initialize(opts?: EncoderOptions, lifParams?: LIFPopulationParams): Promise<void>;
|
|
232
|
+
/**
|
|
233
|
+
* Encode a 128-dim trigram histogram through the LIF population.
|
|
234
|
+
*
|
|
235
|
+
* Returns a 128-dim spike-rate vector.
|
|
236
|
+
* If GPU is unavailable, returns the input histogram unchanged (CPU passthrough).
|
|
237
|
+
*/
|
|
238
|
+
encode(histogram: Float32Array): Promise<Float32Array>;
|
|
239
|
+
/**
|
|
240
|
+
* Encode a batch of histograms. Amortizes GPU round-trip cost.
|
|
241
|
+
* Falls back to sequential CPU passthrough if GPU unavailable.
|
|
242
|
+
*/
|
|
243
|
+
encodeBatch(histograms: Float32Array[]): Promise<Float32Array[]>;
|
|
244
|
+
/** Release GPU resources. */
|
|
245
|
+
dispose(): void;
|
|
246
|
+
private _compilePipeline;
|
|
247
|
+
private _gpuEncode;
|
|
248
|
+
/**
|
|
249
|
+
* Fused batch encode: M histograms of equal length N processed in a SINGLE
|
|
250
|
+
* dispatch + single readback. Because the LIF update is per-element independent,
|
|
251
|
+
* the existing shader runs unchanged over a flattened (M*N) array; each thread
|
|
252
|
+
* encodes one (histogram, neuron) pair. This removes the M x buffer-create and
|
|
253
|
+
* M x mapAsync round-trips that made the per-item path slower than CPU.
|
|
254
|
+
*/
|
|
255
|
+
private _gpuEncodeBatch;
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
/**
|
|
259
|
+
* charTrigram — char-trigram histogram utilities
|
|
260
|
+
*
|
|
261
|
+
* Canonical implementation for HoloEmbed subword encoding.
|
|
262
|
+
* Also used by HoloEmbedProvider in absorb-service (via copy-import pattern;
|
|
263
|
+
* absorb-service does not take @holoscript/holoembed as a dep to avoid cycles).
|
|
264
|
+
*
|
|
265
|
+
* ## Algorithm
|
|
266
|
+
*
|
|
267
|
+
* 1. camelSplit: tokenize PascalCase/camelCase/snake_case/event:names into words
|
|
268
|
+
* 2. Lowercase + strip non-alphanumeric → clean token sequence
|
|
269
|
+
* 3. Slide 3-char window over each word; skip cross-word boundaries
|
|
270
|
+
* 4. FNV-1a hash each 3-char sequence → bucket in [0, bins)
|
|
271
|
+
* 5. Normalize bucket counts by total trigram count → [0, 1] histogram
|
|
272
|
+
*
|
|
273
|
+
* ## Why char trigrams?
|
|
274
|
+
*
|
|
275
|
+
* "PillarSliceEmitter" camel-split → "pillar slice emitter"
|
|
276
|
+
* trigrams include: "pil","ill","lla","lar" / "sli","lic","ice" / "emi","mit","itt","tte","ter"
|
|
277
|
+
*
|
|
278
|
+
* NL query "pillar slice emitter" produces IDENTICAL trigrams → perfect histogram overlap.
|
|
279
|
+
* The 128-bin FNV hash means some trigrams collide, but the overlap signal dominates for
|
|
280
|
+
* name-matched pairs.
|
|
281
|
+
*
|
|
282
|
+
* This is a BPE-lite approach: no vocabulary, no model, no training. Pure character algebra.
|
|
283
|
+
*/
|
|
284
|
+
/**
|
|
285
|
+
* Tokenize a camelCase / PascalCase / snake_case / colon-separated identifier
|
|
286
|
+
* into space-separated lowercase words suitable for trigram extraction.
|
|
287
|
+
*
|
|
288
|
+
* Examples:
|
|
289
|
+
* "PillarSliceEmitter" → "pillar slice emitter"
|
|
290
|
+
* "extractEmitSites" → "extract emit sites"
|
|
291
|
+
* "ev:pillar:spike" → "ev pillar spike"
|
|
292
|
+
* "BRAIN_COORD_MAPPER" → "brain coord mapper"
|
|
293
|
+
* "pillar slice emitter" → "pillar slice emitter" (NL query passthrough)
|
|
294
|
+
*/
|
|
295
|
+
declare function camelSplit(s: string): string;
|
|
296
|
+
/**
|
|
297
|
+
* Accumulate a char-trigram histogram of `text` into `vec[offset..offset+bins)`.
|
|
298
|
+
*
|
|
299
|
+
* - Applies camelSplit to the input text first.
|
|
300
|
+
* - Extracts all 3-char windows that don't cross word boundaries.
|
|
301
|
+
* - FNV-1a hashes each trigram into a bin.
|
|
302
|
+
* - Normalizes by total trigram count → values in [0, 1].
|
|
303
|
+
*
|
|
304
|
+
* Note: this function does NOT L2-normalize the output; the caller does a
|
|
305
|
+
* single L2 normalization over the full output vector after all blocks are filled.
|
|
306
|
+
*/
|
|
307
|
+
declare function trigramHistogram(text: string, vec: Float32Array, offset: number, bins: number): void;
|
|
308
|
+
/**
|
|
309
|
+
* FNV-1a 32-bit hash of a string.
|
|
310
|
+
* Used for deterministic structural feature spreading.
|
|
311
|
+
*/
|
|
312
|
+
declare function hashString(s: string): number;
|
|
313
|
+
/**
|
|
314
|
+
* Spread a 32-bit hash deterministically into `count` dims of `vec`
|
|
315
|
+
* starting at `offset`. Each dim is in [0, 1], generated via an LCG (linear congruential generator).
|
|
316
|
+
*/
|
|
317
|
+
declare function spreadHash(hash: number, vec: Float32Array, offset: number, count: number): void;
|
|
318
|
+
/** In-place L2 normalization. */
|
|
319
|
+
declare function l2Normalize(vec: Float32Array): void;
|
|
320
|
+
|
|
321
|
+
export { type EncoderOptions, type GraphEnrichment, HOLOEMBED_DIM, HoloEmbedEncoder, type LIFPopulationCpuOptions, type LIFPopulationParams, STRUCTURAL_DIM, SUBWORD_BINS, SUBWORD_BLOCKS, SnnAccelerator, type SymbolInput, camelSplit, encodeLifPopulationCpu, hashString, l2Normalize, spreadHash, trigramHistogram };
|