tjs-lang 0.7.8 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,395 @@
1
+ /**
2
+ * Canonical wasm-library acceptance test: vector-search inline vs composed.
3
+ *
4
+ * This is the test that proves the conceptual goal from
5
+ * `wasm-library-plan.md` § "Canonical end-to-end demo". The same
6
+ * cosine-similarity workload is run two ways:
7
+ *
8
+ * Inline baseline: one big `wasm {}` block computing dot/magA/magB
9
+ * together — what the original `wasm-vector-search.md`
10
+ * playground example does today.
11
+ *
12
+ * Composed: imported `tjs-lang/linalg` kernels driven by either a JS outer
13
+ * loop (`dot` / `norm_sq`) or a `wasm function` outer loop
14
+ * (`dot_at` / `norm_sq_at`). The library's wasm functions are composed
15
+ * into the consumer's wasm module via the Phase 3 ModuleLoader path.
16
+ *
17
+ * Acceptance criteria (matches the plan):
18
+ * 1. Correctness: both implementations pick the same best index across
19
+ * a randomized corpus. ✓ asserted.
20
+ * 2. Performance: target is within ~5% of the inline baseline. Timings are
21
+ * reported; only loose bounds are asserted (composed-WASM < 3x inline,
22
+ * composed-JS > 2x composed-WASM) since tight thresholds are flaky in CI.
23
+ * 3. Module shape: composed-not-imported. Verified by Phase 3 tests in
24
+ * wasm.test.ts; not re-checked here.
25
+ * 4. Boundary form: same library works for non-tjs consumers. Verified
26
+ * by Phase 4 tests; not re-checked here.
27
+ */
28
+
29
+ import { describe, it, expect } from 'bun:test'
30
+ import { readFileSync } from 'node:fs'
31
+ import { join } from 'node:path'
32
+
// Text of the linalg library module, read once from `index.tjs` in the
// directory of this test file. It is later mounted into an in-memory file
// system so the composed variants can import it.
const LINALG_SOURCE = readFileSync(join(import.meta.dir, 'index.tjs'), 'utf8')
37
+
// Baseline variant: TJS source for `inlineSearch`, whose whole body is a
// single wasm{} block. For every corpus row it accumulates the dot product
// and both squared magnitudes four lanes at a time, sums the lanes, and
// keeps the row index with the highest cosine score.
// Mirrors what guides/examples/tjs/wasm-vector-search.md does.
const INLINE_SOURCE = `
function inlineSearch(corpus: Float32Array, query: Float32Array, count: 0, dim: 0) {
  return wasm {
    let bestIdx = 0
    let bestScore = -2.0

    for (let v = 0; v < count; v++) {
      let dotAcc = f32x4_splat(0.0)
      let magAAcc = f32x4_splat(0.0)
      let magBAcc = f32x4_splat(0.0)

      for (let j = 0; j < dim; j += 4) {
        let qOff = j * 4
        let cOff = (v * dim + j) * 4
        let a = f32x4_load(query, qOff)
        let b = f32x4_load(corpus, cOff)
        dotAcc = f32x4_add(dotAcc, f32x4_mul(a, b))
        magAAcc = f32x4_add(magAAcc, f32x4_mul(a, a))
        magBAcc = f32x4_add(magBAcc, f32x4_mul(b, b))
      }

      let dot = f32x4_extract_lane(dotAcc, 0) + f32x4_extract_lane(dotAcc, 1)
        + f32x4_extract_lane(dotAcc, 2) + f32x4_extract_lane(dotAcc, 3)
      let magA = f32x4_extract_lane(magAAcc, 0) + f32x4_extract_lane(magAAcc, 1)
        + f32x4_extract_lane(magAAcc, 2) + f32x4_extract_lane(magAAcc, 3)
      let magB = f32x4_extract_lane(magBAcc, 0) + f32x4_extract_lane(magBAcc, 1)
        + f32x4_extract_lane(magBAcc, 2) + f32x4_extract_lane(magBAcc, 3)

      let mA = Math.sqrt(magA)
      let mB = Math.sqrt(magB)
      if (mA > 0.000001) {
        if (mB > 0.000001) {
          let score = dot / (mA * mB)
          if (score > bestScore) {
            bestScore = score
            bestIdx = v
          }
        }
      }
    }
    return bestIdx
  }
}
`
84
+
// Composed variant with a JS outer loop: TJS source for `composedJsSearch`,
// which imports the `dot` and `norm_sq` kernels from './linalg.tjs' and
// iterates over corpus rows in plain JS. Every row makes two kernel calls
// (dot + norm_sq), i.e. 2 JS↔wasm boundary crossings per row.
const COMPOSED_JS_LOOP_SOURCE = `
import { dot, norm_sq } from './linalg.tjs'

function composedJsSearch(corpus, query, count, dim) {
  const magA = Math.sqrt(norm_sq(query, dim))
  if (magA < 0.000001) return 0

  let bestIdx = 0
  let bestScore = -2

  for (let v = 0; v < count; v++) {
    const row = corpus.subarray(v * dim, (v + 1) * dim)
    const d = dot(query, row, dim)
    const magB = Math.sqrt(norm_sq(row, dim))
    if (magB > 0.000001) {
      const score = d / (magA * magB)
      if (score > bestScore) {
        bestScore = score
        bestIdx = v
      }
    }
  }
  return bestIdx
}
`
112
+
// Composed, WASM-outer-loop: outer iteration is itself a `wasm function`
// that calls imported `dot_at` / `norm_sq_at` via wasm-to-wasm
// `call <index>` instructions. NO JS↔wasm boundary in the inner loop —
// the whole workload runs inside one wasm call. This is the Phase 1.5
// payoff in action.
//
// The degenerate-vector guard compares the epsilon against the norm (after
// Math.sqrt), not the squared norm, so this variant skips exactly the same
// rows as the inline and JS-loop variants. Comparing the squared norm would
// move the cut-off from 1e-6 to 1e-3 and could make the variants disagree
// on the best index for very small vectors.
const COMPOSED_WASM_LOOP_SOURCE = `
import { dot_at, norm_sq_at } from './linalg.tjs'

wasm function composedWasmSearch(
  corpus: Float32Array,
  query: Float32Array,
  count: i32,
  dim: i32
): f64 {
  let magQSq = norm_sq_at(query, 0, dim)
  let mA = Math.sqrt(magQSq)
  if (mA < 0.000001) return 0.0

  let bestIdx = 0
  let bestScore = -2.0

  for (let v = 0; v < count; v++) {
    let startIdx = v * dim
    let d = dot_at(corpus, startIdx, query, dim)
    let magBSq = norm_sq_at(corpus, startIdx, dim)
    let mB = Math.sqrt(magBSq)
    if (mB > 0.000001) {
      let score = d / (mA * mB)
      if (score > bestScore) {
        bestScore = score
        bestIdx = v
      }
    }
  }
  return bestIdx
}
`
150
+
/**
 * Evaluate one compiled variant and return the entry points it exposes.
 *
 * The compiled `code` is wrapped in an async IIFE and executed through
 * `new Function`, receiving the current `globalThis.__tjs` as its `__tjs`
 * argument. After `code` has run, the wrapper publishes two values on
 * globalThis under keys derived from `varName`:
 *   - `__<varName>_search`     — the binding named `fnName` inside `code`
 *   - `__<varName>_wasmBuffer` — whatever `globalThis.wasmBuffer` is then
 *
 * @param code    Compiled JavaScript to evaluate.
 * @param fnName  Identifier of the search function defined by `code`.
 * @param varName Prefix that keeps this variant's global keys unique.
 * @returns The registered search function and wasmBuffer allocator.
 * @throws Error if either published value is not a function.
 */
async function loadVariant(
  code: string,
  fnName: string,
  varName: string
): Promise<{
  search: (corpus: Float32Array, query: Float32Array, count: number, dim: number) => number
  wasmBuffer: (Ctor: any, len: number) => any
}> {
  const registry = globalThis as any
  const searchKey = `__${varName}_search`
  const bufferKey = `__${varName}_wasmBuffer`

  // Wrapper script: run the compiled code, then publish its exports.
  const script = [
    `return (async () => { ${code}`,
    `globalThis.${searchKey} = ${fnName};`,
    `globalThis.${bufferKey} = globalThis.wasmBuffer;`,
    `})();`,
  ].join('\n')

  const run = new Function('__tjs', script)
  await run(registry.__tjs)

  // NOTE(review): fixed 100 ms pause — presumably lets asynchronous wasm
  // instantiation settle before the exports are read; confirm.
  await new Promise((resolve) => setTimeout(resolve, 100))

  const search = registry[searchKey]
  if (typeof search !== 'function') {
    throw new Error(`${varName} search function not registered`)
  }

  const wasmBuffer = registry[bufferKey]
  if (typeof wasmBuffer !== 'function') {
    throw new Error(`${varName} wasmBuffer not available`)
  }

  return { search, wasmBuffer }
}
185
+
describe('Canonical demo: vector-search across three forms', () => {
  // Compares THREE implementations of the same cosine-similarity workload:
  //   - inline:       one big wasm{} block (no boundary crossings)
  //   - composedJs:   imported linalg + JS outer loop (2 crossings per row)
  //   - composedWasm: imported linalg + wasm-function outer loop calling
  //                   dot_at/norm_sq_at via wasm `call <index>` (1 crossing
  //                   for the whole workload)
  //
  // The point: composedWasm should match (or beat) inline. If it does,
  // the perf criterion from the wasm-library plan is proven.

  // Number of interleaved timing rounds per config. The reported time for
  // each variant is the MINIMUM over these rounds: a single sample is easily
  // distorted by a GC pause or scheduler hiccup, which would make the ratio
  // assertions below fail spuriously.
  const TIMING_ROUNDS = 5

  // Runs one search and returns its result together with the elapsed
  // wall-clock time in milliseconds.
  const timed = (run: () => number): { idx: number; ms: number } => {
    const start = performance.now()
    const idx = run()
    return { idx, ms: performance.now() - start }
  }

  it('all three forms agree on best index; composed-wasm matches inline perf', async () => {
    const { tjs } = await import('../lang/index')
    const { createRuntime } = await import('../lang/runtime')
    const { ModuleLoader, inMemoryFileSystem } = await import(
      '../lang/module-loader'
    )

    // Compile each source (composed versions share a loader pointing at linalg).
    // Optional chaining: a missing wasmCompiled report fails the expectation
    // instead of throwing a TypeError.
    const inlineResult = tjs(INLINE_SOURCE, { runTests: false })
    expect(inlineResult.wasmCompiled?.every((b) => b.success)).toBe(true)

    const loader = new ModuleLoader({
      fs: inMemoryFileSystem({ '/proj/linalg.tjs': LINALG_SOURCE }),
      baseDir: '/proj',
    })

    const composedJsResult = tjs(COMPOSED_JS_LOOP_SOURCE, {
      moduleLoader: loader,
      filename: '/proj/app.tjs',
      runTests: false,
    })
    expect(composedJsResult.wasmCompiled?.every((b) => b.success)).toBe(true)

    const composedWasmResult = tjs(COMPOSED_WASM_LOOP_SOURCE, {
      moduleLoader: loader,
      filename: '/proj/app.tjs',
      runTests: false,
    })
    expect(composedWasmResult.wasmCompiled?.every((b) => b.success)).toBe(true)

    // Snapshot the globals this test overwrites so `finally` can put them
    // back exactly as they were.
    const savedTjs = globalThis.__tjs
    const hadWasmBuffer = 'wasmBuffer' in globalThis
    const savedWasmBuffer = (globalThis as any).wasmBuffer
    try {
      // ---- Inline ----
      globalThis.__tjs = createRuntime()
      const inline = await loadVariant(
        inlineResult.code,
        'inlineSearch',
        'inline'
      )

      // ---- Composed, JS outer loop ----
      globalThis.__tjs = createRuntime()
      const composedJs = await loadVariant(
        composedJsResult.code,
        'composedJsSearch',
        'composedJs'
      )

      // ---- Composed, WASM outer loop ----
      globalThis.__tjs = createRuntime()
      const composedWasm = await loadVariant(
        composedWasmResult.code,
        'composedWasmSearch',
        'composedWasm'
      )

      // ---- Workload configs ----
      // Each config: { dim, count, label }. Sized to keep the test under
      // a few seconds in CI but large enough for SIMD to matter.
      const configs = [
        { dim: 128, count: 500, label: '500x128' },
        { dim: 256, count: 500, label: '500x256' },
        { dim: 128, count: 2000, label: '2000x128' },
      ]

      const timings: {
        label: string
        inlineMs: number
        composedJsMs: number
        composedWasmMs: number
        bestIdx: number
      }[] = []

      for (const cfg of configs) {
        const total = cfg.count * cfg.dim

        // Allocate corpus/query in EACH variant's wasm memory so the
        // wasmBuffer fast path is hit on all three runs.
        const inlineCorpus = inline.wasmBuffer(Float32Array, total)
        const inlineQuery = inline.wasmBuffer(Float32Array, cfg.dim)
        const composedJsCorpus = composedJs.wasmBuffer(Float32Array, total)
        const composedJsQuery = composedJs.wasmBuffer(Float32Array, cfg.dim)
        const composedWasmCorpus = composedWasm.wasmBuffer(Float32Array, total)
        const composedWasmQuery = composedWasm.wasmBuffer(Float32Array, cfg.dim)

        // Seed all three with the same values
        for (let i = 0; i < total; i++) {
          const v = Math.random() * 2 - 1
          inlineCorpus[i] = v
          composedJsCorpus[i] = v
          composedWasmCorpus[i] = v
        }
        for (let i = 0; i < cfg.dim; i++) {
          const v = Math.random() * 2 - 1
          inlineQuery[i] = v
          composedJsQuery[i] = v
          composedWasmQuery[i] = v
        }

        // Warm up all three (JIT)
        const warmCount = Math.min(100, cfg.count)
        for (let w = 0; w < 3; w++) {
          inline.search(inlineCorpus, inlineQuery, warmCount, cfg.dim)
          composedJs.search(composedJsCorpus, composedJsQuery, warmCount, cfg.dim)
          composedWasm.search(composedWasmCorpus, composedWasmQuery, warmCount, cfg.dim)
        }

        // Time the three variants in interleaved rounds and keep the best
        // (minimum) time of each, so transient noise hits all of them alike
        // and a single slow sample cannot decide the outcome.
        let inlineMs = Infinity
        let composedJsMs = Infinity
        let composedWasmMs = Infinity
        let inlineIdx = -1

        for (let round = 0; round < TIMING_ROUNDS; round++) {
          const inlineRun = timed(() =>
            inline.search(inlineCorpus, inlineQuery, cfg.count, cfg.dim)
          )
          const composedJsRun = timed(() =>
            composedJs.search(composedJsCorpus, composedJsQuery, cfg.count, cfg.dim)
          )
          const composedWasmRun = timed(() =>
            composedWasm.search(
              composedWasmCorpus,
              composedWasmQuery,
              cfg.count,
              cfg.dim
            )
          )

          // All three implementations must agree on best index
          expect(composedJsRun.idx).toBe(inlineRun.idx)
          expect(composedWasmRun.idx).toBe(inlineRun.idx)

          inlineIdx = inlineRun.idx
          inlineMs = Math.min(inlineMs, inlineRun.ms)
          composedJsMs = Math.min(composedJsMs, composedJsRun.ms)
          composedWasmMs = Math.min(composedWasmMs, composedWasmRun.ms)
        }

        timings.push({
          label: cfg.label,
          inlineMs,
          composedJsMs,
          composedWasmMs,
          bestIdx: inlineIdx,
        })
      }

      // Report (visible in test output)
      console.log(
        '\n=== Vector-search: inline / composed-JS-loop / composed-WASM-loop ==='
      )
      console.log(
        '  config       | inline   | composed-JS | ratio  | composed-WASM | ratio'
      )
      console.log(
        '  -------------|----------|-------------|--------|---------------|-------'
      )
      for (const t of timings) {
        const jsRatio = t.composedJsMs / t.inlineMs
        const wasmRatio = t.composedWasmMs / t.inlineMs
        console.log(
          `  ${t.label.padEnd(12)} | ${t.inlineMs.toFixed(2).padStart(8)} | ${t.composedJsMs
            .toFixed(2)
            .padStart(11)} | ${jsRatio.toFixed(2).padStart(6)}x | ${t.composedWasmMs
            .toFixed(2)
            .padStart(13)} | ${wasmRatio.toFixed(2).padStart(5)}x`
        )
      }

      // The composed-WASM path should match inline within a small factor.
      // Engine variance means hard thresholds are flaky; we use a wide
      // 3× ceiling that catches catastrophic regressions while tolerating
      // JIT-warmup noise and CI-environment variability. Observed ratios
      // are typically 1.0–1.3× — i.e., parity with inline.
      for (const t of timings) {
        const wasmRatio = t.composedWasmMs / t.inlineMs
        expect(wasmRatio).toBeLessThan(3.0)
      }

      // The composed-JS path is expected to be slower than composed-WASM
      // (boundary-crossing tax). This is the "before/after" demonstration:
      // composed-WASM must be at least 2× faster than composed-JS for the
      // wasm-to-wasm optimization to be considered "working." In practice
      // the gap is much larger (5–10×).
      for (const t of timings) {
        expect(t.composedJsMs).toBeGreaterThan(t.composedWasmMs * 2)
      }
    } finally {
      globalThis.__tjs = savedTjs
      for (const v of ['inline', 'composedJs', 'composedWasm']) {
        delete (globalThis as any)[`__${v}_search`]
        delete (globalThis as any)[`__${v}_wasmBuffer`]
      }
      // Restore a pre-existing wasmBuffer rather than deleting it blindly;
      // only remove the global when this test was the one that introduced it.
      if (hadWasmBuffer) {
        ;(globalThis as any).wasmBuffer = savedWasmBuffer
      } else {
        delete (globalThis as any).wasmBuffer
      }
      for (const key of Object.keys(globalThis)) {
        if (key.startsWith('__tjs_wasm_')) {
          delete (globalThis as any)[key]
        }
      }
    }
  })
})