@goshenkata/dryscan-core 1.4.7 → 1.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  /**
2
- * Computes cosine similarity using a deterministic backend chain.
3
- * Default chain: GPU -> worker threads -> synchronous fallback.
2
+ * Computes cosineSimilarity(A, B) using worker threads for large inputs,
3
+ * falling back to the synchronous library call for small ones.
4
+ * B is packed into a SharedArrayBuffer shared across all workers — no copies.
4
5
  */
5
6
  declare function parallelCosineSimilarity(A: number[][], B: number[][]): Promise<number[][]>;
6
7
 
@@ -3,192 +3,20 @@ import "../chunk-EUXUH3YW.js";
3
3
  // src/services/ParallelSimilarity.ts
4
4
  import os from "os";
5
5
  import { Worker } from "worker_threads";
6
- import debug from "debug";
7
6
  import { cosineSimilarity } from "@langchain/core/utils/math";
8
- var log = debug("DryScan:ParallelSimilarity");
9
- var GPU_TARGET_CELLS = 2e6;
10
- var ZERO_NORM_EPSILON = 1e-12;
11
- var gpuKernel = null;
12
- var gpuRuntime = null;
13
- var gpuKernelDims = null;
14
- var gpuCtor = null;
15
- var gpuCapabilitiesLogged = false;
16
- var normalizedMatrixCache = /* @__PURE__ */ new WeakMap();
17
- async function getGpuCtor() {
18
- if (gpuCtor) return gpuCtor;
19
- const moduleName = "gpu.js";
20
- try {
21
- const mod = await import(moduleName);
22
- gpuCtor = mod.GPU;
23
- return gpuCtor;
24
- } catch (_err) {
25
- return null;
26
- }
27
- }
28
- function logGpuCapabilities(GPU) {
29
- if (gpuCapabilitiesLogged) return;
30
- const flags = GPU;
31
- log(
32
- "SIM_GPU capabilities supported=%s headlessgl=%s webgl=%s webgl2=%s singlePrecision=%s",
33
- String(flags.isGPUSupported),
34
- String(flags.isHeadlessGLSupported),
35
- String(flags.isWebGLSupported),
36
- String(flags.isWebGL2Supported),
37
- String(flags.isSinglePrecisionSupported)
38
- );
39
- gpuCapabilitiesLogged = true;
40
- }
41
- function backendPreference() {
42
- const value = (process.env.DRYSCAN_SIM_BACKEND ?? "auto").toLowerCase();
43
- if (value === "gpu") return "gpu";
44
- if (value === "worker") return "worker";
45
- if (value === "sync") return "sync";
46
- return "auto";
47
- }
48
- function matrixFromGpuResult(result, rows, cols) {
49
- if (!Array.isArray(result)) {
50
- throw new Error("GPU kernel returned a non-array result");
51
- }
52
- return Array.from({ length: rows }, (_, rowIdx) => {
53
- const row = result[rowIdx];
54
- if (!row || typeof row !== "object") {
55
- throw new Error("GPU kernel returned malformed rows");
56
- }
57
- const typed = row;
58
- return Array.from({ length: cols }, (_2, colIdx) => Number(typed[colIdx] ?? 0));
59
- });
60
- }
61
- function normalizeRows(rows, dims) {
62
- return rows.map((row) => {
63
- const normalized = new Float32Array(dims);
64
- let normSq = 0;
65
- for (let i = 0; i < dims; i++) {
66
- const value = Number(row[i] ?? 0);
67
- normalized[i] = value;
68
- normSq += value * value;
69
- }
70
- const norm = Math.sqrt(normSq);
71
- if (norm > ZERO_NORM_EPSILON) {
72
- for (let i = 0; i < dims; i++) {
73
- normalized[i] /= norm;
74
- }
75
- } else {
76
- normalized.fill(0);
77
- }
78
- return normalized;
79
- });
80
- }
81
- function getNormalizedRows(rows, dims) {
82
- const cached = normalizedMatrixCache.get(rows);
83
- if (cached && cached.dims === dims) {
84
- return cached.rows;
85
- }
86
- const normalized = normalizeRows(rows, dims);
87
- normalizedMatrixCache.set(rows, { dims, rows: normalized });
88
- return normalized;
89
- }
90
- async function runGpuCosineSimilarity(A, B) {
91
- const GPU = await getGpuCtor();
92
- if (!GPU) {
93
- throw new Error("gpu.js module not available");
94
- }
95
- logGpuCapabilities(GPU);
96
- if (GPU.isGPUSupported === false) {
97
- throw new Error("GPU.js reported GPU support unavailable");
98
- }
99
- const dims = A[0]?.length ?? 0;
100
- if (!dims || !B[0]?.length || B[0].length !== dims) {
101
- throw new Error("Matrix dimensions are invalid for GPU similarity");
102
- }
103
- const normalizeStartMs = performance.now();
104
- const normalizedA = getNormalizedRows(A, dims);
105
- const normalizedB = getNormalizedRows(B, dims);
106
- const normalizeMs = Math.round(performance.now() - normalizeStartMs);
107
- if (!gpuRuntime) {
108
- gpuRuntime = new GPU({ mode: "gpu" });
109
- const runtimeMode = String(gpuRuntime.mode ?? "unknown");
110
- log("SIM_GPU runtime mode=%s", runtimeMode);
111
- if (runtimeMode === "cpu") {
112
- throw new Error("GPU runtime initialized in CPU mode");
113
- }
114
- }
115
- const shouldRecreateKernel = !gpuKernel || gpuKernelDims !== dims;
116
- if (shouldRecreateKernel) {
117
- const kernel = gpuRuntime.createKernel(function(a, b) {
118
- let dot = 0;
119
- for (let i = 0; i < this.constants.dims; i++) {
120
- const av = a[this.thread.y][i];
121
- const bv = b[this.thread.x][i];
122
- dot += av * bv;
123
- }
124
- return dot;
125
- }).setOutput([1, 1]).setConstants({ dims });
126
- if (typeof kernel.setPrecision === "function") {
127
- kernel.setPrecision("single");
128
- }
129
- if (typeof kernel.setTactic === "function") {
130
- kernel.setTactic("speed");
131
- }
132
- if (typeof kernel.setDynamicArguments === "function") {
133
- kernel.setDynamicArguments(true);
134
- }
135
- if (typeof kernel.setDynamicOutput === "function") {
136
- kernel.setDynamicOutput(true);
137
- }
138
- gpuKernel = kernel;
139
- gpuKernelDims = dims;
140
- }
141
- if (!gpuKernel) {
142
- throw new Error("GPU kernel initialization failed");
143
- }
144
- let targetCells = GPU_TARGET_CELLS;
145
- if (!Number.isFinite(targetCells) || targetCells <= 0) {
146
- targetCells = 2e6;
147
- }
148
- let rowsPerBatch = Math.floor(targetCells / normalizedB.length);
149
- if (rowsPerBatch < 1) {
150
- rowsPerBatch = 1;
151
- }
152
- const result = [];
153
- const batches = Math.ceil(normalizedA.length / rowsPerBatch);
154
- const kernelStartMs = performance.now();
155
- for (let start = 0; start < normalizedA.length; start += rowsPerBatch) {
156
- const batch = normalizedA.slice(start, start + rowsPerBatch);
157
- gpuKernel.setOutput([normalizedB.length, batch.length]);
158
- const raw = gpuKernel(batch, normalizedB);
159
- result.push(...matrixFromGpuResult(raw, batch.length, normalizedB.length));
160
- }
161
- const kernelMs = Math.round(performance.now() - kernelStartMs);
162
- log(
163
- "SIM_GPU details dims=%d aRows=%d bRows=%d rowsPerBatch=%d batches=%d targetCells=%d normalizeMs=%d kernelMs=%d",
164
- dims,
165
- A.length,
166
- B.length,
167
- rowsPerBatch,
168
- batches,
169
- targetCells,
170
- normalizeMs,
171
- kernelMs
172
- );
173
- return result;
174
- }
175
- async function computeWithWorkers(A, B) {
7
+ var MIN_PARALLEL_ROWS = 50;
8
+ async function parallelCosineSimilarity(A, B) {
9
+ if (A.length === 0 || B.length === 0) return [];
10
+ if (A.length < MIN_PARALLEL_ROWS) return cosineSimilarity(A, B);
176
11
  const dims = A[0].length;
177
- const cpuCount = Math.max(1, os.cpus().length);
178
- const chunkSize = Math.max(1, Math.ceil(A.length / cpuCount));
12
+ const chunkSize = Math.ceil(A.length / os.cpus().length);
179
13
  const sharedB = new SharedArrayBuffer(B.length * dims * 8);
180
14
  const bView = new Float64Array(sharedB);
181
15
  B.forEach((row, i) => bView.set(row, i * dims));
182
16
  const runningTsSource = new URL(import.meta.url).pathname.endsWith(".ts");
183
- let workerFile = "./cosineSimilarityWorker.js";
184
- if (runningTsSource) {
185
- workerFile = "./cosineSimilarityWorker.ts";
186
- }
17
+ const workerFile = runningTsSource ? "./cosineSimilarityWorker.ts" : "./cosineSimilarityWorker.js";
187
18
  const workerUrl = new URL(workerFile, import.meta.url);
188
- let execArgv = [];
189
- if (runningTsSource) {
190
- execArgv = ["--import", "tsx/esm"];
191
- }
19
+ const execArgv = runningTsSource ? ["--import", "tsx/esm"] : [];
192
20
  const chunks = Array.from(
193
21
  { length: Math.ceil(A.length / chunkSize) },
194
22
  (_, i) => A.slice(i * chunkSize, (i + 1) * chunkSize)
@@ -196,50 +24,6 @@ async function computeWithWorkers(A, B) {
196
24
  const results = await Promise.all(chunks.map((chunk) => runWorker(chunk, sharedB, B.length, dims, workerUrl, execArgv)));
197
25
  return results.flat();
198
26
  }
199
- function computeSequential(A, B) {
200
- return cosineSimilarity(A, B);
201
- }
202
- async function runWithTiming(name, fn) {
203
- const startMs = performance.now();
204
- log("SIM_TRY backend=%s", name);
205
- try {
206
- const result = await Promise.resolve(fn());
207
- const durationMs = Math.round(performance.now() - startMs);
208
- log("SIM_DONE backend=%s durationMs=%d", name, durationMs);
209
- return result;
210
- } catch (err) {
211
- const durationMs = Math.round(performance.now() - startMs);
212
- log("SIM_FAIL backend=%s durationMs=%d reason=%s", name, durationMs, err?.message ?? "unknown");
213
- throw err;
214
- }
215
- }
216
- async function parallelCosineSimilarity(A, B) {
217
- if (A.length === 0 || B.length === 0) return [];
218
- const preference = backendPreference();
219
- const dims = A[0]?.length ?? 0;
220
- log("SIM_START rows=%d cols=%d dims=%d preference=%s", A.length, B.length, dims, preference);
221
- if (preference === "sync") {
222
- return runWithTiming("sync", () => computeSequential(A, B));
223
- }
224
- if (preference === "worker") {
225
- try {
226
- return await runWithTiming("worker", () => computeWithWorkers(A, B));
227
- } catch (_workerErr) {
228
- return runWithTiming("sync", () => computeSequential(A, B));
229
- }
230
- }
231
- try {
232
- return await runWithTiming("gpu", () => runGpuCosineSimilarity(A, B));
233
- } catch (_gpuErr) {
234
- log("SIM_CHAIN continue_after=gpu-fail next=worker");
235
- }
236
- try {
237
- return await runWithTiming("worker", () => computeWithWorkers(A, B));
238
- } catch (_workerErr) {
239
- log("SIM_CHAIN continue_after=worker-fail next=sync");
240
- return runWithTiming("sync", () => computeSequential(A, B));
241
- }
242
- }
243
27
  function runWorker(chunk, sharedB, bCount, dims, workerUrl, execArgv) {
244
28
  return new Promise((resolve, reject) => {
245
29
  const rowsFlat = new Float64Array(chunk.length * dims);
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/services/ParallelSimilarity.ts"],"sourcesContent":["import os from \"node:os\";\nimport { Worker } from \"node:worker_threads\";\nimport debug from \"debug\";\nimport { cosineSimilarity } from \"@langchain/core/utils/math\";\n\nconst log = debug(\"DryScan:ParallelSimilarity\");\n\n/**\n * Target number of output cells per GPU launch.\n * Large enough to keep GPU occupancy healthy, bounded enough to avoid oversized output buffers.\n */\nconst GPU_TARGET_CELLS = 2_000_000;\nconst ZERO_NORM_EPSILON = 1e-12;\n\ntype BackendPreference = \"auto\" | \"gpu\" | \"worker\" | \"sync\";\ntype GPUCtor = new (settings?: { mode?: string }) => {\n createKernel: (fn: (...args: any[]) => number) => GpuKernel;\n};\ntype GPUModule = {\n GPU: GPUCtor & { isGPUSupported?: boolean };\n};\n\ntype GpuInputMatrix = ArrayLike<ArrayLike<number>>;\ntype GpuKernel = {\n (a: GpuInputMatrix, b: GpuInputMatrix): number[][] | Float32Array[] | number[];\n setOutput: (output: [number, number]) => GpuKernel;\n setConstants: (constants: { dims: number }) => GpuKernel;\n setDynamicOutput?: (enabled: boolean) => GpuKernel;\n};\n\nlet gpuKernel:\n | GpuKernel\n | null = null;\nlet gpuRuntime: InstanceType<GPUCtor> | null = null;\nlet gpuKernelDims: number | null = null;\nlet gpuCtor: (GPUCtor & { isGPUSupported?: boolean }) | null = null;\nlet gpuCapabilitiesLogged = false;\n\nconst normalizedMatrixCache = new WeakMap<number[][], { dims: number; rows: Float32Array[] }>();\n\ntype BackendName = \"gpu\" | \"worker\" | \"sync\";\n\nasync function getGpuCtor(): Promise<(GPUCtor & { isGPUSupported?: boolean }) | null> {\n if (gpuCtor) return gpuCtor;\n\n const moduleName = \"gpu.js\";\n try {\n const mod = (await import(moduleName)) as unknown as GPUModule;\n gpuCtor = mod.GPU;\n return gpuCtor;\n } catch (_err) {\n return null;\n }\n}\n\nfunction logGpuCapabilities(GPU: GPUCtor & { isGPUSupported?: boolean }): void {\n if (gpuCapabilitiesLogged) return;\n\n const flags = GPU as unknown as {\n isGPUSupported?: boolean;\n isHeadlessGLSupported?: boolean;\n isWebGLSupported?: boolean;\n isWebGL2Supported?: boolean;\n isSinglePrecisionSupported?: boolean;\n };\n\n log(\n \"SIM_GPU capabilities supported=%s headlessgl=%s webgl=%s webgl2=%s singlePrecision=%s\",\n String(flags.isGPUSupported),\n String(flags.isHeadlessGLSupported),\n String(flags.isWebGLSupported),\n String(flags.isWebGL2Supported),\n String(flags.isSinglePrecisionSupported),\n );\n\n gpuCapabilitiesLogged = true;\n}\n\nfunction backendPreference(): BackendPreference {\n const value = (process.env.DRYSCAN_SIM_BACKEND ?? \"auto\").toLowerCase();\n if (value === \"gpu\") return \"gpu\";\n if (value === \"worker\") return \"worker\";\n if (value === \"sync\") return \"sync\";\n return \"auto\";\n}\n\nfunction matrixFromGpuResult(result: unknown, rows: number, cols: number): number[][] {\n if (!Array.isArray(result)) {\n throw new Error(\"GPU kernel returned a non-array result\");\n }\n\n return Array.from({ length: rows }, (_, rowIdx) => {\n const row = result[rowIdx] as unknown;\n if (!row || typeof row !== \"object\") {\n throw new Error(\"GPU kernel returned malformed rows\");\n }\n const typed = row as ArrayLike<number>;\n return Array.from({ length: cols }, (_, colIdx) => Number(typed[colIdx] ?? 0));\n });\n}\n\nfunction normalizeRows(rows: number[][], dims: number): Float32Array[] {\n return rows.map((row) => {\n const normalized = new Float32Array(dims);\n let normSq = 0;\n for (let i = 0; i < dims; i++) {\n const value = Number(row[i] ?? 0);\n normalized[i] = value;\n normSq += value * value;\n }\n\n const norm = Math.sqrt(normSq);\n if (norm > ZERO_NORM_EPSILON) {\n for (let i = 0; i < dims; i++) {\n normalized[i] /= norm;\n }\n } else {\n normalized.fill(0);\n }\n\n return normalized;\n });\n}\n\nfunction getNormalizedRows(rows: number[][], dims: number): Float32Array[] {\n const cached = normalizedMatrixCache.get(rows);\n if (cached && cached.dims === dims) {\n return cached.rows;\n }\n\n const normalized = normalizeRows(rows, dims);\n normalizedMatrixCache.set(rows, { dims, rows: normalized });\n return normalized;\n}\n\nasync function runGpuCosineSimilarity(A: number[][], B: number[][]): Promise<number[][]> {\n const GPU = await getGpuCtor();\n if (!GPU) {\n throw new Error(\"gpu.js module not available\");\n }\n\n logGpuCapabilities(GPU);\n\n if (GPU.isGPUSupported === false) {\n throw new Error(\"GPU.js reported GPU support unavailable\");\n }\n\n const dims = A[0]?.length ?? 0;\n if (!dims || !B[0]?.length || B[0].length !== dims) {\n throw new Error(\"Matrix dimensions are invalid for GPU similarity\");\n }\n\n const normalizeStartMs = performance.now();\n const normalizedA = getNormalizedRows(A, dims);\n const normalizedB = getNormalizedRows(B, dims);\n const normalizeMs = Math.round(performance.now() - normalizeStartMs);\n\n if (!gpuRuntime) {\n gpuRuntime = new GPU({ mode: \"gpu\" });\n const runtimeMode = String((gpuRuntime as unknown as { mode?: string }).mode ?? \"unknown\");\n log(\"SIM_GPU runtime mode=%s\", runtimeMode);\n if (runtimeMode === \"cpu\") {\n throw new Error(\"GPU runtime initialized in CPU mode\");\n }\n }\n\n const shouldRecreateKernel =\n !gpuKernel\n || gpuKernelDims !== dims;\n\n if (shouldRecreateKernel) {\n const kernel = gpuRuntime\n .createKernel(function (this: any, a: number[][], b: number[][]) {\n let dot = 0;\n for (let i = 0; i < this.constants.dims; i++) {\n const av = a[this.thread.y][i];\n const bv = b[this.thread.x][i];\n dot += av * bv;\n }\n return dot;\n })\n .setOutput([1, 1])\n .setConstants({ dims });\n\n if (typeof (kernel as any).setPrecision === \"function\") {\n (kernel as any).setPrecision(\"single\");\n }\n if (typeof (kernel as any).setTactic === \"function\") {\n (kernel as any).setTactic(\"speed\");\n }\n if (typeof (kernel as any).setDynamicArguments === \"function\") {\n (kernel as any).setDynamicArguments(true);\n }\n\n if (typeof kernel.setDynamicOutput === \"function\") {\n kernel.setDynamicOutput(true);\n }\n\n gpuKernel = kernel;\n\n gpuKernelDims = dims;\n }\n\n if (!gpuKernel) {\n throw new Error(\"GPU kernel initialization failed\");\n }\n\n let targetCells = GPU_TARGET_CELLS;\n if (!Number.isFinite(targetCells) || targetCells <= 0) {\n targetCells = 2_000_000;\n }\n\n let rowsPerBatch = Math.floor(targetCells / normalizedB.length);\n if (rowsPerBatch < 1) {\n rowsPerBatch = 1;\n }\n\n const result: number[][] = [];\n const batches = Math.ceil(normalizedA.length / rowsPerBatch);\n const kernelStartMs = performance.now();\n\n for (let start = 0; start < normalizedA.length; start += rowsPerBatch) {\n const batch = normalizedA.slice(start, start + rowsPerBatch);\n gpuKernel.setOutput([normalizedB.length, batch.length]);\n const raw = gpuKernel(batch, normalizedB);\n result.push(...matrixFromGpuResult(raw, batch.length, normalizedB.length));\n }\n const kernelMs = Math.round(performance.now() - kernelStartMs);\n\n log(\n \"SIM_GPU details dims=%d aRows=%d bRows=%d rowsPerBatch=%d batches=%d targetCells=%d normalizeMs=%d kernelMs=%d\",\n dims,\n A.length,\n B.length,\n rowsPerBatch,\n batches,\n targetCells,\n normalizeMs,\n kernelMs,\n );\n\n return result;\n}\n\nasync function computeWithWorkers(A: number[][], B: number[][]): Promise<number[][]> {\n const dims = A[0].length;\n const cpuCount = Math.max(1, os.cpus().length);\n const chunkSize = Math.max(1, Math.ceil(A.length / cpuCount));\n\n const sharedB = new SharedArrayBuffer(B.length * dims * 8);\n const bView = new Float64Array(sharedB);\n B.forEach((row, i) => bView.set(row, i * dims));\n\n const runningTsSource = new URL(import.meta.url).pathname.endsWith(\".ts\");\n let workerFile = \"./cosineSimilarityWorker.js\";\n if (runningTsSource) {\n workerFile = \"./cosineSimilarityWorker.ts\";\n }\n const workerUrl = new URL(workerFile, import.meta.url);\n let execArgv: string[] = [];\n if (runningTsSource) {\n execArgv = [\"--import\", \"tsx/esm\"];\n }\n\n const chunks = Array.from(\n { length: Math.ceil(A.length / chunkSize) },\n (_, i) => A.slice(i * chunkSize, (i + 1) * chunkSize),\n );\n\n const results = await Promise.all(chunks.map(chunk => runWorker(chunk, sharedB, B.length, dims, workerUrl, execArgv)));\n return results.flat();\n}\n\nfunction computeSequential(A: number[][], B: number[][]): number[][] {\n return cosineSimilarity(A, B);\n}\n\nasync function runWithTiming(name: BackendName, fn: () => Promise<number[][]> | number[][]): Promise<number[][]> {\n const startMs = performance.now();\n log(\"SIM_TRY backend=%s\", name);\n\n try {\n const result = await Promise.resolve(fn());\n const durationMs = Math.round(performance.now() - startMs);\n log(\"SIM_DONE backend=%s durationMs=%d\", name, durationMs);\n return result;\n } catch (err: any) {\n const durationMs = Math.round(performance.now() - startMs);\n log(\"SIM_FAIL backend=%s durationMs=%d reason=%s\", name, durationMs, err?.message ?? \"unknown\");\n throw err;\n }\n}\n\n/**\n * Computes cosine similarity using a deterministic backend chain.\n * Default chain: GPU -> worker threads -> synchronous fallback.\n */\nexport async function parallelCosineSimilarity(A: number[][], B: number[][]): Promise<number[][]> {\n if (A.length === 0 || B.length === 0) return [];\n\n const preference = backendPreference();\n const dims = A[0]?.length ?? 0;\n log(\"SIM_START rows=%d cols=%d dims=%d preference=%s\", A.length, B.length, dims, preference);\n\n if (preference === \"sync\") {\n return runWithTiming(\"sync\", () => computeSequential(A, B));\n }\n\n if (preference === \"worker\") {\n try {\n return await runWithTiming(\"worker\", () => computeWithWorkers(A, B));\n } catch (_workerErr) {\n return runWithTiming(\"sync\", () => computeSequential(A, B));\n }\n }\n\n try {\n return await runWithTiming(\"gpu\", () => runGpuCosineSimilarity(A, B));\n } catch (_gpuErr) {\n log(\"SIM_CHAIN continue_after=gpu-fail next=worker\");\n }\n\n try {\n return await runWithTiming(\"worker\", () => computeWithWorkers(A, B));\n } catch (_workerErr) {\n log(\"SIM_CHAIN continue_after=worker-fail next=sync\");\n return runWithTiming(\"sync\", () => computeSequential(A, B));\n }\n}\n\nfunction runWorker(\n chunk: number[][],\n sharedB: SharedArrayBuffer,\n bCount: number,\n dims: number,\n workerUrl: URL,\n execArgv: string[],\n): Promise<number[][]> {\n return new Promise((resolve, reject) => {\n const rowsFlat = new Float64Array(chunk.length * dims);\n chunk.forEach((row, i) => rowsFlat.set(row, i * dims));\n\n const worker = new Worker(workerUrl, {\n workerData: { rowsBuffer: rowsFlat.buffer, rowCount: chunk.length, allBuffer: sharedB, allCount: bCount, dims },\n transferList: [rowsFlat.buffer],\n execArgv,\n });\n\n worker.once(\"message\", ({ result }) => resolve(result));\n worker.once(\"error\", reject);\n });\n}\n"],"mappings":";;;AAAA,OAAO,QAAQ;AACf,SAAS,cAAc;AACvB,OAAO,WAAW;AAClB,SAAS,wBAAwB;AAEjC,IAAM,MAAM,MAAM,4BAA4B;AAM9C,IAAM,mBAAmB;AACzB,IAAM,oBAAoB;AAkB1B,IAAI,YAEO;AACX,IAAI,aAA2C;AAC/C,IAAI,gBAA+B;AACnC,IAAI,UAA2D;AAC/D,IAAI,wBAAwB;AAE5B,IAAM,wBAAwB,oBAAI,QAA4D;AAI9F,eAAe,aAAuE;AACpF,MAAI,QAAS,QAAO;AAEpB,QAAM,aAAa;AACnB,MAAI;AACF,UAAM,MAAO,MAAM,OAAO;AAC1B,cAAU,IAAI;AACd,WAAO;AAAA,EACT,SAAS,MAAM;AACb,WAAO;AAAA,EACT;AACF;AAEA,SAAS,mBAAmB,KAAmD;AAC7E,MAAI,sBAAuB;AAE3B,QAAM,QAAQ;AAQd;AAAA,IACE;AAAA,IACA,OAAO,MAAM,cAAc;AAAA,IAC3B,OAAO,MAAM,qBAAqB;AAAA,IAClC,OAAO,MAAM,gBAAgB;AAAA,IAC7B,OAAO,MAAM,iBAAiB;AAAA,IAC9B,OAAO,MAAM,0BAA0B;AAAA,EACzC;AAEA,0BAAwB;AAC1B;AAEA,SAAS,oBAAuC;AAC9C,QAAM,SAAS,QAAQ,IAAI,uBAAuB,QAAQ,YAAY;AACtE,MAAI,UAAU,MAAO,QAAO;AAC5B,MAAI,UAAU,SAAU,QAAO;AAC/B,MAAI,UAAU,OAAQ,QAAO;AAC7B,SAAO;AACT;AAEA,SAAS,oBAAoB,QAAiB,MAAc,MAA0B;AACpF,MAAI,CAAC,MAAM,QAAQ,MAAM,GAAG;AAC1B,UAAM,IAAI,MAAM,wCAAwC;AAAA,EAC1D;AAEA,SAAO,MAAM,KAAK,EAAE,QAAQ,KAAK,GAAG,CAAC,GAAG,WAAW;AACjD,UAAM,MAAM,OAAO,MAAM;AACzB,QAAI,CAAC,OAAO,OAAO,QAAQ,UAAU;AACnC,YAAM,IAAI,MAAM,oCAAoC;AAAA,IACtD;AACA,UAAM,QAAQ;AACd,WAAO,MAAM,KAAK,EAAE,QAAQ,KAAK,GAAG,CAACA,IAAG,WAAW,OAAO,MAAM,MAAM,KAAK,CAAC,CAAC;AAAA,EAC/E,CAAC;AACH;AAEA,SAAS,cAAc,MAAkB,MAA8B;AACrE,SAAO,KAAK,IAAI,CAAC,QAAQ;AACvB,UAAM,aAAa,IAAI,aAAa,IAAI;AACxC,QAAI,SAAS;AACb,aAAS,IAAI,GAAG,IAAI,MAAM,KAAK;AAC7B,YAAM,QAAQ,OAAO,IAAI,CAAC,KAAK,CAAC;AAChC,iBAAW,CAAC,IAAI;AAChB,gBAAU,QAAQ;AAAA,IACpB;AAEA,UAAM,OAAO,KAAK,KAAK,MAAM;AAC7B,QAAI,OAAO,mBAAmB;AAC5B,eAAS,IAAI,GAAG,IAAI,MAAM,KAAK;AAC7B,mBAAW,CAAC,KAAK;AAAA,MACnB;AAAA,IACF,OAAO;AACL,iBAAW,KAAK,CAAC;AAAA,IACnB;AAEA,WAAO;AAAA,EACT,CAAC;AACH;AAEA,SAAS,kBAAkB,MAAkB,MAA8B;AACzE,QAAM,SAAS,sBAAsB,IAAI,IAAI;AAC7C,MAAI,UAAU,OAAO,SAAS,MAAM;AAClC,WAAO,OAAO;AAAA,EAChB;AAEA,QAAM,aAAa,cAAc,MAAM,IAAI;AAC3C,wBAAsB,IAAI,MAAM,EAAE,MAAM,MAAM,WAAW,CAAC;AAC1D,SAAO;AACT;AAEA,eAAe,uBAAuB,GAAe,GAAoC;AACvF,QAAM,MAAM,MAAM,WAAW;AAC7B,MAAI,CAAC,KAAK;AACR,UAAM,IAAI,MAAM,6BAA6B;AAAA,EAC/C;AAEA,qBAAmB,GAAG;AAEtB,MAAI,IAAI,mBAAmB,OAAO;AAChC,UAAM,IAAI,MAAM,yCAAyC;AAAA,EAC3D;AAEA,QAAM,OAAO,EAAE,CAAC,GAAG,UAAU;AAC7B,MAAI,CAAC,QAAQ,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,WAAW,MAAM;AAClD,UAAM,IAAI,MAAM,kDAAkD;AAAA,EACpE;AAEA,QAAM,mBAAmB,YAAY,IAAI;AACzC,QAAM,cAAc,kBAAkB,GAAG,IAAI;AAC7C,QAAM,cAAc,kBAAkB,GAAG,IAAI;AAC7C,QAAM,cAAc,KAAK,MAAM,YAAY,IAAI,IAAI,gBAAgB;AAEnE,MAAI,CAAC,YAAY;AACf,iBAAa,IAAI,IAAI,EAAE,MAAM,MAAM,CAAC;AACpC,UAAM,cAAc,OAAQ,WAA4C,QAAQ,SAAS;AACzF,QAAI,2BAA2B,WAAW;AAC1C,QAAI,gBAAgB,OAAO;AACzB,YAAM,IAAI,MAAM,qCAAqC;AAAA,IACvD;AAAA,EACF;AAEA,QAAM,uBACJ,CAAC,aACE,kBAAkB;AAEvB,MAAI,sBAAsB;AACxB,UAAM,SAAS,WACZ,aAAa,SAAqB,GAAe,GAAe;AAC/D,UAAI,MAAM;AACV,eAAS,IAAI,GAAG,IAAI,KAAK,UAAU,MAAM,KAAK;AAC5C,cAAM,KAAK,EAAE,KAAK,OAAO,CAAC,EAAE,CAAC;AAC7B,cAAM,KAAK,EAAE,KAAK,OAAO,CAAC,EAAE,CAAC;AAC7B,eAAO,KAAK;AAAA,MACd;AACA,aAAO;AAAA,IACT,CAAC,EACA,UAAU,CAAC,GAAG,CAAC,CAAC,EAChB,aAAa,EAAE,KAAK,CAAC;AAExB,QAAI,OAAQ,OAAe,iBAAiB,YAAY;AACtD,MAAC,OAAe,aAAa,QAAQ;AAAA,IACvC;AACA,QAAI,OAAQ,OAAe,cAAc,YAAY;AACnD,MAAC,OAAe,UAAU,OAAO;AAAA,IACnC;AACA,QAAI,OAAQ,OAAe,wBAAwB,YAAY;AAC7D,MAAC,OAAe,oBAAoB,IAAI;AAAA,IAC1C;AAEA,QAAI,OAAO,OAAO,qBAAqB,YAAY;AACjD,aAAO,iBAAiB,IAAI;AAAA,IAC9B;AAEA,gBAAY;AAEZ,oBAAgB;AAAA,EAClB;AAEA,MAAI,CAAC,WAAW;AACd,UAAM,IAAI,MAAM,kCAAkC;AAAA,EACpD;AAEA,MAAI,cAAc;AAClB,MAAI,CAAC,OAAO,SAAS,WAAW,KAAK,eAAe,GAAG;AACrD,kBAAc;AAAA,EAChB;AAEA,MAAI,eAAe,KAAK,MAAM,cAAc,YAAY,MAAM;AAC9D,MAAI,eAAe,GAAG;AACpB,mBAAe;AAAA,EACjB;AAEA,QAAM,SAAqB,CAAC;AAC5B,QAAM,UAAU,KAAK,KAAK,YAAY,SAAS,YAAY;AAC3D,QAAM,gBAAgB,YAAY,IAAI;AAEtC,WAAS,QAAQ,GAAG,QAAQ,YAAY,QAAQ,SAAS,cAAc;AACrE,UAAM,QAAQ,YAAY,MAAM,OAAO,QAAQ,YAAY;AAC3D,cAAU,UAAU,CAAC,YAAY,QAAQ,MAAM,MAAM,CAAC;AACtD,UAAM,MAAM,UAAU,OAAO,WAAW;AACxC,WAAO,KAAK,GAAG,oBAAoB,KAAK,MAAM,QAAQ,YAAY,MAAM,CAAC;AAAA,EAC3E;AACA,QAAM,WAAW,KAAK,MAAM,YAAY,IAAI,IAAI,aAAa;AAE7D;AAAA,IACE;AAAA,IACA;AAAA,IACA,EAAE;AAAA,IACF,EAAE;AAAA,IACF;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAEA,SAAO;AACT;AAEA,eAAe,mBAAmB,GAAe,GAAoC;AACnF,QAAM,OAAO,EAAE,CAAC,EAAE;AAClB,QAAM,WAAW,KAAK,IAAI,GAAG,GAAG,KAAK,EAAE,MAAM;AAC7C,QAAM,YAAY,KAAK,IAAI,GAAG,KAAK,KAAK,EAAE,SAAS,QAAQ,CAAC;AAE5D,QAAM,UAAU,IAAI,kBAAkB,EAAE,SAAS,OAAO,CAAC;AACzD,QAAM,QAAQ,IAAI,aAAa,OAAO;AACtC,IAAE,QAAQ,CAAC,KAAK,MAAM,MAAM,IAAI,KAAK,IAAI,IAAI,CAAC;AAE9C,QAAM,kBAAkB,IAAI,IAAI,YAAY,GAAG,EAAE,SAAS,SAAS,KAAK;AACxE,MAAI,aAAa;AACjB,MAAI,iBAAiB;AACnB,iBAAa;AAAA,EACf;AACA,QAAM,YAAY,IAAI,IAAI,YAAY,YAAY,GAAG;AACrD,MAAI,WAAqB,CAAC;AAC1B,MAAI,iBAAiB;AACnB,eAAW,CAAC,YAAY,SAAS;AAAA,EACnC;AAEA,QAAM,SAAS,MAAM;AAAA,IACnB,EAAE,QAAQ,KAAK,KAAK,EAAE,SAAS,SAAS,EAAE;AAAA,IAC1C,CAAC,GAAG,MAAM,EAAE,MAAM,IAAI,YAAY,IAAI,KAAK,SAAS;AAAA,EACtD;AAEA,QAAM,UAAU,MAAM,QAAQ,IAAI,OAAO,IAAI,WAAS,UAAU,OAAO,SAAS,EAAE,QAAQ,MAAM,WAAW,QAAQ,CAAC,CAAC;AACrH,SAAO,QAAQ,KAAK;AACtB;AAEA,SAAS,kBAAkB,GAAe,GAA2B;AACnE,SAAO,iBAAiB,GAAG,CAAC;AAC9B;AAEA,eAAe,cAAc,MAAmB,IAAiE;AAC/G,QAAM,UAAU,YAAY,IAAI;AAChC,MAAI,sBAAsB,IAAI;AAE9B,MAAI;AACF,UAAM,SAAS,MAAM,QAAQ,QAAQ,GAAG,CAAC;AACzC,UAAM,aAAa,KAAK,MAAM,YAAY,IAAI,IAAI,OAAO;AACzD,QAAI,qCAAqC,MAAM,UAAU;AACzD,WAAO;AAAA,EACT,SAAS,KAAU;AACjB,UAAM,aAAa,KAAK,MAAM,YAAY,IAAI,IAAI,OAAO;AACzD,QAAI,+CAA+C,MAAM,YAAY,KAAK,WAAW,SAAS;AAC9F,UAAM;AAAA,EACR;AACF;AAMA,eAAsB,yBAAyB,GAAe,GAAoC;AAChG,MAAI,EAAE,WAAW,KAAK,EAAE,WAAW,EAAG,QAAO,CAAC;AAE9C,QAAM,aAAa,kBAAkB;AACrC,QAAM,OAAO,EAAE,CAAC,GAAG,UAAU;AAC7B,MAAI,mDAAmD,EAAE,QAAQ,EAAE,QAAQ,MAAM,UAAU;AAE3F,MAAI,eAAe,QAAQ;AACzB,WAAO,cAAc,QAAQ,MAAM,kBAAkB,GAAG,CAAC,CAAC;AAAA,EAC5D;AAEA,MAAI,eAAe,UAAU;AAC3B,QAAI;AACF,aAAO,MAAM,cAAc,UAAU,MAAM,mBAAmB,GAAG,CAAC,CAAC;AAAA,IACrE,SAAS,YAAY;AACnB,aAAO,cAAc,QAAQ,MAAM,kBAAkB,GAAG,CAAC,CAAC;AAAA,IAC5D;AAAA,EACF;AAEA,MAAI;AACF,WAAO,MAAM,cAAc,OAAO,MAAM,uBAAuB,GAAG,CAAC,CAAC;AAAA,EACtE,SAAS,SAAS;AAChB,QAAI,+CAA+C;AAAA,EACrD;AAEA,MAAI;AACF,WAAO,MAAM,cAAc,UAAU,MAAM,mBAAmB,GAAG,CAAC,CAAC;AAAA,EACrE,SAAS,YAAY;AACnB,QAAI,gDAAgD;AACpD,WAAO,cAAc,QAAQ,MAAM,kBAAkB,GAAG,CAAC,CAAC;AAAA,EAC5D;AACF;AAEA,SAAS,UACP,OACA,SACA,QACA,MACA,WACA,UACqB;AACrB,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,UAAM,WAAW,IAAI,aAAa,MAAM,SAAS,IAAI;AACrD,UAAM,QAAQ,CAAC,KAAK,MAAM,SAAS,IAAI,KAAK,IAAI,IAAI,CAAC;AAErD,UAAM,SAAS,IAAI,OAAO,WAAW;AAAA,MACnC,YAAY,EAAE,YAAY,SAAS,QAAQ,UAAU,MAAM,QAAQ,WAAW,SAAS,UAAU,QAAQ,KAAK;AAAA,MAC9G,cAAc,CAAC,SAAS,MAAM;AAAA,MAC9B;AAAA,IACF,CAAC;AAED,WAAO,KAAK,WAAW,CAAC,EAAE,OAAO,MAAM,QAAQ,MAAM,CAAC;AACtD,WAAO,KAAK,SAAS,MAAM;AAAA,EAC7B,CAAC;AACH;","names":["_"]}
1
+ {"version":3,"sources":["../../src/services/ParallelSimilarity.ts"],"sourcesContent":["import os from \"node:os\";\nimport { Worker } from \"node:worker_threads\";\nimport { cosineSimilarity } from \"@langchain/core/utils/math\";\n\n/** Minimum row count below which synchronous is faster than worker overhead. */\nconst MIN_PARALLEL_ROWS = 50;\n\n/**\n * Computes cosineSimilarity(A, B) using worker threads for large inputs,\n * falling back to the synchronous library call for small ones.\n * B is packed into a SharedArrayBuffer shared across all workers — no copies.\n */\nexport async function parallelCosineSimilarity(A: number[][], B: number[][]): Promise<number[][]> {\n if (A.length === 0 || B.length === 0) return [];\n if (A.length < MIN_PARALLEL_ROWS) return cosineSimilarity(A, B);\n\n const dims = A[0].length;\n const chunkSize = Math.ceil(A.length / os.cpus().length);\n\n const sharedB = new SharedArrayBuffer(B.length * dims * 8);\n const bView = new Float64Array(sharedB);\n B.forEach((row, i) => bView.set(row, i * dims));\n\n const runningTsSource = new URL(import.meta.url).pathname.endsWith('.ts');\n const workerFile = runningTsSource ? './cosineSimilarityWorker.ts' : './cosineSimilarityWorker.js';\n const workerUrl = new URL(workerFile, import.meta.url);\n const execArgv = runningTsSource ? ['--import', 'tsx/esm'] : [];\n\n const chunks = Array.from(\n { length: Math.ceil(A.length / chunkSize) },\n (_, i) => A.slice(i * chunkSize, (i + 1) * chunkSize),\n );\n\n const results = await Promise.all(chunks.map(chunk => runWorker(chunk, sharedB, B.length, dims, workerUrl, execArgv)));\n return results.flat();\n}\n\nfunction runWorker(\n chunk: number[][],\n sharedB: SharedArrayBuffer,\n bCount: number,\n dims: number,\n workerUrl: URL,\n execArgv: string[],\n): Promise<number[][]> {\n return new Promise((resolve, reject) => {\n const rowsFlat = new Float64Array(chunk.length * dims);\n chunk.forEach((row, i) => rowsFlat.set(row, i * dims));\n\n const worker = new Worker(workerUrl, {\n workerData: { rowsBuffer: rowsFlat.buffer, rowCount: chunk.length, allBuffer: sharedB, allCount: bCount, dims },\n transferList: [rowsFlat.buffer],\n execArgv,\n });\n\n worker.once(\"message\", ({ result }) => resolve(result));\n worker.once(\"error\", reject);\n });\n}\n"],"mappings":";;;AAAA,OAAO,QAAQ;AACf,SAAS,cAAc;AACvB,SAAS,wBAAwB;AAGjC,IAAM,oBAAoB;AAO1B,eAAsB,yBAAyB,GAAe,GAAoC;AAChG,MAAI,EAAE,WAAW,KAAK,EAAE,WAAW,EAAG,QAAO,CAAC;AAC9C,MAAI,EAAE,SAAS,kBAAmB,QAAO,iBAAiB,GAAG,CAAC;AAE9D,QAAM,OAAO,EAAE,CAAC,EAAE;AAClB,QAAM,YAAY,KAAK,KAAK,EAAE,SAAS,GAAG,KAAK,EAAE,MAAM;AAEvD,QAAM,UAAU,IAAI,kBAAkB,EAAE,SAAS,OAAO,CAAC;AACzD,QAAM,QAAQ,IAAI,aAAa,OAAO;AACtC,IAAE,QAAQ,CAAC,KAAK,MAAM,MAAM,IAAI,KAAK,IAAI,IAAI,CAAC;AAE9C,QAAM,kBAAkB,IAAI,IAAI,YAAY,GAAG,EAAE,SAAS,SAAS,KAAK;AACxE,QAAM,aAAa,kBAAkB,gCAAgC;AACrE,QAAM,YAAY,IAAI,IAAI,YAAY,YAAY,GAAG;AACrD,QAAM,WAAW,kBAAkB,CAAC,YAAY,SAAS,IAAI,CAAC;AAE9D,QAAM,SAAS,MAAM;AAAA,IACnB,EAAE,QAAQ,KAAK,KAAK,EAAE,SAAS,SAAS,EAAE;AAAA,IAC1C,CAAC,GAAG,MAAM,EAAE,MAAM,IAAI,YAAY,IAAI,KAAK,SAAS;AAAA,EACtD;AAEA,QAAM,UAAU,MAAM,QAAQ,IAAI,OAAO,IAAI,WAAS,UAAU,OAAO,SAAS,EAAE,QAAQ,MAAM,WAAW,QAAQ,CAAC,CAAC;AACrH,SAAO,QAAQ,KAAK;AACtB;AAEA,SAAS,UACP,OACA,SACA,QACA,MACA,WACA,UACqB;AACrB,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,UAAM,WAAW,IAAI,aAAa,MAAM,SAAS,IAAI;AACrD,UAAM,QAAQ,CAAC,KAAK,MAAM,SAAS,IAAI,KAAK,IAAI,IAAI,CAAC;AAErD,UAAM,SAAS,IAAI,OAAO,WAAW;AAAA,MACnC,YAAY,EAAE,YAAY,SAAS,QAAQ,UAAU,MAAM,QAAQ,WAAW,SAAS,UAAU,QAAQ,KAAK;AAAA,MAC9G,cAAc,CAAC,SAAS,MAAM;AAAA,MAC9B;AAAA,IACF,CAAC;AAED,WAAO,KAAK,WAAW,CAAC,EAAE,OAAO,MAAM,QAAQ,MAAM,CAAC;AACtD,WAAO,KAAK,SAAS,MAAM;AAAA,EAC7B,CAAC;AACH;","names":[]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@goshenkata/dryscan-core",
3
- "version": "1.4.7",
3
+ "version": "1.4.8",
4
4
  "description": "Core library for DryScan - semantic code duplication analyzer",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -54,7 +54,6 @@
54
54
  "@langchain/ollama": "^1.1.0",
55
55
  "debug": "^4.4.3",
56
56
  "glob-gitignore": "^1.0.15",
57
- "gpu.js": "^2.16.0",
58
57
  "ignore": "^7.0.5",
59
58
  "jsonschema": "^1.5.0",
60
59
  "minimatch": "^10.1.1",