@goshenkata/dryscan-core 1.2.5 → 1.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-ZZC4V5LV.js +52 -0
- package/dist/chunk-ZZC4V5LV.js.map +1 -0
- package/dist/index.d.ts +1 -1
- package/dist/index.js +249 -194
- package/dist/index.js.map +1 -1
- package/dist/services/ParallelSimilarity.d.ts +8 -0
- package/dist/services/ParallelSimilarity.js +7 -0
- package/dist/services/ParallelSimilarity.js.map +1 -0
- package/dist/services/cosineSimilarityWorker.d.ts +2 -0
- package/dist/services/cosineSimilarityWorker.js +12 -0
- package/dist/services/cosineSimilarityWorker.js.map +1 -0
- package/package.json +1 -1
- package/src/DryScan.ts +5 -4
- package/src/config/dryconfig.ts +1 -1
- package/src/extractors/java.ts +22 -7
- package/src/services/DuplicateService.ts +135 -186
- package/src/services/DuplicationCache.ts +107 -1
- package/src/services/ParallelSimilarity.ts +59 -0
- package/src/services/UpdateService.ts +5 -2
- package/src/services/cosineSimilarityWorker.ts +20 -0
- package/tsup.config.ts +1 -1
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
|
+
var __decorateClass = (decorators, target, key, kind) => {
|
|
4
|
+
var result = kind > 1 ? void 0 : kind ? __getOwnPropDesc(target, key) : target;
|
|
5
|
+
for (var i = decorators.length - 1, decorator; i >= 0; i--)
|
|
6
|
+
if (decorator = decorators[i])
|
|
7
|
+
result = (kind ? decorator(target, key, result) : decorator(result)) || result;
|
|
8
|
+
if (kind && result) __defProp(target, key, result);
|
|
9
|
+
return result;
|
|
10
|
+
};
|
|
11
|
+
|
|
12
|
+
// src/services/ParallelSimilarity.ts
|
|
13
|
+
import os from "os";
|
|
14
|
+
import { Worker } from "worker_threads";
|
|
15
|
+
import { cosineSimilarity } from "@langchain/core/utils/math";
|
|
16
|
+
/** Row count of A below which the synchronous library call is used instead of worker threads. */
var MIN_PARALLEL_ROWS = 50;

/**
 * Computes cosineSimilarity(A, B), splitting the rows of A across worker
 * threads once A has at least MIN_PARALLEL_ROWS rows.
 *
 * @param A Row vectors; `A[0].length` is used as the vector dimension for both inputs.
 * @param B Row vectors to compare against; packed once into a SharedArrayBuffer
 *          that is handed to every worker.
 * @returns A promise for the workers' results, concatenated in the order of
 *          A's chunks (flattened one level). Resolves to `[]` when either
 *          input is empty.
 */
async function parallelCosineSimilarity(A, B) {
  if (A.length === 0 || B.length === 0) return [];
  if (A.length < MIN_PARALLEL_ROWS) return cosineSimilarity(A, B);

  const dims = A[0].length;

  // Guard against an empty CPU list: dividing by zero would give an infinite
  // chunk size, hence zero chunks, and a silent [] for non-empty input.
  const workerCount = Math.max(1, os.cpus().length);
  const chunkSize = Math.ceil(A.length / workerCount);

  // Flatten B row-major into shared memory (8 bytes per Float64 element).
  const sharedB = new SharedArrayBuffer(B.length * dims * 8);
  const bView = new Float64Array(sharedB);
  B.forEach((row, i) => bView.set(row, i * dims));

  const workerUrl = new URL("./services/cosineSimilarityWorker.js", import.meta.url);
  // NOTE(review): with the literal ".js" path above this condition is false, so
  // execArgv is []; presumably kept for running from TypeScript sources — confirm.
  const execArgv = workerUrl.pathname.endsWith(".ts") ? ["--import", "tsx/esm"] : [];

  const chunks = Array.from(
    { length: Math.ceil(A.length / chunkSize) },
    (_, i) => A.slice(i * chunkSize, (i + 1) * chunkSize)
  );

  // Promise.all keeps chunk order, so the flattened rows line up with A.
  const results = await Promise.all(
    chunks.map((chunk) => runWorker(chunk, sharedB, B.length, dims, workerUrl, execArgv))
  );
  return results.flat();
}
|
|
34
|
+
function runWorker(chunk, sharedB, bCount, dims, workerUrl, execArgv) {
|
|
35
|
+
return new Promise((resolve, reject) => {
|
|
36
|
+
const rowsFlat = new Float64Array(chunk.length * dims);
|
|
37
|
+
chunk.forEach((row, i) => rowsFlat.set(row, i * dims));
|
|
38
|
+
const worker = new Worker(workerUrl, {
|
|
39
|
+
workerData: { rowsBuffer: rowsFlat.buffer, rowCount: chunk.length, allBuffer: sharedB, allCount: bCount, dims },
|
|
40
|
+
transferList: [rowsFlat.buffer],
|
|
41
|
+
execArgv
|
|
42
|
+
});
|
|
43
|
+
worker.once("message", ({ result }) => resolve(result));
|
|
44
|
+
worker.once("error", reject);
|
|
45
|
+
});
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
export {
|
|
49
|
+
__decorateClass,
|
|
50
|
+
parallelCosineSimilarity
|
|
51
|
+
};
|
|
52
|
+
//# sourceMappingURL=chunk-ZZC4V5LV.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/services/ParallelSimilarity.ts"],"sourcesContent":["import os from \"node:os\";\nimport { Worker } from \"node:worker_threads\";\nimport { cosineSimilarity } from \"@langchain/core/utils/math\";\n\n/** Minimum row count below which synchronous is faster than worker overhead. */\nconst MIN_PARALLEL_ROWS = 50;\n\n/**\n * Computes cosineSimilarity(A, B) using worker threads for large inputs,\n * falling back to the synchronous library call for small ones.\n * B is packed into a SharedArrayBuffer shared across all workers — no copies.\n */\nexport async function parallelCosineSimilarity(A: number[][], B: number[][]): Promise<number[][]> {\n if (A.length === 0 || B.length === 0) return [];\n if (A.length < MIN_PARALLEL_ROWS) return cosineSimilarity(A, B);\n\n const dims = A[0].length;\n const chunkSize = Math.ceil(A.length / os.cpus().length);\n\n const sharedB = new SharedArrayBuffer(B.length * dims * 8);\n const bView = new Float64Array(sharedB);\n B.forEach((row, i) => bView.set(row, i * dims));\n\n // import.meta.resolve respects the active module loader:\n // under tsx it remaps .js → .ts; in compiled output it stays .js.\n const workerUrl = new URL(\"./services/cosineSimilarityWorker.js\", import.meta.url);\n const execArgv = workerUrl.pathname.endsWith(\".ts\") ? 
[\"--import\", \"tsx/esm\"] : [];\n\n const chunks = Array.from(\n { length: Math.ceil(A.length / chunkSize) },\n (_, i) => A.slice(i * chunkSize, (i + 1) * chunkSize),\n );\n\n const results = await Promise.all(chunks.map(chunk => runWorker(chunk, sharedB, B.length, dims, workerUrl, execArgv)));\n return results.flat();\n}\n\nfunction runWorker(\n chunk: number[][],\n sharedB: SharedArrayBuffer,\n bCount: number,\n dims: number,\n workerUrl: URL,\n execArgv: string[],\n): Promise<number[][]> {\n return new Promise((resolve, reject) => {\n const rowsFlat = new Float64Array(chunk.length * dims);\n chunk.forEach((row, i) => rowsFlat.set(row, i * dims));\n\n const worker = new Worker(workerUrl, {\n workerData: { rowsBuffer: rowsFlat.buffer, rowCount: chunk.length, allBuffer: sharedB, allCount: bCount, dims },\n transferList: [rowsFlat.buffer],\n execArgv,\n });\n\n worker.once(\"message\", ({ result }) => resolve(result));\n worker.once(\"error\", reject);\n });\n}\n"],"mappings":";;;;;;;;;;;;AAAA,OAAO,QAAQ;AACf,SAAS,cAAc;AACvB,SAAS,wBAAwB;AAGjC,IAAM,oBAAoB;AAO1B,eAAsB,yBAAyB,GAAe,GAAoC;AAChG,MAAI,EAAE,WAAW,KAAK,EAAE,WAAW,EAAG,QAAO,CAAC;AAC9C,MAAI,EAAE,SAAS,kBAAmB,QAAO,iBAAiB,GAAG,CAAC;AAE9D,QAAM,OAAO,EAAE,CAAC,EAAE;AAClB,QAAM,YAAY,KAAK,KAAK,EAAE,SAAS,GAAG,KAAK,EAAE,MAAM;AAEvD,QAAM,UAAU,IAAI,kBAAkB,EAAE,SAAS,OAAO,CAAC;AACzD,QAAM,QAAQ,IAAI,aAAa,OAAO;AACtC,IAAE,QAAQ,CAAC,KAAK,MAAM,MAAM,IAAI,KAAK,IAAI,IAAI,CAAC;AAI9C,QAAM,YAAY,IAAI,IAAI,wCAAwC,YAAY,GAAG;AACjF,QAAM,WAAW,UAAU,SAAS,SAAS,KAAK,IAAI,CAAC,YAAY,SAAS,IAAI,CAAC;AAEjF,QAAM,SAAS,MAAM;AAAA,IACnB,EAAE,QAAQ,KAAK,KAAK,EAAE,SAAS,SAAS,EAAE;AAAA,IAC1C,CAAC,GAAG,MAAM,EAAE,MAAM,IAAI,YAAY,IAAI,KAAK,SAAS;AAAA,EACtD;AAEA,QAAM,UAAU,MAAM,QAAQ,IAAI,OAAO,IAAI,WAAS,UAAU,OAAO,SAAS,EAAE,QAAQ,MAAM,WAAW,QAAQ,CAAC,CAAC;AACrH,SAAO,QAAQ,KAAK;AACtB;AAEA,SAAS,UACP,OACA,SACA,QACA,MACA,WACA,UACqB;AACrB,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,UAAM,WAAW,IAAI,aAAa,MAAM,SAAS,IAAI;AACrD,UAAM,QAAQ,CAAC,KAAK,MAAM,SAAS,IAAI,KAAK,IAAI,IAAI,CAAC;AAErD,UAAM,S
AAS,IAAI,OAAO,WAAW;AAAA,MACnC,YAAY,EAAE,YAAY,SAAS,QAAQ,UAAU,MAAM,QAAQ,WAAW,SAAS,UAAU,QAAQ,KAAK;AAAA,MAC9G,cAAc,CAAC,SAAS,MAAM;AAAA,MAC9B;AAAA,IACF,CAAC;AAED,WAAO,KAAK,WAAW,CAAC,EAAE,OAAO,MAAM,QAAQ,MAAM,CAAC;AACtD,WAAO,KAAK,SAAS,MAAM;AAAA,EAC7B,CAAC;AACH;","names":[]}
|
package/dist/index.d.ts
CHANGED
|
@@ -221,7 +221,7 @@ declare class DryScan {
|
|
|
221
221
|
* 6. Recompute embeddings for affected units
|
|
222
222
|
* 7. Update file tracking metadata
|
|
223
223
|
*/
|
|
224
|
-
updateIndex(): Promise<
|
|
224
|
+
updateIndex(): Promise<string[]>;
|
|
225
225
|
/**
|
|
226
226
|
* Runs duplicate detection and returns a normalized report payload ready for persistence or display.
|
|
227
227
|
*/
|