@mattgrill/nearline-web 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/finder.d.ts +37 -0
- package/dist/finder.d.ts.map +1 -0
- package/dist/index.cjs +2 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -0
- package/dist/minhash-worker.cjs +2 -0
- package/dist/minhash-worker.cjs.map +1 -0
- package/dist/minhash-worker.js +2 -0
- package/dist/minhash-worker.js.map +1 -0
- package/dist/workers/minhash-worker.d.ts +2 -0
- package/dist/workers/minhash-worker.d.ts.map +1 -0
- package/dist/workers/pool.d.ts +19 -0
- package/dist/workers/pool.d.ts.map +1 -0
- package/package.json +45 -0
package/dist/finder.d.ts
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { type DuplicateFinderOptions, type DuplicatePair, type FindDuplicatesResult } from "@mattgrill/nearline-core";
|
|
2
|
+
/** Browser-specific options extending the base options */
|
|
3
|
+
export interface ClientDuplicateFinderOptions extends DuplicateFinderOptions {
|
|
4
|
+
/**
|
|
5
|
+
* URL to the compiled Web Worker script.
|
|
6
|
+
* Required for parallel computation in the browser.
|
|
7
|
+
*
|
|
8
|
+
* Example with Vite:
|
|
9
|
+
* ```ts
|
|
10
|
+
* workerUrl: new URL('@mattgrill/nearline-web/worker', import.meta.url)
|
|
11
|
+
* ```
|
|
12
|
+
*
|
|
13
|
+
* If omitted, runs single-threaded.
|
|
14
|
+
*/
|
|
15
|
+
workerUrl?: URL | string;
|
|
16
|
+
}
|
|
17
|
+
/**
|
|
18
|
+
* Find near-duplicate strings in a dataset (browser-optimized).
|
|
19
|
+
* Uses Web Workers for parallel computation when workerUrl is provided.
|
|
20
|
+
*/
|
|
21
|
+
export declare function findDuplicates(strings: string[], opts?: ClientDuplicateFinderOptions): Promise<FindDuplicatesResult>;
|
|
22
|
+
/**
|
|
23
|
+
* Class API for incremental use and querying against a built index (browser-optimized).
|
|
24
|
+
*/
|
|
25
|
+
export declare class DuplicateFinder {
|
|
26
|
+
private options;
|
|
27
|
+
private workerPoolOptions;
|
|
28
|
+
private strings;
|
|
29
|
+
private signatures;
|
|
30
|
+
private hashA;
|
|
31
|
+
private hashB;
|
|
32
|
+
constructor(opts?: ClientDuplicateFinderOptions);
|
|
33
|
+
addStrings(strings: string[]): void;
|
|
34
|
+
buildIndex(): Promise<void>;
|
|
35
|
+
query(str: string): DuplicatePair[];
|
|
36
|
+
}
|
|
37
|
+
//# sourceMappingURL=finder.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"finder.d.ts","sourceRoot":"","sources":["../src/finder.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,sBAAsB,EAC3B,KAAK,aAAa,EAClB,KAAK,oBAAoB,EAO1B,MAAM,0BAA0B,CAAC;AAGlC,0DAA0D;AAC1D,MAAM,WAAW,4BAA6B,SAAQ,sBAAsB;IAC1E;;;;;;;;;;OAUG;IACH,SAAS,CAAC,EAAE,GAAG,GAAG,MAAM,CAAC;CAC1B;AASD;;;GAGG;AACH,wBAAsB,cAAc,CAClC,OAAO,EAAE,MAAM,EAAE,EACjB,IAAI,CAAC,EAAE,4BAA4B,GAClC,OAAO,CAAC,oBAAoB,CAAC,CAM/B;AAED;;GAEG;AACH,qBAAa,eAAe;IAC1B,OAAO,CAAC,OAAO,CAAkB;IACjC,OAAO,CAAC,iBAAiB,CAAuB;IAChD,OAAO,CAAC,OAAO,CAAgB;IAC/B,OAAO,CAAC,UAAU,CAA4B;IAC9C,OAAO,CAAC,KAAK,CAAc;IAC3B,OAAO,CAAC,KAAK,CAAc;gBAEf,IAAI,CAAC,EAAE,4BAA4B;IAW/C,UAAU,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,IAAI;IAK7B,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAyBjC,KAAK,CAAC,GAAG,EAAE,MAAM,GAAG,aAAa,EAAE;CAapC"}
|
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
/**
 * CommonJS build of the @mattgrill/nearline-web browser entry point.
 *
 * Exposes `findDuplicates` and `DuplicateFinder`, both thin wrappers around
 * @mattgrill/nearline-core that can off-load MinHash signature computation to
 * Web Workers when a `workerUrl` is supplied.
 *
 * NOTE: this file was reformatted by hand from the minified bundle, so the
 * accompanying index.cjs.map no longer lines up with it and should be
 * regenerated.
 */
(() => {
  "use strict";

  const core = require("@mattgrill/nearline-core");

  /** buildIndex() hashes datasets smaller than this on the calling thread. */
  const MIN_STRINGS_FOR_WORKERS = 5000;

  /**
   * Create a parallel compute function backed by Web Workers.
   *
   * @param options - `{ workerUrl?: URL | string }`.
   * @returns An async function
   *   `(strings, ngramSize, numPermutations, hashA, hashB, numWorkers, batchSize)`
   *   resolving to one flat `Uint32Array` holding `numPermutations` values per
   *   input string, or `null` when no `workerUrl` was given or the `Worker`
   *   global does not exist.
   */
  function createWebWorkerCompute(options) {
    if (!options.workerUrl || typeof Worker === "undefined") return null;
    const workerUrl = options.workerUrl;

    return async (strings, ngramSize, numPermutations, hashA, hashB, numWorkers, batchSize) => {
      const total = strings.length;
      const signatures = new Uint32Array(total * numPermutations);

      // A zero, negative or NaN batch size would never advance the loop below.
      const step = batchSize >= 1 ? Math.floor(batchSize) : 1;
      const batches = [];
      for (let start = 0; start < total; start += step) {
        batches.push({ start, end: Math.min(start + step, total) });
      }

      const laneCount = Math.min(numWorkers, batches.length);
      let nextBatch = 0;
      let failed = false;

      // The coefficients are posted as plain arrays; the worker rebuilds the
      // typed arrays on its side.
      const hashAList = Array.from(hashA);
      const hashBList = Array.from(hashB);

      // One lane = one Worker that keeps pulling batches off the shared queue
      // until the queue is empty or another lane has failed.
      const runLane = () =>
        new Promise((resolve, reject) => {
          const worker = new Worker(workerUrl, { type: "module" });

          const dispatchNext = () => {
            if (failed || nextBatch >= batches.length) {
              worker.terminate();
              resolve();
              return;
            }
            const batch = batches[nextBatch++];
            worker.postMessage({
              strings: strings.slice(batch.start, batch.end),
              startIndex: batch.start,
              ngramSize,
              numPermutations,
              hashA: hashAList,
              hashB: hashBList,
            });
          };

          worker.onmessage = (event) => {
            // The worker echoes startIndex, which locates this batch's rows in
            // the flat output array.
            signatures.set(
              new Uint32Array(event.data.signatures),
              event.data.startIndex * numPermutations
            );
            dispatchNext();
          };

          worker.onerror = (event) => {
            failed = true; // stop the other lanes from dispatching more work
            worker.terminate();
            // Script-load failures arrive without a message; never reject with
            // an empty Error.
            reject(new Error(event.message || "Web Worker failed while computing signatures"));
          };

          dispatchNext();
        });

      const lanes = [];
      for (let i = 0; i < laneCount; i++) lanes.push(runLane());

      await Promise.all(lanes);
      return signatures;
    };
  }

  /**
   * Wrap a parallel compute function so `preprocess` runs on the calling
   * thread before strings are handed to workers.
   *
   * Functions cannot be posted to a worker, and the compute function receives
   * no preprocess argument, so without this wrapper the worker path would hash
   * raw strings while `query()` and the single-threaded path hash preprocessed
   * ones.
   *
   * @returns `parallelCompute` unchanged when either argument is null/absent.
   */
  function withPreprocess(parallelCompute, preprocess) {
    if (!parallelCompute || !preprocess) return parallelCompute;
    return (strings, ...rest) =>
      parallelCompute(strings.map((value) => preprocess(value)), ...rest);
  }

  /** Default worker count: all logical cores but one, and never fewer than 1. */
  function getDefaultWorkers() {
    if (typeof navigator !== "undefined" && navigator.hardwareConcurrency) {
      return Math.max(1, navigator.hardwareConcurrency - 1);
    }
    return 1;
  }

  /**
   * Find near-duplicate strings in a dataset (browser-optimized).
   * Uses Web Workers for signature computation when `opts.workerUrl` is given.
   *
   * @param strings - Input strings.
   * @param opts - Core options plus the optional `workerUrl`.
   * @returns The result of the core pipeline.
   */
  async function findDuplicates(strings, opts) {
    const options = core.resolveOptions(opts, getDefaultWorkers());
    const parallelCompute = withPreprocess(
      createWebWorkerCompute({ workerUrl: opts?.workerUrl }),
      options.preprocess
    );
    return core.executePipeline(strings, options, parallelCompute);
  }

  /**
   * Class API for incremental use and querying against a built index
   * (browser-optimized).
   */
  class DuplicateFinder {
    options;
    workerPoolOptions;
    strings = [];
    signatures = null;
    hashA;
    hashB;

    /** @param opts - Core options plus the optional `workerUrl`. */
    constructor(opts) {
      this.options = core.resolveOptions(opts, getDefaultWorkers());
      this.workerPoolOptions = { workerUrl: opts?.workerUrl };
      const { hashA, hashB } = core.generateHashCoefficients(
        this.options.numPermutations,
        this.options.seed
      );
      this.hashA = hashA;
      this.hashB = hashB;
    }

    /** Append strings and drop the current index; call buildIndex() again before querying. */
    addStrings(strings) {
      this.strings.push(...strings);
      this.signatures = null;
    }

    /**
     * Compute signatures for every added string. Uses workers only when a
     * worker compute function is available, `workers > 0`, and the dataset has
     * at least MIN_STRINGS_FOR_WORKERS strings.
     */
    async buildIndex() {
      const parallelCompute = withPreprocess(
        createWebWorkerCompute(this.workerPoolOptions),
        this.options.preprocess
      );
      const useWorkers =
        parallelCompute &&
        this.options.workers > 0 &&
        this.strings.length >= MIN_STRINGS_FOR_WORKERS;

      if (useWorkers) {
        this.signatures = await parallelCompute(
          this.strings,
          this.options.ngramSize,
          this.options.numPermutations,
          this.hashA,
          this.hashB,
          this.options.workers,
          this.options.batchSize
        );
      } else {
        this.signatures = core.computeAllSignatures(
          this.strings,
          this.options.ngramSize,
          this.options.numPermutations,
          this.hashA,
          this.hashB,
          this.options.preprocess
        );
      }
    }

    /**
     * Compare one string against the built index.
     *
     * @throws Error when buildIndex() has not completed since the last addStrings().
     */
    query(str) {
      if (!this.signatures) {
        throw new Error("Index not built. Call buildIndex() first.");
      }
      return core.querySignatures(
        str,
        this.options,
        this.signatures,
        this.hashA,
        this.hashB,
        this.strings.length
      );
    }
  }

  // Same export shape webpack produced: an ES-module-flagged namespace object
  // with enumerable live getters.
  const exported = {};
  if (typeof Symbol !== "undefined" && Symbol.toStringTag) {
    Object.defineProperty(exported, Symbol.toStringTag, { value: "Module" });
  }
  Object.defineProperty(exported, "__esModule", { value: true });
  Object.defineProperty(exported, "DuplicateFinder", {
    enumerable: true,
    get: () => DuplicateFinder,
  });
  Object.defineProperty(exported, "findDuplicates", {
    enumerable: true,
    get: () => findDuplicates,
  });

  module.exports = exported;
})();
|
|
2
|
+
//# sourceMappingURL=index.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.cjs","sources":["webpack://@mattgrill/nearline-web/webpack/runtime/define_property_getters","webpack://@mattgrill/nearline-web/webpack/runtime/has_own_property","webpack://@mattgrill/nearline-web/webpack/runtime/make_namespace_object","webpack://@mattgrill/nearline-web/./src/workers/pool.ts","webpack://@mattgrill/nearline-web/./src/finder.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { ParallelComputeFn } from \"@mattgrill/nearline-core\";\n\n/**\n * Options for creating the Web Worker pool.\n */\nexport interface WebWorkerPoolOptions {\n /**\n * URL to the compiled worker script.\n * - If using a bundler like Vite: `new URL('./minhash-worker.js', import.meta.url)`\n * - If self-hosting: URL to the worker file from this package's `./worker` export\n * - If omitted: falls back to single-threaded computation\n */\n workerUrl?: URL | string;\n}\n\n/**\n * Create a parallel compute function using Web Workers.\n * Returns null if workerUrl is not provided.\n */\nexport function createWebWorkerCompute(\n options: WebWorkerPoolOptions\n): ParallelComputeFn | null {\n if (!options.workerUrl) return null;\n if (typeof Worker === \"undefined\") return null;\n\n const workerUrl = options.workerUrl;\n\n return async (\n strings: string[],\n ngramSize: number,\n numPermutations: number,\n hashA: Uint32Array,\n hashB: 
Uint32Array,\n numWorkers: number,\n batchSize: number\n ): Promise<Uint32Array> => {\n const n = strings.length;\n const signatures = new Uint32Array(n * numPermutations);\n\n // Create batches\n const batches: Array<{ start: number; end: number }> = [];\n for (let i = 0; i < n; i += batchSize) {\n batches.push({ start: i, end: Math.min(i + batchSize, n) });\n }\n\n const actualWorkers = Math.min(numWorkers, batches.length);\n let batchIndex = 0;\n\n const hashAArr = Array.from(hashA);\n const hashBArr = Array.from(hashB);\n\n const processBatch = (): Promise<void> => {\n return new Promise((resolve, reject) => {\n const processNext = (): void => {\n if (batchIndex >= batches.length) {\n resolve();\n return;\n }\n\n const batch = batches[batchIndex++];\n const batchStrings = strings.slice(batch.start, batch.end);\n\n const worker = new Worker(workerUrl, { type: \"module\" });\n\n worker.onmessage = (\n event: MessageEvent<{\n signatures: ArrayBuffer;\n startIndex: number;\n count: number;\n }>\n ) => {\n const resultSigs = new Uint32Array(event.data.signatures);\n const offset = event.data.startIndex * numPermutations;\n signatures.set(resultSigs, offset);\n worker.terminate();\n processNext();\n };\n\n worker.onerror = (err) => {\n worker.terminate();\n reject(new Error(err.message));\n };\n\n worker.postMessage({\n strings: batchStrings,\n startIndex: batch.start,\n ngramSize,\n numPermutations,\n hashA: hashAArr,\n hashB: hashBArr,\n });\n };\n\n processNext();\n });\n };\n\n const workerPromises: Promise<void>[] = [];\n for (let i = 0; i < actualWorkers; i++) {\n workerPromises.push(processBatch());\n }\n\n await Promise.all(workerPromises);\n return signatures;\n };\n}\n","import {\n type DuplicateFinderOptions,\n type DuplicatePair,\n type FindDuplicatesResult,\n type ResolvedOptions,\n resolveOptions,\n executePipeline,\n querySignatures,\n generateHashCoefficients,\n computeAllSignatures,\n} from \"@mattgrill/nearline-core\";\nimport { 
createWebWorkerCompute, type WebWorkerPoolOptions } from \"./workers/pool\";\n\n/** Browser-specific options extending the base options */\nexport interface ClientDuplicateFinderOptions extends DuplicateFinderOptions {\n /**\n * URL to the compiled Web Worker script.\n * Required for parallel computation in the browser.\n *\n * Example with Vite:\n * ```ts\n * workerUrl: new URL('@mattgrill/duplicate-finder-client/worker', import.meta.url)\n * ```\n *\n * If omitted, runs single-threaded.\n */\n workerUrl?: URL | string;\n}\n\nfunction getDefaultWorkers(): number {\n if (typeof navigator !== \"undefined\" && navigator.hardwareConcurrency) {\n return Math.max(1, navigator.hardwareConcurrency - 1);\n }\n return 1;\n}\n\n/**\n * Find near-duplicate strings in a dataset (browser-optimized).\n * Uses Web Workers for parallel computation when workerUrl is provided.\n */\nexport async function findDuplicates(\n strings: string[],\n opts?: ClientDuplicateFinderOptions\n): Promise<FindDuplicatesResult> {\n const options = resolveOptions(opts, getDefaultWorkers());\n const parallelCompute = createWebWorkerCompute({\n workerUrl: opts?.workerUrl,\n });\n return executePipeline(strings, options, parallelCompute);\n}\n\n/**\n * Class API for incremental use and querying against a built index (browser-optimized).\n */\nexport class DuplicateFinder {\n private options: ResolvedOptions;\n private workerPoolOptions: WebWorkerPoolOptions;\n private strings: string[] = [];\n private signatures: Uint32Array | null = null;\n private hashA: Uint32Array;\n private hashB: Uint32Array;\n\n constructor(opts?: ClientDuplicateFinderOptions) {\n this.options = resolveOptions(opts, getDefaultWorkers());\n this.workerPoolOptions = { workerUrl: opts?.workerUrl };\n const { hashA, hashB } = generateHashCoefficients(\n this.options.numPermutations,\n this.options.seed\n );\n this.hashA = hashA;\n this.hashB = hashB;\n }\n\n addStrings(strings: string[]): void {\n this.strings.push(...strings);\n 
this.signatures = null;\n }\n\n async buildIndex(): Promise<void> {\n const parallelCompute = createWebWorkerCompute(this.workerPoolOptions);\n\n if (parallelCompute && this.options.workers > 0 && this.strings.length >= 5000) {\n this.signatures = await parallelCompute(\n this.strings,\n this.options.ngramSize,\n this.options.numPermutations,\n this.hashA,\n this.hashB,\n this.options.workers,\n this.options.batchSize\n );\n } else {\n this.signatures = computeAllSignatures(\n this.strings,\n this.options.ngramSize,\n this.options.numPermutations,\n this.hashA,\n this.hashB,\n this.options.preprocess\n );\n }\n }\n\n query(str: string): DuplicatePair[] {\n if (!this.signatures) {\n throw new Error(\"Index not built. Call buildIndex() first.\");\n }\n return querySignatures(\n str,\n this.options,\n this.signatures,\n this.hashA,\n this.hashB,\n this.strings.length\n );\n }\n}\n"],"names":["Object","Symbol","createWebWorkerCompute","options","Worker","workerUrl","strings","ngramSize","numPermutations","hashA","hashB","numWorkers","batchSize","n","signatures","Uint32Array","batches","i","Math","actualWorkers","batchIndex","hashAArr","Array","hashBArr","processBatch","Promise","resolve","reject","processNext","batch","batchStrings","worker","event","resultSigs","offset","err","Error","workerPromises","getDefaultWorkers","navigator","findDuplicates","opts","resolveOptions","parallelCompute","executePipeline","DuplicateFinder","generateHashCoefficients","computeAllSignatures","str","querySignatures"],"mappings":"2BAAA,GAAoB,CAAC,CAAG,CAAC,EAAS,KACjC,IAAI,IAAI,KAAO,EACL,EAAoB,CAAC,CAAC,EAAY,IAAQ,CAAC,EAAoB,CAAC,CAAC,EAAS,IACzEA,OAAO,cAAc,CAAC,EAAS,EAAK,CAAE,WAAY,GAAM,IAAK,CAAU,CAAC,EAAI,AAAC,EAGzF,ECNA,EAAoB,CAAC,CAAG,CAAC,EAAK,IAAUA,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAAC,EAAK,GCClF,EAAoB,CAAC,CAAG,AAAC,IACrB,AAAkB,IAAlB,OAAOC,QAA0BA,OAAO,WAAW,EACrDD,OAAO,cAAc,CAAC,EAASC,OAAO,WAAW,CAAE,CAAE,MAAO,QAAS,GAEtED,OAAO,cAAc,CAAC,EAAS,aAAc,CAAE,MAAO,EAAK,EAC5D,E,8GCaO,SAASE,EACdC,CAA
6B,EAE7B,GAAI,CAACA,EAAQ,SAAS,EAClB,AAAkB,IAAlB,OAAOC,OADa,OAAO,KAG/B,IAAMC,EAAYF,EAAQ,SAAS,CAEnC,OAAO,MACLG,EACAC,EACAC,EACAC,EACAC,EACAC,EACAC,KAEA,IAAMC,EAAIP,EAAQ,MAAM,CAClBQ,EAAa,IAAIC,YAAYF,EAAIL,GAGjCQ,EAAiD,EAAE,CACzD,IAAK,IAAIC,EAAI,EAAGA,EAAIJ,EAAGI,GAAKL,EAC1BI,EAAQ,IAAI,CAAC,CAAE,MAAOC,EAAG,IAAKC,KAAK,GAAG,CAACD,EAAIL,EAAWC,EAAG,GAG3D,IAAMM,EAAgBD,KAAK,GAAG,CAACP,EAAYK,EAAQ,MAAM,EACrDI,EAAa,EAEXC,EAAWC,MAAM,IAAI,CAACb,GACtBc,EAAWD,MAAM,IAAI,CAACZ,GAEtBc,EAAe,IACZ,IAAIC,QAAQ,CAACC,EAASC,KAC3B,IAAMC,EAAc,KAClB,GAAIR,GAAcJ,EAAQ,MAAM,CAAE,YAChCU,IAIF,IAAMG,EAAQb,CAAO,CAACI,IAAa,CAC7BU,EAAexB,EAAQ,KAAK,CAACuB,EAAM,KAAK,CAAEA,EAAM,GAAG,EAEnDE,EAAS,IAAI3B,OAAOC,EAAW,CAAE,KAAM,QAAS,EAEtD0B,CAAAA,EAAO,SAAS,CAAG,AACjBC,IAMA,IAAMC,EAAa,IAAIlB,YAAYiB,EAAM,IAAI,CAAC,UAAU,EAClDE,EAASF,EAAM,IAAI,CAAC,UAAU,CAAGxB,EACvCM,EAAW,GAAG,CAACmB,EAAYC,GAC3BH,EAAO,SAAS,GAChBH,GACF,EAEAG,EAAO,OAAO,CAAG,AAACI,IAChBJ,EAAO,SAAS,GAChBJ,EAAO,AAAIS,MAAMD,EAAI,OAAO,EAC9B,EAEAJ,EAAO,WAAW,CAAC,CACjB,QAASD,EACT,WAAYD,EAAM,KAAK,CACvBtB,UAAAA,EACAC,gBAAAA,EACA,MAAOa,EACP,MAAOE,CACT,EACF,EAEAK,GACF,GAGIS,EAAkC,EAAE,CAC1C,IAAK,IAAIpB,EAAI,EAAGA,EAAIE,EAAeF,IACjCoB,EAAe,IAAI,CAACb,KAItB,OADA,MAAMC,QAAQ,GAAG,CAACY,GACXvB,CACT,CACF,CC5EA,SAASwB,UACP,AAAI,AAAqB,IAArB,OAAOC,WAA6BA,UAAU,mBAAmB,CAC5DrB,KAAK,GAAG,CAAC,EAAGqB,UAAU,mBAAmB,CAAG,GAE9C,CACT,CAMO,eAAeC,EACpBlC,CAAiB,CACjBmC,CAAmC,EAEnC,IAAMtC,EAAUuC,AAAAA,GAAAA,EAAAA,cAAAA,AAAAA,EAAeD,EAAMH,KAC/BK,EAAkBzC,EAAuB,CAC7C,UAAWuC,GAAM,SACnB,GACA,MAAOG,AAAAA,GAAAA,EAAAA,eAAAA,AAAAA,EAAgBtC,EAASH,EAASwC,EAC3C,CAKO,MAAME,EACH,OAAyB,AACzB,kBAAwC,AACxC,SAAoB,EAAE,AAAC,AACvB,YAAiC,IAAK,AACtC,MAAmB,AACnB,MAAmB,AAE3B,aAAYJ,CAAmC,CAAE,CAC/C,IAAI,CAAC,OAAO,CAAGC,AAAAA,GAAAA,EAAAA,cAAAA,AAAAA,EAAeD,EAAMH,KACpC,IAAI,CAAC,iBAAiB,CAAG,CAAE,UAAWG,GAAM,SAAU,EACtD,KAAM,CAAEhC,MAAAA,CAAK,CAAEC,MAAAA,CAAK,CAAE,CAAGoC,AAAAA,GAAAA,EAAAA,wBAAAA,AAAAA,EACvB,IAAI,CAAC,OAAO,CAAC,eAAe,CAC5B,IAAI,CAAC,OAAO,CAAC,IAAI,CAEnB,KAAI,CAAC,KAAK,CAAGrC,EACb,IAAI,CAAC,KAAK,CAAGC,CACf
,CAEA,WAAWJ,CAAiB,CAAQ,CAClC,IAAI,CAAC,OAAO,CAAC,IAAI,IAAIA,GACrB,IAAI,CAAC,UAAU,CAAG,IACpB,CAEA,MAAM,YAA4B,CAChC,IAAMqC,EAAkBzC,EAAuB,IAAI,CAAC,iBAAiB,CAEjEyC,CAAAA,GAAmB,IAAI,CAAC,OAAO,CAAC,OAAO,CAAG,GAAK,IAAI,CAAC,OAAO,CAAC,MAAM,EAAI,IACxE,IAAI,CAAC,UAAU,CAAG,MAAMA,EACtB,IAAI,CAAC,OAAO,CACZ,IAAI,CAAC,OAAO,CAAC,SAAS,CACtB,IAAI,CAAC,OAAO,CAAC,eAAe,CAC5B,IAAI,CAAC,KAAK,CACV,IAAI,CAAC,KAAK,CACV,IAAI,CAAC,OAAO,CAAC,OAAO,CACpB,IAAI,CAAC,OAAO,CAAC,SAAS,EAGxB,IAAI,CAAC,UAAU,CAAGI,AAAAA,GAAAA,EAAAA,oBAAAA,AAAAA,EAChB,IAAI,CAAC,OAAO,CACZ,IAAI,CAAC,OAAO,CAAC,SAAS,CACtB,IAAI,CAAC,OAAO,CAAC,eAAe,CAC5B,IAAI,CAAC,KAAK,CACV,IAAI,CAAC,KAAK,CACV,IAAI,CAAC,OAAO,CAAC,UAAU,CAG7B,CAEA,MAAMC,CAAW,CAAmB,CAClC,GAAI,CAAC,IAAI,CAAC,UAAU,CAClB,MAAM,AAAIZ,MAAM,6CAElB,MAAOa,AAAAA,GAAAA,EAAAA,eAAAA,AAAAA,EACLD,EACA,IAAI,CAAC,OAAO,CACZ,IAAI,CAAC,UAAU,CACf,IAAI,CAAC,KAAK,CACV,IAAI,CAAC,KAAK,CACV,IAAI,CAAC,OAAO,CAAC,MAAM,CAEvB,CACF,C"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
export { findDuplicates, DuplicateFinder } from "./finder";
|
|
2
|
+
export type { ClientDuplicateFinderOptions } from "./finder";
|
|
3
|
+
export type { DuplicateFinderOptions, DuplicatePair, DuplicateGroup, FindDuplicatesResult, FindDuplicatesStats, } from "@mattgrill/nearline-core";
|
|
4
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,UAAU,CAAC;AAC3D,YAAY,EAAE,4BAA4B,EAAE,MAAM,UAAU,CAAC;AAG7D,YAAY,EACV,sBAAsB,EACtB,aAAa,EACb,cAAc,EACd,oBAAoB,EACpB,mBAAmB,GACpB,MAAM,0BAA0B,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
/**
 * ES-module build of the @mattgrill/nearline-web browser entry point.
 *
 * Exposes `findDuplicates` and `DuplicateFinder`, both thin wrappers around
 * @mattgrill/nearline-core that can off-load MinHash signature computation to
 * Web Workers when a `workerUrl` is supplied.
 *
 * NOTE: this file was reformatted by hand from the minified bundle, so the
 * accompanying index.js.map no longer lines up with it and should be
 * regenerated.
 */
import {
  computeAllSignatures,
  executePipeline,
  generateHashCoefficients,
  querySignatures,
  resolveOptions,
} from "@mattgrill/nearline-core";

/** buildIndex() hashes datasets smaller than this on the calling thread. */
const MIN_STRINGS_FOR_WORKERS = 5000;

/**
 * Create a parallel compute function backed by Web Workers.
 *
 * @param options - `{ workerUrl?: URL | string }`.
 * @returns An async function
 *   `(strings, ngramSize, numPermutations, hashA, hashB, numWorkers, batchSize)`
 *   resolving to one flat `Uint32Array` holding `numPermutations` values per
 *   input string, or `null` when no `workerUrl` was given or the `Worker`
 *   global does not exist.
 */
function createWebWorkerCompute(options) {
  if (!options.workerUrl || typeof Worker === "undefined") return null;
  const workerUrl = options.workerUrl;

  return async (strings, ngramSize, numPermutations, hashA, hashB, numWorkers, batchSize) => {
    const total = strings.length;
    const signatures = new Uint32Array(total * numPermutations);

    // A zero, negative or NaN batch size would never advance the loop below.
    const step = batchSize >= 1 ? Math.floor(batchSize) : 1;
    const batches = [];
    for (let start = 0; start < total; start += step) {
      batches.push({ start, end: Math.min(start + step, total) });
    }

    const laneCount = Math.min(numWorkers, batches.length);
    let nextBatch = 0;
    let failed = false;

    // The coefficients are posted as plain arrays; the worker rebuilds the
    // typed arrays on its side.
    const hashAList = Array.from(hashA);
    const hashBList = Array.from(hashB);

    // One lane = one Worker that keeps pulling batches off the shared queue
    // until the queue is empty or another lane has failed.
    const runLane = () =>
      new Promise((resolve, reject) => {
        const worker = new Worker(workerUrl, { type: "module" });

        const dispatchNext = () => {
          if (failed || nextBatch >= batches.length) {
            worker.terminate();
            resolve();
            return;
          }
          const batch = batches[nextBatch++];
          worker.postMessage({
            strings: strings.slice(batch.start, batch.end),
            startIndex: batch.start,
            ngramSize,
            numPermutations,
            hashA: hashAList,
            hashB: hashBList,
          });
        };

        worker.onmessage = (event) => {
          // The worker echoes startIndex, which locates this batch's rows in
          // the flat output array.
          signatures.set(
            new Uint32Array(event.data.signatures),
            event.data.startIndex * numPermutations
          );
          dispatchNext();
        };

        worker.onerror = (event) => {
          failed = true; // stop the other lanes from dispatching more work
          worker.terminate();
          // Script-load failures arrive without a message; never reject with
          // an empty Error.
          reject(new Error(event.message || "Web Worker failed while computing signatures"));
        };

        dispatchNext();
      });

    const lanes = [];
    for (let i = 0; i < laneCount; i++) lanes.push(runLane());

    await Promise.all(lanes);
    return signatures;
  };
}

/**
 * Wrap a parallel compute function so `preprocess` runs on the calling thread
 * before strings are handed to workers.
 *
 * Functions cannot be posted to a worker, and the compute function receives no
 * preprocess argument, so without this wrapper the worker path would hash raw
 * strings while `query()` and the single-threaded path hash preprocessed ones.
 *
 * @returns `parallelCompute` unchanged when either argument is null/absent.
 */
function withPreprocess(parallelCompute, preprocess) {
  if (!parallelCompute || !preprocess) return parallelCompute;
  return (strings, ...rest) =>
    parallelCompute(strings.map((value) => preprocess(value)), ...rest);
}

/** Default worker count: all logical cores but one, and never fewer than 1. */
function getDefaultWorkers() {
  if (typeof navigator !== "undefined" && navigator.hardwareConcurrency) {
    return Math.max(1, navigator.hardwareConcurrency - 1);
  }
  return 1;
}

/**
 * Find near-duplicate strings in a dataset (browser-optimized).
 * Uses Web Workers for signature computation when `opts.workerUrl` is given.
 *
 * @param strings - Input strings.
 * @param opts - Core options plus the optional `workerUrl`.
 * @returns The result of the core pipeline.
 */
async function findDuplicates(strings, opts) {
  const options = resolveOptions(opts, getDefaultWorkers());
  const parallelCompute = withPreprocess(
    createWebWorkerCompute({ workerUrl: opts?.workerUrl }),
    options.preprocess
  );
  return executePipeline(strings, options, parallelCompute);
}

/**
 * Class API for incremental use and querying against a built index
 * (browser-optimized).
 */
class DuplicateFinder {
  options;
  workerPoolOptions;
  strings = [];
  signatures = null;
  hashA;
  hashB;

  /** @param opts - Core options plus the optional `workerUrl`. */
  constructor(opts) {
    this.options = resolveOptions(opts, getDefaultWorkers());
    this.workerPoolOptions = { workerUrl: opts?.workerUrl };
    const { hashA, hashB } = generateHashCoefficients(
      this.options.numPermutations,
      this.options.seed
    );
    this.hashA = hashA;
    this.hashB = hashB;
  }

  /** Append strings and drop the current index; call buildIndex() again before querying. */
  addStrings(strings) {
    this.strings.push(...strings);
    this.signatures = null;
  }

  /**
   * Compute signatures for every added string. Uses workers only when a worker
   * compute function is available, `workers > 0`, and the dataset has at least
   * MIN_STRINGS_FOR_WORKERS strings.
   */
  async buildIndex() {
    const parallelCompute = withPreprocess(
      createWebWorkerCompute(this.workerPoolOptions),
      this.options.preprocess
    );
    const useWorkers =
      parallelCompute &&
      this.options.workers > 0 &&
      this.strings.length >= MIN_STRINGS_FOR_WORKERS;

    if (useWorkers) {
      this.signatures = await parallelCompute(
        this.strings,
        this.options.ngramSize,
        this.options.numPermutations,
        this.hashA,
        this.hashB,
        this.options.workers,
        this.options.batchSize
      );
    } else {
      this.signatures = computeAllSignatures(
        this.strings,
        this.options.ngramSize,
        this.options.numPermutations,
        this.hashA,
        this.hashB,
        this.options.preprocess
      );
    }
  }

  /**
   * Compare one string against the built index.
   *
   * @throws Error when buildIndex() has not completed since the last addStrings().
   */
  query(str) {
    if (!this.signatures) {
      throw new Error("Index not built. Call buildIndex() first.");
    }
    return querySignatures(
      str,
      this.options,
      this.signatures,
      this.hashA,
      this.hashB,
      this.strings.length
    );
  }
}

export { DuplicateFinder, findDuplicates };
|
|
2
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sources":["webpack://@mattgrill/nearline-web/webpack/runtime/define_property_getters","webpack://@mattgrill/nearline-web/webpack/runtime/has_own_property","webpack://@mattgrill/nearline-web/./src/workers/pool.ts","webpack://@mattgrill/nearline-web/./src/finder.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","import type { ParallelComputeFn } from \"@mattgrill/nearline-core\";\n\n/**\n * Options for creating the Web Worker pool.\n */\nexport interface WebWorkerPoolOptions {\n /**\n * URL to the compiled worker script.\n * - If using a bundler like Vite: `new URL('./minhash-worker.js', import.meta.url)`\n * - If self-hosting: URL to the worker file from this package's `./worker` export\n * - If omitted: falls back to single-threaded computation\n */\n workerUrl?: URL | string;\n}\n\n/**\n * Create a parallel compute function using Web Workers.\n * Returns null if workerUrl is not provided.\n */\nexport function createWebWorkerCompute(\n options: WebWorkerPoolOptions\n): ParallelComputeFn | null {\n if (!options.workerUrl) return null;\n if (typeof Worker === \"undefined\") return null;\n\n const workerUrl = options.workerUrl;\n\n return async (\n strings: string[],\n ngramSize: number,\n numPermutations: number,\n hashA: Uint32Array,\n hashB: Uint32Array,\n numWorkers: number,\n batchSize: number\n ): Promise<Uint32Array> => {\n const n = strings.length;\n const signatures = new Uint32Array(n * numPermutations);\n\n // Create batches\n const batches: Array<{ start: number; end: number }> = [];\n for (let i = 0; i < n; i += batchSize) {\n batches.push({ start: i, end: Math.min(i + batchSize, n) });\n }\n\n 
const actualWorkers = Math.min(numWorkers, batches.length);\n let batchIndex = 0;\n\n const hashAArr = Array.from(hashA);\n const hashBArr = Array.from(hashB);\n\n const processBatch = (): Promise<void> => {\n return new Promise((resolve, reject) => {\n const processNext = (): void => {\n if (batchIndex >= batches.length) {\n resolve();\n return;\n }\n\n const batch = batches[batchIndex++];\n const batchStrings = strings.slice(batch.start, batch.end);\n\n const worker = new Worker(workerUrl, { type: \"module\" });\n\n worker.onmessage = (\n event: MessageEvent<{\n signatures: ArrayBuffer;\n startIndex: number;\n count: number;\n }>\n ) => {\n const resultSigs = new Uint32Array(event.data.signatures);\n const offset = event.data.startIndex * numPermutations;\n signatures.set(resultSigs, offset);\n worker.terminate();\n processNext();\n };\n\n worker.onerror = (err) => {\n worker.terminate();\n reject(new Error(err.message));\n };\n\n worker.postMessage({\n strings: batchStrings,\n startIndex: batch.start,\n ngramSize,\n numPermutations,\n hashA: hashAArr,\n hashB: hashBArr,\n });\n };\n\n processNext();\n });\n };\n\n const workerPromises: Promise<void>[] = [];\n for (let i = 0; i < actualWorkers; i++) {\n workerPromises.push(processBatch());\n }\n\n await Promise.all(workerPromises);\n return signatures;\n };\n}\n","import {\n type DuplicateFinderOptions,\n type DuplicatePair,\n type FindDuplicatesResult,\n type ResolvedOptions,\n resolveOptions,\n executePipeline,\n querySignatures,\n generateHashCoefficients,\n computeAllSignatures,\n} from \"@mattgrill/nearline-core\";\nimport { createWebWorkerCompute, type WebWorkerPoolOptions } from \"./workers/pool\";\n\n/** Browser-specific options extending the base options */\nexport interface ClientDuplicateFinderOptions extends DuplicateFinderOptions {\n /**\n * URL to the compiled Web Worker script.\n * Required for parallel computation in the browser.\n *\n * Example with Vite:\n * ```ts\n * workerUrl: new 
URL('@mattgrill/duplicate-finder-client/worker', import.meta.url)\n * ```\n *\n * If omitted, runs single-threaded.\n */\n workerUrl?: URL | string;\n}\n\nfunction getDefaultWorkers(): number {\n if (typeof navigator !== \"undefined\" && navigator.hardwareConcurrency) {\n return Math.max(1, navigator.hardwareConcurrency - 1);\n }\n return 1;\n}\n\n/**\n * Find near-duplicate strings in a dataset (browser-optimized).\n * Uses Web Workers for parallel computation when workerUrl is provided.\n */\nexport async function findDuplicates(\n strings: string[],\n opts?: ClientDuplicateFinderOptions\n): Promise<FindDuplicatesResult> {\n const options = resolveOptions(opts, getDefaultWorkers());\n const parallelCompute = createWebWorkerCompute({\n workerUrl: opts?.workerUrl,\n });\n return executePipeline(strings, options, parallelCompute);\n}\n\n/**\n * Class API for incremental use and querying against a built index (browser-optimized).\n */\nexport class DuplicateFinder {\n private options: ResolvedOptions;\n private workerPoolOptions: WebWorkerPoolOptions;\n private strings: string[] = [];\n private signatures: Uint32Array | null = null;\n private hashA: Uint32Array;\n private hashB: Uint32Array;\n\n constructor(opts?: ClientDuplicateFinderOptions) {\n this.options = resolveOptions(opts, getDefaultWorkers());\n this.workerPoolOptions = { workerUrl: opts?.workerUrl };\n const { hashA, hashB } = generateHashCoefficients(\n this.options.numPermutations,\n this.options.seed\n );\n this.hashA = hashA;\n this.hashB = hashB;\n }\n\n addStrings(strings: string[]): void {\n this.strings.push(...strings);\n this.signatures = null;\n }\n\n async buildIndex(): Promise<void> {\n const parallelCompute = createWebWorkerCompute(this.workerPoolOptions);\n\n if (parallelCompute && this.options.workers > 0 && this.strings.length >= 5000) {\n this.signatures = await parallelCompute(\n this.strings,\n this.options.ngramSize,\n this.options.numPermutations,\n this.hashA,\n this.hashB,\n 
this.options.workers,\n this.options.batchSize\n );\n } else {\n this.signatures = computeAllSignatures(\n this.strings,\n this.options.ngramSize,\n this.options.numPermutations,\n this.hashA,\n this.hashB,\n this.options.preprocess\n );\n }\n }\n\n query(str: string): DuplicatePair[] {\n if (!this.signatures) {\n throw new Error(\"Index not built. Call buildIndex() first.\");\n }\n return querySignatures(\n str,\n this.options,\n this.signatures,\n this.hashA,\n this.hashB,\n this.strings.length\n );\n }\n}\n"],"names":["Object","createWebWorkerCompute","options","Worker","workerUrl","strings","ngramSize","numPermutations","hashA","hashB","numWorkers","batchSize","n","signatures","Uint32Array","batches","i","Math","actualWorkers","batchIndex","hashAArr","Array","hashBArr","processBatch","Promise","resolve","reject","processNext","batch","batchStrings","worker","event","resultSigs","offset","err","Error","workerPromises","getDefaultWorkers","navigator","findDuplicates","opts","executePipeline","resolveOptions","DuplicateFinder","generateHashCoefficients","parallelCompute","computeAllSignatures","str","querySignatures"],"mappings":"oKAAA,GAAoB,CAAC,CAAG,CAAC,EAAS,KACjC,IAAI,IAAI,KAAO,EACL,EAAoB,CAAC,CAAC,EAAY,IAAQ,CAAC,EAAoB,CAAC,CAAC,EAAS,IACzEA,OAAO,cAAc,CAAC,EAAS,EAAK,CAAE,WAAY,GAAM,IAAK,CAAU,CAAC,EAAI,AAAC,EAGzF,ECNA,EAAoB,CAAC,CAAG,CAAC,EAAK,IAAUA,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAAC,EAAK,G,SCmB3E,SAASC,EACdC,CAA6B,EAE7B,GAAI,CAACA,EAAQ,SAAS,EAClB,AAAkB,IAAlB,OAAOC,OADa,OAAO,KAG/B,IAAMC,EAAYF,EAAQ,SAAS,CAEnC,OAAO,MACLG,EACAC,EACAC,EACAC,EACAC,EACAC,EACAC,KAEA,IAAMC,EAAIP,EAAQ,MAAM,CAClBQ,EAAa,IAAIC,YAAYF,EAAIL,GAGjCQ,EAAiD,EAAE,CACzD,IAAK,IAAIC,EAAI,EAAGA,EAAIJ,EAAGI,GAAKL,EAC1BI,EAAQ,IAAI,CAAC,CAAE,MAAOC,EAAG,IAAKC,KAAK,GAAG,CAACD,EAAIL,EAAWC,EAAG,GAG3D,IAAMM,EAAgBD,KAAK,GAAG,CAACP,EAAYK,EAAQ,MAAM,EACrDI,EAAa,EAEXC,EAAWC,MAAM,IAAI,CAACb,GACtBc,EAAWD,MAAM,IAAI,CAACZ,GAEtBc,EAAe,IACZ,IAAIC,QAAQ,CAACC,EAASC,KAC3B,IAAMC,EAAc,KAClB,GAAIR,GAAcJ,EAAQ,MAAM,CAAE,YAChCU,IA
IF,IAAMG,EAAQb,CAAO,CAACI,IAAa,CAC7BU,EAAexB,EAAQ,KAAK,CAACuB,EAAM,KAAK,CAAEA,EAAM,GAAG,EAEnDE,EAAS,IAAI3B,OAAOC,EAAW,CAAE,KAAM,QAAS,EAEtD0B,CAAAA,EAAO,SAAS,CAAG,AACjBC,IAMA,IAAMC,EAAa,IAAIlB,YAAYiB,EAAM,IAAI,CAAC,UAAU,EAClDE,EAASF,EAAM,IAAI,CAAC,UAAU,CAAGxB,EACvCM,EAAW,GAAG,CAACmB,EAAYC,GAC3BH,EAAO,SAAS,GAChBH,GACF,EAEAG,EAAO,OAAO,CAAG,AAACI,IAChBJ,EAAO,SAAS,GAChBJ,EAAO,AAAIS,MAAMD,EAAI,OAAO,EAC9B,EAEAJ,EAAO,WAAW,CAAC,CACjB,QAASD,EACT,WAAYD,EAAM,KAAK,CACvBtB,UAAAA,EACAC,gBAAAA,EACA,MAAOa,EACP,MAAOE,CACT,EACF,EAEAK,GACF,GAGIS,EAAkC,EAAE,CAC1C,IAAK,IAAIpB,EAAI,EAAGA,EAAIE,EAAeF,IACjCoB,EAAe,IAAI,CAACb,KAItB,OADA,MAAMC,QAAQ,GAAG,CAACY,GACXvB,CACT,CACF,CC5EA,SAASwB,UACP,AAAI,AAAqB,IAArB,OAAOC,WAA6BA,UAAU,mBAAmB,CAC5DrB,KAAK,GAAG,CAAC,EAAGqB,UAAU,mBAAmB,CAAG,GAE9C,CACT,CAMO,eAAeC,EACpBlC,CAAiB,CACjBmC,CAAmC,EAMnC,OAAOC,EAAgBpC,EAJPqC,EAAeF,EAAMH,KACbpC,EAAuB,CAC7C,UAAWuC,GAAM,SACnB,GAEF,C,wBAKO,OAAMG,EACH,OAAyB,AACzB,kBAAwC,AACxC,SAAoB,EAAE,AAAC,AACvB,YAAiC,IAAK,AACtC,MAAmB,AACnB,MAAmB,AAE3B,aAAYH,CAAmC,CAAE,CAC/C,IAAI,CAAC,OAAO,CAAGE,EAAeF,EAAMH,KACpC,IAAI,CAAC,iBAAiB,CAAG,CAAE,UAAWG,GAAM,SAAU,EACtD,GAAM,CAAEhC,MAAAA,CAAK,CAAEC,MAAAA,CAAK,CAAE,CAAGmC,EACvB,IAAI,CAAC,OAAO,CAAC,eAAe,CAC5B,IAAI,CAAC,OAAO,CAAC,IAAI,CAEnB,KAAI,CAAC,KAAK,CAAGpC,EACb,IAAI,CAAC,KAAK,CAAGC,CACf,CAEA,WAAWJ,CAAiB,CAAQ,CAClC,IAAI,CAAC,OAAO,CAAC,IAAI,IAAIA,GACrB,IAAI,CAAC,UAAU,CAAG,IACpB,CAEA,MAAM,YAA4B,CAChC,IAAMwC,EAAkB5C,EAAuB,IAAI,CAAC,iBAAiB,CAEjE4C,CAAAA,GAAmB,IAAI,CAAC,OAAO,CAAC,OAAO,CAAG,GAAK,IAAI,CAAC,OAAO,CAAC,MAAM,EAAI,IACxE,IAAI,CAAC,UAAU,CAAG,MAAMA,EACtB,IAAI,CAAC,OAAO,CACZ,IAAI,CAAC,OAAO,CAAC,SAAS,CACtB,IAAI,CAAC,OAAO,CAAC,eAAe,CAC5B,IAAI,CAAC,KAAK,CACV,IAAI,CAAC,KAAK,CACV,IAAI,CAAC,OAAO,CAAC,OAAO,CACpB,IAAI,CAAC,OAAO,CAAC,SAAS,EAGxB,IAAI,CAAC,UAAU,CAAGC,EAChB,IAAI,CAAC,OAAO,CACZ,IAAI,CAAC,OAAO,CAAC,SAAS,CACtB,IAAI,CAAC,OAAO,CAAC,eAAe,CAC5B,IAAI,CAAC,KAAK,CACV,IAAI,CAAC,KAAK,CACV,IAAI,CAAC,OAAO,CAAC,UAAU,CAG7B,CAEA,MAAMC,CAAW,CAAmB,CAClC,GAAI,CAAC,IAAI,CAAC,UAAU,CAClB,MAAM,AAAIZ,MAAM,
6CAElB,OAAOa,EACLD,EACA,IAAI,CAAC,OAAO,CACZ,IAAI,CAAC,UAAU,CACf,IAAI,CAAC,KAAK,CACV,IAAI,CAAC,KAAK,CACV,IAAI,CAAC,OAAO,CAAC,MAAM,CAEvB,CACF,C"}
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
/*
 * minhash-worker.cjs: webpack bundle of the Web Worker entry
 * (src/workers/minhash-worker.ts according to the accompanying source map)
 * together with the @mattgrill/nearline-core helpers it was built against.
 *
 * What the bundle does at runtime:
 *  - Installs `self.onmessage`. Each message is destructured as
 *    { strings, startIndex, ngramSize, numPermutations, hashA, hashB }.
 *    hashA/hashB are rebuilt as Uint32Array and the bundled signature routine
 *    is called with its preprocess argument fixed to null, so no preprocessing
 *    happens inside the worker.
 *  - Replies with { signatures: <ArrayBuffer>, startIndex, count: strings.length }
 *    and lists the signature buffer in the transfer list.
 *  - The remaining bundled helpers (n-gram hashing, banding, union-find, option
 *    resolution, pipeline, query) are referenced but not called by the handler.
 *  - Ends with `module.exports = {}`: nothing is exported.
 *
 * NOTE(review): the final statement needs a CommonJS `module` global; presumably
 * this .cjs variant is not the file meant to be loaded as a `type: "module"`
 * worker. Confirm against package.json exports.
 */
(()=>{"use strict";var t={};t.d=(e,r)=>{for(var n in r)t.o(r,n)&&!t.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:r[n]})},t.o=(t,e)=>Object.prototype.hasOwnProperty.call(t,e);var e={};function r(t,e=0,n=t.length){let i=0x811c9dc5,o=e+n;for(let r=e;r<o;r++)i^=t.charCodeAt(r),i=Math.imul(i,0x1000193);return i>>>0}function n(t,e,r){let n=0x811c9dc5,i=e+r;for(let r=e;r<i;r++){let e=t[r];n^=255&e,n=Math.imul(n=Math.imul(n=Math.imul(n=Math.imul(n,0x1000193)^e>>>8&255,0x1000193)^e>>>16&255,0x1000193)^e>>>24&255,0x1000193)}return n>>>0}function i(t){return t^=t<<13,t^=t>>>17,(t^=t<<5)>>>0}function o(t,e){let r=new Uint32Array(t),n=new Uint32Array(t),o=0===e?1:e;for(let e=0;e<t;e++)o=i(o),r[e]=o,o=i(o),n[e]=o;return{hashA:r,hashB:n}}function a(t,e,r){let n=t;return((n=Math.imul((n=Math.imul(n^e,0x85ebca6b))^n>>>13,0xc2b2ae35))^r^n>>>16)>>>0}function s(t,e,r,n,i,o){for(let t=0;t<n;t++)i[o+t]=0xffffffff;let s=t.length;if(0!==s)for(let u=0;u<s;u++){let s=t[u];for(let t=0;t<n;t++){let n=a(s,e[t],r[t]);n<i[o+t]&&(i[o+t]=n)}}}function u(t,e,n,i,o,s){let u=t.length,l=new Uint32Array(u*n);for(let f=0;f<u;f++)!function(t,e,n,i,o,s,u){for(let t=0;t<o;t++)s[u+t]=0xffffffff;let l=t.length;if(0===l)return;if(l<e){let e=r(t,0,l);for(let t=0;t<o;t++)s[u+t]=a(e,n[t],i[t]);return}let f=l-e+1;for(let l=0;l<f;l++){let f=r(t,l,e);for(let t=0;t<o;t++){let e=a(f,n[t],i[t]);e<s[u+t]&&(s[u+t]=e)}}}(s?s(t[f]):t[f],e,i,o,n,l,f*n);return l}function l(t,e,r,n){let i=e*n,o=r*n,a=0;for(let e=0;e<n;e++)t[i+e]===t[o+e]&&a++;return a/n}function f(t,e,r,i,o){let a=new Set;for(let s=0;s<i;s++){let i=s*o,u=new Map;for(let a=0;a<e;a++){let e=n(t,a*r+i,o),s=u.get(e);s||(s=[],u.set(e,s)),s.push(a)}for(let t of u.values())if(!(t.length<2))for(let r=0;r<t.length;r++)for(let n=r+1;n<t.length;n++)a.add(t[r]*e+t[n])}return a}function h(t,e){return[t/e|0,t%e]}t.d(e,{HP:()=>p,sB:()=>w,wQ:()=>n,eE:()=>f,$i:()=>d,vm:()=>h,eL:()=>g,u5:()=>u,Wh:()=>r,g3:()=>o,_M:()=>l,aW:()=>s,Bu:()=>c,nC:()=>m});class 
m{parent;rank;constructor(t){this.parent=new Uint32Array(t),this.rank=new Uint8Array(t);for(let e=0;e<t;e++)this.parent[e]=e}find(t){let e=t;for(;this.parent[e]!==e;)e=this.parent[e];for(;this.parent[t]!==e;){let r=this.parent[t];this.parent[t]=e,t=r}return e}union(t,e){let r=this.find(t),n=this.find(e);r!==n&&(this.rank[r]<this.rank[n]?this.parent[r]=n:this.rank[r]>this.rank[n]?this.parent[n]=r:(this.parent[n]=r,this.rank[r]++))}getGroups(t){let e=new Map;for(let r of t){let t=this.find(r),n=e.get(t);n||(n=[],e.set(t,n)),n.push(r)}let r=[];for(let t of e.values())t.length>1&&(t.sort((t,e)=>t-e),r.push(t));return r.sort((t,e)=>t[0]-e[0]),r}}function p(t,e){let n=new Set,i=t.length;if(i<e)return i>0&&n.add(r(t,0,i)),n;let o=i-e+1;for(let i=0;i<o;i++)n.add(r(t,i,e));return n}function c(t){let e=new Uint32Array(t.size),r=0;for(let n of t)e[r++]=n;return e}function d(t,e){let r;return r=t?.workers===void 0||t?.workers==="auto"?e:t.workers,{threshold:t?.threshold??.8,ngramSize:t?.ngramSize??3,numPermutations:t?.numPermutations??128,numBands:t?.numBands??32,bandSize:t?.bandSize??4,workers:r,batchSize:t?.batchSize??1e3,preprocess:t?.preprocess??null,seed:t?.seed??42}}async function g(t,e,r){let n,i=performance.now(),a=t.length;if(a<2)return{pairs:[],groups:[],stats:{totalTimeMs:performance.now()-i,inputCount:a,candidateCount:0,duplicateCount:0,groupCount:0}};let{hashA:s,hashB:p}=o(e.numPermutations,e.seed),c=f(n=r&&e.workers>0&&a>=5e3?await r(t,e.ngramSize,e.numPermutations,s,p,e.workers,e.batchSize):u(t,e.ngramSize,e.numPermutations,s,p,e.preprocess),a,e.numPermutations,e.numBands,e.bandSize),d=[],g=new m(a),w=new Set;for(let t of c){let[r,i]=h(t,a),o=l(n,r,i,e.numPermutations);o>=e.threshold&&(d.push({indexA:r,indexB:i,similarity:o}),g.union(r,i),w.add(r),w.add(i))}d.sort((t,e)=>e.similarity-t.similarity);let 
y=g.getGroups(w).map(t=>({indices:t}));return{pairs:d,groups:y,stats:{totalTimeMs:performance.now()-i,inputCount:a,candidateCount:c.size,duplicateCount:d.length,groupCount:y.length}}}function w(t,e,r,n,i,o){let a=t;e.preprocess&&(a=e.preprocess(a));let u=c(p(a,e.ngramSize)),l=new Uint32Array(e.numPermutations);s(u,n,i,e.numPermutations,l,0);let f=[];for(let t=0;t<o;t++){let n=t*e.numPermutations,i=0;for(let t=0;t<e.numPermutations;t++)l[t]===r[n+t]&&i++;let o=i/e.numPermutations;o>=e.threshold&&f.push({indexA:-1,indexB:t,similarity:o})}return f.sort((t,e)=>e.similarity-t.similarity),f}e.nC;var y=e.u5;e.aW,e.vm,e._M,e.eL,e.eE,e.Wh,e.wQ,e.g3,e.sB,e.$i,e.HP,e.Bu,self.onmessage=t=>{let{strings:e,startIndex:r,ngramSize:n,numPermutations:i,hashA:o,hashB:a}=t.data,s=y(e,n,i,new Uint32Array(o),new Uint32Array(a),null);self.postMessage({signatures:s.buffer,startIndex:r,count:e.length},{transfer:[s.buffer]})},module.exports={}})();
|
|
2
|
+
//# sourceMappingURL=minhash-worker.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"minhash-worker.cjs","sources":["webpack://@mattgrill/nearline-web/../core/dist/index.js","webpack://@mattgrill/nearline-web/./src/workers/minhash-worker.ts"],"sourcesContent":["var e={};e.d=(t,n)=>{for(var r in n)e.o(n,r)&&!e.o(t,r)&&Object.defineProperty(t,r,{enumerable:!0,get:n[r]})},e.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t);var t={};function n(e,t=0,r=e.length){let i=0x811c9dc5,a=t+r;for(let n=t;n<a;n++)i^=e.charCodeAt(n),i=Math.imul(i,0x1000193);return i>>>0}function r(e,t,n){let r=0x811c9dc5,i=t+n;for(let n=t;n<i;n++){let t=e[n];r^=255&t,r=Math.imul(r=Math.imul(r=Math.imul(r=Math.imul(r,0x1000193)^t>>>8&255,0x1000193)^t>>>16&255,0x1000193)^t>>>24&255,0x1000193)}return r>>>0}function i(e){return e^=e<<13,e^=e>>>17,(e^=e<<5)>>>0}function a(e,t){let n=new Uint32Array(e),r=new Uint32Array(e),a=0===t?1:t;for(let t=0;t<e;t++)a=i(a),n[t]=a,a=i(a),r[t]=a;return{hashA:n,hashB:r}}function o(e,t,n){let r=e;return((r=Math.imul((r=Math.imul(r^t,0x85ebca6b))^r>>>13,0xc2b2ae35))^n^r>>>16)>>>0}function s(e,t,n,r,i,a){for(let e=0;e<r;e++)i[a+e]=0xffffffff;let s=e.length;if(0!==s)for(let u=0;u<s;u++){let s=e[u];for(let e=0;e<r;e++){let r=o(s,t[e],n[e]);r<i[a+e]&&(i[a+e]=r)}}}function u(e,t,r,i,a,s){let u=e.length,l=new Uint32Array(u*r);for(let f=0;f<u;f++)!function(e,t,r,i,a,s,u){for(let e=0;e<a;e++)s[u+e]=0xffffffff;let l=e.length;if(0===l)return;if(l<t){let t=n(e,0,l);for(let e=0;e<a;e++)s[u+e]=o(t,r[e],i[e]);return}let f=l-t+1;for(let l=0;l<f;l++){let f=n(e,l,t);for(let e=0;e<a;e++){let t=o(f,r[e],i[e]);t<s[u+e]&&(s[u+e]=t)}}}(s?s(e[f]):e[f],t,i,a,r,l,f*r);return l}function l(e,t,n,r){let i=t*r,a=n*r,o=0;for(let t=0;t<r;t++)e[i+t]===e[a+t]&&o++;return o/r}function f(e,t,n,i,a){let o=new Set;for(let u=0;u<i;u++){let i=u*a,l=new Map;for(let o=0;o<t;o++){let t=r(e,o*n+i,a),s=l.get(t);s||(s=[],l.set(t,s)),s.push(o)}for(let e of l.values())if(!(e.length<2))for(let n=0;n<e.length;n++)for(let r=n+1;r<e.length;r++){var 
s;o.add((s=e[n],s*t+e[r]))}}return o}function p(e,t){return[e/t|0,e%t]}e.d(t,{HP:()=>c,sB:()=>g,wQ:()=>r,eE:()=>f,$i:()=>m,vm:()=>p,eL:()=>d,u5:()=>u,Wh:()=>n,g3:()=>a,_M:()=>l,aW:()=>s,Bu:()=>h,nC:()=>_});class _{parent;rank;constructor(e){this.parent=new Uint32Array(e),this.rank=new Uint8Array(e);for(let t=0;t<e;t++)this.parent[t]=t}find(e){let t=e;for(;this.parent[t]!==t;)t=this.parent[t];for(;this.parent[e]!==t;){let n=this.parent[e];this.parent[e]=t,e=n}return t}union(e,t){let n=this.find(e),r=this.find(t);n!==r&&(this.rank[n]<this.rank[r]?this.parent[n]=r:this.rank[n]>this.rank[r]?this.parent[r]=n:(this.parent[r]=n,this.rank[n]++))}getGroups(e){let t=new Map;for(let n of e){let e=this.find(n),r=t.get(e);r||(r=[],t.set(e,r)),r.push(n)}let n=[];for(let e of t.values())e.length>1&&(e.sort((e,t)=>e-t),n.push(e));return n.sort((e,t)=>e[0]-t[0]),n}}function c(e,t){let r=new Set,i=e.length;if(i<t)return i>0&&r.add(n(e,0,i)),r;let a=i-t+1;for(let i=0;i<a;i++)r.add(n(e,i,t));return r}function h(e){let t=new Uint32Array(e.size),n=0;for(let r of e)t[n++]=r;return t}function m(e,t){let n;return n=e?.workers===void 0||e?.workers===\"auto\"?t:e.workers,{threshold:e?.threshold??.8,ngramSize:e?.ngramSize??3,numPermutations:e?.numPermutations??128,numBands:e?.numBands??32,bandSize:e?.bandSize??4,workers:n,batchSize:e?.batchSize??1e3,preprocess:e?.preprocess??null,seed:e?.seed??42}}async function d(e,t,n){let r,i=performance.now(),o=e.length;if(o<2)return{pairs:[],groups:[],stats:{totalTimeMs:performance.now()-i,inputCount:o,candidateCount:0,duplicateCount:0,groupCount:0}};let{hashA:s,hashB:c}=a(t.numPermutations,t.seed),h=f(r=n&&t.workers>0&&o>=5e3?await n(e,t.ngramSize,t.numPermutations,s,c,t.workers,t.batchSize):u(e,t.ngramSize,t.numPermutations,s,c,t.preprocess),o,t.numPermutations,t.numBands,t.bandSize),m=[],d=new _(o),g=new Set;for(let e of 
h){let[n,i]=p(e,o),a=l(r,n,i,t.numPermutations);a>=t.threshold&&(m.push({indexA:n,indexB:i,similarity:a}),d.union(n,i),g.add(n),g.add(i))}m.sort((e,t)=>t.similarity-e.similarity);let w=d.getGroups(g).map(e=>({indices:e}));return{pairs:m,groups:w,stats:{totalTimeMs:performance.now()-i,inputCount:o,candidateCount:h.size,duplicateCount:m.length,groupCount:w.length}}}function g(e,t,n,r,i,a){let o=e;t.preprocess&&(o=t.preprocess(o));let u=h(c(o,t.ngramSize)),l=new Uint32Array(t.numPermutations);s(u,r,i,t.numPermutations,l,0);let f=[];for(let e=0;e<a;e++){let r=e*t.numPermutations,i=0;for(let e=0;e<t.numPermutations;e++)l[e]===n[r+e]&&i++;let a=i/t.numPermutations;a>=t.threshold&&f.push({indexA:-1,indexB:e,similarity:a})}return f.sort((e,t)=>t.similarity-e.similarity),f}var w=t.nC,x=t.u5,b=t.aW,k=t.vm,S=t._M,y=t.eL,P=t.eE,v=t.Wh,C=t.wQ,A=t.g3,z=t.sB,M=t.$i,U=t.HP,B=t.Bu;export{w as UnionFind,x as computeAllSignatures,b as computeSignature,k as decodePair,S as estimateSimilarity,y as executePipeline,P as findCandidates,v as fnv1aString,C as fnv1aUint32,A as generateHashCoefficients,z as querySignatures,M as resolveOptions,U as shingle,B as shingleToArray};\n//# sourceMappingURL=index.js.map","/**\n * Web Worker for browser-based parallel MinHash computation.\n * Receives string batches, computes signatures, posts results back.\n */\nimport { computeAllSignatures } from \"@mattgrill/nearline-core\";\n\ninterface WorkerInput {\n strings: string[];\n startIndex: number;\n ngramSize: number;\n numPermutations: number;\n hashA: number[];\n hashB: number[];\n}\n\nself.onmessage = (event: MessageEvent<WorkerInput>) => {\n const { strings, startIndex, ngramSize, numPermutations, hashA, hashB } = event.data;\n\n const signatures = computeAllSignatures(\n strings,\n ngramSize,\n numPermutations,\n new Uint32Array(hashA),\n new Uint32Array(hashB),\n null\n );\n\n self.postMessage(\n {\n signatures: signatures.buffer,\n startIndex,\n count: strings.length,\n },\n { transfer: 
[signatures.buffer] }\n );\n};\n"],"names":["Object","i","Math","Uint32Array","s","u","l","f","Set","Map","Uint8Array","performance","self","event","strings","startIndex","ngramSize","numPermutations","hashA","hashB","signatures","computeAllSignatures"],"mappings":"mBAAA,IAAI,EAAE,CAAC,CAAE,GAAE,CAAC,CAAC,CAAC,EAAE,KAAK,IAAI,IAAI,KAAK,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,IAAIA,OAAO,cAAc,CAAC,EAAE,EAAE,CAAC,WAAW,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,IAAIA,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAAC,EAAE,GAAG,IAAI,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,MAAM,EAAE,IAAIC,EAAE,WAAW,EAAE,EAAE,EAAE,IAAI,IAAI,EAAE,EAAE,EAAE,EAAE,IAAIA,GAAG,EAAE,UAAU,CAAC,GAAGA,EAAEC,KAAK,IAAI,CAACD,EAAE,WAAW,OAAOA,IAAI,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,WAAWA,EAAE,EAAE,EAAE,IAAI,IAAI,EAAE,EAAE,EAAEA,EAAE,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,EAAE,AAAC,IAAG,IAAI,EAAE,EAAEC,KAAK,IAAI,CAAC,EAAEA,KAAK,IAAI,CAAC,EAAEA,KAAK,IAAI,CAAC,EAAEA,KAAK,IAAI,CAAC,EAAE,WAAW,IAAI,EAAE,IAAI,WAAW,IAAI,GAAG,IAAI,WAAW,IAAI,GAAG,IAAI,UAAU,CAAC,OAAO,IAAI,CAAC,CAAC,SAASD,EAAE,CAAC,EAAE,OAAO,GAAG,GAAG,GAAG,GAAG,IAAI,GAAG,AAAC,IAAG,GAAG,KAAK,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAIE,YAAY,GAAG,EAAE,IAAIA,YAAY,GAAG,EAAE,IAAI,EAAE,EAAE,EAAE,IAAI,IAAI,EAAE,EAAE,EAAE,EAAE,IAAI,EAAEF,EAAE,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,EAAEA,EAAE,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,MAAM,AAAC,CAAC,GAAEC,KAAK,IAAI,CAAC,AAAC,GAAEA,KAAK,IAAI,CAAC,EAAE,EAAE,WAAU,EAAG,IAAI,GAAG,WAAU,EAAG,EAAE,IAAI,EAAC,IAAK,CAAC,CAAC,SAASE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAACH,CAAC,CAAC,CAAC,EAAE,IAAI,IAAI,EAAE,EAAE,EAAE,EAAE,IAAIA,CAAC,CAAC,EAAE,EAAE,CAAC,WAAW,IAAIG,EAAE,EAAE,MAAM,CAAC,GAAG,IAAIA,EAAE,IAAI,IAAIC,EAAE,EAAEA,EAAED,EAAEC,IAAI,CAAC,IAAID,EAAE,CAAC,CAACC,EAAE,CAAC,IAAI,IAAI,EAAE,EAAE,EAAE,EAAE,IAAI,CAAC,IAAI,EAAE,EAAED,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAE,GAAEH
,CAAC,CAAC,EAAE,EAAE,EAAGA,CAAAA,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,SAASI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAACJ,CAAC,CAAC,CAAC,CAACG,CAAC,EAAE,IAAIC,EAAE,EAAE,MAAM,CAACC,EAAE,IAAIH,YAAYE,EAAE,GAAG,IAAI,IAAIE,EAAE,EAAEA,EAAEF,EAAEE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAACN,CAAC,CAAC,CAAC,CAACG,CAAC,CAACC,CAAC,EAAE,IAAI,IAAI,EAAE,EAAE,EAAE,EAAE,IAAID,CAAC,CAACC,EAAE,EAAE,CAAC,WAAW,IAAIC,EAAE,EAAE,MAAM,CAAC,GAAG,IAAIA,EAAE,OAAO,GAAGA,EAAE,EAAE,CAAC,IAAI,EAAE,EAAE,EAAE,EAAEA,GAAG,IAAI,IAAI,EAAE,EAAE,EAAE,EAAE,IAAIF,CAAC,CAACC,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAACJ,CAAC,CAAC,EAAE,EAAE,MAAM,CAAC,IAAIM,EAAED,EAAE,EAAE,EAAE,IAAI,IAAIA,EAAE,EAAEA,EAAEC,EAAED,IAAI,CAAC,IAAIC,EAAE,EAAE,EAAED,EAAE,GAAG,IAAI,IAAI,EAAE,EAAE,EAAE,EAAE,IAAI,CAAC,IAAI,EAAE,EAAEC,EAAE,CAAC,CAAC,EAAE,CAACN,CAAC,CAAC,EAAE,CAAE,GAAEG,CAAC,CAACC,EAAE,EAAE,EAAGD,CAAAA,CAAC,CAACC,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,EAAED,EAAEA,EAAE,CAAC,CAACG,EAAE,EAAE,CAAC,CAACA,EAAE,CAAC,EAAEN,EAAE,EAAE,EAAEK,EAAEC,EAAE,GAAG,OAAOD,CAAC,CAAC,SAASA,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,IAAIL,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,IAAI,IAAI,EAAE,EAAE,EAAE,EAAE,IAAI,CAAC,CAACA,EAAE,EAAE,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,IAAI,OAAO,EAAE,CAAC,CAAC,SAASM,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAACN,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAIO,IAAI,IAAI,IAAI,EAAE,EAAE,EAAEP,EAAE,IAAI,CAAC,IAAIA,EAAE,EAAE,EAAE,EAAE,IAAIQ,IAAI,IAAI,IAAI,EAAE,EAAE,EAAE,EAAE,IAAI,CAAC,IAAI,EAAE,EAAE,EAAE,EAAE,EAAER,EAAE,GAAGG,EAAE,EAAE,GAAG,CAAC,EAAGA,CAAAA,GAAIA,CAAAA,EAAE,EAAE,CAAC,EAAE,GAAG,CAAC,EAAEA,EAAC,EAAGA,EAAE,IAAI,CAAC,EAAE,CAAC,IAAI,IAAI,KAAK,EAAE,MAAM,GAAG,GAAG,CAAE,GAAE,MAAM,CAAC,GAAG,IAAI,IAAI,EAAE,EAAE,EAAE,EAAE,MAAM,CAAC,IAAI,IAAI,IAAI,EAAE,EAAE,EAAE,EAAE,EAAE,MAAM,CAAC,IAAW,EAAE,GAAG,CAAE,AAAE,CAAC,CAAC,EAAE,CAAG,EAAE,CAAC,CAAC,EAAE,CAAG,CAAC,OAAO,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC,GAAG,IAAI,EAAE,GAAG,IAAI,EAAE,GAAG,IAAI,EAAE,GAAG,IAAIG,EAAE,GAAG,IAAI,EAAE,GAAG,IAA
I,EAAE,GAAG,IAAI,EAAE,GAAG,IAAIF,EAAE,GAAG,IAAI,EAAE,GAAG,IAAI,EAAE,GAAG,IAAIC,EAAE,GAAG,IAAIF,EAAE,GAAG,IAAI,EAAE,GAAG,IAAI,CAAC,EAAG,OAAM,EAAE,MAAO,KAAK,aAAY,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAID,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,IAAIO,WAAW,GAAG,IAAI,IAAI,EAAE,EAAE,EAAE,EAAE,IAAI,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,KAAK,IAAI,CAAC,MAAM,CAAC,EAAE,GAAG,GAAG,EAAE,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,KAAK,IAAI,CAAC,MAAM,CAAC,EAAE,GAAG,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,EAAE,AAAC,KAAI,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,IAAI,CAAC,EAAG,KAAI,GAAI,KAAI,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,EAAG,KAAI,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,EAAC,CAAC,CAAE,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,EAAE,IAAID,IAAI,IAAI,IAAI,KAAK,EAAE,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,GAAG,CAAC,EAAG,IAAI,GAAE,EAAE,CAAC,EAAE,GAAG,CAAC,EAAE,EAAC,EAAG,EAAE,IAAI,CAAC,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,IAAI,KAAK,EAAE,MAAM,GAAG,EAAE,MAAM,CAAC,GAAI,GAAE,IAAI,CAAC,CAAC,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,CAAC,EAAC,EAAG,OAAO,EAAE,IAAI,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAID,IAAIP,EAAE,EAAE,MAAM,CAAC,GAAGA,EAAE,EAAE,OAAOA,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,EAAE,EAAEA,IAAI,EAAE,IAAI,EAAEA,EAAE,EAAE,EAAE,IAAI,IAAIA,EAAE,EAAEA,EAAE,EAAEA,IAAI,EAAE,GAAG,CAAC,EAAE,EAAEA,EAAE,IAAI,OAAO,CAAC,CAAC,SAAS,EAAE,CAAC,EAAE,IAAI,EAAE,IAAIE,YAAY,EAAE,IAAI,EAAE,EAAE,EAAE,IAAI,IAAI,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,OAAO,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,GAAG,UAAU,KAAK,GAAG,GAAG,UAAU,OAAO,EAAE,EAAE,OAAO,CAAC,CAAC,UAAU,GAAG,WAAW,GAAG,UAAU,GAAG,WAAW,EAAE,gBAAgB,GAAG,iBAAiB,IAAI,SAAS,GAAG,UAAU,GAAG,SAAS,GAAG,UAAU,EAAE,QAAQ,EAAE,UAAU,GA
AG,WAAW,IAAI,WAAW,GAAG,YAAY,KAAK,KAAK,GAAG,MAAM,EAAE,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAEF,EAAEU,YAAY,GAAG,GAAG,EAAE,EAAE,MAAM,CAAC,GAAG,EAAE,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC,OAAO,EAAE,CAAC,MAAM,CAAC,YAAYA,YAAY,GAAG,GAAGV,EAAE,WAAW,EAAE,eAAe,EAAE,eAAe,EAAE,WAAW,CAAC,CAAC,EAAE,GAAG,CAAC,MAAMG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,eAAe,CAAC,EAAE,IAAI,EAAE,EAAEG,EAAE,EAAE,GAAG,EAAE,OAAO,CAAC,GAAG,GAAG,IAAI,MAAM,EAAE,EAAE,EAAE,SAAS,CAAC,EAAE,eAAe,CAACH,EAAE,EAAE,EAAE,OAAO,CAAC,EAAE,SAAS,EAAEC,EAAE,EAAE,EAAE,SAAS,CAAC,EAAE,eAAe,CAACD,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE,eAAe,CAAC,EAAE,QAAQ,CAAC,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC,EAAE,IAAI,EAAE,GAAG,EAAE,IAAII,IAAI,IAAI,IAAI,KAAK,EAAE,CAAC,GAAG,CAAC,EAAEP,EAAE,CAAC,EAAE,EAAE,GAAG,EAAEK,EAAE,EAAE,EAAEL,EAAE,EAAE,eAAe,CAAE,IAAG,EAAE,SAAS,EAAG,GAAE,IAAI,CAAC,CAAC,OAAO,EAAE,OAAOA,EAAE,WAAW,CAAC,GAAG,EAAE,KAAK,CAAC,EAAEA,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,GAAG,CAACA,EAAC,CAAE,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,IAAI,EAAE,UAAU,CAAC,EAAE,UAAU,EAAE,IAAI,EAAE,EAAE,SAAS,CAAC,GAAG,GAAG,CAAC,GAAI,EAAC,QAAQ,CAAC,IAAI,MAAM,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,YAAYU,YAAY,GAAG,GAAGV,EAAE,WAAW,EAAE,eAAe,EAAE,IAAI,CAAC,eAAe,EAAE,MAAM,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAACA,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAE,GAAE,UAAU,EAAG,GAAE,EAAE,UAAU,CAAC,EAAC,EAAG,IAAII,EAAE,EAAE,EAAE,EAAE,EAAE,SAAS,GAAGC,EAAE,IAAIH,YAAY,EAAE,eAAe,EAAEC,EAAEC,EAAE,EAAEJ,EAAE,EAAE,eAAe,CAACK,EAAE,GAAG,IAAIC,EAAE,EAAE,CAAC,IAAI,IAAI,EAAE,EAAE,EAAE,EAAE,IAAI,CAAC,IAAI,EAAE,EAAE,EAAE,eAAe,CAACN,EAAE,EAAE,IAAI,IAAI,EAAE,EAAE,EAAE,EAAE,eAAe,CAAC,IAAIK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,EAAE,EAAE,EAAEL,IAAI,IAAI,EAAEA,EAAE,EAAE,eAAe,AAAC,IAAG,EAAE,SAAS,EAAEM,EAAE,IAAI,CAAC,CAAC,OAAO,GAAG,OAAO,EAAE,WAAW,CAAC,EAAE,CAAC,OAAOA,EAAE,IAAI,CAAC,CAAC,EAAE,IAAI,EAAE,UAAU,CAAC,EAAE,UAAU,EAAEA,CAAC,CAAO,EAAE,EAAE,CAAV,IAAW,EAAE,EAAE,EAAE,AAAG,GAAE,EAAE,CAAG,EAAE,EAAE,CAAG,EAAE,EAAE,CAAG,EAAE,EAAE,CAAG,EAAE,EAAE,CAAG,EAAE,EAAE,CAAG,EAAE,EAAE,CAA
G,EAAE,EAAE,CAAG,EAAE,EAAE,CAAG,EAAE,EAAE,CAAG,EAAE,EAAE,CAAG,EAAE,EAAE,CCe95IK,KAAK,SAAS,CAAG,AAACC,IAChB,GAAM,CAAEC,QAAAA,CAAO,CAAEC,WAAAA,CAAU,CAAEC,UAAAA,CAAS,CAAEC,gBAAAA,CAAe,CAAEC,MAAAA,CAAK,CAAEC,MAAAA,CAAK,CAAE,CAAGN,EAAM,IAAI,CAE9EO,EAAaC,EACjBP,EACAE,EACAC,EACA,IAAId,YAAYe,GAChB,IAAIf,YAAYgB,GAChB,MAGFP,KAAK,WAAW,CACd,CACE,WAAYQ,EAAW,MAAM,CAC7BL,WAAAA,EACA,MAAOD,EAAQ,MAAM,AACvB,EACA,CAAE,SAAU,CAACM,EAAW,MAAM,CAAC,AAAC,EAEpC,E"}
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
/*
 * Bundled Web Worker for MinHash signature computation (webpack output: the
 * core library is inlined first, the worker entry point is the final
 * `self.onmessage=` assignment).
 *
 * Message protocol, as implemented by the handler at the end:
 *   in : { strings, startIndex, ngramSize, numPermutations, hashA, hashB }
 *   out: { signatures, startIndex, count }
 *     - `signatures` is the ArrayBuffer behind a Uint32Array of
 *       `strings.length * numPermutations` values, one row per input string;
 *       the same buffer is passed in the `transfer` list.
 *     - `startIndex` is echoed back unchanged; `count` is `strings.length`.
 *   `hashA` / `hashB` are wrapped in new Uint32Arrays before use, and the
 *   preprocess argument is always `null`, so strings are hashed as received.
 *
 * Signature rows: every slot starts at 0xffffffff. An empty string keeps that
 * row as is, a string shorter than `ngramSize` is hashed once as a whole, and
 * any other string takes the per-slot minimum over all of its n-gram hashes.
 *
 * Mangled top-level names (export names recovered from the accompanying
 * source map; "internal" means not exported):
 *   n  fnv1aString               hash over a range of UTF-16 code units
 *   r  fnv1aUint32               hash over a range of 32-bit words, low byte first
 *   i  internal                  xorshift32 step (shifts 13, 17, 5)
 *   o  generateHashCoefficients  fills hashA/hashB by stepping `i` from the seed
 *                                (a seed of 0 is replaced by 1)
 *   a  internal                  mixes one shingle hash with one (hashA, hashB) pair
 *   s  computeSignature          signature of a pre-hashed shingle array
 *   u  computeAllSignatures      signatures for an array of strings
 *   l  estimateSimilarity        fraction of equal slots between two rows
 *   f  findCandidates            banding over rows; returns a Set of i * count + j keys
 *   h  decodePair                inverse of that key: [i, j]
 *   m  UnionFind                 union by rank with path compression
 *   p  shingle                   Set of n-gram hashes of a string
 *   c  shingleToArray            copies that Set into a Uint32Array
 *   d  resolveOptions            applies option defaults
 *   g  executePipeline           full duplicate search
 *   w  querySignatures           compares one string against existing rows
 *   y  alias of `u`; the only export the message handler calls
 *
 * The bare `e.nC; ... e.Bu` reads just before the handler only evaluate the
 * export getters and discard the results.
 */
var t={};t.d=(e,n)=>{for(var r in n)t.o(n,r)&&!t.o(e,r)&&Object.defineProperty(e,r,{enumerable:!0,get:n[r]})},t.o=(t,e)=>Object.prototype.hasOwnProperty.call(t,e);var e={};function n(t,e=0,r=t.length){let i=0x811c9dc5,o=e+r;for(let n=e;n<o;n++)i^=t.charCodeAt(n),i=Math.imul(i,0x1000193);return i>>>0}function r(t,e,n){let r=0x811c9dc5,i=e+n;for(let n=e;n<i;n++){let e=t[n];r^=255&e,r=Math.imul(r=Math.imul(r=Math.imul(r=Math.imul(r,0x1000193)^e>>>8&255,0x1000193)^e>>>16&255,0x1000193)^e>>>24&255,0x1000193)}return r>>>0}function i(t){return t^=t<<13,t^=t>>>17,(t^=t<<5)>>>0}function o(t,e){let n=new Uint32Array(t),r=new Uint32Array(t),o=0===e?1:e;for(let e=0;e<t;e++)o=i(o),n[e]=o,o=i(o),r[e]=o;return{hashA:n,hashB:r}}function a(t,e,n){let r=t;return((r=Math.imul((r=Math.imul(r^e,0x85ebca6b))^r>>>13,0xc2b2ae35))^n^r>>>16)>>>0}function s(t,e,n,r,i,o){for(let t=0;t<r;t++)i[o+t]=0xffffffff;let s=t.length;if(0!==s)for(let u=0;u<s;u++){let s=t[u];for(let t=0;t<r;t++){let r=a(s,e[t],n[t]);r<i[o+t]&&(i[o+t]=r)}}}function u(t,e,r,i,o,s){let u=t.length,l=new Uint32Array(u*r);for(let f=0;f<u;f++)!function(t,e,r,i,o,s,u){for(let t=0;t<o;t++)s[u+t]=0xffffffff;let l=t.length;if(0===l)return;if(l<e){let e=n(t,0,l);for(let t=0;t<o;t++)s[u+t]=a(e,r[t],i[t]);return}let f=l-e+1;for(let l=0;l<f;l++){let f=n(t,l,e);for(let t=0;t<o;t++){let e=a(f,r[t],i[t]);e<s[u+t]&&(s[u+t]=e)}}}(s?s(t[f]):t[f],e,i,o,r,l,f*r);return l}function l(t,e,n,r){let i=e*r,o=n*r,a=0;for(let e=0;e<r;e++)t[i+e]===t[o+e]&&a++;return a/r}function f(t,e,n,i,o){let a=new Set;for(let s=0;s<i;s++){let i=s*o,u=new Map;for(let a=0;a<e;a++){let e=r(t,a*n+i,o),s=u.get(e);s||(s=[],u.set(e,s)),s.push(a)}for(let t of u.values())if(!(t.length<2))for(let n=0;n<t.length;n++)for(let r=n+1;r<t.length;r++)a.add(t[n]*e+t[r])}return a}function h(t,e){return[t/e|0,t%e]}t.d(e,{HP:()=>p,sB:()=>w,wQ:()=>r,eE:()=>f,$i:()=>d,vm:()=>h,eL:()=>g,u5:()=>u,Wh:()=>n,g3:()=>o,_M:()=>l,aW:()=>s,Bu:()=>c,nC:()=>m});class 
m{parent;rank;constructor(t){this.parent=new Uint32Array(t),this.rank=new Uint8Array(t);for(let e=0;e<t;e++)this.parent[e]=e}find(t){let e=t;for(;this.parent[e]!==e;)e=this.parent[e];for(;this.parent[t]!==e;){let n=this.parent[t];this.parent[t]=e,t=n}return e}union(t,e){let n=this.find(t),r=this.find(e);n!==r&&(this.rank[n]<this.rank[r]?this.parent[n]=r:this.rank[n]>this.rank[r]?this.parent[r]=n:(this.parent[r]=n,this.rank[n]++))}getGroups(t){let e=new Map;for(let n of t){let t=this.find(n),r=e.get(t);r||(r=[],e.set(t,r)),r.push(n)}let n=[];for(let t of e.values())t.length>1&&(t.sort((t,e)=>t-e),n.push(t));return n.sort((t,e)=>t[0]-e[0]),n}}function p(t,e){let r=new Set,i=t.length;if(i<e)return i>0&&r.add(n(t,0,i)),r;let o=i-e+1;for(let i=0;i<o;i++)r.add(n(t,i,e));return r}function c(t){let e=new Uint32Array(t.size),n=0;for(let r of t)e[n++]=r;return e}function d(t,e){let n;return n=t?.workers===void 0||t?.workers==="auto"?e:t.workers,{threshold:t?.threshold??.8,ngramSize:t?.ngramSize??3,numPermutations:t?.numPermutations??128,numBands:t?.numBands??32,bandSize:t?.bandSize??4,workers:n,batchSize:t?.batchSize??1e3,preprocess:t?.preprocess??null,seed:t?.seed??42}}async function g(t,e,n){let r,i=performance.now(),a=t.length;if(a<2)return{pairs:[],groups:[],stats:{totalTimeMs:performance.now()-i,inputCount:a,candidateCount:0,duplicateCount:0,groupCount:0}};let{hashA:s,hashB:p}=o(e.numPermutations,e.seed),c=f(r=n&&e.workers>0&&a>=5e3?await n(t,e.ngramSize,e.numPermutations,s,p,e.workers,e.batchSize):u(t,e.ngramSize,e.numPermutations,s,p,e.preprocess),a,e.numPermutations,e.numBands,e.bandSize),d=[],g=new m(a),w=new Set;for(let t of c){let[n,i]=h(t,a),o=l(r,n,i,e.numPermutations);o>=e.threshold&&(d.push({indexA:n,indexB:i,similarity:o}),g.union(n,i),w.add(n),w.add(i))}d.sort((t,e)=>e.similarity-t.similarity);let 
y=g.getGroups(w).map(t=>({indices:t}));return{pairs:d,groups:y,stats:{totalTimeMs:performance.now()-i,inputCount:a,candidateCount:c.size,duplicateCount:d.length,groupCount:y.length}}}function w(t,e,n,r,i,o){let a=t;e.preprocess&&(a=e.preprocess(a));let u=c(p(a,e.ngramSize)),l=new Uint32Array(e.numPermutations);s(u,r,i,e.numPermutations,l,0);let f=[];for(let t=0;t<o;t++){let r=t*e.numPermutations,i=0;for(let t=0;t<e.numPermutations;t++)l[t]===n[r+t]&&i++;let o=i/e.numPermutations;o>=e.threshold&&f.push({indexA:-1,indexB:t,similarity:o})}return f.sort((t,e)=>e.similarity-t.similarity),f}e.nC;var y=e.u5;e.aW,e.vm,e._M,e.eL,e.eE,e.Wh,e.wQ,e.g3,e.sB,e.$i,e.HP,e.Bu,self.onmessage=t=>{let{strings:e,startIndex:n,ngramSize:r,numPermutations:i,hashA:o,hashB:a}=t.data,s=y(e,r,i,new Uint32Array(o),new Uint32Array(a),null);self.postMessage({signatures:s.buffer,startIndex:n,count:e.length},{transfer:[s.buffer]})};
|
|
2
|
+
//# sourceMappingURL=minhash-worker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"minhash-worker.js","sources":["webpack://@mattgrill/nearline-web/../core/dist/index.js","webpack://@mattgrill/nearline-web/./src/workers/minhash-worker.ts"],"sourcesContent":["var e={};e.d=(t,n)=>{for(var r in n)e.o(n,r)&&!e.o(t,r)&&Object.defineProperty(t,r,{enumerable:!0,get:n[r]})},e.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t);var t={};function n(e,t=0,r=e.length){let i=0x811c9dc5,a=t+r;for(let n=t;n<a;n++)i^=e.charCodeAt(n),i=Math.imul(i,0x1000193);return i>>>0}function r(e,t,n){let r=0x811c9dc5,i=t+n;for(let n=t;n<i;n++){let t=e[n];r^=255&t,r=Math.imul(r=Math.imul(r=Math.imul(r=Math.imul(r,0x1000193)^t>>>8&255,0x1000193)^t>>>16&255,0x1000193)^t>>>24&255,0x1000193)}return r>>>0}function i(e){return e^=e<<13,e^=e>>>17,(e^=e<<5)>>>0}function a(e,t){let n=new Uint32Array(e),r=new Uint32Array(e),a=0===t?1:t;for(let t=0;t<e;t++)a=i(a),n[t]=a,a=i(a),r[t]=a;return{hashA:n,hashB:r}}function o(e,t,n){let r=e;return((r=Math.imul((r=Math.imul(r^t,0x85ebca6b))^r>>>13,0xc2b2ae35))^n^r>>>16)>>>0}function s(e,t,n,r,i,a){for(let e=0;e<r;e++)i[a+e]=0xffffffff;let s=e.length;if(0!==s)for(let u=0;u<s;u++){let s=e[u];for(let e=0;e<r;e++){let r=o(s,t[e],n[e]);r<i[a+e]&&(i[a+e]=r)}}}function u(e,t,r,i,a,s){let u=e.length,l=new Uint32Array(u*r);for(let f=0;f<u;f++)!function(e,t,r,i,a,s,u){for(let e=0;e<a;e++)s[u+e]=0xffffffff;let l=e.length;if(0===l)return;if(l<t){let t=n(e,0,l);for(let e=0;e<a;e++)s[u+e]=o(t,r[e],i[e]);return}let f=l-t+1;for(let l=0;l<f;l++){let f=n(e,l,t);for(let e=0;e<a;e++){let t=o(f,r[e],i[e]);t<s[u+e]&&(s[u+e]=t)}}}(s?s(e[f]):e[f],t,i,a,r,l,f*r);return l}function l(e,t,n,r){let i=t*r,a=n*r,o=0;for(let t=0;t<r;t++)e[i+t]===e[a+t]&&o++;return o/r}function f(e,t,n,i,a){let o=new Set;for(let u=0;u<i;u++){let i=u*a,l=new Map;for(let o=0;o<t;o++){let t=r(e,o*n+i,a),s=l.get(t);s||(s=[],l.set(t,s)),s.push(o)}for(let e of l.values())if(!(e.length<2))for(let n=0;n<e.length;n++)for(let r=n+1;r<e.length;r++){var 
s;o.add((s=e[n],s*t+e[r]))}}return o}function p(e,t){return[e/t|0,e%t]}e.d(t,{HP:()=>c,sB:()=>g,wQ:()=>r,eE:()=>f,$i:()=>m,vm:()=>p,eL:()=>d,u5:()=>u,Wh:()=>n,g3:()=>a,_M:()=>l,aW:()=>s,Bu:()=>h,nC:()=>_});class _{parent;rank;constructor(e){this.parent=new Uint32Array(e),this.rank=new Uint8Array(e);for(let t=0;t<e;t++)this.parent[t]=t}find(e){let t=e;for(;this.parent[t]!==t;)t=this.parent[t];for(;this.parent[e]!==t;){let n=this.parent[e];this.parent[e]=t,e=n}return t}union(e,t){let n=this.find(e),r=this.find(t);n!==r&&(this.rank[n]<this.rank[r]?this.parent[n]=r:this.rank[n]>this.rank[r]?this.parent[r]=n:(this.parent[r]=n,this.rank[n]++))}getGroups(e){let t=new Map;for(let n of e){let e=this.find(n),r=t.get(e);r||(r=[],t.set(e,r)),r.push(n)}let n=[];for(let e of t.values())e.length>1&&(e.sort((e,t)=>e-t),n.push(e));return n.sort((e,t)=>e[0]-t[0]),n}}function c(e,t){let r=new Set,i=e.length;if(i<t)return i>0&&r.add(n(e,0,i)),r;let a=i-t+1;for(let i=0;i<a;i++)r.add(n(e,i,t));return r}function h(e){let t=new Uint32Array(e.size),n=0;for(let r of e)t[n++]=r;return t}function m(e,t){let n;return n=e?.workers===void 0||e?.workers===\"auto\"?t:e.workers,{threshold:e?.threshold??.8,ngramSize:e?.ngramSize??3,numPermutations:e?.numPermutations??128,numBands:e?.numBands??32,bandSize:e?.bandSize??4,workers:n,batchSize:e?.batchSize??1e3,preprocess:e?.preprocess??null,seed:e?.seed??42}}async function d(e,t,n){let r,i=performance.now(),o=e.length;if(o<2)return{pairs:[],groups:[],stats:{totalTimeMs:performance.now()-i,inputCount:o,candidateCount:0,duplicateCount:0,groupCount:0}};let{hashA:s,hashB:c}=a(t.numPermutations,t.seed),h=f(r=n&&t.workers>0&&o>=5e3?await n(e,t.ngramSize,t.numPermutations,s,c,t.workers,t.batchSize):u(e,t.ngramSize,t.numPermutations,s,c,t.preprocess),o,t.numPermutations,t.numBands,t.bandSize),m=[],d=new _(o),g=new Set;for(let e of 
h){let[n,i]=p(e,o),a=l(r,n,i,t.numPermutations);a>=t.threshold&&(m.push({indexA:n,indexB:i,similarity:a}),d.union(n,i),g.add(n),g.add(i))}m.sort((e,t)=>t.similarity-e.similarity);let w=d.getGroups(g).map(e=>({indices:e}));return{pairs:m,groups:w,stats:{totalTimeMs:performance.now()-i,inputCount:o,candidateCount:h.size,duplicateCount:m.length,groupCount:w.length}}}function g(e,t,n,r,i,a){let o=e;t.preprocess&&(o=t.preprocess(o));let u=h(c(o,t.ngramSize)),l=new Uint32Array(t.numPermutations);s(u,r,i,t.numPermutations,l,0);let f=[];for(let e=0;e<a;e++){let r=e*t.numPermutations,i=0;for(let e=0;e<t.numPermutations;e++)l[e]===n[r+e]&&i++;let a=i/t.numPermutations;a>=t.threshold&&f.push({indexA:-1,indexB:e,similarity:a})}return f.sort((e,t)=>t.similarity-e.similarity),f}var w=t.nC,x=t.u5,b=t.aW,k=t.vm,S=t._M,y=t.eL,P=t.eE,v=t.Wh,C=t.wQ,A=t.g3,z=t.sB,M=t.$i,U=t.HP,B=t.Bu;export{w as UnionFind,x as computeAllSignatures,b as computeSignature,k as decodePair,S as estimateSimilarity,y as executePipeline,P as findCandidates,v as fnv1aString,C as fnv1aUint32,A as generateHashCoefficients,z as querySignatures,M as resolveOptions,U as shingle,B as shingleToArray};\n//# sourceMappingURL=index.js.map","/**\n * Web Worker for browser-based parallel MinHash computation.\n * Receives string batches, computes signatures, posts results back.\n */\nimport { computeAllSignatures } from \"@mattgrill/nearline-core\";\n\ninterface WorkerInput {\n strings: string[];\n startIndex: number;\n ngramSize: number;\n numPermutations: number;\n hashA: number[];\n hashB: number[];\n}\n\nself.onmessage = (event: MessageEvent<WorkerInput>) => {\n const { strings, startIndex, ngramSize, numPermutations, hashA, hashB } = event.data;\n\n const signatures = computeAllSignatures(\n strings,\n ngramSize,\n numPermutations,\n new Uint32Array(hashA),\n new Uint32Array(hashB),\n null\n );\n\n self.postMessage(\n {\n signatures: signatures.buffer,\n startIndex,\n count: strings.length,\n },\n { transfer: 
[signatures.buffer] }\n );\n};\n"],"names":["n","r","Object","i","Math","Uint32Array","s","u","l","f","Set","Map","Uint8Array","performance","self","event","strings","startIndex","ngramSize","numPermutations","hashA","hashB","signatures","computeAllSignatures"],"mappings":"AAAA,IAAI,EAAE,CAAC,CAAE,GAAE,CAAC,CAAC,CAAC,EAAEA,KAAK,IAAI,IAAIC,KAAKD,EAAE,EAAE,CAAC,CAACA,EAAEC,IAAI,CAAC,EAAE,CAAC,CAAC,EAAEA,IAAIC,OAAO,cAAc,CAAC,EAAED,EAAE,CAAC,WAAW,CAAC,EAAE,IAAID,CAAC,CAACC,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,IAAIC,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAAC,EAAE,GAAG,IAAI,EAAE,CAAC,EAAE,SAASF,EAAE,CAAC,CAAC,EAAE,CAAC,CAACC,EAAE,EAAE,MAAM,EAAE,IAAIE,EAAE,WAAW,EAAE,EAAEF,EAAE,IAAI,IAAID,EAAE,EAAEA,EAAE,EAAEA,IAAIG,GAAG,EAAE,UAAU,CAACH,GAAGG,EAAEC,KAAK,IAAI,CAACD,EAAE,WAAW,OAAOA,IAAI,CAAC,CAAC,SAASF,EAAE,CAAC,CAAC,CAAC,CAACD,CAAC,EAAE,IAAIC,EAAE,WAAWE,EAAE,EAAEH,EAAE,IAAI,IAAIA,EAAE,EAAEA,EAAEG,EAAEH,IAAI,CAAC,IAAI,EAAE,CAAC,CAACA,EAAE,AAACC,CAAAA,GAAG,IAAI,EAAEA,EAAEG,KAAK,IAAI,CAACH,EAAEG,KAAK,IAAI,CAACH,EAAEG,KAAK,IAAI,CAACH,EAAEG,KAAK,IAAI,CAACH,EAAE,WAAW,IAAI,EAAE,IAAI,WAAW,IAAI,GAAG,IAAI,WAAW,IAAI,GAAG,IAAI,UAAU,CAAC,OAAOA,IAAI,CAAC,CAAC,SAASE,EAAE,CAAC,EAAE,OAAO,GAAG,GAAG,GAAG,GAAG,IAAI,GAAG,AAAC,IAAG,GAAG,KAAK,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,IAAIH,EAAE,IAAIK,YAAY,GAAGJ,EAAE,IAAII,YAAY,GAAG,EAAE,IAAI,EAAE,EAAE,EAAE,IAAI,IAAI,EAAE,EAAE,EAAE,EAAE,IAAI,EAAEF,EAAE,GAAGH,CAAC,CAAC,EAAE,CAAC,EAAE,EAAEG,EAAE,GAAGF,CAAC,CAAC,EAAE,CAAC,EAAE,MAAM,CAAC,MAAMD,EAAE,MAAMC,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAACD,CAAC,EAAE,IAAIC,EAAE,EAAE,MAAM,AAAC,CAACA,CAAAA,EAAEG,KAAK,IAAI,CAAC,AAACH,CAAAA,EAAEG,KAAK,IAAI,CAACH,EAAE,EAAE,WAAU,EAAGA,IAAI,GAAG,WAAU,EAAGD,EAAEC,IAAI,EAAC,IAAK,CAAC,CAAC,SAASK,EAAE,CAAC,CAAC,CAAC,CAACN,CAAC,CAACC,CAAC,CAACE,CAAC,CAAC,CAAC,EAAE,IAAI,IAAI,EAAE,EAAE,EAAEF,EAAE,IAAIE,CAAC,CAAC,EAAE,EAAE,CAAC,WAAW,IAAIG,EAAE,EAAE,MAAM,CAAC,GAAG,IAAIA,EAAE,IAAI,IAAIC,EAAE,EAAEA,EAAED,EAAEC,IAAI,CAAC,IAAID,EAAE,CAAC,CAACC,EAAE,CAAC,IAAI,IAAI,EAAE,EAAE,EAAEN,EAAE,IA
AI,CAAC,IAAIA,EAAE,EAAEK,EAAE,CAAC,CAAC,EAAE,CAACN,CAAC,CAAC,EAAE,CAAEC,CAAAA,EAAEE,CAAC,CAAC,EAAE,EAAE,EAAGA,CAAAA,CAAC,CAAC,EAAE,EAAE,CAACF,CAAAA,CAAE,CAAC,CAAC,CAAC,SAASM,EAAE,CAAC,CAAC,CAAC,CAACN,CAAC,CAACE,CAAC,CAAC,CAAC,CAACG,CAAC,EAAE,IAAIC,EAAE,EAAE,MAAM,CAACC,EAAE,IAAIH,YAAYE,EAAEN,GAAG,IAAI,IAAIQ,EAAE,EAAEA,EAAEF,EAAEE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAACR,CAAC,CAACE,CAAC,CAAC,CAAC,CAACG,CAAC,CAACC,CAAC,EAAE,IAAI,IAAI,EAAE,EAAE,EAAE,EAAE,IAAID,CAAC,CAACC,EAAE,EAAE,CAAC,WAAW,IAAIC,EAAE,EAAE,MAAM,CAAC,GAAG,IAAIA,EAAE,OAAO,GAAGA,EAAE,EAAE,CAAC,IAAI,EAAER,EAAE,EAAE,EAAEQ,GAAG,IAAI,IAAI,EAAE,EAAE,EAAE,EAAE,IAAIF,CAAC,CAACC,EAAE,EAAE,CAAC,EAAE,EAAEN,CAAC,CAAC,EAAE,CAACE,CAAC,CAAC,EAAE,EAAE,MAAM,CAAC,IAAIM,EAAED,EAAE,EAAE,EAAE,IAAI,IAAIA,EAAE,EAAEA,EAAEC,EAAED,IAAI,CAAC,IAAIC,EAAET,EAAE,EAAEQ,EAAE,GAAG,IAAI,IAAI,EAAE,EAAE,EAAE,EAAE,IAAI,CAAC,IAAI,EAAE,EAAEC,EAAER,CAAC,CAAC,EAAE,CAACE,CAAC,CAAC,EAAE,CAAE,GAAEG,CAAC,CAACC,EAAE,EAAE,EAAGD,CAAAA,CAAC,CAACC,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,EAAED,EAAEA,EAAE,CAAC,CAACG,EAAE,EAAE,CAAC,CAACA,EAAE,CAAC,EAAEN,EAAE,EAAEF,EAAEO,EAAEC,EAAER,GAAG,OAAOO,CAAC,CAAC,SAASA,EAAE,CAAC,CAAC,CAAC,CAACR,CAAC,CAACC,CAAC,EAAE,IAAIE,EAAE,EAAEF,EAAE,EAAED,EAAEC,EAAE,EAAE,EAAE,IAAI,IAAI,EAAE,EAAE,EAAEA,EAAE,IAAI,CAAC,CAACE,EAAE,EAAE,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,IAAI,OAAO,EAAEF,CAAC,CAAC,SAASQ,EAAE,CAAC,CAAC,CAAC,CAACT,CAAC,CAACG,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAIO,IAAI,IAAI,IAAI,EAAE,EAAE,EAAEP,EAAE,IAAI,CAAC,IAAIA,EAAE,EAAE,EAAE,EAAE,IAAIQ,IAAI,IAAI,IAAI,EAAE,EAAE,EAAE,EAAE,IAAI,CAAC,IAAI,EAAEV,EAAE,EAAE,EAAED,EAAEG,EAAE,GAAGG,EAAE,EAAE,GAAG,CAAC,EAAGA,CAAAA,GAAIA,CAAAA,EAAE,EAAE,CAAC,EAAE,GAAG,CAAC,EAAEA,EAAC,EAAGA,EAAE,IAAI,CAAC,EAAE,CAAC,IAAI,IAAI,KAAK,EAAE,MAAM,GAAG,GAAG,CAAE,GAAE,MAAM,CAAC,GAAG,IAAI,IAAIN,EAAE,EAAEA,EAAE,EAAE,MAAM,CAACA,IAAI,IAAI,IAAIC,EAAED,EAAE,EAAEC,EAAE,EAAE,MAAM,CAACA,IAAW,EAAE,GAAG,CAAE,AAAE,CAAC,CAACD,EAAE,CAAG,EAAE,CAAC,CAACC,EAAE,CAAG,CAAC,OAAO,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,EAAE,EAAE,EAAE,EAAE,E
AAE,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC,GAAG,IAAI,EAAE,GAAG,IAAI,EAAE,GAAG,IAAIA,EAAE,GAAG,IAAIQ,EAAE,GAAG,IAAI,EAAE,GAAG,IAAI,EAAE,GAAG,IAAI,EAAE,GAAG,IAAIF,EAAE,GAAG,IAAIP,EAAE,GAAG,IAAI,EAAE,GAAG,IAAIQ,EAAE,GAAG,IAAIF,EAAE,GAAG,IAAI,EAAE,GAAG,IAAI,CAAC,EAAG,OAAM,EAAE,MAAO,KAAK,aAAY,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAID,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,IAAIO,WAAW,GAAG,IAAI,IAAI,EAAE,EAAE,EAAE,EAAE,IAAI,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,KAAK,IAAI,CAAC,MAAM,CAAC,EAAE,GAAG,GAAG,EAAE,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,KAAK,IAAI,CAAC,MAAM,CAAC,EAAE,GAAG,GAAG,CAAC,IAAIZ,EAAE,IAAI,CAAC,MAAM,CAAC,EAAE,AAAC,KAAI,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,EAAEA,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAIA,EAAE,IAAI,CAAC,IAAI,CAAC,GAAGC,EAAE,IAAI,CAAC,IAAI,CAAC,EAAGD,CAAAA,IAAIC,GAAI,KAAI,CAAC,IAAI,CAACD,EAAE,CAAC,IAAI,CAAC,IAAI,CAACC,EAAE,CAAC,IAAI,CAAC,MAAM,CAACD,EAAE,CAACC,EAAE,IAAI,CAAC,IAAI,CAACD,EAAE,CAAC,IAAI,CAAC,IAAI,CAACC,EAAE,CAAC,IAAI,CAAC,MAAM,CAACA,EAAE,CAACD,EAAG,KAAI,CAAC,MAAM,CAACC,EAAE,CAACD,EAAE,IAAI,CAAC,IAAI,CAACA,EAAE,EAAC,CAAC,CAAE,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,EAAE,IAAIW,IAAI,IAAI,IAAIX,KAAK,EAAE,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,CAACA,GAAGC,EAAE,EAAE,GAAG,CAAC,EAAGA,CAAAA,GAAIA,CAAAA,EAAE,EAAE,CAAC,EAAE,GAAG,CAAC,EAAEA,EAAC,EAAGA,EAAE,IAAI,CAACD,EAAE,CAAC,IAAIA,EAAE,EAAE,CAAC,IAAI,IAAI,KAAK,EAAE,MAAM,GAAG,EAAE,MAAM,CAAC,GAAI,GAAE,IAAI,CAAC,CAAC,EAAE,IAAI,EAAE,GAAGA,EAAE,IAAI,CAAC,EAAC,EAAG,OAAOA,EAAE,IAAI,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,EAAEA,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,IAAIC,EAAE,IAAIS,IAAIP,EAAE,EAAE,MAAM,CAAC,GAAGA,EAAE,EAAE,OAAOA,EAAE,GAAGF,EAAE,GAAG,CAACD,EAAE,EAAE,EAAEG,IAAIF,EAAE,IAAI,EAAEE,EAAE,EAAE,EAAE,IAAI,IAAIA,EAAE,EAAEA,EAAE,EAAEA,IAAIF,EAAE,GAAG,CAACD,EAAE,EAAEG,EAAE,IAAI,OAAOF,CAAC,CAAC,SAAS,EAAE,CAAC,EAAE,IAAI,EAAE,IAAII,YAAY,EAAE,IAAI,EAAEL,EAAE,EAAE,IAAI,IAAIC,KAAK,EAAE,CAAC,CAACD,IAAI,CAACC,EAAE,OAAO,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,IAAID,EAAE,OAAOA,EAAE,G
AAG,UAAU,KAAK,GAAG,GAAG,UAAU,OAAO,EAAE,EAAE,OAAO,CAAC,CAAC,UAAU,GAAG,WAAW,GAAG,UAAU,GAAG,WAAW,EAAE,gBAAgB,GAAG,iBAAiB,IAAI,SAAS,GAAG,UAAU,GAAG,SAAS,GAAG,UAAU,EAAE,QAAQA,EAAE,UAAU,GAAG,WAAW,IAAI,WAAW,GAAG,YAAY,KAAK,KAAK,GAAG,MAAM,EAAE,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC,CAAC,CAACA,CAAC,EAAE,IAAIC,EAAEE,EAAEU,YAAY,GAAG,GAAG,EAAE,EAAE,MAAM,CAAC,GAAG,EAAE,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC,OAAO,EAAE,CAAC,MAAM,CAAC,YAAYA,YAAY,GAAG,GAAGV,EAAE,WAAW,EAAE,eAAe,EAAE,eAAe,EAAE,WAAW,CAAC,CAAC,EAAE,GAAG,CAAC,MAAMG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,eAAe,CAAC,EAAE,IAAI,EAAE,EAAEG,EAAER,EAAED,GAAG,EAAE,OAAO,CAAC,GAAG,GAAG,IAAI,MAAMA,EAAE,EAAE,EAAE,SAAS,CAAC,EAAE,eAAe,CAACM,EAAE,EAAE,EAAE,OAAO,CAAC,EAAE,SAAS,EAAEC,EAAE,EAAE,EAAE,SAAS,CAAC,EAAE,eAAe,CAACD,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE,eAAe,CAAC,EAAE,QAAQ,CAAC,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC,EAAE,IAAI,EAAE,GAAG,EAAE,IAAII,IAAI,IAAI,IAAI,KAAK,EAAE,CAAC,GAAG,CAACV,EAAEG,EAAE,CAAC,EAAE,EAAE,GAAG,EAAEK,EAAEP,EAAED,EAAEG,EAAE,EAAE,eAAe,CAAE,IAAG,EAAE,SAAS,EAAG,GAAE,IAAI,CAAC,CAAC,OAAOH,EAAE,OAAOG,EAAE,WAAW,CAAC,GAAG,EAAE,KAAK,CAACH,EAAEG,GAAG,EAAE,GAAG,CAACH,GAAG,EAAE,GAAG,CAACG,EAAC,CAAE,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,IAAI,EAAE,UAAU,CAAC,EAAE,UAAU,EAAE,IAAI,EAAE,EAAE,SAAS,CAAC,GAAG,GAAG,CAAC,GAAI,EAAC,QAAQ,CAAC,IAAI,MAAM,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,YAAYU,YAAY,GAAG,GAAGV,EAAE,WAAW,EAAE,eAAe,EAAE,IAAI,CAAC,eAAe,EAAE,MAAM,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAACH,CAAC,CAACC,CAAC,CAACE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAE,GAAE,UAAU,EAAG,GAAE,EAAE,UAAU,CAAC,EAAC,EAAG,IAAII,EAAE,EAAE,EAAE,EAAE,EAAE,SAAS,GAAGC,EAAE,IAAIH,YAAY,EAAE,eAAe,EAAEC,EAAEC,EAAEN,EAAEE,EAAE,EAAE,eAAe,CAACK,EAAE,GAAG,IAAIC,EAAE,EAAE,CAAC,IAAI,IAAI,EAAE,EAAE,EAAE,EAAE,IAAI,CAAC,IAAIR,EAAE,EAAE,EAAE,eAAe,CAACE,EAAE,EAAE,IAAI,IAAI,EAAE,EAAE,EAAE,EAAE,eAAe,CAAC,IAAIK,CAAC,CAAC,EAAE,GAAGR,CAAC,CAACC,EAAE,EAAE,EAAEE,IAAI,IAAI,EAAEA,EAAE,EAAE,eAAe,AAAC,IAAG,EAAE,SAAS,EAAEM,EAAE,IAAI,CAAC,CAAC,OAAO,GAAG,OAAO,EAAE,WAAW,CAAC,EAAE,CAAC,OAAOA,EAAE,IAAI,CAAC,C
AAC,EAAE,IAAI,EAAE,UAAU,CAAC,EAAE,UAAU,EAAEA,CAAC,CAAO,EAAE,EAAE,CAAV,IAAW,EAAE,EAAE,EAAE,AAAG,GAAE,EAAE,CAAG,EAAE,EAAE,CAAG,EAAE,EAAE,CAAG,EAAE,EAAE,CAAG,EAAE,EAAE,CAAG,EAAE,EAAE,CAAG,EAAE,EAAE,CAAG,EAAE,EAAE,CAAG,EAAE,EAAE,CAAG,EAAE,EAAE,CAAG,EAAE,EAAE,CAAG,EAAE,EAAE,CCe95IK,KAAK,SAAS,CAAG,AAACC,IAChB,GAAM,CAAEC,QAAAA,CAAO,CAAEC,WAAAA,CAAU,CAAEC,UAAAA,CAAS,CAAEC,gBAAAA,CAAe,CAAEC,MAAAA,CAAK,CAAEC,MAAAA,CAAK,CAAE,CAAGN,EAAM,IAAI,CAE9EO,EAAaC,EACjBP,EACAE,EACAC,EACA,IAAId,YAAYe,GAChB,IAAIf,YAAYgB,GAChB,MAGFP,KAAK,WAAW,CACd,CACE,WAAYQ,EAAW,MAAM,CAC7BL,WAAAA,EACA,MAAOD,EAAQ,MAAM,AACvB,EACA,CAAE,SAAU,CAACM,EAAW,MAAM,CAAC,AAAC,EAEpC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"minhash-worker.d.ts","sourceRoot":"","sources":["../../src/workers/minhash-worker.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { ParallelComputeFn } from "@mattgrill/nearline-core";
|
|
2
|
+
/**
|
|
3
|
+
* Options for creating the Web Worker pool.
|
|
4
|
+
*/
|
|
5
|
+
export interface WebWorkerPoolOptions {
|
|
6
|
+
/**
|
|
7
|
+
* URL to the compiled worker script.
|
|
8
|
+
* - If using a bundler like Vite: `new URL('./minhash-worker.js', import.meta.url)`
|
|
9
|
+
* - If self-hosting: URL to the worker file from this package's `./worker` export
|
|
10
|
+
* - If omitted: falls back to single-threaded computation
|
|
11
|
+
*/
|
|
12
|
+
workerUrl?: URL | string;
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* Create a parallel compute function using Web Workers.
|
|
16
|
+
* Returns null if workerUrl is not provided.
|
|
17
|
+
*/
|
|
18
|
+
export declare function createWebWorkerCompute(options: WebWorkerPoolOptions): ParallelComputeFn | null;
|
|
19
|
+
//# sourceMappingURL=pool.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pool.d.ts","sourceRoot":"","sources":["../../src/workers/pool.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAElE;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC;;;;;OAKG;IACH,SAAS,CAAC,EAAE,GAAG,GAAG,MAAM,CAAC;CAC1B;AAED;;;GAGG;AACH,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,oBAAoB,GAC5B,iBAAiB,GAAG,IAAI,CAoF1B"}
|
package/package.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@mattgrill/nearline-web",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Browser-optimized near-duplicate string detection using MinHash + LSH",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./dist/index.cjs",
|
|
7
|
+
"module": "./dist/index.js",
|
|
8
|
+
"types": "./dist/index.d.ts",
|
|
9
|
+
"exports": {
|
|
10
|
+
".": {
|
|
11
|
+
"types": "./dist/index.d.ts",
|
|
12
|
+
"import": "./dist/index.js",
|
|
13
|
+
"require": "./dist/index.cjs"
|
|
14
|
+
},
|
|
15
|
+
"./worker": {
|
|
16
|
+
"import": "./dist/minhash-worker.js",
|
|
17
|
+
"require": "./dist/minhash-worker.cjs"
|
|
18
|
+
}
|
|
19
|
+
},
|
|
20
|
+
"files": [
|
|
21
|
+
"dist"
|
|
22
|
+
],
|
|
23
|
+
"scripts": {
|
|
24
|
+
"build": "rm -rf dist && rspack build -c rspack.config.mjs && tsc --emitDeclarationOnly --outDir dist",
|
|
25
|
+
"lint": "tsc --noEmit"
|
|
26
|
+
},
|
|
27
|
+
"dependencies": {
|
|
28
|
+
"@mattgrill/nearline-core": "^1.0.0"
|
|
29
|
+
},
|
|
30
|
+
"repository": {
|
|
31
|
+
"type": "git",
|
|
32
|
+
"url": "https://github.com/matthewgrill/duplicate-finder.git",
|
|
33
|
+
"directory": "packages/client"
|
|
34
|
+
},
|
|
35
|
+
"author": "Matthew Grill",
|
|
36
|
+
"license": "MIT",
|
|
37
|
+
"keywords": [
|
|
38
|
+
"duplicate",
|
|
39
|
+
"deduplication",
|
|
40
|
+
"minhash",
|
|
41
|
+
"lsh",
|
|
42
|
+
"browser",
|
|
43
|
+
"web-worker"
|
|
44
|
+
]
|
|
45
|
+
}
|