gs-search 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/index.cjs CHANGED
@@ -1 +1 @@
1
- "use strict";/* gs-search 0.1.2 CommonJS bundle (minified). Defines two storage backends (classes t and e), a metadata store (class a), a token-cache writer (class r), a hash-keyed index segment (class o), the search engine (class d) and a static facade; see the exports at the end for the public names. */Object.create,Object.defineProperty,Object.getOwnPropertyDescriptor,Object.getOwnPropertyNames,Object.getPrototypeOf,Object.prototype.hasOwnProperty;/* class t (exported as BrowserStorage): file storage on top of the directory handle returned by navigator.storage.getDirectory(); every method resolves the sub-directory named by the constructor argument, creating it if needed. read/readRange return null and getFileSize returns 0 when the lookup throws. */class t{#t;constructor(t){this.#t=t}async#e(){return await(await navigator.storage.getDirectory()).getDirectoryHandle(this.#t,{create:!0})}async write(t,e){const s=await(await(await this.#e()).getFileHandle(t,{create:!0})).createWritable();await s.write(e),await s.close()}async append(t,e){const s=await this.#e();let i;/* NOTE(review): the catch branch repeats the exact same call as the try branch */try{i=await s.getFileHandle(t,{create:!0})}catch{i=await s.getFileHandle(t,{create:!0})}const n=await i.getFile(),a=await i.createWritable({keepExistingData:!0});await a.seek(n.size),await a.write(e),await a.close()}async read(t){const e=await this.#e();try{return await(await(await e.getFileHandle(t)).getFile()).arrayBuffer()}catch{return null}}async readRange(t,e,s){const i=await this.#e();try{return await(await(await i.getFileHandle(t)).getFile()).slice(e,s).arrayBuffer()}catch{return null}}async remove(t){const e=await this.#e();try{await e.removeEntry(t)}catch{}}async listFiles(){const t=await this.#e(),e=[];for await(const s of t.keys())e.push(s);return e}async clearAll(){const t=await this.#e();for await(const e of t.keys())await t.removeEntry(e,{recursive:!0})}async getFileSize(t){const e=await this.#e();try{return(await(await e.getFileHandle(t)).getFile()).size}catch{return 0}}}/* class e (exported as NodeStorage): same method set implemented with node:fs promises; files live under path.join(process.cwd(), baseDir), which is created on first use. */class e{#s=null;#i=null;#t;#n="";constructor(t){this.#t=t}async#a(){if(this.#s)return;const t=await import("node:fs"),e=await import("node:path");this.#s=t.promises,this.#i=e.default||e,this.#n=this.#i.join(process.cwd(),this.#t);try{await this.#s.access(this.#n)}catch{await this.#s.mkdir(this.#n,{recursive:!0})}}#r(t){return this.#i.join(this.#n,t)}async write(t,e){await this.#a(),await this.#s.writeFile(this.#r(t),Buffer.from(e))}async append(t,e){await this.#a(),await this.#s.appendFile(this.#r(t),Buffer.from(e))}async read(t){await this.#a();try{const e=await this.#s.readFile(this.#r(t));return 
e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)}catch{return null}}async readRange(t,e,s){await this.#a();/* NOTE(review): the handle opened here is closed only on the success path; a throwing read() leaves it open */try{const i=await this.#s.open(this.#r(t),"r"),n=s-e,a=Buffer.alloc(n);return await i.read(a,0,n,e),await i.close(),a.buffer.slice(a.byteOffset,a.byteOffset+a.byteLength)}catch{return null}}async remove(t){await this.#a();try{await this.#s.unlink(this.#r(t))}catch{}}async listFiles(){await this.#a();try{return await this.#s.readdir(this.#n)}catch{return[]}}async clearAll(){await this.#a();try{const t=await this.#s.readdir(this.#n);for(const e of t)await this.#s.unlink(this.#i.join(this.#n,e))}catch{}}async getFileSize(t){await this.#a();try{return(await this.#s.stat(this.#r(t))).size}catch{return 0}}}/* Metadata file names, followed by class a: keeps the segment lists (JSON in search_meta.json) and two id sets (deleted, added). Each id file is a sequence of uint32 little-endian ids, each followed by the byte 30. */const s="search_meta.json",i="deleted_ids.bin",n="added_ids.bin";class a{#o;#h={wordSegments:[],charSegments:[]};#c=new Set;#d=new Set;constructor(t){this.#o=t}async load(){const t=await this.#o.read(s);if(t){const e=(new TextDecoder).decode(t);this.#h=JSON.parse(e)}else this.#h={wordSegments:[],charSegments:[]};const e=await this.#o.read(i);if(e){const t=new DataView(e);let s=0;const i=e.byteLength;for(;s<i&&!(s+4>i);){const e=t.getUint32(s,!0);this.#c.add(e),s+=4,s<i&&30===t.getUint8(s)&&(s+=1)}}const a=await this.#o.read(n);if(a){const t=new DataView(a);let e=0;const s=a.byteLength;for(;e<s&&!(e+4>s);){const i=t.getUint32(e,!0);this.#d.add(i),e+=4,e<s&&30===t.getUint8(e)&&(e+=1)}}}async save(){const t=JSON.stringify(this.#h);if(await this.#o.write(s,(new TextEncoder).encode(t).buffer),0===this.#c.size)await this.#o.remove(i);else{const t=4*this.#c.size+this.#c.size,e=new ArrayBuffer(t),s=new DataView(e);let n=0;for(const t of this.#c)s.setUint32(n,t,!0),n+=4,s.setUint8(n,30),n+=1;await this.#o.write(i,e)}if(0===this.#d.size)await this.#o.remove(n);else{const t=4*this.#d.size+this.#d.size,e=new ArrayBuffer(t),s=new DataView(e);let i=0;for(const t of this.#d)s.setUint32(i,t,!0),i+=4,s.setUint8(i,30),i+=1;await 
this.#o.write(n,e)}}getSegments(t){return"word"===t?this.#h.wordSegments:this.#h.charSegments}getDeletedIds(){return this.#c}addDeletedId(t){this.#c.add(t)}isDeleted(t){return this.#c.has(t)}addAddedId(t){this.#d.add(t)}removeAddedId(t){this.#d.delete(t)}isAdded(t){return this.#d.has(t)}getAddedIds(){return this.#d}hasDocument(t){return this.#d.has(t)||this.#c.has(t)}getLastSegmentInfo(t){const e=this.getSegments(t);return 0===e.length?null:e[e.length-1]}updateSegment(t,e,s,i,n,a){const r="word"===t?this.#h.wordSegments:this.#h.charSegments;if(a)r.push({filename:e,start:s,end:i,tokenCount:n});else{const t=r[r.length-1];t&&t.filename===e&&(t.end=i,t.tokenCount=n)}}reset(){this.#h={wordSegments:[],charSegments:[]},this.#c.clear(),this.#d.clear()}}/* class r: appends tokenized documents to a cache file and reads a byte range back. Record layout: id (uint32 LE), token count (uint32 LE), then per token a uint16 LE byte length plus the UTF-8 bytes (cut at 65535 bytes), then the separator byte 30. */class r{static SEPARATOR=30;#o;constructor(t){this.#o=t}async appendBatch(t,e){if(0===e.length)return await this.#o.getFileSize(t);const s=new TextEncoder;let i=0;for(const t of e){i+=8;for(const e of t.tokens){i+=2+Math.min(s.encode(e).byteLength,65535)}i+=1}const n=new Uint8Array(i);let a=0;for(const t of e){const e=[];for(const i of t.tokens){const t=s.encode(i),n=t.byteLength>65535?t.slice(0,65535):t;e.push(n)}const i=new DataView(n.buffer,a);i.setUint32(0,t.id,!0),i.setUint32(4,e.length,!0),a+=8;for(const t of e)new DataView(n.buffer,a).setUint16(0,t.byteLength,!0),a+=2,n.set(t,a),a+=t.byteLength;n[a++]=r.SEPARATOR}return await this.#o.append(t,n.buffer),await this.#o.getFileSize(t)}async readRange(t,e,s){const i=await this.#o.readRange(t,e,s);if(!i||0===i.byteLength)return[];const n=new DataView(i),a=new Uint8Array(i),o=new TextDecoder,h=[];let c=0;const d=i.byteLength;for(;c<d&&!(c+8>d);){const t=n.getUint32(c,!0);c+=4;const e=n.getUint32(c,!0);c+=4;const s=[];for(let t=0;t<e&&!(c+2>d);t++){const t=n.getUint16(c,!0);if(c+=2,c+t>d)break;const e=new Uint8Array(i,c,t);s.push(o.decode(e)),c+=t}c<d&&a[c]===r.SEPARATOR&&(c+=1),h.push({id:t,tokens:s})}return h}async getCurrentSize(t){return await 
this.#o.getFileSize(t)}}/* class o: one index file. hash() starts at 5381 and folds each char code in with (e<<5)+e^code. The file holds an 8-byte header (constant 1229866072, entry count), 12-byte entries (hash, postings offset, postings count) sorted by hash, then uint32 LE document ids; search() binary-searches the entries. NOTE(review): entries are keyed by hash only, so tokens with equal hashes share one postings list. */class o{#g;#o;#l=null;#f=null;static hash(t){let e=5381;for(let s=0;s<t.length;s++)e=(e<<5)+e^t.charCodeAt(s);return e>>>0}constructor(t,e){this.#g=t,this.#o=e}async loadIndex(){return!!this.#l||(this.#l=await this.#o.read(this.#g),!!this.#l&&(this.#f=new DataView(this.#l),!0))}async buildAndSave(t){const e=new Map;for(const s of t){const t=new Map;for(const i of s.tokens)if(!t.has(i)){t.set(i,!0);const n=o.hash(i);e.has(n)||e.set(n,[]),e.get(n).push(s.id)}}const s=Array.from(e.keys()).sort((t,e)=>t-e);let i=0;const n=new Array(s.length);for(let t=0;t<s.length;t++){const a=s[t],r=e.get(a);n[t]=r,i+=r.length}const a=12*s.length,r=new ArrayBuffer(8+a+4*i),h=new DataView(r);h.setUint32(0,1229866072),h.setUint32(4,s.length);let c=8,d=8+a;for(let t=0;t<s.length;t++){const e=s[t],i=n[t];h.setUint32(c,e),h.setUint32(c+4,d),h.setUint32(c+8,i.length),c+=12;for(let t=0;t<i.length;t++)h.setUint32(d,i[t],!0),d+=4}await this.#o.write(this.#g,r),this.#l=r,this.#f=h}search(t){if(!this.#f||!this.#l)return[];const e=o.hash(t);let s=0,i=this.#f.getUint32(4)-1;for(;s<=i;){const t=s+i>>>1,n=8+12*t,a=this.#f.getUint32(n);if(a<e)s=t+1;else{if(!(a>e)){const t=this.#f.getUint32(n+4),e=this.#f.getUint32(n+8),s=[];for(let i=0;i<e;i++)s.push(this.#f.getUint32(t+4*i,!0));return s}i=t-1}}return[]}}/* Token cache file names, followed by class d (exported as SearchEngine): tokenizes documents, appends tokens longer than one character to the word cache and single-character tokens to the char cache, builds index segments from cache ranges, and answers queries by looking each query token up in every segment. */const h="word_cache.bin",c="char_cache.bin";class d{#o;#h;#w;#u;#m=!1;#y;#p=!1;#D={word:0,char:0};constructor(s){if(!s.baseDir)throw new Error("SearchEngine requires 'baseDir' in config.");if(this.#y={wordSegmentTokenThreshold:1e5,charSegmentTokenThreshold:5e5,minWordTokenSave:0,minCharTokenSave:0,...s},(this.#y.minWordTokenSave||0)>=(this.#y.wordSegmentTokenThreshold||1e5))throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");if((this.#y.minCharTokenSave||0)>=(this.#y.charSegmentTokenThreshold||5e5))throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");let i=null;if(this.#y.storage&&("object"==typeof 
this.#y.storage?i=this.#y.storage:"browser"===this.#y.storage?i=new t(this.#y.baseDir):"node"===this.#y.storage&&(i=new e(this.#y.baseDir))),!i){const s=typeof navigator<"u"&&navigator?.storage?.getDirectory instanceof Function,n=typeof process<"u"&&null!=process.versions&&null!=process.versions.node;s?i=new t(this.#y.baseDir):n&&(i=new e(this.#y.baseDir))}if(!i)throw new Error('Storage initialization failed. Please configure "storage" explicitly or ensure you are in a supported environment (Browser/Node).');this.#o=i,this.#h=new a(this.#o),this.#w=new r(this.#o),this.#u=new Map}async init(){if(this.#m)return;await this.#h.load();const t=[...this.#h.getSegments("word"),...this.#h.getSegments("char")];for(const e of t)this.#u.has(e.filename)||this.#u.set(e.filename,new o(e.filename,this.#o)),await this.#u.get(e.filename).loadIndex();this.#m=!0}startBatch(){this.#p=!0,this.#D={word:0,char:0}}async endBatch(){this.#p=!1,this.#D.word>0&&await this.#S("word",this.#D.word),this.#D.char>0&&await this.#S("char",this.#D.char),this.#D={word:0,char:0},await this.#h.save()}/* default tokenizer: Intl.Segmenter word segments when available, otherwise a lower-cased split on the regex below */#k(t){try{if(typeof Intl<"u"&&"function"==typeof Intl.Segmenter&&"function"==typeof Array.from){const e=new Intl.Segmenter([],{granularity:"word"}).segment(t);if("object"==typeof e&&null!==e)return Array.from(e).filter(t=>t?.isWordLike).map(t=>t?.segment?.toLowerCase()||"")}}catch{}return t.toLowerCase().split(/[^a-z0-9\u4e00-\u9fa5]+/g).filter(t=>t.length>0)}#I(t){return this.#y.indexingTokenizer?this.#y.indexingTokenizer(t):this.#k(t.text)}#b(t){return this.#y.searchTokenizer?this.#y.searchTokenizer(t):this.#I(t)}async addDocument(t){return this.addDocuments([t])}async addDocumentIfMissing(t){return this.addDocumentsIfMissing([t])}/* skips ids that are already added or deleted instead of throwing */async addDocumentsIfMissing(t){if(this.#m||await this.init(),0===t.length)return;const e=this.#h.getDeletedIds(),s=[],i=[],n=[];for(const a of t){if(e.has(a.id)||this.#h.isAdded(a.id))continue;const t=this.#I(a),r=[],o=[];for(const e of 
t)e.length>1?r.push(e):1===e.length&&o.push(e);r.length>0&&s.push({id:a.id,tokens:r}),o.length>0&&i.push({id:a.id,tokens:o}),n.push(a)}if(0===n.length)return;let a=0,r=0;if(s.length>0){await this.#w.appendBatch(h,s);for(const t of s)a+=t.tokens.length}if(i.length>0){await this.#w.appendBatch(c,i);for(const t of i)r+=t.tokens.length}for(const t of n)this.#h.addAddedId(t.id);this.#p?(this.#D.word+=a,this.#D.char+=r):(a>0&&await this.#S("word",a),r>0&&await this.#S("char",r),await this.#h.save())}/* throws when an id was deleted earlier or is already present */async addDocuments(t){if(this.#m||await this.init(),0===t.length)return;const e=this.#h.getDeletedIds(),s=[],i=[];for(const n of t){if(e.has(n.id))throw new Error(`Document ID ${n.id} has been deleted and cannot be re-added.`);if(this.#h.isAdded(n.id))throw new Error(`Document ID ${n.id} already exists.`);const t=this.#I(n),a=[],r=[];for(const e of t)e.length>1?a.push(e):1===e.length&&r.push(e);a.length>0&&s.push({id:n.id,tokens:a}),r.length>0&&i.push({id:n.id,tokens:r})}let n=0,a=0;if(s.length>0){await this.#w.appendBatch(h,s);for(const t of s)n+=t.tokens.length}if(i.length>0){await this.#w.appendBatch(c,i);for(const t of i)a+=t.tokens.length}for(const e of t)this.#h.addAddedId(e.id);this.#p?(this.#D.word+=n,this.#D.char+=a):(n>0&&await this.#S("word",n),a>0&&await this.#S("char",a),await this.#h.save())}/* #S(type, newTokenCount): extends the last segment, or starts a new one once its token count reaches the configured threshold, then rebuilds that segment's index from the cache range unless the count is below the min*TokenSave setting */async#S(t,e){const s="word"===t?h:c,i=await this.#w.getCurrentSize(s),n="word"===t?this.#y.wordSegmentTokenThreshold||1e5:this.#y.charSegmentTokenThreshold||5e5,a="word"===t?this.#y.minWordTokenSave||0:this.#y.minCharTokenSave||0,r=this.#h.getLastSegmentInfo(t);let d,g,l,f;const w=()=>{const e=this.#h.getSegments(t).length+1;return`${t}_seg_${e}.bin`};if(r){const t=r.tokenCount;t>=n||t+e>=n?(d=w(),l=!0,g=r.end,f=e):(d=r.filename,l=!1,g=r.start,f=t+e)}else d=w(),l=!0,g=0,f=e;if(f<a)return void this.#h.updateSegment(t,d,g,i,f,l);const u=await this.#w.readRange(s,g,i);let m=this.#u.get(d);m||(m=new o(d,this.#o),this.#u.set(d,m)),await 
m.buildAndSave(u),this.#h.updateSegment(t,d,g,i,f,l)}async search(t,e){this.#m||await this.init();const s="string"==typeof t?{text:t}:t,i=this.#b(s),n=i.filter(t=>t.length>1),a=i.filter(t=>1===t.length),r=this.#h.getDeletedIds(),h=new Map,c=new Map,d=t=>{const e=this.#h.getSegments(t);for(const t of e){const e=t.filename;!this.#u.has(e)&&!c.has(e)&&c.set(e,new o(e,this.#o))}};d("word"),d("char"),await Promise.all(Array.from(c.entries()).map(([t,e])=>e.loadIndex().then(s=>{s&&this.#u.set(t,e)})));const g=async(t,e)=>{if(0===e.length)return;const s=this.#h.getSegments(t);for(const t of s){const s=t.filename,i=this.#u.get(s);if(i)for(const t of e){const e=i.search(t),s=1+.1*t.length;for(const i of e)if(!r.has(i))if(h.has(i)){const e=h.get(i);e.score+=s,e.tokens.add(t)}else /* NOTE(review): the first matching token starts the score at 0 rather than at the token weight s — confirm whether that is intended */h.set(i,{score:0,tokens:new Set([t])})}}};await g("word",n),await g("char",a);const l=[];return h.forEach((t,e)=>{l.push({id:e,score:t.score,tokens:Array.from(t.tokens)})}),l.sort((t,e)=>e.score-t.score),"number"==typeof e&&e>0?l.slice(0,e):l}async removeDocument(t){this.#m||await this.init(),this.#h.addDeletedId(t),this.#h.removeAddedId(t),await this.#h.save()}async clearAll(){await this.#o.clearAll(),this.#u.clear(),this.#h.reset(),this.#m=!1,this.#p=!1,this.#D={word:0,char:0}}async getStatus(){return this.#m||await this.init(),{wordSegments:this.#h.getSegments("word").length,charSegments:this.#h.getSegments("char").length,deleted:this.#h.getDeletedIds().size,wordCacheSize:await this.#w.getCurrentSize(h),charCacheSize:await this.#w.getCurrentSize(c),inBatch:this.#p}}async hasDocument(t){return this.#m||await this.init(),this.#h.hasDocument(t)}}/* Public exports. SimpleSearch is a static facade that lazily creates one engine (class d) from its default config, or from configure(). */exports.BrowserStorage=t,exports.NodeStorage=e,exports.SearchEngine=d,exports.SimpleSearch=class{static#T=null;static#v={baseDir:"simple_search_data",wordSegmentTokenThreshold:1e5,minWordTokenSave:0};static configure(t){const e={...this.#v,...t};this.#T=new d(e)}static#z(){return this.#T||(this.#T=new d(this.#v)),this.#T}static async 
startBatch(){this.#z().startBatch()}static async endBatch(){return this.#z().endBatch()}static async addDocument(t){return this.#z().addDocument(t)}static async addDocumentIfMissing(t){return this.#z().addDocumentIfMissing(t)}static async addDocuments(t){return this.#z().addDocuments(t)}static async addDocumentsIfMissing(t){return this.#z().addDocumentsIfMissing(t)}static async search(t,e){return this.#z().search(t,e)}static async removeDocument(t){return this.#z().removeDocument(t)}static async clearAll(){return this.#z().clearAll()}static async getStatus(){return this.#z().getStatus()}static async hasDocument(t){return this.#z().hasDocument(t)}};
1
"use strict";
/**
 * gs-search 0.1.3 — CommonJS build, expanded from the minified bundle.
 *
 * Public exports: BrowserStorage, NodeStorage, SearchEngine, SimpleSearch,
 * hash and murmur3_32 (the last two are the same function).
 *
 * Changes relative to the minified original:
 *  - IndexSegment sizes its token table from UTF-8 byte lengths, so tokens
 *    containing non-ASCII characters no longer overflow the buffer.
 *  - IndexSegment.search always compares the stored token bytes, so a query
 *    token that merely shares a hash with an indexed token no longer matches.
 *  - SearchEngine.search credits the first matching token with its weight
 *    instead of 0.
 *  - NodeStorage.readRange always closes its file handle; NodeStorage.clearAll
 *    removes sub-directories too, like BrowserStorage.clearAll.
 */

const META_FILE = "search_meta.json";
const DELETED_IDS_FILE = "deleted_ids.bin";
const ADDED_IDS_FILE = "added_ids.bin";
const WORD_CACHE_FILE = "word_cache.bin";
const CHAR_CACHE_FILE = "char_cache.bin";

/** Byte written after every record in the id files and token caches. */
const RECORD_SEPARATOR = 30;

/**
 * Storage backend built on the directory handle returned by
 * `navigator.storage.getDirectory()`. All files live in the sub-directory
 * named by `baseDir`, which is created on demand.
 */
class BrowserStorage {
  #baseDir;

  /** @param {string} baseDir sub-directory name used for every file */
  constructor(baseDir) {
    this.#baseDir = baseDir;
  }

  /** Resolves (creating if necessary) the directory handle for `baseDir`. */
  async #directory() {
    const root = await navigator.storage.getDirectory();
    return await root.getDirectoryHandle(this.#baseDir, { create: true });
  }

  /** Replaces the content of `filename` with `data`. */
  async write(filename, data) {
    const dir = await this.#directory();
    const handle = await dir.getFileHandle(filename, { create: true });
    const writable = await handle.createWritable();
    await writable.write(data);
    await writable.close();
  }

  /** Appends `data` to `filename`, creating the file when it is missing. */
  async append(filename, data) {
    const dir = await this.#directory();
    const handle = await dir.getFileHandle(filename, { create: true });
    const file = await handle.getFile();
    const writable = await handle.createWritable({ keepExistingData: true });
    await writable.seek(file.size);
    await writable.write(data);
    await writable.close();
  }

  /** @returns {Promise<ArrayBuffer|null>} file content, or null when it cannot be read */
  async read(filename) {
    const dir = await this.#directory();
    try {
      const handle = await dir.getFileHandle(filename);
      const file = await handle.getFile();
      return await file.arrayBuffer();
    } catch {
      return null;
    }
  }

  /** @returns {Promise<ArrayBuffer|null>} bytes [start, end) of the file, or null when it cannot be read */
  async readRange(filename, start, end) {
    const dir = await this.#directory();
    try {
      const handle = await dir.getFileHandle(filename);
      const file = await handle.getFile();
      return await file.slice(start, end).arrayBuffer();
    } catch {
      return null;
    }
  }

  /** Deletes `filename`; a failing removal is ignored on purpose. */
  async remove(filename) {
    const dir = await this.#directory();
    try {
      await dir.removeEntry(filename);
    } catch {
      // Best effort: nothing to do when the entry cannot be removed.
    }
  }

  /** @returns {Promise<string[]>} names of all entries in the directory */
  async listFiles() {
    const dir = await this.#directory();
    const names = [];
    for await (const name of dir.keys()) {
      names.push(name);
    }
    return names;
  }

  /** Removes every entry (recursively) from the directory. */
  async clearAll() {
    const dir = await this.#directory();
    for await (const name of dir.keys()) {
      await dir.removeEntry(name, { recursive: true });
    }
  }

  /** @returns {Promise<number>} size in bytes, or 0 when the file cannot be read */
  async getFileSize(filename) {
    const dir = await this.#directory();
    try {
      const handle = await dir.getFileHandle(filename);
      const file = await handle.getFile();
      return file.size;
    } catch {
      return 0;
    }
  }
}

/**
 * Storage backend built on `node:fs` promises. Files live under
 * `path.join(process.cwd(), baseDir)`; the directory is created on first use.
 */
class NodeStorage {
  #fs = null;
  #path = null;
  #baseDir;
  #root = "";

  /** @param {string} baseDir directory, relative to the current working directory */
  constructor(baseDir) {
    this.#baseDir = baseDir;
  }

  /** Lazily imports fs/path and makes sure the root directory exists. */
  async #ensureReady() {
    if (this.#fs) {
      return;
    }
    const fsModule = await import("node:fs");
    const pathModule = await import("node:path");
    this.#fs = fsModule.promises;
    this.#path = pathModule.default || pathModule;
    this.#root = this.#path.join(process.cwd(), this.#baseDir);
    try {
      await this.#fs.access(this.#root);
    } catch {
      await this.#fs.mkdir(this.#root, { recursive: true });
    }
  }

  #resolve(filename) {
    return this.#path.join(this.#root, filename);
  }

  /** Replaces the content of `filename` with `data`. */
  async write(filename, data) {
    await this.#ensureReady();
    await this.#fs.writeFile(this.#resolve(filename), Buffer.from(data));
  }

  /** Appends `data` to `filename`. */
  async append(filename, data) {
    await this.#ensureReady();
    await this.#fs.appendFile(this.#resolve(filename), Buffer.from(data));
  }

  /** @returns {Promise<ArrayBuffer|null>} file content, or null when it cannot be read */
  async read(filename) {
    await this.#ensureReady();
    try {
      const buf = await this.#fs.readFile(this.#resolve(filename));
      // Copy out of Node's pooled backing store so callers get a tight ArrayBuffer.
      return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
    } catch {
      return null;
    }
  }

  /** @returns {Promise<ArrayBuffer|null>} an (end - start)-byte buffer read at `start`, or null on failure */
  async readRange(filename, start, end) {
    await this.#ensureReady();
    let handle = null;
    try {
      handle = await this.#fs.open(this.#resolve(filename), "r");
      const length = end - start;
      const buf = Buffer.alloc(length);
      await handle.read(buf, 0, length, start);
      return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
    } catch {
      return null;
    } finally {
      // Close on every path; the original leaked the descriptor when read() threw.
      if (handle) {
        await handle.close().catch(() => {});
      }
    }
  }

  /** Deletes `filename`; a failing removal is ignored on purpose. */
  async remove(filename) {
    await this.#ensureReady();
    try {
      await this.#fs.unlink(this.#resolve(filename));
    } catch {
      // Best effort: a missing file is not an error for callers.
    }
  }

  /** @returns {Promise<string[]>} entry names, or [] when the directory cannot be listed */
  async listFiles() {
    await this.#ensureReady();
    try {
      return await this.#fs.readdir(this.#root);
    } catch {
      return [];
    }
  }

  /** Removes every entry under the root directory (best effort). */
  async clearAll() {
    await this.#ensureReady();
    try {
      const entries = await this.#fs.readdir(this.#root);
      for (const entry of entries) {
        // rm(recursive, force) also handles directories, which unlink() rejected
        // and thereby silently aborted the whole loop.
        await this.#fs.rm(this.#path.join(this.#root, entry), { recursive: true, force: true });
      }
    } catch {
      // Best effort, matching the original contract of never throwing here.
    }
  }

  /** @returns {Promise<number>} size in bytes, or 0 when the file cannot be stat'ed */
  async getFileSize(filename) {
    await this.#ensureReady();
    try {
      const stats = await this.#fs.stat(this.#resolve(filename));
      return stats.size;
    } catch {
      return 0;
    }
  }
}

/** Reads uint32 little-endian ids (each optionally followed by the separator byte) into `target`. */
function decodeIdList(buffer, target) {
  const view = new DataView(buffer);
  const size = buffer.byteLength;
  let pos = 0;
  while (pos + 4 <= size) {
    target.add(view.getUint32(pos, true));
    pos += 4;
    if (pos < size && view.getUint8(pos) === RECORD_SEPARATOR) {
      pos += 1;
    }
  }
}

/** Serializes ids as uint32 little-endian values, each followed by the separator byte. */
function encodeIdList(ids) {
  const buffer = new ArrayBuffer(5 * ids.size);
  const view = new DataView(buffer);
  let pos = 0;
  for (const id of ids) {
    view.setUint32(pos, id, true);
    view.setUint8(pos + 4, RECORD_SEPARATOR);
    pos += 5;
  }
  return buffer;
}

/**
 * Persists the segment lists (JSON) and the sets of added / deleted
 * document ids (binary id lists).
 */
class MetaStore {
  #storage;
  #meta = { wordSegments: [], charSegments: [] };
  #deleted = new Set();
  #added = new Set();

  constructor(storage) {
    this.#storage = storage;
  }

  /** Loads metadata and id sets from storage; missing files leave the defaults. */
  async load() {
    const metaBytes = await this.#storage.read(META_FILE);
    this.#meta = metaBytes
      ? JSON.parse(new TextDecoder().decode(metaBytes))
      : { wordSegments: [], charSegments: [] };

    const deletedBytes = await this.#storage.read(DELETED_IDS_FILE);
    if (deletedBytes) {
      decodeIdList(deletedBytes, this.#deleted);
    }
    const addedBytes = await this.#storage.read(ADDED_IDS_FILE);
    if (addedBytes) {
      decodeIdList(addedBytes, this.#added);
    }
  }

  /** Writes metadata and both id sets; an empty id set removes its file. */
  async save() {
    const json = JSON.stringify(this.#meta);
    await this.#storage.write(META_FILE, new TextEncoder().encode(json).buffer);
    await this.#saveIds(DELETED_IDS_FILE, this.#deleted);
    await this.#saveIds(ADDED_IDS_FILE, this.#added);
  }

  async #saveIds(filename, ids) {
    if (ids.size === 0) {
      await this.#storage.remove(filename);
    } else {
      await this.#storage.write(filename, encodeIdList(ids));
    }
  }

  /** @param {string} type "word" selects the word segments; anything else the char segments */
  getSegments(type) {
    return type === "word" ? this.#meta.wordSegments : this.#meta.charSegments;
  }

  getDeletedIds() {
    return this.#deleted;
  }

  addDeletedId(id) {
    this.#deleted.add(id);
  }

  isDeleted(id) {
    return this.#deleted.has(id);
  }

  addAddedId(id) {
    this.#added.add(id);
  }

  removeAddedId(id) {
    this.#added.delete(id);
  }

  isAdded(id) {
    return this.#added.has(id);
  }

  getAddedIds() {
    return this.#added;
  }

  /** True when the id is known at all, i.e. currently added or previously deleted. */
  hasDocument(id) {
    return this.#added.has(id) || this.#deleted.has(id);
  }

  getLastSegmentInfo(type) {
    const segments = this.getSegments(type);
    return segments.length === 0 ? null : segments[segments.length - 1];
  }

  /**
   * Appends a new segment record, or updates `end`/`tokenCount` of the last
   * record when it carries the same filename.
   */
  updateSegment(type, filename, start, end, tokenCount, isNew) {
    const segments = this.getSegments(type);
    if (isNew) {
      segments.push({ filename, start, end, tokenCount });
      return;
    }
    const last = segments[segments.length - 1];
    if (last && last.filename === filename) {
      last.end = end;
      last.tokenCount = tokenCount;
    }
  }

  reset() {
    this.#meta = { wordSegments: [], charSegments: [] };
    this.#deleted.clear();
    this.#added.clear();
  }
}

/**
 * Appends tokenized documents to a cache file and reads them back.
 *
 * Record layout: id (uint32 LE), token count (uint32 LE), then for each token
 * a uint16 LE byte length followed by the UTF-8 bytes, then the separator byte.
 */
class CacheWriter {
  static SEPARATOR = RECORD_SEPARATOR;
  static MAX_TOKEN_BYTES = 65535;

  #storage;

  constructor(storage) {
    this.#storage = storage;
  }

  /**
   * @param {string} filename cache file
   * @param {{id: number, tokens: string[]}[]} docs documents to append
   * @returns {Promise<number>} size of the cache file after the append
   */
  async appendBatch(filename, docs) {
    if (docs.length === 0) {
      return await this.#storage.getFileSize(filename);
    }
    const encoder = new TextEncoder();
    // Encode every token once; the byte arrays drive both sizing and writing.
    const encodedDocs = docs.map((doc) =>
      doc.tokens.map((token) => {
        const bytes = encoder.encode(token);
        return bytes.byteLength > CacheWriter.MAX_TOKEN_BYTES
          ? bytes.slice(0, CacheWriter.MAX_TOKEN_BYTES)
          : bytes;
      }),
    );

    let total = 0;
    for (const tokens of encodedDocs) {
      total += 8 + 1;
      for (const bytes of tokens) {
        total += 2 + bytes.byteLength;
      }
    }

    const out = new Uint8Array(total);
    const view = new DataView(out.buffer);
    let pos = 0;
    docs.forEach((doc, index) => {
      const tokens = encodedDocs[index];
      view.setUint32(pos, doc.id, true);
      view.setUint32(pos + 4, tokens.length, true);
      pos += 8;
      for (const bytes of tokens) {
        view.setUint16(pos, bytes.byteLength, true);
        pos += 2;
        out.set(bytes, pos);
        pos += bytes.byteLength;
      }
      out[pos++] = CacheWriter.SEPARATOR;
    });

    await this.#storage.append(filename, out.buffer);
    return await this.#storage.getFileSize(filename);
  }

  /**
   * Decodes the records stored in bytes [start, end) of `filename`.
   * A record cut off by the end of the range yields only the tokens read so far.
   * @returns {Promise<{id: number, tokens: string[]}[]>}
   */
  async readRange(filename, start, end) {
    const buffer = await this.#storage.readRange(filename, start, end);
    if (!buffer || buffer.byteLength === 0) {
      return [];
    }
    const view = new DataView(buffer);
    const bytes = new Uint8Array(buffer);
    const decoder = new TextDecoder();
    const size = buffer.byteLength;
    const docs = [];
    let pos = 0;
    while (pos + 8 <= size) {
      const id = view.getUint32(pos, true);
      const tokenCount = view.getUint32(pos + 4, true);
      pos += 8;
      const tokens = [];
      for (let k = 0; k < tokenCount && pos + 2 <= size; k++) {
        const length = view.getUint16(pos, true);
        pos += 2;
        if (pos + length > size) {
          break;
        }
        tokens.push(decoder.decode(new Uint8Array(buffer, pos, length)));
        pos += length;
      }
      if (pos < size && bytes[pos] === CacheWriter.SEPARATOR) {
        pos += 1;
      }
      docs.push({ id, tokens });
    }
    return docs;
  }

  async getCurrentSize(filename) {
    return await this.#storage.getFileSize(filename);
  }
}

/**
 * 32-bit MurmurHash3-style string hash (low byte of each UTF-16 code unit).
 *
 * NOTE(review): the block loop compares the character index against
 * `length >> 2` (the block count) while advancing four characters per
 * iteration, so only about the first quarter of a long string is mixed in
 * (e.g. "abcdefgh" and "abcdzzzz" hash identically). This is preserved
 * deliberately: the function is exported and its values are stored in index
 * files, so changing it would invalidate existing data. IndexSegment resolves
 * collisions by comparing tokens.
 *
 * @param {string} str string to hash
 * @param {number} [h=305419896] initial hash state (seed)
 * @returns {number} unsigned 32-bit hash
 */
function murmur3_32(str, h = 305419896) {
  const C1 = 3432918353;
  const C2 = 461845907;
  const length = str.length;
  const limit = length >> 2;
  let pos = 0;

  while (pos < limit) {
    let k =
      (str.charCodeAt(pos) & 255) |
      ((str.charCodeAt(pos + 1) & 255) << 8) |
      ((str.charCodeAt(pos + 2) & 255) << 16) |
      ((str.charCodeAt(pos + 3) & 255) << 24);
    pos += 4;
    // Math.imul yields the same low 32 bits as the original 16-bit split multiply.
    k = Math.imul(k, C1);
    k = (k << 15) | (k >>> 17);
    k = Math.imul(k, C2);
    h ^= k;
    h = (h << 13) | (h >>> 19);
    h = (Math.imul(h, 5) + 3864292196) | 0;
  }

  const remainder = length & 3;
  if (remainder > 0) {
    let tail = 0;
    if (remainder >= 3) {
      tail ^= (str.charCodeAt(pos + 2) & 255) << 16;
    }
    if (remainder >= 2) {
      tail ^= (str.charCodeAt(pos + 1) & 255) << 8;
    }
    tail ^= str.charCodeAt(pos) & 255;
    tail = Math.imul(tail, C1);
    tail = (tail << 15) | (tail >>> 17);
    tail = Math.imul(tail, C2);
    h ^= tail;
  }

  h ^= length;
  h ^= h >>> 16;
  h = Math.imul(h, 2246822507);
  h ^= h >>> 13;
  h = Math.imul(h, 3266489909);
  h ^= h >>> 16;
  return h >>> 0;
}

/**
 * One immutable index file mapping tokens to document ids.
 *
 * File layout (header and entry fields big-endian, postings little-endian):
 *   header  : magic "INDX" (uint32), entry count (uint32), token table offset (uint32)
 *   entries : 20 bytes each, sorted by hash —
 *             hash, token byte length, token offset, postings offset, postings count
 *   postings: uint32 document ids
 *   tokens  : UTF-8 bytes of each token followed by a 0 byte
 */
class IndexSegment {
  static MAGIC = 1229866072;
  static HEADER_SIZE = 12;
  static ENTRY_SIZE = 20;

  #filename;
  #storage;
  #buffer = null;
  #view = null;

  static hash(token) {
    return murmur3_32(token);
  }

  constructor(filename, storage) {
    this.#filename = filename;
    this.#storage = storage;
  }

  /** @returns {Promise<boolean>} true when the index is (already) in memory */
  async loadIndex() {
    if (this.#buffer) {
      return true;
    }
    this.#buffer = await this.#storage.read(this.#filename);
    if (!this.#buffer) {
      return false;
    }
    this.#view = new DataView(this.#buffer);
    return true;
  }

  /**
   * Builds the index for `docs`, writes it to storage and keeps it in memory.
   * @param {{id: number, tokens: string[]}[]} docs
   */
  async buildAndSave(docs) {
    const encoder = new TextEncoder();
    const byToken = new Map();
    for (const doc of docs) {
      const seen = new Set();
      for (const token of doc.tokens) {
        if (seen.has(token)) {
          continue; // each document is listed once per token
        }
        seen.add(token);
        let entry = byToken.get(token);
        if (!entry) {
          entry = {
            hash: IndexSegment.hash(token),
            bytes: encoder.encode(token),
            postings: [],
          };
          byToken.set(token, entry);
        }
        entry.postings.push(doc.id);
      }
    }

    const entries = Array.from(byToken.entries());
    entries.sort(([tokenA, a], [tokenB, b]) =>
      a.hash !== b.hash ? a.hash - b.hash : tokenA.localeCompare(tokenB),
    );

    let postingsTotal = 0;
    let tokenBytesTotal = 0;
    for (const [, entry] of entries) {
      postingsTotal += entry.postings.length;
      // Size by UTF-8 byte length: String.length undercounts non-ASCII tokens
      // and made the original write past the end of the buffer.
      tokenBytesTotal += entry.bytes.byteLength + 1;
    }

    const tableSize = IndexSegment.ENTRY_SIZE * entries.length;
    const tokenTableOffset = IndexSegment.HEADER_SIZE + tableSize + 4 * postingsTotal;
    const buffer = new ArrayBuffer(tokenTableOffset + tokenBytesTotal);
    const view = new DataView(buffer);
    const raw = new Uint8Array(buffer);

    view.setUint32(0, IndexSegment.MAGIC);
    view.setUint32(4, entries.length);
    view.setUint32(8, tokenTableOffset);

    let entryPos = IndexSegment.HEADER_SIZE;
    let postingsPos = IndexSegment.HEADER_SIZE + tableSize;
    let tokenPos = tokenTableOffset;
    for (const [, entry] of entries) {
      view.setUint32(entryPos, entry.hash);
      view.setUint32(entryPos + 4, entry.bytes.byteLength);
      view.setUint32(entryPos + 8, tokenPos);
      view.setUint32(entryPos + 12, postingsPos);
      view.setUint32(entryPos + 16, entry.postings.length);
      entryPos += IndexSegment.ENTRY_SIZE;

      for (const id of entry.postings) {
        view.setUint32(postingsPos, id, true);
        postingsPos += 4;
      }

      raw.set(entry.bytes, tokenPos);
      tokenPos += entry.bytes.byteLength;
      raw[tokenPos++] = 0;
    }

    await this.#storage.write(this.#filename, buffer);
    this.#buffer = buffer;
    this.#view = view;
  }

  /**
   * @param {string} token exact token to look up
   * @returns {number[]} document ids containing the token ([] when absent or not loaded)
   */
  search(token) {
    if (!this.#view || !this.#buffer) {
      return [];
    }
    const view = this.#view;
    const target = IndexSegment.hash(token);
    const count = view.getUint32(4);
    const entryOffset = (index) => IndexSegment.HEADER_SIZE + index * IndexSegment.ENTRY_SIZE;

    // Binary search for any entry carrying the target hash.
    let low = 0;
    let high = count - 1;
    let found = -1;
    while (low <= high) {
      const mid = (low + high) >>> 1;
      const midHash = view.getUint32(entryOffset(mid));
      if (midHash < target) {
        low = mid + 1;
      } else if (midHash > target) {
        high = mid - 1;
      } else {
        found = mid;
        break;
      }
    }
    if (found < 0) {
      return [];
    }

    // Rewind to the first entry of the collision run.
    let first = found;
    while (first > 0 && view.getUint32(entryOffset(first - 1)) === target) {
      first--;
    }

    // Always confirm the token itself: equal hashes do not imply equal tokens.
    const wanted = new TextEncoder().encode(token);
    for (let index = first; index < count; index++) {
      const offset = entryOffset(index);
      if (view.getUint32(offset) !== target) {
        break;
      }
      const tokenLength = view.getUint32(offset + 4);
      if (tokenLength !== wanted.byteLength) {
        continue;
      }
      const stored = new Uint8Array(this.#buffer, view.getUint32(offset + 8), tokenLength);
      if (!stored.every((byte, i) => byte === wanted[i])) {
        continue;
      }
      const postingsOffset = view.getUint32(offset + 12);
      const postingsCount = view.getUint32(offset + 16);
      const ids = [];
      for (let k = 0; k < postingsCount; k++) {
        ids.push(view.getUint32(postingsOffset + 4 * k, true));
      }
      return ids;
    }
    return [];
  }
}

/**
 * Full-text search engine. Documents are tokenized; tokens longer than one
 * character go to the "word" cache/segments, single-character tokens to the
 * "char" cache/segments.
 */
class SearchEngine {
  #storage;
  #meta;
  #cache;
  #segments;
  #initialized = false;
  #config;
  #inBatch = false;
  #pending = { word: 0, char: 0 };

  /**
   * @param {object} config requires `baseDir`; optional `storage` ("browser",
   *   "node" or a storage object), token thresholds and tokenizers
   * @throws {Error} on a missing baseDir, inconsistent thresholds, or when no
   *   storage backend can be selected
   */
  constructor(config) {
    if (!config.baseDir) {
      throw new Error("SearchEngine requires 'baseDir' in config.");
    }
    this.#config = {
      wordSegmentTokenThreshold: 1e5,
      charSegmentTokenThreshold: 5e5,
      minWordTokenSave: 0,
      minCharTokenSave: 0,
      ...config,
    };
    if ((this.#config.minWordTokenSave || 0) >= (this.#config.wordSegmentTokenThreshold || 1e5)) {
      throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");
    }
    if ((this.#config.minCharTokenSave || 0) >= (this.#config.charSegmentTokenThreshold || 5e5)) {
      throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");
    }

    const { storage: requested, baseDir } = this.#config;
    let storage = null;
    if (requested) {
      if (typeof requested === "object") {
        storage = requested;
      } else if (requested === "browser") {
        storage = new BrowserStorage(baseDir);
      } else if (requested === "node") {
        storage = new NodeStorage(baseDir);
      }
    }
    if (!storage) {
      // Auto-detect: prefer the browser directory API, then Node.
      const hasDirectoryApi =
        typeof navigator !== "undefined" && navigator?.storage?.getDirectory instanceof Function;
      const isNode =
        typeof process !== "undefined" && process.versions != null && process.versions.node != null;
      if (hasDirectoryApi) {
        storage = new BrowserStorage(baseDir);
      } else if (isNode) {
        storage = new NodeStorage(baseDir);
      }
    }
    if (!storage) {
      throw new Error('Storage initialization failed. Please configure "storage" explicitly or ensure you are in a supported environment (Browser/Node).');
    }

    this.#storage = storage;
    this.#meta = new MetaStore(storage);
    this.#cache = new CacheWriter(storage);
    this.#segments = new Map();
  }

  /** Loads metadata and every known index segment; a no-op once initialized. */
  async init() {
    if (this.#initialized) {
      return;
    }
    await this.#meta.load();
    const known = [...this.#meta.getSegments("word"), ...this.#meta.getSegments("char")];
    for (const segment of known) {
      if (!this.#segments.has(segment.filename)) {
        this.#segments.set(segment.filename, new IndexSegment(segment.filename, this.#storage));
      }
      await this.#segments.get(segment.filename).loadIndex();
    }
    this.#initialized = true;
  }

  /** Defers index building until endBatch(). */
  startBatch() {
    this.#inBatch = true;
    this.#pending = { word: 0, char: 0 };
  }

  /** Builds the indexes for everything added since startBatch() and saves metadata. */
  async endBatch() {
    this.#inBatch = false;
    if (this.#pending.word > 0) {
      await this.#flushSegment("word", this.#pending.word);
    }
    if (this.#pending.char > 0) {
      await this.#flushSegment("char", this.#pending.char);
    }
    this.#pending = { word: 0, char: 0 };
    await this.#meta.save();
  }

  /** Default tokenizer: Intl.Segmenter word segments, else a lower-cased regex split. */
  #defaultTokenize(text) {
    try {
      if (
        typeof Intl !== "undefined" &&
        typeof Intl.Segmenter === "function" &&
        typeof Array.from === "function"
      ) {
        const segments = new Intl.Segmenter([], { granularity: "word" }).segment(text);
        if (typeof segments === "object" && segments !== null) {
          return Array.from(segments)
            .filter((part) => part?.isWordLike)
            .map((part) => part?.segment?.toLowerCase() || "");
        }
      }
    } catch {
      // Segmenter unavailable or failed: fall through to the regex tokenizer.
    }
    return text
      .toLowerCase()
      .split(/[^a-z0-9\u4e00-\u9fa5]+/g)
      .filter((part) => part.length > 0);
  }

  #tokenizeForIndex(doc) {
    return this.#config.indexingTokenizer
      ? this.#config.indexingTokenizer(doc)
      : this.#defaultTokenize(doc.text);
  }

  #tokenizeForSearch(query) {
    return this.#config.searchTokenizer
      ? this.#config.searchTokenizer(query)
      : this.#tokenizeForIndex(query);
  }

  async addDocument(doc) {
    return this.addDocuments([doc]);
  }

  async addDocumentIfMissing(doc) {
    return this.addDocumentsIfMissing([doc]);
  }

  /** Adds the documents whose id is neither present nor deleted; others are skipped. */
  async addDocumentsIfMissing(docs) {
    if (!this.#initialized) {
      await this.init();
    }
    if (docs.length === 0) {
      return;
    }
    const deleted = this.#meta.getDeletedIds();
    const accepted = docs.filter((doc) => !deleted.has(doc.id) && !this.#meta.isAdded(doc.id));
    await this.#ingest(accepted);
  }

  /**
   * Adds all documents.
   * @throws {Error} when an id was deleted earlier or already exists; nothing
   *   is written in that case
   */
  async addDocuments(docs) {
    if (!this.#initialized) {
      await this.init();
    }
    if (docs.length === 0) {
      return;
    }
    const deleted = this.#meta.getDeletedIds();
    for (const doc of docs) {
      if (deleted.has(doc.id)) {
        throw new Error(`Document ID ${doc.id} has been deleted and cannot be re-added.`);
      }
      if (this.#meta.isAdded(doc.id)) {
        throw new Error(`Document ID ${doc.id} already exists.`);
      }
    }
    await this.#ingest(docs);
  }

  /** Tokenizes, caches and (outside a batch) indexes already-validated documents. */
  async #ingest(docs) {
    if (docs.length === 0) {
      return;
    }
    const wordDocs = [];
    const charDocs = [];
    for (const doc of docs) {
      const tokens = this.#tokenizeForIndex(doc);
      const words = tokens.filter((token) => token.length > 1);
      const chars = tokens.filter((token) => token.length === 1);
      if (words.length > 0) {
        wordDocs.push({ id: doc.id, tokens: words });
      }
      if (chars.length > 0) {
        charDocs.push({ id: doc.id, tokens: chars });
      }
    }

    const countTokens = (list) => list.reduce((sum, doc) => sum + doc.tokens.length, 0);
    let wordTokens = 0;
    let charTokens = 0;
    if (wordDocs.length > 0) {
      await this.#cache.appendBatch(WORD_CACHE_FILE, wordDocs);
      wordTokens = countTokens(wordDocs);
    }
    if (charDocs.length > 0) {
      await this.#cache.appendBatch(CHAR_CACHE_FILE, charDocs);
      charTokens = countTokens(charDocs);
    }

    for (const doc of docs) {
      this.#meta.addAddedId(doc.id);
    }

    if (this.#inBatch) {
      this.#pending.word += wordTokens;
      this.#pending.char += charTokens;
      return;
    }
    if (wordTokens > 0) {
      await this.#flushSegment("word", wordTokens);
    }
    if (charTokens > 0) {
      await this.#flushSegment("char", charTokens);
    }
    await this.#meta.save();
  }

  /**
   * Extends the last segment of `type` with `newTokens` tokens, or starts a
   * new segment once the token threshold is reached, and rebuilds that
   * segment's index from its cache range. When the segment holds fewer tokens
   * than the configured minimum, only the metadata is updated.
   */
  async #flushSegment(type, newTokens) {
    const isWord = type === "word";
    const cacheFile = isWord ? WORD_CACHE_FILE : CHAR_CACHE_FILE;
    const cacheSize = await this.#cache.getCurrentSize(cacheFile);
    const threshold = isWord
      ? this.#config.wordSegmentTokenThreshold || 1e5
      : this.#config.charSegmentTokenThreshold || 5e5;
    const minSave = isWord ? this.#config.minWordTokenSave || 0 : this.#config.minCharTokenSave || 0;
    const last = this.#meta.getLastSegmentInfo(type);
    const nextFilename = () => `${type}_seg_${this.#meta.getSegments(type).length + 1}.bin`;

    let filename;
    let start;
    let isNew;
    let tokenCount;
    if (!last) {
      filename = nextFilename();
      isNew = true;
      start = 0;
      tokenCount = newTokens;
    } else if (last.tokenCount >= threshold || last.tokenCount + newTokens >= threshold) {
      filename = nextFilename();
      isNew = true;
      start = last.end;
      tokenCount = newTokens;
    } else {
      filename = last.filename;
      isNew = false;
      start = last.start;
      tokenCount = last.tokenCount + newTokens;
    }

    if (tokenCount < minSave) {
      this.#meta.updateSegment(type, filename, start, cacheSize, tokenCount, isNew);
      return;
    }

    const docs = await this.#cache.readRange(cacheFile, start, cacheSize);
    let segment = this.#segments.get(filename);
    if (!segment) {
      segment = new IndexSegment(filename, this.#storage);
      this.#segments.set(filename, segment);
    }
    await segment.buildAndSave(docs);
    this.#meta.updateSegment(type, filename, start, cacheSize, tokenCount, isNew);
  }

  /**
   * @param {string|object} query text, or a document-like object for the tokenizer
   * @param {number} [limit] maximum number of results when a positive number
   * @returns {Promise<{id: number, score: number, tokens: string[]}[]>} matches
   *   sorted by descending score; each matching token adds 1 + 0.1 * token.length
   */
  async search(query, limit) {
    if (!this.#initialized) {
      await this.init();
    }
    const normalized = typeof query === "string" ? { text: query } : query;
    const tokens = this.#tokenizeForSearch(normalized);
    const words = tokens.filter((token) => token.length > 1);
    const chars = tokens.filter((token) => token.length === 1);
    const deleted = this.#meta.getDeletedIds();

    // Load segments that are listed in the metadata but not yet in memory.
    const toLoad = new Map();
    for (const type of ["word", "char"]) {
      for (const { filename } of this.#meta.getSegments(type)) {
        if (!this.#segments.has(filename) && !toLoad.has(filename)) {
          toLoad.set(filename, new IndexSegment(filename, this.#storage));
        }
      }
    }
    await Promise.all(
      Array.from(toLoad.entries()).map(async ([filename, segment]) => {
        if (await segment.loadIndex()) {
          this.#segments.set(filename, segment);
        }
      }),
    );

    const hits = new Map();
    const collect = (type, queryTokens) => {
      if (queryTokens.length === 0) {
        return;
      }
      for (const { filename } of this.#meta.getSegments(type)) {
        const segment = this.#segments.get(filename);
        if (!segment) {
          continue;
        }
        for (const token of queryTokens) {
          const weight = 1 + 0.1 * token.length;
          for (const id of segment.search(token)) {
            if (deleted.has(id)) {
              continue;
            }
            const hit = hits.get(id);
            if (hit) {
              hit.score += weight;
              hit.tokens.add(token);
            } else {
              // The first match counts too (the original started the score at 0).
              hits.set(id, { score: weight, tokens: new Set([token]) });
            }
          }
        }
      }
    };
    collect("word", words);
    collect("char", chars);

    const results = Array.from(hits, ([id, hit]) => ({
      id,
      score: hit.score,
      tokens: Array.from(hit.tokens),
    }));
    results.sort((a, b) => b.score - a.score);
    return typeof limit === "number" && limit > 0 ? results.slice(0, limit) : results;
  }

  /** Marks `id` as deleted so it is filtered from results and cannot be re-added. */
  async removeDocument(id) {
    if (!this.#initialized) {
      await this.init();
    }
    this.#meta.addDeletedId(id);
    this.#meta.removeAddedId(id);
    await this.#meta.save();
  }

  /** Wipes storage and resets all in-memory state. */
  async clearAll() {
    await this.#storage.clearAll();
    this.#segments.clear();
    this.#meta.reset();
    this.#initialized = false;
    this.#inBatch = false;
    this.#pending = { word: 0, char: 0 };
  }

  async getStatus() {
    if (!this.#initialized) {
      await this.init();
    }
    return {
      wordSegments: this.#meta.getSegments("word").length,
      charSegments: this.#meta.getSegments("char").length,
      deleted: this.#meta.getDeletedIds().size,
      wordCacheSize: await this.#cache.getCurrentSize(WORD_CACHE_FILE),
      charCacheSize: await this.#cache.getCurrentSize(CHAR_CACHE_FILE),
      inBatch: this.#inBatch,
    };
  }

  /** True when `id` is currently added or was deleted earlier. */
  async hasDocument(id) {
    if (!this.#initialized) {
      await this.init();
    }
    return this.#meta.hasDocument(id);
  }
}

/** Static facade over a single lazily created SearchEngine. */
class SimpleSearch {
  static #engine = null;
  static #defaults = {
    baseDir: "simple_search_data",
    wordSegmentTokenThreshold: 1e5,
    minWordTokenSave: 0,
  };

  /** Replaces the shared engine with one built from the defaults plus `config`. */
  static configure(config) {
    this.#engine = new SearchEngine({ ...this.#defaults, ...config });
  }

  static #instance() {
    if (!this.#engine) {
      this.#engine = new SearchEngine(this.#defaults);
    }
    return this.#engine;
  }

  static async startBatch() {
    this.#instance().startBatch();
  }

  static async endBatch() {
    return this.#instance().endBatch();
  }

  static async addDocument(doc) {
    return this.#instance().addDocument(doc);
  }

  static async addDocumentIfMissing(doc) {
    return this.#instance().addDocumentIfMissing(doc);
  }

  static async addDocuments(docs) {
    return this.#instance().addDocuments(docs);
  }

  static async addDocumentsIfMissing(docs) {
    return this.#instance().addDocumentsIfMissing(docs);
  }

  static async search(query, limit) {
    return this.#instance().search(query, limit);
  }

  static async removeDocument(id) {
    return this.#instance().removeDocument(id);
  }

  static async clearAll() {
    return this.#instance().clearAll();
  }

  static async getStatus() {
    return this.#instance().getStatus();
  }

  static async hasDocument(id) {
    return this.#instance().hasDocument(id);
  }
}

// `exports` is supplied by the CommonJS wrapper; fall back to a scratch object
// so the file can also be evaluated where no wrapper exists.
const moduleExports = typeof exports === "object" && exports !== null ? exports : {};
moduleExports.BrowserStorage = BrowserStorage;
moduleExports.NodeStorage = NodeStorage;
moduleExports.SearchEngine = SearchEngine;
moduleExports.SimpleSearch = SimpleSearch;
moduleExports.hash = murmur3_32;
moduleExports.murmur3_32 = murmur3_32;
package/lib/index.d.ts CHANGED
@@ -221,5 +221,17 @@ declare class NodeStorage implements IStorage {
221
221
  getFileSize(filename: string): Promise<number>;
222
222
  }
223
223
 
224
- export { BrowserStorage, NodeStorage, SearchEngine, SimpleSearch };
224
+ /**
225
+ * 32-bit MurmurHash3 implementation
226
+ * An efficient non-cryptographic hash function, suitable for data structures such as hash tables
227
+ */
228
+ /**
229
+ * Computes the 32-bit MurmurHash3 hash of a string
230
+ * @param str the string to hash
231
+ * @param h optional initial hash state (seed); the bundled implementation defaults it to 305419896
232
+ * @returns 32-bit unsigned hash value
233
+ */
234
+ declare function murmur3_32(str: string, h?: number): number;
235
+
236
+ export { BrowserStorage, NodeStorage, SearchEngine, SimpleSearch, murmur3_32 as hash, murmur3_32 };
225
237
  export type { IDocument, IDocumentBase, IIndexMeta, IResult, ISearchEngineConfig, ISegmentMeta, IStorage, ITokenizedDoc, IndexType };
package/lib/index.js CHANGED
@@ -1 +1 @@
1
- /*
 * gs-search 0.1.2 ESM bundle (removed side of the diff), minified. Reading guide for the minified names:
 *   t  -> exported as BrowserStorage: file storage on the directory returned by navigator.storage.getDirectory().
 *   e  -> exported as NodeStorage: file storage through node:fs promises under path.join(process.cwd(), baseDir).
 *   a  -> metadata store: persists search_meta.json (segment lists) plus deleted_ids.bin / added_ids.bin,
 *         where each id is written as a little-endian uint32 followed by the separator byte 30.
 *   r  -> token cache: appends records of [uint32 id][uint32 token count]([uint16 byte length][UTF-8 bytes])*[byte 30],
 *         all little-endian, and parses a byte range of such records back into {id, tokens}.
 *   o  -> hash index: keys postings by a 32-bit string hash (seed 5381, e = (e << 5) + e ^ charCode), so tokens
 *         with the same hash share one posting list. File layout: uint32 magic 1229866072, uint32 entry count,
 *         12-byte entries (hash, postings offset, postings count) sorted by hash, then little-endian uint32 ids.
 *         search() is a binary search on the hash.
 *   d  -> exported as SearchEngine: tokenizes documents, appends tokens to word_cache.bin / char_cache.bin,
 *         builds per-segment hash indexes and answers queries.
 *   g  -> exported as SimpleSearch: static facade over one lazily created SearchEngine.
 * NOTE(review): in e.readRange the file handle is not closed when read() throws (the catch just returns null).
 * NOTE(review): in d.search the first hit for a document is stored with score 0, so the first matching
 *               token's weight is never counted — confirm whether that is intended.
 */
class t{#t;constructor(t){this.#t=t}async#e(){return await(await navigator.storage.getDirectory()).getDirectoryHandle(this.#t,{create:!0})}async write(t,e){const s=await(await(await this.#e()).getFileHandle(t,{create:!0})).createWritable();await s.write(e),await s.close()}async append(t,e){const s=await this.#e();let i;try{i=await s.getFileHandle(t,{create:!0})}catch{i=await s.getFileHandle(t,{create:!0})}const n=await i.getFile(),a=await i.createWritable({keepExistingData:!0});await a.seek(n.size),await a.write(e),await a.close()}async read(t){const e=await this.#e();try{return await(await(await e.getFileHandle(t)).getFile()).arrayBuffer()}catch{return null}}async readRange(t,e,s){const i=await this.#e();try{return await(await(await i.getFileHandle(t)).getFile()).slice(e,s).arrayBuffer()}catch{return null}}async remove(t){const e=await this.#e();try{await e.removeEntry(t)}catch{}}async listFiles(){const t=await this.#e(),e=[];for await(const s of t.keys())e.push(s);return e}async clearAll(){const t=await this.#e();for await(const e of t.keys())await t.removeEntry(e,{recursive:!0})}async getFileSize(t){const e=await this.#e();try{return(await(await e.getFileHandle(t)).getFile()).size}catch{return 0}}}class e{#s=null;#i=null;#t;#n="";constructor(t){this.#t=t}async#a(){if(this.#s)return;const t=await import("node:fs"),e=await import("node:path");this.#s=t.promises,this.#i=e.default||e,this.#n=this.#i.join(process.cwd(),this.#t);try{await this.#s.access(this.#n)}catch{await this.#s.mkdir(this.#n,{recursive:!0})}}#r(t){return this.#i.join(this.#n,t)}async write(t,e){await this.#a(),await this.#s.writeFile(this.#r(t),Buffer.from(e))}async append(t,e){await this.#a(),await this.#s.appendFile(this.#r(t),Buffer.from(e))}async read(t){await this.#a();try{const e=await this.#s.readFile(this.#r(t));return e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)}catch{return null}}async readRange(t,e,s){await this.#a();try{const i=await 
this.#s.open(this.#r(t),"r"),n=s-e,a=Buffer.alloc(n);return await i.read(a,0,n,e),await i.close(),a.buffer.slice(a.byteOffset,a.byteOffset+a.byteLength)}catch{return null}}async remove(t){await this.#a();try{await this.#s.unlink(this.#r(t))}catch{}}async listFiles(){await this.#a();try{return await this.#s.readdir(this.#n)}catch{return[]}}async clearAll(){await this.#a();try{const t=await this.#s.readdir(this.#n);for(const e of t)await this.#s.unlink(this.#i.join(this.#n,e))}catch{}}async getFileSize(t){await this.#a();try{return(await this.#s.stat(this.#r(t))).size}catch{return 0}}}const s="search_meta.json",i="deleted_ids.bin",n="added_ids.bin";class a{#o;#h={wordSegments:[],charSegments:[]};#c=new Set;#d=new Set;constructor(t){this.#o=t}async load(){const t=await this.#o.read(s);if(t){const e=(new TextDecoder).decode(t);this.#h=JSON.parse(e)}else this.#h={wordSegments:[],charSegments:[]};const e=await this.#o.read(i);if(e){const t=new DataView(e);let s=0;const i=e.byteLength;for(;s<i&&!(s+4>i);){const e=t.getUint32(s,!0);this.#c.add(e),s+=4,s<i&&30===t.getUint8(s)&&(s+=1)}}const a=await this.#o.read(n);if(a){const t=new DataView(a);let e=0;const s=a.byteLength;for(;e<s&&!(e+4>s);){const i=t.getUint32(e,!0);this.#d.add(i),e+=4,e<s&&30===t.getUint8(e)&&(e+=1)}}}async save(){const t=JSON.stringify(this.#h);if(await this.#o.write(s,(new TextEncoder).encode(t).buffer),0===this.#c.size)await this.#o.remove(i);else{const t=4*this.#c.size+this.#c.size,e=new ArrayBuffer(t),s=new DataView(e);let n=0;for(const t of this.#c)s.setUint32(n,t,!0),n+=4,s.setUint8(n,30),n+=1;await this.#o.write(i,e)}if(0===this.#d.size)await this.#o.remove(n);else{const t=4*this.#d.size+this.#d.size,e=new ArrayBuffer(t),s=new DataView(e);let i=0;for(const t of this.#d)s.setUint32(i,t,!0),i+=4,s.setUint8(i,30),i+=1;await this.#o.write(n,e)}}getSegments(t){return"word"===t?this.#h.wordSegments:this.#h.charSegments}getDeletedIds(){return this.#c}addDeletedId(t){this.#c.add(t)}isDeleted(t){return 
this.#c.has(t)}addAddedId(t){this.#d.add(t)}removeAddedId(t){this.#d.delete(t)}isAdded(t){return this.#d.has(t)}getAddedIds(){return this.#d}hasDocument(t){return this.#d.has(t)||this.#c.has(t)}getLastSegmentInfo(t){const e=this.getSegments(t);return 0===e.length?null:e[e.length-1]}updateSegment(t,e,s,i,n,a){const r="word"===t?this.#h.wordSegments:this.#h.charSegments;if(a)r.push({filename:e,start:s,end:i,tokenCount:n});else{const t=r[r.length-1];t&&t.filename===e&&(t.end=i,t.tokenCount=n)}}reset(){this.#h={wordSegments:[],charSegments:[]},this.#c.clear(),this.#d.clear()}}class r{static SEPARATOR=30;#o;constructor(t){this.#o=t}async appendBatch(t,e){if(0===e.length)return await this.#o.getFileSize(t);const s=new TextEncoder;let i=0;for(const t of e){i+=8;for(const e of t.tokens){i+=2+Math.min(s.encode(e).byteLength,65535)}i+=1}const n=new Uint8Array(i);let a=0;for(const t of e){const e=[];for(const i of t.tokens){const t=s.encode(i),n=t.byteLength>65535?t.slice(0,65535):t;e.push(n)}const i=new DataView(n.buffer,a);i.setUint32(0,t.id,!0),i.setUint32(4,e.length,!0),a+=8;for(const t of e)new DataView(n.buffer,a).setUint16(0,t.byteLength,!0),a+=2,n.set(t,a),a+=t.byteLength;n[a++]=r.SEPARATOR}return await this.#o.append(t,n.buffer),await this.#o.getFileSize(t)}async readRange(t,e,s){const i=await this.#o.readRange(t,e,s);if(!i||0===i.byteLength)return[];const n=new DataView(i),a=new Uint8Array(i),o=new TextDecoder,h=[];let c=0;const d=i.byteLength;for(;c<d&&!(c+8>d);){const t=n.getUint32(c,!0);c+=4;const e=n.getUint32(c,!0);c+=4;const s=[];for(let t=0;t<e&&!(c+2>d);t++){const t=n.getUint16(c,!0);if(c+=2,c+t>d)break;const e=new Uint8Array(i,c,t);s.push(o.decode(e)),c+=t}c<d&&a[c]===r.SEPARATOR&&(c+=1),h.push({id:t,tokens:s})}return h}async getCurrentSize(t){return await this.#o.getFileSize(t)}}class o{#g;#o;#l=null;#f=null;static hash(t){let e=5381;for(let s=0;s<t.length;s++)e=(e<<5)+e^t.charCodeAt(s);return e>>>0}constructor(t,e){this.#g=t,this.#o=e}async 
loadIndex(){return!!this.#l||(this.#l=await this.#o.read(this.#g),!!this.#l&&(this.#f=new DataView(this.#l),!0))}async buildAndSave(t){const e=new Map;for(const s of t){const t=new Map;for(const i of s.tokens)if(!t.has(i)){t.set(i,!0);const n=o.hash(i);e.has(n)||e.set(n,[]),e.get(n).push(s.id)}}const s=Array.from(e.keys()).sort((t,e)=>t-e);let i=0;const n=new Array(s.length);for(let t=0;t<s.length;t++){const a=s[t],r=e.get(a);n[t]=r,i+=r.length}const a=12*s.length,r=new ArrayBuffer(8+a+4*i),h=new DataView(r);h.setUint32(0,1229866072),h.setUint32(4,s.length);let c=8,d=8+a;for(let t=0;t<s.length;t++){const e=s[t],i=n[t];h.setUint32(c,e),h.setUint32(c+4,d),h.setUint32(c+8,i.length),c+=12;for(let t=0;t<i.length;t++)h.setUint32(d,i[t],!0),d+=4}await this.#o.write(this.#g,r),this.#l=r,this.#f=h}search(t){if(!this.#f||!this.#l)return[];const e=o.hash(t);let s=0,i=this.#f.getUint32(4)-1;for(;s<=i;){const t=s+i>>>1,n=8+12*t,a=this.#f.getUint32(n);if(a<e)s=t+1;else{if(!(a>e)){const t=this.#f.getUint32(n+4),e=this.#f.getUint32(n+8),s=[];for(let i=0;i<e;i++)s.push(this.#f.getUint32(t+4*i,!0));return s}i=t-1}}return[]}}const h="word_cache.bin",c="char_cache.bin";class d{#o;#h;#w;#u;#m=!1;#y;#p=!1;#D={word:0,char:0};constructor(s){if(!s.baseDir)throw new Error("SearchEngine requires 'baseDir' in config.");if(this.#y={wordSegmentTokenThreshold:1e5,charSegmentTokenThreshold:5e5,minWordTokenSave:0,minCharTokenSave:0,...s},(this.#y.minWordTokenSave||0)>=(this.#y.wordSegmentTokenThreshold||1e5))throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");if((this.#y.minCharTokenSave||0)>=(this.#y.charSegmentTokenThreshold||5e5))throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");let i=null;if(this.#y.storage&&("object"==typeof this.#y.storage?i=this.#y.storage:"browser"===this.#y.storage?i=new t(this.#y.baseDir):"node"===this.#y.storage&&(i=new e(this.#y.baseDir))),!i){const s=typeof navigator<"u"&&navigator?.storage?.getDirectory 
instanceof Function,n=typeof process<"u"&&null!=process.versions&&null!=process.versions.node;s?i=new t(this.#y.baseDir):n&&(i=new e(this.#y.baseDir))}if(!i)throw new Error('Storage initialization failed. Please configure "storage" explicitly or ensure you are in a supported environment (Browser/Node).');this.#o=i,this.#h=new a(this.#o),this.#w=new r(this.#o),this.#u=new Map}async init(){if(this.#m)return;await this.#h.load();const t=[...this.#h.getSegments("word"),...this.#h.getSegments("char")];for(const e of t)this.#u.has(e.filename)||this.#u.set(e.filename,new o(e.filename,this.#o)),await this.#u.get(e.filename).loadIndex();this.#m=!0}startBatch(){this.#p=!0,this.#D={word:0,char:0}}async endBatch(){this.#p=!1,this.#D.word>0&&await this.#S("word",this.#D.word),this.#D.char>0&&await this.#S("char",this.#D.char),this.#D={word:0,char:0},await this.#h.save()}#k(t){try{if(typeof Intl<"u"&&"function"==typeof Intl.Segmenter&&"function"==typeof Array.from){const e=new Intl.Segmenter([],{granularity:"word"}).segment(t);if("object"==typeof e&&null!==e)return Array.from(e).filter(t=>t?.isWordLike).map(t=>t?.segment?.toLowerCase()||"")}}catch{}return t.toLowerCase().split(/[^a-z0-9\u4e00-\u9fa5]+/g).filter(t=>t.length>0)}#I(t){return this.#y.indexingTokenizer?this.#y.indexingTokenizer(t):this.#k(t.text)}#b(t){return this.#y.searchTokenizer?this.#y.searchTokenizer(t):this.#I(t)}async addDocument(t){return this.addDocuments([t])}async addDocumentIfMissing(t){return this.addDocumentsIfMissing([t])}async addDocumentsIfMissing(t){if(this.#m||await this.init(),0===t.length)return;const e=this.#h.getDeletedIds(),s=[],i=[],n=[];for(const a of t){if(e.has(a.id)||this.#h.isAdded(a.id))continue;const t=this.#I(a),r=[],o=[];for(const e of t)e.length>1?r.push(e):1===e.length&&o.push(e);r.length>0&&s.push({id:a.id,tokens:r}),o.length>0&&i.push({id:a.id,tokens:o}),n.push(a)}if(0===n.length)return;let a=0,r=0;if(s.length>0){await this.#w.appendBatch(h,s);for(const t of 
s)a+=t.tokens.length}if(i.length>0){await this.#w.appendBatch(c,i);for(const t of i)r+=t.tokens.length}for(const t of n)this.#h.addAddedId(t.id);this.#p?(this.#D.word+=a,this.#D.char+=r):(a>0&&await this.#S("word",a),r>0&&await this.#S("char",r),await this.#h.save())}async addDocuments(t){if(this.#m||await this.init(),0===t.length)return;const e=this.#h.getDeletedIds(),s=[],i=[];for(const n of t){if(e.has(n.id))throw new Error(`Document ID ${n.id} has been deleted and cannot be re-added.`);if(this.#h.isAdded(n.id))throw new Error(`Document ID ${n.id} already exists.`);const t=this.#I(n),a=[],r=[];for(const e of t)e.length>1?a.push(e):1===e.length&&r.push(e);a.length>0&&s.push({id:n.id,tokens:a}),r.length>0&&i.push({id:n.id,tokens:r})}let n=0,a=0;if(s.length>0){await this.#w.appendBatch(h,s);for(const t of s)n+=t.tokens.length}if(i.length>0){await this.#w.appendBatch(c,i);for(const t of i)a+=t.tokens.length}for(const e of t)this.#h.addAddedId(e.id);this.#p?(this.#D.word+=n,this.#D.char+=a):(n>0&&await this.#S("word",n),a>0&&await this.#S("char",a),await this.#h.save())}async#S(t,e){const s="word"===t?h:c,i=await this.#w.getCurrentSize(s),n="word"===t?this.#y.wordSegmentTokenThreshold||1e5:this.#y.charSegmentTokenThreshold||5e5,a="word"===t?this.#y.minWordTokenSave||0:this.#y.minCharTokenSave||0,r=this.#h.getLastSegmentInfo(t);let d,g,l,f;const w=()=>{const e=this.#h.getSegments(t).length+1;return`${t}_seg_${e}.bin`};if(r){const t=r.tokenCount;t>=n||t+e>=n?(d=w(),l=!0,g=r.end,f=e):(d=r.filename,l=!1,g=r.start,f=t+e)}else d=w(),l=!0,g=0,f=e;if(f<a)return void this.#h.updateSegment(t,d,g,i,f,l);const u=await this.#w.readRange(s,g,i);let m=this.#u.get(d);m||(m=new o(d,this.#o),this.#u.set(d,m)),await m.buildAndSave(u),this.#h.updateSegment(t,d,g,i,f,l)}async search(t,e){this.#m||await this.init();const s="string"==typeof t?{text:t}:t,i=this.#b(s),n=i.filter(t=>t.length>1),a=i.filter(t=>1===t.length),r=this.#h.getDeletedIds(),h=new Map,c=new Map,d=t=>{const 
e=this.#h.getSegments(t);for(const t of e){const e=t.filename;!this.#u.has(e)&&!c.has(e)&&c.set(e,new o(e,this.#o))}};d("word"),d("char"),await Promise.all(Array.from(c.entries()).map(([t,e])=>e.loadIndex().then(s=>{s&&this.#u.set(t,e)})));const g=async(t,e)=>{if(0===e.length)return;const s=this.#h.getSegments(t);for(const t of s){const s=t.filename,i=this.#u.get(s);if(i)for(const t of e){const e=i.search(t),s=1+.1*t.length;for(const i of e)if(!r.has(i))if(h.has(i)){const e=h.get(i);e.score+=s,e.tokens.add(t)}else h.set(i,{score:0,tokens:new Set([t])})}}};await g("word",n),await g("char",a);const l=[];return h.forEach((t,e)=>{l.push({id:e,score:t.score,tokens:Array.from(t.tokens)})}),l.sort((t,e)=>e.score-t.score),"number"==typeof e&&e>0?l.slice(0,e):l}async removeDocument(t){this.#m||await this.init(),this.#h.addDeletedId(t),this.#h.removeAddedId(t),await this.#h.save()}async clearAll(){await this.#o.clearAll(),this.#u.clear(),this.#h.reset(),this.#m=!1,this.#p=!1,this.#D={word:0,char:0}}async getStatus(){return this.#m||await this.init(),{wordSegments:this.#h.getSegments("word").length,charSegments:this.#h.getSegments("char").length,deleted:this.#h.getDeletedIds().size,wordCacheSize:await this.#w.getCurrentSize(h),charCacheSize:await this.#w.getCurrentSize(c),inBatch:this.#p}}async hasDocument(t){return this.#m||await this.init(),this.#h.hasDocument(t)}}class g{static#T=null;static#v={baseDir:"simple_search_data",wordSegmentTokenThreshold:1e5,minWordTokenSave:0};static configure(t){const e={...this.#v,...t};this.#T=new d(e)}static#z(){return this.#T||(this.#T=new d(this.#v)),this.#T}static async startBatch(){this.#z().startBatch()}static async endBatch(){return this.#z().endBatch()}static async addDocument(t){return this.#z().addDocument(t)}static async addDocumentIfMissing(t){return this.#z().addDocumentIfMissing(t)}static async addDocuments(t){return this.#z().addDocuments(t)}static async addDocumentsIfMissing(t){return this.#z().addDocumentsIfMissing(t)}static 
async search(t,e){return this.#z().search(t,e)}static async removeDocument(t){return this.#z().removeDocument(t)}static async clearAll(){return this.#z().clearAll()}static async getStatus(){return this.#z().getStatus()}static async hasDocument(t){return this.#z().hasDocument(t)}}export{t as BrowserStorage,e as NodeStorage,d as SearchEngine,g as SimpleSearch};
1
/*
 * gs-search ESM bundle: pluggable file storage (browser / Node), a binary
 * token cache, per-segment hash indexes and the SearchEngine / SimpleSearch
 * front ends.
 */

/**
 * Storage backend on top of the directory returned by
 * `navigator.storage.getDirectory()`; all files live in one sub-directory.
 */
class BrowserStorage {
  #baseDir;

  /** @param {string} baseDir Name of the sub-directory holding every file. */
  constructor(baseDir) {
    this.#baseDir = baseDir;
  }

  /** Opens (creating if needed) the sub-directory used by this storage. */
  async #directory() {
    const root = await navigator.storage.getDirectory();
    return await root.getDirectoryHandle(this.#baseDir, { create: true });
  }

  /**
   * Replaces the content of `filename` with `data`.
   * @param {string} filename
   * @param {ArrayBuffer} data
   */
  async write(filename, data) {
    const directory = await this.#directory();
    const fileHandle = await directory.getFileHandle(filename, { create: true });
    const writable = await fileHandle.createWritable();
    await writable.write(data);
    await writable.close();
  }

  /**
   * Appends `data` to the end of `filename`, creating the file if needed.
   * @param {string} filename
   * @param {ArrayBuffer} data
   */
  async append(filename, data) {
    const directory = await this.#directory();
    let fileHandle;
    try {
      fileHandle = await directory.getFileHandle(filename, { create: true });
    } catch {
      // Kept from the original: a single retry of the same call.
      fileHandle = await directory.getFileHandle(filename, { create: true });
    }
    const file = await fileHandle.getFile();
    const writable = await fileHandle.createWritable({ keepExistingData: true });
    await writable.seek(file.size);
    await writable.write(data);
    await writable.close();
  }

  /**
   * @param {string} filename
   * @returns {Promise<ArrayBuffer|null>} Whole file, or `null` when it cannot be read.
   */
  async read(filename) {
    const directory = await this.#directory();
    try {
      const fileHandle = await directory.getFileHandle(filename);
      const file = await fileHandle.getFile();
      return await file.arrayBuffer();
    } catch {
      return null;
    }
  }

  /**
   * @param {string} filename
   * @param {number} start First byte (inclusive).
   * @param {number} end Last byte (exclusive).
   * @returns {Promise<ArrayBuffer|null>} The byte range, or `null` when it cannot be read.
   */
  async readRange(filename, start, end) {
    const directory = await this.#directory();
    try {
      const fileHandle = await directory.getFileHandle(filename);
      const file = await fileHandle.getFile();
      return await file.slice(start, end).arrayBuffer();
    } catch {
      return null;
    }
  }

  /** Deletes `filename`; a failing delete is ignored (best effort). */
  async remove(filename) {
    const directory = await this.#directory();
    try {
      await directory.removeEntry(filename);
    } catch {
      // Best effort: the original ignores failures here.
    }
  }

  /** @returns {Promise<string[]>} Names of all entries in the storage directory. */
  async listFiles() {
    const directory = await this.#directory();
    const names = [];
    for await (const name of directory.keys()) {
      names.push(name);
    }
    return names;
  }

  /** Removes every entry of the storage directory (recursively). */
  async clearAll() {
    const directory = await this.#directory();
    for await (const name of directory.keys()) {
      await directory.removeEntry(name, { recursive: true });
    }
  }

  /** @returns {Promise<number>} Size of `filename` in bytes, or 0 when it cannot be read. */
  async getFileSize(filename) {
    const directory = await this.#directory();
    try {
      const fileHandle = await directory.getFileHandle(filename);
      return (await fileHandle.getFile()).size;
    } catch {
      return 0;
    }
  }
}

/**
 * Storage backend on top of `node:fs` promises. Files live in
 * `path.join(process.cwd(), baseDir)`, created on first use.
 */
class NodeStorage {
  #fs = null;
  #path = null;
  #baseDir;
  #root = "";

  /** @param {string} baseDir Directory (relative to the working directory) holding every file. */
  constructor(baseDir) {
    this.#baseDir = baseDir;
  }

  /** Lazily imports the Node modules and makes sure the root directory exists. */
  async #ensureReady() {
    if (this.#fs) return;
    const fsModule = await import("node:fs");
    const pathModule = await import("node:path");
    this.#fs = fsModule.promises;
    this.#path = pathModule.default || pathModule;
    this.#root = this.#path.join(process.cwd(), this.#baseDir);
    try {
      await this.#fs.access(this.#root);
    } catch {
      await this.#fs.mkdir(this.#root, { recursive: true });
    }
  }

  /** Absolute path of `filename` inside the root directory. */
  #resolve(filename) {
    return this.#path.join(this.#root, filename);
  }

  /** Replaces the content of `filename` with `data`. */
  async write(filename, data) {
    await this.#ensureReady();
    await this.#fs.writeFile(this.#resolve(filename), Buffer.from(data));
  }

  /** Appends `data` to `filename`. */
  async append(filename, data) {
    await this.#ensureReady();
    await this.#fs.appendFile(this.#resolve(filename), Buffer.from(data));
  }

  /** @returns {Promise<ArrayBuffer|null>} Whole file, or `null` when it cannot be read. */
  async read(filename) {
    await this.#ensureReady();
    try {
      const buffer = await this.#fs.readFile(this.#resolve(filename));
      // Copy out exactly the bytes of this Buffer (it may be a view into a larger pool).
      return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
    } catch {
      return null;
    }
  }

  /**
   * Reads the bytes `[start, end)` of `filename`.
   * The file handle is always closed, and the result is trimmed to the bytes
   * actually read so a range past the end of the file is not zero-padded.
   * @returns {Promise<ArrayBuffer|null>} The byte range, or `null` on any failure.
   */
  async readRange(filename, start, end) {
    await this.#ensureReady();
    try {
      const handle = await this.#fs.open(this.#resolve(filename), "r");
      try {
        const length = end - start;
        const buffer = Buffer.alloc(length);
        const { bytesRead } = await handle.read(buffer, 0, length, start);
        return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + bytesRead);
      } finally {
        await handle.close();
      }
    } catch {
      return null;
    }
  }

  /** Deletes `filename`; a failing delete is ignored (best effort). */
  async remove(filename) {
    await this.#ensureReady();
    try {
      await this.#fs.unlink(this.#resolve(filename));
    } catch {
      // Best effort: the original ignores failures here.
    }
  }

  /** @returns {Promise<string[]>} Directory listing, or `[]` when it cannot be read. */
  async listFiles() {
    await this.#ensureReady();
    try {
      return await this.#fs.readdir(this.#root);
    } catch {
      return [];
    }
  }

  /** Unlinks every entry of the root directory; stops silently at the first failure. */
  async clearAll() {
    await this.#ensureReady();
    try {
      const names = await this.#fs.readdir(this.#root);
      for (const name of names) {
        await this.#fs.unlink(this.#path.join(this.#root, name));
      }
    } catch {
      // Best effort: the original ignores failures here.
    }
  }

  /** @returns {Promise<number>} Size of `filename` in bytes, or 0 when it cannot be stat'ed. */
  async getFileSize(filename) {
    await this.#ensureReady();
    try {
      return (await this.#fs.stat(this.#resolve(filename))).size;
    } catch {
      return 0;
    }
  }
}

const META_FILE = "search_meta.json";
const DELETED_IDS_FILE = "deleted_ids.bin";
const ADDED_IDS_FILE = "added_ids.bin";
// Byte written after every id in the id files and after every cache record.
const RECORD_SEPARATOR = 30;

/**
 * Persists segment metadata (JSON) and the sets of added / deleted document
 * ids (binary: little-endian uint32 followed by a separator byte per id).
 */
class MetaStore {
  #storage;
  #meta = { wordSegments: [], charSegments: [] };
  #deletedIds = new Set();
  #addedIds = new Set();

  constructor(storage) {
    this.#storage = storage;
  }

  /** Reads one id file into `target`; a missing file leaves `target` untouched. */
  async #readIds(filename, target) {
    const buffer = await this.#storage.read(filename);
    if (!buffer) return;
    const view = new DataView(buffer);
    const size = buffer.byteLength;
    let offset = 0;
    while (offset + 4 <= size) {
      target.add(view.getUint32(offset, true));
      offset += 4;
      if (offset < size && view.getUint8(offset) === RECORD_SEPARATOR) offset += 1;
    }
  }

  /** Writes `ids` to `filename`, or removes the file when the set is empty. */
  async #writeIds(filename, ids) {
    if (ids.size === 0) {
      await this.#storage.remove(filename);
      return;
    }
    const buffer = new ArrayBuffer(5 * ids.size);
    const view = new DataView(buffer);
    let offset = 0;
    for (const id of ids) {
      view.setUint32(offset, id, true);
      view.setUint8(offset + 4, RECORD_SEPARATOR);
      offset += 5;
    }
    await this.#storage.write(filename, buffer);
  }

  /** Loads metadata and both id sets from storage. */
  async load() {
    const metaBuffer = await this.#storage.read(META_FILE);
    if (metaBuffer) {
      this.#meta = JSON.parse(new TextDecoder().decode(metaBuffer));
    } else {
      this.#meta = { wordSegments: [], charSegments: [] };
    }
    await this.#readIds(DELETED_IDS_FILE, this.#deletedIds);
    await this.#readIds(ADDED_IDS_FILE, this.#addedIds);
  }

  /** Writes metadata and both id sets to storage. */
  async save() {
    const json = JSON.stringify(this.#meta);
    await this.#storage.write(META_FILE, new TextEncoder().encode(json).buffer);
    await this.#writeIds(DELETED_IDS_FILE, this.#deletedIds);
    await this.#writeIds(ADDED_IDS_FILE, this.#addedIds);
  }

  /** @param {"word"|"char"} type @returns the live segment list for that type. */
  getSegments(type) {
    return type === "word" ? this.#meta.wordSegments : this.#meta.charSegments;
  }

  getDeletedIds() {
    return this.#deletedIds;
  }

  addDeletedId(id) {
    this.#deletedIds.add(id);
  }

  isDeleted(id) {
    return this.#deletedIds.has(id);
  }

  addAddedId(id) {
    this.#addedIds.add(id);
  }

  removeAddedId(id) {
    this.#addedIds.delete(id);
  }

  isAdded(id) {
    return this.#addedIds.has(id);
  }

  getAddedIds() {
    return this.#addedIds;
  }

  /** True when the id is in either the added or the deleted set. */
  hasDocument(id) {
    return this.#addedIds.has(id) || this.#deletedIds.has(id);
  }

  /** @returns the last segment of `type`, or `null` when there is none. */
  getLastSegmentInfo(type) {
    const segments = this.getSegments(type);
    return segments.length === 0 ? null : segments[segments.length - 1];
  }

  /**
   * Records a new segment (`isNew`) or extends the last one when its filename
   * matches; a non-matching update is ignored.
   */
  updateSegment(type, filename, start, end, tokenCount, isNew) {
    const segments = this.getSegments(type);
    if (isNew) {
      segments.push({ filename, start, end, tokenCount });
      return;
    }
    const last = segments[segments.length - 1];
    if (last && last.filename === filename) {
      last.end = end;
      last.tokenCount = tokenCount;
    }
  }

  /** Clears metadata and both id sets (in memory only). */
  reset() {
    this.#meta = { wordSegments: [], charSegments: [] };
    this.#deletedIds.clear();
    this.#addedIds.clear();
  }
}

// Token byte lengths are stored as uint16, so longer tokens are truncated.
const MAX_TOKEN_BYTES = 65535;

/**
 * Append-only binary cache of tokenized documents. Record layout (little-endian):
 * uint32 id, uint32 token count, then per token uint16 byte length + UTF-8
 * bytes, then one separator byte.
 */
class TokenCache {
  static SEPARATOR = RECORD_SEPARATOR;
  #storage;

  constructor(storage) {
    this.#storage = storage;
  }

  /**
   * Appends one record per document to `filename`.
   * @param {string} filename
   * @param {{id: number, tokens: string[]}[]} docs
   * @returns {Promise<number>} File size after the append.
   */
  async appendBatch(filename, docs) {
    if (docs.length === 0) return await this.#storage.getFileSize(filename);

    const encoder = new TextEncoder();
    // Encode each token once; the sizes and the payload come from the same bytes.
    const records = docs.map((doc) => ({
      id: doc.id,
      tokens: doc.tokens.map((token) => {
        const bytes = encoder.encode(token);
        return bytes.byteLength > MAX_TOKEN_BYTES ? bytes.slice(0, MAX_TOKEN_BYTES) : bytes;
      }),
    }));

    let total = 0;
    for (const record of records) {
      total += 8 + 1;
      for (const bytes of record.tokens) total += 2 + bytes.byteLength;
    }

    const out = new Uint8Array(total);
    const view = new DataView(out.buffer);
    let offset = 0;
    for (const record of records) {
      view.setUint32(offset, record.id, true);
      view.setUint32(offset + 4, record.tokens.length, true);
      offset += 8;
      for (const bytes of record.tokens) {
        view.setUint16(offset, bytes.byteLength, true);
        offset += 2;
        out.set(bytes, offset);
        offset += bytes.byteLength;
      }
      out[offset++] = TokenCache.SEPARATOR;
    }

    await this.#storage.append(filename, out.buffer);
    return await this.#storage.getFileSize(filename);
  }

  /**
   * Parses the records stored in bytes `[start, end)` of `filename`.
   * A truncated trailing record yields the tokens read so far.
   * @returns {Promise<{id: number, tokens: string[]}[]>}
   */
  async readRange(filename, start, end) {
    const buffer = await this.#storage.readRange(filename, start, end);
    if (!buffer || buffer.byteLength === 0) return [];

    const view = new DataView(buffer);
    const bytes = new Uint8Array(buffer);
    const decoder = new TextDecoder();
    const size = buffer.byteLength;
    const records = [];
    let offset = 0;

    while (offset + 8 <= size) {
      const id = view.getUint32(offset, true);
      const tokenCount = view.getUint32(offset + 4, true);
      offset += 8;
      const tokens = [];
      for (let i = 0; i < tokenCount && offset + 2 <= size; i++) {
        const length = view.getUint16(offset, true);
        offset += 2;
        if (offset + length > size) break;
        tokens.push(decoder.decode(new Uint8Array(buffer, offset, length)));
        offset += length;
      }
      if (offset < size && bytes[offset] === TokenCache.SEPARATOR) offset += 1;
      records.push({ id, tokens });
    }
    return records;
  }

  /** @returns {Promise<number>} Current size of `filename` in bytes. */
  async getCurrentSize(filename) {
    return await this.#storage.getFileSize(filename);
  }
}

/** 32-bit multiply of `value` by `factor` done in two 16-bit halves (result is a signed int32). */
function murmurMultiply(value, factor) {
  return (factor * (value & 65535) + (((factor * (value >>> 16)) & 65535) << 16)) & 4294967295;
}

/**
 * MurmurHash3-style 32-bit string hash used as the key of the hash index.
 *
 * The exact output is part of the on-disk index format, so two quirks are
 * kept on purpose: only the low 8 bits of every UTF-16 code unit are mixed in,
 * and the 4-character block loop stops once the character index reaches
 * `length >> 2`, so the tail of longer strings does not influence the hash.
 * Collisions are therefore expected and are resolved by the index itself.
 *
 * @param {string} str String to hash.
 * @param {number} [h=305419896] Seed (0x12345678).
 * @returns {number} Unsigned 32-bit hash.
 */
function murmur3_32(str, h = 305419896) {
  const length = str.length;
  const blockLimit = length >> 2;
  let index = 0;

  while (index < blockLimit) {
    let k =
      (str.charCodeAt(index) & 255) |
      ((str.charCodeAt(++index) & 255) << 8) |
      ((str.charCodeAt(++index) & 255) << 16) |
      ((str.charCodeAt(++index) & 255) << 24);
    ++index;
    k = murmurMultiply(k, 3432918353); // c1 = 0xcc9e2d51
    k = (k << 15) | (k >>> 17);
    k = murmurMultiply(k, 461845907); // c2 = 0x1b873593
    h ^= k;
    h = (h << 13) | (h >>> 19);
    const scaled = murmurMultiply(h, 5);
    // h = scaled + 0xe6546b64, again split into 16-bit halves.
    h = 27492 + (scaled & 65535) + ((((scaled >>> 16) + 58964) & 65535) << 16);
  }

  const remainder = length & 3;
  if (remainder > 0) {
    let tail = 0;
    if (remainder >= 3) tail ^= (str.charCodeAt(index + 2) & 255) << 16;
    if (remainder >= 2) tail ^= (str.charCodeAt(index + 1) & 255) << 8;
    tail ^= str.charCodeAt(index) & 255;
    tail = murmurMultiply(tail, 3432918353);
    tail = (tail << 15) | (tail >>> 17);
    tail = murmurMultiply(tail, 461845907);
    h ^= tail;
  }

  // Final avalanche.
  h ^= length;
  h ^= h >>> 16;
  h = murmurMultiply(h, 2246822507); // 0x85ebca6b
  h ^= h >>> 13;
  h = murmurMultiply(h, 3266489909); // 0xc2b2ae35
  h ^= h >>> 16;
  return h >>> 0;
}

const INDEX_MAGIC = 1229866072; // ASCII "INDX"
const INDEX_HEADER_BYTES = 12;
const INDEX_ENTRY_BYTES = 20;

/**
 * Immutable token -> document-id index stored in one file.
 *
 * Layout: header (uint32 magic, uint32 entry count, uint32 offset of the
 * string region), then 20-byte entries sorted by hash (hash, token byte
 * length, token offset, postings offset, postings count — all big-endian),
 * then the postings (little-endian uint32 ids), then the NUL-terminated UTF-8
 * token strings.
 */
class HashIndex {
  #filename;
  #storage;
  #buffer = null;
  #view = null;

  /** @param {string} token @returns {number} Hash used as the entry key. */
  static hash(token) {
    return murmur3_32(token);
  }

  constructor(filename, storage) {
    this.#filename = filename;
    this.#storage = storage;
  }

  /** Loads the index file once. @returns {Promise<boolean>} Whether an index is available. */
  async loadIndex() {
    if (this.#buffer) return true;
    this.#buffer = await this.#storage.read(this.#filename);
    if (!this.#buffer) return false;
    this.#view = new DataView(this.#buffer);
    return true;
  }

  /**
   * Builds the index for `docs`, writes it to storage and keeps it in memory.
   * @param {{id: number, tokens: string[]}[]} docs
   */
  async buildAndSave(docs) {
    const encoder = new TextEncoder();
    const byToken = new Map();
    for (const doc of docs) {
      const seen = new Set();
      for (const token of doc.tokens) {
        if (seen.has(token)) continue;
        seen.add(token);
        let entry = byToken.get(token);
        if (!entry) {
          entry = { hash: HashIndex.hash(token), bytes: encoder.encode(token), postings: [] };
          byToken.set(token, entry);
        }
        entry.postings.push(doc.id);
      }
    }

    const entries = Array.from(byToken.entries());
    entries.sort(([tokenA, a], [tokenB, b]) =>
      a.hash !== b.hash ? a.hash - b.hash : tokenA.localeCompare(tokenB),
    );

    let postingCount = 0;
    let stringBytes = 0;
    for (const [, entry] of entries) {
      postingCount += entry.postings.length;
      // Size by encoded bytes, not by `token.length`: non-ASCII tokens need
      // more than one byte per UTF-16 unit and used to overrun the buffer.
      stringBytes += entry.bytes.length + 1;
    }

    const tableBytes = INDEX_ENTRY_BYTES * entries.length;
    const stringsStart = INDEX_HEADER_BYTES + tableBytes + 4 * postingCount;
    const buffer = new ArrayBuffer(stringsStart + stringBytes);
    const view = new DataView(buffer);
    const bytes = new Uint8Array(buffer);

    view.setUint32(0, INDEX_MAGIC);
    view.setUint32(4, entries.length);
    view.setUint32(8, stringsStart);

    let entryOffset = INDEX_HEADER_BYTES;
    let postingsOffset = INDEX_HEADER_BYTES + tableBytes;
    let stringOffset = stringsStart;
    for (const [, entry] of entries) {
      view.setUint32(entryOffset, entry.hash);
      view.setUint32(entryOffset + 4, entry.bytes.length);
      view.setUint32(entryOffset + 8, stringOffset);
      view.setUint32(entryOffset + 12, postingsOffset);
      view.setUint32(entryOffset + 16, entry.postings.length);
      entryOffset += INDEX_ENTRY_BYTES;

      for (const id of entry.postings) {
        view.setUint32(postingsOffset, id, true);
        postingsOffset += 4;
      }

      bytes.set(entry.bytes, stringOffset);
      stringOffset += entry.bytes.length;
      bytes[stringOffset++] = 0;
    }

    await this.#storage.write(this.#filename, buffer);
    this.#buffer = buffer;
    this.#view = view;
  }

  /**
   * Looks up the document ids indexed for `token`.
   * The stored token bytes are always compared with the query, so a token
   * that merely shares a hash with an indexed one yields no results.
   * @param {string} token
   * @returns {number[]} Matching ids, or `[]` when the token is not indexed or no index is loaded.
   */
  search(token) {
    if (!this.#view || !this.#buffer) return [];
    const view = this.#view;
    const target = HashIndex.hash(token);
    const count = view.getUint32(4);
    const hashAt = (position) => view.getUint32(INDEX_HEADER_BYTES + position * INDEX_ENTRY_BYTES);

    let low = 0;
    let high = count - 1;
    let found = -1;
    while (low <= high) {
      const mid = (low + high) >>> 1;
      const current = hashAt(mid);
      if (current < target) {
        low = mid + 1;
      } else if (current > target) {
        high = mid - 1;
      } else {
        found = mid;
        break;
      }
    }
    if (found < 0) return [];

    // Entries with equal hashes are adjacent: rewind to the first of the run.
    let first = found;
    while (first > 0 && hashAt(first - 1) === target) first--;

    const wanted = new TextEncoder().encode(token);
    for (let position = first; position < count && hashAt(position) === target; position++) {
      const entryOffset = INDEX_HEADER_BYTES + position * INDEX_ENTRY_BYTES;
      const length = view.getUint32(entryOffset + 4);
      if (length !== wanted.length) continue;
      const stored = new Uint8Array(this.#buffer, view.getUint32(entryOffset + 8), length);
      if (!wanted.every((byte, k) => byte === stored[k])) continue;

      const postingsOffset = view.getUint32(entryOffset + 12);
      const postingsCount = view.getUint32(entryOffset + 16);
      const ids = [];
      for (let k = 0; k < postingsCount; k++) {
        ids.push(view.getUint32(postingsOffset + 4 * k, true));
      }
      return ids;
    }
    return [];
  }
}

const WORD_CACHE_FILE = "word_cache.bin";
const CHAR_CACHE_FILE = "char_cache.bin";

/**
 * Full-text search engine. Tokens longer than one character go to the "word"
 * cache / segments, single-character tokens to the "char" ones.
 */
class SearchEngine {
  #storage;
  #meta;
  #cache;
  #indexes;
  #initialized = false;
  #config;
  #inBatch = false;
  #pending = { word: 0, char: 0 };

  /**
   * @param {object} config Engine configuration; `baseDir` is required.
   *   `storage` may be a storage object, `"browser"` or `"node"`; otherwise the
   *   environment is auto-detected.
   * @throws {Error} On a missing `baseDir`, inconsistent thresholds, or when no storage can be created.
   */
  constructor(config) {
    if (!config.baseDir) throw new Error("SearchEngine requires 'baseDir' in config.");
    this.#config = {
      wordSegmentTokenThreshold: 1e5,
      charSegmentTokenThreshold: 5e5,
      minWordTokenSave: 0,
      minCharTokenSave: 0,
      ...config,
    };
    if ((this.#config.minWordTokenSave || 0) >= (this.#config.wordSegmentTokenThreshold || 1e5)) {
      throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");
    }
    if ((this.#config.minCharTokenSave || 0) >= (this.#config.charSegmentTokenThreshold || 5e5)) {
      throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");
    }

    const storage = SearchEngine.#resolveStorage(this.#config);
    if (!storage) {
      throw new Error('Storage initialization failed. Please configure "storage" explicitly or ensure you are in a supported environment (Browser/Node).');
    }
    this.#storage = storage;
    this.#meta = new MetaStore(storage);
    this.#cache = new TokenCache(storage);
    this.#indexes = new Map();
  }

  /** Picks the configured storage, falling back to environment detection; `null` when none fits. */
  static #resolveStorage({ storage, baseDir }) {
    if (storage) {
      if (typeof storage === "object") return storage;
      if (storage === "browser") return new BrowserStorage(baseDir);
      if (storage === "node") return new NodeStorage(baseDir);
    }
    const inBrowser =
      typeof navigator !== "undefined" && navigator?.storage?.getDirectory instanceof Function;
    if (inBrowser) return new BrowserStorage(baseDir);
    const inNode =
      typeof process !== "undefined" && process.versions != null && process.versions.node != null;
    return inNode ? new NodeStorage(baseDir) : null;
  }

  /** Loads metadata and every known segment index; later calls are no-ops. */
  async init() {
    if (this.#initialized) return;
    await this.#meta.load();
    const segments = [...this.#meta.getSegments("word"), ...this.#meta.getSegments("char")];
    for (const segment of segments) {
      if (!this.#indexes.has(segment.filename)) {
        this.#indexes.set(segment.filename, new HashIndex(segment.filename, this.#storage));
      }
      await this.#indexes.get(segment.filename).loadIndex();
    }
    this.#initialized = true;
  }

  /** Starts a batch: added documents are cached but indexes are rebuilt only in `endBatch`. */
  startBatch() {
    this.#inBatch = true;
    this.#pending = { word: 0, char: 0 };
  }

  /** Ends a batch: flushes the pending tokens into segments and saves metadata. */
  async endBatch() {
    this.#inBatch = false;
    if (this.#pending.word > 0) await this.#flushSegment("word", this.#pending.word);
    if (this.#pending.char > 0) await this.#flushSegment("char", this.#pending.char);
    this.#pending = { word: 0, char: 0 };
    await this.#meta.save();
  }

  /** Default tokenizer: `Intl.Segmenter` word segments when available, else a regex split; lower-cased. */
  #defaultTokenize(text) {
    try {
      if (
        typeof Intl !== "undefined" &&
        typeof Intl.Segmenter === "function" &&
        typeof Array.from === "function"
      ) {
        const segments = new Intl.Segmenter([], { granularity: "word" }).segment(text);
        if (typeof segments === "object" && segments !== null) {
          return Array.from(segments)
            .filter((part) => part?.isWordLike)
            .map((part) => part?.segment?.toLowerCase() || "");
        }
      }
    } catch {
      // Fall through to the regex tokenizer below.
    }
    return text
      .toLowerCase()
      .split(/[^a-z0-9\u4e00-\u9fa5]+/g)
      .filter((part) => part.length > 0);
  }

  /** Tokens used when indexing `doc` (custom `indexingTokenizer` wins). */
  #tokenizeForIndex(doc) {
    return this.#config.indexingTokenizer
      ? this.#config.indexingTokenizer(doc)
      : this.#defaultTokenize(doc.text);
  }

  /** Tokens used when searching (custom `searchTokenizer` wins, else the indexing tokenizer). */
  #tokenizeForSearch(doc) {
    return this.#config.searchTokenizer
      ? this.#config.searchTokenizer(doc)
      : this.#tokenizeForIndex(doc);
  }

  /** Splits a document's tokens into multi-character words and single characters. */
  #partitionTokens(doc) {
    const words = [];
    const chars = [];
    for (const token of this.#tokenizeForIndex(doc)) {
      if (token.length > 1) words.push(token);
      else if (token.length === 1) chars.push(token);
    }
    return { words, chars };
  }

  /** Appends the tokenized documents to the caches, marks `docs` as added and flushes unless batching. */
  async #commit(wordDocs, charDocs, docs) {
    let wordTokens = 0;
    let charTokens = 0;
    if (wordDocs.length > 0) {
      await this.#cache.appendBatch(WORD_CACHE_FILE, wordDocs);
      for (const entry of wordDocs) wordTokens += entry.tokens.length;
    }
    if (charDocs.length > 0) {
      await this.#cache.appendBatch(CHAR_CACHE_FILE, charDocs);
      for (const entry of charDocs) charTokens += entry.tokens.length;
    }
    for (const doc of docs) this.#meta.addAddedId(doc.id);

    if (this.#inBatch) {
      this.#pending.word += wordTokens;
      this.#pending.char += charTokens;
      return;
    }
    if (wordTokens > 0) await this.#flushSegment("word", wordTokens);
    if (charTokens > 0) await this.#flushSegment("char", charTokens);
    await this.#meta.save();
  }

  /** Adds one document; see `addDocuments`. */
  async addDocument(doc) {
    return this.addDocuments([doc]);
  }

  /** Adds one document unless its id was already added or deleted. */
  async addDocumentIfMissing(doc) {
    return this.addDocumentsIfMissing([doc]);
  }

  /** Adds the documents whose ids were neither added nor deleted before; the rest are skipped. */
  async addDocumentsIfMissing(docs) {
    if (!this.#initialized) await this.init();
    if (docs.length === 0) return;

    const deleted = this.#meta.getDeletedIds();
    const wordDocs = [];
    const charDocs = [];
    const accepted = [];
    for (const doc of docs) {
      if (deleted.has(doc.id) || this.#meta.isAdded(doc.id)) continue;
      const { words, chars } = this.#partitionTokens(doc);
      if (words.length > 0) wordDocs.push({ id: doc.id, tokens: words });
      if (chars.length > 0) charDocs.push({ id: doc.id, tokens: chars });
      accepted.push(doc);
    }
    if (accepted.length === 0) return;
    await this.#commit(wordDocs, charDocs, accepted);
  }

  /**
   * Adds documents.
   * @throws {Error} When an id was deleted before or already exists; nothing is written in that case.
   */
  async addDocuments(docs) {
    if (!this.#initialized) await this.init();
    if (docs.length === 0) return;

    const deleted = this.#meta.getDeletedIds();
    const wordDocs = [];
    const charDocs = [];
    for (const doc of docs) {
      if (deleted.has(doc.id)) {
        throw new Error(`Document ID ${doc.id} has been deleted and cannot be re-added.`);
      }
      if (this.#meta.isAdded(doc.id)) {
        throw new Error(`Document ID ${doc.id} already exists.`);
      }
      const { words, chars } = this.#partitionTokens(doc);
      if (words.length > 0) wordDocs.push({ id: doc.id, tokens: words });
      if (chars.length > 0) charDocs.push({ id: doc.id, tokens: chars });
    }
    await this.#commit(wordDocs, charDocs, docs);
  }

  /**
   * Folds `newTokens` freshly cached tokens into the last segment of `type`,
   * or starts a new segment once the threshold is reached, and rebuilds that
   * segment's index from the cache range it covers.
   */
  async #flushSegment(type, newTokens) {
    const isWord = type === "word";
    const cacheFile = isWord ? WORD_CACHE_FILE : CHAR_CACHE_FILE;
    const cacheSize = await this.#cache.getCurrentSize(cacheFile);
    const threshold = isWord
      ? this.#config.wordSegmentTokenThreshold || 1e5
      : this.#config.charSegmentTokenThreshold || 5e5;
    const minSave = isWord
      ? this.#config.minWordTokenSave || 0
      : this.#config.minCharTokenSave || 0;
    const last = this.#meta.getLastSegmentInfo(type);
    const nextFilename = () => `${type}_seg_${this.#meta.getSegments(type).length + 1}.bin`;

    let filename;
    let start;
    let isNew;
    let tokenCount;
    if (last && last.tokenCount < threshold && last.tokenCount + newTokens < threshold) {
      filename = last.filename;
      isNew = false;
      start = last.start;
      tokenCount = last.tokenCount + newTokens;
    } else {
      filename = nextFilename();
      isNew = true;
      start = last ? last.end : 0;
      tokenCount = newTokens;
    }

    if (tokenCount < minSave) {
      // Too few tokens to be worth an index yet: only record the range.
      this.#meta.updateSegment(type, filename, start, cacheSize, tokenCount, isNew);
      return;
    }

    const docs = await this.#cache.readRange(cacheFile, start, cacheSize);
    let index = this.#indexes.get(filename);
    if (!index) {
      index = new HashIndex(filename, this.#storage);
      this.#indexes.set(filename, index);
    }
    await index.buildAndSave(docs);
    this.#meta.updateSegment(type, filename, start, cacheSize, tokenCount, isNew);
  }

  /**
   * Searches the indexed documents.
   * @param {string|object} query Query text, or a document-like object for the tokenizer.
   * @param {number} [limit] Maximum number of results when it is a positive number.
   * @returns {Promise<{id: number, score: number, tokens: string[]}[]>} Hits sorted by descending
   *   score; every matched token contributes `1 + 0.1 * token.length`.
   */
  async search(query, limit) {
    if (!this.#initialized) await this.init();
    const doc = typeof query === "string" ? { text: query } : query;
    const tokens = this.#tokenizeForSearch(doc);
    const words = tokens.filter((token) => token.length > 1);
    const chars = tokens.filter((token) => token.length === 1);
    const deleted = this.#meta.getDeletedIds();
    const hits = new Map();

    // Load indexes of segments that are listed in the metadata but not in memory yet.
    const missing = new Map();
    for (const type of ["word", "char"]) {
      for (const { filename } of this.#meta.getSegments(type)) {
        if (!this.#indexes.has(filename) && !missing.has(filename)) {
          missing.set(filename, new HashIndex(filename, this.#storage));
        }
      }
    }
    await Promise.all(
      Array.from(missing.entries()).map(([filename, index]) =>
        index.loadIndex().then((loaded) => {
          if (loaded) this.#indexes.set(filename, index);
        }),
      ),
    );

    const collect = (type, queryTokens) => {
      if (queryTokens.length === 0) return;
      for (const { filename } of this.#meta.getSegments(type)) {
        const index = this.#indexes.get(filename);
        if (!index) continue;
        for (const token of queryTokens) {
          const weight = 1 + 0.1 * token.length;
          for (const id of index.search(token)) {
            if (deleted.has(id)) continue;
            const hit = hits.get(id);
            if (hit) {
              hit.score += weight;
              hit.tokens.add(token);
            } else {
              // The first matching token counts too (it used to start at 0).
              hits.set(id, { score: weight, tokens: new Set([token]) });
            }
          }
        }
      }
    };
    collect("word", words);
    collect("char", chars);

    const results = [];
    hits.forEach((hit, id) => {
      results.push({ id, score: hit.score, tokens: Array.from(hit.tokens) });
    });
    results.sort((a, b) => b.score - a.score);
    return typeof limit === "number" && limit > 0 ? results.slice(0, limit) : results;
  }

  /** Marks `id` as deleted (it is filtered from results and cannot be re-added). */
  async removeDocument(id) {
    if (!this.#initialized) await this.init();
    this.#meta.addDeletedId(id);
    this.#meta.removeAddedId(id);
    await this.#meta.save();
  }

  /** Deletes all stored files and resets the in-memory state. */
  async clearAll() {
    await this.#storage.clearAll();
    this.#indexes.clear();
    this.#meta.reset();
    this.#initialized = false;
    this.#inBatch = false;
    this.#pending = { word: 0, char: 0 };
  }

  /** @returns {Promise<object>} Segment counts, deleted-id count, cache sizes and batch flag. */
  async getStatus() {
    if (!this.#initialized) await this.init();
    return {
      wordSegments: this.#meta.getSegments("word").length,
      charSegments: this.#meta.getSegments("char").length,
      deleted: this.#meta.getDeletedIds().size,
      wordCacheSize: await this.#cache.getCurrentSize(WORD_CACHE_FILE),
      charCacheSize: await this.#cache.getCurrentSize(CHAR_CACHE_FILE),
      inBatch: this.#inBatch,
    };
  }

  /** @returns {Promise<boolean>} True when `id` is in the added or the deleted set. */
  async hasDocument(id) {
    if (!this.#initialized) await this.init();
    return this.#meta.hasDocument(id);
  }
}

/** Static convenience facade over a single, lazily created `SearchEngine`. */
class SimpleSearch {
  static #instance = null;
  static #defaults = {
    baseDir: "simple_search_data",
    wordSegmentTokenThreshold: 1e5,
    minWordTokenSave: 0,
  };

  /** Replaces the shared engine with one built from the defaults merged with `config`. */
  static configure(config) {
    SimpleSearch.#instance = new SearchEngine({ ...SimpleSearch.#defaults, ...config });
  }

  /** Returns the shared engine, creating it with the defaults on first use. */
  static #engine() {
    if (!SimpleSearch.#instance) {
      SimpleSearch.#instance = new SearchEngine(SimpleSearch.#defaults);
    }
    return SimpleSearch.#instance;
  }

  static async startBatch() {
    SimpleSearch.#engine().startBatch();
  }

  static async endBatch() {
    return SimpleSearch.#engine().endBatch();
  }

  static async addDocument(doc) {
    return SimpleSearch.#engine().addDocument(doc);
  }

  static async addDocumentIfMissing(doc) {
    return SimpleSearch.#engine().addDocumentIfMissing(doc);
  }

  static async addDocuments(docs) {
    return SimpleSearch.#engine().addDocuments(docs);
  }

  static async addDocumentsIfMissing(docs) {
    return SimpleSearch.#engine().addDocumentsIfMissing(docs);
  }

  static async search(query, limit) {
    return SimpleSearch.#engine().search(query, limit);
  }

  static async removeDocument(id) {
    return SimpleSearch.#engine().removeDocument(id);
  }

  static async clearAll() {
    return SimpleSearch.#engine().clearAll();
  }

  static async getStatus() {
    return SimpleSearch.#engine().getStatus();
  }

  static async hasDocument(id) {
    return SimpleSearch.#engine().hasDocument(id);
  }
}

export { BrowserStorage, NodeStorage, SearchEngine, SimpleSearch, murmur3_32 as hash, murmur3_32 };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gs-search",
3
- "version": "0.1.2",
3
+ "version": "0.1.3",
4
4
  "type": "module",
5
5
  "main": "lib/index.cjs",
6
6
  "module": "lib/index.js",