npm - gs-search - Versions diffs - 0.1.4 → 0.1.5 - Mend

gs-search 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/README.ja.md CHANGED Viewed

@@ -134,7 +134,7 @@ const engine = new SearchEngine({
 ### SearchEngine
-- `constructor(options: ISearchEngineConfig)`: 新しいコアエンジンインスタンスを作成
+- `constructor(options: ISearchEngineOption)`: 新しいコアエンジンインスタンスを作成
 - `init(): Promise<void>`: エンジンを初期化
 - `addDocument(doc: IDocument): Promise<void>`: 単一ドキュメントを追加
 - `addDocuments(docs: IDocument[]): Promise<void>`: 複数ドキュメントを追加

package/README.ko.md CHANGED Viewed

@@ -134,7 +134,7 @@ const engine = new SearchEngine({
 ### SearchEngine
-- `constructor(options: ISearchEngineConfig)`: 새로운 코어 엔진 인스턴스 생성
+- `constructor(options: ISearchEngineOption)`: 새로운 코어 엔진 인스턴스 생성
 - `init(): Promise<void>`: 엔진 초기화
 - `addDocument(doc: IDocument): Promise<void>`: 단일 문서 추가
 - `addDocuments(docs: IDocument[]): Promise<void>`: 다중 문서 추가

package/README.md CHANGED Viewed

@@ -116,7 +116,7 @@ const customTokenizer = (text: string): string[] => {
 // Create engine with custom tokenizers
 const engine = new SearchEngine({
-  baseDir: 'search-data',
+  storage: new BrowserStorage('search-data'),
   indexingTokenizer: customTokenizer,
   searchTokenizer: customTokenizer
 });
@@ -127,7 +127,7 @@ const engine = new SearchEngine({
 ### SimpleSearch
 **Static Methods (No instance creation required):**
-- `configure(config: Partial<ISearchEngineConfig>): void`: Configure the search engine
+- `configure(config: Partial<ISearchEngineOption>): void`: Configure the search engine
 - `addDocument(doc: IDocument): Promise<void>`: Add a single document
 - `addDocuments(docs: IDocument[]): Promise<void>`: Add multiple documents
 - `addDocumentIfMissing(doc: IDocument): Promise<void>`: Add a single document if it doesn't exist
@@ -141,7 +141,7 @@ const engine = new SearchEngine({
 ### SearchEngine
-- `constructor(options: ISearchEngineConfig)`: Create a new core engine instance
+- `constructor(options: ISearchEngineOption)`: Create a new core engine instance
 - `init(): Promise<void>`: Initialize the engine
 - `addDocument(doc: IDocument): Promise<void>`: Add a single document
 - `addDocuments(docs: IDocument[]): Promise<void>`: Add multiple documents

package/README.zh-CN.md CHANGED Viewed

@@ -225,7 +225,7 @@ SimpleSearch.configure({
 ### SimpleSearch
 **静态方法（无需实例创建）：**
-- `configure(config: Partial<ISearchEngineConfig>): void`: 配置搜索引擎
+- `configure(config: Partial<ISearchEngineOption>): void`: 配置搜索引擎
 - `addDocument(doc: IDocument): Promise<void>`: 添加单个文档
 - `addDocuments(docs: IDocument[]): Promise<void>`: 添加多个文档
 - `addDocumentIfMissing(doc: IDocument): Promise<void>`: 如果文档不存在则添加单个文档
@@ -239,7 +239,7 @@ SimpleSearch.configure({
 ### SearchEngine
-- `constructor(options: ISearchEngineConfig)`: 创建一个新的核心引擎实例
+- `constructor(options: ISearchEngineOption)`: 创建一个新的核心引擎实例
 - `init(): Promise<void>`: 初始化引擎
 - `addDocument(doc: IDocument): Promise<void>`: 添加单个文档
 - `addDocuments(docs: IDocument[]): Promise<void>`: 添加多个文档

package/lib/core.cjs CHANGED Viewed

	@@ -1 +1 @@
1	- "use strict";var e=require('./browser'),t=require('./node');const s="search_meta.json",n="deleted_ids.bin",i="added_ids.bin";class o{#e;#t={wordSegments:[],charSegments:[]};#s=new Set;#n=new Set;constructor(e){this.#e=e}async load(){const e=await this.#e.read(s);if(e){const t=(new TextDecoder).decode(e);this.#t=JSON.parse(t)}else this.#t={wordSegments:[],charSegments:[]};const t=await this.#e.read(n);if(t){const e=new DataView(t);let s=0;const n=t.byteLength;for(;s<n&&!(s+4>n);){const t=e.getUint32(s,!0);this.#s.add(t),s+=4,s<n&&30===e.getUint8(s)&&(s+=1)}}const o=await this.#e.read(i);if(o){const e=new DataView(o);let t=0;const s=o.byteLength;for(;t<s&&!(t+4>s);){const n=e.getUint32(t,!0);this.#n.add(n),t+=4,t<s&&30===e.getUint8(t)&&(t+=1)}}}async save(){const e=JSON.stringify(this.#t);if(await this.#e.write(s,(new TextEncoder).encode(e).buffer),0===this.#s.size)await this.#e.remove(n);else{const e=4this.#s.size+this.#s.size,t=new ArrayBuffer(e),s=new DataView(t);let i=0;for(const e of this.#s)s.setUint32(i,e,!0),i+=4,s.setUint8(i,30),i+=1;await this.#e.write(n,t)}if(0===this.#n.size)await this.#e.remove(i);else{const e=4this.#n.size+this.#n.size,t=new ArrayBuffer(e),s=new DataView(t);let n=0;for(const e of this.#n)s.setUint32(n,e,!0),n+=4,s.setUint8(n,30),n+=1;await this.#e.write(i,t)}}getSegments(e){return"word"===e?this.#t.wordSegments:this.#t.charSegments}getDeletedIds(){return this.#s}addDeletedId(e){this.#s.add(e)}isDeleted(e){return this.#s.has(e)}addAddedId(e){this.#n.add(e)}removeAddedId(e){this.#n.delete(e)}isAdded(e){return this.#n.has(e)}getAddedIds(){return this.#n}hasDocument(e){return this.#n.has(e)\|\|this.#s.has(e)}getLastSegmentInfo(e){const t=this.getSegments(e);return 0===t.length?null:t[t.length-1]}updateSegment(e,t,s,n,i,o){const a="word"===e?this.#t.wordSegments:this.#t.charSegments;if(o)a.push({filename:t,start:s,end:n,tokenCount:i});else{const 
e=a[a.length-1];e&&e.filename===t&&(e.end=n,e.tokenCount=i)}}reset(){this.#t={wordSegments:[],charSegments:[]},this.#s.clear(),this.#n.clear()}}class a{static SEPARATOR=30;#e;constructor(e){this.#e=e}async appendBatch(e,t){if(0===t.length)return await this.#e.getFileSize(e);const s=new TextEncoder;let n=0;for(const e of t){n+=8;for(const t of e.tokens){n+=2+Math.min(s.encode(t).byteLength,65535)}n+=1}const i=new Uint8Array(n);let o=0;for(const e of t){const t=[];for(const n of e.tokens){const e=s.encode(n),i=e.byteLength>65535?e.slice(0,65535):e;t.push(i)}const n=new DataView(i.buffer,o);n.setUint32(0,e.id,!0),n.setUint32(4,t.length,!0),o+=8;for(const e of t)new DataView(i.buffer,o).setUint16(0,e.byteLength,!0),o+=2,i.set(e,o),o+=e.byteLength;i[o++]=a.SEPARATOR}return await this.#e.append(e,i.buffer),await this.#e.getFileSize(e)}async readRange(e,t,s){const n=await this.#e.readRange(e,t,s);if(!n\|\|0===n.byteLength)return[];const i=new DataView(n),o=new Uint8Array(n),r=new TextDecoder,h=[];let d=0;const c=n.byteLength;for(;d<c&&!(d+8>c);){const e=i.getUint32(d,!0);d+=4;const t=i.getUint32(d,!0);d+=4;const s=[];for(let e=0;e<t&&!(d+2>c);e++){const e=i.getUint16(d,!0);if(d+=2,d+e>c)break;const t=new Uint8Array(n,d,e);s.push(r.decode(t)),d+=e}d<c&&o[d]===a.SEPARATOR&&(d+=1),h.push({id:e,tokens:s})}return h}async getCurrentSize(e){return await this.#e.getFileSize(e)}}function r(e,t=305419896){const s=e.length,n=s>>2;let i=0;for(;i<n;){let s=255&e.charCodeAt(i)\|(255&e.charCodeAt(++i))<<8\|(255&e.charCodeAt(++i))<<16\|(255&e.charCodeAt(++i))<<24;++i,s=3432918353(65535&s)+((3432918353(s>>>16)&65535)<<16)&4294967295,s=s<<15\|s>>>17,s=461845907(65535&s)+((461845907(s>>>16)&65535)<<16)&4294967295,t=27492+(65535&(t=5(65535&(t=(t^=s)<<13\|t>>>19))+((5(t>>>16)&65535)<<16)&4294967295))+(((t>>>16)+58964&65535)<<16)}let o=0;const a=3&s;return 
a>0&&(a>=3&&(o^=(255&e.charCodeAt(i+2))<<16),a>=2&&(o^=(255&e.charCodeAt(i+1))<<8),a>=1&&(o^=255&e.charCodeAt(i)),o=3432918353(65535&o)+((3432918353(o>>>16)&65535)<<16)&4294967295,o=o<<15\|o>>>17,o=461845907(65535&o)+((461845907(o>>>16)&65535)<<16)&4294967295,t^=o),t^=s,t=2246822507(65535&(t^=t>>>16))+((2246822507(t>>>16)&65535)<<16)&4294967295,t=3266489909(65535&(t^=t>>>13))+((3266489909(t>>>16)&65535)<<16)&4294967295,(t^=t>>>16)>>>0}class h{#i;#e;#o=null;#a=null;static hash(e){return r(e)}constructor(e,t){this.#i=e,this.#e=t}async loadIndex(){return!!this.#o\|\|(this.#o=await this.#e.read(this.#i),!!this.#o&&(this.#a=new DataView(this.#o),!0))}async buildAndSave(e){const t=new Map;for(const s of e){const e=new Map;for(const n of s.tokens)e.has(n)\|\|(e.set(n,!0),t.has(n)\|\|t.set(n,{hash:h.hash(n),postings:[]}),t.get(n).postings.push(s.id))}const s=Array.from(t.entries());s.sort(([e,{hash:t}],[s,{hash:n}])=>t!==n?t-n:e.localeCompare(s));let n=0,i=0;for(const[e,{postings:t}]of s)n+=t.length,i+=e.length+1;const o=20s.length,a=12+o+4n,r=new ArrayBuffer(a+i),d=new DataView(r);d.setUint32(0,1229866072),d.setUint32(4,s.length),d.setUint32(8,a);let c=12,g=12+o,f=a;for(const[e,{hash:t,postings:n}]of s){d.setUint32(c,t),d.setUint32(c+4,e.length),d.setUint32(c+8,f),d.setUint32(c+12,g),d.setUint32(c+16,n.length),c+=20;for(let e=0;e<n.length;e++)d.setUint32(g,n[e],!0),g+=4;const s=(new TextEncoder).encode(e);for(let e=0;e<s.length;e++)d.setUint8(f++,s[e]);d.setUint8(f++,0)}await this.#e.write(this.#i,r),this.#o=r,this.#a=d}search(e){if(!this.#a\|\|!this.#o)return[];const t=h.hash(e),s=this.#a.getUint32(4);let n=0,i=s-1;const o=12,a=20,r=new TextDecoder;for(;n<=i;){const h=n+i>>>1,d=o+ha,c=this.#a.getUint32(d);if(c<t)n=h+1;else{if(!(c>t)){if(!(h>0&&this.#a.getUint32(o+(h-1)a)===t\|\|h<s-1&&this.#a.getUint32(o+(h+1)a)===t)){const e=this.#a.getUint32(o+ha+12),t=this.#a.getUint32(o+ha+16),s=[];for(let n=0;n<t;n++)s.push(this.#a.getUint32(e+4n,!0));return s}let 
n=h;for(;n>0;){const e=o+(n-1)a;if(this.#a.getUint32(e)!==t)break;n--}for(let i=n;i<s;i++){const s=o+ia;if(this.#a.getUint32(s)!==t)break;const n=this.#a.getUint32(s+4),h=this.#a.getUint32(s+8),d=new Uint8Array(this.#o,h,n);if(r.decode(d)===e){const e=this.#a.getUint32(s+12),t=this.#a.getUint32(s+16),n=[];for(let s=0;s<t;s++)n.push(this.#a.getUint32(e+4s,!0));return n}}return[]}i=h-1}}return[]}}const d="word_cache.bin",c="char_cache.bin";exports.SearchEngine=class{#e;#t;#r;#h;#d=!1;#c;#g=!1;#f={word:0,char:0};constructor(s){if(!s.baseDir)throw new Error("SearchEngine requires 'baseDir' in config.");if(this.#c={wordSegmentTokenThreshold:1e5,charSegmentTokenThreshold:5e5,minWordTokenSave:0,minCharTokenSave:0,...s},(this.#c.minWordTokenSave\|\|0)>=(this.#c.wordSegmentTokenThreshold\|\|1e5))throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");if((this.#c.minCharTokenSave\|\|0)>=(this.#c.charSegmentTokenThreshold\|\|5e5))throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");let n=null;if(this.#c.storage&&("object"==typeof this.#c.storage?n=this.#c.storage:"browser"===this.#c.storage?n=new e.BrowserStorage(this.#c.baseDir):"node"===this.#c.storage&&(n=new t.NodeStorage(this.#c.baseDir))),!n){const s=typeof navigator<"u"&&navigator?.storage?.getDirectory instanceof Function,i=typeof process<"u"&&null!=process.versions&&null!=process.versions.node;s?n=new e.BrowserStorage(this.#c.baseDir):i&&(n=new t.NodeStorage(this.#c.baseDir))}if(!n)throw new Error('Storage initialization failed. 
Please configure "storage" explicitly or ensure you are in a supported environment (Browser/Node).');this.#e=n,this.#t=new o(this.#e),this.#r=new a(this.#e),this.#h=new Map}async init(){if(this.#d)return;await this.#t.load();const e=[...this.#t.getSegments("word"),...this.#t.getSegments("char")];for(const t of e)this.#h.has(t.filename)\|\|this.#h.set(t.filename,new h(t.filename,this.#e)),await this.#h.get(t.filename).loadIndex();this.#d=!0}startBatch(){this.#g=!0,this.#f={word:0,char:0}}async endBatch(){this.#g=!1,this.#f.word>0&&await this.#l("word",this.#f.word),this.#f.char>0&&await this.#l("char",this.#f.char),this.#f={word:0,char:0},await this.#t.save()}#w(e){try{if(typeof Intl<"u"&&"function"==typeof Intl.Segmenter&&"function"==typeof Array.from){const t=new Intl.Segmenter([],{granularity:"word"}).segment(e);if("object"==typeof t&&null!==t)return Array.from(t).filter(e=>e?.isWordLike).map(e=>e?.segment?.toLowerCase()\|\|"")}}catch{}return e.toLowerCase().split(/[^a-z0-9\u4e00-\u9fa5]+/g).filter(e=>e.length>0)}#m(e){return this.#c.indexingTokenizer?this.#c.indexingTokenizer(e):this.#w(e.text)}#u(e){return this.#c.searchTokenizer?this.#c.searchTokenizer(e):this.#m(e)}async addDocument(e){return this.addDocuments([e])}async addDocumentIfMissing(e){return this.addDocumentsIfMissing([e])}async addDocumentsIfMissing(e){if(this.#d\|\|await this.init(),0===e.length)return;const t=this.#t.getDeletedIds(),s=[],n=[],i=[];for(const o of e){if(t.has(o.id)\|\|this.#t.isAdded(o.id))continue;const e=this.#m(o),a=[],r=[];for(const t of e)t.length>1?a.push(t):1===t.length&&r.push(t);a.length>0&&s.push({id:o.id,tokens:a}),r.length>0&&n.push({id:o.id,tokens:r}),i.push(o)}if(0===i.length)return;let o=0,a=0;if(s.length>0){await this.#r.appendBatch(d,s);for(const e of s)o+=e.tokens.length}if(n.length>0){await this.#r.appendBatch(c,n);for(const e of n)a+=e.tokens.length}for(const e of i)this.#t.addAddedId(e.id);this.#g?(this.#f.word+=o,this.#f.char+=a):(o>0&&await 
this.#l("word",o),a>0&&await this.#l("char",a),await this.#t.save())}async addDocuments(e){if(this.#d\|\|await this.init(),0===e.length)return;const t=this.#t.getDeletedIds(),s=[],n=[];for(const i of e){if(t.has(i.id))throw new Error(`Document ID ${i.id} has been deleted and cannot be re-added.`);if(this.#t.isAdded(i.id))throw new Error(`Document ID ${i.id} already exists.`);const e=this.#m(i),o=[],a=[];for(const t of e)t.length>1?o.push(t):1===t.length&&a.push(t);o.length>0&&s.push({id:i.id,tokens:o}),a.length>0&&n.push({id:i.id,tokens:a})}let i=0,o=0;if(s.length>0){await this.#r.appendBatch(d,s);for(const e of s)i+=e.tokens.length}if(n.length>0){await this.#r.appendBatch(c,n);for(const e of n)o+=e.tokens.length}for(const t of e)this.#t.addAddedId(t.id);this.#g?(this.#f.word+=i,this.#f.char+=o):(i>0&&await this.#l("word",i),o>0&&await this.#l("char",o),await this.#t.save())}async#l(e,t){const s="word"===e?d:c,n=await this.#r.getCurrentSize(s),i="word"===e?this.#c.wordSegmentTokenThreshold\|\|1e5:this.#c.charSegmentTokenThreshold\|\|5e5,o="word"===e?this.#c.minWordTokenSave\|\|0:this.#c.minCharTokenSave\|\|0,a=this.#t.getLastSegmentInfo(e);let r,g,f,l;const w=()=>{const t=this.#t.getSegments(e).length+1;return`${e}_seg_${t}.bin`};if(a){const e=a.tokenCount;e>=i\|\|e+t>=i?(r=w(),f=!0,g=a.end,l=t):(r=a.filename,f=!1,g=a.start,l=e+t)}else r=w(),f=!0,g=0,l=t;if(l<o)return void this.#t.updateSegment(e,r,g,n,l,f);const m=await this.#r.readRange(s,g,n);let u=this.#h.get(r);u\|\|(u=new h(r,this.#e),this.#h.set(r,u)),await u.buildAndSave(m),this.#t.updateSegment(e,r,g,n,l,f)}async search(e,t){this.#d\|\|await this.init();const s="string"==typeof e?{text:e}:e,n=this.#u(s),i=n.filter(e=>e.length>1),o=n.filter(e=>1===e.length),a=this.#t.getDeletedIds(),r=new Map,d=new Map,c=e=>{const t=this.#t.getSegments(e);for(const e of t){const t=e.filename;!this.#h.has(t)&&!d.has(t)&&d.set(t,new h(t,this.#e))}};c("word"),c("char"),await 
Promise.all(Array.from(d.entries()).map(([e,t])=>t.loadIndex().then(s=>{s&&this.#h.set(e,t)})));const g=async(e,t)=>{if(0===t.length)return;const s=this.#t.getSegments(e);for(const e of s){const s=e.filename,n=this.#h.get(s);if(n)for(const e of t){const t=n.search(e),s=1+.1e.length;for(const n of t)if(!a.has(n))if(r.has(n)){const t=r.get(n);t.score+=s,t.tokens.add(e)}else r.set(n,{score:0,tokens:new Set([e])})}}};await g("word",i),await g("char",o);const f=[];return r.forEach((e,t)=>{f.push({id:t,score:e.score,tokens:Array.from(e.tokens)})}),f.sort((e,t)=>t.score-e.score),"number"==typeof t&&t>0?f.slice(0,t):f}async removeDocument(e){this.#d\|\|await this.init(),this.#t.addDeletedId(e),this.#t.removeAddedId(e),await this.#t.save()}async clearAll(){await this.#e.clearAll(),this.#h.clear(),this.#t.reset(),this.#d=!1,this.#g=!1,this.#f={word:0,char:0}}async getStatus(){return this.#d\|\|await this.init(),{wordSegments:this.#t.getSegments("word").length,charSegments:this.#t.getSegments("char").length,deleted:this.#t.getDeletedIds().size,wordCacheSize:await this.#r.getCurrentSize(d),charCacheSize:await this.#r.getCurrentSize(c),inBatch:this.#g}}async hasDocument(e){return this.#d\|\|await this.init(),this.#t.hasDocument(e)}},exports.hash=r,exports.murmur3_32=r;
1	+ "use strict";const e="search_meta.json",t="deleted_ids.bin",s="added_ids.bin";class n{#e;#t={wordSegments:[],charSegments:[]};#s=new Set;#n=new Set;constructor(e){this.#e=e}async load(){const n=await this.#e.read(e);if(n){const e=(new TextDecoder).decode(n);this.#t=JSON.parse(e)}else this.#t={wordSegments:[],charSegments:[]};const i=await this.#e.read(t);if(i){const e=new DataView(i);let t=0;const s=i.byteLength;for(;t<s&&!(t+4>s);){const n=e.getUint32(t,!0);this.#s.add(n),t+=4,t<s&&30===e.getUint8(t)&&(t+=1)}}const a=await this.#e.read(s);if(a){const e=new DataView(a);let t=0;const s=a.byteLength;for(;t<s&&!(t+4>s);){const n=e.getUint32(t,!0);this.#n.add(n),t+=4,t<s&&30===e.getUint8(t)&&(t+=1)}}}async save(){const n=JSON.stringify(this.#t);if(await this.#e.write(e,(new TextEncoder).encode(n).buffer),0===this.#s.size)await this.#e.remove(t);else{const e=4this.#s.size+this.#s.size,s=new ArrayBuffer(e),n=new DataView(s);let i=0;for(const e of this.#s)n.setUint32(i,e,!0),i+=4,n.setUint8(i,30),i+=1;await this.#e.write(t,s)}if(0===this.#n.size)await this.#e.remove(s);else{const e=4this.#n.size+this.#n.size,t=new ArrayBuffer(e),n=new DataView(t);let i=0;for(const e of this.#n)n.setUint32(i,e,!0),i+=4,n.setUint8(i,30),i+=1;await this.#e.write(s,t)}}getSegments(e){return"word"===e?this.#t.wordSegments:this.#t.charSegments}getDeletedIds(){return this.#s}addDeletedId(e){this.#s.add(e)}isDeleted(e){return this.#s.has(e)}addAddedId(e){this.#n.add(e)}removeAddedId(e){this.#n.delete(e)}isAdded(e){return this.#n.has(e)}getAddedIds(){return this.#n}hasDocument(e){return this.#n.has(e)\|\|this.#s.has(e)}getLastSegmentInfo(e){const t=this.getSegments(e);return 0===t.length?null:t[t.length-1]}updateSegment(e,t,s,n,i,a){const o="word"===e?this.#t.wordSegments:this.#t.charSegments;if(a)o.push({filename:t,start:s,end:n,tokenCount:i});else{const 
e=o[o.length-1];e&&e.filename===t&&(e.end=n,e.tokenCount=i)}}reset(){this.#t={wordSegments:[],charSegments:[]},this.#s.clear(),this.#n.clear()}}class i{static SEPARATOR=30;#e;constructor(e){this.#e=e}async appendBatch(e,t){if(0===t.length)return await this.#e.getFileSize(e);const s=new TextEncoder;let n=0;for(const e of t){n+=8;for(const t of e.tokens){n+=2+Math.min(s.encode(t).byteLength,65535)}n+=1}const a=new Uint8Array(n);let o=0;for(const e of t){const t=[];for(const n of e.tokens){const e=s.encode(n),i=e.byteLength>65535?e.slice(0,65535):e;t.push(i)}const n=new DataView(a.buffer,o);n.setUint32(0,e.id,!0),n.setUint32(4,t.length,!0),o+=8;for(const e of t)new DataView(a.buffer,o).setUint16(0,e.byteLength,!0),o+=2,a.set(e,o),o+=e.byteLength;a[o++]=i.SEPARATOR}return await this.#e.append(e,a.buffer),await this.#e.getFileSize(e)}async readRange(e,t,s){const n=await this.#e.readRange(e,t,s);if(!n\|\|0===n.byteLength)return[];const a=new DataView(n),o=new Uint8Array(n),r=new TextDecoder,h=[];let d=0;const c=n.byteLength;for(;d<c&&!(d+8>c);){const e=a.getUint32(d,!0);d+=4;const t=a.getUint32(d,!0);d+=4;const s=[];for(let e=0;e<t&&!(d+2>c);e++){const e=a.getUint16(d,!0);if(d+=2,d+e>c)break;const t=new Uint8Array(n,d,e);s.push(r.decode(t)),d+=e}d<c&&o[d]===i.SEPARATOR&&(d+=1),h.push({id:e,tokens:s})}return h}async getCurrentSize(e){return await this.#e.getFileSize(e)}}function a(e,t=305419896){const s=e.length,n=s>>2;let i=0;for(;i<n;){let s=255&e.charCodeAt(i)\|(255&e.charCodeAt(++i))<<8\|(255&e.charCodeAt(++i))<<16\|(255&e.charCodeAt(++i))<<24;++i,s=3432918353(65535&s)+((3432918353(s>>>16)&65535)<<16)&4294967295,s=s<<15\|s>>>17,s=461845907(65535&s)+((461845907(s>>>16)&65535)<<16)&4294967295,t=27492+(65535&(t=5(65535&(t=(t^=s)<<13\|t>>>19))+((5(t>>>16)&65535)<<16)&4294967295))+(((t>>>16)+58964&65535)<<16)}let a=0;const o=3&s;return 
o>0&&(o>=3&&(a^=(255&e.charCodeAt(i+2))<<16),o>=2&&(a^=(255&e.charCodeAt(i+1))<<8),o>=1&&(a^=255&e.charCodeAt(i)),a=3432918353(65535&a)+((3432918353(a>>>16)&65535)<<16)&4294967295,a=a<<15\|a>>>17,a=461845907(65535&a)+((461845907(a>>>16)&65535)<<16)&4294967295,t^=a),t^=s,t=2246822507(65535&(t^=t>>>16))+((2246822507(t>>>16)&65535)<<16)&4294967295,t=3266489909(65535&(t^=t>>>13))+((3266489909(t>>>16)&65535)<<16)&4294967295,(t^=t>>>16)>>>0}class o{#i;#e;#a=null;#o=null;static hash(e){return a(e)}constructor(e,t){this.#i=e,this.#e=t}async loadIndex(){return!!this.#a\|\|(this.#a=await this.#e.read(this.#i),!!this.#a&&(this.#o=new DataView(this.#a),!0))}async buildAndSave(e){const t=new Map;for(const s of e){const e=new Map;for(const n of s.tokens)e.has(n)\|\|(e.set(n,!0),t.has(n)\|\|t.set(n,{hash:o.hash(n),postings:[]}),t.get(n).postings.push(s.id))}const s=Array.from(t.entries());s.sort(([e,{hash:t}],[s,{hash:n}])=>t!==n?t-n:e.localeCompare(s));let n=0,i=0;for(const[e,{postings:t}]of s)n+=t.length,i+=e.length+1;const a=20s.length,r=12+a+4n,h=new ArrayBuffer(r+i),d=new DataView(h);d.setUint32(0,1229866072),d.setUint32(4,s.length),d.setUint32(8,r);let c=12,g=12+a,f=r;for(const[e,{hash:t,postings:n}]of s){d.setUint32(c,t),d.setUint32(c+4,e.length),d.setUint32(c+8,f),d.setUint32(c+12,g),d.setUint32(c+16,n.length),c+=20;for(let e=0;e<n.length;e++)d.setUint32(g,n[e],!0),g+=4;const s=(new TextEncoder).encode(e);for(let e=0;e<s.length;e++)d.setUint8(f++,s[e]);d.setUint8(f++,0)}await this.#e.write(this.#i,h),this.#a=h,this.#o=d}search(e){if(!this.#o\|\|!this.#a)return[];const t=o.hash(e),s=this.#o.getUint32(4);let n=0,i=s-1;const a=12,r=20,h=new TextDecoder;for(;n<=i;){const o=n+i>>>1,d=a+or,c=this.#o.getUint32(d);if(c<t)n=o+1;else{if(!(c>t)){if(!(o>0&&this.#o.getUint32(a+(o-1)r)===t\|\|o<s-1&&this.#o.getUint32(a+(o+1)r)===t)){const e=this.#o.getUint32(a+or+12),t=this.#o.getUint32(a+or+16),s=[];for(let n=0;n<t;n++)s.push(this.#o.getUint32(e+4n,!0));return s}let 
n=o;for(;n>0;){const e=a+(n-1)r;if(this.#o.getUint32(e)!==t)break;n--}for(let i=n;i<s;i++){const s=a+ir;if(this.#o.getUint32(s)!==t)break;const n=this.#o.getUint32(s+4),o=this.#o.getUint32(s+8),d=new Uint8Array(this.#a,o,n);if(h.decode(d)===e){const e=this.#o.getUint32(s+12),t=this.#o.getUint32(s+16),n=[];for(let s=0;s<t;s++)n.push(this.#o.getUint32(e+4s,!0));return n}}return[]}i=o-1}}return[]}}const r=({text:e})=>{try{if(typeof Intl<"u"&&"function"==typeof Intl.Segmenter&&"function"==typeof Array.from){const t=new Intl.Segmenter([],{granularity:"word"}).segment(e);if("object"==typeof t&&null!==t)return Array.from(t).filter(e=>e?.isWordLike).map(e=>e?.segment?.toLowerCase()\|\|"")}}catch{}return e.toLowerCase().split(/[^a-z0-9\u4e00-\u9fa5]+/g).filter(e=>e.length>0)},h="word_cache.bin",d="char_cache.bin";exports.SearchEngine=class{#e;#t;#r;#h;#d=!1;#c;#g=!1;#f={word:0,char:0};constructor(e){if(this.#c={wordSegmentTokenThreshold:1e5,charSegmentTokenThreshold:5e5,minWordTokenSave:0,minCharTokenSave:0,indexingTokenizer:e.indexingTokenizer\|\|r,...e},(this.#c.minWordTokenSave\|\|0)>=(this.#c.wordSegmentTokenThreshold\|\|1e5))throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");if((this.#c.minCharTokenSave\|\|0)>=(this.#c.charSegmentTokenThreshold\|\|5e5))throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");this.#e=e.storage,this.#t=new n(this.#e),this.#r=new i(this.#e),this.#h=new Map}startBatch(){this.#g=!0,this.#f={word:0,char:0}}async endBatch(){this.#g=!1,this.#f.word>0&&await this.#l("word",this.#f.word),this.#f.char>0&&await this.#l("char",this.#f.char),this.#f={word:0,char:0},await this.#t.save()}async addDocument(e){return this.addDocuments([e])}async addDocumentIfMissing(e){return this.addDocumentsIfMissing([e])}async addDocumentsIfMissing(e){if(this.#d\|\|await this.#w(),0===e.length)return;const t=this.#t.getDeletedIds(),s=[],n=[],i=[];for(const a of 
e){if(t.has(a.id)\|\|this.#t.isAdded(a.id))continue;const e=this.#m(a),o=[],r=[];for(const t of e)t.length>1?o.push(t):1===t.length&&r.push(t);o.length>0&&s.push({id:a.id,tokens:o}),r.length>0&&n.push({id:a.id,tokens:r}),i.push(a)}if(0===i.length)return;let a=0,o=0;if(s.length>0){await this.#r.appendBatch(h,s);for(const e of s)a+=e.tokens.length}if(n.length>0){await this.#r.appendBatch(d,n);for(const e of n)o+=e.tokens.length}for(const e of i)this.#t.addAddedId(e.id);this.#g?(this.#f.word+=a,this.#f.char+=o):(a>0&&await this.#l("word",a),o>0&&await this.#l("char",o),await this.#t.save())}async addDocuments(e){if(this.#d\|\|await this.#w(),0===e.length)return;const t=this.#t.getDeletedIds(),s=[],n=[];for(const i of e){if(t.has(i.id))throw new Error(`Document ID ${i.id} has been deleted and cannot be re-added.`);if(this.#t.isAdded(i.id))throw new Error(`Document ID ${i.id} already exists.`);const e=this.#m(i),a=[],o=[];for(const t of e)t.length>1?a.push(t):1===t.length&&o.push(t);a.length>0&&s.push({id:i.id,tokens:a}),o.length>0&&n.push({id:i.id,tokens:o})}let i=0,a=0;if(s.length>0){await this.#r.appendBatch(h,s);for(const e of s)i+=e.tokens.length}if(n.length>0){await this.#r.appendBatch(d,n);for(const e of n)a+=e.tokens.length}for(const t of e)this.#t.addAddedId(t.id);this.#g?(this.#f.word+=i,this.#f.char+=a):(i>0&&await this.#l("word",i),a>0&&await this.#l("char",a),await this.#t.save())}async search(e,t){this.#d\|\|await this.#w();const s="string"==typeof e?{text:e}:e,n=this.#u(s),i=n.filter(e=>e.length>1),a=n.filter(e=>1===e.length),r=this.#t.getDeletedIds(),h=new Map,d=new Map,c=e=>{const t=this.#t.getSegments(e);for(const e of t){const t=e.filename;!this.#h.has(t)&&!d.has(t)&&d.set(t,new o(t,this.#e))}};c("word"),c("char"),await Promise.all(Array.from(d.entries()).map(([e,t])=>t.loadIndex().then(s=>{s&&this.#h.set(e,t)})));const g=async(e,t)=>{if(0===t.length)return;const s=this.#t.getSegments(e);for(const e of s){const 
s=e.filename,n=this.#h.get(s);if(n)for(const e of t){const t=n.search(e),s=1+.1e.length;for(const n of t)if(!r.has(n))if(h.has(n)){const t=h.get(n);t.score+=s,t.tokens.add(e)}else h.set(n,{score:0,tokens:new Set([e])})}}};await g("word",i),await g("char",a);const f=[];return h.forEach((e,t)=>{f.push({id:t,score:e.score,tokens:Array.from(e.tokens)})}),f.sort((e,t)=>t.score-e.score),"number"==typeof t&&t>0?f.slice(0,t):f}async removeDocument(e){this.#d\|\|await this.#w(),this.#t.addDeletedId(e),this.#t.removeAddedId(e),await this.#t.save()}async clearAll(){await this.#e.clearAll(),this.#h.clear(),this.#t.reset(),this.#d=!1,this.#g=!1,this.#f={word:0,char:0}}async getStatus(){return this.#d\|\|await this.#w(),{wordSegments:this.#t.getSegments("word").length,charSegments:this.#t.getSegments("char").length,deleted:this.#t.getDeletedIds().size,wordCacheSize:await this.#r.getCurrentSize(h),charCacheSize:await this.#r.getCurrentSize(d),inBatch:this.#g}}async hasDocument(e){return this.#d\|\|await this.#w(),this.#t.hasDocument(e)}async#w(){if(this.#d)return;await this.#t.load();const e=[...this.#t.getSegments("word"),...this.#t.getSegments("char")];for(const t of e)this.#h.has(t.filename)\|\|this.#h.set(t.filename,new o(t.filename,this.#e)),await this.#h.get(t.filename).loadIndex();this.#d=!0}#m(e){return this.#c.indexingTokenizer(e)}#u(e){return this.#c.searchTokenizer?this.#c.searchTokenizer(e):this.#m(e)}async#l(e,t){const s="word"===e?h:d,n=await this.#r.getCurrentSize(s),i="word"===e?this.#c.wordSegmentTokenThreshold\|\|1e5:this.#c.charSegmentTokenThreshold\|\|5e5,a="word"===e?this.#c.minWordTokenSave\|\|0:this.#c.minCharTokenSave\|\|0,r=this.#t.getLastSegmentInfo(e);let c,g,f,l;const w=()=>{const t=this.#t.getSegments(e).length+1;return`${e}_seg_${t}.bin`};if(r){const e=r.tokenCount;e>=i\|\|e+t>=i?(c=w(),f=!0,g=r.end,l=t):(c=r.filename,f=!1,g=r.start,l=e+t)}else c=w(),f=!0,g=0,l=t;if(l<a)return void this.#t.updateSegment(e,c,g,n,l,f);const m=await 
this.#r.readRange(s,g,n);let u=this.#h.get(c);u||(u=new o(c,this.#e),this.#h.set(c,u)),await u.buildAndSave(m),this.#t.updateSegment(e,c,g,n,l,f)}},exports.hash=a,exports.murmur3_32=a;

package/lib/core.d.ts CHANGED Viewed

@@ -1,12 +1,11 @@
-import { ISearchEngineConfig, IDocument, IDocumentBase, IResult } from './type';
+import { ISearchEngine, ISearchEngineOption, IDocument, IDocumentBase, IResult, ISearchEngineStatus } from './type';
 /**
  * 核心搜索引擎类 (多实例支持)
  */
-declare class SearchEngine {
+declare class SearchEngine implements ISearchEngine {
     #private;
-    constructor(config: ISearchEngineConfig);
-    init(): Promise<void>;
+    constructor(config: ISearchEngineOption);
     /**
      * 开启批处理
      * 批处理期间 addDocuments 只写入缓存，不触发索引段构建
@@ -29,17 +28,10 @@ declare class SearchEngine {
      */
     addDocumentsIfMissing<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
     addDocuments<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
-    search<T extends IDocumentBase = IDocumentBase>(query: T | string, limit?: number): Promise<IResult[]>;
+    search<T extends IDocumentBase | string = any>(query: T, limit?: number): Promise<IResult[]>;
     removeDocument(id: number): Promise<void>;
     clearAll(): Promise<void>;
-    getStatus(): Promise<{
-        wordSegments: number;
-        charSegments: number;
-        deleted: number;
-        wordCacheSize: number;
-        charCacheSize: number;
-        inBatch: boolean;
-    }>;
+    getStatus(): Promise<ISearchEngineStatus>;
     /**
      * 检查文档ID是否曾经添加过（包括已删除的）
      * @param id 文档ID

package/lib/core.js CHANGED Viewed

	@@ -1 +1 @@
1	- import{BrowserStorage as e}from'./browser';import{NodeStorage as t}from'./node';const s="search_meta.json",n="deleted_ids.bin",i="added_ids.bin";class o{#e;#t={wordSegments:[],charSegments:[]};#s=new Set;#n=new Set;constructor(e){this.#e=e}async load(){const e=await this.#e.read(s);if(e){const t=(new TextDecoder).decode(e);this.#t=JSON.parse(t)}else this.#t={wordSegments:[],charSegments:[]};const t=await this.#e.read(n);if(t){const e=new DataView(t);let s=0;const n=t.byteLength;for(;s<n&&!(s+4>n);){const t=e.getUint32(s,!0);this.#s.add(t),s+=4,s<n&&30===e.getUint8(s)&&(s+=1)}}const o=await this.#e.read(i);if(o){const e=new DataView(o);let t=0;const s=o.byteLength;for(;t<s&&!(t+4>s);){const n=e.getUint32(t,!0);this.#n.add(n),t+=4,t<s&&30===e.getUint8(t)&&(t+=1)}}}async save(){const e=JSON.stringify(this.#t);if(await this.#e.write(s,(new TextEncoder).encode(e).buffer),0===this.#s.size)await this.#e.remove(n);else{const e=4this.#s.size+this.#s.size,t=new ArrayBuffer(e),s=new DataView(t);let i=0;for(const e of this.#s)s.setUint32(i,e,!0),i+=4,s.setUint8(i,30),i+=1;await this.#e.write(n,t)}if(0===this.#n.size)await this.#e.remove(i);else{const e=4this.#n.size+this.#n.size,t=new ArrayBuffer(e),s=new DataView(t);let n=0;for(const e of this.#n)s.setUint32(n,e,!0),n+=4,s.setUint8(n,30),n+=1;await this.#e.write(i,t)}}getSegments(e){return"word"===e?this.#t.wordSegments:this.#t.charSegments}getDeletedIds(){return this.#s}addDeletedId(e){this.#s.add(e)}isDeleted(e){return this.#s.has(e)}addAddedId(e){this.#n.add(e)}removeAddedId(e){this.#n.delete(e)}isAdded(e){return this.#n.has(e)}getAddedIds(){return this.#n}hasDocument(e){return this.#n.has(e)\|\|this.#s.has(e)}getLastSegmentInfo(e){const t=this.getSegments(e);return 0===t.length?null:t[t.length-1]}updateSegment(e,t,s,n,i,o){const a="word"===e?this.#t.wordSegments:this.#t.charSegments;if(o)a.push({filename:t,start:s,end:n,tokenCount:i});else{const 
e=a[a.length-1];e&&e.filename===t&&(e.end=n,e.tokenCount=i)}}reset(){this.#t={wordSegments:[],charSegments:[]},this.#s.clear(),this.#n.clear()}}class a{static SEPARATOR=30;#e;constructor(e){this.#e=e}async appendBatch(e,t){if(0===t.length)return await this.#e.getFileSize(e);const s=new TextEncoder;let n=0;for(const e of t){n+=8;for(const t of e.tokens){n+=2+Math.min(s.encode(t).byteLength,65535)}n+=1}const i=new Uint8Array(n);let o=0;for(const e of t){const t=[];for(const n of e.tokens){const e=s.encode(n),i=e.byteLength>65535?e.slice(0,65535):e;t.push(i)}const n=new DataView(i.buffer,o);n.setUint32(0,e.id,!0),n.setUint32(4,t.length,!0),o+=8;for(const e of t)new DataView(i.buffer,o).setUint16(0,e.byteLength,!0),o+=2,i.set(e,o),o+=e.byteLength;i[o++]=a.SEPARATOR}return await this.#e.append(e,i.buffer),await this.#e.getFileSize(e)}async readRange(e,t,s){const n=await this.#e.readRange(e,t,s);if(!n\|\|0===n.byteLength)return[];const i=new DataView(n),o=new Uint8Array(n),r=new TextDecoder,h=[];let d=0;const c=n.byteLength;for(;d<c&&!(d+8>c);){const e=i.getUint32(d,!0);d+=4;const t=i.getUint32(d,!0);d+=4;const s=[];for(let e=0;e<t&&!(d+2>c);e++){const e=i.getUint16(d,!0);if(d+=2,d+e>c)break;const t=new Uint8Array(n,d,e);s.push(r.decode(t)),d+=e}d<c&&o[d]===a.SEPARATOR&&(d+=1),h.push({id:e,tokens:s})}return h}async getCurrentSize(e){return await this.#e.getFileSize(e)}}function r(e,t=305419896){const s=e.length,n=s>>2;let i=0;for(;i<n;){let s=255&e.charCodeAt(i)\|(255&e.charCodeAt(++i))<<8\|(255&e.charCodeAt(++i))<<16\|(255&e.charCodeAt(++i))<<24;++i,s=3432918353(65535&s)+((3432918353(s>>>16)&65535)<<16)&4294967295,s=s<<15\|s>>>17,s=461845907(65535&s)+((461845907(s>>>16)&65535)<<16)&4294967295,t=27492+(65535&(t=5(65535&(t=(t^=s)<<13\|t>>>19))+((5(t>>>16)&65535)<<16)&4294967295))+(((t>>>16)+58964&65535)<<16)}let o=0;const a=3&s;return 
a>0&&(a>=3&&(o^=(255&e.charCodeAt(i+2))<<16),a>=2&&(o^=(255&e.charCodeAt(i+1))<<8),a>=1&&(o^=255&e.charCodeAt(i)),o=3432918353(65535&o)+((3432918353(o>>>16)&65535)<<16)&4294967295,o=o<<15\|o>>>17,o=461845907(65535&o)+((461845907(o>>>16)&65535)<<16)&4294967295,t^=o),t^=s,t=2246822507(65535&(t^=t>>>16))+((2246822507(t>>>16)&65535)<<16)&4294967295,t=3266489909(65535&(t^=t>>>13))+((3266489909(t>>>16)&65535)<<16)&4294967295,(t^=t>>>16)>>>0}class h{#i;#e;#o=null;#a=null;static hash(e){return r(e)}constructor(e,t){this.#i=e,this.#e=t}async loadIndex(){return!!this.#o\|\|(this.#o=await this.#e.read(this.#i),!!this.#o&&(this.#a=new DataView(this.#o),!0))}async buildAndSave(e){const t=new Map;for(const s of e){const e=new Map;for(const n of s.tokens)e.has(n)\|\|(e.set(n,!0),t.has(n)\|\|t.set(n,{hash:h.hash(n),postings:[]}),t.get(n).postings.push(s.id))}const s=Array.from(t.entries());s.sort(([e,{hash:t}],[s,{hash:n}])=>t!==n?t-n:e.localeCompare(s));let n=0,i=0;for(const[e,{postings:t}]of s)n+=t.length,i+=e.length+1;const o=20s.length,a=12+o+4n,r=new ArrayBuffer(a+i),d=new DataView(r);d.setUint32(0,1229866072),d.setUint32(4,s.length),d.setUint32(8,a);let c=12,g=12+o,f=a;for(const[e,{hash:t,postings:n}]of s){d.setUint32(c,t),d.setUint32(c+4,e.length),d.setUint32(c+8,f),d.setUint32(c+12,g),d.setUint32(c+16,n.length),c+=20;for(let e=0;e<n.length;e++)d.setUint32(g,n[e],!0),g+=4;const s=(new TextEncoder).encode(e);for(let e=0;e<s.length;e++)d.setUint8(f++,s[e]);d.setUint8(f++,0)}await this.#e.write(this.#i,r),this.#o=r,this.#a=d}search(e){if(!this.#a\|\|!this.#o)return[];const t=h.hash(e),s=this.#a.getUint32(4);let n=0,i=s-1;const o=12,a=20,r=new TextDecoder;for(;n<=i;){const h=n+i>>>1,d=o+ha,c=this.#a.getUint32(d);if(c<t)n=h+1;else{if(!(c>t)){if(!(h>0&&this.#a.getUint32(o+(h-1)a)===t\|\|h<s-1&&this.#a.getUint32(o+(h+1)a)===t)){const e=this.#a.getUint32(o+ha+12),t=this.#a.getUint32(o+ha+16),s=[];for(let n=0;n<t;n++)s.push(this.#a.getUint32(e+4n,!0));return s}let 
n=h;for(;n>0;){const e=o+(n-1)a;if(this.#a.getUint32(e)!==t)break;n--}for(let i=n;i<s;i++){const s=o+ia;if(this.#a.getUint32(s)!==t)break;const n=this.#a.getUint32(s+4),h=this.#a.getUint32(s+8),d=new Uint8Array(this.#o,h,n);if(r.decode(d)===e){const e=this.#a.getUint32(s+12),t=this.#a.getUint32(s+16),n=[];for(let s=0;s<t;s++)n.push(this.#a.getUint32(e+4s,!0));return n}}return[]}i=h-1}}return[]}}const d="word_cache.bin",c="char_cache.bin";class g{#e;#t;#r;#h;#d=!1;#c;#g=!1;#f={word:0,char:0};constructor(s){if(!s.baseDir)throw new Error("SearchEngine requires 'baseDir' in config.");if(this.#c={wordSegmentTokenThreshold:1e5,charSegmentTokenThreshold:5e5,minWordTokenSave:0,minCharTokenSave:0,...s},(this.#c.minWordTokenSave\|\|0)>=(this.#c.wordSegmentTokenThreshold\|\|1e5))throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");if((this.#c.minCharTokenSave\|\|0)>=(this.#c.charSegmentTokenThreshold\|\|5e5))throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");let n=null;if(this.#c.storage&&("object"==typeof this.#c.storage?n=this.#c.storage:"browser"===this.#c.storage?n=new e(this.#c.baseDir):"node"===this.#c.storage&&(n=new t(this.#c.baseDir))),!n){const s=typeof navigator<"u"&&navigator?.storage?.getDirectory instanceof Function,i=typeof process<"u"&&null!=process.versions&&null!=process.versions.node;s?n=new e(this.#c.baseDir):i&&(n=new t(this.#c.baseDir))}if(!n)throw new Error('Storage initialization failed. 
Please configure "storage" explicitly or ensure you are in a supported environment (Browser/Node).');this.#e=n,this.#t=new o(this.#e),this.#r=new a(this.#e),this.#h=new Map}async init(){if(this.#d)return;await this.#t.load();const e=[...this.#t.getSegments("word"),...this.#t.getSegments("char")];for(const t of e)this.#h.has(t.filename)\|\|this.#h.set(t.filename,new h(t.filename,this.#e)),await this.#h.get(t.filename).loadIndex();this.#d=!0}startBatch(){this.#g=!0,this.#f={word:0,char:0}}async endBatch(){this.#g=!1,this.#f.word>0&&await this.#l("word",this.#f.word),this.#f.char>0&&await this.#l("char",this.#f.char),this.#f={word:0,char:0},await this.#t.save()}#w(e){try{if(typeof Intl<"u"&&"function"==typeof Intl.Segmenter&&"function"==typeof Array.from){const t=new Intl.Segmenter([],{granularity:"word"}).segment(e);if("object"==typeof t&&null!==t)return Array.from(t).filter(e=>e?.isWordLike).map(e=>e?.segment?.toLowerCase()\|\|"")}}catch{}return e.toLowerCase().split(/[^a-z0-9\u4e00-\u9fa5]+/g).filter(e=>e.length>0)}#m(e){return this.#c.indexingTokenizer?this.#c.indexingTokenizer(e):this.#w(e.text)}#u(e){return this.#c.searchTokenizer?this.#c.searchTokenizer(e):this.#m(e)}async addDocument(e){return this.addDocuments([e])}async addDocumentIfMissing(e){return this.addDocumentsIfMissing([e])}async addDocumentsIfMissing(e){if(this.#d\|\|await this.init(),0===e.length)return;const t=this.#t.getDeletedIds(),s=[],n=[],i=[];for(const o of e){if(t.has(o.id)\|\|this.#t.isAdded(o.id))continue;const e=this.#m(o),a=[],r=[];for(const t of e)t.length>1?a.push(t):1===t.length&&r.push(t);a.length>0&&s.push({id:o.id,tokens:a}),r.length>0&&n.push({id:o.id,tokens:r}),i.push(o)}if(0===i.length)return;let o=0,a=0;if(s.length>0){await this.#r.appendBatch(d,s);for(const e of s)o+=e.tokens.length}if(n.length>0){await this.#r.appendBatch(c,n);for(const e of n)a+=e.tokens.length}for(const e of i)this.#t.addAddedId(e.id);this.#g?(this.#f.word+=o,this.#f.char+=a):(o>0&&await 
this.#l("word",o),a>0&&await this.#l("char",a),await this.#t.save())}async addDocuments(e){if(this.#d\|\|await this.init(),0===e.length)return;const t=this.#t.getDeletedIds(),s=[],n=[];for(const i of e){if(t.has(i.id))throw new Error(`Document ID ${i.id} has been deleted and cannot be re-added.`);if(this.#t.isAdded(i.id))throw new Error(`Document ID ${i.id} already exists.`);const e=this.#m(i),o=[],a=[];for(const t of e)t.length>1?o.push(t):1===t.length&&a.push(t);o.length>0&&s.push({id:i.id,tokens:o}),a.length>0&&n.push({id:i.id,tokens:a})}let i=0,o=0;if(s.length>0){await this.#r.appendBatch(d,s);for(const e of s)i+=e.tokens.length}if(n.length>0){await this.#r.appendBatch(c,n);for(const e of n)o+=e.tokens.length}for(const t of e)this.#t.addAddedId(t.id);this.#g?(this.#f.word+=i,this.#f.char+=o):(i>0&&await this.#l("word",i),o>0&&await this.#l("char",o),await this.#t.save())}async#l(e,t){const s="word"===e?d:c,n=await this.#r.getCurrentSize(s),i="word"===e?this.#c.wordSegmentTokenThreshold\|\|1e5:this.#c.charSegmentTokenThreshold\|\|5e5,o="word"===e?this.#c.minWordTokenSave\|\|0:this.#c.minCharTokenSave\|\|0,a=this.#t.getLastSegmentInfo(e);let r,g,f,l;const w=()=>{const t=this.#t.getSegments(e).length+1;return`${e}_seg_${t}.bin`};if(a){const e=a.tokenCount;e>=i\|\|e+t>=i?(r=w(),f=!0,g=a.end,l=t):(r=a.filename,f=!1,g=a.start,l=e+t)}else r=w(),f=!0,g=0,l=t;if(l<o)return void this.#t.updateSegment(e,r,g,n,l,f);const m=await this.#r.readRange(s,g,n);let u=this.#h.get(r);u\|\|(u=new h(r,this.#e),this.#h.set(r,u)),await u.buildAndSave(m),this.#t.updateSegment(e,r,g,n,l,f)}async search(e,t){this.#d\|\|await this.init();const s="string"==typeof e?{text:e}:e,n=this.#u(s),i=n.filter(e=>e.length>1),o=n.filter(e=>1===e.length),a=this.#t.getDeletedIds(),r=new Map,d=new Map,c=e=>{const t=this.#t.getSegments(e);for(const e of t){const t=e.filename;!this.#h.has(t)&&!d.has(t)&&d.set(t,new h(t,this.#e))}};c("word"),c("char"),await 
Promise.all(Array.from(d.entries()).map(([e,t])=>t.loadIndex().then(s=>{s&&this.#h.set(e,t)})));const g=async(e,t)=>{if(0===t.length)return;const s=this.#t.getSegments(e);for(const e of s){const s=e.filename,n=this.#h.get(s);if(n)for(const e of t){const t=n.search(e),s=1+.1e.length;for(const n of t)if(!a.has(n))if(r.has(n)){const t=r.get(n);t.score+=s,t.tokens.add(e)}else r.set(n,{score:0,tokens:new Set([e])})}}};await g("word",i),await g("char",o);const f=[];return r.forEach((e,t)=>{f.push({id:t,score:e.score,tokens:Array.from(e.tokens)})}),f.sort((e,t)=>t.score-e.score),"number"==typeof t&&t>0?f.slice(0,t):f}async removeDocument(e){this.#d\|\|await this.init(),this.#t.addDeletedId(e),this.#t.removeAddedId(e),await this.#t.save()}async clearAll(){await this.#e.clearAll(),this.#h.clear(),this.#t.reset(),this.#d=!1,this.#g=!1,this.#f={word:0,char:0}}async getStatus(){return this.#d\|\|await this.init(),{wordSegments:this.#t.getSegments("word").length,charSegments:this.#t.getSegments("char").length,deleted:this.#t.getDeletedIds().size,wordCacheSize:await this.#r.getCurrentSize(d),charCacheSize:await this.#r.getCurrentSize(c),inBatch:this.#g}}async hasDocument(e){return this.#d\|\|await this.init(),this.#t.hasDocument(e)}}export{g as SearchEngine,r as hash,r as murmur3_32};
1	+ const e="search_meta.json",t="deleted_ids.bin",s="added_ids.bin";class n{#e;#t={wordSegments:[],charSegments:[]};#s=new Set;#n=new Set;constructor(e){this.#e=e}async load(){const n=await this.#e.read(e);if(n){const e=(new TextDecoder).decode(n);this.#t=JSON.parse(e)}else this.#t={wordSegments:[],charSegments:[]};const i=await this.#e.read(t);if(i){const e=new DataView(i);let t=0;const s=i.byteLength;for(;t<s&&!(t+4>s);){const n=e.getUint32(t,!0);this.#s.add(n),t+=4,t<s&&30===e.getUint8(t)&&(t+=1)}}const a=await this.#e.read(s);if(a){const e=new DataView(a);let t=0;const s=a.byteLength;for(;t<s&&!(t+4>s);){const n=e.getUint32(t,!0);this.#n.add(n),t+=4,t<s&&30===e.getUint8(t)&&(t+=1)}}}async save(){const n=JSON.stringify(this.#t);if(await this.#e.write(e,(new TextEncoder).encode(n).buffer),0===this.#s.size)await this.#e.remove(t);else{const e=4this.#s.size+this.#s.size,s=new ArrayBuffer(e),n=new DataView(s);let i=0;for(const e of this.#s)n.setUint32(i,e,!0),i+=4,n.setUint8(i,30),i+=1;await this.#e.write(t,s)}if(0===this.#n.size)await this.#e.remove(s);else{const e=4this.#n.size+this.#n.size,t=new ArrayBuffer(e),n=new DataView(t);let i=0;for(const e of this.#n)n.setUint32(i,e,!0),i+=4,n.setUint8(i,30),i+=1;await this.#e.write(s,t)}}getSegments(e){return"word"===e?this.#t.wordSegments:this.#t.charSegments}getDeletedIds(){return this.#s}addDeletedId(e){this.#s.add(e)}isDeleted(e){return this.#s.has(e)}addAddedId(e){this.#n.add(e)}removeAddedId(e){this.#n.delete(e)}isAdded(e){return this.#n.has(e)}getAddedIds(){return this.#n}hasDocument(e){return this.#n.has(e)\|\|this.#s.has(e)}getLastSegmentInfo(e){const t=this.getSegments(e);return 0===t.length?null:t[t.length-1]}updateSegment(e,t,s,n,i,a){const o="word"===e?this.#t.wordSegments:this.#t.charSegments;if(a)o.push({filename:t,start:s,end:n,tokenCount:i});else{const e=o[o.length-1];e&&e.filename===t&&(e.end=n,e.tokenCount=i)}}reset(){this.#t={wordSegments:[],charSegments:[]},this.#s.clear(),this.#n.clear()}}class 
i{static SEPARATOR=30;#e;constructor(e){this.#e=e}async appendBatch(e,t){if(0===t.length)return await this.#e.getFileSize(e);const s=new TextEncoder;let n=0;for(const e of t){n+=8;for(const t of e.tokens){n+=2+Math.min(s.encode(t).byteLength,65535)}n+=1}const a=new Uint8Array(n);let o=0;for(const e of t){const t=[];for(const n of e.tokens){const e=s.encode(n),i=e.byteLength>65535?e.slice(0,65535):e;t.push(i)}const n=new DataView(a.buffer,o);n.setUint32(0,e.id,!0),n.setUint32(4,t.length,!0),o+=8;for(const e of t)new DataView(a.buffer,o).setUint16(0,e.byteLength,!0),o+=2,a.set(e,o),o+=e.byteLength;a[o++]=i.SEPARATOR}return await this.#e.append(e,a.buffer),await this.#e.getFileSize(e)}async readRange(e,t,s){const n=await this.#e.readRange(e,t,s);if(!n\|\|0===n.byteLength)return[];const a=new DataView(n),o=new Uint8Array(n),r=new TextDecoder,h=[];let d=0;const c=n.byteLength;for(;d<c&&!(d+8>c);){const e=a.getUint32(d,!0);d+=4;const t=a.getUint32(d,!0);d+=4;const s=[];for(let e=0;e<t&&!(d+2>c);e++){const e=a.getUint16(d,!0);if(d+=2,d+e>c)break;const t=new Uint8Array(n,d,e);s.push(r.decode(t)),d+=e}d<c&&o[d]===i.SEPARATOR&&(d+=1),h.push({id:e,tokens:s})}return h}async getCurrentSize(e){return await this.#e.getFileSize(e)}}function a(e,t=305419896){const s=e.length,n=s>>2;let i=0;for(;i<n;){let s=255&e.charCodeAt(i)\|(255&e.charCodeAt(++i))<<8\|(255&e.charCodeAt(++i))<<16\|(255&e.charCodeAt(++i))<<24;++i,s=3432918353(65535&s)+((3432918353(s>>>16)&65535)<<16)&4294967295,s=s<<15\|s>>>17,s=461845907(65535&s)+((461845907(s>>>16)&65535)<<16)&4294967295,t=27492+(65535&(t=5(65535&(t=(t^=s)<<13\|t>>>19))+((5(t>>>16)&65535)<<16)&4294967295))+(((t>>>16)+58964&65535)<<16)}let a=0;const o=3&s;return 
o>0&&(o>=3&&(a^=(255&e.charCodeAt(i+2))<<16),o>=2&&(a^=(255&e.charCodeAt(i+1))<<8),o>=1&&(a^=255&e.charCodeAt(i)),a=3432918353(65535&a)+((3432918353(a>>>16)&65535)<<16)&4294967295,a=a<<15\|a>>>17,a=461845907(65535&a)+((461845907(a>>>16)&65535)<<16)&4294967295,t^=a),t^=s,t=2246822507(65535&(t^=t>>>16))+((2246822507(t>>>16)&65535)<<16)&4294967295,t=3266489909(65535&(t^=t>>>13))+((3266489909(t>>>16)&65535)<<16)&4294967295,(t^=t>>>16)>>>0}class o{#i;#e;#a=null;#o=null;static hash(e){return a(e)}constructor(e,t){this.#i=e,this.#e=t}async loadIndex(){return!!this.#a\|\|(this.#a=await this.#e.read(this.#i),!!this.#a&&(this.#o=new DataView(this.#a),!0))}async buildAndSave(e){const t=new Map;for(const s of e){const e=new Map;for(const n of s.tokens)e.has(n)\|\|(e.set(n,!0),t.has(n)\|\|t.set(n,{hash:o.hash(n),postings:[]}),t.get(n).postings.push(s.id))}const s=Array.from(t.entries());s.sort(([e,{hash:t}],[s,{hash:n}])=>t!==n?t-n:e.localeCompare(s));let n=0,i=0;for(const[e,{postings:t}]of s)n+=t.length,i+=e.length+1;const a=20s.length,r=12+a+4n,h=new ArrayBuffer(r+i),d=new DataView(h);d.setUint32(0,1229866072),d.setUint32(4,s.length),d.setUint32(8,r);let c=12,g=12+a,f=r;for(const[e,{hash:t,postings:n}]of s){d.setUint32(c,t),d.setUint32(c+4,e.length),d.setUint32(c+8,f),d.setUint32(c+12,g),d.setUint32(c+16,n.length),c+=20;for(let e=0;e<n.length;e++)d.setUint32(g,n[e],!0),g+=4;const s=(new TextEncoder).encode(e);for(let e=0;e<s.length;e++)d.setUint8(f++,s[e]);d.setUint8(f++,0)}await this.#e.write(this.#i,h),this.#a=h,this.#o=d}search(e){if(!this.#o\|\|!this.#a)return[];const t=o.hash(e),s=this.#o.getUint32(4);let n=0,i=s-1;const a=12,r=20,h=new TextDecoder;for(;n<=i;){const o=n+i>>>1,d=a+or,c=this.#o.getUint32(d);if(c<t)n=o+1;else{if(!(c>t)){if(!(o>0&&this.#o.getUint32(a+(o-1)r)===t\|\|o<s-1&&this.#o.getUint32(a+(o+1)r)===t)){const e=this.#o.getUint32(a+or+12),t=this.#o.getUint32(a+or+16),s=[];for(let n=0;n<t;n++)s.push(this.#o.getUint32(e+4n,!0));return s}let 
n=o;for(;n>0;){const e=a+(n-1)r;if(this.#o.getUint32(e)!==t)break;n--}for(let i=n;i<s;i++){const s=a+ir;if(this.#o.getUint32(s)!==t)break;const n=this.#o.getUint32(s+4),o=this.#o.getUint32(s+8),d=new Uint8Array(this.#a,o,n);if(h.decode(d)===e){const e=this.#o.getUint32(s+12),t=this.#o.getUint32(s+16),n=[];for(let s=0;s<t;s++)n.push(this.#o.getUint32(e+4s,!0));return n}}return[]}i=o-1}}return[]}}const r=({text:e})=>{try{if(typeof Intl<"u"&&"function"==typeof Intl.Segmenter&&"function"==typeof Array.from){const t=new Intl.Segmenter([],{granularity:"word"}).segment(e);if("object"==typeof t&&null!==t)return Array.from(t).filter(e=>e?.isWordLike).map(e=>e?.segment?.toLowerCase()\|\|"")}}catch{}return e.toLowerCase().split(/[^a-z0-9\u4e00-\u9fa5]+/g).filter(e=>e.length>0)},h="word_cache.bin",d="char_cache.bin";class c{#e;#t;#r;#h;#d=!1;#c;#g=!1;#f={word:0,char:0};constructor(e){if(this.#c={wordSegmentTokenThreshold:1e5,charSegmentTokenThreshold:5e5,minWordTokenSave:0,minCharTokenSave:0,indexingTokenizer:e.indexingTokenizer\|\|r,...e},(this.#c.minWordTokenSave\|\|0)>=(this.#c.wordSegmentTokenThreshold\|\|1e5))throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");if((this.#c.minCharTokenSave\|\|0)>=(this.#c.charSegmentTokenThreshold\|\|5e5))throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");this.#e=e.storage,this.#t=new n(this.#e),this.#r=new i(this.#e),this.#h=new Map}startBatch(){this.#g=!0,this.#f={word:0,char:0}}async endBatch(){this.#g=!1,this.#f.word>0&&await this.#l("word",this.#f.word),this.#f.char>0&&await this.#l("char",this.#f.char),this.#f={word:0,char:0},await this.#t.save()}async addDocument(e){return this.addDocuments([e])}async addDocumentIfMissing(e){return this.addDocumentsIfMissing([e])}async addDocumentsIfMissing(e){if(this.#d\|\|await this.#w(),0===e.length)return;const t=this.#t.getDeletedIds(),s=[],n=[],i=[];for(const a of e){if(t.has(a.id)\|\|this.#t.isAdded(a.id))continue;const 
e=this.#m(a),o=[],r=[];for(const t of e)t.length>1?o.push(t):1===t.length&&r.push(t);o.length>0&&s.push({id:a.id,tokens:o}),r.length>0&&n.push({id:a.id,tokens:r}),i.push(a)}if(0===i.length)return;let a=0,o=0;if(s.length>0){await this.#r.appendBatch(h,s);for(const e of s)a+=e.tokens.length}if(n.length>0){await this.#r.appendBatch(d,n);for(const e of n)o+=e.tokens.length}for(const e of i)this.#t.addAddedId(e.id);this.#g?(this.#f.word+=a,this.#f.char+=o):(a>0&&await this.#l("word",a),o>0&&await this.#l("char",o),await this.#t.save())}async addDocuments(e){if(this.#d\|\|await this.#w(),0===e.length)return;const t=this.#t.getDeletedIds(),s=[],n=[];for(const i of e){if(t.has(i.id))throw new Error(`Document ID ${i.id} has been deleted and cannot be re-added.`);if(this.#t.isAdded(i.id))throw new Error(`Document ID ${i.id} already exists.`);const e=this.#m(i),a=[],o=[];for(const t of e)t.length>1?a.push(t):1===t.length&&o.push(t);a.length>0&&s.push({id:i.id,tokens:a}),o.length>0&&n.push({id:i.id,tokens:o})}let i=0,a=0;if(s.length>0){await this.#r.appendBatch(h,s);for(const e of s)i+=e.tokens.length}if(n.length>0){await this.#r.appendBatch(d,n);for(const e of n)a+=e.tokens.length}for(const t of e)this.#t.addAddedId(t.id);this.#g?(this.#f.word+=i,this.#f.char+=a):(i>0&&await this.#l("word",i),a>0&&await this.#l("char",a),await this.#t.save())}async search(e,t){this.#d\|\|await this.#w();const s="string"==typeof e?{text:e}:e,n=this.#u(s),i=n.filter(e=>e.length>1),a=n.filter(e=>1===e.length),r=this.#t.getDeletedIds(),h=new Map,d=new Map,c=e=>{const t=this.#t.getSegments(e);for(const e of t){const t=e.filename;!this.#h.has(t)&&!d.has(t)&&d.set(t,new o(t,this.#e))}};c("word"),c("char"),await Promise.all(Array.from(d.entries()).map(([e,t])=>t.loadIndex().then(s=>{s&&this.#h.set(e,t)})));const g=async(e,t)=>{if(0===t.length)return;const s=this.#t.getSegments(e);for(const e of s){const s=e.filename,n=this.#h.get(s);if(n)for(const e of t){const t=n.search(e),s=1+.1e.length;for(const 
n of t)if(!r.has(n))if(h.has(n)){const t=h.get(n);t.score+=s,t.tokens.add(e)}else h.set(n,{score:0,tokens:new Set([e])})}}};await g("word",i),await g("char",a);const f=[];return h.forEach((e,t)=>{f.push({id:t,score:e.score,tokens:Array.from(e.tokens)})}),f.sort((e,t)=>t.score-e.score),"number"==typeof t&&t>0?f.slice(0,t):f}async removeDocument(e){this.#d\|\|await this.#w(),this.#t.addDeletedId(e),this.#t.removeAddedId(e),await this.#t.save()}async clearAll(){await this.#e.clearAll(),this.#h.clear(),this.#t.reset(),this.#d=!1,this.#g=!1,this.#f={word:0,char:0}}async getStatus(){return this.#d\|\|await this.#w(),{wordSegments:this.#t.getSegments("word").length,charSegments:this.#t.getSegments("char").length,deleted:this.#t.getDeletedIds().size,wordCacheSize:await this.#r.getCurrentSize(h),charCacheSize:await this.#r.getCurrentSize(d),inBatch:this.#g}}async hasDocument(e){return this.#d\|\|await this.#w(),this.#t.hasDocument(e)}async#w(){if(this.#d)return;await this.#t.load();const e=[...this.#t.getSegments("word"),...this.#t.getSegments("char")];for(const t of e)this.#h.has(t.filename)\|\|this.#h.set(t.filename,new o(t.filename,this.#e)),await this.#h.get(t.filename).loadIndex();this.#d=!0}#m(e){return this.#c.indexingTokenizer(e)}#u(e){return this.#c.searchTokenizer?this.#c.searchTokenizer(e):this.#m(e)}async#l(e,t){const s="word"===e?h:d,n=await this.#r.getCurrentSize(s),i="word"===e?this.#c.wordSegmentTokenThreshold\|\|1e5:this.#c.charSegmentTokenThreshold\|\|5e5,a="word"===e?this.#c.minWordTokenSave\|\|0:this.#c.minCharTokenSave\|\|0,r=this.#t.getLastSegmentInfo(e);let c,g,f,l;const w=()=>{const t=this.#t.getSegments(e).length+1;return`${e}_seg_${t}.bin`};if(r){const e=r.tokenCount;e>=i\|\|e+t>=i?(c=w(),f=!0,g=r.end,l=t):(c=r.filename,f=!1,g=r.start,l=e+t)}else c=w(),f=!0,g=0,l=t;if(l<a)return void this.#t.updateSegment(e,c,g,n,l,f);const m=await this.#r.readRange(s,g,n);let u=this.#h.get(c);u\|\|(u=new o(c,this.#e),this.#h.set(c,u)),await 
u.buildAndSave(m),this.#t.updateSegment(e,c,g,n,l,f)}}export{c as SearchEngine,a as hash,a as murmur3_32};

package/lib/simple.cjs CHANGED Viewed

	@@ -1 +1 @@
1	- "use strict";var t=require('./core');exports.SimpleSearch=class{static#t=null;static#e~~={baseDir:"simple_search_data",wordSegmentTokenThreshold:1e5,minWordTokenSave:0}~~;static ~~configure~~(e){const n={...~~this.#e,...e~~};~~this.#~~t=new t.~~SearchEngine~~(n)}static#n(){~~return~~ this.#t\|\|(this.#t=new t.SearchEngine(this~~.#e~~))~~,this.#t~~}static async startBatch(){this.#n().startBatch()}static async endBatch(){return this.#n().endBatch()}static async addDocument(t){return this.#n().addDocument(t)}static async addDocumentIfMissing(t){return this.#n().addDocumentIfMissing(t)}static async addDocuments(t){return this.#n().addDocuments(t)}static async addDocumentsIfMissing(t){return this.#n().addDocumentsIfMissing(t)}static async search(t,e){return this.#n().search(t,e)}static async removeDocument(t){return this.#n().removeDocument(t)}static async clearAll(){return this.#n().clearAll()}static async getStatus(){return this.#n().getStatus()}static async hasDocument(t){return this.#n().hasDocument(t)}};
1	+ "use strict";var t=require('./core'),e=require('./browser'),n=require('./node');const s=Object.freeze({wordSegmentTokenThreshold:1e5,minWordTokenSave:0}),c="simple-search";exports.SimpleSearch=class{static#t=null;static#e;static get config(){if(this.#e)return this.#e;const t={...s};return typeof navigator<"u"&&navigator?.storage?.getDirectory instanceof Function?t.storage=new e.BrowserStorage(c):t.storage=new n.NodeStorage(c),this.#e=t}static configure(e){this.#e={...this.config,...e},this.#t&&(this.#t=new t.SearchEngine(this.config))}static async startBatch(){this.#n().startBatch()}static async endBatch(){return this.#n().endBatch()}static async addDocument(t){return this.#n().addDocument(t)}static async addDocumentIfMissing(t){return this.#n().addDocumentIfMissing(t)}static async addDocuments(t){return this.#n().addDocuments(t)}static async addDocumentsIfMissing(t){return this.#n().addDocumentsIfMissing(t)}static async search(t,e){return this.#n().search(t,e)}static async removeDocument(t){return this.#n().removeDocument(t)}static async clearAll(){return this.#n().clearAll()}static async getStatus(){return this.#n().getStatus()}static async hasDocument(t){return this.#n().hasDocument(t)}static#n(){return this.#t\|\|(this.#t=new t.SearchEngine(this.config)),this.#t}};

package/lib/simple.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import * as ___type from './type';
-import { ISearchEngineConfig, IDocument, IDocumentBase } from './type';
+import { ISearchEngineOption, IDocument, IDocumentBase } from './type';
 /**
  * 快速使用封装
@@ -7,27 +7,21 @@ import { ISearchEngineConfig, IDocument, IDocumentBase } from './type';
  */
 declare class SimpleSearch {
     #private;
+    static get config(): ISearchEngineOption;
     /**
      * 配置并初始化单例
      */
-    static configure(config: Partial<ISearchEngineConfig>): void;
+    static configure(config: Partial<ISearchEngineOption>): void;
     static startBatch(): Promise<void>;
     static endBatch(): Promise<void>;
     static addDocument<T extends IDocument = IDocument>(doc: T): Promise<void>;
     static addDocumentIfMissing<T extends IDocument = IDocument>(doc: T): Promise<void>;
     static addDocuments<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
     static addDocumentsIfMissing<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
-    static search<T extends IDocumentBase = IDocumentBase>(query: T | string, limit?: number): Promise<___type.IResult[]>;
+    static search<T extends IDocumentBase = any>(query: T | string, limit?: number): Promise<___type.IResult[]>;
     static removeDocument(id: number): Promise<void>;
     static clearAll(): Promise<void>;
-    static getStatus(): Promise<{
-        wordSegments: number;
-        charSegments: number;
-        deleted: number;
-        wordCacheSize: number;
-        charCacheSize: number;
-        inBatch: boolean;
-    }>;
+    static getStatus(): Promise<___type.ISearchEngineStatus>;
     /**
      * 检查文档ID是否曾经添加过（包括已删除的）
      * @param id 文档ID

package/lib/simple.js CHANGED Viewed

	@@ -1 +1 @@
1	- import{SearchEngine as t}from'./core';~~class~~ s~~{static#t=null~~;~~static#s~~={~~baseDir:"simple_search_data",~~wordSegmentTokenThreshold:1e5,minWordTokenSave:0};static ~~configure~~(s){const n={...~~this.#s,...s~~};~~this.#~~t=new t(n)}static#n(){~~return~~ this.#t\|\|(this.#t=new t(this~~.#s~~))~~,this.#t~~}static async startBatch(){this.#n().startBatch()}static async endBatch(){return this.#n().endBatch()}static async addDocument(t){return this.#n().addDocument(t)}static async addDocumentIfMissing(t){return this.#n().addDocumentIfMissing(t)}static async addDocuments(t){return this.#n().addDocuments(t)}static async addDocumentsIfMissing(t){return this.#n().addDocumentsIfMissing(t)}static async search(t,s){return this.#n().search(t,s)}static async removeDocument(t){return this.#n().removeDocument(t)}static async clearAll(){return this.#n().clearAll()}static async getStatus(){return this.#n().getStatus()}static async hasDocument(t){return this.#n().hasDocument(t)}}export{s as SimpleSearch};
1	+ import{SearchEngine as t}from'./core';import{BrowserStorage as n}from'./browser';import{NodeStorage as s}from'./node';const e=Object.freeze({wordSegmentTokenThreshold:1e5,minWordTokenSave:0}),c="simple-search";class a{static#t=null;static#n;static get config(){if(this.#n)return this.#n;const t={...e};return typeof navigator<"u"&&navigator?.storage?.getDirectory instanceof Function?t.storage=new n(c):t.storage=new s(c),this.#n=t}static configure(n){this.#n={...this.config,...n},this.#t&&(this.#t=new t(this.config))}static async startBatch(){this.#s().startBatch()}static async endBatch(){return this.#s().endBatch()}static async addDocument(t){return this.#s().addDocument(t)}static async addDocumentIfMissing(t){return this.#s().addDocumentIfMissing(t)}static async addDocuments(t){return this.#s().addDocuments(t)}static async addDocumentsIfMissing(t){return this.#s().addDocumentsIfMissing(t)}static async search(t,n){return this.#s().search(t,n)}static async removeDocument(t){return this.#s().removeDocument(t)}static async clearAll(){return this.#s().clearAll()}static async getStatus(){return this.#s().getStatus()}static async hasDocument(t){return this.#s().hasDocument(t)}static#s(){return this.#t\|\|(this.#t=new t(this.config)),this.#t}}export{a as SimpleSearch};

package/lib/type.d.ts CHANGED Viewed

@@ -66,12 +66,9 @@ interface IStorage {
  */
 type IndexType = 'word' | 'char';
-interface ISearchEngineConfig {
-    /**
-     * 数据存储的基础目录 (必填)
-     * 用于区分不同的搜索引擎实例
-     */
-    baseDir: string;
+type IndexingTokenizer = <T extends IDocument = IDocument>(doc: T) => string[];
+type SearchTokenizer = <T extends IDocumentBase = IDocumentBase>(doc: T) => string[];
+interface ISearchEngineOption {
     /**
      * 存储实现配置 (可选)
      * - 'browser': 强制使用 OPFS (BrowserStorage)
@@ -79,7 +76,7 @@ interface ISearchEngineConfig {
      * - IStorage: 传入自定义的存储实例
      * - undefined: 自动检测环境
      */
-    storage?: 'browser' | 'node' | IStorage;
+    storage: IStorage;
     /**
      * 索引时使用的分词器 (算法核心配置)
      * - 作用: 将文档文本转换为索引用的token序列
@@ -87,7 +84,7 @@ interface ISearchEngineConfig {
      * - 建议: 针对不同语言(中文/英文/日文等)使用专门的分词实现
      * - 影响: 直接决定索引的粒度和搜索的准确性
      */
-    indexingTokenizer?: <T extends IDocument = IDocument>(doc: T) => string[];
+    indexingTokenizer?: IndexingTokenizer;
     /**
      * 搜索时使用的分词器 (算法核心配置)
      * - 作用: 将查询文本转换为搜索用的token序列
@@ -95,7 +92,7 @@ interface ISearchEngineConfig {
      * - 建议: 与indexingTokenizer保持一致的分词策略以确保搜索准确性
      * - 影响: 直接决定搜索匹配的范围和结果的相关性
      */
-    searchTokenizer?: <T extends IDocumentBase = IDocumentBase>(doc: T) => string[];
+    searchTokenizer?: SearchTokenizer;
     /**
      * 词索引分段阈值 (Token数) - 分段算法配置
      * - 作用: 控制词索引文件的大小，超过阈值时创建新的索引段
@@ -130,6 +127,52 @@ interface ISearchEngineConfig {
     minCharTokenSave?: number;
 }
+interface ISearchEngineStatus {
+    wordSegments: number;
+    charSegments: number;
+    deleted: number;
+    wordCacheSize: number;
+    charCacheSize: number;
+    inBatch: boolean;
+}
+/**
+ * 核心搜索引擎
+ */
+interface ISearchEngine {
+    /**
+     * 开启批处理
+     * 批处理期间 addDocuments 只写入缓存，不触发索引段构建
+     */
+    startBatch(): void;
+    /**
+     * 结束批处理
+     * 触发索引构建检查并保存元数据
+     */
+    endBatch(): Promise<void>;
+    addDocument<T extends IDocument = IDocument>(doc: T): Promise<void>;
+    /**
+     * 添加单个文档，如果文档ID已存在则跳过
+     * 用于在批量添加中途出错后的恢复添加行为，也可直接用于单个文档添加
+     */
+    addDocumentIfMissing<T extends IDocument = IDocument>(doc: T): Promise<void>;
+    /**
+     * 添加多个文档，跳过已存在的文档ID
+     * 用于在批量添加中途出错后的恢复添加行为，也可直接用于批量添加
+     */
+    addDocumentsIfMissing<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
+    addDocuments<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
+    search<T extends IDocumentBase | string = any>(query: string, limit?: number): Promise<IResult[]>;
+    removeDocument(id: number): Promise<void>;
+    clearAll(): Promise<void>;
+    getStatus(): Promise<ISearchEngineStatus>;
+    /**
+     * 检查文档ID是否曾经添加过（包括已删除的）
+     * @param id 文档ID
+     * @returns 文档是否曾经添加过的布尔值
+     */
+    hasDocument(id: number): Promise<boolean>;
+}
-export type { IDocument, IDocumentBase, IIndexMeta, IResult, ISearchEngineConfig, ISegmentMeta, IStorage, ITokenizedDoc, IndexType };
+export type { IDocument, IDocumentBase, IIndexMeta, IResult, ISearchEngine, ISearchEngineOption, ISearchEngineStatus, ISegmentMeta, IStorage, ITokenizedDoc, IndexType, IndexingTokenizer, SearchTokenizer };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "gs-search",
-  "version": "0.1.4",
+  "version": "0.1.5",
   "type": "module",
   "main": "lib/index.cjs",
   "module": "lib/index.js",