gs-search 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.ja.md CHANGED
@@ -134,7 +134,7 @@ const engine = new SearchEngine({
134
134
 
135
135
  ### SearchEngine
136
136
 
137
- - `constructor(options: ISearchEngineConfig)`: 新しいコアエンジンインスタンスを作成
137
+ - `constructor(options: ISearchEngineOption)`: 新しいコアエンジンインスタンスを作成
138
138
  - `init(): Promise<void>`: エンジンを初期化
139
139
  - `addDocument(doc: IDocument): Promise<void>`: 単一ドキュメントを追加
140
140
  - `addDocuments(docs: IDocument[]): Promise<void>`: 複数ドキュメントを追加
package/README.ko.md CHANGED
@@ -134,7 +134,7 @@ const engine = new SearchEngine({
134
134
 
135
135
  ### SearchEngine
136
136
 
137
- - `constructor(options: ISearchEngineConfig)`: 새로운 코어 엔진 인스턴스 생성
137
+ - `constructor(options: ISearchEngineOption)`: 새로운 코어 엔진 인스턴스 생성
138
138
  - `init(): Promise<void>`: 엔진 초기화
139
139
  - `addDocument(doc: IDocument): Promise<void>`: 단일 문서 추가
140
140
  - `addDocuments(docs: IDocument[]): Promise<void>`: 다중 문서 추가
package/README.md CHANGED
@@ -116,7 +116,7 @@ const customTokenizer = (text: string): string[] => {
116
116
 
117
117
  // Create engine with custom tokenizers
118
118
  const engine = new SearchEngine({
119
- baseDir: 'search-data',
119
+ storage: new BrowserStorage('search-data'),
120
120
  indexingTokenizer: customTokenizer,
121
121
  searchTokenizer: customTokenizer
122
122
  });
@@ -127,7 +127,7 @@ const engine = new SearchEngine({
127
127
  ### SimpleSearch
128
128
 
129
129
  **Static Methods (No instance creation required):**
130
- - `configure(config: Partial<ISearchEngineConfig>): void`: Configure the search engine
130
+ - `configure(config: Partial<ISearchEngineOption>): void`: Configure the search engine
131
131
  - `addDocument(doc: IDocument): Promise<void>`: Add a single document
132
132
  - `addDocuments(docs: IDocument[]): Promise<void>`: Add multiple documents
133
133
  - `addDocumentIfMissing(doc: IDocument): Promise<void>`: Add a single document if it doesn't exist
@@ -141,7 +141,7 @@ const engine = new SearchEngine({
141
141
 
142
142
  ### SearchEngine
143
143
 
144
- - `constructor(options: ISearchEngineConfig)`: Create a new core engine instance
144
+ - `constructor(options: ISearchEngineOption)`: Create a new core engine instance
145
145
  - `init(): Promise<void>`: Initialize the engine
146
146
  - `addDocument(doc: IDocument): Promise<void>`: Add a single document
147
147
  - `addDocuments(docs: IDocument[]): Promise<void>`: Add multiple documents
package/README.zh-CN.md CHANGED
@@ -225,7 +225,7 @@ SimpleSearch.configure({
225
225
  ### SimpleSearch
226
226
 
227
227
  **静态方法(无需实例创建):**
228
- - `configure(config: Partial<ISearchEngineConfig>): void`: 配置搜索引擎
228
+ - `configure(config: Partial<ISearchEngineOption>): void`: 配置搜索引擎
229
229
  - `addDocument(doc: IDocument): Promise<void>`: 添加单个文档
230
230
  - `addDocuments(docs: IDocument[]): Promise<void>`: 添加多个文档
231
231
  - `addDocumentIfMissing(doc: IDocument): Promise<void>`: 如果文档不存在则添加单个文档
@@ -239,7 +239,7 @@ SimpleSearch.configure({
239
239
 
240
240
  ### SearchEngine
241
241
 
242
- - `constructor(options: ISearchEngineConfig)`: 创建一个新的核心引擎实例
242
+ - `constructor(options: ISearchEngineOption)`: 创建一个新的核心引擎实例
243
243
  - `init(): Promise<void>`: 初始化引擎
244
244
  - `addDocument(doc: IDocument): Promise<void>`: 添加单个文档
245
245
  - `addDocuments(docs: IDocument[]): Promise<void>`: 添加多个文档
package/lib/core.cjs CHANGED
@@ -1 +1 @@
1
// Removed ("-") side of the package/lib/core.cjs hunk: the minified CommonJS bundle as published in 0.1.4.
//
// Layout of the bundle, using the minified identifiers that appear below:
//   - class `o`: keeps the segment lists that are stored as JSON in "search_meta.json" plus two id
//     sets stored in "deleted_ids.bin" / "added_ids.bin" (little-endian uint32 ids, each followed
//     by the byte 30).
//   - class `a`: appends per-document token records to a cache file and decodes a byte range of it
//     back into `{id, tokens}` objects (static SEPARATOR = 30 terminates each record).
//   - function `r`: the string hash that is exported as both `hash` and `murmur3_32`.
//   - class `h`: builds, persists, loads and searches one index file (header magic 1229866072,
//     20-byte entries ordered by hash).
//   - `exports.SearchEngine`: the public engine. In this version the constructor throws unless
//     `baseDir` is set and picks a BrowserStorage or NodeStorage itself, and `init()` is public.
//
// NOTE(review): the physical line breaks below come from wrapping one very long line. They fall
// mid-statement — one sits inside the "Storage initialization failed. ..." string literal and one
// directly after a `return` in `r` — so this text is not runnable as wrapped.
- "use strict";var e=require('./browser'),t=require('./node');const s="search_meta.json",n="deleted_ids.bin",i="added_ids.bin";class o{#e;#t={wordSegments:[],charSegments:[]};#s=new Set;#n=new Set;constructor(e){this.#e=e}async load(){const e=await this.#e.read(s);if(e){const t=(new TextDecoder).decode(e);this.#t=JSON.parse(t)}else this.#t={wordSegments:[],charSegments:[]};const t=await this.#e.read(n);if(t){const e=new DataView(t);let s=0;const n=t.byteLength;for(;s<n&&!(s+4>n);){const t=e.getUint32(s,!0);this.#s.add(t),s+=4,s<n&&30===e.getUint8(s)&&(s+=1)}}const o=await this.#e.read(i);if(o){const e=new DataView(o);let t=0;const s=o.byteLength;for(;t<s&&!(t+4>s);){const n=e.getUint32(t,!0);this.#n.add(n),t+=4,t<s&&30===e.getUint8(t)&&(t+=1)}}}async save(){const e=JSON.stringify(this.#t);if(await this.#e.write(s,(new TextEncoder).encode(e).buffer),0===this.#s.size)await this.#e.remove(n);else{const e=4*this.#s.size+this.#s.size,t=new ArrayBuffer(e),s=new DataView(t);let i=0;for(const e of this.#s)s.setUint32(i,e,!0),i+=4,s.setUint8(i,30),i+=1;await this.#e.write(n,t)}if(0===this.#n.size)await this.#e.remove(i);else{const e=4*this.#n.size+this.#n.size,t=new ArrayBuffer(e),s=new DataView(t);let n=0;for(const e of this.#n)s.setUint32(n,e,!0),n+=4,s.setUint8(n,30),n+=1;await this.#e.write(i,t)}}getSegments(e){return"word"===e?this.#t.wordSegments:this.#t.charSegments}getDeletedIds(){return this.#s}addDeletedId(e){this.#s.add(e)}isDeleted(e){return this.#s.has(e)}addAddedId(e){this.#n.add(e)}removeAddedId(e){this.#n.delete(e)}isAdded(e){return this.#n.has(e)}getAddedIds(){return this.#n}hasDocument(e){return this.#n.has(e)||this.#s.has(e)}getLastSegmentInfo(e){const t=this.getSegments(e);return 0===t.length?null:t[t.length-1]}updateSegment(e,t,s,n,i,o){const a="word"===e?this.#t.wordSegments:this.#t.charSegments;if(o)a.push({filename:t,start:s,end:n,tokenCount:i});else{const 
e=a[a.length-1];e&&e.filename===t&&(e.end=n,e.tokenCount=i)}}reset(){this.#t={wordSegments:[],charSegments:[]},this.#s.clear(),this.#n.clear()}}class a{static SEPARATOR=30;#e;constructor(e){this.#e=e}async appendBatch(e,t){if(0===t.length)return await this.#e.getFileSize(e);const s=new TextEncoder;let n=0;for(const e of t){n+=8;for(const t of e.tokens){n+=2+Math.min(s.encode(t).byteLength,65535)}n+=1}const i=new Uint8Array(n);let o=0;for(const e of t){const t=[];for(const n of e.tokens){const e=s.encode(n),i=e.byteLength>65535?e.slice(0,65535):e;t.push(i)}const n=new DataView(i.buffer,o);n.setUint32(0,e.id,!0),n.setUint32(4,t.length,!0),o+=8;for(const e of t)new DataView(i.buffer,o).setUint16(0,e.byteLength,!0),o+=2,i.set(e,o),o+=e.byteLength;i[o++]=a.SEPARATOR}return await this.#e.append(e,i.buffer),await this.#e.getFileSize(e)}async readRange(e,t,s){const n=await this.#e.readRange(e,t,s);if(!n||0===n.byteLength)return[];const i=new DataView(n),o=new Uint8Array(n),r=new TextDecoder,h=[];let d=0;const c=n.byteLength;for(;d<c&&!(d+8>c);){const e=i.getUint32(d,!0);d+=4;const t=i.getUint32(d,!0);d+=4;const s=[];for(let e=0;e<t&&!(d+2>c);e++){const e=i.getUint16(d,!0);if(d+=2,d+e>c)break;const t=new Uint8Array(n,d,e);s.push(r.decode(t)),d+=e}d<c&&o[d]===a.SEPARATOR&&(d+=1),h.push({id:e,tokens:s})}return h}async getCurrentSize(e){return await this.#e.getFileSize(e)}}function r(e,t=305419896){const s=e.length,n=s>>2;let i=0;for(;i<n;){let s=255&e.charCodeAt(i)|(255&e.charCodeAt(++i))<<8|(255&e.charCodeAt(++i))<<16|(255&e.charCodeAt(++i))<<24;++i,s=3432918353*(65535&s)+((3432918353*(s>>>16)&65535)<<16)&4294967295,s=s<<15|s>>>17,s=461845907*(65535&s)+((461845907*(s>>>16)&65535)<<16)&4294967295,t=27492+(65535&(t=5*(65535&(t=(t^=s)<<13|t>>>19))+((5*(t>>>16)&65535)<<16)&4294967295))+(((t>>>16)+58964&65535)<<16)}let o=0;const a=3&s;return 
a>0&&(a>=3&&(o^=(255&e.charCodeAt(i+2))<<16),a>=2&&(o^=(255&e.charCodeAt(i+1))<<8),a>=1&&(o^=255&e.charCodeAt(i)),o=3432918353*(65535&o)+((3432918353*(o>>>16)&65535)<<16)&4294967295,o=o<<15|o>>>17,o=461845907*(65535&o)+((461845907*(o>>>16)&65535)<<16)&4294967295,t^=o),t^=s,t=2246822507*(65535&(t^=t>>>16))+((2246822507*(t>>>16)&65535)<<16)&4294967295,t=3266489909*(65535&(t^=t>>>13))+((3266489909*(t>>>16)&65535)<<16)&4294967295,(t^=t>>>16)>>>0}class h{#i;#e;#o=null;#a=null;static hash(e){return r(e)}constructor(e,t){this.#i=e,this.#e=t}async loadIndex(){return!!this.#o||(this.#o=await this.#e.read(this.#i),!!this.#o&&(this.#a=new DataView(this.#o),!0))}async buildAndSave(e){const t=new Map;for(const s of e){const e=new Map;for(const n of s.tokens)e.has(n)||(e.set(n,!0),t.has(n)||t.set(n,{hash:h.hash(n),postings:[]}),t.get(n).postings.push(s.id))}const s=Array.from(t.entries());s.sort(([e,{hash:t}],[s,{hash:n}])=>t!==n?t-n:e.localeCompare(s));let n=0,i=0;for(const[e,{postings:t}]of s)n+=t.length,i+=e.length+1;const o=20*s.length,a=12+o+4*n,r=new ArrayBuffer(a+i),d=new DataView(r);d.setUint32(0,1229866072),d.setUint32(4,s.length),d.setUint32(8,a);let c=12,g=12+o,f=a;for(const[e,{hash:t,postings:n}]of s){d.setUint32(c,t),d.setUint32(c+4,e.length),d.setUint32(c+8,f),d.setUint32(c+12,g),d.setUint32(c+16,n.length),c+=20;for(let e=0;e<n.length;e++)d.setUint32(g,n[e],!0),g+=4;const s=(new TextEncoder).encode(e);for(let e=0;e<s.length;e++)d.setUint8(f++,s[e]);d.setUint8(f++,0)}await this.#e.write(this.#i,r),this.#o=r,this.#a=d}search(e){if(!this.#a||!this.#o)return[];const t=h.hash(e),s=this.#a.getUint32(4);let n=0,i=s-1;const o=12,a=20,r=new TextDecoder;for(;n<=i;){const h=n+i>>>1,d=o+h*a,c=this.#a.getUint32(d);if(c<t)n=h+1;else{if(!(c>t)){if(!(h>0&&this.#a.getUint32(o+(h-1)*a)===t||h<s-1&&this.#a.getUint32(o+(h+1)*a)===t)){const e=this.#a.getUint32(o+h*a+12),t=this.#a.getUint32(o+h*a+16),s=[];for(let n=0;n<t;n++)s.push(this.#a.getUint32(e+4*n,!0));return s}let 
n=h;for(;n>0;){const e=o+(n-1)*a;if(this.#a.getUint32(e)!==t)break;n--}for(let i=n;i<s;i++){const s=o+i*a;if(this.#a.getUint32(s)!==t)break;const n=this.#a.getUint32(s+4),h=this.#a.getUint32(s+8),d=new Uint8Array(this.#o,h,n);if(r.decode(d)===e){const e=this.#a.getUint32(s+12),t=this.#a.getUint32(s+16),n=[];for(let s=0;s<t;s++)n.push(this.#a.getUint32(e+4*s,!0));return n}}return[]}i=h-1}}return[]}}const d="word_cache.bin",c="char_cache.bin";exports.SearchEngine=class{#e;#t;#r;#h;#d=!1;#c;#g=!1;#f={word:0,char:0};constructor(s){if(!s.baseDir)throw new Error("SearchEngine requires 'baseDir' in config.");if(this.#c={wordSegmentTokenThreshold:1e5,charSegmentTokenThreshold:5e5,minWordTokenSave:0,minCharTokenSave:0,...s},(this.#c.minWordTokenSave||0)>=(this.#c.wordSegmentTokenThreshold||1e5))throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");if((this.#c.minCharTokenSave||0)>=(this.#c.charSegmentTokenThreshold||5e5))throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");let n=null;if(this.#c.storage&&("object"==typeof this.#c.storage?n=this.#c.storage:"browser"===this.#c.storage?n=new e.BrowserStorage(this.#c.baseDir):"node"===this.#c.storage&&(n=new t.NodeStorage(this.#c.baseDir))),!n){const s=typeof navigator<"u"&&navigator?.storage?.getDirectory instanceof Function,i=typeof process<"u"&&null!=process.versions&&null!=process.versions.node;s?n=new e.BrowserStorage(this.#c.baseDir):i&&(n=new t.NodeStorage(this.#c.baseDir))}if(!n)throw new Error('Storage initialization failed. 
Please configure "storage" explicitly or ensure you are in a supported environment (Browser/Node).');this.#e=n,this.#t=new o(this.#e),this.#r=new a(this.#e),this.#h=new Map}async init(){if(this.#d)return;await this.#t.load();const e=[...this.#t.getSegments("word"),...this.#t.getSegments("char")];for(const t of e)this.#h.has(t.filename)||this.#h.set(t.filename,new h(t.filename,this.#e)),await this.#h.get(t.filename).loadIndex();this.#d=!0}startBatch(){this.#g=!0,this.#f={word:0,char:0}}async endBatch(){this.#g=!1,this.#f.word>0&&await this.#l("word",this.#f.word),this.#f.char>0&&await this.#l("char",this.#f.char),this.#f={word:0,char:0},await this.#t.save()}#w(e){try{if(typeof Intl<"u"&&"function"==typeof Intl.Segmenter&&"function"==typeof Array.from){const t=new Intl.Segmenter([],{granularity:"word"}).segment(e);if("object"==typeof t&&null!==t)return Array.from(t).filter(e=>e?.isWordLike).map(e=>e?.segment?.toLowerCase()||"")}}catch{}return e.toLowerCase().split(/[^a-z0-9\u4e00-\u9fa5]+/g).filter(e=>e.length>0)}#m(e){return this.#c.indexingTokenizer?this.#c.indexingTokenizer(e):this.#w(e.text)}#u(e){return this.#c.searchTokenizer?this.#c.searchTokenizer(e):this.#m(e)}async addDocument(e){return this.addDocuments([e])}async addDocumentIfMissing(e){return this.addDocumentsIfMissing([e])}async addDocumentsIfMissing(e){if(this.#d||await this.init(),0===e.length)return;const t=this.#t.getDeletedIds(),s=[],n=[],i=[];for(const o of e){if(t.has(o.id)||this.#t.isAdded(o.id))continue;const e=this.#m(o),a=[],r=[];for(const t of e)t.length>1?a.push(t):1===t.length&&r.push(t);a.length>0&&s.push({id:o.id,tokens:a}),r.length>0&&n.push({id:o.id,tokens:r}),i.push(o)}if(0===i.length)return;let o=0,a=0;if(s.length>0){await this.#r.appendBatch(d,s);for(const e of s)o+=e.tokens.length}if(n.length>0){await this.#r.appendBatch(c,n);for(const e of n)a+=e.tokens.length}for(const e of i)this.#t.addAddedId(e.id);this.#g?(this.#f.word+=o,this.#f.char+=a):(o>0&&await 
this.#l("word",o),a>0&&await this.#l("char",a),await this.#t.save())}async addDocuments(e){if(this.#d||await this.init(),0===e.length)return;const t=this.#t.getDeletedIds(),s=[],n=[];for(const i of e){if(t.has(i.id))throw new Error(`Document ID ${i.id} has been deleted and cannot be re-added.`);if(this.#t.isAdded(i.id))throw new Error(`Document ID ${i.id} already exists.`);const e=this.#m(i),o=[],a=[];for(const t of e)t.length>1?o.push(t):1===t.length&&a.push(t);o.length>0&&s.push({id:i.id,tokens:o}),a.length>0&&n.push({id:i.id,tokens:a})}let i=0,o=0;if(s.length>0){await this.#r.appendBatch(d,s);for(const e of s)i+=e.tokens.length}if(n.length>0){await this.#r.appendBatch(c,n);for(const e of n)o+=e.tokens.length}for(const t of e)this.#t.addAddedId(t.id);this.#g?(this.#f.word+=i,this.#f.char+=o):(i>0&&await this.#l("word",i),o>0&&await this.#l("char",o),await this.#t.save())}async#l(e,t){const s="word"===e?d:c,n=await this.#r.getCurrentSize(s),i="word"===e?this.#c.wordSegmentTokenThreshold||1e5:this.#c.charSegmentTokenThreshold||5e5,o="word"===e?this.#c.minWordTokenSave||0:this.#c.minCharTokenSave||0,a=this.#t.getLastSegmentInfo(e);let r,g,f,l;const w=()=>{const t=this.#t.getSegments(e).length+1;return`${e}_seg_${t}.bin`};if(a){const e=a.tokenCount;e>=i||e+t>=i?(r=w(),f=!0,g=a.end,l=t):(r=a.filename,f=!1,g=a.start,l=e+t)}else r=w(),f=!0,g=0,l=t;if(l<o)return void this.#t.updateSegment(e,r,g,n,l,f);const m=await this.#r.readRange(s,g,n);let u=this.#h.get(r);u||(u=new h(r,this.#e),this.#h.set(r,u)),await u.buildAndSave(m),this.#t.updateSegment(e,r,g,n,l,f)}async search(e,t){this.#d||await this.init();const s="string"==typeof e?{text:e}:e,n=this.#u(s),i=n.filter(e=>e.length>1),o=n.filter(e=>1===e.length),a=this.#t.getDeletedIds(),r=new Map,d=new Map,c=e=>{const t=this.#t.getSegments(e);for(const e of t){const t=e.filename;!this.#h.has(t)&&!d.has(t)&&d.set(t,new h(t,this.#e))}};c("word"),c("char"),await 
Promise.all(Array.from(d.entries()).map(([e,t])=>t.loadIndex().then(s=>{s&&this.#h.set(e,t)})));const g=async(e,t)=>{if(0===t.length)return;const s=this.#t.getSegments(e);for(const e of s){const s=e.filename,n=this.#h.get(s);if(n)for(const e of t){const t=n.search(e),s=1+.1*e.length;for(const n of t)if(!a.has(n))if(r.has(n)){const t=r.get(n);t.score+=s,t.tokens.add(e)}else r.set(n,{score:0,tokens:new Set([e])})}}};await g("word",i),await g("char",o);const f=[];return r.forEach((e,t)=>{f.push({id:t,score:e.score,tokens:Array.from(e.tokens)})}),f.sort((e,t)=>t.score-e.score),"number"==typeof t&&t>0?f.slice(0,t):f}async removeDocument(e){this.#d||await this.init(),this.#t.addDeletedId(e),this.#t.removeAddedId(e),await this.#t.save()}async clearAll(){await this.#e.clearAll(),this.#h.clear(),this.#t.reset(),this.#d=!1,this.#g=!1,this.#f={word:0,char:0}}async getStatus(){return this.#d||await this.init(),{wordSegments:this.#t.getSegments("word").length,charSegments:this.#t.getSegments("char").length,deleted:this.#t.getDeletedIds().size,wordCacheSize:await this.#r.getCurrentSize(d),charCacheSize:await this.#r.getCurrentSize(c),inBatch:this.#g}}async hasDocument(e){return this.#d||await this.init(),this.#t.hasDocument(e)}},exports.hash=r,exports.murmur3_32=r;
1
+ "use strict";const e="search_meta.json",t="deleted_ids.bin",s="added_ids.bin";class n{#e;#t={wordSegments:[],charSegments:[]};#s=new Set;#n=new Set;constructor(e){this.#e=e}async load(){const n=await this.#e.read(e);if(n){const e=(new TextDecoder).decode(n);this.#t=JSON.parse(e)}else this.#t={wordSegments:[],charSegments:[]};const i=await this.#e.read(t);if(i){const e=new DataView(i);let t=0;const s=i.byteLength;for(;t<s&&!(t+4>s);){const n=e.getUint32(t,!0);this.#s.add(n),t+=4,t<s&&30===e.getUint8(t)&&(t+=1)}}const a=await this.#e.read(s);if(a){const e=new DataView(a);let t=0;const s=a.byteLength;for(;t<s&&!(t+4>s);){const n=e.getUint32(t,!0);this.#n.add(n),t+=4,t<s&&30===e.getUint8(t)&&(t+=1)}}}async save(){const n=JSON.stringify(this.#t);if(await this.#e.write(e,(new TextEncoder).encode(n).buffer),0===this.#s.size)await this.#e.remove(t);else{const e=4*this.#s.size+this.#s.size,s=new ArrayBuffer(e),n=new DataView(s);let i=0;for(const e of this.#s)n.setUint32(i,e,!0),i+=4,n.setUint8(i,30),i+=1;await this.#e.write(t,s)}if(0===this.#n.size)await this.#e.remove(s);else{const e=4*this.#n.size+this.#n.size,t=new ArrayBuffer(e),n=new DataView(t);let i=0;for(const e of this.#n)n.setUint32(i,e,!0),i+=4,n.setUint8(i,30),i+=1;await this.#e.write(s,t)}}getSegments(e){return"word"===e?this.#t.wordSegments:this.#t.charSegments}getDeletedIds(){return this.#s}addDeletedId(e){this.#s.add(e)}isDeleted(e){return this.#s.has(e)}addAddedId(e){this.#n.add(e)}removeAddedId(e){this.#n.delete(e)}isAdded(e){return this.#n.has(e)}getAddedIds(){return this.#n}hasDocument(e){return this.#n.has(e)||this.#s.has(e)}getLastSegmentInfo(e){const t=this.getSegments(e);return 0===t.length?null:t[t.length-1]}updateSegment(e,t,s,n,i,a){const o="word"===e?this.#t.wordSegments:this.#t.charSegments;if(a)o.push({filename:t,start:s,end:n,tokenCount:i});else{const 
e=o[o.length-1];e&&e.filename===t&&(e.end=n,e.tokenCount=i)}}reset(){this.#t={wordSegments:[],charSegments:[]},this.#s.clear(),this.#n.clear()}}class i{static SEPARATOR=30;#e;constructor(e){this.#e=e}async appendBatch(e,t){if(0===t.length)return await this.#e.getFileSize(e);const s=new TextEncoder;let n=0;for(const e of t){n+=8;for(const t of e.tokens){n+=2+Math.min(s.encode(t).byteLength,65535)}n+=1}const a=new Uint8Array(n);let o=0;for(const e of t){const t=[];for(const n of e.tokens){const e=s.encode(n),i=e.byteLength>65535?e.slice(0,65535):e;t.push(i)}const n=new DataView(a.buffer,o);n.setUint32(0,e.id,!0),n.setUint32(4,t.length,!0),o+=8;for(const e of t)new DataView(a.buffer,o).setUint16(0,e.byteLength,!0),o+=2,a.set(e,o),o+=e.byteLength;a[o++]=i.SEPARATOR}return await this.#e.append(e,a.buffer),await this.#e.getFileSize(e)}async readRange(e,t,s){const n=await this.#e.readRange(e,t,s);if(!n||0===n.byteLength)return[];const a=new DataView(n),o=new Uint8Array(n),r=new TextDecoder,h=[];let d=0;const c=n.byteLength;for(;d<c&&!(d+8>c);){const e=a.getUint32(d,!0);d+=4;const t=a.getUint32(d,!0);d+=4;const s=[];for(let e=0;e<t&&!(d+2>c);e++){const e=a.getUint16(d,!0);if(d+=2,d+e>c)break;const t=new Uint8Array(n,d,e);s.push(r.decode(t)),d+=e}d<c&&o[d]===i.SEPARATOR&&(d+=1),h.push({id:e,tokens:s})}return h}async getCurrentSize(e){return await this.#e.getFileSize(e)}}function a(e,t=305419896){const s=e.length,n=s>>2;let i=0;for(;i<n;){let s=255&e.charCodeAt(i)|(255&e.charCodeAt(++i))<<8|(255&e.charCodeAt(++i))<<16|(255&e.charCodeAt(++i))<<24;++i,s=3432918353*(65535&s)+((3432918353*(s>>>16)&65535)<<16)&4294967295,s=s<<15|s>>>17,s=461845907*(65535&s)+((461845907*(s>>>16)&65535)<<16)&4294967295,t=27492+(65535&(t=5*(65535&(t=(t^=s)<<13|t>>>19))+((5*(t>>>16)&65535)<<16)&4294967295))+(((t>>>16)+58964&65535)<<16)}let a=0;const o=3&s;return 
o>0&&(o>=3&&(a^=(255&e.charCodeAt(i+2))<<16),o>=2&&(a^=(255&e.charCodeAt(i+1))<<8),o>=1&&(a^=255&e.charCodeAt(i)),a=3432918353*(65535&a)+((3432918353*(a>>>16)&65535)<<16)&4294967295,a=a<<15|a>>>17,a=461845907*(65535&a)+((461845907*(a>>>16)&65535)<<16)&4294967295,t^=a),t^=s,t=2246822507*(65535&(t^=t>>>16))+((2246822507*(t>>>16)&65535)<<16)&4294967295,t=3266489909*(65535&(t^=t>>>13))+((3266489909*(t>>>16)&65535)<<16)&4294967295,(t^=t>>>16)>>>0}class o{#i;#e;#a=null;#o=null;static hash(e){return a(e)}constructor(e,t){this.#i=e,this.#e=t}async loadIndex(){return!!this.#a||(this.#a=await this.#e.read(this.#i),!!this.#a&&(this.#o=new DataView(this.#a),!0))}async buildAndSave(e){const t=new Map;for(const s of e){const e=new Map;for(const n of s.tokens)e.has(n)||(e.set(n,!0),t.has(n)||t.set(n,{hash:o.hash(n),postings:[]}),t.get(n).postings.push(s.id))}const s=Array.from(t.entries());s.sort(([e,{hash:t}],[s,{hash:n}])=>t!==n?t-n:e.localeCompare(s));let n=0,i=0;for(const[e,{postings:t}]of s)n+=t.length,i+=e.length+1;const a=20*s.length,r=12+a+4*n,h=new ArrayBuffer(r+i),d=new DataView(h);d.setUint32(0,1229866072),d.setUint32(4,s.length),d.setUint32(8,r);let c=12,g=12+a,f=r;for(const[e,{hash:t,postings:n}]of s){d.setUint32(c,t),d.setUint32(c+4,e.length),d.setUint32(c+8,f),d.setUint32(c+12,g),d.setUint32(c+16,n.length),c+=20;for(let e=0;e<n.length;e++)d.setUint32(g,n[e],!0),g+=4;const s=(new TextEncoder).encode(e);for(let e=0;e<s.length;e++)d.setUint8(f++,s[e]);d.setUint8(f++,0)}await this.#e.write(this.#i,h),this.#a=h,this.#o=d}search(e){if(!this.#o||!this.#a)return[];const t=o.hash(e),s=this.#o.getUint32(4);let n=0,i=s-1;const a=12,r=20,h=new TextDecoder;for(;n<=i;){const o=n+i>>>1,d=a+o*r,c=this.#o.getUint32(d);if(c<t)n=o+1;else{if(!(c>t)){if(!(o>0&&this.#o.getUint32(a+(o-1)*r)===t||o<s-1&&this.#o.getUint32(a+(o+1)*r)===t)){const e=this.#o.getUint32(a+o*r+12),t=this.#o.getUint32(a+o*r+16),s=[];for(let n=0;n<t;n++)s.push(this.#o.getUint32(e+4*n,!0));return s}let 
n=o;for(;n>0;){const e=a+(n-1)*r;if(this.#o.getUint32(e)!==t)break;n--}for(let i=n;i<s;i++){const s=a+i*r;if(this.#o.getUint32(s)!==t)break;const n=this.#o.getUint32(s+4),o=this.#o.getUint32(s+8),d=new Uint8Array(this.#a,o,n);if(h.decode(d)===e){const e=this.#o.getUint32(s+12),t=this.#o.getUint32(s+16),n=[];for(let s=0;s<t;s++)n.push(this.#o.getUint32(e+4*s,!0));return n}}return[]}i=o-1}}return[]}}const r=({text:e})=>{try{if(typeof Intl<"u"&&"function"==typeof Intl.Segmenter&&"function"==typeof Array.from){const t=new Intl.Segmenter([],{granularity:"word"}).segment(e);if("object"==typeof t&&null!==t)return Array.from(t).filter(e=>e?.isWordLike).map(e=>e?.segment?.toLowerCase()||"")}}catch{}return e.toLowerCase().split(/[^a-z0-9\u4e00-\u9fa5]+/g).filter(e=>e.length>0)},h="word_cache.bin",d="char_cache.bin";exports.SearchEngine=class{#e;#t;#r;#h;#d=!1;#c;#g=!1;#f={word:0,char:0};constructor(e){if(this.#c={wordSegmentTokenThreshold:1e5,charSegmentTokenThreshold:5e5,minWordTokenSave:0,minCharTokenSave:0,indexingTokenizer:e.indexingTokenizer||r,...e},(this.#c.minWordTokenSave||0)>=(this.#c.wordSegmentTokenThreshold||1e5))throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");if((this.#c.minCharTokenSave||0)>=(this.#c.charSegmentTokenThreshold||5e5))throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");this.#e=e.storage,this.#t=new n(this.#e),this.#r=new i(this.#e),this.#h=new Map}startBatch(){this.#g=!0,this.#f={word:0,char:0}}async endBatch(){this.#g=!1,this.#f.word>0&&await this.#l("word",this.#f.word),this.#f.char>0&&await this.#l("char",this.#f.char),this.#f={word:0,char:0},await this.#t.save()}async addDocument(e){return this.addDocuments([e])}async addDocumentIfMissing(e){return this.addDocumentsIfMissing([e])}async addDocumentsIfMissing(e){if(this.#d||await this.#w(),0===e.length)return;const t=this.#t.getDeletedIds(),s=[],n=[],i=[];for(const a of e){if(t.has(a.id)||this.#t.isAdded(a.id))continue;const 
e=this.#m(a),o=[],r=[];for(const t of e)t.length>1?o.push(t):1===t.length&&r.push(t);o.length>0&&s.push({id:a.id,tokens:o}),r.length>0&&n.push({id:a.id,tokens:r}),i.push(a)}if(0===i.length)return;let a=0,o=0;if(s.length>0){await this.#r.appendBatch(h,s);for(const e of s)a+=e.tokens.length}if(n.length>0){await this.#r.appendBatch(d,n);for(const e of n)o+=e.tokens.length}for(const e of i)this.#t.addAddedId(e.id);this.#g?(this.#f.word+=a,this.#f.char+=o):(a>0&&await this.#l("word",a),o>0&&await this.#l("char",o),await this.#t.save())}async addDocuments(e){if(this.#d||await this.#w(),0===e.length)return;const t=this.#t.getDeletedIds(),s=[],n=[];for(const i of e){if(t.has(i.id))throw new Error(`Document ID ${i.id} has been deleted and cannot be re-added.`);if(this.#t.isAdded(i.id))throw new Error(`Document ID ${i.id} already exists.`);const e=this.#m(i),a=[],o=[];for(const t of e)t.length>1?a.push(t):1===t.length&&o.push(t);a.length>0&&s.push({id:i.id,tokens:a}),o.length>0&&n.push({id:i.id,tokens:o})}let i=0,a=0;if(s.length>0){await this.#r.appendBatch(h,s);for(const e of s)i+=e.tokens.length}if(n.length>0){await this.#r.appendBatch(d,n);for(const e of n)a+=e.tokens.length}for(const t of e)this.#t.addAddedId(t.id);this.#g?(this.#f.word+=i,this.#f.char+=a):(i>0&&await this.#l("word",i),a>0&&await this.#l("char",a),await this.#t.save())}async search(e,t){this.#d||await this.#w();const s="string"==typeof e?{text:e}:e,n=this.#u(s),i=n.filter(e=>e.length>1),a=n.filter(e=>1===e.length),r=this.#t.getDeletedIds(),h=new Map,d=new Map,c=e=>{const t=this.#t.getSegments(e);for(const e of t){const t=e.filename;!this.#h.has(t)&&!d.has(t)&&d.set(t,new o(t,this.#e))}};c("word"),c("char"),await Promise.all(Array.from(d.entries()).map(([e,t])=>t.loadIndex().then(s=>{s&&this.#h.set(e,t)})));const g=async(e,t)=>{if(0===t.length)return;const s=this.#t.getSegments(e);for(const e of s){const s=e.filename,n=this.#h.get(s);if(n)for(const e of t){const t=n.search(e),s=1+.1*e.length;for(const n 
of t)if(!r.has(n))if(h.has(n)){const t=h.get(n);t.score+=s,t.tokens.add(e)}else h.set(n,{score:0,tokens:new Set([e])})}}};await g("word",i),await g("char",a);const f=[];return h.forEach((e,t)=>{f.push({id:t,score:e.score,tokens:Array.from(e.tokens)})}),f.sort((e,t)=>t.score-e.score),"number"==typeof t&&t>0?f.slice(0,t):f}async removeDocument(e){this.#d||await this.#w(),this.#t.addDeletedId(e),this.#t.removeAddedId(e),await this.#t.save()}async clearAll(){await this.#e.clearAll(),this.#h.clear(),this.#t.reset(),this.#d=!1,this.#g=!1,this.#f={word:0,char:0}}async getStatus(){return this.#d||await this.#w(),{wordSegments:this.#t.getSegments("word").length,charSegments:this.#t.getSegments("char").length,deleted:this.#t.getDeletedIds().size,wordCacheSize:await this.#r.getCurrentSize(h),charCacheSize:await this.#r.getCurrentSize(d),inBatch:this.#g}}async hasDocument(e){return this.#d||await this.#w(),this.#t.hasDocument(e)}async#w(){if(this.#d)return;await this.#t.load();const e=[...this.#t.getSegments("word"),...this.#t.getSegments("char")];for(const t of e)this.#h.has(t.filename)||this.#h.set(t.filename,new o(t.filename,this.#e)),await this.#h.get(t.filename).loadIndex();this.#d=!0}#m(e){return this.#c.indexingTokenizer(e)}#u(e){return this.#c.searchTokenizer?this.#c.searchTokenizer(e):this.#m(e)}async#l(e,t){const s="word"===e?h:d,n=await this.#r.getCurrentSize(s),i="word"===e?this.#c.wordSegmentTokenThreshold||1e5:this.#c.charSegmentTokenThreshold||5e5,a="word"===e?this.#c.minWordTokenSave||0:this.#c.minCharTokenSave||0,r=this.#t.getLastSegmentInfo(e);let c,g,f,l;const w=()=>{const t=this.#t.getSegments(e).length+1;return`${e}_seg_${t}.bin`};if(r){const e=r.tokenCount;e>=i||e+t>=i?(c=w(),f=!0,g=r.end,l=t):(c=r.filename,f=!1,g=r.start,l=e+t)}else c=w(),f=!0,g=0,l=t;if(l<a)return void this.#t.updateSegment(e,c,g,n,l,f);const m=await this.#r.readRange(s,g,n);let u=this.#h.get(c);u||(u=new o(c,this.#e),this.#h.set(c,u)),await 
u.buildAndSave(m),this.#t.updateSegment(e,c,g,n,l,f)}},exports.hash=a,exports.murmur3_32=a;
package/lib/core.d.ts CHANGED
@@ -1,12 +1,11 @@
1
- import { ISearchEngineConfig, IDocument, IDocumentBase, IResult } from './type';
1
+ import { ISearchEngine, ISearchEngineOption, IDocument, IDocumentBase, IResult, ISearchEngineStatus } from './type';
2
2
 
3
3
  /**
4
4
  * 核心搜索引擎类 (多实例支持)
5
5
  */
6
- declare class SearchEngine {
6
+ declare class SearchEngine implements ISearchEngine {
7
7
  #private;
8
- constructor(config: ISearchEngineConfig);
9
- init(): Promise<void>;
8
+ constructor(config: ISearchEngineOption);
10
9
  /**
11
10
  * 开启批处理
12
11
  * 批处理期间 addDocuments 只写入缓存,不触发索引段构建
@@ -29,17 +28,10 @@ declare class SearchEngine {
29
28
  */
30
29
  addDocumentsIfMissing<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
31
30
  addDocuments<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
32
- search<T extends IDocumentBase = IDocumentBase>(query: T | string, limit?: number): Promise<IResult[]>;
31
+ search<T extends IDocumentBase | string = any>(query: T, limit?: number): Promise<IResult[]>;
33
32
  removeDocument(id: number): Promise<void>;
34
33
  clearAll(): Promise<void>;
35
- getStatus(): Promise<{
36
- wordSegments: number;
37
- charSegments: number;
38
- deleted: number;
39
- wordCacheSize: number;
40
- charCacheSize: number;
41
- inBatch: boolean;
42
- }>;
34
+ getStatus(): Promise<ISearchEngineStatus>;
43
35
  /**
44
36
  * 检查文档ID是否曾经添加过(包括已删除的)
45
37
  * @param id 文档ID
package/lib/core.js CHANGED
@@ -1 +1 @@
1
- import{BrowserStorage as e}from'./browser';import{NodeStorage as t}from'./node';const s="search_meta.json",n="deleted_ids.bin",i="added_ids.bin";class o{#e;#t={wordSegments:[],charSegments:[]};#s=new Set;#n=new Set;constructor(e){this.#e=e}async load(){const e=await this.#e.read(s);if(e){const t=(new TextDecoder).decode(e);this.#t=JSON.parse(t)}else this.#t={wordSegments:[],charSegments:[]};const t=await this.#e.read(n);if(t){const e=new DataView(t);let s=0;const n=t.byteLength;for(;s<n&&!(s+4>n);){const t=e.getUint32(s,!0);this.#s.add(t),s+=4,s<n&&30===e.getUint8(s)&&(s+=1)}}const o=await this.#e.read(i);if(o){const e=new DataView(o);let t=0;const s=o.byteLength;for(;t<s&&!(t+4>s);){const n=e.getUint32(t,!0);this.#n.add(n),t+=4,t<s&&30===e.getUint8(t)&&(t+=1)}}}async save(){const e=JSON.stringify(this.#t);if(await this.#e.write(s,(new TextEncoder).encode(e).buffer),0===this.#s.size)await this.#e.remove(n);else{const e=4*this.#s.size+this.#s.size,t=new ArrayBuffer(e),s=new DataView(t);let i=0;for(const e of this.#s)s.setUint32(i,e,!0),i+=4,s.setUint8(i,30),i+=1;await this.#e.write(n,t)}if(0===this.#n.size)await this.#e.remove(i);else{const e=4*this.#n.size+this.#n.size,t=new ArrayBuffer(e),s=new DataView(t);let n=0;for(const e of this.#n)s.setUint32(n,e,!0),n+=4,s.setUint8(n,30),n+=1;await this.#e.write(i,t)}}getSegments(e){return"word"===e?this.#t.wordSegments:this.#t.charSegments}getDeletedIds(){return this.#s}addDeletedId(e){this.#s.add(e)}isDeleted(e){return this.#s.has(e)}addAddedId(e){this.#n.add(e)}removeAddedId(e){this.#n.delete(e)}isAdded(e){return this.#n.has(e)}getAddedIds(){return this.#n}hasDocument(e){return this.#n.has(e)||this.#s.has(e)}getLastSegmentInfo(e){const t=this.getSegments(e);return 0===t.length?null:t[t.length-1]}updateSegment(e,t,s,n,i,o){const a="word"===e?this.#t.wordSegments:this.#t.charSegments;if(o)a.push({filename:t,start:s,end:n,tokenCount:i});else{const 
e=a[a.length-1];e&&e.filename===t&&(e.end=n,e.tokenCount=i)}}reset(){this.#t={wordSegments:[],charSegments:[]},this.#s.clear(),this.#n.clear()}}class a{static SEPARATOR=30;#e;constructor(e){this.#e=e}async appendBatch(e,t){if(0===t.length)return await this.#e.getFileSize(e);const s=new TextEncoder;let n=0;for(const e of t){n+=8;for(const t of e.tokens){n+=2+Math.min(s.encode(t).byteLength,65535)}n+=1}const i=new Uint8Array(n);let o=0;for(const e of t){const t=[];for(const n of e.tokens){const e=s.encode(n),i=e.byteLength>65535?e.slice(0,65535):e;t.push(i)}const n=new DataView(i.buffer,o);n.setUint32(0,e.id,!0),n.setUint32(4,t.length,!0),o+=8;for(const e of t)new DataView(i.buffer,o).setUint16(0,e.byteLength,!0),o+=2,i.set(e,o),o+=e.byteLength;i[o++]=a.SEPARATOR}return await this.#e.append(e,i.buffer),await this.#e.getFileSize(e)}async readRange(e,t,s){const n=await this.#e.readRange(e,t,s);if(!n||0===n.byteLength)return[];const i=new DataView(n),o=new Uint8Array(n),r=new TextDecoder,h=[];let d=0;const c=n.byteLength;for(;d<c&&!(d+8>c);){const e=i.getUint32(d,!0);d+=4;const t=i.getUint32(d,!0);d+=4;const s=[];for(let e=0;e<t&&!(d+2>c);e++){const e=i.getUint16(d,!0);if(d+=2,d+e>c)break;const t=new Uint8Array(n,d,e);s.push(r.decode(t)),d+=e}d<c&&o[d]===a.SEPARATOR&&(d+=1),h.push({id:e,tokens:s})}return h}async getCurrentSize(e){return await this.#e.getFileSize(e)}}function r(e,t=305419896){const s=e.length,n=s>>2;let i=0;for(;i<n;){let s=255&e.charCodeAt(i)|(255&e.charCodeAt(++i))<<8|(255&e.charCodeAt(++i))<<16|(255&e.charCodeAt(++i))<<24;++i,s=3432918353*(65535&s)+((3432918353*(s>>>16)&65535)<<16)&4294967295,s=s<<15|s>>>17,s=461845907*(65535&s)+((461845907*(s>>>16)&65535)<<16)&4294967295,t=27492+(65535&(t=5*(65535&(t=(t^=s)<<13|t>>>19))+((5*(t>>>16)&65535)<<16)&4294967295))+(((t>>>16)+58964&65535)<<16)}let o=0;const a=3&s;return 
a>0&&(a>=3&&(o^=(255&e.charCodeAt(i+2))<<16),a>=2&&(o^=(255&e.charCodeAt(i+1))<<8),a>=1&&(o^=255&e.charCodeAt(i)),o=3432918353*(65535&o)+((3432918353*(o>>>16)&65535)<<16)&4294967295,o=o<<15|o>>>17,o=461845907*(65535&o)+((461845907*(o>>>16)&65535)<<16)&4294967295,t^=o),t^=s,t=2246822507*(65535&(t^=t>>>16))+((2246822507*(t>>>16)&65535)<<16)&4294967295,t=3266489909*(65535&(t^=t>>>13))+((3266489909*(t>>>16)&65535)<<16)&4294967295,(t^=t>>>16)>>>0}class h{#i;#e;#o=null;#a=null;static hash(e){return r(e)}constructor(e,t){this.#i=e,this.#e=t}async loadIndex(){return!!this.#o||(this.#o=await this.#e.read(this.#i),!!this.#o&&(this.#a=new DataView(this.#o),!0))}async buildAndSave(e){const t=new Map;for(const s of e){const e=new Map;for(const n of s.tokens)e.has(n)||(e.set(n,!0),t.has(n)||t.set(n,{hash:h.hash(n),postings:[]}),t.get(n).postings.push(s.id))}const s=Array.from(t.entries());s.sort(([e,{hash:t}],[s,{hash:n}])=>t!==n?t-n:e.localeCompare(s));let n=0,i=0;for(const[e,{postings:t}]of s)n+=t.length,i+=e.length+1;const o=20*s.length,a=12+o+4*n,r=new ArrayBuffer(a+i),d=new DataView(r);d.setUint32(0,1229866072),d.setUint32(4,s.length),d.setUint32(8,a);let c=12,g=12+o,f=a;for(const[e,{hash:t,postings:n}]of s){d.setUint32(c,t),d.setUint32(c+4,e.length),d.setUint32(c+8,f),d.setUint32(c+12,g),d.setUint32(c+16,n.length),c+=20;for(let e=0;e<n.length;e++)d.setUint32(g,n[e],!0),g+=4;const s=(new TextEncoder).encode(e);for(let e=0;e<s.length;e++)d.setUint8(f++,s[e]);d.setUint8(f++,0)}await this.#e.write(this.#i,r),this.#o=r,this.#a=d}search(e){if(!this.#a||!this.#o)return[];const t=h.hash(e),s=this.#a.getUint32(4);let n=0,i=s-1;const o=12,a=20,r=new TextDecoder;for(;n<=i;){const h=n+i>>>1,d=o+h*a,c=this.#a.getUint32(d);if(c<t)n=h+1;else{if(!(c>t)){if(!(h>0&&this.#a.getUint32(o+(h-1)*a)===t||h<s-1&&this.#a.getUint32(o+(h+1)*a)===t)){const e=this.#a.getUint32(o+h*a+12),t=this.#a.getUint32(o+h*a+16),s=[];for(let n=0;n<t;n++)s.push(this.#a.getUint32(e+4*n,!0));return s}let 
n=h;for(;n>0;){const e=o+(n-1)*a;if(this.#a.getUint32(e)!==t)break;n--}for(let i=n;i<s;i++){const s=o+i*a;if(this.#a.getUint32(s)!==t)break;const n=this.#a.getUint32(s+4),h=this.#a.getUint32(s+8),d=new Uint8Array(this.#o,h,n);if(r.decode(d)===e){const e=this.#a.getUint32(s+12),t=this.#a.getUint32(s+16),n=[];for(let s=0;s<t;s++)n.push(this.#a.getUint32(e+4*s,!0));return n}}return[]}i=h-1}}return[]}}const d="word_cache.bin",c="char_cache.bin";class g{#e;#t;#r;#h;#d=!1;#c;#g=!1;#f={word:0,char:0};constructor(s){if(!s.baseDir)throw new Error("SearchEngine requires 'baseDir' in config.");if(this.#c={wordSegmentTokenThreshold:1e5,charSegmentTokenThreshold:5e5,minWordTokenSave:0,minCharTokenSave:0,...s},(this.#c.minWordTokenSave||0)>=(this.#c.wordSegmentTokenThreshold||1e5))throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");if((this.#c.minCharTokenSave||0)>=(this.#c.charSegmentTokenThreshold||5e5))throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");let n=null;if(this.#c.storage&&("object"==typeof this.#c.storage?n=this.#c.storage:"browser"===this.#c.storage?n=new e(this.#c.baseDir):"node"===this.#c.storage&&(n=new t(this.#c.baseDir))),!n){const s=typeof navigator<"u"&&navigator?.storage?.getDirectory instanceof Function,i=typeof process<"u"&&null!=process.versions&&null!=process.versions.node;s?n=new e(this.#c.baseDir):i&&(n=new t(this.#c.baseDir))}if(!n)throw new Error('Storage initialization failed. 
Please configure "storage" explicitly or ensure you are in a supported environment (Browser/Node).');this.#e=n,this.#t=new o(this.#e),this.#r=new a(this.#e),this.#h=new Map}async init(){if(this.#d)return;await this.#t.load();const e=[...this.#t.getSegments("word"),...this.#t.getSegments("char")];for(const t of e)this.#h.has(t.filename)||this.#h.set(t.filename,new h(t.filename,this.#e)),await this.#h.get(t.filename).loadIndex();this.#d=!0}startBatch(){this.#g=!0,this.#f={word:0,char:0}}async endBatch(){this.#g=!1,this.#f.word>0&&await this.#l("word",this.#f.word),this.#f.char>0&&await this.#l("char",this.#f.char),this.#f={word:0,char:0},await this.#t.save()}#w(e){try{if(typeof Intl<"u"&&"function"==typeof Intl.Segmenter&&"function"==typeof Array.from){const t=new Intl.Segmenter([],{granularity:"word"}).segment(e);if("object"==typeof t&&null!==t)return Array.from(t).filter(e=>e?.isWordLike).map(e=>e?.segment?.toLowerCase()||"")}}catch{}return e.toLowerCase().split(/[^a-z0-9\u4e00-\u9fa5]+/g).filter(e=>e.length>0)}#m(e){return this.#c.indexingTokenizer?this.#c.indexingTokenizer(e):this.#w(e.text)}#u(e){return this.#c.searchTokenizer?this.#c.searchTokenizer(e):this.#m(e)}async addDocument(e){return this.addDocuments([e])}async addDocumentIfMissing(e){return this.addDocumentsIfMissing([e])}async addDocumentsIfMissing(e){if(this.#d||await this.init(),0===e.length)return;const t=this.#t.getDeletedIds(),s=[],n=[],i=[];for(const o of e){if(t.has(o.id)||this.#t.isAdded(o.id))continue;const e=this.#m(o),a=[],r=[];for(const t of e)t.length>1?a.push(t):1===t.length&&r.push(t);a.length>0&&s.push({id:o.id,tokens:a}),r.length>0&&n.push({id:o.id,tokens:r}),i.push(o)}if(0===i.length)return;let o=0,a=0;if(s.length>0){await this.#r.appendBatch(d,s);for(const e of s)o+=e.tokens.length}if(n.length>0){await this.#r.appendBatch(c,n);for(const e of n)a+=e.tokens.length}for(const e of i)this.#t.addAddedId(e.id);this.#g?(this.#f.word+=o,this.#f.char+=a):(o>0&&await 
this.#l("word",o),a>0&&await this.#l("char",a),await this.#t.save())}async addDocuments(e){if(this.#d||await this.init(),0===e.length)return;const t=this.#t.getDeletedIds(),s=[],n=[];for(const i of e){if(t.has(i.id))throw new Error(`Document ID ${i.id} has been deleted and cannot be re-added.`);if(this.#t.isAdded(i.id))throw new Error(`Document ID ${i.id} already exists.`);const e=this.#m(i),o=[],a=[];for(const t of e)t.length>1?o.push(t):1===t.length&&a.push(t);o.length>0&&s.push({id:i.id,tokens:o}),a.length>0&&n.push({id:i.id,tokens:a})}let i=0,o=0;if(s.length>0){await this.#r.appendBatch(d,s);for(const e of s)i+=e.tokens.length}if(n.length>0){await this.#r.appendBatch(c,n);for(const e of n)o+=e.tokens.length}for(const t of e)this.#t.addAddedId(t.id);this.#g?(this.#f.word+=i,this.#f.char+=o):(i>0&&await this.#l("word",i),o>0&&await this.#l("char",o),await this.#t.save())}async#l(e,t){const s="word"===e?d:c,n=await this.#r.getCurrentSize(s),i="word"===e?this.#c.wordSegmentTokenThreshold||1e5:this.#c.charSegmentTokenThreshold||5e5,o="word"===e?this.#c.minWordTokenSave||0:this.#c.minCharTokenSave||0,a=this.#t.getLastSegmentInfo(e);let r,g,f,l;const w=()=>{const t=this.#t.getSegments(e).length+1;return`${e}_seg_${t}.bin`};if(a){const e=a.tokenCount;e>=i||e+t>=i?(r=w(),f=!0,g=a.end,l=t):(r=a.filename,f=!1,g=a.start,l=e+t)}else r=w(),f=!0,g=0,l=t;if(l<o)return void this.#t.updateSegment(e,r,g,n,l,f);const m=await this.#r.readRange(s,g,n);let u=this.#h.get(r);u||(u=new h(r,this.#e),this.#h.set(r,u)),await u.buildAndSave(m),this.#t.updateSegment(e,r,g,n,l,f)}async search(e,t){this.#d||await this.init();const s="string"==typeof e?{text:e}:e,n=this.#u(s),i=n.filter(e=>e.length>1),o=n.filter(e=>1===e.length),a=this.#t.getDeletedIds(),r=new Map,d=new Map,c=e=>{const t=this.#t.getSegments(e);for(const e of t){const t=e.filename;!this.#h.has(t)&&!d.has(t)&&d.set(t,new h(t,this.#e))}};c("word"),c("char"),await 
Promise.all(Array.from(d.entries()).map(([e,t])=>t.loadIndex().then(s=>{s&&this.#h.set(e,t)})));const g=async(e,t)=>{if(0===t.length)return;const s=this.#t.getSegments(e);for(const e of s){const s=e.filename,n=this.#h.get(s);if(n)for(const e of t){const t=n.search(e),s=1+.1*e.length;for(const n of t)if(!a.has(n))if(r.has(n)){const t=r.get(n);t.score+=s,t.tokens.add(e)}else r.set(n,{score:0,tokens:new Set([e])})}}};await g("word",i),await g("char",o);const f=[];return r.forEach((e,t)=>{f.push({id:t,score:e.score,tokens:Array.from(e.tokens)})}),f.sort((e,t)=>t.score-e.score),"number"==typeof t&&t>0?f.slice(0,t):f}async removeDocument(e){this.#d||await this.init(),this.#t.addDeletedId(e),this.#t.removeAddedId(e),await this.#t.save()}async clearAll(){await this.#e.clearAll(),this.#h.clear(),this.#t.reset(),this.#d=!1,this.#g=!1,this.#f={word:0,char:0}}async getStatus(){return this.#d||await this.init(),{wordSegments:this.#t.getSegments("word").length,charSegments:this.#t.getSegments("char").length,deleted:this.#t.getDeletedIds().size,wordCacheSize:await this.#r.getCurrentSize(d),charCacheSize:await this.#r.getCurrentSize(c),inBatch:this.#g}}async hasDocument(e){return this.#d||await this.init(),this.#t.hasDocument(e)}}export{g as SearchEngine,r as hash,r as murmur3_32};
1
+ const e="search_meta.json",t="deleted_ids.bin",s="added_ids.bin";class n{#e;#t={wordSegments:[],charSegments:[]};#s=new Set;#n=new Set;constructor(e){this.#e=e}async load(){const n=await this.#e.read(e);if(n){const e=(new TextDecoder).decode(n);this.#t=JSON.parse(e)}else this.#t={wordSegments:[],charSegments:[]};const i=await this.#e.read(t);if(i){const e=new DataView(i);let t=0;const s=i.byteLength;for(;t<s&&!(t+4>s);){const n=e.getUint32(t,!0);this.#s.add(n),t+=4,t<s&&30===e.getUint8(t)&&(t+=1)}}const a=await this.#e.read(s);if(a){const e=new DataView(a);let t=0;const s=a.byteLength;for(;t<s&&!(t+4>s);){const n=e.getUint32(t,!0);this.#n.add(n),t+=4,t<s&&30===e.getUint8(t)&&(t+=1)}}}async save(){const n=JSON.stringify(this.#t);if(await this.#e.write(e,(new TextEncoder).encode(n).buffer),0===this.#s.size)await this.#e.remove(t);else{const e=4*this.#s.size+this.#s.size,s=new ArrayBuffer(e),n=new DataView(s);let i=0;for(const e of this.#s)n.setUint32(i,e,!0),i+=4,n.setUint8(i,30),i+=1;await this.#e.write(t,s)}if(0===this.#n.size)await this.#e.remove(s);else{const e=4*this.#n.size+this.#n.size,t=new ArrayBuffer(e),n=new DataView(t);let i=0;for(const e of this.#n)n.setUint32(i,e,!0),i+=4,n.setUint8(i,30),i+=1;await this.#e.write(s,t)}}getSegments(e){return"word"===e?this.#t.wordSegments:this.#t.charSegments}getDeletedIds(){return this.#s}addDeletedId(e){this.#s.add(e)}isDeleted(e){return this.#s.has(e)}addAddedId(e){this.#n.add(e)}removeAddedId(e){this.#n.delete(e)}isAdded(e){return this.#n.has(e)}getAddedIds(){return this.#n}hasDocument(e){return this.#n.has(e)||this.#s.has(e)}getLastSegmentInfo(e){const t=this.getSegments(e);return 0===t.length?null:t[t.length-1]}updateSegment(e,t,s,n,i,a){const o="word"===e?this.#t.wordSegments:this.#t.charSegments;if(a)o.push({filename:t,start:s,end:n,tokenCount:i});else{const e=o[o.length-1];e&&e.filename===t&&(e.end=n,e.tokenCount=i)}}reset(){this.#t={wordSegments:[],charSegments:[]},this.#s.clear(),this.#n.clear()}}class 
i{static SEPARATOR=30;#e;constructor(e){this.#e=e}async appendBatch(e,t){if(0===t.length)return await this.#e.getFileSize(e);const s=new TextEncoder;let n=0;for(const e of t){n+=8;for(const t of e.tokens){n+=2+Math.min(s.encode(t).byteLength,65535)}n+=1}const a=new Uint8Array(n);let o=0;for(const e of t){const t=[];for(const n of e.tokens){const e=s.encode(n),i=e.byteLength>65535?e.slice(0,65535):e;t.push(i)}const n=new DataView(a.buffer,o);n.setUint32(0,e.id,!0),n.setUint32(4,t.length,!0),o+=8;for(const e of t)new DataView(a.buffer,o).setUint16(0,e.byteLength,!0),o+=2,a.set(e,o),o+=e.byteLength;a[o++]=i.SEPARATOR}return await this.#e.append(e,a.buffer),await this.#e.getFileSize(e)}async readRange(e,t,s){const n=await this.#e.readRange(e,t,s);if(!n||0===n.byteLength)return[];const a=new DataView(n),o=new Uint8Array(n),r=new TextDecoder,h=[];let d=0;const c=n.byteLength;for(;d<c&&!(d+8>c);){const e=a.getUint32(d,!0);d+=4;const t=a.getUint32(d,!0);d+=4;const s=[];for(let e=0;e<t&&!(d+2>c);e++){const e=a.getUint16(d,!0);if(d+=2,d+e>c)break;const t=new Uint8Array(n,d,e);s.push(r.decode(t)),d+=e}d<c&&o[d]===i.SEPARATOR&&(d+=1),h.push({id:e,tokens:s})}return h}async getCurrentSize(e){return await this.#e.getFileSize(e)}}function a(e,t=305419896){const s=e.length,n=s>>2;let i=0;for(;i<n;){let s=255&e.charCodeAt(i)|(255&e.charCodeAt(++i))<<8|(255&e.charCodeAt(++i))<<16|(255&e.charCodeAt(++i))<<24;++i,s=3432918353*(65535&s)+((3432918353*(s>>>16)&65535)<<16)&4294967295,s=s<<15|s>>>17,s=461845907*(65535&s)+((461845907*(s>>>16)&65535)<<16)&4294967295,t=27492+(65535&(t=5*(65535&(t=(t^=s)<<13|t>>>19))+((5*(t>>>16)&65535)<<16)&4294967295))+(((t>>>16)+58964&65535)<<16)}let a=0;const o=3&s;return 
o>0&&(o>=3&&(a^=(255&e.charCodeAt(i+2))<<16),o>=2&&(a^=(255&e.charCodeAt(i+1))<<8),o>=1&&(a^=255&e.charCodeAt(i)),a=3432918353*(65535&a)+((3432918353*(a>>>16)&65535)<<16)&4294967295,a=a<<15|a>>>17,a=461845907*(65535&a)+((461845907*(a>>>16)&65535)<<16)&4294967295,t^=a),t^=s,t=2246822507*(65535&(t^=t>>>16))+((2246822507*(t>>>16)&65535)<<16)&4294967295,t=3266489909*(65535&(t^=t>>>13))+((3266489909*(t>>>16)&65535)<<16)&4294967295,(t^=t>>>16)>>>0}class o{#i;#e;#a=null;#o=null;static hash(e){return a(e)}constructor(e,t){this.#i=e,this.#e=t}async loadIndex(){return!!this.#a||(this.#a=await this.#e.read(this.#i),!!this.#a&&(this.#o=new DataView(this.#a),!0))}async buildAndSave(e){const t=new Map;for(const s of e){const e=new Map;for(const n of s.tokens)e.has(n)||(e.set(n,!0),t.has(n)||t.set(n,{hash:o.hash(n),postings:[]}),t.get(n).postings.push(s.id))}const s=Array.from(t.entries());s.sort(([e,{hash:t}],[s,{hash:n}])=>t!==n?t-n:e.localeCompare(s));let n=0,i=0;for(const[e,{postings:t}]of s)n+=t.length,i+=e.length+1;const a=20*s.length,r=12+a+4*n,h=new ArrayBuffer(r+i),d=new DataView(h);d.setUint32(0,1229866072),d.setUint32(4,s.length),d.setUint32(8,r);let c=12,g=12+a,f=r;for(const[e,{hash:t,postings:n}]of s){d.setUint32(c,t),d.setUint32(c+4,e.length),d.setUint32(c+8,f),d.setUint32(c+12,g),d.setUint32(c+16,n.length),c+=20;for(let e=0;e<n.length;e++)d.setUint32(g,n[e],!0),g+=4;const s=(new TextEncoder).encode(e);for(let e=0;e<s.length;e++)d.setUint8(f++,s[e]);d.setUint8(f++,0)}await this.#e.write(this.#i,h),this.#a=h,this.#o=d}search(e){if(!this.#o||!this.#a)return[];const t=o.hash(e),s=this.#o.getUint32(4);let n=0,i=s-1;const a=12,r=20,h=new TextDecoder;for(;n<=i;){const o=n+i>>>1,d=a+o*r,c=this.#o.getUint32(d);if(c<t)n=o+1;else{if(!(c>t)){if(!(o>0&&this.#o.getUint32(a+(o-1)*r)===t||o<s-1&&this.#o.getUint32(a+(o+1)*r)===t)){const e=this.#o.getUint32(a+o*r+12),t=this.#o.getUint32(a+o*r+16),s=[];for(let n=0;n<t;n++)s.push(this.#o.getUint32(e+4*n,!0));return s}let 
n=o;for(;n>0;){const e=a+(n-1)*r;if(this.#o.getUint32(e)!==t)break;n--}for(let i=n;i<s;i++){const s=a+i*r;if(this.#o.getUint32(s)!==t)break;const n=this.#o.getUint32(s+4),o=this.#o.getUint32(s+8),d=new Uint8Array(this.#a,o,n);if(h.decode(d)===e){const e=this.#o.getUint32(s+12),t=this.#o.getUint32(s+16),n=[];for(let s=0;s<t;s++)n.push(this.#o.getUint32(e+4*s,!0));return n}}return[]}i=o-1}}return[]}}const r=({text:e})=>{try{if(typeof Intl<"u"&&"function"==typeof Intl.Segmenter&&"function"==typeof Array.from){const t=new Intl.Segmenter([],{granularity:"word"}).segment(e);if("object"==typeof t&&null!==t)return Array.from(t).filter(e=>e?.isWordLike).map(e=>e?.segment?.toLowerCase()||"")}}catch{}return e.toLowerCase().split(/[^a-z0-9\u4e00-\u9fa5]+/g).filter(e=>e.length>0)},h="word_cache.bin",d="char_cache.bin";class c{#e;#t;#r;#h;#d=!1;#c;#g=!1;#f={word:0,char:0};constructor(e){if(this.#c={wordSegmentTokenThreshold:1e5,charSegmentTokenThreshold:5e5,minWordTokenSave:0,minCharTokenSave:0,indexingTokenizer:e.indexingTokenizer||r,...e},(this.#c.minWordTokenSave||0)>=(this.#c.wordSegmentTokenThreshold||1e5))throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");if((this.#c.minCharTokenSave||0)>=(this.#c.charSegmentTokenThreshold||5e5))throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");this.#e=e.storage,this.#t=new n(this.#e),this.#r=new i(this.#e),this.#h=new Map}startBatch(){this.#g=!0,this.#f={word:0,char:0}}async endBatch(){this.#g=!1,this.#f.word>0&&await this.#l("word",this.#f.word),this.#f.char>0&&await this.#l("char",this.#f.char),this.#f={word:0,char:0},await this.#t.save()}async addDocument(e){return this.addDocuments([e])}async addDocumentIfMissing(e){return this.addDocumentsIfMissing([e])}async addDocumentsIfMissing(e){if(this.#d||await this.#w(),0===e.length)return;const t=this.#t.getDeletedIds(),s=[],n=[],i=[];for(const a of e){if(t.has(a.id)||this.#t.isAdded(a.id))continue;const 
e=this.#m(a),o=[],r=[];for(const t of e)t.length>1?o.push(t):1===t.length&&r.push(t);o.length>0&&s.push({id:a.id,tokens:o}),r.length>0&&n.push({id:a.id,tokens:r}),i.push(a)}if(0===i.length)return;let a=0,o=0;if(s.length>0){await this.#r.appendBatch(h,s);for(const e of s)a+=e.tokens.length}if(n.length>0){await this.#r.appendBatch(d,n);for(const e of n)o+=e.tokens.length}for(const e of i)this.#t.addAddedId(e.id);this.#g?(this.#f.word+=a,this.#f.char+=o):(a>0&&await this.#l("word",a),o>0&&await this.#l("char",o),await this.#t.save())}async addDocuments(e){if(this.#d||await this.#w(),0===e.length)return;const t=this.#t.getDeletedIds(),s=[],n=[];for(const i of e){if(t.has(i.id))throw new Error(`Document ID ${i.id} has been deleted and cannot be re-added.`);if(this.#t.isAdded(i.id))throw new Error(`Document ID ${i.id} already exists.`);const e=this.#m(i),a=[],o=[];for(const t of e)t.length>1?a.push(t):1===t.length&&o.push(t);a.length>0&&s.push({id:i.id,tokens:a}),o.length>0&&n.push({id:i.id,tokens:o})}let i=0,a=0;if(s.length>0){await this.#r.appendBatch(h,s);for(const e of s)i+=e.tokens.length}if(n.length>0){await this.#r.appendBatch(d,n);for(const e of n)a+=e.tokens.length}for(const t of e)this.#t.addAddedId(t.id);this.#g?(this.#f.word+=i,this.#f.char+=a):(i>0&&await this.#l("word",i),a>0&&await this.#l("char",a),await this.#t.save())}async search(e,t){this.#d||await this.#w();const s="string"==typeof e?{text:e}:e,n=this.#u(s),i=n.filter(e=>e.length>1),a=n.filter(e=>1===e.length),r=this.#t.getDeletedIds(),h=new Map,d=new Map,c=e=>{const t=this.#t.getSegments(e);for(const e of t){const t=e.filename;!this.#h.has(t)&&!d.has(t)&&d.set(t,new o(t,this.#e))}};c("word"),c("char"),await Promise.all(Array.from(d.entries()).map(([e,t])=>t.loadIndex().then(s=>{s&&this.#h.set(e,t)})));const g=async(e,t)=>{if(0===t.length)return;const s=this.#t.getSegments(e);for(const e of s){const s=e.filename,n=this.#h.get(s);if(n)for(const e of t){const t=n.search(e),s=1+.1*e.length;for(const n 
of t)if(!r.has(n))if(h.has(n)){const t=h.get(n);t.score+=s,t.tokens.add(e)}else h.set(n,{score:0,tokens:new Set([e])})}}};await g("word",i),await g("char",a);const f=[];return h.forEach((e,t)=>{f.push({id:t,score:e.score,tokens:Array.from(e.tokens)})}),f.sort((e,t)=>t.score-e.score),"number"==typeof t&&t>0?f.slice(0,t):f}async removeDocument(e){this.#d||await this.#w(),this.#t.addDeletedId(e),this.#t.removeAddedId(e),await this.#t.save()}async clearAll(){await this.#e.clearAll(),this.#h.clear(),this.#t.reset(),this.#d=!1,this.#g=!1,this.#f={word:0,char:0}}async getStatus(){return this.#d||await this.#w(),{wordSegments:this.#t.getSegments("word").length,charSegments:this.#t.getSegments("char").length,deleted:this.#t.getDeletedIds().size,wordCacheSize:await this.#r.getCurrentSize(h),charCacheSize:await this.#r.getCurrentSize(d),inBatch:this.#g}}async hasDocument(e){return this.#d||await this.#w(),this.#t.hasDocument(e)}async#w(){if(this.#d)return;await this.#t.load();const e=[...this.#t.getSegments("word"),...this.#t.getSegments("char")];for(const t of e)this.#h.has(t.filename)||this.#h.set(t.filename,new o(t.filename,this.#e)),await this.#h.get(t.filename).loadIndex();this.#d=!0}#m(e){return this.#c.indexingTokenizer(e)}#u(e){return this.#c.searchTokenizer?this.#c.searchTokenizer(e):this.#m(e)}async#l(e,t){const s="word"===e?h:d,n=await this.#r.getCurrentSize(s),i="word"===e?this.#c.wordSegmentTokenThreshold||1e5:this.#c.charSegmentTokenThreshold||5e5,a="word"===e?this.#c.minWordTokenSave||0:this.#c.minCharTokenSave||0,r=this.#t.getLastSegmentInfo(e);let c,g,f,l;const w=()=>{const t=this.#t.getSegments(e).length+1;return`${e}_seg_${t}.bin`};if(r){const e=r.tokenCount;e>=i||e+t>=i?(c=w(),f=!0,g=r.end,l=t):(c=r.filename,f=!1,g=r.start,l=e+t)}else c=w(),f=!0,g=0,l=t;if(l<a)return void this.#t.updateSegment(e,c,g,n,l,f);const m=await this.#r.readRange(s,g,n);let u=this.#h.get(c);u||(u=new o(c,this.#e),this.#h.set(c,u)),await 
u.buildAndSave(m),this.#t.updateSegment(e,c,g,n,l,f)}}export{c as SearchEngine,a as hash,a as murmur3_32};
package/lib/simple.cjs CHANGED
@@ -1 +1 @@
1
- "use strict";var t=require('./core');exports.SimpleSearch=class{static#t=null;static#e={baseDir:"simple_search_data",wordSegmentTokenThreshold:1e5,minWordTokenSave:0};static configure(e){const n={...this.#e,...e};this.#t=new t.SearchEngine(n)}static#n(){return this.#t||(this.#t=new t.SearchEngine(this.#e)),this.#t}static async startBatch(){this.#n().startBatch()}static async endBatch(){return this.#n().endBatch()}static async addDocument(t){return this.#n().addDocument(t)}static async addDocumentIfMissing(t){return this.#n().addDocumentIfMissing(t)}static async addDocuments(t){return this.#n().addDocuments(t)}static async addDocumentsIfMissing(t){return this.#n().addDocumentsIfMissing(t)}static async search(t,e){return this.#n().search(t,e)}static async removeDocument(t){return this.#n().removeDocument(t)}static async clearAll(){return this.#n().clearAll()}static async getStatus(){return this.#n().getStatus()}static async hasDocument(t){return this.#n().hasDocument(t)}};
1
+ "use strict";var t=require('./core'),e=require('./browser'),n=require('./node');const s=Object.freeze({wordSegmentTokenThreshold:1e5,minWordTokenSave:0}),c="simple-search";exports.SimpleSearch=class{static#t=null;static#e;static get config(){if(this.#e)return this.#e;const t={...s};return typeof navigator<"u"&&navigator?.storage?.getDirectory instanceof Function?t.storage=new e.BrowserStorage(c):t.storage=new n.NodeStorage(c),this.#e=t}static configure(e){this.#e={...this.config,...e},this.#t&&(this.#t=new t.SearchEngine(this.config))}static async startBatch(){this.#n().startBatch()}static async endBatch(){return this.#n().endBatch()}static async addDocument(t){return this.#n().addDocument(t)}static async addDocumentIfMissing(t){return this.#n().addDocumentIfMissing(t)}static async addDocuments(t){return this.#n().addDocuments(t)}static async addDocumentsIfMissing(t){return this.#n().addDocumentsIfMissing(t)}static async search(t,e){return this.#n().search(t,e)}static async removeDocument(t){return this.#n().removeDocument(t)}static async clearAll(){return this.#n().clearAll()}static async getStatus(){return this.#n().getStatus()}static async hasDocument(t){return this.#n().hasDocument(t)}static#n(){return this.#t||(this.#t=new t.SearchEngine(this.config)),this.#t}};
package/lib/simple.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import * as ___type from './type';
2
- import { ISearchEngineConfig, IDocument, IDocumentBase } from './type';
2
+ import { ISearchEngineOption, IDocument, IDocumentBase } from './type';
3
3
 
4
4
  /**
5
5
  * 快速使用封装
@@ -7,27 +7,21 @@ import { ISearchEngineConfig, IDocument, IDocumentBase } from './type';
7
7
  */
8
8
  declare class SimpleSearch {
9
9
  #private;
10
+ static get config(): ISearchEngineOption;
10
11
  /**
11
12
  * 配置并初始化单例
12
13
  */
13
- static configure(config: Partial<ISearchEngineConfig>): void;
14
+ static configure(config: Partial<ISearchEngineOption>): void;
14
15
  static startBatch(): Promise<void>;
15
16
  static endBatch(): Promise<void>;
16
17
  static addDocument<T extends IDocument = IDocument>(doc: T): Promise<void>;
17
18
  static addDocumentIfMissing<T extends IDocument = IDocument>(doc: T): Promise<void>;
18
19
  static addDocuments<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
19
20
  static addDocumentsIfMissing<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
20
- static search<T extends IDocumentBase = IDocumentBase>(query: T | string, limit?: number): Promise<___type.IResult[]>;
21
+ static search<T extends IDocumentBase = any>(query: T | string, limit?: number): Promise<___type.IResult[]>;
21
22
  static removeDocument(id: number): Promise<void>;
22
23
  static clearAll(): Promise<void>;
23
- static getStatus(): Promise<{
24
- wordSegments: number;
25
- charSegments: number;
26
- deleted: number;
27
- wordCacheSize: number;
28
- charCacheSize: number;
29
- inBatch: boolean;
30
- }>;
24
+ static getStatus(): Promise<___type.ISearchEngineStatus>;
31
25
  /**
32
26
  * 检查文档ID是否曾经添加过(包括已删除的)
33
27
  * @param id 文档ID
package/lib/simple.js CHANGED
@@ -1 +1 @@
1
- import{SearchEngine as t}from'./core';class s{static#t=null;static#s={baseDir:"simple_search_data",wordSegmentTokenThreshold:1e5,minWordTokenSave:0};static configure(s){const n={...this.#s,...s};this.#t=new t(n)}static#n(){return this.#t||(this.#t=new t(this.#s)),this.#t}static async startBatch(){this.#n().startBatch()}static async endBatch(){return this.#n().endBatch()}static async addDocument(t){return this.#n().addDocument(t)}static async addDocumentIfMissing(t){return this.#n().addDocumentIfMissing(t)}static async addDocuments(t){return this.#n().addDocuments(t)}static async addDocumentsIfMissing(t){return this.#n().addDocumentsIfMissing(t)}static async search(t,s){return this.#n().search(t,s)}static async removeDocument(t){return this.#n().removeDocument(t)}static async clearAll(){return this.#n().clearAll()}static async getStatus(){return this.#n().getStatus()}static async hasDocument(t){return this.#n().hasDocument(t)}}export{s as SimpleSearch};
1
+ import{SearchEngine as t}from'./core';import{BrowserStorage as n}from'./browser';import{NodeStorage as s}from'./node';const e=Object.freeze({wordSegmentTokenThreshold:1e5,minWordTokenSave:0}),c="simple-search";class a{static#t=null;static#n;static get config(){if(this.#n)return this.#n;const t={...e};return typeof navigator<"u"&&navigator?.storage?.getDirectory instanceof Function?t.storage=new n(c):t.storage=new s(c),this.#n=t}static configure(n){this.#n={...this.config,...n},this.#t&&(this.#t=new t(this.config))}static async startBatch(){this.#s().startBatch()}static async endBatch(){return this.#s().endBatch()}static async addDocument(t){return this.#s().addDocument(t)}static async addDocumentIfMissing(t){return this.#s().addDocumentIfMissing(t)}static async addDocuments(t){return this.#s().addDocuments(t)}static async addDocumentsIfMissing(t){return this.#s().addDocumentsIfMissing(t)}static async search(t,n){return this.#s().search(t,n)}static async removeDocument(t){return this.#s().removeDocument(t)}static async clearAll(){return this.#s().clearAll()}static async getStatus(){return this.#s().getStatus()}static async hasDocument(t){return this.#s().hasDocument(t)}static#s(){return this.#t||(this.#t=new t(this.config)),this.#t}}export{a as SimpleSearch};
package/lib/type.d.ts CHANGED
@@ -66,12 +66,9 @@ interface IStorage {
66
66
  */
67
67
  type IndexType = 'word' | 'char';
68
68
 
69
- interface ISearchEngineConfig {
70
- /**
71
- * 数据存储的基础目录 (必填)
72
- * 用于区分不同的搜索引擎实例
73
- */
74
- baseDir: string;
69
+ type IndexingTokenizer = <T extends IDocument = IDocument>(doc: T) => string[];
70
+ type SearchTokenizer = <T extends IDocumentBase = IDocumentBase>(doc: T) => string[];
71
+ interface ISearchEngineOption {
75
72
  /**
76
73
  * 存储实现配置 (可选)
77
74
  * - 'browser': 强制使用 OPFS (BrowserStorage)
@@ -79,7 +76,7 @@ interface ISearchEngineConfig {
79
76
  * - IStorage: 传入自定义的存储实例
80
77
  * - undefined: 自动检测环境
81
78
  */
82
- storage?: 'browser' | 'node' | IStorage;
79
+ storage: IStorage;
83
80
  /**
84
81
  * 索引时使用的分词器 (算法核心配置)
85
82
  * - 作用: 将文档文本转换为索引用的token序列
@@ -87,7 +84,7 @@ interface ISearchEngineConfig {
87
84
  * - 建议: 针对不同语言(中文/英文/日文等)使用专门的分词实现
88
85
  * - 影响: 直接决定索引的粒度和搜索的准确性
89
86
  */
90
- indexingTokenizer?: <T extends IDocument = IDocument>(doc: T) => string[];
87
+ indexingTokenizer?: IndexingTokenizer;
91
88
  /**
92
89
  * 搜索时使用的分词器 (算法核心配置)
93
90
  * - 作用: 将查询文本转换为搜索用的token序列
@@ -95,7 +92,7 @@ interface ISearchEngineConfig {
95
92
  * - 建议: 与indexingTokenizer保持一致的分词策略以确保搜索准确性
96
93
  * - 影响: 直接决定搜索匹配的范围和结果的相关性
97
94
  */
98
- searchTokenizer?: <T extends IDocumentBase = IDocumentBase>(doc: T) => string[];
95
+ searchTokenizer?: SearchTokenizer;
99
96
  /**
100
97
  * 词索引分段阈值 (Token数) - 分段算法配置
101
98
  * - 作用: 控制词索引文件的大小,超过阈值时创建新的索引段
@@ -130,6 +127,52 @@ interface ISearchEngineConfig {
130
127
  minCharTokenSave?: number;
131
128
  }
132
129
 
130
+ /** Engine status snapshot; this is the type that ISearchEngine.getStatus() resolves to. Field meanings are not documented in this declaration file, so the notes below are assumptions to confirm against the implementation. */ interface ISearchEngineStatus {
131
+ wordSegments: number; // presumably the number of word-index segments (TODO confirm)
132
+ charSegments: number; // presumably the number of char-index segments (TODO confirm)
133
+ deleted: number; // presumably the count of deleted documents (TODO confirm)
134
+ wordCacheSize: number; // presumably the size of the pending word-token cache (TODO confirm)
135
+ charCacheSize: number; // presumably the size of the pending char-token cache (TODO confirm)
136
+ inBatch: boolean; // presumably true between startBatch() and endBatch() (TODO confirm)
137
+ }
138
+ /**
139
+ * Core search engine
140
+ */
141
+ interface ISearchEngine {
142
+ /**
143
+ * Start batch mode.
144
+ * While a batch is open, addDocuments only writes to the cache and does not trigger index-segment builds.
145
+ */
146
+ startBatch(): void;
147
+ /**
148
+ * End batch mode.
149
+ * Triggers the index-build check and saves the metadata.
150
+ */
151
+ endBatch(): Promise<void>;
152
+ addDocument<T extends IDocument = IDocument>(doc: T): Promise<void>;
153
+ /**
154
+ * Add a single document; skipped if the document ID already exists.
155
+ * Intended for resuming after a bulk add failed midway; can also be used directly to add a single document.
156
+ */
157
+ addDocumentIfMissing<T extends IDocument = IDocument>(doc: T): Promise<void>;
158
+ /**
159
+ * Add multiple documents, skipping document IDs that already exist.
160
+ * Intended for resuming after a bulk add failed midway; can also be used directly for bulk adds.
161
+ */
162
+ addDocumentsIfMissing<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
163
+ addDocuments<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
164
+ search<T extends IDocumentBase | string = any>(query: string, limit?: number): Promise<IResult[]>; // NOTE(review): type parameter T is not referenced by the parameters or the return type
165
+ removeDocument(id: number): Promise<void>;
166
+ clearAll(): Promise<void>;
167
+ getStatus(): Promise<ISearchEngineStatus>;
168
+ /**
169
+ * Check whether a document ID has ever been added (including deleted ones).
170
+ * @param id Document ID
171
+ * @returns Whether the document has ever been added
172
+ */
173
+ hasDocument(id: number): Promise<boolean>;
174
+ }
175
+
133
176
 
134
177
 
135
- export type { IDocument, IDocumentBase, IIndexMeta, IResult, ISearchEngineConfig, ISegmentMeta, IStorage, ITokenizedDoc, IndexType };
178
+ export type { IDocument, IDocumentBase, IIndexMeta, IResult, ISearchEngine, ISearchEngineOption, ISearchEngineStatus, ISegmentMeta, IStorage, ITokenizedDoc, IndexType, IndexingTokenizer, SearchTokenizer };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gs-search",
3
- "version": "0.1.4",
3
+ "version": "0.1.5",
4
4
  "type": "module",
5
5
  "main": "lib/index.cjs",
6
6
  "module": "lib/index.js",