gs-search 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ "use strict";exports.BrowserStorage=class{#a;constructor(a){this.#a=a}async#t(){return await(await navigator.storage.getDirectory()).getDirectoryHandle(this.#a,{create:!0})}async write(a,t){const e=await(await(await this.#t()).getFileHandle(a,{create:!0})).createWritable();await e.write(t),await e.close()}async append(a,t){const e=await this.#t();let i;try{i=await e.getFileHandle(a,{create:!0})}catch{i=await e.getFileHandle(a,{create:!0})}const r=await i.getFile(),s=await i.createWritable({keepExistingData:!0});await s.seek(r.size),await s.write(t),await s.close()}async read(a){const t=await this.#t();try{return await(await(await t.getFileHandle(a)).getFile()).arrayBuffer()}catch{return null}}async readRange(a,t,e){const i=await this.#t();try{return await(await(await i.getFileHandle(a)).getFile()).slice(t,e).arrayBuffer()}catch{return null}}async remove(a){const t=await this.#t();try{await t.removeEntry(a)}catch{}}async listFiles(){const a=await this.#t(),t=[];for await(const e of a.keys())t.push(e);return t}async clearAll(){const a=await this.#t();for await(const t of a.keys())await a.removeEntry(t,{recursive:!0})}async getFileSize(a){const t=await this.#t();try{return(await(await t.getFileHandle(a)).getFile()).size}catch{return 0}}};
@@ -0,0 +1,21 @@
1
+ import { IStorage } from './type';
2
+
3
+ /**
4
+ * Browser implementation (OPFS, isolated in a sub-directory)
5
+ * Supports both the main thread and Web Workers
6
+ */
7
+
8
+ declare class BrowserStorage implements IStorage {
9
+ #private;
10
+ constructor(baseDir: string);
11
+ write(filename: string, data: ArrayBuffer): Promise<void>;
12
+ append(filename: string, data: ArrayBuffer): Promise<void>;
13
+ read(filename: string): Promise<ArrayBuffer | null>; // null when the file cannot be opened or read
14
+ readRange(filename: string, start: number, end: number): Promise<ArrayBuffer | null>; // null when the file cannot be opened or read
15
+ remove(filename: string): Promise<void>; // removal errors are ignored
16
+ listFiles(): Promise<string[]>;
17
+ clearAll(): Promise<void>;
18
+ getFileSize(filename: string): Promise<number>; // 0 when the file cannot be opened
19
+ }
20
+
21
+ export { BrowserStorage };
package/lib/browser.js ADDED
@@ -0,0 +1 @@
1
/**
 * Storage backend built on the Origin Private File System (OPFS).
 *
 * Every instance works inside one sub-directory of the OPFS root (created on
 * demand), so several instances with different `baseDir` values do not see
 * each other's files.
 */
class BrowserStorage {
  /** Name of the OPFS sub-directory used by this instance. */
  #baseDir;

  /**
   * @param baseDir name of the sub-directory (below the OPFS root) to use
   */
  constructor(baseDir) {
    this.#baseDir = baseDir;
  }

  /** Resolves the handle of the instance directory, creating it if missing. */
  async #openBaseDir() {
    const root = await navigator.storage.getDirectory();
    return await root.getDirectoryHandle(this.#baseDir, { create: true });
  }

  /**
   * Runs `action` against a writable stream and closes it afterwards.
   * If anything fails the stream is aborted on a best-effort basis so a
   * half-finished writable is not left dangling, then the original error is
   * re-thrown unchanged.
   */
  async #commit(writable, action) {
    try {
      await action(writable);
      await writable.close();
    } catch (error) {
      try {
        await writable.abort();
      } catch {
        // The stream may already be closed or errored; the first error matters.
      }
      throw error;
    }
  }

  /**
   * Creates or truncates `filename` and writes `data` to it.
   * @param filename file name inside the instance directory
   * @param data bytes to store
   */
  async write(filename, data) {
    const dir = await this.#openBaseDir();
    const fileHandle = await dir.getFileHandle(filename, { create: true });
    const writable = await fileHandle.createWritable();
    await this.#commit(writable, (stream) => stream.write(data));
  }

  /**
   * Appends `data` to the end of `filename`, creating the file if needed.
   * @param filename file name inside the instance directory
   * @param data bytes to append
   */
  async append(filename, data) {
    const dir = await this.#openBaseDir();
    // The previous code retried this exact call inside a catch block; the
    // retry was identical to the first attempt, so a single call is enough.
    const fileHandle = await dir.getFileHandle(filename, { create: true });
    const { size } = await fileHandle.getFile();
    const writable = await fileHandle.createWritable({ keepExistingData: true });
    await this.#commit(writable, async (stream) => {
      await stream.seek(size);
      await stream.write(data);
    });
  }

  /**
   * Reads a whole file.
   * @param filename file name inside the instance directory
   * @returns the file content, or `null` when the file cannot be opened or read
   */
  async read(filename) {
    const dir = await this.#openBaseDir();
    try {
      const fileHandle = await dir.getFileHandle(filename);
      const file = await fileHandle.getFile();
      return await file.arrayBuffer();
    } catch {
      return null;
    }
  }

  /**
   * Reads the byte range `[start, end)` of a file (Blob.slice semantics).
   * @param filename file name inside the instance directory
   * @param start first byte offset to read
   * @param end byte offset at which reading stops
   * @returns the requested bytes, or `null` when the file cannot be opened or read
   */
  async readRange(filename, start, end) {
    const dir = await this.#openBaseDir();
    try {
      const fileHandle = await dir.getFileHandle(filename);
      const file = await fileHandle.getFile();
      return await file.slice(start, end).arrayBuffer();
    } catch {
      return null;
    }
  }

  /**
   * Deletes a file. Failures (for example a missing file) are ignored on
   * purpose so the call is safe to repeat.
   * @param filename file name inside the instance directory
   */
  async remove(filename) {
    const dir = await this.#openBaseDir();
    try {
      await dir.removeEntry(filename);
    } catch {
      // Best effort: nothing to do when the entry cannot be removed.
    }
  }

  /**
   * Lists the entry names found directly inside the instance directory.
   * @returns the entry names
   */
  async listFiles() {
    const dir = await this.#openBaseDir();
    const names = [];
    for await (const name of dir.keys()) {
      names.push(name);
    }
    return names;
  }

  /** Removes every entry (recursively) from the instance directory. */
  async clearAll() {
    const dir = await this.#openBaseDir();
    // Snapshot the names first so the directory is not mutated while its
    // async iterator is still being consumed.
    const names = [];
    for await (const name of dir.keys()) {
      names.push(name);
    }
    for (const name of names) {
      await dir.removeEntry(name, { recursive: true });
    }
  }

  /**
   * @param filename file name inside the instance directory
   * @returns the file size in bytes, or `0` when the file cannot be opened
   */
  async getFileSize(filename) {
    const dir = await this.#openBaseDir();
    try {
      const fileHandle = await dir.getFileHandle(filename);
      return (await fileHandle.getFile()).size;
    } catch {
      return 0;
    }
  }
}

export { BrowserStorage };
package/lib/core.cjs ADDED
@@ -0,0 +1 @@
1
+ "use strict";var e=require('./browser'),t=require('./node');const s="search_meta.json",n="deleted_ids.bin",i="added_ids.bin";class o{#e;#t={wordSegments:[],charSegments:[]};#s=new Set;#n=new Set;constructor(e){this.#e=e}async load(){const e=await this.#e.read(s);if(e){const t=(new TextDecoder).decode(e);this.#t=JSON.parse(t)}else this.#t={wordSegments:[],charSegments:[]};const t=await this.#e.read(n);if(t){const e=new DataView(t);let s=0;const n=t.byteLength;for(;s<n&&!(s+4>n);){const t=e.getUint32(s,!0);this.#s.add(t),s+=4,s<n&&30===e.getUint8(s)&&(s+=1)}}const o=await this.#e.read(i);if(o){const e=new DataView(o);let t=0;const s=o.byteLength;for(;t<s&&!(t+4>s);){const n=e.getUint32(t,!0);this.#n.add(n),t+=4,t<s&&30===e.getUint8(t)&&(t+=1)}}}async save(){const e=JSON.stringify(this.#t);if(await this.#e.write(s,(new TextEncoder).encode(e).buffer),0===this.#s.size)await this.#e.remove(n);else{const e=4*this.#s.size+this.#s.size,t=new ArrayBuffer(e),s=new DataView(t);let i=0;for(const e of this.#s)s.setUint32(i,e,!0),i+=4,s.setUint8(i,30),i+=1;await this.#e.write(n,t)}if(0===this.#n.size)await this.#e.remove(i);else{const e=4*this.#n.size+this.#n.size,t=new ArrayBuffer(e),s=new DataView(t);let n=0;for(const e of this.#n)s.setUint32(n,e,!0),n+=4,s.setUint8(n,30),n+=1;await this.#e.write(i,t)}}getSegments(e){return"word"===e?this.#t.wordSegments:this.#t.charSegments}getDeletedIds(){return this.#s}addDeletedId(e){this.#s.add(e)}isDeleted(e){return this.#s.has(e)}addAddedId(e){this.#n.add(e)}removeAddedId(e){this.#n.delete(e)}isAdded(e){return this.#n.has(e)}getAddedIds(){return this.#n}hasDocument(e){return this.#n.has(e)||this.#s.has(e)}getLastSegmentInfo(e){const t=this.getSegments(e);return 0===t.length?null:t[t.length-1]}updateSegment(e,t,s,n,i,o){const a="word"===e?this.#t.wordSegments:this.#t.charSegments;if(o)a.push({filename:t,start:s,end:n,tokenCount:i});else{const 
e=a[a.length-1];e&&e.filename===t&&(e.end=n,e.tokenCount=i)}}reset(){this.#t={wordSegments:[],charSegments:[]},this.#s.clear(),this.#n.clear()}}class a{static SEPARATOR=30;#e;constructor(e){this.#e=e}async appendBatch(e,t){if(0===t.length)return await this.#e.getFileSize(e);const s=new TextEncoder;let n=0;for(const e of t){n+=8;for(const t of e.tokens){n+=2+Math.min(s.encode(t).byteLength,65535)}n+=1}const i=new Uint8Array(n);let o=0;for(const e of t){const t=[];for(const n of e.tokens){const e=s.encode(n),i=e.byteLength>65535?e.slice(0,65535):e;t.push(i)}const n=new DataView(i.buffer,o);n.setUint32(0,e.id,!0),n.setUint32(4,t.length,!0),o+=8;for(const e of t)new DataView(i.buffer,o).setUint16(0,e.byteLength,!0),o+=2,i.set(e,o),o+=e.byteLength;i[o++]=a.SEPARATOR}return await this.#e.append(e,i.buffer),await this.#e.getFileSize(e)}async readRange(e,t,s){const n=await this.#e.readRange(e,t,s);if(!n||0===n.byteLength)return[];const i=new DataView(n),o=new Uint8Array(n),r=new TextDecoder,h=[];let d=0;const c=n.byteLength;for(;d<c&&!(d+8>c);){const e=i.getUint32(d,!0);d+=4;const t=i.getUint32(d,!0);d+=4;const s=[];for(let e=0;e<t&&!(d+2>c);e++){const e=i.getUint16(d,!0);if(d+=2,d+e>c)break;const t=new Uint8Array(n,d,e);s.push(r.decode(t)),d+=e}d<c&&o[d]===a.SEPARATOR&&(d+=1),h.push({id:e,tokens:s})}return h}async getCurrentSize(e){return await this.#e.getFileSize(e)}}function r(e,t=305419896){const s=e.length,n=s>>2;let i=0;for(;i<n;){let s=255&e.charCodeAt(i)|(255&e.charCodeAt(++i))<<8|(255&e.charCodeAt(++i))<<16|(255&e.charCodeAt(++i))<<24;++i,s=3432918353*(65535&s)+((3432918353*(s>>>16)&65535)<<16)&4294967295,s=s<<15|s>>>17,s=461845907*(65535&s)+((461845907*(s>>>16)&65535)<<16)&4294967295,t=27492+(65535&(t=5*(65535&(t=(t^=s)<<13|t>>>19))+((5*(t>>>16)&65535)<<16)&4294967295))+(((t>>>16)+58964&65535)<<16)}let o=0;const a=3&s;return 
a>0&&(a>=3&&(o^=(255&e.charCodeAt(i+2))<<16),a>=2&&(o^=(255&e.charCodeAt(i+1))<<8),a>=1&&(o^=255&e.charCodeAt(i)),o=3432918353*(65535&o)+((3432918353*(o>>>16)&65535)<<16)&4294967295,o=o<<15|o>>>17,o=461845907*(65535&o)+((461845907*(o>>>16)&65535)<<16)&4294967295,t^=o),t^=s,t=2246822507*(65535&(t^=t>>>16))+((2246822507*(t>>>16)&65535)<<16)&4294967295,t=3266489909*(65535&(t^=t>>>13))+((3266489909*(t>>>16)&65535)<<16)&4294967295,(t^=t>>>16)>>>0}class h{#i;#e;#o=null;#a=null;static hash(e){return r(e)}constructor(e,t){this.#i=e,this.#e=t}async loadIndex(){return!!this.#o||(this.#o=await this.#e.read(this.#i),!!this.#o&&(this.#a=new DataView(this.#o),!0))}async buildAndSave(e){const t=new Map;for(const s of e){const e=new Map;for(const n of s.tokens)e.has(n)||(e.set(n,!0),t.has(n)||t.set(n,{hash:h.hash(n),postings:[]}),t.get(n).postings.push(s.id))}const s=Array.from(t.entries());s.sort(([e,{hash:t}],[s,{hash:n}])=>t!==n?t-n:e.localeCompare(s));let n=0,i=0;for(const[e,{postings:t}]of s)n+=t.length,i+=e.length+1;const o=20*s.length,a=12+o+4*n,r=new ArrayBuffer(a+i),d=new DataView(r);d.setUint32(0,1229866072),d.setUint32(4,s.length),d.setUint32(8,a);let c=12,g=12+o,f=a;for(const[e,{hash:t,postings:n}]of s){d.setUint32(c,t),d.setUint32(c+4,e.length),d.setUint32(c+8,f),d.setUint32(c+12,g),d.setUint32(c+16,n.length),c+=20;for(let e=0;e<n.length;e++)d.setUint32(g,n[e],!0),g+=4;const s=(new TextEncoder).encode(e);for(let e=0;e<s.length;e++)d.setUint8(f++,s[e]);d.setUint8(f++,0)}await this.#e.write(this.#i,r),this.#o=r,this.#a=d}search(e){if(!this.#a||!this.#o)return[];const t=h.hash(e),s=this.#a.getUint32(4);let n=0,i=s-1;const o=12,a=20,r=new TextDecoder;for(;n<=i;){const h=n+i>>>1,d=o+h*a,c=this.#a.getUint32(d);if(c<t)n=h+1;else{if(!(c>t)){if(!(h>0&&this.#a.getUint32(o+(h-1)*a)===t||h<s-1&&this.#a.getUint32(o+(h+1)*a)===t)){const e=this.#a.getUint32(o+h*a+12),t=this.#a.getUint32(o+h*a+16),s=[];for(let n=0;n<t;n++)s.push(this.#a.getUint32(e+4*n,!0));return s}let 
n=h;for(;n>0;){const e=o+(n-1)*a;if(this.#a.getUint32(e)!==t)break;n--}for(let i=n;i<s;i++){const s=o+i*a;if(this.#a.getUint32(s)!==t)break;const n=this.#a.getUint32(s+4),h=this.#a.getUint32(s+8),d=new Uint8Array(this.#o,h,n);if(r.decode(d)===e){const e=this.#a.getUint32(s+12),t=this.#a.getUint32(s+16),n=[];for(let s=0;s<t;s++)n.push(this.#a.getUint32(e+4*s,!0));return n}}return[]}i=h-1}}return[]}}const d="word_cache.bin",c="char_cache.bin";exports.SearchEngine=class{#e;#t;#r;#h;#d=!1;#c;#g=!1;#f={word:0,char:0};constructor(s){if(!s.baseDir)throw new Error("SearchEngine requires 'baseDir' in config.");if(this.#c={wordSegmentTokenThreshold:1e5,charSegmentTokenThreshold:5e5,minWordTokenSave:0,minCharTokenSave:0,...s},(this.#c.minWordTokenSave||0)>=(this.#c.wordSegmentTokenThreshold||1e5))throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");if((this.#c.minCharTokenSave||0)>=(this.#c.charSegmentTokenThreshold||5e5))throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");let n=null;if(this.#c.storage&&("object"==typeof this.#c.storage?n=this.#c.storage:"browser"===this.#c.storage?n=new e.BrowserStorage(this.#c.baseDir):"node"===this.#c.storage&&(n=new t.NodeStorage(this.#c.baseDir))),!n){const s=typeof navigator<"u"&&navigator?.storage?.getDirectory instanceof Function,i=typeof process<"u"&&null!=process.versions&&null!=process.versions.node;s?n=new e.BrowserStorage(this.#c.baseDir):i&&(n=new t.NodeStorage(this.#c.baseDir))}if(!n)throw new Error('Storage initialization failed. 
Please configure "storage" explicitly or ensure you are in a supported environment (Browser/Node).');this.#e=n,this.#t=new o(this.#e),this.#r=new a(this.#e),this.#h=new Map}async init(){if(this.#d)return;await this.#t.load();const e=[...this.#t.getSegments("word"),...this.#t.getSegments("char")];for(const t of e)this.#h.has(t.filename)||this.#h.set(t.filename,new h(t.filename,this.#e)),await this.#h.get(t.filename).loadIndex();this.#d=!0}startBatch(){this.#g=!0,this.#f={word:0,char:0}}async endBatch(){this.#g=!1,this.#f.word>0&&await this.#l("word",this.#f.word),this.#f.char>0&&await this.#l("char",this.#f.char),this.#f={word:0,char:0},await this.#t.save()}#w(e){try{if(typeof Intl<"u"&&"function"==typeof Intl.Segmenter&&"function"==typeof Array.from){const t=new Intl.Segmenter([],{granularity:"word"}).segment(e);if("object"==typeof t&&null!==t)return Array.from(t).filter(e=>e?.isWordLike).map(e=>e?.segment?.toLowerCase()||"")}}catch{}return e.toLowerCase().split(/[^a-z0-9\u4e00-\u9fa5]+/g).filter(e=>e.length>0)}#m(e){return this.#c.indexingTokenizer?this.#c.indexingTokenizer(e):this.#w(e.text)}#u(e){return this.#c.searchTokenizer?this.#c.searchTokenizer(e):this.#m(e)}async addDocument(e){return this.addDocuments([e])}async addDocumentIfMissing(e){return this.addDocumentsIfMissing([e])}async addDocumentsIfMissing(e){if(this.#d||await this.init(),0===e.length)return;const t=this.#t.getDeletedIds(),s=[],n=[],i=[];for(const o of e){if(t.has(o.id)||this.#t.isAdded(o.id))continue;const e=this.#m(o),a=[],r=[];for(const t of e)t.length>1?a.push(t):1===t.length&&r.push(t);a.length>0&&s.push({id:o.id,tokens:a}),r.length>0&&n.push({id:o.id,tokens:r}),i.push(o)}if(0===i.length)return;let o=0,a=0;if(s.length>0){await this.#r.appendBatch(d,s);for(const e of s)o+=e.tokens.length}if(n.length>0){await this.#r.appendBatch(c,n);for(const e of n)a+=e.tokens.length}for(const e of i)this.#t.addAddedId(e.id);this.#g?(this.#f.word+=o,this.#f.char+=a):(o>0&&await 
this.#l("word",o),a>0&&await this.#l("char",a),await this.#t.save())}async addDocuments(e){if(this.#d||await this.init(),0===e.length)return;const t=this.#t.getDeletedIds(),s=[],n=[];for(const i of e){if(t.has(i.id))throw new Error(`Document ID ${i.id} has been deleted and cannot be re-added.`);if(this.#t.isAdded(i.id))throw new Error(`Document ID ${i.id} already exists.`);const e=this.#m(i),o=[],a=[];for(const t of e)t.length>1?o.push(t):1===t.length&&a.push(t);o.length>0&&s.push({id:i.id,tokens:o}),a.length>0&&n.push({id:i.id,tokens:a})}let i=0,o=0;if(s.length>0){await this.#r.appendBatch(d,s);for(const e of s)i+=e.tokens.length}if(n.length>0){await this.#r.appendBatch(c,n);for(const e of n)o+=e.tokens.length}for(const t of e)this.#t.addAddedId(t.id);this.#g?(this.#f.word+=i,this.#f.char+=o):(i>0&&await this.#l("word",i),o>0&&await this.#l("char",o),await this.#t.save())}async#l(e,t){const s="word"===e?d:c,n=await this.#r.getCurrentSize(s),i="word"===e?this.#c.wordSegmentTokenThreshold||1e5:this.#c.charSegmentTokenThreshold||5e5,o="word"===e?this.#c.minWordTokenSave||0:this.#c.minCharTokenSave||0,a=this.#t.getLastSegmentInfo(e);let r,g,f,l;const w=()=>{const t=this.#t.getSegments(e).length+1;return`${e}_seg_${t}.bin`};if(a){const e=a.tokenCount;e>=i||e+t>=i?(r=w(),f=!0,g=a.end,l=t):(r=a.filename,f=!1,g=a.start,l=e+t)}else r=w(),f=!0,g=0,l=t;if(l<o)return void this.#t.updateSegment(e,r,g,n,l,f);const m=await this.#r.readRange(s,g,n);let u=this.#h.get(r);u||(u=new h(r,this.#e),this.#h.set(r,u)),await u.buildAndSave(m),this.#t.updateSegment(e,r,g,n,l,f)}async search(e,t){this.#d||await this.init();const s="string"==typeof e?{text:e}:e,n=this.#u(s),i=n.filter(e=>e.length>1),o=n.filter(e=>1===e.length),a=this.#t.getDeletedIds(),r=new Map,d=new Map,c=e=>{const t=this.#t.getSegments(e);for(const e of t){const t=e.filename;!this.#h.has(t)&&!d.has(t)&&d.set(t,new h(t,this.#e))}};c("word"),c("char"),await 
Promise.all(Array.from(d.entries()).map(([e,t])=>t.loadIndex().then(s=>{s&&this.#h.set(e,t)})));const g=async(e,t)=>{if(0===t.length)return;const s=this.#t.getSegments(e);for(const e of s){const s=e.filename,n=this.#h.get(s);if(n)for(const e of t){const t=n.search(e),s=1+.1*e.length;for(const n of t)if(!a.has(n))if(r.has(n)){const t=r.get(n);t.score+=s,t.tokens.add(e)}else r.set(n,{score:0,tokens:new Set([e])})}}};await g("word",i),await g("char",o);const f=[];return r.forEach((e,t)=>{f.push({id:t,score:e.score,tokens:Array.from(e.tokens)})}),f.sort((e,t)=>t.score-e.score),"number"==typeof t&&t>0?f.slice(0,t):f}async removeDocument(e){this.#d||await this.init(),this.#t.addDeletedId(e),this.#t.removeAddedId(e),await this.#t.save()}async clearAll(){await this.#e.clearAll(),this.#h.clear(),this.#t.reset(),this.#d=!1,this.#g=!1,this.#f={word:0,char:0}}async getStatus(){return this.#d||await this.init(),{wordSegments:this.#t.getSegments("word").length,charSegments:this.#t.getSegments("char").length,deleted:this.#t.getDeletedIds().size,wordCacheSize:await this.#r.getCurrentSize(d),charCacheSize:await this.#r.getCurrentSize(c),inBatch:this.#g}}async hasDocument(e){return this.#d||await this.init(),this.#t.hasDocument(e)}},exports.hash=r,exports.murmur3_32=r;
package/lib/core.d.ts ADDED
@@ -0,0 +1,63 @@
1
+ import { ISearchEngineConfig, IDocument, IDocumentBase, IResult } from './type';
2
+
3
+ /**
4
+ * Core search engine class (supports multiple instances)
5
+ */
6
+ declare class SearchEngine {
7
+ #private;
8
+ constructor(config: ISearchEngineConfig);
9
+ init(): Promise<void>;
10
+ /**
11
+ * Start a batch
12
+ * While a batch is open, addDocuments only writes to the cache and does not trigger index segment builds
13
+ */
14
+ startBatch(): void;
15
+ /**
16
+ * End the batch
17
+ * Runs the index build check and saves the metadata
18
+ */
19
+ endBatch(): Promise<void>;
20
+ addDocument<T extends IDocument = IDocument>(doc: T): Promise<void>;
21
+ /**
22
+ * Add a single document; it is skipped when the document ID already exists
23
+ * Intended for resuming after a bulk add failed part-way; can also be used directly to add a single document
24
+ */
25
+ addDocumentIfMissing<T extends IDocument = IDocument>(doc: T): Promise<void>;
26
+ /**
27
+ * Add multiple documents, skipping document IDs that already exist
28
+ * Intended for resuming after a bulk add failed part-way; can also be used directly for bulk adds
29
+ */
30
+ addDocumentsIfMissing<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
31
+ addDocuments<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
32
+ search<T extends IDocumentBase = IDocumentBase>(query: T | string, limit?: number): Promise<IResult[]>;
33
+ removeDocument(id: number): Promise<void>;
34
+ clearAll(): Promise<void>;
35
+ getStatus(): Promise<{
36
+ wordSegments: number;
37
+ charSegments: number;
38
+ deleted: number;
39
+ wordCacheSize: number;
40
+ charCacheSize: number;
41
+ inBatch: boolean;
42
+ }>;
43
+ /**
44
+ * Check whether a document ID has ever been added (including IDs that were deleted since)
45
+ * @param id document ID
46
+ * @returns whether the document has ever been added
47
+ */
48
+ hasDocument(id: number): Promise<boolean>;
49
+ }
50
+
51
+ /**
52
+ * MurmurHash3 32-bit implementation
53
+ * Fast non-cryptographic hash function, suitable for hash tables and similar data structures
54
+ */
55
+ /**
56
+ * Compute the 32-bit MurmurHash3 hash of a string
57
+ * @param str the string to hash
58
+ * @param h seed value (optional)
59
+ * @returns 32-bit unsigned hash value
60
+ */
61
+ declare function murmur3_32(str: string, h?: number): number;
62
+
63
+ export { SearchEngine, murmur3_32 as hash, murmur3_32 };
package/lib/core.js ADDED
@@ -0,0 +1 @@
1
import { BrowserStorage } from './browser';
import { NodeStorage } from './node';

/** JSON file holding the word/char segment lists. */
const META_FILE = "search_meta.json";
/** Binary file holding the IDs of removed documents. */
const DELETED_IDS_FILE = "deleted_ids.bin";
/** Binary file holding the IDs of added documents. */
const ADDED_IDS_FILE = "added_ids.bin";
/** Append-only cache of multi-character tokens per document. */
const WORD_CACHE_FILE = "word_cache.bin";
/** Append-only cache of single-character tokens per document. */
const CHAR_CACHE_FILE = "char_cache.bin";
/** Byte written after every record in the ID lists and token caches. */
const RECORD_SEPARATOR = 30;
/** Largest token size (in UTF-8 bytes) that fits the 16-bit length prefix. */
const MAX_TOKEN_BYTES = 65535;

/**
 * Decodes a list of little-endian uint32 IDs (each optionally followed by the
 * separator byte) and adds them to `target`. A trailing partial record is ignored.
 */
function decodeIdList(buffer, target) {
  const view = new DataView(buffer);
  const size = buffer.byteLength;
  let pos = 0;
  while (pos + 4 <= size) {
    target.add(view.getUint32(pos, true));
    pos += 4;
    if (pos < size && view.getUint8(pos) === RECORD_SEPARATOR) {
      pos += 1;
    }
  }
}

/** Encodes IDs as little-endian uint32 values, each followed by the separator byte. */
function encodeIdList(ids) {
  const buffer = new ArrayBuffer(5 * ids.size);
  const view = new DataView(buffer);
  let pos = 0;
  for (const id of ids) {
    view.setUint32(pos, id, true);
    view.setUint8(pos + 4, RECORD_SEPARATOR);
    pos += 5;
  }
  return buffer;
}

/**
 * Keeps the persistent bookkeeping of an engine instance: the segment lists
 * and the sets of added and deleted document IDs.
 */
class MetaManager {
  #storage;
  #meta = { wordSegments: [], charSegments: [] };
  #deletedIds = new Set();
  #addedIds = new Set();

  constructor(storage) {
    this.#storage = storage;
  }

  /** Loads metadata and ID lists from storage; missing files leave defaults. */
  async load() {
    const metaBuffer = await this.#storage.read(META_FILE);
    if (metaBuffer) {
      this.#meta = JSON.parse(new TextDecoder().decode(metaBuffer));
    } else {
      this.#meta = { wordSegments: [], charSegments: [] };
    }

    const deletedBuffer = await this.#storage.read(DELETED_IDS_FILE);
    if (deletedBuffer) {
      decodeIdList(deletedBuffer, this.#deletedIds);
    }

    const addedBuffer = await this.#storage.read(ADDED_IDS_FILE);
    if (addedBuffer) {
      decodeIdList(addedBuffer, this.#addedIds);
    }
  }

  /** Writes metadata and ID lists; an empty ID set removes its file instead. */
  async save() {
    const json = JSON.stringify(this.#meta);
    await this.#storage.write(META_FILE, new TextEncoder().encode(json).buffer);

    if (this.#deletedIds.size === 0) {
      await this.#storage.remove(DELETED_IDS_FILE);
    } else {
      await this.#storage.write(DELETED_IDS_FILE, encodeIdList(this.#deletedIds));
    }

    if (this.#addedIds.size === 0) {
      await this.#storage.remove(ADDED_IDS_FILE);
    } else {
      await this.#storage.write(ADDED_IDS_FILE, encodeIdList(this.#addedIds));
    }
  }

  /** @returns the live segment array for `"word"`, otherwise the char segments */
  getSegments(type) {
    return type === "word" ? this.#meta.wordSegments : this.#meta.charSegments;
  }

  getDeletedIds() {
    return this.#deletedIds;
  }

  addDeletedId(id) {
    this.#deletedIds.add(id);
  }

  isDeleted(id) {
    return this.#deletedIds.has(id);
  }

  addAddedId(id) {
    this.#addedIds.add(id);
  }

  removeAddedId(id) {
    this.#addedIds.delete(id);
  }

  isAdded(id) {
    return this.#addedIds.has(id);
  }

  getAddedIds() {
    return this.#addedIds;
  }

  /** @returns true when the ID is currently added or was deleted earlier */
  hasDocument(id) {
    return this.#addedIds.has(id) || this.#deletedIds.has(id);
  }

  /** @returns the newest segment of the given type, or `null` when none exists */
  getLastSegmentInfo(type) {
    const segments = this.getSegments(type);
    return segments.length === 0 ? null : segments[segments.length - 1];
  }

  /**
   * Records a segment. With `isNew` a new entry is pushed; otherwise the last
   * entry is extended, but only when its filename matches.
   */
  updateSegment(type, filename, start, end, tokenCount, isNew) {
    const segments = this.getSegments(type);
    if (isNew) {
      segments.push({ filename, start, end, tokenCount });
      return;
    }
    const last = segments[segments.length - 1];
    if (last && last.filename === filename) {
      last.end = end;
      last.tokenCount = tokenCount;
    }
  }

  /** Clears all in-memory state (nothing is written to storage). */
  reset() {
    this.#meta = { wordSegments: [], charSegments: [] };
    this.#deletedIds.clear();
    this.#addedIds.clear();
  }
}

/**
 * Reads and writes the append-only token cache files.
 *
 * Record layout (little-endian): uint32 document id, uint32 token count, then
 * per token a uint16 byte length followed by the UTF-8 bytes, and finally one
 * separator byte.
 */
class CacheManager {
  static SEPARATOR = 30;

  #storage;

  constructor(storage) {
    this.#storage = storage;
  }

  /**
   * Appends one record per document to `filename`.
   * @param filename cache file name
   * @param docs items of shape `{ id, tokens }`
   * @returns the file size after the append
   */
  async appendBatch(filename, docs) {
    if (docs.length === 0) {
      return await this.#storage.getFileSize(filename);
    }

    // Encode every token exactly once; tokens longer than the 16-bit length
    // prefix allows are truncated to MAX_TOKEN_BYTES bytes.
    const encoder = new TextEncoder();
    const records = [];
    let totalBytes = 0;
    for (const doc of docs) {
      const encodedTokens = [];
      totalBytes += 8;
      for (const token of doc.tokens) {
        const raw = encoder.encode(token);
        const bytes = raw.byteLength > MAX_TOKEN_BYTES ? raw.slice(0, MAX_TOKEN_BYTES) : raw;
        encodedTokens.push(bytes);
        totalBytes += 2 + bytes.byteLength;
      }
      totalBytes += 1;
      records.push({ id: doc.id, encodedTokens });
    }

    const out = new Uint8Array(totalBytes);
    const view = new DataView(out.buffer);
    let pos = 0;
    for (const { id, encodedTokens } of records) {
      view.setUint32(pos, id, true);
      view.setUint32(pos + 4, encodedTokens.length, true);
      pos += 8;
      for (const bytes of encodedTokens) {
        view.setUint16(pos, bytes.byteLength, true);
        pos += 2;
        out.set(bytes, pos);
        pos += bytes.byteLength;
      }
      out[pos++] = CacheManager.SEPARATOR;
    }

    await this.#storage.append(filename, out.buffer);
    return await this.#storage.getFileSize(filename);
  }

  /**
   * Decodes the records stored in the byte range `[start, end)` of `filename`.
   * Decoding stops quietly at the first truncated record.
   * @returns items of shape `{ id, tokens }`
   */
  async readRange(filename, start, end) {
    const buffer = await this.#storage.readRange(filename, start, end);
    if (!buffer || buffer.byteLength === 0) {
      return [];
    }

    const view = new DataView(buffer);
    const bytes = new Uint8Array(buffer);
    const decoder = new TextDecoder();
    const size = buffer.byteLength;
    const docs = [];
    let pos = 0;

    while (pos + 8 <= size) {
      const id = view.getUint32(pos, true);
      const tokenCount = view.getUint32(pos + 4, true);
      pos += 8;

      const tokens = [];
      for (let k = 0; k < tokenCount && pos + 2 <= size; k++) {
        const tokenBytes = view.getUint16(pos, true);
        pos += 2;
        if (pos + tokenBytes > size) {
          break;
        }
        tokens.push(decoder.decode(new Uint8Array(buffer, pos, tokenBytes)));
        pos += tokenBytes;
      }

      if (pos < size && bytes[pos] === CacheManager.SEPARATOR) {
        pos += 1;
      }
      docs.push({ id, tokens });
    }
    return docs;
  }

  /** @returns the current size of `filename` as reported by the storage */
  async getCurrentSize(filename) {
    return await this.#storage.getFileSize(filename);
  }
}

/** Multiplies by `factor` in 16-bit halves so the product stays exact, then truncates to 32 bits. */
function mul32(value, factor) {
  return (factor * (value & 65535) + (((factor * (value >>> 16)) & 65535) << 16)) & 4294967295;
}

/** MurmurHash3 per-block scramble: multiply by c1, rotate left 15, multiply by c2. */
function scrambleBlock(k) {
  k = mul32(k, 3432918353); // c1 = 0xcc9e2d51
  k = (k << 15) | (k >>> 17);
  return mul32(k, 461845907); // c2 = 0x1b873593
}

/**
 * MurmurHash3-style 32-bit string hash.
 *
 * Only the low 8 bits of every UTF-16 code unit are mixed in.
 *
 * NOTE(review): the block loop advances `index` by four per iteration but is
 * bounded by `length >> 2`, so only roughly the first quarter of the string is
 * mixed (plus up to three characters read at the position where the loop
 * stopped). This looks unintended, but the values are persisted inside index
 * files, so the arithmetic is deliberately left exactly as it was.
 *
 * @param str string to hash
 * @param h seed (defaults to 305419896, i.e. 0x12345678)
 * @returns unsigned 32-bit hash value
 */
function murmur3_32(str, h = 305419896) {
  const length = str.length;
  const blockLimit = length >> 2;
  let index = 0;

  while (index < blockLimit) {
    const k =
      (255 & str.charCodeAt(index)) |
      ((255 & str.charCodeAt(index + 1)) << 8) |
      ((255 & str.charCodeAt(index + 2)) << 16) |
      ((255 & str.charCodeAt(index + 3)) << 24);
    index += 4;

    h ^= scrambleBlock(k);
    h = (h << 13) | (h >>> 19);
    h = mul32(h, 5);
    // Adds 0xe6546b64 in two 16-bit halves (0x6b64 = 27492, 0xe654 = 58964).
    h = 27492 + (h & 65535) + ((((h >>> 16) + 58964) & 65535) << 16);
  }

  const remainder = length & 3;
  if (remainder > 0) {
    let tail = 0;
    if (remainder >= 3) tail ^= (255 & str.charCodeAt(index + 2)) << 16;
    if (remainder >= 2) tail ^= (255 & str.charCodeAt(index + 1)) << 8;
    tail ^= 255 & str.charCodeAt(index);
    h ^= scrambleBlock(tail);
  }

  // Finalisation (avalanche).
  h ^= length;
  h ^= h >>> 16;
  h = mul32(h, 2246822507); // 0x85ebca6b
  h ^= h >>> 13;
  h = mul32(h, 3266489909); // 0xc2b2ae35
  h ^= h >>> 16;
  return h >>> 0;
}

/** Size of the index file header: magic, entry count, token-area offset. */
const INDEX_HEADER_BYTES = 12;
/** Size of one index entry: hash, token length, token offset, postings offset, postings count. */
const INDEX_ENTRY_BYTES = 20;
/** Magic number stored at offset 0 (the ASCII bytes "INDX"). */
const INDEX_MAGIC = 1229866072;

/**
 * One immutable inverted-index file.
 *
 * Layout: 12-byte header, a table of 20-byte entries sorted by token hash,
 * the postings (little-endian uint32 document IDs) and finally the
 * NUL-terminated UTF-8 token strings. Header and entry fields are big-endian.
 */
class IndexSegment {
  #filename;
  #storage;
  #buffer = null;
  #view = null;

  static hash(token) {
    return murmur3_32(token);
  }

  constructor(filename, storage) {
    this.#filename = filename;
    this.#storage = storage;
  }

  /**
   * Loads the index file into memory once.
   * @returns true when an index is available in memory afterwards
   */
  async loadIndex() {
    if (this.#buffer) {
      return true;
    }
    this.#buffer = await this.#storage.read(this.#filename);
    if (!this.#buffer) {
      return false;
    }
    this.#view = new DataView(this.#buffer);
    return true;
  }

  /**
   * Builds the index for `docs` (items of shape `{ id, tokens }`), writes it
   * to storage and keeps it in memory. A token that occurs several times in
   * one document contributes a single posting.
   */
  async buildAndSave(docs) {
    const encoder = new TextEncoder();
    const byToken = new Map();
    for (const doc of docs) {
      const seen = new Set();
      for (const token of doc.tokens) {
        if (seen.has(token)) continue;
        seen.add(token);
        let entry = byToken.get(token);
        if (!entry) {
          // Token sizes must be measured in UTF-8 bytes, not UTF-16 code
          // units, because bytes are what gets written; sizing by
          // `token.length` overran the buffer for any non-ASCII token.
          entry = { hash: IndexSegment.hash(token), postings: [], bytes: encoder.encode(token) };
          byToken.set(token, entry);
        }
        entry.postings.push(doc.id);
      }
    }

    const entries = Array.from(byToken.entries());
    entries.sort(([tokenA, a], [tokenB, b]) =>
      a.hash !== b.hash ? a.hash - b.hash : tokenA.localeCompare(tokenB)
    );

    let postingCount = 0;
    let tokenAreaBytes = 0;
    for (const [, entry] of entries) {
      postingCount += entry.postings.length;
      tokenAreaBytes += entry.bytes.byteLength + 1; // +1 for the NUL terminator
    }

    const tableBytes = INDEX_ENTRY_BYTES * entries.length;
    const tokenAreaStart = INDEX_HEADER_BYTES + tableBytes + 4 * postingCount;
    const buffer = new ArrayBuffer(tokenAreaStart + tokenAreaBytes);
    const view = new DataView(buffer);
    const raw = new Uint8Array(buffer);

    view.setUint32(0, INDEX_MAGIC);
    view.setUint32(4, entries.length);
    view.setUint32(8, tokenAreaStart);

    let entryPos = INDEX_HEADER_BYTES;
    let postingPos = INDEX_HEADER_BYTES + tableBytes;
    let tokenPos = tokenAreaStart;
    for (const [, { hash, postings, bytes }] of entries) {
      view.setUint32(entryPos, hash);
      view.setUint32(entryPos + 4, bytes.byteLength);
      view.setUint32(entryPos + 8, tokenPos);
      view.setUint32(entryPos + 12, postingPos);
      view.setUint32(entryPos + 16, postings.length);
      entryPos += INDEX_ENTRY_BYTES;

      for (const id of postings) {
        view.setUint32(postingPos, id, true);
        postingPos += 4;
      }

      raw.set(bytes, tokenPos);
      tokenPos += bytes.byteLength;
      raw[tokenPos++] = 0;
    }

    await this.#storage.write(this.#filename, buffer);
    this.#buffer = buffer;
    this.#view = view;
  }

  /**
   * Looks up the document IDs indexed for `token`.
   *
   * The stored token text is always compared with the query, so a query
   * whose hash merely collides with an indexed token yields no hits.
   *
   * @returns the posting list, or an empty array when the token is absent or
   *          no index is loaded
   */
  search(token) {
    const view = this.#view;
    const buffer = this.#buffer;
    if (!view || !buffer) {
      return [];
    }

    const target = IndexSegment.hash(token);
    const count = view.getUint32(4);

    // Binary search for the first entry whose hash is >= target.
    let low = 0;
    let high = count;
    while (low < high) {
      const mid = (low + high) >>> 1;
      if (view.getUint32(INDEX_HEADER_BYTES + mid * INDEX_ENTRY_BYTES) < target) {
        low = mid + 1;
      } else {
        high = mid;
      }
    }

    const decoder = new TextDecoder();
    for (let i = low; i < count; i++) {
      const entryPos = INDEX_HEADER_BYTES + i * INDEX_ENTRY_BYTES;
      if (view.getUint32(entryPos) !== target) {
        break;
      }
      const tokenBytes = view.getUint32(entryPos + 4);
      const tokenPos = view.getUint32(entryPos + 8);
      if (decoder.decode(new Uint8Array(buffer, tokenPos, tokenBytes)) !== token) {
        continue;
      }
      const postingPos = view.getUint32(entryPos + 12);
      const postingCount = view.getUint32(entryPos + 16);
      const ids = [];
      for (let k = 0; k < postingCount; k++) {
        ids.push(view.getUint32(postingPos + 4 * k, true));
      }
      return ids;
    }
    return [];
  }
}

/**
 * Core search engine. Several instances can coexist; each one owns its
 * storage location (`baseDir`).
 */
class SearchEngine {
  #storage;
  #meta;
  #cache;
  /** filename -> loaded IndexSegment */
  #indexSegments;
  #initialized = false;
  #config;
  #inBatch = false;
  /** Token counts appended to the caches since the batch was started. */
  #pendingTokens = { word: 0, char: 0 };

  /**
   * @param config engine configuration; `baseDir` is mandatory
   * @throws Error when `baseDir` is missing, a min-save value is not below its
   *         segment threshold, or no storage backend can be selected
   */
  constructor(config) {
    if (!config.baseDir) {
      throw new Error("SearchEngine requires 'baseDir' in config.");
    }
    this.#config = {
      wordSegmentTokenThreshold: 1e5,
      charSegmentTokenThreshold: 5e5,
      minWordTokenSave: 0,
      minCharTokenSave: 0,
      ...config,
    };
    if ((this.#config.minWordTokenSave || 0) >= (this.#config.wordSegmentTokenThreshold || 1e5)) {
      throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");
    }
    if ((this.#config.minCharTokenSave || 0) >= (this.#config.charSegmentTokenThreshold || 5e5)) {
      throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");
    }

    // An explicit `storage` setting wins: either a storage object or one of
    // the names "browser" / "node".
    let storage = null;
    const requested = this.#config.storage;
    if (requested) {
      if (typeof requested === "object") {
        storage = requested;
      } else if (requested === "browser") {
        storage = new BrowserStorage(this.#config.baseDir);
      } else if (requested === "node") {
        storage = new NodeStorage(this.#config.baseDir);
      }
    }

    // Otherwise detect the environment, preferring OPFS when it is available.
    if (!storage) {
      const hasOpfs =
        typeof navigator !== "undefined" && navigator?.storage?.getDirectory instanceof Function;
      const isNode =
        typeof process !== "undefined" && process.versions != null && process.versions.node != null;
      if (hasOpfs) {
        storage = new BrowserStorage(this.#config.baseDir);
      } else if (isNode) {
        storage = new NodeStorage(this.#config.baseDir);
      }
    }

    if (!storage) {
      throw new Error('Storage initialization failed. Please configure "storage" explicitly or ensure you are in a supported environment (Browser/Node).');
    }

    this.#storage = storage;
    this.#meta = new MetaManager(this.#storage);
    this.#cache = new CacheManager(this.#storage);
    this.#indexSegments = new Map();
  }

  /** Loads metadata and every known index segment. Runs only once until `clearAll`. */
  async init() {
    if (this.#initialized) {
      return;
    }
    await this.#meta.load();
    const segments = [...this.#meta.getSegments("word"), ...this.#meta.getSegments("char")];
    for (const segment of segments) {
      if (!this.#indexSegments.has(segment.filename)) {
        this.#indexSegments.set(segment.filename, new IndexSegment(segment.filename, this.#storage));
      }
      await this.#indexSegments.get(segment.filename).loadIndex();
    }
    this.#initialized = true;
  }

  /** Starts a batch: added documents only reach the caches until `endBatch`. */
  startBatch() {
    this.#inBatch = true;
    this.#pendingTokens = { word: 0, char: 0 };
  }

  /** Ends the batch, indexes the tokens gathered meanwhile and saves metadata. */
  async endBatch() {
    this.#inBatch = false;
    if (this.#pendingTokens.word > 0) {
      await this.#indexNewTokens("word", this.#pendingTokens.word);
    }
    if (this.#pendingTokens.char > 0) {
      await this.#indexNewTokens("char", this.#pendingTokens.char);
    }
    this.#pendingTokens = { word: 0, char: 0 };
    await this.#meta.save();
  }

  /**
   * Built-in tokenizer: lower-cased word-like segments from `Intl.Segmenter`
   * when it is usable, otherwise a split on everything that is not a-z, 0-9
   * or in the range U+4E00..U+9FA5.
   */
  #defaultTokenize(text) {
    try {
      if (
        typeof Intl !== "undefined" &&
        typeof Intl.Segmenter === "function" &&
        typeof Array.from === "function"
      ) {
        const segments = new Intl.Segmenter([], { granularity: "word" }).segment(text);
        if (typeof segments === "object" && segments !== null) {
          return Array.from(segments)
            .filter((part) => part?.isWordLike)
            .map((part) => part?.segment?.toLowerCase() || "");
        }
      }
    } catch {
      // Fall through to the regular-expression tokenizer below.
    }
    return text
      .toLowerCase()
      .split(/[^a-z0-9\u4e00-\u9fa5]+/g)
      .filter((part) => part.length > 0);
  }

  /** Tokens used for indexing: the configured tokenizer or the default one. */
  #indexingTokens(doc) {
    return this.#config.indexingTokenizer
      ? this.#config.indexingTokenizer(doc)
      : this.#defaultTokenize(doc.text);
  }

  /** Tokens used for querying: the search tokenizer, else the indexing rules. */
  #searchTokens(query) {
    return this.#config.searchTokenizer
      ? this.#config.searchTokenizer(query)
      : this.#indexingTokens(query);
  }

  /** Splits a document's tokens into multi-character and single-character lists. */
  #partitionTokens(doc) {
    const words = [];
    const chars = [];
    for (const token of this.#indexingTokens(doc)) {
      if (token.length > 1) {
        words.push(token);
      } else if (token.length === 1) {
        chars.push(token);
      }
    }
    return { words, chars };
  }

  /**
   * Appends the prepared batches to the caches, marks `acceptedDocs` as added
   * and then either defers indexing (inside a batch) or indexes and saves.
   */
  async #commitBatches(wordBatch, charBatch, acceptedDocs) {
    let wordTokens = 0;
    let charTokens = 0;
    if (wordBatch.length > 0) {
      await this.#cache.appendBatch(WORD_CACHE_FILE, wordBatch);
      for (const item of wordBatch) wordTokens += item.tokens.length;
    }
    if (charBatch.length > 0) {
      await this.#cache.appendBatch(CHAR_CACHE_FILE, charBatch);
      for (const item of charBatch) charTokens += item.tokens.length;
    }

    for (const doc of acceptedDocs) {
      this.#meta.addAddedId(doc.id);
    }

    if (this.#inBatch) {
      this.#pendingTokens.word += wordTokens;
      this.#pendingTokens.char += charTokens;
      return;
    }
    if (wordTokens > 0) await this.#indexNewTokens("word", wordTokens);
    if (charTokens > 0) await this.#indexNewTokens("char", charTokens);
    await this.#meta.save();
  }

  /** Adds one document; see `addDocuments`. */
  async addDocument(doc) {
    return this.addDocuments([doc]);
  }

  /** Adds one document unless its ID is already added or deleted. */
  async addDocumentIfMissing(doc) {
    return this.addDocumentsIfMissing([doc]);
  }

  /**
   * Adds documents, silently skipping IDs that are already added or were
   * deleted. Useful to resume a bulk add that failed part-way.
   */
  async addDocumentsIfMissing(docs) {
    if (!this.#initialized) await this.init();
    if (docs.length === 0) return;

    const deletedIds = this.#meta.getDeletedIds();
    const wordBatch = [];
    const charBatch = [];
    const accepted = [];
    for (const doc of docs) {
      if (deletedIds.has(doc.id) || this.#meta.isAdded(doc.id)) continue;
      const { words, chars } = this.#partitionTokens(doc);
      if (words.length > 0) wordBatch.push({ id: doc.id, tokens: words });
      if (chars.length > 0) charBatch.push({ id: doc.id, tokens: chars });
      accepted.push(doc);
    }
    if (accepted.length === 0) return;

    await this.#commitBatches(wordBatch, charBatch, accepted);
  }

  /**
   * Adds documents.
   * @throws Error when an ID was deleted earlier or is already added; all
   *         documents are validated before anything is written
   */
  async addDocuments(docs) {
    if (!this.#initialized) await this.init();
    if (docs.length === 0) return;

    const deletedIds = this.#meta.getDeletedIds();
    const wordBatch = [];
    const charBatch = [];
    for (const doc of docs) {
      if (deletedIds.has(doc.id)) {
        throw new Error(`Document ID ${doc.id} has been deleted and cannot be re-added.`);
      }
      if (this.#meta.isAdded(doc.id)) {
        throw new Error(`Document ID ${doc.id} already exists.`);
      }
      const { words, chars } = this.#partitionTokens(doc);
      if (words.length > 0) wordBatch.push({ id: doc.id, tokens: words });
      if (chars.length > 0) charBatch.push({ id: doc.id, tokens: chars });
    }

    await this.#commitBatches(wordBatch, charBatch, docs);
  }

  /**
   * Folds `newTokenCount` freshly cached tokens of the given type into the
   * index: either the last segment is rebuilt over a longer cache range, or a
   * new segment is started once the threshold would be reached. While the
   * segment holds fewer tokens than the configured minimum, only the metadata
   * is updated and no index file is written.
   */
  async #indexNewTokens(type, newTokenCount) {
    const isWord = type === "word";
    const cacheFile = isWord ? WORD_CACHE_FILE : CHAR_CACHE_FILE;
    const cacheSize = await this.#cache.getCurrentSize(cacheFile);
    const threshold = isWord
      ? this.#config.wordSegmentTokenThreshold || 1e5
      : this.#config.charSegmentTokenThreshold || 5e5;
    const minTokensToSave = isWord
      ? this.#config.minWordTokenSave || 0
      : this.#config.minCharTokenSave || 0;
    const last = this.#meta.getLastSegmentInfo(type);
    const nextFilename = () => `${type}_seg_${this.#meta.getSegments(type).length + 1}.bin`;

    let filename;
    let rangeStart;
    let isNew;
    let tokenCount;
    if (!last) {
      filename = nextFilename();
      isNew = true;
      rangeStart = 0;
      tokenCount = newTokenCount;
    } else if (last.tokenCount >= threshold || last.tokenCount + newTokenCount >= threshold) {
      filename = nextFilename();
      isNew = true;
      rangeStart = last.end;
      tokenCount = newTokenCount;
    } else {
      filename = last.filename;
      isNew = false;
      rangeStart = last.start;
      tokenCount = last.tokenCount + newTokenCount;
    }

    if (tokenCount < minTokensToSave) {
      this.#meta.updateSegment(type, filename, rangeStart, cacheSize, tokenCount, isNew);
      return;
    }

    const docs = await this.#cache.readRange(cacheFile, rangeStart, cacheSize);
    let segment = this.#indexSegments.get(filename);
    if (!segment) {
      segment = new IndexSegment(filename, this.#storage);
      this.#indexSegments.set(filename, segment);
    }
    await segment.buildAndSave(docs);
    this.#meta.updateSegment(type, filename, rangeStart, cacheSize, tokenCount, isNew);
  }

  /**
   * Searches the index.
   *
   * Every distinct (segment, token) hit adds `1 + 0.1 * token.length` to a
   * document's score, including the first hit, which used to be recorded
   * with a score of 0.
   *
   * @param query query string or query object understood by the tokenizer
   * @param limit optional positive maximum number of results
   * @returns results of shape `{ id, score, tokens }`, highest score first
   */
  async search(query, limit) {
    if (!this.#initialized) await this.init();

    const queryDoc = typeof query === "string" ? { text: query } : query;
    const tokens = this.#searchTokens(queryDoc);
    const wordTokens = tokens.filter((token) => token.length > 1);
    const charTokens = tokens.filter((token) => token.length === 1);
    const deletedIds = this.#meta.getDeletedIds();
    const hits = new Map();

    // Lazily load segments that are listed in the metadata but not in memory.
    const toLoad = new Map();
    for (const type of ["word", "char"]) {
      for (const { filename } of this.#meta.getSegments(type)) {
        if (!this.#indexSegments.has(filename) && !toLoad.has(filename)) {
          toLoad.set(filename, new IndexSegment(filename, this.#storage));
        }
      }
    }
    await Promise.all(
      Array.from(toLoad.entries()).map(async ([filename, segment]) => {
        if (await segment.loadIndex()) {
          this.#indexSegments.set(filename, segment);
        }
      })
    );

    const collect = (type, typeTokens) => {
      if (typeTokens.length === 0) return;
      for (const { filename } of this.#meta.getSegments(type)) {
        const segment = this.#indexSegments.get(filename);
        if (!segment) continue;
        for (const token of typeTokens) {
          const weight = 1 + 0.1 * token.length;
          for (const id of segment.search(token)) {
            if (deletedIds.has(id)) continue;
            const hit = hits.get(id);
            if (hit) {
              hit.score += weight;
              hit.tokens.add(token);
            } else {
              hits.set(id, { score: weight, tokens: new Set([token]) });
            }
          }
        }
      }
    };
    collect("word", wordTokens);
    collect("char", charTokens);

    const results = [];
    hits.forEach((hit, id) => {
      results.push({ id, score: hit.score, tokens: Array.from(hit.tokens) });
    });
    results.sort((a, b) => b.score - a.score);
    return typeof limit === "number" && limit > 0 ? results.slice(0, limit) : results;
  }

  /** Marks a document ID as deleted (it is filtered out of results) and saves. */
  async removeDocument(id) {
    if (!this.#initialized) await this.init();
    this.#meta.addDeletedId(id);
    this.#meta.removeAddedId(id);
    await this.#meta.save();
  }

  /** Deletes everything in storage and resets the in-memory state. */
  async clearAll() {
    await this.#storage.clearAll();
    this.#indexSegments.clear();
    this.#meta.reset();
    this.#initialized = false;
    this.#inBatch = false;
    this.#pendingTokens = { word: 0, char: 0 };
  }

  /** @returns segment counts, deleted-ID count, cache sizes and the batch flag */
  async getStatus() {
    if (!this.#initialized) await this.init();
    return {
      wordSegments: this.#meta.getSegments("word").length,
      charSegments: this.#meta.getSegments("char").length,
      deleted: this.#meta.getDeletedIds().size,
      wordCacheSize: await this.#cache.getCurrentSize(WORD_CACHE_FILE),
      charCacheSize: await this.#cache.getCurrentSize(CHAR_CACHE_FILE),
      inBatch: this.#inBatch,
    };
  }

  /** @returns true when the ID is currently added or was deleted earlier */
  async hasDocument(id) {
    if (!this.#initialized) await this.init();
    return this.#meta.hasDocument(id);
  }
}

export { SearchEngine, murmur3_32 as hash, murmur3_32 };
package/lib/index.cjs CHANGED
@@ -1 +1 @@
1
- "use strict";Object.create,Object.defineProperty,Object.getOwnPropertyDescriptor,Object.getOwnPropertyNames,Object.getPrototypeOf,Object.prototype.hasOwnProperty;class t{#t;constructor(t){this.#t=t}async#e(){return await(await navigator.storage.getDirectory()).getDirectoryHandle(this.#t,{create:!0})}async write(t,e){const s=await(await(await this.#e()).getFileHandle(t,{create:!0})).createWritable();await s.write(e),await s.close()}async append(t,e){const s=await this.#e();let i;try{i=await s.getFileHandle(t,{create:!0})}catch{i=await s.getFileHandle(t,{create:!0})}const n=await i.getFile(),a=await i.createWritable({keepExistingData:!0});await a.seek(n.size),await a.write(e),await a.close()}async read(t){const e=await this.#e();try{return await(await(await e.getFileHandle(t)).getFile()).arrayBuffer()}catch{return null}}async readRange(t,e,s){const i=await this.#e();try{return await(await(await i.getFileHandle(t)).getFile()).slice(e,s).arrayBuffer()}catch{return null}}async remove(t){const e=await this.#e();try{await e.removeEntry(t)}catch{}}async listFiles(){const t=await this.#e(),e=[];for await(const s of t.keys())e.push(s);return e}async clearAll(){const t=await this.#e();for await(const e of t.keys())await t.removeEntry(e,{recursive:!0})}async getFileSize(t){const e=await this.#e();try{return(await(await e.getFileHandle(t)).getFile()).size}catch{return 0}}}class e{#s=null;#i=null;#t;#n="";constructor(t){this.#t=t}async#a(){if(this.#s)return;const t=await import("node:fs"),e=await import("node:path");this.#s=t.promises,this.#i=e.default||e,this.#n=this.#i.join(process.cwd(),this.#t);try{await this.#s.access(this.#n)}catch{await this.#s.mkdir(this.#n,{recursive:!0})}}#r(t){return this.#i.join(this.#n,t)}async write(t,e){await this.#a(),await this.#s.writeFile(this.#r(t),Buffer.from(e))}async append(t,e){await this.#a(),await this.#s.appendFile(this.#r(t),Buffer.from(e))}async read(t){await this.#a();try{const e=await this.#s.readFile(this.#r(t));return 
e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)}catch{return null}}async readRange(t,e,s){await this.#a();try{const i=await this.#s.open(this.#r(t),"r"),n=s-e,a=Buffer.alloc(n);return await i.read(a,0,n,e),await i.close(),a.buffer.slice(a.byteOffset,a.byteOffset+a.byteLength)}catch{return null}}async remove(t){await this.#a();try{await this.#s.unlink(this.#r(t))}catch{}}async listFiles(){await this.#a();try{return await this.#s.readdir(this.#n)}catch{return[]}}async clearAll(){await this.#a();try{const t=await this.#s.readdir(this.#n);for(const e of t)await this.#s.unlink(this.#i.join(this.#n,e))}catch{}}async getFileSize(t){await this.#a();try{return(await this.#s.stat(this.#r(t))).size}catch{return 0}}}const s="search_meta.json",i="deleted_ids.bin",n="added_ids.bin";class a{#o;#h={wordSegments:[],charSegments:[]};#c=new Set;#d=new Set;constructor(t){this.#o=t}async load(){const t=await this.#o.read(s);if(t){const e=(new TextDecoder).decode(t);this.#h=JSON.parse(e)}else this.#h={wordSegments:[],charSegments:[]};const e=await this.#o.read(i);if(e){const t=new DataView(e);let s=0;const i=e.byteLength;for(;s<i&&!(s+4>i);){const e=t.getUint32(s,!0);this.#c.add(e),s+=4,s<i&&30===t.getUint8(s)&&(s+=1)}}const a=await this.#o.read(n);if(a){const t=new DataView(a);let e=0;const s=a.byteLength;for(;e<s&&!(e+4>s);){const i=t.getUint32(e,!0);this.#d.add(i),e+=4,e<s&&30===t.getUint8(e)&&(e+=1)}}}async save(){const t=JSON.stringify(this.#h);if(await this.#o.write(s,(new TextEncoder).encode(t).buffer),0===this.#c.size)await this.#o.remove(i);else{const t=4*this.#c.size+this.#c.size,e=new ArrayBuffer(t),s=new DataView(e);let n=0;for(const t of this.#c)s.setUint32(n,t,!0),n+=4,s.setUint8(n,30),n+=1;await this.#o.write(i,e)}if(0===this.#d.size)await this.#o.remove(n);else{const t=4*this.#d.size+this.#d.size,e=new ArrayBuffer(t),s=new DataView(e);let i=0;for(const t of this.#d)s.setUint32(i,t,!0),i+=4,s.setUint8(i,30),i+=1;await 
this.#o.write(n,e)}}getSegments(t){return"word"===t?this.#h.wordSegments:this.#h.charSegments}getDeletedIds(){return this.#c}addDeletedId(t){this.#c.add(t)}isDeleted(t){return this.#c.has(t)}addAddedId(t){this.#d.add(t)}removeAddedId(t){this.#d.delete(t)}isAdded(t){return this.#d.has(t)}getAddedIds(){return this.#d}hasDocument(t){return this.#d.has(t)||this.#c.has(t)}getLastSegmentInfo(t){const e=this.getSegments(t);return 0===e.length?null:e[e.length-1]}updateSegment(t,e,s,i,n,a){const r="word"===t?this.#h.wordSegments:this.#h.charSegments;if(a)r.push({filename:e,start:s,end:i,tokenCount:n});else{const t=r[r.length-1];t&&t.filename===e&&(t.end=i,t.tokenCount=n)}}reset(){this.#h={wordSegments:[],charSegments:[]},this.#c.clear(),this.#d.clear()}}class r{static SEPARATOR=30;#o;constructor(t){this.#o=t}async appendBatch(t,e){if(0===e.length)return await this.#o.getFileSize(t);const s=new TextEncoder;let i=0;for(const t of e){i+=8;for(const e of t.tokens){i+=2+Math.min(s.encode(e).byteLength,65535)}i+=1}const n=new Uint8Array(i);let a=0;for(const t of e){const e=[];for(const i of t.tokens){const t=s.encode(i),n=t.byteLength>65535?t.slice(0,65535):t;e.push(n)}const i=new DataView(n.buffer,a);i.setUint32(0,t.id,!0),i.setUint32(4,e.length,!0),a+=8;for(const t of e)new DataView(n.buffer,a).setUint16(0,t.byteLength,!0),a+=2,n.set(t,a),a+=t.byteLength;n[a++]=r.SEPARATOR}return await this.#o.append(t,n.buffer),await this.#o.getFileSize(t)}async readRange(t,e,s){const i=await this.#o.readRange(t,e,s);if(!i||0===i.byteLength)return[];const n=new DataView(i),a=new Uint8Array(i),o=new TextDecoder,h=[];let c=0;const d=i.byteLength;for(;c<d&&!(c+8>d);){const t=n.getUint32(c,!0);c+=4;const e=n.getUint32(c,!0);c+=4;const s=[];for(let t=0;t<e&&!(c+2>d);t++){const t=n.getUint16(c,!0);if(c+=2,c+t>d)break;const e=new Uint8Array(i,c,t);s.push(o.decode(e)),c+=t}c<d&&a[c]===r.SEPARATOR&&(c+=1),h.push({id:t,tokens:s})}return h}async getCurrentSize(t){return await 
this.#o.getFileSize(t)}}class o{#g;#o;#l=null;#f=null;static hash(t){let e=5381;for(let s=0;s<t.length;s++)e=(e<<5)+e^t.charCodeAt(s);return e>>>0}constructor(t,e){this.#g=t,this.#o=e}async loadIndex(){return!!this.#l||(this.#l=await this.#o.read(this.#g),!!this.#l&&(this.#f=new DataView(this.#l),!0))}async buildAndSave(t){const e=new Map;for(const s of t){const t=new Map;for(const i of s.tokens)if(!t.has(i)){t.set(i,!0);const n=o.hash(i);e.has(n)||e.set(n,[]),e.get(n).push(s.id)}}const s=Array.from(e.keys()).sort((t,e)=>t-e);let i=0;const n=new Array(s.length);for(let t=0;t<s.length;t++){const a=s[t],r=e.get(a);n[t]=r,i+=r.length}const a=12*s.length,r=new ArrayBuffer(8+a+4*i),h=new DataView(r);h.setUint32(0,1229866072),h.setUint32(4,s.length);let c=8,d=8+a;for(let t=0;t<s.length;t++){const e=s[t],i=n[t];h.setUint32(c,e),h.setUint32(c+4,d),h.setUint32(c+8,i.length),c+=12;for(let t=0;t<i.length;t++)h.setUint32(d,i[t],!0),d+=4}await this.#o.write(this.#g,r),this.#l=r,this.#f=h}search(t){if(!this.#f||!this.#l)return[];const e=o.hash(t);let s=0,i=this.#f.getUint32(4)-1;for(;s<=i;){const t=s+i>>>1,n=8+12*t,a=this.#f.getUint32(n);if(a<e)s=t+1;else{if(!(a>e)){const t=this.#f.getUint32(n+4),e=this.#f.getUint32(n+8),s=[];for(let i=0;i<e;i++)s.push(this.#f.getUint32(t+4*i,!0));return s}i=t-1}}return[]}}const h="word_cache.bin",c="char_cache.bin";class d{#o;#h;#w;#u;#m=!1;#y;#p=!1;#D={word:0,char:0};constructor(s){if(!s.baseDir)throw new Error("SearchEngine requires 'baseDir' in config.");if(this.#y={wordSegmentTokenThreshold:1e5,charSegmentTokenThreshold:5e5,minWordTokenSave:0,minCharTokenSave:0,...s},(this.#y.minWordTokenSave||0)>=(this.#y.wordSegmentTokenThreshold||1e5))throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");if((this.#y.minCharTokenSave||0)>=(this.#y.charSegmentTokenThreshold||5e5))throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");let i=null;if(this.#y.storage&&("object"==typeof 
this.#y.storage?i=this.#y.storage:"browser"===this.#y.storage?i=new t(this.#y.baseDir):"node"===this.#y.storage&&(i=new e(this.#y.baseDir))),!i){const s=typeof navigator<"u"&&navigator?.storage?.getDirectory instanceof Function,n=typeof process<"u"&&null!=process.versions&&null!=process.versions.node;s?i=new t(this.#y.baseDir):n&&(i=new e(this.#y.baseDir))}if(!i)throw new Error('Storage initialization failed. Please configure "storage" explicitly or ensure you are in a supported environment (Browser/Node).');this.#o=i,this.#h=new a(this.#o),this.#w=new r(this.#o),this.#u=new Map}async init(){if(this.#m)return;await this.#h.load();const t=[...this.#h.getSegments("word"),...this.#h.getSegments("char")];for(const e of t)this.#u.has(e.filename)||this.#u.set(e.filename,new o(e.filename,this.#o)),await this.#u.get(e.filename).loadIndex();this.#m=!0}startBatch(){this.#p=!0,this.#D={word:0,char:0}}async endBatch(){this.#p=!1,this.#D.word>0&&await this.#S("word",this.#D.word),this.#D.char>0&&await this.#S("char",this.#D.char),this.#D={word:0,char:0},await this.#h.save()}#k(t){try{if(typeof Intl<"u"&&"function"==typeof Intl.Segmenter&&"function"==typeof Array.from){const e=new Intl.Segmenter([],{granularity:"word"}).segment(t);if("object"==typeof e&&null!==e)return Array.from(e).filter(t=>t?.isWordLike).map(t=>t?.segment?.toLowerCase()||"")}}catch{}return t.toLowerCase().split(/[^a-z0-9\u4e00-\u9fa5]+/g).filter(t=>t.length>0)}#I(t){return this.#y.indexingTokenizer?this.#y.indexingTokenizer(t):this.#k(t.text)}#b(t){return this.#y.searchTokenizer?this.#y.searchTokenizer(t):this.#I(t)}async addDocument(t){return this.addDocuments([t])}async addDocumentIfMissing(t){return this.addDocumentsIfMissing([t])}async addDocumentsIfMissing(t){if(this.#m||await this.init(),0===t.length)return;const e=this.#h.getDeletedIds(),s=[],i=[],n=[];for(const a of t){if(e.has(a.id)||this.#h.isAdded(a.id))continue;const t=this.#I(a),r=[],o=[];for(const e of 
t)e.length>1?r.push(e):1===e.length&&o.push(e);r.length>0&&s.push({id:a.id,tokens:r}),o.length>0&&i.push({id:a.id,tokens:o}),n.push(a)}if(0===n.length)return;let a=0,r=0;if(s.length>0){await this.#w.appendBatch(h,s);for(const t of s)a+=t.tokens.length}if(i.length>0){await this.#w.appendBatch(c,i);for(const t of i)r+=t.tokens.length}for(const t of n)this.#h.addAddedId(t.id);this.#p?(this.#D.word+=a,this.#D.char+=r):(a>0&&await this.#S("word",a),r>0&&await this.#S("char",r),await this.#h.save())}async addDocuments(t){if(this.#m||await this.init(),0===t.length)return;const e=this.#h.getDeletedIds(),s=[],i=[];for(const n of t){if(e.has(n.id))throw new Error(`Document ID ${n.id} has been deleted and cannot be re-added.`);if(this.#h.isAdded(n.id))throw new Error(`Document ID ${n.id} already exists.`);const t=this.#I(n),a=[],r=[];for(const e of t)e.length>1?a.push(e):1===e.length&&r.push(e);a.length>0&&s.push({id:n.id,tokens:a}),r.length>0&&i.push({id:n.id,tokens:r})}let n=0,a=0;if(s.length>0){await this.#w.appendBatch(h,s);for(const t of s)n+=t.tokens.length}if(i.length>0){await this.#w.appendBatch(c,i);for(const t of i)a+=t.tokens.length}for(const e of t)this.#h.addAddedId(e.id);this.#p?(this.#D.word+=n,this.#D.char+=a):(n>0&&await this.#S("word",n),a>0&&await this.#S("char",a),await this.#h.save())}async#S(t,e){const s="word"===t?h:c,i=await this.#w.getCurrentSize(s),n="word"===t?this.#y.wordSegmentTokenThreshold||1e5:this.#y.charSegmentTokenThreshold||5e5,a="word"===t?this.#y.minWordTokenSave||0:this.#y.minCharTokenSave||0,r=this.#h.getLastSegmentInfo(t);let d,g,l,f;const w=()=>{const e=this.#h.getSegments(t).length+1;return`${t}_seg_${e}.bin`};if(r){const t=r.tokenCount;t>=n||t+e>=n?(d=w(),l=!0,g=r.end,f=e):(d=r.filename,l=!1,g=r.start,f=t+e)}else d=w(),l=!0,g=0,f=e;if(f<a)return void this.#h.updateSegment(t,d,g,i,f,l);const u=await this.#w.readRange(s,g,i);let m=this.#u.get(d);m||(m=new o(d,this.#o),this.#u.set(d,m)),await 
m.buildAndSave(u),this.#h.updateSegment(t,d,g,i,f,l)}async search(t,e){this.#m||await this.init();const s="string"==typeof t?{text:t}:t,i=this.#b(s),n=i.filter(t=>t.length>1),a=i.filter(t=>1===t.length),r=this.#h.getDeletedIds(),h=new Map,c=new Map,d=t=>{const e=this.#h.getSegments(t);for(const t of e){const e=t.filename;!this.#u.has(e)&&!c.has(e)&&c.set(e,new o(e,this.#o))}};d("word"),d("char"),await Promise.all(Array.from(c.entries()).map(([t,e])=>e.loadIndex().then(s=>{s&&this.#u.set(t,e)})));const g=async(t,e)=>{if(0===e.length)return;const s=this.#h.getSegments(t);for(const t of s){const s=t.filename,i=this.#u.get(s);if(i)for(const t of e){const e=i.search(t),s=1+.1*t.length;for(const i of e)if(!r.has(i))if(h.has(i)){const e=h.get(i);e.score+=s,e.tokens.add(t)}else h.set(i,{score:0,tokens:new Set([t])})}}};await g("word",n),await g("char",a);const l=[];return h.forEach((t,e)=>{l.push({id:e,score:t.score,tokens:Array.from(t.tokens)})}),l.sort((t,e)=>e.score-t.score),"number"==typeof e&&e>0?l.slice(0,e):l}async removeDocument(t){this.#m||await this.init(),this.#h.addDeletedId(t),this.#h.removeAddedId(t),await this.#h.save()}async clearAll(){await this.#o.clearAll(),this.#u.clear(),this.#h.reset(),this.#m=!1,this.#p=!1,this.#D={word:0,char:0}}async getStatus(){return this.#m||await this.init(),{wordSegments:this.#h.getSegments("word").length,charSegments:this.#h.getSegments("char").length,deleted:this.#h.getDeletedIds().size,wordCacheSize:await this.#w.getCurrentSize(h),charCacheSize:await this.#w.getCurrentSize(c),inBatch:this.#p}}async hasDocument(t){return this.#m||await this.init(),this.#h.hasDocument(t)}}exports.BrowserStorage=t,exports.NodeStorage=e,exports.SearchEngine=d,exports.SimpleSearch=class{static#T=null;static#v={baseDir:"simple_search_data",wordSegmentTokenThreshold:1e5,minWordTokenSave:0};static configure(t){const e={...this.#v,...t};this.#T=new d(e)}static#z(){return this.#T||(this.#T=new d(this.#v)),this.#T}static async 
startBatch(){this.#z().startBatch()}static async endBatch(){return this.#z().endBatch()}static async addDocument(t){return this.#z().addDocument(t)}static async addDocumentIfMissing(t){return this.#z().addDocumentIfMissing(t)}static async addDocuments(t){return this.#z().addDocuments(t)}static async addDocumentsIfMissing(t){return this.#z().addDocumentsIfMissing(t)}static async search(t,e){return this.#z().search(t,e)}static async removeDocument(t){return this.#z().removeDocument(t)}static async clearAll(){return this.#z().clearAll()}static async getStatus(){return this.#z().getStatus()}static async hasDocument(t){return this.#z().hasDocument(t)}};
1
"use strict";

// Barrel module: re-exports every named export of the sub-modules.
// All modules are required up front, in the same order as before.
const reExportSources = [
  require('./type'),
  require('./core'),
  require('./simple'),
  require('./browser'),
  require('./node'),
];

for (const source of reExportSources) {
  for (const key of Object.keys(source)) {
    // "default" is never forwarded, and the first module to define a name wins.
    if (key === "default" || Object.prototype.hasOwnProperty.call(exports, key)) continue;
    // Live getter so later reassignments in the source module stay visible.
    Object.defineProperty(exports, key, {
      enumerable: true,
      get: function () {
        return source[key];
      },
    });
  }
}
package/lib/index.d.ts CHANGED
@@ -1,225 +1,5 @@
1
- /**
2
- * Core type definitions.
3
- */
4
- interface IDocumentBase { // minimal document shape: anything that carries text
5
- text: string; // text passed to the built-in tokenizer when no custom tokenizer is configured
6
- }
7
- interface IDocument extends IDocumentBase { // a document that can be indexed
8
- id: number; // document id; the engine writes it as an unsigned 32-bit integer
9
- }
10
- interface IResult { // one search hit
11
- id: number; // id of the matching document
12
- score: number; // relevance score; results are returned in descending score order
13
- tokens: string[]; // query tokens that matched this document
14
- }
15
- interface ISegmentMeta { // bookkeeping record for one index segment file
16
- filename: string; // segment file name, generated as `${type}_seg_${n}.bin`
17
- start: number; // offset in the token cache file where this segment's range begins
18
- end: number; // cache file size recorded when the segment was last updated
19
- tokenCount: number; // number of tokens accounted to this segment so far
20
- }
21
- interface IIndexMeta { // segment metadata, persisted as JSON in search_meta.json
22
- wordSegments: ISegmentMeta[]; // segments of the 'word' index, oldest first
23
- charSegments: ISegmentMeta[]; // segments of the 'char' index, oldest first
24
- }
25
- interface ITokenizedDoc { // a document after tokenization, as stored in the token cache
26
- id: number; // document id
27
- tokens: string[]; // tokens produced for this document
28
- }
29
- type IndexType = 'word' | 'char'; // 'word': tokens longer than one character; 'char': single-character tokens
30
- /**
31
- * Storage layer interface (pluggable: callers may supply their own implementation).
32
- */
33
- interface IStorage {
34
- write(filename: string, data: ArrayBuffer): Promise<void>; // replace the file's content
35
- append(filename: string, data: ArrayBuffer): Promise<void>; // append to the file, creating it if needed
36
- read(filename: string): Promise<ArrayBuffer | null>; // whole file; the bundled backends return null when it cannot be read
37
- readRange(filename: string, start: number, end: number): Promise<ArrayBuffer | null>; // bytes [start, end); null when the file cannot be read
38
- remove(filename: string): Promise<void>; // delete the file; the bundled backends ignore a missing file
39
- listFiles(): Promise<string[]>; // entry names in the storage directory
40
- clearAll(): Promise<void>; // delete everything in the storage directory
41
- getFileSize(filename: string): Promise<number>; // size in bytes; the bundled backends return 0 when the file cannot be read
42
- }
43
- /**
44
- * Core search engine configuration.
45
- */
46
- interface ISearchEngineConfig {
47
- /** * Base directory for stored data (required).
48
- * Used to keep different search engine instances apart.
49
- */
50
- baseDir: string;
51
- /**
52
- * Storage implementation (optional).
53
- * - 'browser': force OPFS (BrowserStorage)
54
- * - 'node': force Node.js fs (NodeStorage)
55
- * - IStorage: pass in a custom storage instance
56
- * - undefined: auto-detect the environment
57
- */
58
- storage?: 'browser' | 'node' | IStorage;
59
- /**
60
- * Tokenizer used when indexing (core algorithm setting).
61
- * - Purpose: turns a document's text into the token sequence that gets indexed
62
- * - Algorithm: any custom tokenization logic, as long as it returns an array of strings
63
- * - Advice: use a dedicated tokenizer per language (Chinese / English / Japanese, ...)
64
- * - Impact: directly determines index granularity and search accuracy
65
- */
66
- indexingTokenizer?: <T extends IDocument = IDocument>(doc: T) => string[];
67
- /**
68
- * Tokenizer used when searching (core algorithm setting).
69
- * - Purpose: turns the query text into the token sequence used for lookup
70
- * - Algorithm: any custom tokenization logic, as long as it returns an array of strings
71
- * - Advice: keep the same tokenization strategy as indexingTokenizer so searches stay accurate
72
- * - Impact: directly determines what can match and how relevant the results are
73
- */
74
- searchTokenizer?: <T extends IDocumentBase = IDocumentBase>(doc: T) => string[];
75
- /**
76
- * Word index segmentation threshold (token count) - segmentation setting.
77
- * - Purpose: bounds the size of a word index file; a new index segment is created once the threshold is exceeded
78
- * - Algorithm: token-count based; a new segment is started when the newly added tokens plus the existing tokens exceed the threshold
79
- * - Default: 100000
80
- * - Impact: too small produces too many index files; too large may hurt search performance
81
- */
82
- wordSegmentTokenThreshold?: number;
83
- /**
84
- * Character index segmentation threshold (token count) - segmentation setting.
85
- * - Purpose: bounds the size of a character index file; a new index segment is created once the threshold is exceeded
86
- * - Algorithm: token-count based; a new segment is started when the newly added tokens plus the existing tokens exceed the threshold
87
- * - Default: 500000
88
- * - Impact: too small produces too many index files; too large may hurt search performance
89
- */
90
- charSegmentTokenThreshold?: number;
91
- /**
92
- * Minimum save threshold for the word index (token count) - caching setting.
93
- * - Purpose: controls whether the word index is written to disk right away; below the threshold it is only kept in the cache
94
- * - Algorithm: token-count based; the index is persisted only once the accumulated token count reaches the threshold
95
- * - Default: 0
96
- * - Impact: a sensible value reduces disk I/O and speeds up indexing
97
- */
98
- minWordTokenSave?: number;
99
- /**
100
- * Minimum save threshold for the character index (token count) - caching setting.
101
- * - Purpose: controls whether the character index is written to disk right away; below the threshold it is only kept in the cache
102
- * - Algorithm: token-count based; the index is persisted only once the accumulated token count reaches the threshold
103
- * - Default: 0
104
- * - Impact: a sensible value reduces disk I/O and speeds up indexing
105
- */
106
- minCharTokenSave?: number;
107
- }
108
-
109
- /**
110
- * Core search engine class (supports multiple instances).
111
- */
112
- declare class SearchEngine {
113
- #private;
114
- constructor(config: ISearchEngineConfig);
115
- init(): Promise<void>;
116
- /**
117
- * Start batch mode.
118
- * While a batch is open, addDocuments only writes to the cache and does not trigger index segment builds.
119
- */
120
- startBatch(): void;
121
- /**
122
- * End batch mode.
123
- * Runs the index build check and saves the metadata.
124
- */
125
- endBatch(): Promise<void>;
126
- addDocument<T extends IDocument = IDocument>(doc: T): Promise<void>;
127
- /**
128
- * Add a single document, skipping it when its ID already exists.
129
- * Intended for resuming after a bulk add failed midway; can also be used directly to add one document.
130
- */
131
- addDocumentIfMissing<T extends IDocument = IDocument>(doc: T): Promise<void>;
132
- /**
133
- * Add several documents, skipping IDs that already exist.
134
- * Intended for resuming after a bulk add failed midway; can also be used directly for bulk adds.
135
- */
136
- addDocumentsIfMissing<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
137
- addDocuments<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
138
- search<T extends IDocumentBase = IDocumentBase>(query: T | string, limit?: number): Promise<IResult[]>;
139
- removeDocument(id: number): Promise<void>;
140
- clearAll(): Promise<void>;
141
- getStatus(): Promise<{
142
- wordSegments: number; // number of word index segments
143
- charSegments: number; // number of character index segments
144
- deleted: number; // number of deleted document ids
145
- wordCacheSize: number; // size of word_cache.bin as reported by the storage backend
146
- charCacheSize: number; // size of char_cache.bin as reported by the storage backend
147
- inBatch: boolean; // true between startBatch() and endBatch()
148
- }>;
149
- /**
150
- * Check whether a document ID has ever been added (deleted documents included).
151
- * @param id Document ID
152
- * @returns Whether the document has ever been added
153
- */
154
- hasDocument(id: number): Promise<boolean>;
155
- }
156
-
157
- /**
158
- * Convenience wrapper for quick use.
159
- * Provides a singleton engine with a default configuration.
160
- */
161
- declare class SimpleSearch {
162
- #private;
163
- /**
164
- * Configure and initialize the singleton.
165
- */
166
- static configure(config: Partial<ISearchEngineConfig>): void;
167
- static startBatch(): Promise<void>;
168
- static endBatch(): Promise<void>;
169
- static addDocument<T extends IDocument = IDocument>(doc: T): Promise<void>;
170
- static addDocumentIfMissing<T extends IDocument = IDocument>(doc: T): Promise<void>;
171
- static addDocuments<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
172
- static addDocumentsIfMissing<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
173
- static search<T extends IDocumentBase = IDocumentBase>(query: T | string, limit?: number): Promise<IResult[]>;
174
- static removeDocument(id: number): Promise<void>;
175
- static clearAll(): Promise<void>;
176
- static getStatus(): Promise<{
177
- wordSegments: number; // number of word index segments
178
- charSegments: number; // number of character index segments
179
- deleted: number; // number of deleted document ids
180
- wordCacheSize: number; // size of word_cache.bin as reported by the storage backend
181
- charCacheSize: number; // size of char_cache.bin as reported by the storage backend
182
- inBatch: boolean; // true between startBatch() and endBatch()
183
- }>;
184
- /**
185
- * Check whether a document ID has ever been added (deleted documents included).
186
- * @param id Document ID
187
- * @returns Whether the document has ever been added
188
- */
189
- static hasDocument(id: number): Promise<boolean>;
190
- }
191
-
192
- /**
193
- * Browser implementation (OPFS, isolated in its own sub-directory).
194
- * Works on the main thread and in Web Workers.
195
- */
196
- declare class BrowserStorage implements IStorage {
197
- #private;
198
- constructor(baseDir: string);
199
- write(filename: string, data: ArrayBuffer): Promise<void>;
200
- append(filename: string, data: ArrayBuffer): Promise<void>;
201
- read(filename: string): Promise<ArrayBuffer | null>;
202
- readRange(filename: string, start: number, end: number): Promise<ArrayBuffer | null>;
203
- remove(filename: string): Promise<void>;
204
- listFiles(): Promise<string[]>;
205
- clearAll(): Promise<void>;
206
- getFileSize(filename: string): Promise<number>;
207
- }
208
- /**
209
- * Node.js implementation.
210
- */
211
- declare class NodeStorage implements IStorage {
212
- #private;
213
- constructor(baseDir: string);
214
- write(filename: string, data: ArrayBuffer): Promise<void>;
215
- append(filename: string, data: ArrayBuffer): Promise<void>;
216
- read(filename: string): Promise<ArrayBuffer | null>;
217
- readRange(filename: string, start: number, end: number): Promise<ArrayBuffer | null>;
218
- remove(filename: string): Promise<void>;
219
- listFiles(): Promise<string[]>;
220
- clearAll(): Promise<void>;
221
- getFileSize(filename: string): Promise<number>;
222
- }
223
-
224
- export { BrowserStorage, NodeStorage, SearchEngine, SimpleSearch };
225
- export type { IDocument, IDocumentBase, IIndexMeta, IResult, ISearchEngineConfig, ISegmentMeta, IStorage, ITokenizedDoc, IndexType };
1
+ export * from './type';
2
+ export * from './core';
3
+ export * from './simple';
4
+ export * from './browser';
5
+ export * from './node';
package/lib/index.js CHANGED
@@ -1 +1 @@
1
- class t{#t;constructor(t){this.#t=t}async#e(){return await(await navigator.storage.getDirectory()).getDirectoryHandle(this.#t,{create:!0})}async write(t,e){const s=await(await(await this.#e()).getFileHandle(t,{create:!0})).createWritable();await s.write(e),await s.close()}async append(t,e){const s=await this.#e();let i;try{i=await s.getFileHandle(t,{create:!0})}catch{i=await s.getFileHandle(t,{create:!0})}const n=await i.getFile(),a=await i.createWritable({keepExistingData:!0});await a.seek(n.size),await a.write(e),await a.close()}async read(t){const e=await this.#e();try{return await(await(await e.getFileHandle(t)).getFile()).arrayBuffer()}catch{return null}}async readRange(t,e,s){const i=await this.#e();try{return await(await(await i.getFileHandle(t)).getFile()).slice(e,s).arrayBuffer()}catch{return null}}async remove(t){const e=await this.#e();try{await e.removeEntry(t)}catch{}}async listFiles(){const t=await this.#e(),e=[];for await(const s of t.keys())e.push(s);return e}async clearAll(){const t=await this.#e();for await(const e of t.keys())await t.removeEntry(e,{recursive:!0})}async getFileSize(t){const e=await this.#e();try{return(await(await e.getFileHandle(t)).getFile()).size}catch{return 0}}}class e{#s=null;#i=null;#t;#n="";constructor(t){this.#t=t}async#a(){if(this.#s)return;const t=await import("node:fs"),e=await import("node:path");this.#s=t.promises,this.#i=e.default||e,this.#n=this.#i.join(process.cwd(),this.#t);try{await this.#s.access(this.#n)}catch{await this.#s.mkdir(this.#n,{recursive:!0})}}#r(t){return this.#i.join(this.#n,t)}async write(t,e){await this.#a(),await this.#s.writeFile(this.#r(t),Buffer.from(e))}async append(t,e){await this.#a(),await this.#s.appendFile(this.#r(t),Buffer.from(e))}async read(t){await this.#a();try{const e=await this.#s.readFile(this.#r(t));return e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)}catch{return null}}async readRange(t,e,s){await this.#a();try{const i=await 
this.#s.open(this.#r(t),"r"),n=s-e,a=Buffer.alloc(n);return await i.read(a,0,n,e),await i.close(),a.buffer.slice(a.byteOffset,a.byteOffset+a.byteLength)}catch{return null}}async remove(t){await this.#a();try{await this.#s.unlink(this.#r(t))}catch{}}async listFiles(){await this.#a();try{return await this.#s.readdir(this.#n)}catch{return[]}}async clearAll(){await this.#a();try{const t=await this.#s.readdir(this.#n);for(const e of t)await this.#s.unlink(this.#i.join(this.#n,e))}catch{}}async getFileSize(t){await this.#a();try{return(await this.#s.stat(this.#r(t))).size}catch{return 0}}}const s="search_meta.json",i="deleted_ids.bin",n="added_ids.bin";class a{#o;#h={wordSegments:[],charSegments:[]};#c=new Set;#d=new Set;constructor(t){this.#o=t}async load(){const t=await this.#o.read(s);if(t){const e=(new TextDecoder).decode(t);this.#h=JSON.parse(e)}else this.#h={wordSegments:[],charSegments:[]};const e=await this.#o.read(i);if(e){const t=new DataView(e);let s=0;const i=e.byteLength;for(;s<i&&!(s+4>i);){const e=t.getUint32(s,!0);this.#c.add(e),s+=4,s<i&&30===t.getUint8(s)&&(s+=1)}}const a=await this.#o.read(n);if(a){const t=new DataView(a);let e=0;const s=a.byteLength;for(;e<s&&!(e+4>s);){const i=t.getUint32(e,!0);this.#d.add(i),e+=4,e<s&&30===t.getUint8(e)&&(e+=1)}}}async save(){const t=JSON.stringify(this.#h);if(await this.#o.write(s,(new TextEncoder).encode(t).buffer),0===this.#c.size)await this.#o.remove(i);else{const t=4*this.#c.size+this.#c.size,e=new ArrayBuffer(t),s=new DataView(e);let n=0;for(const t of this.#c)s.setUint32(n,t,!0),n+=4,s.setUint8(n,30),n+=1;await this.#o.write(i,e)}if(0===this.#d.size)await this.#o.remove(n);else{const t=4*this.#d.size+this.#d.size,e=new ArrayBuffer(t),s=new DataView(e);let i=0;for(const t of this.#d)s.setUint32(i,t,!0),i+=4,s.setUint8(i,30),i+=1;await this.#o.write(n,e)}}getSegments(t){return"word"===t?this.#h.wordSegments:this.#h.charSegments}getDeletedIds(){return this.#c}addDeletedId(t){this.#c.add(t)}isDeleted(t){return 
this.#c.has(t)}addAddedId(t){this.#d.add(t)}removeAddedId(t){this.#d.delete(t)}isAdded(t){return this.#d.has(t)}getAddedIds(){return this.#d}hasDocument(t){return this.#d.has(t)||this.#c.has(t)}getLastSegmentInfo(t){const e=this.getSegments(t);return 0===e.length?null:e[e.length-1]}updateSegment(t,e,s,i,n,a){const r="word"===t?this.#h.wordSegments:this.#h.charSegments;if(a)r.push({filename:e,start:s,end:i,tokenCount:n});else{const t=r[r.length-1];t&&t.filename===e&&(t.end=i,t.tokenCount=n)}}reset(){this.#h={wordSegments:[],charSegments:[]},this.#c.clear(),this.#d.clear()}}class r{static SEPARATOR=30;#o;constructor(t){this.#o=t}async appendBatch(t,e){if(0===e.length)return await this.#o.getFileSize(t);const s=new TextEncoder;let i=0;for(const t of e){i+=8;for(const e of t.tokens){i+=2+Math.min(s.encode(e).byteLength,65535)}i+=1}const n=new Uint8Array(i);let a=0;for(const t of e){const e=[];for(const i of t.tokens){const t=s.encode(i),n=t.byteLength>65535?t.slice(0,65535):t;e.push(n)}const i=new DataView(n.buffer,a);i.setUint32(0,t.id,!0),i.setUint32(4,e.length,!0),a+=8;for(const t of e)new DataView(n.buffer,a).setUint16(0,t.byteLength,!0),a+=2,n.set(t,a),a+=t.byteLength;n[a++]=r.SEPARATOR}return await this.#o.append(t,n.buffer),await this.#o.getFileSize(t)}async readRange(t,e,s){const i=await this.#o.readRange(t,e,s);if(!i||0===i.byteLength)return[];const n=new DataView(i),a=new Uint8Array(i),o=new TextDecoder,h=[];let c=0;const d=i.byteLength;for(;c<d&&!(c+8>d);){const t=n.getUint32(c,!0);c+=4;const e=n.getUint32(c,!0);c+=4;const s=[];for(let t=0;t<e&&!(c+2>d);t++){const t=n.getUint16(c,!0);if(c+=2,c+t>d)break;const e=new Uint8Array(i,c,t);s.push(o.decode(e)),c+=t}c<d&&a[c]===r.SEPARATOR&&(c+=1),h.push({id:t,tokens:s})}return h}async getCurrentSize(t){return await this.#o.getFileSize(t)}}class o{#g;#o;#l=null;#f=null;static hash(t){let e=5381;for(let s=0;s<t.length;s++)e=(e<<5)+e^t.charCodeAt(s);return e>>>0}constructor(t,e){this.#g=t,this.#o=e}async 
loadIndex(){return!!this.#l||(this.#l=await this.#o.read(this.#g),!!this.#l&&(this.#f=new DataView(this.#l),!0))}async buildAndSave(t){const e=new Map;for(const s of t){const t=new Map;for(const i of s.tokens)if(!t.has(i)){t.set(i,!0);const n=o.hash(i);e.has(n)||e.set(n,[]),e.get(n).push(s.id)}}const s=Array.from(e.keys()).sort((t,e)=>t-e);let i=0;const n=new Array(s.length);for(let t=0;t<s.length;t++){const a=s[t],r=e.get(a);n[t]=r,i+=r.length}const a=12*s.length,r=new ArrayBuffer(8+a+4*i),h=new DataView(r);h.setUint32(0,1229866072),h.setUint32(4,s.length);let c=8,d=8+a;for(let t=0;t<s.length;t++){const e=s[t],i=n[t];h.setUint32(c,e),h.setUint32(c+4,d),h.setUint32(c+8,i.length),c+=12;for(let t=0;t<i.length;t++)h.setUint32(d,i[t],!0),d+=4}await this.#o.write(this.#g,r),this.#l=r,this.#f=h}search(t){if(!this.#f||!this.#l)return[];const e=o.hash(t);let s=0,i=this.#f.getUint32(4)-1;for(;s<=i;){const t=s+i>>>1,n=8+12*t,a=this.#f.getUint32(n);if(a<e)s=t+1;else{if(!(a>e)){const t=this.#f.getUint32(n+4),e=this.#f.getUint32(n+8),s=[];for(let i=0;i<e;i++)s.push(this.#f.getUint32(t+4*i,!0));return s}i=t-1}}return[]}}const h="word_cache.bin",c="char_cache.bin";class d{#o;#h;#w;#u;#m=!1;#y;#p=!1;#D={word:0,char:0};constructor(s){if(!s.baseDir)throw new Error("SearchEngine requires 'baseDir' in config.");if(this.#y={wordSegmentTokenThreshold:1e5,charSegmentTokenThreshold:5e5,minWordTokenSave:0,minCharTokenSave:0,...s},(this.#y.minWordTokenSave||0)>=(this.#y.wordSegmentTokenThreshold||1e5))throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");if((this.#y.minCharTokenSave||0)>=(this.#y.charSegmentTokenThreshold||5e5))throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");let i=null;if(this.#y.storage&&("object"==typeof this.#y.storage?i=this.#y.storage:"browser"===this.#y.storage?i=new t(this.#y.baseDir):"node"===this.#y.storage&&(i=new e(this.#y.baseDir))),!i){const s=typeof navigator<"u"&&navigator?.storage?.getDirectory 
instanceof Function,n=typeof process<"u"&&null!=process.versions&&null!=process.versions.node;s?i=new t(this.#y.baseDir):n&&(i=new e(this.#y.baseDir))}if(!i)throw new Error('Storage initialization failed. Please configure "storage" explicitly or ensure you are in a supported environment (Browser/Node).');this.#o=i,this.#h=new a(this.#o),this.#w=new r(this.#o),this.#u=new Map}async init(){if(this.#m)return;await this.#h.load();const t=[...this.#h.getSegments("word"),...this.#h.getSegments("char")];for(const e of t)this.#u.has(e.filename)||this.#u.set(e.filename,new o(e.filename,this.#o)),await this.#u.get(e.filename).loadIndex();this.#m=!0}startBatch(){this.#p=!0,this.#D={word:0,char:0}}async endBatch(){this.#p=!1,this.#D.word>0&&await this.#S("word",this.#D.word),this.#D.char>0&&await this.#S("char",this.#D.char),this.#D={word:0,char:0},await this.#h.save()}#k(t){try{if(typeof Intl<"u"&&"function"==typeof Intl.Segmenter&&"function"==typeof Array.from){const e=new Intl.Segmenter([],{granularity:"word"}).segment(t);if("object"==typeof e&&null!==e)return Array.from(e).filter(t=>t?.isWordLike).map(t=>t?.segment?.toLowerCase()||"")}}catch{}return t.toLowerCase().split(/[^a-z0-9\u4e00-\u9fa5]+/g).filter(t=>t.length>0)}#I(t){return this.#y.indexingTokenizer?this.#y.indexingTokenizer(t):this.#k(t.text)}#b(t){return this.#y.searchTokenizer?this.#y.searchTokenizer(t):this.#I(t)}async addDocument(t){return this.addDocuments([t])}async addDocumentIfMissing(t){return this.addDocumentsIfMissing([t])}async addDocumentsIfMissing(t){if(this.#m||await this.init(),0===t.length)return;const e=this.#h.getDeletedIds(),s=[],i=[],n=[];for(const a of t){if(e.has(a.id)||this.#h.isAdded(a.id))continue;const t=this.#I(a),r=[],o=[];for(const e of t)e.length>1?r.push(e):1===e.length&&o.push(e);r.length>0&&s.push({id:a.id,tokens:r}),o.length>0&&i.push({id:a.id,tokens:o}),n.push(a)}if(0===n.length)return;let a=0,r=0;if(s.length>0){await this.#w.appendBatch(h,s);for(const t of 
s)a+=t.tokens.length}if(i.length>0){await this.#w.appendBatch(c,i);for(const t of i)r+=t.tokens.length}for(const t of n)this.#h.addAddedId(t.id);this.#p?(this.#D.word+=a,this.#D.char+=r):(a>0&&await this.#S("word",a),r>0&&await this.#S("char",r),await this.#h.save())}async addDocuments(t){if(this.#m||await this.init(),0===t.length)return;const e=this.#h.getDeletedIds(),s=[],i=[];for(const n of t){if(e.has(n.id))throw new Error(`Document ID ${n.id} has been deleted and cannot be re-added.`);if(this.#h.isAdded(n.id))throw new Error(`Document ID ${n.id} already exists.`);const t=this.#I(n),a=[],r=[];for(const e of t)e.length>1?a.push(e):1===e.length&&r.push(e);a.length>0&&s.push({id:n.id,tokens:a}),r.length>0&&i.push({id:n.id,tokens:r})}let n=0,a=0;if(s.length>0){await this.#w.appendBatch(h,s);for(const t of s)n+=t.tokens.length}if(i.length>0){await this.#w.appendBatch(c,i);for(const t of i)a+=t.tokens.length}for(const e of t)this.#h.addAddedId(e.id);this.#p?(this.#D.word+=n,this.#D.char+=a):(n>0&&await this.#S("word",n),a>0&&await this.#S("char",a),await this.#h.save())}async#S(t,e){const s="word"===t?h:c,i=await this.#w.getCurrentSize(s),n="word"===t?this.#y.wordSegmentTokenThreshold||1e5:this.#y.charSegmentTokenThreshold||5e5,a="word"===t?this.#y.minWordTokenSave||0:this.#y.minCharTokenSave||0,r=this.#h.getLastSegmentInfo(t);let d,g,l,f;const w=()=>{const e=this.#h.getSegments(t).length+1;return`${t}_seg_${e}.bin`};if(r){const t=r.tokenCount;t>=n||t+e>=n?(d=w(),l=!0,g=r.end,f=e):(d=r.filename,l=!1,g=r.start,f=t+e)}else d=w(),l=!0,g=0,f=e;if(f<a)return void this.#h.updateSegment(t,d,g,i,f,l);const u=await this.#w.readRange(s,g,i);let m=this.#u.get(d);m||(m=new o(d,this.#o),this.#u.set(d,m)),await m.buildAndSave(u),this.#h.updateSegment(t,d,g,i,f,l)}async search(t,e){this.#m||await this.init();const s="string"==typeof t?{text:t}:t,i=this.#b(s),n=i.filter(t=>t.length>1),a=i.filter(t=>1===t.length),r=this.#h.getDeletedIds(),h=new Map,c=new Map,d=t=>{const 
e=this.#h.getSegments(t);for(const t of e){const e=t.filename;!this.#u.has(e)&&!c.has(e)&&c.set(e,new o(e,this.#o))}};d("word"),d("char"),await Promise.all(Array.from(c.entries()).map(([t,e])=>e.loadIndex().then(s=>{s&&this.#u.set(t,e)})));const g=async(t,e)=>{if(0===e.length)return;const s=this.#h.getSegments(t);for(const t of s){const s=t.filename,i=this.#u.get(s);if(i)for(const t of e){const e=i.search(t),s=1+.1*t.length;for(const i of e)if(!r.has(i))if(h.has(i)){const e=h.get(i);e.score+=s,e.tokens.add(t)}else h.set(i,{score:0,tokens:new Set([t])})}}};await g("word",n),await g("char",a);const l=[];return h.forEach((t,e)=>{l.push({id:e,score:t.score,tokens:Array.from(t.tokens)})}),l.sort((t,e)=>e.score-t.score),"number"==typeof e&&e>0?l.slice(0,e):l}async removeDocument(t){this.#m||await this.init(),this.#h.addDeletedId(t),this.#h.removeAddedId(t),await this.#h.save()}async clearAll(){await this.#o.clearAll(),this.#u.clear(),this.#h.reset(),this.#m=!1,this.#p=!1,this.#D={word:0,char:0}}async getStatus(){return this.#m||await this.init(),{wordSegments:this.#h.getSegments("word").length,charSegments:this.#h.getSegments("char").length,deleted:this.#h.getDeletedIds().size,wordCacheSize:await this.#w.getCurrentSize(h),charCacheSize:await this.#w.getCurrentSize(c),inBatch:this.#p}}async hasDocument(t){return this.#m||await this.init(),this.#h.hasDocument(t)}}class g{static#T=null;static#v={baseDir:"simple_search_data",wordSegmentTokenThreshold:1e5,minWordTokenSave:0};static configure(t){const e={...this.#v,...t};this.#T=new d(e)}static#z(){return this.#T||(this.#T=new d(this.#v)),this.#T}static async startBatch(){this.#z().startBatch()}static async endBatch(){return this.#z().endBatch()}static async addDocument(t){return this.#z().addDocument(t)}static async addDocumentIfMissing(t){return this.#z().addDocumentIfMissing(t)}static async addDocuments(t){return this.#z().addDocuments(t)}static async addDocumentsIfMissing(t){return this.#z().addDocumentsIfMissing(t)}static 
async search(t,e){return this.#z().search(t,e)}static async removeDocument(t){return this.#z().removeDocument(t)}static async clearAll(){return this.#z().clearAll()}static async getStatus(){return this.#z().getStatus()}static async hasDocument(t){return this.#z().hasDocument(t)}}export{t as BrowserStorage,e as NodeStorage,d as SearchEngine,g as SimpleSearch};
1
+ export*from'./type';export*from'./core';export*from'./simple';export*from'./browser';export*from'./node';
package/lib/node.cjs ADDED
@@ -0,0 +1 @@
1
+ "use strict";Object.create,Object.defineProperty,Object.getOwnPropertyDescriptor,Object.getOwnPropertyNames,Object.getPrototypeOf,Object.prototype.hasOwnProperty;exports.NodeStorage=class{#t=null;#i=null;#a;#s="";constructor(t){this.#a=t}async#e(){if(this.#t)return;const t=await import("fs/promises"),i=await import("path");this.#t=t,this.#i=i,this.#s=this.#i.join(process.cwd(),this.#a);try{await this.#t.access(this.#s)}catch{await this.#t.mkdir(this.#s,{recursive:!0})}}#r(t){return this.#i.join(this.#s,t)}async write(t,i){await this.#e(),await this.#t.writeFile(this.#r(t),Buffer.from(i))}async append(t,i){await this.#e(),await this.#t.appendFile(this.#r(t),Buffer.from(i))}async read(t){await this.#e();try{const i=await this.#t.readFile(this.#r(t));return i.buffer.slice(i.byteOffset,i.byteOffset+i.byteLength)}catch{return null}}async readRange(t,i,a){await this.#e();try{const s=await this.#t.open(this.#r(t),"r"),e=a-i,r=Buffer.alloc(e);return await s.read(r,0,e,i),await s.close(),r.buffer.slice(r.byteOffset,r.byteOffset+r.byteLength)}catch{return null}}async remove(t){await this.#e();try{await this.#t.unlink(this.#r(t))}catch{}}async listFiles(){await this.#e();try{return await this.#t.readdir(this.#s)}catch{return[]}}async clearAll(){await this.#e();try{const t=await this.#t.readdir(this.#s);for(const i of t)await this.#t.unlink(this.#i.join(this.#s,i))}catch{}}async getFileSize(t){await this.#e();try{return(await this.#t.stat(this.#r(t))).size}catch{return 0}}};
package/lib/node.d.ts ADDED
@@ -0,0 +1,20 @@
1
+ import { IStorage } from './type';
2
+
3
+ /**
4
+ * Node.js implementation of the IStorage interface.
5
+ */
6
+
7
+ declare class NodeStorage implements IStorage {
8
+ #private;
9
+ constructor(baseDir: string);
10
+ write(filename: string, data: ArrayBuffer): Promise<void>;
11
+ append(filename: string, data: ArrayBuffer): Promise<void>;
12
+ read(filename: string): Promise<ArrayBuffer | null>;
13
+ readRange(filename: string, start: number, end: number): Promise<ArrayBuffer | null>;
14
+ remove(filename: string): Promise<void>;
15
+ listFiles(): Promise<string[]>;
16
+ clearAll(): Promise<void>;
17
+ getFileSize(filename: string): Promise<number>;
18
+ }
19
+
20
+ export { NodeStorage };
package/lib/node.js ADDED
@@ -0,0 +1 @@
1
/**
 * Node.js storage backend built on `fs/promises` (ES module build).
 *
 * Every file lives directly inside `<process.cwd()>/<baseDir>`. The `fs/promises`
 * and `path` modules are loaded lazily with dynamic `import()` on first use, and
 * the directory is created at that point if it does not exist yet.
 */
class NodeStorage {
  #fs;
  #path;
  #baseDir;
  #root = "";
  #ready;

  /**
   * @param {string} baseDir Directory name, joined onto `process.cwd()`.
   */
  constructor(baseDir) {
    this.#baseDir = baseDir;
  }

  /**
   * Runs the one-time setup and returns the shared promise for it.
   * Sharing a single promise means a caller arriving while setup is still in
   * flight waits for the directory to exist instead of racing ahead of `mkdir`.
   * A failed setup clears the promise so the next call retries.
   */
  #init() {
    if (!this.#ready) {
      this.#ready = this.#setup().catch((err) => {
        this.#ready = undefined;
        throw err;
      });
    }
    return this.#ready;
  }

  /** Loads the Node modules and makes sure the storage directory exists. */
  async #setup() {
    const fs = await import("fs/promises");
    const path = await import("path");
    const root = path.join(process.cwd(), this.#baseDir);
    try {
      await fs.access(root);
    } catch {
      await fs.mkdir(root, { recursive: true });
    }
    this.#fs = fs;
    this.#path = path;
    this.#root = root;
  }

  /** Maps a storage file name to its path inside the storage directory. */
  #resolve(filename) {
    return this.#path.join(this.#root, filename);
  }

  /**
   * Creates or overwrites `filename` with `data`.
   * @param {string} filename
   * @param {ArrayBuffer} data
   * @returns {Promise<void>}
   */
  async write(filename, data) {
    await this.#init();
    await this.#fs.writeFile(this.#resolve(filename), Buffer.from(data));
  }

  /**
   * Appends `data` to `filename`, creating the file when it is missing.
   * @param {string} filename
   * @param {ArrayBuffer} data
   * @returns {Promise<void>}
   */
  async append(filename, data) {
    await this.#init();
    await this.#fs.appendFile(this.#resolve(filename), Buffer.from(data));
  }

  /**
   * Reads the whole file.
   * @param {string} filename
   * @returns {Promise<ArrayBuffer | null>} File content, or `null` when it cannot be read.
   */
  async read(filename) {
    await this.#init();
    try {
      const buf = await this.#fs.readFile(this.#resolve(filename));
      // Copy out exactly this Buffer's window; the backing ArrayBuffer may be larger.
      return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
    } catch {
      return null;
    }
  }

  /**
   * Reads the byte range `[start, end)` of a file.
   *
   * A range reaching past the end of the file is clamped to the bytes that
   * actually exist rather than padded with zeros.
   *
   * @param {string} filename
   * @param {number} start Offset of the first byte to read.
   * @param {number} end Offset one past the last byte to read.
   * @returns {Promise<ArrayBuffer | null>} The bytes read, or `null` on any failure
   *   (missing file, invalid range, I/O error).
   */
  async readRange(filename, start, end) {
    await this.#init();
    let handle;
    try {
      handle = await this.#fs.open(this.#resolve(filename), "r");
      const length = end - start;
      const buf = Buffer.alloc(length);
      let total = 0;
      // A single read() may return fewer bytes than requested; keep going until
      // the range is filled or EOF (bytesRead === 0) is reached.
      while (total < length) {
        const { bytesRead } = await handle.read(buf, total, length - total, start + total);
        if (bytesRead === 0) break;
        total += bytesRead;
      }
      return buf.buffer.slice(buf.byteOffset, buf.byteOffset + total);
    } catch {
      return null;
    } finally {
      // Always release the descriptor, including when read() or alloc() threw.
      if (handle) await handle.close().catch(() => {});
    }
  }

  /**
   * Deletes a file; a missing file or failed delete is ignored (best effort).
   * @param {string} filename
   * @returns {Promise<void>}
   */
  async remove(filename) {
    await this.#init();
    try {
      await this.#fs.unlink(this.#resolve(filename));
    } catch {
      // Best effort by design.
    }
  }

  /**
   * Lists the entry names in the storage directory.
   * @returns {Promise<string[]>} Entry names, or `[]` when the directory cannot be read.
   */
  async listFiles() {
    await this.#init();
    try {
      return await this.#fs.readdir(this.#root);
    } catch {
      return [];
    }
  }

  /**
   * Removes every entry in the storage directory, sub-directories included.
   * Each entry is removed independently so one failure does not stop the rest.
   * @returns {Promise<void>}
   */
  async clearAll() {
    await this.#init();
    let entries;
    try {
      entries = await this.#fs.readdir(this.#root);
    } catch {
      return;
    }
    for (const entry of entries) {
      try {
        await this.#fs.rm(this.#path.join(this.#root, entry), { recursive: true, force: true });
      } catch {
        // Best effort: continue with the remaining entries.
      }
    }
  }

  /**
   * Returns the size of a file in bytes.
   * @param {string} filename
   * @returns {Promise<number>} Size in bytes, or `0` when the file cannot be stat'ed.
   */
  async getFileSize(filename) {
    await this.#init();
    try {
      return (await this.#fs.stat(this.#resolve(filename))).size;
    } catch {
      return 0;
    }
  }
}

export { NodeStorage };
package/lib/simple.cjs ADDED
@@ -0,0 +1 @@
1
+ "use strict";var t=require('./core');exports.SimpleSearch=class{static#t=null;static#e={baseDir:"simple_search_data",wordSegmentTokenThreshold:1e5,minWordTokenSave:0};static configure(e){const n={...this.#e,...e};this.#t=new t.SearchEngine(n)}static#n(){return this.#t||(this.#t=new t.SearchEngine(this.#e)),this.#t}static async startBatch(){this.#n().startBatch()}static async endBatch(){return this.#n().endBatch()}static async addDocument(t){return this.#n().addDocument(t)}static async addDocumentIfMissing(t){return this.#n().addDocumentIfMissing(t)}static async addDocuments(t){return this.#n().addDocuments(t)}static async addDocumentsIfMissing(t){return this.#n().addDocumentsIfMissing(t)}static async search(t,e){return this.#n().search(t,e)}static async removeDocument(t){return this.#n().removeDocument(t)}static async clearAll(){return this.#n().clearAll()}static async getStatus(){return this.#n().getStatus()}static async hasDocument(t){return this.#n().hasDocument(t)}};
@@ -0,0 +1,39 @@
1
+ import * as ___type from './type';
2
+ import { ISearchEngineConfig, IDocument, IDocumentBase } from './type';
3
+
4
+ /**
5
+ * Quick-start wrapper.
6
+ * Provides a singleton with a default configuration.
7
+ */
8
+ declare class SimpleSearch {
9
+ #private;
10
+ /**
11
+ * Configure and initialize the singleton.
12
+ */
13
+ static configure(config: Partial<ISearchEngineConfig>): void;
14
+ static startBatch(): Promise<void>;
15
+ static endBatch(): Promise<void>;
16
+ static addDocument<T extends IDocument = IDocument>(doc: T): Promise<void>;
17
+ static addDocumentIfMissing<T extends IDocument = IDocument>(doc: T): Promise<void>;
18
+ static addDocuments<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
19
+ static addDocumentsIfMissing<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
20
+ static search<T extends IDocumentBase = IDocumentBase>(query: T | string, limit?: number): Promise<___type.IResult[]>;
21
+ static removeDocument(id: number): Promise<void>;
22
+ static clearAll(): Promise<void>;
23
+ static getStatus(): Promise<{
24
+ wordSegments: number;
25
+ charSegments: number;
26
+ deleted: number;
27
+ wordCacheSize: number;
28
+ charCacheSize: number;
29
+ inBatch: boolean;
30
+ }>;
31
+ /**
32
+ * Check whether a document ID has ever been added (including deleted ones).
33
+ * @param id Document ID
34
+ * @returns Boolean indicating whether the document has ever been added
35
+ */
36
+ static hasDocument(id: number): Promise<boolean>;
37
+ }
38
+
39
+ export { SimpleSearch };
package/lib/simple.js ADDED
@@ -0,0 +1 @@
1
import { SearchEngine } from './core';

/**
 * Static convenience facade (ES module build) around one lazily created
 * `SearchEngine` from './core'.
 *
 * Every static method forwards to that shared engine. `configure` replaces the
 * engine with one built from the defaults merged with the caller's overrides.
 */
class SimpleSearch {
  /** Shared engine; stays null until first use or until `configure` runs. */
  static #engine = null;

  /** Configuration used when the caller never calls `configure`. */
  static #defaults = {
    baseDir: "simple_search_data",
    wordSegmentTokenThreshold: 1e5,
    minWordTokenSave: 0,
  };

  /**
   * Builds a fresh engine from the defaults overlaid with `config`.
   * @param config Partial engine configuration; its keys win over the defaults.
   */
  static configure(config) {
    const merged = Object.assign({}, this.#defaults, config);
    this.#engine = new SearchEngine(merged);
  }

  /** Returns the shared engine, creating it from the defaults on first access. */
  static #instance() {
    if (!this.#engine) {
      this.#engine = new SearchEngine(this.#defaults);
    }
    return this.#engine;
  }

  /** Forwards to the engine's `startBatch`; the result is not returned. */
  static async startBatch() {
    this.#instance().startBatch();
  }

  /** Forwards to the engine's `endBatch`. */
  static async endBatch() {
    return this.#instance().endBatch();
  }

  /** Forwards to the engine's `addDocument`. */
  static async addDocument(doc) {
    return this.#instance().addDocument(doc);
  }

  /** Forwards to the engine's `addDocumentIfMissing`. */
  static async addDocumentIfMissing(doc) {
    return this.#instance().addDocumentIfMissing(doc);
  }

  /** Forwards to the engine's `addDocuments`. */
  static async addDocuments(docs) {
    return this.#instance().addDocuments(docs);
  }

  /** Forwards to the engine's `addDocumentsIfMissing`. */
  static async addDocumentsIfMissing(docs) {
    return this.#instance().addDocumentsIfMissing(docs);
  }

  /** Forwards to the engine's `search`. */
  static async search(query, limit) {
    return this.#instance().search(query, limit);
  }

  /** Forwards to the engine's `removeDocument`. */
  static async removeDocument(id) {
    return this.#instance().removeDocument(id);
  }

  /** Forwards to the engine's `clearAll`. */
  static async clearAll() {
    return this.#instance().clearAll();
  }

  /** Forwards to the engine's `getStatus`. */
  static async getStatus() {
    return this.#instance().getStatus();
  }

  /** Forwards to the engine's `hasDocument`. */
  static async hasDocument(id) {
    return this.#instance().hasDocument(id);
  }
}

export { SimpleSearch };
package/lib/type.cjs ADDED
@@ -0,0 +1 @@
1
+ "use strict";
package/lib/type.d.ts ADDED
@@ -0,0 +1,135 @@
1
+ /**
2
+ * Base document interface: the text content only, without an ID.
3
+ */
4
+ interface IDocumentBase {
5
+ text: string;
6
+ }
7
+ /**
8
+ * Document interface (includes the numeric ID).
9
+ */
10
+ interface IDocument extends IDocumentBase {
11
+ id: number;
12
+ }
13
+
14
+ /**
15
+ * Search result interface.
16
+ */
17
+ interface IResult {
18
+ id: number;
19
+ score: number;
20
+ tokens: string[];
21
+ }
22
+
23
+ /**
24
+ * Index segment metadata interface.
25
+ */
26
+ interface ISegmentMeta {
27
+ filename: string;
28
+ start: number;
29
+ end: number;
30
+ tokenCount: number;
31
+ }
32
+
33
+ /**
34
+ * Index metadata interface.
35
+ */
36
+
37
+ interface IIndexMeta {
38
+ wordSegments: ISegmentMeta[];
39
+ charSegments: ISegmentMeta[];
40
+ }
41
+
42
+ /**
43
+ * Tokenized document interface.
44
+ */
45
+ interface ITokenizedDoc {
46
+ id: number;
47
+ tokens: string[];
48
+ }
49
+
50
+ /**
51
+ * Storage layer interface (externalized so callers can supply their own implementation).
52
+ */
53
+ interface IStorage {
54
+ write(filename: string, data: ArrayBuffer): Promise<void>;
55
+ append(filename: string, data: ArrayBuffer): Promise<void>;
56
+ read(filename: string): Promise<ArrayBuffer | null>;
57
+ readRange(filename: string, start: number, end: number): Promise<ArrayBuffer | null>;
58
+ remove(filename: string): Promise<void>;
59
+ listFiles(): Promise<string[]>;
60
+ clearAll(): Promise<void>;
61
+ getFileSize(filename: string): Promise<number>;
62
+ }
63
+
64
+ /**
65
+ * Index type.
66
+ */
67
+ type IndexType = 'word' | 'char';
68
+
69
+ interface ISearchEngineConfig {
70
+ /**
71
+ * Base directory for data storage (required).
72
+ * Used to tell different search engine instances apart.
73
+ */
74
+ baseDir: string;
75
+ /**
76
+ * Storage implementation (optional).
77
+ * - 'browser': force OPFS (BrowserStorage)
78
+ * - 'node': force Node.js fs (NodeStorage)
79
+ * - IStorage: pass in a custom storage instance
80
+ * - undefined: auto-detect the environment
81
+ */
82
+ storage?: 'browser' | 'node' | IStorage;
83
+ /**
84
+ * Tokenizer used when indexing (core algorithm setting).
85
+ * - Purpose: converts a document into the token sequence used for indexing
86
+ * - Algorithm: custom tokenization logic; it must return an array of strings
87
+ * - Recommendation: use a dedicated tokenizer per language (Chinese/English/Japanese, etc.)
88
+ * - Impact: directly determines index granularity and search accuracy
89
+ */
90
+ indexingTokenizer?: <T extends IDocument = IDocument>(doc: T) => string[];
91
+ /**
92
+ * Tokenizer used when searching (core algorithm setting).
93
+ * - Purpose: converts a query into the token sequence used for searching
94
+ * - Algorithm: custom tokenization logic; it must return an array of strings
95
+ * - Recommendation: keep the strategy consistent with indexingTokenizer to ensure search accuracy
96
+ * - Impact: directly determines the match scope and the relevance of results
97
+ */
98
+ searchTokenizer?: <T extends IDocumentBase = IDocumentBase>(doc: T) => string[];
99
+ /**
100
+ * Word-index segment threshold (token count) - segmentation setting.
101
+ * - Purpose: bounds the size of a word-index file; a new index segment is created at the threshold
102
+ * - Algorithm: token-count based; a new segment starts when existing plus newly added tokens reach the threshold
103
+ * - Default: 100000
104
+ * - Impact: too small produces too many index files; too large may hurt search performance
105
+ */
106
+ wordSegmentTokenThreshold?: number;
107
+ /**
108
+ * Character-index segment threshold (token count) - segmentation setting.
109
+ * - Purpose: bounds the size of a character-index file; a new index segment is created at the threshold
110
+ * - Algorithm: token-count based; a new segment starts when existing plus newly added tokens reach the threshold
111
+ * - Default: 500000
112
+ * - Impact: too small produces too many index files; too large may hurt search performance
113
+ */
114
+ charSegmentTokenThreshold?: number;
115
+ /**
116
+ * Minimum save threshold for the word index (token count) - caching setting.
117
+ * - Purpose: while the segment's token count is below this value, the segment index is not built and saved; only its metadata is updated
118
+ * - Algorithm: token-count based; the segment index is built and saved once the accumulated token count reaches the threshold
119
+ * - Default: 0
120
+ * - Impact: a suitable value reduces disk I/O and improves indexing performance
121
+ */
122
+ minWordTokenSave?: number;
123
+ /**
124
+ * Minimum save threshold for the character index (token count) - caching setting.
125
+ * - Purpose: while the segment's token count is below this value, the segment index is not built and saved; only its metadata is updated
126
+ * - Algorithm: token-count based; the segment index is built and saved once the accumulated token count reaches the threshold
127
+ * - Default: 0
128
+ * - Impact: a suitable value reduces disk I/O and improves indexing performance
129
+ */
130
+ minCharTokenSave?: number;
131
+ }
132
+
133
+
134
+
135
+ export type { IDocument, IDocumentBase, IIndexMeta, IResult, ISearchEngineConfig, ISegmentMeta, IStorage, ITokenizedDoc, IndexType };
package/lib/type.js ADDED
File without changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gs-search",
3
- "version": "0.1.2",
3
+ "version": "0.1.4",
4
4
  "type": "module",
5
5
  "main": "lib/index.cjs",
6
6
  "module": "lib/index.js",
@@ -9,6 +9,31 @@
9
9
  "import": "./lib/index.js",
10
10
  "require": "./lib/index.cjs",
11
11
  "types": "./lib/index.d.ts"
12
+ },
13
+ "./browser": {
14
+ "import": "./lib/browser.js",
15
+ "require": "./lib/browser.cjs",
16
+ "types": "./lib/browser.d.ts"
17
+ },
18
+ "./core": {
19
+ "import": "./lib/core.js",
20
+ "require": "./lib/core.cjs",
21
+ "types": "./lib/core.d.ts"
22
+ },
23
+ "./node": {
24
+ "import": "./lib/node.js",
25
+ "require": "./lib/node.cjs",
26
+ "types": "./lib/node.d.ts"
27
+ },
28
+ "./simple": {
29
+ "import": "./lib/simple.js",
30
+ "require": "./lib/simple.cjs",
31
+ "types": "./lib/simple.d.ts"
32
+ },
33
+ "./type": {
34
+ "import": "./lib/type.js",
35
+ "require": "./lib/type.cjs",
36
+ "types": "./lib/type.d.ts"
12
37
  }
13
38
  },
14
39
  "types": "lib/index.d.ts",