gs-search 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.ja.md CHANGED
@@ -72,18 +72,53 @@ const engine = new SearchEngine({
  // エンジンを初期化
  await engine.init();
 
- // トランザクション内でドキュメントを追加
- await engine.beginTransaction();
+ // バッチ操作でドキュメントを追加
+ await engine.startBatch();
  try {
  await engine.addDocuments([
  // ... ドキュメント
  ]);
- await engine.commit();
  } catch (error) {
- await engine.rollback();
+ // エラー処理
+ } finally {
+ // エラーが発生しても必ずバッチを終了し、インデックスが正しく再構築されるようにする
+ await engine.endBatch();
  }
  ```
 
+ ### カスタムトークナイザ
+
+ 特定の言語やトークン化の要件をサポートするために、カスタムトークナイザを設定できます。トークナイザは完全なドキュメントオブジェクトにアクセスできます:
+
+ ```typescript
+ import { SearchEngine } from 'gs-search';
+
+ // カスタムインデックストークナイザ:ドキュメントの複数フィールドを使用
+ const indexingTokenizer = (doc: { id: number; text: string; category: string; author: string }): string[] => {
+ // ドキュメントの全てのプロパティにアクセスできます
+ const fullText = `${doc.text} ${doc.category} ${doc.author}`;
+ return fullText.toLowerCase().split(/\s+/);
+ };
+
+ // カスタム検索トークナイザ:検索コンテキストをサポート
+ const searchTokenizer = (query: { text: string; language?: string; context?: string }): string[] => {
+ // クエリの言語やコンテキストに応じてトークン化を調整できます
+ const tokens = query.text.toLowerCase().split(/\s+/);
+ // コンテキストに応じて追加の検索語を追加
+ if (query.context === 'technical') {
+ tokens.push('technical');
+ }
+ return tokens;
+ };
+
+ // カスタムトークナイザを設定してエンジンを作成
+ const engine = new SearchEngine({
+ baseDir: 'search-data',
+ indexingTokenizer,
+ searchTokenizer
+ });
+ ```
+
  ## APIリファレンス
 
  ### SimpleSearch
@@ -91,22 +126,26 @@ try {
  - `constructor()`: 新しい検索エンジンインスタンスを作成
  - `addDocument(doc: IDocument): Promise<void>`: 単一のドキュメントを追加
  - `addDocuments(docs: IDocument[]): Promise<void>`: 複数のドキュメントを追加
- - `deleteDocument(id: number): Promise<void>`: ドキュメントを削除
+ - `addDocumentIfMissing(doc: IDocument): Promise<void>`: ドキュメントが存在しない場合は単一のドキュメントを追加
+ - `addDocumentsIfMissing(docs: IDocument[]): Promise<void>`: 複数のドキュメントを追加し、既存のドキュメントはスキップ
+ - `removeDocument(id: number): Promise<void>`: ドキュメントを削除
  - `search(query: string, limit?: number): Promise<IResult[]>`: ドキュメントを検索
  - `getStatus(): Promise<IStatus>`: 検索エンジンのステータスを取得
 
- ### CoreSearchEngine
+ ### SearchEngine
 
- - `constructor(options: ICoreSearchOptions)`: 新しいコアエンジンインスタンスを作成
+ - `constructor(options: ISearchEngineConfig)`: 新しいコアエンジンインスタンスを作成
  - `init(): Promise<void>`: エンジンを初期化
- - `addDocument(doc: IDocument): Promise<void>`: 単一のドキュメントを追加
- - `addDocuments(docs: IDocument[]): Promise<void>`: 複数のドキュメントを追加
- - `deleteDocument(id: number): Promise<void>`: ドキュメントを削除
+ - `addDocument(doc: IDocument): Promise<void>`: 単一ドキュメントを追加
+ - `addDocuments(docs: IDocument[]): Promise<void>`: 複数ドキュメントを追加
+ - `addDocumentIfMissing(doc: IDocument): Promise<void>`: ドキュメントが存在しない場合は単一のドキュメントを追加
+ - `addDocumentsIfMissing(docs: IDocument[]): Promise<void>`: 複数のドキュメントを追加し、既存のドキュメントはスキップ
+ - `removeDocument(id: number): Promise<void>`: ドキュメントを削除
  - `search(query: string, limit?: number): Promise<IResult[]>`: ドキュメントを検索
- - `getStatus(): Promise<IStatus>`: 検索エンジンのステータスを取得
- - `beginTransaction(): void`: トランザクションを開始
- - `commit(): Promise<void>`: トランザクションをコミット
- - `rollback(): void`: トランザクションをロールバック
+ - `getStatus(): Promise<IStatus>`: 検索エンジンの状態を取得
+ - `hasDocument(id: number): Promise<boolean>`: ドキュメントIDが追加されたことがあるかを確認(削除されたものも含む)
+ - `startBatch(): void`: バッチ操作を開始
+ - `endBatch(): Promise<void>`: バッチ操作を終了
 
  ## ストレージ
 
package/README.ko.md CHANGED
@@ -72,18 +72,53 @@ const engine = new SearchEngine({
  // 엔진 초기화
  await engine.init();
 
- // 트랜잭션 내에서 문서 추가
- await engine.beginTransaction();
+ // 일괄 작업으로 문서 추가
+ await engine.startBatch();
  try {
  await engine.addDocuments([
  // ... 문서
  ]);
- await engine.commit();
  } catch (error) {
- await engine.rollback();
+ // 오류 처리
+ } finally {
+ // 오류가 발생하더라도 항상 일괄 작업을 종료하여 인덱스가 올바르게 재구축되도록 합니다
+ await engine.endBatch();
  }
  ```
 
+ ### 커스텀 토크나이저
+
+ 특정 언어나 토크나이징 요구사항을 지원하기 위해 커스텀 토크나이저를 설정할 수 있습니다. 토크나이저는 전체 문서 객체에 액세스할 수 있습니다:
+
+ ```typescript
+ import { SearchEngine } from 'gs-search';
+
+ // 커스텀 인덱스 토크나이저: 문서의 text, category, author 필드를 사용
+ const indexingTokenizer = (doc: { id: number; text: string; category: string; author: string }): string[] => {
+ // 문서의 모든 속성에 액세스할 수 있습니다
+ const fullText = `${doc.text} ${doc.category} ${doc.author}`;
+ return fullText.toLowerCase().split(/\s+/);
+ };
+
+ // 커스텀 검색 토크나이저: 검색 컨텍스트 지원
+ const searchTokenizer = (query: { text: string; language?: string; context?: string }): string[] => {
+ // 쿼리의 언어나 컨텍스트에 따라 토크나이징을 조정할 수 있습니다
+ const tokens = query.text.toLowerCase().split(/\s+/);
+ // 컨텍스트에 따라 추가 검색어를 추가합니다
+ if (query.context === 'technical') {
+ tokens.push('technical');
+ }
+ return tokens;
+ };
+
+ // 커스텀 토크나이저를 설정하여 엔진 생성
+ const engine = new SearchEngine({
+ baseDir: 'search-data',
+ indexingTokenizer,
+ searchTokenizer
+ });
+ ```
+
  ## API 참조
 
  ### SimpleSearch
@@ -91,22 +126,26 @@ try {
  - `constructor()`: 새로운 검색 엔진 인스턴스 생성
  - `addDocument(doc: IDocument): Promise<void>`: 단일 문서 추가
  - `addDocuments(docs: IDocument[]): Promise<void>`: 여러 문서 추가
+ - `addDocumentIfMissing(doc: IDocument): Promise<void>`: 문서가 존재하지 않는 경우 단일 문서 추가
+ - `addDocumentsIfMissing(docs: IDocument[]): Promise<void>`: 여러 문서를 추가하고 기존 문서는 건너뜀
  - `deleteDocument(id: number): Promise<void>`: 문서 삭제
  - `search(query: string, limit?: number): Promise<IResult[]>`: 문서 검색
  - `getStatus(): Promise<IStatus>`: 검색 엔진 상태 가져오기
 
- ### CoreSearchEngine
+ ### SearchEngine
 
- - `constructor(options: ICoreSearchOptions)`: 새로운 코어 엔진 인스턴스 생성
+ - `constructor(options: ISearchEngineConfig)`: 새로운 코어 엔진 인스턴스 생성
  - `init(): Promise<void>`: 엔진 초기화
  - `addDocument(doc: IDocument): Promise<void>`: 단일 문서 추가
- - `addDocuments(docs: IDocument[]): Promise<void>`: 여러 문서 추가
- - `deleteDocument(id: number): Promise<void>`: 문서 삭제
+ - `addDocuments(docs: IDocument[]): Promise<void>`: 다중 문서 추가
+ - `addDocumentIfMissing(doc: IDocument): Promise<void>`: 문서가 존재하지 않는 경우 단일 문서 추가
+ - `addDocumentsIfMissing(docs: IDocument[]): Promise<void>`: 여러 문서를 추가하고 기존 문서는 건너뜀
+ - `removeDocument(id: number): Promise<void>`: 문서 삭제
  - `search(query: string, limit?: number): Promise<IResult[]>`: 문서 검색
- - `getStatus(): Promise<IStatus>`: 검색 엔진 상태 가져오기
- - `beginTransaction(): void`: 트랜잭션 시작
- - `commit(): Promise<void>`: 트랜잭션 커밋
- - `rollback(): void`: 트랜잭션 롤백
+ - `getStatus(): Promise<IStatus>`: 검색 엔진 상태 조회
+ - `hasDocument(id: number): Promise<boolean>`: 문서 ID가 추가된 적이 있는지 확인 (삭제된 문서도 포함)
+ - `startBatch(): void`: 배치 작업 시작
+ - `endBatch(): Promise<void>`: 배치 작업 종료
 
  ## 스토리지
 
package/README.md CHANGED
@@ -78,12 +78,50 @@ try {
  await engine.addDocuments([
  // ... documents
  ]);
- await engine.endBatch();
  } catch (error) {
  // Handle error
+ } finally {
+ // Always end batch to ensure index rebuilds properly
+ await engine.endBatch();
  }
  ```
 
+ ### Custom Tokenizers
+
+ You can configure custom tokenizers to support specific languages or tokenization requirements:
+
+ ```typescript
+ import { SearchEngine } from 'gs-search';
+
+ // Custom tokenizer that splits on whitespace and limits token length (as of 0.1.2, tokenizers receive an object with a `text` field rather than a bare string)
+ const customTokenizer = ({ text }: { text: string }): string[] => {
+ // Split by whitespace
+ const tokens: string[] = [];
+ const words = text.toLowerCase().split(/\s+/);
+
+ // Process each word, limiting token length to 5 characters
+ for (const word of words) {
+ if (word.length <= 5) {
+ tokens.push(word);
+ } else {
+ // Split long words character by character
+ for (let i = 0; i < word.length; i++) {
+ tokens.push(word[i]);
+ }
+ }
+ }
+
+ return tokens;
+ };
+
+ // Create engine with custom tokenizers
+ const engine = new SearchEngine({
+ baseDir: 'search-data',
+ indexingTokenizer: customTokenizer,
+ searchTokenizer: customTokenizer
+ });
+ ```
+
  ## API Reference
 
  ### SimpleSearch
@@ -92,18 +130,23 @@ try {
  - `configure(config: Partial<ISearchEngineConfig>): void`: Configure the search engine
  - `addDocument(doc: IDocument): Promise<void>`: Add a single document
  - `addDocuments(docs: IDocument[]): Promise<void>`: Add multiple documents
+ - `addDocumentIfMissing(doc: IDocument): Promise<void>`: Add a single document if it doesn't exist
+ - `addDocumentsIfMissing(docs: IDocument[]): Promise<void>`: Add multiple documents, skipping existing ones
  - `removeDocument(id: number): Promise<void>`: Delete a document
  - `search(query: string, limit?: number): Promise<IResult[]>`: Search for documents
  - `getStatus(): Promise<IStatus>`: Get search engine status
+ - `hasDocument(id: number): Promise<boolean>`: Check whether a document ID has ever been added (including deleted ones)
  - `startBatch(): void`: Start batch operations
  - `endBatch(): Promise<void>`: End batch operations
 
- ### CoreSearchEngine
+ ### SearchEngine
 
- - `constructor(options: ICoreSearchOptions)`: Create a new core engine instance
+ - `constructor(options: ISearchEngineConfig)`: Create a new core engine instance
  - `init(): Promise<void>`: Initialize the engine
  - `addDocument(doc: IDocument): Promise<void>`: Add a single document
  - `addDocuments(docs: IDocument[]): Promise<void>`: Add multiple documents
+ - `addDocumentIfMissing(doc: IDocument): Promise<void>`: Add a single document if it doesn't exist
+ - `addDocumentsIfMissing(docs: IDocument[]): Promise<void>`: Add multiple documents, skipping existing ones
  - `removeDocument(id: number): Promise<void>`: Delete a document
  - `search(query: string, limit?: number): Promise<IResult[]>`: Search for documents
  - `getStatus(): Promise<IStatus>`: Get search engine status
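
Taken together, the README changes above swap the transaction API for a re-runnable batch API. Below is a minimal sketch of the combined 0.1.2 flow, based only on the methods documented above; the ids and text are illustrative:

```typescript
import { SearchEngine } from 'gs-search';

const engine = new SearchEngine({ baseDir: 'search-data' });
await engine.init();

await engine.startBatch();
try {
  // Idempotent: ids that were already indexed are skipped, so this call
  // can be retried after a partial failure without throwing.
  await engine.addDocumentsIfMissing([
    { id: 1, text: 'hello world' },
    { id: 2, text: 'batch indexing example' },
  ]);
} finally {
  // Always end the batch so the index segments are rebuilt and metadata saved.
  await engine.endBatch();
}

// true if the id was ever added, even if it has since been removed.
const known = await engine.hasDocument(1);
```

Because `addDocumentsIfMissing` skips ids that were ever added, re-running the whole block after a crash cannot raise the duplicate-id error that `addDocuments` throws.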
package/README.zh-CN.md CHANGED
@@ -78,9 +78,11 @@ try {
  await engine.addDocuments([
  // ... 文档
  ]);
- await engine.endBatch();
  } catch (error) {
  // 处理错误
+ } finally {
+ // 始终结束批处理以确保索引正确重建
+ await engine.endBatch();
  }
  ```
 
@@ -111,30 +113,81 @@ const storage = new CustomStorage();
  const engine = new SearchEngine({ storage });
  ```
 
- ### 事务支持
+ ### 批处理操作
 
- 使用事务进行批量操作以提高性能:
+ 使用批处理操作进行高效的文档索引:
 
  ```typescript
- await engine.startTransaction();
+ // 开始批处理操作
+ await engine.startBatch();
 
  try {
  // 批量添加文档
  for (let i = 0; i < 1000; i++) {
  await engine.addDocuments([{ id: i, text: `文档 ${i}` }]);
  }
-
- // 提交事务
- await engine.commitTransaction();
  } catch (error) {
- // 回滚事务
- await engine.rollbackTransaction();
+ // 处理错误
+ console.error('批处理操作失败:', error);
+ } finally {
+ // 无论是否发生错误,都必须结束批处理以确保索引正常重建
+ await engine.endBatch();
  }
  ```
 
  ## 自定义分词器
 
- 您可以通过配置自定义分词器来支持特定的语言或分词需求。以下是一个简单的正则分词器示例,按空格和字符分词,且最长token不超过5字符:
+ ### 支持完整文档对象的分词器
+
+ 您可以通过配置自定义分词器来支持特定的语言或分词需求。分词器可以访问完整的文档对象,让您能够基于文档的多个属性进行分词:
+
+ ```typescript
+ import { SearchEngine } from 'gs-search';
+
+ // 自定义索引分词器:使用文档的text、category和author字段进行分词
+ const indexingTokenizer = (doc: { id: number; text: string; category: string; author: string }): string[] => {
+ // 可以访问文档的所有属性
+ const fullText = `${doc.text} ${doc.category} ${doc.author}`;
+ return fullText.toLowerCase().split(/\s+/);
+ };
+
+ // 自定义搜索分词器:支持搜索上下文
+ const searchTokenizer = (query: { text: string; language?: string; context?: string }): string[] => {
+ // 可以根据查询的语言或上下文调整分词
+ const tokens = query.text.toLowerCase().split(/\s+/);
+ // 根据上下文添加额外的搜索词
+ if (query.context === 'technical') {
+ tokens.push('technical');
+ }
+ return tokens;
+ };
+
+ // 创建引擎并配置自定义分词器
+ const engine = new SearchEngine({
+ baseDir: 'search-data',
+ indexingTokenizer,
+ searchTokenizer
+ });
+
+ // 索引包含额外属性的文档
+ await engine.addDocument({
+ id: 1,
+ text: '这是一个技术文档',
+ category: '技术',
+ author: '张三'
+ });
+
+ // 使用包含上下文的查询进行搜索
+ const results = await engine.search({
+ text: '技术',
+ language: 'zh',
+ context: 'technical'
+ });
+ ```
+
+ ### 简单的字符/空格分词器
+
+ 以下是一个简单的正则分词器示例,按空格和字符分词,且最长token不超过5字符:
 
  ```typescript
  import { SimpleSearch } from 'gs-search';
@@ -171,22 +224,27 @@ SimpleSearch.configure({
 
  ### SimpleSearch
 
- **静态方法(无需创建实例):**
+ **静态方法(无需实例创建):**
  - `configure(config: Partial<ISearchEngineConfig>): void`: 配置搜索引擎
  - `addDocument(doc: IDocument): Promise<void>`: 添加单个文档
- - `addDocuments(docs: IDocument[]): Promise<void>`: 批量添加文档
+ - `addDocuments(docs: IDocument[]): Promise<void>`: 添加多个文档
+ - `addDocumentIfMissing(doc: IDocument): Promise<void>`: 如果文档不存在则添加单个文档
+ - `addDocumentsIfMissing(docs: IDocument[]): Promise<void>`: 添加多个文档,跳过已存在的文档
  - `removeDocument(id: number): Promise<void>`: 删除文档
  - `search(query: string, limit?: number): Promise<IResult[]>`: 搜索文档
  - `getStatus(): Promise<IStatus>`: 获取搜索引擎状态
- - `startBatch(): void`: 开始批处理操作
+ - `hasDocument(id: number): Promise<boolean>`: 检查文档ID是否曾经添加过(包括已删除的)
+ - `startBatch(): void`: 开始批量操作
  - `endBatch(): Promise<void>`: 结束批处理操作
 
- ### CoreSearchEngine
+ ### SearchEngine
 
- - `constructor(options: ICoreSearchOptions)`: 创建核心引擎实例
+ - `constructor(options: ISearchEngineConfig)`: 创建一个新的核心引擎实例
  - `init(): Promise<void>`: 初始化引擎
  - `addDocument(doc: IDocument): Promise<void>`: 添加单个文档
- - `addDocuments(docs: IDocument[]): Promise<void>`: 批量添加文档
+ - `addDocuments(docs: IDocument[]): Promise<void>`: 添加多个文档
+ - `addDocumentIfMissing(doc: IDocument): Promise<void>`: 如果文档不存在则添加单个文档
+ - `addDocumentsIfMissing(docs: IDocument[]): Promise<void>`: 添加多个文档,跳过已存在的文档
  - `removeDocument(id: number): Promise<void>`: 删除文档
  - `search(query: string, limit?: number): Promise<IResult[]>`: 搜索文档
  - `getStatus(): Promise<IStatus>`: 获取搜索引擎状态
package/lib/index.cjs CHANGED
@@ -1 +1 @@
- "use strict";Object.create,Object.defineProperty,Object.getOwnPropertyDescriptor,Object.getOwnPropertyNames,Object.getPrototypeOf,Object.prototype.hasOwnProperty;class t{#t;constructor(t){this.#t=t}async#e(){return await(await navigator.storage.getDirectory()).getDirectoryHandle(this.#t,{create:!0})}async write(t,e){const s=await(await(await this.#e()).getFileHandle(t,{create:!0})).createWritable();await s.write(e),await s.close()}async append(t,e){const s=await this.#e();let i;try{i=await s.getFileHandle(t,{create:!0})}catch{i=await s.getFileHandle(t,{create:!0})}const n=await i.getFile(),a=await i.createWritable({keepExistingData:!0});await a.seek(n.size),await a.write(e),await a.close()}async read(t){const e=await this.#e();try{return await(await(await e.getFileHandle(t)).getFile()).arrayBuffer()}catch{return null}}async readRange(t,e,s){const i=await this.#e();try{return await(await(await i.getFileHandle(t)).getFile()).slice(e,s).arrayBuffer()}catch{return null}}async remove(t){const e=await this.#e();try{await e.removeEntry(t)}catch{}}async listFiles(){const t=await this.#e(),e=[];for await(const s of t.keys())e.push(s);return e}async clearAll(){const t=await this.#e();for await(const e of t.keys())await t.removeEntry(e,{recursive:!0})}async getFileSize(t){const e=await this.#e();try{return(await(await e.getFileHandle(t)).getFile()).size}catch{return 0}}}class e{#s=null;#i=null;#t;#n="";constructor(t){this.#t=t}async#a(){if(this.#s)return;const t=await import("node:fs"),e=await import("node:path");this.#s=t.promises,this.#i=e.default||e,this.#n=this.#i.join(process.cwd(),this.#t);try{await this.#s.access(this.#n)}catch{await this.#s.mkdir(this.#n,{recursive:!0})}}#r(t){return this.#i.join(this.#n,t)}async write(t,e){await this.#a(),await this.#s.writeFile(this.#r(t),Buffer.from(e))}async append(t,e){await this.#a(),await this.#s.appendFile(this.#r(t),Buffer.from(e))}async read(t){await this.#a();try{const e=await this.#s.readFile(this.#r(t));return e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)}catch{return null}}async readRange(t,e,s){await this.#a();try{const i=await this.#s.open(this.#r(t),"r"),n=s-e,a=Buffer.alloc(n);return await i.read(a,0,n,e),await i.close(),a.buffer.slice(a.byteOffset,a.byteOffset+a.byteLength)}catch{return null}}async remove(t){await this.#a();try{await this.#s.unlink(this.#r(t))}catch{}}async listFiles(){await this.#a();try{return await this.#s.readdir(this.#n)}catch{return[]}}async clearAll(){await this.#a();try{const t=await this.#s.readdir(this.#n);for(const e of t)await this.#s.unlink(this.#i.join(this.#n,e))}catch{}}async getFileSize(t){await this.#a();try{return(await this.#s.stat(this.#r(t))).size}catch{return 0}}}const s="search_meta.json",i="deleted_ids.bin",n="added_ids.bin";class a{#o;#h={wordSegments:[],charSegments:[]};#c=new Set;#d=new Set;constructor(t){this.#o=t}async load(){const t=await this.#o.read(s);if(t){const e=(new TextDecoder).decode(t);this.#h=JSON.parse(e)}else this.#h={wordSegments:[],charSegments:[]};const e=await this.#o.read(i);if(e){const t=new DataView(e);let s=0;const i=e.byteLength;for(;s<i&&!(s+4>i);){const e=t.getUint32(s,!0);this.#c.add(e),s+=4,s<i&&30===t.getUint8(s)&&(s+=1)}}const a=await this.#o.read(n);if(a){const t=new DataView(a);let e=0;const s=a.byteLength;for(;e<s&&!(e+4>s);){const i=t.getUint32(e,!0);this.#d.add(i),e+=4,e<s&&30===t.getUint8(e)&&(e+=1)}}}async save(){const t=JSON.stringify(this.#h);if(await this.#o.write(s,(new TextEncoder).encode(t).buffer),0===this.#c.size)await 
this.#o.remove(i);else{const t=4*this.#c.size+this.#c.size,e=new ArrayBuffer(t),s=new DataView(e);let n=0;for(const t of this.#c)s.setUint32(n,t,!0),n+=4,s.setUint8(n,30),n+=1;await this.#o.write(i,e)}if(0===this.#d.size)await this.#o.remove(n);else{const t=4*this.#d.size+this.#d.size,e=new ArrayBuffer(t),s=new DataView(e);let i=0;for(const t of this.#d)s.setUint32(i,t,!0),i+=4,s.setUint8(i,30),i+=1;await this.#o.write(n,e)}}getSegments(t){return"word"===t?this.#h.wordSegments:this.#h.charSegments}getDeletedIds(){return this.#c}addDeletedId(t){this.#c.add(t)}isDeleted(t){return this.#c.has(t)}addAddedId(t){this.#d.add(t)}removeAddedId(t){this.#d.delete(t)}isAdded(t){return this.#d.has(t)}getAddedIds(){return this.#d}getLastSegmentInfo(t){const e=this.getSegments(t);return 0===e.length?null:e[e.length-1]}updateSegment(t,e,s,i,n,a){const r="word"===t?this.#h.wordSegments:this.#h.charSegments;if(a)r.push({filename:e,start:s,end:i,tokenCount:n});else{const t=r[r.length-1];t&&t.filename===e&&(t.end=i,t.tokenCount=n)}}reset(){this.#h={wordSegments:[],charSegments:[]},this.#c.clear(),this.#d.clear()}}class r{static SEPARATOR=30;#o;constructor(t){this.#o=t}async appendBatch(t,e){if(0===e.length)return await this.#o.getFileSize(t);const s=new TextEncoder;let i=0;for(const t of e){i+=8;for(const e of t.tokens){i+=2+Math.min(s.encode(e).byteLength,65535)}i+=1}const n=new Uint8Array(i);let a=0;for(const t of e){const e=[];for(const i of t.tokens){const t=s.encode(i),n=t.byteLength>65535?t.slice(0,65535):t;e.push(n)}const i=new DataView(n.buffer,a);i.setUint32(0,t.id,!0),i.setUint32(4,e.length,!0),a+=8;for(const t of e)new DataView(n.buffer,a).setUint16(0,t.byteLength,!0),a+=2,n.set(t,a),a+=t.byteLength;n[a++]=r.SEPARATOR}return await this.#o.append(t,n.buffer),await this.#o.getFileSize(t)}async readRange(t,e,s){const i=await this.#o.readRange(t,e,s);if(!i||0===i.byteLength)return[];const n=new DataView(i),a=new Uint8Array(i),o=new TextDecoder,h=[];let c=0;const d=i.byteLength;for(;c<d&&!(c+8>d);){const t=n.getUint32(c,!0);c+=4;const e=n.getUint32(c,!0);c+=4;const s=[];for(let t=0;t<e&&!(c+2>d);t++){const t=n.getUint16(c,!0);if(c+=2,c+t>d)break;const e=new Uint8Array(i,c,t);s.push(o.decode(e)),c+=t}c<d&&a[c]===r.SEPARATOR&&(c+=1),h.push({id:t,tokens:s})}return h}async getCurrentSize(t){return await this.#o.getFileSize(t)}}class o{#g;#o;#l=null;#f=null;static hash(t){let e=5381;for(let s=0;s<t.length;s++)e=(e<<5)+e^t.charCodeAt(s);return e>>>0}constructor(t,e){this.#g=t,this.#o=e}async loadIndex(){return!!this.#l||(this.#l=await this.#o.read(this.#g),!!this.#l&&(this.#f=new DataView(this.#l),!0))}async buildAndSave(t){const e=new Map;for(const s of t){const t=new Map;for(const i of s.tokens)if(!t.has(i)){t.set(i,!0);const n=o.hash(i);e.has(n)||e.set(n,[]),e.get(n).push(s.id)}}const s=Array.from(e.keys()).sort((t,e)=>t-e);let i=0;const n=new Array(s.length);for(let t=0;t<s.length;t++){const a=s[t],r=e.get(a);n[t]=r,i+=r.length}const a=12*s.length,r=new ArrayBuffer(8+a+4*i),h=new DataView(r);h.setUint32(0,1229866072),h.setUint32(4,s.length);let c=8,d=8+a;for(let t=0;t<s.length;t++){const e=s[t],i=n[t];h.setUint32(c,e),h.setUint32(c+4,d),h.setUint32(c+8,i.length),c+=12;for(let t=0;t<i.length;t++)h.setUint32(d,i[t],!0),d+=4}await this.#o.write(this.#g,r),this.#l=r,this.#f=h}search(t){if(!this.#f||!this.#l)return[];const e=o.hash(t);let s=0,i=this.#f.getUint32(4)-1;for(;s<=i;){const t=s+i>>>1,n=8+12*t,a=this.#f.getUint32(n);if(a<e)s=t+1;else{if(!(a>e)){const 
t=this.#f.getUint32(n+4),e=this.#f.getUint32(n+8),s=[];for(let i=0;i<e;i++)s.push(this.#f.getUint32(t+4*i,!0));return s}i=t-1}}return[]}}const h="word_cache.bin",c="char_cache.bin";class d{#o;#h;#w;#u;#m=!1;#y;#p=!1;#S={word:0,char:0};constructor(s){if(!s.baseDir)throw new Error("SearchEngine requires 'baseDir' in config.");if(this.#y={wordSegmentTokenThreshold:1e5,charSegmentTokenThreshold:5e5,minWordTokenSave:0,minCharTokenSave:0,...s},(this.#y.minWordTokenSave||0)>=(this.#y.wordSegmentTokenThreshold||1e5))throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");if((this.#y.minCharTokenSave||0)>=(this.#y.charSegmentTokenThreshold||5e5))throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");let i=null;if(this.#y.storage&&("object"==typeof this.#y.storage?i=this.#y.storage:"browser"===this.#y.storage?i=new t(this.#y.baseDir):"node"===this.#y.storage&&(i=new e(this.#y.baseDir))),!i){const s=typeof navigator<"u"&&navigator?.storage?.getDirectory instanceof Function,n=typeof process<"u"&&null!=process.versions&&null!=process.versions.node;s?i=new t(this.#y.baseDir):n&&(i=new e(this.#y.baseDir))}if(!i)throw new Error('Storage initialization failed. Please configure "storage" explicitly or ensure you are in a supported environment (Browser/Node).');this.#o=i,this.#h=new a(this.#o),this.#w=new r(this.#o),this.#u=new Map}async init(){if(this.#m)return;await this.#h.load();const t=[...this.#h.getSegments("word"),...this.#h.getSegments("char")];for(const e of t)this.#u.has(e.filename)||this.#u.set(e.filename,new o(e.filename,this.#o)),await this.#u.get(e.filename).loadIndex();this.#m=!0}startBatch(){this.#p=!0,this.#S={word:0,char:0}}async endBatch(){this.#p=!1,this.#S.word>0&&await this.#D("word",this.#S.word),this.#S.char>0&&await this.#D("char",this.#S.char),this.#S={word:0,char:0},await this.#h.save()}#k(t){if(typeof Intl<"u"&&Intl.Segmenter){const e=new Intl.Segmenter([],{granularity:"word"});return Array.from(e.segment(t)).filter(t=>t.isWordLike).map(t=>t.segment.toLowerCase())}return t.toLowerCase().split(/[^a-z0-9\u4e00-\u9fa5]+/g).filter(t=>t.length>0)}#b(t){return this.#y.indexingTokenizer?this.#y.indexingTokenizer(t):this.#k(t)}#T(t){return this.#y.searchTokenizer?this.#y.searchTokenizer(t):this.#y.indexingTokenizer?this.#y.indexingTokenizer(t):this.#k(t)}async addDocument(t){return this.addDocuments([t])}async addDocuments(t){if(this.#m||await this.init(),0===t.length)return;const e=this.#h.getDeletedIds(),s=[],i=[];for(const n of t){if(e.has(n.id))throw new Error(`Document ID ${n.id} has been deleted and cannot be re-added.`);if(this.#h.isAdded(n.id))throw new Error(`Document ID ${n.id} already exists.`);const t=this.#b(n.text),a=[],r=[];for(const e of t)e.length>1?a.push(e):1===e.length&&r.push(e);a.length>0&&s.push({id:n.id,tokens:a}),r.length>0&&i.push({id:n.id,tokens:r})}let n=0,a=0;if(s.length>0){await this.#w.appendBatch(h,s);for(const t of s)n+=t.tokens.length}if(i.length>0){await this.#w.appendBatch(c,i);for(const t of i)a+=t.tokens.length}for(const e of t)this.#h.addAddedId(e.id);this.#p?(this.#S.word+=n,this.#S.char+=a):(n>0&&await this.#D("word",n),a>0&&await this.#D("char",a),await this.#h.save())}async#D(t,e){const s="word"===t?h:c,i=await this.#w.getCurrentSize(s),n="word"===t?this.#y.wordSegmentTokenThreshold||1e5:this.#y.charSegmentTokenThreshold||5e5,a="word"===t?this.#y.minWordTokenSave||0:this.#y.minCharTokenSave||0,r=this.#h.getLastSegmentInfo(t);let d,g,l,f;const w=()=>{const 
e=this.#h.getSegments(t).length+1;return`${t}_seg_${e}.bin`};if(r){const t=r.tokenCount;t>=n||t+e>=n?(d=w(),l=!0,g=r.end,f=e):(d=r.filename,l=!1,g=r.start,f=t+e)}else d=w(),l=!0,g=0,f=e;if(f<a)return void this.#h.updateSegment(t,d,g,i,f,l);const u=await this.#w.readRange(s,g,i);let m=this.#u.get(d);m||(m=new o(d,this.#o),this.#u.set(d,m)),await m.buildAndSave(u),this.#h.updateSegment(t,d,g,i,f,l)}async search(t,e){this.#m||await this.init();const s=this.#T(t),i=s.filter(t=>t.length>1),n=s.filter(t=>1===t.length),a=this.#h.getDeletedIds(),r=new Map,h=new Map,c=t=>{const e=this.#h.getSegments(t);for(const t of e){const e=t.filename;!this.#u.has(e)&&!h.has(e)&&h.set(e,new o(e,this.#o))}};c("word"),c("char"),await Promise.all(Array.from(h.entries()).map(([t,e])=>e.loadIndex().then(s=>{s&&this.#u.set(t,e)})));const d=async(t,e)=>{if(0===e.length)return;const s=this.#h.getSegments(t);for(const t of s){const s=t.filename,i=this.#u.get(s);if(i)for(const t of e){const e=i.search(t),s=1+.1*t.length;for(const i of e)if(!a.has(i))if(r.has(i)){const e=r.get(i);e.score+=s,e.tokens.add(t)}else r.set(i,{score:0,tokens:new Set([t])})}}};await d("word",i),await d("char",n);const g=[];return r.forEach((t,e)=>{g.push({id:e,score:t.score,tokens:Array.from(t.tokens)})}),g.sort((t,e)=>e.score-t.score),"number"==typeof e&&e>0?g.slice(0,e):g}async removeDocument(t){this.#m||await this.init(),this.#h.addDeletedId(t),this.#h.removeAddedId(t),await this.#h.save()}async clearAll(){await this.#o.clearAll(),this.#u.clear(),this.#h.reset(),this.#m=!1,this.#p=!1,this.#S={word:0,char:0}}async getStatus(){return this.#m||await this.init(),{wordSegments:this.#h.getSegments("word").length,charSegments:this.#h.getSegments("char").length,deleted:this.#h.getDeletedIds().size,wordCacheSize:await this.#w.getCurrentSize(h),charCacheSize:await this.#w.getCurrentSize(c),inBatch:this.#p}}}exports.BrowserStorage=t,exports.NodeStorage=e,exports.SearchEngine=d,exports.SimpleSearch=class{static#I=null;static#v={baseDir:"simple_search_data",wordSegmentTokenThreshold:1e5,minWordTokenSave:0};static configure(t){const e={...this.#v,...t};this.#I=new d(e)}static#z(){return this.#I||(this.#I=new d(this.#v)),this.#I}static async startBatch(){this.#z().startBatch()}static async endBatch(){return this.#z().endBatch()}static async addDocument(t){return this.#z().addDocument(t)}static async addDocuments(t){return this.#z().addDocuments(t)}static async search(t,e){return this.#z().search(t,e)}static async removeDocument(t){return this.#z().removeDocument(t)}static async clearAll(){return this.#z().clearAll()}static async getStatus(){return this.#z().getStatus()}};
+ "use strict";Object.create,Object.defineProperty,Object.getOwnPropertyDescriptor,Object.getOwnPropertyNames,Object.getPrototypeOf,Object.prototype.hasOwnProperty;class t{#t;constructor(t){this.#t=t}async#e(){return await(await navigator.storage.getDirectory()).getDirectoryHandle(this.#t,{create:!0})}async write(t,e){const s=await(await(await this.#e()).getFileHandle(t,{create:!0})).createWritable();await s.write(e),await s.close()}async append(t,e){const s=await this.#e();let i;try{i=await s.getFileHandle(t,{create:!0})}catch{i=await s.getFileHandle(t,{create:!0})}const n=await i.getFile(),a=await i.createWritable({keepExistingData:!0});await a.seek(n.size),await a.write(e),await a.close()}async read(t){const e=await this.#e();try{return await(await(await e.getFileHandle(t)).getFile()).arrayBuffer()}catch{return null}}async readRange(t,e,s){const i=await this.#e();try{return await(await(await i.getFileHandle(t)).getFile()).slice(e,s).arrayBuffer()}catch{return null}}async remove(t){const e=await this.#e();try{await e.removeEntry(t)}catch{}}async listFiles(){const t=await this.#e(),e=[];for await(const s of t.keys())e.push(s);return e}async clearAll(){const t=await this.#e();for await(const e of t.keys())await t.removeEntry(e,{recursive:!0})}async getFileSize(t){const e=await this.#e();try{return(await(await e.getFileHandle(t)).getFile()).size}catch{return 0}}}class e{#s=null;#i=null;#t;#n="";constructor(t){this.#t=t}async#a(){if(this.#s)return;const t=await import("node:fs"),e=await import("node:path");this.#s=t.promises,this.#i=e.default||e,this.#n=this.#i.join(process.cwd(),this.#t);try{await this.#s.access(this.#n)}catch{await this.#s.mkdir(this.#n,{recursive:!0})}}#r(t){return this.#i.join(this.#n,t)}async write(t,e){await this.#a(),await this.#s.writeFile(this.#r(t),Buffer.from(e))}async append(t,e){await this.#a(),await this.#s.appendFile(this.#r(t),Buffer.from(e))}async read(t){await this.#a();try{const e=await this.#s.readFile(this.#r(t));return e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)}catch{return null}}async readRange(t,e,s){await this.#a();try{const i=await this.#s.open(this.#r(t),"r"),n=s-e,a=Buffer.alloc(n);return await i.read(a,0,n,e),await i.close(),a.buffer.slice(a.byteOffset,a.byteOffset+a.byteLength)}catch{return null}}async remove(t){await this.#a();try{await this.#s.unlink(this.#r(t))}catch{}}async listFiles(){await this.#a();try{return await this.#s.readdir(this.#n)}catch{return[]}}async clearAll(){await this.#a();try{const t=await this.#s.readdir(this.#n);for(const e of t)await this.#s.unlink(this.#i.join(this.#n,e))}catch{}}async getFileSize(t){await this.#a();try{return(await this.#s.stat(this.#r(t))).size}catch{return 0}}}const s="search_meta.json",i="deleted_ids.bin",n="added_ids.bin";class a{#o;#h={wordSegments:[],charSegments:[]};#c=new Set;#d=new Set;constructor(t){this.#o=t}async load(){const t=await this.#o.read(s);if(t){const e=(new TextDecoder).decode(t);this.#h=JSON.parse(e)}else this.#h={wordSegments:[],charSegments:[]};const e=await this.#o.read(i);if(e){const t=new DataView(e);let s=0;const i=e.byteLength;for(;s<i&&!(s+4>i);){const e=t.getUint32(s,!0);this.#c.add(e),s+=4,s<i&&30===t.getUint8(s)&&(s+=1)}}const a=await this.#o.read(n);if(a){const t=new DataView(a);let e=0;const s=a.byteLength;for(;e<s&&!(e+4>s);){const i=t.getUint32(e,!0);this.#d.add(i),e+=4,e<s&&30===t.getUint8(e)&&(e+=1)}}}async save(){const t=JSON.stringify(this.#h);if(await this.#o.write(s,(new TextEncoder).encode(t).buffer),0===this.#c.size)await 
this.#o.remove(i);else{const t=4*this.#c.size+this.#c.size,e=new ArrayBuffer(t),s=new DataView(e);let n=0;for(const t of this.#c)s.setUint32(n,t,!0),n+=4,s.setUint8(n,30),n+=1;await this.#o.write(i,e)}if(0===this.#d.size)await this.#o.remove(n);else{const t=4*this.#d.size+this.#d.size,e=new ArrayBuffer(t),s=new DataView(e);let i=0;for(const t of this.#d)s.setUint32(i,t,!0),i+=4,s.setUint8(i,30),i+=1;await this.#o.write(n,e)}}getSegments(t){return"word"===t?this.#h.wordSegments:this.#h.charSegments}getDeletedIds(){return this.#c}addDeletedId(t){this.#c.add(t)}isDeleted(t){return this.#c.has(t)}addAddedId(t){this.#d.add(t)}removeAddedId(t){this.#d.delete(t)}isAdded(t){return this.#d.has(t)}getAddedIds(){return this.#d}hasDocument(t){return this.#d.has(t)||this.#c.has(t)}getLastSegmentInfo(t){const e=this.getSegments(t);return 0===e.length?null:e[e.length-1]}updateSegment(t,e,s,i,n,a){const r="word"===t?this.#h.wordSegments:this.#h.charSegments;if(a)r.push({filename:e,start:s,end:i,tokenCount:n});else{const t=r[r.length-1];t&&t.filename===e&&(t.end=i,t.tokenCount=n)}}reset(){this.#h={wordSegments:[],charSegments:[]},this.#c.clear(),this.#d.clear()}}class r{static SEPARATOR=30;#o;constructor(t){this.#o=t}async appendBatch(t,e){if(0===e.length)return await this.#o.getFileSize(t);const s=new TextEncoder;let i=0;for(const t of e){i+=8;for(const e of t.tokens){i+=2+Math.min(s.encode(e).byteLength,65535)}i+=1}const n=new Uint8Array(i);let a=0;for(const t of e){const e=[];for(const i of t.tokens){const t=s.encode(i),n=t.byteLength>65535?t.slice(0,65535):t;e.push(n)}const i=new DataView(n.buffer,a);i.setUint32(0,t.id,!0),i.setUint32(4,e.length,!0),a+=8;for(const t of e)new DataView(n.buffer,a).setUint16(0,t.byteLength,!0),a+=2,n.set(t,a),a+=t.byteLength;n[a++]=r.SEPARATOR}return await this.#o.append(t,n.buffer),await this.#o.getFileSize(t)}async readRange(t,e,s){const i=await this.#o.readRange(t,e,s);if(!i||0===i.byteLength)return[];const n=new DataView(i),a=new Uint8Array(i),o=new TextDecoder,h=[];let c=0;const d=i.byteLength;for(;c<d&&!(c+8>d);){const t=n.getUint32(c,!0);c+=4;const e=n.getUint32(c,!0);c+=4;const s=[];for(let t=0;t<e&&!(c+2>d);t++){const t=n.getUint16(c,!0);if(c+=2,c+t>d)break;const e=new Uint8Array(i,c,t);s.push(o.decode(e)),c+=t}c<d&&a[c]===r.SEPARATOR&&(c+=1),h.push({id:t,tokens:s})}return h}async getCurrentSize(t){return await this.#o.getFileSize(t)}}class o{#g;#o;#l=null;#f=null;static hash(t){let e=5381;for(let s=0;s<t.length;s++)e=(e<<5)+e^t.charCodeAt(s);return e>>>0}constructor(t,e){this.#g=t,this.#o=e}async loadIndex(){return!!this.#l||(this.#l=await this.#o.read(this.#g),!!this.#l&&(this.#f=new DataView(this.#l),!0))}async buildAndSave(t){const e=new Map;for(const s of t){const t=new Map;for(const i of s.tokens)if(!t.has(i)){t.set(i,!0);const n=o.hash(i);e.has(n)||e.set(n,[]),e.get(n).push(s.id)}}const s=Array.from(e.keys()).sort((t,e)=>t-e);let i=0;const n=new Array(s.length);for(let t=0;t<s.length;t++){const a=s[t],r=e.get(a);n[t]=r,i+=r.length}const a=12*s.length,r=new ArrayBuffer(8+a+4*i),h=new DataView(r);h.setUint32(0,1229866072),h.setUint32(4,s.length);let c=8,d=8+a;for(let t=0;t<s.length;t++){const e=s[t],i=n[t];h.setUint32(c,e),h.setUint32(c+4,d),h.setUint32(c+8,i.length),c+=12;for(let t=0;t<i.length;t++)h.setUint32(d,i[t],!0),d+=4}await this.#o.write(this.#g,r),this.#l=r,this.#f=h}search(t){if(!this.#f||!this.#l)return[];const e=o.hash(t);let s=0,i=this.#f.getUint32(4)-1;for(;s<=i;){const 
t=s+i>>>1,n=8+12*t,a=this.#f.getUint32(n);if(a<e)s=t+1;else{if(!(a>e)){const t=this.#f.getUint32(n+4),e=this.#f.getUint32(n+8),s=[];for(let i=0;i<e;i++)s.push(this.#f.getUint32(t+4*i,!0));return s}i=t-1}}return[]}}const h="word_cache.bin",c="char_cache.bin";class d{#o;#h;#w;#u;#m=!1;#y;#p=!1;#D={word:0,char:0};constructor(s){if(!s.baseDir)throw new Error("SearchEngine requires 'baseDir' in config.");if(this.#y={wordSegmentTokenThreshold:1e5,charSegmentTokenThreshold:5e5,minWordTokenSave:0,minCharTokenSave:0,...s},(this.#y.minWordTokenSave||0)>=(this.#y.wordSegmentTokenThreshold||1e5))throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");if((this.#y.minCharTokenSave||0)>=(this.#y.charSegmentTokenThreshold||5e5))throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");let i=null;if(this.#y.storage&&("object"==typeof this.#y.storage?i=this.#y.storage:"browser"===this.#y.storage?i=new t(this.#y.baseDir):"node"===this.#y.storage&&(i=new e(this.#y.baseDir))),!i){const s=typeof navigator<"u"&&navigator?.storage?.getDirectory instanceof Function,n=typeof process<"u"&&null!=process.versions&&null!=process.versions.node;s?i=new t(this.#y.baseDir):n&&(i=new e(this.#y.baseDir))}if(!i)throw new Error('Storage initialization failed. Please configure "storage" explicitly or ensure you are in a supported environment (Browser/Node).');this.#o=i,this.#h=new a(this.#o),this.#w=new r(this.#o),this.#u=new Map}async init(){if(this.#m)return;await this.#h.load();const t=[...this.#h.getSegments("word"),...this.#h.getSegments("char")];for(const e of t)this.#u.has(e.filename)||this.#u.set(e.filename,new o(e.filename,this.#o)),await this.#u.get(e.filename).loadIndex();this.#m=!0}startBatch(){this.#p=!0,this.#D={word:0,char:0}}async endBatch(){this.#p=!1,this.#D.word>0&&await this.#S("word",this.#D.word),this.#D.char>0&&await this.#S("char",this.#D.char),this.#D={word:0,char:0},await this.#h.save()}#k(t){try{if(typeof Intl<"u"&&"function"==typeof Intl.Segmenter&&"function"==typeof Array.from){const e=new Intl.Segmenter([],{granularity:"word"}).segment(t);if("object"==typeof e&&null!==e)return Array.from(e).filter(t=>t?.isWordLike).map(t=>t?.segment?.toLowerCase()||"")}}catch{}return t.toLowerCase().split(/[^a-z0-9\u4e00-\u9fa5]+/g).filter(t=>t.length>0)}#I(t){return this.#y.indexingTokenizer?this.#y.indexingTokenizer(t):this.#k(t.text)}#b(t){return this.#y.searchTokenizer?this.#y.searchTokenizer(t):this.#I(t)}async addDocument(t){return this.addDocuments([t])}async addDocumentIfMissing(t){return this.addDocumentsIfMissing([t])}async addDocumentsIfMissing(t){if(this.#m||await this.init(),0===t.length)return;const e=this.#h.getDeletedIds(),s=[],i=[],n=[];for(const a of t){if(e.has(a.id)||this.#h.isAdded(a.id))continue;const t=this.#I(a),r=[],o=[];for(const e of t)e.length>1?r.push(e):1===e.length&&o.push(e);r.length>0&&s.push({id:a.id,tokens:r}),o.length>0&&i.push({id:a.id,tokens:o}),n.push(a)}if(0===n.length)return;let a=0,r=0;if(s.length>0){await this.#w.appendBatch(h,s);for(const t of s)a+=t.tokens.length}if(i.length>0){await this.#w.appendBatch(c,i);for(const t of i)r+=t.tokens.length}for(const t of n)this.#h.addAddedId(t.id);this.#p?(this.#D.word+=a,this.#D.char+=r):(a>0&&await this.#S("word",a),r>0&&await this.#S("char",r),await this.#h.save())}async addDocuments(t){if(this.#m||await this.init(),0===t.length)return;const e=this.#h.getDeletedIds(),s=[],i=[];for(const n of t){if(e.has(n.id))throw new Error(`Document ID ${n.id} has been deleted and cannot be 
re-added.`);if(this.#h.isAdded(n.id))throw new Error(`Document ID ${n.id} already exists.`);const t=this.#I(n),a=[],r=[];for(const e of t)e.length>1?a.push(e):1===e.length&&r.push(e);a.length>0&&s.push({id:n.id,tokens:a}),r.length>0&&i.push({id:n.id,tokens:r})}let n=0,a=0;if(s.length>0){await this.#w.appendBatch(h,s);for(const t of s)n+=t.tokens.length}if(i.length>0){await this.#w.appendBatch(c,i);for(const t of i)a+=t.tokens.length}for(const e of t)this.#h.addAddedId(e.id);this.#p?(this.#D.word+=n,this.#D.char+=a):(n>0&&await this.#S("word",n),a>0&&await this.#S("char",a),await this.#h.save())}async#S(t,e){const s="word"===t?h:c,i=await this.#w.getCurrentSize(s),n="word"===t?this.#y.wordSegmentTokenThreshold||1e5:this.#y.charSegmentTokenThreshold||5e5,a="word"===t?this.#y.minWordTokenSave||0:this.#y.minCharTokenSave||0,r=this.#h.getLastSegmentInfo(t);let d,g,l,f;const w=()=>{const e=this.#h.getSegments(t).length+1;return`${t}_seg_${e}.bin`};if(r){const t=r.tokenCount;t>=n||t+e>=n?(d=w(),l=!0,g=r.end,f=e):(d=r.filename,l=!1,g=r.start,f=t+e)}else d=w(),l=!0,g=0,f=e;if(f<a)return void this.#h.updateSegment(t,d,g,i,f,l);const u=await this.#w.readRange(s,g,i);let m=this.#u.get(d);m||(m=new o(d,this.#o),this.#u.set(d,m)),await m.buildAndSave(u),this.#h.updateSegment(t,d,g,i,f,l)}async search(t,e){this.#m||await this.init();const s="string"==typeof t?{text:t}:t,i=this.#b(s),n=i.filter(t=>t.length>1),a=i.filter(t=>1===t.length),r=this.#h.getDeletedIds(),h=new Map,c=new Map,d=t=>{const e=this.#h.getSegments(t);for(const t of e){const e=t.filename;!this.#u.has(e)&&!c.has(e)&&c.set(e,new o(e,this.#o))}};d("word"),d("char"),await Promise.all(Array.from(c.entries()).map(([t,e])=>e.loadIndex().then(s=>{s&&this.#u.set(t,e)})));const g=async(t,e)=>{if(0===e.length)return;const s=this.#h.getSegments(t);for(const t of s){const s=t.filename,i=this.#u.get(s);if(i)for(const t of e){const e=i.search(t),s=1+.1*t.length;for(const i of e)if(!r.has(i))if(h.has(i)){const e=h.get(i);e.score+=s,e.tokens.add(t)}else h.set(i,{score:0,tokens:new Set([t])})}}};await g("word",n),await g("char",a);const l=[];return h.forEach((t,e)=>{l.push({id:e,score:t.score,tokens:Array.from(t.tokens)})}),l.sort((t,e)=>e.score-t.score),"number"==typeof e&&e>0?l.slice(0,e):l}async removeDocument(t){this.#m||await this.init(),this.#h.addDeletedId(t),this.#h.removeAddedId(t),await this.#h.save()}async clearAll(){await this.#o.clearAll(),this.#u.clear(),this.#h.reset(),this.#m=!1,this.#p=!1,this.#D={word:0,char:0}}async getStatus(){return this.#m||await this.init(),{wordSegments:this.#h.getSegments("word").length,charSegments:this.#h.getSegments("char").length,deleted:this.#h.getDeletedIds().size,wordCacheSize:await this.#w.getCurrentSize(h),charCacheSize:await this.#w.getCurrentSize(c),inBatch:this.#p}}async hasDocument(t){return this.#m||await this.init(),this.#h.hasDocument(t)}}exports.BrowserStorage=t,exports.NodeStorage=e,exports.SearchEngine=d,exports.SimpleSearch=class{static#T=null;static#v={baseDir:"simple_search_data",wordSegmentTokenThreshold:1e5,minWordTokenSave:0};static configure(t){const e={...this.#v,...t};this.#T=new d(e)}static#z(){return this.#T||(this.#T=new d(this.#v)),this.#T}static async startBatch(){this.#z().startBatch()}static async endBatch(){return this.#z().endBatch()}static async addDocument(t){return this.#z().addDocument(t)}static async addDocumentIfMissing(t){return this.#z().addDocumentIfMissing(t)}static async addDocuments(t){return this.#z().addDocuments(t)}static async addDocumentsIfMissing(t){return 
this.#z().addDocumentsIfMissing(t)}static async search(t,e){return this.#z().search(t,e)}static async removeDocument(t){return this.#z().removeDocument(t)}static async clearAll(){return this.#z().clearAll()}static async getStatus(){return this.#z().getStatus()}static async hasDocument(t){return this.#z().hasDocument(t)}};
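
The minified blob is hard to review, so here is a readable approximation of the most self-contained change visible in it: the default word tokenizer now feature-checks and try/catches `Intl.Segmenter` before falling back to the regex split. This is the editor's de-minified sketch, not the shipped source:

```typescript
// Readable approximation of the 0.1.2 default tokenizer seen in the minified
// diff: Intl.Segmenter is now feature-checked and wrapped in try/catch, with
// the regex split kept as the fallback path.
function defaultTokenize(text: string): string[] {
  try {
    if (typeof Intl !== 'undefined' && typeof Intl.Segmenter === 'function' &&
        typeof Array.from === 'function') {
      const segments = new Intl.Segmenter([], { granularity: 'word' }).segment(text);
      if (typeof segments === 'object' && segments !== null) {
        return Array.from(segments)
          .filter((s) => s?.isWordLike)
          .map((s) => s?.segment?.toLowerCase() || '');
      }
    }
  } catch {
    // fall through to the regex fallback below
  }
  return text
    .toLowerCase()
    .split(/[^a-z0-9\u4e00-\u9fa5]+/g)
    .filter((token) => token.length > 0);
}
```

The other changes visible in the minified diff: the indexing tokenizer is now called with the whole document (the default falls back to `doc.text`), `search` normalizes a string query to `{ text: query }`, `addDocumentsIfMissing` silently skips deleted or already-added ids where `addDocuments` throws, and `hasDocument` checks the union of the added and deleted id sets.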
package/lib/index.d.ts CHANGED
@@ -1,10 +1,12 @@
  /**
  * 核心类型定义
  */
- interface IDocument {
- id: number;
+ interface IDocumentBase {
  text: string;
  }
+ interface IDocument extends IDocumentBase {
+ id: number;
+ }
  interface IResult {
  id: number;
  score: number;
@@ -61,7 +63,7 @@ interface ISearchEngineConfig {
  * - 建议: 针对不同语言(中文/英文/日文等)使用专门的分词实现
  * - 影响: 直接决定索引的粒度和搜索的准确性
  */
- indexingTokenizer?: (text: string) => string[];
+ indexingTokenizer?: <T extends IDocument = IDocument>(doc: T) => string[];
  /**
  * 搜索时使用的分词器 (算法核心配置)
  * - 作用: 将查询文本转换为搜索用的token序列
@@ -69,7 +71,7 @@ interface ISearchEngineConfig {
  * - 建议: 与indexingTokenizer保持一致的分词策略以确保搜索准确性
  * - 影响: 直接决定搜索匹配的范围和结果的相关性
  */
- searchTokenizer?: (text: string) => string[];
+ searchTokenizer?: <T extends IDocumentBase = IDocumentBase>(doc: T) => string[];
  /**
  * 词索引分段阈值 (Token数) - 分段算法配置
  * - 作用: 控制词索引文件的大小,超过阈值时创建新的索引段
@@ -121,9 +123,19 @@ declare class SearchEngine {
  * 触发索引构建检查并保存元数据
  */
  endBatch(): Promise<void>;
- addDocument(doc: IDocument): Promise<void>;
- addDocuments(docs: IDocument[]): Promise<void>;
- search(query: string, limit?: number): Promise<IResult[]>;
+ addDocument<T extends IDocument = IDocument>(doc: T): Promise<void>;
+ /**
+ * 添加单个文档,如果文档ID已存在则跳过
+ * 用于在批量添加中途出错后的恢复添加行为,也可直接用于单个文档添加
+ */
+ addDocumentIfMissing<T extends IDocument = IDocument>(doc: T): Promise<void>;
+ /**
+ * 添加多个文档,跳过已存在的文档ID
+ * 用于在批量添加中途出错后的恢复添加行为,也可直接用于批量添加
+ */
+ addDocumentsIfMissing<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
+ addDocuments<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
+ search<T extends IDocumentBase = IDocumentBase>(query: T | string, limit?: number): Promise<IResult[]>;
  removeDocument(id: number): Promise<void>;
  clearAll(): Promise<void>;
  getStatus(): Promise<{
@@ -134,6 +146,12 @@ declare class SearchEngine {
  charCacheSize: number;
  inBatch: boolean;
  }>;
+ /**
+ * 检查文档ID是否曾经添加过(包括已删除的)
+ * @param id 文档ID
+ * @returns 文档是否曾经添加过的布尔值
+ */
+ hasDocument(id: number): Promise<boolean>;
  }
 
  /**
@@ -148,9 +166,11 @@ declare class SimpleSearch {
  static configure(config: Partial<ISearchEngineConfig>): void;
  static startBatch(): Promise<void>;
  static endBatch(): Promise<void>;
- static addDocument(doc: IDocument): Promise<void>;
- static addDocuments(docs: IDocument[]): Promise<void>;
- static search(query: string, limit?: number): Promise<IResult[]>;
+ static addDocument<T extends IDocument = IDocument>(doc: T): Promise<void>;
+ static addDocumentIfMissing<T extends IDocument = IDocument>(doc: T): Promise<void>;
+ static addDocuments<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
+ static addDocumentsIfMissing<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
+ static search<T extends IDocumentBase = IDocumentBase>(query: T | string, limit?: number): Promise<IResult[]>;
  static removeDocument(id: number): Promise<void>;
  static clearAll(): Promise<void>;
  static getStatus(): Promise<{
@@ -161,6 +181,12 @@ declare class SimpleSearch {
  charCacheSize: number;
  inBatch: boolean;
  }>;
+ /**
+ * 检查文档ID是否曾经添加过(包括已删除的)
+ * @param id 文档ID
+ * @returns 文档是否曾经添加过的布尔值
+ */
+ static hasDocument(id: number): Promise<boolean>;
  }
 
  /**
@@ -196,4 +222,4 @@ declare class NodeStorage implements IStorage {
  }
 
  export { BrowserStorage, NodeStorage, SearchEngine, SimpleSearch };
- export type { IDocument, IIndexMeta, IResult, ISearchEngineConfig, ISegmentMeta, IStorage, ITokenizedDoc, IndexType };
+ export type { IDocument, IDocumentBase, IIndexMeta, IResult, ISearchEngineConfig, ISegmentMeta, IStorage, ITokenizedDoc, IndexType };
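
In use, the retyped tokenizers and the widened `search` signature look like the sketch below; the `ArticleDoc` shape and sample values are hypothetical, and the tokenizers are typed at their constraints so the assignments type-check:

```typescript
import { SearchEngine } from 'gs-search';
import type { IDocument } from 'gs-search';

// Hypothetical richer document; IDocument itself is { id: number; text: string }.
interface ArticleDoc extends IDocument {
  category: string;
}

const engine = new SearchEngine({
  baseDir: 'search-data',
  // 0.1.2: indexing tokenizers receive the whole document, not a string.
  indexingTokenizer: (doc: IDocument) => {
    const article = doc as ArticleDoc;
    return `${article.text} ${article.category ?? ''}`.toLowerCase().split(/\s+/);
  },
  // Search tokenizers receive the query object ({ text } at minimum).
  searchTokenizer: (query: { text: string }) => query.text.toLowerCase().split(/\s+/),
});

await engine.init();
await engine.addDocument<ArticleDoc>({ id: 1, text: 'inverted index basics', category: 'tech' });

// search() now accepts either a plain string or an IDocumentBase-shaped object.
const byString = await engine.search('index', 10);
const byObject = await engine.search({ text: 'index' }, 10);
```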
package/lib/index.js CHANGED
@@ -1 +1 @@
- class t{#t;constructor(t){this.#t=t}async#e(){return await(await navigator.storage.getDirectory()).getDirectoryHandle(this.#t,{create:!0})}async write(t,e){const s=await(await(await this.#e()).getFileHandle(t,{create:!0})).createWritable();await s.write(e),await s.close()}async append(t,e){const s=await this.#e();let i;try{i=await s.getFileHandle(t,{create:!0})}catch{i=await s.getFileHandle(t,{create:!0})}const n=await i.getFile(),a=await i.createWritable({keepExistingData:!0});await a.seek(n.size),await a.write(e),await a.close()}async read(t){const e=await this.#e();try{return await(await(await e.getFileHandle(t)).getFile()).arrayBuffer()}catch{return null}}async readRange(t,e,s){const i=await this.#e();try{return await(await(await i.getFileHandle(t)).getFile()).slice(e,s).arrayBuffer()}catch{return null}}async remove(t){const e=await this.#e();try{await e.removeEntry(t)}catch{}}async listFiles(){const t=await this.#e(),e=[];for await(const s of t.keys())e.push(s);return e}async clearAll(){const t=await this.#e();for await(const e of t.keys())await t.removeEntry(e,{recursive:!0})}async getFileSize(t){const e=await this.#e();try{return(await(await e.getFileHandle(t)).getFile()).size}catch{return 0}}}class e{#s=null;#i=null;#t;#n="";constructor(t){this.#t=t}async#a(){if(this.#s)return;const t=await import("node:fs"),e=await import("node:path");this.#s=t.promises,this.#i=e.default||e,this.#n=this.#i.join(process.cwd(),this.#t);try{await this.#s.access(this.#n)}catch{await this.#s.mkdir(this.#n,{recursive:!0})}}#r(t){return this.#i.join(this.#n,t)}async write(t,e){await this.#a(),await this.#s.writeFile(this.#r(t),Buffer.from(e))}async append(t,e){await this.#a(),await this.#s.appendFile(this.#r(t),Buffer.from(e))}async read(t){await this.#a();try{const e=await this.#s.readFile(this.#r(t));return e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)}catch{return null}}async readRange(t,e,s){await this.#a();try{const i=await this.#s.open(this.#r(t),"r"),n=s-e,a=Buffer.alloc(n);return await i.read(a,0,n,e),await i.close(),a.buffer.slice(a.byteOffset,a.byteOffset+a.byteLength)}catch{return null}}async remove(t){await this.#a();try{await this.#s.unlink(this.#r(t))}catch{}}async listFiles(){await this.#a();try{return await this.#s.readdir(this.#n)}catch{return[]}}async clearAll(){await this.#a();try{const t=await this.#s.readdir(this.#n);for(const e of t)await this.#s.unlink(this.#i.join(this.#n,e))}catch{}}async getFileSize(t){await this.#a();try{return(await this.#s.stat(this.#r(t))).size}catch{return 0}}}const s="search_meta.json",i="deleted_ids.bin",n="added_ids.bin";class a{#o;#h={wordSegments:[],charSegments:[]};#c=new Set;#d=new Set;constructor(t){this.#o=t}async load(){const t=await this.#o.read(s);if(t){const e=(new TextDecoder).decode(t);this.#h=JSON.parse(e)}else this.#h={wordSegments:[],charSegments:[]};const e=await this.#o.read(i);if(e){const t=new DataView(e);let s=0;const i=e.byteLength;for(;s<i&&!(s+4>i);){const e=t.getUint32(s,!0);this.#c.add(e),s+=4,s<i&&30===t.getUint8(s)&&(s+=1)}}const a=await this.#o.read(n);if(a){const t=new DataView(a);let e=0;const s=a.byteLength;for(;e<s&&!(e+4>s);){const i=t.getUint32(e,!0);this.#d.add(i),e+=4,e<s&&30===t.getUint8(e)&&(e+=1)}}}async save(){const t=JSON.stringify(this.#h);if(await this.#o.write(s,(new TextEncoder).encode(t).buffer),0===this.#c.size)await this.#o.remove(i);else{const t=4*this.#c.size+this.#c.size,e=new ArrayBuffer(t),s=new DataView(e);let n=0;for(const t of this.#c)s.setUint32(n,t,!0),n+=4,s.setUint8(n,30),n+=1;await 
this.#o.write(i,e)}if(0===this.#d.size)await this.#o.remove(n);else{const t=4*this.#d.size+this.#d.size,e=new ArrayBuffer(t),s=new DataView(e);let i=0;for(const t of this.#d)s.setUint32(i,t,!0),i+=4,s.setUint8(i,30),i+=1;await this.#o.write(n,e)}}getSegments(t){return"word"===t?this.#h.wordSegments:this.#h.charSegments}getDeletedIds(){return this.#c}addDeletedId(t){this.#c.add(t)}isDeleted(t){return this.#c.has(t)}addAddedId(t){this.#d.add(t)}removeAddedId(t){this.#d.delete(t)}isAdded(t){return this.#d.has(t)}getAddedIds(){return this.#d}getLastSegmentInfo(t){const e=this.getSegments(t);return 0===e.length?null:e[e.length-1]}updateSegment(t,e,s,i,n,a){const r="word"===t?this.#h.wordSegments:this.#h.charSegments;if(a)r.push({filename:e,start:s,end:i,tokenCount:n});else{const t=r[r.length-1];t&&t.filename===e&&(t.end=i,t.tokenCount=n)}}reset(){this.#h={wordSegments:[],charSegments:[]},this.#c.clear(),this.#d.clear()}}class r{static SEPARATOR=30;#o;constructor(t){this.#o=t}async appendBatch(t,e){if(0===e.length)return await this.#o.getFileSize(t);const s=new TextEncoder;let i=0;for(const t of e){i+=8;for(const e of t.tokens){i+=2+Math.min(s.encode(e).byteLength,65535)}i+=1}const n=new Uint8Array(i);let a=0;for(const t of e){const e=[];for(const i of t.tokens){const t=s.encode(i),n=t.byteLength>65535?t.slice(0,65535):t;e.push(n)}const i=new DataView(n.buffer,a);i.setUint32(0,t.id,!0),i.setUint32(4,e.length,!0),a+=8;for(const t of e)new DataView(n.buffer,a).setUint16(0,t.byteLength,!0),a+=2,n.set(t,a),a+=t.byteLength;n[a++]=r.SEPARATOR}return await this.#o.append(t,n.buffer),await this.#o.getFileSize(t)}async readRange(t,e,s){const i=await this.#o.readRange(t,e,s);if(!i||0===i.byteLength)return[];const n=new DataView(i),a=new Uint8Array(i),o=new TextDecoder,h=[];let c=0;const d=i.byteLength;for(;c<d&&!(c+8>d);){const t=n.getUint32(c,!0);c+=4;const e=n.getUint32(c,!0);c+=4;const s=[];for(let t=0;t<e&&!(c+2>d);t++){const t=n.getUint16(c,!0);if(c+=2,c+t>d)break;const e=new Uint8Array(i,c,t);s.push(o.decode(e)),c+=t}c<d&&a[c]===r.SEPARATOR&&(c+=1),h.push({id:t,tokens:s})}return h}async getCurrentSize(t){return await this.#o.getFileSize(t)}}class o{#g;#o;#l=null;#f=null;static hash(t){let e=5381;for(let s=0;s<t.length;s++)e=(e<<5)+e^t.charCodeAt(s);return e>>>0}constructor(t,e){this.#g=t,this.#o=e}async loadIndex(){return!!this.#l||(this.#l=await this.#o.read(this.#g),!!this.#l&&(this.#f=new DataView(this.#l),!0))}async buildAndSave(t){const e=new Map;for(const s of t){const t=new Map;for(const i of s.tokens)if(!t.has(i)){t.set(i,!0);const n=o.hash(i);e.has(n)||e.set(n,[]),e.get(n).push(s.id)}}const s=Array.from(e.keys()).sort((t,e)=>t-e);let i=0;const n=new Array(s.length);for(let t=0;t<s.length;t++){const a=s[t],r=e.get(a);n[t]=r,i+=r.length}const a=12*s.length,r=new ArrayBuffer(8+a+4*i),h=new DataView(r);h.setUint32(0,1229866072),h.setUint32(4,s.length);let c=8,d=8+a;for(let t=0;t<s.length;t++){const e=s[t],i=n[t];h.setUint32(c,e),h.setUint32(c+4,d),h.setUint32(c+8,i.length),c+=12;for(let t=0;t<i.length;t++)h.setUint32(d,i[t],!0),d+=4}await this.#o.write(this.#g,r),this.#l=r,this.#f=h}search(t){if(!this.#f||!this.#l)return[];const e=o.hash(t);let s=0,i=this.#f.getUint32(4)-1;for(;s<=i;){const t=s+i>>>1,n=8+12*t,a=this.#f.getUint32(n);if(a<e)s=t+1;else{if(!(a>e)){const t=this.#f.getUint32(n+4),e=this.#f.getUint32(n+8),s=[];for(let i=0;i<e;i++)s.push(this.#f.getUint32(t+4*i,!0));return s}i=t-1}}return[]}}const h="word_cache.bin",c="char_cache.bin";class 
d{#o;#h;#w;#u;#m=!1;#y;#p=!1;#S={word:0,char:0};constructor(s){if(!s.baseDir)throw new Error("SearchEngine requires 'baseDir' in config.");if(this.#y={wordSegmentTokenThreshold:1e5,charSegmentTokenThreshold:5e5,minWordTokenSave:0,minCharTokenSave:0,...s},(this.#y.minWordTokenSave||0)>=(this.#y.wordSegmentTokenThreshold||1e5))throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");if((this.#y.minCharTokenSave||0)>=(this.#y.charSegmentTokenThreshold||5e5))throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");let i=null;if(this.#y.storage&&("object"==typeof this.#y.storage?i=this.#y.storage:"browser"===this.#y.storage?i=new t(this.#y.baseDir):"node"===this.#y.storage&&(i=new e(this.#y.baseDir))),!i){const s=typeof navigator<"u"&&navigator?.storage?.getDirectory instanceof Function,n=typeof process<"u"&&null!=process.versions&&null!=process.versions.node;s?i=new t(this.#y.baseDir):n&&(i=new e(this.#y.baseDir))}if(!i)throw new Error('Storage initialization failed. Please configure "storage" explicitly or ensure you are in a supported environment (Browser/Node).');this.#o=i,this.#h=new a(this.#o),this.#w=new r(this.#o),this.#u=new Map}async init(){if(this.#m)return;await this.#h.load();const t=[...this.#h.getSegments("word"),...this.#h.getSegments("char")];for(const e of t)this.#u.has(e.filename)||this.#u.set(e.filename,new o(e.filename,this.#o)),await this.#u.get(e.filename).loadIndex();this.#m=!0}startBatch(){this.#p=!0,this.#S={word:0,char:0}}async endBatch(){this.#p=!1,this.#S.word>0&&await this.#D("word",this.#S.word),this.#S.char>0&&await this.#D("char",this.#S.char),this.#S={word:0,char:0},await this.#h.save()}#k(t){if(typeof Intl<"u"&&Intl.Segmenter){const e=new Intl.Segmenter([],{granularity:"word"});return Array.from(e.segment(t)).filter(t=>t.isWordLike).map(t=>t.segment.toLowerCase())}return t.toLowerCase().split(/[^a-z0-9\u4e00-\u9fa5]+/g).filter(t=>t.length>0)}#b(t){return this.#y.indexingTokenizer?this.#y.indexingTokenizer(t):this.#k(t)}#T(t){return this.#y.searchTokenizer?this.#y.searchTokenizer(t):this.#y.indexingTokenizer?this.#y.indexingTokenizer(t):this.#k(t)}async addDocument(t){return this.addDocuments([t])}async addDocuments(t){if(this.#m||await this.init(),0===t.length)return;const e=this.#h.getDeletedIds(),s=[],i=[];for(const n of t){if(e.has(n.id))throw new Error(`Document ID ${n.id} has been deleted and cannot be re-added.`);if(this.#h.isAdded(n.id))throw new Error(`Document ID ${n.id} already exists.`);const t=this.#b(n.text),a=[],r=[];for(const e of t)e.length>1?a.push(e):1===e.length&&r.push(e);a.length>0&&s.push({id:n.id,tokens:a}),r.length>0&&i.push({id:n.id,tokens:r})}let n=0,a=0;if(s.length>0){await this.#w.appendBatch(h,s);for(const t of s)n+=t.tokens.length}if(i.length>0){await this.#w.appendBatch(c,i);for(const t of i)a+=t.tokens.length}for(const e of t)this.#h.addAddedId(e.id);this.#p?(this.#S.word+=n,this.#S.char+=a):(n>0&&await this.#D("word",n),a>0&&await this.#D("char",a),await this.#h.save())}async#D(t,e){const s="word"===t?h:c,i=await this.#w.getCurrentSize(s),n="word"===t?this.#y.wordSegmentTokenThreshold||1e5:this.#y.charSegmentTokenThreshold||5e5,a="word"===t?this.#y.minWordTokenSave||0:this.#y.minCharTokenSave||0,r=this.#h.getLastSegmentInfo(t);let d,g,l,f;const w=()=>{const e=this.#h.getSegments(t).length+1;return`${t}_seg_${e}.bin`};if(r){const t=r.tokenCount;t>=n||t+e>=n?(d=w(),l=!0,g=r.end,f=e):(d=r.filename,l=!1,g=r.start,f=t+e)}else d=w(),l=!0,g=0,f=e;if(f<a)return void 
this.#h.updateSegment(t,d,g,i,f,l);const u=await this.#w.readRange(s,g,i);let m=this.#u.get(d);m||(m=new o(d,this.#o),this.#u.set(d,m)),await m.buildAndSave(u),this.#h.updateSegment(t,d,g,i,f,l)}async search(t,e){this.#m||await this.init();const s=this.#T(t),i=s.filter(t=>t.length>1),n=s.filter(t=>1===t.length),a=this.#h.getDeletedIds(),r=new Map,h=new Map,c=t=>{const e=this.#h.getSegments(t);for(const t of e){const e=t.filename;!this.#u.has(e)&&!h.has(e)&&h.set(e,new o(e,this.#o))}};c("word"),c("char"),await Promise.all(Array.from(h.entries()).map(([t,e])=>e.loadIndex().then(s=>{s&&this.#u.set(t,e)})));const d=async(t,e)=>{if(0===e.length)return;const s=this.#h.getSegments(t);for(const t of s){const s=t.filename,i=this.#u.get(s);if(i)for(const t of e){const e=i.search(t),s=1+.1*t.length;for(const i of e)if(!a.has(i))if(r.has(i)){const e=r.get(i);e.score+=s,e.tokens.add(t)}else r.set(i,{score:0,tokens:new Set([t])})}}};await d("word",i),await d("char",n);const g=[];return r.forEach((t,e)=>{g.push({id:e,score:t.score,tokens:Array.from(t.tokens)})}),g.sort((t,e)=>e.score-t.score),"number"==typeof e&&e>0?g.slice(0,e):g}async removeDocument(t){this.#m||await this.init(),this.#h.addDeletedId(t),this.#h.removeAddedId(t),await this.#h.save()}async clearAll(){await this.#o.clearAll(),this.#u.clear(),this.#h.reset(),this.#m=!1,this.#p=!1,this.#S={word:0,char:0}}async getStatus(){return this.#m||await this.init(),{wordSegments:this.#h.getSegments("word").length,charSegments:this.#h.getSegments("char").length,deleted:this.#h.getDeletedIds().size,wordCacheSize:await this.#w.getCurrentSize(h),charCacheSize:await this.#w.getCurrentSize(c),inBatch:this.#p}}}class g{static#I=null;static#v={baseDir:"simple_search_data",wordSegmentTokenThreshold:1e5,minWordTokenSave:0};static configure(t){const e={...this.#v,...t};this.#I=new d(e)}static#z(){return this.#I||(this.#I=new d(this.#v)),this.#I}static async startBatch(){this.#z().startBatch()}static async endBatch(){return this.#z().endBatch()}static async addDocument(t){return this.#z().addDocument(t)}static async addDocuments(t){return this.#z().addDocuments(t)}static async search(t,e){return this.#z().search(t,e)}static async removeDocument(t){return this.#z().removeDocument(t)}static async clearAll(){return this.#z().clearAll()}static async getStatus(){return this.#z().getStatus()}}export{t as BrowserStorage,e as NodeStorage,d as SearchEngine,g as SimpleSearch};
+ class t{#t;constructor(t){this.#t=t}async#e(){return await(await navigator.storage.getDirectory()).getDirectoryHandle(this.#t,{create:!0})}async write(t,e){const s=await(await(await this.#e()).getFileHandle(t,{create:!0})).createWritable();await s.write(e),await s.close()}async append(t,e){const s=await this.#e();let i;try{i=await s.getFileHandle(t,{create:!0})}catch{i=await s.getFileHandle(t,{create:!0})}const n=await i.getFile(),a=await i.createWritable({keepExistingData:!0});await a.seek(n.size),await a.write(e),await a.close()}async read(t){const e=await this.#e();try{return await(await(await e.getFileHandle(t)).getFile()).arrayBuffer()}catch{return null}}async readRange(t,e,s){const i=await this.#e();try{return await(await(await i.getFileHandle(t)).getFile()).slice(e,s).arrayBuffer()}catch{return null}}async remove(t){const e=await this.#e();try{await e.removeEntry(t)}catch{}}async listFiles(){const t=await this.#e(),e=[];for await(const s of t.keys())e.push(s);return e}async clearAll(){const t=await this.#e();for await(const e of t.keys())await t.removeEntry(e,{recursive:!0})}async getFileSize(t){const e=await this.#e();try{return(await(await e.getFileHandle(t)).getFile()).size}catch{return 0}}}class e{#s=null;#i=null;#t;#n="";constructor(t){this.#t=t}async#a(){if(this.#s)return;const t=await import("node:fs"),e=await import("node:path");this.#s=t.promises,this.#i=e.default||e,this.#n=this.#i.join(process.cwd(),this.#t);try{await this.#s.access(this.#n)}catch{await this.#s.mkdir(this.#n,{recursive:!0})}}#r(t){return this.#i.join(this.#n,t)}async write(t,e){await this.#a(),await this.#s.writeFile(this.#r(t),Buffer.from(e))}async append(t,e){await this.#a(),await this.#s.appendFile(this.#r(t),Buffer.from(e))}async read(t){await this.#a();try{const e=await this.#s.readFile(this.#r(t));return e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)}catch{return null}}async readRange(t,e,s){await this.#a();try{const i=await this.#s.open(this.#r(t),"r"),n=s-e,a=Buffer.alloc(n);return await i.read(a,0,n,e),await i.close(),a.buffer.slice(a.byteOffset,a.byteOffset+a.byteLength)}catch{return null}}async remove(t){await this.#a();try{await this.#s.unlink(this.#r(t))}catch{}}async listFiles(){await this.#a();try{return await this.#s.readdir(this.#n)}catch{return[]}}async clearAll(){await this.#a();try{const t=await this.#s.readdir(this.#n);for(const e of t)await this.#s.unlink(this.#i.join(this.#n,e))}catch{}}async getFileSize(t){await this.#a();try{return(await this.#s.stat(this.#r(t))).size}catch{return 0}}}const s="search_meta.json",i="deleted_ids.bin",n="added_ids.bin";class a{#o;#h={wordSegments:[],charSegments:[]};#c=new Set;#d=new Set;constructor(t){this.#o=t}async load(){const t=await this.#o.read(s);if(t){const e=(new TextDecoder).decode(t);this.#h=JSON.parse(e)}else this.#h={wordSegments:[],charSegments:[]};const e=await this.#o.read(i);if(e){const t=new DataView(e);let s=0;const i=e.byteLength;for(;s<i&&!(s+4>i);){const e=t.getUint32(s,!0);this.#c.add(e),s+=4,s<i&&30===t.getUint8(s)&&(s+=1)}}const a=await this.#o.read(n);if(a){const t=new DataView(a);let e=0;const s=a.byteLength;for(;e<s&&!(e+4>s);){const i=t.getUint32(e,!0);this.#d.add(i),e+=4,e<s&&30===t.getUint8(e)&&(e+=1)}}}async save(){const t=JSON.stringify(this.#h);if(await this.#o.write(s,(new TextEncoder).encode(t).buffer),0===this.#c.size)await this.#o.remove(i);else{const t=4*this.#c.size+this.#c.size,e=new ArrayBuffer(t),s=new DataView(e);let n=0;for(const t of this.#c)s.setUint32(n,t,!0),n+=4,s.setUint8(n,30),n+=1;await 
this.#o.write(i,e)}if(0===this.#d.size)await this.#o.remove(n);else{const t=4*this.#d.size+this.#d.size,e=new ArrayBuffer(t),s=new DataView(e);let i=0;for(const t of this.#d)s.setUint32(i,t,!0),i+=4,s.setUint8(i,30),i+=1;await this.#o.write(n,e)}}getSegments(t){return"word"===t?this.#h.wordSegments:this.#h.charSegments}getDeletedIds(){return this.#c}addDeletedId(t){this.#c.add(t)}isDeleted(t){return this.#c.has(t)}addAddedId(t){this.#d.add(t)}removeAddedId(t){this.#d.delete(t)}isAdded(t){return this.#d.has(t)}getAddedIds(){return this.#d}hasDocument(t){return this.#d.has(t)||this.#c.has(t)}getLastSegmentInfo(t){const e=this.getSegments(t);return 0===e.length?null:e[e.length-1]}updateSegment(t,e,s,i,n,a){const r="word"===t?this.#h.wordSegments:this.#h.charSegments;if(a)r.push({filename:e,start:s,end:i,tokenCount:n});else{const t=r[r.length-1];t&&t.filename===e&&(t.end=i,t.tokenCount=n)}}reset(){this.#h={wordSegments:[],charSegments:[]},this.#c.clear(),this.#d.clear()}}class r{static SEPARATOR=30;#o;constructor(t){this.#o=t}async appendBatch(t,e){if(0===e.length)return await this.#o.getFileSize(t);const s=new TextEncoder;let i=0;for(const t of e){i+=8;for(const e of t.tokens){i+=2+Math.min(s.encode(e).byteLength,65535)}i+=1}const n=new Uint8Array(i);let a=0;for(const t of e){const e=[];for(const i of t.tokens){const t=s.encode(i),n=t.byteLength>65535?t.slice(0,65535):t;e.push(n)}const i=new DataView(n.buffer,a);i.setUint32(0,t.id,!0),i.setUint32(4,e.length,!0),a+=8;for(const t of e)new DataView(n.buffer,a).setUint16(0,t.byteLength,!0),a+=2,n.set(t,a),a+=t.byteLength;n[a++]=r.SEPARATOR}return await this.#o.append(t,n.buffer),await this.#o.getFileSize(t)}async readRange(t,e,s){const i=await this.#o.readRange(t,e,s);if(!i||0===i.byteLength)return[];const n=new DataView(i),a=new Uint8Array(i),o=new TextDecoder,h=[];let c=0;const d=i.byteLength;for(;c<d&&!(c+8>d);){const t=n.getUint32(c,!0);c+=4;const e=n.getUint32(c,!0);c+=4;const s=[];for(let t=0;t<e&&!(c+2>d);t++){const t=n.getUint16(c,!0);if(c+=2,c+t>d)break;const e=new Uint8Array(i,c,t);s.push(o.decode(e)),c+=t}c<d&&a[c]===r.SEPARATOR&&(c+=1),h.push({id:t,tokens:s})}return h}async getCurrentSize(t){return await this.#o.getFileSize(t)}}class o{#g;#o;#l=null;#f=null;static hash(t){let e=5381;for(let s=0;s<t.length;s++)e=(e<<5)+e^t.charCodeAt(s);return e>>>0}constructor(t,e){this.#g=t,this.#o=e}async loadIndex(){return!!this.#l||(this.#l=await this.#o.read(this.#g),!!this.#l&&(this.#f=new DataView(this.#l),!0))}async buildAndSave(t){const e=new Map;for(const s of t){const t=new Map;for(const i of s.tokens)if(!t.has(i)){t.set(i,!0);const n=o.hash(i);e.has(n)||e.set(n,[]),e.get(n).push(s.id)}}const s=Array.from(e.keys()).sort((t,e)=>t-e);let i=0;const n=new Array(s.length);for(let t=0;t<s.length;t++){const a=s[t],r=e.get(a);n[t]=r,i+=r.length}const a=12*s.length,r=new ArrayBuffer(8+a+4*i),h=new DataView(r);h.setUint32(0,1229866072),h.setUint32(4,s.length);let c=8,d=8+a;for(let t=0;t<s.length;t++){const e=s[t],i=n[t];h.setUint32(c,e),h.setUint32(c+4,d),h.setUint32(c+8,i.length),c+=12;for(let t=0;t<i.length;t++)h.setUint32(d,i[t],!0),d+=4}await this.#o.write(this.#g,r),this.#l=r,this.#f=h}search(t){if(!this.#f||!this.#l)return[];const e=o.hash(t);let s=0,i=this.#f.getUint32(4)-1;for(;s<=i;){const t=s+i>>>1,n=8+12*t,a=this.#f.getUint32(n);if(a<e)s=t+1;else{if(!(a>e)){const t=this.#f.getUint32(n+4),e=this.#f.getUint32(n+8),s=[];for(let i=0;i<e;i++)s.push(this.#f.getUint32(t+4*i,!0));return s}i=t-1}}return[]}}const 
h="word_cache.bin",c="char_cache.bin";class d{#o;#h;#w;#u;#m=!1;#y;#p=!1;#D={word:0,char:0};constructor(s){if(!s.baseDir)throw new Error("SearchEngine requires 'baseDir' in config.");if(this.#y={wordSegmentTokenThreshold:1e5,charSegmentTokenThreshold:5e5,minWordTokenSave:0,minCharTokenSave:0,...s},(this.#y.minWordTokenSave||0)>=(this.#y.wordSegmentTokenThreshold||1e5))throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");if((this.#y.minCharTokenSave||0)>=(this.#y.charSegmentTokenThreshold||5e5))throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");let i=null;if(this.#y.storage&&("object"==typeof this.#y.storage?i=this.#y.storage:"browser"===this.#y.storage?i=new t(this.#y.baseDir):"node"===this.#y.storage&&(i=new e(this.#y.baseDir))),!i){const s=typeof navigator<"u"&&navigator?.storage?.getDirectory instanceof Function,n=typeof process<"u"&&null!=process.versions&&null!=process.versions.node;s?i=new t(this.#y.baseDir):n&&(i=new e(this.#y.baseDir))}if(!i)throw new Error('Storage initialization failed. Please configure "storage" explicitly or ensure you are in a supported environment (Browser/Node).');this.#o=i,this.#h=new a(this.#o),this.#w=new r(this.#o),this.#u=new Map}async init(){if(this.#m)return;await this.#h.load();const t=[...this.#h.getSegments("word"),...this.#h.getSegments("char")];for(const e of t)this.#u.has(e.filename)||this.#u.set(e.filename,new o(e.filename,this.#o)),await this.#u.get(e.filename).loadIndex();this.#m=!0}startBatch(){this.#p=!0,this.#D={word:0,char:0}}async endBatch(){this.#p=!1,this.#D.word>0&&await this.#S("word",this.#D.word),this.#D.char>0&&await this.#S("char",this.#D.char),this.#D={word:0,char:0},await this.#h.save()}#k(t){try{if(typeof Intl<"u"&&"function"==typeof Intl.Segmenter&&"function"==typeof Array.from){const e=new Intl.Segmenter([],{granularity:"word"}).segment(t);if("object"==typeof e&&null!==e)return Array.from(e).filter(t=>t?.isWordLike).map(t=>t?.segment?.toLowerCase()||"")}}catch{}return t.toLowerCase().split(/[^a-z0-9\u4e00-\u9fa5]+/g).filter(t=>t.length>0)}#I(t){return this.#y.indexingTokenizer?this.#y.indexingTokenizer(t):this.#k(t.text)}#b(t){return this.#y.searchTokenizer?this.#y.searchTokenizer(t):this.#I(t)}async addDocument(t){return this.addDocuments([t])}async addDocumentIfMissing(t){return this.addDocumentsIfMissing([t])}async addDocumentsIfMissing(t){if(this.#m||await this.init(),0===t.length)return;const e=this.#h.getDeletedIds(),s=[],i=[],n=[];for(const a of t){if(e.has(a.id)||this.#h.isAdded(a.id))continue;const t=this.#I(a),r=[],o=[];for(const e of t)e.length>1?r.push(e):1===e.length&&o.push(e);r.length>0&&s.push({id:a.id,tokens:r}),o.length>0&&i.push({id:a.id,tokens:o}),n.push(a)}if(0===n.length)return;let a=0,r=0;if(s.length>0){await this.#w.appendBatch(h,s);for(const t of s)a+=t.tokens.length}if(i.length>0){await this.#w.appendBatch(c,i);for(const t of i)r+=t.tokens.length}for(const t of n)this.#h.addAddedId(t.id);this.#p?(this.#D.word+=a,this.#D.char+=r):(a>0&&await this.#S("word",a),r>0&&await this.#S("char",r),await this.#h.save())}async addDocuments(t){if(this.#m||await this.init(),0===t.length)return;const e=this.#h.getDeletedIds(),s=[],i=[];for(const n of t){if(e.has(n.id))throw new Error(`Document ID ${n.id} has been deleted and cannot be re-added.`);if(this.#h.isAdded(n.id))throw new Error(`Document ID ${n.id} already exists.`);const t=this.#I(n),a=[],r=[];for(const e of 
t)e.length>1?a.push(e):1===e.length&&r.push(e);a.length>0&&s.push({id:n.id,tokens:a}),r.length>0&&i.push({id:n.id,tokens:r})}let n=0,a=0;if(s.length>0){await this.#w.appendBatch(h,s);for(const t of s)n+=t.tokens.length}if(i.length>0){await this.#w.appendBatch(c,i);for(const t of i)a+=t.tokens.length}for(const e of t)this.#h.addAddedId(e.id);this.#p?(this.#D.word+=n,this.#D.char+=a):(n>0&&await this.#S("word",n),a>0&&await this.#S("char",a),await this.#h.save())}async#S(t,e){const s="word"===t?h:c,i=await this.#w.getCurrentSize(s),n="word"===t?this.#y.wordSegmentTokenThreshold||1e5:this.#y.charSegmentTokenThreshold||5e5,a="word"===t?this.#y.minWordTokenSave||0:this.#y.minCharTokenSave||0,r=this.#h.getLastSegmentInfo(t);let d,g,l,f;const w=()=>{const e=this.#h.getSegments(t).length+1;return`${t}_seg_${e}.bin`};if(r){const t=r.tokenCount;t>=n||t+e>=n?(d=w(),l=!0,g=r.end,f=e):(d=r.filename,l=!1,g=r.start,f=t+e)}else d=w(),l=!0,g=0,f=e;if(f<a)return void this.#h.updateSegment(t,d,g,i,f,l);const u=await this.#w.readRange(s,g,i);let m=this.#u.get(d);m||(m=new o(d,this.#o),this.#u.set(d,m)),await m.buildAndSave(u),this.#h.updateSegment(t,d,g,i,f,l)}async search(t,e){this.#m||await this.init();const s="string"==typeof t?{text:t}:t,i=this.#b(s),n=i.filter(t=>t.length>1),a=i.filter(t=>1===t.length),r=this.#h.getDeletedIds(),h=new Map,c=new Map,d=t=>{const e=this.#h.getSegments(t);for(const t of e){const e=t.filename;!this.#u.has(e)&&!c.has(e)&&c.set(e,new o(e,this.#o))}};d("word"),d("char"),await Promise.all(Array.from(c.entries()).map(([t,e])=>e.loadIndex().then(s=>{s&&this.#u.set(t,e)})));const g=async(t,e)=>{if(0===e.length)return;const s=this.#h.getSegments(t);for(const t of s){const s=t.filename,i=this.#u.get(s);if(i)for(const t of e){const e=i.search(t),s=1+.1*t.length;for(const i of e)if(!r.has(i))if(h.has(i)){const e=h.get(i);e.score+=s,e.tokens.add(t)}else h.set(i,{score:0,tokens:new Set([t])})}}};await g("word",n),await g("char",a);const l=[];return h.forEach((t,e)=>{l.push({id:e,score:t.score,tokens:Array.from(t.tokens)})}),l.sort((t,e)=>e.score-t.score),"number"==typeof e&&e>0?l.slice(0,e):l}async removeDocument(t){this.#m||await this.init(),this.#h.addDeletedId(t),this.#h.removeAddedId(t),await this.#h.save()}async clearAll(){await this.#o.clearAll(),this.#u.clear(),this.#h.reset(),this.#m=!1,this.#p=!1,this.#D={word:0,char:0}}async getStatus(){return this.#m||await this.init(),{wordSegments:this.#h.getSegments("word").length,charSegments:this.#h.getSegments("char").length,deleted:this.#h.getDeletedIds().size,wordCacheSize:await this.#w.getCurrentSize(h),charCacheSize:await this.#w.getCurrentSize(c),inBatch:this.#p}}async hasDocument(t){return this.#m||await this.init(),this.#h.hasDocument(t)}}class g{static#T=null;static#v={baseDir:"simple_search_data",wordSegmentTokenThreshold:1e5,minWordTokenSave:0};static configure(t){const e={...this.#v,...t};this.#T=new d(e)}static#z(){return this.#T||(this.#T=new d(this.#v)),this.#T}static async startBatch(){this.#z().startBatch()}static async endBatch(){return this.#z().endBatch()}static async addDocument(t){return this.#z().addDocument(t)}static async addDocumentIfMissing(t){return this.#z().addDocumentIfMissing(t)}static async addDocuments(t){return this.#z().addDocuments(t)}static async addDocumentsIfMissing(t){return this.#z().addDocumentsIfMissing(t)}static async search(t,e){return this.#z().search(t,e)}static async removeDocument(t){return this.#z().removeDocument(t)}static async clearAll(){return this.#z().clearAll()}static async 
getStatus(){return this.#z().getStatus()}static async hasDocument(t){return this.#z().hasDocument(t)}}export{t as BrowserStorage,e as NodeStorage,d as SearchEngine,g as SimpleSearch};
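The rebuilt bundle exposes three additions in 0.1.2: `addDocumentIfMissing`/`addDocumentsIfMissing` (which skip already-known IDs instead of throwing), an async `hasDocument(id)`, and a `search()` that accepts an object query as well as a plain string. Below is a minimal usage sketch with placeholder IDs, texts, and `baseDir`; note that in the minified source `hasDocument` appears to return `true` for removed IDs as well as added ones:

```typescript
import { SearchEngine } from 'gs-search';

// Placeholder directory and documents, for illustration only.
const engine = new SearchEngine({ baseDir: 'search-data' });
await engine.init();

// Unlike addDocuments(), the *IfMissing variants skip IDs that already
// exist (or were previously removed) instead of throwing an error.
await engine.addDocumentsIfMissing([
  { id: 1, text: 'first document' },
  { id: 1, text: 'same ID again, silently skipped' },
]);

// hasDocument() reports whether an ID is already known to the index
// (added, or tombstoned by a prior removeDocument call).
if (!(await engine.hasDocument(2))) {
  await engine.addDocument({ id: 2, text: 'second document' });
}

// search() now also accepts an object query; a string is wrapped into
// { text } internally, and the object form is what a configured
// searchTokenizer receives.
const results = await engine.search({ text: 'first' }, 10);
```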
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "gs-search",
- "version": "0.1.1",
+ "version": "0.1.2",
  "type": "module",
  "main": "lib/index.cjs",
  "module": "lib/index.js",