gs-search 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.ja.md +53 -14
- package/README.ko.md +51 -12
- package/README.md +46 -3
- package/README.zh-CN.md +74 -16
- package/lib/index.cjs +1 -1
- package/lib/index.d.ts +37 -11
- package/lib/index.js +1 -1
- package/package.json +1 -1
package/README.ja.md
CHANGED
|
@@ -72,18 +72,53 @@ const engine = new SearchEngine({
|
|
|
72
72
|
// エンジンを初期化
|
|
73
73
|
await engine.init();
|
|
74
74
|
|
|
75
|
-
//
|
|
76
|
-
await engine.
|
|
75
|
+
// バッチ操作でドキュメントを追加
|
|
76
|
+
await engine.startBatch();
|
|
77
77
|
try {
|
|
78
78
|
await engine.addDocuments([
|
|
79
79
|
// ... ドキュメント
|
|
80
80
|
]);
|
|
81
|
-
await engine.commit();
|
|
82
81
|
} catch (error) {
|
|
83
|
-
|
|
82
|
+
// エラー処理
|
|
83
|
+
} finally {
|
|
84
|
+
// エラーが発生しても必ずバッチを終了し、インデックスが正しく再構築されるようにする
|
|
85
|
+
await engine.endBatch();
|
|
84
86
|
}
|
|
85
87
|
```
|
|
86
88
|
|
|
89
|
+
### カスタムトークナイザ
|
|
90
|
+
|
|
91
|
+
特定の言語やトークン化の要件をサポートするために、カスタムトークナイザを設定できます。トークナイザは完全なドキュメントオブジェクトにアクセスできます:
|
|
92
|
+
|
|
93
|
+
```typescript
|
|
94
|
+
import { SearchEngine } from 'gs-search';
|
|
95
|
+
|
|
96
|
+
// カスタムインデックストークナイザ:ドキュメントの複数フィールドを使用
|
|
97
|
+
const indexingTokenizer = (doc: { id: string; text: string; category: string; author: string }): string[] => {
|
|
98
|
+
// ドキュメントの全てのプロパティにアクセスできます
|
|
99
|
+
const fullText = `${doc.text} ${doc.category} ${doc.author}`;
|
|
100
|
+
return fullText.toLowerCase().split(/\s+/);
|
|
101
|
+
};
|
|
102
|
+
|
|
103
|
+
// カスタム検索トークナイザ:検索コンテキストをサポート
|
|
104
|
+
const searchTokenizer = (query: { text: string; language?: string; context?: string }): string[] => {
|
|
105
|
+
// クエリの言語やコンテキストに応じてトークン化を調整できます
|
|
106
|
+
const tokens = query.text.toLowerCase().split(/\s+/);
|
|
107
|
+
// コンテキストに応じて追加の検索語を追加
|
|
108
|
+
if (query.context === 'technical') {
|
|
109
|
+
tokens.push('technical');
|
|
110
|
+
}
|
|
111
|
+
return tokens;
|
|
112
|
+
};
|
|
113
|
+
|
|
114
|
+
// カスタムトークナイザを設定してエンジンを作成
|
|
115
|
+
const engine = new SearchEngine({
|
|
116
|
+
baseDir: 'search-data',
|
|
117
|
+
indexingTokenizer,
|
|
118
|
+
searchTokenizer
|
|
119
|
+
});
|
|
120
|
+
```
|
|
121
|
+
|
|
87
122
|
## APIリファレンス
|
|
88
123
|
|
|
89
124
|
### SimpleSearch
|
|
@@ -91,22 +126,26 @@ try {
|
|
|
91
126
|
- `constructor()`: 新しい検索エンジンインスタンスを作成
|
|
92
127
|
- `addDocument(doc: IDocument): Promise<void>`: 単一のドキュメントを追加
|
|
93
128
|
- `addDocuments(docs: IDocument[]): Promise<void>`: 複数のドキュメントを追加
|
|
94
|
-
- `
|
|
129
|
+
- `addDocumentIfMissing(doc: IDocument): Promise<void>`: ドキュメントが存在しない場合は単一のドキュメントを追加
|
|
130
|
+
- `addDocumentsIfMissing(docs: IDocument[]): Promise<void>`: 複数のドキュメントを追加し、既存のドキュメントはスキップ
|
|
131
|
+
- `removeDocument(id: number): Promise<void>`: ドキュメントを削除
|
|
95
132
|
- `search(query: string, limit?: number): Promise<IResult[]>`: ドキュメントを検索
|
|
96
133
|
- `getStatus(): Promise<IStatus>`: 検索エンジンのステータスを取得
|
|
97
134
|
|
|
98
|
-
###
|
|
135
|
+
### SearchEngine
|
|
99
136
|
|
|
100
|
-
- `constructor(options:
|
|
137
|
+
- `constructor(options: ISearchEngineConfig)`: 新しいコアエンジンインスタンスを作成
|
|
101
138
|
- `init(): Promise<void>`: エンジンを初期化
|
|
102
|
-
- `addDocument(doc: IDocument): Promise<void>`:
|
|
103
|
-
- `addDocuments(docs: IDocument[]): Promise<void>`:
|
|
104
|
-
- `
|
|
139
|
+
- `addDocument(doc: IDocument): Promise<void>`: 単一ドキュメントを追加
|
|
140
|
+
- `addDocuments(docs: IDocument[]): Promise<void>`: 複数ドキュメントを追加
|
|
141
|
+
- `addDocumentIfMissing(doc: IDocument): Promise<void>`: ドキュメントが存在しない場合は単一のドキュメントを追加
|
|
142
|
+
- `addDocumentsIfMissing(docs: IDocument[]): Promise<void>`: 複数のドキュメントを追加し、既存のドキュメントはスキップ
|
|
143
|
+
- `removeDocument(id: number): Promise<void>`: ドキュメントを削除
|
|
105
144
|
- `search(query: string, limit?: number): Promise<IResult[]>`: ドキュメントを検索
|
|
106
|
-
- `getStatus(): Promise<IStatus>`:
|
|
107
|
-
- `
|
|
108
|
-
- `
|
|
109
|
-
- `
|
|
145
|
+
- `getStatus(): Promise<IStatus>`: 検索エンジンの状態を取得する
|
|
146
|
+
- `hasDocument(id: number): Promise<boolean>`: ドキュメントIDが追加されたことがあるかを確認(削除されたものも含む)
|
|
147
|
+
- `startBatch(): void`: バッチ操作を開始する
|
|
148
|
+
- `endBatch(): Promise<void>`: バッチ操作を終了
|
|
110
149
|
|
|
111
150
|
## ストレージ
|
|
112
151
|
|
package/README.ko.md
CHANGED
|
@@ -72,18 +72,53 @@ const engine = new SearchEngine({
|
|
|
72
72
|
// 엔진 초기화
|
|
73
73
|
await engine.init();
|
|
74
74
|
|
|
75
|
-
//
|
|
76
|
-
await engine.
|
|
75
|
+
// 일괄 작업으로 문서 추가
|
|
76
|
+
await engine.startBatch();
|
|
77
77
|
try {
|
|
78
78
|
await engine.addDocuments([
|
|
79
79
|
// ... 문서
|
|
80
80
|
]);
|
|
81
|
-
await engine.commit();
|
|
82
81
|
} catch (error) {
|
|
83
|
-
|
|
82
|
+
// 오류 처리
|
|
83
|
+
} finally {
|
|
84
|
+
// 오류가 발생하더라도 항상 일괄 작업을 종료하여 인덱스가 올바르게 재구축되도록 합니다
|
|
85
|
+
await engine.endBatch();
|
|
84
86
|
}
|
|
85
87
|
```
|
|
86
88
|
|
|
89
|
+
### 커스텀 토크나이저
|
|
90
|
+
|
|
91
|
+
특정 언어나 토크나이징 요구사항을 지원하기 위해 커스텀 토크나이저를 설정할 수 있습니다. 토크나이저는 전체 문서 객체에 액세스할 수 있습니다:
|
|
92
|
+
|
|
93
|
+
```typescript
|
|
94
|
+
import { SearchEngine } from 'gs-search';
|
|
95
|
+
|
|
96
|
+
// 커스텀 인덱스 토크나이저: 문서의 text와 category 필드를 사용
|
|
97
|
+
const indexingTokenizer = (doc: { id: string; text: string; category: string; author: string }): string[] => {
|
|
98
|
+
// 문서의 모든 속성에 액세스할 수 있습니다
|
|
99
|
+
const fullText = `${doc.text} ${doc.category} ${doc.author}`;
|
|
100
|
+
return fullText.toLowerCase().split(/\s+/);
|
|
101
|
+
};
|
|
102
|
+
|
|
103
|
+
// 커스텀 검색 토크나이저: 검색 컨텍스트 지원
|
|
104
|
+
const searchTokenizer = (query: { text: string; language?: string; context?: string }): string[] => {
|
|
105
|
+
// 쿼리의 언어나 컨텍스트에 따라 토크나이징을 조정할 수 있습니다
|
|
106
|
+
const tokens = query.text.toLowerCase().split(/\s+/);
|
|
107
|
+
// 컨텍스트에 따라 추가 검색어를 추가합니다
|
|
108
|
+
if (query.context === 'technical') {
|
|
109
|
+
tokens.push('technical');
|
|
110
|
+
}
|
|
111
|
+
return tokens;
|
|
112
|
+
};
|
|
113
|
+
|
|
114
|
+
// 커스텀 토크나이저를 설정하여 엔진 생성
|
|
115
|
+
const engine = new SearchEngine({
|
|
116
|
+
baseDir: 'search-data',
|
|
117
|
+
indexingTokenizer,
|
|
118
|
+
searchTokenizer
|
|
119
|
+
});
|
|
120
|
+
```
|
|
121
|
+
|
|
87
122
|
## API 참조
|
|
88
123
|
|
|
89
124
|
### SimpleSearch
|
|
@@ -91,22 +126,26 @@ try {
|
|
|
91
126
|
- `constructor()`: 새로운 검색 엔진 인스턴스 생성
|
|
92
127
|
- `addDocument(doc: IDocument): Promise<void>`: 단일 문서 추가
|
|
93
128
|
- `addDocuments(docs: IDocument[]): Promise<void>`: 여러 문서 추가
|
|
129
|
+
- `addDocumentIfMissing(doc: IDocument): Promise<void>`: 문서가 존재하지 않는 경우 단일 문서 추가
|
|
130
|
+
- `addDocumentsIfMissing(docs: IDocument[]): Promise<void>`: 여러 문서를 추가하고 기존 문서는 건너뜀
|
|
94
131
|
- `deleteDocument(id: number): Promise<void>`: 문서 삭제
|
|
95
132
|
- `search(query: string, limit?: number): Promise<IResult[]>`: 문서 검색
|
|
96
133
|
- `getStatus(): Promise<IStatus>`: 검색 엔진 상태 가져오기
|
|
97
134
|
|
|
98
|
-
###
|
|
135
|
+
### SearchEngine
|
|
99
136
|
|
|
100
|
-
- `constructor(options:
|
|
137
|
+
- `constructor(options: ISearchEngineConfig)`: 새로운 코어 엔진 인스턴스 생성
|
|
101
138
|
- `init(): Promise<void>`: 엔진 초기화
|
|
102
139
|
- `addDocument(doc: IDocument): Promise<void>`: 단일 문서 추가
|
|
103
|
-
- `addDocuments(docs: IDocument[]): Promise<void>`:
|
|
104
|
-
- `
|
|
140
|
+
- `addDocuments(docs: IDocument[]): Promise<void>`: 다중 문서 추가
|
|
141
|
+
- `addDocumentIfMissing(doc: IDocument): Promise<void>`: 문서가 존재하지 않는 경우 단일 문서 추가
|
|
142
|
+
- `addDocumentsIfMissing(docs: IDocument[]): Promise<void>`: 여러 문서를 추가하고 기존 문서는 건너뜀
|
|
143
|
+
- `removeDocument(id: number): Promise<void>`: 문서 삭제
|
|
105
144
|
- `search(query: string, limit?: number): Promise<IResult[]>`: 문서 검색
|
|
106
|
-
- `getStatus(): Promise<IStatus>`: 검색 엔진 상태
|
|
107
|
-
- `
|
|
108
|
-
- `
|
|
109
|
-
- `
|
|
145
|
+
- `getStatus(): Promise<IStatus>`: 검색 엔진 상태 조회
|
|
146
|
+
- `hasDocument(id: number): Promise<boolean>`: 문서 ID가 추가된 적이 있는지 확인합니다 (삭제된 문서도 포함)
|
|
147
|
+
- `startBatch(): void`: 배치 작업 시작
|
|
148
|
+
- `endBatch(): Promise<void>`: 배치 작업 종료
|
|
110
149
|
|
|
111
150
|
## 스토리지
|
|
112
151
|
|
package/README.md
CHANGED
|
@@ -78,12 +78,50 @@ try {
|
|
|
78
78
|
await engine.addDocuments([
|
|
79
79
|
// ... documents
|
|
80
80
|
]);
|
|
81
|
-
await engine.endBatch();
|
|
82
81
|
} catch (error) {
|
|
83
82
|
// Handle error
|
|
83
|
+
} finally {
|
|
84
|
+
// Always end batch to ensure index rebuilds properly
|
|
85
|
+
await engine.endBatch();
|
|
84
86
|
}
|
|
85
87
|
```
|
|
86
88
|
|
|
89
|
+
### Custom Tokenizers
|
|
90
|
+
|
|
91
|
+
You can configure custom tokenizers to support specific languages or tokenization requirements:
|
|
92
|
+
|
|
93
|
+
```typescript
|
|
94
|
+
import { SearchEngine, BrowserStorage } from 'gs-search';
|
|
95
|
+
|
|
96
|
+
// Custom tokenizer that splits by spaces and limits token length
|
|
97
|
+
const customTokenizer = (text: string): string[] => {
|
|
98
|
+
// Split by whitespace
|
|
99
|
+
const tokens: string[] = [];
|
|
100
|
+
const words = text.toLowerCase().split(/\s+/);
|
|
101
|
+
|
|
102
|
+
// Process each word, limiting token length to 5 characters
|
|
103
|
+
for (const word of words) {
|
|
104
|
+
if (word.length <= 5) {
|
|
105
|
+
tokens.push(word);
|
|
106
|
+
} else {
|
|
107
|
+
// Split long words character by character
|
|
108
|
+
for (let i = 0; i < word.length; i++) {
|
|
109
|
+
tokens.push(word[i]);
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
return tokens;
|
|
115
|
+
};
|
|
116
|
+
|
|
117
|
+
// Create engine with custom tokenizers
|
|
118
|
+
const engine = new SearchEngine({
|
|
119
|
+
baseDir: 'search-data',
|
|
120
|
+
indexingTokenizer: customTokenizer,
|
|
121
|
+
searchTokenizer: customTokenizer
|
|
122
|
+
});
|
|
123
|
+
```
|
|
124
|
+
|
|
87
125
|
## API Reference
|
|
88
126
|
|
|
89
127
|
### SimpleSearch
|
|
@@ -92,18 +130,23 @@ try {
|
|
|
92
130
|
- `configure(config: Partial<ISearchEngineConfig>): void`: Configure the search engine
|
|
93
131
|
- `addDocument(doc: IDocument): Promise<void>`: Add a single document
|
|
94
132
|
- `addDocuments(docs: IDocument[]): Promise<void>`: Add multiple documents
|
|
133
|
+
- `addDocumentIfMissing(doc: IDocument): Promise<void>`: Add a single document if it doesn't exist
|
|
134
|
+
- `addDocumentsIfMissing(docs: IDocument[]): Promise<void>`: Add multiple documents, skipping existing ones
|
|
95
135
|
- `removeDocument(id: number): Promise<void>`: Delete a document
|
|
96
136
|
- `search(query: string, limit?: number): Promise<IResult[]>`: Search for documents
|
|
97
137
|
- `getStatus(): Promise<IStatus>`: Get search engine status
|
|
138
|
+
- `hasDocument(id: number): Promise<boolean>`: Checks if a document ID has been added (including deleted ones)
|
|
98
139
|
- `startBatch(): void`: Start batch operations
|
|
99
140
|
- `endBatch(): Promise<void>`: End batch operations
|
|
100
141
|
|
|
101
|
-
###
|
|
142
|
+
### SearchEngine
|
|
102
143
|
|
|
103
|
-
- `constructor(options:
|
|
144
|
+
- `constructor(options: ISearchEngineConfig)`: Create a new core engine instance
|
|
104
145
|
- `init(): Promise<void>`: Initialize the engine
|
|
105
146
|
- `addDocument(doc: IDocument): Promise<void>`: Add a single document
|
|
106
147
|
- `addDocuments(docs: IDocument[]): Promise<void>`: Add multiple documents
|
|
148
|
+
- `addDocumentIfMissing(doc: IDocument): Promise<void>`: Add a single document if it doesn't exist
|
|
149
|
+
- `addDocumentsIfMissing(docs: IDocument[]): Promise<void>`: Add multiple documents, skipping existing ones
|
|
107
150
|
- `removeDocument(id: number): Promise<void>`: Delete a document
|
|
108
151
|
- `search(query: string, limit?: number): Promise<IResult[]>`: Search for documents
|
|
109
152
|
- `getStatus(): Promise<IStatus>`: Get search engine status
|
package/README.zh-CN.md
CHANGED
|
@@ -78,9 +78,11 @@ try {
|
|
|
78
78
|
await engine.addDocuments([
|
|
79
79
|
// ... 文档
|
|
80
80
|
]);
|
|
81
|
-
await engine.endBatch();
|
|
82
81
|
} catch (error) {
|
|
83
82
|
// 处理错误
|
|
83
|
+
} finally {
|
|
84
|
+
// 始终结束批处理以确保索引正确重建
|
|
85
|
+
await engine.endBatch();
|
|
84
86
|
}
|
|
85
87
|
```
|
|
86
88
|
|
|
@@ -111,30 +113,81 @@ const storage = new CustomStorage();
|
|
|
111
113
|
const engine = new SearchEngine({ storage });
|
|
112
114
|
```
|
|
113
115
|
|
|
114
|
-
###
|
|
116
|
+
### 批处理操作
|
|
115
117
|
|
|
116
|
-
|
|
118
|
+
使用批处理操作进行高效的文档索引:
|
|
117
119
|
|
|
118
120
|
```typescript
|
|
119
|
-
|
|
121
|
+
// 开始批处理操作
|
|
122
|
+
await engine.startBatch();
|
|
120
123
|
|
|
121
124
|
try {
|
|
122
125
|
// 批量添加文档
|
|
123
126
|
for (let i = 0; i < 1000; i++) {
|
|
124
127
|
await engine.addDocuments([{ id: i, text: `文档 ${i}` }]);
|
|
125
128
|
}
|
|
126
|
-
|
|
127
|
-
// 提交事务
|
|
128
|
-
await engine.commitTransaction();
|
|
129
129
|
} catch (error) {
|
|
130
|
-
//
|
|
131
|
-
|
|
130
|
+
// 处理错误
|
|
131
|
+
console.error('批处理操作失败:', error);
|
|
132
|
+
} finally {
|
|
133
|
+
// 无论是否发生错误,都必须结束批处理以确保索引正常重建
|
|
134
|
+
await engine.endBatch();
|
|
132
135
|
}
|
|
133
136
|
```
|
|
134
137
|
|
|
135
138
|
## 自定义分词器
|
|
136
139
|
|
|
137
|
-
|
|
140
|
+
### 支持完整文档对象的分词器
|
|
141
|
+
|
|
142
|
+
您可以通过配置自定义分词器来支持特定的语言或分词需求。分词器可以访问完整的文档对象,让您能够基于文档的多个属性进行分词:
|
|
143
|
+
|
|
144
|
+
```typescript
|
|
145
|
+
import { SearchEngine } from 'gs-search';
|
|
146
|
+
|
|
147
|
+
// 自定义索引分词器:使用文档的text和category字段进行分词
|
|
148
|
+
const indexingTokenizer = (doc: { id: string; text: string; category: string; author: string }): string[] => {
|
|
149
|
+
// 可以访问文档的所有属性
|
|
150
|
+
const fullText = `${doc.text} ${doc.category} ${doc.author}`;
|
|
151
|
+
return fullText.toLowerCase().split(/\s+/);
|
|
152
|
+
};
|
|
153
|
+
|
|
154
|
+
// 自定义搜索分词器:支持搜索上下文
|
|
155
|
+
const searchTokenizer = (query: { text: string; language?: string; context?: string }): string[] => {
|
|
156
|
+
// 可以根据查询的语言或上下文调整分词
|
|
157
|
+
const tokens = query.text.toLowerCase().split(/\s+/);
|
|
158
|
+
// 根据上下文添加额外的搜索词
|
|
159
|
+
if (query.context === 'technical') {
|
|
160
|
+
tokens.push('technical');
|
|
161
|
+
}
|
|
162
|
+
return tokens;
|
|
163
|
+
};
|
|
164
|
+
|
|
165
|
+
// 创建引擎并配置自定义分词器
|
|
166
|
+
const engine = new SearchEngine({
|
|
167
|
+
baseDir: 'search-data',
|
|
168
|
+
indexingTokenizer,
|
|
169
|
+
searchTokenizer
|
|
170
|
+
});
|
|
171
|
+
|
|
172
|
+
// 索引包含额外属性的文档
|
|
173
|
+
await engine.addDocument({
|
|
174
|
+
id: '1',
|
|
175
|
+
text: '这是一个技术文档',
|
|
176
|
+
category: '技术',
|
|
177
|
+
author: '张三'
|
|
178
|
+
});
|
|
179
|
+
|
|
180
|
+
// 使用包含上下文的查询进行搜索
|
|
181
|
+
const results = await engine.search({
|
|
182
|
+
text: '技术',
|
|
183
|
+
language: 'zh',
|
|
184
|
+
context: 'technical'
|
|
185
|
+
});
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
### 简单的字符/空格分词器
|
|
189
|
+
|
|
190
|
+
以下是一个简单的正则分词器示例,按空格和字符分词,且最长token不超过5字符:
|
|
138
191
|
|
|
139
192
|
```typescript
|
|
140
193
|
import { SimpleSearch } from 'gs-search';
|
|
@@ -171,22 +224,27 @@ SimpleSearch.configure({
|
|
|
171
224
|
|
|
172
225
|
### SimpleSearch
|
|
173
226
|
|
|
174
|
-
|
|
227
|
+
**静态方法(无需实例创建):**
|
|
175
228
|
- `configure(config: Partial<ISearchEngineConfig>): void`: 配置搜索引擎
|
|
176
229
|
- `addDocument(doc: IDocument): Promise<void>`: 添加单个文档
|
|
177
|
-
- `addDocuments(docs: IDocument[]): Promise<void>`:
|
|
230
|
+
- `addDocuments(docs: IDocument[]): Promise<void>`: 添加多个文档
|
|
231
|
+
- `addDocumentIfMissing(doc: IDocument): Promise<void>`: 如果文档不存在则添加单个文档
|
|
232
|
+
- `addDocumentsIfMissing(docs: IDocument[]): Promise<void>`: 添加多个文档,跳过已存在的文档
|
|
178
233
|
- `removeDocument(id: number): Promise<void>`: 删除文档
|
|
179
234
|
- `search(query: string, limit?: number): Promise<IResult[]>`: 搜索文档
|
|
180
235
|
- `getStatus(): Promise<IStatus>`: 获取搜索引擎状态
|
|
181
|
-
- `
|
|
236
|
+
- `hasDocument(id: number): Promise<boolean>`: 检查文档ID是否曾经添加过(包括已删除的)
|
|
237
|
+
- `startBatch(): void`: 开始批量操作
|
|
182
238
|
- `endBatch(): Promise<void>`: 结束批处理操作
|
|
183
239
|
|
|
184
|
-
###
|
|
240
|
+
### SearchEngine
|
|
185
241
|
|
|
186
|
-
- `constructor(options:
|
|
242
|
+
- `constructor(options: ISearchEngineConfig)`: 创建一个新的核心引擎实例
|
|
187
243
|
- `init(): Promise<void>`: 初始化引擎
|
|
188
244
|
- `addDocument(doc: IDocument): Promise<void>`: 添加单个文档
|
|
189
|
-
- `addDocuments(docs: IDocument[]): Promise<void>`:
|
|
245
|
+
- `addDocuments(docs: IDocument[]): Promise<void>`: 添加多个文档
|
|
246
|
+
- `addDocumentIfMissing(doc: IDocument): Promise<void>`: 如果文档不存在则添加单个文档
|
|
247
|
+
- `addDocumentsIfMissing(docs: IDocument[]): Promise<void>`: 添加多个文档,跳过已存在的文档
|
|
190
248
|
- `removeDocument(id: number): Promise<void>`: 删除文档
|
|
191
249
|
- `search(query: string, limit?: number): Promise<IResult[]>`: 搜索文档
|
|
192
250
|
- `getStatus(): Promise<IStatus>`: 获取搜索引擎状态
|
package/lib/index.cjs
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
"use strict";Object.create,Object.defineProperty,Object.getOwnPropertyDescriptor,Object.getOwnPropertyNames,Object.getPrototypeOf,Object.prototype.hasOwnProperty;class t{#t;constructor(t){this.#t=t}async#e(){return await(await navigator.storage.getDirectory()).getDirectoryHandle(this.#t,{create:!0})}async write(t,e){const s=await(await(await this.#e()).getFileHandle(t,{create:!0})).createWritable();await s.write(e),await s.close()}async append(t,e){const s=await this.#e();let i;try{i=await s.getFileHandle(t,{create:!0})}catch{i=await s.getFileHandle(t,{create:!0})}const n=await i.getFile(),a=await i.createWritable({keepExistingData:!0});await a.seek(n.size),await a.write(e),await a.close()}async read(t){const e=await this.#e();try{return await(await(await e.getFileHandle(t)).getFile()).arrayBuffer()}catch{return null}}async readRange(t,e,s){const i=await this.#e();try{return await(await(await i.getFileHandle(t)).getFile()).slice(e,s).arrayBuffer()}catch{return null}}async remove(t){const e=await this.#e();try{await e.removeEntry(t)}catch{}}async listFiles(){const t=await this.#e(),e=[];for await(const s of t.keys())e.push(s);return e}async clearAll(){const t=await this.#e();for await(const e of t.keys())await t.removeEntry(e,{recursive:!0})}async getFileSize(t){const e=await this.#e();try{return(await(await e.getFileHandle(t)).getFile()).size}catch{return 0}}}class e{#s=null;#i=null;#t;#n="";constructor(t){this.#t=t}async#a(){if(this.#s)return;const t=await import("node:fs"),e=await import("node:path");this.#s=t.promises,this.#i=e.default||e,this.#n=this.#i.join(process.cwd(),this.#t);try{await this.#s.access(this.#n)}catch{await this.#s.mkdir(this.#n,{recursive:!0})}}#r(t){return this.#i.join(this.#n,t)}async write(t,e){await this.#a(),await this.#s.writeFile(this.#r(t),Buffer.from(e))}async append(t,e){await this.#a(),await this.#s.appendFile(this.#r(t),Buffer.from(e))}async read(t){await this.#a();try{const e=await this.#s.readFile(this.#r(t));return 
e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)}catch{return null}}async readRange(t,e,s){await this.#a();try{const i=await this.#s.open(this.#r(t),"r"),n=s-e,a=Buffer.alloc(n);return await i.read(a,0,n,e),await i.close(),a.buffer.slice(a.byteOffset,a.byteOffset+a.byteLength)}catch{return null}}async remove(t){await this.#a();try{await this.#s.unlink(this.#r(t))}catch{}}async listFiles(){await this.#a();try{return await this.#s.readdir(this.#n)}catch{return[]}}async clearAll(){await this.#a();try{const t=await this.#s.readdir(this.#n);for(const e of t)await this.#s.unlink(this.#i.join(this.#n,e))}catch{}}async getFileSize(t){await this.#a();try{return(await this.#s.stat(this.#r(t))).size}catch{return 0}}}const s="search_meta.json",i="deleted_ids.bin",n="added_ids.bin";class a{#o;#h={wordSegments:[],charSegments:[]};#c=new Set;#d=new Set;constructor(t){this.#o=t}async load(){const t=await this.#o.read(s);if(t){const e=(new TextDecoder).decode(t);this.#h=JSON.parse(e)}else this.#h={wordSegments:[],charSegments:[]};const e=await this.#o.read(i);if(e){const t=new DataView(e);let s=0;const i=e.byteLength;for(;s<i&&!(s+4>i);){const e=t.getUint32(s,!0);this.#c.add(e),s+=4,s<i&&30===t.getUint8(s)&&(s+=1)}}const a=await this.#o.read(n);if(a){const t=new DataView(a);let e=0;const s=a.byteLength;for(;e<s&&!(e+4>s);){const i=t.getUint32(e,!0);this.#d.add(i),e+=4,e<s&&30===t.getUint8(e)&&(e+=1)}}}async save(){const t=JSON.stringify(this.#h);if(await this.#o.write(s,(new TextEncoder).encode(t).buffer),0===this.#c.size)await this.#o.remove(i);else{const t=4*this.#c.size+this.#c.size,e=new ArrayBuffer(t),s=new DataView(e);let n=0;for(const t of this.#c)s.setUint32(n,t,!0),n+=4,s.setUint8(n,30),n+=1;await this.#o.write(i,e)}if(0===this.#d.size)await this.#o.remove(n);else{const t=4*this.#d.size+this.#d.size,e=new ArrayBuffer(t),s=new DataView(e);let i=0;for(const t of this.#d)s.setUint32(i,t,!0),i+=4,s.setUint8(i,30),i+=1;await 
this.#o.write(n,e)}}getSegments(t){return"word"===t?this.#h.wordSegments:this.#h.charSegments}getDeletedIds(){return this.#c}addDeletedId(t){this.#c.add(t)}isDeleted(t){return this.#c.has(t)}addAddedId(t){this.#d.add(t)}removeAddedId(t){this.#d.delete(t)}isAdded(t){return this.#d.has(t)}getAddedIds(){return this.#d}getLastSegmentInfo(t){const e=this.getSegments(t);return 0===e.length?null:e[e.length-1]}updateSegment(t,e,s,i,n,a){const r="word"===t?this.#h.wordSegments:this.#h.charSegments;if(a)r.push({filename:e,start:s,end:i,tokenCount:n});else{const t=r[r.length-1];t&&t.filename===e&&(t.end=i,t.tokenCount=n)}}reset(){this.#h={wordSegments:[],charSegments:[]},this.#c.clear(),this.#d.clear()}}class r{static SEPARATOR=30;#o;constructor(t){this.#o=t}async appendBatch(t,e){if(0===e.length)return await this.#o.getFileSize(t);const s=new TextEncoder;let i=0;for(const t of e){i+=8;for(const e of t.tokens){i+=2+Math.min(s.encode(e).byteLength,65535)}i+=1}const n=new Uint8Array(i);let a=0;for(const t of e){const e=[];for(const i of t.tokens){const t=s.encode(i),n=t.byteLength>65535?t.slice(0,65535):t;e.push(n)}const i=new DataView(n.buffer,a);i.setUint32(0,t.id,!0),i.setUint32(4,e.length,!0),a+=8;for(const t of e)new DataView(n.buffer,a).setUint16(0,t.byteLength,!0),a+=2,n.set(t,a),a+=t.byteLength;n[a++]=r.SEPARATOR}return await this.#o.append(t,n.buffer),await this.#o.getFileSize(t)}async readRange(t,e,s){const i=await this.#o.readRange(t,e,s);if(!i||0===i.byteLength)return[];const n=new DataView(i),a=new Uint8Array(i),o=new TextDecoder,h=[];let c=0;const d=i.byteLength;for(;c<d&&!(c+8>d);){const t=n.getUint32(c,!0);c+=4;const e=n.getUint32(c,!0);c+=4;const s=[];for(let t=0;t<e&&!(c+2>d);t++){const t=n.getUint16(c,!0);if(c+=2,c+t>d)break;const e=new Uint8Array(i,c,t);s.push(o.decode(e)),c+=t}c<d&&a[c]===r.SEPARATOR&&(c+=1),h.push({id:t,tokens:s})}return h}async getCurrentSize(t){return await this.#o.getFileSize(t)}}class o{#g;#o;#l=null;#f=null;static hash(t){let 
e=5381;for(let s=0;s<t.length;s++)e=(e<<5)+e^t.charCodeAt(s);return e>>>0}constructor(t,e){this.#g=t,this.#o=e}async loadIndex(){return!!this.#l||(this.#l=await this.#o.read(this.#g),!!this.#l&&(this.#f=new DataView(this.#l),!0))}async buildAndSave(t){const e=new Map;for(const s of t){const t=new Map;for(const i of s.tokens)if(!t.has(i)){t.set(i,!0);const n=o.hash(i);e.has(n)||e.set(n,[]),e.get(n).push(s.id)}}const s=Array.from(e.keys()).sort((t,e)=>t-e);let i=0;const n=new Array(s.length);for(let t=0;t<s.length;t++){const a=s[t],r=e.get(a);n[t]=r,i+=r.length}const a=12*s.length,r=new ArrayBuffer(8+a+4*i),h=new DataView(r);h.setUint32(0,1229866072),h.setUint32(4,s.length);let c=8,d=8+a;for(let t=0;t<s.length;t++){const e=s[t],i=n[t];h.setUint32(c,e),h.setUint32(c+4,d),h.setUint32(c+8,i.length),c+=12;for(let t=0;t<i.length;t++)h.setUint32(d,i[t],!0),d+=4}await this.#o.write(this.#g,r),this.#l=r,this.#f=h}search(t){if(!this.#f||!this.#l)return[];const e=o.hash(t);let s=0,i=this.#f.getUint32(4)-1;for(;s<=i;){const t=s+i>>>1,n=8+12*t,a=this.#f.getUint32(n);if(a<e)s=t+1;else{if(!(a>e)){const t=this.#f.getUint32(n+4),e=this.#f.getUint32(n+8),s=[];for(let i=0;i<e;i++)s.push(this.#f.getUint32(t+4*i,!0));return s}i=t-1}}return[]}}const h="word_cache.bin",c="char_cache.bin";class d{#o;#h;#w;#u;#m=!1;#y;#p=!1;#S={word:0,char:0};constructor(s){if(!s.baseDir)throw new Error("SearchEngine requires 'baseDir' in config.");if(this.#y={wordSegmentTokenThreshold:1e5,charSegmentTokenThreshold:5e5,minWordTokenSave:0,minCharTokenSave:0,...s},(this.#y.minWordTokenSave||0)>=(this.#y.wordSegmentTokenThreshold||1e5))throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");if((this.#y.minCharTokenSave||0)>=(this.#y.charSegmentTokenThreshold||5e5))throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");let i=null;if(this.#y.storage&&("object"==typeof this.#y.storage?i=this.#y.storage:"browser"===this.#y.storage?i=new 
t(this.#y.baseDir):"node"===this.#y.storage&&(i=new e(this.#y.baseDir))),!i){const s=typeof navigator<"u"&&navigator?.storage?.getDirectory instanceof Function,n=typeof process<"u"&&null!=process.versions&&null!=process.versions.node;s?i=new t(this.#y.baseDir):n&&(i=new e(this.#y.baseDir))}if(!i)throw new Error('Storage initialization failed. Please configure "storage" explicitly or ensure you are in a supported environment (Browser/Node).');this.#o=i,this.#h=new a(this.#o),this.#w=new r(this.#o),this.#u=new Map}async init(){if(this.#m)return;await this.#h.load();const t=[...this.#h.getSegments("word"),...this.#h.getSegments("char")];for(const e of t)this.#u.has(e.filename)||this.#u.set(e.filename,new o(e.filename,this.#o)),await this.#u.get(e.filename).loadIndex();this.#m=!0}startBatch(){this.#p=!0,this.#S={word:0,char:0}}async endBatch(){this.#p=!1,this.#S.word>0&&await this.#D("word",this.#S.word),this.#S.char>0&&await this.#D("char",this.#S.char),this.#S={word:0,char:0},await this.#h.save()}#k(t){if(typeof Intl<"u"&&Intl.Segmenter){const e=new Intl.Segmenter([],{granularity:"word"});return Array.from(e.segment(t)).filter(t=>t.isWordLike).map(t=>t.segment.toLowerCase())}return t.toLowerCase().split(/[^a-z0-9\u4e00-\u9fa5]+/g).filter(t=>t.length>0)}#b(t){return this.#y.indexingTokenizer?this.#y.indexingTokenizer(t):this.#k(t)}#T(t){return this.#y.searchTokenizer?this.#y.searchTokenizer(t):this.#y.indexingTokenizer?this.#y.indexingTokenizer(t):this.#k(t)}async addDocument(t){return this.addDocuments([t])}async addDocuments(t){if(this.#m||await this.init(),0===t.length)return;const e=this.#h.getDeletedIds(),s=[],i=[];for(const n of t){if(e.has(n.id))throw new Error(`Document ID ${n.id} has been deleted and cannot be re-added.`);if(this.#h.isAdded(n.id))throw new Error(`Document ID ${n.id} already exists.`);const t=this.#b(n.text),a=[],r=[];for(const e of 
t)e.length>1?a.push(e):1===e.length&&r.push(e);a.length>0&&s.push({id:n.id,tokens:a}),r.length>0&&i.push({id:n.id,tokens:r})}let n=0,a=0;if(s.length>0){await this.#w.appendBatch(h,s);for(const t of s)n+=t.tokens.length}if(i.length>0){await this.#w.appendBatch(c,i);for(const t of i)a+=t.tokens.length}for(const e of t)this.#h.addAddedId(e.id);this.#p?(this.#S.word+=n,this.#S.char+=a):(n>0&&await this.#D("word",n),a>0&&await this.#D("char",a),await this.#h.save())}async#D(t,e){const s="word"===t?h:c,i=await this.#w.getCurrentSize(s),n="word"===t?this.#y.wordSegmentTokenThreshold||1e5:this.#y.charSegmentTokenThreshold||5e5,a="word"===t?this.#y.minWordTokenSave||0:this.#y.minCharTokenSave||0,r=this.#h.getLastSegmentInfo(t);let d,g,l,f;const w=()=>{const e=this.#h.getSegments(t).length+1;return`${t}_seg_${e}.bin`};if(r){const t=r.tokenCount;t>=n||t+e>=n?(d=w(),l=!0,g=r.end,f=e):(d=r.filename,l=!1,g=r.start,f=t+e)}else d=w(),l=!0,g=0,f=e;if(f<a)return void this.#h.updateSegment(t,d,g,i,f,l);const u=await this.#w.readRange(s,g,i);let m=this.#u.get(d);m||(m=new o(d,this.#o),this.#u.set(d,m)),await m.buildAndSave(u),this.#h.updateSegment(t,d,g,i,f,l)}async search(t,e){this.#m||await this.init();const s=this.#T(t),i=s.filter(t=>t.length>1),n=s.filter(t=>1===t.length),a=this.#h.getDeletedIds(),r=new Map,h=new Map,c=t=>{const e=this.#h.getSegments(t);for(const t of e){const e=t.filename;!this.#u.has(e)&&!h.has(e)&&h.set(e,new o(e,this.#o))}};c("word"),c("char"),await Promise.all(Array.from(h.entries()).map(([t,e])=>e.loadIndex().then(s=>{s&&this.#u.set(t,e)})));const d=async(t,e)=>{if(0===e.length)return;const s=this.#h.getSegments(t);for(const t of s){const s=t.filename,i=this.#u.get(s);if(i)for(const t of e){const e=i.search(t),s=1+.1*t.length;for(const i of e)if(!a.has(i))if(r.has(i)){const e=r.get(i);e.score+=s,e.tokens.add(t)}else r.set(i,{score:0,tokens:new Set([t])})}}};await d("word",i),await d("char",n);const g=[];return 
r.forEach((t,e)=>{g.push({id:e,score:t.score,tokens:Array.from(t.tokens)})}),g.sort((t,e)=>e.score-t.score),"number"==typeof e&&e>0?g.slice(0,e):g}async removeDocument(t){this.#m||await this.init(),this.#h.addDeletedId(t),this.#h.removeAddedId(t),await this.#h.save()}async clearAll(){await this.#o.clearAll(),this.#u.clear(),this.#h.reset(),this.#m=!1,this.#p=!1,this.#S={word:0,char:0}}async getStatus(){return this.#m||await this.init(),{wordSegments:this.#h.getSegments("word").length,charSegments:this.#h.getSegments("char").length,deleted:this.#h.getDeletedIds().size,wordCacheSize:await this.#w.getCurrentSize(h),charCacheSize:await this.#w.getCurrentSize(c),inBatch:this.#p}}}exports.BrowserStorage=t,exports.NodeStorage=e,exports.SearchEngine=d,exports.SimpleSearch=class{static#I=null;static#v={baseDir:"simple_search_data",wordSegmentTokenThreshold:1e5,minWordTokenSave:0};static configure(t){const e={...this.#v,...t};this.#I=new d(e)}static#z(){return this.#I||(this.#I=new d(this.#v)),this.#I}static async startBatch(){this.#z().startBatch()}static async endBatch(){return this.#z().endBatch()}static async addDocument(t){return this.#z().addDocument(t)}static async addDocuments(t){return this.#z().addDocuments(t)}static async search(t,e){return this.#z().search(t,e)}static async removeDocument(t){return this.#z().removeDocument(t)}static async clearAll(){return this.#z().clearAll()}static async getStatus(){return this.#z().getStatus()}};
|
|
1
|
+
"use strict";Object.create,Object.defineProperty,Object.getOwnPropertyDescriptor,Object.getOwnPropertyNames,Object.getPrototypeOf,Object.prototype.hasOwnProperty;class t{#t;constructor(t){this.#t=t}async#e(){return await(await navigator.storage.getDirectory()).getDirectoryHandle(this.#t,{create:!0})}async write(t,e){const s=await(await(await this.#e()).getFileHandle(t,{create:!0})).createWritable();await s.write(e),await s.close()}async append(t,e){const s=await this.#e();let i;try{i=await s.getFileHandle(t,{create:!0})}catch{i=await s.getFileHandle(t,{create:!0})}const n=await i.getFile(),a=await i.createWritable({keepExistingData:!0});await a.seek(n.size),await a.write(e),await a.close()}async read(t){const e=await this.#e();try{return await(await(await e.getFileHandle(t)).getFile()).arrayBuffer()}catch{return null}}async readRange(t,e,s){const i=await this.#e();try{return await(await(await i.getFileHandle(t)).getFile()).slice(e,s).arrayBuffer()}catch{return null}}async remove(t){const e=await this.#e();try{await e.removeEntry(t)}catch{}}async listFiles(){const t=await this.#e(),e=[];for await(const s of t.keys())e.push(s);return e}async clearAll(){const t=await this.#e();for await(const e of t.keys())await t.removeEntry(e,{recursive:!0})}async getFileSize(t){const e=await this.#e();try{return(await(await e.getFileHandle(t)).getFile()).size}catch{return 0}}}class e{#s=null;#i=null;#t;#n="";constructor(t){this.#t=t}async#a(){if(this.#s)return;const t=await import("node:fs"),e=await import("node:path");this.#s=t.promises,this.#i=e.default||e,this.#n=this.#i.join(process.cwd(),this.#t);try{await this.#s.access(this.#n)}catch{await this.#s.mkdir(this.#n,{recursive:!0})}}#r(t){return this.#i.join(this.#n,t)}async write(t,e){await this.#a(),await this.#s.writeFile(this.#r(t),Buffer.from(e))}async append(t,e){await this.#a(),await this.#s.appendFile(this.#r(t),Buffer.from(e))}async read(t){await this.#a();try{const e=await this.#s.readFile(this.#r(t));return 
e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)}catch{return null}}async readRange(t,e,s){await this.#a();try{const i=await this.#s.open(this.#r(t),"r"),n=s-e,a=Buffer.alloc(n);return await i.read(a,0,n,e),await i.close(),a.buffer.slice(a.byteOffset,a.byteOffset+a.byteLength)}catch{return null}}async remove(t){await this.#a();try{await this.#s.unlink(this.#r(t))}catch{}}async listFiles(){await this.#a();try{return await this.#s.readdir(this.#n)}catch{return[]}}async clearAll(){await this.#a();try{const t=await this.#s.readdir(this.#n);for(const e of t)await this.#s.unlink(this.#i.join(this.#n,e))}catch{}}async getFileSize(t){await this.#a();try{return(await this.#s.stat(this.#r(t))).size}catch{return 0}}}const s="search_meta.json",i="deleted_ids.bin",n="added_ids.bin";class a{#o;#h={wordSegments:[],charSegments:[]};#c=new Set;#d=new Set;constructor(t){this.#o=t}async load(){const t=await this.#o.read(s);if(t){const e=(new TextDecoder).decode(t);this.#h=JSON.parse(e)}else this.#h={wordSegments:[],charSegments:[]};const e=await this.#o.read(i);if(e){const t=new DataView(e);let s=0;const i=e.byteLength;for(;s<i&&!(s+4>i);){const e=t.getUint32(s,!0);this.#c.add(e),s+=4,s<i&&30===t.getUint8(s)&&(s+=1)}}const a=await this.#o.read(n);if(a){const t=new DataView(a);let e=0;const s=a.byteLength;for(;e<s&&!(e+4>s);){const i=t.getUint32(e,!0);this.#d.add(i),e+=4,e<s&&30===t.getUint8(e)&&(e+=1)}}}async save(){const t=JSON.stringify(this.#h);if(await this.#o.write(s,(new TextEncoder).encode(t).buffer),0===this.#c.size)await this.#o.remove(i);else{const t=4*this.#c.size+this.#c.size,e=new ArrayBuffer(t),s=new DataView(e);let n=0;for(const t of this.#c)s.setUint32(n,t,!0),n+=4,s.setUint8(n,30),n+=1;await this.#o.write(i,e)}if(0===this.#d.size)await this.#o.remove(n);else{const t=4*this.#d.size+this.#d.size,e=new ArrayBuffer(t),s=new DataView(e);let i=0;for(const t of this.#d)s.setUint32(i,t,!0),i+=4,s.setUint8(i,30),i+=1;await 
this.#o.write(n,e)}}getSegments(t){return"word"===t?this.#h.wordSegments:this.#h.charSegments}getDeletedIds(){return this.#c}addDeletedId(t){this.#c.add(t)}isDeleted(t){return this.#c.has(t)}addAddedId(t){this.#d.add(t)}removeAddedId(t){this.#d.delete(t)}isAdded(t){return this.#d.has(t)}getAddedIds(){return this.#d}hasDocument(t){return this.#d.has(t)||this.#c.has(t)}getLastSegmentInfo(t){const e=this.getSegments(t);return 0===e.length?null:e[e.length-1]}updateSegment(t,e,s,i,n,a){const r="word"===t?this.#h.wordSegments:this.#h.charSegments;if(a)r.push({filename:e,start:s,end:i,tokenCount:n});else{const t=r[r.length-1];t&&t.filename===e&&(t.end=i,t.tokenCount=n)}}reset(){this.#h={wordSegments:[],charSegments:[]},this.#c.clear(),this.#d.clear()}}class r{static SEPARATOR=30;#o;constructor(t){this.#o=t}async appendBatch(t,e){if(0===e.length)return await this.#o.getFileSize(t);const s=new TextEncoder;let i=0;for(const t of e){i+=8;for(const e of t.tokens){i+=2+Math.min(s.encode(e).byteLength,65535)}i+=1}const n=new Uint8Array(i);let a=0;for(const t of e){const e=[];for(const i of t.tokens){const t=s.encode(i),n=t.byteLength>65535?t.slice(0,65535):t;e.push(n)}const i=new DataView(n.buffer,a);i.setUint32(0,t.id,!0),i.setUint32(4,e.length,!0),a+=8;for(const t of e)new DataView(n.buffer,a).setUint16(0,t.byteLength,!0),a+=2,n.set(t,a),a+=t.byteLength;n[a++]=r.SEPARATOR}return await this.#o.append(t,n.buffer),await this.#o.getFileSize(t)}async readRange(t,e,s){const i=await this.#o.readRange(t,e,s);if(!i||0===i.byteLength)return[];const n=new DataView(i),a=new Uint8Array(i),o=new TextDecoder,h=[];let c=0;const d=i.byteLength;for(;c<d&&!(c+8>d);){const t=n.getUint32(c,!0);c+=4;const e=n.getUint32(c,!0);c+=4;const s=[];for(let t=0;t<e&&!(c+2>d);t++){const t=n.getUint16(c,!0);if(c+=2,c+t>d)break;const e=new Uint8Array(i,c,t);s.push(o.decode(e)),c+=t}c<d&&a[c]===r.SEPARATOR&&(c+=1),h.push({id:t,tokens:s})}return h}async getCurrentSize(t){return await 
this.#o.getFileSize(t)}}class o{#g;#o;#l=null;#f=null;static hash(t){let e=5381;for(let s=0;s<t.length;s++)e=(e<<5)+e^t.charCodeAt(s);return e>>>0}constructor(t,e){this.#g=t,this.#o=e}async loadIndex(){return!!this.#l||(this.#l=await this.#o.read(this.#g),!!this.#l&&(this.#f=new DataView(this.#l),!0))}async buildAndSave(t){const e=new Map;for(const s of t){const t=new Map;for(const i of s.tokens)if(!t.has(i)){t.set(i,!0);const n=o.hash(i);e.has(n)||e.set(n,[]),e.get(n).push(s.id)}}const s=Array.from(e.keys()).sort((t,e)=>t-e);let i=0;const n=new Array(s.length);for(let t=0;t<s.length;t++){const a=s[t],r=e.get(a);n[t]=r,i+=r.length}const a=12*s.length,r=new ArrayBuffer(8+a+4*i),h=new DataView(r);h.setUint32(0,1229866072),h.setUint32(4,s.length);let c=8,d=8+a;for(let t=0;t<s.length;t++){const e=s[t],i=n[t];h.setUint32(c,e),h.setUint32(c+4,d),h.setUint32(c+8,i.length),c+=12;for(let t=0;t<i.length;t++)h.setUint32(d,i[t],!0),d+=4}await this.#o.write(this.#g,r),this.#l=r,this.#f=h}search(t){if(!this.#f||!this.#l)return[];const e=o.hash(t);let s=0,i=this.#f.getUint32(4)-1;for(;s<=i;){const t=s+i>>>1,n=8+12*t,a=this.#f.getUint32(n);if(a<e)s=t+1;else{if(!(a>e)){const t=this.#f.getUint32(n+4),e=this.#f.getUint32(n+8),s=[];for(let i=0;i<e;i++)s.push(this.#f.getUint32(t+4*i,!0));return s}i=t-1}}return[]}}const h="word_cache.bin",c="char_cache.bin";class d{#o;#h;#w;#u;#m=!1;#y;#p=!1;#D={word:0,char:0};constructor(s){if(!s.baseDir)throw new Error("SearchEngine requires 'baseDir' in config.");if(this.#y={wordSegmentTokenThreshold:1e5,charSegmentTokenThreshold:5e5,minWordTokenSave:0,minCharTokenSave:0,...s},(this.#y.minWordTokenSave||0)>=(this.#y.wordSegmentTokenThreshold||1e5))throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");if((this.#y.minCharTokenSave||0)>=(this.#y.charSegmentTokenThreshold||5e5))throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");let i=null;if(this.#y.storage&&("object"==typeof 
this.#y.storage?i=this.#y.storage:"browser"===this.#y.storage?i=new t(this.#y.baseDir):"node"===this.#y.storage&&(i=new e(this.#y.baseDir))),!i){const s=typeof navigator<"u"&&navigator?.storage?.getDirectory instanceof Function,n=typeof process<"u"&&null!=process.versions&&null!=process.versions.node;s?i=new t(this.#y.baseDir):n&&(i=new e(this.#y.baseDir))}if(!i)throw new Error('Storage initialization failed. Please configure "storage" explicitly or ensure you are in a supported environment (Browser/Node).');this.#o=i,this.#h=new a(this.#o),this.#w=new r(this.#o),this.#u=new Map}async init(){if(this.#m)return;await this.#h.load();const t=[...this.#h.getSegments("word"),...this.#h.getSegments("char")];for(const e of t)this.#u.has(e.filename)||this.#u.set(e.filename,new o(e.filename,this.#o)),await this.#u.get(e.filename).loadIndex();this.#m=!0}startBatch(){this.#p=!0,this.#D={word:0,char:0}}async endBatch(){this.#p=!1,this.#D.word>0&&await this.#S("word",this.#D.word),this.#D.char>0&&await this.#S("char",this.#D.char),this.#D={word:0,char:0},await this.#h.save()}#k(t){try{if(typeof Intl<"u"&&"function"==typeof Intl.Segmenter&&"function"==typeof Array.from){const e=new Intl.Segmenter([],{granularity:"word"}).segment(t);if("object"==typeof e&&null!==e)return Array.from(e).filter(t=>t?.isWordLike).map(t=>t?.segment?.toLowerCase()||"")}}catch{}return t.toLowerCase().split(/[^a-z0-9\u4e00-\u9fa5]+/g).filter(t=>t.length>0)}#I(t){return this.#y.indexingTokenizer?this.#y.indexingTokenizer(t):this.#k(t.text)}#b(t){return this.#y.searchTokenizer?this.#y.searchTokenizer(t):this.#I(t)}async addDocument(t){return this.addDocuments([t])}async addDocumentIfMissing(t){return this.addDocumentsIfMissing([t])}async addDocumentsIfMissing(t){if(this.#m||await this.init(),0===t.length)return;const e=this.#h.getDeletedIds(),s=[],i=[],n=[];for(const a of t){if(e.has(a.id)||this.#h.isAdded(a.id))continue;const t=this.#I(a),r=[],o=[];for(const e of 
t)e.length>1?r.push(e):1===e.length&&o.push(e);r.length>0&&s.push({id:a.id,tokens:r}),o.length>0&&i.push({id:a.id,tokens:o}),n.push(a)}if(0===n.length)return;let a=0,r=0;if(s.length>0){await this.#w.appendBatch(h,s);for(const t of s)a+=t.tokens.length}if(i.length>0){await this.#w.appendBatch(c,i);for(const t of i)r+=t.tokens.length}for(const t of n)this.#h.addAddedId(t.id);this.#p?(this.#D.word+=a,this.#D.char+=r):(a>0&&await this.#S("word",a),r>0&&await this.#S("char",r),await this.#h.save())}async addDocuments(t){if(this.#m||await this.init(),0===t.length)return;const e=this.#h.getDeletedIds(),s=[],i=[];for(const n of t){if(e.has(n.id))throw new Error(`Document ID ${n.id} has been deleted and cannot be re-added.`);if(this.#h.isAdded(n.id))throw new Error(`Document ID ${n.id} already exists.`);const t=this.#I(n),a=[],r=[];for(const e of t)e.length>1?a.push(e):1===e.length&&r.push(e);a.length>0&&s.push({id:n.id,tokens:a}),r.length>0&&i.push({id:n.id,tokens:r})}let n=0,a=0;if(s.length>0){await this.#w.appendBatch(h,s);for(const t of s)n+=t.tokens.length}if(i.length>0){await this.#w.appendBatch(c,i);for(const t of i)a+=t.tokens.length}for(const e of t)this.#h.addAddedId(e.id);this.#p?(this.#D.word+=n,this.#D.char+=a):(n>0&&await this.#S("word",n),a>0&&await this.#S("char",a),await this.#h.save())}async#S(t,e){const s="word"===t?h:c,i=await this.#w.getCurrentSize(s),n="word"===t?this.#y.wordSegmentTokenThreshold||1e5:this.#y.charSegmentTokenThreshold||5e5,a="word"===t?this.#y.minWordTokenSave||0:this.#y.minCharTokenSave||0,r=this.#h.getLastSegmentInfo(t);let d,g,l,f;const w=()=>{const e=this.#h.getSegments(t).length+1;return`${t}_seg_${e}.bin`};if(r){const t=r.tokenCount;t>=n||t+e>=n?(d=w(),l=!0,g=r.end,f=e):(d=r.filename,l=!1,g=r.start,f=t+e)}else d=w(),l=!0,g=0,f=e;if(f<a)return void this.#h.updateSegment(t,d,g,i,f,l);const u=await this.#w.readRange(s,g,i);let m=this.#u.get(d);m||(m=new o(d,this.#o),this.#u.set(d,m)),await 
m.buildAndSave(u),this.#h.updateSegment(t,d,g,i,f,l)}async search(t,e){this.#m||await this.init();const s="string"==typeof t?{text:t}:t,i=this.#b(s),n=i.filter(t=>t.length>1),a=i.filter(t=>1===t.length),r=this.#h.getDeletedIds(),h=new Map,c=new Map,d=t=>{const e=this.#h.getSegments(t);for(const t of e){const e=t.filename;!this.#u.has(e)&&!c.has(e)&&c.set(e,new o(e,this.#o))}};d("word"),d("char"),await Promise.all(Array.from(c.entries()).map(([t,e])=>e.loadIndex().then(s=>{s&&this.#u.set(t,e)})));const g=async(t,e)=>{if(0===e.length)return;const s=this.#h.getSegments(t);for(const t of s){const s=t.filename,i=this.#u.get(s);if(i)for(const t of e){const e=i.search(t),s=1+.1*t.length;for(const i of e)if(!r.has(i))if(h.has(i)){const e=h.get(i);e.score+=s,e.tokens.add(t)}else h.set(i,{score:0,tokens:new Set([t])})}}};await g("word",n),await g("char",a);const l=[];return h.forEach((t,e)=>{l.push({id:e,score:t.score,tokens:Array.from(t.tokens)})}),l.sort((t,e)=>e.score-t.score),"number"==typeof e&&e>0?l.slice(0,e):l}async removeDocument(t){this.#m||await this.init(),this.#h.addDeletedId(t),this.#h.removeAddedId(t),await this.#h.save()}async clearAll(){await this.#o.clearAll(),this.#u.clear(),this.#h.reset(),this.#m=!1,this.#p=!1,this.#D={word:0,char:0}}async getStatus(){return this.#m||await this.init(),{wordSegments:this.#h.getSegments("word").length,charSegments:this.#h.getSegments("char").length,deleted:this.#h.getDeletedIds().size,wordCacheSize:await this.#w.getCurrentSize(h),charCacheSize:await this.#w.getCurrentSize(c),inBatch:this.#p}}async hasDocument(t){return this.#m||await this.init(),this.#h.hasDocument(t)}}exports.BrowserStorage=t,exports.NodeStorage=e,exports.SearchEngine=d,exports.SimpleSearch=class{static#T=null;static#v={baseDir:"simple_search_data",wordSegmentTokenThreshold:1e5,minWordTokenSave:0};static configure(t){const e={...this.#v,...t};this.#T=new d(e)}static#z(){return this.#T||(this.#T=new d(this.#v)),this.#T}static async 
startBatch(){this.#z().startBatch()}static async endBatch(){return this.#z().endBatch()}static async addDocument(t){return this.#z().addDocument(t)}static async addDocumentIfMissing(t){return this.#z().addDocumentIfMissing(t)}static async addDocuments(t){return this.#z().addDocuments(t)}static async addDocumentsIfMissing(t){return this.#z().addDocumentsIfMissing(t)}static async search(t,e){return this.#z().search(t,e)}static async removeDocument(t){return this.#z().removeDocument(t)}static async clearAll(){return this.#z().clearAll()}static async getStatus(){return this.#z().getStatus()}static async hasDocument(t){return this.#z().hasDocument(t)}};
|
package/lib/index.d.ts
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* 核心类型定义
|
|
3
3
|
*/
|
|
4
|
-
interface
|
|
5
|
-
id: number;
|
|
4
|
+
interface IDocumentBase {
|
|
6
5
|
text: string;
|
|
7
6
|
}
|
|
7
|
+
interface IDocument extends IDocumentBase {
|
|
8
|
+
id: number;
|
|
9
|
+
}
|
|
8
10
|
interface IResult {
|
|
9
11
|
id: number;
|
|
10
12
|
score: number;
|
|
@@ -61,7 +63,7 @@ interface ISearchEngineConfig {
|
|
|
61
63
|
* - 建议: 针对不同语言(中文/英文/日文等)使用专门的分词实现
|
|
62
64
|
* - 影响: 直接决定索引的粒度和搜索的准确性
|
|
63
65
|
*/
|
|
64
|
-
indexingTokenizer?: (
|
|
66
|
+
indexingTokenizer?: <T extends IDocument = IDocument>(doc: T) => string[];
|
|
65
67
|
/**
|
|
66
68
|
* 搜索时使用的分词器 (算法核心配置)
|
|
67
69
|
* - 作用: 将查询文本转换为搜索用的token序列
|
|
@@ -69,7 +71,7 @@ interface ISearchEngineConfig {
|
|
|
69
71
|
* - 建议: 与indexingTokenizer保持一致的分词策略以确保搜索准确性
|
|
70
72
|
* - 影响: 直接决定搜索匹配的范围和结果的相关性
|
|
71
73
|
*/
|
|
72
|
-
searchTokenizer?: (
|
|
74
|
+
searchTokenizer?: <T extends IDocumentBase = IDocumentBase>(doc: T) => string[];
|
|
73
75
|
/**
|
|
74
76
|
* 词索引分段阈值 (Token数) - 分段算法配置
|
|
75
77
|
* - 作用: 控制词索引文件的大小,超过阈值时创建新的索引段
|
|
@@ -121,9 +123,19 @@ declare class SearchEngine {
|
|
|
121
123
|
* 触发索引构建检查并保存元数据
|
|
122
124
|
*/
|
|
123
125
|
endBatch(): Promise<void>;
|
|
124
|
-
addDocument(doc:
|
|
125
|
-
|
|
126
|
-
|
|
126
|
+
addDocument<T extends IDocument = IDocument>(doc: T): Promise<void>;
|
|
127
|
+
/**
|
|
128
|
+
* 添加单个文档,如果文档ID已存在则跳过
|
|
129
|
+
* 用于在批量添加中途出错后的恢复添加行为,也可直接用于单个文档添加
|
|
130
|
+
*/
|
|
131
|
+
addDocumentIfMissing<T extends IDocument = IDocument>(doc: T): Promise<void>;
|
|
132
|
+
/**
|
|
133
|
+
* 添加多个文档,跳过已存在的文档ID
|
|
134
|
+
* 用于在批量添加中途出错后的恢复添加行为,也可直接用于批量添加
|
|
135
|
+
*/
|
|
136
|
+
addDocumentsIfMissing<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
|
|
137
|
+
addDocuments<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
|
|
138
|
+
search<T extends IDocumentBase = IDocumentBase>(query: T | string, limit?: number): Promise<IResult[]>;
|
|
127
139
|
removeDocument(id: number): Promise<void>;
|
|
128
140
|
clearAll(): Promise<void>;
|
|
129
141
|
getStatus(): Promise<{
|
|
@@ -134,6 +146,12 @@ declare class SearchEngine {
|
|
|
134
146
|
charCacheSize: number;
|
|
135
147
|
inBatch: boolean;
|
|
136
148
|
}>;
|
|
149
|
+
/**
|
|
150
|
+
* 检查文档ID是否曾经添加过(包括已删除的)
|
|
151
|
+
* @param id 文档ID
|
|
152
|
+
* @returns 文档是否曾经添加过的布尔值
|
|
153
|
+
*/
|
|
154
|
+
hasDocument(id: number): Promise<boolean>;
|
|
137
155
|
}
|
|
138
156
|
|
|
139
157
|
/**
|
|
@@ -148,9 +166,11 @@ declare class SimpleSearch {
|
|
|
148
166
|
static configure(config: Partial<ISearchEngineConfig>): void;
|
|
149
167
|
static startBatch(): Promise<void>;
|
|
150
168
|
static endBatch(): Promise<void>;
|
|
151
|
-
static addDocument(doc:
|
|
152
|
-
static
|
|
153
|
-
static
|
|
169
|
+
static addDocument<T extends IDocument = IDocument>(doc: T): Promise<void>;
|
|
170
|
+
static addDocumentIfMissing<T extends IDocument = IDocument>(doc: T): Promise<void>;
|
|
171
|
+
static addDocuments<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
|
|
172
|
+
static addDocumentsIfMissing<T extends IDocument = IDocument>(docs: T[]): Promise<void>;
|
|
173
|
+
static search<T extends IDocumentBase = IDocumentBase>(query: T | string, limit?: number): Promise<IResult[]>;
|
|
154
174
|
static removeDocument(id: number): Promise<void>;
|
|
155
175
|
static clearAll(): Promise<void>;
|
|
156
176
|
static getStatus(): Promise<{
|
|
@@ -161,6 +181,12 @@ declare class SimpleSearch {
|
|
|
161
181
|
charCacheSize: number;
|
|
162
182
|
inBatch: boolean;
|
|
163
183
|
}>;
|
|
184
|
+
/**
|
|
185
|
+
* 检查文档ID是否曾经添加过(包括已删除的)
|
|
186
|
+
* @param id 文档ID
|
|
187
|
+
* @returns 文档是否曾经添加过的布尔值
|
|
188
|
+
*/
|
|
189
|
+
static hasDocument(id: number): Promise<boolean>;
|
|
164
190
|
}
|
|
165
191
|
|
|
166
192
|
/**
|
|
@@ -196,4 +222,4 @@ declare class NodeStorage implements IStorage {
|
|
|
196
222
|
}
|
|
197
223
|
|
|
198
224
|
export { BrowserStorage, NodeStorage, SearchEngine, SimpleSearch };
|
|
199
|
-
export type { IDocument, IIndexMeta, IResult, ISearchEngineConfig, ISegmentMeta, IStorage, ITokenizedDoc, IndexType };
|
|
225
|
+
export type { IDocument, IDocumentBase, IIndexMeta, IResult, ISearchEngineConfig, ISegmentMeta, IStorage, ITokenizedDoc, IndexType };
|
package/lib/index.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
class t{#t;constructor(t){this.#t=t}async#e(){return await(await navigator.storage.getDirectory()).getDirectoryHandle(this.#t,{create:!0})}async write(t,e){const s=await(await(await this.#e()).getFileHandle(t,{create:!0})).createWritable();await s.write(e),await s.close()}async append(t,e){const s=await this.#e();let i;try{i=await s.getFileHandle(t,{create:!0})}catch{i=await s.getFileHandle(t,{create:!0})}const n=await i.getFile(),a=await i.createWritable({keepExistingData:!0});await a.seek(n.size),await a.write(e),await a.close()}async read(t){const e=await this.#e();try{return await(await(await e.getFileHandle(t)).getFile()).arrayBuffer()}catch{return null}}async readRange(t,e,s){const i=await this.#e();try{return await(await(await i.getFileHandle(t)).getFile()).slice(e,s).arrayBuffer()}catch{return null}}async remove(t){const e=await this.#e();try{await e.removeEntry(t)}catch{}}async listFiles(){const t=await this.#e(),e=[];for await(const s of t.keys())e.push(s);return e}async clearAll(){const t=await this.#e();for await(const e of t.keys())await t.removeEntry(e,{recursive:!0})}async getFileSize(t){const e=await this.#e();try{return(await(await e.getFileHandle(t)).getFile()).size}catch{return 0}}}class e{#s=null;#i=null;#t;#n="";constructor(t){this.#t=t}async#a(){if(this.#s)return;const t=await import("node:fs"),e=await import("node:path");this.#s=t.promises,this.#i=e.default||e,this.#n=this.#i.join(process.cwd(),this.#t);try{await this.#s.access(this.#n)}catch{await this.#s.mkdir(this.#n,{recursive:!0})}}#r(t){return this.#i.join(this.#n,t)}async write(t,e){await this.#a(),await this.#s.writeFile(this.#r(t),Buffer.from(e))}async append(t,e){await this.#a(),await this.#s.appendFile(this.#r(t),Buffer.from(e))}async read(t){await this.#a();try{const e=await this.#s.readFile(this.#r(t));return e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)}catch{return null}}async readRange(t,e,s){await this.#a();try{const i=await 
this.#s.open(this.#r(t),"r"),n=s-e,a=Buffer.alloc(n);return await i.read(a,0,n,e),await i.close(),a.buffer.slice(a.byteOffset,a.byteOffset+a.byteLength)}catch{return null}}async remove(t){await this.#a();try{await this.#s.unlink(this.#r(t))}catch{}}async listFiles(){await this.#a();try{return await this.#s.readdir(this.#n)}catch{return[]}}async clearAll(){await this.#a();try{const t=await this.#s.readdir(this.#n);for(const e of t)await this.#s.unlink(this.#i.join(this.#n,e))}catch{}}async getFileSize(t){await this.#a();try{return(await this.#s.stat(this.#r(t))).size}catch{return 0}}}const s="search_meta.json",i="deleted_ids.bin",n="added_ids.bin";class a{#o;#h={wordSegments:[],charSegments:[]};#c=new Set;#d=new Set;constructor(t){this.#o=t}async load(){const t=await this.#o.read(s);if(t){const e=(new TextDecoder).decode(t);this.#h=JSON.parse(e)}else this.#h={wordSegments:[],charSegments:[]};const e=await this.#o.read(i);if(e){const t=new DataView(e);let s=0;const i=e.byteLength;for(;s<i&&!(s+4>i);){const e=t.getUint32(s,!0);this.#c.add(e),s+=4,s<i&&30===t.getUint8(s)&&(s+=1)}}const a=await this.#o.read(n);if(a){const t=new DataView(a);let e=0;const s=a.byteLength;for(;e<s&&!(e+4>s);){const i=t.getUint32(e,!0);this.#d.add(i),e+=4,e<s&&30===t.getUint8(e)&&(e+=1)}}}async save(){const t=JSON.stringify(this.#h);if(await this.#o.write(s,(new TextEncoder).encode(t).buffer),0===this.#c.size)await this.#o.remove(i);else{const t=4*this.#c.size+this.#c.size,e=new ArrayBuffer(t),s=new DataView(e);let n=0;for(const t of this.#c)s.setUint32(n,t,!0),n+=4,s.setUint8(n,30),n+=1;await this.#o.write(i,e)}if(0===this.#d.size)await this.#o.remove(n);else{const t=4*this.#d.size+this.#d.size,e=new ArrayBuffer(t),s=new DataView(e);let i=0;for(const t of this.#d)s.setUint32(i,t,!0),i+=4,s.setUint8(i,30),i+=1;await this.#o.write(n,e)}}getSegments(t){return"word"===t?this.#h.wordSegments:this.#h.charSegments}getDeletedIds(){return this.#c}addDeletedId(t){this.#c.add(t)}isDeleted(t){return 
this.#c.has(t)}addAddedId(t){this.#d.add(t)}removeAddedId(t){this.#d.delete(t)}isAdded(t){return this.#d.has(t)}getAddedIds(){return this.#d}getLastSegmentInfo(t){const e=this.getSegments(t);return 0===e.length?null:e[e.length-1]}updateSegment(t,e,s,i,n,a){const r="word"===t?this.#h.wordSegments:this.#h.charSegments;if(a)r.push({filename:e,start:s,end:i,tokenCount:n});else{const t=r[r.length-1];t&&t.filename===e&&(t.end=i,t.tokenCount=n)}}reset(){this.#h={wordSegments:[],charSegments:[]},this.#c.clear(),this.#d.clear()}}class r{static SEPARATOR=30;#o;constructor(t){this.#o=t}async appendBatch(t,e){if(0===e.length)return await this.#o.getFileSize(t);const s=new TextEncoder;let i=0;for(const t of e){i+=8;for(const e of t.tokens){i+=2+Math.min(s.encode(e).byteLength,65535)}i+=1}const n=new Uint8Array(i);let a=0;for(const t of e){const e=[];for(const i of t.tokens){const t=s.encode(i),n=t.byteLength>65535?t.slice(0,65535):t;e.push(n)}const i=new DataView(n.buffer,a);i.setUint32(0,t.id,!0),i.setUint32(4,e.length,!0),a+=8;for(const t of e)new DataView(n.buffer,a).setUint16(0,t.byteLength,!0),a+=2,n.set(t,a),a+=t.byteLength;n[a++]=r.SEPARATOR}return await this.#o.append(t,n.buffer),await this.#o.getFileSize(t)}async readRange(t,e,s){const i=await this.#o.readRange(t,e,s);if(!i||0===i.byteLength)return[];const n=new DataView(i),a=new Uint8Array(i),o=new TextDecoder,h=[];let c=0;const d=i.byteLength;for(;c<d&&!(c+8>d);){const t=n.getUint32(c,!0);c+=4;const e=n.getUint32(c,!0);c+=4;const s=[];for(let t=0;t<e&&!(c+2>d);t++){const t=n.getUint16(c,!0);if(c+=2,c+t>d)break;const e=new Uint8Array(i,c,t);s.push(o.decode(e)),c+=t}c<d&&a[c]===r.SEPARATOR&&(c+=1),h.push({id:t,tokens:s})}return h}async getCurrentSize(t){return await this.#o.getFileSize(t)}}class o{#g;#o;#l=null;#f=null;static hash(t){let e=5381;for(let s=0;s<t.length;s++)e=(e<<5)+e^t.charCodeAt(s);return e>>>0}constructor(t,e){this.#g=t,this.#o=e}async loadIndex(){return!!this.#l||(this.#l=await 
this.#o.read(this.#g),!!this.#l&&(this.#f=new DataView(this.#l),!0))}async buildAndSave(t){const e=new Map;for(const s of t){const t=new Map;for(const i of s.tokens)if(!t.has(i)){t.set(i,!0);const n=o.hash(i);e.has(n)||e.set(n,[]),e.get(n).push(s.id)}}const s=Array.from(e.keys()).sort((t,e)=>t-e);let i=0;const n=new Array(s.length);for(let t=0;t<s.length;t++){const a=s[t],r=e.get(a);n[t]=r,i+=r.length}const a=12*s.length,r=new ArrayBuffer(8+a+4*i),h=new DataView(r);h.setUint32(0,1229866072),h.setUint32(4,s.length);let c=8,d=8+a;for(let t=0;t<s.length;t++){const e=s[t],i=n[t];h.setUint32(c,e),h.setUint32(c+4,d),h.setUint32(c+8,i.length),c+=12;for(let t=0;t<i.length;t++)h.setUint32(d,i[t],!0),d+=4}await this.#o.write(this.#g,r),this.#l=r,this.#f=h}search(t){if(!this.#f||!this.#l)return[];const e=o.hash(t);let s=0,i=this.#f.getUint32(4)-1;for(;s<=i;){const t=s+i>>>1,n=8+12*t,a=this.#f.getUint32(n);if(a<e)s=t+1;else{if(!(a>e)){const t=this.#f.getUint32(n+4),e=this.#f.getUint32(n+8),s=[];for(let i=0;i<e;i++)s.push(this.#f.getUint32(t+4*i,!0));return s}i=t-1}}return[]}}const h="word_cache.bin",c="char_cache.bin";class d{#o;#h;#w;#u;#m=!1;#y;#p=!1;#S={word:0,char:0};constructor(s){if(!s.baseDir)throw new Error("SearchEngine requires 'baseDir' in config.");if(this.#y={wordSegmentTokenThreshold:1e5,charSegmentTokenThreshold:5e5,minWordTokenSave:0,minCharTokenSave:0,...s},(this.#y.minWordTokenSave||0)>=(this.#y.wordSegmentTokenThreshold||1e5))throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");if((this.#y.minCharTokenSave||0)>=(this.#y.charSegmentTokenThreshold||5e5))throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");let i=null;if(this.#y.storage&&("object"==typeof this.#y.storage?i=this.#y.storage:"browser"===this.#y.storage?i=new t(this.#y.baseDir):"node"===this.#y.storage&&(i=new e(this.#y.baseDir))),!i){const s=typeof navigator<"u"&&navigator?.storage?.getDirectory instanceof Function,n=typeof 
process<"u"&&null!=process.versions&&null!=process.versions.node;s?i=new t(this.#y.baseDir):n&&(i=new e(this.#y.baseDir))}if(!i)throw new Error('Storage initialization failed. Please configure "storage" explicitly or ensure you are in a supported environment (Browser/Node).');this.#o=i,this.#h=new a(this.#o),this.#w=new r(this.#o),this.#u=new Map}async init(){if(this.#m)return;await this.#h.load();const t=[...this.#h.getSegments("word"),...this.#h.getSegments("char")];for(const e of t)this.#u.has(e.filename)||this.#u.set(e.filename,new o(e.filename,this.#o)),await this.#u.get(e.filename).loadIndex();this.#m=!0}startBatch(){this.#p=!0,this.#S={word:0,char:0}}async endBatch(){this.#p=!1,this.#S.word>0&&await this.#D("word",this.#S.word),this.#S.char>0&&await this.#D("char",this.#S.char),this.#S={word:0,char:0},await this.#h.save()}#k(t){if(typeof Intl<"u"&&Intl.Segmenter){const e=new Intl.Segmenter([],{granularity:"word"});return Array.from(e.segment(t)).filter(t=>t.isWordLike).map(t=>t.segment.toLowerCase())}return t.toLowerCase().split(/[^a-z0-9\u4e00-\u9fa5]+/g).filter(t=>t.length>0)}#b(t){return this.#y.indexingTokenizer?this.#y.indexingTokenizer(t):this.#k(t)}#T(t){return this.#y.searchTokenizer?this.#y.searchTokenizer(t):this.#y.indexingTokenizer?this.#y.indexingTokenizer(t):this.#k(t)}async addDocument(t){return this.addDocuments([t])}async addDocuments(t){if(this.#m||await this.init(),0===t.length)return;const e=this.#h.getDeletedIds(),s=[],i=[];for(const n of t){if(e.has(n.id))throw new Error(`Document ID ${n.id} has been deleted and cannot be re-added.`);if(this.#h.isAdded(n.id))throw new Error(`Document ID ${n.id} already exists.`);const t=this.#b(n.text),a=[],r=[];for(const e of t)e.length>1?a.push(e):1===e.length&&r.push(e);a.length>0&&s.push({id:n.id,tokens:a}),r.length>0&&i.push({id:n.id,tokens:r})}let n=0,a=0;if(s.length>0){await this.#w.appendBatch(h,s);for(const t of s)n+=t.tokens.length}if(i.length>0){await this.#w.appendBatch(c,i);for(const t of 
i)a+=t.tokens.length}for(const e of t)this.#h.addAddedId(e.id);this.#p?(this.#S.word+=n,this.#S.char+=a):(n>0&&await this.#D("word",n),a>0&&await this.#D("char",a),await this.#h.save())}async#D(t,e){const s="word"===t?h:c,i=await this.#w.getCurrentSize(s),n="word"===t?this.#y.wordSegmentTokenThreshold||1e5:this.#y.charSegmentTokenThreshold||5e5,a="word"===t?this.#y.minWordTokenSave||0:this.#y.minCharTokenSave||0,r=this.#h.getLastSegmentInfo(t);let d,g,l,f;const w=()=>{const e=this.#h.getSegments(t).length+1;return`${t}_seg_${e}.bin`};if(r){const t=r.tokenCount;t>=n||t+e>=n?(d=w(),l=!0,g=r.end,f=e):(d=r.filename,l=!1,g=r.start,f=t+e)}else d=w(),l=!0,g=0,f=e;if(f<a)return void this.#h.updateSegment(t,d,g,i,f,l);const u=await this.#w.readRange(s,g,i);let m=this.#u.get(d);m||(m=new o(d,this.#o),this.#u.set(d,m)),await m.buildAndSave(u),this.#h.updateSegment(t,d,g,i,f,l)}async search(t,e){this.#m||await this.init();const s=this.#T(t),i=s.filter(t=>t.length>1),n=s.filter(t=>1===t.length),a=this.#h.getDeletedIds(),r=new Map,h=new Map,c=t=>{const e=this.#h.getSegments(t);for(const t of e){const e=t.filename;!this.#u.has(e)&&!h.has(e)&&h.set(e,new o(e,this.#o))}};c("word"),c("char"),await Promise.all(Array.from(h.entries()).map(([t,e])=>e.loadIndex().then(s=>{s&&this.#u.set(t,e)})));const d=async(t,e)=>{if(0===e.length)return;const s=this.#h.getSegments(t);for(const t of s){const s=t.filename,i=this.#u.get(s);if(i)for(const t of e){const e=i.search(t),s=1+.1*t.length;for(const i of e)if(!a.has(i))if(r.has(i)){const e=r.get(i);e.score+=s,e.tokens.add(t)}else r.set(i,{score:0,tokens:new Set([t])})}}};await d("word",i),await d("char",n);const g=[];return r.forEach((t,e)=>{g.push({id:e,score:t.score,tokens:Array.from(t.tokens)})}),g.sort((t,e)=>e.score-t.score),"number"==typeof e&&e>0?g.slice(0,e):g}async removeDocument(t){this.#m||await this.init(),this.#h.addDeletedId(t),this.#h.removeAddedId(t),await this.#h.save()}async clearAll(){await 
this.#o.clearAll(),this.#u.clear(),this.#h.reset(),this.#m=!1,this.#p=!1,this.#S={word:0,char:0}}async getStatus(){return this.#m||await this.init(),{wordSegments:this.#h.getSegments("word").length,charSegments:this.#h.getSegments("char").length,deleted:this.#h.getDeletedIds().size,wordCacheSize:await this.#w.getCurrentSize(h),charCacheSize:await this.#w.getCurrentSize(c),inBatch:this.#p}}}class g{static#I=null;static#v={baseDir:"simple_search_data",wordSegmentTokenThreshold:1e5,minWordTokenSave:0};static configure(t){const e={...this.#v,...t};this.#I=new d(e)}static#z(){return this.#I||(this.#I=new d(this.#v)),this.#I}static async startBatch(){this.#z().startBatch()}static async endBatch(){return this.#z().endBatch()}static async addDocument(t){return this.#z().addDocument(t)}static async addDocuments(t){return this.#z().addDocuments(t)}static async search(t,e){return this.#z().search(t,e)}static async removeDocument(t){return this.#z().removeDocument(t)}static async clearAll(){return this.#z().clearAll()}static async getStatus(){return this.#z().getStatus()}}export{t as BrowserStorage,e as NodeStorage,d as SearchEngine,g as SimpleSearch};
|
|
1
|
+
/*
 * gs-search v0.1.2 — minified distribution bundle (lib/index.js).
 * The code below is byte-identical to the published artifact (the diff
 * viewer's soft line-wraps have been re-joined into the original single
 * physical line — splitting it would trigger ASI after bare `return`).
 * Do not hand-edit the minified line; regenerate from the package source.
 *
 * Exported bindings: t = BrowserStorage, e = NodeStorage, d = SearchEngine,
 * g = SimpleSearch (static facade over a lazily created SearchEngine
 * singleton; default config baseDir "simple_search_data").
 *
 * Internal (not exported):
 *  - consts s/i/n: metadata filenames "search_meta.json", "deleted_ids.bin",
 *    "added_ids.bin"; consts h/c: raw token caches "word_cache.bin" /
 *    "char_cache.bin".
 *  - class a: metadata store. Segment descriptors {filename,start,end,
 *    tokenCount} for the "word"/"char" indexes are persisted as JSON; the
 *    deleted/added doc-id sets are serialized as little-endian uint32 ids,
 *    each followed by a 0x1E (ASCII record-separator) byte.
 *  - class r: append-only token log. Per-document record layout:
 *    [uint32 id LE][uint32 tokenCount LE]([uint16 byteLen LE][UTF-8 bytes])*
 *    [0x1E]; each token's UTF-8 form is truncated to 65535 bytes.
 *  - class o: per-segment inverted index keyed by a 32-bit djb2-style
 *    (shift-add-xor) string hash. File layout: uint32 magic 1229866072,
 *    uint32 entry count, sorted 12-byte (hash, postingsOffset, postingsCount)
 *    directory, then little-endian uint32 doc-id postings; search()
 *    binary-searches the directory.
 *    NOTE(review): distinct tokens that collide on the 32-bit hash share one
 *    postings list, so lookups can return false-positive doc ids — confirm
 *    this is an accepted trade-off of the design.
 *
 * class t (BrowserStorage): file operations on an OPFS directory obtained
 * via navigator.storage.getDirectory(); append() seeks to the current file
 * size over a writable opened with {keepExistingData:true}.
 * NOTE(review): in append() the catch branch retries getFileHandle with the
 * identical {create:!0} options — the fallback is a duplicate no-op; intent
 * unclear, verify against package source.
 *
 * class e (NodeStorage): same storage interface for Node; lazily imports
 * node:fs / node:path, roots all files at process.cwd()/<baseDir>, and
 * creates that directory on first use.
 *
 * class d (SearchEngine): validates config (each min*TokenSave must be
 * strictly below its matching segment threshold), resolves storage (explicit
 * instance, "browser"/"node" string, else auto-detect OPFS then Node),
 * tokenizes via Intl.Segmenter (granularity "word", keeping isWordLike
 * segments, lowercased) with a lowercase [a-z0-9 + CJK \u4e00-\u9fa5] regex
 * fallback, routes tokens of length > 1 to the "word" cache and length == 1
 * to the "char" cache, and (#S) rebuilds or extends the newest segment index
 * from the cache once pending token counts pass the configured thresholds.
 * Batch mode (startBatch/endBatch) defers #S and the metadata save until
 * endBatch. search() accepts a string or a query object (for a custom
 * searchTokenizer), lazily loads missing segment indexes in parallel, skips
 * deleted ids, weights each matching token 1 + 0.1 * token.length, sorts by
 * descending score, and applies an optional positive numeric limit. Local
 * h/c inside search() deliberately shadow the module-level cache filenames.
 * NOTE(review): in search(), a doc id's first matching token inserts its
 * result entry with score 0 instead of the token's weight, so the first
 * match per document contributes nothing to the score; the same behavior
 * exists in 0.1.1, but it looks unintended — confirm before relying on the
 * ranking.
 *
 * class g (SimpleSearch): static wrapper that forwards every call to a
 * lazily constructed singleton SearchEngine (#z); configure() replaces the
 * singleton with one built from merged defaults + overrides.
 */
class t{#t;constructor(t){this.#t=t}async#e(){return await(await navigator.storage.getDirectory()).getDirectoryHandle(this.#t,{create:!0})}async write(t,e){const s=await(await(await this.#e()).getFileHandle(t,{create:!0})).createWritable();await s.write(e),await s.close()}async append(t,e){const s=await this.#e();let i;try{i=await s.getFileHandle(t,{create:!0})}catch{i=await s.getFileHandle(t,{create:!0})}const n=await i.getFile(),a=await i.createWritable({keepExistingData:!0});await a.seek(n.size),await a.write(e),await a.close()}async read(t){const e=await this.#e();try{return await(await(await e.getFileHandle(t)).getFile()).arrayBuffer()}catch{return null}}async readRange(t,e,s){const i=await this.#e();try{return await(await(await i.getFileHandle(t)).getFile()).slice(e,s).arrayBuffer()}catch{return null}}async remove(t){const e=await this.#e();try{await e.removeEntry(t)}catch{}}async listFiles(){const t=await this.#e(),e=[];for await(const s of t.keys())e.push(s);return e}async clearAll(){const t=await this.#e();for await(const e of t.keys())await t.removeEntry(e,{recursive:!0})}async getFileSize(t){const e=await this.#e();try{return(await(await e.getFileHandle(t)).getFile()).size}catch{return 0}}}class e{#s=null;#i=null;#t;#n="";constructor(t){this.#t=t}async#a(){if(this.#s)return;const t=await import("node:fs"),e=await import("node:path");this.#s=t.promises,this.#i=e.default||e,this.#n=this.#i.join(process.cwd(),this.#t);try{await this.#s.access(this.#n)}catch{await this.#s.mkdir(this.#n,{recursive:!0})}}#r(t){return this.#i.join(this.#n,t)}async write(t,e){await this.#a(),await this.#s.writeFile(this.#r(t),Buffer.from(e))}async append(t,e){await this.#a(),await this.#s.appendFile(this.#r(t),Buffer.from(e))}async read(t){await this.#a();try{const e=await this.#s.readFile(this.#r(t));return e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)}catch{return null}}async readRange(t,e,s){await this.#a();try{const i=await this.#s.open(this.#r(t),"r"),n=s-e,a=Buffer.alloc(n);return await i.read(a,0,n,e),await i.close(),a.buffer.slice(a.byteOffset,a.byteOffset+a.byteLength)}catch{return null}}async remove(t){await this.#a();try{await this.#s.unlink(this.#r(t))}catch{}}async listFiles(){await this.#a();try{return await this.#s.readdir(this.#n)}catch{return[]}}async clearAll(){await this.#a();try{const t=await this.#s.readdir(this.#n);for(const e of t)await this.#s.unlink(this.#i.join(this.#n,e))}catch{}}async getFileSize(t){await this.#a();try{return(await this.#s.stat(this.#r(t))).size}catch{return 0}}}const s="search_meta.json",i="deleted_ids.bin",n="added_ids.bin";class a{#o;#h={wordSegments:[],charSegments:[]};#c=new Set;#d=new Set;constructor(t){this.#o=t}async load(){const t=await this.#o.read(s);if(t){const e=(new TextDecoder).decode(t);this.#h=JSON.parse(e)}else this.#h={wordSegments:[],charSegments:[]};const e=await this.#o.read(i);if(e){const t=new DataView(e);let s=0;const i=e.byteLength;for(;s<i&&!(s+4>i);){const e=t.getUint32(s,!0);this.#c.add(e),s+=4,s<i&&30===t.getUint8(s)&&(s+=1)}}const a=await this.#o.read(n);if(a){const t=new DataView(a);let e=0;const s=a.byteLength;for(;e<s&&!(e+4>s);){const i=t.getUint32(e,!0);this.#d.add(i),e+=4,e<s&&30===t.getUint8(e)&&(e+=1)}}}async save(){const t=JSON.stringify(this.#h);if(await this.#o.write(s,(new TextEncoder).encode(t).buffer),0===this.#c.size)await this.#o.remove(i);else{const t=4*this.#c.size+this.#c.size,e=new ArrayBuffer(t),s=new DataView(e);let n=0;for(const t of this.#c)s.setUint32(n,t,!0),n+=4,s.setUint8(n,30),n+=1;await this.#o.write(i,e)}if(0===this.#d.size)await this.#o.remove(n);else{const t=4*this.#d.size+this.#d.size,e=new ArrayBuffer(t),s=new DataView(e);let i=0;for(const t of this.#d)s.setUint32(i,t,!0),i+=4,s.setUint8(i,30),i+=1;await this.#o.write(n,e)}}getSegments(t){return"word"===t?this.#h.wordSegments:this.#h.charSegments}getDeletedIds(){return this.#c}addDeletedId(t){this.#c.add(t)}isDeleted(t){return this.#c.has(t)}addAddedId(t){this.#d.add(t)}removeAddedId(t){this.#d.delete(t)}isAdded(t){return this.#d.has(t)}getAddedIds(){return this.#d}hasDocument(t){return this.#d.has(t)||this.#c.has(t)}getLastSegmentInfo(t){const e=this.getSegments(t);return 0===e.length?null:e[e.length-1]}updateSegment(t,e,s,i,n,a){const r="word"===t?this.#h.wordSegments:this.#h.charSegments;if(a)r.push({filename:e,start:s,end:i,tokenCount:n});else{const t=r[r.length-1];t&&t.filename===e&&(t.end=i,t.tokenCount=n)}}reset(){this.#h={wordSegments:[],charSegments:[]},this.#c.clear(),this.#d.clear()}}class r{static SEPARATOR=30;#o;constructor(t){this.#o=t}async appendBatch(t,e){if(0===e.length)return await this.#o.getFileSize(t);const s=new TextEncoder;let i=0;for(const t of e){i+=8;for(const e of t.tokens){i+=2+Math.min(s.encode(e).byteLength,65535)}i+=1}const n=new Uint8Array(i);let a=0;for(const t of e){const e=[];for(const i of t.tokens){const t=s.encode(i),n=t.byteLength>65535?t.slice(0,65535):t;e.push(n)}const i=new DataView(n.buffer,a);i.setUint32(0,t.id,!0),i.setUint32(4,e.length,!0),a+=8;for(const t of e)new DataView(n.buffer,a).setUint16(0,t.byteLength,!0),a+=2,n.set(t,a),a+=t.byteLength;n[a++]=r.SEPARATOR}return await this.#o.append(t,n.buffer),await this.#o.getFileSize(t)}async readRange(t,e,s){const i=await this.#o.readRange(t,e,s);if(!i||0===i.byteLength)return[];const n=new DataView(i),a=new Uint8Array(i),o=new TextDecoder,h=[];let c=0;const d=i.byteLength;for(;c<d&&!(c+8>d);){const t=n.getUint32(c,!0);c+=4;const e=n.getUint32(c,!0);c+=4;const s=[];for(let t=0;t<e&&!(c+2>d);t++){const t=n.getUint16(c,!0);if(c+=2,c+t>d)break;const e=new Uint8Array(i,c,t);s.push(o.decode(e)),c+=t}c<d&&a[c]===r.SEPARATOR&&(c+=1),h.push({id:t,tokens:s})}return h}async getCurrentSize(t){return await this.#o.getFileSize(t)}}class o{#g;#o;#l=null;#f=null;static hash(t){let e=5381;for(let s=0;s<t.length;s++)e=(e<<5)+e^t.charCodeAt(s);return e>>>0}constructor(t,e){this.#g=t,this.#o=e}async loadIndex(){return!!this.#l||(this.#l=await this.#o.read(this.#g),!!this.#l&&(this.#f=new DataView(this.#l),!0))}async buildAndSave(t){const e=new Map;for(const s of t){const t=new Map;for(const i of s.tokens)if(!t.has(i)){t.set(i,!0);const n=o.hash(i);e.has(n)||e.set(n,[]),e.get(n).push(s.id)}}const s=Array.from(e.keys()).sort((t,e)=>t-e);let i=0;const n=new Array(s.length);for(let t=0;t<s.length;t++){const a=s[t],r=e.get(a);n[t]=r,i+=r.length}const a=12*s.length,r=new ArrayBuffer(8+a+4*i),h=new DataView(r);h.setUint32(0,1229866072),h.setUint32(4,s.length);let c=8,d=8+a;for(let t=0;t<s.length;t++){const e=s[t],i=n[t];h.setUint32(c,e),h.setUint32(c+4,d),h.setUint32(c+8,i.length),c+=12;for(let t=0;t<i.length;t++)h.setUint32(d,i[t],!0),d+=4}await this.#o.write(this.#g,r),this.#l=r,this.#f=h}search(t){if(!this.#f||!this.#l)return[];const e=o.hash(t);let s=0,i=this.#f.getUint32(4)-1;for(;s<=i;){const t=s+i>>>1,n=8+12*t,a=this.#f.getUint32(n);if(a<e)s=t+1;else{if(!(a>e)){const t=this.#f.getUint32(n+4),e=this.#f.getUint32(n+8),s=[];for(let i=0;i<e;i++)s.push(this.#f.getUint32(t+4*i,!0));return s}i=t-1}}return[]}}const h="word_cache.bin",c="char_cache.bin";class d{#o;#h;#w;#u;#m=!1;#y;#p=!1;#D={word:0,char:0};constructor(s){if(!s.baseDir)throw new Error("SearchEngine requires 'baseDir' in config.");if(this.#y={wordSegmentTokenThreshold:1e5,charSegmentTokenThreshold:5e5,minWordTokenSave:0,minCharTokenSave:0,...s},(this.#y.minWordTokenSave||0)>=(this.#y.wordSegmentTokenThreshold||1e5))throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");if((this.#y.minCharTokenSave||0)>=(this.#y.charSegmentTokenThreshold||5e5))throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");let i=null;if(this.#y.storage&&("object"==typeof this.#y.storage?i=this.#y.storage:"browser"===this.#y.storage?i=new t(this.#y.baseDir):"node"===this.#y.storage&&(i=new e(this.#y.baseDir))),!i){const s=typeof navigator<"u"&&navigator?.storage?.getDirectory instanceof Function,n=typeof process<"u"&&null!=process.versions&&null!=process.versions.node;s?i=new t(this.#y.baseDir):n&&(i=new e(this.#y.baseDir))}if(!i)throw new Error('Storage initialization failed. Please configure "storage" explicitly or ensure you are in a supported environment (Browser/Node).');this.#o=i,this.#h=new a(this.#o),this.#w=new r(this.#o),this.#u=new Map}async init(){if(this.#m)return;await this.#h.load();const t=[...this.#h.getSegments("word"),...this.#h.getSegments("char")];for(const e of t)this.#u.has(e.filename)||this.#u.set(e.filename,new o(e.filename,this.#o)),await this.#u.get(e.filename).loadIndex();this.#m=!0}startBatch(){this.#p=!0,this.#D={word:0,char:0}}async endBatch(){this.#p=!1,this.#D.word>0&&await this.#S("word",this.#D.word),this.#D.char>0&&await this.#S("char",this.#D.char),this.#D={word:0,char:0},await this.#h.save()}#k(t){try{if(typeof Intl<"u"&&"function"==typeof Intl.Segmenter&&"function"==typeof Array.from){const e=new Intl.Segmenter([],{granularity:"word"}).segment(t);if("object"==typeof e&&null!==e)return Array.from(e).filter(t=>t?.isWordLike).map(t=>t?.segment?.toLowerCase()||"")}}catch{}return t.toLowerCase().split(/[^a-z0-9\u4e00-\u9fa5]+/g).filter(t=>t.length>0)}#I(t){return this.#y.indexingTokenizer?this.#y.indexingTokenizer(t):this.#k(t.text)}#b(t){return this.#y.searchTokenizer?this.#y.searchTokenizer(t):this.#I(t)}async addDocument(t){return this.addDocuments([t])}async addDocumentIfMissing(t){return this.addDocumentsIfMissing([t])}async addDocumentsIfMissing(t){if(this.#m||await this.init(),0===t.length)return;const e=this.#h.getDeletedIds(),s=[],i=[],n=[];for(const a of t){if(e.has(a.id)||this.#h.isAdded(a.id))continue;const t=this.#I(a),r=[],o=[];for(const e of t)e.length>1?r.push(e):1===e.length&&o.push(e);r.length>0&&s.push({id:a.id,tokens:r}),o.length>0&&i.push({id:a.id,tokens:o}),n.push(a)}if(0===n.length)return;let a=0,r=0;if(s.length>0){await this.#w.appendBatch(h,s);for(const t of s)a+=t.tokens.length}if(i.length>0){await this.#w.appendBatch(c,i);for(const t of i)r+=t.tokens.length}for(const t of n)this.#h.addAddedId(t.id);this.#p?(this.#D.word+=a,this.#D.char+=r):(a>0&&await this.#S("word",a),r>0&&await this.#S("char",r),await this.#h.save())}async addDocuments(t){if(this.#m||await this.init(),0===t.length)return;const e=this.#h.getDeletedIds(),s=[],i=[];for(const n of t){if(e.has(n.id))throw new Error(`Document ID ${n.id} has been deleted and cannot be re-added.`);if(this.#h.isAdded(n.id))throw new Error(`Document ID ${n.id} already exists.`);const t=this.#I(n),a=[],r=[];for(const e of t)e.length>1?a.push(e):1===e.length&&r.push(e);a.length>0&&s.push({id:n.id,tokens:a}),r.length>0&&i.push({id:n.id,tokens:r})}let n=0,a=0;if(s.length>0){await this.#w.appendBatch(h,s);for(const t of s)n+=t.tokens.length}if(i.length>0){await this.#w.appendBatch(c,i);for(const t of i)a+=t.tokens.length}for(const e of t)this.#h.addAddedId(e.id);this.#p?(this.#D.word+=n,this.#D.char+=a):(n>0&&await this.#S("word",n),a>0&&await this.#S("char",a),await this.#h.save())}async#S(t,e){const s="word"===t?h:c,i=await this.#w.getCurrentSize(s),n="word"===t?this.#y.wordSegmentTokenThreshold||1e5:this.#y.charSegmentTokenThreshold||5e5,a="word"===t?this.#y.minWordTokenSave||0:this.#y.minCharTokenSave||0,r=this.#h.getLastSegmentInfo(t);let d,g,l,f;const w=()=>{const e=this.#h.getSegments(t).length+1;return`${t}_seg_${e}.bin`};if(r){const t=r.tokenCount;t>=n||t+e>=n?(d=w(),l=!0,g=r.end,f=e):(d=r.filename,l=!1,g=r.start,f=t+e)}else d=w(),l=!0,g=0,f=e;if(f<a)return void this.#h.updateSegment(t,d,g,i,f,l);const u=await this.#w.readRange(s,g,i);let m=this.#u.get(d);m||(m=new o(d,this.#o),this.#u.set(d,m)),await m.buildAndSave(u),this.#h.updateSegment(t,d,g,i,f,l)}async search(t,e){this.#m||await this.init();const s="string"==typeof t?{text:t}:t,i=this.#b(s),n=i.filter(t=>t.length>1),a=i.filter(t=>1===t.length),r=this.#h.getDeletedIds(),h=new Map,c=new Map,d=t=>{const e=this.#h.getSegments(t);for(const t of e){const e=t.filename;!this.#u.has(e)&&!c.has(e)&&c.set(e,new o(e,this.#o))}};d("word"),d("char"),await Promise.all(Array.from(c.entries()).map(([t,e])=>e.loadIndex().then(s=>{s&&this.#u.set(t,e)})));const g=async(t,e)=>{if(0===e.length)return;const s=this.#h.getSegments(t);for(const t of s){const s=t.filename,i=this.#u.get(s);if(i)for(const t of e){const e=i.search(t),s=1+.1*t.length;for(const i of e)if(!r.has(i))if(h.has(i)){const e=h.get(i);e.score+=s,e.tokens.add(t)}else h.set(i,{score:0,tokens:new Set([t])})}}};await g("word",n),await g("char",a);const l=[];return h.forEach((t,e)=>{l.push({id:e,score:t.score,tokens:Array.from(t.tokens)})}),l.sort((t,e)=>e.score-t.score),"number"==typeof e&&e>0?l.slice(0,e):l}async removeDocument(t){this.#m||await this.init(),this.#h.addDeletedId(t),this.#h.removeAddedId(t),await this.#h.save()}async clearAll(){await this.#o.clearAll(),this.#u.clear(),this.#h.reset(),this.#m=!1,this.#p=!1,this.#D={word:0,char:0}}async getStatus(){return this.#m||await this.init(),{wordSegments:this.#h.getSegments("word").length,charSegments:this.#h.getSegments("char").length,deleted:this.#h.getDeletedIds().size,wordCacheSize:await this.#w.getCurrentSize(h),charCacheSize:await this.#w.getCurrentSize(c),inBatch:this.#p}}async hasDocument(t){return this.#m||await this.init(),this.#h.hasDocument(t)}}class g{static#T=null;static#v={baseDir:"simple_search_data",wordSegmentTokenThreshold:1e5,minWordTokenSave:0};static configure(t){const e={...this.#v,...t};this.#T=new d(e)}static#z(){return this.#T||(this.#T=new d(this.#v)),this.#T}static async startBatch(){this.#z().startBatch()}static async endBatch(){return this.#z().endBatch()}static async addDocument(t){return this.#z().addDocument(t)}static async addDocumentIfMissing(t){return this.#z().addDocumentIfMissing(t)}static async addDocuments(t){return this.#z().addDocuments(t)}static async addDocumentsIfMissing(t){return this.#z().addDocumentsIfMissing(t)}static async search(t,e){return this.#z().search(t,e)}static async removeDocument(t){return this.#z().removeDocument(t)}static async clearAll(){return this.#z().clearAll()}static async getStatus(){return this.#z().getStatus()}static async hasDocument(t){return this.#z().hasDocument(t)}}export{t as BrowserStorage,e as NodeStorage,d as SearchEngine,g as SimpleSearch};
|