@robthepcguy/rag-vault 1.5.2 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +86 -14
- package/dist/bin/install-skills.d.ts.map +1 -0
- package/dist/bin/install-skills.js +16 -17
- package/dist/bin/install-skills.js.map +1 -0
- package/dist/chunker/index.d.ts.map +1 -0
- package/dist/chunker/index.js +1 -5
- package/dist/chunker/index.js.map +1 -0
- package/dist/chunker/semantic-chunker.d.ts.map +1 -0
- package/dist/chunker/semantic-chunker.js +9 -13
- package/dist/chunker/semantic-chunker.js.map +1 -0
- package/dist/chunker/sentence-splitter.d.ts.map +1 -0
- package/dist/chunker/sentence-splitter.js +1 -4
- package/dist/chunker/sentence-splitter.js.map +1 -0
- package/dist/embedder/index.d.ts +10 -0
- package/dist/embedder/index.d.ts.map +1 -0
- package/dist/embedder/index.js +58 -32
- package/dist/embedder/index.js.map +1 -0
- package/dist/errors/index.d.ts.map +1 -0
- package/dist/errors/index.js +10 -17
- package/dist/errors/index.js.map +1 -0
- package/dist/explainability/index.d.ts.map +1 -0
- package/dist/explainability/index.js +1 -5
- package/dist/explainability/index.js.map +1 -0
- package/dist/explainability/keywords.d.ts.map +1 -0
- package/dist/explainability/keywords.js +1 -4
- package/dist/explainability/keywords.js.map +1 -0
- package/dist/flywheel/feedback.d.ts.map +1 -0
- package/dist/flywheel/feedback.js +15 -19
- package/dist/flywheel/feedback.js.map +1 -0
- package/dist/flywheel/index.d.ts.map +1 -0
- package/dist/flywheel/index.js +1 -5
- package/dist/flywheel/index.js.map +1 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +86 -64
- package/dist/index.js.map +1 -0
- package/dist/parser/html-parser.d.ts.map +1 -0
- package/dist/parser/html-parser.js +7 -13
- package/dist/parser/html-parser.js.map +1 -0
- package/dist/parser/index.d.ts.map +1 -0
- package/dist/parser/index.js +44 -52
- package/dist/parser/index.js.map +1 -0
- package/dist/parser/pdf-filter.d.ts.map +1 -0
- package/dist/parser/pdf-filter.js +10 -16
- package/dist/parser/pdf-filter.js.map +1 -0
- package/dist/query/index.d.ts.map +1 -0
- package/dist/query/index.js +1 -8
- package/dist/query/index.js.map +1 -0
- package/dist/query/parser.d.ts.map +1 -0
- package/dist/query/parser.js +5 -12
- package/dist/query/parser.js.map +1 -0
- package/dist/server/index.d.ts +6 -0
- package/dist/server/index.d.ts.map +1 -0
- package/dist/server/index.js +106 -95
- package/dist/server/index.js.map +1 -0
- package/dist/server/raw-data-utils.d.ts.map +1 -0
- package/dist/server/raw-data-utils.js +21 -33
- package/dist/server/raw-data-utils.js.map +1 -0
- package/dist/server/remote-transport.d.ts +31 -0
- package/dist/server/remote-transport.d.ts.map +1 -0
- package/dist/server/remote-transport.js +177 -0
- package/dist/server/remote-transport.js.map +1 -0
- package/dist/server/schemas.d.ts.map +1 -0
- package/dist/server/schemas.js +35 -38
- package/dist/server/schemas.js.map +1 -0
- package/dist/utils/config-parsers.d.ts.map +1 -0
- package/dist/utils/config-parsers.js +3 -8
- package/dist/utils/config-parsers.js.map +1 -0
- package/dist/utils/config.d.ts.map +1 -0
- package/dist/utils/config.js +12 -19
- package/dist/utils/config.js.map +1 -0
- package/dist/utils/embedding-device-cli.d.ts +12 -0
- package/dist/utils/embedding-device-cli.d.ts.map +1 -0
- package/dist/utils/embedding-device-cli.js +61 -0
- package/dist/utils/embedding-device-cli.js.map +1 -0
- package/dist/utils/file-utils.d.ts.map +1 -0
- package/dist/utils/file-utils.js +12 -18
- package/dist/utils/file-utils.js.map +1 -0
- package/dist/utils/math.d.ts.map +1 -0
- package/dist/utils/math.js +2 -6
- package/dist/utils/math.js.map +1 -0
- package/dist/utils/process-handlers.d.ts.map +1 -0
- package/dist/utils/process-handlers.js +5 -10
- package/dist/utils/process-handlers.js.map +1 -0
- package/dist/vectordb/index.d.ts.map +1 -0
- package/dist/vectordb/index.js +39 -45
- package/dist/vectordb/index.js.map +1 -0
- package/dist/web/api-routes.d.ts.map +1 -0
- package/dist/web/api-routes.js +41 -47
- package/dist/web/api-routes.js.map +1 -0
- package/dist/web/config-routes.d.ts.map +1 -0
- package/dist/web/config-routes.js +30 -33
- package/dist/web/config-routes.js.map +1 -0
- package/dist/web/database-manager.d.ts.map +1 -0
- package/dist/web/database-manager.js +67 -72
- package/dist/web/database-manager.js.map +1 -0
- package/dist/web/http-server.d.ts.map +1 -0
- package/dist/web/http-server.js +56 -89
- package/dist/web/http-server.js.map +1 -0
- package/dist/web/index.d.ts.map +1 -0
- package/dist/web/index.js +31 -59
- package/dist/web/index.js.map +1 -0
- package/dist/web/middleware/async-handler.d.ts.map +1 -0
- package/dist/web/middleware/async-handler.js +1 -4
- package/dist/web/middleware/async-handler.js.map +1 -0
- package/dist/web/middleware/auth.d.ts.map +1 -0
- package/dist/web/middleware/auth.js +3 -6
- package/dist/web/middleware/auth.js.map +1 -0
- package/dist/web/middleware/error-handler.d.ts.map +1 -0
- package/dist/web/middleware/error-handler.js +4 -8
- package/dist/web/middleware/error-handler.js.map +1 -0
- package/dist/web/middleware/index.d.ts.map +1 -0
- package/dist/web/middleware/index.js +5 -17
- package/dist/web/middleware/index.js.map +1 -0
- package/dist/web/middleware/rate-limit.d.ts.map +1 -0
- package/dist/web/middleware/rate-limit.js +3 -8
- package/dist/web/middleware/rate-limit.js.map +1 -0
- package/dist/web/middleware/request-logger.d.ts.map +1 -0
- package/dist/web/middleware/request-logger.js +2 -6
- package/dist/web/middleware/request-logger.js.map +1 -0
- package/dist/web/types.d.ts.map +1 -0
- package/dist/web/types.js +1 -2
- package/dist/web/types.js.map +1 -0
- package/package.json +31 -35
- package/web-ui/dist/assets/CollectionsPage-BDmEfv3V.js +1 -0
- package/web-ui/dist/assets/FilesPage-pG9HmpgQ.js +1 -0
- package/web-ui/dist/assets/ReaderPage-CwMN03NU.js +28 -0
- package/web-ui/dist/assets/ReaderSettingsContext-CkSjqsRh.js +1 -0
- package/web-ui/dist/assets/SearchPage-DAltjnLL.js +1 -0
- package/web-ui/dist/assets/SettingsPage-C6J5BITP.js +1 -0
- package/web-ui/dist/assets/StatusPage-powRGmW3.js +1 -0
- package/web-ui/dist/assets/UploadPage-eyfSjL4u.js +8 -0
- package/web-ui/dist/assets/format-DOJ3IkRX.js +1 -0
- package/web-ui/dist/assets/index-BpwaiuGL.css +1 -0
- package/web-ui/dist/assets/index-D068MV_o.js +6 -0
- package/web-ui/dist/assets/link-indexer.worker-DGXNsXmZ.js +1 -0
- package/web-ui/dist/assets/motion-CKwJwI3J.js +9 -0
- package/web-ui/dist/assets/query-DPt-uCb6.js +1 -0
- package/web-ui/dist/assets/vendor-C2QPsZ3S.js +10 -0
- package/web-ui/dist/index.html +7 -2
- package/web-ui/dist/assets/index-SBHxoAwi.js +0 -120
- package/web-ui/dist/assets/index-ej8i4PGl.css +0 -1
package/README.md
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# RAG Vault
|
|
2
2
|
|
|
3
3
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
4
|
-
[TypeScript](https://www.typescriptlang.org/)
|
|
5
5
|
[MCP Registry](https://registry.modelcontextprotocol.io/servers/io.github.RobThePCGuy/rag-vault)
|
|
6
6
|
|
|
7
7
|
**Your documents. Your machine. Your control.**
|
|
@@ -215,6 +215,51 @@ curl -X POST "http://localhost:3000/api/v1/chunks/batch-related" \
|
|
|
215
215
|
-d '{"chunks": [{"filePath": "/path/to/doc.pdf", "chunkIndex": 0}], "limit": 3}'
|
|
216
216
|
```
|
|
217
217
|
|
|
218
|
+
## Remote Mode
|
|
219
|
+
|
|
220
|
+
RAG Vault can also run as an HTTP server for remote MCP clients like Claude.ai, Claude Desktop, or any client supporting Streamable HTTP or SSE transports.
|
|
221
|
+
|
|
222
|
+
```bash
|
|
223
|
+
# Start remote server (default port 3001)
|
|
224
|
+
npx github:RobThePCGuy/rag-vault --remote
|
|
225
|
+
|
|
226
|
+
# Custom port
|
|
227
|
+
npx github:RobThePCGuy/rag-vault --remote --port 8080
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
Stdio mode is unchanged -- omit `--remote` and everything works as before with Cursor, Claude Code, and Codex.
|
|
231
|
+
|
|
232
|
+
### Connecting from Claude Desktop
|
|
233
|
+
|
|
234
|
+
Add to your Claude Desktop config:
|
|
235
|
+
|
|
236
|
+
```json
|
|
237
|
+
{
|
|
238
|
+
"mcpServers": {
|
|
239
|
+
"rag-vault-remote": {
|
|
240
|
+
"type": "url",
|
|
241
|
+
"url": "http://localhost:3001/mcp"
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
Or via Claude Code CLI:
|
|
248
|
+
|
|
249
|
+
```bash
|
|
250
|
+
claude mcp add --transport http rag-vault http://localhost:3001/mcp
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
### Connecting from Claude.ai
|
|
254
|
+
|
|
255
|
+
For Claude.ai (Pro/Max/Team/Enterprise), add the server as a custom connector with the URL `https://your-host:3001/mcp`. For local development, expose your server with a tunnel:
|
|
256
|
+
|
|
257
|
+
```bash
|
|
258
|
+
cloudflared tunnel --url http://localhost:3001
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
Set `RAG_API_KEY` for authentication when exposing the server remotely. The server supports both Streamable HTTP (`/mcp`) and legacy SSE (`/sse`) transports, plus a health check at `/health`.
|
|
262
|
+
|
|
218
263
|
## Real-World Examples
|
|
219
264
|
|
|
220
265
|
### Search Your Codebase Documentation
|
|
@@ -277,7 +322,7 @@ Query → Embed → Vector search → Keyword boost → Quality filter → Resul
|
|
|
277
322
|
|
|
278
323
|
**Local by default**: Embeddings via Transformers.js. Storage via LanceDB. Network is only needed for initial model download or if you explicitly ingest remote URLs.
|
|
279
324
|
|
|
280
|
-
**MCP tools included**: `ingest_file`, `ingest_data`, `
|
|
325
|
+
**MCP tools included**: `query_documents`, `ingest_file`, `ingest_data`, `delete_file`, `list_files`, `status`, `feedback_pin`, `feedback_dismiss`, and `feedback_stats`.
|
|
281
326
|
|
|
282
327
|
## Supported Formats
|
|
283
328
|
|
|
@@ -300,8 +345,22 @@ Query → Embed → Vector search → Keyword boost → Quality filter → Resul
|
|
|
300
345
|
| `BASE_DIR` | Current directory | Only files under this path can be accessed |
|
|
301
346
|
| `DB_PATH` | `./lancedb/` | Where vectors are stored |
|
|
302
347
|
| `MODEL_NAME` | `Xenova/all-MiniLM-L6-v2` | HuggingFace embedding model |
|
|
348
|
+
| `RAG_EMBEDDING_DEVICE` | `auto` | Inference device hint (`auto`, `cpu`, `cuda`, `dml`, `webgpu`, etc.) |
|
|
303
349
|
| `WEB_PORT` | `3000` | Port for web interface |
|
|
304
350
|
|
|
351
|
+
One-command override (no `.env` edit):
|
|
352
|
+
|
|
353
|
+
```bash
|
|
354
|
+
# MCP mode
|
|
355
|
+
npx github:RobThePCGuy/rag-vault --embedding-device dml
|
|
356
|
+
|
|
357
|
+
# Web mode
|
|
358
|
+
npx github:RobThePCGuy/rag-vault web --embedding-device dml
|
|
359
|
+
|
|
360
|
+
# Explicitly force auto detection
|
|
361
|
+
npx github:RobThePCGuy/rag-vault --gpu-auto
|
|
362
|
+
```
|
|
363
|
+
|
|
305
364
|
### Search Tuning
|
|
306
365
|
|
|
307
366
|
| Variable | Default | What it does |
|
|
@@ -358,7 +417,12 @@ Yes, after the first run. The model caches locally.
|
|
|
358
417
|
<details>
|
|
359
418
|
<summary><strong>What about GPU acceleration?</strong></summary>
|
|
360
419
|
|
|
361
|
-
Transformers.js
|
|
420
|
+
RAG Vault now uses Transformers.js device auto-selection by default (`RAG_EMBEDDING_DEVICE=auto`), which can use GPU providers when available and fall back to CPU.
|
|
421
|
+
|
|
422
|
+
Practical notes:
|
|
423
|
+
- On Windows, the Node runtime typically uses DirectML (`dml`).
|
|
424
|
+
- Linux x64 can use CUDA when ONNX Runtime CUDA binaries are available.
|
|
425
|
+
- You can force CPU with `RAG_EMBEDDING_DEVICE=cpu` if you prefer stability.
|
|
362
426
|
|
|
363
427
|
</details>
|
|
364
428
|
|
|
@@ -425,14 +489,17 @@ pnpm test:integration
|
|
|
425
489
|
# Build
|
|
426
490
|
pnpm build
|
|
427
491
|
|
|
428
|
-
# Run MCP server locally
|
|
492
|
+
# Run MCP server locally (stdio)
|
|
429
493
|
pnpm dev
|
|
430
494
|
|
|
495
|
+
# Run MCP server locally (remote HTTP + SSE)
|
|
496
|
+
pnpm dev:remote
|
|
497
|
+
|
|
431
498
|
# Run web server locally
|
|
432
499
|
pnpm web:dev
|
|
433
500
|
|
|
434
501
|
# Release to npm (local, guarded)
|
|
435
|
-
pnpm release
|
|
502
|
+
pnpm release # patch
|
|
436
503
|
pnpm release:minor
|
|
437
504
|
pnpm release:major
|
|
438
505
|
pnpm release:dry
|
|
@@ -458,15 +525,20 @@ Use `RUN_EMBEDDING_INTEGRATION=1` to explicitly opt into network/model-dependent
|
|
|
458
525
|
|
|
459
526
|
```
|
|
460
527
|
src/
|
|
461
|
-
├──
|
|
462
|
-
├──
|
|
463
|
-
├──
|
|
464
|
-
├──
|
|
465
|
-
├──
|
|
466
|
-
├──
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
528
|
+
├── bin/ # CLI subcommands (skills install)
|
|
529
|
+
├── chunker/ # Semantic text splitting
|
|
530
|
+
├── embedder/ # Transformers.js wrapper
|
|
531
|
+
├── errors/ # Error handling utilities
|
|
532
|
+
├── explainability/ # Keyword-based result explanations
|
|
533
|
+
├── flywheel/ # Feedback loop (pin/dismiss reranking)
|
|
534
|
+
├── parser/ # PDF, DOCX, HTML parsing
|
|
535
|
+
├── query/ # Advanced query syntax parser
|
|
536
|
+
├── server/ # MCP tool handlers + remote transport
|
|
537
|
+
├── utils/ # Config, file helpers, process handlers
|
|
538
|
+
├── vectordb/ # LanceDB + hybrid search
|
|
539
|
+
└── web/ # Express server + REST API
|
|
540
|
+
|
|
541
|
+
web-ui/ # React frontend (Vite + Tailwind)
|
|
470
542
|
```
|
|
471
543
|
|
|
472
544
|
## Documentation
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"install-skills.d.ts","sourceRoot":"","sources":["../../src/bin/install-skills.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAqLH;;;GAGG;AACH,wBAAgB,GAAG,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,IAAI,CA+BxC"}
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
"use strict";
|
|
2
1
|
/**
|
|
3
2
|
* MCP Local RAG Skills Installer
|
|
4
3
|
*
|
|
@@ -13,11 +12,10 @@
|
|
|
13
12
|
* npx rag-vault skills install --codex # Codex
|
|
14
13
|
* npx rag-vault skills install --path /custom/path # Custom
|
|
15
14
|
*/
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
const node_path_1 = require("node:path");
|
|
15
|
+
import { cpSync, existsSync, mkdirSync } from 'node:fs';
|
|
16
|
+
import { fileURLToPath } from 'node:url';
|
|
17
|
+
import { homedir } from 'node:os';
|
|
18
|
+
import { dirname, join, resolve } from 'node:path';
|
|
21
19
|
// ============================================
|
|
22
20
|
// Constants
|
|
23
21
|
// ============================================
|
|
@@ -25,16 +23,17 @@ const node_path_1 = require("node:path");
|
|
|
25
23
|
// dist/bin/install-skills.js -> dist/skills/rag-vault
|
|
26
24
|
// But skills are actually in package root: skills/rag-vault
|
|
27
25
|
// So from dist/bin, go up twice: ../.. then skills/rag-vault
|
|
28
|
-
const
|
|
26
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
27
|
+
const SKILLS_SOURCE = resolve(__dirname, '..', '..', 'skills', 'rag-vault');
|
|
29
28
|
// Codex home directory (supports CODEX_HOME environment variable)
|
|
30
29
|
// https://developers.openai.com/codex/local-config/
|
|
31
|
-
const CODEX_HOME = process.env['CODEX_HOME'] ||
|
|
30
|
+
const CODEX_HOME = process.env['CODEX_HOME'] || join(homedir(), '.codex');
|
|
32
31
|
// Installation targets
|
|
33
32
|
const TARGETS = {
|
|
34
33
|
'claude-code-project': './.claude/skills/rag-vault',
|
|
35
|
-
'claude-code-global':
|
|
34
|
+
'claude-code-global': join(homedir(), '.claude', 'skills', 'rag-vault'),
|
|
36
35
|
'codex-project': './.codex/skills/rag-vault',
|
|
37
|
-
'codex-global':
|
|
36
|
+
'codex-global': join(CODEX_HOME, 'skills', 'rag-vault'),
|
|
38
37
|
};
|
|
39
38
|
function parseArgs(args) {
|
|
40
39
|
const options = {
|
|
@@ -140,24 +139,24 @@ function getTargetPath(options) {
|
|
|
140
139
|
console.error('Error: Custom path not specified');
|
|
141
140
|
process.exit(1);
|
|
142
141
|
}
|
|
143
|
-
return
|
|
142
|
+
return resolve(options.customPath, 'rag-vault');
|
|
144
143
|
}
|
|
145
144
|
return TARGETS[options.target];
|
|
146
145
|
}
|
|
147
146
|
function install(targetPath) {
|
|
148
147
|
// Check source exists
|
|
149
|
-
if (!
|
|
148
|
+
if (!existsSync(SKILLS_SOURCE)) {
|
|
150
149
|
console.error(`Error: Skills source not found at ${SKILLS_SOURCE}`);
|
|
151
150
|
process.exit(1);
|
|
152
151
|
}
|
|
153
152
|
// Create target directory
|
|
154
|
-
const targetDir =
|
|
155
|
-
if (!
|
|
156
|
-
|
|
153
|
+
const targetDir = dirname(targetPath);
|
|
154
|
+
if (!existsSync(targetDir)) {
|
|
155
|
+
mkdirSync(targetDir, { recursive: true });
|
|
157
156
|
console.log(`Created directory: ${targetDir}`);
|
|
158
157
|
}
|
|
159
158
|
// Copy skills
|
|
160
|
-
|
|
159
|
+
cpSync(SKILLS_SOURCE, targetPath, { recursive: true });
|
|
161
160
|
console.log(`Installed skills to: ${targetPath}`);
|
|
162
161
|
}
|
|
163
162
|
// ============================================
|
|
@@ -167,7 +166,7 @@ function install(targetPath) {
|
|
|
167
166
|
* Run the skills installer with the given arguments
|
|
168
167
|
* @param args - Command line arguments (after "skills install")
|
|
169
168
|
*/
|
|
170
|
-
function run(args) {
|
|
169
|
+
export function run(args) {
|
|
171
170
|
// Default to help if no args
|
|
172
171
|
if (args.length === 0) {
|
|
173
172
|
printHelp();
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"install-skills.js","sourceRoot":"","sources":["../../src/bin/install-skills.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,SAAS,CAAA;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAA;AACxC,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAA;AACjC,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAElD,+CAA+C;AAC/C,YAAY;AACZ,+CAA+C;AAE/C,+DAA+D;AAC/D,sDAAsD;AACtD,4DAA4D;AAC5D,6DAA6D;AAC7D,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAA;AACzD,MAAM,aAAa,GAAG,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,CAAC,CAAA;AAE3E,kEAAkE;AAClE,oDAAoD;AACpD,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,IAAI,IAAI,CAAC,OAAO,EAAE,EAAE,QAAQ,CAAC,CAAA;AAEzE,uBAAuB;AACvB,MAAM,OAAO,GAAG;IACd,qBAAqB,EAAE,4BAA4B;IACnD,oBAAoB,EAAE,IAAI,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,QAAQ,EAAE,WAAW,CAAC;IACvE,eAAe,EAAE,2BAA2B;IAC5C,cAAc,EAAE,IAAI,CAAC,UAAU,EAAE,QAAQ,EAAE,WAAW,CAAC;CAC/C,CAAA;AAYV,SAAS,SAAS,CAAC,IAAc;IAC/B,MAAM,OAAO,GAAY;QACvB,MAAM,EAAE,qBAAqB;QAC7B,IAAI,EAAE,KAAK;KACZ,CAAA;IAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAA;QAEnB,QAAQ,GAAG,EAAE,CAAC;YACZ,KAAK,QAAQ,CAAC;YACd,KAAK,IAAI;gBACP,OAAO,CAAC,IAAI,GAAG,IAAI,CAAA;gBACnB,MAAK;YAEP,KAAK,eAAe;gBAClB,0BAA0B;gBAC1B,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,UAAU,EAAE,CAAC;oBAC/B,OAAO,CAAC,MAAM,GAAG,oBAAoB,CAAA;oBACrC,CAAC,EAAE,CAAA,CAAC,gBAAgB;gBACtB,CAAC;qBAAM,CAAC;oBACN,OAAO,CAAC,MAAM,GAAG,qBAAqB,CAAA;gBACxC,CAAC;gBACD,MAAK;YAEP,KAAK,SAAS;gBACZ,uCAAuC;gBACvC,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,WAAW,EAAE,CAAC;oBAChC,OAAO,CAAC,MAAM,GAAG,eAAe,CAAA;oBAChC,CAAC,EAAE,CAAA,CAAC,gBAAgB;gBACtB,CAAC;qBAAM,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,UAAU,EAAE,CAAC;oBACtC,OAAO,CAAC,MAAM,GAAG,cAAc,CAAA;oBAC/B,CAAC,EAAE,CAAA,CAAC,gBAAgB;gBACtB,CAAC;qBAAM,CAAC;oBACN,gDAAgD;oBAChD,OAAO,CAAC,MAAM,GAAG,cAAc,CAAA;gBACjC,CAAC;gBACD,MAAK;YAEP,KAAK,QAAQ,CAAC,CAAC,CAAC;gBACd,MAAM,OAAO,GAAG,IAAI,CAAC,CA
AC,GAAG,CAAC,CAAC,CAAA;gBAC3B,IAAI,CAAC,OAAO,EAAE,CAAC;oBACb,OAAO,CAAC,KAAK,CAAC,wCAAwC,CAAC,CAAA;oBACvD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;gBACjB,CAAC;gBACD,OAAO,CAAC,MAAM,GAAG,QAAQ,CAAA;gBACzB,OAAO,CAAC,UAAU,GAAG,OAAO,CAAA;gBAC5B,CAAC,EAAE,CAAA,CAAC,gBAAgB;gBACpB,MAAK;YACP,CAAC;YAED;gBACE,IAAI,GAAG,EAAE,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;oBACzB,OAAO,CAAC,KAAK,CAAC,mBAAmB,GAAG,EAAE,CAAC,CAAA;oBACvC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;gBACjB,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAA;AAChB,CAAC;AAED,+CAA+C;AAC/C,eAAe;AACf,+CAA+C;AAE/C,SAAS,SAAS;IAChB,OAAO,CAAC,GAAG,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAgCb,CAAC,CAAA;AACF,CAAC;AAED,+CAA+C;AAC/C,eAAe;AACf,+CAA+C;AAE/C,SAAS,aAAa,CAAC,OAAgB;IACrC,IAAI,OAAO,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;QAChC,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC;YACxB,OAAO,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAA;YACjD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACjB,CAAC;QACD,OAAO,OAAO,CAAC,OAAO,CAAC,UAAU,EAAE,WAAW,CAAC,CAAA;IACjD,CAAC;IAED,OAAO,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAA;AAChC,CAAC;AAED,SAAS,OAAO,CAAC,UAAkB;IACjC,sBAAsB;IACtB,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;QAC/B,OAAO,CAAC,KAAK,CAAC,qCAAqC,aAAa,EAAE,CAAC,CAAA;QACnE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IAED,0BAA0B;IAC1B,MAAM,SAAS,GAAG,OAAO,CAAC,UAAU,CAAC,CAAA;IACrC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC3B,SAAS,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;QACzC,OAAO,CAAC,GAAG,CAAC,sBAAsB,SAAS,EAAE,CAAC,CAAA;IAChD,CAAC;IAED,cAAc;IACd,MAAM,CAAC,aAAa,EAAE,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IACtD,OAAO,CAAC,GAAG,CAAC,wBAAwB,UAAU,EAAE,CAAC,CAAA;AACnD,CAAC;AAED,+CAA+C;AAC/C,wBAAwB;AACxB,+CAA+C;AAE/C;;;GAGG;AACH,MAAM,UAAU,GAAG,CAAC,IAAc;IAChC,6BAA6B;IAC7B,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,SAAS,EAAE,CAAA;QACX,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IAED,MAAM,OAAO,GAAG,SAAS,CAAC,IAAI,CAAC,CAAA;IAE/B,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;QACjB,SAAS,EAAE,CAAA;QACX,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IAED,MAAM,UAAU,GAAG,aAAa,CAAC,OAAO,CAAC,CAAA;IAEzC,OAAO,CAAC,GAAG,CAAC,oCAAo
C,CAAC,CAAA;IACjD,OAAO,CAAC,GAAG,CAAC,WAAW,OAAO,CAAC,MAAM,EAAE,CAAC,CAAA;IACxC,OAAO,CAAC,GAAG,CAAC,SAAS,UAAU,EAAE,CAAC,CAAA;IAClC,OAAO,CAAC,GAAG,EAAE,CAAA;IAEb,OAAO,CAAC,UAAU,CAAC,CAAA;IAEnB,OAAO,CAAC,GAAG,EAAE,CAAA;IACb,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAA;IACrC,OAAO,CAAC,GAAG,EAAE,CAAA;IACb,OAAO,CAAC,GAAG,CAAC,yCAAyC,CAAC,CAAA;IACtD,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAA;IACvC,OAAO,CAAC,GAAG,CAAC,kCAAkC,CAAC,CAAA;IAC/C,OAAO,CAAC,GAAG,CAAC,sCAAsC,CAAC,CAAA;IACnD,OAAO,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAA;AACpD,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/chunker/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,iBAAiB;IACjB,IAAI,EAAE,MAAM,CAAA;IACZ,+BAA+B;IAC/B,KAAK,EAAE,MAAM,CAAA;CACd;AAED,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAA"}
|
package/dist/chunker/index.js
CHANGED
|
@@ -1,6 +1,2 @@
|
|
|
1
|
-
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.SemanticChunker = void 0;
|
|
4
|
-
var semantic_chunker_js_1 = require("./semantic-chunker.js");
|
|
5
|
-
Object.defineProperty(exports, "SemanticChunker", { enumerable: true, get: function () { return semantic_chunker_js_1.SemanticChunker; } });
|
|
1
|
+
export { SemanticChunker } from './semantic-chunker.js';
|
|
6
2
|
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/chunker/index.ts"],"names":[],"mappings":"AAUA,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAA"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"semantic-chunker.d.ts","sourceRoot":"","sources":["../../src/chunker/semantic-chunker.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,YAAY,CAAA;AAQ3C;;;GAGG;AACH,MAAM,WAAW,qBAAqB;IACpC,2DAA2D;IAC3D,aAAa,EAAE,MAAM,CAAA;IACrB,8DAA8D;IAC9D,SAAS,EAAE,MAAM,CAAA;IACjB,gEAAgE;IAChE,CAAC,EAAE,MAAM,CAAA;IACT,uDAAuD;IACvD,cAAc,EAAE,MAAM,CAAA;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAA;CACjD;AAoBD;;;;;;;;;;;;;GAaG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAmBpD;AAMD,eAAO,MAAM,+BAA+B,EAAE,qBAK7C,CAAA;AAMD;;;;;;;;;;;GAWG;AACH,qBAAa,eAAe;IAC1B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAuB;gBAElC,MAAM,GAAE,OAAO,CAAC,qBAAqB,CAAM;IAIvD;;;;;;OAMG;IACG,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,iBAAiB,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC;IAsChF;;OAEG;IACH,OAAO,CAAC,cAAc;IAmEtB;;;OAGG;IACH,OAAO,CAAC,gBAAgB;IAaxB;;;;;OAKG;IACH,OAAO,CAAC,gBAAgB;IAuBxB;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAWxB;;;OAGG;IACH,OAAO,CAAC,kBAAkB;IAM1B;;OAEG;IACH,OAAO,CAAC,OAAO;IAIf;;;OAGG;IACH,gBAAgB,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,MAAM;CAGzD"}
|
|
@@ -1,11 +1,7 @@
|
|
|
1
|
-
"use strict";
|
|
2
1
|
// Semantic Chunker implementation using Max-Min algorithm
|
|
3
2
|
// Based on: "Max–Min semantic chunking of documents for RAG application" (Springer, 2025)
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
exports.isGarbageChunk = isGarbageChunk;
|
|
7
|
-
const sentence_splitter_js_1 = require("./sentence-splitter.js");
|
|
8
|
-
const math_js_1 = require("../utils/math.js");
|
|
3
|
+
import { splitIntoSentences } from './sentence-splitter.js';
|
|
4
|
+
import { cosineSimilarity as cosineSimilarityUtil } from '../utils/math.js';
|
|
9
5
|
// ============================================
|
|
10
6
|
// Performance Optimization Constants
|
|
11
7
|
// ============================================
|
|
@@ -35,7 +31,7 @@ const MAX_SENTENCES = 15;
|
|
|
35
31
|
* @param text - Chunk text to check
|
|
36
32
|
* @returns true if chunk is garbage and should be removed
|
|
37
33
|
*/
|
|
38
|
-
function isGarbageChunk(text) {
|
|
34
|
+
export function isGarbageChunk(text) {
|
|
39
35
|
const trimmed = text.trim();
|
|
40
36
|
if (trimmed.length === 0)
|
|
41
37
|
return true;
|
|
@@ -58,7 +54,7 @@ function isGarbageChunk(text) {
|
|
|
58
54
|
// ============================================
|
|
59
55
|
// Default Configuration
|
|
60
56
|
// ============================================
|
|
61
|
-
|
|
57
|
+
export const DEFAULT_SEMANTIC_CHUNKER_CONFIG = {
|
|
62
58
|
hardThreshold: 0.6,
|
|
63
59
|
initConst: 1.5,
|
|
64
60
|
c: 0.9,
|
|
@@ -79,9 +75,10 @@ exports.DEFAULT_SEMANTIC_CHUNKER_CONFIG = {
|
|
|
79
75
|
* Key insight: A sentence belongs to a chunk if its maximum similarity to any chunk member
|
|
80
76
|
* is greater than the minimum similarity between existing chunk members (with threshold adjustment)
|
|
81
77
|
*/
|
|
82
|
-
class SemanticChunker {
|
|
78
|
+
export class SemanticChunker {
|
|
79
|
+
config;
|
|
83
80
|
constructor(config = {}) {
|
|
84
|
-
this.config = { ...
|
|
81
|
+
this.config = { ...DEFAULT_SEMANTIC_CHUNKER_CONFIG, ...config };
|
|
85
82
|
}
|
|
86
83
|
/**
|
|
87
84
|
* Split text into semantically coherent chunks
|
|
@@ -96,7 +93,7 @@ class SemanticChunker {
|
|
|
96
93
|
return [];
|
|
97
94
|
}
|
|
98
95
|
// Split into sentences
|
|
99
|
-
const sentences =
|
|
96
|
+
const sentences = splitIntoSentences(text);
|
|
100
97
|
if (sentences.length === 0) {
|
|
101
98
|
return [];
|
|
102
99
|
}
|
|
@@ -260,8 +257,7 @@ class SemanticChunker {
|
|
|
260
257
|
* Public for testing - delegates to shared utility
|
|
261
258
|
*/
|
|
262
259
|
cosineSimilarity(vec1, vec2) {
|
|
263
|
-
return (
|
|
260
|
+
return cosineSimilarityUtil(vec1, vec2);
|
|
264
261
|
}
|
|
265
262
|
}
|
|
266
|
-
exports.SemanticChunker = SemanticChunker;
|
|
267
263
|
//# sourceMappingURL=semantic-chunker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"semantic-chunker.js","sourceRoot":"","sources":["../../src/chunker/semantic-chunker.ts"],"names":[],"mappings":"AAAA,0DAA0D;AAC1D,0FAA0F;AAG1F,OAAO,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAA;AAC3D,OAAO,EAAE,gBAAgB,IAAI,oBAAoB,EAAE,MAAM,kBAAkB,CAAA;AA4B3E,+CAA+C;AAC/C,qCAAqC;AACrC,+CAA+C;AAE/C;;;;GAIG;AACH,MAAM,WAAW,GAAG,CAAC,CAAA;AAErB;;;;GAIG;AACH,MAAM,aAAa,GAAG,EAAE,CAAA;AAExB;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAA;IAC3B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAA;IAErC,uDAAuD;IACvD,IAAI,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC;QAAE,OAAO,KAAK,CAAA;IAE7C,yDAAyD;IACzD,IAAI,sCAAsC,CAAC,IAAI,CAAC,OAAO,CAAC;QAAE,OAAO,IAAI,CAAA;IAErE,kDAAkD;IAClD,MAAM,UAAU,GAAG,IAAI,GAAG,EAAkB,CAAA;IAC5C,KAAK,MAAM,IAAI,IAAI,OAAO,EAAE,CAAC;QAC3B,UAAU,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;IACvD,CAAC;IACD,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,CAAA;IACjD,IAAI,QAAQ,GAAG,OAAO,CAAC,MAAM,GAAG,GAAG;QAAE,OAAO,IAAI,CAAA;IAEhD,OAAO,KAAK,CAAA;AACd,CAAC;AAED,+CAA+C;AAC/C,wBAAwB;AACxB,+CAA+C;AAE/C,MAAM,CAAC,MAAM,+BAA+B,GAA0B;IACpE,aAAa,EAAE,GAAG;IAClB,SAAS,EAAE,GAAG;IACd,CAAC,EAAE,GAAG;IACN,cAAc,EAAE,EAAE;CACnB,CAAA;AAED,+CAA+C;AAC/C,wBAAwB;AACxB,+CAA+C;AAE/C;;;;;;;;;;;GAWG;AACH,MAAM,OAAO,eAAe;IACT,MAAM,CAAuB;IAE9C,YAAY,SAAyC,EAAE;QACrD,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,+BAA+B,EAAE,GAAG,MAAM,EAAE,CAAA;IACjE,CAAC;IAED;;;;;;OAMG;IACH,KAAK,CAAC,SAAS,CAAC,IAAY,EAAE,QAA2B;QACvD,qBAAqB;QACrB,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtC,OAAO,EAAE,CAAA;QACX,CAAC;QAED,uBAAuB;QACvB,MAAM,SAAS,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAA;QAC1C,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3B,OAAO,EAAE,CAAA;QACX,CAAC;QAED,wCAAwC;QACxC,MAAM,UAAU,GAAG,MAAM,QAAQ,CAAC,UAAU,CAAC,SAAS,CAAC,CAAA;QAEvD,yDAAyD;QACzD,MAAM,cAAc,GAAG,IAAI,CAAC,cAAc,CAAC,SAAS,EAAE,UAAU,CAAC,CAAA;QAEjE,+BAA+B;QAC/B,MAAM,MAAM,GAAgB,EAAE,CAAA;QAC9B,IAAI,UAAU,GAAG,CAAC,CAAA;QAElB,KAAK,MAAM,KAAK
,IAAI,cAAc,EAAE,CAAC;YACnC,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;YAEjC,kDAAkD;YAClD,IAAI,SAAS,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,CAAC,cAAc,IAAI,CAAC,cAAc,CAAC,SAAS,CAAC,EAAE,CAAC;gBACjF,MAAM,CAAC,IAAI,CAAC;oBACV,IAAI,EAAE,SAAS;oBACf,KAAK,EAAE,UAAU;iBAClB,CAAC,CAAA;gBACF,UAAU,EAAE,CAAA;YACd,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAA;IACf,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,SAAmB,EAAE,UAAsB;QAChE,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAA;QACrC,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAA;QAEzD,MAAM,MAAM,GAAe,EAAE,CAAA;QAC7B,IAAI,YAAY,GAAa,EAAE,CAAA;QAC/B,IAAI,sBAAsB,GAAe,EAAE,CAAA;QAE3C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,MAAM,QAAQ,GAAG,SAAS,CAAC,CAAC,CAAC,CAAA;YAC7B,MAAM,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAA;YAE/B,IAAI,CAAC,QAAQ,IAAI,CAAC,SAAS;gBAAE,SAAQ;YAErC,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC9B,sCAAsC;gBACtC,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;gBAC3B,sBAAsB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;YACxC,CAAC;iBAAM,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACrC,gDAAgD;gBAChD,MAAM,cAAc,GAAG,sBAAsB,CAAC,CAAC,CAAC,CAAA;gBAChD,IAAI,CAAC,cAAc;oBAAE,SAAQ;gBAE7B,MAAM,UAAU,GAAG,IAAI,CAAC,gBAAgB,CAAC,cAAc,EAAE,SAAS,CAAC,CAAA;gBAEnE,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,GAAG,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,aAAa,EAAE,CAAC;oBACnE,uBAAuB;oBACvB,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;oBAC3B,sBAAsB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;gBACxC,CAAC;qBAAM,CAAC;oBACN,kBAAkB;oBAClB,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,YAAY,CAAC,CAAC,CAAA;oBAC9B,YAAY,GAAG,CAAC,QAAQ,CAAC,CAAA;oBACzB,sBAAsB,GAAG,CAAC,SAAS,CAAC,CAAA;gBACtC,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,4EAA4E;gBAC5E,IAAI,YAAY,CAAC,MAAM,IAAI,aAAa,EAAE,CAAC;oBACzC,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,YAAY,CAAC,CAAC,CAAA;oBAC9B,YAAY,GAAG,CAAC,QAAQ,CAAC,CAAA;oBACzB,sBAAsB,GAAG,CAAC,SAAS,CAAC,CAAA;oBACpC,SAAQ;gBACV,CAAC;gBAED,2DAA2D;gBAC3D,MAAM,SAAS,GAAG,IAAI,CAAC,gBAAgB,CAAC,SAAS,EAAE,sBAAsB,CAAC,CAAA;gBAE1E,IAAI,SAAS,EAAE,CAAC;oBACd,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;oBAC3B,sBAAs
B,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;gBACxC,CAAC;qBAAM,CAAC;oBACN,kBAAkB;oBAClB,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,YAAY,CAAC,CAAC,CAAA;oBAC9B,YAAY,GAAG,CAAC,QAAQ,CAAC,CAAA;oBACzB,sBAAsB,GAAG,CAAC,SAAS,CAAC,CAAA;gBACtC,CAAC;YACH,CAAC;QACH,CAAC;QAED,8BAA8B;QAC9B,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAA;QAC3B,CAAC;QAED,OAAO,MAAM,CAAA;IACf,CAAC;IAED;;;OAGG;IACK,gBAAgB,CAAC,YAAsB,EAAE,eAA2B;QAC1E,gDAAgD;QAChD,MAAM,MAAM,GAAG,IAAI,CAAC,gBAAgB,CAAC,eAAe,CAAC,CAAA;QAErD,0DAA0D;QAC1D,MAAM,MAAM,GAAG,IAAI,CAAC,gBAAgB,CAAC,YAAY,EAAE,eAAe,CAAC,CAAA;QAEnE,8BAA8B;QAC9B,MAAM,SAAS,GAAG,IAAI,CAAC,kBAAkB,CAAC,MAAM,EAAE,eAAe,CAAC,MAAM,CAAC,CAAA;QAEzE,OAAO,MAAM,GAAG,SAAS,CAAA;IAC3B,CAAC;IAED;;;;;OAKG;IACK,gBAAgB,CAAC,UAAsB;QAC7C,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,GAAG,CAAA;QAErC,uEAAuE;QACvE,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,CAAC,MAAM,GAAG,WAAW,CAAC,CAAA;QAC7D,MAAM,gBAAgB,GAAG,UAAU,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAA;QAEnD,IAAI,MAAM,GAAG,GAAG,CAAA;QAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,gBAAgB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACjD,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,gBAAgB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACrD,MAAM,IAAI,GAAG,gBAAgB,CAAC,CAAC,CAAC,CAAA;gBAChC,MAAM,IAAI,GAAG,gBAAgB,CAAC,CAAC,CAAC,CAAA;gBAChC,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI;oBAAE,SAAQ;gBAE5B,MAAM,GAAG,GAAG,IAAI,CAAC,gBAAgB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;gBAC7C,IAAI,GAAG,GAAG,MAAM,EAAE,CAAC;oBACjB,MAAM,GAAG,GAAG,CAAA;gBACd,CAAC;YACH,CAAC;QACH,CAAC;QACD,OAAO,MAAM,CAAA;IACf,CAAC;IAED;;OAEG;IACK,gBAAgB,CAAC,SAAmB,EAAE,eAA2B;QACvE,IAAI,MAAM,GAAG,CAAC,GAAG,CAAA;QACjB,KAAK,MAAM,QAAQ,IAAI,eAAe,EAAE,CAAC;YACvC,MAAM,GAAG,GAAG,IAAI,CAAC,gBAAgB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAA;YACtD,IAAI,GAAG,GAAG,MAAM,EAAE,CAAC;gBACjB,MAAM,GAAG,GAAG,CAAA;YACd,CAAC;QACH,CAAC;QACD,OAAO,MAAM,CAAA;IACf,CAAC;IAED;;;OAGG;IACK,kBAAkB,CAAC,MAAc,EAAE,SAAiB;QAC1D,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,CAAA;QAC5C,MAAM,gBAAgB,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,MAAM,GAAG,YAAY,CAAA;QAC9D,OAAO,I
AAI,CAAC,GAAG,CAAC,gBAAgB,EAAE,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,CAAA;IAC9D,CAAC;IAED;;OAEG;IACK,OAAO,CAAC,CAAS;QACvB,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;IAC/B,CAAC;IAED;;;OAGG;IACH,gBAAgB,CAAC,IAAc,EAAE,IAAc;QAC7C,OAAO,oBAAoB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;IACzC,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sentence-splitter.d.ts","sourceRoot":"","sources":["../../src/chunker/sentence-splitter.ts"],"names":[],"mappings":"AAoFA;;;;;;;;;;;;;GAaG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAwCzD"}
|
|
@@ -1,9 +1,6 @@
|
|
|
1
|
-
"use strict";
|
|
2
1
|
// Sentence Splitter for Semantic Chunking
|
|
3
2
|
// Created: 2025-12-27
|
|
4
3
|
// Purpose: Split text into sentences using Intl.Segmenter (Unicode standard)
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.splitIntoSentences = splitIntoSentences;
|
|
7
4
|
// ============================================
|
|
8
5
|
// Constants
|
|
9
6
|
// ============================================
|
|
@@ -77,7 +74,7 @@ const segmenter = new Intl.Segmenter('und', { granularity: 'sentence' });
|
|
|
77
74
|
* @param text - The text to split into sentences
|
|
78
75
|
* @returns Array of sentences
|
|
79
76
|
*/
|
|
80
|
-
function splitIntoSentences(text) {
|
|
77
|
+
export function splitIntoSentences(text) {
|
|
81
78
|
// Handle empty input
|
|
82
79
|
if (!text || text.trim().length === 0) {
|
|
83
80
|
return [];
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sentence-splitter.js","sourceRoot":"","sources":["../../src/chunker/sentence-splitter.ts"],"names":[],"mappings":"AAAA,0CAA0C;AAC1C,sBAAsB;AACtB,6EAA6E;AAE7E,+CAA+C;AAC/C,YAAY;AACZ,+CAA+C;AAE/C;;GAEG;AACH,MAAM,sBAAsB,GAAG,wBAAwB,CAAA;AAEvD;;GAEG;AACH,MAAM,uBAAuB,GAAG,yBAAyB,CAAA;AAWzD,+CAA+C;AAC/C,mBAAmB;AACnB,+CAA+C;AAE/C;;GAEG;AACH,SAAS,iBAAiB,CAAC,IAAY;IACrC,MAAM,MAAM,GAAoB,EAAE,CAAA;IAClC,IAAI,aAAa,GAAG,IAAI,CAAA;IAExB,yCAAyC;IACzC,MAAM,cAAc,GAAG,iBAAiB,CAAA;IACxC,IAAI,KAAK,GAAG,CAAC,CAAA;IAEb,MAAM,gBAAgB,GAAG,IAAI,CAAC,QAAQ,CAAC,cAAc,CAAC,CAAA;IACtD,KAAK,MAAM,KAAK,IAAI,gBAAgB,EAAE,CAAC;QACrC,MAAM,WAAW,GAAG,GAAG,sBAAsB,GAAG,KAAK,GAAG,sBAAsB,EAAE,CAAA;QAChF,MAAM,CAAC,IAAI,CAAC,EAAE,WAAW,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAA;QAC/C,aAAa,GAAG,aAAa,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,WAAW,CAAC,CAAA;QAC5D,KAAK,EAAE,CAAA;IACT,CAAC;IAED,8BAA8B;IAC9B,MAAM,eAAe,GAAG,UAAU,CAAA;IAClC,MAAM,aAAa,GAAG,aAAa,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAA;IAC7D,KAAK,MAAM,KAAK,IAAI,aAAa,EAAE,CAAC;QAClC,MAAM,WAAW,GAAG,GAAG,uBAAuB,GAAG,KAAK,GAAG,uBAAuB,EAAE,CAAA;QAClF,MAAM,CAAC,IAAI,CAAC,EAAE,WAAW,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAA;QAC/C,aAAa,GAAG,aAAa,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,WAAW,CAAC,CAAA;QAC5D,KAAK,EAAE,CAAA;IACT,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,aAAa,EAAE,MAAM,EAAE,CAAA;AACxC,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,SAAmB,EAAE,MAAuB;IACrE,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE;QAChC,IAAI,QAAQ,GAAG,QAAQ,CAAA;QACvB,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,KAAK,CAAC,OAAO,CAAC,CAAA;QAC/D,CAAC;QACD,OAAO,QAAQ,CAAA;IACjB,CAAC,CAAC,CAAA;AACJ,CAAC;AAED,+CAA+C;AAC/C,iCAAiC;AACjC,+CAA+C;AAE/C,4CAA4C;AAC5C,qEAAqE;AACrE,MAAM,SAAS,GAAG,IAAI,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,EAAE,WAAW,EAAE,UAAU,EAAE,CAAC,CAAA;AAExE;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,kBAAkB,CAAC,IAAY;IAC7C,qBAAqB;IACrB,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtC,OAAO,EAAE,CAAA;IACX,CAAC;IAED,qDAAqD;IACrD,
MAAM,EAAE,IAAI,EAAE,aAAa,EAAE,MAAM,EAAE,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAA;IAE/D,sCAAsC;IACtC,sHAAsH;IACtH,MAAM,UAAU,GAAG,aAAa,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAA;IAEvE,MAAM,SAAS,GAAa,EAAE,CAAA;IAE9B,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,MAAM,gBAAgB,GAAG,SAAS,CAAC,IAAI,EAAE,CAAA;QACzC,IAAI,CAAC,gBAAgB;YAAE,SAAQ;QAE/B,8DAA8D;QAC9D,IAAI,WAAW,CAAC,IAAI,CAAC,gBAAgB,CAAC,EAAE,CAAC;YACvC,SAAS,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAA;YAChC,SAAQ;QACV,CAAC;QAED,4CAA4C;QAC5C,MAAM,QAAQ,GAAG,SAAS,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAA;QACpD,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,CAAA;YACtC,IAAI,OAAO,EAAE,CAAC;gBACZ,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YACzB,CAAC;QACH,CAAC;IACH,CAAC;IAED,sBAAsB;IACtB,MAAM,iBAAiB,GAAG,iBAAiB,CAAC,SAAS,EAAE,MAAM,CAAC,CAAA;IAE9D,kCAAkC;IAClC,OAAO,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;AAC3E,CAAC"}
|
package/dist/embedder/index.d.ts
CHANGED
|
@@ -9,6 +9,11 @@ export interface EmbedderConfig {
|
|
|
9
9
|
batchSize: number;
|
|
10
10
|
/** Model cache directory */
|
|
11
11
|
cacheDir: string;
|
|
12
|
+
/**
|
|
13
|
+
* Device hint for Transformers.js runtime.
|
|
14
|
+
* Examples: auto, cpu, cuda, dml, webgpu
|
|
15
|
+
*/
|
|
16
|
+
device?: string;
|
|
12
17
|
}
|
|
13
18
|
/**
|
|
14
19
|
* Embedding generation class using Transformers.js
|
|
@@ -59,5 +64,10 @@ export declare class Embedder {
|
|
|
59
64
|
* Build a model-specific fallback cache path to avoid reusing corrupted artifacts.
|
|
60
65
|
*/
|
|
61
66
|
private getRecoveryCacheDir;
|
|
67
|
+
/**
|
|
68
|
+
* Resolve device preference for Transformers.js.
|
|
69
|
+
* Priority: constructor config -> RAG_EMBEDDING_DEVICE env -> auto
|
|
70
|
+
*/
|
|
71
|
+
private resolveDevice;
|
|
62
72
|
}
|
|
63
73
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/embedder/index.ts"],"names":[],"mappings":"AAQA,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAA;AAMnD;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,6BAA6B;IAC7B,SAAS,EAAE,MAAM,CAAA;IACjB,iBAAiB;IACjB,SAAS,EAAE,MAAM,CAAA;IACjB,4BAA4B;IAC5B,QAAQ,EAAE,MAAM,CAAA;IAChB;;;OAGG;IACH,MAAM,CAAC,EAAE,MAAM,CAAA;CAChB;AAuBD;;;;;;;GAOG;AACH,qBAAa,QAAQ;IAEnB,OAAO,CAAC,KAAK,CAAgB;IAC7B,OAAO,CAAC,WAAW,CAA6B;IAChD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAgB;gBAE3B,MAAM,EAAE,cAAc;IAIlC;;OAEG;IACH,YAAY,IAAI,MAAM;IAItB;;OAEG;IACG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IA+CjC;;;OAGG;YACW,iBAAiB;IA+B/B;;;;;OAKG;IACG,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAiC5C;;;;;;OAMG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,MAAM,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAoC5E;;OAEG;IACH,OAAO,CAAC,uBAAuB;IAY/B;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAK3B;;;OAGG;IACH,OAAO,CAAC,aAAa;CAiBtB"}
|
package/dist/embedder/index.js
CHANGED
|
@@ -1,17 +1,24 @@
|
|
|
1
|
-
"use strict";
|
|
2
1
|
// Embedder implementation with Transformers.js
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
};
|
|
6
|
-
|
|
7
|
-
exports.Embedder = exports.EmbeddingError = void 0;
|
|
8
|
-
const promises_1 = require("node:fs/promises");
|
|
9
|
-
const node_path_1 = __importDefault(require("node:path"));
|
|
10
|
-
const transformers_1 = require("@huggingface/transformers");
|
|
11
|
-
const index_js_1 = require("../errors/index.js");
|
|
2
|
+
import { mkdir } from 'node:fs/promises';
|
|
3
|
+
import path from 'node:path';
|
|
4
|
+
import { env, pipeline } from '@huggingface/transformers';
|
|
5
|
+
import { EmbeddingError } from '../errors/index.js';
|
|
12
6
|
// Re-export error class for backwards compatibility
|
|
13
|
-
|
|
14
|
-
|
|
7
|
+
export { EmbeddingError } from '../errors/index.js';
|
|
8
|
+
const SUPPORTED_EMBEDDING_DEVICES = [
|
|
9
|
+
'auto',
|
|
10
|
+
'gpu',
|
|
11
|
+
'cpu',
|
|
12
|
+
'wasm',
|
|
13
|
+
'webgpu',
|
|
14
|
+
'cuda',
|
|
15
|
+
'dml',
|
|
16
|
+
'webnn',
|
|
17
|
+
'webnn-npu',
|
|
18
|
+
'webnn-gpu',
|
|
19
|
+
'webnn-cpu',
|
|
20
|
+
];
|
|
21
|
+
const SUPPORTED_EMBEDDING_DEVICE_SET = new Set(SUPPORTED_EMBEDDING_DEVICES);
|
|
15
22
|
// ============================================
|
|
16
23
|
// Embedder Class
|
|
17
24
|
// ============================================
|
|
@@ -23,11 +30,12 @@ Object.defineProperty(exports, "EmbeddingError", { enumerable: true, get: functi
|
|
|
23
30
|
* - Transformers.js wrapper
|
|
24
31
|
* - Batch processing (size 8)
|
|
25
32
|
*/
|
|
26
|
-
class Embedder {
|
|
33
|
+
export class Embedder {
|
|
34
|
+
// Using unknown to avoid TS2590 (union type too complex with @types/jsdom)
|
|
35
|
+
model = null;
|
|
36
|
+
initPromise = null;
|
|
37
|
+
config;
|
|
27
38
|
constructor(config) {
|
|
28
|
-
// Using unknown to avoid TS2590 (union type too complex with @types/jsdom)
|
|
29
|
-
this.model = null;
|
|
30
|
-
this.initPromise = null;
|
|
31
39
|
this.config = config;
|
|
32
40
|
}
|
|
33
41
|
/**
|
|
@@ -46,11 +54,13 @@ class Embedder {
|
|
|
46
54
|
}
|
|
47
55
|
try {
|
|
48
56
|
// Set cache directory BEFORE creating pipeline
|
|
49
|
-
|
|
57
|
+
env.cacheDir = this.config.cacheDir;
|
|
58
|
+
const device = this.resolveDevice();
|
|
50
59
|
console.error(`Embedder: Setting cache directory to "${this.config.cacheDir}"`);
|
|
60
|
+
console.error(`Embedder: Using device preference "${device}"`);
|
|
51
61
|
console.error(`Embedder: Loading model "${this.config.modelPath}"...`);
|
|
52
62
|
// Use type assertion to avoid TS2590 (union type too complex with @types/jsdom)
|
|
53
|
-
this.model = await
|
|
63
|
+
this.model = await pipeline('feature-extraction', this.config.modelPath, { device });
|
|
54
64
|
console.error('Embedder: Model loaded successfully');
|
|
55
65
|
}
|
|
56
66
|
catch (error) {
|
|
@@ -59,17 +69,18 @@ class Embedder {
|
|
|
59
69
|
const recoveryCacheDir = this.getRecoveryCacheDir();
|
|
60
70
|
console.error(`Embedder: Detected corrupted model cache. Retrying with isolated cache: "${recoveryCacheDir}"`);
|
|
61
71
|
try {
|
|
62
|
-
await
|
|
63
|
-
|
|
64
|
-
|
|
72
|
+
await mkdir(recoveryCacheDir, { recursive: true });
|
|
73
|
+
env.cacheDir = recoveryCacheDir;
|
|
74
|
+
const device = this.resolveDevice();
|
|
75
|
+
this.model = await pipeline('feature-extraction', this.config.modelPath, { device });
|
|
65
76
|
console.error('Embedder: Model loaded successfully via recovery cache');
|
|
66
77
|
return;
|
|
67
78
|
}
|
|
68
79
|
catch (recoveryError) {
|
|
69
|
-
throw new
|
|
80
|
+
throw new EmbeddingError(`Failed to initialize Embedder after cache recovery attempt: ${recoveryError.message}`, recoveryError);
|
|
70
81
|
}
|
|
71
82
|
}
|
|
72
|
-
throw new
|
|
83
|
+
throw new EmbeddingError(`Failed to initialize Embedder: ${error.message}`, error);
|
|
73
84
|
}
|
|
74
85
|
}
|
|
75
86
|
/**
|
|
@@ -92,7 +103,7 @@ class Embedder {
|
|
|
92
103
|
// Clear initPromise on failure to allow retry
|
|
93
104
|
this.initPromise = null;
|
|
94
105
|
// Enhance error message with detailed guidance
|
|
95
|
-
throw new
|
|
106
|
+
throw new EmbeddingError(`Failed to initialize embedder on first use: ${error.message}\n\nPossible causes:\n • Network connectivity issues during model download\n • Insufficient disk space (need ~90MB)\n • Corrupted model cache\n\nRecommended actions:\n 1. Check your internet connection and try again\n 2. Ensure sufficient disk space is available\n 3. If problem persists, delete cache: ${this.config.cacheDir}\n 4. Then retry your query\n`, error);
|
|
96
107
|
});
|
|
97
108
|
await this.initPromise;
|
|
98
109
|
}
|
|
@@ -108,7 +119,7 @@ class Embedder {
|
|
|
108
119
|
try {
|
|
109
120
|
// Fail-fast for empty string: cannot generate meaningful embedding
|
|
110
121
|
if (text.length === 0) {
|
|
111
|
-
throw new
|
|
122
|
+
throw new EmbeddingError('Cannot generate embedding for empty text');
|
|
112
123
|
}
|
|
113
124
|
// Use type assertion to avoid complex Transformers.js type definitions
|
|
114
125
|
// This is due to external library type definition constraints, runtime behavior is guaranteed
|
|
@@ -120,10 +131,10 @@ class Embedder {
|
|
|
120
131
|
return embedding;
|
|
121
132
|
}
|
|
122
133
|
catch (error) {
|
|
123
|
-
if (error instanceof
|
|
134
|
+
if (error instanceof EmbeddingError) {
|
|
124
135
|
throw error;
|
|
125
136
|
}
|
|
126
|
-
throw new
|
|
137
|
+
throw new EmbeddingError(`Failed to generate embedding: ${error.message}`, error);
|
|
127
138
|
}
|
|
128
139
|
}
|
|
129
140
|
/**
|
|
@@ -145,7 +156,7 @@ class Embedder {
|
|
|
145
156
|
for (let i = 0; i < texts.length; i += this.config.batchSize) {
|
|
146
157
|
// Check for cancellation before each batch
|
|
147
158
|
if (signal?.aborted) {
|
|
148
|
-
throw new
|
|
159
|
+
throw new EmbeddingError('Embedding operation was cancelled');
|
|
149
160
|
}
|
|
150
161
|
const batch = texts.slice(i, i + this.config.batchSize);
|
|
151
162
|
const batchEmbeddings = await Promise.all(batch.map((text) => this.embed(text)));
|
|
@@ -154,11 +165,11 @@ class Embedder {
|
|
|
154
165
|
return embeddings;
|
|
155
166
|
}
|
|
156
167
|
catch (error) {
|
|
157
|
-
if (error instanceof
|
|
168
|
+
if (error instanceof EmbeddingError) {
|
|
158
169
|
throw error;
|
|
159
170
|
}
|
|
160
171
|
const message = error instanceof Error ? error.message : String(error);
|
|
161
|
-
throw new
|
|
172
|
+
throw new EmbeddingError(`Failed to generate batch embeddings: ${message}`, error instanceof Error ? error : undefined);
|
|
162
173
|
}
|
|
163
174
|
}
|
|
164
175
|
/**
|
|
@@ -177,8 +188,23 @@ class Embedder {
|
|
|
177
188
|
*/
|
|
178
189
|
getRecoveryCacheDir() {
|
|
179
190
|
const safeModelName = this.config.modelPath.replace(/[^a-z0-9_./-]/gi, '_').replace(/\//g, '__');
|
|
180
|
-
return
|
|
191
|
+
return path.join(this.config.cacheDir, '.recovery-cache', safeModelName);
|
|
192
|
+
}
|
|
193
|
+
/**
|
|
194
|
+
* Resolve device preference for Transformers.js.
|
|
195
|
+
* Priority: constructor config -> RAG_EMBEDDING_DEVICE env -> auto
|
|
196
|
+
*/
|
|
197
|
+
resolveDevice() {
|
|
198
|
+
const rawDevice = this.config.device ?? process.env['RAG_EMBEDDING_DEVICE'] ?? 'auto';
|
|
199
|
+
const normalized = rawDevice.trim().toLowerCase();
|
|
200
|
+
if (normalized === 'directml') {
|
|
201
|
+
return 'dml';
|
|
202
|
+
}
|
|
203
|
+
if (SUPPORTED_EMBEDDING_DEVICE_SET.has(normalized)) {
|
|
204
|
+
return normalized;
|
|
205
|
+
}
|
|
206
|
+
console.warn(`Embedder: Unsupported device "${rawDevice}". Falling back to "auto". Supported values: ${SUPPORTED_EMBEDDING_DEVICES.join(', ')}`);
|
|
207
|
+
return 'auto';
|
|
181
208
|
}
|
|
182
209
|
}
|
|
183
|
-
exports.Embedder = Embedder;
|
|
184
210
|
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/embedder/index.ts"],"names":[],"mappings":"AAAA,+CAA+C;AAE/C,OAAO,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAA;AACxC,OAAO,IAAI,MAAM,WAAW,CAAA;AAC5B,OAAO,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,2BAA2B,CAAA;AACzD,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAA;AAEnD,oDAAoD;AACpD,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAA;AAuBnD,MAAM,2BAA2B,GAAG;IAClC,MAAM;IACN,KAAK;IACL,KAAK;IACL,MAAM;IACN,QAAQ;IACR,MAAM;IACN,KAAK;IACL,OAAO;IACP,WAAW;IACX,WAAW;IACX,WAAW;CACH,CAAA;AAGV,MAAM,8BAA8B,GAAG,IAAI,GAAG,CAAS,2BAA2B,CAAC,CAAA;AAEnF,+CAA+C;AAC/C,iBAAiB;AACjB,+CAA+C;AAE/C;;;;;;;GAOG;AACH,MAAM,OAAO,QAAQ;IACnB,2EAA2E;IACnE,KAAK,GAAY,IAAI,CAAA;IACrB,WAAW,GAAyB,IAAI,CAAA;IAC/B,MAAM,CAAgB;IAEvC,YAAY,MAAsB;QAChC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAA;IACtB,CAAC;IAED;;OAEG;IACH,YAAY;QACV,OAAO,IAAI,CAAC,MAAM,CAAC,SAAS,CAAA;IAC9B,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU;QACd,8BAA8B;QAC9B,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACf,OAAM;QACR,CAAC;QAED,IAAI,CAAC;YACH,+CAA+C;YAC/C,GAAG,CAAC,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAA;YACnC,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,EAAE,CAAA;YAEnC,OAAO,CAAC,KAAK,CAAC,yCAAyC,IAAI,CAAC,MAAM,CAAC,QAAQ,GAAG,CAAC,CAAA;YAC/E,OAAO,CAAC,KAAK,CAAC,sCAAsC,MAAM,GAAG,CAAC,CAAA;YAC9D,OAAO,CAAC,KAAK,CAAC,4BAA4B,IAAI,CAAC,MAAM,CAAC,SAAS,MAAM,CAAC,CAAA;YACtE,gFAAgF;YAChF,IAAI,CAAC,KAAK,GAAG,MAAM,QAAQ,CAAC,oBAAoB,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,EAAE,MAAM,EAAE,CAAC,CAAA;YACpF,OAAO,CAAC,KAAK,CAAC,qCAAqC,CAAC,CAAA;QACtD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,6FAA6F;YAC7F,IAAI,IAAI,CAAC,uBAAuB,CAAC,KAAK,CAAC,EAAE,CAAC;gBACxC,MAAM,gBAAgB,GAAG,IAAI,CAAC,mBAAmB,EAAE,CAAA;gBACnD,OAAO,CAAC,KAAK,CACX,4EAA4E,gBAAgB,GAAG,CAChG,CAAA;gBAED,IAAI,CAAC;oBACH,MAAM,KAAK,CAAC,gBAAgB,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;oBAClD,GAAG,CAAC,QAAQ,GAAG,gBAAgB,CAAA;oBAC/B,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,EAAE,CAAA;oBACnC,IAAI,CAAC,KAAK,GAAG,MAAM,QAAQ,CAAC,oBAAoB,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,EAAE,MAAM,EAAE,CAAC,CAAA;oBACpF,OAAO,CAAC,KAAK,CAAC,wDAAwD,CAAC,CAAA;oBACvE,OAAM;gBACR,CAAC;gBAAC,OAAO
,aAAa,EAAE,CAAC;oBACvB,MAAM,IAAI,cAAc,CACtB,+DAAgE,aAAuB,CAAC,OAAO,EAAE,EACjG,aAAsB,CACvB,CAAA;gBACH,CAAC;YACH,CAAC;YAED,MAAM,IAAI,cAAc,CACtB,kCAAmC,KAAe,CAAC,OAAO,EAAE,EAC5D,KAAc,CACf,CAAA;QACH,CAAC;IACH,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,iBAAiB;QAC7B,sBAAsB;QACtB,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACf,OAAM;QACR,CAAC;QAED,kDAAkD;QAClD,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACrB,MAAM,IAAI,CAAC,WAAW,CAAA;YACtB,OAAM;QACR,CAAC;QAED,uBAAuB;QACvB,OAAO,CAAC,KAAK,CACX,+FAA+F,CAChG,CAAA;QAED,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;YACnD,8CAA8C;YAC9C,IAAI,CAAC,WAAW,GAAG,IAAI,CAAA;YAEvB,+CAA+C;YAC/C,MAAM,IAAI,cAAc,CACtB,+CAAgD,KAAe,CAAC,OAAO,wTAAwT,IAAI,CAAC,MAAM,CAAC,QAAQ,gCAAgC,EACnb,KAAc,CACf,CAAA;QACH,CAAC,CAAC,CAAA;QAEF,MAAM,IAAI,CAAC,WAAW,CAAA;IACxB,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,KAAK,CAAC,IAAY;QACtB,0EAA0E;QAC1E,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAA;QAE9B,IAAI,CAAC;YACH,mEAAmE;YACnE,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACtB,MAAM,IAAI,cAAc,CAAC,0CAA0C,CAAC,CAAA;YACtE,CAAC;YAED,uEAAuE;YACvE,8FAA8F;YAC9F,MAAM,OAAO,GAAG,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAA;YACpD,MAAM,SAAS,GAAG,IAAI,CAAC,KAGa,CAAA;YACpC,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,IAAI,EAAE,OAAO,CAAC,CAAA;YAE7C,qCAAqC;YACrC,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;YACzC,OAAO,SAAS,CAAA;QAClB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,KAAK,YAAY,cAAc,EAAE,CAAC;gBACpC,MAAM,KAAK,CAAA;YACb,CAAC;YACD,MAAM,IAAI,cAAc,CACtB,iCAAkC,KAAe,CAAC,OAAO,EAAE,EAC3D,KAAc,CACf,CAAA;QACH,CAAC;IACH,CAAC;IAED;;;;;;OAMG;IACH,KAAK,CAAC,UAAU,CAAC,KAAe,EAAE,MAAoB;QACpD,0EAA0E;QAC1E,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAA;QAE9B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,EAAE,CAAA;QACX,CAAC;QAED,IAAI,CAAC;YACH,MAAM,UAAU,GAAe,EAAE,CAAA;YAEjC,6CAA6C;YAC7C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;gBAC7D,2CAA2C;gBAC3C,IAAI,MAAM,EAAE,OAAO,EAAE,CAAC;oBACpB,MAAM,IAAI,cAAc,CAAC,mCAAmC,CAAC,CAAA;gBAC/D,CAAC;gBAED,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC
,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAA;gBACvD,MAAM,eAAe,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;gBAChF,UAAU,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC,CAAA;YACrC,CAAC;YAED,OAAO,UAAU,CAAA;QACnB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,KAAK,YAAY,cAAc,EAAE,CAAC;gBACpC,MAAM,KAAK,CAAA;YACb,CAAC;YACD,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;YACtE,MAAM,IAAI,cAAc,CACtB,wCAAwC,OAAO,EAAE,EACjD,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAA;QACH,CAAC;IACH,CAAC;IAED;;OAEG;IACK,uBAAuB,CAAC,KAAc;QAC5C,IAAI,CAAC,CAAC,KAAK,YAAY,KAAK,CAAC,EAAE,CAAC;YAC9B,OAAO,KAAK,CAAA;QACd,CAAC;QAED,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,WAAW,EAAE,CAAA;QAC3C,OAAO,CACL,OAAO,CAAC,QAAQ,CAAC,yBAAyB,CAAC;YAC3C,CAAC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC,CACtE,CAAA;IACH,CAAC;IAED;;OAEG;IACK,mBAAmB;QACzB,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,OAAO,CAAC,iBAAiB,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC,CAAA;QAChG,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,iBAAiB,EAAE,aAAa,CAAC,CAAA;IAC1E,CAAC;IAED;;;OAGG;IACK,aAAa;QACnB,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,sBAAsB,CAAC,IAAI,MAAM,CAAA;QACrF,MAAM,UAAU,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;QAEjD,IAAI,UAAU,KAAK,UAAU,EAAE,CAAC;YAC9B,OAAO,KAAK,CAAA;QACd,CAAC;QAED,IAAI,8BAA8B,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;YACnD,OAAO,UAA6B,CAAA;QACtC,CAAC;QAED,OAAO,CAAC,IAAI,CACV,iCAAiC,SAAS,gDAAgD,2BAA2B,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CACnI,CAAA;QACD,OAAO,MAAM,CAAA;IACf,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/errors/index.ts"],"names":[],"mappings":"AAMA;;GAEG;AACH,QAAA,MAAM,UAAU;;;;;;;;;;;;;CAsBN,CAAA;AAEV,MAAM,MAAM,SAAS,GAAG,CAAC,OAAO,UAAU,CAAC,CAAC,MAAM,OAAO,UAAU,CAAC,CAAA;AAMpE;;GAEG;AACH,qBAAa,QAAS,SAAQ,KAAK;IACjC,SAAgB,IAAI,EAAE,SAAS,CAAA;IAC/B,SAAgB,UAAU,EAAE,MAAM,CAAA;IAClC,SAAgB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,SAAS,CAAA;gBAG1D,OAAO,EAAE,MAAM,EACf,OAAO,GAAE;QACP,IAAI,CAAC,EAAE,SAAS,CAAA;QAChB,UAAU,CAAC,EAAE,MAAM,CAAA;QACnB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;QACjC,KAAK,CAAC,EAAE,KAAK,CAAA;KACT;IASR,MAAM,IAAI,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC;CASlC;AAMD;;GAEG;AACH,qBAAa,eAAgB,SAAQ,QAAQ;gBAC/B,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,KAAK,CAAC,EAAE,KAAK;CAe9E;AAMD;;GAEG;AACH,qBAAa,aAAc,SAAQ,QAAQ;gBAC7B,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,KAAK,EAAE,IAAI,GAAE,SAAqC;CAaxF;AAMD;;GAEG;AACH,qBAAa,qBAAsB,SAAQ,QAAQ;gBACrC,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,KAAK;CAa3C;AAED;;GAEG;AACH,qBAAa,wBAAyB,SAAQ,QAAQ;gBACxC,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,KAAK;CAa3C;AAMD;;GAEG;AACH,qBAAa,cAAe,SAAQ,QAAQ;gBAC9B,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,KAAK;CAa3C;AAMD;;GAEG;AACH,wBAAgB,eAAe,CAAC,KAAK,EAAE,KAAK,GAAG,MAAM,CAKpD"}
|