@robthepcguy/rag-vault 1.3.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +0 -0
- package/README.md +478 -441
- package/dist/bin/install-skills.d.ts +0 -0
- package/dist/bin/install-skills.js +0 -0
- package/dist/chunker/index.d.ts +0 -0
- package/dist/chunker/index.js +0 -0
- package/dist/chunker/semantic-chunker.d.ts +0 -0
- package/dist/chunker/semantic-chunker.js +0 -0
- package/dist/chunker/sentence-splitter.d.ts +0 -0
- package/dist/chunker/sentence-splitter.js +0 -0
- package/dist/embedder/index.d.ts +8 -0
- package/dist/embedder/index.js +38 -0
- package/dist/errors/index.d.ts +0 -0
- package/dist/errors/index.js +0 -0
- package/dist/explainability/index.d.ts +0 -0
- package/dist/explainability/index.js +0 -0
- package/dist/explainability/keywords.d.ts +0 -0
- package/dist/explainability/keywords.js +0 -0
- package/dist/flywheel/feedback.d.ts +0 -0
- package/dist/flywheel/feedback.js +0 -0
- package/dist/flywheel/index.d.ts +0 -0
- package/dist/flywheel/index.js +0 -0
- package/dist/index.d.ts +0 -0
- package/dist/parser/html-parser.d.ts +0 -0
- package/dist/parser/html-parser.js +0 -0
- package/dist/parser/index.d.ts +0 -0
- package/dist/parser/index.js +21 -4
- package/dist/parser/pdf-filter.d.ts +0 -0
- package/dist/parser/pdf-filter.js +0 -0
- package/dist/query/index.d.ts +0 -0
- package/dist/query/index.js +0 -0
- package/dist/query/parser.d.ts +0 -0
- package/dist/query/parser.js +0 -0
- package/dist/server/index.d.ts +0 -0
- package/dist/server/index.js +33 -12
- package/dist/server/raw-data-utils.d.ts +9 -0
- package/dist/server/raw-data-utils.js +15 -0
- package/dist/server/schemas.d.ts +0 -0
- package/dist/server/schemas.js +0 -0
- package/dist/utils/config-parsers.d.ts +0 -0
- package/dist/utils/config-parsers.js +0 -0
- package/dist/utils/config.d.ts +0 -0
- package/dist/utils/config.js +0 -0
- package/dist/utils/file-utils.d.ts +0 -0
- package/dist/utils/file-utils.js +0 -0
- package/dist/utils/math.d.ts +0 -0
- package/dist/utils/math.js +0 -0
- package/dist/utils/process-handlers.d.ts +0 -0
- package/dist/utils/process-handlers.js +0 -0
- package/dist/vectordb/index.d.ts +0 -0
- package/dist/vectordb/index.js +2 -1
- package/dist/web/api-routes.d.ts +0 -0
- package/dist/web/api-routes.js +0 -0
- package/dist/web/config-routes.d.ts +0 -0
- package/dist/web/config-routes.js +0 -0
- package/dist/web/database-manager.d.ts +4 -0
- package/dist/web/database-manager.js +15 -0
- package/dist/web/http-server.d.ts +0 -0
- package/dist/web/http-server.js +13 -1
- package/dist/web/index.d.ts +0 -0
- package/dist/web/index.js +0 -0
- package/dist/web/middleware/async-handler.d.ts +0 -0
- package/dist/web/middleware/async-handler.js +0 -0
- package/dist/web/middleware/auth.d.ts +0 -0
- package/dist/web/middleware/auth.js +0 -0
- package/dist/web/middleware/error-handler.d.ts +0 -0
- package/dist/web/middleware/error-handler.js +0 -0
- package/dist/web/middleware/index.d.ts +0 -0
- package/dist/web/middleware/index.js +0 -0
- package/dist/web/middleware/rate-limit.d.ts +0 -0
- package/dist/web/middleware/rate-limit.js +0 -0
- package/dist/web/middleware/request-logger.d.ts +0 -0
- package/dist/web/middleware/request-logger.js +0 -0
- package/dist/web/types.d.ts +0 -0
- package/dist/web/types.js +0 -0
- package/package.json +54 -36
- package/skills/rag-vault/SKILL.md +0 -0
- package/skills/rag-vault/references/html-ingestion.md +0 -0
- package/skills/rag-vault/references/query-optimization.md +0 -0
- package/skills/rag-vault/references/result-refinement.md +0 -0
- package/web-ui/dist/assets/{index-BcRp9-z9.js → index-SBHxoAwi.js} +2 -2
- package/web-ui/dist/assets/index-ej8i4PGl.css +0 -0
- package/web-ui/dist/index.html +1 -1
- package/web-ui/dist/vite.svg +0 -0
- package/dist/bin/install-skills.d.ts.map +0 -1
- package/dist/bin/install-skills.js.map +0 -1
- package/dist/chunker/index.d.ts.map +0 -1
- package/dist/chunker/index.js.map +0 -1
- package/dist/chunker/semantic-chunker.d.ts.map +0 -1
- package/dist/chunker/semantic-chunker.js.map +0 -1
- package/dist/chunker/sentence-splitter.d.ts.map +0 -1
- package/dist/chunker/sentence-splitter.js.map +0 -1
- package/dist/embedder/index.d.ts.map +0 -1
- package/dist/embedder/index.js.map +0 -1
- package/dist/errors/index.d.ts.map +0 -1
- package/dist/errors/index.js.map +0 -1
- package/dist/explainability/index.d.ts.map +0 -1
- package/dist/explainability/index.js.map +0 -1
- package/dist/explainability/keywords.d.ts.map +0 -1
- package/dist/explainability/keywords.js.map +0 -1
- package/dist/flywheel/feedback.d.ts.map +0 -1
- package/dist/flywheel/feedback.js.map +0 -1
- package/dist/flywheel/index.d.ts.map +0 -1
- package/dist/flywheel/index.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/parser/html-parser.d.ts.map +0 -1
- package/dist/parser/html-parser.js.map +0 -1
- package/dist/parser/index.d.ts.map +0 -1
- package/dist/parser/index.js.map +0 -1
- package/dist/parser/pdf-filter.d.ts.map +0 -1
- package/dist/parser/pdf-filter.js.map +0 -1
- package/dist/query/index.d.ts.map +0 -1
- package/dist/query/index.js.map +0 -1
- package/dist/query/parser.d.ts.map +0 -1
- package/dist/query/parser.js.map +0 -1
- package/dist/server/index.d.ts.map +0 -1
- package/dist/server/index.js.map +0 -1
- package/dist/server/raw-data-utils.d.ts.map +0 -1
- package/dist/server/raw-data-utils.js.map +0 -1
- package/dist/server/schemas.d.ts.map +0 -1
- package/dist/server/schemas.js.map +0 -1
- package/dist/utils/config-parsers.d.ts.map +0 -1
- package/dist/utils/config-parsers.js.map +0 -1
- package/dist/utils/config.d.ts.map +0 -1
- package/dist/utils/config.js.map +0 -1
- package/dist/utils/file-utils.d.ts.map +0 -1
- package/dist/utils/file-utils.js.map +0 -1
- package/dist/utils/math.d.ts.map +0 -1
- package/dist/utils/math.js.map +0 -1
- package/dist/utils/process-handlers.d.ts.map +0 -1
- package/dist/utils/process-handlers.js.map +0 -1
- package/dist/vectordb/index.d.ts.map +0 -1
- package/dist/vectordb/index.js.map +0 -1
- package/dist/web/api-routes.d.ts.map +0 -1
- package/dist/web/api-routes.js.map +0 -1
- package/dist/web/config-routes.d.ts.map +0 -1
- package/dist/web/config-routes.js.map +0 -1
- package/dist/web/database-manager.d.ts.map +0 -1
- package/dist/web/database-manager.js.map +0 -1
- package/dist/web/http-server.d.ts.map +0 -1
- package/dist/web/http-server.js.map +0 -1
- package/dist/web/index.d.ts.map +0 -1
- package/dist/web/index.js.map +0 -1
- package/dist/web/middleware/async-handler.d.ts.map +0 -1
- package/dist/web/middleware/async-handler.js.map +0 -1
- package/dist/web/middleware/auth.d.ts.map +0 -1
- package/dist/web/middleware/auth.js.map +0 -1
- package/dist/web/middleware/error-handler.d.ts.map +0 -1
- package/dist/web/middleware/error-handler.js.map +0 -1
- package/dist/web/middleware/index.d.ts.map +0 -1
- package/dist/web/middleware/index.js.map +0 -1
- package/dist/web/middleware/rate-limit.d.ts.map +0 -1
- package/dist/web/middleware/rate-limit.js.map +0 -1
- package/dist/web/middleware/request-logger.d.ts.map +0 -1
- package/dist/web/middleware/request-logger.js.map +0 -1
- package/dist/web/types.d.ts.map +0 -1
- package/dist/web/types.js.map +0 -1
|
File without changes
|
|
File without changes
|
package/dist/chunker/index.d.ts
CHANGED
|
File without changes
|
package/dist/chunker/index.js
CHANGED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
package/dist/embedder/index.d.ts
CHANGED
|
@@ -51,5 +51,13 @@ export declare class Embedder {
|
|
|
51
51
|
* @returns Array of embedding vectors (dimension depends on model)
|
|
52
52
|
*/
|
|
53
53
|
embedBatch(texts: string[], signal?: AbortSignal): Promise<number[][]>;
|
|
54
|
+
/**
|
|
55
|
+
* Detect known cache-corruption signatures from ONNX/protobuf loaders.
|
|
56
|
+
*/
|
|
57
|
+
private isRecoverableCacheError;
|
|
58
|
+
/**
|
|
59
|
+
* Build a model-specific fallback cache path to avoid reusing corrupted artifacts.
|
|
60
|
+
*/
|
|
61
|
+
private getRecoveryCacheDir;
|
|
54
62
|
}
|
|
55
63
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/embedder/index.js
CHANGED
|
@@ -1,7 +1,12 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
// Embedder implementation with Transformers.js
|
|
3
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
4
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
5
|
+
};
|
|
3
6
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
7
|
exports.Embedder = exports.EmbeddingError = void 0;
|
|
8
|
+
const promises_1 = require("node:fs/promises");
|
|
9
|
+
const node_path_1 = __importDefault(require("node:path"));
|
|
5
10
|
const transformers_1 = require("@huggingface/transformers");
|
|
6
11
|
const index_js_1 = require("../errors/index.js");
|
|
7
12
|
// Re-export error class for backwards compatibility
|
|
@@ -49,6 +54,21 @@ class Embedder {
|
|
|
49
54
|
console.error('Embedder: Model loaded successfully');
|
|
50
55
|
}
|
|
51
56
|
catch (error) {
|
|
57
|
+
// Some ONNX caches fail with "Protobuf parsing failed". Retry once with isolated cache path.
|
|
58
|
+
if (this.isRecoverableCacheError(error)) {
|
|
59
|
+
const recoveryCacheDir = this.getRecoveryCacheDir();
|
|
60
|
+
console.error(`Embedder: Detected corrupted model cache. Retrying with isolated cache: "${recoveryCacheDir}"`);
|
|
61
|
+
try {
|
|
62
|
+
await (0, promises_1.mkdir)(recoveryCacheDir, { recursive: true });
|
|
63
|
+
transformers_1.env.cacheDir = recoveryCacheDir;
|
|
64
|
+
this.model = await (0, transformers_1.pipeline)('feature-extraction', this.config.modelPath);
|
|
65
|
+
console.error('Embedder: Model loaded successfully via recovery cache');
|
|
66
|
+
return;
|
|
67
|
+
}
|
|
68
|
+
catch (recoveryError) {
|
|
69
|
+
throw new index_js_1.EmbeddingError(`Failed to initialize Embedder after cache recovery attempt: ${recoveryError.message}`, recoveryError);
|
|
70
|
+
}
|
|
71
|
+
}
|
|
52
72
|
throw new index_js_1.EmbeddingError(`Failed to initialize Embedder: ${error.message}`, error);
|
|
53
73
|
}
|
|
54
74
|
}
|
|
@@ -141,6 +161,24 @@ class Embedder {
|
|
|
141
161
|
throw new index_js_1.EmbeddingError(`Failed to generate batch embeddings: ${message}`, error instanceof Error ? error : undefined);
|
|
142
162
|
}
|
|
143
163
|
}
|
|
164
|
+
/**
|
|
165
|
+
* Detect known cache-corruption signatures from ONNX/protobuf loaders.
|
|
166
|
+
*/
|
|
167
|
+
isRecoverableCacheError(error) {
|
|
168
|
+
if (!(error instanceof Error)) {
|
|
169
|
+
return false;
|
|
170
|
+
}
|
|
171
|
+
const message = error.message.toLowerCase();
|
|
172
|
+
return (message.includes('protobuf parsing failed') ||
|
|
173
|
+
(message.includes('protobuf') && message.includes('failed to parse')));
|
|
174
|
+
}
|
|
175
|
+
/**
|
|
176
|
+
* Build a model-specific fallback cache path to avoid reusing corrupted artifacts.
|
|
177
|
+
*/
|
|
178
|
+
getRecoveryCacheDir() {
|
|
179
|
+
const safeModelName = this.config.modelPath.replace(/[^a-z0-9_./-]/gi, '_').replace(/\//g, '__');
|
|
180
|
+
return node_path_1.default.join(this.config.cacheDir, '.recovery-cache', safeModelName);
|
|
181
|
+
}
|
|
144
182
|
}
|
|
145
183
|
exports.Embedder = Embedder;
|
|
146
184
|
//# sourceMappingURL=index.js.map
|
package/dist/errors/index.d.ts
CHANGED
|
File without changes
|
package/dist/errors/index.js
CHANGED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
package/dist/flywheel/index.d.ts
CHANGED
|
File without changes
|
package/dist/flywheel/index.js
CHANGED
|
File without changes
|
package/dist/index.d.ts
CHANGED
|
File without changes
|
|
File without changes
|
|
File without changes
|
package/dist/parser/index.d.ts
CHANGED
|
File without changes
|
package/dist/parser/index.js
CHANGED
|
@@ -113,10 +113,27 @@ class DocumentParser {
|
|
|
113
113
|
if (!(0, node_path_1.isAbsolute)(filePath)) {
|
|
114
114
|
throw new index_js_1.ParserValidationError(`File path must be absolute path (received: ${filePath}). Please provide an absolute path within BASE_DIR.`);
|
|
115
115
|
}
|
|
116
|
-
//
|
|
117
|
-
|
|
118
|
-
const
|
|
119
|
-
|
|
116
|
+
// Resolve symlinks for both base and target to prevent symlink escape attacks.
|
|
117
|
+
// Fall back to resolve() only when path does not exist yet.
|
|
118
|
+
const resolveCanonicalPath = (targetPath) => {
|
|
119
|
+
try {
|
|
120
|
+
return (0, node_fs_1.realpathSync)(targetPath);
|
|
121
|
+
}
|
|
122
|
+
catch (error) {
|
|
123
|
+
const nodeError = error;
|
|
124
|
+
if (nodeError.code === 'ENOENT') {
|
|
125
|
+
return (0, node_path_1.resolve)(targetPath);
|
|
126
|
+
}
|
|
127
|
+
throw new index_js_1.ParserValidationError(`Failed to resolve path for security validation: ${targetPath}`);
|
|
128
|
+
}
|
|
129
|
+
};
|
|
130
|
+
// Check if path is within BASE_DIR using relative-path boundary check.
|
|
131
|
+
// This avoids prefix bypasses such as /base matching /base2.
|
|
132
|
+
const baseDir = resolveCanonicalPath(this.config.baseDir);
|
|
133
|
+
const normalizedPath = resolveCanonicalPath(filePath);
|
|
134
|
+
const relativePath = (0, node_path_1.relative)(baseDir, normalizedPath);
|
|
135
|
+
const isOutsideBaseDir = relativePath.startsWith('..') || relativePath === '..' || (0, node_path_1.isAbsolute)(relativePath);
|
|
136
|
+
if (isOutsideBaseDir) {
|
|
120
137
|
throw new index_js_1.ParserValidationError(`File path must be within BASE_DIR (${baseDir}). Received path outside BASE_DIR: ${filePath}`);
|
|
121
138
|
}
|
|
122
139
|
}
|
|
File without changes
|
|
File without changes
|
package/dist/query/index.d.ts
CHANGED
|
File without changes
|
package/dist/query/index.js
CHANGED
|
File without changes
|
package/dist/query/parser.d.ts
CHANGED
|
File without changes
|
package/dist/query/parser.js
CHANGED
|
File without changes
|
package/dist/server/index.d.ts
CHANGED
|
File without changes
|
package/dist/server/index.js
CHANGED
|
@@ -145,7 +145,10 @@ Results include score (0 = most relevant, higher = less relevant). Set explain=t
|
|
|
145
145
|
sourceQuery: zod_1.z.string().describe('The query that returned this result'),
|
|
146
146
|
targetFilePath: zod_1.z.string().describe('File path of the result to pin'),
|
|
147
147
|
targetChunkIndex: zod_1.z.number().describe('Chunk index of the result to pin'),
|
|
148
|
-
targetFingerprint: zod_1.z
|
|
148
|
+
targetFingerprint: zod_1.z
|
|
149
|
+
.string()
|
|
150
|
+
.optional()
|
|
151
|
+
.describe('Optional fingerprint for resilient matching'),
|
|
149
152
|
}, async (args) => {
|
|
150
153
|
try {
|
|
151
154
|
const result = this.executeFeedbackPin(args);
|
|
@@ -155,7 +158,12 @@ Results include score (0 = most relevant, higher = less relevant). Set explain=t
|
|
|
155
158
|
}
|
|
156
159
|
catch (error) {
|
|
157
160
|
return {
|
|
158
|
-
content: [
|
|
161
|
+
content: [
|
|
162
|
+
{
|
|
163
|
+
type: 'text',
|
|
164
|
+
text: JSON.stringify({ error: (0, index_js_3.getErrorMessage)(error) }),
|
|
165
|
+
},
|
|
166
|
+
],
|
|
159
167
|
isError: true,
|
|
160
168
|
};
|
|
161
169
|
}
|
|
@@ -165,7 +173,10 @@ Results include score (0 = most relevant, higher = less relevant). Set explain=t
|
|
|
165
173
|
sourceQuery: zod_1.z.string().describe('The query that returned this result'),
|
|
166
174
|
targetFilePath: zod_1.z.string().describe('File path of the result to dismiss'),
|
|
167
175
|
targetChunkIndex: zod_1.z.number().describe('Chunk index of the result to dismiss'),
|
|
168
|
-
targetFingerprint: zod_1.z
|
|
176
|
+
targetFingerprint: zod_1.z
|
|
177
|
+
.string()
|
|
178
|
+
.optional()
|
|
179
|
+
.describe('Optional fingerprint for resilient matching'),
|
|
169
180
|
}, async (args) => {
|
|
170
181
|
try {
|
|
171
182
|
const result = this.executeFeedbackDismiss(args);
|
|
@@ -175,7 +186,12 @@ Results include score (0 = most relevant, higher = less relevant). Set explain=t
|
|
|
175
186
|
}
|
|
176
187
|
catch (error) {
|
|
177
188
|
return {
|
|
178
|
-
content: [
|
|
189
|
+
content: [
|
|
190
|
+
{
|
|
191
|
+
type: 'text',
|
|
192
|
+
text: JSON.stringify({ error: (0, index_js_3.getErrorMessage)(error) }),
|
|
193
|
+
},
|
|
194
|
+
],
|
|
179
195
|
isError: true,
|
|
180
196
|
};
|
|
181
197
|
}
|
|
@@ -190,7 +206,12 @@ Results include score (0 = most relevant, higher = less relevant). Set explain=t
|
|
|
190
206
|
}
|
|
191
207
|
catch (error) {
|
|
192
208
|
return {
|
|
193
|
-
content: [
|
|
209
|
+
content: [
|
|
210
|
+
{
|
|
211
|
+
type: 'text',
|
|
212
|
+
text: JSON.stringify({ error: (0, index_js_3.getErrorMessage)(error) }),
|
|
213
|
+
},
|
|
214
|
+
],
|
|
194
215
|
isError: true,
|
|
195
216
|
};
|
|
196
217
|
}
|
|
@@ -288,7 +309,7 @@ Results include score (0 = most relevant, higher = less relevant). Set explain=t
|
|
|
288
309
|
score: result.score,
|
|
289
310
|
};
|
|
290
311
|
// Restore source for raw-data files (ingested via ingest_data)
|
|
291
|
-
if ((0, raw_data_utils_js_1.
|
|
312
|
+
if ((0, raw_data_utils_js_1.isManagedRawDataPath)(this.dbPath, result.filePath)) {
|
|
292
313
|
const source = (0, raw_data_utils_js_1.extractSourceFromPath)(result.filePath);
|
|
293
314
|
if (source) {
|
|
294
315
|
queryResult.source = source;
|
|
@@ -336,7 +357,7 @@ Results include score (0 = most relevant, higher = less relevant). Set explain=t
|
|
|
336
357
|
// since the path is internally generated and content is already processed
|
|
337
358
|
const isPdf = args.filePath.toLowerCase().endsWith('.pdf');
|
|
338
359
|
let text;
|
|
339
|
-
if ((0, raw_data_utils_js_1.
|
|
360
|
+
if ((0, raw_data_utils_js_1.isManagedRawDataPath)(this.dbPath, args.filePath)) {
|
|
340
361
|
// Raw-data files: skip validation, read directly
|
|
341
362
|
text = await (0, promises_1.readFile)(args.filePath, 'utf-8');
|
|
342
363
|
console.error(`Read raw-data file: ${args.filePath} (${text.length} characters)`);
|
|
@@ -505,7 +526,7 @@ Results include score (0 = most relevant, higher = less relevant). Set explain=t
|
|
|
505
526
|
const files = await this.vectorStore.listFiles();
|
|
506
527
|
// Enrich raw-data files with source information
|
|
507
528
|
return files.map((file) => {
|
|
508
|
-
if ((0, raw_data_utils_js_1.
|
|
529
|
+
if ((0, raw_data_utils_js_1.isManagedRawDataPath)(this.dbPath, file.filePath)) {
|
|
509
530
|
const source = (0, raw_data_utils_js_1.extractSourceFromPath)(file.filePath);
|
|
510
531
|
if (source) {
|
|
511
532
|
return { ...file, source };
|
|
@@ -648,7 +669,7 @@ Results include score (0 = most relevant, higher = less relevant). Set explain=t
|
|
|
648
669
|
// Delete chunks from vector database
|
|
649
670
|
await this.vectorStore.deleteChunks(targetPath);
|
|
650
671
|
// Also delete physical raw-data file if applicable
|
|
651
|
-
if ((0, raw_data_utils_js_1.
|
|
672
|
+
if ((0, raw_data_utils_js_1.isManagedRawDataPath)(this.dbPath, targetPath)) {
|
|
652
673
|
try {
|
|
653
674
|
await (0, promises_1.unlink)(targetPath);
|
|
654
675
|
console.error(`Deleted raw-data file: ${targetPath}`);
|
|
@@ -693,7 +714,7 @@ Results include score (0 = most relevant, higher = less relevant). Set explain=t
|
|
|
693
714
|
const chunks = await this.vectorStore.getDocumentChunks(filePath);
|
|
694
715
|
// Enrich with source information for raw-data files
|
|
695
716
|
const enrichedChunks = chunks.map((chunk) => {
|
|
696
|
-
if ((0, raw_data_utils_js_1.
|
|
717
|
+
if ((0, raw_data_utils_js_1.isManagedRawDataPath)(this.dbPath, chunk.filePath)) {
|
|
697
718
|
const source = (0, raw_data_utils_js_1.extractSourceFromPath)(chunk.filePath);
|
|
698
719
|
if (source) {
|
|
699
720
|
return { ...chunk, source };
|
|
@@ -724,7 +745,7 @@ Results include score (0 = most relevant, higher = less relevant). Set explain=t
|
|
|
724
745
|
const relatedChunks = await this.vectorStore.findRelatedChunks(filePath, chunkIndex, limit ?? 5, excludeSameDocument ?? true);
|
|
725
746
|
// Enrich with source information for raw-data files
|
|
726
747
|
const enrichedChunks = relatedChunks.map((chunk) => {
|
|
727
|
-
if ((0, raw_data_utils_js_1.
|
|
748
|
+
if ((0, raw_data_utils_js_1.isManagedRawDataPath)(this.dbPath, chunk.filePath)) {
|
|
728
749
|
const source = (0, raw_data_utils_js_1.extractSourceFromPath)(chunk.filePath);
|
|
729
750
|
if (source) {
|
|
730
751
|
return { ...chunk, source };
|
|
@@ -760,7 +781,7 @@ Results include score (0 = most relevant, higher = less relevant). Set explain=t
|
|
|
760
781
|
);
|
|
761
782
|
// Enrich with source information
|
|
762
783
|
results[key] = relatedChunks.map((related) => {
|
|
763
|
-
if ((0, raw_data_utils_js_1.
|
|
784
|
+
if ((0, raw_data_utils_js_1.isManagedRawDataPath)(this.dbPath, related.filePath)) {
|
|
764
785
|
const source = (0, raw_data_utils_js_1.extractSourceFromPath)(related.filePath);
|
|
765
786
|
if (source) {
|
|
766
787
|
return { ...related, source };
|
|
@@ -70,6 +70,15 @@ export declare function saveRawData(dbPath: string, source: string, content: str
|
|
|
70
70
|
* @returns True if path is in raw-data directory
|
|
71
71
|
*/
|
|
72
72
|
export declare function isRawDataPath(filePath: string): boolean;
|
|
73
|
+
/**
|
|
74
|
+
* Check whether a path belongs to this database's managed raw-data directory.
|
|
75
|
+
* This is stricter than isRawDataPath() and should be used for trust decisions.
|
|
76
|
+
*
|
|
77
|
+
* @param dbPath - LanceDB database path for the current server
|
|
78
|
+
* @param filePath - File path to validate
|
|
79
|
+
* @returns True only if filePath is inside <dbPath>/raw-data
|
|
80
|
+
*/
|
|
81
|
+
export declare function isManagedRawDataPath(dbPath: string, filePath: string): boolean;
|
|
73
82
|
/**
|
|
74
83
|
* Extract original source from raw-data file path
|
|
75
84
|
* Returns null if not a raw-data path
|
|
@@ -10,6 +10,7 @@ exports.getRawDataDir = getRawDataDir;
|
|
|
10
10
|
exports.generateRawDataPath = generateRawDataPath;
|
|
11
11
|
exports.saveRawData = saveRawData;
|
|
12
12
|
exports.isRawDataPath = isRawDataPath;
|
|
13
|
+
exports.isManagedRawDataPath = isManagedRawDataPath;
|
|
13
14
|
exports.extractSourceFromPath = extractSourceFromPath;
|
|
14
15
|
const promises_1 = require("node:fs/promises");
|
|
15
16
|
const node_path_1 = require("node:path");
|
|
@@ -167,6 +168,20 @@ const RAW_DATA_NATIVE = `${node_path_1.sep}raw-data${node_path_1.sep}`;
|
|
|
167
168
|
function isRawDataPath(filePath) {
|
|
168
169
|
return filePath.includes(RAW_DATA_NATIVE) || filePath.includes(RAW_DATA_POSIX);
|
|
169
170
|
}
|
|
171
|
+
/**
|
|
172
|
+
* Check whether a path belongs to this database's managed raw-data directory.
|
|
173
|
+
* This is stricter than isRawDataPath() and should be used for trust decisions.
|
|
174
|
+
*
|
|
175
|
+
* @param dbPath - LanceDB database path for the current server
|
|
176
|
+
* @param filePath - File path to validate
|
|
177
|
+
* @returns True only if filePath is inside <dbPath>/raw-data
|
|
178
|
+
*/
|
|
179
|
+
function isManagedRawDataPath(dbPath, filePath) {
|
|
180
|
+
const rawDataDir = (0, node_path_1.resolve)(getRawDataDir(dbPath));
|
|
181
|
+
const resolvedPath = (0, node_path_1.resolve)(filePath);
|
|
182
|
+
const rel = (0, node_path_1.relative)(rawDataDir, resolvedPath);
|
|
183
|
+
return rel === '' || (!rel.startsWith('..') && !(0, node_path_1.isAbsolute)(rel));
|
|
184
|
+
}
|
|
170
185
|
/**
|
|
171
186
|
* Extract original source from raw-data file path
|
|
172
187
|
* Returns null if not a raw-data path
|
package/dist/server/schemas.d.ts
CHANGED
|
File without changes
|
package/dist/server/schemas.js
CHANGED
|
File without changes
|
|
File without changes
|
|
File without changes
|
package/dist/utils/config.d.ts
CHANGED
|
File without changes
|
package/dist/utils/config.js
CHANGED
|
File without changes
|
|
File without changes
|
package/dist/utils/file-utils.js
CHANGED
|
File without changes
|
package/dist/utils/math.d.ts
CHANGED
|
File without changes
|
package/dist/utils/math.js
CHANGED
|
File without changes
|
|
File without changes
|
|
File without changes
|
package/dist/vectordb/index.d.ts
CHANGED
|
File without changes
|
package/dist/vectordb/index.js
CHANGED
|
@@ -143,7 +143,8 @@ function isDocumentMetadata(value) {
|
|
|
143
143
|
return false;
|
|
144
144
|
}
|
|
145
145
|
// Optional custom field must be an object if present
|
|
146
|
-
if (obj['custom'] !== undefined &&
|
|
146
|
+
if (obj['custom'] !== undefined &&
|
|
147
|
+
(typeof obj['custom'] !== 'object' || obj['custom'] === null)) {
|
|
147
148
|
return false;
|
|
148
149
|
}
|
|
149
150
|
return true;
|
package/dist/web/api-routes.d.ts
CHANGED
|
File without changes
|
package/dist/web/api-routes.js
CHANGED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -246,5 +246,9 @@ export declare class DatabaseManager {
|
|
|
246
246
|
* Ensure config directory exists
|
|
247
247
|
*/
|
|
248
248
|
private ensureConfigDir;
|
|
249
|
+
/**
|
|
250
|
+
* Enforce allowed-roots policy for database mutation operations.
|
|
251
|
+
*/
|
|
252
|
+
private assertPathAllowedForMutation;
|
|
249
253
|
}
|
|
250
254
|
//# sourceMappingURL=database-manager.d.ts.map
|
|
@@ -226,6 +226,7 @@ class DatabaseManager {
|
|
|
226
226
|
}
|
|
227
227
|
// Expand tilde to home directory
|
|
228
228
|
const resolvedPath = expandTilde(newDbPath);
|
|
229
|
+
await this.assertPathAllowedForMutation('Switch path', resolvedPath);
|
|
229
230
|
// Validate the new path exists and is a valid database
|
|
230
231
|
if (!(0, node_fs_1.existsSync)(resolvedPath)) {
|
|
231
232
|
throw new Error(`Database path does not exist: ${resolvedPath}`);
|
|
@@ -290,6 +291,7 @@ class DatabaseManager {
|
|
|
290
291
|
async createDatabase(options) {
|
|
291
292
|
// Expand tilde to home directory
|
|
292
293
|
const resolvedPath = expandTilde(options.dbPath);
|
|
294
|
+
await this.assertPathAllowedForMutation('Create path', resolvedPath);
|
|
293
295
|
// Check if path already exists
|
|
294
296
|
if ((0, node_fs_1.existsSync)(resolvedPath)) {
|
|
295
297
|
const lanceDbPath = node_path_1.default.join(resolvedPath, LANCEDB_DIR_NAME);
|
|
@@ -662,6 +664,7 @@ class DatabaseManager {
|
|
|
662
664
|
*/
|
|
663
665
|
async deleteDatabase(dbPath, deleteFiles = false) {
|
|
664
666
|
const resolvedPath = expandTilde(dbPath);
|
|
667
|
+
await this.assertPathAllowedForMutation('Delete path', resolvedPath);
|
|
665
668
|
// Cannot delete the currently active database
|
|
666
669
|
if (this.currentConfig && this.currentConfig.dbPath === resolvedPath) {
|
|
667
670
|
throw new Error('Cannot delete the currently active database. Switch to another database first.');
|
|
@@ -699,6 +702,18 @@ class DatabaseManager {
|
|
|
699
702
|
await (0, promises_1.mkdir)(CONFIG_DIR, { recursive: true });
|
|
700
703
|
}
|
|
701
704
|
}
|
|
705
|
+
/**
|
|
706
|
+
* Enforce allowed-roots policy for database mutation operations.
|
|
707
|
+
*/
|
|
708
|
+
async assertPathAllowedForMutation(action, targetPath) {
|
|
709
|
+
if (await this.isPathAllowed(targetPath)) {
|
|
710
|
+
return;
|
|
711
|
+
}
|
|
712
|
+
const allowedRoots = await this.getEffectiveAllowedRoots();
|
|
713
|
+
throw new Error(`${action} "${targetPath}" is outside allowed roots. ` +
|
|
714
|
+
`Allowed: ${allowedRoots.join(', ')}. ` +
|
|
715
|
+
`Add this path to allowed roots or set ALLOWED_SCAN_ROOTS environment variable.`);
|
|
716
|
+
}
|
|
702
717
|
}
|
|
703
718
|
exports.DatabaseManager = DatabaseManager;
|
|
704
719
|
//# sourceMappingURL=database-manager.js.map
|
|
File without changes
|
package/dist/web/http-server.js
CHANGED
|
@@ -96,6 +96,9 @@ const ALLOWED_MIME_TYPES = [
|
|
|
96
96
|
'text/markdown',
|
|
97
97
|
'text/html',
|
|
98
98
|
'application/json',
|
|
99
|
+
'application/x-ndjson',
|
|
100
|
+
'application/ndjson',
|
|
101
|
+
'application/jsonl',
|
|
99
102
|
];
|
|
100
103
|
/**
|
|
101
104
|
* Create and configure Express app with DatabaseManager
|
|
@@ -252,7 +255,16 @@ async function createHttpServerInternal(serverAccessor, config, configRouter) {
|
|
|
252
255
|
},
|
|
253
256
|
fileFilter: (_req, file, cb) => {
|
|
254
257
|
// Allow common document types by MIME type or extension
|
|
255
|
-
const allowedExtensions = [
|
|
258
|
+
const allowedExtensions = [
|
|
259
|
+
'.pdf',
|
|
260
|
+
'.docx',
|
|
261
|
+
'.txt',
|
|
262
|
+
'.md',
|
|
263
|
+
'.html',
|
|
264
|
+
'.json',
|
|
265
|
+
'.jsonl',
|
|
266
|
+
'.ndjson',
|
|
267
|
+
];
|
|
256
268
|
const ext = node_path_1.default.extname(file.originalname).toLowerCase();
|
|
257
269
|
if (ALLOWED_MIME_TYPES.includes(file.mimetype) || allowedExtensions.includes(ext)) {
|
|
258
270
|
cb(null, true);
|
package/dist/web/index.d.ts
CHANGED
|
File without changes
|
package/dist/web/index.js
CHANGED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
package/dist/web/types.d.ts
CHANGED
|
File without changes
|
package/dist/web/types.js
CHANGED
|
File without changes
|