@robthepcguy/rag-vault 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. package/LICENSE +0 -0
  2. package/README.md +478 -441
  3. package/dist/bin/install-skills.d.ts +0 -0
  4. package/dist/bin/install-skills.js +0 -0
  5. package/dist/chunker/index.d.ts +0 -0
  6. package/dist/chunker/index.js +0 -0
  7. package/dist/chunker/semantic-chunker.d.ts +0 -0
  8. package/dist/chunker/semantic-chunker.js +0 -0
  9. package/dist/chunker/sentence-splitter.d.ts +0 -0
  10. package/dist/chunker/sentence-splitter.js +0 -0
  11. package/dist/embedder/index.d.ts +8 -0
  12. package/dist/embedder/index.js +38 -0
  13. package/dist/errors/index.d.ts +0 -0
  14. package/dist/errors/index.js +0 -0
  15. package/dist/explainability/index.d.ts +0 -0
  16. package/dist/explainability/index.js +0 -0
  17. package/dist/explainability/keywords.d.ts +0 -0
  18. package/dist/explainability/keywords.js +0 -0
  19. package/dist/flywheel/feedback.d.ts +0 -0
  20. package/dist/flywheel/feedback.js +0 -0
  21. package/dist/flywheel/index.d.ts +0 -0
  22. package/dist/flywheel/index.js +0 -0
  23. package/dist/index.d.ts +0 -0
  24. package/dist/parser/html-parser.d.ts +0 -0
  25. package/dist/parser/html-parser.js +0 -0
  26. package/dist/parser/index.d.ts +0 -0
  27. package/dist/parser/index.js +21 -4
  28. package/dist/parser/pdf-filter.d.ts +0 -0
  29. package/dist/parser/pdf-filter.js +0 -0
  30. package/dist/query/index.d.ts +0 -0
  31. package/dist/query/index.js +0 -0
  32. package/dist/query/parser.d.ts +0 -0
  33. package/dist/query/parser.js +0 -0
  34. package/dist/server/index.d.ts +0 -0
  35. package/dist/server/index.js +33 -12
  36. package/dist/server/raw-data-utils.d.ts +9 -0
  37. package/dist/server/raw-data-utils.js +15 -0
  38. package/dist/server/schemas.d.ts +0 -0
  39. package/dist/server/schemas.js +0 -0
  40. package/dist/utils/config-parsers.d.ts +0 -0
  41. package/dist/utils/config-parsers.js +0 -0
  42. package/dist/utils/config.d.ts +0 -0
  43. package/dist/utils/config.js +0 -0
  44. package/dist/utils/file-utils.d.ts +0 -0
  45. package/dist/utils/file-utils.js +0 -0
  46. package/dist/utils/math.d.ts +0 -0
  47. package/dist/utils/math.js +0 -0
  48. package/dist/utils/process-handlers.d.ts +0 -0
  49. package/dist/utils/process-handlers.js +0 -0
  50. package/dist/vectordb/index.d.ts +0 -0
  51. package/dist/vectordb/index.js +2 -1
  52. package/dist/web/api-routes.d.ts +0 -0
  53. package/dist/web/api-routes.js +0 -0
  54. package/dist/web/config-routes.d.ts +0 -0
  55. package/dist/web/config-routes.js +0 -0
  56. package/dist/web/database-manager.d.ts +4 -0
  57. package/dist/web/database-manager.js +15 -0
  58. package/dist/web/http-server.d.ts +0 -0
  59. package/dist/web/http-server.js +13 -1
  60. package/dist/web/index.d.ts +0 -0
  61. package/dist/web/index.js +0 -0
  62. package/dist/web/middleware/async-handler.d.ts +0 -0
  63. package/dist/web/middleware/async-handler.js +0 -0
  64. package/dist/web/middleware/auth.d.ts +0 -0
  65. package/dist/web/middleware/auth.js +0 -0
  66. package/dist/web/middleware/error-handler.d.ts +0 -0
  67. package/dist/web/middleware/error-handler.js +0 -0
  68. package/dist/web/middleware/index.d.ts +0 -0
  69. package/dist/web/middleware/index.js +0 -0
  70. package/dist/web/middleware/rate-limit.d.ts +0 -0
  71. package/dist/web/middleware/rate-limit.js +0 -0
  72. package/dist/web/middleware/request-logger.d.ts +0 -0
  73. package/dist/web/middleware/request-logger.js +0 -0
  74. package/dist/web/types.d.ts +0 -0
  75. package/dist/web/types.js +0 -0
  76. package/package.json +54 -36
  77. package/skills/rag-vault/SKILL.md +0 -0
  78. package/skills/rag-vault/references/html-ingestion.md +0 -0
  79. package/skills/rag-vault/references/query-optimization.md +0 -0
  80. package/skills/rag-vault/references/result-refinement.md +0 -0
  81. package/web-ui/dist/assets/{index-BcRp9-z9.js → index-SBHxoAwi.js} +2 -2
  82. package/web-ui/dist/assets/index-ej8i4PGl.css +0 -0
  83. package/web-ui/dist/index.html +1 -1
  84. package/web-ui/dist/vite.svg +0 -0
  85. package/dist/bin/install-skills.d.ts.map +0 -1
  86. package/dist/bin/install-skills.js.map +0 -1
  87. package/dist/chunker/index.d.ts.map +0 -1
  88. package/dist/chunker/index.js.map +0 -1
  89. package/dist/chunker/semantic-chunker.d.ts.map +0 -1
  90. package/dist/chunker/semantic-chunker.js.map +0 -1
  91. package/dist/chunker/sentence-splitter.d.ts.map +0 -1
  92. package/dist/chunker/sentence-splitter.js.map +0 -1
  93. package/dist/embedder/index.d.ts.map +0 -1
  94. package/dist/embedder/index.js.map +0 -1
  95. package/dist/errors/index.d.ts.map +0 -1
  96. package/dist/errors/index.js.map +0 -1
  97. package/dist/explainability/index.d.ts.map +0 -1
  98. package/dist/explainability/index.js.map +0 -1
  99. package/dist/explainability/keywords.d.ts.map +0 -1
  100. package/dist/explainability/keywords.js.map +0 -1
  101. package/dist/flywheel/feedback.d.ts.map +0 -1
  102. package/dist/flywheel/feedback.js.map +0 -1
  103. package/dist/flywheel/index.d.ts.map +0 -1
  104. package/dist/flywheel/index.js.map +0 -1
  105. package/dist/index.d.ts.map +0 -1
  106. package/dist/index.js.map +0 -1
  107. package/dist/parser/html-parser.d.ts.map +0 -1
  108. package/dist/parser/html-parser.js.map +0 -1
  109. package/dist/parser/index.d.ts.map +0 -1
  110. package/dist/parser/index.js.map +0 -1
  111. package/dist/parser/pdf-filter.d.ts.map +0 -1
  112. package/dist/parser/pdf-filter.js.map +0 -1
  113. package/dist/query/index.d.ts.map +0 -1
  114. package/dist/query/index.js.map +0 -1
  115. package/dist/query/parser.d.ts.map +0 -1
  116. package/dist/query/parser.js.map +0 -1
  117. package/dist/server/index.d.ts.map +0 -1
  118. package/dist/server/index.js.map +0 -1
  119. package/dist/server/raw-data-utils.d.ts.map +0 -1
  120. package/dist/server/raw-data-utils.js.map +0 -1
  121. package/dist/server/schemas.d.ts.map +0 -1
  122. package/dist/server/schemas.js.map +0 -1
  123. package/dist/utils/config-parsers.d.ts.map +0 -1
  124. package/dist/utils/config-parsers.js.map +0 -1
  125. package/dist/utils/config.d.ts.map +0 -1
  126. package/dist/utils/config.js.map +0 -1
  127. package/dist/utils/file-utils.d.ts.map +0 -1
  128. package/dist/utils/file-utils.js.map +0 -1
  129. package/dist/utils/math.d.ts.map +0 -1
  130. package/dist/utils/math.js.map +0 -1
  131. package/dist/utils/process-handlers.d.ts.map +0 -1
  132. package/dist/utils/process-handlers.js.map +0 -1
  133. package/dist/vectordb/index.d.ts.map +0 -1
  134. package/dist/vectordb/index.js.map +0 -1
  135. package/dist/web/api-routes.d.ts.map +0 -1
  136. package/dist/web/api-routes.js.map +0 -1
  137. package/dist/web/config-routes.d.ts.map +0 -1
  138. package/dist/web/config-routes.js.map +0 -1
  139. package/dist/web/database-manager.d.ts.map +0 -1
  140. package/dist/web/database-manager.js.map +0 -1
  141. package/dist/web/http-server.d.ts.map +0 -1
  142. package/dist/web/http-server.js.map +0 -1
  143. package/dist/web/index.d.ts.map +0 -1
  144. package/dist/web/index.js.map +0 -1
  145. package/dist/web/middleware/async-handler.d.ts.map +0 -1
  146. package/dist/web/middleware/async-handler.js.map +0 -1
  147. package/dist/web/middleware/auth.d.ts.map +0 -1
  148. package/dist/web/middleware/auth.js.map +0 -1
  149. package/dist/web/middleware/error-handler.d.ts.map +0 -1
  150. package/dist/web/middleware/error-handler.js.map +0 -1
  151. package/dist/web/middleware/index.d.ts.map +0 -1
  152. package/dist/web/middleware/index.js.map +0 -1
  153. package/dist/web/middleware/rate-limit.d.ts.map +0 -1
  154. package/dist/web/middleware/rate-limit.js.map +0 -1
  155. package/dist/web/middleware/request-logger.d.ts.map +0 -1
  156. package/dist/web/middleware/request-logger.js.map +0 -1
  157. package/dist/web/types.d.ts.map +0 -1
  158. package/dist/web/types.js.map +0 -1
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -51,5 +51,13 @@ export declare class Embedder {
51
51
  * @returns Array of embedding vectors (dimension depends on model)
52
52
  */
53
53
  embedBatch(texts: string[], signal?: AbortSignal): Promise<number[][]>;
54
+ /**
55
+ * Detect known cache-corruption signatures from ONNX/protobuf loaders.
56
+ */
57
+ private isRecoverableCacheError;
58
+ /**
59
+ * Build a model-specific fallback cache path to avoid reusing corrupted artifacts.
60
+ */
61
+ private getRecoveryCacheDir;
54
62
  }
55
63
  //# sourceMappingURL=index.d.ts.map
@@ -1,7 +1,12 @@
1
1
  "use strict";
2
2
  // Embedder implementation with Transformers.js
3
+ var __importDefault = (this && this.__importDefault) || function (mod) {
4
+ return (mod && mod.__esModule) ? mod : { "default": mod };
5
+ };
3
6
  Object.defineProperty(exports, "__esModule", { value: true });
4
7
  exports.Embedder = exports.EmbeddingError = void 0;
8
+ const promises_1 = require("node:fs/promises");
9
+ const node_path_1 = __importDefault(require("node:path"));
5
10
  const transformers_1 = require("@huggingface/transformers");
6
11
  const index_js_1 = require("../errors/index.js");
7
12
  // Re-export error class for backwards compatibility
@@ -49,6 +54,21 @@ class Embedder {
49
54
  console.error('Embedder: Model loaded successfully');
50
55
  }
51
56
  catch (error) {
57
+ // Some ONNX caches fail with "Protobuf parsing failed". Retry once with isolated cache path.
58
+ if (this.isRecoverableCacheError(error)) {
59
+ const recoveryCacheDir = this.getRecoveryCacheDir();
60
+ console.error(`Embedder: Detected corrupted model cache. Retrying with isolated cache: "${recoveryCacheDir}"`);
61
+ try {
62
+ await (0, promises_1.mkdir)(recoveryCacheDir, { recursive: true });
63
+ transformers_1.env.cacheDir = recoveryCacheDir;
64
+ this.model = await (0, transformers_1.pipeline)('feature-extraction', this.config.modelPath);
65
+ console.error('Embedder: Model loaded successfully via recovery cache');
66
+ return;
67
+ }
68
+ catch (recoveryError) {
69
+ throw new index_js_1.EmbeddingError(`Failed to initialize Embedder after cache recovery attempt: ${recoveryError.message}`, recoveryError);
70
+ }
71
+ }
52
72
  throw new index_js_1.EmbeddingError(`Failed to initialize Embedder: ${error.message}`, error);
53
73
  }
54
74
  }
@@ -141,6 +161,24 @@ class Embedder {
141
161
  throw new index_js_1.EmbeddingError(`Failed to generate batch embeddings: ${message}`, error instanceof Error ? error : undefined);
142
162
  }
143
163
  }
164
+ /**
165
+ * Detect known cache-corruption signatures from ONNX/protobuf loaders.
166
+ */
167
+ isRecoverableCacheError(error) {
168
+ if (!(error instanceof Error)) {
169
+ return false;
170
+ }
171
+ const message = error.message.toLowerCase();
172
+ return (message.includes('protobuf parsing failed') ||
173
+ (message.includes('protobuf') && message.includes('failed to parse')));
174
+ }
175
+ /**
176
+ * Build a model-specific fallback cache path to avoid reusing corrupted artifacts.
177
+ */
178
+ getRecoveryCacheDir() {
179
+ const safeModelName = this.config.modelPath.replace(/[^a-z0-9_./-]/gi, '_').replace(/\//g, '__');
180
+ return node_path_1.default.join(this.config.cacheDir, '.recovery-cache', safeModelName);
181
+ }
144
182
  }
145
183
  exports.Embedder = Embedder;
146
184
  //# sourceMappingURL=index.js.map
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
package/dist/index.d.ts CHANGED
File without changes
File without changes
File without changes
File without changes
@@ -113,10 +113,27 @@ class DocumentParser {
113
113
  if (!(0, node_path_1.isAbsolute)(filePath)) {
114
114
  throw new index_js_1.ParserValidationError(`File path must be absolute path (received: ${filePath}). Please provide an absolute path within BASE_DIR.`);
115
115
  }
116
- // Check if path is within BASE_DIR
117
- const baseDir = (0, node_path_1.resolve)(this.config.baseDir);
118
- const normalizedPath = (0, node_path_1.resolve)(filePath);
119
- if (!normalizedPath.startsWith(baseDir)) {
116
+ // Resolve symlinks for both base and target to prevent symlink escape attacks.
117
+ // Fall back to resolve() only when path does not exist yet.
118
+ const resolveCanonicalPath = (targetPath) => {
119
+ try {
120
+ return (0, node_fs_1.realpathSync)(targetPath);
121
+ }
122
+ catch (error) {
123
+ const nodeError = error;
124
+ if (nodeError.code === 'ENOENT') {
125
+ return (0, node_path_1.resolve)(targetPath);
126
+ }
127
+ throw new index_js_1.ParserValidationError(`Failed to resolve path for security validation: ${targetPath}`);
128
+ }
129
+ };
130
+ // Check if path is within BASE_DIR using relative-path boundary check.
131
+ // This avoids prefix bypasses such as /base matching /base2.
132
+ const baseDir = resolveCanonicalPath(this.config.baseDir);
133
+ const normalizedPath = resolveCanonicalPath(filePath);
134
+ const relativePath = (0, node_path_1.relative)(baseDir, normalizedPath);
135
+ const isOutsideBaseDir = relativePath.startsWith('..') || relativePath === '..' || (0, node_path_1.isAbsolute)(relativePath);
136
+ if (isOutsideBaseDir) {
120
137
  throw new index_js_1.ParserValidationError(`File path must be within BASE_DIR (${baseDir}). Received path outside BASE_DIR: ${filePath}`);
121
138
  }
122
139
  }
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -145,7 +145,10 @@ Results include score (0 = most relevant, higher = less relevant). Set explain=t
145
145
  sourceQuery: zod_1.z.string().describe('The query that returned this result'),
146
146
  targetFilePath: zod_1.z.string().describe('File path of the result to pin'),
147
147
  targetChunkIndex: zod_1.z.number().describe('Chunk index of the result to pin'),
148
- targetFingerprint: zod_1.z.string().optional().describe('Optional fingerprint for resilient matching'),
148
+ targetFingerprint: zod_1.z
149
+ .string()
150
+ .optional()
151
+ .describe('Optional fingerprint for resilient matching'),
149
152
  }, async (args) => {
150
153
  try {
151
154
  const result = this.executeFeedbackPin(args);
@@ -155,7 +158,12 @@ Results include score (0 = most relevant, higher = less relevant). Set explain=t
155
158
  }
156
159
  catch (error) {
157
160
  return {
158
- content: [{ type: 'text', text: JSON.stringify({ error: (0, index_js_3.getErrorMessage)(error) }) }],
161
+ content: [
162
+ {
163
+ type: 'text',
164
+ text: JSON.stringify({ error: (0, index_js_3.getErrorMessage)(error) }),
165
+ },
166
+ ],
159
167
  isError: true,
160
168
  };
161
169
  }
@@ -165,7 +173,10 @@ Results include score (0 = most relevant, higher = less relevant). Set explain=t
165
173
  sourceQuery: zod_1.z.string().describe('The query that returned this result'),
166
174
  targetFilePath: zod_1.z.string().describe('File path of the result to dismiss'),
167
175
  targetChunkIndex: zod_1.z.number().describe('Chunk index of the result to dismiss'),
168
- targetFingerprint: zod_1.z.string().optional().describe('Optional fingerprint for resilient matching'),
176
+ targetFingerprint: zod_1.z
177
+ .string()
178
+ .optional()
179
+ .describe('Optional fingerprint for resilient matching'),
169
180
  }, async (args) => {
170
181
  try {
171
182
  const result = this.executeFeedbackDismiss(args);
@@ -175,7 +186,12 @@ Results include score (0 = most relevant, higher = less relevant). Set explain=t
175
186
  }
176
187
  catch (error) {
177
188
  return {
178
- content: [{ type: 'text', text: JSON.stringify({ error: (0, index_js_3.getErrorMessage)(error) }) }],
189
+ content: [
190
+ {
191
+ type: 'text',
192
+ text: JSON.stringify({ error: (0, index_js_3.getErrorMessage)(error) }),
193
+ },
194
+ ],
179
195
  isError: true,
180
196
  };
181
197
  }
@@ -190,7 +206,12 @@ Results include score (0 = most relevant, higher = less relevant). Set explain=t
190
206
  }
191
207
  catch (error) {
192
208
  return {
193
- content: [{ type: 'text', text: JSON.stringify({ error: (0, index_js_3.getErrorMessage)(error) }) }],
209
+ content: [
210
+ {
211
+ type: 'text',
212
+ text: JSON.stringify({ error: (0, index_js_3.getErrorMessage)(error) }),
213
+ },
214
+ ],
194
215
  isError: true,
195
216
  };
196
217
  }
@@ -288,7 +309,7 @@ Results include score (0 = most relevant, higher = less relevant). Set explain=t
288
309
  score: result.score,
289
310
  };
290
311
  // Restore source for raw-data files (ingested via ingest_data)
291
- if ((0, raw_data_utils_js_1.isRawDataPath)(result.filePath)) {
312
+ if ((0, raw_data_utils_js_1.isManagedRawDataPath)(this.dbPath, result.filePath)) {
292
313
  const source = (0, raw_data_utils_js_1.extractSourceFromPath)(result.filePath);
293
314
  if (source) {
294
315
  queryResult.source = source;
@@ -336,7 +357,7 @@ Results include score (0 = most relevant, higher = less relevant). Set explain=t
336
357
  // since the path is internally generated and content is already processed
337
358
  const isPdf = args.filePath.toLowerCase().endsWith('.pdf');
338
359
  let text;
339
- if ((0, raw_data_utils_js_1.isRawDataPath)(args.filePath)) {
360
+ if ((0, raw_data_utils_js_1.isManagedRawDataPath)(this.dbPath, args.filePath)) {
340
361
  // Raw-data files: skip validation, read directly
341
362
  text = await (0, promises_1.readFile)(args.filePath, 'utf-8');
342
363
  console.error(`Read raw-data file: ${args.filePath} (${text.length} characters)`);
@@ -505,7 +526,7 @@ Results include score (0 = most relevant, higher = less relevant). Set explain=t
505
526
  const files = await this.vectorStore.listFiles();
506
527
  // Enrich raw-data files with source information
507
528
  return files.map((file) => {
508
- if ((0, raw_data_utils_js_1.isRawDataPath)(file.filePath)) {
529
+ if ((0, raw_data_utils_js_1.isManagedRawDataPath)(this.dbPath, file.filePath)) {
509
530
  const source = (0, raw_data_utils_js_1.extractSourceFromPath)(file.filePath);
510
531
  if (source) {
511
532
  return { ...file, source };
@@ -648,7 +669,7 @@ Results include score (0 = most relevant, higher = less relevant). Set explain=t
648
669
  // Delete chunks from vector database
649
670
  await this.vectorStore.deleteChunks(targetPath);
650
671
  // Also delete physical raw-data file if applicable
651
- if ((0, raw_data_utils_js_1.isRawDataPath)(targetPath)) {
672
+ if ((0, raw_data_utils_js_1.isManagedRawDataPath)(this.dbPath, targetPath)) {
652
673
  try {
653
674
  await (0, promises_1.unlink)(targetPath);
654
675
  console.error(`Deleted raw-data file: ${targetPath}`);
@@ -693,7 +714,7 @@ Results include score (0 = most relevant, higher = less relevant). Set explain=t
693
714
  const chunks = await this.vectorStore.getDocumentChunks(filePath);
694
715
  // Enrich with source information for raw-data files
695
716
  const enrichedChunks = chunks.map((chunk) => {
696
- if ((0, raw_data_utils_js_1.isRawDataPath)(chunk.filePath)) {
717
+ if ((0, raw_data_utils_js_1.isManagedRawDataPath)(this.dbPath, chunk.filePath)) {
697
718
  const source = (0, raw_data_utils_js_1.extractSourceFromPath)(chunk.filePath);
698
719
  if (source) {
699
720
  return { ...chunk, source };
@@ -724,7 +745,7 @@ Results include score (0 = most relevant, higher = less relevant). Set explain=t
724
745
  const relatedChunks = await this.vectorStore.findRelatedChunks(filePath, chunkIndex, limit ?? 5, excludeSameDocument ?? true);
725
746
  // Enrich with source information for raw-data files
726
747
  const enrichedChunks = relatedChunks.map((chunk) => {
727
- if ((0, raw_data_utils_js_1.isRawDataPath)(chunk.filePath)) {
748
+ if ((0, raw_data_utils_js_1.isManagedRawDataPath)(this.dbPath, chunk.filePath)) {
728
749
  const source = (0, raw_data_utils_js_1.extractSourceFromPath)(chunk.filePath);
729
750
  if (source) {
730
751
  return { ...chunk, source };
@@ -760,7 +781,7 @@ Results include score (0 = most relevant, higher = less relevant). Set explain=t
760
781
  );
761
782
  // Enrich with source information
762
783
  results[key] = relatedChunks.map((related) => {
763
- if ((0, raw_data_utils_js_1.isRawDataPath)(related.filePath)) {
784
+ if ((0, raw_data_utils_js_1.isManagedRawDataPath)(this.dbPath, related.filePath)) {
764
785
  const source = (0, raw_data_utils_js_1.extractSourceFromPath)(related.filePath);
765
786
  if (source) {
766
787
  return { ...related, source };
@@ -70,6 +70,15 @@ export declare function saveRawData(dbPath: string, source: string, content: str
70
70
  * @returns True if path is in raw-data directory
71
71
  */
72
72
  export declare function isRawDataPath(filePath: string): boolean;
73
+ /**
74
+ * Check whether a path belongs to this database's managed raw-data directory.
75
+ * This is stricter than isRawDataPath() and should be used for trust decisions.
76
+ *
77
+ * @param dbPath - LanceDB database path for the current server
78
+ * @param filePath - File path to validate
79
+ * @returns True only if filePath is inside <dbPath>/raw-data
80
+ */
81
+ export declare function isManagedRawDataPath(dbPath: string, filePath: string): boolean;
73
82
  /**
74
83
  * Extract original source from raw-data file path
75
84
  * Returns null if not a raw-data path
@@ -10,6 +10,7 @@ exports.getRawDataDir = getRawDataDir;
10
10
  exports.generateRawDataPath = generateRawDataPath;
11
11
  exports.saveRawData = saveRawData;
12
12
  exports.isRawDataPath = isRawDataPath;
13
+ exports.isManagedRawDataPath = isManagedRawDataPath;
13
14
  exports.extractSourceFromPath = extractSourceFromPath;
14
15
  const promises_1 = require("node:fs/promises");
15
16
  const node_path_1 = require("node:path");
@@ -167,6 +168,20 @@ const RAW_DATA_NATIVE = `${node_path_1.sep}raw-data${node_path_1.sep}`;
167
168
  function isRawDataPath(filePath) {
168
169
  return filePath.includes(RAW_DATA_NATIVE) || filePath.includes(RAW_DATA_POSIX);
169
170
  }
171
+ /**
172
+ * Check whether a path belongs to this database's managed raw-data directory.
173
+ * This is stricter than isRawDataPath() and should be used for trust decisions.
174
+ *
175
+ * @param dbPath - LanceDB database path for the current server
176
+ * @param filePath - File path to validate
177
+ * @returns True only if filePath is inside <dbPath>/raw-data
178
+ */
179
+ function isManagedRawDataPath(dbPath, filePath) {
180
+ const rawDataDir = (0, node_path_1.resolve)(getRawDataDir(dbPath));
181
+ const resolvedPath = (0, node_path_1.resolve)(filePath);
182
+ const rel = (0, node_path_1.relative)(rawDataDir, resolvedPath);
183
+ return rel === '' || (!rel.startsWith('..') && !(0, node_path_1.isAbsolute)(rel));
184
+ }
170
185
  /**
171
186
  * Extract original source from raw-data file path
172
187
  * Returns null if not a raw-data path
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -143,7 +143,8 @@ function isDocumentMetadata(value) {
143
143
  return false;
144
144
  }
145
145
  // Optional custom field must be an object if present
146
- if (obj['custom'] !== undefined && (typeof obj['custom'] !== 'object' || obj['custom'] === null)) {
146
+ if (obj['custom'] !== undefined &&
147
+ (typeof obj['custom'] !== 'object' || obj['custom'] === null)) {
147
148
  return false;
148
149
  }
149
150
  return true;
File without changes
File without changes
File without changes
File without changes
@@ -246,5 +246,9 @@ export declare class DatabaseManager {
246
246
  * Ensure config directory exists
247
247
  */
248
248
  private ensureConfigDir;
249
+ /**
250
+ * Enforce allowed-roots policy for database mutation operations.
251
+ */
252
+ private assertPathAllowedForMutation;
249
253
  }
250
254
  //# sourceMappingURL=database-manager.d.ts.map
@@ -226,6 +226,7 @@ class DatabaseManager {
226
226
  }
227
227
  // Expand tilde to home directory
228
228
  const resolvedPath = expandTilde(newDbPath);
229
+ await this.assertPathAllowedForMutation('Switch path', resolvedPath);
229
230
  // Validate the new path exists and is a valid database
230
231
  if (!(0, node_fs_1.existsSync)(resolvedPath)) {
231
232
  throw new Error(`Database path does not exist: ${resolvedPath}`);
@@ -290,6 +291,7 @@ class DatabaseManager {
290
291
  async createDatabase(options) {
291
292
  // Expand tilde to home directory
292
293
  const resolvedPath = expandTilde(options.dbPath);
294
+ await this.assertPathAllowedForMutation('Create path', resolvedPath);
293
295
  // Check if path already exists
294
296
  if ((0, node_fs_1.existsSync)(resolvedPath)) {
295
297
  const lanceDbPath = node_path_1.default.join(resolvedPath, LANCEDB_DIR_NAME);
@@ -662,6 +664,7 @@ class DatabaseManager {
662
664
  */
663
665
  async deleteDatabase(dbPath, deleteFiles = false) {
664
666
  const resolvedPath = expandTilde(dbPath);
667
+ await this.assertPathAllowedForMutation('Delete path', resolvedPath);
665
668
  // Cannot delete the currently active database
666
669
  if (this.currentConfig && this.currentConfig.dbPath === resolvedPath) {
667
670
  throw new Error('Cannot delete the currently active database. Switch to another database first.');
@@ -699,6 +702,18 @@ class DatabaseManager {
699
702
  await (0, promises_1.mkdir)(CONFIG_DIR, { recursive: true });
700
703
  }
701
704
  }
705
+ /**
706
+ * Enforce allowed-roots policy for database mutation operations.
707
+ */
708
+ async assertPathAllowedForMutation(action, targetPath) {
709
+ if (await this.isPathAllowed(targetPath)) {
710
+ return;
711
+ }
712
+ const allowedRoots = await this.getEffectiveAllowedRoots();
713
+ throw new Error(`${action} "${targetPath}" is outside allowed roots. ` +
714
+ `Allowed: ${allowedRoots.join(', ')}. ` +
715
+ `Add this path to allowed roots or set ALLOWED_SCAN_ROOTS environment variable.`);
716
+ }
702
717
  }
703
718
  exports.DatabaseManager = DatabaseManager;
704
719
  //# sourceMappingURL=database-manager.js.map
File without changes
@@ -96,6 +96,9 @@ const ALLOWED_MIME_TYPES = [
96
96
  'text/markdown',
97
97
  'text/html',
98
98
  'application/json',
99
+ 'application/x-ndjson',
100
+ 'application/ndjson',
101
+ 'application/jsonl',
99
102
  ];
100
103
  /**
101
104
  * Create and configure Express app with DatabaseManager
@@ -252,7 +255,16 @@ async function createHttpServerInternal(serverAccessor, config, configRouter) {
252
255
  },
253
256
  fileFilter: (_req, file, cb) => {
254
257
  // Allow common document types by MIME type or extension
255
- const allowedExtensions = ['.pdf', '.docx', '.txt', '.md', '.html', '.json'];
258
+ const allowedExtensions = [
259
+ '.pdf',
260
+ '.docx',
261
+ '.txt',
262
+ '.md',
263
+ '.html',
264
+ '.json',
265
+ '.jsonl',
266
+ '.ndjson',
267
+ ];
256
268
  const ext = node_path_1.default.extname(file.originalname).toLowerCase();
257
269
  if (ALLOWED_MIME_TYPES.includes(file.mimetype) || allowedExtensions.includes(ext)) {
258
270
  cb(null, true);
File without changes
package/dist/web/index.js CHANGED
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
package/dist/web/types.js CHANGED
File without changes