@robthepcguy/rag-vault 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/LICENSE +24 -0
  2. package/README.md +421 -0
  3. package/dist/bin/install-skills.d.ts +20 -0
  4. package/dist/bin/install-skills.d.ts.map +1 -0
  5. package/dist/bin/install-skills.js +196 -0
  6. package/dist/bin/install-skills.js.map +1 -0
  7. package/dist/chunker/index.d.ts +11 -0
  8. package/dist/chunker/index.d.ts.map +1 -0
  9. package/dist/chunker/index.js +6 -0
  10. package/dist/chunker/index.js.map +1 -0
  11. package/dist/chunker/semantic-chunker.d.ts +96 -0
  12. package/dist/chunker/semantic-chunker.d.ts.map +1 -0
  13. package/dist/chunker/semantic-chunker.js +267 -0
  14. package/dist/chunker/semantic-chunker.js.map +1 -0
  15. package/dist/chunker/sentence-splitter.d.ts +16 -0
  16. package/dist/chunker/sentence-splitter.d.ts.map +1 -0
  17. package/dist/chunker/sentence-splitter.js +114 -0
  18. package/dist/chunker/sentence-splitter.js.map +1 -0
  19. package/dist/embedder/index.d.ts +55 -0
  20. package/dist/embedder/index.d.ts.map +1 -0
  21. package/dist/embedder/index.js +146 -0
  22. package/dist/embedder/index.js.map +1 -0
  23. package/dist/errors/index.d.ts +73 -0
  24. package/dist/errors/index.d.ts.map +1 -0
  25. package/dist/errors/index.js +170 -0
  26. package/dist/errors/index.js.map +1 -0
  27. package/dist/index.d.ts +3 -0
  28. package/dist/index.d.ts.map +1 -0
  29. package/dist/index.js +91 -0
  30. package/dist/index.js.map +1 -0
  31. package/dist/parser/html-parser.d.ts +14 -0
  32. package/dist/parser/html-parser.d.ts.map +1 -0
  33. package/dist/parser/html-parser.js +99 -0
  34. package/dist/parser/html-parser.js.map +1 -0
  35. package/dist/parser/index.d.ts +144 -0
  36. package/dist/parser/index.d.ts.map +1 -0
  37. package/dist/parser/index.js +446 -0
  38. package/dist/parser/index.js.map +1 -0
  39. package/dist/parser/pdf-filter.d.ts +89 -0
  40. package/dist/parser/pdf-filter.d.ts.map +1 -0
  41. package/dist/parser/pdf-filter.js +304 -0
  42. package/dist/parser/pdf-filter.js.map +1 -0
  43. package/dist/server/index.d.ts +144 -0
  44. package/dist/server/index.d.ts.map +1 -0
  45. package/dist/server/index.js +518 -0
  46. package/dist/server/index.js.map +1 -0
  47. package/dist/server/raw-data-utils.d.ts +81 -0
  48. package/dist/server/raw-data-utils.d.ts.map +1 -0
  49. package/dist/server/raw-data-utils.js +196 -0
  50. package/dist/server/raw-data-utils.js.map +1 -0
  51. package/dist/server/schemas.d.ts +186 -0
  52. package/dist/server/schemas.d.ts.map +1 -0
  53. package/dist/server/schemas.js +99 -0
  54. package/dist/server/schemas.js.map +1 -0
  55. package/dist/utils/config-parsers.d.ts +14 -0
  56. package/dist/utils/config-parsers.d.ts.map +1 -0
  57. package/dist/utils/config-parsers.js +47 -0
  58. package/dist/utils/config-parsers.js.map +1 -0
  59. package/dist/utils/config.d.ts +37 -0
  60. package/dist/utils/config.d.ts.map +1 -0
  61. package/dist/utils/config.js +52 -0
  62. package/dist/utils/config.js.map +1 -0
  63. package/dist/utils/logger.d.ts +36 -0
  64. package/dist/utils/logger.d.ts.map +1 -0
  65. package/dist/utils/logger.js +64 -0
  66. package/dist/utils/logger.js.map +1 -0
  67. package/dist/utils/math.d.ts +34 -0
  68. package/dist/utils/math.d.ts.map +1 -0
  69. package/dist/utils/math.js +73 -0
  70. package/dist/utils/math.js.map +1 -0
  71. package/dist/utils/process-handlers.d.ts +26 -0
  72. package/dist/utils/process-handlers.d.ts.map +1 -0
  73. package/dist/utils/process-handlers.js +69 -0
  74. package/dist/utils/process-handlers.js.map +1 -0
  75. package/dist/vectordb/index.d.ts +210 -0
  76. package/dist/vectordb/index.d.ts.map +1 -0
  77. package/dist/vectordb/index.js +613 -0
  78. package/dist/vectordb/index.js.map +1 -0
  79. package/dist/web/api-routes.d.ts +9 -0
  80. package/dist/web/api-routes.d.ts.map +1 -0
  81. package/dist/web/api-routes.js +127 -0
  82. package/dist/web/api-routes.js.map +1 -0
  83. package/dist/web/config-routes.d.ts +7 -0
  84. package/dist/web/config-routes.d.ts.map +1 -0
  85. package/dist/web/config-routes.js +54 -0
  86. package/dist/web/config-routes.js.map +1 -0
  87. package/dist/web/database-manager.d.ts +130 -0
  88. package/dist/web/database-manager.d.ts.map +1 -0
  89. package/dist/web/database-manager.js +382 -0
  90. package/dist/web/database-manager.js.map +1 -0
  91. package/dist/web/http-server.d.ts +28 -0
  92. package/dist/web/http-server.d.ts.map +1 -0
  93. package/dist/web/http-server.js +311 -0
  94. package/dist/web/http-server.js.map +1 -0
  95. package/dist/web/index.d.ts +3 -0
  96. package/dist/web/index.d.ts.map +1 -0
  97. package/dist/web/index.js +114 -0
  98. package/dist/web/index.js.map +1 -0
  99. package/dist/web/middleware/async-handler.d.ts +17 -0
  100. package/dist/web/middleware/async-handler.d.ts.map +1 -0
  101. package/dist/web/middleware/async-handler.js +26 -0
  102. package/dist/web/middleware/async-handler.js.map +1 -0
  103. package/dist/web/middleware/auth.d.ts +22 -0
  104. package/dist/web/middleware/auth.d.ts.map +1 -0
  105. package/dist/web/middleware/auth.js +81 -0
  106. package/dist/web/middleware/auth.js.map +1 -0
  107. package/dist/web/middleware/error-handler.d.ts +36 -0
  108. package/dist/web/middleware/error-handler.d.ts.map +1 -0
  109. package/dist/web/middleware/error-handler.js +68 -0
  110. package/dist/web/middleware/error-handler.js.map +1 -0
  111. package/dist/web/middleware/index.d.ts +6 -0
  112. package/dist/web/middleware/index.d.ts.map +1 -0
  113. package/dist/web/middleware/index.js +19 -0
  114. package/dist/web/middleware/index.js.map +1 -0
  115. package/dist/web/middleware/rate-limit.d.ts +38 -0
  116. package/dist/web/middleware/rate-limit.d.ts.map +1 -0
  117. package/dist/web/middleware/rate-limit.js +116 -0
  118. package/dist/web/middleware/rate-limit.js.map +1 -0
  119. package/dist/web/middleware/request-logger.d.ts +52 -0
  120. package/dist/web/middleware/request-logger.d.ts.map +1 -0
  121. package/dist/web/middleware/request-logger.js +74 -0
  122. package/dist/web/middleware/request-logger.js.map +1 -0
  123. package/dist/web/types.d.ts +6 -0
  124. package/dist/web/types.d.ts.map +1 -0
  125. package/dist/web/types.js +4 -0
  126. package/dist/web/types.js.map +1 -0
  127. package/package.json +135 -0
  128. package/skills/rag-vault/SKILL.md +111 -0
  129. package/skills/rag-vault/references/html-ingestion.md +73 -0
  130. package/skills/rag-vault/references/query-optimization.md +57 -0
  131. package/skills/rag-vault/references/result-refinement.md +54 -0
@@ -0,0 +1,89 @@
1
+ import type { EmbedderInterface } from '../chunker/semantic-chunker.js';
2
+ export type { EmbedderInterface };
3
+ /**
4
+ * Text item with position information from PDF
+ *
+ * - text: the item's text content
+ * - x / y: item position. pdf-filter.js orders items by descending y
+ *   (treated as top to bottom) and then ascending x (left to right).
+ * - fontSize / hasEOL: NOTE(review): not read anywhere in pdf-filter.js;
+ *   presumably carried over from the PDF extractor - confirm before relying on them.
5
+ */
6
+ export interface TextItemWithPosition {
7
+ text: string;
8
+ x: number;
9
+ y: number;
10
+ fontSize: number;
11
+ hasEOL: boolean;
12
+ }
13
+ /**
14
+ * Page data containing positioned text items
+ *
+ * - pageNum: page identifier. NOTE(review): pdf-filter.js never reads it, so
+ *   whether it is 0- or 1-based cannot be told from here - confirm with the producer.
+ * - items: the page's text items; the filter functions sort them by position
+ *   themselves before joining or splitting.
15
+ */
16
+ export interface PageData {
17
+ pageNum: number;
18
+ items: TextItemWithPosition[];
19
+ }
20
+ /**
21
+ * Join filtered pages into text
22
+ *
+ * Within a page, items sharing the same rounded Y form one line (items joined
+ * with a single space, ordered by ascending X) and lines are joined with "\n"
+ * in descending-Y order. Pages whose text is empty are skipped; the remaining
+ * pages are joined with a blank line ("\n\n").
+ *
23
+ * @param pages - Filtered page data
24
+ * @returns Joined text with proper line breaks
25
+ */
26
+ export declare function joinFilteredPages(pages: PageData[]): string;
27
+ /**
28
+ * Configuration for sentence-level pattern detection
+ *
+ * Callers may pass a partial object; missing fields are filled from
+ * DEFAULT_SENTENCE_PATTERN_CONFIG.
29
+ */
30
+ export interface SentencePatternConfig {
31
+ /** Median pairwise similarity at or above which boundary sentences are treated as a header/footer (default: 0.85) */
32
+ similarityThreshold: number;
33
+ /** Minimum number of pages, and of collected first/last sentences, required before detection runs (default: 3) */
34
+ minPages: number;
35
+ /** Number of pages to sample from the center of the document for pattern detection (default: 5) */
36
+ samplePages: number;
37
+ }
38
+ /** Default configuration for sentence-level pattern detection: similarityThreshold 0.85, minPages 3, samplePages 5 */
39
+ export declare const DEFAULT_SENTENCE_PATTERN_CONFIG: SentencePatternConfig;
40
+ /**
41
+ * Result of sentence-level pattern detection
+ *
+ * Both flags are false and both similarities are 0 when detection is skipped
+ * (fewer pages than minPages, or too few sentences collected).
42
+ */
43
+ export interface SentencePatternResult {
44
+ /** Whether the first sentence of each page should be removed (detected as header) */
45
+ removeFirstSentence: boolean;
46
+ /** Whether the last sentence of each page should be removed (detected as footer) */
47
+ removeLastSentence: boolean;
48
+ /** Median pairwise similarity of the sampled first sentences (0 if not computed) */
49
+ headerSimilarity: number;
50
+ /** Median pairwise similarity of the sampled last sentences (0 if not computed) */
51
+ footerSimilarity: number;
52
+ }
53
+ /**
54
+ * Detect header/footer patterns at sentence level
55
+ *
56
+ * Algorithm:
57
+ * 1. Sample pages from the CENTER of the document (assumed to be content pages)
58
+ * 2. Split each page into sentences with Y coordinate
59
+ * 3. Collect first/last sentences from sampled pages
60
+ * 4. Embed and calculate median pairwise similarity
61
+ * 5. If similarity >= threshold, mark as header/footer
62
+ *
63
+ * Key insight: Middle pages are assumed to be content pages (cover, TOC, index are usually at the edges).
64
+ * Using median instead of mean provides robustness against outliers.
65
+ *
66
+ * This approach handles variable content like page numbers ("7 of 75")
67
+ * by using semantic similarity instead of exact text matching.
68
+ *
+ * Returns an all-false / zero-similarity result when there are fewer pages
+ * than config.minPages.
+ *
69
+ * @param pages - Array of page data
70
+ * @param embedder - Embedder for generating embeddings
71
+ * @param config - Configuration options (merged over DEFAULT_SENTENCE_PATTERN_CONFIG)
72
+ * @returns Detection result
73
+ */
74
+ export declare function detectSentencePatterns(pages: PageData[], embedder: EmbedderInterface, config?: Partial<SentencePatternConfig>): Promise<SentencePatternResult>;
75
+ /**
76
+ * Filter page boundary sentences and join into text
77
+ *
78
+ * This is the main entry point for sentence-level header/footer filtering.
79
+ * It detects and removes repeating sentence patterns at page boundaries.
80
+ *
81
+ * Use this instead of joinFilteredPages when embedder is available.
82
+ *
+ * Falls back to joinFilteredPages(pages) when there are fewer pages than
+ * config.minPages or when neither a header nor a footer pattern is detected.
+ * When a pattern is detected, the first and/or last sentence is dropped from
+ * EVERY page (not only the sampled ones); the remaining sentences of a page
+ * are joined with spaces and non-empty pages are joined with "\n\n".
+ *
83
+ * @param pages - Array of page data
84
+ * @param embedder - Embedder for generating embeddings
85
+ * @param config - Configuration options
86
+ * @returns Filtered text with header/footer sentences removed
87
+ */
88
+ export declare function filterPageBoundarySentences(pages: PageData[], embedder: EmbedderInterface, config?: Partial<SentencePatternConfig>): Promise<string>;
89
+ //# sourceMappingURL=pdf-filter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pdf-filter.d.ts","sourceRoot":"","sources":["../../src/parser/pdf-filter.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAA;AAIvE,YAAY,EAAE,iBAAiB,EAAE,CAAA;AAMjC;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,IAAI,EAAE,MAAM,CAAA;IACZ,CAAC,EAAE,MAAM,CAAA;IACT,CAAC,EAAE,MAAM,CAAA;IACT,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,EAAE,OAAO,CAAA;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,EAAE,oBAAoB,EAAE,CAAA;CAC9B;AAoCD;;;;;GAKG;AACH,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,MAAM,CAK3D;AA6JD;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,iEAAiE;IACjE,mBAAmB,EAAE,MAAM,CAAA;IAC3B,gEAAgE;IAChE,QAAQ,EAAE,MAAM,CAAA;IAChB,+EAA+E;IAC/E,WAAW,EAAE,MAAM,CAAA;CACpB;AAED,iEAAiE;AACjE,eAAO,MAAM,+BAA+B,EAAE,qBAI7C,CAAA;AAED;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,qEAAqE;IACrE,mBAAmB,EAAE,OAAO,CAAA;IAC5B,oEAAoE;IACpE,kBAAkB,EAAE,OAAO,CAAA;IAC3B,2CAA2C;IAC3C,gBAAgB,EAAE,MAAM,CAAA;IACxB,0CAA0C;IAC1C,gBAAgB,EAAE,MAAM,CAAA;CACzB;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAsB,sBAAsB,CAC1C,KAAK,EAAE,QAAQ,EAAE,EACjB,QAAQ,EAAE,iBAAiB,EAC3B,MAAM,GAAE,OAAO,CAAC,qBAAqB,CAAM,GAC1C,OAAO,CAAC,qBAAqB,CAAC,CAsEhC;AAED;;;;;;;;;;;;GAYG;AACH,wBAAsB,2BAA2B,CAC/C,KAAK,EAAE,QAAQ,EAAE,EACjB,QAAQ,EAAE,iBAAiB,EAC3B,MAAM,GAAE,OAAO,CAAC,qBAAqB,CAAM,GAC1C,OAAO,CAAC,MAAM,CAAC,CAyCjB"}
@@ -0,0 +1,304 @@
1
+ "use strict";
2
+ // PDF Header/Footer Filter
3
+ // - Detects and removes repeating patterns across pages
4
+ // - Semantic similarity-based header/footer detection (sentence-level)
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.DEFAULT_SENTENCE_PATTERN_CONFIG = void 0;
7
+ exports.joinFilteredPages = joinFilteredPages;
8
+ exports.detectSentencePatterns = detectSentencePatterns;
9
+ exports.filterPageBoundarySentences = filterPageBoundarySentences;
10
+ const sentence_splitter_js_1 = require("../chunker/sentence-splitter.js");
11
+ // ============================================
12
+ // Text Joining
13
+ // ============================================
14
/**
 * Render the text items of one page as plain text.
 *
 * Items whose rounded Y coordinate is equal are treated as one line. Lines are
 * emitted in descending Y order (which this module treats as top to bottom);
 * inside a line, items are ordered by ascending X (left to right) and joined
 * with a single space. Lines are joined with newlines and the result is trimmed.
 *
 * @param items - Positioned text items of a single page
 * @returns The page text, one line per distinct rounded Y
 */
function joinPageItems(items) {
    // Bucket items into lines keyed by rounded Y so tiny vertical jitter
    // does not split a visual line.
    const linesByY = new Map();
    for (const entry of items) {
        const key = Math.round(entry.y);
        if (linesByY.has(key)) {
            linesByY.get(key).push(entry);
        }
        else {
            linesByY.set(key, [entry]);
        }
    }
    // Largest Y first
    const orderedKeys = [...linesByY.keys()].sort((first, second) => second - first);
    const lines = [];
    for (const key of orderedKeys) {
        const row = linesByY.get(key);
        row.sort((left, right) => left.x - right.x);
        lines.push(row.map((entry) => entry.text).join(' '));
    }
    return lines.join('\n').trim();
}
40
/**
 * Join filtered pages into text
 *
 * Each page is rendered with joinPageItems; pages that render to an empty
 * string are skipped and the rest are separated by a blank line.
 *
 * @param pages - Filtered page data
 * @returns Joined text, pages separated by "\n\n"
 */
function joinFilteredPages(pages) {
    const pageTexts = [];
    for (const page of pages) {
        const rendered = joinPageItems(page.items);
        if (rendered.length > 0) {
            pageTexts.push(rendered);
        }
    }
    return pageTexts.join('\n\n');
}
52
/**
 * Split page items into sentences with Y coordinate
 *
 * 1. Sort items into reading order and join them into one text
 *    (space between items on a line, newline when the line changes)
 * 2. Split the text into sentences using splitIntoSentences
 * 3. Map each sentence to the rounded Y coordinate of the item in which the
 *    sentence starts
 * 4. Merge consecutive sentences with the same Y coordinate
 *
 * Sentences that cannot be located in the joined text are skipped.
 *
 * @param items - Text items with position
 * @returns Sentences with Y coordinate (merged by Y)
 */
function splitItemsIntoSentencesWithY(items) {
    if (items.length === 0)
        return [];
    // Sort items by Y descending, then X ascending (reading order).
    // Items whose Y differs by at most 1 are considered to be on the same line.
    const sortedItems = [...items].sort((a, b) => {
        const yDiff = b.y - a.y;
        if (Math.abs(yDiff) > 1)
            return yDiff;
        return a.x - b.x;
    });
    // Build text and remember at which character offset each item starts
    const charToItem = [];
    let fullText = '';
    let prevY = null;
    for (const item of sortedItems) {
        // Insert newline when Y coordinate changes (different line)
        // This matches joinPageItems behavior: same Y = space, different Y = newline
        if (prevY !== null && Math.abs(prevY - item.y) > 1) {
            fullText = `${fullText.trimEnd()}\n`;
        }
        charToItem.push({ start: fullText.length, item });
        fullText += `${item.text} `;
        prevY = item.y;
    }
    // Split into sentences
    const sentences = (0, sentence_splitter_js_1.splitIntoSentences)(fullText);
    // Fallback Y, rounded like every other Y so that the strict-equality
    // comparison in mergeSentencesByY treats it consistently.
    const fallbackY = Math.round(sortedItems[0]?.y ?? 0);
    // Map each sentence to Y coordinate of its first character's item
    const sentencesWithY = [];
    let searchStart = 0;
    for (const sentence of sentences) {
        // Locate the sentence by its trimmed form...
        const needle = sentence.trim();
        const sentenceStart = fullText.indexOf(needle, searchStart);
        if (sentenceStart === -1)
            continue;
        // Find the last item that starts at or before this position
        let firstItemY = fallbackY;
        for (let i = charToItem.length - 1; i >= 0; i--) {
            const entry = charToItem[i];
            if (entry && entry.start <= sentenceStart) {
                firstItemY = Math.round(entry.item.y);
                break;
            }
        }
        sentencesWithY.push({ text: sentence, y: firstItemY });
        // ...and advance by the length that was actually matched. Advancing by
        // the untrimmed length could move the cursor past the beginning of the
        // next sentence and make it unfindable (and therefore dropped).
        searchStart = sentenceStart + needle.length;
    }
    // Merge sentences with same Y coordinate
    return mergeSentencesByY(sentencesWithY);
}
112
/**
 * Collapse runs of consecutive sentences that share a Y coordinate.
 *
 * Only adjacent entries are merged (compared with strict equality on `y`);
 * merged texts are joined with a single space. The input objects are copied,
 * never mutated.
 *
 * @param sentences - Sentences with Y coordinate
 * @returns Merged sentences (each run of equal Y = one sentence)
 */
function mergeSentencesByY(sentences) {
    const merged = [];
    for (const sentence of sentences) {
        const tail = merged[merged.length - 1];
        if (tail !== undefined && tail.y === sentence.y) {
            // Same Y as the run in progress: extend it
            tail.text += ` ${sentence.text}`;
        }
        else {
            // First sentence or a new Y: start a new run from a copy
            merged.push({ ...sentence });
        }
    }
    return merged;
}
142
+ // ============================================
143
+ // Sentence-Level Header/Footer Detection
144
+ // ============================================
145
+ // Use shared cosine similarity function
146
+ const math_js_1 = require("../utils/math.js");
147
/**
 * Median of the cosine similarities of every unordered pair of embeddings.
 *
 * The median (rather than the mean) keeps a few dissimilar pages, e.g. a page
 * whose header changes with the chapter title, from dragging the score down.
 *
 * Returns 1.0 when fewer than two embeddings are given and 0 when no pair
 * could be compared (missing entries are skipped).
 */
function medianPairwiseSimilarity(embeddings) {
    const count = embeddings.length;
    if (count < 2)
        return 1.0;
    const scores = [];
    for (let i = 0; i < count; i++) {
        const left = embeddings[i];
        if (!left)
            continue;
        for (let j = i + 1; j < count; j++) {
            const right = embeddings[j];
            if (!right)
                continue;
            scores.push((0, math_js_1.cosineSimilarity)(left, right));
        }
    }
    if (scores.length === 0)
        return 0;
    scores.sort((a, b) => a - b);
    const mid = Math.floor(scores.length / 2);
    const upperMiddle = scores[mid] ?? 0;
    if (scores.length % 2 !== 0) {
        // Odd count: the single middle value
        return upperMiddle;
    }
    // Even count: mean of the two middle values
    return ((scores[mid - 1] ?? 0) + upperMiddle) / 2;
}
179
+ /** Default configuration for sentence-level pattern detection; detectSentencePatterns and filterPageBoundarySentences spread caller-supplied options over these values */
180
+ exports.DEFAULT_SENTENCE_PATTERN_CONFIG = {
181
+ similarityThreshold: 0.85,
182
+ minPages: 3,
183
+ samplePages: 5,
184
+ };
185
/**
 * Detect header/footer patterns at sentence level
 *
 * Algorithm:
 * 1. Sample up to `samplePages` pages around the CENTER of the document
 *    (the design assumes middle pages are content pages; cover, TOC and index
 *    are expected at the edges)
 * 2. Split each sampled page into sentences with Y coordinate (merged by Y)
 * 3. Collect the first sentence of every non-empty sampled page and the last
 *    sentence of every sampled page that has more than one sentence
 * 4. Embed each collection and calculate its median pairwise similarity
 * 5. If the median is >= `similarityThreshold`, mark it as header/footer
 *
 * Using median instead of mean provides robustness against outliers.
 *
 * This approach handles variable content like page numbers ("7 of 75")
 * by using semantic similarity instead of exact text matching.
 *
 * A collection is only evaluated when it holds at least `minPages` sentences
 * and never fewer than two, because a pairwise similarity is undefined for a
 * single sentence.
 *
 * Detections are reported on stderr via console.error.
 *
 * @param pages - Array of page data
 * @param embedder - Embedder for generating embeddings (embedBatch is used)
 * @param config - Configuration options, merged over DEFAULT_SENTENCE_PATTERN_CONFIG
 * @returns Detection result; all-false / zero similarities when detection is skipped
 */
async function detectSentencePatterns(pages, embedder, config = {}) {
    const cfg = { ...exports.DEFAULT_SENTENCE_PATTERN_CONFIG, ...config };
    const result = {
        removeFirstSentence: false,
        removeLastSentence: false,
        headerSimilarity: 0,
        footerSimilarity: 0,
    };
    // Need minimum pages to detect patterns reliably
    if (pages.length < cfg.minPages) {
        return result;
    }
    // 1. Sample pages from the CENTER of the document
    const centerIndex = Math.floor(pages.length / 2);
    const halfSample = Math.floor(cfg.samplePages / 2);
    const startIndex = Math.max(0, centerIndex - halfSample);
    const endIndex = Math.min(pages.length, startIndex + cfg.samplePages);
    const samplePages = pages.slice(startIndex, endIndex);
    // 2. Split each page into sentences with Y coordinate (merged by Y)
    const pageSentences = samplePages.map((page) => splitItemsIntoSentencesWithY(page.items));
    // 3. Collect first and last sentences from sampled pages
    const firstSentences = [];
    const lastSentences = [];
    for (const sentences of pageSentences) {
        if (sentences.length > 0) {
            firstSentences.push(sentences[0].text);
            // A page with a single sentence contributes to the header set only
            if (sentences.length > 1) {
                lastSentences.push(sentences[sentences.length - 1].text);
            }
        }
    }
    // medianPairwiseSimilarity returns 1.0 for fewer than two embeddings, so a
    // caller-supplied minPages of 0 or 1 would otherwise flag a header/footer
    // from a single sample (or none). Always require at least two samples.
    const minSamples = Math.max(2, cfg.minPages);
    // 4-5. Detect header pattern (sampled first sentences are semantically similar)
    if (firstSentences.length >= minSamples) {
        const embeddings = await embedder.embedBatch(firstSentences);
        const medianSim = medianPairwiseSimilarity(embeddings);
        result.headerSimilarity = medianSim;
        if (medianSim >= cfg.similarityThreshold) {
            result.removeFirstSentence = true;
            console.error(`Sentence header detected: sampled ${firstSentences.length} center pages (${startIndex + 1}-${endIndex}), median similarity: ${medianSim.toFixed(3)}`);
        }
    }
    // 4-5. Detect footer pattern (sampled last sentences are semantically similar)
    if (lastSentences.length >= minSamples) {
        const embeddings = await embedder.embedBatch(lastSentences);
        const medianSim = medianPairwiseSimilarity(embeddings);
        result.footerSimilarity = medianSim;
        if (medianSim >= cfg.similarityThreshold) {
            result.removeLastSentence = true;
            console.error(`Sentence footer detected: sampled ${lastSentences.length} center pages (${startIndex + 1}-${endIndex}), median similarity: ${medianSim.toFixed(3)}`);
        }
    }
    return result;
}
260
/**
 * Remove detected header/footer sentences from every page and join the rest.
 *
 * Main entry point for sentence-level header/footer filtering; intended to be
 * used instead of joinFilteredPages when an embedder is available.
 *
 * Falls back to joinFilteredPages(pages) when there are fewer pages than
 * `minPages` or when detectSentencePatterns flags neither a header nor a
 * footer. Otherwise the first and/or last sentence of each page is dropped,
 * the remaining sentences of a page are joined with spaces, and non-empty
 * pages are joined with a blank line.
 *
 * @param pages - Array of page data
 * @param embedder - Embedder for generating embeddings
 * @param config - Configuration options
 * @returns Filtered text with header/footer sentences removed
 */
async function filterPageBoundarySentences(pages, embedder, config = {}) {
    const cfg = { ...exports.DEFAULT_SENTENCE_PATTERN_CONFIG, ...config };
    // Too few pages for a pattern to be meaningful
    if (pages.length < cfg.minPages) {
        return joinFilteredPages(pages);
    }
    const patterns = await detectSentencePatterns(pages, embedder, cfg);
    const dropFirst = patterns.removeFirstSentence;
    const dropLast = patterns.removeLastSentence;
    // Nothing detected: plain join
    if (!(dropFirst || dropLast)) {
        return joinFilteredPages(pages);
    }
    const pageTexts = [];
    for (const page of pages) {
        // Sentences with Y coordinate (merged by Y)
        const sentences = splitItemsIntoSentencesWithY(page.items);
        const total = sentences.length;
        // Skip the leading sentence when a header was detected
        const from = dropFirst && total > 0 ? 1 : 0;
        // Drop the trailing sentence when a footer was detected and anything is left
        const to = dropLast && total - from > 0 ? total - 1 : total;
        const text = sentences
            .slice(from, to)
            .map((s) => s.text)
            .join(' ');
        if (text.length > 0) {
            pageTexts.push(text);
        }
    }
    return pageTexts.join('\n\n');
}
304
+ //# sourceMappingURL=pdf-filter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pdf-filter.js","sourceRoot":"","sources":["../../src/parser/pdf-filter.ts"],"names":[],"mappings":";AAAA,2BAA2B;AAC3B,wDAAwD;AACxD,uEAAuE;;;AAuEvE,8CAKC;AAmND,wDA0EC;AAeD,kEA6CC;AAlaD,0EAAoE;AA4BpE,+CAA+C;AAC/C,eAAe;AACf,+CAA+C;AAE/C;;;;;;GAMG;AACH,SAAS,aAAa,CAAC,KAA6B;IAClD,6DAA6D;IAC7D,MAAM,OAAO,GAAG,IAAI,GAAG,EAAkC,CAAA;IACzD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAC5B,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;QAClC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QAChB,OAAO,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAA;IACvB,CAAC;IAED,oFAAoF;IACpF,OAAO,CAAC,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC;SAC1B,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;SAC3B,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC,EAAE,EAAE,CAClB,KAAK;SACF,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;SACzB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;SAClB,IAAI,CAAC,GAAG,CAAC,CACb;SACA,IAAI,CAAC,IAAI,CAAC;SACV,IAAI,EAAE,CAAA;AACX,CAAC;AAED;;;;;GAKG;AACH,SAAgB,iBAAiB,CAAC,KAAiB;IACjD,OAAO,KAAK;SACT,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;SACxC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;SACjC,IAAI,CAAC,MAAM,CAAC,CAAA;AACjB,CAAC;AAcD;;;;;;;;;;GAUG;AACH,SAAS,4BAA4B,CAAC,KAA6B;IACjE,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAA;IAEjC,+DAA+D;IAC/D,MAAM,WAAW,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QAC3C,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;QACvB,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC;YAAE,OAAO,KAAK,CAAA;QACrC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;IAClB,CAAC,CAAC,CAAA;IAEF,2DAA2D;IAC3D,MAAM,UAAU,GAAyD,EAAE,CAAA;IAC3E,IAAI,QAAQ,GAAG,EAAE,CAAA;IACjB,IAAI,KAAK,GAAkB,IAAI,CAAA;IAE/B,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;QAC/B,4DAA4D;QAC5D,6EAA6E;QAC7E,IAAI,KAAK,KAAK,IAAI,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,
CAAC,EAAE,CAAC;YACnD,QAAQ,GAAG,GAAG,QAAQ,CAAC,OAAO,EAAE,IAAI,CAAA;QACtC,CAAC;QAED,UAAU,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,QAAQ,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAA;QACjD,QAAQ,IAAI,GAAG,IAAI,CAAC,IAAI,GAAG,CAAA;QAC3B,KAAK,GAAG,IAAI,CAAC,CAAC,CAAA;IAChB,CAAC;IAED,uBAAuB;IACvB,MAAM,SAAS,GAAG,IAAA,yCAAkB,EAAC,QAAQ,CAAC,CAAA;IAE9C,kEAAkE;IAClE,MAAM,cAAc,GAAoB,EAAE,CAAA;IAC1C,IAAI,WAAW,GAAG,CAAC,CAAA;IAEnB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,8CAA8C;QAC9C,MAAM,aAAa,GAAG,QAAQ,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,WAAW,CAAC,CAAA;QACpE,IAAI,aAAa,KAAK,CAAC,CAAC;YAAE,SAAQ;QAElC,4CAA4C;QAC5C,IAAI,UAAU,GAAG,WAAW,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,CAAA;QACvC,KAAK,IAAI,CAAC,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAChD,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,CAAA;YAC3B,IAAI,KAAK,IAAI,KAAK,CAAC,KAAK,IAAI,aAAa,EAAE,CAAC;gBAC1C,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;gBACrC,MAAK;YACP,CAAC;QACH,CAAC;QAED,cAAc,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,CAAA;QACtD,WAAW,GAAG,aAAa,GAAG,QAAQ,CAAC,MAAM,CAAA;IAC/C,CAAC;IAED,yCAAyC;IACzC,OAAO,iBAAiB,CAAC,cAAc,CAAC,CAAA;AAC1C,CAAC;AAED;;;;;GAKG;AACH,SAAS,iBAAiB,CAAC,SAA0B;IACnD,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAA;IAErC,MAAM,MAAM,GAAoB,EAAE,CAAA;IAClC,IAAI,OAAO,GAAyB,IAAI,CAAA;IAExC,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;YACrB,OAAO,GAAG,EAAE,GAAG,QAAQ,EAAE,CAAA;QAC3B,CAAC;aAAM,IAAI,OAAO,CAAC,CAAC,KAAK,QAAQ,CAAC,CAAC,EAAE,CAAC;YACpC,qBAAqB;YACrB,OAAO,CAAC,IAAI,IAAI,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAA;QACrC,CAAC;aAAM,CAAC;YACN,0CAA0C;YAC1C,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YACpB,OAAO,GAAG,EAAE,GAAG,QAAQ,EAAE,CAAA;QAC3B,CAAC;IACH,CAAC;IAED,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;QACrB,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IACtB,CAAC;IAED,OAAO,MAAM,CAAA;AACf,CAAC;AAED,+CAA+C;AAC/C,yCAAyC;AACzC,+CAA+C;AAE/C,wCAAwC;AACxC,8CAAmD;AAEnD;;;;;;GAMG;AACH,SAAS,wBAAwB,CAAC,UAAsB;IACtD,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,GAAG,CAAA;IAErC,MAAM,YAAY,GAAa,E
AAE,CAAA;IAEjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/C,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,CAAA;YAC1B,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,CAAA;YAC1B,IAAI,IAAI,IAAI,IAAI,EAAE,CAAC;gBACjB,YAAY,CAAC,IAAI,CAAC,IAAA,0BAAgB,EAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAA;YACjD,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAA;IAEvC,uBAAuB;IACvB,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;IAClC,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;IAE/C,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;QAClC,qCAAqC;QACrC,OAAO,CAAC,CAAC,YAAY,CAAC,GAAG,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;IACtE,CAAC;IACD,oBAAoB;IACpB,OAAO,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;AAC/B,CAAC;AAcD,iEAAiE;AACpD,QAAA,+BAA+B,GAA0B;IACpE,mBAAmB,EAAE,IAAI;IACzB,QAAQ,EAAE,CAAC;IACX,WAAW,EAAE,CAAC;CACf,CAAA;AAgBD;;;;;;;;;;;;;;;;;;;;GAoBG;AACI,KAAK,UAAU,sBAAsB,CAC1C,KAAiB,EACjB,QAA2B,EAC3B,SAAyC,EAAE;IAE3C,MAAM,GAAG,GAAG,EAAE,GAAG,uCAA+B,EAAE,GAAG,MAAM,EAAE,CAAA;IAE7D,MAAM,MAAM,GAA0B;QACpC,mBAAmB,EAAE,KAAK;QAC1B,kBAAkB,EAAE,KAAK;QACzB,gBAAgB,EAAE,CAAC;QACnB,gBAAgB,EAAE,CAAC;KACpB,CAAA;IAED,iDAAiD;IACjD,IAAI,KAAK,CAAC,MAAM,GAAG,GAAG,CAAC,QAAQ,EAAE,CAAC;QAChC,OAAO,MAAM,CAAA;IACf,CAAC;IAED,kDAAkD;IAClD,uEAAuE;IACvE,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;IAChD,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,WAAW,GAAG,CAAC,CAAC,CAAA;IAClD,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,WAAW,GAAG,UAAU,CAAC,CAAA;IACxD,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,UAAU,GAAG,GAAG,CAAC,WAAW,CAAC,CAAA;IACrE,MAAM,WAAW,GAAG,KAAK,CAAC,KAAK,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAA;IAErD,oEAAoE;IACpE,MAAM,aAAa,GAAsB,WAAW,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAChE,4BAA4B,CAAC,IAAI,CAAC,KAAK,CAAC,CACzC,CAAA;IAED,yDAAyD;IACzD,MAAM,cAAc,GAAa,EAA
E,CAAA;IACnC,MAAM,aAAa,GAAa,EAAE,CAAA;IAElC,KAAK,MAAM,SAAS,IAAI,aAAa,EAAE,CAAC;QACtC,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAE,CAAC,IAAI,CAAC,CAAA;YACvC,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACzB,aAAa,CAAC,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC,IAAI,CAAC,CAAA;YAC3D,CAAC;QACH,CAAC;IACH,CAAC;IAED,8EAA8E;IAC9E,IAAI,cAAc,CAAC,MAAM,IAAI,GAAG,CAAC,QAAQ,EAAE,CAAC;QAC1C,MAAM,UAAU,GAAG,MAAM,QAAQ,CAAC,UAAU,CAAC,cAAc,CAAC,CAAA;QAC5D,MAAM,SAAS,GAAG,wBAAwB,CAAC,UAAU,CAAC,CAAA;QACtD,MAAM,CAAC,gBAAgB,GAAG,SAAS,CAAA;QAEnC,IAAI,SAAS,IAAI,GAAG,CAAC,mBAAmB,EAAE,CAAC;YACzC,MAAM,CAAC,mBAAmB,GAAG,IAAI,CAAA;YACjC,OAAO,CAAC,KAAK,CACX,qCAAqC,cAAc,CAAC,MAAM,kBAAkB,UAAU,GAAG,CAAC,IAAI,QAAQ,yBAAyB,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CACtJ,CAAA;QACH,CAAC;IACH,CAAC;IAED,6EAA6E;IAC7E,IAAI,aAAa,CAAC,MAAM,IAAI,GAAG,CAAC,QAAQ,EAAE,CAAC;QACzC,MAAM,UAAU,GAAG,MAAM,QAAQ,CAAC,UAAU,CAAC,aAAa,CAAC,CAAA;QAC3D,MAAM,SAAS,GAAG,wBAAwB,CAAC,UAAU,CAAC,CAAA;QACtD,MAAM,CAAC,gBAAgB,GAAG,SAAS,CAAA;QAEnC,IAAI,SAAS,IAAI,GAAG,CAAC,mBAAmB,EAAE,CAAC;YACzC,MAAM,CAAC,kBAAkB,GAAG,IAAI,CAAA;YAChC,OAAO,CAAC,KAAK,CACX,qCAAqC,aAAa,CAAC,MAAM,kBAAkB,UAAU,GAAG,CAAC,IAAI,QAAQ,yBAAyB,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CACrJ,CAAA;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAA;AACf,CAAC;AAED;;;;;;;;;;;;GAYG;AACI,KAAK,UAAU,2BAA2B,CAC/C,KAAiB,EACjB,QAA2B,EAC3B,SAAyC,EAAE;IAE3C,MAAM,GAAG,GAAG,EAAE,GAAG,uCAA+B,EAAE,GAAG,MAAM,EAAE,CAAA;IAE7D,wCAAwC;IACxC,IAAI,KAAK,CAAC,MAAM,GAAG,GAAG,CAAC,QAAQ,EAAE,CAAC;QAChC,OAAO,iBAAiB,CAAC,KAAK,CAAC,CAAA;IACjC,CAAC;IAED,kBAAkB;IAClB,MAAM,QAAQ,GAAG,MAAM,sBAAsB,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,CAAC,CAAA;IAEnE,uDAAuD;IACvD,IAAI,CAAC,QAAQ,CAAC,mBAAmB,IAAI,CAAC,QAAQ,CAAC,kBAAkB,EAAE,CAAC;QAClE,OAAO,iBAAiB,CAAC,KAAK,CAAC,CAAA;IACjC,CAAC;IAED,iEAAiE;IACjE,MAAM,aAAa,GAAsB,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAC1D,4BAA4B,CAAC,IAAI,CAAC,KAAK,CAAC,CACzC,CAAA;IAED,+CAA+C;IAC/C,MAAM,oBAAoB,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC,SAAS,EAAE,EAAE;QAC3D,IAAI,OAAO,GAAG,CAAC,G
AAG,SAAS,CAAC,CAAA;QAE5B,IAAI,QAAQ,CAAC,mBAAmB,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvD,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAA;QAC5B,CAAC;QAED,IAAI,QAAQ,CAAC,kBAAkB,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtD,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAA;QAChC,CAAC;QAED,OAAO,OAAO,CAAA;IAChB,CAAC,CAAC,CAAA;IAEF,4BAA4B;IAC5B,OAAO,oBAAoB;SACxB,GAAG,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;SAC1D,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;SACjC,IAAI,CAAC,MAAM,CAAC,CAAA;AACjB,CAAC"}
@@ -0,0 +1,144 @@
1
+ import { type GroupingMode } from '../vectordb/index.js';
2
+ import { type DeleteFileInput, type IngestDataInput, type IngestFileInput, type QueryDocumentsInput } from './schemas.js';
3
+ /**
4
+ * Configuration object accepted by the RAGServer constructor
5
+ */
6
+ export interface RAGServerConfig {
7
+ /** LanceDB database path */
8
+ dbPath: string;
9
+ /** Transformers.js model identifier (originally described as a model path; TODO confirm which forms are accepted) */
10
+ modelName: string;
11
+ /** Model cache directory */
12
+ cacheDir: string;
13
+ /** Document base directory */
14
+ baseDir: string;
15
+ /** Maximum file size (documented as 100MB; the unit of this number is presumably bytes, TODO confirm) */
16
+ maxFileSize: number;
17
+ /** Maximum distance threshold for quality filtering (optional) */
18
+ maxDistance?: number;
19
+ /** Grouping mode for quality filtering (optional) */
20
+ grouping?: GroupingMode;
21
+ /** Hybrid search weight for BM25 (optional; 0.0 = vector only, 1.0 = BM25 only, default 0.6) */
22
+ hybridWeight?: number;
23
+ }
24
+ /**
25
+ * RAG server compliant with the MCP protocol
26
+ *
27
+ * Responsibilities:
28
+ * - MCP tool integration (6 tools: query_documents, ingest_file, ingest_data, list_files, status, delete_file)
29
+ * - Tool handler implementation with Zod validation
30
+ * - Error handling
31
+ * - Initialization (LanceDB, Transformers.js)
32
+ */
33
+ export declare class RAGServer {
34
+ private readonly server;
35
+ private readonly vectorStore;
36
+ private readonly embedder;
37
+ private readonly chunker;
38
+ private readonly parser;
39
+ private readonly dbPath;
40
+ constructor(config: RAGServerConfig);
41
+ /**
42
+ * Register the MCP tool handlers using the tool() API
43
+ * Note: Type casts are used to work around Zod version compatibility between project and SDK
44
+ */
45
+ private setupHandlers;
46
+ /**
47
+ * Initialize the server; the returned promise resolves with no value
48
+ */
49
+ initialize(): Promise<void>;
50
+ /**
51
+ * Close the server and release resources; the returned promise resolves with no value
52
+ */
53
+ close(): Promise<void>;
54
+ /**
55
+ * Get the current configuration as an object with dbPath and modelName
56
+ */
57
+ getConfig(): {
58
+ dbPath: string;
59
+ modelName: string;
60
+ };
61
+ /**
62
+ * Execute query_documents logic (returns plain data)
63
+ */
64
+ private executeQueryDocuments;
65
+ /**
66
+ * query_documents tool handler (for test compatibility); resolves to a single text content item
67
+ */
68
+ handleQueryDocuments(args: QueryDocumentsInput): Promise<{
69
+ content: [{
70
+ type: 'text';
71
+ text: string;
72
+ }];
73
+ }>;
74
+ /**
75
+ * Execute ingest_file logic (returns plain data)
76
+ */
77
+ private executeIngestFile;
78
+ /**
79
+ * ingest_file tool handler (for test compatibility); resolves to a single text content item
80
+ */
81
+ handleIngestFile(args: IngestFileInput): Promise<{
82
+ content: [{
83
+ type: 'text';
84
+ text: string;
85
+ }];
86
+ }>;
87
+ /**
88
+ * Execute ingest_data logic (returns plain data)
89
+ */
90
+ private executeIngestData;
91
+ /**
92
+ * ingest_data tool handler (for test compatibility); resolves to a single text content item
93
+ */
94
+ handleIngestData(args: IngestDataInput): Promise<{
95
+ content: [{
96
+ type: 'text';
97
+ text: string;
98
+ }];
99
+ }>;
100
+ /**
101
+ * Execute list_files logic (returns plain data)
102
+ */
103
+ private executeListFiles;
104
+ /**
105
+ * list_files tool handler (for test compatibility); takes no arguments and resolves to a single text content item
106
+ */
107
+ handleListFiles(): Promise<{
108
+ content: [{
109
+ type: 'text';
110
+ text: string;
111
+ }];
112
+ }>;
113
+ /**
114
+ * Execute status logic (returns plain data)
115
+ */
116
+ private executeStatus;
117
+ /**
118
+ * status tool handler (for test compatibility); takes no arguments and resolves to a single text content item
119
+ */
120
+ handleStatus(): Promise<{
121
+ content: [{
122
+ type: 'text';
123
+ text: string;
124
+ }];
125
+ }>;
126
+ /**
127
+ * Execute delete_file logic (returns plain data)
128
+ */
129
+ private executeDeleteFile;
130
+ /**
131
+ * delete_file tool handler (for test compatibility); resolves to a single text content item
132
+ */
133
+ handleDeleteFile(args: DeleteFileInput): Promise<{
134
+ content: [{
135
+ type: 'text';
136
+ text: string;
137
+ }];
138
+ }>;
139
+ /**
140
+ * Start the server; the returned promise resolves with no value
141
+ */
142
+ run(): Promise<void>;
143
+ }
144
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/server/index.ts"],"names":[],"mappings":"AAYA,OAAO,EAAE,KAAK,YAAY,EAAiC,MAAM,sBAAsB,CAAA;AAQvF,OAAO,EACL,KAAK,eAAe,EAIpB,KAAK,eAAe,EACpB,KAAK,eAAe,EAEpB,KAAK,mBAAmB,EAGzB,MAAM,cAAc,CAAA;AAMrB;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,4BAA4B;IAC5B,MAAM,EAAE,MAAM,CAAA;IACd,iCAAiC;IACjC,SAAS,EAAE,MAAM,CAAA;IACjB,4BAA4B;IAC5B,QAAQ,EAAE,MAAM,CAAA;IAChB,8BAA8B;IAC9B,OAAO,EAAE,MAAM,CAAA;IACf,gCAAgC;IAChC,WAAW,EAAE,MAAM,CAAA;IACnB,kEAAkE;IAClE,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,qDAAqD;IACrD,QAAQ,CAAC,EAAE,YAAY,CAAA;IACvB,sFAAsF;IACtF,YAAY,CAAC,EAAE,MAAM,CAAA;CACtB;AAMD;;;;;;;;GAQG;AACH,qBAAa,SAAS;IACpB,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAW;IAClC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAa;IACzC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAU;IACnC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAiB;IACzC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAgB;IACvC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAQ;gBAEnB,MAAM,EAAE,eAAe;IAqCnC;;;OAGG;IACH,OAAO,CAAC,aAAa;IAuGrB;;OAEG;IACG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAKjC;;OAEG;IACG,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAK5B;;OAEG;IACH,SAAS,IAAI;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE;IAOlD;;OAEG;YACW,qBAAqB;IA4BnC;;OAEG;IACG,oBAAoB,CACxB,IAAI,EAAE,mBAAmB,GACxB,OAAO,CAAC;QAAE,OAAO,EAAE,CAAC;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC,CAAA;KAAE,CAAC;IAiBzD;;OAEG;YACW,iBAAiB;IA6G/B;;OAEG;IACG,gBAAgB,CACpB,IAAI,EAAE,eAAe,GACpB,OAAO,CAAC;QAAE,OAAO,EAAE,CAAC;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC,CAAA;KAAE,CAAC;IAkBzD;;OAEG;YACW,iBAAiB;IA6C/B;;OAEG;IACG,gBAAgB,CACpB,IAAI,EAAE,eAAe,GACpB,OAAO,CAAC;QAAE,OAAO,EAAE,CAAC;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC,CAAA;KAAE,CAAC;IAkBzD;;OAEG;YACW,gBAAgB;IAe9B;;OAEG;IACG,eAAe,IAAI,OAAO,CAAC;QAAE,OAAO,EAAE,CAAC;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC,CAAA;KAAE,CAAC;IAiB/E;;OAEG;YACW,aAAa;IAI3B;;OAEG;IACG,YAAY,IAAI,OAAO,CAAC;QAAE,OAAO,EAAE,CAAC;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC,CAAA;KAAE,CAAC;IAiB5E;;OAEG;YACW,iBAAiB;IAyC/B;;OA
EG;IACG,gBAAgB,CACpB,IAAI,EAAE,eAAe,GACpB,OAAO,CAAC;QAAE,OAAO,EAAE,CAAC;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC,CAAA;KAAE,CAAC;IAkBzD;;OAEG;IACG,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;CAK3B"}