mcp-local-rag 0.5.6 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/README.md +2 -1
  2. package/dist/parser/html-parser.d.ts +6 -2
  3. package/dist/parser/html-parser.d.ts.map +1 -1
  4. package/dist/parser/html-parser.js +19 -10
  5. package/dist/parser/html-parser.js.map +1 -1
  6. package/dist/parser/index.d.ts +18 -7
  7. package/dist/parser/index.d.ts.map +1 -1
  8. package/dist/parser/index.js +52 -13
  9. package/dist/parser/index.js.map +1 -1
  10. package/dist/parser/pdf-filter.d.ts +4 -3
  11. package/dist/parser/pdf-filter.d.ts.map +1 -1
  12. package/dist/parser/pdf-filter.js +8 -10
  13. package/dist/parser/pdf-filter.js.map +1 -1
  14. package/dist/parser/title-extractor.d.ts +64 -0
  15. package/dist/parser/title-extractor.d.ts.map +1 -0
  16. package/dist/parser/title-extractor.js +139 -0
  17. package/dist/parser/title-extractor.js.map +1 -0
  18. package/dist/server/index.d.ts.map +1 -1
  19. package/dist/server/index.js +45 -6
  20. package/dist/server/index.js.map +1 -1
  21. package/dist/server/raw-data-utils.d.ts +32 -0
  22. package/dist/server/raw-data-utils.d.ts.map +1 -1
  23. package/dist/server/raw-data-utils.js +46 -0
  24. package/dist/server/raw-data-utils.js.map +1 -1
  25. package/dist/server/types.d.ts +6 -0
  26. package/dist/server/types.d.ts.map +1 -1
  27. package/dist/server-main.d.ts.map +1 -1
  28. package/dist/server-main.js +17 -0
  29. package/dist/server-main.js.map +1 -1
  30. package/dist/vectordb/index.d.ts +9 -94
  31. package/dist/vectordb/index.d.ts.map +1 -1
  32. package/dist/vectordb/index.js +55 -185
  33. package/dist/vectordb/index.js.map +1 -1
  34. package/dist/vectordb/search-filters.d.ts +45 -0
  35. package/dist/vectordb/search-filters.d.ts.map +1 -0
  36. package/dist/vectordb/search-filters.js +142 -0
  37. package/dist/vectordb/search-filters.js.map +1 -0
  38. package/dist/vectordb/types.d.ts +112 -0
  39. package/dist/vectordb/types.d.ts.map +1 -0
  40. package/dist/vectordb/types.js +74 -0
  41. package/dist/vectordb/types.js.map +1 -0
  42. package/package.json +1 -1
  43. package/skills/mcp-local-rag/SKILL.md +10 -0
  44. package/skills/mcp-local-rag/references/html-ingestion.md +2 -1
  45. package/skills/mcp-local-rag/references/result-refinement.md +1 -0
package/README.md CHANGED
@@ -129,7 +129,7 @@ HTML is automatically cleaned—you get the article content, not the boilerplate
129
129
 
130
130
  Search uses semantic similarity with keyword boost. This means `useEffect` finds documents containing that exact term, not just semantically similar React concepts.
131
131
 
132
- Results include text content, source file, and relevance score. Adjust result count with `limit` (1-20, default 10).
132
+ Results include text content, source file, document title, and relevance score. The document title provides context for each chunk, helping identify which document a result belongs to. Adjust result count with `limit` (1-20, default 10).
133
133
 
134
134
  ### Managing Files
135
135
 
@@ -148,6 +148,7 @@ Adjust these for your use case:
148
148
  | `RAG_HYBRID_WEIGHT` | `0.6` | Keyword boost factor. 0 = semantic only, higher = stronger keyword boost. |
149
149
  | `RAG_GROUPING` | (not set) | `similar` for top group only, `related` for top 2 groups. |
150
150
  | `RAG_MAX_DISTANCE` | (not set) | Filter out low-relevance results (e.g., `0.5`). |
151
+ | `RAG_MAX_FILES` | (not set) | Limit results to top N files (e.g., `1` for single best file). |
151
152
 
152
153
  ### Code-focused tuning
153
154
 
package/dist/parser/html-parser.d.ts CHANGED
@@ -5,10 +5,14 @@
5
5
  * 1. HTML string → JSDOM (DOM creation)
6
6
  * 2. JSDOM → Readability (main content extraction, noise removal)
7
7
  * 3. Readability result → Turndown (Markdown conversion)
8
+ * 4. Title extracted separately via extractHtmlTitle (NOT prepended to content)
8
9
  *
9
10
  * @param html - Raw HTML string
10
11
  * @param url - Source URL (used for resolving relative links)
11
- * @returns Markdown string of extracted content
12
+ * @returns Object with content (markdown) and title (extracted separately)
12
13
  */
13
- export declare function parseHtml(html: string, url: string): Promise<string>;
14
+ export declare function parseHtml(html: string, url: string): Promise<{
15
+ content: string;
16
+ title: string;
17
+ }>;
14
18
  //# sourceMappingURL=html-parser.d.ts.map
package/dist/parser/html-parser.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"html-parser.d.ts","sourceRoot":"","sources":["../../src/parser/html-parser.ts"],"names":[],"mappings":"AAsDA;;;;;;;;;;;GAWG;AACH,wBAAsB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAoD1E"}
1
+ {"version":3,"file":"html-parser.d.ts","sourceRoot":"","sources":["../../src/parser/html-parser.ts"],"names":[],"mappings":"AAuDA;;;;;;;;;;;;GAYG;AACH,wBAAsB,SAAS,CAC7B,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,GACV,OAAO,CAAC;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,CA0D7C"}
package/dist/parser/html-parser.js CHANGED
@@ -9,6 +9,7 @@ exports.parseHtml = parseHtml;
9
9
  const readability_1 = require("@mozilla/readability");
10
10
  const jsdom_1 = require("jsdom");
11
11
  const turndown_1 = __importDefault(require("turndown"));
12
+ const title_extractor_js_1 = require("./title-extractor.js");
12
13
  // ============================================
13
14
  // Turndown Service Configuration
14
15
  // ============================================
@@ -46,15 +47,16 @@ function createTurndownService() {
46
47
  * 1. HTML string → JSDOM (DOM creation)
47
48
  * 2. JSDOM → Readability (main content extraction, noise removal)
48
49
  * 3. Readability result → Turndown (Markdown conversion)
50
+ * 4. Title extracted separately via extractHtmlTitle (NOT prepended to content)
49
51
  *
50
52
  * @param html - Raw HTML string
51
53
  * @param url - Source URL (used for resolving relative links)
52
- * @returns Markdown string of extracted content
54
+ * @returns Object with content (markdown) and title (extracted separately)
53
55
  */
54
56
  async function parseHtml(html, url) {
55
57
  // Handle empty or whitespace-only HTML
56
58
  if (!html || html.trim().length === 0) {
57
- return '';
59
+ return { content: '', title: '' };
58
60
  }
59
61
  try {
60
62
  // Create DOM from HTML string
@@ -75,25 +77,32 @@ async function parseHtml(html, url) {
75
77
  // Try to get body content directly
76
78
  const bodyContent = document.body?.innerHTML || '';
77
79
  if (!bodyContent.trim()) {
78
- return '';
80
+ return { content: '', title: '' };
79
81
  }
80
82
  // Convert raw body HTML to Markdown
81
83
  const turndownService = createTurndownService();
82
- return turndownService.turndown(bodyContent).trim();
84
+ return { content: turndownService.turndown(bodyContent).trim(), title: '' };
83
85
  }
84
86
  // Convert extracted HTML content to Markdown
85
87
  const turndownService = createTurndownService();
86
88
  const markdown = turndownService.turndown(article.content);
87
- // Add title if available
88
- if (article.title) {
89
- return `# ${article.title}\n\n${markdown}`.trim();
89
+ // Extract title separately (NOT prepended to markdown content)
90
+ // Use URL-derived filename as fallback when Readability has no title
91
+ let urlFileName = '';
92
+ try {
93
+ urlFileName = new URL(url).pathname.split('/').filter(Boolean).pop() || '';
90
94
  }
91
- return markdown.trim();
95
+ catch {
96
+ // Non-URL string, empty fallback
97
+ }
98
+ const titleResult = (0, title_extractor_js_1.extractHtmlTitle)(article.title || '', urlFileName);
99
+ const title = titleResult.title;
100
+ return { content: markdown.trim(), title };
92
101
  }
93
102
  catch (error) {
94
- // Log error but don't throw - return empty string for graceful degradation
103
+ // Log error but don't throw - return empty values for graceful degradation
95
104
  console.error('Failed to parse HTML:', error);
96
- return '';
105
+ return { content: '', title: '' };
97
106
  }
98
107
  }
99
108
  //# sourceMappingURL=html-parser.js.map
package/dist/parser/html-parser.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"html-parser.js","sourceRoot":"","sources":["../../src/parser/html-parser.ts"],"names":[],"mappings":";AAAA,6CAA6C;AAC7C,2DAA2D;;;;;AAiE3D,8BAoDC;AAnHD,sDAAkD;AAClD,iCAA6B;AAC7B,wDAAsC;AActC,+CAA+C;AAC/C,iCAAiC;AACjC,+CAA+C;AAE/C;;GAEG;AACH,SAAS,qBAAqB;IAC5B,MAAM,eAAe,GAAG,IAAI,kBAAe,CAAC;QAC1C,YAAY,EAAE,KAAK,EAAE,uBAAuB;QAC5C,cAAc,EAAE,QAAQ,EAAE,0BAA0B;QACpD,gBAAgB,EAAE,GAAG,EAAE,yBAAyB;QAChD,WAAW,EAAE,GAAG,EAAE,qBAAqB;QACvC,eAAe,EAAE,IAAI,EAAE,kBAAkB;KAC1C,CAAC,CAAA;IAEF,0BAA0B;IAC1B,eAAe,CAAC,OAAO,CAAC,YAAY,EAAE;QACpC,MAAM,EAAE,CAAC,KAAK,CAAC;QACf,WAAW,EAAE,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE;YAC9B,MAAM,OAAO,GAAG,IAAe,CAAA;YAC/B,MAAM,WAAW,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,CAAA;YACjD,MAAM,IAAI,GAAG,WAAW,CAAC,CAAC,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC,CAAC,OAAO,CAAC,WAAW,CAAA;YACxE,MAAM,QAAQ,GAAG,WAAW,EAAE,SAAS,EAAE,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC,IAAI,EAAE,CAAA;YACvE,OAAO,WAAW,QAAQ,KAAK,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,YAAY,CAAA;QAC/D,CAAC;KACF,CAAC,CAAA;IAEF,OAAO,eAAe,CAAA;AACxB,CAAC;AAED,+CAA+C;AAC/C,cAAc;AACd,+CAA+C;AAE/C;;;;;;;;;;;GAWG;AACI,KAAK,UAAU,SAAS,CAAC,IAAY,EAAE,GAAW;IACvD,uCAAuC;IACvC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtC,OAAO,EAAE,CAAA;IACX,CAAC;IAED,IAAI,CAAC;QACH,8BAA8B;QAC9B,MAAM,GAAG,GAAG,IAAI,aAAK,CAAC,IAAI,EAAE;YAC1B,GAAG;YACH,yCAAyC;YACzC,UAAU,EAAE,cAAc;SAC3B,CAAC,CAAA;QAEF,MAAM,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAA;QAEpC,0CAA0C;QAC1C,MAAM,MAAM,GAAG,IAAI,yBAAW,CAAC,QAAQ,EAAE;YACvC,WAAW,EAAE,KAAK;YAClB,KAAK,EAAE,KAAK;SACb,CAAC,CAAA;QAEF,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAA8B,CAAA;QAE1D,kEAAkE;QAClE,IAAI,CAAC,OAAO,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;YACjC,mCAAmC;YACnC,MAAM,WAAW,GAAG,QAAQ,CAAC,IAAI,EAAE,SAAS,IAAI,EAAE,CAAA;YAClD,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,EAAE,CAAC;gBACxB,OAAO,EAAE,CAAA;YACX,CAAC;YAED,oCAAoC;YACpC,MAAM,eAAe,GAAG,qBAAqB,EAAE,CAAA;YAC/C,OAAO,eAAe,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,IAAI,EAAE,CAAA;QACrD,CAAC;QAED,6CAA6C;QAC7C,MAAM,eAAe,GAAG,qBAAqB,EAAE,CAAA;QAC/C,MAAM,QAAQ,GAAG,eAAe,CAAC,QAAQ,CAA
C,OAAO,CAAC,OAAO,CAAC,CAAA;QAE1D,yBAAyB;QACzB,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;YAClB,OAAO,KAAK,OAAO,CAAC,KAAK,OAAO,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAA;QACnD,CAAC;QAED,OAAO,QAAQ,CAAC,IAAI,EAAE,CAAA;IACxB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,2EAA2E;QAC3E,OAAO,CAAC,KAAK,CAAC,uBAAuB,EAAE,KAAK,CAAC,CAAA;QAC7C,OAAO,EAAE,CAAA;IACX,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"html-parser.js","sourceRoot":"","sources":["../../src/parser/html-parser.ts"],"names":[],"mappings":";AAAA,6CAA6C;AAC7C,2DAA2D;;;;;AAmE3D,8BA6DC;AA9HD,sDAAkD;AAClD,iCAA6B;AAC7B,wDAAsC;AACtC,6DAAuD;AAcvD,+CAA+C;AAC/C,iCAAiC;AACjC,+CAA+C;AAE/C;;GAEG;AACH,SAAS,qBAAqB;IAC5B,MAAM,eAAe,GAAG,IAAI,kBAAe,CAAC;QAC1C,YAAY,EAAE,KAAK,EAAE,uBAAuB;QAC5C,cAAc,EAAE,QAAQ,EAAE,0BAA0B;QACpD,gBAAgB,EAAE,GAAG,EAAE,yBAAyB;QAChD,WAAW,EAAE,GAAG,EAAE,qBAAqB;QACvC,eAAe,EAAE,IAAI,EAAE,kBAAkB;KAC1C,CAAC,CAAA;IAEF,0BAA0B;IAC1B,eAAe,CAAC,OAAO,CAAC,YAAY,EAAE;QACpC,MAAM,EAAE,CAAC,KAAK,CAAC;QACf,WAAW,EAAE,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE;YAC9B,MAAM,OAAO,GAAG,IAAe,CAAA;YAC/B,MAAM,WAAW,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,CAAA;YACjD,MAAM,IAAI,GAAG,WAAW,CAAC,CAAC,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC,CAAC,OAAO,CAAC,WAAW,CAAA;YACxE,MAAM,QAAQ,GAAG,WAAW,EAAE,SAAS,EAAE,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC,IAAI,EAAE,CAAA;YACvE,OAAO,WAAW,QAAQ,KAAK,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,YAAY,CAAA;QAC/D,CAAC;KACF,CAAC,CAAA;IAEF,OAAO,eAAe,CAAA;AACxB,CAAC;AAED,+CAA+C;AAC/C,cAAc;AACd,+CAA+C;AAE/C;;;;;;;;;;;;GAYG;AACI,KAAK,UAAU,SAAS,CAC7B,IAAY,EACZ,GAAW;IAEX,uCAAuC;IACvC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtC,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,CAAA;IACnC,CAAC;IAED,IAAI,CAAC;QACH,8BAA8B;QAC9B,MAAM,GAAG,GAAG,IAAI,aAAK,CAAC,IAAI,EAAE;YAC1B,GAAG;YACH,yCAAyC;YACzC,UAAU,EAAE,cAAc;SAC3B,CAAC,CAAA;QAEF,MAAM,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAA;QAEpC,0CAA0C;QAC1C,MAAM,MAAM,GAAG,IAAI,yBAAW,CAAC,QAAQ,EAAE;YACvC,WAAW,EAAE,KAAK;YAClB,KAAK,EAAE,KAAK;SACb,CAAC,CAAA;QAEF,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAA8B,CAAA;QAE1D,kEAAkE;QAClE,IAAI,CAAC,OAAO,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;YACjC,mCAAmC;YACnC,MAAM,WAAW,GAAG,QAAQ,CAAC,IAAI,EAAE,SAAS,IAAI,EAAE,CAAA;YAClD,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,EAAE,CAAC;gBACxB,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,CAAA;YACnC,CAAC;YAED,oCAAoC;YACpC,MAAM,eAAe,GAAG,qBAAqB,EAAE,CAAA;YAC/C,OAAO,EAAE,OAAO,EAAE,eAAe,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,IAAI
,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,CAAA;QAC7E,CAAC;QAED,6CAA6C;QAC7C,MAAM,eAAe,GAAG,qBAAqB,EAAE,CAAA;QAC/C,MAAM,QAAQ,GAAG,eAAe,CAAC,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,CAAA;QAE1D,+DAA+D;QAC/D,qEAAqE;QACrE,IAAI,WAAW,GAAG,EAAE,CAAA;QACpB,IAAI,CAAC;YACH,WAAW,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,CAAA;QAC5E,CAAC;QAAC,MAAM,CAAC;YACP,iCAAiC;QACnC,CAAC;QACD,MAAM,WAAW,GAAG,IAAA,qCAAgB,EAAC,OAAO,CAAC,KAAK,IAAI,EAAE,EAAE,WAAW,CAAC,CAAA;QACtE,MAAM,KAAK,GAAG,WAAW,CAAC,KAAK,CAAA;QAE/B,OAAO,EAAE,OAAO,EAAE,QAAQ,CAAC,IAAI,EAAE,EAAE,KAAK,EAAE,CAAA;IAC5C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,2EAA2E;QAC3E,OAAO,CAAC,KAAK,CAAC,uBAAuB,EAAE,KAAK,CAAC,CAAA;QAC7C,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,CAAA;IACnC,CAAC;AACH,CAAC"}
package/dist/parser/index.d.ts CHANGED
@@ -1,4 +1,12 @@
1
1
  import { type EmbedderInterface } from './pdf-filter.js';
2
+ /**
3
+ * Result from parsing a document, containing both content and extracted title.
4
+ * Title is display-only metadata (NOT used for search scoring).
5
+ */
6
+ export interface ParseResult {
7
+ content: string;
8
+ title: string;
9
+ }
2
10
  /**
3
11
  * DocumentParser configuration
4
12
  */
@@ -54,11 +62,11 @@ export declare class DocumentParser {
54
62
  * File parsing (auto format detection)
55
63
  *
56
64
  * @param filePath - File path to parse
57
- * @returns Parsed text
65
+ * @returns ParseResult with content and extracted title
58
66
  * @throws ValidationError - Path traversal, size exceeded, unsupported format
59
67
  * @throws FileOperationError - File read failed, parse failed
60
68
  */
61
- parseFile(filePath: string): Promise<string>;
69
+ parseFile(filePath: string): Promise<ParseResult>;
62
70
  /**
63
71
  * PDF parsing with header/footer filtering
64
72
  *
@@ -66,18 +74,21 @@ export declare class DocumentParser {
66
74
  * - Extracts text with position information (x, y, fontSize)
67
75
  * - Semantic header/footer detection using embedding similarity
68
76
  * - Uses hasEOL for proper line break handling
77
+ * - Extracts document title from PDF metadata and first page font heuristic
69
78
  *
70
79
  * @param filePath - PDF file path
71
80
  * @param embedder - Embedder for semantic header/footer detection
72
- * @returns Parsed text with header/footer removed
81
+ * @returns ParseResult with content and extracted title
73
82
  * @throws FileOperationError - File read failed, parse failed
74
83
  */
75
- parsePdf(filePath: string, embedder: EmbedderInterface): Promise<string>;
84
+ parsePdf(filePath: string, embedder: EmbedderInterface): Promise<ParseResult>;
76
85
  /**
77
86
  * DOCX parsing (using mammoth)
78
87
  *
88
+ * Uses extractRawText for content and convertToHtml additionally for title detection.
89
+ *
79
90
  * @param filePath - DOCX file path
80
- * @returns Parsed text
91
+ * @returns ParseResult with content and extracted title
81
92
  * @throws FileOperationError - File read failed, parse failed
82
93
  */
83
94
  private parseDocx;
@@ -85,7 +96,7 @@ export declare class DocumentParser {
85
96
  * TXT parsing (using fs.readFile)
86
97
  *
87
98
  * @param filePath - TXT file path
88
- * @returns Parsed text
99
+ * @returns ParseResult with content and extracted title
89
100
  * @throws FileOperationError - File read failed
90
101
  */
91
102
  private parseTxt;
@@ -93,7 +104,7 @@ export declare class DocumentParser {
93
104
  * MD parsing (using fs.readFile)
94
105
  *
95
106
  * @param filePath - MD file path
96
- * @returns Parsed text
107
+ * @returns ParseResult with content and extracted title
97
108
  * @throws FileOperationError - File read failed
98
109
  */
99
110
  private parseMd;
package/dist/parser/index.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/parser/index.ts"],"names":[],"mappings":"AAQA,OAAO,EAAE,KAAK,iBAAiB,EAA8C,MAAM,iBAAiB,CAAA;AAMpG;;GAEG;AACH,UAAU,YAAY;IACpB,uCAAuC;IACvC,OAAO,EAAE,MAAM,CAAA;IACf,gCAAgC;IAChC,WAAW,EAAE,MAAM,CAAA;CACpB;AAED;;GAEG;AACH,qBAAa,eAAgB,SAAQ,KAAK;aAGb,KAAK,CAAC,EAAE,KAAK;gBADtC,OAAO,EAAE,MAAM,EACU,KAAK,CAAC,EAAE,KAAK,YAAA;CAKzC;AAED;;GAEG;AACH,qBAAa,kBAAmB,SAAQ,KAAK;aAGhB,KAAK,CAAC,EAAE,KAAK;gBADtC,OAAO,EAAE,MAAM,EACU,KAAK,CAAC,EAAE,KAAK,YAAA;CAKzC;AAMD;;;;;;;GAOG;AACH,qBAAa,cAAc;IACzB,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAc;IACrC,6FAA6F;IAC7F,OAAO,CAAC,eAAe,CAAsB;gBAEjC,MAAM,EAAE,YAAY;IAIhC;;;;;OAKG;IACG,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAiDvD;;;;;;OAMG;IACH,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI;IAgBxC;;;;;;;OAOG;IACG,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAmBlD;;;;;;;;;;;;OAYG;IACG,QAAQ,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,iBAAiB,GAAG,OAAO,CAAC,MAAM,CAAC;IA4C9E;;;;;;OAMG;YACW,SAAS;IAUvB;;;;;;OAMG;YACW,QAAQ;IAUtB;;;;;;OAMG;YACW,OAAO;CAStB"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/parser/index.ts"],"names":[],"mappings":"AASA,OAAO,EAAE,KAAK,iBAAiB,EAA8C,MAAM,iBAAiB,CAAA;AAYpG;;;GAGG;AACH,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,EAAE,MAAM,CAAA;CACd;AAED;;GAEG;AACH,UAAU,YAAY;IACpB,uCAAuC;IACvC,OAAO,EAAE,MAAM,CAAA;IACf,gCAAgC;IAChC,WAAW,EAAE,MAAM,CAAA;CACpB;AAED;;GAEG;AACH,qBAAa,eAAgB,SAAQ,KAAK;aAGb,KAAK,CAAC,EAAE,KAAK;gBADtC,OAAO,EAAE,MAAM,EACU,KAAK,CAAC,EAAE,KAAK,YAAA;CAKzC;AAED;;GAEG;AACH,qBAAa,kBAAmB,SAAQ,KAAK;aAGhB,KAAK,CAAC,EAAE,KAAK;gBADtC,OAAO,EAAE,MAAM,EACU,KAAK,CAAC,EAAE,KAAK,YAAA;CAKzC;AAMD;;;;;;;GAOG;AACH,qBAAa,cAAc;IACzB,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAc;IACrC,6FAA6F;IAC7F,OAAO,CAAC,eAAe,CAAsB;gBAEjC,MAAM,EAAE,YAAY;IAIhC;;;;;OAKG;IACG,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAiDvD;;;;;;OAMG;IACH,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI;IAgBxC;;;;;;;OAOG;IACG,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC;IAmBvD;;;;;;;;;;;;;OAaG;IACG,QAAQ,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,iBAAiB,GAAG,OAAO,CAAC,WAAW,CAAC;IAqEnF;;;;;;;;OAQG;YACW,SAAS;IAqBvB;;;;;;OAMG;YACW,QAAQ;IAYtB;;;;;;OAMG;YACW,OAAO;CAWtB"}
package/dist/parser/index.js CHANGED
@@ -10,7 +10,9 @@ const promises_1 = require("node:fs/promises");
10
10
  const node_path_1 = require("node:path");
11
11
  const mammoth_1 = __importDefault(require("mammoth"));
12
12
  const pdf_mjs_1 = require("pdfjs-dist/legacy/build/pdf.mjs");
13
+ const index_js_1 = require("../chunker/index.js");
13
14
  const pdf_filter_js_1 = require("./pdf-filter.js");
15
+ const title_extractor_js_1 = require("./title-extractor.js");
14
16
  /**
15
17
  * Validation error (equivalent to 400)
16
18
  */
@@ -118,7 +120,7 @@ class DocumentParser {
118
120
  * File parsing (auto format detection)
119
121
  *
120
122
  * @param filePath - File path to parse
121
- * @returns Parsed text
123
+ * @returns ParseResult with content and extracted title
122
124
  * @throws ValidationError - Path traversal, size exceeded, unsupported format
123
125
  * @throws FileOperationError - File read failed, parse failed
124
126
  */
@@ -146,10 +148,11 @@ class DocumentParser {
146
148
  * - Extracts text with position information (x, y, fontSize)
147
149
  * - Semantic header/footer detection using embedding similarity
148
150
  * - Uses hasEOL for proper line break handling
151
+ * - Extracts document title from PDF metadata and first page font heuristic
149
152
  *
150
153
  * @param filePath - PDF file path
151
154
  * @param embedder - Embedder for semantic header/footer detection
152
- * @returns Parsed text with header/footer removed
155
+ * @returns ParseResult with content and extracted title
153
156
  * @throws FileOperationError - File read failed, parse failed
154
157
  */
155
158
  async parsePdf(filePath, embedder) {
@@ -163,6 +166,9 @@ class DocumentParser {
163
166
  useSystemFonts: true,
164
167
  isEvalSupported: false,
165
168
  }).promise;
169
+ // Extract metadata for title extraction
170
+ const metadata = await pdf.getMetadata();
171
+ const metadataTitle = metadata?.info?.['Title'];
166
172
  // Extract text with position information from each page
167
173
  const pages = [];
168
174
  for (let i = 1; i <= pdf.numPages; i++) {
@@ -179,11 +185,30 @@ class DocumentParser {
179
185
  }));
180
186
  pages.push({ pageNum: i, items });
181
187
  }
182
- // Apply sentence-level header/footer filtering
188
+ // Apply sentence-level header/footer filtering (returns per-page filtered text)
183
189
  // This handles variable content like page numbers ("7 of 75") using semantic similarity
184
- const text = await (0, pdf_filter_js_1.filterPageBoundarySentences)(pages, embedder);
190
+ const filteredPages = await (0, pdf_filter_js_1.filterPageBoundarySentences)(pages, embedder);
191
+ const text = filteredPages.filter((t) => t.length > 0).join('\n\n');
192
+ // Extract title from filtered page 1 via semantic chunking
193
+ // Isolated try-catch: title extraction failure should not abort PDF ingestion
194
+ const fileName = (0, node_path_1.basename)(filePath);
195
+ let firstPageChunkText;
196
+ try {
197
+ const filteredPage1 = filteredPages[0];
198
+ if (filteredPage1 && filteredPage1.trim().length > 0) {
199
+ const chunker = new index_js_1.SemanticChunker();
200
+ const page1Chunks = await chunker.chunkText(filteredPage1, embedder);
201
+ if (page1Chunks.length > 0) {
202
+ firstPageChunkText = page1Chunks[0].text;
203
+ }
204
+ }
205
+ }
206
+ catch (titleError) {
207
+ console.error(`Title extraction failed, falling back to filename: ${titleError}`);
208
+ }
209
+ const titleResult = (0, title_extractor_js_1.extractPdfTitle)(metadataTitle, firstPageChunkText, fileName);
185
210
  console.error(`Parsed PDF: ${filePath} (${text.length} characters, ${pdf.numPages} pages)`);
186
- return text;
211
+ return { content: text, title: titleResult.title };
187
212
  }
188
213
  catch (error) {
189
214
  throw new FileOperationError(`Failed to parse PDF: ${filePath}`, error);
@@ -192,15 +217,25 @@ class DocumentParser {
192
217
  /**
193
218
  * DOCX parsing (using mammoth)
194
219
  *
220
+ * Uses extractRawText for content and convertToHtml additionally for title detection.
221
+ *
195
222
  * @param filePath - DOCX file path
196
- * @returns Parsed text
223
+ * @returns ParseResult with content and extracted title
197
224
  * @throws FileOperationError - File read failed, parse failed
198
225
  */
199
226
  async parseDocx(filePath) {
200
227
  try {
201
- const result = await mammoth_1.default.extractRawText({ path: filePath });
202
- console.error(`Parsed DOCX: ${filePath} (${result.value.length} characters)`);
203
- return result.value;
228
+ // Read file once and pass buffer to both mammoth calls
229
+ const buffer = await (0, promises_1.readFile)(filePath);
230
+ // Use extractRawText for content (unchanged behavior)
231
+ const result = await mammoth_1.default.extractRawText({ buffer });
232
+ const rawText = result.value;
233
+ // Use convertToHtml additionally for title extraction (first <h1>)
234
+ const htmlResult = await mammoth_1.default.convertToHtml({ buffer });
235
+ const fileName = (0, node_path_1.basename)(filePath);
236
+ const titleResult = (0, title_extractor_js_1.extractDocxTitle)(htmlResult.value, fileName);
237
+ console.error(`Parsed DOCX: ${filePath} (${rawText.length} characters)`);
238
+ return { content: rawText, title: titleResult.title };
204
239
  }
205
240
  catch (error) {
206
241
  throw new FileOperationError(`Failed to parse DOCX: ${filePath}`, error);
@@ -210,14 +245,16 @@ class DocumentParser {
210
245
  * TXT parsing (using fs.readFile)
211
246
  *
212
247
  * @param filePath - TXT file path
213
- * @returns Parsed text
248
+ * @returns ParseResult with content and extracted title
214
249
  * @throws FileOperationError - File read failed
215
250
  */
216
251
  async parseTxt(filePath) {
217
252
  try {
218
253
  const text = await (0, promises_1.readFile)(filePath, 'utf-8');
254
+ const fileName = (0, node_path_1.basename)(filePath);
255
+ const titleResult = (0, title_extractor_js_1.extractTxtTitle)(text, fileName);
219
256
  console.error(`Parsed TXT: ${filePath} (${text.length} characters)`);
220
- return text;
257
+ return { content: text, title: titleResult.title };
221
258
  }
222
259
  catch (error) {
223
260
  throw new FileOperationError(`Failed to parse TXT: ${filePath}`, error);
@@ -227,14 +264,16 @@ class DocumentParser {
227
264
  * MD parsing (using fs.readFile)
228
265
  *
229
266
  * @param filePath - MD file path
230
- * @returns Parsed text
267
+ * @returns ParseResult with content and extracted title
231
268
  * @throws FileOperationError - File read failed
232
269
  */
233
270
  async parseMd(filePath) {
234
271
  try {
235
272
  const text = await (0, promises_1.readFile)(filePath, 'utf-8');
273
+ const fileName = (0, node_path_1.basename)(filePath);
274
+ const titleResult = (0, title_extractor_js_1.extractMarkdownTitle)(text, fileName);
236
275
  console.error(`Parsed MD: ${filePath} (${text.length} characters)`);
237
- return text;
276
+ return { content: text, title: titleResult.title };
238
277
  }
239
278
  catch (error) {
240
279
  throw new FileOperationError(`Failed to parse MD: ${filePath}`, error);
package/dist/parser/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/parser/index.ts"],"names":[],"mappings":";AAAA,6DAA6D;;;;;;AAE7D,qCAAkC;AAClC,+CAA4D;AAC5D,yCAA6D;AAC7D,sDAA6B;AAC7B,6DAA6D;AAE7D,mDAAoG;AAgBpG;;GAEG;AACH,MAAa,eAAgB,SAAQ,KAAK;IACxC,YACE,OAAe,EACU,KAAa;QAEtC,KAAK,CAAC,OAAO,CAAC,CAAA;QAFW,UAAK,GAAL,KAAK,CAAQ;QAGtC,IAAI,CAAC,IAAI,GAAG,iBAAiB,CAAA;IAC/B,CAAC;CACF;AARD,0CAQC;AAED;;GAEG;AACH,MAAa,kBAAmB,SAAQ,KAAK;IAC3C,YACE,OAAe,EACU,KAAa;QAEtC,KAAK,CAAC,OAAO,CAAC,CAAA;QAFW,UAAK,GAAL,KAAK,CAAQ;QAGtC,IAAI,CAAC,IAAI,GAAG,oBAAoB,CAAA;IAClC,CAAC;CACF;AARD,gDAQC;AAED,+CAA+C;AAC/C,uBAAuB;AACvB,+CAA+C;AAE/C;;;;;;;GAOG;AACH,MAAa,cAAc;IAKzB,YAAY,MAAoB;QAHhC,6FAA6F;QACrF,oBAAe,GAAkB,IAAI,CAAA;QAG3C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAA;IACtB,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,gBAAgB,CAAC,QAAgB;QACrC,wDAAwD;QACxD,IAAI,CAAC,IAAA,sBAAU,EAAC,QAAQ,CAAC,EAAE,CAAC;YAC1B,MAAM,IAAI,eAAe,CACvB,8CAA8C,QAAQ,qDAAqD,CAC5G,CAAA;QACH,CAAC;QAED,oEAAoE;QACpE,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,CAAC;YAC1B,MAAM,QAAQ,GAAG,MAAM,IAAA,mBAAQ,EAAC,IAAA,mBAAO,EAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAA;YAC7D,uDAAuD;YACvD,IAAI,CAAC,eAAe,GAAG,QAAQ,CAAC,QAAQ,CAAC,eAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,GAAG,eAAG,CAAA;QAC3E,CAAC;QAED,uDAAuD;QACvD,IAAI,YAAoB,CAAA;QACxB,IAAI,CAAC;YACH,YAAY,GAAG,MAAM,IAAA,mBAAQ,EAAC,QAAQ,CAAC,CAAA;QACzC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,sDAAsD;YACtD,iEAAiE;YACjE,oEAAoE;YACpE,4EAA4E;YAC5E,MAAM,SAAS,GAAG,MAAM,IAAA,gBAAK,EAAC,QAAQ,CAAC;iBACpC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,cAAc,EAAE,CAAC;iBACvC,KAAK,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAA;YAErB,IAAI,SAAS,EAAE,CAAC;gBACd,MAAM,IAAI,eAAe,CACvB,6BAA6B,QAAQ,kDAAkD,EACvF,KAAc,CACf,CAAA;YACH,CAAC;YAED,0EAA0E;YAC1E,kFAAkF;YAClF,2FAA2F;YAC3F,YAAY,GAAG,IAAA,mBAAO,EAAC,QAAQ,CAAC,CAAA;QAClC,CAAC;QAED,4CAA4C;QAC5C,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,IAAI,CAAC,eAAe,CAAC,EAAE,CAAC;YACnD,MAAM,IAAI,eAAe,CACvB,sCAAsC,IAAI,CAAC,eAAe,sCAAsC,QAAQ,EAAE,CAC3G,CAAA;QACH,CAAC;IACH,CAAC;IAED;;;;;;OAMG;IACH,gBAAgB,CAAC,QAAgB;QAC/B,IAAI,CAAC
;YACH,MAAM,KAAK,GAAG,IAAA,kBAAQ,EAAC,QAAQ,CAAC,CAAA;YAChC,IAAI,KAAK,CAAC,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;gBACzC,MAAM,IAAI,eAAe,CACvB,4BAA4B,KAAK,CAAC,IAAI,MAAM,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CACtE,CAAA;YACH,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,KAAK,YAAY,eAAe,EAAE,CAAC;gBACrC,MAAM,KAAK,CAAA;YACb,CAAC;YACD,MAAM,IAAI,kBAAkB,CAAC,8BAA8B,QAAQ,EAAE,EAAE,KAAc,CAAC,CAAA;QACxF,CAAC;IACH,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,SAAS,CAAC,QAAgB;QAC9B,aAAa;QACb,MAAM,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAA;QACrC,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAA;QAE/B,gDAAgD;QAChD,MAAM,GAAG,GAAG,IAAA,mBAAO,EAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAA;QAC3C,QAAQ,GAAG,EAAE,CAAC;YACZ,KAAK,OAAO;gBACV,OAAO,MAAM,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAA;YACvC,KAAK,MAAM;gBACT,OAAO,MAAM,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAA;YACtC,KAAK,KAAK;gBACR,OAAO,MAAM,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAA;YACrC;gBACE,MAAM,IAAI,eAAe,CAAC,4BAA4B,GAAG,EAAE,CAAC,CAAA;QAChE,CAAC;IACH,CAAC;IAED;;;;;;;;;;;;OAYG;IACH,KAAK,CAAC,QAAQ,CAAC,QAAgB,EAAE,QAA2B;QAC1D,aAAa;QACb,MAAM,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAA;QACrC,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAA;QAE/B,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAA,mBAAQ,EAAC,QAAQ,CAAC,CAAA;YACvC,MAAM,GAAG,GAAG,MAAM,IAAA,qBAAW,EAAC;gBAC5B,IAAI,EAAE,IAAI,UAAU,CAAC,MAAM,CAAC;gBAC5B,cAAc,EAAE,IAAI;gBACpB,eAAe,EAAE,KAAK;aACvB,CAAC,CAAC,OAAO,CAAA;YAEV,wDAAwD;YACxD,MAAM,KAAK,GAAe,EAAE,CAAA;YAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,GAAG,CAAC,QAAQ,EAAE,CAAC,EAAE,EAAE,CAAC;gBACvC,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAA;gBACjC,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,cAAc,EAAE,CAAA;gBAE/C,MAAM,KAAK,GAAG,WAAW,CAAC,KAAK;qBAC5B,MAAM,CAAC,CAAC,IAAI,EAAoB,EAAE,CAAC,KAAK,IAAI,IAAI,CAAC;qBACjD,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;oBACd,IAAI,EAAE,IAAI,CAAC,GAAG;oBACd,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;oBACpB,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;oBACpB,QAAQ,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;oBACrC,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,KAAK;iBAC7B,CAAC,CAAC,CAAA;gBAEL,KAAK,CAAC
,IAAI,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,CAAA;YACnC,CAAC;YAED,+CAA+C;YAC/C,wFAAwF;YACxF,MAAM,IAAI,GAAG,MAAM,IAAA,2CAA2B,EAAC,KAAK,EAAE,QAAQ,CAAC,CAAA;YAE/D,OAAO,CAAC,KAAK,CAAC,eAAe,QAAQ,KAAK,IAAI,CAAC,MAAM,gBAAgB,GAAG,CAAC,QAAQ,SAAS,CAAC,CAAA;YAE3F,OAAO,IAAI,CAAA;QACb,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,kBAAkB,CAAC,wBAAwB,QAAQ,EAAE,EAAE,KAAc,CAAC,CAAA;QAClF,CAAC;IACH,CAAC;IAED;;;;;;OAMG;IACK,KAAK,CAAC,SAAS,CAAC,QAAgB;QACtC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,iBAAO,CAAC,cAAc,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAA;YAC/D,OAAO,CAAC,KAAK,CAAC,gBAAgB,QAAQ,KAAK,MAAM,CAAC,KAAK,CAAC,MAAM,cAAc,CAAC,CAAA;YAC7E,OAAO,MAAM,CAAC,KAAK,CAAA;QACrB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,kBAAkB,CAAC,yBAAyB,QAAQ,EAAE,EAAE,KAAc,CAAC,CAAA;QACnF,CAAC;IACH,CAAC;IAED;;;;;;OAMG;IACK,KAAK,CAAC,QAAQ,CAAC,QAAgB;QACrC,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,IAAA,mBAAQ,EAAC,QAAQ,EAAE,OAAO,CAAC,CAAA;YAC9C,OAAO,CAAC,KAAK,CAAC,eAAe,QAAQ,KAAK,IAAI,CAAC,MAAM,cAAc,CAAC,CAAA;YACpE,OAAO,IAAI,CAAA;QACb,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,kBAAkB,CAAC,wBAAwB,QAAQ,EAAE,EAAE,KAAc,CAAC,CAAA;QAClF,CAAC;IACH,CAAC;IAED;;;;;;OAMG;IACK,KAAK,CAAC,OAAO,CAAC,QAAgB;QACpC,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,IAAA,mBAAQ,EAAC,QAAQ,EAAE,OAAO,CAAC,CAAA;YAC9C,OAAO,CAAC,KAAK,CAAC,cAAc,QAAQ,KAAK,IAAI,CAAC,MAAM,cAAc,CAAC,CAAA;YACnE,OAAO,IAAI,CAAA;QACb,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,kBAAkB,CAAC,uBAAuB,QAAQ,EAAE,EAAE,KAAc,CAAC,CAAA;QACjF,CAAC;IACH,CAAC;CACF;AA7ND,wCA6NC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/parser/index.ts"],"names":[],"mappings":";AAAA,6DAA6D;;;;;;AAE7D,qCAAkC;AAClC,+CAA4D;AAC5D,yCAAuE;AACvE,sDAA6B;AAC7B,6DAA6D;AAE7D,kDAAqD;AACrD,mDAAoG;AACpG,6DAK6B;AAyB7B;;GAEG;AACH,MAAa,eAAgB,SAAQ,KAAK;IACxC,YACE,OAAe,EACU,KAAa;QAEtC,KAAK,CAAC,OAAO,CAAC,CAAA;QAFW,UAAK,GAAL,KAAK,CAAQ;QAGtC,IAAI,CAAC,IAAI,GAAG,iBAAiB,CAAA;IAC/B,CAAC;CACF;AARD,0CAQC;AAED;;GAEG;AACH,MAAa,kBAAmB,SAAQ,KAAK;IAC3C,YACE,OAAe,EACU,KAAa;QAEtC,KAAK,CAAC,OAAO,CAAC,CAAA;QAFW,UAAK,GAAL,KAAK,CAAQ;QAGtC,IAAI,CAAC,IAAI,GAAG,oBAAoB,CAAA;IAClC,CAAC;CACF;AARD,gDAQC;AAED,+CAA+C;AAC/C,uBAAuB;AACvB,+CAA+C;AAE/C;;;;;;;GAOG;AACH,MAAa,cAAc;IAKzB,YAAY,MAAoB;QAHhC,6FAA6F;QACrF,oBAAe,GAAkB,IAAI,CAAA;QAG3C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAA;IACtB,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,gBAAgB,CAAC,QAAgB;QACrC,wDAAwD;QACxD,IAAI,CAAC,IAAA,sBAAU,EAAC,QAAQ,CAAC,EAAE,CAAC;YAC1B,MAAM,IAAI,eAAe,CACvB,8CAA8C,QAAQ,qDAAqD,CAC5G,CAAA;QACH,CAAC;QAED,oEAAoE;QACpE,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,CAAC;YAC1B,MAAM,QAAQ,GAAG,MAAM,IAAA,mBAAQ,EAAC,IAAA,mBAAO,EAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAA;YAC7D,uDAAuD;YACvD,IAAI,CAAC,eAAe,GAAG,QAAQ,CAAC,QAAQ,CAAC,eAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,GAAG,eAAG,CAAA;QAC3E,CAAC;QAED,uDAAuD;QACvD,IAAI,YAAoB,CAAA;QACxB,IAAI,CAAC;YACH,YAAY,GAAG,MAAM,IAAA,mBAAQ,EAAC,QAAQ,CAAC,CAAA;QACzC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,sDAAsD;YACtD,iEAAiE;YACjE,oEAAoE;YACpE,4EAA4E;YAC5E,MAAM,SAAS,GAAG,MAAM,IAAA,gBAAK,EAAC,QAAQ,CAAC;iBACpC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,cAAc,EAAE,CAAC;iBACvC,KAAK,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAA;YAErB,IAAI,SAAS,EAAE,CAAC;gBACd,MAAM,IAAI,eAAe,CACvB,6BAA6B,QAAQ,kDAAkD,EACvF,KAAc,CACf,CAAA;YACH,CAAC;YAED,0EAA0E;YAC1E,kFAAkF;YAClF,2FAA2F;YAC3F,YAAY,GAAG,IAAA,mBAAO,EAAC,QAAQ,CAAC,CAAA;QAClC,CAAC;QAED,4CAA4C;QAC5C,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,IAAI,CAAC,eAAe,CAAC,EAAE,CAAC;YACnD,MAAM,IAAI,eAAe,CACvB,sCAAsC,IAAI,CAAC,eAAe,sCAAsC,QAAQ,EAAE,CAC3G,CAAA;QACH,CAAC;IACH,CAAC;IAED;;;;;;OAMG;IACH,gBAAgB,
CAAC,QAAgB;QAC/B,IAAI,CAAC;YACH,MAAM,KAAK,GAAG,IAAA,kBAAQ,EAAC,QAAQ,CAAC,CAAA;YAChC,IAAI,KAAK,CAAC,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;gBACzC,MAAM,IAAI,eAAe,CACvB,4BAA4B,KAAK,CAAC,IAAI,MAAM,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CACtE,CAAA;YACH,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,KAAK,YAAY,eAAe,EAAE,CAAC;gBACrC,MAAM,KAAK,CAAA;YACb,CAAC;YACD,MAAM,IAAI,kBAAkB,CAAC,8BAA8B,QAAQ,EAAE,EAAE,KAAc,CAAC,CAAA;QACxF,CAAC;IACH,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,SAAS,CAAC,QAAgB;QAC9B,aAAa;QACb,MAAM,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAA;QACrC,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAA;QAE/B,gDAAgD;QAChD,MAAM,GAAG,GAAG,IAAA,mBAAO,EAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAA;QAC3C,QAAQ,GAAG,EAAE,CAAC;YACZ,KAAK,OAAO;gBACV,OAAO,MAAM,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAA;YACvC,KAAK,MAAM;gBACT,OAAO,MAAM,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAA;YACtC,KAAK,KAAK;gBACR,OAAO,MAAM,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAA;YACrC;gBACE,MAAM,IAAI,eAAe,CAAC,4BAA4B,GAAG,EAAE,CAAC,CAAA;QAChE,CAAC;IACH,CAAC;IAED;;;;;;;;;;;;;OAaG;IACH,KAAK,CAAC,QAAQ,CAAC,QAAgB,EAAE,QAA2B;QAC1D,aAAa;QACb,MAAM,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAA;QACrC,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAA;QAE/B,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAA,mBAAQ,EAAC,QAAQ,CAAC,CAAA;YACvC,MAAM,GAAG,GAAG,MAAM,IAAA,qBAAW,EAAC;gBAC5B,IAAI,EAAE,IAAI,UAAU,CAAC,MAAM,CAAC;gBAC5B,cAAc,EAAE,IAAI;gBACpB,eAAe,EAAE,KAAK;aACvB,CAAC,CAAC,OAAO,CAAA;YAEV,wCAAwC;YACxC,MAAM,QAAQ,GAAG,MAAM,GAAG,CAAC,WAAW,EAAE,CAAA;YACxC,MAAM,aAAa,GAAI,QAAQ,EAAE,IAAgC,EAAE,CAAC,OAAO,CAE9D,CAAA;YAEb,wDAAwD;YACxD,MAAM,KAAK,GAAe,EAAE,CAAA;YAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,GAAG,CAAC,QAAQ,EAAE,CAAC,EAAE,EAAE,CAAC;gBACvC,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAA;gBACjC,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,cAAc,EAAE,CAAA;gBAE/C,MAAM,KAAK,GAAG,WAAW,CAAC,KAAK;qBAC5B,MAAM,CAAC,CAAC,IAAI,EAAoB,EAAE,CAAC,KAAK,IAAI,IAAI,CAAC;qBACjD,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;oBACd,IAAI,EAAE,IAAI,CAAC,GAAG;oBACd,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;oBACpB,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAA
C;oBACpB,QAAQ,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;oBACrC,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,KAAK;iBAC7B,CAAC,CAAC,CAAA;gBAEL,KAAK,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,CAAA;YACnC,CAAC;YAED,gFAAgF;YAChF,wFAAwF;YACxF,MAAM,aAAa,GAAG,MAAM,IAAA,2CAA2B,EAAC,KAAK,EAAE,QAAQ,CAAC,CAAA;YACxE,MAAM,IAAI,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;YAEnE,2DAA2D;YAC3D,8EAA8E;YAC9E,MAAM,QAAQ,GAAG,IAAA,oBAAQ,EAAC,QAAQ,CAAC,CAAA;YACnC,IAAI,kBAAsC,CAAA;YAC1C,IAAI,CAAC;gBACH,MAAM,aAAa,GAAG,aAAa,CAAC,CAAC,CAAC,CAAA;gBACtC,IAAI,aAAa,IAAI,aAAa,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACrD,MAAM,OAAO,GAAG,IAAI,0BAAe,EAAE,CAAA;oBACrC,MAAM,WAAW,GAAG,MAAM,OAAO,CAAC,SAAS,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAA;oBACpE,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;wBAC3B,kBAAkB,GAAI,WAAW,CAAC,CAAC,CAAsB,CAAC,IAAI,CAAA;oBAChE,CAAC;gBACH,CAAC;YACH,CAAC;YAAC,OAAO,UAAU,EAAE,CAAC;gBACpB,OAAO,CAAC,KAAK,CAAC,sDAAsD,UAAU,EAAE,CAAC,CAAA;YACnF,CAAC;YACD,MAAM,WAAW,GAAG,IAAA,oCAAe,EAAC,aAAa,EAAE,kBAAkB,EAAE,QAAQ,CAAC,CAAA;YAEhF,OAAO,CAAC,KAAK,CAAC,eAAe,QAAQ,KAAK,IAAI,CAAC,MAAM,gBAAgB,GAAG,CAAC,QAAQ,SAAS,CAAC,CAAA;YAE3F,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,WAAW,CAAC,KAAK,EAAE,CAAA;QACpD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,kBAAkB,CAAC,wBAAwB,QAAQ,EAAE,EAAE,KAAc,CAAC,CAAA;QAClF,CAAC;IACH,CAAC;IAED;;;;;;;;OAQG;IACK,KAAK,CAAC,SAAS,CAAC,QAAgB;QACtC,IAAI,CAAC;YACH,uDAAuD;YACvD,MAAM,MAAM,GAAG,MAAM,IAAA,mBAAQ,EAAC,QAAQ,CAAC,CAAA;YAEvC,sDAAsD;YACtD,MAAM,MAAM,GAAG,MAAM,iBAAO,CAAC,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC,CAAA;YACvD,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,CAAA;YAE5B,mEAAmE;YACnE,MAAM,UAAU,GAAG,MAAM,iBAAO,CAAC,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC,CAAA;YAC1D,MAAM,QAAQ,GAAG,IAAA,oBAAQ,EAAC,QAAQ,CAAC,CAAA;YACnC,MAAM,WAAW,GAAG,IAAA,qCAAgB,EAAC,UAAU,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAA;YAEhE,OAAO,CAAC,KAAK,CAAC,gBAAgB,QAAQ,KAAK,OAAO,CAAC,MAAM,cAAc,CAAC,CAAA;YACxE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,CAAC,KAAK,EAAE,CAAA;QACvD,CAAC;QAAC
,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,kBAAkB,CAAC,yBAAyB,QAAQ,EAAE,EAAE,KAAc,CAAC,CAAA;QACnF,CAAC;IACH,CAAC;IAED;;;;;;OAMG;IACK,KAAK,CAAC,QAAQ,CAAC,QAAgB;QACrC,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,IAAA,mBAAQ,EAAC,QAAQ,EAAE,OAAO,CAAC,CAAA;YAC9C,MAAM,QAAQ,GAAG,IAAA,oBAAQ,EAAC,QAAQ,CAAC,CAAA;YACnC,MAAM,WAAW,GAAG,IAAA,oCAAe,EAAC,IAAI,EAAE,QAAQ,CAAC,CAAA;YACnD,OAAO,CAAC,KAAK,CAAC,eAAe,QAAQ,KAAK,IAAI,CAAC,MAAM,cAAc,CAAC,CAAA;YACpE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,WAAW,CAAC,KAAK,EAAE,CAAA;QACpD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,kBAAkB,CAAC,wBAAwB,QAAQ,EAAE,EAAE,KAAc,CAAC,CAAA;QAClF,CAAC;IACH,CAAC;IAED;;;;;;OAMG;IACK,KAAK,CAAC,OAAO,CAAC,QAAgB;QACpC,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,IAAA,mBAAQ,EAAC,QAAQ,EAAE,OAAO,CAAC,CAAA;YAC9C,MAAM,QAAQ,GAAG,IAAA,oBAAQ,EAAC,QAAQ,CAAC,CAAA;YACnC,MAAM,WAAW,GAAG,IAAA,yCAAoB,EAAC,IAAI,EAAE,QAAQ,CAAC,CAAA;YACxD,OAAO,CAAC,KAAK,CAAC,cAAc,QAAQ,KAAK,IAAI,CAAC,MAAM,cAAc,CAAC,CAAA;YACnE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,WAAW,CAAC,KAAK,EAAE,CAAA;QACpD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,kBAAkB,CAAC,uBAAuB,QAAQ,EAAE,EAAE,KAAc,CAAC,CAAA;QACjF,CAAC;IACH,CAAC;CACF;AAxQD,wCAwQC"}
@@ -71,17 +71,18 @@ interface SentencePatternResult {
71
71
  */
72
72
  export declare function detectSentencePatterns(pages: PageData[], embedder: EmbedderInterface, config?: Partial<SentencePatternConfig>): Promise<SentencePatternResult>;
73
73
  /**
74
- * Filter page boundary sentences and join into text
74
+ * Filter page boundary sentences and return per-page filtered text
75
75
  *
76
76
  * This is the main entry point for sentence-level header/footer filtering.
77
77
  * It detects and removes repeating sentence patterns at page boundaries.
78
+ * Returns an array of filtered text per page, preserving page boundaries.
78
79
  *
79
80
  * Use this instead of joinFilteredPages when embedder is available.
80
81
  *
81
82
  * @param pages - Array of page data
82
83
  * @param embedder - Embedder for generating embeddings
83
84
  * @param config - Configuration options
84
- * @returns Filtered text with header/footer sentences removed
85
+ * @returns Array of filtered text strings, one per page
85
86
  */
86
- export declare function filterPageBoundarySentences(pages: PageData[], embedder: EmbedderInterface, config?: Partial<SentencePatternConfig>): Promise<string>;
87
+ export declare function filterPageBoundarySentences(pages: PageData[], embedder: EmbedderInterface, config?: Partial<SentencePatternConfig>): Promise<string[]>;
87
88
  //# sourceMappingURL=pdf-filter.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"pdf-filter.d.ts","sourceRoot":"","sources":["../../src/parser/pdf-filter.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAA;AAIvE,YAAY,EAAE,iBAAiB,EAAE,CAAA;AAMjC;;GAEG;AACH,UAAU,oBAAoB;IAC5B,IAAI,EAAE,MAAM,CAAA;IACZ,CAAC,EAAE,MAAM,CAAA;IACT,CAAC,EAAE,MAAM,CAAA;IACT,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,EAAE,OAAO,CAAA;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,EAAE,oBAAoB,EAAE,CAAA;CAC9B;AAoCD;;;;;GAKG;AACH,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,MAAM,CAK3D;AAoLD;;GAEG;AACH,UAAU,qBAAqB;IAC7B,iEAAiE;IACjE,mBAAmB,EAAE,MAAM,CAAA;IAC3B,gEAAgE;IAChE,QAAQ,EAAE,MAAM,CAAA;IAChB,+EAA+E;IAC/E,WAAW,EAAE,MAAM,CAAA;CACpB;AASD;;GAEG;AACH,UAAU,qBAAqB;IAC7B,qEAAqE;IACrE,mBAAmB,EAAE,OAAO,CAAA;IAC5B,oEAAoE;IACpE,kBAAkB,EAAE,OAAO,CAAA;IAC3B,2CAA2C;IAC3C,gBAAgB,EAAE,MAAM,CAAA;IACxB,0CAA0C;IAC1C,gBAAgB,EAAE,MAAM,CAAA;CACzB;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAsB,sBAAsB,CAC1C,KAAK,EAAE,QAAQ,EAAE,EACjB,QAAQ,EAAE,iBAAiB,EAC3B,MAAM,GAAE,OAAO,CAAC,qBAAqB,CAAM,GAC1C,OAAO,CAAC,qBAAqB,CAAC,CAsEhC;AAED;;;;;;;;;;;;GAYG;AACH,wBAAsB,2BAA2B,CAC/C,KAAK,EAAE,QAAQ,EAAE,EACjB,QAAQ,EAAE,iBAAiB,EAC3B,MAAM,GAAE,OAAO,CAAC,qBAAqB,CAAM,GAC1C,OAAO,CAAC,MAAM,CAAC,CAyCjB"}
1
+ {"version":3,"file":"pdf-filter.d.ts","sourceRoot":"","sources":["../../src/parser/pdf-filter.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAA;AAIvE,YAAY,EAAE,iBAAiB,EAAE,CAAA;AAMjC;;GAEG;AACH,UAAU,oBAAoB;IAC5B,IAAI,EAAE,MAAM,CAAA;IACZ,CAAC,EAAE,MAAM,CAAA;IACT,CAAC,EAAE,MAAM,CAAA;IACT,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,EAAE,OAAO,CAAA;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,EAAE,oBAAoB,EAAE,CAAA;CAC9B;AAoCD;;;;;GAKG;AACH,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,MAAM,CAK3D;AAoLD;;GAEG;AACH,UAAU,qBAAqB;IAC7B,iEAAiE;IACjE,mBAAmB,EAAE,MAAM,CAAA;IAC3B,gEAAgE;IAChE,QAAQ,EAAE,MAAM,CAAA;IAChB,+EAA+E;IAC/E,WAAW,EAAE,MAAM,CAAA;CACpB;AASD;;GAEG;AACH,UAAU,qBAAqB;IAC7B,qEAAqE;IACrE,mBAAmB,EAAE,OAAO,CAAA;IAC5B,oEAAoE;IACpE,kBAAkB,EAAE,OAAO,CAAA;IAC3B,2CAA2C;IAC3C,gBAAgB,EAAE,MAAM,CAAA;IACxB,0CAA0C;IAC1C,gBAAgB,EAAE,MAAM,CAAA;CACzB;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAsB,sBAAsB,CAC1C,KAAK,EAAE,QAAQ,EAAE,EACjB,QAAQ,EAAE,iBAAiB,EAC3B,MAAM,GAAE,OAAO,CAAC,qBAAqB,CAAM,GAC1C,OAAO,CAAC,qBAAqB,CAAC,CAsEhC;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAsB,2BAA2B,CAC/C,KAAK,EAAE,QAAQ,EAAE,EACjB,QAAQ,EAAE,iBAAiB,EAC3B,MAAM,GAAE,OAAO,CAAC,qBAAqB,CAAM,GAC1C,OAAO,CAAC,MAAM,EAAE,CAAC,CAsCnB"}
@@ -277,29 +277,30 @@ async function detectSentencePatterns(pages, embedder, config = {}) {
277
277
  return result;
278
278
  }
279
279
  /**
280
- * Filter page boundary sentences and join into text
280
+ * Filter page boundary sentences and return per-page filtered text
281
281
  *
282
282
  * This is the main entry point for sentence-level header/footer filtering.
283
283
  * It detects and removes repeating sentence patterns at page boundaries.
284
+ * Returns an array of filtered text per page, preserving page boundaries.
284
285
  *
285
286
  * Use this instead of joinFilteredPages when embedder is available.
286
287
  *
287
288
  * @param pages - Array of page data
288
289
  * @param embedder - Embedder for generating embeddings
289
290
  * @param config - Configuration options
290
- * @returns Filtered text with header/footer sentences removed
291
+ * @returns Array of filtered text strings, one per page
291
292
  */
292
293
  async function filterPageBoundarySentences(pages, embedder, config = {}) {
293
294
  const cfg = { ...DEFAULT_SENTENCE_PATTERN_CONFIG, ...config };
294
295
  // Need minimum pages to detect patterns
295
296
  if (pages.length < cfg.minPages) {
296
- return joinFilteredPages(pages);
297
+ return pages.map((page) => joinFilteredPages([page]));
297
298
  }
298
299
  // Detect patterns
299
300
  const patterns = await detectSentencePatterns(pages, embedder, cfg);
300
- // If no patterns detected, return normally joined text
301
+ // If no patterns detected, return normally joined text per page
301
302
  if (!patterns.removeFirstSentence && !patterns.removeLastSentence) {
302
- return joinFilteredPages(pages);
303
+ return pages.map((page) => joinFilteredPages([page]));
303
304
  }
304
305
  // Split each page into sentences with Y coordinate (merged by Y)
305
306
  const pageSentences = pages.map((page) => splitItemsIntoSentencesWithY(page.items));
@@ -314,10 +315,7 @@ async function filterPageBoundarySentences(pages, embedder, config = {}) {
314
315
  }
315
316
  return cleaned;
316
317
  });
317
- // Join back into final text
318
- return cleanedPageSentences
319
- .map((sentences) => sentences.map((s) => s.text).join(' '))
320
- .filter((text) => text.length > 0)
321
- .join('\n\n');
318
+ // Return per-page filtered text
319
+ return cleanedPageSentences.map((sentences) => sentences.map((s) => s.text).join(' '));
322
320
  }
323
321
  //# sourceMappingURL=pdf-filter.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"pdf-filter.js","sourceRoot":"","sources":["../../src/parser/pdf-filter.ts"],"names":[],"mappings":";AAAA,2BAA2B;AAC3B,wDAAwD;AACxD,uEAAuE;;AAuEvE,8CAKC;AA0OD,wDA0EC;AAeD,kEA6CC;AAzbD,0EAAoE;AA4BpE,+CAA+C;AAC/C,eAAe;AACf,+CAA+C;AAE/C;;;;;;GAMG;AACH,SAAS,aAAa,CAAC,KAA6B;IAClD,6DAA6D;IAC7D,MAAM,OAAO,GAAG,IAAI,GAAG,EAAkC,CAAA;IACzD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAC5B,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;QAClC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QAChB,OAAO,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAA;IACvB,CAAC;IAED,oFAAoF;IACpF,OAAO,CAAC,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC;SAC1B,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;SAC3B,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC,EAAE,EAAE,CAClB,KAAK;SACF,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;SACzB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;SAClB,IAAI,CAAC,GAAG,CAAC,CACb;SACA,IAAI,CAAC,IAAI,CAAC;SACV,IAAI,EAAE,CAAA;AACX,CAAC;AAED;;;;;GAKG;AACH,SAAgB,iBAAiB,CAAC,KAAiB;IACjD,OAAO,KAAK;SACT,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;SACxC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;SACjC,IAAI,CAAC,MAAM,CAAC,CAAA;AACjB,CAAC;AAcD;;;;;;;;;;GAUG;AACH,SAAS,4BAA4B,CAAC,KAA6B;IACjE,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAA;IAEjC,+DAA+D;IAC/D,MAAM,WAAW,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QAC3C,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;QACvB,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC;YAAE,OAAO,KAAK,CAAA;QACrC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;IAClB,CAAC,CAAC,CAAA;IAEF,2DAA2D;IAC3D,MAAM,UAAU,GAAyD,EAAE,CAAA;IAC3E,IAAI,QAAQ,GAAG,EAAE,CAAA;IACjB,IAAI,KAAK,GAAkB,IAAI,CAAA;IAE/B,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;QAC/B,4DAA4D;QAC5D,6EAA6E;QAC7E,IAAI,KAAK,KAAK,IAAI,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,C
AAC,EAAE,CAAC;YACnD,QAAQ,GAAG,GAAG,QAAQ,CAAC,OAAO,EAAE,IAAI,CAAA;QACtC,CAAC;QAED,UAAU,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,QAAQ,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAA;QACjD,QAAQ,IAAI,GAAG,IAAI,CAAC,IAAI,GAAG,CAAA;QAC3B,KAAK,GAAG,IAAI,CAAC,CAAC,CAAA;IAChB,CAAC;IAED,uBAAuB;IACvB,MAAM,SAAS,GAAG,IAAA,yCAAkB,EAAC,QAAQ,CAAC,CAAA;IAE9C,kEAAkE;IAClE,MAAM,cAAc,GAAoB,EAAE,CAAA;IAC1C,IAAI,WAAW,GAAG,CAAC,CAAA;IAEnB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,8CAA8C;QAC9C,MAAM,aAAa,GAAG,QAAQ,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,WAAW,CAAC,CAAA;QACpE,IAAI,aAAa,KAAK,CAAC,CAAC;YAAE,SAAQ;QAElC,4CAA4C;QAC5C,IAAI,UAAU,GAAG,WAAW,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,CAAA;QACvC,KAAK,IAAI,CAAC,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAChD,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,CAAA;YAC3B,IAAI,KAAK,IAAI,KAAK,CAAC,KAAK,IAAI,aAAa,EAAE,CAAC;gBAC1C,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;gBACrC,MAAK;YACP,CAAC;QACH,CAAC;QAED,cAAc,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,CAAA;QACtD,WAAW,GAAG,aAAa,GAAG,QAAQ,CAAC,MAAM,CAAA;IAC/C,CAAC;IAED,yCAAyC;IACzC,OAAO,iBAAiB,CAAC,cAAc,CAAC,CAAA;AAC1C,CAAC;AAED;;;;;GAKG;AACH,SAAS,iBAAiB,CAAC,SAA0B;IACnD,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAA;IAErC,MAAM,MAAM,GAAoB,EAAE,CAAA;IAClC,IAAI,OAAO,GAAyB,IAAI,CAAA;IAExC,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;YACrB,OAAO,GAAG,EAAE,GAAG,QAAQ,EAAE,CAAA;QAC3B,CAAC;aAAM,IAAI,OAAO,CAAC,CAAC,KAAK,QAAQ,CAAC,CAAC,EAAE,CAAC;YACpC,qBAAqB;YACrB,OAAO,CAAC,IAAI,IAAI,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAA;QACrC,CAAC;aAAM,CAAC;YACN,0CAA0C;YAC1C,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YACpB,OAAO,GAAG,EAAE,GAAG,QAAQ,EAAE,CAAA;QAC3B,CAAC;IACH,CAAC;IAED,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;QACrB,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IACtB,CAAC;IAED,OAAO,MAAM,CAAA;AACf,CAAC;AAED,+CAA+C;AAC/C,yCAAyC;AACzC,+CAA+C;AAE/C;;GAEG;AACH,SAAS,gBAAgB,CAAC,IAAc,EAAE,IAAc;IACtD,IAAI,IAAI,CAAC,MAAM,KAAK,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACrD,OAAO,CAA
C,CAAA;IACV,CAAC;IAED,IAAI,UAAU,GAAG,CAAC,CAAA;IAClB,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,IAAI,KAAK,GAAG,CAAC,CAAA;IAEb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;QACvB,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;QACvB,UAAU,IAAI,EAAE,GAAG,EAAE,CAAA;QACrB,KAAK,IAAI,EAAE,GAAG,EAAE,CAAA;QAChB,KAAK,IAAI,EAAE,GAAG,EAAE,CAAA;IAClB,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;IACvD,IAAI,WAAW,KAAK,CAAC;QAAE,OAAO,CAAC,CAAA;IAE/B,OAAO,UAAU,GAAG,WAAW,CAAA;AACjC,CAAC;AAED;;;;;;GAMG;AACH,SAAS,wBAAwB,CAAC,UAAsB;IACtD,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,GAAG,CAAA;IAErC,MAAM,YAAY,GAAa,EAAE,CAAA;IAEjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/C,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,CAAA;YAC1B,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,CAAA;YAC1B,IAAI,IAAI,IAAI,IAAI,EAAE,CAAC;gBACjB,YAAY,CAAC,IAAI,CAAC,gBAAgB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAA;YACjD,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAA;IAEvC,uBAAuB;IACvB,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;IAClC,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;IAE/C,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;QAClC,qCAAqC;QACrC,OAAO,CAAC,CAAC,YAAY,CAAC,GAAG,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;IACtE,CAAC;IACD,oBAAoB;IACpB,OAAO,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;AAC/B,CAAC;AAcD,iEAAiE;AACjE,MAAM,+BAA+B,GAA0B;IAC7D,mBAAmB,EAAE,IAAI;IACzB,QAAQ,EAAE,CAAC;IACX,WAAW,EAAE,CAAC;CACf,CAAA;AAgBD;;;;;;;;;;;;;;;;;;;;GAoBG;AACI,KAAK,UAAU,sBAAsB,CAC1C,KAAiB,EACjB,QAA2B,EAC3B,SAAyC,EAAE;IAE3C,MAAM,GAAG,GAAG,EAAE,GAAG,+BAA+B,EAAE,GAAG,MAAM,EAAE,CAAA;IAE7D,MAAM,MAAM,GAA0B;QACpC,mBAAmB,EAAE,KAAK;QAC1B,kBAAkB,EAAE,KAAK;QACzB,gBAAgB,
EAAE,CAAC;QACnB,gBAAgB,EAAE,CAAC;KACpB,CAAA;IAED,iDAAiD;IACjD,IAAI,KAAK,CAAC,MAAM,GAAG,GAAG,CAAC,QAAQ,EAAE,CAAC;QAChC,OAAO,MAAM,CAAA;IACf,CAAC;IAED,kDAAkD;IAClD,uEAAuE;IACvE,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;IAChD,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,WAAW,GAAG,CAAC,CAAC,CAAA;IAClD,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,WAAW,GAAG,UAAU,CAAC,CAAA;IACxD,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,UAAU,GAAG,GAAG,CAAC,WAAW,CAAC,CAAA;IACrE,MAAM,WAAW,GAAG,KAAK,CAAC,KAAK,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAA;IAErD,oEAAoE;IACpE,MAAM,aAAa,GAAsB,WAAW,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAChE,4BAA4B,CAAC,IAAI,CAAC,KAAK,CAAC,CACzC,CAAA;IAED,yDAAyD;IACzD,MAAM,cAAc,GAAa,EAAE,CAAA;IACnC,MAAM,aAAa,GAAa,EAAE,CAAA;IAElC,KAAK,MAAM,SAAS,IAAI,aAAa,EAAE,CAAC;QACtC,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAE,CAAC,IAAI,CAAC,CAAA;YACvC,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACzB,aAAa,CAAC,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC,IAAI,CAAC,CAAA;YAC3D,CAAC;QACH,CAAC;IACH,CAAC;IAED,8EAA8E;IAC9E,IAAI,cAAc,CAAC,MAAM,IAAI,GAAG,CAAC,QAAQ,EAAE,CAAC;QAC1C,MAAM,UAAU,GAAG,MAAM,QAAQ,CAAC,UAAU,CAAC,cAAc,CAAC,CAAA;QAC5D,MAAM,SAAS,GAAG,wBAAwB,CAAC,UAAU,CAAC,CAAA;QACtD,MAAM,CAAC,gBAAgB,GAAG,SAAS,CAAA;QAEnC,IAAI,SAAS,IAAI,GAAG,CAAC,mBAAmB,EAAE,CAAC;YACzC,MAAM,CAAC,mBAAmB,GAAG,IAAI,CAAA;YACjC,OAAO,CAAC,KAAK,CACX,qCAAqC,cAAc,CAAC,MAAM,kBAAkB,UAAU,GAAG,CAAC,IAAI,QAAQ,yBAAyB,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CACtJ,CAAA;QACH,CAAC;IACH,CAAC;IAED,6EAA6E;IAC7E,IAAI,aAAa,CAAC,MAAM,IAAI,GAAG,CAAC,QAAQ,EAAE,CAAC;QACzC,MAAM,UAAU,GAAG,MAAM,QAAQ,CAAC,UAAU,CAAC,aAAa,CAAC,CAAA;QAC3D,MAAM,SAAS,GAAG,wBAAwB,CAAC,UAAU,CAAC,CAAA;QACtD,MAAM,CAAC,gBAAgB,GAAG,SAAS,CAAA;QAEnC,IAAI,SAAS,IAAI,GAAG,CAAC,mBAAmB,EAAE,CAAC;YACzC,MAAM,CAAC,kBAAkB,GAAG,IAAI,CAAA;YAChC,OAAO,CAAC,KAAK,CACX,qCAAqC,aAAa,CAAC,MAAM,kBAAkB,UAAU,GAAG,CAAC,IAAI,QAAQ,yBAAyB,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CACrJ,CAAA;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAA
;AACf,CAAC;AAED;;;;;;;;;;;;GAYG;AACI,KAAK,UAAU,2BAA2B,CAC/C,KAAiB,EACjB,QAA2B,EAC3B,SAAyC,EAAE;IAE3C,MAAM,GAAG,GAAG,EAAE,GAAG,+BAA+B,EAAE,GAAG,MAAM,EAAE,CAAA;IAE7D,wCAAwC;IACxC,IAAI,KAAK,CAAC,MAAM,GAAG,GAAG,CAAC,QAAQ,EAAE,CAAC;QAChC,OAAO,iBAAiB,CAAC,KAAK,CAAC,CAAA;IACjC,CAAC;IAED,kBAAkB;IAClB,MAAM,QAAQ,GAAG,MAAM,sBAAsB,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,CAAC,CAAA;IAEnE,uDAAuD;IACvD,IAAI,CAAC,QAAQ,CAAC,mBAAmB,IAAI,CAAC,QAAQ,CAAC,kBAAkB,EAAE,CAAC;QAClE,OAAO,iBAAiB,CAAC,KAAK,CAAC,CAAA;IACjC,CAAC;IAED,iEAAiE;IACjE,MAAM,aAAa,GAAsB,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAC1D,4BAA4B,CAAC,IAAI,CAAC,KAAK,CAAC,CACzC,CAAA;IAED,+CAA+C;IAC/C,MAAM,oBAAoB,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC,SAAS,EAAE,EAAE;QAC3D,IAAI,OAAO,GAAG,CAAC,GAAG,SAAS,CAAC,CAAA;QAE5B,IAAI,QAAQ,CAAC,mBAAmB,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvD,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAA;QAC5B,CAAC;QAED,IAAI,QAAQ,CAAC,kBAAkB,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtD,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAA;QAChC,CAAC;QAED,OAAO,OAAO,CAAA;IAChB,CAAC,CAAC,CAAA;IAEF,4BAA4B;IAC5B,OAAO,oBAAoB;SACxB,GAAG,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;SAC1D,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;SACjC,IAAI,CAAC,MAAM,CAAC,CAAA;AACjB,CAAC"}
1
+ {"version":3,"file":"pdf-filter.js","sourceRoot":"","sources":["../../src/parser/pdf-filter.ts"],"names":[],"mappings":";AAAA,2BAA2B;AAC3B,wDAAwD;AACxD,uEAAuE;;AAuEvE,8CAKC;AA0OD,wDA0EC;AAgBD,kEA0CC;AAvbD,0EAAoE;AA4BpE,+CAA+C;AAC/C,eAAe;AACf,+CAA+C;AAE/C;;;;;;GAMG;AACH,SAAS,aAAa,CAAC,KAA6B;IAClD,6DAA6D;IAC7D,MAAM,OAAO,GAAG,IAAI,GAAG,EAAkC,CAAA;IACzD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAC5B,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;QAClC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QAChB,OAAO,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAA;IACvB,CAAC;IAED,oFAAoF;IACpF,OAAO,CAAC,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC;SAC1B,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;SAC3B,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC,EAAE,EAAE,CAClB,KAAK;SACF,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;SACzB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;SAClB,IAAI,CAAC,GAAG,CAAC,CACb;SACA,IAAI,CAAC,IAAI,CAAC;SACV,IAAI,EAAE,CAAA;AACX,CAAC;AAED;;;;;GAKG;AACH,SAAgB,iBAAiB,CAAC,KAAiB;IACjD,OAAO,KAAK;SACT,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;SACxC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;SACjC,IAAI,CAAC,MAAM,CAAC,CAAA;AACjB,CAAC;AAcD;;;;;;;;;;GAUG;AACH,SAAS,4BAA4B,CAAC,KAA6B;IACjE,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAA;IAEjC,+DAA+D;IAC/D,MAAM,WAAW,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QAC3C,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;QACvB,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC;YAAE,OAAO,KAAK,CAAA;QACrC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;IAClB,CAAC,CAAC,CAAA;IAEF,2DAA2D;IAC3D,MAAM,UAAU,GAAyD,EAAE,CAAA;IAC3E,IAAI,QAAQ,GAAG,EAAE,CAAA;IACjB,IAAI,KAAK,GAAkB,IAAI,CAAA;IAE/B,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;QAC/B,4DAA4D;QAC5D,6EAA6E;QAC7E,IAAI,KAAK,KAAK,IAAI,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,
CAAC,EAAE,CAAC;YACnD,QAAQ,GAAG,GAAG,QAAQ,CAAC,OAAO,EAAE,IAAI,CAAA;QACtC,CAAC;QAED,UAAU,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,QAAQ,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAA;QACjD,QAAQ,IAAI,GAAG,IAAI,CAAC,IAAI,GAAG,CAAA;QAC3B,KAAK,GAAG,IAAI,CAAC,CAAC,CAAA;IAChB,CAAC;IAED,uBAAuB;IACvB,MAAM,SAAS,GAAG,IAAA,yCAAkB,EAAC,QAAQ,CAAC,CAAA;IAE9C,kEAAkE;IAClE,MAAM,cAAc,GAAoB,EAAE,CAAA;IAC1C,IAAI,WAAW,GAAG,CAAC,CAAA;IAEnB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,8CAA8C;QAC9C,MAAM,aAAa,GAAG,QAAQ,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,WAAW,CAAC,CAAA;QACpE,IAAI,aAAa,KAAK,CAAC,CAAC;YAAE,SAAQ;QAElC,4CAA4C;QAC5C,IAAI,UAAU,GAAG,WAAW,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,CAAA;QACvC,KAAK,IAAI,CAAC,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAChD,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,CAAA;YAC3B,IAAI,KAAK,IAAI,KAAK,CAAC,KAAK,IAAI,aAAa,EAAE,CAAC;gBAC1C,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;gBACrC,MAAK;YACP,CAAC;QACH,CAAC;QAED,cAAc,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,CAAA;QACtD,WAAW,GAAG,aAAa,GAAG,QAAQ,CAAC,MAAM,CAAA;IAC/C,CAAC;IAED,yCAAyC;IACzC,OAAO,iBAAiB,CAAC,cAAc,CAAC,CAAA;AAC1C,CAAC;AAED;;;;;GAKG;AACH,SAAS,iBAAiB,CAAC,SAA0B;IACnD,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAA;IAErC,MAAM,MAAM,GAAoB,EAAE,CAAA;IAClC,IAAI,OAAO,GAAyB,IAAI,CAAA;IAExC,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;YACrB,OAAO,GAAG,EAAE,GAAG,QAAQ,EAAE,CAAA;QAC3B,CAAC;aAAM,IAAI,OAAO,CAAC,CAAC,KAAK,QAAQ,CAAC,CAAC,EAAE,CAAC;YACpC,qBAAqB;YACrB,OAAO,CAAC,IAAI,IAAI,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAA;QACrC,CAAC;aAAM,CAAC;YACN,0CAA0C;YAC1C,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YACpB,OAAO,GAAG,EAAE,GAAG,QAAQ,EAAE,CAAA;QAC3B,CAAC;IACH,CAAC;IAED,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;QACrB,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IACtB,CAAC;IAED,OAAO,MAAM,CAAA;AACf,CAAC;AAED,+CAA+C;AAC/C,yCAAyC;AACzC,+CAA+C;AAE/C;;GAEG;AACH,SAAS,gBAAgB,CAAC,IAAc,EAAE,IAAc;IACtD,IAAI,IAAI,CAAC,MAAM,KAAK,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACrD,OAAO,CA
AC,CAAA;IACV,CAAC;IAED,IAAI,UAAU,GAAG,CAAC,CAAA;IAClB,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,IAAI,KAAK,GAAG,CAAC,CAAA;IAEb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;QACvB,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;QACvB,UAAU,IAAI,EAAE,GAAG,EAAE,CAAA;QACrB,KAAK,IAAI,EAAE,GAAG,EAAE,CAAA;QAChB,KAAK,IAAI,EAAE,GAAG,EAAE,CAAA;IAClB,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;IACvD,IAAI,WAAW,KAAK,CAAC;QAAE,OAAO,CAAC,CAAA;IAE/B,OAAO,UAAU,GAAG,WAAW,CAAA;AACjC,CAAC;AAED;;;;;;GAMG;AACH,SAAS,wBAAwB,CAAC,UAAsB;IACtD,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,GAAG,CAAA;IAErC,MAAM,YAAY,GAAa,EAAE,CAAA;IAEjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/C,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,CAAA;YAC1B,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,CAAA;YAC1B,IAAI,IAAI,IAAI,IAAI,EAAE,CAAC;gBACjB,YAAY,CAAC,IAAI,CAAC,gBAAgB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAA;YACjD,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAA;IAEvC,uBAAuB;IACvB,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;IAClC,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;IAE/C,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;QAClC,qCAAqC;QACrC,OAAO,CAAC,CAAC,YAAY,CAAC,GAAG,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;IACtE,CAAC;IACD,oBAAoB;IACpB,OAAO,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;AAC/B,CAAC;AAcD,iEAAiE;AACjE,MAAM,+BAA+B,GAA0B;IAC7D,mBAAmB,EAAE,IAAI;IACzB,QAAQ,EAAE,CAAC;IACX,WAAW,EAAE,CAAC;CACf,CAAA;AAgBD;;;;;;;;;;;;;;;;;;;;GAoBG;AACI,KAAK,UAAU,sBAAsB,CAC1C,KAAiB,EACjB,QAA2B,EAC3B,SAAyC,EAAE;IAE3C,MAAM,GAAG,GAAG,EAAE,GAAG,+BAA+B,EAAE,GAAG,MAAM,EAAE,CAAA;IAE7D,MAAM,MAAM,GAA0B;QACpC,mBAAmB,EAAE,KAAK;QAC1B,kBAAkB,EAAE,KAAK;QACzB,gBAAgB
,EAAE,CAAC;QACnB,gBAAgB,EAAE,CAAC;KACpB,CAAA;IAED,iDAAiD;IACjD,IAAI,KAAK,CAAC,MAAM,GAAG,GAAG,CAAC,QAAQ,EAAE,CAAC;QAChC,OAAO,MAAM,CAAA;IACf,CAAC;IAED,kDAAkD;IAClD,uEAAuE;IACvE,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;IAChD,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,WAAW,GAAG,CAAC,CAAC,CAAA;IAClD,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,WAAW,GAAG,UAAU,CAAC,CAAA;IACxD,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,UAAU,GAAG,GAAG,CAAC,WAAW,CAAC,CAAA;IACrE,MAAM,WAAW,GAAG,KAAK,CAAC,KAAK,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAA;IAErD,oEAAoE;IACpE,MAAM,aAAa,GAAsB,WAAW,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAChE,4BAA4B,CAAC,IAAI,CAAC,KAAK,CAAC,CACzC,CAAA;IAED,yDAAyD;IACzD,MAAM,cAAc,GAAa,EAAE,CAAA;IACnC,MAAM,aAAa,GAAa,EAAE,CAAA;IAElC,KAAK,MAAM,SAAS,IAAI,aAAa,EAAE,CAAC;QACtC,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAE,CAAC,IAAI,CAAC,CAAA;YACvC,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACzB,aAAa,CAAC,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC,IAAI,CAAC,CAAA;YAC3D,CAAC;QACH,CAAC;IACH,CAAC;IAED,8EAA8E;IAC9E,IAAI,cAAc,CAAC,MAAM,IAAI,GAAG,CAAC,QAAQ,EAAE,CAAC;QAC1C,MAAM,UAAU,GAAG,MAAM,QAAQ,CAAC,UAAU,CAAC,cAAc,CAAC,CAAA;QAC5D,MAAM,SAAS,GAAG,wBAAwB,CAAC,UAAU,CAAC,CAAA;QACtD,MAAM,CAAC,gBAAgB,GAAG,SAAS,CAAA;QAEnC,IAAI,SAAS,IAAI,GAAG,CAAC,mBAAmB,EAAE,CAAC;YACzC,MAAM,CAAC,mBAAmB,GAAG,IAAI,CAAA;YACjC,OAAO,CAAC,KAAK,CACX,qCAAqC,cAAc,CAAC,MAAM,kBAAkB,UAAU,GAAG,CAAC,IAAI,QAAQ,yBAAyB,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CACtJ,CAAA;QACH,CAAC;IACH,CAAC;IAED,6EAA6E;IAC7E,IAAI,aAAa,CAAC,MAAM,IAAI,GAAG,CAAC,QAAQ,EAAE,CAAC;QACzC,MAAM,UAAU,GAAG,MAAM,QAAQ,CAAC,UAAU,CAAC,aAAa,CAAC,CAAA;QAC3D,MAAM,SAAS,GAAG,wBAAwB,CAAC,UAAU,CAAC,CAAA;QACtD,MAAM,CAAC,gBAAgB,GAAG,SAAS,CAAA;QAEnC,IAAI,SAAS,IAAI,GAAG,CAAC,mBAAmB,EAAE,CAAC;YACzC,MAAM,CAAC,kBAAkB,GAAG,IAAI,CAAA;YAChC,OAAO,CAAC,KAAK,CACX,qCAAqC,aAAa,CAAC,MAAM,kBAAkB,UAAU,GAAG,CAAC,IAAI,QAAQ,yBAAyB,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CACrJ,CAAA;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAA
A;AACf,CAAC;AAED;;;;;;;;;;;;;GAaG;AACI,KAAK,UAAU,2BAA2B,CAC/C,KAAiB,EACjB,QAA2B,EAC3B,SAAyC,EAAE;IAE3C,MAAM,GAAG,GAAG,EAAE,GAAG,+BAA+B,EAAE,GAAG,MAAM,EAAE,CAAA;IAE7D,wCAAwC;IACxC,IAAI,KAAK,CAAC,MAAM,GAAG,GAAG,CAAC,QAAQ,EAAE,CAAC;QAChC,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,iBAAiB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACvD,CAAC;IAED,kBAAkB;IAClB,MAAM,QAAQ,GAAG,MAAM,sBAAsB,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,CAAC,CAAA;IAEnE,gEAAgE;IAChE,IAAI,CAAC,QAAQ,CAAC,mBAAmB,IAAI,CAAC,QAAQ,CAAC,kBAAkB,EAAE,CAAC;QAClE,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,iBAAiB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACvD,CAAC;IAED,iEAAiE;IACjE,MAAM,aAAa,GAAsB,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAC1D,4BAA4B,CAAC,IAAI,CAAC,KAAK,CAAC,CACzC,CAAA;IAED,+CAA+C;IAC/C,MAAM,oBAAoB,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC,SAAS,EAAE,EAAE;QAC3D,IAAI,OAAO,GAAG,CAAC,GAAG,SAAS,CAAC,CAAA;QAE5B,IAAI,QAAQ,CAAC,mBAAmB,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvD,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAA;QAC5B,CAAC;QAED,IAAI,QAAQ,CAAC,kBAAkB,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtD,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAA;QAChC,CAAC;QAED,OAAO,OAAO,CAAA;IAChB,CAAC,CAAC,CAAA;IAEF,gCAAgC;IAChC,OAAO,oBAAoB,CAAC,GAAG,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAA;AACxF,CAAC"}
@@ -0,0 +1,64 @@
1
+ /**
2
+ * Result of title extraction, including how the title was determined
3
+ */
4
+ export interface TitleExtractionResult {
5
+ title: string;
6
+ source: 'metadata' | 'content' | 'filename';
7
+ }
8
+ /**
9
+ * Convert a file name to a human-readable title
10
+ * Strips the extension and replaces hyphens/underscores with spaces
11
+ *
12
+ * @param fileName - File name (e.g., "2024-annual-report.pdf")
13
+ * @returns Human-readable title (e.g., "2024 annual report")
14
+ */
15
+ export declare function fileNameToTitle(fileName: string): string;
16
+ /**
17
+ * Extract title from Markdown content
18
+ * Priority: YAML frontmatter title -> first # H1 -> file name
19
+ *
20
+ * @param text - Markdown content
21
+ * @param fileName - File name for fallback
22
+ * @returns Title extraction result
23
+ */
24
+ export declare function extractMarkdownTitle(text: string, fileName: string): TitleExtractionResult;
25
+ /**
26
+ * Extract title from plain text content
27
+ * Priority: first line followed by empty line -> file name
28
+ *
29
+ * @param text - Plain text content
30
+ * @param fileName - File name for fallback
31
+ * @returns Title extraction result
32
+ */
33
+ export declare function extractTxtTitle(text: string, fileName: string): TitleExtractionResult;
34
+ /**
35
+ * Extract title from HTML content (using Readability title)
36
+ * Priority: readability title -> file name
37
+ *
38
+ * @param readabilityTitle - Title extracted by Readability
39
+ * @param fileName - File name for fallback
40
+ * @returns Title extraction result
41
+ */
42
+ export declare function extractHtmlTitle(readabilityTitle: string, fileName: string): TitleExtractionResult;
43
+ /**
44
+ * Extract title from PDF metadata or first page chunk text
45
+ * Priority: PDF metadata /Title -> first page chunk 0 text -> file name
46
+ *
47
+ * Rejects metadata titles that look like file paths (contain / or \) or are empty/whitespace-only.
48
+ *
49
+ * @param metadataTitle - PDF metadata /Title value (may be undefined)
50
+ * @param firstPageChunkText - Text of chunk 0 from semantic chunking of page 1 (may be undefined)
51
+ * @param fileName - File name for fallback
52
+ * @returns Title extraction result
53
+ */
54
+ export declare function extractPdfTitle(metadataTitle: string | undefined, firstPageChunkText: string | undefined, fileName: string): TitleExtractionResult;
55
+ /**
56
+ * Extract title from DOCX mammoth HTML output
57
+ * Priority: first <h1> from mammoth HTML -> file name
58
+ *
59
+ * @param htmlContent - HTML content generated by mammoth.convertToHtml()
60
+ * @param fileName - File name for fallback
61
+ * @returns Title extraction result
62
+ */
63
+ export declare function extractDocxTitle(htmlContent: string, fileName: string): TitleExtractionResult;
64
+ //# sourceMappingURL=title-extractor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"title-extractor.d.ts","sourceRoot":"","sources":["../../src/parser/title-extractor.ts"],"names":[],"mappings":"AAOA;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,UAAU,GAAG,SAAS,GAAG,UAAU,CAAA;CAC5C;AAMD;;;;;;GAMG;AACH,wBAAgB,eAAe,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAMxD;AAMD;;;;;;;GAOG;AACH,wBAAgB,oBAAoB,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,qBAAqB,CAe1F;AAED;;;;;;;GAOG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,qBAAqB,CAkBrF;AAED;;;;;;;GAOG;AACH,wBAAgB,gBAAgB,CAC9B,gBAAgB,EAAE,MAAM,EACxB,QAAQ,EAAE,MAAM,GACf,qBAAqB,CAOvB;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,eAAe,CAC7B,aAAa,EAAE,MAAM,GAAG,SAAS,EACjC,kBAAkB,EAAE,MAAM,GAAG,SAAS,EACtC,QAAQ,EAAE,MAAM,GACf,qBAAqB,CAiBvB;AAED;;;;;;;GAOG;AACH,wBAAgB,gBAAgB,CAAC,WAAW,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,qBAAqB,CAY7F"}