@copilotkit/pathfinder 1.4.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. package/CHANGELOG.md +35 -0
  2. package/LICENSE +106 -21
  3. package/README.md +14 -3
  4. package/dist/cli.js +11 -1
  5. package/dist/cli.js.map +1 -1
  6. package/dist/config.d.ts +9 -0
  7. package/dist/config.d.ts.map +1 -1
  8. package/dist/config.js +64 -5
  9. package/dist/config.js.map +1 -1
  10. package/dist/db/queries.d.ts +12 -1
  11. package/dist/db/queries.d.ts.map +1 -1
  12. package/dist/db/queries.js +64 -0
  13. package/dist/db/queries.js.map +1 -1
  14. package/dist/faq-txt.d.ts +12 -0
  15. package/dist/faq-txt.d.ts.map +1 -0
  16. package/dist/faq-txt.js +37 -0
  17. package/dist/faq-txt.js.map +1 -0
  18. package/dist/indexing/chunking/index.js +4 -0
  19. package/dist/indexing/chunking/index.js.map +1 -1
  20. package/dist/indexing/chunking/qa.d.ts +8 -0
  21. package/dist/indexing/chunking/qa.d.ts.map +1 -0
  22. package/dist/indexing/chunking/qa.js +22 -0
  23. package/dist/indexing/chunking/qa.js.map +1 -0
  24. package/dist/indexing/distiller.d.ts +29 -0
  25. package/dist/indexing/distiller.d.ts.map +1 -0
  26. package/dist/indexing/distiller.js +104 -0
  27. package/dist/indexing/distiller.js.map +1 -0
  28. package/dist/indexing/orchestrator.d.ts +8 -3
  29. package/dist/indexing/orchestrator.d.ts.map +1 -1
  30. package/dist/indexing/orchestrator.js +99 -93
  31. package/dist/indexing/orchestrator.js.map +1 -1
  32. package/dist/indexing/pipeline.d.ts +18 -0
  33. package/dist/indexing/pipeline.d.ts.map +1 -0
  34. package/dist/indexing/pipeline.js +68 -0
  35. package/dist/indexing/pipeline.js.map +1 -0
  36. package/dist/indexing/providers/discord-api.d.ts +79 -0
  37. package/dist/indexing/providers/discord-api.d.ts.map +1 -0
  38. package/dist/indexing/providers/discord-api.js +167 -0
  39. package/dist/indexing/providers/discord-api.js.map +1 -0
  40. package/dist/indexing/providers/discord.d.ts +25 -0
  41. package/dist/indexing/providers/discord.d.ts.map +1 -0
  42. package/dist/indexing/providers/discord.js +282 -0
  43. package/dist/indexing/providers/discord.js.map +1 -0
  44. package/dist/indexing/providers/file.d.ts +18 -0
  45. package/dist/indexing/providers/file.d.ts.map +1 -0
  46. package/dist/indexing/providers/file.js +262 -0
  47. package/dist/indexing/providers/file.js.map +1 -0
  48. package/dist/indexing/providers/index.d.ts +5 -0
  49. package/dist/indexing/providers/index.d.ts.map +1 -0
  50. package/dist/indexing/providers/index.js +24 -0
  51. package/dist/indexing/providers/index.js.map +1 -0
  52. package/dist/indexing/providers/notion-api.d.ts +101 -0
  53. package/dist/indexing/providers/notion-api.d.ts.map +1 -0
  54. package/dist/indexing/providers/notion-api.js +419 -0
  55. package/dist/indexing/providers/notion-api.js.map +1 -0
  56. package/dist/indexing/providers/notion.d.ts +29 -0
  57. package/dist/indexing/providers/notion.d.ts.map +1 -0
  58. package/dist/indexing/providers/notion.js +236 -0
  59. package/dist/indexing/providers/notion.js.map +1 -0
  60. package/dist/indexing/providers/slack-api.d.ts +62 -0
  61. package/dist/indexing/providers/slack-api.d.ts.map +1 -0
  62. package/dist/indexing/providers/slack-api.js +167 -0
  63. package/dist/indexing/providers/slack-api.js.map +1 -0
  64. package/dist/indexing/providers/slack.d.ts +21 -0
  65. package/dist/indexing/providers/slack.d.ts.map +1 -0
  66. package/dist/indexing/providers/slack.js +192 -0
  67. package/dist/indexing/providers/slack.js.map +1 -0
  68. package/dist/indexing/providers/types.d.ts +57 -0
  69. package/dist/indexing/providers/types.d.ts.map +1 -0
  70. package/dist/indexing/providers/types.js +3 -0
  71. package/dist/indexing/providers/types.js.map +1 -0
  72. package/dist/indexing/url-derivation.d.ts +2 -2
  73. package/dist/indexing/url-derivation.d.ts.map +1 -1
  74. package/dist/indexing/url-derivation.js.map +1 -1
  75. package/dist/indexing/utils.d.ts +19 -0
  76. package/dist/indexing/utils.d.ts.map +1 -0
  77. package/dist/indexing/utils.js +63 -0
  78. package/dist/indexing/utils.js.map +1 -0
  79. package/dist/mcp/server.d.ts.map +1 -1
  80. package/dist/mcp/server.js +4 -0
  81. package/dist/mcp/server.js.map +1 -1
  82. package/dist/mcp/tools/bash-fs.d.ts.map +1 -1
  83. package/dist/mcp/tools/bash-fs.js +4 -1
  84. package/dist/mcp/tools/bash-fs.js.map +1 -1
  85. package/dist/mcp/tools/knowledge.d.ts +13 -0
  86. package/dist/mcp/tools/knowledge.d.ts.map +1 -0
  87. package/dist/mcp/tools/knowledge.js +92 -0
  88. package/dist/mcp/tools/knowledge.js.map +1 -0
  89. package/dist/server.d.ts.map +1 -1
  90. package/dist/server.js +111 -8
  91. package/dist/server.js.map +1 -1
  92. package/dist/types.d.ts +1075 -79
  93. package/dist/types.d.ts.map +1 -1
  94. package/dist/types.js +94 -4
  95. package/dist/types.js.map +1 -1
  96. package/dist/validate.d.ts +29 -0
  97. package/dist/validate.d.ts.map +1 -0
  98. package/dist/validate.js +197 -0
  99. package/dist/validate.js.map +1 -0
  100. package/dist/webhooks/discord.d.ts +13 -0
  101. package/dist/webhooks/discord.d.ts.map +1 -0
  102. package/dist/webhooks/discord.js +57 -0
  103. package/dist/webhooks/discord.js.map +1 -0
  104. package/dist/webhooks/slack.d.ts +13 -0
  105. package/dist/webhooks/slack.d.ts.map +1 -0
  106. package/dist/webhooks/slack.js +106 -0
  107. package/dist/webhooks/slack.js.map +1 -0
  108. package/package.json +17 -3
  109. package/dist/indexing/source-indexer.d.ts +0 -68
  110. package/dist/indexing/source-indexer.d.ts.map +0 -1
  111. package/dist/indexing/source-indexer.js +0 -380
  112. package/dist/indexing/source-indexer.js.map +0 -1
@@ -0,0 +1,37 @@
1
/**
 * Generate the /faq.txt content from FAQ chunks.
 *
 * Chunks are grouped by their `source_name`; one `## <name>` section is
 * emitted per entry of `faqSources`, in the order given there. Chunks whose
 * source is not listed in `faqSources` are omitted from the output.
 *
 * @param {Array<{source_name: string, content: string}>} chunks - FAQ chunks;
 *   each `content` is emitted verbatim (stored as "Q: ...\n\nA: ...").
 *   A nullish value is treated as an empty list.
 * @param {string} serverName - Name interpolated into the document heading.
 * @param {Array<{name: string}>} faqSources - Sources that define section
 *   order. A nullish value is treated as an empty list.
 * @returns {string} The document text, lines joined with "\n".
 */
export function generateFaqTxt(chunks, serverName, faqSources) {
    const lines = [
        `# ${serverName} — Frequently Asked Questions`,
        '',
    ];
    // Group chunks by source_name, preserving the incoming chunk order.
    const bySource = new Map();
    for (const chunk of chunks ?? []) {
        const group = bySource.get(chunk.source_name);
        if (group) {
            group.push(chunk);
        }
        else {
            bySource.set(chunk.source_name, [chunk]);
        }
    }
    // Emit each source section in the order of the faqSources config.
    let hasContent = false;
    for (const source of faqSources ?? []) {
        const sourceChunks = bySource.get(source.name);
        if (!sourceChunks) {
            continue;
        }
        // Drop the group once emitted so a source name that appears twice in
        // the config does not repeat its whole section.
        bySource.delete(source.name);
        hasContent = true;
        lines.push(`## ${source.name}`, '');
        for (const chunk of sourceChunks) {
            // Content is stored as "Q: ...\n\nA: ..."; a blank line separates entries.
            lines.push(chunk.content, '');
        }
    }
    if (!hasContent) {
        lines.push('No FAQ content available yet.', '');
    }
    return lines.join('\n');
}
37
+ //# sourceMappingURL=faq-txt.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"faq-txt.js","sourceRoot":"","sources":["../src/faq-txt.ts"],"names":[],"mappings":"AAOA;;;GAGG;AACH,MAAM,UAAU,cAAc,CAC1B,MAAwB,EACxB,UAAkB,EAClB,UAAuB;IAEvB,MAAM,KAAK,GAAa;QACpB,KAAK,UAAU,+BAA+B;QAC9C,EAAE;KACL,CAAC;IAEF,8BAA8B;IAC9B,MAAM,QAAQ,GAAG,IAAI,GAAG,EAA4B,CAAC;IACrD,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QACzB,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,CAAC;YAAE,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;QAC1E,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,CAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACjD,CAAC;IAED,6DAA6D;IAC7D,IAAI,UAAU,GAAG,KAAK,CAAC;IACvB,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;QAC9B,MAAM,YAAY,GAAG,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAC/C,IAAI,CAAC,YAAY,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAEzD,UAAU,GAAG,IAAI,CAAC;QAClB,KAAK,CAAC,IAAI,CAAC,MAAM,MAAM,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC,CAAC;QAEpC,KAAK,MAAM,KAAK,IAAI,YAAY,EAAE,CAAC;YAC/B,0CAA0C;YAC1C,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAC1B,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACnB,CAAC;IACL,CAAC;IAED,IAAI,CAAC,UAAU,EAAE,CAAC;QACd,KAAK,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;QAC5C,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACnB,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC5B,CAAC"}
@@ -18,4 +18,8 @@ registerChunker('markdown', chunkMarkdown);
18
18
  registerChunker('code', chunkCode);
19
19
  registerChunker('raw-text', chunkRawText);
20
20
  registerChunker('html', chunkHtml);
21
+ import { chunkQa } from './qa.js';
22
+ registerChunker('slack', chunkQa);
23
+ registerChunker('discord', chunkQa);
24
+ registerChunker('notion', chunkMarkdown);
21
25
  //# sourceMappingURL=index.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/indexing/chunking/index.ts"],"names":[],"mappings":"AAAA,8DAA8D;AAM9D,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAqB,CAAC;AAE9C,MAAM,UAAU,eAAe,CAAC,IAAY,EAAE,EAAa;IACvD,QAAQ,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;AAC3B,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,IAAY;IACnC,MAAM,EAAE,GAAG,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC9B,IAAI,CAAC,EAAE;QAAE,MAAM,IAAI,KAAK,CAAC,0BAA0B,IAAI,iBAAiB,CAAC,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC3G,OAAO,EAAE,CAAC;AACd,CAAC;AAED,+BAA+B;AAC/B,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AACtC,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAC7C,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,eAAe,CAAC,UAAU,EAAE,aAAa,CAAC,CAAC;AAC3C,eAAe,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;AACnC,eAAe,CAAC,UAAU,EAAE,YAAY,CAAC,CAAC;AAC1C,eAAe,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/indexing/chunking/index.ts"],"names":[],"mappings":"AAAA,8DAA8D;AAM9D,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAqB,CAAC;AAE9C,MAAM,UAAU,eAAe,CAAC,IAAY,EAAE,EAAa;IACvD,QAAQ,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;AAC3B,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,IAAY;IACnC,MAAM,EAAE,GAAG,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC9B,IAAI,CAAC,EAAE;QAAE,MAAM,IAAI,KAAK,CAAC,0BAA0B,IAAI,iBAAiB,CAAC,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC3G,OAAO,EAAE,CAAC;AACd,CAAC;AAED,+BAA+B;AAC/B,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AACtC,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAC7C,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,eAAe,CAAC,UAAU,EAAE,aAAa,CAAC,CAAC;AAC3C,eAAe,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;AACnC,eAAe,CAAC,UAAU,EAAE,YAAY,CAAC,CAAC;AAC1C,eAAe,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;AAEnC,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAElC,eAAe,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;AAClC,eAAe,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;AACpC,eAAe,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC"}
@@ -0,0 +1,8 @@
1
+ import type { ChunkOutput, SourceConfig } from '../../types.js';
2
+ /**
3
+ * Chunk Q&A content. Each content item from a FAQ-category provider
4
+ * is a single Q&A pair, already sized appropriately.
5
+ * The chunker formats it and returns a single ChunkOutput.
6
+ */
7
+ export declare function chunkQa(content: string, filePath: string, config: SourceConfig): ChunkOutput[];
8
+ //# sourceMappingURL=qa.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"qa.d.ts","sourceRoot":"","sources":["../../../src/indexing/chunking/qa.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAEhE;;;;GAIG;AACH,wBAAgB,OAAO,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,GAAG,WAAW,EAAE,CAe9F"}
@@ -0,0 +1,22 @@
1
+ // Q&A chunker — formats distilled Q&A pairs for embedding.
2
+ // Source-agnostic: used by any source that produces Q&A-formatted content.
3
/**
 * Chunk Q&A content. Each content item from a FAQ-category provider
 * is a single Q&A pair, so exactly one chunk is produced per item.
 *
 * The chunk title is the text following "Q:" on the first line of the
 * trimmed content. It is `undefined` when the content does not start with
 * "Q:" or when the question line is empty.
 *
 * @param {string} content - Q&A text, expected as "Q: ...\n\nA: ...".
 * @param {string} filePath - Unused here; kept for the chunker signature.
 * @param {object} config - Unused here; kept for the chunker signature.
 * @returns {Array<{content: string, title: (string|undefined), chunkIndex: number}>}
 *   An empty array for empty/whitespace-only content, otherwise one chunk.
 */
export function chunkQa(content, filePath, config) {
    if (!content) {
        return [];
    }
    // Trim once and use the same text for both the title match and the
    // stored content, so leading whitespace cannot hide the "Q:" prefix.
    const trimmed = content.trim();
    if (!trimmed) {
        return [];
    }
    // Only horizontal whitespace may follow "Q:" — this keeps an empty
    // question line from capturing the answer line as the title. Stopping at
    // either \r or \n makes the match work for CRLF content as well.
    const questionMatch = trimmed.match(/^Q:[^\S\r\n]*([^\r\n]+)/);
    const title = questionMatch ? (questionMatch[1].trim() || undefined) : undefined;
    return [{
            content: trimmed,
            title,
            chunkIndex: 0,
        }];
}
22
+ //# sourceMappingURL=qa.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"qa.js","sourceRoot":"","sources":["../../../src/indexing/chunking/qa.ts"],"names":[],"mappings":"AAAA,2DAA2D;AAC3D,2EAA2E;AAI3E;;;;GAIG;AACH,MAAM,UAAU,OAAO,CAAC,OAAe,EAAE,QAAgB,EAAE,MAAoB;IAC3E,IAAI,CAAC,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;QAC9B,OAAO,EAAE,CAAC;IACd,CAAC;IAED,0EAA0E;IAC1E,yCAAyC;IACzC,MAAM,aAAa,GAAG,OAAO,CAAC,KAAK,CAAC,qBAAqB,CAAC,CAAC;IAC3D,MAAM,KAAK,GAAG,aAAa,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;IAElE,OAAO,CAAC;YACJ,OAAO,EAAE,OAAO,CAAC,IAAI,EAAE;YACvB,KAAK;YACL,UAAU,EAAE,CAAC;SAChB,CAAC,CAAC;AACP,CAAC"}
@@ -0,0 +1,29 @@
1
+ import OpenAI from 'openai';
2
+ export interface ThreadMessage {
3
+ author: string;
4
+ content: string;
5
+ timestamp: string;
6
+ reactions?: Array<{
7
+ name: string;
8
+ count: number;
9
+ }>;
10
+ }
11
+ export interface DistilledPair {
12
+ question: string;
13
+ answer: string;
14
+ confidence: number;
15
+ }
16
+ export interface DistillerResult {
17
+ pairs: DistilledPair[];
18
+ }
19
+ export interface DistillerOptions {
20
+ model?: string;
21
+ maxMessages?: number;
22
+ apiKey?: string;
23
+ client?: OpenAI;
24
+ }
25
+ /**
26
+ * Distill a conversation thread into Q&A pairs using an LLM.
27
+ */
28
+ export declare function distillThread(messages: ThreadMessage[], options?: DistillerOptions): Promise<DistillerResult>;
29
+ //# sourceMappingURL=distiller.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"distiller.d.ts","sourceRoot":"","sources":["../../src/indexing/distiller.ts"],"names":[],"mappings":"AAGA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAI5B,MAAM,WAAW,aAAa;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CACtD;AAED,MAAM,WAAW,aAAa;IAC1B,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,eAAe;IAC5B,KAAK,EAAE,aAAa,EAAE,CAAC;CAC1B;AAED,MAAM,WAAW,gBAAgB;IAC7B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;CACnB;AAuCD;;GAEG;AACH,wBAAsB,aAAa,CAC/B,QAAQ,EAAE,aAAa,EAAE,EACzB,OAAO,CAAC,EAAE,gBAAgB,GAC3B,OAAO,CAAC,eAAe,CAAC,CAyE1B"}
@@ -0,0 +1,104 @@
1
+ // LLM thread distiller — extracts Q&A pairs from conversation threads.
2
+ // Source-agnostic: takes structured messages, returns structured Q&A pairs.
3
+ import OpenAI from 'openai';
4
+ // ── Constants ────────────────────────────────────────────────────────────────
5
+ const DEFAULT_MODEL = 'gpt-4o-mini';
6
+ const DEFAULT_MAX_MESSAGES = 100;
7
+ const SYSTEM_PROMPT = `You are a Q&A extraction engine. Given a conversation thread, identify distinct question-answer pairs.
8
+
9
+ For each pair:
10
+ 1. Extract the core question (rephrase if needed for clarity)
11
+ 2. Extract the best answer (synthesize from multiple replies if needed)
12
+ 3. Score confidence from 0.0 to 1.0 based on:
13
+ - Answer completeness (does it fully address the question?)
14
+ - Questioner satisfaction signals ("thanks", "that worked", etc.)
15
+ - Community validation (reactions like thumbsup, check marks)
16
+ - Answer specificity (concrete steps vs vague suggestions)
17
+
18
+ Return JSON with this exact structure:
19
+ {
20
+ "pairs": [
21
+ {
22
+ "question": "How do I configure X?",
23
+ "answer": "You can configure X by...",
24
+ "confidence": 0.85
25
+ }
26
+ ]
27
+ }
28
+
29
+ Rules:
30
+ - A thread may contain multiple Q&A pairs (follow-up questions)
31
+ - Skip greetings, pleasantries, and off-topic tangents
32
+ - If no clear Q&A exists, return {"pairs": []}
33
+ - Keep answers concise but complete (aim for 1-3 paragraphs)
34
+ - Preserve code blocks, URLs, and technical details from answers
35
+ - Confidence below 0.3 means the answer is likely incomplete or wrong`;
36
+ // ── Distiller ────────────────────────────────────────────────────────────────
37
/**
 * Distill a conversation thread into Q&A pairs using an LLM.
 *
 * The first `maxMessages` messages are rendered as a transcript
 * (`[timestamp] author: content [reactions: ...]`, separated by blank lines)
 * and sent together with SYSTEM_PROMPT to the chat completions endpoint with
 * a JSON-object response format. The response is parsed and every pair is
 * validated before being returned.
 *
 * @param {Array<{author: string, content: string, timestamp: string,
 *   reactions?: Array<{name: string, count: number}>}>} messages - Thread messages.
 * @param {{model?: string, maxMessages?: number, apiKey?: string, client?: object}} [options]
 *   `client` is used as-is when given; otherwise an OpenAI client is created
 *   with `apiKey`. `model` and `maxMessages` fall back to DEFAULT_MODEL and
 *   DEFAULT_MAX_MESSAGES.
 * @returns {Promise<{pairs: Array<{question: string, answer: string, confidence: number}>}>}
 *   Valid pairs only; `{ pairs: [] }` for an empty thread, an empty LLM
 *   response, unparseable JSON, or a response without a `pairs` array.
 * @throws Re-throws any error that is not a JSON SyntaxError (e.g. API errors).
 */
export async function distillThread(messages, options) {
    const model = options?.model ?? DEFAULT_MODEL;
    const maxMessages = options?.maxMessages ?? DEFAULT_MAX_MESSAGES;
    if (messages.length === 0) {
        return { pairs: [] };
    }
    // Truncate to max messages
    const truncated = messages.slice(0, maxMessages);
    // Format as conversation transcript
    const transcript = truncated.map(msg => {
        const reactions = msg.reactions && msg.reactions.length > 0
            ? ` [reactions: ${msg.reactions.map(r => `:${r.name}: x${r.count}`).join(', ')}]`
            : '';
        return `[${msg.timestamp}] ${msg.author}: ${msg.content}${reactions}`;
    }).join('\n\n');
    const client = options?.client ?? new OpenAI({ apiKey: options?.apiKey });
    try {
        const response = await client.chat.completions.create({
            model,
            messages: [
                { role: 'system', content: SYSTEM_PROMPT },
                { role: 'user', content: transcript },
            ],
            response_format: { type: 'json_object' },
            temperature: 0.1,
        });
        const content = response.choices[0]?.message?.content;
        if (!content) {
            console.warn('[distiller] Empty response from LLM');
            return { pairs: [] };
        }
        const parsed = JSON.parse(content);
        // Validate structure. JSON.parse can legally return null or a
        // primitive; guard before touching `.pairs` so such a response is
        // reported as malformed instead of surfacing as a TypeError that
        // callers would mistake for an API failure.
        if (parsed === null || typeof parsed !== 'object' || !Array.isArray(parsed.pairs)) {
            console.warn('[distiller] Invalid response structure — missing pairs array');
            return { pairs: [] };
        }
        // Validate and filter each pair
        const validPairs = [];
        for (const pair of parsed.pairs) {
            // A null or non-object entry is skipped like any other malformed pair.
            const isObject = pair !== null && typeof pair === 'object';
            if (isObject &&
                typeof pair.question === 'string' && pair.question.trim() &&
                typeof pair.answer === 'string' && pair.answer.trim() &&
                typeof pair.confidence === 'number' &&
                pair.confidence >= 0 && pair.confidence <= 1) {
                validPairs.push({
                    question: pair.question.trim(),
                    answer: pair.answer.trim(),
                    confidence: pair.confidence,
                });
            }
            else {
                console.warn('[distiller] Skipping malformed pair:', JSON.stringify(pair).slice(0, 200));
            }
        }
        return { pairs: validPairs };
    }
    catch (error) {
        if (error instanceof SyntaxError) {
            console.error('[distiller] Failed to parse LLM JSON response:', error.message);
            return { pairs: [] };
        }
        throw error; // Re-throw API errors for caller to handle
    }
}
104
+ //# sourceMappingURL=distiller.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"distiller.js","sourceRoot":"","sources":["../../src/indexing/distiller.ts"],"names":[],"mappings":"AAAA,uEAAuE;AACvE,4EAA4E;AAE5E,OAAO,MAAM,MAAM,QAAQ,CAAC;AA4B5B,gFAAgF;AAEhF,MAAM,aAAa,GAAG,aAAa,CAAC;AACpC,MAAM,oBAAoB,GAAG,GAAG,CAAC;AAEjC,MAAM,aAAa,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;sEA4BgD,CAAC;AAEvE,gFAAgF;AAEhF;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CAC/B,QAAyB,EACzB,OAA0B;IAE1B,MAAM,KAAK,GAAG,OAAO,EAAE,KAAK,IAAI,aAAa,CAAC;IAC9C,MAAM,WAAW,GAAG,OAAO,EAAE,WAAW,IAAI,oBAAoB,CAAC;IAEjE,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC;IACzB,CAAC;IAED,2BAA2B;IAC3B,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,WAAW,CAAC,CAAC;IAEjD,oCAAoC;IACpC,MAAM,UAAU,GAAG,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE;QACnC,MAAM,SAAS,GAAG,GAAG,CAAC,SAAS,IAAI,GAAG,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC;YACvD,CAAC,CAAC,gBAAgB,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,IAAI,MAAM,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;YACjF,CAAC,CAAC,EAAE,CAAC;QACT,OAAO,IAAI,GAAG,CAAC,SAAS,KAAK,GAAG,CAAC,MAAM,KAAK,GAAG,CAAC,OAAO,GAAG,SAAS,EAAE,CAAC;IAC1E,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEhB,MAAM,MAAM,GAAG,OAAO,EAAE,MAAM,IAAI,IAAI,MAAM,CAAC,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;IAE1E,IAAI,CAAC;QACD,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC;YAClD,KAAK;YACL,QAAQ,EAAE;gBACN,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,aAAa,EAAE;gBAC1C,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE;aACxC;YACD,eAAe,EAAE,EAAE,IAAI,EAAE,aAAa,EAAE;YACxC,WAAW,EAAE,GAAG;SACnB,CAAC,CAAC;QAEH,MAAM,OAAO,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC;QACtD,IAAI,CAAC,OAAO,EAAE,CAAC;YACX,OAAO,CAAC,IAAI,CAAC,qCAAqC,CAAC,CAAC;YACpD,OAAO,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC;QACzB,CAAC;QAED,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAEnC,qBAAqB;QACrB,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC;YAC/B,OAAO,CAAC,IAAI,CAAC,8DAA8D,CAAC,CAAC;YAC7E,OAAO,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC;QACzB,CAAC;QAED,gCAAgC;QAChC,MAAM,UAAU,GAAoB,EAAE,CA
AC;QACvC,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;YAC9B,IACI,OAAO,IAAI,CAAC,QAAQ,KAAK,QAAQ,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE;gBACzD,OAAO,IAAI,CAAC,MAAM,KAAK,QAAQ,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE;gBACrD,OAAO,IAAI,CAAC,UAAU,KAAK,QAAQ;gBACnC,IAAI,CAAC,UAAU,IAAI,CAAC,IAAI,IAAI,CAAC,UAAU,IAAI,CAAC,EAC9C,CAAC;gBACC,UAAU,CAAC,IAAI,CAAC;oBACZ,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE;oBAC9B,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE;oBAC1B,UAAU,EAAE,IAAI,CAAC,UAAU;iBAC9B,CAAC,CAAC;YACP,CAAC;iBAAM,CAAC;gBACJ,OAAO,CAAC,IAAI,CAAC,sCAAsC,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;YAC7F,CAAC;QACL,CAAC;QAED,OAAO,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC;IACjC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACb,IAAI,KAAK,YAAY,WAAW,EAAE,CAAC;YAC/B,OAAO,CAAC,KAAK,CAAC,gDAAgD,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;YAC/E,OAAO,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC;QACzB,CAAC;QACD,MAAM,KAAK,CAAC,CAAC,2CAA2C;IAC5D,CAAC;AACL,CAAC"}
@@ -13,10 +13,10 @@ export declare class IndexingOrchestrator {
13
13
  */
14
14
  checkAndIndex(): Promise<void>;
15
15
  /**
16
- * Get the HEAD SHA of a remote repo without cloning.
17
- * Uses `git ls-remote` which only fetches refs.
16
+ * Get the current state token for a source without acquiring items.
17
+ * Returns null if the source is unavailable.
18
18
  */
19
- private getRemoteHead;
19
+ private getSourceStateToken;
20
20
  /**
21
21
  * Queue a full re-index of all sources. Returns immediately.
22
22
  */
@@ -25,6 +25,11 @@ export declare class IndexingOrchestrator {
25
25
  * Queue an incremental re-index for a specific repo. Returns immediately.
26
26
  */
27
27
  queueIncrementalReindex(repoUrl: string): void;
28
+ /**
29
+ * Queue a reindex for a single named source. Returns immediately.
30
+ * Used by webhook handlers to trigger reindexing of specific sources.
31
+ */
32
+ queueSourceReindex(sourceName: string): void;
28
33
  /**
29
34
  * Returns true if any indexing job is currently running.
30
35
  */
@@ -1 +1 @@
1
- {"version":3,"file":"orchestrator.d.ts","sourceRoot":"","sources":["../../src/indexing/orchestrator.ts"],"names":[],"mappings":"AAiCA,qBAAa,oBAAoB;IAC7B,OAAO,CAAC,KAAK,CAAa;IAC1B,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,UAAU,CAAS;IAG3B,OAAO,CAAC,aAAa,CAAqB;IAG1C,OAAO,CAAC,eAAe,CAAuB;IAG9C,iBAAiB,CAAC,EAAE,CAAC,WAAW,EAAE,MAAM,EAAE,KAAK,IAAI,CAAC;;IAMpD;;;;OAIG;IACG,aAAa,IAAI,OAAO,CAAC,IAAI,CAAC;IA4GpC;;;OAGG;YACW,aAAa;IAa3B;;OAEG;IACH,gBAAgB,IAAI,IAAI;IAQxB;;OAEG;IACH,uBAAuB,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI;IAU9C;;OAEG;IACH,UAAU,IAAI,OAAO;IAIrB;;;OAGG;IACH,mBAAmB,IAAI,IAAI;IA4B3B;;OAEG;IACH,OAAO,CAAC,OAAO;IASf;;OAEG;YACW,KAAK;IAwBnB;;OAEG;YACW,UAAU;IAkDxB;;OAEG;YACW,cAAc;IAqB5B;;OAEG;YACW,qBAAqB;IAoDnC;;OAEG;YACW,oBAAoB;IAgDlC;;OAEG;YACW,cAAc;IAmB5B;;OAEG;YACW,cAAc;CAgB/B"}
1
+ {"version":3,"file":"orchestrator.d.ts","sourceRoot":"","sources":["../../src/indexing/orchestrator.ts"],"names":[],"mappings":"AAqCA,qBAAa,oBAAoB;IAC7B,OAAO,CAAC,KAAK,CAAa;IAC1B,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,UAAU,CAAS;IAG3B,OAAO,CAAC,aAAa,CAAqB;IAG1C,OAAO,CAAC,eAAe,CAAuB;IAG9C,iBAAiB,CAAC,EAAE,CAAC,WAAW,EAAE,MAAM,EAAE,KAAK,IAAI,CAAC;;IAMpD;;;;OAIG;IACG,aAAa,IAAI,OAAO,CAAC,IAAI,CAAC;IA+GpC;;;OAGG;YACW,mBAAmB;IAajC;;OAEG;IACH,gBAAgB,IAAI,IAAI;IAQxB;;OAEG;IACH,uBAAuB,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI;IAU9C;;;OAGG;IACH,kBAAkB,CAAC,UAAU,EAAE,MAAM,GAAG,IAAI;IAQ5C;;OAEG;IACH,UAAU,IAAI,OAAO;IAIrB;;;OAGG;IACH,mBAAmB,IAAI,IAAI;IA4B3B;;OAEG;IACH,OAAO,CAAC,OAAO;IASf;;OAEG;YACW,KAAK;IAwBnB;;OAEG;YACW,UAAU;IA8DxB;;OAEG;YACW,cAAc;IAqB5B;;OAEG;YACW,qBAAqB;IAiBnC;;OAEG;YACW,oBAAoB;IAsDlC;;OAEG;YACW,cAAc;IAmB5B;;OAEG;YACW,cAAc;CAgB/B"}
@@ -1,17 +1,18 @@
1
1
  // Job queue and coordination for indexing pipelines.
2
2
  // Fully config-driven: indexes sources referenced by search tools in mcp-docs.yaml.
3
- import fs from 'fs';
4
- import path from 'path';
5
- import { simpleGit } from 'simple-git';
3
+ import fs from 'node:fs';
4
+ import path from 'node:path';
6
5
  import { getConfig, getServerConfig, getIndexableSourceNames } from '../config.js';
7
6
  import { EmbeddingClient } from './embeddings.js';
8
- import { SourceIndexer } from './source-indexer.js';
7
+ import { getProvider } from './providers/index.js';
8
+ import { IndexingPipeline } from './pipeline.js';
9
9
  import { getIndexState, upsertIndexState, } from '../db/queries.js';
10
+ import { isFileSourceConfig } from '../types.js';
10
11
  /**
11
12
  * Find all source configs that reference a given repo URL.
12
13
  */
13
14
  function getSourcesByRepo(repoUrl) {
14
- return getServerConfig().sources.filter(s => s.repo === repoUrl);
15
+ return getServerConfig().sources.filter(s => isFileSourceConfig(s) && s.repo === repoUrl);
15
16
  }
16
17
  function getStaleThresholdMs() {
17
18
  const serverCfg = getServerConfig();
@@ -67,89 +68,94 @@ export class IndexingOrchestrator {
67
68
  return;
68
69
  }
69
70
  // Queue incremental reindexes for each affected git-backed repo
70
- const reposToReindex = new Set(sourcesNeedingFullReindex.filter(s => s.repo).map(s => s.repo));
71
+ const reposToReindex = new Set();
72
+ for (const s of sourcesNeedingFullReindex) {
73
+ if (isFileSourceConfig(s) && s.repo)
74
+ reposToReindex.add(s.repo);
75
+ }
71
76
  for (const repoUrl of reposToReindex) {
72
77
  this.queueIncrementalReindex(repoUrl);
73
78
  }
74
79
  // Local sources (no repo) get queued as a full reindex of just those sources
75
- const localSources = sourcesNeedingFullReindex.filter(s => !s.repo);
80
+ const localSources = sourcesNeedingFullReindex.filter(s => isFileSourceConfig(s) && !s.repo);
76
81
  if (localSources.length > 0) {
77
82
  this.queue.push({ type: 'full-reindex-local', sources: localSources });
78
83
  this.drain().catch(err => console.error('[orchestrator] drain() failed:', err));
79
84
  }
85
+ // Non-file sources (e.g., Slack) that need reindexing
86
+ const nonFileSources = sourcesNeedingFullReindex.filter(s => !isFileSourceConfig(s));
87
+ for (const source of nonFileSources) {
88
+ this.queueSourceReindex(source.name);
89
+ }
80
90
  }
81
91
  if (sourcesOk.length === 0)
82
92
  return;
83
93
  // Local sources in sourcesOk have no remote to check — always reindex on startup
84
- const localSourcesOk = sourcesOk.filter(s => !s.repo);
94
+ const localSourcesOk = sourcesOk.filter(s => isFileSourceConfig(s) && !s.repo);
85
95
  if (localSourcesOk.length > 0) {
86
96
  console.log(`[orchestrator] Queuing reindex for ${localSourcesOk.length} local source(s)`);
87
97
  this.queue.push({ type: 'full-reindex-local', sources: localSourcesOk });
88
98
  this.drain().catch(err => console.error('[orchestrator] drain() failed:', err));
89
99
  }
90
100
  console.log('[orchestrator] Checking remotes for changes on indexed sources...');
91
- // Only check remotes for git-backed sources
92
- const repos = [...new Set(sourcesOk.filter(s => s.repo).map(s => s.repo))];
93
- for (const repoUrl of repos) {
101
+ // Check each git-backed source for changes
102
+ const gitSourcesOk = sourcesOk.filter(s => isFileSourceConfig(s) && s.repo);
103
+ for (const source of gitSourcesOk) {
94
104
  try {
95
- const remoteHead = await this.getRemoteHead(repoUrl);
96
- const sources = getSourcesByRepo(repoUrl).filter(s => indexableNames.has(s.name));
97
- let anyChanged = false;
98
- for (const source of sources) {
99
- const state = await getIndexState(source.type, source.name);
100
- if (state?.last_commit_sha !== remoteHead) {
101
- anyChanged = true;
102
- break;
105
+ const currentToken = await this.getSourceStateToken(source);
106
+ const state = await getIndexState(source.type, source.name);
107
+ if (currentToken === null || state?.last_commit_sha !== currentToken) {
108
+ const reason = currentToken === null
109
+ ? 'source unavailable (clone missing?)'
110
+ : `remote ${currentToken.slice(0, 8)} differs from indexed`;
111
+ console.log(`[orchestrator] ${reason} for ${source.name} — queuing reindex`);
112
+ if (isFileSourceConfig(source) && source.repo) {
113
+ this.queueIncrementalReindex(source.repo);
103
114
  }
104
115
  }
105
- // Even if DB says current, verify clone dir exists (fresh container = empty /tmp)
106
- const repoName = repoUrl.split('/').pop()?.replace(/\.git$/, '') ?? '';
107
- const cloneDir = getConfig().cloneDir;
108
- const repoDir = path.join(cloneDir, repoName);
109
- const cloneMissing = !fs.existsSync(repoDir);
110
- if (anyChanged || cloneMissing) {
111
- const reason = cloneMissing
112
- ? `clone dir missing at ${repoDir}`
113
- : `remote HEAD ${remoteHead.slice(0, 8)} differs from indexed`;
114
- console.log(`[orchestrator] ${reason} for ${repoUrl} — queuing incremental reindex`);
115
- this.queueIncrementalReindex(repoUrl);
116
- }
117
116
  else {
118
- console.log(`[orchestrator] Index current at ${remoteHead.slice(0, 8)}`);
117
+ console.log(`[orchestrator] ${source.name} index current at ${currentToken.slice(0, 8)}`);
119
118
  }
120
119
  }
121
120
  catch (err) {
122
- // If we can't check remote, fall back to age-based staleness
123
- console.warn(`[orchestrator] Failed to check remote HEAD for ${repoUrl}, falling back to age check:`, err);
124
- const repoSources = getSourcesByRepo(repoUrl);
125
- const firstState = await getIndexState(repoSources[0].type, repoSources[0].name);
126
- if (this.isStale(firstState)) {
127
- const thresholdHours = getServerConfig().indexing?.stale_threshold_hours ?? 24;
128
- console.log(`[orchestrator] Index for ${repoUrl} is stale (>${thresholdHours}h) — queuing full reindex`);
121
+ console.warn(`[orchestrator] Failed to check state for ${source.name}, falling back to age check:`, err);
122
+ const state = await getIndexState(source.type, source.name);
123
+ if (this.isStale(state)) {
124
+ console.log(`[orchestrator] Index for ${source.name} is stale — queuing full reindex`);
129
125
  this.queueFullReindex();
130
126
  }
131
- else {
132
- console.log(`[orchestrator] Index for ${repoUrl} appears fresh, skipping`);
133
- }
127
+ }
128
+ }
129
+ // Ensure git repos are cloned even when index is current.
130
+ // On fresh deploys, the container has no local clones but the DB may have valid state.
131
+ // Bash tools need the clone directories to build their filesystem.
132
+ const cloneDir = getConfig().cloneDir;
133
+ for (const source of gitSourcesOk) {
134
+ if (!isFileSourceConfig(source) || !source.repo)
135
+ continue;
136
+ const repoName = source.repo.replace(/\.git$/, '').split('/').pop();
137
+ const repoDir = path.join(cloneDir, repoName);
138
+ if (!fs.existsSync(repoDir)) {
139
+ console.log(`[orchestrator] Clone directory missing for ${source.name}, queuing reindex to populate`);
140
+ this.queueIncrementalReindex(source.repo);
134
141
  }
135
142
  }
136
143
  }
137
144
  /**
138
- * Get the HEAD SHA of a remote repo without cloning.
139
- * Uses `git ls-remote` which only fetches refs.
145
+ * Get the current state token for a source without acquiring items.
146
+ * Returns null if the source is unavailable.
140
147
  */
141
- async getRemoteHead(repoUrl) {
148
+ async getSourceStateToken(source) {
142
149
  const config = getConfig();
143
- let url = repoUrl;
144
- if (config.githubToken) {
145
- url = repoUrl.replace('https://github.com/', `https://x-access-token:${config.githubToken}@github.com/`);
146
- }
147
- const git = simpleGit();
148
- const result = await git.listRemote([url, 'HEAD']);
149
- const sha = result.split('\t')[0]?.trim();
150
- if (!sha)
151
- throw new Error(`Could not resolve HEAD for ${repoUrl}`);
152
- return sha;
150
+ const providerOptions = {
151
+ cloneDir: config.cloneDir,
152
+ githubToken: config.githubToken,
153
+ slackBotToken: config.slackBotToken,
154
+ discordBotToken: config.discordBotToken,
155
+ notionToken: config.notionToken,
156
+ };
157
+ const provider = getProvider(source.type)(source, providerOptions);
158
+ return provider.getCurrentStateToken();
153
159
  }
154
160
  /**
155
161
  * Queue a full re-index of all sources. Returns immediately.
@@ -171,6 +177,17 @@ export class IndexingOrchestrator {
171
177
  console.error('[orchestrator] drain() failed:', err);
172
178
  });
173
179
  }
180
+ /**
181
+ * Queue a reindex for a single named source. Returns immediately.
182
+ * Used by webhook handlers to trigger reindexing of specific sources.
183
+ */
184
+ queueSourceReindex(sourceName) {
185
+ this.queue.push({ type: 'source-reindex', sourceName });
186
+ console.log(`[orchestrator] Source re-index queued for ${sourceName}`);
187
+ this.drain().catch((err) => {
188
+ console.error('[orchestrator] drain() failed:', err);
189
+ });
190
+ }
174
191
  /**
175
192
  * Returns true if any indexing job is currently running.
176
193
  */
@@ -276,6 +293,19 @@ export class IndexingOrchestrator {
276
293
  await this.runIncrementalReindex(embeddingClient, config.cloneDir, config.githubToken, job.repoUrl);
277
294
  affectedSourceNames = getSourcesByRepo(job.repoUrl).map(s => s.name);
278
295
  }
296
+ else if (job.type === 'source-reindex') {
297
+ if (!job.sourceName) {
298
+ console.warn('[orchestrator] source-reindex job has no sourceName, skipping');
299
+ return;
300
+ }
301
+ const sourceConfig = serverCfg2.sources.find(s => s.name === job.sourceName);
302
+ if (!sourceConfig) {
303
+ console.warn(`[orchestrator] source-reindex: source "${job.sourceName}" not found in config`);
304
+ return;
305
+ }
306
+ await this.indexSourceWithState(sourceConfig, embeddingClient, config.cloneDir);
307
+ affectedSourceNames = [job.sourceName];
308
+ }
279
309
  if (affectedSourceNames.length > 0 && this.onReindexComplete) {
280
310
  try {
281
311
  this.onReindexComplete(affectedSourceNames);
@@ -305,38 +335,7 @@ export class IndexingOrchestrator {
305
335
  const indexableNames = getIndexableSourceNames();
306
336
  const sources = getSourcesByRepo(repoUrl).filter(s => indexableNames.has(s.name));
307
337
  for (const sourceConfig of sources) {
308
- const state = await getIndexState(sourceConfig.type, sourceConfig.name);
309
- if (state?.last_commit_sha) {
310
- await this.withSourceLock(`${sourceConfig.type}:${sourceConfig.name}`, async () => {
311
- await this.setIndexStatus(sourceConfig.type, sourceConfig.name, 'indexing');
312
- try {
313
- const indexer = new SourceIndexer(sourceConfig, embeddingClient, cloneDir, githubToken);
314
- await indexer.incrementalIndex(state.last_commit_sha);
315
- const headSha = await indexer.getHeadSha();
316
- await upsertIndexState({
317
- source_type: sourceConfig.type,
318
- source_key: sourceConfig.name,
319
- last_commit_sha: headSha,
320
- last_indexed_at: new Date(),
321
- status: 'idle',
322
- });
323
- }
324
- catch (err) {
325
- console.error(`[orchestrator] Incremental reindex failed for ${sourceConfig.name}:`, err);
326
- try {
327
- await this.setIndexStatus(sourceConfig.type, sourceConfig.name, 'error', err instanceof Error ? err.message : String(err));
328
- }
329
- catch (statusErr) {
330
- console.error('[orchestrator] Failed to update index status:', statusErr);
331
- }
332
- // Don't rethrow — continue with remaining sources
333
- }
334
- });
335
- }
336
- else {
337
- // No previous state — do a full index for this source
338
- await this.indexSourceWithState(sourceConfig, embeddingClient, cloneDir, githubToken);
339
- }
338
+ await this.indexSourceWithState(sourceConfig, embeddingClient, cloneDir, githubToken);
340
339
  }
341
340
  console.log(`[orchestrator] Incremental re-index complete for ${repoUrl}`);
342
341
  }
@@ -346,21 +345,29 @@ export class IndexingOrchestrator {
346
345
  async indexSourceWithState(sourceConfig, embeddingClient, cloneDir, githubToken) {
347
346
  const lockKey = `${sourceConfig.type}:${sourceConfig.name}`;
348
347
  await this.withSourceLock(lockKey, async () => {
349
- const indexer = new SourceIndexer(sourceConfig, embeddingClient, cloneDir, githubToken);
348
+ const providerOptions = { cloneDir, githubToken, slackBotToken: getConfig().slackBotToken, discordBotToken: getConfig().discordBotToken, notionToken: getConfig().notionToken };
349
+ const provider = getProvider(sourceConfig.type)(sourceConfig, providerOptions);
350
+ const pipeline = new IndexingPipeline(embeddingClient, sourceConfig);
350
351
  await this.setIndexStatus(sourceConfig.type, sourceConfig.name, 'indexing');
351
352
  try {
352
353
  const state = await getIndexState(sourceConfig.type, sourceConfig.name);
354
+ let result;
353
355
  if (state?.last_commit_sha) {
354
- await indexer.incrementalIndex(state.last_commit_sha);
356
+ result = await provider.incrementalAcquire(state.last_commit_sha);
355
357
  }
356
358
  else {
357
- await indexer.fullIndex();
359
+ result = await provider.fullAcquire();
360
+ }
361
+ if (result.removedIds.length > 0) {
362
+ await pipeline.removeItems(result.removedIds);
363
+ }
364
+ if (result.items.length > 0) {
365
+ await pipeline.indexItems(result.items, result.stateToken);
358
366
  }
359
- const headSha = await indexer.getHeadSha();
360
367
  await upsertIndexState({
361
368
  source_type: sourceConfig.type,
362
369
  source_key: sourceConfig.name,
363
- last_commit_sha: headSha,
370
+ last_commit_sha: result.stateToken,
364
371
  last_indexed_at: new Date(),
365
372
  status: 'idle',
366
373
  });
@@ -374,7 +381,6 @@ export class IndexingOrchestrator {
374
381
  catch (statusErr) {
375
382
  console.error('[orchestrator] Failed to update index status:', statusErr);
376
383
  }
377
- // Don't rethrow — continue with remaining sources
378
384
  }
379
385
  });
380
386
  }