@copilotkit/pathfinder 1.4.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +35 -0
- package/LICENSE +106 -21
- package/README.md +14 -3
- package/dist/cli.js +11 -1
- package/dist/cli.js.map +1 -1
- package/dist/config.d.ts +9 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +64 -5
- package/dist/config.js.map +1 -1
- package/dist/db/queries.d.ts +12 -1
- package/dist/db/queries.d.ts.map +1 -1
- package/dist/db/queries.js +64 -0
- package/dist/db/queries.js.map +1 -1
- package/dist/faq-txt.d.ts +12 -0
- package/dist/faq-txt.d.ts.map +1 -0
- package/dist/faq-txt.js +37 -0
- package/dist/faq-txt.js.map +1 -0
- package/dist/indexing/chunking/index.js +4 -0
- package/dist/indexing/chunking/index.js.map +1 -1
- package/dist/indexing/chunking/qa.d.ts +8 -0
- package/dist/indexing/chunking/qa.d.ts.map +1 -0
- package/dist/indexing/chunking/qa.js +22 -0
- package/dist/indexing/chunking/qa.js.map +1 -0
- package/dist/indexing/distiller.d.ts +29 -0
- package/dist/indexing/distiller.d.ts.map +1 -0
- package/dist/indexing/distiller.js +104 -0
- package/dist/indexing/distiller.js.map +1 -0
- package/dist/indexing/orchestrator.d.ts +8 -3
- package/dist/indexing/orchestrator.d.ts.map +1 -1
- package/dist/indexing/orchestrator.js +99 -93
- package/dist/indexing/orchestrator.js.map +1 -1
- package/dist/indexing/pipeline.d.ts +18 -0
- package/dist/indexing/pipeline.d.ts.map +1 -0
- package/dist/indexing/pipeline.js +68 -0
- package/dist/indexing/pipeline.js.map +1 -0
- package/dist/indexing/providers/discord-api.d.ts +79 -0
- package/dist/indexing/providers/discord-api.d.ts.map +1 -0
- package/dist/indexing/providers/discord-api.js +167 -0
- package/dist/indexing/providers/discord-api.js.map +1 -0
- package/dist/indexing/providers/discord.d.ts +25 -0
- package/dist/indexing/providers/discord.d.ts.map +1 -0
- package/dist/indexing/providers/discord.js +282 -0
- package/dist/indexing/providers/discord.js.map +1 -0
- package/dist/indexing/providers/file.d.ts +18 -0
- package/dist/indexing/providers/file.d.ts.map +1 -0
- package/dist/indexing/providers/file.js +262 -0
- package/dist/indexing/providers/file.js.map +1 -0
- package/dist/indexing/providers/index.d.ts +5 -0
- package/dist/indexing/providers/index.d.ts.map +1 -0
- package/dist/indexing/providers/index.js +24 -0
- package/dist/indexing/providers/index.js.map +1 -0
- package/dist/indexing/providers/notion-api.d.ts +101 -0
- package/dist/indexing/providers/notion-api.d.ts.map +1 -0
- package/dist/indexing/providers/notion-api.js +419 -0
- package/dist/indexing/providers/notion-api.js.map +1 -0
- package/dist/indexing/providers/notion.d.ts +29 -0
- package/dist/indexing/providers/notion.d.ts.map +1 -0
- package/dist/indexing/providers/notion.js +236 -0
- package/dist/indexing/providers/notion.js.map +1 -0
- package/dist/indexing/providers/slack-api.d.ts +62 -0
- package/dist/indexing/providers/slack-api.d.ts.map +1 -0
- package/dist/indexing/providers/slack-api.js +167 -0
- package/dist/indexing/providers/slack-api.js.map +1 -0
- package/dist/indexing/providers/slack.d.ts +21 -0
- package/dist/indexing/providers/slack.d.ts.map +1 -0
- package/dist/indexing/providers/slack.js +192 -0
- package/dist/indexing/providers/slack.js.map +1 -0
- package/dist/indexing/providers/types.d.ts +57 -0
- package/dist/indexing/providers/types.d.ts.map +1 -0
- package/dist/indexing/providers/types.js +3 -0
- package/dist/indexing/providers/types.js.map +1 -0
- package/dist/indexing/url-derivation.d.ts +2 -2
- package/dist/indexing/url-derivation.d.ts.map +1 -1
- package/dist/indexing/url-derivation.js.map +1 -1
- package/dist/indexing/utils.d.ts +19 -0
- package/dist/indexing/utils.d.ts.map +1 -0
- package/dist/indexing/utils.js +63 -0
- package/dist/indexing/utils.js.map +1 -0
- package/dist/mcp/server.d.ts.map +1 -1
- package/dist/mcp/server.js +4 -0
- package/dist/mcp/server.js.map +1 -1
- package/dist/mcp/tools/bash-fs.d.ts.map +1 -1
- package/dist/mcp/tools/bash-fs.js +4 -1
- package/dist/mcp/tools/bash-fs.js.map +1 -1
- package/dist/mcp/tools/knowledge.d.ts +13 -0
- package/dist/mcp/tools/knowledge.d.ts.map +1 -0
- package/dist/mcp/tools/knowledge.js +92 -0
- package/dist/mcp/tools/knowledge.js.map +1 -0
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +111 -8
- package/dist/server.js.map +1 -1
- package/dist/types.d.ts +1075 -79
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +94 -4
- package/dist/types.js.map +1 -1
- package/dist/validate.d.ts +29 -0
- package/dist/validate.d.ts.map +1 -0
- package/dist/validate.js +197 -0
- package/dist/validate.js.map +1 -0
- package/dist/webhooks/discord.d.ts +13 -0
- package/dist/webhooks/discord.d.ts.map +1 -0
- package/dist/webhooks/discord.js +57 -0
- package/dist/webhooks/discord.js.map +1 -0
- package/dist/webhooks/slack.d.ts +13 -0
- package/dist/webhooks/slack.d.ts.map +1 -0
- package/dist/webhooks/slack.js +106 -0
- package/dist/webhooks/slack.js.map +1 -0
- package/package.json +17 -3
- package/dist/indexing/source-indexer.d.ts +0 -68
- package/dist/indexing/source-indexer.d.ts.map +0 -1
- package/dist/indexing/source-indexer.js +0 -380
- package/dist/indexing/source-indexer.js.map +0 -1
package/dist/faq-txt.js
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
 * Generate the /faq.txt content from FAQ chunks.
 *
 * Chunks are grouped by `source_name` and emitted as one `## <source>` section
 * per entry of `faqSources`, in the order the sources appear there. Chunks
 * whose `source_name` is not listed in `faqSources` are not emitted.
 *
 * @param {Array<{source_name: string, content: string}>} chunks - FAQ chunks;
 *   `content` is expected to already be formatted as "Q: ...\n\nA: ...".
 *   `null`/`undefined` is treated as an empty list.
 * @param {string} serverName - Name interpolated into the document heading.
 * @param {Array<{name: string}>} faqSources - Sources to emit, in output order.
 *   `null`/`undefined` is treated as an empty list.
 * @returns {string} The document text, lines joined with "\n".
 */
export function generateFaqTxt(chunks, serverName, faqSources) {
    const lines = [
        `# ${serverName} — Frequently Asked Questions`,
        '',
    ];

    // Group chunks by source_name, preserving the incoming chunk order.
    const bySource = new Map();
    for (const chunk of chunks ?? []) {
        const group = bySource.get(chunk.source_name);
        if (group) {
            group.push(chunk);
        } else {
            bySource.set(chunk.source_name, [chunk]);
        }
    }

    // Emit each source section in the order of the faqSources config.
    let hasContent = false;
    for (const source of faqSources ?? []) {
        const sourceChunks = bySource.get(source.name);
        if (!sourceChunks || sourceChunks.length === 0) {
            continue;
        }
        // Drop the group once emitted so a source name repeated in the
        // config cannot produce the same section twice.
        bySource.delete(source.name);

        hasContent = true;
        lines.push(`## ${source.name}`, '');
        for (const chunk of sourceChunks) {
            // Content is stored as "Q: ...\n\nA: ..."
            lines.push(chunk.content, '');
        }
    }

    if (!hasContent) {
        lines.push('No FAQ content available yet.', '');
    }
    return lines.join('\n');
}
|
|
37
|
+
//# sourceMappingURL=faq-txt.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"faq-txt.js","sourceRoot":"","sources":["../src/faq-txt.ts"],"names":[],"mappings":"AAOA;;;GAGG;AACH,MAAM,UAAU,cAAc,CAC1B,MAAwB,EACxB,UAAkB,EAClB,UAAuB;IAEvB,MAAM,KAAK,GAAa;QACpB,KAAK,UAAU,+BAA+B;QAC9C,EAAE;KACL,CAAC;IAEF,8BAA8B;IAC9B,MAAM,QAAQ,GAAG,IAAI,GAAG,EAA4B,CAAC;IACrD,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QACzB,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,CAAC;YAAE,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;QAC1E,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,CAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACjD,CAAC;IAED,6DAA6D;IAC7D,IAAI,UAAU,GAAG,KAAK,CAAC;IACvB,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;QAC9B,MAAM,YAAY,GAAG,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAC/C,IAAI,CAAC,YAAY,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAEzD,UAAU,GAAG,IAAI,CAAC;QAClB,KAAK,CAAC,IAAI,CAAC,MAAM,MAAM,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC,CAAC;QAEpC,KAAK,MAAM,KAAK,IAAI,YAAY,EAAE,CAAC;YAC/B,0CAA0C;YAC1C,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAC1B,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACnB,CAAC;IACL,CAAC;IAED,IAAI,CAAC,UAAU,EAAE,CAAC;QACd,KAAK,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;QAC5C,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACnB,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC5B,CAAC"}
|
|
@@ -18,4 +18,8 @@ registerChunker('markdown', chunkMarkdown);
|
|
|
18
18
|
registerChunker('code', chunkCode);
|
|
19
19
|
registerChunker('raw-text', chunkRawText);
|
|
20
20
|
registerChunker('html', chunkHtml);
|
|
21
|
+
import { chunkQa } from './qa.js';
|
|
22
|
+
registerChunker('slack', chunkQa);
|
|
23
|
+
registerChunker('discord', chunkQa);
|
|
24
|
+
registerChunker('notion', chunkMarkdown);
|
|
21
25
|
//# sourceMappingURL=index.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/indexing/chunking/index.ts"],"names":[],"mappings":"AAAA,8DAA8D;AAM9D,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAqB,CAAC;AAE9C,MAAM,UAAU,eAAe,CAAC,IAAY,EAAE,EAAa;IACvD,QAAQ,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;AAC3B,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,IAAY;IACnC,MAAM,EAAE,GAAG,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC9B,IAAI,CAAC,EAAE;QAAE,MAAM,IAAI,KAAK,CAAC,0BAA0B,IAAI,iBAAiB,CAAC,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC3G,OAAO,EAAE,CAAC;AACd,CAAC;AAED,+BAA+B;AAC/B,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AACtC,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAC7C,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,eAAe,CAAC,UAAU,EAAE,aAAa,CAAC,CAAC;AAC3C,eAAe,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;AACnC,eAAe,CAAC,UAAU,EAAE,YAAY,CAAC,CAAC;AAC1C,eAAe,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/indexing/chunking/index.ts"],"names":[],"mappings":"AAAA,8DAA8D;AAM9D,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAqB,CAAC;AAE9C,MAAM,UAAU,eAAe,CAAC,IAAY,EAAE,EAAa;IACvD,QAAQ,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;AAC3B,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,IAAY;IACnC,MAAM,EAAE,GAAG,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC9B,IAAI,CAAC,EAAE;QAAE,MAAM,IAAI,KAAK,CAAC,0BAA0B,IAAI,iBAAiB,CAAC,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC3G,OAAO,EAAE,CAAC;AACd,CAAC;AAED,+BAA+B;AAC/B,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AACtC,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAC7C,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,eAAe,CAAC,UAAU,EAAE,aAAa,CAAC,CAAC;AAC3C,eAAe,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;AACnC,eAAe,CAAC,UAAU,EAAE,YAAY,CAAC,CAAC;AAC1C,eAAe,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;AAEnC,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAElC,eAAe,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;AAClC,eAAe,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;AACpC,eAAe,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { ChunkOutput, SourceConfig } from '../../types.js';
|
|
2
|
+
/**
|
|
3
|
+
* Chunk Q&A content. Each content item from a FAQ-category provider
|
|
4
|
+
* is a single Q&A pair, already sized appropriately.
|
|
5
|
+
* The chunker formats it and returns a single ChunkOutput.
|
|
6
|
+
*/
|
|
7
|
+
export declare function chunkQa(content: string, filePath: string, config: SourceConfig): ChunkOutput[];
|
|
8
|
+
//# sourceMappingURL=qa.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"qa.d.ts","sourceRoot":"","sources":["../../../src/indexing/chunking/qa.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAEhE;;;;GAIG;AACH,wBAAgB,OAAO,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,GAAG,WAAW,EAAE,CAe9F"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
// Q&A chunker — formats distilled Q&A pairs for embedding.
|
|
2
|
+
// Source-agnostic: used by any source that produces Q&A-formatted content.
|
|
3
|
+
/**
 * Chunk Q&A content. Each content item from a FAQ-category provider
 * is a single Q&A pair, already sized appropriately, so the chunker
 * returns at most one ChunkOutput.
 *
 * The title is taken from the first line when it starts with "Q:". It is
 * extracted from the trimmed content (the same text that is emitted), so
 * leading whitespace does not hide the question. Only the remainder of the
 * "Q:" line itself is used: an empty question line yields no title rather
 * than picking up the following line, and CRLF line endings are handled.
 *
 * @param {string} content - Q&A text, expected as "Q: ...\n\nA: ...".
 * @param {string} filePath - Unused; kept for the shared chunker signature.
 * @param {object} config - Unused; kept for the shared chunker signature.
 * @returns {Array<{content: string, title: (string|undefined), chunkIndex: number}>}
 *   An empty array for empty/blank content, otherwise a single chunk.
 */
export function chunkQa(content, filePath, config) {
    if (!content || !content.trim()) {
        return [];
    }
    const text = content.trim();

    // `.` never matches a line terminator (\n or \r), so the capture is
    // confined to the "Q:" line; [ \t]* (not \s*) keeps the skip on that line.
    const questionMatch = text.match(/^Q:[ \t]*(.+)/);
    const title = questionMatch?.[1].trim() || undefined;

    return [{
            content: text,
            title,
            chunkIndex: 0,
        }];
}
|
|
22
|
+
//# sourceMappingURL=qa.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"qa.js","sourceRoot":"","sources":["../../../src/indexing/chunking/qa.ts"],"names":[],"mappings":"AAAA,2DAA2D;AAC3D,2EAA2E;AAI3E;;;;GAIG;AACH,MAAM,UAAU,OAAO,CAAC,OAAe,EAAE,QAAgB,EAAE,MAAoB;IAC3E,IAAI,CAAC,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;QAC9B,OAAO,EAAE,CAAC;IACd,CAAC;IAED,0EAA0E;IAC1E,yCAAyC;IACzC,MAAM,aAAa,GAAG,OAAO,CAAC,KAAK,CAAC,qBAAqB,CAAC,CAAC;IAC3D,MAAM,KAAK,GAAG,aAAa,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;IAElE,OAAO,CAAC;YACJ,OAAO,EAAE,OAAO,CAAC,IAAI,EAAE;YACvB,KAAK;YACL,UAAU,EAAE,CAAC;SAChB,CAAC,CAAC;AACP,CAAC"}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import OpenAI from 'openai';
|
|
2
|
+
export interface ThreadMessage {
|
|
3
|
+
author: string;
|
|
4
|
+
content: string;
|
|
5
|
+
timestamp: string;
|
|
6
|
+
reactions?: Array<{
|
|
7
|
+
name: string;
|
|
8
|
+
count: number;
|
|
9
|
+
}>;
|
|
10
|
+
}
|
|
11
|
+
export interface DistilledPair {
|
|
12
|
+
question: string;
|
|
13
|
+
answer: string;
|
|
14
|
+
confidence: number;
|
|
15
|
+
}
|
|
16
|
+
export interface DistillerResult {
|
|
17
|
+
pairs: DistilledPair[];
|
|
18
|
+
}
|
|
19
|
+
export interface DistillerOptions {
|
|
20
|
+
model?: string;
|
|
21
|
+
maxMessages?: number;
|
|
22
|
+
apiKey?: string;
|
|
23
|
+
client?: OpenAI;
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* Distill a conversation thread into Q&A pairs using an LLM.
|
|
27
|
+
*/
|
|
28
|
+
export declare function distillThread(messages: ThreadMessage[], options?: DistillerOptions): Promise<DistillerResult>;
|
|
29
|
+
//# sourceMappingURL=distiller.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"distiller.d.ts","sourceRoot":"","sources":["../../src/indexing/distiller.ts"],"names":[],"mappings":"AAGA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAI5B,MAAM,WAAW,aAAa;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CACtD;AAED,MAAM,WAAW,aAAa;IAC1B,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,eAAe;IAC5B,KAAK,EAAE,aAAa,EAAE,CAAC;CAC1B;AAED,MAAM,WAAW,gBAAgB;IAC7B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;CACnB;AAuCD;;GAEG;AACH,wBAAsB,aAAa,CAC/B,QAAQ,EAAE,aAAa,EAAE,EACzB,OAAO,CAAC,EAAE,gBAAgB,GAC3B,OAAO,CAAC,eAAe,CAAC,CAyE1B"}
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
// LLM thread distiller — extracts Q&A pairs from conversation threads.
|
|
2
|
+
// Source-agnostic: takes structured messages, returns structured Q&A pairs.
|
|
3
|
+
import OpenAI from 'openai';
|
|
4
|
+
// ── Constants ────────────────────────────────────────────────────────────────
|
|
5
|
+
/** Chat model used when `options.model` is not provided. */
const DEFAULT_MODEL = 'gpt-4o-mini';
/** Maximum number of leading thread messages sent to the LLM by default. */
const DEFAULT_MAX_MESSAGES = 100;
/** System prompt instructing the model to return `{"pairs": [...]}` JSON. */
const SYSTEM_PROMPT = `You are a Q&A extraction engine. Given a conversation thread, identify distinct question-answer pairs.

For each pair:
1. Extract the core question (rephrase if needed for clarity)
2. Extract the best answer (synthesize from multiple replies if needed)
3. Score confidence from 0.0 to 1.0 based on:
- Answer completeness (does it fully address the question?)
- Questioner satisfaction signals ("thanks", "that worked", etc.)
- Community validation (reactions like thumbsup, check marks)
- Answer specificity (concrete steps vs vague suggestions)

Return JSON with this exact structure:
{
"pairs": [
{
"question": "How do I configure X?",
"answer": "You can configure X by...",
"confidence": 0.85
}
]
}

Rules:
- A thread may contain multiple Q&A pairs (follow-up questions)
- Skip greetings, pleasantries, and off-topic tangents
- If no clear Q&A exists, return {"pairs": []}
- Keep answers concise but complete (aim for 1-3 paragraphs)
- Preserve code blocks, URLs, and technical details from answers
- Confidence below 0.3 means the answer is likely incomplete or wrong`;
// ── Distiller ────────────────────────────────────────────────────────────────

/**
 * Render one thread message as a transcript entry:
 * "[timestamp] author: content [reactions: :name: xN, ...]".
 */
function formatTranscriptEntry(msg) {
    const reactions = msg.reactions && msg.reactions.length > 0
        ? ` [reactions: ${msg.reactions.map((r) => `:${r.name}: x${r.count}`).join(', ')}]`
        : '';
    return `[${msg.timestamp}] ${msg.author}: ${msg.content}${reactions}`;
}

/**
 * True when `pair` is an object with a non-blank string question and answer
 * and a numeric confidence within [0, 1]. Safe for null and primitives.
 */
function isWellFormedPair(pair) {
    return pair !== null && typeof pair === 'object' &&
        typeof pair.question === 'string' && pair.question.trim() !== '' &&
        typeof pair.answer === 'string' && pair.answer.trim() !== '' &&
        typeof pair.confidence === 'number' &&
        pair.confidence >= 0 && pair.confidence <= 1;
}

/**
 * Distill a conversation thread into Q&A pairs using an LLM.
 *
 * @param {Array<{author: string, content: string, timestamp: string,
 *   reactions?: Array<{name: string, count: number}>}>} messages - Thread
 *   messages; only the first `maxMessages` are sent to the model.
 * @param {{model?: string, maxMessages?: number, apiKey?: string, client?: object}} [options]
 *   `client` is used as-is when given; otherwise an OpenAI client is
 *   constructed with `apiKey`.
 * @returns {Promise<{pairs: Array<{question: string, answer: string, confidence: number}>}>}
 *   Validated pairs. An empty list is returned for an empty thread, an empty
 *   LLM response, unparseable JSON, or a response without a `pairs` array.
 * @throws Errors raised by the API call itself are propagated to the caller.
 */
export async function distillThread(messages, options) {
    const model = options?.model ?? DEFAULT_MODEL;
    const maxMessages = options?.maxMessages ?? DEFAULT_MAX_MESSAGES;
    if (messages.length === 0) {
        return { pairs: [] };
    }

    // Truncate to max messages and format as a conversation transcript.
    const transcript = messages
        .slice(0, maxMessages)
        .map(formatTranscriptEntry)
        .join('\n\n');

    const client = options?.client ?? new OpenAI({ apiKey: options?.apiKey });

    // Deliberately outside any try/catch: API errors are the caller's to handle.
    const response = await client.chat.completions.create({
        model,
        messages: [
            { role: 'system', content: SYSTEM_PROMPT },
            { role: 'user', content: transcript },
        ],
        response_format: { type: 'json_object' },
        temperature: 0.1,
    });

    const content = response.choices[0]?.message?.content;
    if (!content) {
        console.warn('[distiller] Empty response from LLM');
        return { pairs: [] };
    }

    // Only the parse step is guarded, so a SyntaxError from anywhere else is
    // never mistaken for malformed LLM output.
    let parsed;
    try {
        parsed = JSON.parse(content);
    }
    catch (error) {
        if (error instanceof SyntaxError) {
            console.error('[distiller] Failed to parse LLM JSON response:', error.message);
            return { pairs: [] };
        }
        throw error;
    }

    // Validate structure. JSON.parse can legally yield null or a primitive.
    if (parsed === null || typeof parsed !== 'object' || !Array.isArray(parsed.pairs)) {
        console.warn('[distiller] Invalid response structure — missing pairs array');
        return { pairs: [] };
    }

    // Validate and filter each pair; one bad entry must not discard the rest.
    const validPairs = [];
    for (const pair of parsed.pairs) {
        if (isWellFormedPair(pair)) {
            validPairs.push({
                question: pair.question.trim(),
                answer: pair.answer.trim(),
                confidence: pair.confidence,
            });
        }
        else {
            console.warn('[distiller] Skipping malformed pair:', JSON.stringify(pair).slice(0, 200));
        }
    }
    return { pairs: validPairs };
}
|
|
104
|
+
//# sourceMappingURL=distiller.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"distiller.js","sourceRoot":"","sources":["../../src/indexing/distiller.ts"],"names":[],"mappings":"AAAA,uEAAuE;AACvE,4EAA4E;AAE5E,OAAO,MAAM,MAAM,QAAQ,CAAC;AA4B5B,gFAAgF;AAEhF,MAAM,aAAa,GAAG,aAAa,CAAC;AACpC,MAAM,oBAAoB,GAAG,GAAG,CAAC;AAEjC,MAAM,aAAa,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;sEA4BgD,CAAC;AAEvE,gFAAgF;AAEhF;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CAC/B,QAAyB,EACzB,OAA0B;IAE1B,MAAM,KAAK,GAAG,OAAO,EAAE,KAAK,IAAI,aAAa,CAAC;IAC9C,MAAM,WAAW,GAAG,OAAO,EAAE,WAAW,IAAI,oBAAoB,CAAC;IAEjE,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC;IACzB,CAAC;IAED,2BAA2B;IAC3B,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,WAAW,CAAC,CAAC;IAEjD,oCAAoC;IACpC,MAAM,UAAU,GAAG,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE;QACnC,MAAM,SAAS,GAAG,GAAG,CAAC,SAAS,IAAI,GAAG,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC;YACvD,CAAC,CAAC,gBAAgB,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,IAAI,MAAM,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;YACjF,CAAC,CAAC,EAAE,CAAC;QACT,OAAO,IAAI,GAAG,CAAC,SAAS,KAAK,GAAG,CAAC,MAAM,KAAK,GAAG,CAAC,OAAO,GAAG,SAAS,EAAE,CAAC;IAC1E,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEhB,MAAM,MAAM,GAAG,OAAO,EAAE,MAAM,IAAI,IAAI,MAAM,CAAC,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;IAE1E,IAAI,CAAC;QACD,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC;YAClD,KAAK;YACL,QAAQ,EAAE;gBACN,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,aAAa,EAAE;gBAC1C,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE;aACxC;YACD,eAAe,EAAE,EAAE,IAAI,EAAE,aAAa,EAAE;YACxC,WAAW,EAAE,GAAG;SACnB,CAAC,CAAC;QAEH,MAAM,OAAO,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC;QACtD,IAAI,CAAC,OAAO,EAAE,CAAC;YACX,OAAO,CAAC,IAAI,CAAC,qCAAqC,CAAC,CAAC;YACpD,OAAO,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC;QACzB,CAAC;QAED,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAEnC,qBAAqB;QACrB,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC;YAC/B,OAAO,CAAC,IAAI,CAAC,8DAA8D,CAAC,CAAC;YAC7E,OAAO,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC;QACzB,CAAC;QAED,gCAAgC;QAChC,MAAM,UAAU,GAAoB,EAAE,CAAC
;QACvC,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;YAC9B,IACI,OAAO,IAAI,CAAC,QAAQ,KAAK,QAAQ,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE;gBACzD,OAAO,IAAI,CAAC,MAAM,KAAK,QAAQ,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE;gBACrD,OAAO,IAAI,CAAC,UAAU,KAAK,QAAQ;gBACnC,IAAI,CAAC,UAAU,IAAI,CAAC,IAAI,IAAI,CAAC,UAAU,IAAI,CAAC,EAC9C,CAAC;gBACC,UAAU,CAAC,IAAI,CAAC;oBACZ,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE;oBAC9B,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE;oBAC1B,UAAU,EAAE,IAAI,CAAC,UAAU;iBAC9B,CAAC,CAAC;YACP,CAAC;iBAAM,CAAC;gBACJ,OAAO,CAAC,IAAI,CAAC,sCAAsC,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;YAC7F,CAAC;QACL,CAAC;QAED,OAAO,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC;IACjC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACb,IAAI,KAAK,YAAY,WAAW,EAAE,CAAC;YAC/B,OAAO,CAAC,KAAK,CAAC,gDAAgD,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;YAC/E,OAAO,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC;QACzB,CAAC;QACD,MAAM,KAAK,CAAC,CAAC,2CAA2C;IAC5D,CAAC;AACL,CAAC"}
|
|
@@ -13,10 +13,10 @@ export declare class IndexingOrchestrator {
|
|
|
13
13
|
*/
|
|
14
14
|
checkAndIndex(): Promise<void>;
|
|
15
15
|
/**
|
|
16
|
-
* Get the
|
|
17
|
-
*
|
|
16
|
+
* Get the current state token for a source without acquiring items.
|
|
17
|
+
* Returns null if the source is unavailable.
|
|
18
18
|
*/
|
|
19
|
-
private
|
|
19
|
+
private getSourceStateToken;
|
|
20
20
|
/**
|
|
21
21
|
* Queue a full re-index of all sources. Returns immediately.
|
|
22
22
|
*/
|
|
@@ -25,6 +25,11 @@ export declare class IndexingOrchestrator {
|
|
|
25
25
|
* Queue an incremental re-index for a specific repo. Returns immediately.
|
|
26
26
|
*/
|
|
27
27
|
queueIncrementalReindex(repoUrl: string): void;
|
|
28
|
+
/**
|
|
29
|
+
* Queue a reindex for a single named source. Returns immediately.
|
|
30
|
+
* Used by webhook handlers to trigger reindexing of specific sources.
|
|
31
|
+
*/
|
|
32
|
+
queueSourceReindex(sourceName: string): void;
|
|
28
33
|
/**
|
|
29
34
|
* Returns true if any indexing job is currently running.
|
|
30
35
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"orchestrator.d.ts","sourceRoot":"","sources":["../../src/indexing/orchestrator.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"orchestrator.d.ts","sourceRoot":"","sources":["../../src/indexing/orchestrator.ts"],"names":[],"mappings":"AAqCA,qBAAa,oBAAoB;IAC7B,OAAO,CAAC,KAAK,CAAa;IAC1B,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,UAAU,CAAS;IAG3B,OAAO,CAAC,aAAa,CAAqB;IAG1C,OAAO,CAAC,eAAe,CAAuB;IAG9C,iBAAiB,CAAC,EAAE,CAAC,WAAW,EAAE,MAAM,EAAE,KAAK,IAAI,CAAC;;IAMpD;;;;OAIG;IACG,aAAa,IAAI,OAAO,CAAC,IAAI,CAAC;IA+GpC;;;OAGG;YACW,mBAAmB;IAajC;;OAEG;IACH,gBAAgB,IAAI,IAAI;IAQxB;;OAEG;IACH,uBAAuB,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI;IAU9C;;;OAGG;IACH,kBAAkB,CAAC,UAAU,EAAE,MAAM,GAAG,IAAI;IAQ5C;;OAEG;IACH,UAAU,IAAI,OAAO;IAIrB;;;OAGG;IACH,mBAAmB,IAAI,IAAI;IA4B3B;;OAEG;IACH,OAAO,CAAC,OAAO;IASf;;OAEG;YACW,KAAK;IAwBnB;;OAEG;YACW,UAAU;IA8DxB;;OAEG;YACW,cAAc;IAqB5B;;OAEG;YACW,qBAAqB;IAiBnC;;OAEG;YACW,oBAAoB;IAsDlC;;OAEG;YACW,cAAc;IAmB5B;;OAEG;YACW,cAAc;CAgB/B"}
|
|
@@ -1,17 +1,18 @@
|
|
|
1
1
|
// Job queue and coordination for indexing pipelines.
|
|
2
2
|
// Fully config-driven: indexes sources referenced by search tools in mcp-docs.yaml.
|
|
3
|
-
import fs from 'fs';
|
|
4
|
-
import path from 'path';
|
|
5
|
-
import { simpleGit } from 'simple-git';
|
|
3
|
+
import fs from 'node:fs';
|
|
4
|
+
import path from 'node:path';
|
|
6
5
|
import { getConfig, getServerConfig, getIndexableSourceNames } from '../config.js';
|
|
7
6
|
import { EmbeddingClient } from './embeddings.js';
|
|
8
|
-
import {
|
|
7
|
+
import { getProvider } from './providers/index.js';
|
|
8
|
+
import { IndexingPipeline } from './pipeline.js';
|
|
9
9
|
import { getIndexState, upsertIndexState, } from '../db/queries.js';
|
|
10
|
+
import { isFileSourceConfig } from '../types.js';
|
|
10
11
|
/**
|
|
11
12
|
* Find all source configs that reference a given repo URL.
|
|
12
13
|
*/
|
|
13
14
|
function getSourcesByRepo(repoUrl) {
|
|
14
|
-
return getServerConfig().sources.filter(s => s.repo === repoUrl);
|
|
15
|
+
return getServerConfig().sources.filter(s => isFileSourceConfig(s) && s.repo === repoUrl);
|
|
15
16
|
}
|
|
16
17
|
function getStaleThresholdMs() {
|
|
17
18
|
const serverCfg = getServerConfig();
|
|
@@ -67,89 +68,94 @@ export class IndexingOrchestrator {
|
|
|
67
68
|
return;
|
|
68
69
|
}
|
|
69
70
|
// Queue incremental reindexes for each affected git-backed repo
|
|
70
|
-
const reposToReindex = new Set(
|
|
71
|
+
const reposToReindex = new Set();
|
|
72
|
+
for (const s of sourcesNeedingFullReindex) {
|
|
73
|
+
if (isFileSourceConfig(s) && s.repo)
|
|
74
|
+
reposToReindex.add(s.repo);
|
|
75
|
+
}
|
|
71
76
|
for (const repoUrl of reposToReindex) {
|
|
72
77
|
this.queueIncrementalReindex(repoUrl);
|
|
73
78
|
}
|
|
74
79
|
// Local sources (no repo) get queued as a full reindex of just those sources
|
|
75
|
-
const localSources = sourcesNeedingFullReindex.filter(s => !s.repo);
|
|
80
|
+
const localSources = sourcesNeedingFullReindex.filter(s => isFileSourceConfig(s) && !s.repo);
|
|
76
81
|
if (localSources.length > 0) {
|
|
77
82
|
this.queue.push({ type: 'full-reindex-local', sources: localSources });
|
|
78
83
|
this.drain().catch(err => console.error('[orchestrator] drain() failed:', err));
|
|
79
84
|
}
|
|
85
|
+
// Non-file sources (e.g., Slack) that need reindexing
|
|
86
|
+
const nonFileSources = sourcesNeedingFullReindex.filter(s => !isFileSourceConfig(s));
|
|
87
|
+
for (const source of nonFileSources) {
|
|
88
|
+
this.queueSourceReindex(source.name);
|
|
89
|
+
}
|
|
80
90
|
}
|
|
81
91
|
if (sourcesOk.length === 0)
|
|
82
92
|
return;
|
|
83
93
|
// Local sources in sourcesOk have no remote to check — always reindex on startup
|
|
84
|
-
const localSourcesOk = sourcesOk.filter(s => !s.repo);
|
|
94
|
+
const localSourcesOk = sourcesOk.filter(s => isFileSourceConfig(s) && !s.repo);
|
|
85
95
|
if (localSourcesOk.length > 0) {
|
|
86
96
|
console.log(`[orchestrator] Queuing reindex for ${localSourcesOk.length} local source(s)`);
|
|
87
97
|
this.queue.push({ type: 'full-reindex-local', sources: localSourcesOk });
|
|
88
98
|
this.drain().catch(err => console.error('[orchestrator] drain() failed:', err));
|
|
89
99
|
}
|
|
90
100
|
console.log('[orchestrator] Checking remotes for changes on indexed sources...');
|
|
91
|
-
//
|
|
92
|
-
const
|
|
93
|
-
for (const
|
|
101
|
+
// Check each git-backed source for changes
|
|
102
|
+
const gitSourcesOk = sourcesOk.filter(s => isFileSourceConfig(s) && s.repo);
|
|
103
|
+
for (const source of gitSourcesOk) {
|
|
94
104
|
try {
|
|
95
|
-
const
|
|
96
|
-
const
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
105
|
+
const currentToken = await this.getSourceStateToken(source);
|
|
106
|
+
const state = await getIndexState(source.type, source.name);
|
|
107
|
+
if (currentToken === null || state?.last_commit_sha !== currentToken) {
|
|
108
|
+
const reason = currentToken === null
|
|
109
|
+
? 'source unavailable (clone missing?)'
|
|
110
|
+
: `remote ${currentToken.slice(0, 8)} differs from indexed`;
|
|
111
|
+
console.log(`[orchestrator] ${reason} for ${source.name} — queuing reindex`);
|
|
112
|
+
if (isFileSourceConfig(source) && source.repo) {
|
|
113
|
+
this.queueIncrementalReindex(source.repo);
|
|
103
114
|
}
|
|
104
115
|
}
|
|
105
|
-
// Even if DB says current, verify clone dir exists (fresh container = empty /tmp)
|
|
106
|
-
const repoName = repoUrl.split('/').pop()?.replace(/\.git$/, '') ?? '';
|
|
107
|
-
const cloneDir = getConfig().cloneDir;
|
|
108
|
-
const repoDir = path.join(cloneDir, repoName);
|
|
109
|
-
const cloneMissing = !fs.existsSync(repoDir);
|
|
110
|
-
if (anyChanged || cloneMissing) {
|
|
111
|
-
const reason = cloneMissing
|
|
112
|
-
? `clone dir missing at ${repoDir}`
|
|
113
|
-
: `remote HEAD ${remoteHead.slice(0, 8)} differs from indexed`;
|
|
114
|
-
console.log(`[orchestrator] ${reason} for ${repoUrl} — queuing incremental reindex`);
|
|
115
|
-
this.queueIncrementalReindex(repoUrl);
|
|
116
|
-
}
|
|
117
116
|
else {
|
|
118
|
-
console.log(`[orchestrator]
|
|
117
|
+
console.log(`[orchestrator] ${source.name} index current at ${currentToken.slice(0, 8)}`);
|
|
119
118
|
}
|
|
120
119
|
}
|
|
121
120
|
catch (err) {
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
if (this.isStale(firstState)) {
|
|
127
|
-
const thresholdHours = getServerConfig().indexing?.stale_threshold_hours ?? 24;
|
|
128
|
-
console.log(`[orchestrator] Index for ${repoUrl} is stale (>${thresholdHours}h) — queuing full reindex`);
|
|
121
|
+
console.warn(`[orchestrator] Failed to check state for ${source.name}, falling back to age check:`, err);
|
|
122
|
+
const state = await getIndexState(source.type, source.name);
|
|
123
|
+
if (this.isStale(state)) {
|
|
124
|
+
console.log(`[orchestrator] Index for ${source.name} is stale — queuing full reindex`);
|
|
129
125
|
this.queueFullReindex();
|
|
130
126
|
}
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
// Ensure git repos are cloned even when index is current.
|
|
130
|
+
// On fresh deploys, the container has no local clones but the DB may have valid state.
|
|
131
|
+
// Bash tools need the clone directories to build their filesystem.
|
|
132
|
+
const cloneDir = getConfig().cloneDir;
|
|
133
|
+
for (const source of gitSourcesOk) {
|
|
134
|
+
if (!isFileSourceConfig(source) || !source.repo)
|
|
135
|
+
continue;
|
|
136
|
+
const repoName = source.repo.replace(/\.git$/, '').split('/').pop();
|
|
137
|
+
const repoDir = path.join(cloneDir, repoName);
|
|
138
|
+
if (!fs.existsSync(repoDir)) {
|
|
139
|
+
console.log(`[orchestrator] Clone directory missing for ${source.name}, queuing reindex to populate`);
|
|
140
|
+
this.queueIncrementalReindex(source.repo);
|
|
134
141
|
}
|
|
135
142
|
}
|
|
136
143
|
}
|
|
137
144
|
/**
|
|
138
|
-
* Get the
|
|
139
|
-
*
|
|
145
|
+
* Get the current state token for a source without acquiring items.
|
|
146
|
+
* Returns null if the source is unavailable.
|
|
140
147
|
*/
|
|
141
|
-
async
|
|
148
|
+
async getSourceStateToken(source) {
|
|
142
149
|
const config = getConfig();
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
return sha;
|
|
150
|
+
const providerOptions = {
|
|
151
|
+
cloneDir: config.cloneDir,
|
|
152
|
+
githubToken: config.githubToken,
|
|
153
|
+
slackBotToken: config.slackBotToken,
|
|
154
|
+
discordBotToken: config.discordBotToken,
|
|
155
|
+
notionToken: config.notionToken,
|
|
156
|
+
};
|
|
157
|
+
const provider = getProvider(source.type)(source, providerOptions);
|
|
158
|
+
return provider.getCurrentStateToken();
|
|
153
159
|
}
|
|
154
160
|
/**
|
|
155
161
|
* Queue a full re-index of all sources. Returns immediately.
|
|
@@ -171,6 +177,17 @@ export class IndexingOrchestrator {
|
|
|
171
177
|
console.error('[orchestrator] drain() failed:', err);
|
|
172
178
|
});
|
|
173
179
|
}
|
|
180
|
+
/**
|
|
181
|
+
* Queue a reindex for a single named source. Returns immediately.
|
|
182
|
+
* Used by webhook handlers to trigger reindexing of specific sources.
|
|
183
|
+
*/
|
|
184
|
+
queueSourceReindex(sourceName) {
|
|
185
|
+
this.queue.push({ type: 'source-reindex', sourceName });
|
|
186
|
+
console.log(`[orchestrator] Source re-index queued for ${sourceName}`);
|
|
187
|
+
this.drain().catch((err) => {
|
|
188
|
+
console.error('[orchestrator] drain() failed:', err);
|
|
189
|
+
});
|
|
190
|
+
}
|
|
174
191
|
/**
|
|
175
192
|
* Returns true if any indexing job is currently running.
|
|
176
193
|
*/
|
|
@@ -276,6 +293,19 @@ export class IndexingOrchestrator {
|
|
|
276
293
|
await this.runIncrementalReindex(embeddingClient, config.cloneDir, config.githubToken, job.repoUrl);
|
|
277
294
|
affectedSourceNames = getSourcesByRepo(job.repoUrl).map(s => s.name);
|
|
278
295
|
}
|
|
296
|
+
else if (job.type === 'source-reindex') {
|
|
297
|
+
if (!job.sourceName) {
|
|
298
|
+
console.warn('[orchestrator] source-reindex job has no sourceName, skipping');
|
|
299
|
+
return;
|
|
300
|
+
}
|
|
301
|
+
const sourceConfig = serverCfg2.sources.find(s => s.name === job.sourceName);
|
|
302
|
+
if (!sourceConfig) {
|
|
303
|
+
console.warn(`[orchestrator] source-reindex: source "${job.sourceName}" not found in config`);
|
|
304
|
+
return;
|
|
305
|
+
}
|
|
306
|
+
await this.indexSourceWithState(sourceConfig, embeddingClient, config.cloneDir);
|
|
307
|
+
affectedSourceNames = [job.sourceName];
|
|
308
|
+
}
|
|
279
309
|
if (affectedSourceNames.length > 0 && this.onReindexComplete) {
|
|
280
310
|
try {
|
|
281
311
|
this.onReindexComplete(affectedSourceNames);
|
|
@@ -305,38 +335,7 @@ export class IndexingOrchestrator {
|
|
|
305
335
|
const indexableNames = getIndexableSourceNames();
|
|
306
336
|
const sources = getSourcesByRepo(repoUrl).filter(s => indexableNames.has(s.name));
|
|
307
337
|
for (const sourceConfig of sources) {
|
|
308
|
-
|
|
309
|
-
if (state?.last_commit_sha) {
|
|
310
|
-
await this.withSourceLock(`${sourceConfig.type}:${sourceConfig.name}`, async () => {
|
|
311
|
-
await this.setIndexStatus(sourceConfig.type, sourceConfig.name, 'indexing');
|
|
312
|
-
try {
|
|
313
|
-
const indexer = new SourceIndexer(sourceConfig, embeddingClient, cloneDir, githubToken);
|
|
314
|
-
await indexer.incrementalIndex(state.last_commit_sha);
|
|
315
|
-
const headSha = await indexer.getHeadSha();
|
|
316
|
-
await upsertIndexState({
|
|
317
|
-
source_type: sourceConfig.type,
|
|
318
|
-
source_key: sourceConfig.name,
|
|
319
|
-
last_commit_sha: headSha,
|
|
320
|
-
last_indexed_at: new Date(),
|
|
321
|
-
status: 'idle',
|
|
322
|
-
});
|
|
323
|
-
}
|
|
324
|
-
catch (err) {
|
|
325
|
-
console.error(`[orchestrator] Incremental reindex failed for ${sourceConfig.name}:`, err);
|
|
326
|
-
try {
|
|
327
|
-
await this.setIndexStatus(sourceConfig.type, sourceConfig.name, 'error', err instanceof Error ? err.message : String(err));
|
|
328
|
-
}
|
|
329
|
-
catch (statusErr) {
|
|
330
|
-
console.error('[orchestrator] Failed to update index status:', statusErr);
|
|
331
|
-
}
|
|
332
|
-
// Don't rethrow — continue with remaining sources
|
|
333
|
-
}
|
|
334
|
-
});
|
|
335
|
-
}
|
|
336
|
-
else {
|
|
337
|
-
// No previous state — do a full index for this source
|
|
338
|
-
await this.indexSourceWithState(sourceConfig, embeddingClient, cloneDir, githubToken);
|
|
339
|
-
}
|
|
338
|
+
await this.indexSourceWithState(sourceConfig, embeddingClient, cloneDir, githubToken);
|
|
340
339
|
}
|
|
341
340
|
console.log(`[orchestrator] Incremental re-index complete for ${repoUrl}`);
|
|
342
341
|
}
|
|
@@ -346,21 +345,29 @@ export class IndexingOrchestrator {
|
|
|
346
345
|
async indexSourceWithState(sourceConfig, embeddingClient, cloneDir, githubToken) {
|
|
347
346
|
const lockKey = `${sourceConfig.type}:${sourceConfig.name}`;
|
|
348
347
|
await this.withSourceLock(lockKey, async () => {
|
|
349
|
-
const
|
|
348
|
+
const providerOptions = { cloneDir, githubToken, slackBotToken: getConfig().slackBotToken, discordBotToken: getConfig().discordBotToken, notionToken: getConfig().notionToken };
|
|
349
|
+
const provider = getProvider(sourceConfig.type)(sourceConfig, providerOptions);
|
|
350
|
+
const pipeline = new IndexingPipeline(embeddingClient, sourceConfig);
|
|
350
351
|
await this.setIndexStatus(sourceConfig.type, sourceConfig.name, 'indexing');
|
|
351
352
|
try {
|
|
352
353
|
const state = await getIndexState(sourceConfig.type, sourceConfig.name);
|
|
354
|
+
let result;
|
|
353
355
|
if (state?.last_commit_sha) {
|
|
354
|
-
await
|
|
356
|
+
result = await provider.incrementalAcquire(state.last_commit_sha);
|
|
355
357
|
}
|
|
356
358
|
else {
|
|
357
|
-
await
|
|
359
|
+
result = await provider.fullAcquire();
|
|
360
|
+
}
|
|
361
|
+
if (result.removedIds.length > 0) {
|
|
362
|
+
await pipeline.removeItems(result.removedIds);
|
|
363
|
+
}
|
|
364
|
+
if (result.items.length > 0) {
|
|
365
|
+
await pipeline.indexItems(result.items, result.stateToken);
|
|
358
366
|
}
|
|
359
|
-
const headSha = await indexer.getHeadSha();
|
|
360
367
|
await upsertIndexState({
|
|
361
368
|
source_type: sourceConfig.type,
|
|
362
369
|
source_key: sourceConfig.name,
|
|
363
|
-
last_commit_sha:
|
|
370
|
+
last_commit_sha: result.stateToken,
|
|
364
371
|
last_indexed_at: new Date(),
|
|
365
372
|
status: 'idle',
|
|
366
373
|
});
|
|
@@ -374,7 +381,6 @@ export class IndexingOrchestrator {
|
|
|
374
381
|
catch (statusErr) {
|
|
375
382
|
console.error('[orchestrator] Failed to update index status:', statusErr);
|
|
376
383
|
}
|
|
377
|
-
// Don't rethrow — continue with remaining sources
|
|
378
384
|
}
|
|
379
385
|
});
|
|
380
386
|
}
|