@mcp-graph-workflow/mcp-graph 5.6.1 → 5.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/core/importer/import-graph.d.ts +23 -0
- package/dist/core/importer/import-graph.d.ts.map +1 -0
- package/dist/core/importer/import-graph.js +127 -0
- package/dist/core/importer/import-graph.js.map +1 -0
- package/dist/core/rag/benchmark-indexer.d.ts +32 -0
- package/dist/core/rag/benchmark-indexer.d.ts.map +1 -0
- package/dist/core/rag/benchmark-indexer.js +85 -0
- package/dist/core/rag/benchmark-indexer.js.map +1 -0
- package/dist/core/rag/citation-mapper.d.ts +32 -0
- package/dist/core/rag/citation-mapper.d.ts.map +1 -0
- package/dist/core/rag/citation-mapper.js +45 -0
- package/dist/core/rag/citation-mapper.js.map +1 -0
- package/dist/core/rag/enrichment-pipeline.d.ts +48 -0
- package/dist/core/rag/enrichment-pipeline.d.ts.map +1 -0
- package/dist/core/rag/enrichment-pipeline.js +127 -0
- package/dist/core/rag/enrichment-pipeline.js.map +1 -0
- package/dist/core/rag/post-retrieval.d.ts +40 -0
- package/dist/core/rag/post-retrieval.d.ts.map +1 -0
- package/dist/core/rag/post-retrieval.js +119 -0
- package/dist/core/rag/post-retrieval.js.map +1 -0
- package/dist/core/rag/query-cache.d.ts +54 -0
- package/dist/core/rag/query-cache.d.ts.map +1 -0
- package/dist/core/rag/query-cache.js +104 -0
- package/dist/core/rag/query-cache.js.map +1 -0
- package/dist/core/rag/query-understanding.d.ts +37 -0
- package/dist/core/rag/query-understanding.d.ts.map +1 -0
- package/dist/core/rag/query-understanding.js +123 -0
- package/dist/core/rag/query-understanding.js.map +1 -0
- package/dist/core/rag/rag-trace.d.ts +67 -0
- package/dist/core/rag/rag-trace.d.ts.map +1 -0
- package/dist/core/rag/rag-trace.js +82 -0
- package/dist/core/rag/rag-trace.js.map +1 -0
- package/dist/core/rag/source-contribution.d.ts +37 -0
- package/dist/core/rag/source-contribution.d.ts.map +1 -0
- package/dist/core/rag/source-contribution.js +54 -0
- package/dist/core/rag/source-contribution.js.map +1 -0
- package/dist/core/store/sqlite-store.d.ts +9 -0
- package/dist/core/store/sqlite-store.d.ts.map +1 -1
- package/dist/core/store/sqlite-store.js +42 -0
- package/dist/core/store/sqlite-store.js.map +1 -1
- package/dist/mcp/tools/import-graph.d.ts +4 -0
- package/dist/mcp/tools/import-graph.d.ts.map +1 -0
- package/dist/mcp/tools/import-graph.js +81 -0
- package/dist/mcp/tools/import-graph.js.map +1 -0
- package/dist/mcp/tools/index.d.ts.map +1 -1
- package/dist/mcp/tools/index.js +2 -0
- package/dist/mcp/tools/index.js.map +1 -1
- package/dist/schemas/knowledge.schema.d.ts +2 -0
- package/dist/schemas/knowledge.schema.d.ts.map +1 -1
- package/dist/schemas/knowledge.schema.js +1 -0
- package/dist/schemas/knowledge.schema.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -114,7 +114,7 @@ mcp-graph serve --port 3000 # or: npm run dev
|
|
|
114
114
|
|
|
115
115
|
| | Count | Reference |
|
|
116
116
|
|---|---|---|
|
|
117
|
-
| **MCP Tools** |
|
|
117
|
+
| **MCP Tools** | 32 | [MCP-TOOLS-REFERENCE.md](docs/MCP-TOOLS-REFERENCE.md) |
|
|
118
118
|
| **REST Endpoints** | 44 (17 routers) | [REST-API-REFERENCE.md](docs/REST-API-REFERENCE.md) |
|
|
119
119
|
| **CLI Commands** | 6 | `init`, `import`, `index`, `stats`, `serve`, `doctor` |
|
|
120
120
|
|
|
@@ -145,7 +145,7 @@ npm run test:all # Everything
|
|
|
145
145
|
|----------|-------------|
|
|
146
146
|
| [Getting Started](docs/GETTING-STARTED.md) | Step-by-step guide for new users |
|
|
147
147
|
| [Architecture](docs/ARCHITECTURE-GUIDE.md) | System layers, modules, data flows |
|
|
148
|
-
| [MCP Tools Reference](docs/MCP-TOOLS-REFERENCE.md) |
|
|
148
|
+
| [MCP Tools Reference](docs/MCP-TOOLS-REFERENCE.md) | 32 tools with full parameters |
|
|
149
149
|
| [REST API Reference](docs/REST-API-REFERENCE.md) | 17 routers, 44 endpoints |
|
|
150
150
|
| [Knowledge Pipeline](docs/KNOWLEDGE-PIPELINE.md) | RAG, embeddings, context assembly |
|
|
151
151
|
| [Integrations](docs/INTEGRATIONS-GUIDE.md) | Serena, GitNexus, Context7, Playwright |
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import type { SqliteStore } from "../store/sqlite-store.js";
|
|
2
|
+
import type { GraphDocument } from "../graph/graph-types.js";
|
|
3
|
+
export interface MergeGraphOptions {
|
|
4
|
+
dryRun?: boolean;
|
|
5
|
+
}
|
|
6
|
+
export interface MergeGraphResult {
|
|
7
|
+
nodesInserted: number;
|
|
8
|
+
nodesSkipped: number;
|
|
9
|
+
edgesInserted: number;
|
|
10
|
+
edgesSkipped: number;
|
|
11
|
+
edgesOrphaned: number;
|
|
12
|
+
sourceProject: string;
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* Merge an exported GraphDocument into the local store without overriding existing data.
|
|
16
|
+
*
|
|
17
|
+
* - Nodes with IDs already present locally are SKIPPED (local version wins).
|
|
18
|
+
* - Edges with the same (from, to, relationType) are SKIPPED via INSERT OR IGNORE.
|
|
19
|
+
* - Edges referencing nodes that don't exist (locally or in the import) are SKIPPED as orphans.
|
|
20
|
+
* - In dryRun mode, no data is written; only counts are returned.
|
|
21
|
+
*/
|
|
22
|
+
export declare function mergeGraph(store: SqliteStore, incoming: GraphDocument, options?: MergeGraphOptions): MergeGraphResult;
|
|
23
|
+
//# sourceMappingURL=import-graph.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"import-graph.d.ts","sourceRoot":"","sources":["../../../src/core/importer/import-graph.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,0BAA0B,CAAC;AAC5D,OAAO,KAAK,EAAE,aAAa,EAAa,MAAM,yBAAyB,CAAC;AAKxE,MAAM,WAAW,iBAAiB;IAChC,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED,MAAM,WAAW,gBAAgB;IAC/B,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,aAAa,EAAE,MAAM,CAAC;CACvB;AAED;;;;;;;GAOG;AACH,wBAAgB,UAAU,CACxB,KAAK,EAAE,WAAW,EAClB,QAAQ,EAAE,aAAa,EACvB,OAAO,CAAC,EAAE,iBAAiB,GAC1B,gBAAgB,CA+HlB"}
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import { z } from "zod/v4";
|
|
2
|
+
import { GraphDocumentSchema } from "../../schemas/graph.schema.js";
|
|
3
|
+
import { ValidationError } from "../utils/errors.js";
|
|
4
|
+
import { logger } from "../utils/logger.js";
|
|
5
|
+
/**
|
|
6
|
+
* Merge an exported GraphDocument into the local store without overriding existing data.
|
|
7
|
+
*
|
|
8
|
+
* - Nodes with IDs already present locally are SKIPPED (local version wins).
|
|
9
|
+
* - Edges with the same (from, to, relationType) are SKIPPED via INSERT OR IGNORE.
|
|
10
|
+
* - Edges referencing nodes that don't exist (locally or in the import) are SKIPPED as orphans.
|
|
11
|
+
* - In dryRun mode, no data is written; only counts are returned.
|
|
12
|
+
*/
|
|
13
|
+
export function mergeGraph(store, incoming, options) {
|
|
14
|
+
// 1. Validate incoming document
|
|
15
|
+
try {
|
|
16
|
+
GraphDocumentSchema.parse(incoming);
|
|
17
|
+
}
|
|
18
|
+
catch (err) {
|
|
19
|
+
if (err instanceof z.ZodError) {
|
|
20
|
+
throw new ValidationError("Invalid graph document", err.issues);
|
|
21
|
+
}
|
|
22
|
+
throw err;
|
|
23
|
+
}
|
|
24
|
+
const sourceProject = incoming.project.name;
|
|
25
|
+
const dryRun = options?.dryRun ?? false;
|
|
26
|
+
logger.info("merge-graph:start", {
|
|
27
|
+
sourceProject,
|
|
28
|
+
incomingNodes: incoming.nodes.length,
|
|
29
|
+
incomingEdges: incoming.edges.length,
|
|
30
|
+
dryRun,
|
|
31
|
+
});
|
|
32
|
+
// 2. Collect existing node IDs
|
|
33
|
+
const existingNodes = store.getAllNodes();
|
|
34
|
+
const existingNodeIds = new Set(existingNodes.map((n) => n.id));
|
|
35
|
+
// 3. Partition incoming nodes
|
|
36
|
+
const nodesToInsert = [];
|
|
37
|
+
let nodesSkipped = 0;
|
|
38
|
+
for (const node of incoming.nodes) {
|
|
39
|
+
if (existingNodeIds.has(node.id)) {
|
|
40
|
+
nodesSkipped++;
|
|
41
|
+
}
|
|
42
|
+
else {
|
|
43
|
+
// Tag with merge origin metadata
|
|
44
|
+
const taggedNode = {
|
|
45
|
+
...node,
|
|
46
|
+
metadata: {
|
|
47
|
+
...node.metadata,
|
|
48
|
+
mergedFrom: sourceProject,
|
|
49
|
+
},
|
|
50
|
+
};
|
|
51
|
+
nodesToInsert.push(taggedNode);
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
// 4. Build set of all available node IDs (existing + to-be-inserted)
|
|
55
|
+
const availableNodeIds = new Set(existingNodeIds);
|
|
56
|
+
for (const node of nodesToInsert) {
|
|
57
|
+
availableNodeIds.add(node.id);
|
|
58
|
+
}
|
|
59
|
+
// 5. Filter edges: keep only those where both from and to exist
|
|
60
|
+
const validEdges = [];
|
|
61
|
+
let edgesOrphaned = 0;
|
|
62
|
+
for (const edge of incoming.edges) {
|
|
63
|
+
if (availableNodeIds.has(edge.from) && availableNodeIds.has(edge.to)) {
|
|
64
|
+
validEdges.push(edge);
|
|
65
|
+
}
|
|
66
|
+
else {
|
|
67
|
+
edgesOrphaned++;
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
// 6. In dry-run mode, estimate edge skips without writing
|
|
71
|
+
if (dryRun) {
|
|
72
|
+
// We can't know exact edge skip counts without trying INSERT OR IGNORE,
|
|
73
|
+
// but we can count edges whose (from, to, relationType) match existing edges
|
|
74
|
+
const existingEdges = store.getAllEdges();
|
|
75
|
+
const existingEdgeKeys = new Set(existingEdges.map((e) => `${e.from}|${e.to}|${e.relationType}`));
|
|
76
|
+
let edgesInserted = 0;
|
|
77
|
+
let edgesSkipped = 0;
|
|
78
|
+
for (const edge of validEdges) {
|
|
79
|
+
const key = `${edge.from}|${edge.to}|${edge.relationType}`;
|
|
80
|
+
if (existingEdgeKeys.has(key)) {
|
|
81
|
+
edgesSkipped++;
|
|
82
|
+
}
|
|
83
|
+
else {
|
|
84
|
+
edgesInserted++;
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
logger.info("merge-graph:dry-run", {
|
|
88
|
+
nodesInserted: nodesToInsert.length,
|
|
89
|
+
nodesSkipped,
|
|
90
|
+
edgesInserted,
|
|
91
|
+
edgesSkipped,
|
|
92
|
+
edgesOrphaned,
|
|
93
|
+
});
|
|
94
|
+
return {
|
|
95
|
+
nodesInserted: nodesToInsert.length,
|
|
96
|
+
nodesSkipped,
|
|
97
|
+
edgesInserted,
|
|
98
|
+
edgesSkipped,
|
|
99
|
+
edgesOrphaned,
|
|
100
|
+
sourceProject,
|
|
101
|
+
};
|
|
102
|
+
}
|
|
103
|
+
// 7. Create snapshot before merge (safety net)
|
|
104
|
+
store.createSnapshot();
|
|
105
|
+
// 8. Perform merge insert
|
|
106
|
+
const { nodesInserted, edgesInserted } = store.mergeInsert(nodesToInsert, validEdges);
|
|
107
|
+
const edgesSkipped = validEdges.length - edgesInserted;
|
|
108
|
+
// 9. Record import
|
|
109
|
+
store.recordImport(`merge:${sourceProject}`, nodesInserted, edgesInserted);
|
|
110
|
+
logger.info("merge-graph:done", {
|
|
111
|
+
sourceProject,
|
|
112
|
+
nodesInserted,
|
|
113
|
+
nodesSkipped,
|
|
114
|
+
edgesInserted,
|
|
115
|
+
edgesSkipped,
|
|
116
|
+
edgesOrphaned,
|
|
117
|
+
});
|
|
118
|
+
return {
|
|
119
|
+
nodesInserted,
|
|
120
|
+
nodesSkipped,
|
|
121
|
+
edgesInserted,
|
|
122
|
+
edgesSkipped,
|
|
123
|
+
edgesOrphaned,
|
|
124
|
+
sourceProject,
|
|
125
|
+
};
|
|
126
|
+
}
|
|
127
|
+
//# sourceMappingURL=import-graph.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"import-graph.js","sourceRoot":"","sources":["../../../src/core/importer/import-graph.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,QAAQ,CAAC;AAG3B,OAAO,EAAE,mBAAmB,EAAE,MAAM,+BAA+B,CAAC;AACpE,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAe5C;;;;;;;GAOG;AACH,MAAM,UAAU,UAAU,CACxB,KAAkB,EAClB,QAAuB,EACvB,OAA2B;IAE3B,gCAAgC;IAChC,IAAI,CAAC;QACH,mBAAmB,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IACtC,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,GAAG,YAAY,CAAC,CAAC,QAAQ,EAAE,CAAC;YAC9B,MAAM,IAAI,eAAe,CAAC,wBAAwB,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;QAClE,CAAC;QACD,MAAM,GAAG,CAAC;IACZ,CAAC;IAED,MAAM,aAAa,GAAG,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC;IAC5C,MAAM,MAAM,GAAG,OAAO,EAAE,MAAM,IAAI,KAAK,CAAC;IAExC,MAAM,CAAC,IAAI,CAAC,mBAAmB,EAAE;QAC/B,aAAa;QACb,aAAa,EAAE,QAAQ,CAAC,KAAK,CAAC,MAAM;QACpC,aAAa,EAAE,QAAQ,CAAC,KAAK,CAAC,MAAM;QACpC,MAAM;KACP,CAAC,CAAC;IAEH,+BAA+B;IAC/B,MAAM,aAAa,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;IAC1C,MAAM,eAAe,GAAG,IAAI,GAAG,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAEhE,8BAA8B;IAC9B,MAAM,aAAa,GAAgB,EAAE,CAAC;IACtC,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,KAAK,MAAM,IAAI,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;QAClC,IAAI,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC;YACjC,YAAY,EAAE,CAAC;QACjB,CAAC;aAAM,CAAC;YACN,iCAAiC;YACjC,MAAM,UAAU,GAAc;gBAC5B,GAAG,IAAI;gBACP,QAAQ,EAAE;oBACR,GAAG,IAAI,CAAC,QAAQ;oBAChB,UAAU,EAAE,aAAa;iBAC1B;aACF,CAAC;YACF,aAAa,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACjC,CAAC;IACH,CAAC;IAED,qEAAqE;IACrE,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAAC,eAAe,CAAC,CAAC;IAClD,KAAK,MAAM,IAAI,IAAI,aAAa,EAAE,CAAC;QACjC,gBAAgB,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAChC,CAAC;IAED,gEAAgE;IAChE,MAAM,UAAU,GAAG,EAAE,CAAC;IACtB,IAAI,aAAa,GAAG,CAAC,CAAC;IAEtB,KAAK,MAAM,IAAI,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;QAClC,IAAI,gBAAgB,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,gBAAgB,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC;YACrE,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxB,CAAC;aAAM,CAAC;YACN,aAAa,EAAE,CAAC;QAClB,CAAC;IACH,CAAC;IAED,0DAA0
D;IAC1D,IAAI,MAAM,EAAE,CAAC;QACX,wEAAwE;QACxE,6EAA6E;QAC7E,MAAM,aAAa,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;QAC1C,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAC9B,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,YAAY,EAAE,CAAC,CAChE,CAAC;QACF,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,IAAI,YAAY,GAAG,CAAC,CAAC;QACrB,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;YAC9B,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,EAAE,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;YAC3D,IAAI,gBAAgB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC9B,YAAY,EAAE,CAAC;YACjB,CAAC;iBAAM,CAAC;gBACN,aAAa,EAAE,CAAC;YAClB,CAAC;QACH,CAAC;QAED,MAAM,CAAC,IAAI,CAAC,qBAAqB,EAAE;YACjC,aAAa,EAAE,aAAa,CAAC,MAAM;YACnC,YAAY;YACZ,aAAa;YACb,YAAY;YACZ,aAAa;SACd,CAAC,CAAC;QAEH,OAAO;YACL,aAAa,EAAE,aAAa,CAAC,MAAM;YACnC,YAAY;YACZ,aAAa;YACb,YAAY;YACZ,aAAa;YACb,aAAa;SACd,CAAC;IACJ,CAAC;IAED,+CAA+C;IAC/C,KAAK,CAAC,cAAc,EAAE,CAAC;IAEvB,0BAA0B;IAC1B,MAAM,EAAE,aAAa,EAAE,aAAa,EAAE,GAAG,KAAK,CAAC,WAAW,CAAC,aAAa,EAAE,UAAU,CAAC,CAAC;IACtF,MAAM,YAAY,GAAG,UAAU,CAAC,MAAM,GAAG,aAAa,CAAC;IAEvD,mBAAmB;IACnB,KAAK,CAAC,YAAY,CAAC,SAAS,aAAa,EAAE,EAAE,aAAa,EAAE,aAAa,CAAC,CAAC;IAE3E,MAAM,CAAC,IAAI,CAAC,kBAAkB,EAAE;QAC9B,aAAa;QACb,aAAa;QACb,YAAY;QACZ,aAAa;QACb,YAAY;QACZ,aAAa;KACd,CAAC,CAAC;IAEH,OAAO;QACL,aAAa;QACb,YAAY;QACZ,aAAa;QACb,YAAY;QACZ,aAAa;QACb,aAAa;KACd,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Benchmark Indexer — indexes performance benchmark results as knowledge documents.
|
|
3
|
+
* Enables RAG queries about performance metrics, token economy, and SLA compliance.
|
|
4
|
+
*/
|
|
5
|
+
import type { KnowledgeStore } from "../store/knowledge-store.js";
|
|
6
|
+
export interface BenchmarkMetric {
|
|
7
|
+
name: string;
|
|
8
|
+
value: number;
|
|
9
|
+
target: number;
|
|
10
|
+
passed: boolean;
|
|
11
|
+
}
|
|
12
|
+
export interface BenchmarkData {
|
|
13
|
+
timestamp: string;
|
|
14
|
+
metrics: BenchmarkMetric[];
|
|
15
|
+
tokenEconomy?: {
|
|
16
|
+
avgCompression: number;
|
|
17
|
+
totalTokensSaved: number;
|
|
18
|
+
};
|
|
19
|
+
toolUsage?: Record<string, {
|
|
20
|
+
inputTokens: number;
|
|
21
|
+
outputTokens: number;
|
|
22
|
+
}>;
|
|
23
|
+
}
|
|
24
|
+
export interface BenchmarkIndexResult {
|
|
25
|
+
documentsIndexed: number;
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Index benchmark results into the knowledge store.
|
|
29
|
+
* Creates documents for metrics summary and token economy.
|
|
30
|
+
*/
|
|
31
|
+
export declare function indexBenchmarkResults(store: KnowledgeStore, data: BenchmarkData): BenchmarkIndexResult;
|
|
32
|
+
//# sourceMappingURL=benchmark-indexer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"benchmark-indexer.d.ts","sourceRoot":"","sources":["../../../src/core/rag/benchmark-indexer.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAGlE,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,OAAO,CAAC;CACjB;AAED,MAAM,WAAW,aAAa;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,eAAe,EAAE,CAAC;IAC3B,YAAY,CAAC,EAAE;QACb,cAAc,EAAE,MAAM,CAAC;QACvB,gBAAgB,EAAE,MAAM,CAAC;KAC1B,CAAC;IACF,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE;QAAE,WAAW,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CAC3E;AAED,MAAM,WAAW,oBAAoB;IACnC,gBAAgB,EAAE,MAAM,CAAC;CAC1B;AAED;;;GAGG;AACH,wBAAgB,qBAAqB,CACnC,KAAK,EAAE,cAAc,EACrB,IAAI,EAAE,aAAa,GAClB,oBAAoB,CAwFtB"}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Benchmark Indexer — indexes performance benchmark results as knowledge documents.
|
|
3
|
+
* Enables RAG queries about performance metrics, token economy, and SLA compliance.
|
|
4
|
+
*/
|
|
5
|
+
import { logger } from "../utils/logger.js";
|
|
6
|
+
/**
|
|
7
|
+
* Index benchmark results into the knowledge store.
|
|
8
|
+
* Creates documents for metrics summary and token economy.
|
|
9
|
+
*/
|
|
10
|
+
export function indexBenchmarkResults(store, data) {
|
|
11
|
+
let documentsIndexed = 0;
|
|
12
|
+
// Index metrics summary
|
|
13
|
+
if (data.metrics.length > 0) {
|
|
14
|
+
const metricsLines = data.metrics.map((m) => {
|
|
15
|
+
const status = m.passed ? "PASSED" : "FAILED — exceeded target";
|
|
16
|
+
return `- ${m.name}: ${m.value} (target: ${m.target}) [${status}]`;
|
|
17
|
+
});
|
|
18
|
+
const passing = data.metrics.filter((m) => m.passed).length;
|
|
19
|
+
const failing = data.metrics.length - passing;
|
|
20
|
+
const content = [
|
|
21
|
+
`## Benchmark Results (${data.timestamp})`,
|
|
22
|
+
"",
|
|
23
|
+
`Summary: ${passing} passed, ${failing} failed out of ${data.metrics.length} metrics.`,
|
|
24
|
+
"",
|
|
25
|
+
...metricsLines,
|
|
26
|
+
].join("\n");
|
|
27
|
+
store.insert({
|
|
28
|
+
sourceType: "benchmark",
|
|
29
|
+
sourceId: `benchmark:${data.timestamp}`,
|
|
30
|
+
title: `Benchmark Results ${data.timestamp}`,
|
|
31
|
+
content,
|
|
32
|
+
chunkIndex: 0,
|
|
33
|
+
metadata: {
|
|
34
|
+
timestamp: data.timestamp,
|
|
35
|
+
totalMetrics: data.metrics.length,
|
|
36
|
+
passing,
|
|
37
|
+
failing,
|
|
38
|
+
},
|
|
39
|
+
});
|
|
40
|
+
documentsIndexed++;
|
|
41
|
+
}
|
|
42
|
+
// Index token economy
|
|
43
|
+
if (data.tokenEconomy) {
|
|
44
|
+
const content = [
|
|
45
|
+
`## Token Economy (${data.timestamp})`,
|
|
46
|
+
"",
|
|
47
|
+
`- Average compression ratio: ${data.tokenEconomy.avgCompression}`,
|
|
48
|
+
`- Total tokens saved: ${data.tokenEconomy.totalTokensSaved}`,
|
|
49
|
+
].join("\n");
|
|
50
|
+
store.insert({
|
|
51
|
+
sourceType: "benchmark",
|
|
52
|
+
sourceId: `benchmark:token-economy:${data.timestamp}`,
|
|
53
|
+
title: `Token Economy ${data.timestamp}`,
|
|
54
|
+
content,
|
|
55
|
+
chunkIndex: 0,
|
|
56
|
+
metadata: {
|
|
57
|
+
timestamp: data.timestamp,
|
|
58
|
+
avgCompression: data.tokenEconomy.avgCompression,
|
|
59
|
+
totalTokensSaved: data.tokenEconomy.totalTokensSaved,
|
|
60
|
+
},
|
|
61
|
+
});
|
|
62
|
+
documentsIndexed++;
|
|
63
|
+
}
|
|
64
|
+
// Index tool usage
|
|
65
|
+
if (data.toolUsage && Object.keys(data.toolUsage).length > 0) {
|
|
66
|
+
const lines = Object.entries(data.toolUsage).map(([tool, usage]) => `- ${tool}: input=${usage.inputTokens}, output=${usage.outputTokens}`);
|
|
67
|
+
const content = [
|
|
68
|
+
`## Tool Token Usage (${data.timestamp})`,
|
|
69
|
+
"",
|
|
70
|
+
...lines,
|
|
71
|
+
].join("\n");
|
|
72
|
+
store.insert({
|
|
73
|
+
sourceType: "benchmark",
|
|
74
|
+
sourceId: `benchmark:tool-usage:${data.timestamp}`,
|
|
75
|
+
title: `Tool Token Usage ${data.timestamp}`,
|
|
76
|
+
content,
|
|
77
|
+
chunkIndex: 0,
|
|
78
|
+
metadata: { timestamp: data.timestamp },
|
|
79
|
+
});
|
|
80
|
+
documentsIndexed++;
|
|
81
|
+
}
|
|
82
|
+
logger.info("Benchmark results indexed", { documentsIndexed, timestamp: data.timestamp });
|
|
83
|
+
return { documentsIndexed };
|
|
84
|
+
}
|
|
85
|
+
//# sourceMappingURL=benchmark-indexer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"benchmark-indexer.js","sourceRoot":"","sources":["../../../src/core/rag/benchmark-indexer.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAuB5C;;;GAGG;AACH,MAAM,UAAU,qBAAqB,CACnC,KAAqB,EACrB,IAAmB;IAEnB,IAAI,gBAAgB,GAAG,CAAC,CAAC;IAEzB,wBAAwB;IACxB,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5B,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;YAC1C,MAAM,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,0BAA0B,CAAC;YAChE,OAAO,KAAK,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,KAAK,aAAa,CAAC,CAAC,MAAM,MAAM,MAAM,GAAG,CAAC;QACrE,CAAC,CAAC,CAAC;QAEH,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;QAC5D,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,OAAO,CAAC;QAE9C,MAAM,OAAO,GAAG;YACd,yBAAyB,IAAI,CAAC,SAAS,GAAG;YAC1C,EAAE;YACF,YAAY,OAAO,YAAY,OAAO,kBAAkB,IAAI,CAAC,OAAO,CAAC,MAAM,WAAW;YACtF,EAAE;YACF,GAAG,YAAY;SAChB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEb,KAAK,CAAC,MAAM,CAAC;YACX,UAAU,EAAE,WAAW;YACvB,QAAQ,EAAE,aAAa,IAAI,CAAC,SAAS,EAAE;YACvC,KAAK,EAAE,qBAAqB,IAAI,CAAC,SAAS,EAAE;YAC5C,OAAO;YACP,UAAU,EAAE,CAAC;YACb,QAAQ,EAAE;gBACR,SAAS,EAAE,IAAI,CAAC,SAAS;gBACzB,YAAY,EAAE,IAAI,CAAC,OAAO,CAAC,MAAM;gBACjC,OAAO;gBACP,OAAO;aACR;SACF,CAAC,CAAC;QACH,gBAAgB,EAAE,CAAC;IACrB,CAAC;IAED,sBAAsB;IACtB,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;QACtB,MAAM,OAAO,GAAG;YACd,qBAAqB,IAAI,CAAC,SAAS,GAAG;YACtC,EAAE;YACF,gCAAgC,IAAI,CAAC,YAAY,CAAC,cAAc,EAAE;YAClE,yBAAyB,IAAI,CAAC,YAAY,CAAC,gBAAgB,EAAE;SAC9D,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEb,KAAK,CAAC,MAAM,CAAC;YACX,UAAU,EAAE,WAAW;YACvB,QAAQ,EAAE,2BAA2B,IAAI,CAAC,SAAS,EAAE;YACrD,KAAK,EAAE,iBAAiB,IAAI,CAAC,SAAS,EAAE;YACxC,OAAO;YACP,UAAU,EAAE,CAAC;YACb,QAAQ,EAAE;gBACR,SAAS,EAAE,IAAI,CAAC,SAAS;gBACzB,cAAc,EAAE,IAAI,CAAC,YAAY,CAAC,cAAc;gBAChD,gBAAgB,EAAE,IAAI,CAAC,YAAY,CAAC,gBAAgB;aACrD;SACF,CAAC,CAAC;QACH,gBAAgB,EAAE,CAAC;IACrB,CAAC;IAED,mBAAmB;IACnB,IAAI,IAAI,CAAC,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAA
C;QAC7D,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,GAAG,CAC9C,CAAC,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,EAAE,CAChB,KAAK,IAAI,WAAW,KAAK,CAAC,WAAW,YAAY,KAAK,CAAC,YAAY,EAAE,CACxE,CAAC;QAEF,MAAM,OAAO,GAAG;YACd,wBAAwB,IAAI,CAAC,SAAS,GAAG;YACzC,EAAE;YACF,GAAG,KAAK;SACT,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEb,KAAK,CAAC,MAAM,CAAC;YACX,UAAU,EAAE,WAAW;YACvB,QAAQ,EAAE,wBAAwB,IAAI,CAAC,SAAS,EAAE;YAClD,KAAK,EAAE,oBAAoB,IAAI,CAAC,SAAS,EAAE;YAC3C,OAAO;YACP,UAAU,EAAE,CAAC;YACb,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE;SACxC,CAAC,CAAC;QACH,gBAAgB,EAAE,CAAC;IACrB,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,2BAA2B,EAAE,EAAE,gBAAgB,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC;IAE1F,OAAO,EAAE,gBAAgB,EAAE,CAAC;AAC9B,CAAC"}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Citation Mapper — maps search results to citations with source traceability.
|
|
3
|
+
*
|
|
4
|
+
* Each chunk in the assembled context gets a [N] citation marker,
|
|
5
|
+
* enabling users to trace information back to its source.
|
|
6
|
+
*/
|
|
7
|
+
import type { RankedResult } from "./multi-strategy-retrieval.js";
|
|
8
|
+
export interface Citation {
|
|
9
|
+
id: string;
|
|
10
|
+
chunkId: string;
|
|
11
|
+
sourceType: string;
|
|
12
|
+
sourceId: string;
|
|
13
|
+
title: string;
|
|
14
|
+
snippet: string;
|
|
15
|
+
relevanceScore: number;
|
|
16
|
+
position: number;
|
|
17
|
+
}
|
|
18
|
+
export interface CitedContext {
|
|
19
|
+
assembledText: string;
|
|
20
|
+
citations: Citation[];
|
|
21
|
+
sourceBreakdown: Record<string, number>;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Create citations from ranked results.
|
|
25
|
+
* Each result gets a position-based citation (1-indexed).
|
|
26
|
+
*/
|
|
27
|
+
export declare function mapCitations(results: RankedResult[]): Citation[];
|
|
28
|
+
/**
|
|
29
|
+
* Build a cited context: assembled text with [N] markers + citation list + source breakdown.
|
|
30
|
+
*/
|
|
31
|
+
export declare function buildCitedContext(results: RankedResult[]): CitedContext;
|
|
32
|
+
//# sourceMappingURL=citation-mapper.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"citation-mapper.d.ts","sourceRoot":"","sources":["../../../src/core/rag/citation-mapper.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,+BAA+B,CAAC;AAGlE,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,cAAc,EAAE,MAAM,CAAC;IACvB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,YAAY;IAC3B,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACzC;AAID;;;GAGG;AACH,wBAAgB,YAAY,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,QAAQ,EAAE,CAahE;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,YAAY,CAoBvE"}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Citation Mapper — maps search results to citations with source traceability.
|
|
3
|
+
*
|
|
4
|
+
* Each chunk in the assembled context gets a [N] citation marker,
|
|
5
|
+
* enabling users to trace information back to its source.
|
|
6
|
+
*/
|
|
7
|
+
import { generateId } from "../utils/id.js";
|
|
8
|
+
const MAX_SNIPPET_LENGTH = 400;
|
|
9
|
+
/**
|
|
10
|
+
* Create citations from ranked results.
|
|
11
|
+
* Each result gets a position-based citation (1-indexed).
|
|
12
|
+
*/
|
|
13
|
+
export function mapCitations(results) {
|
|
14
|
+
return results.map((r, i) => ({
|
|
15
|
+
id: generateId("cite"),
|
|
16
|
+
chunkId: r.id,
|
|
17
|
+
sourceType: r.sourceType,
|
|
18
|
+
sourceId: r.sourceId,
|
|
19
|
+
title: r.title,
|
|
20
|
+
snippet: r.content.length > MAX_SNIPPET_LENGTH
|
|
21
|
+
? r.content.slice(0, MAX_SNIPPET_LENGTH)
|
|
22
|
+
: r.content,
|
|
23
|
+
relevanceScore: r.score,
|
|
24
|
+
position: i + 1,
|
|
25
|
+
}));
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Build a cited context: assembled text with [N] markers + citation list + source breakdown.
|
|
29
|
+
*/
|
|
30
|
+
export function buildCitedContext(results) {
|
|
31
|
+
if (results.length === 0) {
|
|
32
|
+
return { assembledText: "", citations: [], sourceBreakdown: {} };
|
|
33
|
+
}
|
|
34
|
+
const citations = mapCitations(results);
|
|
35
|
+
const sourceBreakdown = {};
|
|
36
|
+
const textParts = [];
|
|
37
|
+
for (let i = 0; i < results.length; i++) {
|
|
38
|
+
const r = results[i];
|
|
39
|
+
textParts.push(`[${i + 1}] ${r.content}`);
|
|
40
|
+
sourceBreakdown[r.sourceType] = (sourceBreakdown[r.sourceType] ?? 0) + 1;
|
|
41
|
+
}
|
|
42
|
+
const assembledText = textParts.join("\n\n");
|
|
43
|
+
return { assembledText, citations, sourceBreakdown };
|
|
44
|
+
}
|
|
45
|
+
//# sourceMappingURL=citation-mapper.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"citation-mapper.js","sourceRoot":"","sources":["../../../src/core/rag/citation-mapper.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAmB5C,MAAM,kBAAkB,GAAG,GAAG,CAAC;AAE/B;;;GAGG;AACH,MAAM,UAAU,YAAY,CAAC,OAAuB;IAClD,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;QAC5B,EAAE,EAAE,UAAU,CAAC,MAAM,CAAC;QACtB,OAAO,EAAE,CAAC,CAAC,EAAE;QACb,UAAU,EAAE,CAAC,CAAC,UAAU;QACxB,QAAQ,EAAE,CAAC,CAAC,QAAQ;QACpB,KAAK,EAAE,CAAC,CAAC,KAAK;QACd,OAAO,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,GAAG,kBAAkB;YAC5C,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,kBAAkB,CAAC;YACxC,CAAC,CAAC,CAAC,CAAC,OAAO;QACb,cAAc,EAAE,CAAC,CAAC,KAAK;QACvB,QAAQ,EAAE,CAAC,GAAG,CAAC;KAChB,CAAC,CAAC,CAAC;AACN,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,iBAAiB,CAAC,OAAuB;IACvD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,EAAE,aAAa,EAAE,EAAE,EAAE,SAAS,EAAE,EAAE,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;IACnE,CAAC;IAED,MAAM,SAAS,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;IACxC,MAAM,eAAe,GAA2B,EAAE,CAAC;IAEnD,MAAM,SAAS,GAAa,EAAE,CAAC;IAE/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,MAAM,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;QACrB,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;QAE1C,eAAe,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IAC3E,CAAC;IAED,MAAM,aAAa,GAAG,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAE7C,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,eAAe,EAAE,CAAC;AACvD,CAAC"}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Enrichment Pipeline — enriches text chunks with keywords, entities, and summaries
|
|
3
|
+
* before indexation into the knowledge store.
|
|
4
|
+
*
|
|
5
|
+
* Transforms raw TextChunks into EnrichedChunks with:
|
|
6
|
+
* - TF-IDF-based keyword extraction (top-N terms)
|
|
7
|
+
* - Regex-based entity detection (PascalCase, camelCase, file paths)
|
|
8
|
+
* - Auto-generated summary (first sentence or markdown heading)
|
|
9
|
+
* - Parent-child chunk linking for later chunk stitching
|
|
10
|
+
*/
|
|
11
|
+
import type { TextChunk } from "./chunk-text.js";
|
|
12
|
+
export interface EnrichedChunk extends TextChunk {
|
|
13
|
+
/** Top-N keywords extracted via TF-IDF term frequency */
|
|
14
|
+
keywords: string[];
|
|
15
|
+
/** Detected entities: class names, function names, file paths */
|
|
16
|
+
entities: string[];
|
|
17
|
+
/** Auto-generated summary (first sentence or heading) */
|
|
18
|
+
summary: string;
|
|
19
|
+
/** Source type discriminator */
|
|
20
|
+
sourceType: string;
|
|
21
|
+
/** Reference to parent chunk index for chunk stitching */
|
|
22
|
+
parentChunkIndex?: number;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Extract top-N keywords from text using term frequency scoring.
|
|
26
|
+
* Uses the project tokenizer (handles PT + EN, strips stopwords).
|
|
27
|
+
*/
|
|
28
|
+
export declare function extractKeywords(text: string, topN?: number): string[];
|
|
29
|
+
/**
|
|
30
|
+
* Extract named entities from text using regex patterns.
|
|
31
|
+
* Detects PascalCase types, camelCase functions, and file paths.
|
|
32
|
+
*/
|
|
33
|
+
export declare function extractEntities(text: string): string[];
|
|
34
|
+
/**
|
|
35
|
+
* Generate a short summary from chunk content.
|
|
36
|
+
* Prefers markdown heading if present, otherwise first sentence.
|
|
37
|
+
*/
|
|
38
|
+
export declare function generateSummary(text: string): string;
|
|
39
|
+
/**
|
|
40
|
+
* Enrich a single text chunk with keywords, entities, summary, and source metadata.
|
|
41
|
+
*/
|
|
42
|
+
export declare function enrichChunk(chunk: TextChunk, sourceType: string, parentChunkIndex?: number): EnrichedChunk;
|
|
43
|
+
/**
|
|
44
|
+
* Enrich multiple chunks from the same document.
|
|
45
|
+
* Applies parent-child linking for multi-chunk documents.
|
|
46
|
+
*/
|
|
47
|
+
export declare function enrichChunks(chunks: TextChunk[], sourceType: string): EnrichedChunk[];
|
|
48
|
+
//# sourceMappingURL=enrichment-pipeline.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"enrichment-pipeline.d.ts","sourceRoot":"","sources":["../../../src/core/rag/enrichment-pipeline.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAIjD,MAAM,WAAW,aAAc,SAAQ,SAAS;IAC9C,yDAAyD;IACzD,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,iEAAiE;IACjE,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,yDAAyD;IACzD,OAAO,EAAE,MAAM,CAAC;IAChB,gCAAgC;IAChC,UAAU,EAAE,MAAM,CAAC;IACnB,0DAA0D;IAC1D,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAKD;;;GAGG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,GAAE,MAAsB,GAAG,MAAM,EAAE,CAepF;AAcD;;;GAGG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAwBtD;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CA0BpD;AAED;;GAEG;AACH,wBAAgB,WAAW,CACzB,KAAK,EAAE,SAAS,EAChB,UAAU,EAAE,MAAM,EAClB,gBAAgB,CAAC,EAAE,MAAM,GACxB,aAAa,CAoBf;AAED;;;GAGG;AACH,wBAAgB,YAAY,CAC1B,MAAM,EAAE,SAAS,EAAE,EACnB,UAAU,EAAE,MAAM,GACjB,aAAa,EAAE,CASjB"}
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Enrichment Pipeline — enriches text chunks with keywords, entities, and summaries
|
|
3
|
+
* before indexation into the knowledge store.
|
|
4
|
+
*
|
|
5
|
+
* Transforms raw TextChunks into EnrichedChunks with:
|
|
6
|
+
* - TF-IDF-based keyword extraction (top-N terms)
|
|
7
|
+
* - Regex-based entity detection (PascalCase, camelCase, file paths)
|
|
8
|
+
* - Auto-generated summary (first sentence or markdown heading)
|
|
9
|
+
* - Parent-child chunk linking for later chunk stitching
|
|
10
|
+
*/
|
|
11
|
+
import { tokenize } from "../search/tokenizer.js";
|
|
12
|
+
import { logger } from "../utils/logger.js";
|
|
13
|
+
// Maximum length, in characters, of an auto-generated chunk summary.
const MAX_SUMMARY_LENGTH = 200;
// Default number of top-frequency keywords extracted per chunk.
const DEFAULT_TOP_N = 5;
|
|
15
|
+
/**
 * Extract top-N keywords from text using term frequency scoring.
 * Uses the project tokenizer (handles PT + EN, strips stopwords).
 *
 * @param {string} text - Raw text to extract keywords from.
 * @param {number} [topN] - Maximum number of keywords to return; a
 *   non-positive value yields an empty array.
 * @returns {string[]} Up to `topN` terms, most frequent first, ties broken
 *   alphabetically for deterministic output.
 */
export function extractKeywords(text, topN = DEFAULT_TOP_N) {
    // Guard: without this, a negative topN would make slice(0, topN) count
    // from the END of the array and return almost every term instead of none.
    if (topN <= 0)
        return [];
    const tokens = tokenize(text);
    if (tokens.length === 0)
        return [];
    // Count term frequencies
    const freq = new Map();
    for (const token of tokens) {
        freq.set(token, (freq.get(token) ?? 0) + 1);
    }
    // Sort by frequency descending, then alphabetically for stability
    const sorted = Array.from(freq.entries())
        .sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]));
    return sorted.slice(0, topN).map(([term]) => term);
}
|
|
33
|
+
// ── Entity detection patterns ──────────────────────────────
/** PascalCase: at least two words joined (e.g., GraphNode, SqliteStore) */
// eslint-disable-next-line security/detect-unsafe-regex -- bounded pattern, safe for short identifiers
const PASCAL_CASE_RE = /\b([A-Z][a-z]+(?:[A-Z][a-z0-9]*)+)\b/g;
/** camelCase: starts lowercase, has at least one uppercase (e.g., findNextTask) */
const CAMEL_CASE_RE = /\b([a-z][a-zA-Z0-9]*[A-Z][a-zA-Z0-9]*)\b/g;
/** File paths: src/... or similar path patterns ending in common extensions */
const FILE_PATH_RE = /\b((?:src|lib|dist|test|tests)\/[\w\-./]+\.(?:ts|js|tsx|jsx|json|md|sql))\b/g;
/**
 * Extract named entities from text using regex patterns.
 * Detects PascalCase types, camelCase functions, and file paths.
 * Results are deduplicated and returned in first-seen order
 * (PascalCase matches first, then camelCase, then file paths).
 */
export function extractEntities(text) {
    const found = new Set();
    // matchAll works on an internal copy of each /g regex, so no manual
    // lastIndex resets are needed between calls or between patterns.
    for (const pattern of [PASCAL_CASE_RE, CAMEL_CASE_RE, FILE_PATH_RE]) {
        for (const m of text.matchAll(pattern)) {
            found.add(m[1]);
        }
    }
    return [...found];
}
|
|
65
|
+
/**
 * Generate a short summary from chunk content.
 * Prefers markdown heading if present, otherwise first sentence.
 * The result is always capped at MAX_SUMMARY_LENGTH characters.
 */
export function generateSummary(text) {
    const source = text.trim();
    if (!source)
        return "";
    // Cap any candidate summary at MAX_SUMMARY_LENGTH characters.
    const clip = (candidate) => candidate.length > MAX_SUMMARY_LENGTH
        ? candidate.slice(0, MAX_SUMMARY_LENGTH)
        : candidate;
    // Prefer the first markdown heading on any line (multiline flag).
    const headingMatch = source.match(/^#{1,6}\s+(.+)/m);
    if (headingMatch)
        return clip(headingMatch[1].trim());
    // Otherwise take the first sentence (terminated by . ! or ?).
    const sentenceMatch = source.match(/^[^.!?]+[.!?]/);
    if (sentenceMatch)
        return clip(sentenceMatch[0].trim());
    // Fallback: hard truncation of the whole chunk.
    return clip(source);
}
|
|
94
|
+
/**
 * Enrich a single text chunk with keywords, entities, summary, and source metadata.
 * Returns a new object; the input chunk is not mutated.
 */
export function enrichChunk(chunk, sourceType, parentChunkIndex) {
    const { content } = chunk;
    const keywords = extractKeywords(content);
    const entities = extractEntities(content);
    const summary = generateSummary(content);
    // Emit a debug trace before returning so enrichment stats are observable.
    logger.debug("Chunk enriched", {
        index: chunk.index,
        sourceType,
        keywordCount: keywords.length,
        entityCount: entities.length,
    });
    return { ...chunk, keywords, entities, summary, sourceType, parentChunkIndex };
}
|
|
116
|
+
/**
 * Enrich multiple chunks from the same document.
 * Applies parent-child linking for multi-chunk documents.
 */
export function enrichChunks(chunks, sourceType) {
    const isMultiChunk = chunks.length > 1;
    // For multi-chunk documents, first chunk (index 0) is the "parent"
    // reference; every later chunk points back at it. Single-chunk (or
    // empty) inputs get no parent link at all.
    return chunks.map((chunk, position) => {
        const parent = isMultiChunk && position > 0 ? 0 : undefined;
        return enrichChunk(chunk, sourceType, parent);
    });
}
|
|
127
|
+
//# sourceMappingURL=enrichment-pipeline.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"enrichment-pipeline.js","sourceRoot":"","sources":["../../../src/core/rag/enrichment-pipeline.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAGH,OAAO,EAAE,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAClD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAe5C,MAAM,kBAAkB,GAAG,GAAG,CAAC;AAC/B,MAAM,aAAa,GAAG,CAAC,CAAC;AAExB;;;GAGG;AACH,MAAM,UAAU,eAAe,CAAC,IAAY,EAAE,OAAe,aAAa;IACxE,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC9B,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEnC,yBAAyB;IACzB,MAAM,IAAI,GAAG,IAAI,GAAG,EAAkB,CAAC;IACvC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC9C,CAAC;IAED,kEAAkE;IAClE,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;SACtC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAE3D,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;AACrD,CAAC;AAED,8DAA8D;AAE9D,2EAA2E;AAC3E,uGAAuG;AACvG,MAAM,cAAc,GAAG,uCAAuC,CAAC;AAE/D,mFAAmF;AACnF,MAAM,aAAa,GAAG,2CAA2C,CAAC;AAElE,+EAA+E;AAC/E,MAAM,YAAY,GAAG,8EAA8E,CAAC;AAEpG;;;GAGG;AACH,MAAM,UAAU,eAAe,CAAC,IAAY;IAC1C,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;IAEnC,IAAI,KAA6B,CAAC;IAElC,gDAAgD;IAChD,cAAc,CAAC,SAAS,GAAG,CAAC,CAAC;IAC7B,OAAO,CAAC,KAAK,GAAG,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACpD,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACzB,CAAC;IAED,YAAY;IACZ,aAAa,CAAC,SAAS,GAAG,CAAC,CAAC;IAC5B,OAAO,CAAC,KAAK,GAAG,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACnD,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACzB,CAAC;IAED,aAAa;IACb,YAAY,CAAC,SAAS,GAAG,CAAC,CAAC;IAC3B,OAAO,CAAC,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAClD,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACzB,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;AAC9B,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,eAAe,C
AAC,IAAY;IAC1C,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,CAAC;IAExB,6BAA6B;IAC7B,MAAM,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;IACtD,IAAI,YAAY,EAAE,CAAC;QACjB,MAAM,OAAO,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACvC,OAAO,OAAO,CAAC,MAAM,GAAG,kBAAkB;YACxC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,kBAAkB,CAAC;YACtC,CAAC,CAAC,OAAO,CAAC;IACd,CAAC;IAED,sCAAsC;IACtC,MAAM,aAAa,GAAG,OAAO,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;IACrD,IAAI,aAAa,EAAE,CAAC;QAClB,MAAM,QAAQ,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACzC,OAAO,QAAQ,CAAC,MAAM,GAAG,kBAAkB;YACzC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,kBAAkB,CAAC;YACvC,CAAC,CAAC,QAAQ,CAAC;IACf,CAAC;IAED,2CAA2C;IAC3C,OAAO,OAAO,CAAC,MAAM,GAAG,kBAAkB;QACxC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,kBAAkB,CAAC;QACtC,CAAC,CAAC,OAAO,CAAC;AACd,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CACzB,KAAgB,EAChB,UAAkB,EAClB,gBAAyB;IAEzB,MAAM,QAAQ,GAAG,eAAe,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,QAAQ,GAAG,eAAe,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,OAAO,GAAG,eAAe,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAE/C,MAAM,CAAC,KAAK,CAAC,gBAAgB,EAAE;QAC7B,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,UAAU;QACV,YAAY,EAAE,QAAQ,CAAC,MAAM;QAC7B,WAAW,EAAE,QAAQ,CAAC,MAAM;KAC7B,CAAC,CAAC;IAEH,OAAO;QACL,GAAG,KAAK;QACR,QAAQ;QACR,QAAQ;QACR,OAAO;QACP,UAAU;QACV,gBAAgB;KACjB,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,YAAY,CAC1B,MAAmB,EACnB,UAAkB;IAElB,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QACvB,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,WAAW,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC,CAAC;IACvD,CAAC;IAED,6EAA6E;IAC7E,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACzB,WAAW,CAAC,CAAC,EAAE,UAAU,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CACpD,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Post-Retrieval Pipeline — processes search results after initial retrieval.
|
|
3
|
+
*
|
|
4
|
+
* Pipeline stages:
|
|
5
|
+
* 1. Deduplication — remove results with identical content
|
|
6
|
+
* 2. Reranking — boost results with higher query keyword overlap
|
|
7
|
+
* 3. Chunk stitching — merge adjacent chunks from the same source
|
|
8
|
+
* 4. Limit — enforce maxResults
|
|
9
|
+
*/
|
|
10
|
+
import type { RankedResult } from "./multi-strategy-retrieval.js";
export interface PostRetrievalOptions {
    /** Original user query; used for keyword-overlap reranking. */
    query: string;
    /** Candidate results from the initial retrieval pass. */
    results: RankedResult[];
    /** Hard cap on the number of results the pipeline returns. */
    maxResults: number;
    /** Optional chunk-index map used by stitching; presumably keyed by
     * result/source id — confirm against post-retrieval.js. */
    chunkMeta?: Map<string, number>;
}
export interface PostRetrievalResult {
    /** Final, processed result list (deduplicated, reranked, limited). */
    results: RankedResult[];
    /** Count of results dropped by the deduplication stage. */
    deduplicated: number;
    /** Count of chunks merged by the stitching stage. */
    stitchedChunks: number;
}
/**
 * Remove results with identical content, keeping the highest-scored one.
 */
export declare function deduplicateResults(results: RankedResult[]): RankedResult[];
/**
 * Rerank results by keyword overlap with the query.
 * Combines original score with keyword overlap boost.
 */
export declare function rerankByKeywordOverlap(results: RankedResult[], query: string): RankedResult[];
/**
 * Merge adjacent chunks from the same source document.
 * Chunks are considered adjacent if their chunk indices differ by 1.
 */
export declare function stitchAdjacentChunks(results: RankedResult[], chunkMeta: Map<string, number>): RankedResult[];
/**
 * Full post-retrieval pipeline.
 */
export declare function postRetrievalPipeline(options: PostRetrievalOptions): PostRetrievalResult;
//# sourceMappingURL=post-retrieval.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"post-retrieval.d.ts","sourceRoot":"","sources":["../../../src/core/rag/post-retrieval.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,+BAA+B,CAAC;AAIlE,MAAM,WAAW,oBAAoB;IACnC,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,YAAY,EAAE,CAAC;IACxB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACjC;AAED,MAAM,WAAW,mBAAmB;IAClC,OAAO,EAAE,YAAY,EAAE,CAAC;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,CAAC;CACxB;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,YAAY,EAAE,CAY1E;AAED;;;GAGG;AACH,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,YAAY,EAAE,EACvB,KAAK,EAAE,MAAM,GACZ,YAAY,EAAE,CAchB;AAED;;;GAGG;AACH,wBAAgB,oBAAoB,CAClC,OAAO,EAAE,YAAY,EAAE,EACvB,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,GAC7B,YAAY,EAAE,CAmDhB;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,oBAAoB,GAAG,mBAAmB,CA2BxF"}
|