@mcp-graph-workflow/mcp-graph 5.6.1 → 5.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/README.md +2 -2
  2. package/dist/core/importer/import-graph.d.ts +23 -0
  3. package/dist/core/importer/import-graph.d.ts.map +1 -0
  4. package/dist/core/importer/import-graph.js +127 -0
  5. package/dist/core/importer/import-graph.js.map +1 -0
  6. package/dist/core/rag/benchmark-indexer.d.ts +32 -0
  7. package/dist/core/rag/benchmark-indexer.d.ts.map +1 -0
  8. package/dist/core/rag/benchmark-indexer.js +85 -0
  9. package/dist/core/rag/benchmark-indexer.js.map +1 -0
  10. package/dist/core/rag/citation-mapper.d.ts +32 -0
  11. package/dist/core/rag/citation-mapper.d.ts.map +1 -0
  12. package/dist/core/rag/citation-mapper.js +45 -0
  13. package/dist/core/rag/citation-mapper.js.map +1 -0
  14. package/dist/core/rag/enrichment-pipeline.d.ts +48 -0
  15. package/dist/core/rag/enrichment-pipeline.d.ts.map +1 -0
  16. package/dist/core/rag/enrichment-pipeline.js +127 -0
  17. package/dist/core/rag/enrichment-pipeline.js.map +1 -0
  18. package/dist/core/rag/post-retrieval.d.ts +40 -0
  19. package/dist/core/rag/post-retrieval.d.ts.map +1 -0
  20. package/dist/core/rag/post-retrieval.js +119 -0
  21. package/dist/core/rag/post-retrieval.js.map +1 -0
  22. package/dist/core/rag/query-cache.d.ts +54 -0
  23. package/dist/core/rag/query-cache.d.ts.map +1 -0
  24. package/dist/core/rag/query-cache.js +104 -0
  25. package/dist/core/rag/query-cache.js.map +1 -0
  26. package/dist/core/rag/query-understanding.d.ts +37 -0
  27. package/dist/core/rag/query-understanding.d.ts.map +1 -0
  28. package/dist/core/rag/query-understanding.js +123 -0
  29. package/dist/core/rag/query-understanding.js.map +1 -0
  30. package/dist/core/rag/rag-trace.d.ts +67 -0
  31. package/dist/core/rag/rag-trace.d.ts.map +1 -0
  32. package/dist/core/rag/rag-trace.js +82 -0
  33. package/dist/core/rag/rag-trace.js.map +1 -0
  34. package/dist/core/rag/source-contribution.d.ts +37 -0
  35. package/dist/core/rag/source-contribution.d.ts.map +1 -0
  36. package/dist/core/rag/source-contribution.js +54 -0
  37. package/dist/core/rag/source-contribution.js.map +1 -0
  38. package/dist/core/store/sqlite-store.d.ts +9 -0
  39. package/dist/core/store/sqlite-store.d.ts.map +1 -1
  40. package/dist/core/store/sqlite-store.js +42 -0
  41. package/dist/core/store/sqlite-store.js.map +1 -1
  42. package/dist/mcp/tools/import-graph.d.ts +4 -0
  43. package/dist/mcp/tools/import-graph.d.ts.map +1 -0
  44. package/dist/mcp/tools/import-graph.js +81 -0
  45. package/dist/mcp/tools/import-graph.js.map +1 -0
  46. package/dist/mcp/tools/index.d.ts.map +1 -1
  47. package/dist/mcp/tools/index.js +2 -0
  48. package/dist/mcp/tools/index.js.map +1 -1
  49. package/dist/schemas/knowledge.schema.d.ts +2 -0
  50. package/dist/schemas/knowledge.schema.d.ts.map +1 -1
  51. package/dist/schemas/knowledge.schema.js +1 -0
  52. package/dist/schemas/knowledge.schema.js.map +1 -1
  53. package/package.json +1 -1
package/README.md CHANGED
@@ -114,7 +114,7 @@ mcp-graph serve --port 3000 # or: npm run dev
114
114
 
115
115
  | | Count | Reference |
116
116
  |---|---|---|
117
- | **MCP Tools** | 26 | [MCP-TOOLS-REFERENCE.md](docs/MCP-TOOLS-REFERENCE.md) |
117
+ | **MCP Tools** | 32 | [MCP-TOOLS-REFERENCE.md](docs/MCP-TOOLS-REFERENCE.md) |
118
118
  | **REST Endpoints** | 44 (17 routers) | [REST-API-REFERENCE.md](docs/REST-API-REFERENCE.md) |
119
119
  | **CLI Commands** | 6 | `init`, `import`, `index`, `stats`, `serve`, `doctor` |
120
120
 
@@ -145,7 +145,7 @@ npm run test:all # Everything
145
145
  |----------|-------------|
146
146
  | [Getting Started](docs/GETTING-STARTED.md) | Step-by-step guide for new users |
147
147
  | [Architecture](docs/ARCHITECTURE-GUIDE.md) | System layers, modules, data flows |
148
- | [MCP Tools Reference](docs/MCP-TOOLS-REFERENCE.md) | 26 tools with full parameters |
148
+ | [MCP Tools Reference](docs/MCP-TOOLS-REFERENCE.md) | 32 tools with full parameters |
149
149
  | [REST API Reference](docs/REST-API-REFERENCE.md) | 17 routers, 44 endpoints |
150
150
  | [Knowledge Pipeline](docs/KNOWLEDGE-PIPELINE.md) | RAG, embeddings, context assembly |
151
151
  | [Integrations](docs/INTEGRATIONS-GUIDE.md) | Serena, GitNexus, Context7, Playwright |
@@ -0,0 +1,23 @@
1
+ import type { SqliteStore } from "../store/sqlite-store.js";
2
+ import type { GraphDocument } from "../graph/graph-types.js";
3
+ export interface MergeGraphOptions {
4
+ dryRun?: boolean;
5
+ }
6
+ export interface MergeGraphResult {
7
+ nodesInserted: number;
8
+ nodesSkipped: number;
9
+ edgesInserted: number;
10
+ edgesSkipped: number;
11
+ edgesOrphaned: number;
12
+ sourceProject: string;
13
+ }
14
+ /**
15
+ * Merge an exported GraphDocument into the local store without overriding existing data.
16
+ *
17
+ * - Nodes with IDs already present locally are SKIPPED (local version wins).
18
+ * - Edges with the same (from, to, relationType) are SKIPPED via INSERT OR IGNORE.
19
+ * - Edges referencing nodes that don't exist (locally or in the import) are SKIPPED as orphans.
20
+ * - In dryRun mode, no data is written; only counts are returned.
21
+ */
22
+ export declare function mergeGraph(store: SqliteStore, incoming: GraphDocument, options?: MergeGraphOptions): MergeGraphResult;
23
+ //# sourceMappingURL=import-graph.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"import-graph.d.ts","sourceRoot":"","sources":["../../../src/core/importer/import-graph.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,0BAA0B,CAAC;AAC5D,OAAO,KAAK,EAAE,aAAa,EAAa,MAAM,yBAAyB,CAAC;AAKxE,MAAM,WAAW,iBAAiB;IAChC,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED,MAAM,WAAW,gBAAgB;IAC/B,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,aAAa,EAAE,MAAM,CAAC;CACvB;AAED;;;;;;;GAOG;AACH,wBAAgB,UAAU,CACxB,KAAK,EAAE,WAAW,EAClB,QAAQ,EAAE,aAAa,EACvB,OAAO,CAAC,EAAE,iBAAiB,GAC1B,gBAAgB,CA+HlB"}
@@ -0,0 +1,127 @@
1
+ import { z } from "zod/v4";
2
+ import { GraphDocumentSchema } from "../../schemas/graph.schema.js";
3
+ import { ValidationError } from "../utils/errors.js";
4
+ import { logger } from "../utils/logger.js";
5
+ /**
6
+ * Merge an exported GraphDocument into the local store without overriding existing data.
7
+ *
8
+ * - Nodes with IDs already present locally are SKIPPED (local version wins).
9
+ * - Edges with the same (from, to, relationType) are SKIPPED via INSERT OR IGNORE.
10
+ * - Edges referencing nodes that don't exist (locally or in the import) are SKIPPED as orphans.
11
+ * - In dryRun mode, no data is written; only counts are returned.
12
+ */
13
+ export function mergeGraph(store, incoming, options) {
14
+ // 1. Validate incoming document
15
+ try {
16
+ GraphDocumentSchema.parse(incoming);
17
+ }
18
+ catch (err) {
19
+ if (err instanceof z.ZodError) {
20
+ throw new ValidationError("Invalid graph document", err.issues);
21
+ }
22
+ throw err;
23
+ }
24
+ const sourceProject = incoming.project.name;
25
+ const dryRun = options?.dryRun ?? false;
26
+ logger.info("merge-graph:start", {
27
+ sourceProject,
28
+ incomingNodes: incoming.nodes.length,
29
+ incomingEdges: incoming.edges.length,
30
+ dryRun,
31
+ });
32
+ // 2. Collect existing node IDs
33
+ const existingNodes = store.getAllNodes();
34
+ const existingNodeIds = new Set(existingNodes.map((n) => n.id));
35
+ // 3. Partition incoming nodes
36
+ const nodesToInsert = [];
37
+ let nodesSkipped = 0;
38
+ for (const node of incoming.nodes) {
39
+ if (existingNodeIds.has(node.id)) {
40
+ nodesSkipped++;
41
+ }
42
+ else {
43
+ // Tag with merge origin metadata
44
+ const taggedNode = {
45
+ ...node,
46
+ metadata: {
47
+ ...node.metadata,
48
+ mergedFrom: sourceProject,
49
+ },
50
+ };
51
+ nodesToInsert.push(taggedNode);
52
+ }
53
+ }
54
+ // 4. Build set of all available node IDs (existing + to-be-inserted)
55
+ const availableNodeIds = new Set(existingNodeIds);
56
+ for (const node of nodesToInsert) {
57
+ availableNodeIds.add(node.id);
58
+ }
59
+ // 5. Filter edges: keep only those where both from and to exist
60
+ const validEdges = [];
61
+ let edgesOrphaned = 0;
62
+ for (const edge of incoming.edges) {
63
+ if (availableNodeIds.has(edge.from) && availableNodeIds.has(edge.to)) {
64
+ validEdges.push(edge);
65
+ }
66
+ else {
67
+ edgesOrphaned++;
68
+ }
69
+ }
70
+ // 6. In dry-run mode, estimate edge skips without writing
71
+ if (dryRun) {
72
+ // We can't know exact edge skip counts without trying INSERT OR IGNORE,
73
+ // but we can count edges whose (from, to, relationType) match existing edges
74
+ const existingEdges = store.getAllEdges();
75
+ const existingEdgeKeys = new Set(existingEdges.map((e) => `${e.from}|${e.to}|${e.relationType}`));
76
+ let edgesInserted = 0;
77
+ let edgesSkipped = 0;
78
+ for (const edge of validEdges) {
79
+ const key = `${edge.from}|${edge.to}|${edge.relationType}`;
80
+ if (existingEdgeKeys.has(key)) {
81
+ edgesSkipped++;
82
+ }
83
+ else {
84
+ edgesInserted++;
85
+ }
86
+ }
87
+ logger.info("merge-graph:dry-run", {
88
+ nodesInserted: nodesToInsert.length,
89
+ nodesSkipped,
90
+ edgesInserted,
91
+ edgesSkipped,
92
+ edgesOrphaned,
93
+ });
94
+ return {
95
+ nodesInserted: nodesToInsert.length,
96
+ nodesSkipped,
97
+ edgesInserted,
98
+ edgesSkipped,
99
+ edgesOrphaned,
100
+ sourceProject,
101
+ };
102
+ }
103
+ // 7. Create snapshot before merge (safety net)
104
+ store.createSnapshot();
105
+ // 8. Perform merge insert
106
+ const { nodesInserted, edgesInserted } = store.mergeInsert(nodesToInsert, validEdges);
107
+ const edgesSkipped = validEdges.length - edgesInserted;
108
+ // 9. Record import
109
+ store.recordImport(`merge:${sourceProject}`, nodesInserted, edgesInserted);
110
+ logger.info("merge-graph:done", {
111
+ sourceProject,
112
+ nodesInserted,
113
+ nodesSkipped,
114
+ edgesInserted,
115
+ edgesSkipped,
116
+ edgesOrphaned,
117
+ });
118
+ return {
119
+ nodesInserted,
120
+ nodesSkipped,
121
+ edgesInserted,
122
+ edgesSkipped,
123
+ edgesOrphaned,
124
+ sourceProject,
125
+ };
126
+ }
127
+ //# sourceMappingURL=import-graph.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"import-graph.js","sourceRoot":"","sources":["../../../src/core/importer/import-graph.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,QAAQ,CAAC;AAG3B,OAAO,EAAE,mBAAmB,EAAE,MAAM,+BAA+B,CAAC;AACpE,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAe5C;;;;;;;GAOG;AACH,MAAM,UAAU,UAAU,CACxB,KAAkB,EAClB,QAAuB,EACvB,OAA2B;IAE3B,gCAAgC;IAChC,IAAI,CAAC;QACH,mBAAmB,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IACtC,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,GAAG,YAAY,CAAC,CAAC,QAAQ,EAAE,CAAC;YAC9B,MAAM,IAAI,eAAe,CAAC,wBAAwB,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;QAClE,CAAC;QACD,MAAM,GAAG,CAAC;IACZ,CAAC;IAED,MAAM,aAAa,GAAG,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC;IAC5C,MAAM,MAAM,GAAG,OAAO,EAAE,MAAM,IAAI,KAAK,CAAC;IAExC,MAAM,CAAC,IAAI,CAAC,mBAAmB,EAAE;QAC/B,aAAa;QACb,aAAa,EAAE,QAAQ,CAAC,KAAK,CAAC,MAAM;QACpC,aAAa,EAAE,QAAQ,CAAC,KAAK,CAAC,MAAM;QACpC,MAAM;KACP,CAAC,CAAC;IAEH,+BAA+B;IAC/B,MAAM,aAAa,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;IAC1C,MAAM,eAAe,GAAG,IAAI,GAAG,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAEhE,8BAA8B;IAC9B,MAAM,aAAa,GAAgB,EAAE,CAAC;IACtC,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,KAAK,MAAM,IAAI,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;QAClC,IAAI,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC;YACjC,YAAY,EAAE,CAAC;QACjB,CAAC;aAAM,CAAC;YACN,iCAAiC;YACjC,MAAM,UAAU,GAAc;gBAC5B,GAAG,IAAI;gBACP,QAAQ,EAAE;oBACR,GAAG,IAAI,CAAC,QAAQ;oBAChB,UAAU,EAAE,aAAa;iBAC1B;aACF,CAAC;YACF,aAAa,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACjC,CAAC;IACH,CAAC;IAED,qEAAqE;IACrE,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAAC,eAAe,CAAC,CAAC;IAClD,KAAK,MAAM,IAAI,IAAI,aAAa,EAAE,CAAC;QACjC,gBAAgB,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAChC,CAAC;IAED,gEAAgE;IAChE,MAAM,UAAU,GAAG,EAAE,CAAC;IACtB,IAAI,aAAa,GAAG,CAAC,CAAC;IAEtB,KAAK,MAAM,IAAI,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;QAClC,IAAI,gBAAgB,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,gBAAgB,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC;YACrE,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxB,CAAC;aAAM,CAAC;YACN,aAAa,EAAE,CAAC;QAClB,CAAC;IACH,CAAC;IAED,0DA
A0D;IAC1D,IAAI,MAAM,EAAE,CAAC;QACX,wEAAwE;QACxE,6EAA6E;QAC7E,MAAM,aAAa,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;QAC1C,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAC9B,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,YAAY,EAAE,CAAC,CAChE,CAAC;QACF,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,IAAI,YAAY,GAAG,CAAC,CAAC;QACrB,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;YAC9B,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,EAAE,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;YAC3D,IAAI,gBAAgB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC9B,YAAY,EAAE,CAAC;YACjB,CAAC;iBAAM,CAAC;gBACN,aAAa,EAAE,CAAC;YAClB,CAAC;QACH,CAAC;QAED,MAAM,CAAC,IAAI,CAAC,qBAAqB,EAAE;YACjC,aAAa,EAAE,aAAa,CAAC,MAAM;YACnC,YAAY;YACZ,aAAa;YACb,YAAY;YACZ,aAAa;SACd,CAAC,CAAC;QAEH,OAAO;YACL,aAAa,EAAE,aAAa,CAAC,MAAM;YACnC,YAAY;YACZ,aAAa;YACb,YAAY;YACZ,aAAa;YACb,aAAa;SACd,CAAC;IACJ,CAAC;IAED,+CAA+C;IAC/C,KAAK,CAAC,cAAc,EAAE,CAAC;IAEvB,0BAA0B;IAC1B,MAAM,EAAE,aAAa,EAAE,aAAa,EAAE,GAAG,KAAK,CAAC,WAAW,CAAC,aAAa,EAAE,UAAU,CAAC,CAAC;IACtF,MAAM,YAAY,GAAG,UAAU,CAAC,MAAM,GAAG,aAAa,CAAC;IAEvD,mBAAmB;IACnB,KAAK,CAAC,YAAY,CAAC,SAAS,aAAa,EAAE,EAAE,aAAa,EAAE,aAAa,CAAC,CAAC;IAE3E,MAAM,CAAC,IAAI,CAAC,kBAAkB,EAAE;QAC9B,aAAa;QACb,aAAa;QACb,YAAY;QACZ,aAAa;QACb,YAAY;QACZ,aAAa;KACd,CAAC,CAAC;IAEH,OAAO;QACL,aAAa;QACb,YAAY;QACZ,aAAa;QACb,YAAY;QACZ,aAAa;QACb,aAAa;KACd,CAAC;AACJ,CAAC"}
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Benchmark Indexer — indexes performance benchmark results as knowledge documents.
3
+ * Enables RAG queries about performance metrics, token economy, and SLA compliance.
4
+ */
5
+ import type { KnowledgeStore } from "../store/knowledge-store.js";
6
+ export interface BenchmarkMetric {
7
+ name: string;
8
+ value: number;
9
+ target: number;
10
+ passed: boolean;
11
+ }
12
+ export interface BenchmarkData {
13
+ timestamp: string;
14
+ metrics: BenchmarkMetric[];
15
+ tokenEconomy?: {
16
+ avgCompression: number;
17
+ totalTokensSaved: number;
18
+ };
19
+ toolUsage?: Record<string, {
20
+ inputTokens: number;
21
+ outputTokens: number;
22
+ }>;
23
+ }
24
+ export interface BenchmarkIndexResult {
25
+ documentsIndexed: number;
26
+ }
27
+ /**
28
+ * Index benchmark results into the knowledge store.
29
+ * Creates documents for metrics summary and token economy.
30
+ */
31
+ export declare function indexBenchmarkResults(store: KnowledgeStore, data: BenchmarkData): BenchmarkIndexResult;
32
+ //# sourceMappingURL=benchmark-indexer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"benchmark-indexer.d.ts","sourceRoot":"","sources":["../../../src/core/rag/benchmark-indexer.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAGlE,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,OAAO,CAAC;CACjB;AAED,MAAM,WAAW,aAAa;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,eAAe,EAAE,CAAC;IAC3B,YAAY,CAAC,EAAE;QACb,cAAc,EAAE,MAAM,CAAC;QACvB,gBAAgB,EAAE,MAAM,CAAC;KAC1B,CAAC;IACF,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE;QAAE,WAAW,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CAC3E;AAED,MAAM,WAAW,oBAAoB;IACnC,gBAAgB,EAAE,MAAM,CAAC;CAC1B;AAED;;;GAGG;AACH,wBAAgB,qBAAqB,CACnC,KAAK,EAAE,cAAc,EACrB,IAAI,EAAE,aAAa,GAClB,oBAAoB,CAwFtB"}
@@ -0,0 +1,85 @@
1
+ /**
2
+ * Benchmark Indexer — indexes performance benchmark results as knowledge documents.
3
+ * Enables RAG queries about performance metrics, token economy, and SLA compliance.
4
+ */
5
+ import { logger } from "../utils/logger.js";
6
+ /**
7
+ * Index benchmark results into the knowledge store.
8
+ * Creates documents for metrics summary and token economy.
9
+ */
10
+ export function indexBenchmarkResults(store, data) {
11
+ let documentsIndexed = 0;
12
+ // Index metrics summary
13
+ if (data.metrics.length > 0) {
14
+ const metricsLines = data.metrics.map((m) => {
15
+ const status = m.passed ? "PASSED" : "FAILED — exceeded target";
16
+ return `- ${m.name}: ${m.value} (target: ${m.target}) [${status}]`;
17
+ });
18
+ const passing = data.metrics.filter((m) => m.passed).length;
19
+ const failing = data.metrics.length - passing;
20
+ const content = [
21
+ `## Benchmark Results (${data.timestamp})`,
22
+ "",
23
+ `Summary: ${passing} passed, ${failing} failed out of ${data.metrics.length} metrics.`,
24
+ "",
25
+ ...metricsLines,
26
+ ].join("\n");
27
+ store.insert({
28
+ sourceType: "benchmark",
29
+ sourceId: `benchmark:${data.timestamp}`,
30
+ title: `Benchmark Results ${data.timestamp}`,
31
+ content,
32
+ chunkIndex: 0,
33
+ metadata: {
34
+ timestamp: data.timestamp,
35
+ totalMetrics: data.metrics.length,
36
+ passing,
37
+ failing,
38
+ },
39
+ });
40
+ documentsIndexed++;
41
+ }
42
+ // Index token economy
43
+ if (data.tokenEconomy) {
44
+ const content = [
45
+ `## Token Economy (${data.timestamp})`,
46
+ "",
47
+ `- Average compression ratio: ${data.tokenEconomy.avgCompression}`,
48
+ `- Total tokens saved: ${data.tokenEconomy.totalTokensSaved}`,
49
+ ].join("\n");
50
+ store.insert({
51
+ sourceType: "benchmark",
52
+ sourceId: `benchmark:token-economy:${data.timestamp}`,
53
+ title: `Token Economy ${data.timestamp}`,
54
+ content,
55
+ chunkIndex: 0,
56
+ metadata: {
57
+ timestamp: data.timestamp,
58
+ avgCompression: data.tokenEconomy.avgCompression,
59
+ totalTokensSaved: data.tokenEconomy.totalTokensSaved,
60
+ },
61
+ });
62
+ documentsIndexed++;
63
+ }
64
+ // Index tool usage
65
+ if (data.toolUsage && Object.keys(data.toolUsage).length > 0) {
66
+ const lines = Object.entries(data.toolUsage).map(([tool, usage]) => `- ${tool}: input=${usage.inputTokens}, output=${usage.outputTokens}`);
67
+ const content = [
68
+ `## Tool Token Usage (${data.timestamp})`,
69
+ "",
70
+ ...lines,
71
+ ].join("\n");
72
+ store.insert({
73
+ sourceType: "benchmark",
74
+ sourceId: `benchmark:tool-usage:${data.timestamp}`,
75
+ title: `Tool Token Usage ${data.timestamp}`,
76
+ content,
77
+ chunkIndex: 0,
78
+ metadata: { timestamp: data.timestamp },
79
+ });
80
+ documentsIndexed++;
81
+ }
82
+ logger.info("Benchmark results indexed", { documentsIndexed, timestamp: data.timestamp });
83
+ return { documentsIndexed };
84
+ }
85
+ //# sourceMappingURL=benchmark-indexer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"benchmark-indexer.js","sourceRoot":"","sources":["../../../src/core/rag/benchmark-indexer.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAuB5C;;;GAGG;AACH,MAAM,UAAU,qBAAqB,CACnC,KAAqB,EACrB,IAAmB;IAEnB,IAAI,gBAAgB,GAAG,CAAC,CAAC;IAEzB,wBAAwB;IACxB,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5B,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;YAC1C,MAAM,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,0BAA0B,CAAC;YAChE,OAAO,KAAK,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,KAAK,aAAa,CAAC,CAAC,MAAM,MAAM,MAAM,GAAG,CAAC;QACrE,CAAC,CAAC,CAAC;QAEH,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;QAC5D,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,OAAO,CAAC;QAE9C,MAAM,OAAO,GAAG;YACd,yBAAyB,IAAI,CAAC,SAAS,GAAG;YAC1C,EAAE;YACF,YAAY,OAAO,YAAY,OAAO,kBAAkB,IAAI,CAAC,OAAO,CAAC,MAAM,WAAW;YACtF,EAAE;YACF,GAAG,YAAY;SAChB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEb,KAAK,CAAC,MAAM,CAAC;YACX,UAAU,EAAE,WAAW;YACvB,QAAQ,EAAE,aAAa,IAAI,CAAC,SAAS,EAAE;YACvC,KAAK,EAAE,qBAAqB,IAAI,CAAC,SAAS,EAAE;YAC5C,OAAO;YACP,UAAU,EAAE,CAAC;YACb,QAAQ,EAAE;gBACR,SAAS,EAAE,IAAI,CAAC,SAAS;gBACzB,YAAY,EAAE,IAAI,CAAC,OAAO,CAAC,MAAM;gBACjC,OAAO;gBACP,OAAO;aACR;SACF,CAAC,CAAC;QACH,gBAAgB,EAAE,CAAC;IACrB,CAAC;IAED,sBAAsB;IACtB,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;QACtB,MAAM,OAAO,GAAG;YACd,qBAAqB,IAAI,CAAC,SAAS,GAAG;YACtC,EAAE;YACF,gCAAgC,IAAI,CAAC,YAAY,CAAC,cAAc,EAAE;YAClE,yBAAyB,IAAI,CAAC,YAAY,CAAC,gBAAgB,EAAE;SAC9D,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEb,KAAK,CAAC,MAAM,CAAC;YACX,UAAU,EAAE,WAAW;YACvB,QAAQ,EAAE,2BAA2B,IAAI,CAAC,SAAS,EAAE;YACrD,KAAK,EAAE,iBAAiB,IAAI,CAAC,SAAS,EAAE;YACxC,OAAO;YACP,UAAU,EAAE,CAAC;YACb,QAAQ,EAAE;gBACR,SAAS,EAAE,IAAI,CAAC,SAAS;gBACzB,cAAc,EAAE,IAAI,CAAC,YAAY,CAAC,cAAc;gBAChD,gBAAgB,EAAE,IAAI,CAAC,YAAY,CAAC,gBAAgB;aACrD;SACF,CAAC,CAAC;QACH,gBAAgB,EAAE,CAAC;IACrB,CAAC;IAED,mBAAmB;IACnB,IAAI,IAAI,CAAC,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,C
AAC;QAC7D,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,GAAG,CAC9C,CAAC,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,EAAE,CAChB,KAAK,IAAI,WAAW,KAAK,CAAC,WAAW,YAAY,KAAK,CAAC,YAAY,EAAE,CACxE,CAAC;QAEF,MAAM,OAAO,GAAG;YACd,wBAAwB,IAAI,CAAC,SAAS,GAAG;YACzC,EAAE;YACF,GAAG,KAAK;SACT,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEb,KAAK,CAAC,MAAM,CAAC;YACX,UAAU,EAAE,WAAW;YACvB,QAAQ,EAAE,wBAAwB,IAAI,CAAC,SAAS,EAAE;YAClD,KAAK,EAAE,oBAAoB,IAAI,CAAC,SAAS,EAAE;YAC3C,OAAO;YACP,UAAU,EAAE,CAAC;YACb,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE;SACxC,CAAC,CAAC;QACH,gBAAgB,EAAE,CAAC;IACrB,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,2BAA2B,EAAE,EAAE,gBAAgB,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC;IAE1F,OAAO,EAAE,gBAAgB,EAAE,CAAC;AAC9B,CAAC"}
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Citation Mapper — maps search results to citations with source traceability.
3
+ *
4
+ * Each chunk in the assembled context gets a [N] citation marker,
5
+ * enabling users to trace information back to its source.
6
+ */
7
+ import type { RankedResult } from "./multi-strategy-retrieval.js";
8
+ export interface Citation {
9
+ id: string;
10
+ chunkId: string;
11
+ sourceType: string;
12
+ sourceId: string;
13
+ title: string;
14
+ snippet: string;
15
+ relevanceScore: number;
16
+ position: number;
17
+ }
18
+ export interface CitedContext {
19
+ assembledText: string;
20
+ citations: Citation[];
21
+ sourceBreakdown: Record<string, number>;
22
+ }
23
+ /**
24
+ * Create citations from ranked results.
25
+ * Each result gets a position-based citation (1-indexed).
26
+ */
27
+ export declare function mapCitations(results: RankedResult[]): Citation[];
28
+ /**
29
+ * Build a cited context: assembled text with [N] markers + citation list + source breakdown.
30
+ */
31
+ export declare function buildCitedContext(results: RankedResult[]): CitedContext;
32
+ //# sourceMappingURL=citation-mapper.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"citation-mapper.d.ts","sourceRoot":"","sources":["../../../src/core/rag/citation-mapper.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,+BAA+B,CAAC;AAGlE,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,cAAc,EAAE,MAAM,CAAC;IACvB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,YAAY;IAC3B,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACzC;AAID;;;GAGG;AACH,wBAAgB,YAAY,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,QAAQ,EAAE,CAahE;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,YAAY,CAoBvE"}
@@ -0,0 +1,45 @@
1
+ /**
2
+ * Citation Mapper — maps search results to citations with source traceability.
3
+ *
4
+ * Each chunk in the assembled context gets a [N] citation marker,
5
+ * enabling users to trace information back to its source.
6
+ */
7
+ import { generateId } from "../utils/id.js";
8
+ const MAX_SNIPPET_LENGTH = 400;
9
+ /**
10
+ * Create citations from ranked results.
11
+ * Each result gets a position-based citation (1-indexed).
12
+ */
13
+ export function mapCitations(results) {
14
+ return results.map((r, i) => ({
15
+ id: generateId("cite"),
16
+ chunkId: r.id,
17
+ sourceType: r.sourceType,
18
+ sourceId: r.sourceId,
19
+ title: r.title,
20
+ snippet: r.content.length > MAX_SNIPPET_LENGTH
21
+ ? r.content.slice(0, MAX_SNIPPET_LENGTH)
22
+ : r.content,
23
+ relevanceScore: r.score,
24
+ position: i + 1,
25
+ }));
26
+ }
27
+ /**
28
+ * Build a cited context: assembled text with [N] markers + citation list + source breakdown.
29
+ */
30
+ export function buildCitedContext(results) {
31
+ if (results.length === 0) {
32
+ return { assembledText: "", citations: [], sourceBreakdown: {} };
33
+ }
34
+ const citations = mapCitations(results);
35
+ const sourceBreakdown = {};
36
+ const textParts = [];
37
+ for (let i = 0; i < results.length; i++) {
38
+ const r = results[i];
39
+ textParts.push(`[${i + 1}] ${r.content}`);
40
+ sourceBreakdown[r.sourceType] = (sourceBreakdown[r.sourceType] ?? 0) + 1;
41
+ }
42
+ const assembledText = textParts.join("\n\n");
43
+ return { assembledText, citations, sourceBreakdown };
44
+ }
45
+ //# sourceMappingURL=citation-mapper.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"citation-mapper.js","sourceRoot":"","sources":["../../../src/core/rag/citation-mapper.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAmB5C,MAAM,kBAAkB,GAAG,GAAG,CAAC;AAE/B;;;GAGG;AACH,MAAM,UAAU,YAAY,CAAC,OAAuB;IAClD,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;QAC5B,EAAE,EAAE,UAAU,CAAC,MAAM,CAAC;QACtB,OAAO,EAAE,CAAC,CAAC,EAAE;QACb,UAAU,EAAE,CAAC,CAAC,UAAU;QACxB,QAAQ,EAAE,CAAC,CAAC,QAAQ;QACpB,KAAK,EAAE,CAAC,CAAC,KAAK;QACd,OAAO,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,GAAG,kBAAkB;YAC5C,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,kBAAkB,CAAC;YACxC,CAAC,CAAC,CAAC,CAAC,OAAO;QACb,cAAc,EAAE,CAAC,CAAC,KAAK;QACvB,QAAQ,EAAE,CAAC,GAAG,CAAC;KAChB,CAAC,CAAC,CAAC;AACN,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,iBAAiB,CAAC,OAAuB;IACvD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,EAAE,aAAa,EAAE,EAAE,EAAE,SAAS,EAAE,EAAE,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;IACnE,CAAC;IAED,MAAM,SAAS,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;IACxC,MAAM,eAAe,GAA2B,EAAE,CAAC;IAEnD,MAAM,SAAS,GAAa,EAAE,CAAC;IAE/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,MAAM,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;QACrB,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;QAE1C,eAAe,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IAC3E,CAAC;IAED,MAAM,aAAa,GAAG,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAE7C,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,eAAe,EAAE,CAAC;AACvD,CAAC"}
@@ -0,0 +1,48 @@
1
+ /**
2
+ * Enrichment Pipeline — enriches text chunks with keywords, entities, and summaries
3
+ * before indexation into the knowledge store.
4
+ *
5
+ * Transforms raw TextChunks into EnrichedChunks with:
6
+ * - TF-IDF-based keyword extraction (top-N terms)
7
+ * - Regex-based entity detection (PascalCase, camelCase, file paths)
8
+ * - Auto-generated summary (first sentence or markdown heading)
9
+ * - Parent-child chunk linking for later chunk stitching
10
+ */
11
+ import type { TextChunk } from "./chunk-text.js";
12
+ export interface EnrichedChunk extends TextChunk {
13
+ /** Top-N keywords extracted via TF-IDF term frequency */
14
+ keywords: string[];
15
+ /** Detected entities: class names, function names, file paths */
16
+ entities: string[];
17
+ /** Auto-generated summary (first sentence or heading) */
18
+ summary: string;
19
+ /** Source type discriminator */
20
+ sourceType: string;
21
+ /** Reference to parent chunk index for chunk stitching */
22
+ parentChunkIndex?: number;
23
+ }
24
+ /**
25
+ * Extract top-N keywords from text using term frequency scoring.
26
+ * Uses the project tokenizer (handles PT + EN, strips stopwords).
27
+ */
28
+ export declare function extractKeywords(text: string, topN?: number): string[];
29
+ /**
30
+ * Extract named entities from text using regex patterns.
31
+ * Detects PascalCase types, camelCase functions, and file paths.
32
+ */
33
+ export declare function extractEntities(text: string): string[];
34
+ /**
35
+ * Generate a short summary from chunk content.
36
+ * Prefers markdown heading if present, otherwise first sentence.
37
+ */
38
+ export declare function generateSummary(text: string): string;
39
+ /**
40
+ * Enrich a single text chunk with keywords, entities, summary, and source metadata.
41
+ */
42
+ export declare function enrichChunk(chunk: TextChunk, sourceType: string, parentChunkIndex?: number): EnrichedChunk;
43
+ /**
44
+ * Enrich multiple chunks from the same document.
45
+ * Applies parent-child linking for multi-chunk documents.
46
+ */
47
+ export declare function enrichChunks(chunks: TextChunk[], sourceType: string): EnrichedChunk[];
48
+ //# sourceMappingURL=enrichment-pipeline.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"enrichment-pipeline.d.ts","sourceRoot":"","sources":["../../../src/core/rag/enrichment-pipeline.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAIjD,MAAM,WAAW,aAAc,SAAQ,SAAS;IAC9C,yDAAyD;IACzD,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,iEAAiE;IACjE,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,yDAAyD;IACzD,OAAO,EAAE,MAAM,CAAC;IAChB,gCAAgC;IAChC,UAAU,EAAE,MAAM,CAAC;IACnB,0DAA0D;IAC1D,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAKD;;;GAGG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,GAAE,MAAsB,GAAG,MAAM,EAAE,CAepF;AAcD;;;GAGG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAwBtD;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CA0BpD;AAED;;GAEG;AACH,wBAAgB,WAAW,CACzB,KAAK,EAAE,SAAS,EAChB,UAAU,EAAE,MAAM,EAClB,gBAAgB,CAAC,EAAE,MAAM,GACxB,aAAa,CAoBf;AAED;;;GAGG;AACH,wBAAgB,YAAY,CAC1B,MAAM,EAAE,SAAS,EAAE,EACnB,UAAU,EAAE,MAAM,GACjB,aAAa,EAAE,CASjB"}
@@ -0,0 +1,127 @@
1
+ /**
2
+ * Enrichment Pipeline — enriches text chunks with keywords, entities, and summaries
3
+ * before indexation into the knowledge store.
4
+ *
5
+ * Transforms raw TextChunks into EnrichedChunks with:
6
+ * - TF-IDF-based keyword extraction (top-N terms)
7
+ * - Regex-based entity detection (PascalCase, camelCase, file paths)
8
+ * - Auto-generated summary (first sentence or markdown heading)
9
+ * - Parent-child chunk linking for later chunk stitching
10
+ */
11
+ import { tokenize } from "../search/tokenizer.js";
12
+ import { logger } from "../utils/logger.js";
13
// Hard cap (in characters) on summaries produced by generateSummary.
const MAX_SUMMARY_LENGTH = 200;
// Default number of keywords returned by extractKeywords.
const DEFAULT_TOP_N = 5;
15
/**
 * Extract the top-N keywords from a piece of text.
 *
 * Ranks terms by raw frequency over the project tokenizer's output
 * (handles PT + EN, strips stopwords); ties are broken alphabetically
 * so the result is stable across runs.
 *
 * @param text - Raw chunk text to analyze.
 * @param topN - Maximum number of keywords to return.
 * @returns Up to `topN` keywords, most frequent first.
 */
export function extractKeywords(text, topN = DEFAULT_TOP_N) {
    const counts = new Map();
    for (const term of tokenize(text)) {
        counts.set(term, (counts.get(term) ?? 0) + 1);
    }
    if (counts.size === 0) {
        return [];
    }
    // Highest frequency first; alphabetical order breaks ties for stability.
    const ranked = [...counts.entries()].sort(([termA, freqA], [termB, freqB]) => freqB - freqA || termA.localeCompare(termB));
    return ranked.slice(0, topN).map(([term]) => term);
}
33
// ── Entity detection patterns ──────────────────────────────

/** PascalCase identifiers: two or more capitalized words joined (e.g., GraphNode, SqliteStore). */
// eslint-disable-next-line security/detect-unsafe-regex -- bounded pattern, safe for short identifiers
const PASCAL_CASE_RE = /\b([A-Z][a-z]+(?:[A-Z][a-z0-9]*)+)\b/g;

/** camelCase identifiers: lowercase start with at least one interior capital (e.g., findNextTask). */
const CAMEL_CASE_RE = /\b([a-z][a-zA-Z0-9]*[A-Z][a-zA-Z0-9]*)\b/g;

/** Project-relative file paths rooted at a known directory and ending in a known extension. */
const FILE_PATH_RE = /\b((?:src|lib|dist|test|tests)\/[\w\-./]+\.(?:ts|js|tsx|jsx|json|md|sql))\b/g;

/**
 * Extract named entities from text using regex patterns.
 * Detects PascalCase types, camelCase functions, and file paths.
 *
 * @param text - Raw text to scan.
 * @returns Unique entities in first-seen order (PascalCase, then camelCase, then paths).
 */
export function extractEntities(text) {
    const seen = new Set();
    // Scan with each pattern in turn. lastIndex is reset because the /g
    // regexes are shared module-level state and may have been used before.
    for (const pattern of [PASCAL_CASE_RE, CAMEL_CASE_RE, FILE_PATH_RE]) {
        pattern.lastIndex = 0;
        for (const hit of text.matchAll(pattern)) {
            seen.add(hit[1]);
        }
    }
    return Array.from(seen);
}
65
/**
 * Generate a short summary from chunk content.
 *
 * Preference order: the first markdown heading found (on any line),
 * otherwise the first sentence ending in . ! or ?, otherwise the leading
 * characters of the text. The result never exceeds MAX_SUMMARY_LENGTH;
 * whitespace-only input yields "".
 *
 * @param text - Chunk content to summarize.
 * @returns A summary of at most MAX_SUMMARY_LENGTH characters.
 */
export function generateSummary(text) {
    const body = text.trim();
    if (body === "") {
        return "";
    }
    // Enforce the length cap on whichever candidate is selected below.
    const clip = (candidate) => candidate.length > MAX_SUMMARY_LENGTH
        ? candidate.slice(0, MAX_SUMMARY_LENGTH)
        : candidate;
    // A markdown heading (matched on any line via the /m flag) is the best summary.
    const heading = body.match(/^#{1,6}\s+(.+)/m);
    if (heading) {
        return clip(heading[1].trim());
    }
    // Otherwise take the first sentence terminated by . ! or ?.
    const sentence = body.match(/^[^.!?]+[.!?]/);
    if (sentence) {
        return clip(sentence[0].trim());
    }
    // No heading or sentence boundary: truncate the raw text.
    return clip(body);
}
94
/**
 * Enrich a single text chunk with keywords, entities, summary, and source metadata.
 *
 * @param chunk - The raw text chunk to enrich (must expose `content` and `index`).
 * @param sourceType - Label describing where the chunk came from.
 * @param parentChunkIndex - Index of the parent chunk when this chunk belongs
 *   to a multi-chunk document; undefined for standalone chunks.
 * @returns The chunk extended with keywords, entities, summary, and metadata.
 */
export function enrichChunk(chunk, sourceType, parentChunkIndex) {
    const enriched = {
        ...chunk,
        keywords: extractKeywords(chunk.content),
        entities: extractEntities(chunk.content),
        summary: generateSummary(chunk.content),
        sourceType,
        parentChunkIndex,
    };
    // Debug-level trace so bulk indexing runs can be audited without noise.
    logger.debug("Chunk enriched", {
        index: chunk.index,
        sourceType,
        keywordCount: enriched.keywords.length,
        entityCount: enriched.entities.length,
    });
    return enriched;
}
116
/**
 * Enrich multiple chunks from the same document.
 *
 * For documents split into several chunks, every chunk after the first is
 * linked back to chunk 0 as its parent, enabling later chunk stitching.
 *
 * @param chunks - All chunks of one document, in order.
 * @param sourceType - Label describing the document's origin.
 * @returns The enriched chunks, in the same order.
 */
export function enrichChunks(chunks, sourceType) {
    // Zero or one chunk: no parent-child linking is needed.
    if (chunks.length < 2) {
        return chunks.map((chunk) => enrichChunk(chunk, sourceType));
    }
    // Multi-chunk document: chunk 0 serves as the parent reference for the rest.
    return chunks.map((chunk, position) => enrichChunk(chunk, sourceType, position === 0 ? undefined : 0));
}
127
+ //# sourceMappingURL=enrichment-pipeline.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"enrichment-pipeline.js","sourceRoot":"","sources":["../../../src/core/rag/enrichment-pipeline.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAGH,OAAO,EAAE,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAClD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAe5C,MAAM,kBAAkB,GAAG,GAAG,CAAC;AAC/B,MAAM,aAAa,GAAG,CAAC,CAAC;AAExB;;;GAGG;AACH,MAAM,UAAU,eAAe,CAAC,IAAY,EAAE,OAAe,aAAa;IACxE,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC9B,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEnC,yBAAyB;IACzB,MAAM,IAAI,GAAG,IAAI,GAAG,EAAkB,CAAC;IACvC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC9C,CAAC;IAED,kEAAkE;IAClE,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;SACtC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAE3D,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;AACrD,CAAC;AAED,8DAA8D;AAE9D,2EAA2E;AAC3E,uGAAuG;AACvG,MAAM,cAAc,GAAG,uCAAuC,CAAC;AAE/D,mFAAmF;AACnF,MAAM,aAAa,GAAG,2CAA2C,CAAC;AAElE,+EAA+E;AAC/E,MAAM,YAAY,GAAG,8EAA8E,CAAC;AAEpG;;;GAGG;AACH,MAAM,UAAU,eAAe,CAAC,IAAY;IAC1C,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;IAEnC,IAAI,KAA6B,CAAC;IAElC,gDAAgD;IAChD,cAAc,CAAC,SAAS,GAAG,CAAC,CAAC;IAC7B,OAAO,CAAC,KAAK,GAAG,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACpD,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACzB,CAAC;IAED,YAAY;IACZ,aAAa,CAAC,SAAS,GAAG,CAAC,CAAC;IAC5B,OAAO,CAAC,KAAK,GAAG,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACnD,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACzB,CAAC;IAED,aAAa;IACb,YAAY,CAAC,SAAS,GAAG,CAAC,CAAC;IAC3B,OAAO,CAAC,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAClD,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACzB,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;AAC9B,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,eAAe
,CAAC,IAAY;IAC1C,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,CAAC;IAExB,6BAA6B;IAC7B,MAAM,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;IACtD,IAAI,YAAY,EAAE,CAAC;QACjB,MAAM,OAAO,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACvC,OAAO,OAAO,CAAC,MAAM,GAAG,kBAAkB;YACxC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,kBAAkB,CAAC;YACtC,CAAC,CAAC,OAAO,CAAC;IACd,CAAC;IAED,sCAAsC;IACtC,MAAM,aAAa,GAAG,OAAO,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;IACrD,IAAI,aAAa,EAAE,CAAC;QAClB,MAAM,QAAQ,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACzC,OAAO,QAAQ,CAAC,MAAM,GAAG,kBAAkB;YACzC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,kBAAkB,CAAC;YACvC,CAAC,CAAC,QAAQ,CAAC;IACf,CAAC;IAED,2CAA2C;IAC3C,OAAO,OAAO,CAAC,MAAM,GAAG,kBAAkB;QACxC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,kBAAkB,CAAC;QACtC,CAAC,CAAC,OAAO,CAAC;AACd,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CACzB,KAAgB,EAChB,UAAkB,EAClB,gBAAyB;IAEzB,MAAM,QAAQ,GAAG,eAAe,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,QAAQ,GAAG,eAAe,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,OAAO,GAAG,eAAe,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAE/C,MAAM,CAAC,KAAK,CAAC,gBAAgB,EAAE;QAC7B,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,UAAU;QACV,YAAY,EAAE,QAAQ,CAAC,MAAM;QAC7B,WAAW,EAAE,QAAQ,CAAC,MAAM;KAC7B,CAAC,CAAC;IAEH,OAAO;QACL,GAAG,KAAK;QACR,QAAQ;QACR,QAAQ;QACR,OAAO;QACP,UAAU;QACV,gBAAgB;KACjB,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,YAAY,CAC1B,MAAmB,EACnB,UAAkB;IAElB,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QACvB,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,WAAW,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC,CAAC;IACvD,CAAC;IAED,6EAA6E;IAC7E,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACzB,WAAW,CAAC,CAAC,EAAE,UAAU,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CACpD,CAAC;AACJ,CAAC"}
@@ -0,0 +1,40 @@
1
+ /**
2
+ * Post-Retrieval Pipeline — processes search results after initial retrieval.
3
+ *
4
+ * Pipeline stages:
5
+ * 1. Deduplication — remove results with identical content
6
+ * 2. Reranking — boost results with higher query keyword overlap
7
+ * 3. Chunk stitching — merge adjacent chunks from the same source
8
+ * 4. Limit — enforce maxResults
9
+ */
10
+ import type { RankedResult } from "./multi-strategy-retrieval.js";
11
export interface PostRetrievalOptions {
    /** Search query, used for keyword-overlap reranking. */
    query: string;
    /** Ranked results from the initial retrieval pass. */
    results: RankedResult[];
    /** Maximum number of results the pipeline may return. */
    maxResults: number;
    /** NOTE(review): presumably maps a result identifier to its chunk index for stitching — confirm against the implementation. */
    chunkMeta?: Map<string, number>;
}
17
export interface PostRetrievalResult {
    /** Final results after deduplication, reranking, stitching, and limiting. */
    results: RankedResult[];
    /** Deduplication count — NOTE(review): confirm whether this counts removed or surviving results. */
    deduplicated: number;
    /** Chunk-stitching count — NOTE(review): presumably the number of chunks merged; confirm against the implementation. */
    stitchedChunks: number;
}
22
/**
 * Remove results with identical content, keeping the highest-scored one.
 *
 * @param results - Ranked results, possibly containing content duplicates.
 * @returns Results with duplicate content removed.
 */
export declare function deduplicateResults(results: RankedResult[]): RankedResult[];
/**
 * Rerank results by keyword overlap with the query.
 * Combines original score with keyword overlap boost.
 *
 * @param results - Results to rerank.
 * @param query - Query whose keywords are compared against each result.
 * @returns Results ordered by the combined score.
 */
export declare function rerankByKeywordOverlap(results: RankedResult[], query: string): RankedResult[];
/**
 * Merge adjacent chunks from the same source document.
 * Chunks are considered adjacent if their chunk indices differ by 1.
 *
 * @param results - Results that may contain neighbouring chunks of one document.
 * @param chunkMeta - NOTE(review): presumably result identifier → chunk index — confirm against the implementation.
 * @returns Results with adjacent chunks merged.
 */
export declare function stitchAdjacentChunks(results: RankedResult[], chunkMeta: Map<string, number>): RankedResult[];
/**
 * Full post-retrieval pipeline (deduplicate → rerank → stitch → limit,
 * per the stage list in this file's header comment).
 */
export declare function postRetrievalPipeline(options: PostRetrievalOptions): PostRetrievalResult;
40
+ //# sourceMappingURL=post-retrieval.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"post-retrieval.d.ts","sourceRoot":"","sources":["../../../src/core/rag/post-retrieval.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,+BAA+B,CAAC;AAIlE,MAAM,WAAW,oBAAoB;IACnC,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,YAAY,EAAE,CAAC;IACxB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACjC;AAED,MAAM,WAAW,mBAAmB;IAClC,OAAO,EAAE,YAAY,EAAE,CAAC;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,CAAC;CACxB;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,YAAY,EAAE,CAY1E;AAED;;;GAGG;AACH,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,YAAY,EAAE,EACvB,KAAK,EAAE,MAAM,GACZ,YAAY,EAAE,CAchB;AAED;;;GAGG;AACH,wBAAgB,oBAAoB,CAClC,OAAO,EAAE,YAAY,EAAE,EACvB,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,GAC7B,YAAY,EAAE,CAmDhB;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,oBAAoB,GAAG,mBAAmB,CA2BxF"}