@eucoder/rag 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +384 -0
- package/dist/ab-testing.d.ts +52 -0
- package/dist/ab-testing.d.ts.map +1 -0
- package/dist/ab-testing.js +144 -0
- package/dist/ab-testing.js.map +1 -0
- package/dist/ab-testing.test.d.ts +2 -0
- package/dist/ab-testing.test.d.ts.map +1 -0
- package/dist/ab-testing.test.js +147 -0
- package/dist/ab-testing.test.js.map +1 -0
- package/dist/agentic-rag.d.ts +23 -0
- package/dist/agentic-rag.d.ts.map +1 -0
- package/dist/agentic-rag.js +170 -0
- package/dist/agentic-rag.js.map +1 -0
- package/dist/agentic-rag.test.d.ts +2 -0
- package/dist/agentic-rag.test.d.ts.map +1 -0
- package/dist/agentic-rag.test.js +174 -0
- package/dist/agentic-rag.test.js.map +1 -0
- package/dist/corrective-rag.d.ts +16 -0
- package/dist/corrective-rag.d.ts.map +1 -0
- package/dist/corrective-rag.js +85 -0
- package/dist/corrective-rag.js.map +1 -0
- package/dist/corrective-rag.test.d.ts +2 -0
- package/dist/corrective-rag.test.d.ts.map +1 -0
- package/dist/corrective-rag.test.js +140 -0
- package/dist/corrective-rag.test.js.map +1 -0
- package/dist/feedback.d.ts +77 -0
- package/dist/feedback.d.ts.map +1 -0
- package/dist/feedback.js +44 -0
- package/dist/feedback.js.map +1 -0
- package/dist/feedback.test.d.ts +2 -0
- package/dist/feedback.test.d.ts.map +1 -0
- package/dist/feedback.test.js +202 -0
- package/dist/feedback.test.js.map +1 -0
- package/dist/hybrid-search.d.ts +14 -0
- package/dist/hybrid-search.d.ts.map +1 -0
- package/dist/hybrid-search.js +70 -0
- package/dist/hybrid-search.js.map +1 -0
- package/dist/hybrid-search.test.d.ts +2 -0
- package/dist/hybrid-search.test.d.ts.map +1 -0
- package/dist/hybrid-search.test.js +93 -0
- package/dist/hybrid-search.test.js.map +1 -0
- package/dist/index.d.ts +17 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +12 -0
- package/dist/index.js.map +1 -0
- package/dist/knowledge-graph.d.ts +24 -0
- package/dist/knowledge-graph.d.ts.map +1 -0
- package/dist/knowledge-graph.js +131 -0
- package/dist/knowledge-graph.js.map +1 -0
- package/dist/knowledge-graph.test.d.ts +2 -0
- package/dist/knowledge-graph.test.d.ts.map +1 -0
- package/dist/knowledge-graph.test.js +140 -0
- package/dist/knowledge-graph.test.js.map +1 -0
- package/dist/llm-grader.d.ts +19 -0
- package/dist/llm-grader.d.ts.map +1 -0
- package/dist/llm-grader.js +63 -0
- package/dist/llm-grader.js.map +1 -0
- package/dist/metrics.d.ts +26 -0
- package/dist/metrics.d.ts.map +1 -0
- package/dist/metrics.js +100 -0
- package/dist/metrics.js.map +1 -0
- package/dist/optimizer.d.ts +52 -0
- package/dist/optimizer.d.ts.map +1 -0
- package/dist/optimizer.js +228 -0
- package/dist/optimizer.js.map +1 -0
- package/dist/optimizer.test.d.ts +2 -0
- package/dist/optimizer.test.d.ts.map +1 -0
- package/dist/optimizer.test.js +201 -0
- package/dist/optimizer.test.js.map +1 -0
- package/dist/self-improving.d.ts +85 -0
- package/dist/self-improving.d.ts.map +1 -0
- package/dist/self-improving.js +163 -0
- package/dist/self-improving.js.map +1 -0
- package/dist/self-improving.test.d.ts +2 -0
- package/dist/self-improving.test.d.ts.map +1 -0
- package/dist/self-improving.test.js +234 -0
- package/dist/self-improving.test.js.map +1 -0
- package/dist/types.d.ts +117 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/package.json +42 -0
- package/src/ab-testing.test.ts +239 -0
- package/src/ab-testing.ts +214 -0
- package/src/agentic-rag.test.ts +201 -0
- package/src/agentic-rag.ts +220 -0
- package/src/corrective-rag.test.ts +166 -0
- package/src/corrective-rag.ts +115 -0
- package/src/feedback.test.ts +227 -0
- package/src/feedback.ts +118 -0
- package/src/hybrid-search.test.ts +107 -0
- package/src/hybrid-search.ts +86 -0
- package/src/index.ts +57 -0
- package/src/knowledge-graph.test.ts +170 -0
- package/src/knowledge-graph.ts +182 -0
- package/src/llm-grader.ts +69 -0
- package/src/metrics.ts +121 -0
- package/src/optimizer.test.ts +232 -0
- package/src/optimizer.ts +307 -0
- package/src/self-improving.test.ts +341 -0
- package/src/self-improving.ts +239 -0
- package/src/types.ts +139 -0
- package/tsconfig.json +9 -0
package/dist/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":""}
|
package/package.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@eucoder/rag",
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"description": "Advanced RAG module with Corrective RAG, Hybrid Search, Knowledge Graph, Agentic RAG, and Self-Improving capabilities",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./dist/index.js",
|
|
7
|
+
"types": "./dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"types": "./dist/index.d.ts",
|
|
11
|
+
"import": "./dist/index.js"
|
|
12
|
+
}
|
|
13
|
+
},
|
|
14
|
+
"scripts": {
|
|
15
|
+
"build": "tsc",
|
|
16
|
+
"dev": "tsc --watch",
|
|
17
|
+
"typecheck": "tsc --noEmit",
|
|
18
|
+
"test": "vitest run --passWithNoTests"
|
|
19
|
+
},
|
|
20
|
+
"keywords": [
|
|
21
|
+
"rag",
|
|
22
|
+
"retrieval-augmented-generation",
|
|
23
|
+
"corrective-rag",
|
|
24
|
+
"hybrid-search",
|
|
25
|
+
"knowledge-graph",
|
|
26
|
+
"agentic-rag",
|
|
27
|
+
"self-improving",
|
|
28
|
+
"ab-testing",
|
|
29
|
+
"llm"
|
|
30
|
+
],
|
|
31
|
+
"author": "EUCoder Team",
|
|
32
|
+
"license": "Apache-2.0",
|
|
33
|
+
"dependencies": {
|
|
34
|
+
"@eucode/indexer": "workspace:*",
|
|
35
|
+
"@eucode/llm": "workspace:*"
|
|
36
|
+
},
|
|
37
|
+
"devDependencies": {
|
|
38
|
+
"@types/node": "^22.10.5",
|
|
39
|
+
"typescript": "^5.7.3",
|
|
40
|
+
"vitest": "^3.0.5"
|
|
41
|
+
}
|
|
42
|
+
}
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach } from "vitest";
|
|
2
|
+
import { ABTestFramework } from "./ab-testing.js";
|
|
3
|
+
import { InMemoryFeedbackStorage } from "./feedback.js";
|
|
4
|
+
import type { RagStrategyConfig } from "./ab-testing.js";
|
|
5
|
+
|
|
6
|
+
// Test suite for ABTestFramework: strategy registration/selection, feedback
// recording, A/B comparison and metric aggregation, all backed by an
// in-memory feedback store that is recreated before every test.
describe("ABTestFramework", () => {
  /** The four numeric scores passed to ABTestFramework.recordFeedback. */
  interface Scores {
    rating: number;
    relevance: number;
    completeness: number;
    citationsQuality: number;
  }

  const STRATEGY_A: RagStrategyConfig = {
    name: "strategy-a",
    type: "corrective",
    params: {},
  };

  const STRATEGY_B: RagStrategyConfig = {
    name: "strategy-b",
    type: "hybrid",
    params: {},
  };

  const LOW_QUALITY: Scores = {
    rating: 3,
    relevance: 0.6,
    completeness: 0.5,
    citationsQuality: 0.7,
  };

  const MEDIUM_QUALITY: Scores = {
    rating: 4,
    relevance: 0.8,
    completeness: 0.7,
    citationsQuality: 0.9,
  };

  const HIGH_QUALITY: Scores = {
    rating: 5,
    relevance: 0.9,
    completeness: 0.85,
    citationsQuality: 0.95,
  };

  let framework: ABTestFramework;
  let storage: InMemoryFeedbackStorage;

  beforeEach(() => {
    storage = new InMemoryFeedbackStorage();
    framework = new ABTestFramework(storage);
  });

  /**
   * Records `count` feedback entries ("query 0" … "query count-1") for
   * `strategy`, every one carrying the same `scores`.
   */
  async function seedFeedback(
    strategy: string,
    count: number,
    scores: Scores
  ): Promise<void> {
    for (let i = 0; i < count; i++) {
      await framework.recordFeedback(
        `query ${i}`,
        { text: `answer ${i}`, citations: [], steps: [], rewrites: 0 },
        scores.rating,
        scores.relevance,
        scores.completeness,
        scores.citationsQuality,
        strategy
      );
    }
  }

  it("should register strategies", () => {
    const config: RagStrategyConfig = {
      name: "strategy-a",
      type: "corrective",
      params: { minGoodHits: 2 },
    };

    framework.registerStrategy(config);

    // With exactly one registered strategy the random pick can only land on
    // it, so this proves the registration actually took effect.
    expect(framework.selectStrategy()).toEqual(config);
  });

  it("should select strategy randomly", () => {
    framework.registerStrategy(STRATEGY_A);
    framework.registerStrategy(STRATEGY_B);

    const selected = framework.selectStrategy();
    expect(["strategy-a", "strategy-b"]).toContain(selected.name);
  });

  it("should throw error when no strategies registered", () => {
    expect(() => framework.selectStrategy()).toThrow("No strategies registered");
  });

  it("should record feedback", async () => {
    framework.registerStrategy(STRATEGY_A);

    await framework.recordFeedback(
      "test query",
      { text: "test answer", citations: [], steps: [], rewrites: 0 },
      4,
      0.8,
      0.7,
      0.9,
      "strategy-a"
    );

    const feedback = await storage.getFeedback("strategy-a");
    expect(feedback).toHaveLength(1);
    expect(feedback[0]?.rating).toBe(4);
  });

  it("should run A/B test with sufficient samples", async () => {
    framework.registerStrategy(STRATEGY_A);
    framework.registerStrategy(STRATEGY_B);

    // 30 samples each: strategy A is the lower-quality one, B the higher.
    await seedFeedback("strategy-a", 30, LOW_QUALITY);
    await seedFeedback("strategy-b", 30, HIGH_QUALITY);

    const result = await framework.runABTest("strategy-a", "strategy-b", 20);

    expect(result.strategyA).toBe("strategy-a");
    expect(result.strategyB).toBe("strategy-b");
    expect(result.winner).toBe("B");
    expect(result.improvement).toBeGreaterThan(0);
    expect(result.statisticalSignificance).toBeGreaterThan(0);
    expect(result.recommendation).toContain("B");
  });

  it("should throw error with insufficient samples", async () => {
    framework.registerStrategy(STRATEGY_A);
    framework.registerStrategy(STRATEGY_B);

    // Only 10 samples for A and none for B: below the required 20.
    await seedFeedback("strategy-a", 10, MEDIUM_QUALITY);

    await expect(framework.runABTest("strategy-a", "strategy-b", 20))
      .rejects.toThrow("Insufficient samples");
  });

  it("should get strategy metrics", async () => {
    framework.registerStrategy(STRATEGY_A);

    await seedFeedback("strategy-a", 5, MEDIUM_QUALITY);

    const metrics = await framework.getStrategyMetrics("strategy-a");
    expect(metrics).not.toBeNull();
    expect(metrics?.totalQueries).toBe(5);
    expect(metrics?.averageRating).toBe(4);
  });

  it("should return null for non-existent strategy metrics", async () => {
    const metrics = await framework.getStrategyMetrics("non-existent");
    expect(metrics).toBeNull();
  });

  it("should get all metrics sorted by score", async () => {
    framework.registerStrategy(STRATEGY_A);
    framework.registerStrategy(STRATEGY_B);

    await seedFeedback("strategy-a", 5, LOW_QUALITY);
    await seedFeedback("strategy-b", 5, HIGH_QUALITY);

    const allMetrics = await framework.getAllMetrics();
    expect(allMetrics).toHaveLength(2);
    // Best-scoring strategy must come first.
    expect(allMetrics[0]?.strategy).toBe("strategy-b");
    expect(allMetrics[0]?.overallScore).toBeGreaterThan(allMetrics[1]?.overallScore || 0);
  });
});
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
import type { RagAnswer } from "./types.js";
|
|
2
|
+
import type { RagFeedback, RagMetrics, ABTestResult, FeedbackStorage } from "./feedback.js";
|
|
3
|
+
import { InMemoryFeedbackStorage } from "./feedback.js";
|
|
4
|
+
import { MetricsCalculator } from "./metrics.js";
|
|
5
|
+
|
|
6
|
+
/**
 * Configuration describing one RAG strategy that can take part in A/B tests.
 */
export interface RagStrategyConfig {
  /** Identifier of the strategy; ABTestFramework uses it as the registry key. */
  name: string;
  /** Which family of RAG pipeline this configuration belongs to. */
  type: "corrective" | "hybrid" | "agentic";
  /**
   * Free-form, strategy-specific settings.
   * NOTE(review): nothing in this file reads these values — confirm which
   * component is expected to interpret them.
   */
  params: Record<string, unknown>;
}
|
|
14
|
+
|
|
15
|
+
/**
 * A/B testing framework for comparing RAG strategies.
 *
 * Strategies are registered by name, feedback is persisted through a
 * FeedbackStorage, and two strategies can be compared on the feedback
 * recorded under their names.
 */
export class ABTestFramework {
  /** Overall-score gaps smaller than this are reported as a tie. */
  private static readonly MIN_SCORE_DIFF = 0.02;
  /** Significance values below this are reported as a tie. */
  private static readonly MIN_SIGNIFICANCE = 0.7;
  /** Tolerance used when deciding whether two mean values are equal. */
  private static readonly MEAN_EPSILON = 1e-9;

  private readonly storage: FeedbackStorage;
  private readonly strategies: Map<string, RagStrategyConfig> = new Map();

  /**
   * @param storage Backend used to save and read feedback. A new
   *   InMemoryFeedbackStorage is created when none is supplied.
   */
  constructor(storage?: FeedbackStorage) {
    this.storage = storage ?? new InMemoryFeedbackStorage();
  }

  /**
   * Registers a strategy for testing.
   *
   * The strategy is keyed by `config.name`; registering another config with
   * the same name replaces the previous one.
   */
  registerStrategy(config: RagStrategyConfig): void {
    this.strategies.set(config.name, config);
  }

  /**
   * Picks one of the registered strategies at random.
   *
   * Selection is uniform: every registered strategy has the same chance.
   * (Weighted selection is not implemented.)
   *
   * @returns The selected strategy configuration.
   * @throws Error when no strategy has been registered.
   */
  selectStrategy(): RagStrategyConfig {
    const strategies = Array.from(this.strategies.values());
    if (strategies.length === 0) {
      throw new Error("No strategies registered for A/B testing");
    }

    const index = Math.floor(Math.random() * strategies.length);
    return strategies[index]!;
  }

  /**
   * Builds a feedback record for an answer and saves it to storage.
   *
   * The record receives a generated id (timestamp plus a random base-36
   * suffix) and the current time as its timestamp.
   *
   * @param query User query the answer was produced for.
   * @param answer The RAG answer being rated.
   * @param rating Overall rating given to the answer.
   * @param relevance Relevance score of the answer.
   * @param completeness Completeness score of the answer.
   * @param citationsQuality Quality score of the answer's citations.
   * @param strategy Name of the strategy that produced the answer; feedback
   *   is later looked up by this name.
   * @param comments Optional free-text comments.
   */
  async recordFeedback(
    query: string,
    answer: RagAnswer,
    rating: number,
    relevance: number,
    completeness: number,
    citationsQuality: number,
    strategy: string,
    comments?: string
  ): Promise<void> {
    // slice(2, 11) drops the leading "0." and keeps up to 9 characters; it is
    // the non-deprecated equivalent of substr(2, 9).
    const randomSuffix = Math.random().toString(36).slice(2, 11);

    const feedback: RagFeedback = {
      id: `feedback-${Date.now()}-${randomSuffix}`,
      query,
      answer,
      rating,
      relevance,
      completeness,
      citationsQuality,
      comments,
      timestamp: new Date(),
      strategy,
    };

    await this.storage.saveFeedback(feedback);
  }

  /**
   * Runs an A/B comparison between two strategies.
   *
   * The winner is decided on the overall score computed by MetricsCalculator.
   * The result is a tie when the score gap is below 0.02 or the significance
   * estimate is below 0.7.
   *
   * @param strategyA Name of the first strategy ("A" in the result).
   * @param strategyB Name of the second strategy ("B" in the result).
   * @param minSamples Minimum number of feedback entries required for each
   *   strategy.
   * @returns Metrics for both strategies, the winner, the relative
   *   improvement, the significance estimate and a textual recommendation.
   * @throws Error when either strategy has fewer than `minSamples` entries.
   */
  async runABTest(
    strategyA: string,
    strategyB: string,
    minSamples: number = 20
  ): Promise<ABTestResult> {
    // The two reads are independent, so fetch them together.
    const [feedbacksA, feedbacksB] = await Promise.all([
      this.storage.getFeedback(strategyA),
      this.storage.getFeedback(strategyB),
    ]);

    if (feedbacksA.length < minSamples || feedbacksB.length < minSamples) {
      throw new Error(
        `Insufficient samples for A/B test. Need at least ${minSamples} per strategy. ` +
        `Got ${feedbacksA.length} for ${strategyA} and ${feedbacksB.length} for ${strategyB}.`
      );
    }

    const metricsA = MetricsCalculator.calculateMetrics(feedbacksA, strategyA);
    const metricsB = MetricsCalculator.calculateMetrics(feedbacksB, strategyB);

    // Positive diff means B scored higher than A.
    const diff = metricsB.overallScore - metricsA.overallScore;
    const bestScore = Math.max(metricsA.overallScore, metricsB.overallScore);
    // Identical scores mean no improvement. Short-circuiting here also avoids
    // 0 / 0 = NaN when both overall scores are zero.
    const improvement = diff === 0 ? 0 : Math.abs(diff) / bestScore;

    const statisticalSignificance = this.calculateStatisticalSignificance(
      feedbacksA,
      feedbacksB
    );

    let winner: "A" | "B" | "tie";
    if (
      Math.abs(diff) < ABTestFramework.MIN_SCORE_DIFF ||
      statisticalSignificance < ABTestFramework.MIN_SIGNIFICANCE
    ) {
      winner = "tie";
    } else {
      winner = diff > 0 ? "B" : "A";
    }

    const recommendation = this.generateRecommendation(
      winner,
      metricsA,
      metricsB,
      improvement,
      statisticalSignificance
    );

    return {
      strategyA,
      strategyB,
      winner,
      metricsA,
      metricsB,
      improvement,
      statisticalSignificance,
      recommendation,
    };
  }

  /**
   * Estimates how significant the difference in mean relevance between the
   * two samples is, using a simplified t-test.
   *
   * The value is a heuristic in [0, 1] (t-statistic divided by 3, capped at
   * 1), not a p-value or a confidence level.
   *
   * @returns 0 when either sample is empty or when both samples have zero
   *   variance and the same mean; 1 when both have zero variance but
   *   different means; otherwise min(1, t / 3).
   */
  private calculateStatisticalSignificance(
    feedbacksA: RagFeedback[],
    feedbacksB: RagFeedback[]
  ): number {
    const nA = feedbacksA.length;
    const nB = feedbacksB.length;

    // A mean cannot be computed from an empty sample (it would be NaN), so no
    // significance can be claimed.
    if (nA === 0 || nB === 0) return 0;

    const meanA = feedbacksA.reduce((sum, f) => sum + f.relevance, 0) / nA;
    const meanB = feedbacksB.reduce((sum, f) => sum + f.relevance, 0) / nB;

    const stdDevA = MetricsCalculator.calculateStandardDeviation(feedbacksA, "relevance");
    const stdDevB = MetricsCalculator.calculateStandardDeviation(feedbacksB, "relevance");

    // Standard error of the difference between the two means.
    const se = Math.sqrt(
      (stdDevA * stdDevA) / nA + (stdDevB * stdDevB) / nB
    );

    const meanGap = Math.abs(meanB - meanA);

    if (se === 0) {
      // No variance in either sample: any real gap between the means is as
      // certain as it can be, but equal means show no difference at all.
      return meanGap > ABTestFramework.MEAN_EPSILON ? 1 : 0;
    }

    // t-statistic
    const t = meanGap / se;

    // Simplified stand-in for a p-value lookup:
    //   t > 2.0 ≈ p < 0.05 (significant)
    //   t > 2.6 ≈ p < 0.01 (very significant)
    return Math.min(1, t / 3);
  }

  /**
   * Builds the human-readable recommendation for a test outcome.
   *
   * The message text is emitted in Italian.
   *
   * @param winner Outcome of the comparison.
   * @param metricsA Metrics of strategy A.
   * @param metricsB Metrics of strategy B.
   * @param improvement Relative improvement as a fraction (0.1 = 10%).
   * @param significance Significance estimate as a fraction.
   */
  private generateRecommendation(
    winner: "A" | "B" | "tie",
    metricsA: RagMetrics,
    metricsB: RagMetrics,
    improvement: number,
    significance: number
  ): string {
    if (winner === "tie") {
      return `Le strategie sono equivalenti (differenza: ${(improvement * 100).toFixed(1)}%, significatività: ${(significance * 100).toFixed(0)}%). ` +
        `Continua il testing per raccogliere più dati.`;
    }

    const winnerMetrics = winner === "A" ? metricsA : metricsB;
    const loserMetrics = winner === "A" ? metricsB : metricsA;

    return `Strategia ${winner} vince con ${(improvement * 100).toFixed(1)}% di miglioramento ` +
      `(significatività: ${(significance * 100).toFixed(0)}%). ` +
      `Score: ${winnerMetrics.overallScore.toFixed(3)} vs ${loserMetrics.overallScore.toFixed(3)}. ` +
      `Raccomandazione: adotta la strategia ${winner} come default.`;
  }

  /**
   * Computes metrics for one strategy from its stored feedback.
   *
   * The strategy does not need to be registered; only stored feedback is
   * consulted.
   *
   * @returns The metrics, or null when no feedback exists for `strategy`.
   */
  async getStrategyMetrics(strategy: string): Promise<RagMetrics | null> {
    const feedbacks = await this.storage.getFeedback(strategy);
    if (feedbacks.length === 0) return null;

    return MetricsCalculator.calculateMetrics(feedbacks, strategy);
  }

  /**
   * Computes metrics for every registered strategy that has feedback.
   *
   * @returns Metrics sorted by overall score, best first. Registered
   *   strategies without feedback are omitted.
   */
  async getAllMetrics(): Promise<RagMetrics[]> {
    const names = Array.from(this.strategies.keys());
    // Lookups are independent of each other, so run them together.
    const results = await Promise.all(
      names.map((name) => this.getStrategyMetrics(name))
    );

    return results
      .filter((m): m is RagMetrics => m !== null)
      .sort((a, b) => b.overallScore - a.overallScore);
  }
}
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { AgenticRag } from "./agentic-rag.js";
|
|
3
|
+
import type { SearchHit } from "@eucode/indexer";
|
|
4
|
+
import type { Retriever, Grader, QueryRewriter } from "./types.js";
|
|
5
|
+
|
|
6
|
+
/**
 * Retriever test double whose answer depends on how many times it has been
 * queried: the first search yields one low-scoring hit, every later search
 * yields two high-scoring hits. Results are truncated to `topK`.
 */
class MockRetriever implements Retriever {
  private searches = 0;

  async search(_query: string, topK: number): Promise<SearchHit[]> {
    this.searches += 1;
    const isFirstSearch = this.searches === 1;

    // Poor result on the first call, good results afterwards.
    const hits: SearchHit[] = isFirstSearch
      ? [
          {
            id: "hit-1",
            path: "src/unrelated.ts",
            startLine: 1,
            endLine: 10,
            text: "Unrelated content",
            score: 0.3,
          },
        ]
      : [
          {
            id: "hit-2",
            path: "src/auth.ts",
            startLine: 10,
            endLine: 20,
            text: "Authentication implementation",
            score: 0.9,
          },
          {
            id: "hit-3",
            path: "src/login.ts",
            startLine: 5,
            endLine: 15,
            text: "Login logic",
            score: 0.85,
          },
        ];

    return hits.slice(0, topK);
  }
}
|
|
46
|
+
|
|
47
|
+
/**
 * Grader test double: a hit counts as relevant when its retrieval score is
 * strictly above 0.5, and the grade score simply echoes the hit's score.
 */
class MockGrader implements Grader {
  async grade(_query: string, { score }: SearchHit) {
    const relevant = score > 0.5;
    return { relevant, score };
  }
}
|
|
55
|
+
|
|
56
|
+
/**
 * Query-rewriter test double: ignores the context and tags the query with a
 * fixed " (improved)" suffix so rewrites are easy to spot.
 */
class MockRewriter implements QueryRewriter {
  async rewrite(query: string, _context: string): Promise<string> {
    const rewritten = `${query} (improved)`;
    return rewritten;
  }
}
|
|
61
|
+
|
|
62
|
+
// Test suite for AgenticRag: iterative search, early stopping, query
// rewriting and the self-reflection / chain-of-thought switches.
describe("AgenticRag", () => {
  /** Builds a SearchHit from positional values. */
  const makeHit = (
    id: string,
    path: string,
    startLine: number,
    endLine: number,
    text: string,
    score: number
  ): SearchHit => ({ id, path, startLine, endLine, text, score });

  /**
   * Retriever stub that answers every query with the same hits, truncated to
   * `topK`. Each call hands out fresh copies so no object is shared between
   * searches.
   */
  const staticRetriever = (hits: SearchHit[]): Retriever => ({
    async search(_query: string, topK: number): Promise<SearchHit[]> {
      return hits.slice(0, topK).map((hit) => ({ ...hit }));
    },
  });

  it("should perform iterative search with reasoning", async () => {
    const agentic = new AgenticRag({
      retriever: new MockRetriever(),
      grader: new MockGrader(),
      rewriter: new MockRewriter(),
      maxIterations: 3,
      enableSelfReflection: true,
      enableChainOfThought: true,
    });

    const outcome = await agentic.search("authentication");

    expect(outcome.iterations).toBeGreaterThan(0);
    expect(outcome.reasoningSteps.length).toBeGreaterThan(0);
    expect(outcome.answer).toContain("Processo di Ragionamento");
    expect(outcome.citations.length).toBeGreaterThan(0);
  });

  it("should stop early when results are good enough", async () => {
    const goodRetriever = staticRetriever([
      makeHit("hit-1", "src/auth.ts", 10, 20, "Authentication implementation", 0.9),
      makeHit("hit-2", "src/login.ts", 5, 15, "Login logic", 0.85),
      makeHit("hit-3", "src/session.ts", 1, 10, "Session management", 0.8),
    ]);

    const agentic = new AgenticRag({
      retriever: goodRetriever,
      grader: new MockGrader(),
      rewriter: new MockRewriter(),
      maxIterations: 5,
      enableSelfReflection: true,
    });

    const outcome = await agentic.search("authentication");

    // One pass is enough because every hit is already graded as relevant.
    expect(outcome.iterations).toBe(1);
    expect(outcome.confidence).toBeGreaterThan(0.7);
  });

  it("should rewrite query when results are poor", async () => {
    const poorRetriever = staticRetriever([
      makeHit("hit-1", "src/unrelated.ts", 1, 10, "Unrelated content", 0.2),
    ]);

    const agentic = new AgenticRag({
      retriever: poorRetriever,
      grader: new MockGrader(),
      rewriter: new MockRewriter(),
      maxIterations: 3,
      enableSelfReflection: true,
    });

    const outcome = await agentic.search("authentication");

    // Results never improve, so the search keeps iterating and ends with low
    // confidence.
    expect(outcome.iterations).toBeGreaterThan(1);
    expect(outcome.confidence).toBeLessThan(0.5);
  });

  it("should include self-reflection when enabled", async () => {
    const agentic = new AgenticRag({
      retriever: new MockRetriever(),
      grader: new MockGrader(),
      rewriter: new MockRewriter(),
      maxIterations: 2,
      enableSelfReflection: true,
    });

    const outcome = await agentic.search("authentication");

    expect(outcome.selfReflection).toBeDefined();
    expect(outcome.selfReflection).toContain("Valutazione Finale");
  });

  it("should skip self-reflection when disabled", async () => {
    const agentic = new AgenticRag({
      retriever: new MockRetriever(),
      grader: new MockGrader(),
      rewriter: new MockRewriter(),
      maxIterations: 2,
      enableSelfReflection: false,
    });

    const outcome = await agentic.search("authentication");

    expect(outcome.selfReflection).toBeUndefined();
  });

  it("should skip chain of thought when disabled", async () => {
    const agentic = new AgenticRag({
      retriever: new MockRetriever(),
      grader: new MockGrader(),
      rewriter: new MockRewriter(),
      maxIterations: 2,
      enableChainOfThought: false,
    });

    const outcome = await agentic.search("authentication");

    expect(outcome.answer).not.toContain("Processo di Ragionamento");
  });
});
|