magector 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +627 -0
- package/config/mcp-config.json +13 -0
- package/package.json +53 -0
- package/src/binary.js +66 -0
- package/src/cli.js +203 -0
- package/src/init.js +293 -0
- package/src/magento-patterns.js +563 -0
- package/src/mcp-server.js +915 -0
- package/src/model.js +127 -0
- package/src/templates/claude-md.js +47 -0
- package/src/templates/cursorrules.js +45 -0
- package/src/validation/accuracy-calculator.js +397 -0
- package/src/validation/benchmark.js +355 -0
- package/src/validation/test-data-generator.js +672 -0
- package/src/validation/test-queries.js +326 -0
- package/src/validation/validator.js +302 -0
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Test queries with expected results for accuracy validation
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/**
 * Canonical query fixtures for the accuracy suite.
 *
 * Every entry carries an `id`, the `query` text, a `type` label and a
 * `category` (a key of QUERY_CATEGORIES). The optional `expected*` fields
 * (`expectedTypes`, `expectedPatterns`, `expectedClasses`, `expectedInContent`,
 * `expectedFileTypes`, `expectedModule`, `expectedMethods`) and `minResults`
 * describe what a good result set should look like for that query.
 */
export const TEST_QUERIES = [
  // --- Controllers ---
  { id: 'ctrl-1', query: 'controller execute action', type: 'semantic',
    expectedTypes: ['Controller'], expectedPatterns: ['controller'], minResults: 1, category: 'controller' },
  { id: 'ctrl-2', query: 'Index controller', type: 'exact',
    expectedClasses: ['Index'], expectedTypes: ['Controller'], category: 'controller' },

  // --- Models ---
  { id: 'model-1', query: 'model beforeSave afterLoad', type: 'semantic',
    expectedTypes: ['Model'], expectedPatterns: ['model'], category: 'model' },
  { id: 'model-2', query: 'AbstractModel extends', type: 'semantic',
    expectedInContent: ['extends AbstractModel'], category: 'model' },

  // --- Repositories ---
  { id: 'repo-1', query: 'repository getById save delete', type: 'semantic',
    expectedTypes: ['Repository'], expectedPatterns: ['repository'], category: 'repository' },
  { id: 'repo-2', query: 'ItemRepository', type: 'exact',
    expectedClasses: ['ItemRepository'], category: 'repository' },
  { id: 'repo-3', query: 'getList SearchCriteria', type: 'semantic',
    expectedInContent: ['getList', 'SearchCriteria'], category: 'repository' },

  // --- Plugins ---
  { id: 'plugin-1', query: 'plugin interceptor before after around', type: 'semantic',
    expectedTypes: ['Plugin'], expectedPatterns: ['plugin'], category: 'plugin' },
  { id: 'plugin-2', query: 'beforeGetPrice plugin', type: 'semantic',
    expectedInContent: ['beforeGetPrice', 'before'], category: 'plugin' },
  { id: 'plugin-3', query: 'around method interceptor', type: 'semantic',
    expectedInContent: ['around', 'proceed'], category: 'plugin' },

  // --- Observers ---
  { id: 'obs-1', query: 'observer execute event', type: 'semantic',
    expectedTypes: ['Observer'], expectedPatterns: ['observer'], category: 'observer' },
  { id: 'obs-2', query: 'catalog_product_save_after observer', type: 'semantic',
    expectedInContent: ['catalog_product_save_after'], category: 'observer' },
  { id: 'obs-3', query: 'ObserverInterface implement', type: 'semantic',
    expectedInContent: ['ObserverInterface'], category: 'observer' },

  // --- Blocks ---
  { id: 'block-1', query: 'block template phtml', type: 'semantic',
    expectedTypes: ['Block'], expectedPatterns: ['block'], category: 'block' },
  { id: 'block-2', query: '_toHtml _prepareLayout', type: 'semantic',
    expectedInContent: ['_toHtml', '_prepareLayout'], category: 'block' },

  // --- di.xml ---
  { id: 'di-1', query: 'preference interface implementation', type: 'semantic',
    expectedFileTypes: ['xml'], expectedInContent: ['preference'], category: 'di' },
  { id: 'di-2', query: 'plugin configuration di.xml', type: 'semantic',
    expectedFileTypes: ['xml'], expectedInContent: ['plugin'], category: 'di' },
  { id: 'di-3', query: 'virtualType argument', type: 'semantic',
    expectedInContent: ['virtualType'], category: 'di' },

  // --- events.xml ---
  { id: 'event-1', query: 'event observer configuration', type: 'semantic',
    expectedFileTypes: ['xml'], expectedInContent: ['event', 'observer'], category: 'events' },
  { id: 'event-2', query: 'catalog_product_save_after event config', type: 'exact',
    expectedInContent: ['catalog_product_save_after'], category: 'events' },

  // --- Web API ---
  { id: 'api-1', query: 'REST API endpoint route', type: 'semantic',
    expectedFileTypes: ['xml'], expectedInContent: ['route', 'service'], category: 'webapi' },
  { id: 'api-2', query: 'GET /V1/items', type: 'semantic',
    expectedInContent: ['GET', '/V1/'], category: 'webapi' },

  // --- GraphQL ---
  { id: 'gql-1', query: 'GraphQL resolver', type: 'semantic',
    expectedTypes: ['GraphQlResolver'], expectedPatterns: ['graphql_resolver'], category: 'graphql' },
  { id: 'gql-2', query: 'type Query mutation', type: 'semantic',
    expectedFileTypes: ['graphql'], expectedInContent: ['type', 'Query'], category: 'graphql' },
  { id: 'gql-3', query: 'ResolverInterface resolve Field', type: 'semantic',
    expectedInContent: ['ResolverInterface', 'resolve'], category: 'graphql' },

  // --- Cron ---
  { id: 'cron-1', query: 'cron job schedule execute', type: 'semantic',
    expectedTypes: ['Cron'], category: 'cron' },
  { id: 'cron-2', query: 'crontab.xml job instance', type: 'semantic',
    expectedFileTypes: ['xml'], expectedInContent: ['job', 'instance'], category: 'cron' },

  // --- Cross-cutting concerns ---
  { id: 'cross-1', query: 'dependency injection constructor', type: 'semantic',
    expectedInContent: ['__construct'], minResults: 3, category: 'di_pattern' },
  { id: 'cross-2', query: 'LoggerInterface logging', type: 'semantic',
    expectedInContent: ['LoggerInterface', 'logger'], category: 'logging' },
  { id: 'cross-3', query: 'exception handling try catch', type: 'semantic',
    expectedInContent: ['Exception', 'throw'], category: 'error_handling' },

  // --- Module filter ---
  { id: 'mod-1', query: 'Acme_Catalog module', type: 'module',
    expectedModule: 'Acme_Catalog', minResults: 3, category: 'module' },

  // --- Method lookup ---
  { id: 'method-1', query: 'function getById', type: 'method',
    expectedMethods: ['getById'], category: 'method' },
  { id: 'method-2', query: 'save method repository', type: 'method',
    expectedMethods: ['save'], category: 'method' }
];
|
|
282
|
+
|
|
283
|
+
/**
 * Per-category metadata, keyed by the `category` value used on test queries.
 * Each entry holds a numeric `weight` and a human-readable `description`.
 */
export const QUERY_CATEGORIES = {
  // PHP class kinds
  controller: {
    weight: 1.0,
    description: 'Controller action detection'
  },
  model: {
    weight: 1.0,
    description: 'Model and lifecycle hooks'
  },
  repository: {
    weight: 1.2,
    description: 'Repository pattern detection'
  },
  plugin: {
    weight: 1.3,
    description: 'Plugin/interceptor detection'
  },
  observer: {
    weight: 1.2,
    description: 'Observer pattern detection'
  },
  block: {
    weight: 1.0,
    description: 'Block class detection'
  },

  // Configuration / schema files
  di: {
    weight: 1.2,
    description: 'DI configuration detection'
  },
  events: {
    weight: 1.1,
    description: 'Event configuration detection'
  },
  webapi: {
    weight: 1.2,
    description: 'Web API route detection'
  },
  graphql: {
    weight: 1.3,
    description: 'GraphQL schema/resolver detection'
  },
  cron: {
    weight: 1.0,
    description: 'Cron job detection'
  },

  // Cross-cutting code patterns
  di_pattern: {
    weight: 0.8,
    description: 'DI pattern recognition'
  },
  logging: {
    weight: 0.7,
    description: 'Logging pattern recognition'
  },
  error_handling: {
    weight: 0.7,
    description: 'Error handling patterns'
  },

  // Filtered searches
  module: {
    weight: 1.0,
    description: 'Module filtering'
  },
  method: {
    weight: 1.0,
    description: 'Method search'
  }
};
|
|
301
|
+
|
|
302
|
+
/**
 * Build the stress-test queries that complement TEST_QUERIES.
 *
 * A fresh array is returned on every call. Entries use the same shape as the
 * main fixtures; the negative entry carries `maxResults` instead of
 * `minResults`.
 *
 * @returns {Array<object>} seven edge-case query descriptors, ids edge-1..edge-7
 */
export function generateEdgeCaseQueries() {
  // Single-word queries
  const shortQueries = [
    {
      id: 'edge-1',
      query: 'save',
      type: 'short',
      category: 'edge_short',
      minResults: 1,
      expectedMethods: ['save']
    },
    {
      id: 'edge-2',
      query: 'get',
      type: 'short',
      category: 'edge_short',
      minResults: 1
    }
  ];

  // Multi-word query
  const longQueries = [
    {
      id: 'edge-3',
      query: 'public function execute action controller',
      type: 'long',
      category: 'edge_long',
      minResults: 1,
      expectedPatterns: ['controller']
    }
  ];

  // Generic technical jargon
  const jargonQueries = [
    {
      id: 'edge-4',
      query: 'CRUD operations repository interface',
      type: 'jargon',
      category: 'edge_jargon',
      minResults: 1,
      expectedTypes: ['Repository']
    }
  ];

  // Magento vocabulary - lower expectations
  const magentoQueries = [
    {
      id: 'edge-5',
      query: 'service contract API',
      type: 'magento_specific',
      category: 'edge_magento',
      minResults: 1
    }
  ];

  // Unrelated terms: should return few/no results
  const negativeQueries = [
    {
      id: 'edge-6',
      query: 'wordpress drupal laravel',
      type: 'negative',
      category: 'edge_negative',
      maxResults: 2
    }
  ];

  // Mixed upper/lower case
  const mixedCaseQueries = [
    {
      id: 'edge-7',
      query: 'REPOSITORY getbyid SAVE',
      type: 'case',
      category: 'edge_case',
      minResults: 1,
      expectedTypes: ['Repository']
    }
  ];

  return [
    ...shortQueries,
    ...longQueries,
    ...jargonQueries,
    ...magentoQueries,
    ...negativeQueries,
    ...mixedCaseQueries
  ];
}
|
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Main validation runner
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { MagentoIndexer } from '../indexer.js';
import { generateCompleteMockModule, MOCK_MODULES } from './test-data-generator.js';
import { TEST_QUERIES, QUERY_CATEGORIES, generateEdgeCaseQueries } from './test-queries.js';
import {
  calculatePrecision,
  calculateRecall,
  calculateF1,
  calculateMRR,
  calculateNDCG,
  aggregateMetrics,
  gradeAccuracy,
  generateReport,
  isResultRelevant
} from './accuracy-calculator.js';
import { writeFile, mkdir, rm } from 'fs/promises';
import { existsSync } from 'fs';
import os from 'os';
import path from 'path';
|
|
23
|
+
|
|
24
|
+
// Scratch directory that receives the generated mock modules and the test
// index files. It is resolved from the operating system's temp directory
// because a hard-coded '/private/tmp' only exists on macOS and cannot be
// created by an unprivileged user elsewhere.
const VALIDATION_DIR = path.join(os.tmpdir(), 'magector-validation');

// Output directory for the JSON results and text report; relative to the
// process's current working directory.
const RESULTS_DIR = './validation-results';
|
|
26
|
+
|
|
27
|
+
/**
 * Orchestrates an accuracy validation run: writes mock Magento modules to
 * VALIDATION_DIR, indexes them with MagentoIndexer, executes the test queries
 * and turns the per-query scores into a report under RESULTS_DIR.
 */
export class MagectorValidator {
  /**
   * @param {object} [options]
   * @param {boolean} [options.verbose] - print one status line per query
   *   instead of a periodic progress counter.
   */
  constructor(options = {}) {
    this.indexer = null;
    this.testData = [];
    this.results = [];
    this.verbose = options.verbose || false;
  }

  /**
   * Recreate the scratch directory, generate every module in MOCK_MODULES,
   * then create and run the indexer over the generated tree.
   *
   * @returns {Promise<object>} the stats object returned by
   *   `indexer.indexDirectory` (its `indexed`, `graphNodes` and `graphEdges`
   *   fields are logged here).
   */
  async setup() {
    console.log('Setting up validation environment...\n');

    // Forget fixtures from an earlier setup() on this instance; otherwise the
    // expected-result counts computed in runQuery() would be inflated.
    this.testData = [];

    // Clean up previous test data. `force` makes a missing directory a no-op,
    // so no separate (and racy) existence check is needed.
    await rm(VALIDATION_DIR, { recursive: true, force: true });
    await mkdir(VALIDATION_DIR, { recursive: true });
    await mkdir(`${VALIDATION_DIR}/app/code`, { recursive: true });

    // Generate mock modules
    console.log('Generating mock Magento modules...');
    for (const moduleName of MOCK_MODULES) {
      const files = generateCompleteMockModule(moduleName);
      this.testData.push(...files);

      for (const file of files) {
        const filePath = path.join(VALIDATION_DIR, file.path);
        await mkdir(path.dirname(filePath), { recursive: true });
        await writeFile(filePath, file.content);
      }
      console.log(` ✓ Generated ${moduleName} (${files.length} files)`);
    }

    console.log(`\nTotal test files: ${this.testData.length}`);

    // Initialize indexer
    console.log('\nInitializing indexer with GNN...');
    this.indexer = new MagentoIndexer({
      dbPath: `${VALIDATION_DIR}/magector-test.db`,
      graphPath: `${VALIDATION_DIR}/magector-test-graph.json`,
      magentoRoot: VALIDATION_DIR,
      enableGNN: true
    });
    await this.indexer.init();

    // Index the test data
    console.log('Indexing test data...');
    const stats = await this.indexer.indexDirectory(VALIDATION_DIR);
    console.log(` Indexed: ${stats.indexed} files`);
    console.log(` Graph nodes: ${stats.graphNodes}`);
    console.log(` Graph edges: ${stats.graphEdges}\n`);

    return stats;
  }

  /**
   * Run a single test query through `indexer.searchWithGraph` (limit 10) and
   * score the results.
   *
   * Pass/fail is decided by the first rule that trips, in this order:
   * too few results (`minResults`), too many results (`maxResults`),
   * precision below 0.3, or results returned but an MRR of 0.
   *
   * Errors are not rethrown: they produce a failed result with zeroed metrics.
   * Both outcomes share the same set of fields so consumers of the saved
   * results never have to special-case the error shape.
   *
   * @param {object} testQuery - an entry shaped like those in TEST_QUERIES.
   * @returns {Promise<object>} per-query result record.
   */
  async runQuery(testQuery) {
    const startTime = Date.now();

    try {
      const results = await this.indexer.searchWithGraph(testQuery.query, { limit: 10 });
      const duration = Date.now() - startTime;

      // Calculate expected count based on test data
      const expectedCount = this.testData.filter(td =>
        isResultRelevant({ ...td.metadata, content: td.content }, testQuery)
      ).length;

      const precision = calculatePrecision(results, testQuery);
      const recall = calculateRecall(
        results,
        testQuery,
        // Never use a recall denominator below the query's own minimum (or 1).
        Math.max(expectedCount, testQuery.minResults || 1)
      );
      const f1 = calculateF1(precision, recall);
      const mrr = calculateMRR(results, testQuery);
      const ndcg = calculateNDCG(results, testQuery);

      // Determine pass/fail
      let passed = true;
      let failReason = '';

      if (testQuery.minResults && results.length < testQuery.minResults) {
        passed = false;
        failReason = `Expected min ${testQuery.minResults} results, got ${results.length}`;
      } else if (testQuery.maxResults !== undefined && results.length > testQuery.maxResults) {
        passed = false;
        failReason = `Expected max ${testQuery.maxResults} results, got ${results.length}`;
      } else if (precision < 0.3) {
        passed = false;
        failReason = `Precision too low: ${(precision * 100).toFixed(1)}%`;
      } else if (results.length > 0 && mrr === 0) {
        passed = false;
        failReason = 'No relevant results in top 10';
      }

      return {
        queryId: testQuery.id,
        query: testQuery.query,
        category: testQuery.category || 'unknown',
        resultCount: results.length,
        expectedCount,
        precision,
        recall,
        f1,
        mrr,
        ndcg,
        duration,
        passed,
        failReason,
        topResults: results.slice(0, 3).map(r => ({
          path: r.path,
          type: r.magentoType || r.type,
          score: r.score,
          relevant: isResultRelevant(r, testQuery)
        }))
      };
    } catch (error) {
      return {
        queryId: testQuery.id,
        query: testQuery.query,
        category: testQuery.category || 'unknown',
        // Count fields and topResults are filled with empty values so the
        // record has the same shape as a successful one.
        resultCount: 0,
        expectedCount: 0,
        precision: 0,
        recall: 0,
        f1: 0,
        mrr: 0,
        ndcg: 0,
        duration: Date.now() - startTime,
        passed: false,
        failReason: `Error: ${error.message}`,
        topResults: []
      };
    }
  }

  /**
   * Run TEST_QUERIES followed by the generated edge-case queries, appending
   * each outcome to `this.results`.
   *
   * @returns {Promise<object[]>} `this.results` (includes results from any
   *   earlier call on the same instance).
   */
  async runAllQueries() {
    console.log('Running validation queries...\n');
    const allQueries = [...TEST_QUERIES, ...generateEdgeCaseQueries()];

    let completed = 0;
    for (const query of allQueries) {
      const result = await this.runQuery(query);
      this.results.push(result);
      completed++;

      if (this.verbose) {
        const status = result.passed ? '✓' : '✗';
        console.log(` ${status} [${query.id}] ${query.query.substring(0, 40).padEnd(40)} F1: ${(result.f1 * 100).toFixed(1).padStart(5)}%`);
      } else if (completed % 10 === 0) {
        // '\r' rewrites the same terminal line on each progress update.
        process.stdout.write(` Progress: ${completed}/${allQueries.length}\r`);
      }
    }

    console.log(`\nCompleted ${completed} queries.`);
    return this.results;
  }

  /**
   * Run only the TEST_QUERIES entries of one category and aggregate them.
   * Results are not stored on the instance.
   *
   * @param {string} category - value to match against each query's `category`.
   * @returns {Promise<object>} whatever `aggregateMetrics` returns.
   */
  async runCategoryBenchmark(category) {
    const queries = TEST_QUERIES.filter(q => q.category === category);
    const results = [];

    for (const query of queries) {
      results.push(await this.runQuery(query));
    }

    return aggregateMetrics(results);
  }

  /**
   * Time `indexer.search` (limit 10) over the first ten TEST_QUERIES,
   * cycling through them for `iterations` calls.
   *
   * @param {number} [iterations=100] - number of timed searches.
   * @returns {Promise<{min: number, max: number, avg: number, p50: number, p90: number, p99: number}>}
   *   wall-clock latencies in milliseconds. All fields are 0 when nothing was
   *   measured (no iterations or no sample queries), rather than NaN/undefined.
   */
  async runLatencyBenchmark(iterations = 100) {
    console.log(`\nRunning latency benchmark (${iterations} iterations)...`);

    const latencies = [];
    const sampleQueries = TEST_QUERIES.slice(0, 10);
    // Without sample queries there is nothing to time; skip the loop instead
    // of dereferencing an undefined query.
    const runs = sampleQueries.length > 0 ? iterations : 0;

    for (let i = 0; i < runs; i++) {
      const query = sampleQueries[i % sampleQueries.length];
      const start = Date.now();
      await this.indexer.search(query.query, { limit: 10 });
      latencies.push(Date.now() - start);
    }

    if (latencies.length === 0) {
      return { min: 0, max: 0, avg: 0, p50: 0, p90: 0, p99: 0 };
    }

    latencies.sort((a, b) => a - b);

    // Percentile by index into the sorted samples; floor keeps it in range.
    const percentile = fraction => latencies[Math.floor(latencies.length * fraction)];

    return {
      min: latencies[0],
      max: latencies[latencies.length - 1],
      avg: latencies.reduce((a, b) => a + b, 0) / latencies.length,
      p50: percentile(0.5),
      p90: percentile(0.9),
      p99: percentile(0.99)
    };
  }

  /**
   * Aggregate `this.results`, grade them, and write both the detailed JSON
   * and the text report into RESULTS_DIR (created if missing).
   *
   * @returns {Promise<{metrics: object, grade: object, report: string}>}
   */
  async generateFullReport() {
    const metrics = aggregateMetrics(this.results);
    const grade = gradeAccuracy(metrics);
    const report = generateReport(metrics, grade);

    // Ensure results directory exists
    await mkdir(RESULTS_DIR, { recursive: true });

    // Save detailed results
    const detailedResults = {
      timestamp: new Date().toISOString(),
      metrics,
      grade,
      results: this.results
    };

    await writeFile(
      `${RESULTS_DIR}/validation-results.json`,
      JSON.stringify(detailedResults, null, 2)
    );

    await writeFile(`${RESULTS_DIR}/validation-report.txt`, report);

    return { metrics, grade, report };
  }

  /**
   * Delete the scratch directory. Safe to call when it does not exist.
   *
   * @returns {Promise<void>}
   */
  async cleanup() {
    await rm(VALIDATION_DIR, { recursive: true, force: true });
  }
}
|
|
246
|
+
|
|
247
|
+
/**
 * End-to-end entry point: set up fixtures, run every query, benchmark
 * latency, write the report and print a summary to the console.
 *
 * The validator's scratch data is removed afterwards (also when a step
 * throws) unless `options.keepTestData` is truthy.
 *
 * @param {object} [options] - forwarded to the MagectorValidator constructor.
 * @param {boolean} [options.keepTestData] - skip the final cleanup.
 * @returns {Promise<{metrics: object, grade: object, latency: object}>}
 */
export async function runFullValidation(options = {}) {
  const suite = new MagectorValidator(options);

  const bannerLines = [
    '╔════════════════════════════════════════════════════════════════╗',
    '║ MAGECTOR ACCURACY VALIDATION SUITE ║',
    '╚════════════════════════════════════════════════════════════════╝\n'
  ];

  try {
    for (const line of bannerLines) {
      console.log(line);
    }

    // Fixtures + index
    await suite.setup();

    // Accuracy queries
    await suite.runAllQueries();

    // Timing pass
    const latency = await suite.runLatencyBenchmark();
    console.log('\nLatency Results:');
    console.log(` Min: ${latency.min}ms, Avg: ${latency.avg.toFixed(1)}ms, P99: ${latency.p99}ms`);

    // Persist and show the report
    const summary = await suite.generateFullReport();
    const { metrics, grade, report } = summary;
    console.log(report);

    // Tell the user where the artefacts went
    console.log('Results saved to:');
    console.log(' - validation-results/validation-report.txt');
    console.log(' - validation-results/validation-results.json\n');

    return { metrics, grade, latency };
  } finally {
    const shouldCleanUp = !options.keepTestData;
    if (shouldCleanUp) {
      await suite.cleanup();
    }
  }
}
|
|
288
|
+
|
|
289
|
+
// CLI execution

/**
 * Decide whether this module was launched as the script being run.
 *
 * `argv[1]` is absent when Node has no script path (for example when the
 * module is imported from `node -e` or the REPL), so it is type-checked
 * before the substring match instead of being dereferenced blindly.
 *
 * @param {string[]} [argv=process.argv] - argument vector to inspect.
 * @returns {boolean} true when the script path contains 'validator.js'.
 */
function isValidatorEntryPoint(argv = process.argv) {
  const scriptPath = argv[1];
  return typeof scriptPath === 'string' && scriptPath.includes('validator.js');
}

if (isValidatorEntryPoint()) {
  const verbose = process.argv.includes('--verbose') || process.argv.includes('-v');
  const keepData = process.argv.includes('--keep');

  runFullValidation({ verbose, keepTestData: keepData })
    .then(({ grade }) => {
      // Exit status doubles as a gate: a score below 70 is a failure.
      process.exit(grade.score >= 70 ? 0 : 1);
    })
    .catch(err => {
      console.error('Validation failed:', err);
      process.exit(1);
    });
}
|