magector 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +627 -0
- package/config/mcp-config.json +13 -0
- package/package.json +53 -0
- package/src/binary.js +66 -0
- package/src/cli.js +203 -0
- package/src/init.js +293 -0
- package/src/magento-patterns.js +563 -0
- package/src/mcp-server.js +915 -0
- package/src/model.js +127 -0
- package/src/templates/claude-md.js +47 -0
- package/src/templates/cursorrules.js +45 -0
- package/src/validation/accuracy-calculator.js +397 -0
- package/src/validation/benchmark.js +355 -0
- package/src/validation/test-data-generator.js +672 -0
- package/src/validation/test-queries.js +326 -0
- package/src/validation/validator.js +302 -0
package/src/model.js
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Resolve and download ONNX model files for magector-core.
|
|
3
|
+
*
|
|
4
|
+
* Resolution order:
|
|
5
|
+
* 1. MAGECTOR_MODELS env var
|
|
6
|
+
* 2. ~/.magector/models/ (global cache)
|
|
7
|
+
* 3. rust-core/models/ (dev fallback)
|
|
8
|
+
*
|
|
9
|
+
* Downloads from HuggingFace if not found.
|
|
10
|
+
*/
|
|
11
|
+
import { existsSync, mkdirSync, createWriteStream, renameSync, unlinkSync } from 'fs';
import { get as httpsGet } from 'https';
import path from 'path';
import os from 'os';
import { fileURLToPath } from 'url';
|
|
16
|
+
|
|
17
|
+
// Directory containing this module, derived from the module's own URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
18
|
+
|
|
19
|
+
// Common prefix of every download URL below.
const HF_MODEL_BASE_URL =
  'https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main';

/**
 * Model artifacts handled by this module.
 * Each entry carries the local file `name`, the `url` it is fetched from,
 * and a human-readable `description`.
 */
const MODEL_FILES = [
  {
    name: 'all-MiniLM-L6-v2.onnx',
    url: `${HF_MODEL_BASE_URL}/onnx/model.onnx`,
    description: 'ONNX embedding model (~86MB)',
  },
  {
    name: 'tokenizer.json',
    url: `${HF_MODEL_BASE_URL}/tokenizer.json`,
    description: 'Tokenizer vocabulary (~700KB)',
  },
];
|
|
31
|
+
|
|
32
|
+
/**
 * Build the path of the per-user model cache: `<home>/.magector/models`.
 *
 * @returns {string} Cache directory path (the directory is not created here).
 */
function getGlobalCacheDir() {
  const segments = [os.homedir(), '.magector', 'models'];
  return path.join(...segments);
}
|
|
35
|
+
|
|
36
|
+
/**
 * Locate an existing model directory. Never downloads anything.
 *
 * Candidates are tried in order and the first one that passes `hasModels`
 * wins:
 *   1. the MAGECTOR_MODELS environment variable (skipped when unset/empty)
 *   2. the global cache directory
 *   3. `../rust-core/models` relative to this module (dev fallback)
 *
 * @returns {string|null} The matching directory, or null when none qualifies.
 */
export function resolveModels() {
  // Thunks keep each lookup lazy, so later candidates are only computed when
  // the earlier ones did not match.
  const lookups = [
    () => process.env.MAGECTOR_MODELS,
    () => getGlobalCacheDir(),
    () => path.join(__dirname, '..', 'rust-core', 'models'),
  ];

  for (const lookup of lookups) {
    const candidate = lookup();
    if (candidate && hasModels(candidate)) {
      return candidate;
    }
  }

  return null;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Check whether every file listed in MODEL_FILES exists inside `dir`.
 * Only existence is checked; file contents are not inspected.
 *
 * @param {string} dir - Directory to inspect.
 * @returns {boolean} True when no listed file is missing.
 */
function hasModels(dir) {
  for (const { name } of MODEL_FILES) {
    if (!existsSync(path.join(dir, name))) {
      return false;
    }
  }
  return true;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Return a directory that contains all model files, downloading any that are
 * missing into the global cache when no existing directory is found.
 *
 * @param {object} [options]
 * @param {boolean} [options.silent=false] - Suppress progress output.
 * @returns {Promise<string>} Path of the directory holding the model files.
 * @throws {Error} When files are still missing after the download pass, or
 *   when a download rejects.
 */
export async function ensureModels({ silent = false } = {}) {
  const found = resolveModels();
  if (found) {
    return found;
  }

  const verbose = !silent;
  const targetDir = getGlobalCacheDir();
  mkdirSync(targetDir, { recursive: true });

  if (verbose) {
    console.log(`Downloading ONNX model to ${targetDir} ...`);
  }

  // Files are fetched one after another; anything already on disk is skipped.
  for (const { name, url, description } of MODEL_FILES) {
    const dest = path.join(targetDir, name);
    if (!existsSync(dest)) {
      if (verbose) {
        process.stdout.write(` ${description} ... `);
      }
      await downloadFile(url, dest);
      if (verbose) {
        console.log('done');
      }
    }
  }

  if (hasModels(targetDir)) {
    return targetDir;
  }
  throw new Error('Model download failed — files missing after download');
}
|
|
99
|
+
|
|
100
|
+
/**
 * Download `url` to `dest` over HTTPS, following redirects.
 *
 * The response body is streamed into `${dest}.part` and renamed to `dest`
 * only after the write has finished, so a failed or interrupted download
 * never leaves a file at `dest` that an existence check would mistake for a
 * complete download.
 *
 * @param {string} url - Absolute https URL to fetch.
 * @param {string} dest - Final path of the downloaded file.
 * @param {number} [maxRedirects=5] - Redirect hops allowed before giving up.
 * @returns {Promise<void>} Resolves once `dest` has been written. Rejects on
 *   a request error, a non-200 final status, too many redirects, an aborted
 *   response, or a file-system error.
 */
function downloadFile(url, dest, maxRedirects = 5) {
  return new Promise((resolve, reject) => {
    const partialPath = `${dest}.part`;
    let file = null;
    let settled = false;

    // Single failure path: stop writing, drop the partial file, reject once.
    function fail(err) {
      if (settled) return;
      settled = true;
      if (file) file.destroy();
      try {
        unlinkSync(partialPath);
      } catch {
        // Best effort: the partial file may never have been created.
      }
      reject(err);
    }

    function handleResponse(res, currentUrl, redirectsLeft) {
      const { statusCode, headers } = res;

      if (statusCode >= 300 && statusCode < 400 && headers.location) {
        // Drain the redirect body so the socket is released.
        res.resume();
        if (redirectsLeft <= 0) {
          fail(new Error(`Too many redirects downloading ${url}`));
          return;
        }
        let nextUrl;
        try {
          // Location may be relative; resolve it against the current URL.
          nextUrl = new URL(headers.location, currentUrl).toString();
        } catch (err) {
          fail(err);
          return;
        }
        follow(nextUrl, redirectsLeft - 1);
        return;
      }

      if (statusCode !== 200) {
        res.resume();
        fail(new Error(`HTTP ${statusCode} downloading ${currentUrl}`));
        return;
      }

      // Only open the output file once there is a body worth saving.
      file = createWriteStream(partialPath);
      file.on('error', fail);
      res.on('error', fail);
      res.on('aborted', () => fail(new Error(`Connection aborted downloading ${currentUrl}`)));
      file.on('finish', () => {
        file.close(() => {
          if (settled) return;
          try {
            renameSync(partialPath, dest);
          } catch (err) {
            fail(err);
            return;
          }
          settled = true;
          resolve();
        });
      });
      res.pipe(file);
    }

    function follow(currentUrl, redirectsLeft) {
      let request;
      try {
        request = httpsGet(currentUrl, (res) => handleResponse(res, currentUrl, redirectsLeft));
      } catch (err) {
        // e.g. a redirect to a non-https URL makes https.get throw synchronously.
        fail(err);
        return;
      }
      request.on('error', fail);
    }

    follow(url, maxRedirects);
  });
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
 * CLAUDE.md section content for Magento projects using Magector.
 *
 * Exported as a single Markdown string. Backticks that belong to the
 * Markdown are escaped (\`) because the text lives inside a template
 * literal; the literal contains no interpolations.
 */
export const CLAUDE_MD = `# Magector — Magento Semantic Search

This project is indexed with Magector. Use the MCP tools below to search the codebase semantically instead of reading files manually.

## MCP Tools Available

### Search
- \`magento_search\` — Natural language search ("checkout totals calculation", "product price with tier pricing")
- \`magento_find_class\` — Find PHP class/interface/trait by name
- \`magento_find_method\` — Find method implementations across the codebase

### Magento-Specific
- \`magento_find_config\` — Find XML config files (di.xml, events.xml, system.xml)
- \`magento_find_template\` — Find PHTML templates
- \`magento_find_plugin\` — Find interceptor plugins (before/after/around)
- \`magento_find_observer\` — Find event observers
- \`magento_find_preference\` — Find DI preference overrides
- \`magento_find_api\` — Find REST/SOAP API endpoints
- \`magento_find_controller\` — Find controllers by route
- \`magento_find_block\` — Find Block classes
- \`magento_find_cron\` — Find cron job definitions
- \`magento_find_graphql\` — Find GraphQL resolvers and schema
- \`magento_find_db_schema\` — Find database table definitions
- \`magento_module_structure\` — Get full module structure

### Analysis & Utility
- \`magento_index\` — Re-index the codebase after changes
- \`magento_stats\` — View index statistics
- \`magento_analyze_diff\` — Analyze git diffs for risk scoring
- \`magento_complexity\` — Analyze code complexity

## Query Tips

- Describe what code DOES: "calculate product price" not "price file"
- Include Magento terms: "plugin for save", "observer for order place"
- Be specific: "customer address validation before checkout" not just "validation"

## Re-indexing

After significant code changes, re-index:
\`\`\`bash
npx magector index
\`\`\`
`;
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
 * .cursorrules content for Magento projects using Magector.
 *
 * Exported as a single Markdown-style string. Backticks that belong to the
 * text are escaped (\`) because it lives inside a template literal; the
 * literal contains no interpolations.
 *
 * NOTE(review): the PSR-4 example is written with four backslashes per
 * separator in source, which yields two backslashes per separator in the
 * emitted text — confirm that doubling is intended.
 */
export const CURSORRULES = `# Magento 2 Development Rules (Magector)

## Semantic Search First

Before reading files manually, ALWAYS use Magector MCP tools to find relevant code:

1. \`magento_search\` — Natural language search across the entire codebase
2. \`magento_find_class\` — Find a PHP class, interface, or trait
3. \`magento_find_method\` — Find method implementations
4. \`magento_find_config\` — Find XML configuration (di.xml, events.xml, etc.)
5. \`magento_find_template\` — Find PHTML templates
6. \`magento_find_plugin\` — Find interceptor plugins
7. \`magento_find_observer\` — Find event observers
8. \`magento_find_preference\` — Find DI preference overrides
9. \`magento_find_api\` — Find REST/SOAP API endpoints
10. \`magento_find_controller\` — Find controllers by route
11. \`magento_find_block\` — Find Block classes
12. \`magento_find_cron\` — Find cron job definitions
13. \`magento_find_graphql\` — Find GraphQL resolvers and schema
14. \`magento_find_db_schema\` — Find database table definitions
15. \`magento_module_structure\` — Get full module structure
16. \`magento_index\` — Re-index the codebase
17. \`magento_stats\` — View index statistics
18. \`magento_analyze_diff\` — Analyze git diffs for risk
19. \`magento_complexity\` — Analyze code complexity

## Writing Effective Queries

- Describe what the code DOES, not what it IS: "calculate product price" not "price file"
- Include Magento terms: "plugin for save", "observer for order place", "checkout totals collector"
- Be specific: "customer address validation before checkout" not just "validation"

## Magento Development Patterns

- Always check for existing plugins before modifying core behavior
- Use dependency injection — never instantiate classes with \`new\`
- Prefer interfaces over concrete classes
- Check events.xml for observer hooks before adding plugins
- Use repositories for entity CRUD, not direct model save
- Follow PSR-4 autoloading: Vendor\\\\Module\\\\Path\\\\ClassName
- Use db_schema.xml for database changes, not setup scripts
`;
|
|
@@ -0,0 +1,397 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Accuracy calculation and metrics for validation
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/**
 * Precision: the share of returned results that are relevant.
 *
 * @param {Array<object>} results - Search results to evaluate.
 * @param {object} expectedConditions - Conditions passed to `isResultRelevant`.
 * @returns {number} relevant / returned, or 0 when `results` is empty.
 */
export function calculatePrecision(results, expectedConditions) {
  const returned = results.length;
  if (returned === 0) {
    return 0;
  }

  const hits = results.reduce(
    (count, entry) => (isResultRelevant(entry, expectedConditions) ? count + 1 : count),
    0,
  );
  return hits / returned;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Recall: relevant results found divided by the number expected, capped at 1.
 *
 * @param {Array<object>} results - Search results to evaluate.
 * @param {object} expectedConditions - Conditions passed to `isResultRelevant`.
 * @param {number} totalExpected - How many relevant results were expected.
 * @returns {number} A value no greater than 1; exactly 1 when `totalExpected` is 0.
 */
export function calculateRecall(results, expectedConditions, totalExpected) {
  if (totalExpected === 0) {
    return 1;
  }

  const hits = results.reduce(
    (count, entry) => (isResultRelevant(entry, expectedConditions) ? count + 1 : count),
    0,
  );
  const ratio = hits / totalExpected;
  // More hits than expected still counts as full recall.
  return Math.min(ratio, 1);
}
|
|
24
|
+
|
|
25
|
+
/**
 * F1 score: the harmonic mean of precision and recall.
 *
 * @param {number} precision
 * @param {number} recall
 * @returns {number} 2·P·R / (P + R), or 0 when P + R is 0.
 */
export function calculateF1(precision, recall) {
  const total = precision + recall;
  return total === 0 ? 0 : (2 * (precision * recall)) / total;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Reciprocal rank of the first relevant result for a single query.
 *
 * @param {Array<object>} results - Ranked search results, best first.
 * @param {object} expectedConditions - Conditions passed to `isResultRelevant`.
 * @returns {number} 1 / (1-based position of the first relevant result),
 *   or 0 when no result is relevant.
 */
export function calculateMRR(results, expectedConditions) {
  const firstHit = results.findIndex((entry) => isResultRelevant(entry, expectedConditions));
  if (firstHit === -1) {
    return 0;
  }
  return 1 / (firstHit + 1);
}
|
|
44
|
+
|
|
45
|
+
/**
 * Normalized Discounted Cumulative Gain over the top `k` results, using
 * binary relevance (1 if relevant, otherwise 0).
 *
 * The ideal ordering is derived from the relevant results present in
 * `results` itself (all of them, capped at `k`), not from an external
 * ground-truth count.
 *
 * @param {Array<object>} results - Ranked search results, best first.
 * @param {object} expectedConditions - Conditions passed to `isResultRelevant`.
 * @param {number} [k=10] - Cut-off rank.
 * @returns {number} DCG / IDCG, or 0 when IDCG is 0.
 */
export function calculateNDCG(results, expectedConditions, k = 10) {
  const discountAt = (position) => Math.log2(position + 2);

  // Gain actually achieved by the top-k ordering.
  let dcg = 0;
  results.slice(0, k).forEach((entry, position) => {
    if (isResultRelevant(entry, expectedConditions)) {
      dcg += 1 / discountAt(position);
    }
  });

  // Gain of the ideal ordering: every relevant result packed at the top.
  const relevantTotal = results.reduce(
    (count, entry) => (isResultRelevant(entry, expectedConditions) ? count + 1 : count),
    0,
  );
  let idcg = 0;
  Array(Math.min(relevantTotal, k)).fill(1).forEach((_, position) => {
    idcg += 1 / discountAt(position);
  });

  return idcg === 0 ? 0 : dcg / idcg;
}
|
|
62
|
+
|
|
63
|
+
/**
 * Decide whether a search result satisfies at least one expected condition.
 *
 * Condition groups are evaluated in a fixed order (types, patterns, classes,
 * methods, file types, content, module) and the first group that matches
 * makes the result relevant. Within `expectedInContent` every term must be
 * present (case-insensitive); in the other list groups one matching entry is
 * enough. Empty or missing groups are ignored.
 *
 * @param {object} result - A search result (fields such as `magentoType`,
 *   `type`, `path`, `patterns`, `className`, `methodName`, `content`,
 *   `module` and the `is*` flags are read when present).
 * @param {object} conditions - Expected-condition groups.
 * @returns {boolean} True when any group matches, otherwise false.
 */
export function isResultRelevant(result, conditions) {
  const hasEntries = (list) => Boolean(list && list.length > 0);

  // Pattern name -> boolean flag on the result that also counts as a match.
  const flagByPattern = new Map([
    ['plugin', 'isPlugin'],
    ['controller', 'isController'],
    ['observer', 'isObserver'],
    ['repository', 'isRepository'],
    ['graphql_resolver', 'isResolver'],
    ['model', 'isModel'],
    ['block', 'isBlock'],
  ]);

  const checks = [
    // Magento type: exact type, lower-cased generic type, or path segment.
    () =>
      hasEntries(conditions.expectedTypes) &&
      conditions.expectedTypes.some(
        (expected) =>
          result.magentoType === expected ||
          result.type === expected.toLowerCase() ||
          result.path?.includes(`/${expected}/`),
      ),

    // Pattern: listed in result.patterns or signalled by the matching flag.
    () =>
      hasEntries(conditions.expectedPatterns) &&
      conditions.expectedPatterns.some(
        (pattern) =>
          result.patterns?.includes(pattern) ||
          (flagByPattern.has(pattern) && Boolean(result[flagByPattern.get(pattern)])),
      ),

    // Class: exact or partial class name, or a declaration in the content.
    () =>
      hasEntries(conditions.expectedClasses) &&
      conditions.expectedClasses.some(
        (cls) =>
          result.className === cls ||
          result.className?.includes(cls) ||
          result.content?.includes(`class ${cls}`),
      ),

    // Method: exact method name or a declaration in the content.
    () =>
      hasEntries(conditions.expectedMethods) &&
      conditions.expectedMethods.some(
        (method) =>
          result.methodName === method ||
          result.content?.includes(`function ${method}`),
      ),

    // File type: result.type must be one of the listed types.
    () =>
      hasEntries(conditions.expectedFileTypes) &&
      conditions.expectedFileTypes.includes(result.type),

    // Content: every expected term must appear, ignoring case.
    () => {
      if (!hasEntries(conditions.expectedInContent)) {
        return false;
      }
      const haystack = (result.content || '').toLowerCase();
      return conditions.expectedInContent.every((term) =>
        haystack.includes(term.toLowerCase()),
      );
    },

    // Module: exact module name, or its Vendor/Module form inside the path.
    () =>
      Boolean(conditions.expectedModule) &&
      (result.module === conditions.expectedModule ||
        result.path?.includes(conditions.expectedModule.replace('_', '/'))),
  ];

  return checks.some((check) => check());
}
|
|
136
|
+
|
|
137
|
+
/**
 * Score how well a result matches the expected conditions, from 0 to 1.
 *
 * Each condition group that is present contributes one factor, and the
 * score is the mean of the factor scores:
 *   - types:    1 for an exact `magentoType` match, 0.7 for a path-segment match
 *   - patterns: fraction of expected patterns found on the result
 *   - classes:  1 for an exact class name, 0.5 for a partial match
 *   - content:  fraction of expected terms found (case-insensitive)
 *   - module:   1 for an exact module match
 *
 * An empty list is treated the same as a missing one ("no expectation"),
 * which matches `isResultRelevant` and avoids a 0/0 division that would turn
 * the whole score into NaN.
 *
 * @param {object} result - A search result.
 * @param {object} conditions - Expected-condition groups.
 * @returns {number} Mean factor score, or 0 when no factor applies.
 */
export function calculateRelevanceScore(result, conditions) {
  const hasEntries = (list) => Array.isArray(list) && list.length > 0;

  // Patterns whose result flag is not simply `is` + capitalized pattern name.
  const flagAliases = { graphql_resolver: 'isResolver' };

  const hasPatternFlag = (pattern) => {
    const derivedFlag = `is${pattern.charAt(0).toUpperCase() + pattern.slice(1)}`;
    if (result[derivedFlag]) {
      return true;
    }
    const alias = Object.hasOwn(flagAliases, pattern) ? flagAliases[pattern] : null;
    return alias !== null && Boolean(result[alias]);
  };

  let score = 0;
  let factors = 0;

  // Type match
  if (hasEntries(conditions.expectedTypes)) {
    factors++;
    if (conditions.expectedTypes.some((t) => result.magentoType === t)) {
      score += 1;
    } else if (conditions.expectedTypes.some((t) => result.path?.includes(`/${t}/`))) {
      score += 0.7;
    }
  }

  // Pattern match
  if (hasEntries(conditions.expectedPatterns)) {
    factors++;
    const matched = conditions.expectedPatterns.filter(
      (p) => result.patterns?.includes(p) || hasPatternFlag(p),
    ).length;
    score += matched / conditions.expectedPatterns.length;
  }

  // Class match
  if (hasEntries(conditions.expectedClasses)) {
    factors++;
    if (conditions.expectedClasses.some((c) => result.className === c)) {
      score += 1;
    } else if (conditions.expectedClasses.some((c) => result.className?.includes(c))) {
      score += 0.5;
    }
  }

  // Content match
  if (hasEntries(conditions.expectedInContent)) {
    factors++;
    const contentLower = (result.content || '').toLowerCase();
    const matched = conditions.expectedInContent.filter((c) =>
      contentLower.includes(c.toLowerCase()),
    ).length;
    score += matched / conditions.expectedInContent.length;
  }

  // Module match
  if (conditions.expectedModule) {
    factors++;
    if (result.module === conditions.expectedModule) {
      score += 1;
    }
  }

  return factors === 0 ? 0 : score / factors;
}
|
|
193
|
+
|
|
194
|
+
/**
 * Aggregate per-query metrics into overall and per-category summaries.
 *
 * Each entry of `queryResults` is read for `precision`, `recall`, `f1`,
 * `mrr`, `ndcg`, `passed`, `category`, and (for failures) `queryId`, `query`
 * and `failReason`.
 *
 * With an empty `queryResults` every average and `passRate` is 0 rather than
 * NaN, so downstream grading and formatting stay numeric.
 *
 * @param {Array<object>} queryResults - One entry per evaluated query.
 * @returns {object} Totals, overall averages, `passRate`, a `byCategory`
 *   map (count, passed, avgPrecision, avgRecall, avgF1, passRate) and the
 *   list of `failed` queries ({ id, query, reason }).
 */
export function aggregateMetrics(queryResults) {
  const metrics = {
    totalQueries: queryResults.length,
    passedQueries: 0,
    avgPrecision: 0,
    avgRecall: 0,
    avgF1: 0,
    avgMRR: 0,
    avgNDCG: 0,
    byCategory: {},
    failed: [],
  };

  let sumPrecision = 0;
  let sumRecall = 0;
  let sumF1 = 0;
  let sumMRR = 0;
  let sumNDCG = 0;

  for (const qr of queryResults) {
    sumPrecision += qr.precision;
    sumRecall += qr.recall;
    sumF1 += qr.f1;
    sumMRR += qr.mrr;
    sumNDCG += qr.ndcg;

    if (qr.passed) {
      metrics.passedQueries++;
    } else {
      metrics.failed.push({
        id: qr.queryId,
        query: qr.query,
        reason: qr.failReason,
      });
    }

    // Per-category running sums; turned into averages after the loop.
    if (!metrics.byCategory[qr.category]) {
      metrics.byCategory[qr.category] = {
        count: 0,
        passed: 0,
        avgPrecision: 0,
        avgRecall: 0,
        avgF1: 0,
      };
    }
    const cat = metrics.byCategory[qr.category];
    cat.count++;
    if (qr.passed) cat.passed++;
    cat.avgPrecision += qr.precision;
    cat.avgRecall += qr.recall;
    cat.avgF1 += qr.f1;
  }

  // Overall averages; guard the empty case so 0/0 never yields NaN.
  const n = queryResults.length;
  const mean = (sum) => (n === 0 ? 0 : sum / n);
  metrics.avgPrecision = mean(sumPrecision);
  metrics.avgRecall = mean(sumRecall);
  metrics.avgF1 = mean(sumF1);
  metrics.avgMRR = mean(sumMRR);
  metrics.avgNDCG = mean(sumNDCG);
  metrics.passRate = mean(metrics.passedQueries);

  // Category averages (count is always >= 1 for an existing category).
  for (const cat of Object.values(metrics.byCategory)) {
    cat.avgPrecision /= cat.count;
    cat.avgRecall /= cat.count;
    cat.avgF1 /= cat.count;
    cat.passRate = cat.passed / cat.count;
  }

  return metrics;
}
|
|
270
|
+
|
|
271
|
+
/**
 * Turn aggregate metrics into a letter grade.
 *
 * The score is a weighted blend on a 0-100 scale: 60% average F1 and 40%
 * pass rate. The grade is the first band whose lower bound the score
 * reaches; anything below 60 (or a non-numeric score) is an 'F'.
 *
 * @param {object} metrics - Must provide `avgF1` and `passRate` (0-1 each).
 * @returns {{score: number, grade: string, description: string,
 *   breakdown: {f1Contribution: number, passRateContribution: number}}}
 *   Score and contributions are rounded to one decimal place.
 */
export function gradeAccuracy(metrics) {
  const { avgF1, passRate } = metrics;
  const weighted = (avgF1 * 0.6 + passRate * 0.4) * 100;

  // [lower bound, grade, description], highest band first.
  const bands = [
    [95, 'A+', 'Excellent - Production ready'],
    [90, 'A', 'Very Good - Minor improvements possible'],
    [85, 'B+', 'Good - Some edge cases need work'],
    [80, 'B', 'Above Average - Noticeable gaps'],
    [75, 'C+', 'Average - Significant improvements needed'],
    [70, 'C', 'Below Average - Major issues'],
    [60, 'D', 'Poor - Fundamental problems'],
  ];
  const fallback = [0, 'F', 'Failing - Requires complete rework'];
  const [, grade, description] = bands.find(([floor]) => weighted >= floor) ?? fallback;

  const toOneDecimal = (value) => Math.round(value * 10) / 10;

  return {
    score: toOneDecimal(weighted),
    grade,
    description,
    breakdown: {
      f1Contribution: toOneDecimal(avgF1 * 60),
      passRateContribution: toOneDecimal(passRate * 40),
    },
  };
}
|
|
318
|
+
|
|
319
|
+
/**
 * Render a plain-text accuracy report.
 *
 * Sections, in order: overall grade, aggregate metrics, per-category
 * performance (sorted by average F1, best first), up to ten failed queries
 * (only when there are failures), and recommendations derived from weak
 * categories and low precision / recall / MRR.
 *
 * @param {object} metrics - Aggregated metrics (totals, averages, `passRate`,
 *   `byCategory`, `failed`).
 * @param {object} grade - Grade summary with `grade`, `score`, `description`.
 * @returns {string} The formatted report.
 */
export function generateReport(metrics, grade) {
  const asPercent = (value, digits) => (value * 100).toFixed(digits);
  const chunks = [];

  chunks.push(`
================================================================================
MAGECTOR ACCURACY VALIDATION REPORT
================================================================================

OVERALL GRADE: ${grade.grade} (${grade.score}/100)
${grade.description}

--------------------------------------------------------------------------------
AGGREGATE METRICS
--------------------------------------------------------------------------------
Total Queries: ${metrics.totalQueries}
Passed: ${metrics.passedQueries} (${asPercent(metrics.passRate, 1)}%)
Failed: ${metrics.failed.length}

Precision: ${asPercent(metrics.avgPrecision, 2)}%
Recall: ${asPercent(metrics.avgRecall, 2)}%
F1 Score: ${asPercent(metrics.avgF1, 2)}%
MRR: ${asPercent(metrics.avgMRR, 2)}%
NDCG@10: ${asPercent(metrics.avgNDCG, 2)}%

--------------------------------------------------------------------------------
PERFORMANCE BY CATEGORY
--------------------------------------------------------------------------------
`);

  // Best-performing categories first.
  const ranked = Object.entries(metrics.byCategory).sort(
    (left, right) => right[1].avgF1 - left[1].avgF1,
  );
  for (const [name, cat] of ranked) {
    let status;
    if (cat.passRate >= 0.8) {
      status = '✓';
    } else if (cat.passRate >= 0.5) {
      status = '~';
    } else {
      status = '✗';
    }
    chunks.push(
      ` ${status} ${name.padEnd(20)} F1: ${asPercent(cat.avgF1, 1).padStart(5)}% Pass: ${cat.passed}/${cat.count}\n`,
    );
  }

  const failureCount = metrics.failed.length;
  if (failureCount > 0) {
    chunks.push(`
--------------------------------------------------------------------------------
FAILED QUERIES
--------------------------------------------------------------------------------
`);
    // Only the first ten failures are listed; queries are cut at 40 characters.
    for (const failure of metrics.failed.slice(0, 10)) {
      const ellipsis = failure.query.length > 40 ? '...' : '';
      chunks.push(` [${failure.id}] "${failure.query.substring(0, 40)}${ellipsis}"\n`);
      chunks.push(` Reason: ${failure.reason}\n`);
    }
    if (failureCount > 10) {
      chunks.push(` ... and ${failureCount - 10} more\n`);
    }
  }

  chunks.push(`
--------------------------------------------------------------------------------
RECOMMENDATIONS
--------------------------------------------------------------------------------
`);

  // Categories with an average F1 below 0.7 are called out by name.
  const weakNames = ranked.filter(([, cat]) => cat.avgF1 < 0.7).map(([name]) => name);
  if (weakNames.length > 0) {
    chunks.push(` Improve indexing for: ${weakNames.join(', ')}\n`);
  }

  if (metrics.avgPrecision < 0.7) {
    chunks.push(` - Precision is low: Consider stricter filtering and better chunking\n`);
  }
  if (metrics.avgRecall < 0.7) {
    chunks.push(` - Recall is low: Consider broader search terms and synonym expansion\n`);
  }
  if (metrics.avgMRR < 0.5) {
    chunks.push(` - MRR is low: Top results are not relevant, review ranking algorithm\n`);
  }

  chunks.push(`
================================================================================
`);

  return chunks.join('');
}
|