magector 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/model.js ADDED
@@ -0,0 +1,127 @@
1
+ /**
2
+ * Resolve and download ONNX model files for magector-core.
3
+ *
4
+ * Resolution order:
5
+ * 1. MAGECTOR_MODELS env var
6
+ * 2. ~/.magector/models/ (global cache)
7
+ * 3. rust-core/models/ (dev fallback)
8
+ *
9
+ * Downloads from HuggingFace if not found.
10
+ */
11
import { existsSync, mkdirSync, createWriteStream, rename, unlink } from 'fs';
import { get as httpsGet } from 'https';
import path from 'path';
import os from 'os';
import { fileURLToPath } from 'url';
16
+
17
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
18
+
19
/**
 * Files that together make up a complete model directory.
 * Each entry has the local file `name`, the `url` it is fetched from, and a
 * human-readable `description`.
 */
const MODEL_FILES = [
  [
    'all-MiniLM-L6-v2.onnx',
    'https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/onnx/model.onnx',
    'ONNX embedding model (~86MB)',
  ],
  [
    'tokenizer.json',
    'https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/tokenizer.json',
    'Tokenizer vocabulary (~700KB)',
  ],
].map(([name, url, description]) => ({ name, url, description }));
31
+
32
/**
 * Path of the per-user model cache: `<home>/.magector/models`.
 *
 * @returns {string} Cache directory path (not created by this function).
 */
function getGlobalCacheDir() {
  const home = os.homedir();
  const segments = ['.magector', 'models'];
  return path.join(home, ...segments);
}
35
+
36
/**
 * Locate a directory that already contains every model file.
 *
 * Candidates are checked in order and the first complete one wins:
 *   1. the directory named by the MAGECTOR_MODELS environment variable,
 *   2. the global cache directory,
 *   3. `../rust-core/models` relative to this module (dev fallback).
 *
 * Never downloads anything.
 *
 * @returns {string|null} The first complete model directory, or null if none.
 */
export function resolveModels() {
  const fromEnv = process.env.MAGECTOR_MODELS;
  if (fromEnv && hasModels(fromEnv)) {
    return fromEnv;
  }

  const cacheDir = getGlobalCacheDir();
  if (hasModels(cacheDir)) {
    return cacheDir;
  }

  const devDir = path.join(__dirname, '..', 'rust-core', 'models');
  return hasModels(devDir) ? devDir : null;
}
61
+
62
/**
 * Report whether `dir` contains every file listed in MODEL_FILES.
 *
 * @param {string} dir - Directory to inspect.
 * @returns {boolean} True only when all model files exist in `dir`.
 */
function hasModels(dir) {
  for (const { name } of MODEL_FILES) {
    if (!existsSync(path.join(dir, name))) {
      return false;
    }
  }
  return true;
}
65
+
66
/**
 * Return a directory holding all model files, downloading missing ones first.
 *
 * If resolveModels() already finds a complete directory it is returned as-is.
 * Otherwise each file in MODEL_FILES that is absent from the global cache
 * directory is downloaded there, one after another.
 *
 * @param {object} [options]
 * @param {boolean} [options.silent=false] - Suppress progress output.
 * @returns {Promise<string>} Path of the directory containing the models.
 * @throws {Error} If files are still missing once the downloads have run.
 */
export async function ensureModels({ silent = false } = {}) {
  const alreadyPresent = resolveModels();
  if (alreadyPresent) {
    return alreadyPresent;
  }

  const cacheDir = getGlobalCacheDir();
  mkdirSync(cacheDir, { recursive: true });

  const verbose = !silent;
  if (verbose) {
    console.log(`Downloading ONNX model to ${cacheDir} ...`);
  }

  for (const entry of MODEL_FILES) {
    const target = path.join(cacheDir, entry.name);

    // Only fetch what is not already on disk.
    if (!existsSync(target)) {
      if (verbose) {
        process.stdout.write(` ${entry.description} ... `);
      }
      await downloadFile(entry.url, target);
      if (verbose) {
        console.log('done');
      }
    }
  }

  if (hasModels(cacheDir)) {
    return cacheDir;
  }
  throw new Error('Model download failed — files missing after download');
}
99
+
100
/**
 * Download `url` to `dest` over HTTPS, following redirects.
 *
 * The response body is streamed into `${dest}.part` and renamed to `dest` only
 * after the write stream has finished and closed. A failed or interrupted
 * transfer therefore never leaves an empty or truncated file at `dest`, which
 * an existence check would otherwise mistake for a complete download.
 *
 * @param {string} url - HTTPS URL to fetch.
 * @param {string} dest - Final path of the downloaded file.
 * @param {number} [maxRedirects=10] - Redirect hops allowed before giving up.
 * @returns {Promise<void>} Resolves once `dest` is in place. Rejects on a
 *   network error, a non-200 final status, too many redirects, or a
 *   filesystem error.
 */
function downloadFile(url, dest, maxRedirects = 10) {
  const partial = `${dest}.part`;

  return new Promise((resolve, reject) => {
    let settled = false;
    let file = null;

    // Single failure path: stop writing, drop the partial file, reject once.
    const fail = (err) => {
      if (settled) return;
      settled = true;
      if (file) file.destroy();
      // Best-effort cleanup; a missing partial file is not an error here.
      unlink(partial, () => reject(err));
    };

    const follow = (currentUrl, redirectsLeft) => {
      let request;
      try {
        request = httpsGet(currentUrl, (res) => {
          const { statusCode, headers } = res;

          if (statusCode >= 300 && statusCode < 400 && headers.location) {
            // Drain the redirect body so the socket is released.
            res.resume();
            if (redirectsLeft <= 0) {
              fail(new Error(`Too many redirects downloading ${url}`));
              return;
            }
            let nextUrl;
            try {
              // Location may be relative; resolve it against the current URL.
              nextUrl = new URL(headers.location, currentUrl).toString();
            } catch (err) {
              fail(err);
              return;
            }
            follow(nextUrl, redirectsLeft - 1);
            return;
          }

          if (statusCode !== 200) {
            res.resume();
            fail(new Error(`HTTP ${statusCode} downloading ${currentUrl}`));
            return;
          }

          file = createWriteStream(partial);
          res.on('error', fail);
          res.on('aborted', () => {
            fail(new Error(`Connection closed before download of ${currentUrl} completed`));
          });
          file.on('error', (err) => {
            res.destroy();
            fail(err);
          });
          file.on('finish', () => {
            file.close((closeErr) => {
              if (closeErr) {
                fail(closeErr);
                return;
              }
              if (settled) return;
              // Publish the file only once it is fully written and closed.
              rename(partial, dest, (renameErr) => {
                if (renameErr) {
                  fail(renameErr);
                  return;
                }
                if (settled) return;
                settled = true;
                resolve();
              });
            });
          });
          res.pipe(file);
        });
      } catch (err) {
        // https.get throws synchronously, e.g. for an unsupported protocol.
        fail(err);
        return;
      }
      request.on('error', fail);
    };

    follow(url, maxRedirects);
  });
}
@@ -0,0 +1,47 @@
1
+ /**
+ * CLAUDE.md section content for Magento projects using Magector.
+ *
+ * Exported as a single Markdown string. It lists the Magector MCP tools
+ * (search, Magento-specific lookups, analysis and utility), gives tips for
+ * phrasing queries, and shows the `npx magector index` re-index command.
+ */
4
+ export const CLAUDE_MD = `# Magector — Magento Semantic Search
5
+
6
+ This project is indexed with Magector. Use the MCP tools below to search the codebase semantically instead of reading files manually.
7
+
8
+ ## MCP Tools Available
9
+
10
+ ### Search
11
+ - \`magento_search\` — Natural language search ("checkout totals calculation", "product price with tier pricing")
12
+ - \`magento_find_class\` — Find PHP class/interface/trait by name
13
+ - \`magento_find_method\` — Find method implementations across the codebase
14
+
15
+ ### Magento-Specific
16
+ - \`magento_find_config\` — Find XML config files (di.xml, events.xml, system.xml)
17
+ - \`magento_find_template\` — Find PHTML templates
18
+ - \`magento_find_plugin\` — Find interceptor plugins (before/after/around)
19
+ - \`magento_find_observer\` — Find event observers
20
+ - \`magento_find_preference\` — Find DI preference overrides
21
+ - \`magento_find_api\` — Find REST/SOAP API endpoints
22
+ - \`magento_find_controller\` — Find controllers by route
23
+ - \`magento_find_block\` — Find Block classes
24
+ - \`magento_find_cron\` — Find cron job definitions
25
+ - \`magento_find_graphql\` — Find GraphQL resolvers and schema
26
+ - \`magento_find_db_schema\` — Find database table definitions
27
+ - \`magento_module_structure\` — Get full module structure
28
+
29
+ ### Analysis & Utility
30
+ - \`magento_index\` — Re-index the codebase after changes
31
+ - \`magento_stats\` — View index statistics
32
+ - \`magento_analyze_diff\` — Analyze git diffs for risk scoring
33
+ - \`magento_complexity\` — Analyze code complexity
34
+
35
+ ## Query Tips
36
+
37
+ - Describe what code DOES: "calculate product price" not "price file"
38
+ - Include Magento terms: "plugin for save", "observer for order place"
39
+ - Be specific: "customer address validation before checkout" not just "validation"
40
+
41
+ ## Re-indexing
42
+
43
+ After significant code changes, re-index:
44
+ \`\`\`bash
45
+ npx magector index
46
+ \`\`\`
47
+ `;
@@ -0,0 +1,45 @@
1
+ /**
+ * .cursorrules content for Magento projects using Magector.
+ *
+ * Exported as a single Markdown string with three sections: a numbered list
+ * of the Magector MCP tools to use before reading files manually, guidance on
+ * writing effective queries, and general Magento 2 development patterns.
+ */
4
+ export const CURSORRULES = `# Magento 2 Development Rules (Magector)
5
+
6
+ ## Semantic Search First
7
+
8
+ Before reading files manually, ALWAYS use Magector MCP tools to find relevant code:
9
+
10
+ 1. \`magento_search\` — Natural language search across the entire codebase
11
+ 2. \`magento_find_class\` — Find a PHP class, interface, or trait
12
+ 3. \`magento_find_method\` — Find method implementations
13
+ 4. \`magento_find_config\` — Find XML configuration (di.xml, events.xml, etc.)
14
+ 5. \`magento_find_template\` — Find PHTML templates
15
+ 6. \`magento_find_plugin\` — Find interceptor plugins
16
+ 7. \`magento_find_observer\` — Find event observers
17
+ 8. \`magento_find_preference\` — Find DI preference overrides
18
+ 9. \`magento_find_api\` — Find REST/SOAP API endpoints
19
+ 10. \`magento_find_controller\` — Find controllers by route
20
+ 11. \`magento_find_block\` — Find Block classes
21
+ 12. \`magento_find_cron\` — Find cron job definitions
22
+ 13. \`magento_find_graphql\` — Find GraphQL resolvers and schema
23
+ 14. \`magento_find_db_schema\` — Find database table definitions
24
+ 15. \`magento_module_structure\` — Get full module structure
25
+ 16. \`magento_index\` — Re-index the codebase
26
+ 17. \`magento_stats\` — View index statistics
27
+ 18. \`magento_analyze_diff\` — Analyze git diffs for risk
28
+ 19. \`magento_complexity\` — Analyze code complexity
29
+
30
+ ## Writing Effective Queries
31
+
32
+ - Describe what the code DOES, not what it IS: "calculate product price" not "price file"
33
+ - Include Magento terms: "plugin for save", "observer for order place", "checkout totals collector"
34
+ - Be specific: "customer address validation before checkout" not just "validation"
35
+
36
+ ## Magento Development Patterns
37
+
38
+ - Always check for existing plugins before modifying core behavior
39
+ - Use dependency injection — never instantiate classes with \`new\`
40
+ - Prefer interfaces over concrete classes
41
+ - Check events.xml for observer hooks before adding plugins
42
+ - Use repositories for entity CRUD, not direct model save
43
+ - Follow PSR-4 autoloading: Vendor\\\\Module\\\\Path\\\\ClassName
44
+ - Use db_schema.xml for database changes, not setup scripts
45
+ `;
@@ -0,0 +1,397 @@
1
+ /**
2
+ * Accuracy calculation and metrics for validation
3
+ */
4
+
5
/**
 * Precision: the share of returned results that are relevant.
 *
 * @param {Array<object>} results - Search results to evaluate.
 * @param {object} expectedConditions - Conditions passed to isResultRelevant.
 * @returns {number} Relevant results divided by all results; 0 for no results.
 */
export function calculatePrecision(results, expectedConditions) {
  const total = results.length;
  if (total === 0) {
    return 0;
  }

  const hits = results.reduce(
    (count, item) => (isResultRelevant(item, expectedConditions) ? count + 1 : count),
    0
  );
  return hits / total;
}
14
+
15
/**
 * Recall: relevant results found, relative to how many were expected.
 *
 * @param {Array<object>} results - Search results to evaluate.
 * @param {object} expectedConditions - Conditions passed to isResultRelevant.
 * @param {number} totalExpected - Number of relevant results expected.
 * @returns {number} Ratio capped at 1; 1 when nothing was expected.
 */
export function calculateRecall(results, expectedConditions, totalExpected) {
  if (totalExpected === 0) {
    return 1;
  }

  const hits = results.reduce(
    (count, item) => (isResultRelevant(item, expectedConditions) ? count + 1 : count),
    0
  );
  const ratio = hits / totalExpected;
  return Math.min(ratio, 1);
}
24
+
25
/**
 * F1 score: the harmonic mean of precision and recall.
 *
 * @param {number} precision
 * @param {number} recall
 * @returns {number} 0 when precision and recall sum to zero, else 2PR / (P + R).
 */
export function calculateF1(precision, recall) {
  const denominator = precision + recall;
  return denominator === 0 ? 0 : 2 * (precision * recall) / denominator;
}
32
+
33
/**
 * Reciprocal rank of the first relevant result (the per-query term of MRR).
 *
 * @param {Array<object>} results - Ranked search results, best first.
 * @param {object} expectedConditions - Conditions passed to isResultRelevant.
 * @returns {number} 1 / (1-based rank of first relevant result), or 0 if none.
 */
export function calculateMRR(results, expectedConditions) {
  const firstHit = results.findIndex((item) => isResultRelevant(item, expectedConditions));
  if (firstHit === -1) {
    return 0;
  }
  return 1 / (firstHit + 1);
}
44
+
45
/**
 * Normalized Discounted Cumulative Gain over the top `k` results, using
 * binary relevance (1 if isResultRelevant, else 0).
 *
 * The ideal DCG assumes every relevant result present anywhere in `results`
 * (at most `k` of them) is ranked at the top.
 *
 * @param {Array<object>} results - Ranked search results, best first.
 * @param {object} expectedConditions - Conditions passed to isResultRelevant.
 * @param {number} [k=10] - Cut-off rank.
 * @returns {number} DCG / ideal DCG, or 0 when no result is relevant.
 */
export function calculateNDCG(results, expectedConditions, k = 10) {
  const relevant = (item) => isResultRelevant(item, expectedConditions);
  // Gain of a relevant hit at zero-based position `index`.
  const discounted = (index) => 1 / Math.log2(index + 2);

  let dcg = 0;
  results.slice(0, k).forEach((item, index) => {
    if (relevant(item)) {
      dcg += discounted(index);
    }
  });

  const relevantTotal = results.filter(relevant).length;
  const idealSlots = new Array(Math.min(relevantTotal, k)).fill(1);
  let idcg = 0;
  idealSlots.forEach((_, index) => {
    idcg += discounted(index);
  });

  if (idcg === 0) {
    return 0;
  }
  return dcg / idcg;
}
62
+
63
// Expected pattern name -> boolean flag on a result that signals that pattern.
const RELEVANCE_PATTERN_FLAGS = new Map([
  ['plugin', 'isPlugin'],
  ['controller', 'isController'],
  ['observer', 'isObserver'],
  ['repository', 'isRepository'],
  ['graphql_resolver', 'isResolver'],
  ['model', 'isModel'],
  ['block', 'isBlock'],
]);

/**
 * Decide whether a search result satisfies the expected conditions.
 *
 * Condition groups are checked in a fixed order (types, patterns, classes,
 * methods, file types, content, module) and the result is relevant as soon as
 * any one group matches. Within the content group every expected string must
 * appear (case-insensitive); within the other list groups one match is enough.
 *
 * @param {object} result - A single search result.
 * @param {object} conditions - Expected* lists and/or expectedModule.
 * @returns {boolean} True if at least one condition group matches.
 */
export function isResultRelevant(result, conditions) {
  const given = (list) => list && list.length > 0;

  const matchesType = (t) =>
    result.magentoType === t ||
    result.type === t.toLowerCase() ||
    result.path?.includes(`/${t}/`);

  const matchesPattern = (p) => {
    if (result.patterns?.includes(p)) {
      return true;
    }
    const flag = RELEVANCE_PATTERN_FLAGS.get(p);
    return flag !== undefined && Boolean(result[flag]);
  };

  const matchesClass = (c) =>
    result.className === c ||
    result.className?.includes(c) ||
    result.content?.includes(`class ${c}`);

  const matchesMethod = (m) =>
    result.methodName === m ||
    result.content?.includes(`function ${m}`);

  // Magento types
  if (given(conditions.expectedTypes) && conditions.expectedTypes.some((t) => matchesType(t))) {
    return true;
  }

  // Patterns
  if (given(conditions.expectedPatterns) && conditions.expectedPatterns.some((p) => matchesPattern(p))) {
    return true;
  }

  // Classes
  if (given(conditions.expectedClasses) && conditions.expectedClasses.some((c) => matchesClass(c))) {
    return true;
  }

  // Methods
  if (given(conditions.expectedMethods) && conditions.expectedMethods.some((m) => matchesMethod(m))) {
    return true;
  }

  // File types
  if (given(conditions.expectedFileTypes) && conditions.expectedFileTypes.includes(result.type)) {
    return true;
  }

  // Content: every expected fragment must be present, ignoring case
  if (given(conditions.expectedInContent)) {
    const haystack = (result.content || '').toLowerCase();
    if (conditions.expectedInContent.every((c) => haystack.includes(c.toLowerCase()))) {
      return true;
    }
  }

  // Module: exact name, or its path form with the first "_" turned into "/"
  const wantedModule = conditions.expectedModule;
  if (wantedModule) {
    if (result.module === wantedModule) {
      return true;
    }
    if (result.path?.includes(wantedModule.replace('_', '/'))) {
      return true;
    }
  }

  return false;
}
136
+
137
// Pattern names whose result flag is not simply `is` + capitalized pattern.
// Kept in line with the flags isResultRelevant checks (graphql_resolver -> isResolver).
const SCORE_PATTERN_FLAG_ALIASES = new Map([
  ['graphql_resolver', 'isResolver'],
]);

/**
 * Score how well a result matches the expected conditions, from 0 to 1.
 *
 * Each non-empty condition group contributes one factor and the final score
 * is the mean over those factors:
 *   - types:    1 for an exact magentoType match, 0.7 for a `/<type>/` path match
 *   - patterns: fraction of expected patterns present on the result
 *   - classes:  1 for an exact className match, 0.5 for a substring match
 *   - content:  fraction of expected strings found (case-insensitive)
 *   - module:   1 for an exact module match
 *
 * Empty lists are ignored, as in isResultRelevant; they previously produced
 * 0/0 = NaN or diluted the score. Pattern flags also honour the `isResolver`
 * flag for `graphql_resolver`, which the derived flag name never matched.
 *
 * @param {object} result - A single search result.
 * @param {object} conditions - Expected* lists and/or expectedModule.
 * @returns {number} Mean factor score in [0, 1]; 0 when no condition applies.
 */
export function calculateRelevanceScore(result, conditions) {
  const given = (list) => Boolean(list) && list.length > 0;

  const hasPattern = (p) => {
    if (result.patterns?.includes(p)) {
      return true;
    }
    const derivedFlag = `is${p.charAt(0).toUpperCase() + p.slice(1)}`;
    if (result[derivedFlag]) {
      return true;
    }
    const aliasFlag = SCORE_PATTERN_FLAG_ALIASES.get(p);
    return aliasFlag !== undefined && Boolean(result[aliasFlag]);
  };

  let score = 0;
  let factors = 0;

  // Type match
  if (given(conditions.expectedTypes)) {
    factors++;
    if (conditions.expectedTypes.some((t) => result.magentoType === t)) {
      score += 1;
    } else if (conditions.expectedTypes.some((t) => result.path?.includes(`/${t}/`))) {
      score += 0.7;
    }
  }

  // Pattern match
  if (given(conditions.expectedPatterns)) {
    factors++;
    const matched = conditions.expectedPatterns.filter(hasPattern).length;
    score += matched / conditions.expectedPatterns.length;
  }

  // Class match
  if (given(conditions.expectedClasses)) {
    factors++;
    if (conditions.expectedClasses.some((c) => result.className === c)) {
      score += 1;
    } else if (conditions.expectedClasses.some((c) => result.className?.includes(c))) {
      score += 0.5;
    }
  }

  // Content match
  if (given(conditions.expectedInContent)) {
    factors++;
    const contentLower = (result.content || '').toLowerCase();
    const matched = conditions.expectedInContent.filter((c) =>
      contentLower.includes(c.toLowerCase())
    ).length;
    score += matched / conditions.expectedInContent.length;
  }

  // Module match
  if (conditions.expectedModule) {
    factors++;
    if (result.module === conditions.expectedModule) {
      score += 1;
    }
  }

  return factors === 0 ? 0 : score / factors;
}
193
+
194
/**
 * Aggregate per-query metrics into overall and per-category averages.
 *
 * Each entry of `queryResults` is read for: precision, recall, f1, mrr, ndcg,
 * passed, category, and (for failures) queryId, query and failReason.
 *
 * An empty `queryResults` yields zeroed averages and a pass rate of 0 rather
 * than NaN from dividing by zero.
 *
 * @param {Array<object>} queryResults - One metrics record per evaluated query.
 * @returns {object} Totals, averages, `passRate`, `byCategory` breakdown
 *   (count, passed, avgPrecision, avgRecall, avgF1, passRate) and the list of
 *   failed queries as `{ id, query, reason }`.
 */
export function aggregateMetrics(queryResults) {
  const n = queryResults.length;
  const metrics = {
    totalQueries: n,
    passedQueries: 0,
    avgPrecision: 0,
    avgRecall: 0,
    avgF1: 0,
    avgMRR: 0,
    avgNDCG: 0,
    byCategory: {},
    failed: []
  };

  // Nothing to average: keep the zeroed defaults instead of producing NaN.
  if (n === 0) {
    metrics.passRate = 0;
    return metrics;
  }

  let sumPrecision = 0;
  let sumRecall = 0;
  let sumF1 = 0;
  let sumMRR = 0;
  let sumNDCG = 0;

  for (const qr of queryResults) {
    sumPrecision += qr.precision;
    sumRecall += qr.recall;
    sumF1 += qr.f1;
    sumMRR += qr.mrr;
    sumNDCG += qr.ndcg;

    if (qr.passed) {
      metrics.passedQueries++;
    } else {
      metrics.failed.push({
        id: qr.queryId,
        query: qr.query,
        reason: qr.failReason
      });
    }

    // Per-category running sums; turned into averages after the loop.
    if (!metrics.byCategory[qr.category]) {
      metrics.byCategory[qr.category] = {
        count: 0,
        passed: 0,
        avgPrecision: 0,
        avgRecall: 0,
        avgF1: 0
      };
    }
    const cat = metrics.byCategory[qr.category];
    cat.count++;
    if (qr.passed) cat.passed++;
    cat.avgPrecision += qr.precision;
    cat.avgRecall += qr.recall;
    cat.avgF1 += qr.f1;
  }

  // Overall averages
  metrics.avgPrecision = sumPrecision / n;
  metrics.avgRecall = sumRecall / n;
  metrics.avgF1 = sumF1 / n;
  metrics.avgMRR = sumMRR / n;
  metrics.avgNDCG = sumNDCG / n;
  metrics.passRate = metrics.passedQueries / n;

  // Category averages (every category has count >= 1 here)
  for (const cat of Object.values(metrics.byCategory)) {
    cat.avgPrecision /= cat.count;
    cat.avgRecall /= cat.count;
    cat.avgF1 /= cat.count;
    cat.passRate = cat.passed / cat.count;
  }

  return metrics;
}
270
+
271
// Minimum score for each grade, highest first; anything lower is an F.
const GRADE_BANDS = [
  [95, 'A+', 'Excellent - Production ready'],
  [90, 'A', 'Very Good - Minor improvements possible'],
  [85, 'B+', 'Good - Some edge cases need work'],
  [80, 'B', 'Above Average - Noticeable gaps'],
  [75, 'C+', 'Average - Significant improvements needed'],
  [70, 'C', 'Below Average - Major issues'],
  [60, 'D', 'Poor - Fundamental problems'],
];
const FAILING_BAND = [Number.NEGATIVE_INFINITY, 'F', 'Failing - Requires complete rework'];

/**
 * Turn aggregate metrics into a letter grade.
 *
 * The score is a weighted blend on a 0-100 scale: 60% average F1 and 40% pass
 * rate. Reported numbers are rounded to one decimal place.
 *
 * @param {object} metrics - Must provide `avgF1` and `passRate` (both 0-1).
 * @returns {{score: number, grade: string, description: string,
 *   breakdown: {f1Contribution: number, passRateContribution: number}}}
 */
export function gradeAccuracy(metrics) {
  const { avgF1: f1, passRate } = metrics;
  const toOneDecimal = (value) => Math.round(value * 10) / 10;

  const score = (f1 * 0.6 + passRate * 0.4) * 100;

  const band = GRADE_BANDS.find(([floor]) => score >= floor) || FAILING_BAND;
  const [, grade, description] = band;

  return {
    score: toOneDecimal(score),
    grade,
    description,
    breakdown: {
      f1Contribution: toOneDecimal(f1 * 60),
      passRateContribution: toOneDecimal(passRate * 40),
    },
  };
}
318
+
319
+ /**
+ * Generate the plain-text accuracy validation report.
+ *
+ * Sections are appended in this order: overall grade, aggregate metrics,
+ * per-category performance (sorted by average F1, highest first), failed
+ * queries (only when there are any; at most the first 10, each query text cut
+ * to 40 characters) and recommendations.
+ *
+ * Category status marker: '✓' for a pass rate of at least 0.8, '~' for at
+ * least 0.5, otherwise '✗'.
+ *
+ * @param {object} metrics - Aggregated metrics; reads totalQueries,
+ *   passedQueries, passRate, failed, avgPrecision, avgRecall, avgF1, avgMRR,
+ *   avgNDCG and byCategory.
+ * @param {object} grade - Grade record; reads grade, score and description.
+ * @returns {string} The formatted multi-line report.
+ */
322
+ export function generateReport(metrics, grade) {
323
+ let report = `
324
+ ================================================================================
325
+ MAGECTOR ACCURACY VALIDATION REPORT
326
+ ================================================================================
327
+
328
+ OVERALL GRADE: ${grade.grade} (${grade.score}/100)
329
+ ${grade.description}
330
+
331
+ --------------------------------------------------------------------------------
332
+ AGGREGATE METRICS
333
+ --------------------------------------------------------------------------------
334
+ Total Queries: ${metrics.totalQueries}
335
+ Passed: ${metrics.passedQueries} (${(metrics.passRate * 100).toFixed(1)}%)
336
+ Failed: ${metrics.failed.length}
337
+
338
+ Precision: ${(metrics.avgPrecision * 100).toFixed(2)}%
339
+ Recall: ${(metrics.avgRecall * 100).toFixed(2)}%
340
+ F1 Score: ${(metrics.avgF1 * 100).toFixed(2)}%
341
+ MRR: ${(metrics.avgMRR * 100).toFixed(2)}%
342
+ NDCG@10: ${(metrics.avgNDCG * 100).toFixed(2)}%
343
+
344
+ --------------------------------------------------------------------------------
345
+ PERFORMANCE BY CATEGORY
346
+ --------------------------------------------------------------------------------
347
+ `;
348
+
349
+ const categories = Object.entries(metrics.byCategory).sort((a, b) => b[1].avgF1 - a[1].avgF1);
350
+ for (const [name, cat] of categories) {
351
+ const status = cat.passRate >= 0.8 ? '✓' : cat.passRate >= 0.5 ? '~' : '✗';
352
+ report += ` ${status} ${name.padEnd(20)} F1: ${(cat.avgF1 * 100).toFixed(1).padStart(5)}% Pass: ${cat.passed}/${cat.count}\n`;
353
+ }
354
+
355
+ if (metrics.failed.length > 0) {
356
+ report += `
357
+ --------------------------------------------------------------------------------
358
+ FAILED QUERIES
359
+ --------------------------------------------------------------------------------
360
+ `;
361
+ for (const fail of metrics.failed.slice(0, 10)) {
362
+ report += ` [${fail.id}] "${fail.query.substring(0, 40)}${fail.query.length > 40 ? '...' : ''}"\n`;
363
+ report += ` Reason: ${fail.reason}\n`;
364
+ }
365
+ if (metrics.failed.length > 10) {
366
+ report += ` ... and ${metrics.failed.length - 10} more\n`;
367
+ }
368
+ }
369
+
370
+ report += `
371
+ --------------------------------------------------------------------------------
372
+ RECOMMENDATIONS
373
+ --------------------------------------------------------------------------------
374
+ `;
375
+
376
+ // Recommend better indexing for every category whose average F1 is below 0.7
377
+ const weakCategories = categories.filter(([_, cat]) => cat.avgF1 < 0.7);
378
+ if (weakCategories.length > 0) {
379
+ report += ` Improve indexing for: ${weakCategories.map(([name]) => name).join(', ')}\n`;
380
+ }
381
+
382
+ if (metrics.avgPrecision < 0.7) {
383
+ report += ` - Precision is low: Consider stricter filtering and better chunking\n`;
384
+ }
385
+ if (metrics.avgRecall < 0.7) {
386
+ report += ` - Recall is low: Consider broader search terms and synonym expansion\n`;
387
+ }
388
+ if (metrics.avgMRR < 0.5) {
389
+ report += ` - MRR is low: Top results are not relevant, review ranking algorithm\n`;
390
+ }
391
+
392
+ report += `
393
+ ================================================================================
394
+ `;
395
+
396
+ return report;
397
+ }