@vibecheckai/cli 3.0.2 → 3.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +9 -1
- package/bin/cli-hygiene.js +0 -241
- package/bin/guardrail.js +0 -834
- package/bin/runners/cli-utils.js +0 -1070
- package/bin/runners/context/ai-task-decomposer.js +0 -337
- package/bin/runners/context/analyzer.js +0 -462
- package/bin/runners/context/api-contracts.js +0 -427
- package/bin/runners/context/context-diff.js +0 -342
- package/bin/runners/context/context-pruner.js +0 -291
- package/bin/runners/context/dependency-graph.js +0 -414
- package/bin/runners/context/generators/claude.js +0 -107
- package/bin/runners/context/generators/codex.js +0 -108
- package/bin/runners/context/generators/copilot.js +0 -119
- package/bin/runners/context/generators/cursor.js +0 -514
- package/bin/runners/context/generators/mcp.js +0 -151
- package/bin/runners/context/generators/windsurf.js +0 -180
- package/bin/runners/context/git-context.js +0 -302
- package/bin/runners/context/index.js +0 -1042
- package/bin/runners/context/insights.js +0 -173
- package/bin/runners/context/mcp-server/generate-rules.js +0 -337
- package/bin/runners/context/mcp-server/index.js +0 -1176
- package/bin/runners/context/mcp-server/package.json +0 -24
- package/bin/runners/context/memory.js +0 -200
- package/bin/runners/context/monorepo.js +0 -215
- package/bin/runners/context/multi-repo-federation.js +0 -404
- package/bin/runners/context/patterns.js +0 -253
- package/bin/runners/context/proof-context.js +0 -972
- package/bin/runners/context/security-scanner.js +0 -303
- package/bin/runners/context/semantic-search.js +0 -350
- package/bin/runners/context/shared.js +0 -264
- package/bin/runners/context/team-conventions.js +0 -310
- package/bin/runners/lib/ai-bridge.js +0 -416
- package/bin/runners/lib/analysis-core.js +0 -271
- package/bin/runners/lib/analyzers.js +0 -541
- package/bin/runners/lib/audit-bridge.js +0 -391
- package/bin/runners/lib/auth-truth.js +0 -193
- package/bin/runners/lib/auth.js +0 -215
- package/bin/runners/lib/backup.js +0 -62
- package/bin/runners/lib/billing.js +0 -107
- package/bin/runners/lib/claims.js +0 -118
- package/bin/runners/lib/cli-ui.js +0 -540
- package/bin/runners/lib/compliance-bridge-new.js +0 -0
- package/bin/runners/lib/compliance-bridge.js +0 -165
- package/bin/runners/lib/contracts/auth-contract.js +0 -194
- package/bin/runners/lib/contracts/env-contract.js +0 -178
- package/bin/runners/lib/contracts/external-contract.js +0 -198
- package/bin/runners/lib/contracts/guard.js +0 -168
- package/bin/runners/lib/contracts/index.js +0 -89
- package/bin/runners/lib/contracts/plan-validator.js +0 -311
- package/bin/runners/lib/contracts/route-contract.js +0 -192
- package/bin/runners/lib/detect.js +0 -89
- package/bin/runners/lib/doctor/autofix.js +0 -254
- package/bin/runners/lib/doctor/index.js +0 -37
- package/bin/runners/lib/doctor/modules/dependencies.js +0 -325
- package/bin/runners/lib/doctor/modules/index.js +0 -46
- package/bin/runners/lib/doctor/modules/network.js +0 -250
- package/bin/runners/lib/doctor/modules/project.js +0 -312
- package/bin/runners/lib/doctor/modules/runtime.js +0 -224
- package/bin/runners/lib/doctor/modules/security.js +0 -348
- package/bin/runners/lib/doctor/modules/system.js +0 -213
- package/bin/runners/lib/doctor/modules/vibecheck.js +0 -394
- package/bin/runners/lib/doctor/reporter.js +0 -262
- package/bin/runners/lib/doctor/service.js +0 -262
- package/bin/runners/lib/doctor/types.js +0 -113
- package/bin/runners/lib/doctor/ui.js +0 -263
- package/bin/runners/lib/doctor-enhanced.js +0 -233
- package/bin/runners/lib/doctor-v2.js +0 -608
- package/bin/runners/lib/enforcement.js +0 -72
|
@@ -1,303 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Security Scanner Module
|
|
3
|
-
* Scans context for secrets, vulnerabilities, and sensitive data
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
const fs = require("fs");
|
|
7
|
-
const path = require("path");
|
|
8
|
-
const crypto = require("crypto");
|
|
9
|
-
|
|
10
|
-
/**
|
|
11
|
-
* Secret patterns to detect
|
|
12
|
-
*/
|
|
13
|
-
const SECRET_PATTERNS = [
|
|
14
|
-
// API Keys
|
|
15
|
-
{ pattern: /AIza[0-9A-Za-z_-]{35}/, type: "Google API Key" },
|
|
16
|
-
{ pattern: /AKIA[0-9A-Z]{16}/, type: "AWS Access Key" },
|
|
17
|
-
{ pattern: /xoxb-[0-9]{10}-[0-9]{10}/, type: "Slack Bot Token" },
|
|
18
|
-
{ pattern: /ghp_[a-zA-Z0-9]{36}/, type: "GitHub Personal Token" },
|
|
19
|
-
{ pattern: /sk_live_[0-9a-zA-Z]{24}/, type: "Stripe Live Key" },
|
|
20
|
-
{ pattern: /pk_live_[0-9a-zA-Z]{24}/, type: "Stripe Publishable Key" },
|
|
21
|
-
|
|
22
|
-
// Generic patterns
|
|
23
|
-
{ pattern: /['"]?API[_-]?KEY['"]?\s*[:=]\s*['"][^'"]{8,}['"]/, type: "API Key" },
|
|
24
|
-
{ pattern: /['"]?SECRET[_-]?KEY['"]?\s*[:=]\s*['"][^'"]{8,}['"]/, type: "Secret Key" },
|
|
25
|
-
{ pattern: /['"]?PASSWORD['"]?\s*[:=]\s*['"][^'"]{6,}['"]/, type: "Password" },
|
|
26
|
-
{ pattern: /['"]?TOKEN['"]?\s*[:=]\s*['"][^'"]{8,}['"]/, type: "Token" },
|
|
27
|
-
{ pattern: /['"]?PRIVATE[_-]?KEY['"]?\s*[:=]\s*['"][^'"]{16,}['"]/, type: "Private Key" },
|
|
28
|
-
|
|
29
|
-
// Database URLs
|
|
30
|
-
{ pattern: /mongodb:\/\/[^:]+:[^@]+@/, type: "MongoDB URL" },
|
|
31
|
-
{ pattern: /postgres:\/\/[^:]+:[^@]+@/, type: "PostgreSQL URL" },
|
|
32
|
-
{ pattern: /mysql:\/\/[^:]+:[^@]+@/, type: "MySQL URL" },
|
|
33
|
-
|
|
34
|
-
// JWT tokens
|
|
35
|
-
{ pattern: /eyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]*/, type: "JWT Token" },
|
|
36
|
-
];
|
|
37
|
-
|
|
38
|
-
/**
|
|
39
|
-
* Vulnerability patterns
|
|
40
|
-
*/
|
|
41
|
-
const VULNERABILITY_PATTERNS = [
|
|
42
|
-
// SQL Injection
|
|
43
|
-
{ pattern: /query\s*\(\s*['"]\s*\+.*\+\s*['"]/, type: "SQL Injection", severity: "high" },
|
|
44
|
-
{ pattern: /execute\s*\(\s*['"]\s*\+/, type: "SQL Injection", severity: "high" },
|
|
45
|
-
|
|
46
|
-
// XSS
|
|
47
|
-
{ pattern: /dangerouslySetInnerHTML/, type: "XSS Risk", severity: "high" },
|
|
48
|
-
{ pattern: /innerHTML\s*=/, type: "XSS Risk", severity: "medium" },
|
|
49
|
-
{ pattern: /document\.write\s*\(/, type: "XSS Risk", severity: "high" },
|
|
50
|
-
|
|
51
|
-
// Path Traversal
|
|
52
|
-
{ pattern: /\.\.\/\.\./, type: "Path Traversal", severity: "medium" },
|
|
53
|
-
{ pattern: /readFile\s*\(\s*.*\+/, type: "Path Traversal", severity: "high" },
|
|
54
|
-
|
|
55
|
-
// Insecure Crypto
|
|
56
|
-
{ pattern: /md5\s*\(/, type: "Weak Hash", severity: "medium" },
|
|
57
|
-
{ pattern: /sha1\s*\(/, type: "Weak Hash", severity: "medium" },
|
|
58
|
-
|
|
59
|
-
// Hardcoded credentials
|
|
60
|
-
{ pattern: /admin\s*:\s*['"]admin['"]/, type: "Hardcoded Credentials", severity: "high" },
|
|
61
|
-
{ pattern: /root\s*:\s*['"][^'"]{4,}['"]/, type: "Hardcoded Credentials", severity: "high" },
|
|
62
|
-
|
|
63
|
-
// Debug code
|
|
64
|
-
{ pattern: /console\.log\s*\(\s*password/, type: "Password in Log", severity: "high" },
|
|
65
|
-
{ pattern: /console\.log\s*\(\s*token/, type: "Token in Log", severity: "high" },
|
|
66
|
-
];
|
|
67
|
-
|
|
68
|
-
/**
|
|
69
|
-
* Find files recursively
|
|
70
|
-
*/
|
|
71
|
-
function findFiles(dir, extensions, maxDepth = 5, currentDepth = 0) {
|
|
72
|
-
if (currentDepth >= maxDepth || !fs.existsSync(dir)) return [];
|
|
73
|
-
|
|
74
|
-
const files = [];
|
|
75
|
-
try {
|
|
76
|
-
const entries = fs.readdirSync(dir, { withFileTypes: true });
|
|
77
|
-
for (const entry of entries) {
|
|
78
|
-
const fullPath = path.join(dir, entry.name);
|
|
79
|
-
if (entry.isDirectory() && !entry.name.startsWith(".") && entry.name !== "node_modules") {
|
|
80
|
-
files.push(...findFiles(fullPath, extensions, maxDepth, currentDepth + 1));
|
|
81
|
-
} else if (entry.isFile() && extensions.some(ext => entry.name.endsWith(ext))) {
|
|
82
|
-
files.push(fullPath);
|
|
83
|
-
}
|
|
84
|
-
}
|
|
85
|
-
} catch {}
|
|
86
|
-
return files;
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
/**
|
|
90
|
-
* Scan file for secrets
|
|
91
|
-
*/
|
|
92
|
-
function scanForSecrets(content, filePath) {
|
|
93
|
-
const secrets = [];
|
|
94
|
-
const lines = content.split("\n");
|
|
95
|
-
|
|
96
|
-
for (const pattern of SECRET_PATTERNS) {
|
|
97
|
-
const matches = content.matchAll(new RegExp(pattern.pattern.source, 'g'));
|
|
98
|
-
for (const match of matches) {
|
|
99
|
-
const lineNum = content.substring(0, match.index).split("\n").length;
|
|
100
|
-
const line = lines[lineNum - 1];
|
|
101
|
-
|
|
102
|
-
secrets.push({
|
|
103
|
-
type: pattern.type,
|
|
104
|
-
file: path.relative(process.cwd(), filePath).replace(/\\/g, "/"),
|
|
105
|
-
line: lineNum,
|
|
106
|
-
content: line.trim(),
|
|
107
|
-
severity: "critical",
|
|
108
|
-
});
|
|
109
|
-
}
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
return secrets;
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
/**
|
|
116
|
-
* Scan file for vulnerabilities
|
|
117
|
-
*/
|
|
118
|
-
function scanForVulnerabilities(content, filePath) {
|
|
119
|
-
const vulnerabilities = [];
|
|
120
|
-
const lines = content.split("\n");
|
|
121
|
-
|
|
122
|
-
for (const pattern of VULNERABILITY_PATTERNS) {
|
|
123
|
-
const matches = content.matchAll(new RegExp(pattern.pattern.source, 'g'));
|
|
124
|
-
for (const match of matches) {
|
|
125
|
-
const lineNum = content.substring(0, match.index).split("\n").length;
|
|
126
|
-
const line = lines[lineNum - 1];
|
|
127
|
-
|
|
128
|
-
vulnerabilities.push({
|
|
129
|
-
type: pattern.type,
|
|
130
|
-
file: path.relative(process.cwd(), filePath).replace(/\\/g, "/"),
|
|
131
|
-
line: lineNum,
|
|
132
|
-
content: line.trim(),
|
|
133
|
-
severity: pattern.severity || "medium",
|
|
134
|
-
recommendation: getRecommendation(pattern.type),
|
|
135
|
-
});
|
|
136
|
-
}
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
return vulnerabilities;
|
|
140
|
-
}
|
|
141
|
-
|
|
142
|
-
/**
|
|
143
|
-
* Get recommendation for vulnerability type
|
|
144
|
-
*/
|
|
145
|
-
function getRecommendation(type) {
|
|
146
|
-
const recommendations = {
|
|
147
|
-
"SQL Injection": "Use parameterized queries or prepared statements",
|
|
148
|
-
"XSS Risk": "Sanitize user input and use textContent instead of innerHTML",
|
|
149
|
-
"Path Traversal": "Validate and sanitize file paths, use path.join()",
|
|
150
|
-
"Weak Hash": "Use stronger hashing algorithms like bcrypt or Argon2",
|
|
151
|
-
"Hardcoded Credentials": "Use environment variables for credentials",
|
|
152
|
-
"Password in Log": "Remove sensitive data from logs",
|
|
153
|
-
"Token in Log": "Remove sensitive data from logs",
|
|
154
|
-
};
|
|
155
|
-
|
|
156
|
-
return recommendations[type] || "Review and fix the security issue";
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
/**
|
|
160
|
-
* Scan project for security issues
|
|
161
|
-
*/
|
|
162
|
-
function scanProject(projectPath) {
|
|
163
|
-
const files = findFiles(projectPath, [".ts", ".tsx", ".js", ".jsx", ".json", ".env*", ".yml", ".yaml"], 5);
|
|
164
|
-
|
|
165
|
-
const results = {
|
|
166
|
-
secrets: [],
|
|
167
|
-
vulnerabilities: [],
|
|
168
|
-
stats: {
|
|
169
|
-
totalFiles: files.length,
|
|
170
|
-
filesWithSecrets: 0,
|
|
171
|
-
filesWithVulnerabilities: 0,
|
|
172
|
-
criticalIssues: 0,
|
|
173
|
-
highIssues: 0,
|
|
174
|
-
mediumIssues: 0,
|
|
175
|
-
},
|
|
176
|
-
scanned: new Date().toISOString(),
|
|
177
|
-
};
|
|
178
|
-
|
|
179
|
-
for (const file of files) {
|
|
180
|
-
try {
|
|
181
|
-
const content = fs.readFileSync(file, "utf-8");
|
|
182
|
-
const relativePath = path.relative(projectPath, file).replace(/\\/g, "/");
|
|
183
|
-
|
|
184
|
-
// Skip certain files
|
|
185
|
-
if (relativePath.includes("node_modules") ||
|
|
186
|
-
relativePath.includes(".git") ||
|
|
187
|
-
relativePath.includes("dist/") ||
|
|
188
|
-
relativePath.includes("build/")) {
|
|
189
|
-
continue;
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
const secrets = scanForSecrets(content, file);
|
|
193
|
-
const vulnerabilities = scanForVulnerabilities(content, file);
|
|
194
|
-
|
|
195
|
-
if (secrets.length > 0) {
|
|
196
|
-
results.secrets.push(...secrets);
|
|
197
|
-
results.stats.filesWithSecrets++;
|
|
198
|
-
}
|
|
199
|
-
|
|
200
|
-
if (vulnerabilities.length > 0) {
|
|
201
|
-
results.vulnerabilities.push(...vulnerabilities);
|
|
202
|
-
results.stats.filesWithVulnerabilities++;
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
// Count severity
|
|
206
|
-
for (const issue of [...secrets, ...vulnerabilities]) {
|
|
207
|
-
switch (issue.severity) {
|
|
208
|
-
case "critical":
|
|
209
|
-
results.stats.criticalIssues++;
|
|
210
|
-
break;
|
|
211
|
-
case "high":
|
|
212
|
-
results.stats.highIssues++;
|
|
213
|
-
break;
|
|
214
|
-
case "medium":
|
|
215
|
-
results.stats.mediumIssues++;
|
|
216
|
-
break;
|
|
217
|
-
}
|
|
218
|
-
}
|
|
219
|
-
} catch {}
|
|
220
|
-
}
|
|
221
|
-
|
|
222
|
-
return results;
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
/**
|
|
226
|
-
* Generate security report
|
|
227
|
-
*/
|
|
228
|
-
function generateSecurityReport(results) {
|
|
229
|
-
let report = `# Security Scan Report\n\n`;
|
|
230
|
-
report += `Scanned: ${new Date(results.scanned).toLocaleString()}\n`;
|
|
231
|
-
report += `Total Files: ${results.stats.totalFiles}\n\n`;
|
|
232
|
-
|
|
233
|
-
// Summary
|
|
234
|
-
report += `## Summary\n\n`;
|
|
235
|
-
report += `- Files with Secrets: ${results.stats.filesWithSecrets}\n`;
|
|
236
|
-
report += `- Files with Vulnerabilities: ${results.stats.filesWithVulnerabilities}\n`;
|
|
237
|
-
report += `- Critical Issues: ${results.stats.criticalIssues}\n`;
|
|
238
|
-
report += `- High Issues: ${results.stats.highIssues}\n`;
|
|
239
|
-
report += `- Medium Issues: ${results.stats.mediumIssues}\n\n`;
|
|
240
|
-
|
|
241
|
-
// Secrets
|
|
242
|
-
if (results.secrets.length > 0) {
|
|
243
|
-
report += `## 🔑 Secrets Found (${results.secrets.length})\n\n`;
|
|
244
|
-
for (const secret of results.secrets) {
|
|
245
|
-
report += `### ${secret.type} - ${secret.file}:${secret.line}\n`;
|
|
246
|
-
report += `\`\`\`\n${secret.content}\n\`\`\`\n\n`;
|
|
247
|
-
}
|
|
248
|
-
}
|
|
249
|
-
|
|
250
|
-
// Vulnerabilities
|
|
251
|
-
if (results.vulnerabilities.length > 0) {
|
|
252
|
-
report += `## 🚨 Vulnerabilities Found (${results.vulnerabilities.length})\n\n`;
|
|
253
|
-
for (const vuln of results.vulnerabilities) {
|
|
254
|
-
const icon = vuln.severity === "critical" ? "🔴" :
|
|
255
|
-
vuln.severity === "high" ? "🟠" : "🟡";
|
|
256
|
-
report += `### ${icon} ${vuln.type} - ${vuln.file}:${vuln.line}\n`;
|
|
257
|
-
report += `**Severity:** ${vuln.severity}\n`;
|
|
258
|
-
report += `**Recommendation:** ${vuln.recommendation}\n\n`;
|
|
259
|
-
report += `\`\`\`\n${vuln.content}\n\`\`\`\n\n`;
|
|
260
|
-
}
|
|
261
|
-
}
|
|
262
|
-
|
|
263
|
-
if (results.secrets.length === 0 && results.vulnerabilities.length === 0) {
|
|
264
|
-
report += `## ✅ No Security Issues Found\n\n`;
|
|
265
|
-
report += `Great job! No secrets or obvious vulnerabilities were detected.\n`;
|
|
266
|
-
}
|
|
267
|
-
|
|
268
|
-
return report;
|
|
269
|
-
}
|
|
270
|
-
|
|
271
|
-
/**
|
|
272
|
-
* Filter content for safe AI consumption
|
|
273
|
-
*/
|
|
274
|
-
function filterForAI(content) {
|
|
275
|
-
let filtered = content;
|
|
276
|
-
|
|
277
|
-
// Remove detected secrets
|
|
278
|
-
for (const pattern of SECRET_PATTERNS) {
|
|
279
|
-
filtered = filtered.replace(pattern.pattern, "[REDACTED_SECRET]");
|
|
280
|
-
}
|
|
281
|
-
|
|
282
|
-
// Remove sensitive lines
|
|
283
|
-
const lines = filtered.split("\n");
|
|
284
|
-
const safeLines = lines.filter(line => {
|
|
285
|
-
const lower = line.toLowerCase();
|
|
286
|
-
return !lower.includes("password") &&
|
|
287
|
-
!lower.includes("secret") &&
|
|
288
|
-
!lower.includes("private_key") &&
|
|
289
|
-
!lower.includes("api_key") &&
|
|
290
|
-
!line.includes("console.log") &&
|
|
291
|
-
!line.includes("debugger");
|
|
292
|
-
});
|
|
293
|
-
|
|
294
|
-
return safeLines.join("\n");
|
|
295
|
-
}
|
|
296
|
-
|
|
297
|
-
// Public API of the security scanner module. The pattern tables are
// exported alongside the functions so callers can inspect or extend them.
module.exports = {
  scanProject,
  generateSecurityReport,
  filterForAI,
  SECRET_PATTERNS,
  VULNERABILITY_PATTERNS,
};
|
|
@@ -1,350 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Semantic Code Search Module
|
|
3
|
-
* Embeds code chunks for natural language queries
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
const fs = require("fs");
|
|
7
|
-
const path = require("path");
|
|
8
|
-
const crypto = require("crypto");
|
|
9
|
-
|
|
10
|
-
/**
|
|
11
|
-
* Simple TF-IDF vectorizer for semantic search
|
|
12
|
-
* In production, would use OpenAI embeddings or similar
|
|
13
|
-
*/
|
|
14
|
-
class SimpleVectorizer {
|
|
15
|
-
constructor() {
|
|
16
|
-
this.vocabulary = new Map();
|
|
17
|
-
this.idf = new Map();
|
|
18
|
-
this.documents = [];
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
/**
|
|
22
|
-
* Tokenize text into words
|
|
23
|
-
*/
|
|
24
|
-
tokenize(text) {
|
|
25
|
-
return text
|
|
26
|
-
.toLowerCase()
|
|
27
|
-
.replace(/[^\w\s]/g, " ")
|
|
28
|
-
.split(/\s+/)
|
|
29
|
-
.filter(word => word.length > 2);
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
/**
|
|
33
|
-
* Build vocabulary from documents
|
|
34
|
-
*/
|
|
35
|
-
fit(documents) {
|
|
36
|
-
this.documents = documents;
|
|
37
|
-
const docCount = documents.length;
|
|
38
|
-
const docFreq = new Map();
|
|
39
|
-
|
|
40
|
-
// Count document frequency for each term
|
|
41
|
-
for (const doc of documents) {
|
|
42
|
-
const tokens = new Set(this.tokenize(doc));
|
|
43
|
-
for (const token of tokens) {
|
|
44
|
-
docFreq.set(token, (docFreq.get(token) || 0) + 1);
|
|
45
|
-
}
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
// Calculate IDF
|
|
49
|
-
for (const [term, freq] of docFreq) {
|
|
50
|
-
this.idf.set(term, Math.log(docCount / freq));
|
|
51
|
-
this.vocabulary.set(term, this.vocabulary.size);
|
|
52
|
-
}
|
|
53
|
-
}
|
|
54
|
-
|
|
55
|
-
/**
|
|
56
|
-
* Transform document to TF-IDF vector
|
|
57
|
-
*/
|
|
58
|
-
transform(text) {
|
|
59
|
-
const tokens = this.tokenize(text);
|
|
60
|
-
const tf = new Map();
|
|
61
|
-
|
|
62
|
-
// Count term frequency
|
|
63
|
-
for (const token of tokens) {
|
|
64
|
-
tf.set(token, (tf.get(token) || 0) + 1);
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
// Create vector
|
|
68
|
-
const vector = new Array(this.vocabulary.size).fill(0);
|
|
69
|
-
for (const [term, count] of tf) {
|
|
70
|
-
if (this.vocabulary.has(term)) {
|
|
71
|
-
const idx = this.vocabulary.get(term);
|
|
72
|
-
vector[idx] = (count / tokens.length) * this.idf.get(term);
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
return vector;
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
/**
|
|
80
|
-
* Calculate cosine similarity between vectors
|
|
81
|
-
*/
|
|
82
|
-
cosineSimilarity(vec1, vec2) {
|
|
83
|
-
let dotProduct = 0;
|
|
84
|
-
let norm1 = 0;
|
|
85
|
-
let norm2 = 0;
|
|
86
|
-
|
|
87
|
-
for (let i = 0; i < vec1.length; i++) {
|
|
88
|
-
dotProduct += vec1[i] * vec2[i];
|
|
89
|
-
norm1 += vec1[i] * vec1[i];
|
|
90
|
-
norm2 += vec2[i] * vec2[i];
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
return dotProduct / (Math.sqrt(norm1) * Math.sqrt(norm2));
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
/**
|
|
98
|
-
* Find files recursively
|
|
99
|
-
*/
|
|
100
|
-
function findFiles(dir, extensions, maxDepth = 5, currentDepth = 0) {
|
|
101
|
-
if (currentDepth >= maxDepth || !fs.existsSync(dir)) return [];
|
|
102
|
-
|
|
103
|
-
const files = [];
|
|
104
|
-
try {
|
|
105
|
-
const entries = fs.readdirSync(dir, { withFileTypes: true });
|
|
106
|
-
for (const entry of entries) {
|
|
107
|
-
const fullPath = path.join(dir, entry.name);
|
|
108
|
-
if (entry.isDirectory() && !entry.name.startsWith(".") && entry.name !== "node_modules") {
|
|
109
|
-
files.push(...findFiles(fullPath, extensions, maxDepth, currentDepth + 1));
|
|
110
|
-
} else if (entry.isFile() && extensions.some(ext => entry.name.endsWith(ext))) {
|
|
111
|
-
files.push(fullPath);
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
|
-
} catch {}
|
|
115
|
-
return files;
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
/**
|
|
119
|
-
* Extract code chunks with context
|
|
120
|
-
*/
|
|
121
|
-
function extractCodeChunks(filePath, maxSize = 1000) {
|
|
122
|
-
const chunks = [];
|
|
123
|
-
try {
|
|
124
|
-
const content = fs.readFileSync(filePath, "utf-8");
|
|
125
|
-
const lines = content.split("\n");
|
|
126
|
-
|
|
127
|
-
// Extract functions, classes, and important blocks
|
|
128
|
-
let currentChunk = [];
|
|
129
|
-
let startLine = 0;
|
|
130
|
-
let inFunction = false;
|
|
131
|
-
let inClass = false;
|
|
132
|
-
let braceCount = 0;
|
|
133
|
-
|
|
134
|
-
for (let i = 0; i < lines.length; i++) {
|
|
135
|
-
const line = lines[i];
|
|
136
|
-
currentChunk.push(line);
|
|
137
|
-
|
|
138
|
-
// Detect function/class start
|
|
139
|
-
if (line.match(/^(function|class|export\s+(function|class)|const\s+\w+\s*=|async\s+function)/)) {
|
|
140
|
-
startLine = i;
|
|
141
|
-
inFunction = true;
|
|
142
|
-
braceCount = (line.match(/{/g) || []).length - (line.match(/}/g) || []).length;
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
// Track braces for function boundaries
|
|
146
|
-
if (inFunction) {
|
|
147
|
-
braceCount += (line.match(/{/g) || []).length - (line.match(/}/g) || []).length;
|
|
148
|
-
|
|
149
|
-
if (braceCount <= 0 || line.trim().endsWith("}") || line.trim().endsWith("});")) {
|
|
150
|
-
// End of function
|
|
151
|
-
const chunkText = currentChunk.join("\n");
|
|
152
|
-
if (chunkText.length < maxSize) {
|
|
153
|
-
chunks.push({
|
|
154
|
-
text: chunkText,
|
|
155
|
-
file: path.relative(process.cwd(), filePath).replace(/\\/g, "/"),
|
|
156
|
-
startLine: startLine + 1,
|
|
157
|
-
endLine: i + 1,
|
|
158
|
-
type: "function",
|
|
159
|
-
});
|
|
160
|
-
}
|
|
161
|
-
currentChunk = [];
|
|
162
|
-
inFunction = false;
|
|
163
|
-
braceCount = 0;
|
|
164
|
-
}
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
// Split large chunks
|
|
168
|
-
if (currentChunk.length > 50) {
|
|
169
|
-
const chunkText = currentChunk.join("\n");
|
|
170
|
-
if (chunkText.length < maxSize) {
|
|
171
|
-
chunks.push({
|
|
172
|
-
text: chunkText,
|
|
173
|
-
file: path.relative(process.cwd(), filePath).replace(/\\/g, "/"),
|
|
174
|
-
startLine: startLine + 1,
|
|
175
|
-
endLine: i + 1,
|
|
176
|
-
type: "block",
|
|
177
|
-
});
|
|
178
|
-
}
|
|
179
|
-
currentChunk = [];
|
|
180
|
-
startLine = i + 1;
|
|
181
|
-
}
|
|
182
|
-
}
|
|
183
|
-
|
|
184
|
-
// Add remaining chunk if significant
|
|
185
|
-
if (currentChunk.length > 5) {
|
|
186
|
-
const chunkText = currentChunk.join("\n");
|
|
187
|
-
if (chunkText.length < maxSize) {
|
|
188
|
-
chunks.push({
|
|
189
|
-
text: chunkText,
|
|
190
|
-
file: path.relative(process.cwd(), filePath).replace(/\\/g, "/"),
|
|
191
|
-
startLine: startLine + 1,
|
|
192
|
-
endLine: lines.length,
|
|
193
|
-
type: "block",
|
|
194
|
-
});
|
|
195
|
-
}
|
|
196
|
-
}
|
|
197
|
-
} catch {}
|
|
198
|
-
|
|
199
|
-
return chunks;
|
|
200
|
-
}
|
|
201
|
-
|
|
202
|
-
/**
|
|
203
|
-
* Build semantic search index
|
|
204
|
-
*/
|
|
205
|
-
function buildSearchIndex(projectPath) {
|
|
206
|
-
const files = findFiles(projectPath, [".ts", ".tsx", ".js", ".jsx"], 5);
|
|
207
|
-
const chunks = [];
|
|
208
|
-
|
|
209
|
-
// Extract code chunks
|
|
210
|
-
for (const file of files) {
|
|
211
|
-
const fileChunks = extractCodeChunks(file);
|
|
212
|
-
chunks.push(...fileChunks);
|
|
213
|
-
}
|
|
214
|
-
|
|
215
|
-
// Create vectorizer and fit
|
|
216
|
-
const vectorizer = new SimpleVectorizer();
|
|
217
|
-
const documents = chunks.map(c => c.text);
|
|
218
|
-
vectorizer.fit(documents);
|
|
219
|
-
|
|
220
|
-
// Create embeddings
|
|
221
|
-
const embeddings = chunks.map((chunk, idx) => ({
|
|
222
|
-
...chunk,
|
|
223
|
-
vector: vectorizer.transform(chunk.text),
|
|
224
|
-
id: crypto.createHash("md5").update(chunk.text).digest("hex").slice(0, 8),
|
|
225
|
-
}));
|
|
226
|
-
|
|
227
|
-
return {
|
|
228
|
-
vectorizer,
|
|
229
|
-
embeddings,
|
|
230
|
-
totalChunks: chunks.length,
|
|
231
|
-
totalFiles: files.length,
|
|
232
|
-
};
|
|
233
|
-
}
|
|
234
|
-
|
|
235
|
-
/**
|
|
236
|
-
* Search code semantically
|
|
237
|
-
*/
|
|
238
|
-
function semanticSearch(index, query, limit = 10) {
|
|
239
|
-
const queryVector = index.vectorizer.transform(query);
|
|
240
|
-
const results = [];
|
|
241
|
-
|
|
242
|
-
for (const embedding of index.embeddings) {
|
|
243
|
-
const similarity = index.vectorizer.cosineSimilarity(queryVector, embedding.vector);
|
|
244
|
-
if (similarity > 0.1) { // Threshold
|
|
245
|
-
results.push({
|
|
246
|
-
...embedding,
|
|
247
|
-
similarity,
|
|
248
|
-
});
|
|
249
|
-
}
|
|
250
|
-
}
|
|
251
|
-
|
|
252
|
-
return results
|
|
253
|
-
.sort((a, b) => b.similarity - a.similarity)
|
|
254
|
-
.slice(0, limit);
|
|
255
|
-
}
|
|
256
|
-
|
|
257
|
-
/**
|
|
258
|
-
* Save search index
|
|
259
|
-
*/
|
|
260
|
-
function saveSearchIndex(projectPath, index) {
|
|
261
|
-
const vibecheckDir = path.join(projectPath, ".vibecheck");
|
|
262
|
-
if (!fs.existsSync(vibecheckDir)) {
|
|
263
|
-
fs.mkdirSync(vibecheckDir, { recursive: true });
|
|
264
|
-
}
|
|
265
|
-
|
|
266
|
-
const indexData = {
|
|
267
|
-
version: "1.0.0",
|
|
268
|
-
created: new Date().toISOString(),
|
|
269
|
-
totalChunks: index.totalChunks,
|
|
270
|
-
totalFiles: index.totalFiles,
|
|
271
|
-
vocabulary: Array.from(index.vectorizer.vocabulary.keys()),
|
|
272
|
-
idf: Object.fromEntries(index.vectorizer.idf),
|
|
273
|
-
embeddings: index.embeddings.map(e => ({
|
|
274
|
-
id: e.id,
|
|
275
|
-
file: e.file,
|
|
276
|
-
startLine: e.startLine,
|
|
277
|
-
endLine: e.endLine,
|
|
278
|
-
type: e.type,
|
|
279
|
-
vector: e.vector,
|
|
280
|
-
})),
|
|
281
|
-
};
|
|
282
|
-
|
|
283
|
-
fs.writeFileSync(
|
|
284
|
-
path.join(vibecheckDir, "semantic-index.json"),
|
|
285
|
-
JSON.stringify(indexData, null, 2)
|
|
286
|
-
);
|
|
287
|
-
}
|
|
288
|
-
|
|
289
|
-
/**
|
|
290
|
-
* Load search index
|
|
291
|
-
*/
|
|
292
|
-
function loadSearchIndex(projectPath) {
|
|
293
|
-
const indexPath = path.join(projectPath, ".vibecheck", "semantic-index.json");
|
|
294
|
-
|
|
295
|
-
if (!fs.existsSync(indexPath)) {
|
|
296
|
-
return null;
|
|
297
|
-
}
|
|
298
|
-
|
|
299
|
-
try {
|
|
300
|
-
const data = JSON.parse(fs.readFileSync(indexPath, "utf-8"));
|
|
301
|
-
|
|
302
|
-
// Reconstruct vectorizer
|
|
303
|
-
const vectorizer = new SimpleVectorizer();
|
|
304
|
-
data.vocabulary.forEach((term, idx) => {
|
|
305
|
-
vectorizer.vocabulary.set(term, idx);
|
|
306
|
-
});
|
|
307
|
-
vectorizer.idf = new Map(Object.entries(data.idf));
|
|
308
|
-
|
|
309
|
-
return {
|
|
310
|
-
vectorizer,
|
|
311
|
-
embeddings: data.embeddings,
|
|
312
|
-
totalChunks: data.totalChunks,
|
|
313
|
-
totalFiles: data.totalFiles,
|
|
314
|
-
};
|
|
315
|
-
} catch {
|
|
316
|
-
return null;
|
|
317
|
-
}
|
|
318
|
-
}
|
|
319
|
-
|
|
320
|
-
/**
|
|
321
|
-
* Generate semantic search report
|
|
322
|
-
*/
|
|
323
|
-
function generateSearchReport(results, query) {
|
|
324
|
-
let report = `# Semantic Search Results\n\n`;
|
|
325
|
-
report += `Query: "${query}"\n`;
|
|
326
|
-
report += `Found: ${results.length} results\n\n`;
|
|
327
|
-
|
|
328
|
-
for (const result of results) {
|
|
329
|
-
report += `## ${result.file}:${result.startLine}-${result.endLine}\n`;
|
|
330
|
-
report += `**Similarity:** ${(result.similarity * 100).toFixed(1)}%\n`;
|
|
331
|
-
report += `**Type:** ${result.type}\n\n`;
|
|
332
|
-
report += `\`\`\`${path.extname(result.file).slice(1)}\n`;
|
|
333
|
-
report += result.text.split("\n").slice(0, 10).join("\n");
|
|
334
|
-
if (result.text.split("\n").length > 10) {
|
|
335
|
-
report += "\n...";
|
|
336
|
-
}
|
|
337
|
-
report += "\n\`\`\`\n\n";
|
|
338
|
-
}
|
|
339
|
-
|
|
340
|
-
return report;
|
|
341
|
-
}
|
|
342
|
-
|
|
343
|
-
// Public API of the semantic search module. SimpleVectorizer is exported
// so callers can build or rehydrate indexes themselves.
module.exports = {
  buildSearchIndex,
  semanticSearch,
  saveSearchIndex,
  loadSearchIndex,
  generateSearchReport,
  SimpleVectorizer,
};
|