codesight 1.3.2 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,9 @@
1
- import { join } from "node:path";
1
+ import { join, relative } from "node:path";
2
2
  import { readFileSafe } from "../scanner.js";
3
3
  import { loadTypeScript } from "../ast/loader.js";
4
4
  import { extractDrizzleSchemaAST, extractTypeORMSchemaAST } from "../ast/extract-schema.js";
5
+ import { extractSQLAlchemyAST } from "../ast/extract-python.js";
6
+ import { extractGORMModelsStructured } from "../ast/extract-go.js";
5
7
  const AUDIT_FIELDS = new Set([
6
8
  "createdAt",
7
9
  "updatedAt",
@@ -26,6 +28,9 @@ export async function detectSchemas(files, project) {
26
28
  case "sqlalchemy":
27
29
  models.push(...(await detectSQLAlchemySchemas(files, project)));
28
30
  break;
31
+ case "gorm":
32
+ models.push(...(await detectGORMSchemas(files, project)));
33
+ break;
29
34
  }
30
35
  }
31
36
  return models;
@@ -256,8 +261,15 @@ async function detectSQLAlchemySchemas(files, project) {
256
261
  continue;
257
262
  if (!content.includes("Base") && !content.includes("DeclarativeBase") && !content.includes("Model"))
258
263
  continue;
259
- // Match class definitions
260
- const classPattern = /class\s+(\w+)\s*\([^)]*(?:Base|Model|DeclarativeBase)[^)]*\)\s*:([\s\S]*?)(?=\nclass\s|\n[^\s]|\Z)/g;
264
+ const rel = relative(project.root, file);
265
+ // Try Python AST first
266
+ const astModels = await extractSQLAlchemyAST(rel, content);
267
+ if (astModels && astModels.length > 0) {
268
+ models.push(...astModels);
269
+ continue;
270
+ }
271
+ // Fallback to regex
272
+ const classPattern = /class\s+(\w+)\s*\([^)]*(?:Base|Model|DeclarativeBase)[^)]*\)\s*:([\s\S]*?)(?=\nclass\s|\n[^\s]|$)/g;
261
273
  let match;
262
274
  while ((match = classPattern.exec(content)) !== null) {
263
275
  const name = match[1];
@@ -301,3 +313,17 @@ async function detectSQLAlchemySchemas(files, project) {
301
313
  }
302
314
  return models;
303
315
  }
316
+ // --- GORM ---
317
/**
 * Collects GORM model definitions from the Go source files in `files`.
 *
 * Only `.go` files are read. A file is skipped unless its text contains at
 * least one of "gorm", "Model", or a backtick-quoted `json:` struct tag.
 * Model extraction itself is delegated to `extractGORMModelsStructured`,
 * which receives the path relative to the project root.
 *
 * @param files - File paths to scan.
 * @param _project - Project descriptor; only `root` is read here.
 * @returns Every model returned by the extractor, in file order.
 */
async function detectGORMSchemas(files, _project) {
    const collected = [];
    const candidates = files.filter((path) => path.endsWith(".go"));
    for (const goFile of candidates) {
        const source = await readFileSafe(goFile);
        // Cheap text pre-filter before running the struct extractor.
        const mentionsModel = source.includes("gorm") ||
            source.includes("Model") ||
            source.includes("`json:");
        if (!mentionsModel) {
            continue;
        }
        const relativePath = relative(_project.root, goFile);
        for (const model of extractGORMModelsStructured(relativePath, source)) {
            collected.push(model);
        }
    }
    return collected;
}
package/dist/eval.d.ts ADDED
@@ -0,0 +1,5 @@
1
/**
 * Evaluation suite entry point.
 *
 * Runs the codesight detectors against the fixture repos and reports
 * precision, recall, and F1 measured against each fixture's ground truth.
 *
 * @returns A promise that resolves once the run has finished.
 */
export declare function runEval(): Promise<void>;
package/dist/eval.js ADDED
@@ -0,0 +1,184 @@
1
+ /**
2
+ * Evaluation suite: runs codesight on fixture repos and measures
3
+ * precision, recall, and F1 against ground truth.
4
+ */
5
+ import { readFile, writeFile, mkdir, rm } from "node:fs/promises";
6
+ import { join, dirname } from "node:path";
7
+ import { fileURLToPath } from "node:url";
8
+ import { collectFiles, detectProject } from "./scanner.js";
9
+ import { detectRoutes } from "./detectors/routes.js";
10
+ import { detectSchemas } from "./detectors/schema.js";
11
+ import { detectComponents } from "./detectors/components.js";
12
+ import { detectConfig } from "./detectors/config.js";
13
+ import { detectMiddleware } from "./detectors/middleware.js";
14
/**
 * Scores a set of detected items against the expected (ground-truth) set.
 *
 * Precision falls back to 1 when nothing was detected, recall falls back to 1
 * when nothing was expected, and F1 is 0 when precision and recall are both 0.
 * The three ratios are rounded to three decimal places.
 *
 * @param detected - Set of keys produced by a detector.
 * @param expected - Set of keys listed in the ground truth.
 * @returns Rounded precision/recall/F1 plus the raw TP/FP/FN counts.
 */
function calcMetrics(detected, expected) {
    const toThreeDecimals = (value) => Math.round(value * 1000) / 1000;
    const detectedItems = Array.from(detected);
    const expectedItems = Array.from(expected);
    const truePositives = detectedItems.filter((item) => expected.has(item)).length;
    const falsePositives = detectedItems.length - truePositives;
    const falseNegatives = expectedItems.filter((item) => !detected.has(item)).length;
    const detectedTotal = truePositives + falsePositives;
    const expectedTotal = truePositives + falseNegatives;
    const precision = detectedTotal > 0 ? truePositives / detectedTotal : 1;
    const recall = expectedTotal > 0 ? truePositives / expectedTotal : 1;
    let f1 = 0;
    if (precision + recall > 0) {
        f1 = (2 * precision * recall) / (precision + recall);
    }
    return {
        precision: toThreeDecimals(precision),
        recall: toThreeDecimals(recall),
        f1: toThreeDecimals(f1),
        truePositives,
        falsePositives,
        falseNegatives,
    };
}
40
/**
 * Writes a fixture's file map out as a real directory tree under the OS temp
 * directory so the detectors can scan it like an ordinary repository.
 *
 * The root is created with `mkdtemp`, which picks a unique name and creates
 * the directory in one step. The returned path therefore always exists, even
 * when the fixture has no files.
 *
 * @param fixture - Parsed fixture; `fixture.files` maps relative file paths to file contents.
 * @returns Path of the created temp directory. The caller is responsible for removing it.
 */
async function createTempRepo(fixture) {
    // Read the file map first so a malformed fixture throws before anything is created on disk.
    const entries = Object.entries(fixture.files);
    const { tmpdir } = await import("node:os");
    const { mkdtemp } = await import("node:fs/promises");
    const tmpDir = await mkdtemp(join(tmpdir(), "codesight-eval-"));
    for (const [filePath, content] of entries) {
        const fullPath = join(tmpDir, filePath);
        // Fixture paths may be nested; make sure the parent directory exists.
        await mkdir(dirname(fullPath), { recursive: true });
        await writeFile(fullPath, content);
    }
    return tmpDir;
}
49
/**
 * Evaluates one fixture directory.
 *
 * Reads `repo.json` (the file map and name) and `ground-truth.json` from
 * `fixturePath`, writes the repo to a temp directory, runs the route, schema,
 * component, config, and middleware detectors on it, and scores the results
 * against the ground truth. The temp directory is removed afterwards, whether
 * or not detection succeeded.
 *
 * Comparison keys: routes use `method:path`, models use the lowercased name,
 * env vars / components / middleware use the name as-is. Components and
 * middleware are only scored when the ground truth lists at least one.
 *
 * @param fixturePath - Directory containing `repo.json` and `ground-truth.json`.
 * @returns Fixture name, per-category metrics, and detector runtime in milliseconds.
 */
async function evalFixture(fixturePath) {
    const loadJson = async (fileName) => JSON.parse(await readFile(join(fixturePath, fileName), "utf-8"));
    const repoJson = await loadJson("repo.json");
    const groundTruth = await loadJson("ground-truth.json");
    const tmpDir = await createTempRepo(repoJson);
    // Timing starts after the temp repo is written, so it covers detection only.
    const startTime = Date.now();
    try {
        const project = await detectProject(tmpDir);
        const files = await collectFiles(tmpDir, 10);
        const detections = await Promise.all([
            detectRoutes(files, project),
            detectSchemas(files, project),
            detectComponents(files, project),
            detectConfig(files, project),
            detectMiddleware(files, project),
        ]);
        const runtime = Date.now() - startTime;
        const [routes, schemas, components, config, middleware] = detections;
        const routeKey = (route) => `${route.method}:${route.path}`;
        const modelKey = (model) => model.name.toLowerCase();
        const nameOf = (entry) => entry.name;
        const detectedRoutes = new Set(routes.map((r) => routeKey(r)));
        const expectedRoutes = new Set((groundTruth.routes || []).map((r) => routeKey(r)));
        const detectedModels = new Set(schemas.map((s) => modelKey(s)));
        const expectedModels = new Set((groundTruth.models || []).map((m) => modelKey(m)));
        const detectedEnvVars = new Set(config.envVars.map((e) => nameOf(e)));
        const expectedEnvVars = new Set(groundTruth.envVars || []);
        const result = {
            name: repoJson.name,
            routes: calcMetrics(detectedRoutes, expectedRoutes),
            models: calcMetrics(detectedModels, expectedModels),
            envVars: calcMetrics(detectedEnvVars, expectedEnvVars),
            runtime,
        };
        // Optional categories: scored only when the ground truth lists entries.
        const wantsComponents = groundTruth.components && groundTruth.components.length > 0;
        if (wantsComponents) {
            result.components = calcMetrics(new Set(components.map((c) => nameOf(c))), new Set(groundTruth.components.map((c) => nameOf(c))));
        }
        const wantsMiddleware = groundTruth.middleware && groundTruth.middleware.length > 0;
        if (wantsMiddleware) {
            // Ground-truth middleware entries are used directly as keys (no `.name` lookup).
            result.middleware = calcMetrics(new Set(middleware.map((m) => nameOf(m))), new Set(groundTruth.middleware));
        }
        return result;
    }
    finally {
        // Best-effort cleanup: a failure to remove the temp dir is ignored.
        try {
            await rm(tmpDir, { recursive: true, force: true });
        }
        catch {
            // ignored on purpose
        }
    }
}
102
/**
 * Renders a ratio as a percentage string with one decimal place.
 *
 * @param n - Ratio to format (e.g. 0.5).
 * @returns The value times 100, fixed to one decimal, followed by "%" (e.g. "50.0%").
 */
function formatPercent(n) {
    const scaled = n * 100;
    return scaled.toFixed(1) + "%";
}
105
/**
 * Prints one metrics row to stdout: the label padded to 14 characters, then
 * precision, recall, and F1 as right-aligned percentages, then the raw
 * TP/FP/FN counts.
 *
 * @param label - Category name shown at the start of the row.
 * @param m - Metrics object as returned by `calcMetrics`.
 */
function printMetrics(label, m) {
    const cell = (ratio) => formatPercent(ratio).padStart(6);
    const counts = `(TP:${m.truePositives} FP:${m.falsePositives} FN:${m.falseNegatives})`;
    console.log(` ${label.padEnd(14)} P: ${cell(m.precision)} R: ${cell(m.recall)} F1: ${cell(m.f1)} ${counts}`);
}
108
/**
 * Entry point for the evaluation suite: finds the fixtures directory and runs
 * the benchmark over every fixture in it.
 *
 * Two locations are tried, relative to this module's directory, in order:
 * `../eval/fixtures` and `../../eval/fixtures`. The first one that can be
 * listed is used. If neither exists, the run ends with the normal
 * "No fixtures found" message rather than a raw filesystem error. Any other
 * failure on the last location (for example a permission error) is rethrown.
 *
 * @returns A promise that resolves once the run has finished.
 */
export async function runEval() {
    const { readdir } = await import("node:fs/promises");
    const moduleDir = dirname(fileURLToPath(import.meta.url));
    const candidates = [
        join(moduleDir, "..", "eval", "fixtures"),
        join(moduleDir, "..", "..", "eval", "fixtures"),
    ];
    let lastError;
    for (const candidate of candidates) {
        let fixtureNames;
        try {
            // Only the directory listing is guarded; errors from the run itself propagate.
            fixtureNames = await readdir(candidate);
        }
        catch (err) {
            lastError = err;
            continue;
        }
        return runEvalFromDir(candidate, fixtureNames);
    }
    // Object(...) gives property access on a value of unknown shape.
    const code = Object(lastError).code;
    if (code !== "ENOENT" && code !== "ENOTDIR") {
        throw lastError;
    }
    // Neither directory exists: let the runner print its "No fixtures found" message.
    return runEvalFromDir(candidates[0], []);
}
127
/**
 * Runs the evaluation over every fixture in `evalDir` and prints per-fixture
 * metrics followed by an overall summary.
 *
 * Entries without a `repo.json` are skipped silently. The summary averages
 * are taken over every individual metric row (routes, models, env vars, plus
 * components and middleware when present), not per fixture.
 *
 * @param evalDir - Directory that contains the fixture folders.
 * @param fixtureNames - Entry names inside `evalDir` to consider.
 */
async function runEvalFromDir(evalDir, fixtureNames) {
    console.log(`\n codesight eval — precision/recall benchmarks\n`);
    const { stat } = await import("node:fs/promises");
    // A directory counts as a fixture only if it holds a repo.json.
    const hasRepoJson = async (dir) => {
        try {
            await stat(join(dir, "repo.json"));
            return true;
        }
        catch {
            return false;
        }
    };
    const results = [];
    const sums = { precision: 0, recall: 0, f1: 0 };
    let metricCount = 0;
    for (const name of fixtureNames) {
        const fixturePath = join(evalDir, name);
        if (!(await hasRepoJson(fixturePath))) {
            continue;
        }
        process.stdout.write(` ${name}...`);
        const result = await evalFixture(fixturePath);
        results.push(result);
        console.log(` ${result.runtime}ms`);
        // Labelled rows in print order; optional categories are appended only when present.
        const rows = [
            ["Routes", result.routes],
            ["Models", result.models],
            ["Env vars", result.envVars],
        ];
        if (result.components) {
            rows.push(["Components", result.components]);
        }
        if (result.middleware) {
            rows.push(["Middleware", result.middleware]);
        }
        for (const [label, metrics] of rows) {
            printMetrics(label, metrics);
        }
        console.log("");
        for (const [, metrics] of rows) {
            sums.precision += metrics.precision;
            sums.recall += metrics.recall;
            sums.f1 += metrics.f1;
            metricCount += 1;
        }
    }
    if (results.length === 0) {
        console.log(" No fixtures found. Add fixtures to eval/fixtures/");
        return;
    }
    const avgP = sums.precision / metricCount;
    const avgR = sums.recall / metricCount;
    const avgF1 = sums.f1 / metricCount;
    let totalRuntime = 0;
    for (const entry of results) {
        totalRuntime = totalRuntime + entry.runtime;
    }
    console.log(" ──────────────────────────────────────────");
    console.log(` Fixtures: ${results.length}`);
    console.log(` Avg precision: ${formatPercent(avgP)}`);
    console.log(` Avg recall: ${formatPercent(avgR)}`);
    console.log(` Avg F1: ${formatPercent(avgF1)}`);
    console.log(` Total runtime: ${totalRuntime}ms`);
    console.log("");
}
package/dist/index.js CHANGED
@@ -14,7 +14,8 @@ import { calculateTokenStats } from "./detectors/tokens.js";
14
14
  import { writeOutput } from "./formatter.js";
15
15
  import { generateAIConfigs } from "./generators/ai-config.js";
16
16
  import { generateHtmlReport } from "./generators/html-report.js";
17
- const VERSION = "1.3.2";
17
+ import { loadConfig, mergeCliConfig } from "./config.js";
18
+ const VERSION = "1.5.0";
18
19
  const BRAND = "codesight";
19
20
  function printHelp() {
20
21
  console.log(`
@@ -35,9 +36,15 @@ function printHelp() {
35
36
  --benchmark Show detailed token savings breakdown
36
37
  --profile <tool> Generate optimized config (claude-code|cursor|codex|copilot|windsurf)
37
38
  --blast <file> Show blast radius for a file
39
+ --telemetry Run token telemetry (real before/after measurement)
40
+ --eval Run precision/recall benchmarks on eval fixtures
38
41
  -v, --version Show version
39
42
  -h, --help Show this help
40
43
 
44
+ Config:
45
+ Reads codesight.config.(ts|js|json) or package.json "codesight" field.
46
+ See docs for disableDetectors, customRoutePatterns, plugins, and more.
47
+
41
48
  Examples:
42
49
  npx ${BRAND} # Scan current directory
43
50
  npx ${BRAND} --init # Scan + generate AI config files
@@ -45,6 +52,8 @@ function printHelp() {
45
52
  npx ${BRAND} --watch # Watch mode, re-scan on changes
46
53
  npx ${BRAND} --mcp # Start MCP server
47
54
  npx ${BRAND} --hook # Install git pre-commit hook
55
+ npx ${BRAND} --telemetry # Measure real token savings
56
+ npx ${BRAND} --eval # Run accuracy benchmarks
48
57
  npx ${BRAND} ./my-project # Scan specific directory
49
58
  `);
50
59
  }
@@ -57,7 +66,7 @@ async function fileExists(path) {
57
66
  return false;
58
67
  }
59
68
  }
60
- async function scan(root, outputDirName, maxDepth) {
69
+ async function scan(root, outputDirName, maxDepth, userConfig = {}) {
61
70
  const outputDir = join(root, outputDirName);
62
71
  console.log(`\n ${BRAND} v${VERSION}`);
63
72
  console.log(` Scanning: ${root}\n`);
@@ -73,17 +82,39 @@ async function scan(root, outputDirName, maxDepth) {
73
82
  process.stdout.write(" Collecting files...");
74
83
  const files = await collectFiles(root, maxDepth);
75
84
  console.log(` ${files.length} files`);
76
- // Step 3: Run all detectors in parallel
85
+ // Step 3: Run all detectors in parallel (respecting disableDetectors config)
77
86
  process.stdout.write(" Analyzing...");
78
- const [rawRoutes, schemas, components, libs, config, middleware, graph] = await Promise.all([
79
- detectRoutes(files, project),
80
- detectSchemas(files, project),
81
- detectComponents(files, project),
82
- detectLibs(files, project),
83
- detectConfig(files, project),
84
- detectMiddleware(files, project),
85
- detectDependencyGraph(files, project),
87
+ const disabled = new Set(userConfig.disableDetectors || []);
88
+ const [rawRoutes, schemas, components, libs, configResult, middleware, graph] = await Promise.all([
89
+ disabled.has("routes") ? Promise.resolve([]) : detectRoutes(files, project),
90
+ disabled.has("schema") ? Promise.resolve([]) : detectSchemas(files, project),
91
+ disabled.has("components") ? Promise.resolve([]) : detectComponents(files, project),
92
+ disabled.has("libs") ? Promise.resolve([]) : detectLibs(files, project),
93
+ disabled.has("config") ? Promise.resolve({ envVars: [], configFiles: [], dependencies: {}, devDependencies: {} }) : detectConfig(files, project),
94
+ disabled.has("middleware") ? Promise.resolve([]) : detectMiddleware(files, project),
95
+ disabled.has("graph") ? Promise.resolve({ edges: [], hotFiles: [] }) : detectDependencyGraph(files, project),
86
96
  ]);
97
+ // Step 3b: Run plugin detectors
98
+ if (userConfig.plugins) {
99
+ for (const plugin of userConfig.plugins) {
100
+ if (plugin.detector) {
101
+ try {
102
+ const pluginResult = await plugin.detector(files, project);
103
+ if (pluginResult.routes)
104
+ rawRoutes.push(...pluginResult.routes);
105
+ if (pluginResult.schemas)
106
+ schemas.push(...pluginResult.schemas);
107
+ if (pluginResult.components)
108
+ components.push(...pluginResult.components);
109
+ if (pluginResult.middleware)
110
+ middleware.push(...pluginResult.middleware);
111
+ }
112
+ catch (err) {
113
+ console.warn(`\n Warning: plugin "${plugin.name}" failed: ${err.message}`);
114
+ }
115
+ }
116
+ }
117
+ }
87
118
  // Step 4: Enrich routes with contract info
88
119
  const routes = await enrichRouteContracts(rawRoutes, project);
89
120
  // Report AST vs regex detection
@@ -106,7 +137,7 @@ async function scan(root, outputDirName, maxDepth) {
106
137
  schemas,
107
138
  components,
108
139
  libs,
109
- config,
140
+ config: configResult,
110
141
  middleware,
111
142
  graph,
112
143
  tokenStats: { outputTokens: 0, estimatedExplorationTokens: 0, saved: 0, fileCount: files.length },
@@ -126,7 +157,7 @@ async function scan(root, outputDirName, maxDepth) {
126
157
  Models: ${schemas.length}
127
158
  Components: ${components.length}
128
159
  Libraries: ${libs.length}
129
- Env vars: ${config.envVars.length}
160
+ Env vars: ${configResult.envVars.length}
130
161
  Middleware: ${middleware.length}
131
162
  Import links: ${graph.edges.length}
132
163
  Hot files: ${graph.hotFiles.length}
@@ -234,6 +265,8 @@ async function main() {
234
265
  let doBenchmark = false;
235
266
  let doProfile = "";
236
267
  let doBlast = "";
268
+ let doTelemetry = false;
269
+ let doEval = false;
237
270
  for (let i = 0; i < args.length; i++) {
238
271
  const arg = args[i];
239
272
  if ((arg === "-o" || arg === "--output") && args[i + 1]) {
@@ -273,6 +306,12 @@ async function main() {
273
306
  else if (arg === "--blast" && args[i + 1]) {
274
307
  doBlast = args[++i];
275
308
  }
309
+ else if (arg === "--telemetry") {
310
+ doTelemetry = true;
311
+ }
312
+ else if (arg === "--eval") {
313
+ doEval = true;
314
+ }
276
315
  else if (!arg.startsWith("-")) {
277
316
  targetDir = resolve(arg);
278
317
  }
@@ -283,13 +322,58 @@ async function main() {
283
322
  await startMCPServer();
284
323
  return;
285
324
  }
325
+ // Eval mode (standalone, no scan needed)
326
+ if (doEval) {
327
+ const { runEval } = await import("./eval.js");
328
+ await runEval();
329
+ return;
330
+ }
286
331
  const root = resolve(targetDir);
332
+ // Load config file
333
+ const fileConfig = await loadConfig(root);
334
+ const config = mergeCliConfig(fileConfig, {
335
+ maxDepth: maxDepth !== 10 ? maxDepth : undefined,
336
+ outputDir: outputDirName !== ".codesight" ? outputDirName : undefined,
337
+ profile: doProfile || undefined,
338
+ });
339
+ // Apply config overrides
340
+ if (config.maxDepth)
341
+ maxDepth = config.maxDepth;
342
+ if (config.outputDir)
343
+ outputDirName = config.outputDir;
287
344
  // Install git hook
288
345
  if (doHook) {
289
346
  await installGitHook(root, outputDirName);
290
347
  }
291
- // Run scan
292
- const result = await scan(root, outputDirName, maxDepth);
348
+ // Run scan (passes config for disabled detectors + plugins)
349
+ let result = await scan(root, outputDirName, maxDepth, config);
350
+ // Run plugin post-processors
351
+ if (config.plugins) {
352
+ for (const plugin of config.plugins) {
353
+ if (plugin.postProcessor) {
354
+ try {
355
+ result = await plugin.postProcessor(result);
356
+ }
357
+ catch (err) {
358
+ console.warn(` Warning: plugin "${plugin.name}" post-processor failed: ${err.message}`);
359
+ }
360
+ }
361
+ }
362
+ }
363
+ // Token telemetry
364
+ if (doTelemetry) {
365
+ const { runTelemetry } = await import("./telemetry.js");
366
+ const outputDir = join(root, outputDirName);
367
+ process.stdout.write(" Running telemetry...");
368
+ const report = await runTelemetry(root, result, outputDir);
369
+ console.log(` ${outputDirName}/telemetry.md`);
370
+ console.log(`\n Telemetry Results:`);
371
+ for (const task of report.tasks) {
372
+ console.log(` ${task.name}: ${task.reduction}x reduction (${task.tokensWithout.toLocaleString()} → ${task.tokensWith.toLocaleString()} tokens)`);
373
+ }
374
+ console.log(` Average: ${report.summary.averageReduction}x | Tool calls saved: ${report.summary.totalToolCallsSaved}`);
375
+ console.log("");
376
+ }
293
377
  // JSON output
294
378
  if (jsonOutput) {
295
379
  console.log(JSON.stringify(result, null, 2));
package/dist/scanner.js CHANGED
@@ -212,11 +212,11 @@ async function detectFrameworks(root, pkg) {
212
212
  frameworks.push("django");
213
213
  // Go frameworks - check go.mod
214
214
  const goDeps = await getGoDeps(root);
215
- if (goDeps.includes("net/http"))
215
+ if (goDeps.some((d) => d.includes("net/http")))
216
216
  frameworks.push("go-net-http");
217
- if (goDeps.includes("gin-gonic/gin"))
217
+ if (goDeps.some((d) => d.includes("gin-gonic/gin")))
218
218
  frameworks.push("gin");
219
- if (goDeps.includes("gofiber/fiber"))
219
+ if (goDeps.some((d) => d.includes("gofiber/fiber")))
220
220
  frameworks.push("fiber");
221
221
  if (goDeps.some((d) => d.includes("labstack/echo")))
222
222
  frameworks.push("echo");
@@ -0,0 +1,38 @@
1
+ /**
2
+ * Token telemetry: measures real before/after token usage by simulating
3
+ * what an AI agent would do with and without codesight context.
4
+ *
5
+ * Approach: for each standard task (explain architecture, add route, review diff),
6
+ * measure the actual bytes of context that would be consumed.
7
+ *
8
+ * "Without codesight": count tokens from the files an AI would need to read
9
+ * to discover routes, schema, components, config, etc.
10
+ *
11
+ * "With codesight": count tokens from the CODESIGHT.md output.
12
+ */
13
+ import type { ScanResult } from "./types.js";
14
/** Token measurement for one simulated task, with and without codesight output. */
export interface TelemetryTask {
    /** Task name. */
    name: string;
    /** Description of the task. */
    description: string;
    /** Files an AI would have to read for this task when codesight is not available. */
    filesRead: string[];
    /** Number of tool calls (glob, grep, read) the AI would make. */
    toolCalls: number;
    /** Tokens consumed by reading `filesRead`. */
    tokensWithout: number;
    /** Tokens consumed when using the codesight output instead. */
    tokensWith: number;
    /** Reduction factor between the two token counts. */
    reduction: number;
}
28
/** Full telemetry result for one project: per-task measurements plus totals. */
export interface TelemetryReport {
    /** Project the report was produced for. */
    project: string;
    /** One entry per measured task. */
    tasks: TelemetryTask[];
    /** Aggregate figures across all tasks. */
    summary: {
        /** Total tokens across tasks without codesight. */
        totalTokensWithout: number;
        /** Total tokens across tasks with codesight. */
        totalTokensWith: number;
        /** Average reduction factor across tasks. */
        averageReduction: number;
        /** Total number of tool calls saved. */
        totalToolCallsSaved: number;
    };
}
38
/**
 * Runs the token telemetry measurement for a scanned project.
 *
 * @param root - Project root directory.
 * @param result - Scan result to measure against.
 * @param outputDir - Codesight output directory (presumably where the telemetry report is written; confirm against the implementation).
 * @returns The telemetry report with per-task measurements and a summary.
 */
export declare function runTelemetry(root: string, result: ScanResult, outputDir: string): Promise<TelemetryReport>;