@ophan/core 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/dist/community-detectors/index.d.ts +20 -0
  2. package/dist/community-detectors/index.d.ts.map +1 -0
  3. package/dist/community-detectors/index.js +45 -0
  4. package/dist/community-detectors/label-prop.d.ts +20 -0
  5. package/dist/community-detectors/label-prop.d.ts.map +1 -0
  6. package/dist/community-detectors/label-prop.js +77 -0
  7. package/dist/community-detectors/leiden.d.ts +22 -0
  8. package/dist/community-detectors/leiden.d.ts.map +1 -0
  9. package/dist/community-detectors/leiden.js +312 -0
  10. package/dist/community-detectors/louvain.d.ts +13 -0
  11. package/dist/community-detectors/louvain.d.ts.map +1 -0
  12. package/dist/community-detectors/louvain.js +29 -0
  13. package/dist/community-detectors/types.d.ts +36 -0
  14. package/dist/community-detectors/types.d.ts.map +1 -0
  15. package/dist/{parsers/__fixtures__/no-functions.js → community-detectors/types.js} +0 -2
  16. package/dist/edge-resolvers/call.d.ts +13 -0
  17. package/dist/edge-resolvers/call.d.ts.map +1 -0
  18. package/dist/edge-resolvers/call.js +40 -0
  19. package/dist/edge-resolvers/co-location.d.ts +16 -0
  20. package/dist/edge-resolvers/co-location.d.ts.map +1 -0
  21. package/dist/edge-resolvers/co-location.js +129 -0
  22. package/dist/edge-resolvers/import.d.ts +16 -0
  23. package/dist/edge-resolvers/import.d.ts.map +1 -0
  24. package/dist/edge-resolvers/import.js +118 -0
  25. package/dist/edge-resolvers/index.d.ts +9 -0
  26. package/dist/edge-resolvers/index.d.ts.map +1 -0
  27. package/dist/edge-resolvers/index.js +29 -0
  28. package/dist/edge-resolvers/jsx-ref.d.ts +13 -0
  29. package/dist/edge-resolvers/jsx-ref.d.ts.map +1 -0
  30. package/dist/edge-resolvers/jsx-ref.js +40 -0
  31. package/dist/edge-resolvers/types.d.ts +40 -0
  32. package/dist/edge-resolvers/types.d.ts.map +1 -0
  33. package/dist/edge-resolvers/types.js +2 -0
  34. package/dist/graph.d.ts +293 -0
  35. package/dist/graph.d.ts.map +1 -0
  36. package/dist/graph.js +1295 -0
  37. package/dist/index.d.ts +37 -8
  38. package/dist/index.d.ts.map +1 -1
  39. package/dist/index.js +385 -183
  40. package/dist/migrations.d.ts +25 -0
  41. package/dist/migrations.d.ts.map +1 -0
  42. package/dist/migrations.js +323 -0
  43. package/dist/module-resolvers/index.d.ts +11 -0
  44. package/dist/module-resolvers/index.d.ts.map +1 -0
  45. package/dist/module-resolvers/index.js +67 -0
  46. package/dist/module-resolvers/javascript.d.ts +18 -0
  47. package/dist/module-resolvers/javascript.d.ts.map +1 -0
  48. package/dist/module-resolvers/javascript.js +130 -0
  49. package/dist/module-resolvers/types.d.ts +18 -0
  50. package/dist/module-resolvers/types.d.ts.map +1 -0
  51. package/dist/module-resolvers/types.js +2 -0
  52. package/dist/parsers/python.d.ts.map +1 -1
  53. package/dist/parsers/python.js +38 -4
  54. package/dist/parsers/typescript.d.ts.map +1 -1
  55. package/dist/parsers/typescript.js +133 -0
  56. package/dist/practices.d.ts +28 -0
  57. package/dist/practices.d.ts.map +1 -0
  58. package/dist/practices.js +95 -0
  59. package/dist/schemas.d.ts +251 -3
  60. package/dist/schemas.d.ts.map +1 -1
  61. package/dist/schemas.js +121 -6
  62. package/dist/shared.d.ts +8 -0
  63. package/dist/shared.d.ts.map +1 -1
  64. package/dist/summarize.d.ts +165 -0
  65. package/dist/summarize.d.ts.map +1 -0
  66. package/dist/summarize.js +1067 -0
  67. package/ophan_logo.png +0 -0
  68. package/package.json +9 -2
  69. package/dist/parsers/__fixtures__/arrow-functions.d.ts +0 -5
  70. package/dist/parsers/__fixtures__/arrow-functions.d.ts.map +0 -1
  71. package/dist/parsers/__fixtures__/arrow-functions.js +0 -16
  72. package/dist/parsers/__fixtures__/class-methods.d.ts +0 -6
  73. package/dist/parsers/__fixtures__/class-methods.d.ts.map +0 -1
  74. package/dist/parsers/__fixtures__/class-methods.js +0 -12
  75. package/dist/parsers/__fixtures__/no-functions.d.ts +0 -9
  76. package/dist/parsers/__fixtures__/no-functions.d.ts.map +0 -1
package/dist/index.js CHANGED
@@ -73,11 +73,12 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
73
73
  return (mod && mod.__esModule) ? mod : { "default": mod };
74
74
  };
75
75
  Object.defineProperty(exports, "__esModule", { value: true });
76
- exports.getSupportedExtensions = exports.computeHash = void 0;
76
+ exports.buildL1Context = exports.summarizeCommunities = exports.LeidenDetector = exports.LabelPropDetector = exports.LouvainDetector = exports.listDetectors = exports.registerDetector = exports.getDetector = exports.JavaScriptModuleResolver = exports.getModuleResolverForFile = exports.getModuleResolver = exports.registerModuleResolver = exports.ImportEdgeResolver = exports.CoLocationEdgeResolver = exports.JsxRefEdgeResolver = exports.CallEdgeResolver = exports.getEdgeResolvers = exports.registerEdgeResolver = exports.DEFAULT_COMPARISONS = exports.runComparison = exports.computeComparisonMetrics = exports.rescueDissolvedNodes = exports.addTransitiveEdges = exports.buildModuleResolver = exports.storeEdges = exports.resolveEdges = exports.DEFAULT_EDGE_WEIGHTS = exports.DEFAULT_GRAPH_CONFIG = exports.loadCommunityEdges = exports.storeCommunityEdges = exports.computeCommunityEdges = exports.computeCentrality = exports.detectCommunities = exports.buildGraph = exports.loadCommunities = exports.loadEdges = exports.saveGraphConfig = exports.loadGraphConfig = exports.computeDirectoryDistance = exports.computePackage = exports.detectHierarchicalCommunities = exports.analyzeGraph = exports.MAX_RULE_LENGTH = exports.MAX_PRACTICES = exports.buildPracticesPrompt = exports.validatePractices = exports.importPractices = exports.loadPracticesFromDb = exports.getSupportedExtensions = exports.computeHash = void 0;
77
+ exports.DEFAULT_SUMMARIZE_CONFIG = exports.summarizeCC = exports.computeCCInputHash = exports.detectCrossCuttingConcerns = exports.loadAllSummaries = exports.loadSummary = exports.storeSummary = exports.computeL3InputHash = exports.computeL2InputHash = exports.computeL1InputHash = exports.summarizeL3 = exports.summarizeL2 = exports.summarizeL1 = exports.formatPackageBreakdown = exports.computePackageBreakdown = exports.buildL1RawContext = void 0;
78
+ exports.numberLines = numberLines;
77
79
  exports.ensureGitignore = ensureGitignore;
78
80
  exports.discoverFiles = discoverFiles;
79
81
  exports.initDb = initDb;
80
- exports.migrateToAnalysisTypes = migrateToAnalysisTypes;
81
82
  exports.analyzeFunctions = analyzeFunctions;
82
83
  exports.analyzeFiles = analyzeFiles;
83
84
  exports.analyzeRepository = analyzeRepository;
@@ -88,12 +89,15 @@ exports.gcAnalysis = gcAnalysis;
88
89
  exports.refreshFileIndex = refreshFileIndex;
89
90
  exports.findMissingHashes = findMissingHashes;
90
91
  exports.importAnalysis = importAnalysis;
92
+ exports.extractAllFunctions = extractAllFunctions;
93
+ exports.populateFileIndex = populateFileIndex;
91
94
  const sdk_1 = __importDefault(require("@anthropic-ai/sdk"));
92
95
  const better_sqlite3_1 = __importDefault(require("better-sqlite3"));
93
96
  const fs = __importStar(require("fs"));
94
97
  const path = __importStar(require("path"));
95
98
  const child_process_1 = require("child_process");
96
99
  const glob_1 = require("glob");
100
+ const p_limit_1 = __importDefault(require("p-limit"));
97
101
  const p_retry_1 = __importDefault(require("p-retry"));
98
102
  const parsers_1 = require("./parsers");
99
103
  // Re-export from shared so downstream consumers (CLI, etc.) can still import from @ophan/core
@@ -102,9 +106,94 @@ Object.defineProperty(exports, "computeHash", { enumerable: true, get: function
102
106
  var parsers_2 = require("./parsers");
103
107
  Object.defineProperty(exports, "getSupportedExtensions", { enumerable: true, get: function () { return parsers_2.getSupportedExtensions; } });
104
108
  __exportStar(require("./schemas"), exports);
109
+ var practices_1 = require("./practices");
110
+ Object.defineProperty(exports, "loadPracticesFromDb", { enumerable: true, get: function () { return practices_1.loadPracticesFromDb; } });
111
+ Object.defineProperty(exports, "importPractices", { enumerable: true, get: function () { return practices_1.importPractices; } });
112
+ Object.defineProperty(exports, "validatePractices", { enumerable: true, get: function () { return practices_1.validatePractices; } });
113
+ Object.defineProperty(exports, "buildPracticesPrompt", { enumerable: true, get: function () { return practices_1.buildPracticesPrompt; } });
114
+ Object.defineProperty(exports, "MAX_PRACTICES", { enumerable: true, get: function () { return practices_1.MAX_PRACTICES; } });
115
+ Object.defineProperty(exports, "MAX_RULE_LENGTH", { enumerable: true, get: function () { return practices_1.MAX_RULE_LENGTH; } });
116
+ var graph_1 = require("./graph");
117
+ Object.defineProperty(exports, "analyzeGraph", { enumerable: true, get: function () { return graph_1.analyzeGraph; } });
118
+ Object.defineProperty(exports, "detectHierarchicalCommunities", { enumerable: true, get: function () { return graph_1.detectHierarchicalCommunities; } });
119
+ Object.defineProperty(exports, "computePackage", { enumerable: true, get: function () { return graph_1.computePackage; } });
120
+ Object.defineProperty(exports, "computeDirectoryDistance", { enumerable: true, get: function () { return graph_1.computeDirectoryDistance; } });
121
+ Object.defineProperty(exports, "loadGraphConfig", { enumerable: true, get: function () { return graph_1.loadGraphConfig; } });
122
+ Object.defineProperty(exports, "saveGraphConfig", { enumerable: true, get: function () { return graph_1.saveGraphConfig; } });
123
+ Object.defineProperty(exports, "loadEdges", { enumerable: true, get: function () { return graph_1.loadEdges; } });
124
+ Object.defineProperty(exports, "loadCommunities", { enumerable: true, get: function () { return graph_1.loadCommunities; } });
125
+ Object.defineProperty(exports, "buildGraph", { enumerable: true, get: function () { return graph_1.buildGraph; } });
126
+ Object.defineProperty(exports, "detectCommunities", { enumerable: true, get: function () { return graph_1.detectCommunities; } });
127
+ Object.defineProperty(exports, "computeCentrality", { enumerable: true, get: function () { return graph_1.computeCentrality; } });
128
+ Object.defineProperty(exports, "computeCommunityEdges", { enumerable: true, get: function () { return graph_1.computeCommunityEdges; } });
129
+ Object.defineProperty(exports, "storeCommunityEdges", { enumerable: true, get: function () { return graph_1.storeCommunityEdges; } });
130
+ Object.defineProperty(exports, "loadCommunityEdges", { enumerable: true, get: function () { return graph_1.loadCommunityEdges; } });
131
+ Object.defineProperty(exports, "DEFAULT_GRAPH_CONFIG", { enumerable: true, get: function () { return graph_1.DEFAULT_GRAPH_CONFIG; } });
132
+ Object.defineProperty(exports, "DEFAULT_EDGE_WEIGHTS", { enumerable: true, get: function () { return graph_1.DEFAULT_EDGE_WEIGHTS; } });
133
+ Object.defineProperty(exports, "resolveEdges", { enumerable: true, get: function () { return graph_1.resolveEdges; } });
134
+ Object.defineProperty(exports, "storeEdges", { enumerable: true, get: function () { return graph_1.storeEdges; } });
135
+ Object.defineProperty(exports, "buildModuleResolver", { enumerable: true, get: function () { return graph_1.buildModuleResolver; } });
136
+ Object.defineProperty(exports, "addTransitiveEdges", { enumerable: true, get: function () { return graph_1.addTransitiveEdges; } });
137
+ Object.defineProperty(exports, "rescueDissolvedNodes", { enumerable: true, get: function () { return graph_1.rescueDissolvedNodes; } });
138
+ Object.defineProperty(exports, "computeComparisonMetrics", { enumerable: true, get: function () { return graph_1.computeComparisonMetrics; } });
139
+ Object.defineProperty(exports, "runComparison", { enumerable: true, get: function () { return graph_1.runComparison; } });
140
+ Object.defineProperty(exports, "DEFAULT_COMPARISONS", { enumerable: true, get: function () { return graph_1.DEFAULT_COMPARISONS; } });
141
+ var edge_resolvers_1 = require("./edge-resolvers");
142
+ Object.defineProperty(exports, "registerEdgeResolver", { enumerable: true, get: function () { return edge_resolvers_1.registerEdgeResolver; } });
143
+ Object.defineProperty(exports, "getEdgeResolvers", { enumerable: true, get: function () { return edge_resolvers_1.getEdgeResolvers; } });
144
+ Object.defineProperty(exports, "CallEdgeResolver", { enumerable: true, get: function () { return edge_resolvers_1.CallEdgeResolver; } });
145
+ Object.defineProperty(exports, "JsxRefEdgeResolver", { enumerable: true, get: function () { return edge_resolvers_1.JsxRefEdgeResolver; } });
146
+ Object.defineProperty(exports, "CoLocationEdgeResolver", { enumerable: true, get: function () { return edge_resolvers_1.CoLocationEdgeResolver; } });
147
+ Object.defineProperty(exports, "ImportEdgeResolver", { enumerable: true, get: function () { return edge_resolvers_1.ImportEdgeResolver; } });
148
+ var module_resolvers_1 = require("./module-resolvers");
149
+ Object.defineProperty(exports, "registerModuleResolver", { enumerable: true, get: function () { return module_resolvers_1.registerModuleResolver; } });
150
+ Object.defineProperty(exports, "getModuleResolver", { enumerable: true, get: function () { return module_resolvers_1.getModuleResolver; } });
151
+ Object.defineProperty(exports, "getModuleResolverForFile", { enumerable: true, get: function () { return module_resolvers_1.getModuleResolverForFile; } });
152
+ Object.defineProperty(exports, "JavaScriptModuleResolver", { enumerable: true, get: function () { return module_resolvers_1.JavaScriptModuleResolver; } });
153
+ var community_detectors_1 = require("./community-detectors");
154
+ Object.defineProperty(exports, "getDetector", { enumerable: true, get: function () { return community_detectors_1.getDetector; } });
155
+ Object.defineProperty(exports, "registerDetector", { enumerable: true, get: function () { return community_detectors_1.registerDetector; } });
156
+ Object.defineProperty(exports, "listDetectors", { enumerable: true, get: function () { return community_detectors_1.listDetectors; } });
157
+ Object.defineProperty(exports, "LouvainDetector", { enumerable: true, get: function () { return community_detectors_1.LouvainDetector; } });
158
+ Object.defineProperty(exports, "LabelPropDetector", { enumerable: true, get: function () { return community_detectors_1.LabelPropDetector; } });
159
+ Object.defineProperty(exports, "LeidenDetector", { enumerable: true, get: function () { return community_detectors_1.LeidenDetector; } });
160
+ var summarize_1 = require("./summarize");
161
+ Object.defineProperty(exports, "summarizeCommunities", { enumerable: true, get: function () { return summarize_1.summarizeCommunities; } });
162
+ Object.defineProperty(exports, "buildL1Context", { enumerable: true, get: function () { return summarize_1.buildL1Context; } });
163
+ Object.defineProperty(exports, "buildL1RawContext", { enumerable: true, get: function () { return summarize_1.buildL1RawContext; } });
164
+ Object.defineProperty(exports, "computePackageBreakdown", { enumerable: true, get: function () { return summarize_1.computePackageBreakdown; } });
165
+ Object.defineProperty(exports, "formatPackageBreakdown", { enumerable: true, get: function () { return summarize_1.formatPackageBreakdown; } });
166
+ Object.defineProperty(exports, "summarizeL1", { enumerable: true, get: function () { return summarize_1.summarizeL1; } });
167
+ Object.defineProperty(exports, "summarizeL2", { enumerable: true, get: function () { return summarize_1.summarizeL2; } });
168
+ Object.defineProperty(exports, "summarizeL3", { enumerable: true, get: function () { return summarize_1.summarizeL3; } });
169
+ Object.defineProperty(exports, "computeL1InputHash", { enumerable: true, get: function () { return summarize_1.computeL1InputHash; } });
170
+ Object.defineProperty(exports, "computeL2InputHash", { enumerable: true, get: function () { return summarize_1.computeL2InputHash; } });
171
+ Object.defineProperty(exports, "computeL3InputHash", { enumerable: true, get: function () { return summarize_1.computeL3InputHash; } });
172
+ Object.defineProperty(exports, "storeSummary", { enumerable: true, get: function () { return summarize_1.storeSummary; } });
173
+ Object.defineProperty(exports, "loadSummary", { enumerable: true, get: function () { return summarize_1.loadSummary; } });
174
+ Object.defineProperty(exports, "loadAllSummaries", { enumerable: true, get: function () { return summarize_1.loadAllSummaries; } });
175
+ Object.defineProperty(exports, "detectCrossCuttingConcerns", { enumerable: true, get: function () { return summarize_1.detectCrossCuttingConcerns; } });
176
+ Object.defineProperty(exports, "computeCCInputHash", { enumerable: true, get: function () { return summarize_1.computeCCInputHash; } });
177
+ Object.defineProperty(exports, "summarizeCC", { enumerable: true, get: function () { return summarize_1.summarizeCC; } });
178
+ Object.defineProperty(exports, "DEFAULT_SUMMARIZE_CONFIG", { enumerable: true, get: function () { return summarize_1.DEFAULT_SUMMARIZE_CONFIG; } });
105
179
  const schemas_1 = require("./schemas");
180
+ const practices_2 = require("./practices");
181
+ const migrations_1 = require("./migrations");
106
182
  /** Skip files larger than this — catches minified bundles, generated code, etc. */
107
183
  const MAX_FILE_SIZE_BYTES = 50 * 1024; // 50 KB
184
+ /**
185
+ * Pre-process source code with visible line numbers for the Claude prompt.
186
+ * Claude reads these labels to report issue locations accurately (instead of counting lines).
187
+ * The numbered output is ONLY used in the prompt — content hashes are computed from
188
+ * the original unnumbered source.
189
+ */
190
+ function numberLines(sourceCode) {
191
+ const lines = sourceCode.split("\n");
192
+ const pad = String(lines.length).length;
193
+ return lines
194
+ .map((line, i) => `${String(i + 1).padStart(pad)} | ${line}`)
195
+ .join("\n");
196
+ }
108
197
  /**
109
198
  * Ensure .ophan/ is in .gitignore. Only acts in git repos.
110
199
  * Creates .gitignore if the repo has .git/ but no .gitignore.
@@ -125,6 +214,7 @@ const HARDCODED_IGNORES = [
125
214
  "**/node_modules/**", "**/.ophan/**", "**/dist/**",
126
215
  "**/__pycache__/**", "**/.venv/**", "**/venv/**", "**/env/**",
127
216
  "**/.tox/**", "**/.eggs/**", "**/build/**",
217
+ "**/.output/**", "**/.next/**", "**/.nuxt/**", "**/.turbo/**", "**/.cache/**",
128
218
  ];
129
219
  /**
130
220
  * Discover source files using git (respects .gitignore) with glob fallback.
@@ -142,7 +232,7 @@ async function discoverFiles(rootPath) {
142
232
  .filter((f) => extSet.has(path.extname(f).toLowerCase()))
143
233
  .map((f) => path.resolve(rootPath, f));
144
234
  // Still apply hardcoded ignores as safety net (e.g. checked-in node_modules)
145
- const ignoreSegments = ["node_modules", ".ophan", "__pycache__", ".venv", "venv", ".tox", ".eggs"];
235
+ const ignoreSegments = ["node_modules", ".ophan", "__pycache__", ".venv", "venv", ".tox", ".eggs", ".output", ".next", ".nuxt", ".turbo", ".cache"];
146
236
  return files.filter((f) => !ignoreSegments.some((seg) => f.includes(`/${seg}/`) || f.includes(`\\${seg}\\`)));
147
237
  }
148
238
  catch {
@@ -158,43 +248,55 @@ async function discoverFiles(rootPath) {
158
248
  // ============ DATABASE ============
159
249
  function initDb(dbPath) {
160
250
  const db = new better_sqlite3_1.default(dbPath);
161
- // Check if we need the analysis_type migration
162
- const columns = db.prepare("PRAGMA table_info(function_analysis)").all();
163
- const hasAnalysisType = columns.some((c) => c.name === "analysis_type");
164
- if (columns.length > 0 && !hasAnalysisType) {
165
- // Existing DB without analysis_type — run migration
166
- migrateToAnalysisTypes(db);
167
- }
168
- else if (columns.length === 0) {
169
- // Fresh DB — create with new schema directly
251
+ db.pragma("journal_mode = WAL");
252
+ // Fresh DB: create current schema directly, mark all migrations as applied.
253
+ // Existing DB: run any pending migrations to bring schema up to date.
254
+ const tables = db
255
+ .prepare("SELECT name FROM sqlite_master WHERE type='table'")
256
+ .all();
257
+ if (tables.length === 0) {
170
258
  createFreshSchema(db);
259
+ (0, migrations_1.bootstrapMigrations)(db, migrations_1.CORE_MIGRATIONS, "core:bootstrap");
260
+ }
261
+ else {
262
+ (0, migrations_1.runMigrations)(db, migrations_1.CORE_MIGRATIONS, "core");
171
263
  }
172
- // else: already migrated, no action needed
173
- // Ensure supporting tables exist
264
+ return db;
265
+ }
266
+ /**
267
+ * Create the full current schema for a fresh database.
268
+ * Includes all tables, indexes, and the migration tracking table.
269
+ */
270
+ function createFreshSchema(db) {
174
271
  db.exec(`
175
- CREATE TABLE IF NOT EXISTS file_functions (
272
+ CREATE TABLE function_analysis (
273
+ content_hash TEXT NOT NULL,
274
+ analysis_type TEXT NOT NULL,
275
+ analysis JSON NOT NULL,
276
+ model_version TEXT NOT NULL,
277
+ schema_version INTEGER NOT NULL DEFAULT 1,
278
+ created_at INTEGER NOT NULL,
279
+ last_seen_at INTEGER NOT NULL,
280
+ language TEXT NOT NULL DEFAULT 'typescript',
281
+ entity_type TEXT NOT NULL DEFAULT 'function',
282
+ synced_at INTEGER,
283
+ PRIMARY KEY (content_hash, analysis_type)
284
+ )
285
+ `);
286
+ db.exec(`
287
+ CREATE TABLE file_functions (
176
288
  file_path TEXT NOT NULL,
177
289
  function_name TEXT NOT NULL,
178
290
  content_hash TEXT NOT NULL,
179
291
  file_mtime INTEGER NOT NULL,
180
292
  language TEXT NOT NULL DEFAULT 'typescript',
181
- entity_type TEXT NOT NULL DEFAULT 'function'
293
+ entity_type TEXT NOT NULL DEFAULT 'function',
294
+ start_line INTEGER NOT NULL DEFAULT 0,
295
+ PRIMARY KEY (file_path, function_name)
182
296
  )
183
297
  `);
184
- // Migration: add columns to file_functions for existing databases
185
- const ffColumns = db.prepare("PRAGMA table_info(file_functions)").all();
186
- if (!ffColumns.some((c) => c.name === "language")) {
187
- try {
188
- db.exec("ALTER TABLE file_functions ADD COLUMN language TEXT NOT NULL DEFAULT 'typescript'");
189
- }
190
- catch (_) { }
191
- try {
192
- db.exec("ALTER TABLE file_functions ADD COLUMN entity_type TEXT NOT NULL DEFAULT 'function'");
193
- }
194
- catch (_) { }
195
- }
196
298
  db.exec(`
197
- CREATE TABLE IF NOT EXISTS function_gc (
299
+ CREATE TABLE function_gc (
198
300
  content_hash TEXT NOT NULL,
199
301
  analysis_type TEXT,
200
302
  gc_at INTEGER NOT NULL,
@@ -202,132 +304,88 @@ function initDb(dbPath) {
202
304
  PRIMARY KEY (content_hash, analysis_type)
203
305
  )
204
306
  `);
205
- // Migration: add analysis_type to function_gc for existing databases
206
- const gcColumns = db.prepare("PRAGMA table_info(function_gc)").all();
207
- if (!gcColumns.some((c) => c.name === "analysis_type")) {
208
- try {
209
- db.exec("ALTER TABLE function_gc ADD COLUMN analysis_type TEXT");
210
- }
211
- catch (_) { }
212
- // Rebuild PK by recreating table
213
- db.exec(`
214
- CREATE TABLE IF NOT EXISTS function_gc_v2 (
215
- content_hash TEXT NOT NULL,
216
- analysis_type TEXT,
217
- gc_at INTEGER NOT NULL,
218
- synced_at INTEGER,
219
- PRIMARY KEY (content_hash, analysis_type)
220
- )
221
- `);
222
- db.exec("INSERT OR IGNORE INTO function_gc_v2 SELECT content_hash, NULL, gc_at, synced_at FROM function_gc");
223
- db.exec("DROP TABLE function_gc");
224
- db.exec("ALTER TABLE function_gc_v2 RENAME TO function_gc");
225
- }
226
307
  db.exec(`
227
- CREATE TABLE IF NOT EXISTS sync_meta (
308
+ CREATE TABLE sync_meta (
228
309
  key TEXT PRIMARY KEY,
229
310
  value TEXT NOT NULL
230
311
  )
231
312
  `);
232
- db.exec("CREATE INDEX IF NOT EXISTS idx_file_functions_path ON file_functions(file_path)");
233
- db.exec("CREATE INDEX IF NOT EXISTS idx_file_functions_hash ON file_functions(content_hash)");
234
- db.exec("CREATE INDEX IF NOT EXISTS idx_fa_hash ON function_analysis(content_hash)");
235
- db.exec("CREATE INDEX IF NOT EXISTS idx_fa_type ON function_analysis(analysis_type)");
236
- return db;
237
- }
238
- function createFreshSchema(db) {
239
313
  db.exec(`
240
- CREATE TABLE IF NOT EXISTS function_analysis (
314
+ CREATE TABLE practices (
315
+ practice_id TEXT PRIMARY KEY,
316
+ rule TEXT NOT NULL,
317
+ severity TEXT NOT NULL DEFAULT 'warning'
318
+ )
319
+ `);
320
+ // Graph tables
321
+ db.exec(`
322
+ CREATE TABLE function_edges (
323
+ source_hash TEXT NOT NULL,
324
+ target_hash TEXT NOT NULL,
325
+ edge_type TEXT NOT NULL,
326
+ weight REAL NOT NULL DEFAULT 1.0,
327
+ PRIMARY KEY (source_hash, target_hash, edge_type)
328
+ )
329
+ `);
330
+ db.exec(`
331
+ CREATE TABLE communities (
241
332
  content_hash TEXT NOT NULL,
242
- analysis_type TEXT NOT NULL,
243
- analysis JSON NOT NULL,
244
- model_version TEXT NOT NULL,
245
- schema_version INTEGER NOT NULL DEFAULT 1,
333
+ level INTEGER NOT NULL,
334
+ community_id TEXT NOT NULL,
335
+ algorithm TEXT NOT NULL DEFAULT 'louvain',
336
+ PRIMARY KEY (content_hash, level, algorithm)
337
+ )
338
+ `);
339
+ db.exec(`
340
+ CREATE TABLE community_summaries (
341
+ community_id TEXT NOT NULL,
342
+ level INTEGER NOT NULL,
343
+ algorithm TEXT NOT NULL DEFAULT 'louvain',
344
+ input_hash TEXT NOT NULL,
345
+ summary JSON NOT NULL,
346
+ model_version TEXT,
246
347
  created_at INTEGER NOT NULL,
247
- last_seen_at INTEGER NOT NULL,
248
- language TEXT NOT NULL DEFAULT 'typescript',
249
- entity_type TEXT NOT NULL DEFAULT 'function',
250
- synced_at INTEGER,
251
- PRIMARY KEY (content_hash, analysis_type)
348
+ PRIMARY KEY (community_id, level, algorithm)
252
349
  )
253
350
  `);
254
- }
255
- /**
256
- * Migrate from single-blob function_analysis to split analysis types.
257
- * Creates v2 table, splits each existing row into 'documentation' + 'security' rows,
258
- * drops old table, renames. Sets synced_at = NULL to force full re-sync.
259
- */
260
- function migrateToAnalysisTypes(db) {
261
- db.exec("BEGIN TRANSACTION");
262
- try {
263
- db.exec(`
264
- CREATE TABLE function_analysis_v2 (
265
- content_hash TEXT NOT NULL,
266
- analysis_type TEXT NOT NULL,
267
- analysis JSON NOT NULL,
268
- model_version TEXT NOT NULL,
269
- schema_version INTEGER NOT NULL DEFAULT 1,
270
- created_at INTEGER NOT NULL,
271
- last_seen_at INTEGER NOT NULL,
272
- language TEXT NOT NULL DEFAULT 'typescript',
273
- entity_type TEXT NOT NULL DEFAULT 'function',
274
- synced_at INTEGER,
275
- PRIMARY KEY (content_hash, analysis_type)
276
- )
277
- `);
278
- // Split documentation fields
279
- db.exec(`
280
- INSERT OR IGNORE INTO function_analysis_v2
281
- (content_hash, analysis_type, analysis, model_version, schema_version, created_at, last_seen_at, language, entity_type, synced_at)
282
- SELECT
283
- content_hash,
284
- 'documentation',
285
- json_object(
286
- 'description', json_extract(analysis, '$.description'),
287
- 'params', json_extract(analysis, '$.params'),
288
- 'returns', json_extract(analysis, '$.returns')
289
- ),
290
- model_version,
291
- 1,
292
- created_at,
293
- last_seen_at,
294
- COALESCE(language, 'typescript'),
295
- COALESCE(entity_type, 'function'),
296
- NULL
297
- FROM function_analysis
298
- `);
299
- // Split security fields
300
- db.exec(`
301
- INSERT OR IGNORE INTO function_analysis_v2
302
- (content_hash, analysis_type, analysis, model_version, schema_version, created_at, last_seen_at, language, entity_type, synced_at)
303
- SELECT
304
- content_hash,
305
- 'security',
306
- json_object(
307
- 'dataTags', json_extract(analysis, '$.dataTags'),
308
- 'securityFlags', json_extract(analysis, '$.securityFlags')
309
- ),
310
- model_version,
311
- 1,
312
- created_at,
313
- last_seen_at,
314
- COALESCE(language, 'typescript'),
315
- COALESCE(entity_type, 'function'),
316
- NULL
317
- FROM function_analysis
318
- `);
319
- db.exec("DROP TABLE function_analysis");
320
- db.exec("ALTER TABLE function_analysis_v2 RENAME TO function_analysis");
321
- db.exec("COMMIT");
322
- }
323
- catch (e) {
324
- db.exec("ROLLBACK");
325
- throw e;
326
- }
351
+ db.exec(`
352
+ CREATE TABLE graph_config (
353
+ key TEXT PRIMARY KEY,
354
+ value TEXT NOT NULL
355
+ )
356
+ `);
357
+ db.exec(`
358
+ CREATE TABLE community_edges (
359
+ source_community TEXT NOT NULL,
360
+ target_community TEXT NOT NULL,
361
+ algorithm TEXT NOT NULL DEFAULT 'louvain',
362
+ weight REAL NOT NULL DEFAULT 0,
363
+ edge_count INTEGER NOT NULL DEFAULT 0,
364
+ PRIMARY KEY (source_community, target_community, algorithm)
365
+ )
366
+ `);
367
+ db.exec(`
368
+ CREATE TABLE community_signatures (
369
+ community_id TEXT NOT NULL,
370
+ algorithm TEXT NOT NULL DEFAULT 'louvain',
371
+ signatures JSON NOT NULL,
372
+ PRIMARY KEY (community_id, algorithm)
373
+ )
374
+ `);
375
+ // Indexes
376
+ db.exec("CREATE INDEX idx_file_functions_path ON file_functions(file_path)");
377
+ db.exec("CREATE INDEX idx_file_functions_hash ON file_functions(content_hash)");
378
+ db.exec("CREATE INDEX idx_fa_hash ON function_analysis(content_hash)");
379
+ db.exec("CREATE INDEX idx_fa_type ON function_analysis(analysis_type)");
380
+ db.exec("CREATE INDEX idx_edges_source ON function_edges(source_hash)");
381
+ db.exec("CREATE INDEX idx_edges_target ON function_edges(target_hash)");
382
+ db.exec("CREATE INDEX idx_communities_id ON communities(community_id)");
383
+ db.exec("CREATE INDEX idx_ce_source ON community_edges(source_community)");
384
+ db.exec("CREATE INDEX idx_ce_target ON community_edges(target_community)");
327
385
  }
328
386
  // ============ LLM ANALYSIS ============
329
387
  const anthropic = new sdk_1.default();
330
- async function analyzeFunctions(functions) {
388
+ async function analyzeFunctions(functions, practices) {
331
389
  if (functions.length === 0)
332
390
  return [];
333
391
  // Use the language from the first function in the batch (batches are always same-file)
@@ -341,6 +399,7 @@ async function analyzeFunctions(functions) {
341
399
  };
342
400
  const securityHints = securityExamples[lang] || securityExamples.typescript;
343
401
  const prompt = `Analyze these ${langLabel} functions. Return JSON array with one object per function.
402
+ Source lines are numbered for reference — when reporting line numbers, read the label at the start of the line, don't count.
344
403
 
345
404
  Each object must have:
346
405
  - name: string (function name, must match input)
@@ -348,17 +407,34 @@ Each object must have:
348
407
  - params: array of { name, type, description }
349
408
  - returns: { type, description }
350
409
  - dataTags: array from [user_input, pii, credentials, database, external_api, file_system, config, internal]
351
- - securityFlags: array of concerns like [${securityHints}] or empty
352
-
410
+ - securityFlags: array of flag names found (e.g. [${securityHints}]) or empty
411
+ - issues: array of detailed findings (empty if no security concerns), each with:
412
+ - flag: the security concern identifier (must also appear in securityFlags)
413
+ - title: short one-line title
414
+ - description: what's wrong (1-2 sentences)
415
+ - explanation: why it's a problem and how to fix (2-4 sentences)
416
+ - line: the line number shown at the start of the problematic line (read the number, don't count)
417
+ - startText: the first ~30 characters of the problematic code on that line
418
+ - endText: the last ~20 characters of the problematic code on that line
419
+ - severity: "error" | "warning" | "info"
420
+ - confidence: "high" | "medium" | "low"
421
+ high = the vulnerability is clearly exploitable given the visible code
422
+ (e.g., user input directly concatenated into SQL, dangerouslySetInnerHTML with user content)
423
+ medium = the pattern is risky but exploitability depends on how callers
424
+ use this function (e.g., file path parameter without validation)
425
+ low = the pattern matches a known vulnerability signature but is common
426
+ safe practice or requires unusual conditions to exploit
427
+ (e.g., prop spreading in React, Object.assign with internal data)
428
+ ${practices && practices.length > 0 ? (0, practices_2.buildPracticesPrompt)(practices) : ""}
353
429
  Functions:
354
430
  ${functions
355
- .map((fn) => `### ${fn.name}\n\`\`\`${lang}\n${fn.sourceCode}\n\`\`\``)
431
+ .map((fn) => `### ${fn.name}\n\`\`\`${lang}\n${numberLines(fn.sourceCode)}\n\`\`\``)
356
432
  .join("\n\n")}
357
433
 
358
434
  CRITICAL: Return ONLY the raw JSON array. No markdown, no code fences, no \`\`\`json blocks, no explanation. Just the [ ... ] array directly.`;
359
435
  const response = await (0, p_retry_1.default)(() => anthropic.messages.create({
360
436
  model: "claude-sonnet-4-20250514",
361
- max_tokens: 4096,
437
+ max_tokens: 8192,
362
438
  messages: [{ role: "user", content: prompt }],
363
439
  }), {
364
440
  retries: 4,
@@ -384,6 +460,8 @@ CRITICAL: Return ONLY the raw JSON array. No markdown, no code fences, no \`\`\`
384
460
  returns: validated.returns,
385
461
  dataTags: validated.dataTags,
386
462
  securityFlags: validated.securityFlags,
463
+ issues: validated.issues,
464
+ practiceViolations: validated.practiceViolations,
387
465
  };
388
466
  });
389
467
  }
@@ -396,6 +474,8 @@ CRITICAL: Return ONLY the raw JSON array. No markdown, no code fences, no \`\`\`
396
474
  returns: { type: "unknown", description: "" },
397
475
  dataTags: [],
398
476
  securityFlags: [],
477
+ issues: [],
478
+ practiceViolations: [],
399
479
  }));
400
480
  }
401
481
  }
@@ -405,7 +485,7 @@ CRITICAL: Return ONLY the raw JSON array. No markdown, no code fences, no \`\`\`
405
485
  * The caller is responsible for DB lifecycle (open/close).
406
486
  */
407
487
  async function analyzeFiles(db, rootPath, files, options = {}) {
408
- const { pullFn, onProgress } = options;
488
+ const { pullFn, onProgress, onAnalysisProgress } = options;
409
489
  const now = Math.floor(Date.now() / 1000);
410
490
  let totalAnalyzed = 0;
411
491
  let totalSkippedSize = 0;
@@ -413,23 +493,36 @@ async function analyzeFiles(db, rootPath, files, options = {}) {
413
493
  let totalPulled = 0;
414
494
  // Prepared statements
415
495
  const getFileMtime = db.prepare("SELECT file_mtime FROM file_functions WHERE file_path = ? LIMIT 1");
416
- // Check for documentation type if it exists, security exists too (always written together)
417
- const checkAnalysis = db.prepare("SELECT 1 FROM function_analysis WHERE content_hash = ? AND analysis_type = 'documentation'");
496
+ // Check for analysis at current schema versions.
497
+ // If a row exists but with an older schema_version, it's treated as a cache miss
498
+ // so the function gets re-analyzed with the current schema.
499
+ const minSchemaVersion = Math.min(schemas_1.SCHEMA_VERSIONS.documentation, schemas_1.SCHEMA_VERSIONS.security);
500
+ const checkAnalysis = db.prepare(`SELECT 1 FROM function_analysis
501
+ WHERE content_hash = ? AND analysis_type = 'documentation'
502
+ AND schema_version >= ?`);
503
+ const getFileHashes = db.prepare("SELECT content_hash FROM file_functions WHERE file_path = ?");
504
+ // INSERT OR REPLACE: overwrites stale rows when schema_version bumps trigger re-analysis.
505
+ // Previously INSERT OR IGNORE, but that would keep old-version rows forever.
418
506
  const insertDocAnalysis = db.prepare(`
419
- INSERT OR IGNORE INTO function_analysis
507
+ INSERT OR REPLACE INTO function_analysis
420
508
  (content_hash, analysis_type, analysis, model_version, schema_version, created_at, last_seen_at, language, entity_type)
421
509
  VALUES (?, 'documentation', ?, ?, ?, ?, ?, ?, ?)
422
510
  `);
423
511
  const insertSecAnalysis = db.prepare(`
424
- INSERT OR IGNORE INTO function_analysis
512
+ INSERT OR REPLACE INTO function_analysis
425
513
  (content_hash, analysis_type, analysis, model_version, schema_version, created_at, last_seen_at, language, entity_type)
426
514
  VALUES (?, 'security', ?, ?, ?, ?, ?, ?, ?)
515
+ `);
516
+ const insertPracAnalysis = db.prepare(`
517
+ INSERT OR REPLACE INTO function_analysis
518
+ (content_hash, analysis_type, analysis, model_version, schema_version, created_at, last_seen_at, language, entity_type)
519
+ VALUES (?, 'practices', ?, ?, ?, ?, ?, ?, ?)
427
520
  `);
428
521
  const deleteFileEntries = db.prepare("DELETE FROM file_functions WHERE file_path = ?");
429
522
  const insertFileFunction = db.prepare(`
430
- INSERT INTO file_functions
431
- (file_path, function_name, content_hash, file_mtime, language, entity_type)
432
- VALUES (?, ?, ?, ?, ?, ?)
523
+ INSERT OR REPLACE INTO file_functions
524
+ (file_path, function_name, content_hash, file_mtime, language, entity_type, start_line)
525
+ VALUES (?, ?, ?, ?, ?, ?, ?)
433
526
  `);
434
527
  const pendingFiles = [];
435
528
  const allMissingHashes = new Set();
@@ -445,7 +538,14 @@ async function analyzeFiles(db, rootPath, files, options = {}) {
445
538
  const currentMtime = Math.floor(stat.mtimeMs);
446
539
  const storedRow = getFileMtime.get(file);
447
540
  if (storedRow && storedRow.file_mtime === currentMtime) {
448
- continue;
541
+ // Mtime unchanged — check if all functions already have analysis.
542
+ // If any lack analysis (e.g. init ran but analyze never did), re-process.
543
+ const fileHashes = getFileHashes.all(file);
544
+ const allAnalyzed = fileHashes.length > 0 && fileHashes.every((row) => checkAnalysis.get(row.content_hash, minSchemaVersion));
545
+ if (allAnalyzed) {
546
+ totalSkipped += fileHashes.length;
547
+ continue;
548
+ }
449
549
  }
450
550
  const parser = (0, parsers_1.getParserForFile)(file);
451
551
  if (!parser)
@@ -457,7 +557,7 @@ async function analyzeFiles(db, rootPath, files, options = {}) {
457
557
  }
458
558
  const needsAnalysis = [];
459
559
  for (const fn of functions) {
460
- if (checkAnalysis.get(fn.contentHash)) {
560
+ if (checkAnalysis.get(fn.contentHash, minSchemaVersion)) {
461
561
  totalSkipped++;
462
562
  }
463
563
  else {
@@ -469,43 +569,88 @@ async function analyzeFiles(db, rootPath, files, options = {}) {
469
569
  }
470
570
  // Phase 2: Pull from cloud to avoid redundant Claude calls
471
571
  if (pullFn && allMissingHashes.size > 0) {
472
- await pullFn([...allMissingHashes]);
572
+ try {
573
+ await pullFn([...allMissingHashes]);
574
+ }
575
+ catch {
576
+ // Pull is a non-critical optimization — continue with local analysis
577
+ }
473
578
  }
474
- // Phase 3: Run Claude analysis on hashes still missing, update file_functions
579
+ // Phase 3: Re-check after pull, compute totals, run Claude analysis
475
580
  const modelVersion = "claude-sonnet-4-20250514";
476
- for (const pending of pendingFiles) {
477
- // Re-check after pull — some hashes may now exist locally
581
+ const phase3Files = pendingFiles.map((pending) => {
478
582
  const stillNeeds = pullFn
479
- ? pending.needsAnalysis.filter((fn) => !checkAnalysis.get(fn.contentHash))
583
+ ? pending.needsAnalysis.filter((fn) => !checkAnalysis.get(fn.contentHash, minSchemaVersion))
480
584
  : pending.needsAnalysis;
481
585
  const pulled = pending.needsAnalysis.length - stillNeeds.length;
482
586
  totalPulled += pulled;
483
587
  totalSkipped += pulled;
484
- for (let j = 0; j < stillNeeds.length; j += 10) {
485
- const batch = stillNeeds.slice(j, j + 10);
486
- const analyzed = await analyzeFunctions(batch);
487
- for (const fn of analyzed) {
488
- insertDocAnalysis.run(fn.contentHash, JSON.stringify({
489
- description: fn.description,
490
- params: fn.params,
491
- returns: fn.returns,
492
- }), modelVersion, schemas_1.SCHEMA_VERSIONS.documentation, now, now, fn.language, fn.entityType);
493
- insertSecAnalysis.run(fn.contentHash, JSON.stringify({
494
- dataTags: fn.dataTags,
495
- securityFlags: fn.securityFlags,
496
- }), modelVersion, schemas_1.SCHEMA_VERSIONS.security, now, now, fn.language, fn.entityType);
588
+ return { ...pending, stillNeeds };
589
+ });
590
+ // Flatten and dedupe all functions needing Claude analysis
591
+ const allStillNeeds = [];
592
+ const seenHashes = new Set();
593
+ for (const pending of phase3Files) {
594
+ for (const fn of pending.stillNeeds) {
595
+ if (!seenHashes.has(fn.contentHash)) {
596
+ seenHashes.add(fn.contentHash);
597
+ allStillNeeds.push(fn);
497
598
  }
498
- totalAnalyzed += analyzed.length;
499
599
  }
500
- // Rebuild file_functions for this file
600
+ }
601
+ const totalNeedsAnalysis = allStillNeeds.length;
602
+ // Batch into groups of 10, fire with bounded concurrency
603
+ const analysisBatches = [];
604
+ for (let i = 0; i < allStillNeeds.length; i += 10) {
605
+ analysisBatches.push(allStillNeeds.slice(i, i + 10));
606
+ }
607
+ const analysisLimit = (0, p_limit_1.default)(5);
608
+ const { practices } = options;
609
+ await Promise.all(analysisBatches.map((batch) => analysisLimit(async () => {
610
+ const analyzed = await analyzeFunctions(batch, practices);
611
+ for (const fn of analyzed) {
612
+ // Compute lineText for each issue — trimmed source line at analysis time
613
+ // Used by the extension for modification detection (see docs/research/issue-remediation-detection.md)
614
+ const sourceLines = fn.sourceCode.split("\n");
615
+ for (const issue of fn.issues) {
616
+ if (issue.line > 0 && issue.line <= sourceLines.length) {
617
+ issue.lineText = sourceLines[issue.line - 1].trim();
618
+ }
619
+ }
620
+ for (const pv of fn.practiceViolations) {
621
+ if (pv.line > 0 && pv.line <= sourceLines.length) {
622
+ pv.lineText = sourceLines[pv.line - 1].trim();
623
+ }
624
+ }
625
+ insertDocAnalysis.run(fn.contentHash, JSON.stringify({
626
+ description: fn.description,
627
+ params: fn.params,
628
+ returns: fn.returns,
629
+ }), modelVersion, schemas_1.SCHEMA_VERSIONS.documentation, now, now, fn.language, fn.entityType);
630
+ insertSecAnalysis.run(fn.contentHash, JSON.stringify({
631
+ dataTags: fn.dataTags,
632
+ securityFlags: fn.securityFlags,
633
+ issues: fn.issues,
634
+ }), modelVersion, schemas_1.SCHEMA_VERSIONS.security, now, now, fn.language, fn.entityType);
635
+ if (practices && practices.length > 0) {
636
+ insertPracAnalysis.run(fn.contentHash, JSON.stringify({
637
+ violations: fn.practiceViolations,
638
+ }), modelVersion, schemas_1.SCHEMA_VERSIONS.practices, now, now, fn.language, fn.entityType);
639
+ }
640
+ }
641
+ totalAnalyzed += analyzed.length;
642
+ onAnalysisProgress?.(totalAnalyzed, totalNeedsAnalysis);
643
+ })));
644
+ // Rebuild file_functions for all pending files
645
+ for (const pending of phase3Files) {
501
646
  deleteFileEntries.run(pending.file);
502
647
  for (const fn of pending.functions) {
503
- insertFileFunction.run(pending.file, fn.name, fn.contentHash, pending.mtime, fn.language, fn.entityType);
648
+ insertFileFunction.run(pending.file, fn.name, fn.contentHash, pending.mtime, fn.language, fn.entityType, fn.startLine);
504
649
  }
505
650
  }
506
651
  return { analyzed: totalAnalyzed, skipped: totalSkipped, skippedSize: totalSkippedSize, pulled: totalPulled };
507
652
  }
508
- async function analyzeRepository(rootPath, onProgress, pullFn) {
653
+ async function analyzeRepository(rootPath, onProgress, pullFn, onAnalysisProgress) {
509
654
  const dbPath = path.join(rootPath, ".ophan", "index.db");
510
655
  fs.mkdirSync(path.join(rootPath, ".ophan"), { recursive: true });
511
656
  // Auto-add .ophan/ to .gitignore (only in git repos)
@@ -513,7 +658,8 @@ async function analyzeRepository(rootPath, onProgress, pullFn) {
513
658
  const db = initDb(dbPath);
514
659
  const now = Math.floor(Date.now() / 1000);
515
660
  const files = await discoverFiles(rootPath);
516
- const result = await analyzeFiles(db, rootPath, files, { pullFn, onProgress });
661
+ const practices = (0, practices_2.loadPracticesFromDb)(db);
662
+ const result = await analyzeFiles(db, rootPath, files, { pullFn, onProgress, onAnalysisProgress, practices });
517
663
  // Clean up entries for deleted files
518
664
  const fileSet = new Set(files);
519
665
  const deleteFileEntries = db.prepare("DELETE FROM file_functions WHERE file_path = ?");
@@ -539,12 +685,15 @@ async function analyzeRepository(rootPath, onProgress, pullFn) {
539
685
  function mergeAnalysisRows(rows) {
540
686
  let doc = {};
541
687
  let sec = {};
688
+ let prac = {};
542
689
  for (const row of rows) {
543
690
  const parsed = JSON.parse(row.analysis);
544
691
  if (row.analysis_type === "documentation")
545
692
  doc = parsed;
546
693
  else if (row.analysis_type === "security")
547
694
  sec = parsed;
695
+ else if (row.analysis_type === "practices")
696
+ prac = parsed;
548
697
  }
549
698
  return {
550
699
  description: doc.description || "",
@@ -552,6 +701,8 @@ function mergeAnalysisRows(rows) {
552
701
  returns: doc.returns || { type: "unknown", description: "" },
553
702
  dataTags: sec.dataTags || [],
554
703
  securityFlags: sec.securityFlags || [],
704
+ issues: sec.issues || [],
705
+ practiceViolations: prac.violations || [],
555
706
  };
556
707
  }
557
708
  function getAnalysisForFile(dbPath, filePath) {
@@ -631,9 +782,9 @@ async function refreshFileIndex(rootPath, onProgress) {
631
782
  const getFileMtime = db.prepare("SELECT file_mtime FROM file_functions WHERE file_path = ? LIMIT 1");
632
783
  const deleteFileEntries = db.prepare("DELETE FROM file_functions WHERE file_path = ?");
633
784
  const insertFileFunction = db.prepare(`
634
- INSERT INTO file_functions
635
- (file_path, function_name, content_hash, file_mtime, language, entity_type)
636
- VALUES (?, ?, ?, ?, ?, ?)
785
+ INSERT OR REPLACE INTO file_functions
786
+ (file_path, function_name, content_hash, file_mtime, language, entity_type, start_line)
787
+ VALUES (?, ?, ?, ?, ?, ?, ?)
637
788
  `);
638
789
  const fileSet = new Set(files);
639
790
  for (let i = 0; i < files.length; i++) {
@@ -654,7 +805,7 @@ async function refreshFileIndex(rootPath, onProgress) {
654
805
  const functions = parser.extractFunctions(file);
655
806
  deleteFileEntries.run(file);
656
807
  for (const fn of functions) {
657
- insertFileFunction.run(file, fn.name, fn.contentHash, currentMtime, fn.language, fn.entityType);
808
+ insertFileFunction.run(file, fn.name, fn.contentHash, currentMtime, fn.language, fn.entityType, fn.startLine);
658
809
  }
659
810
  }
660
811
  // Remove entries for deleted files
@@ -713,3 +864,54 @@ function importAnalysis(dbPath, rows) {
713
864
  db.close();
714
865
  return imported;
715
866
  }
/**
 * Extract all functions from a repository, returning FunctionInfo arrays
 * with relationship data (calls, imports, exported). Used by the graph
 * analysis pipeline to build the relationship graph.
 *
 * Respects the same file size limits and parser selection as refreshFileIndex.
 *
 * @param {string} rootPath - Repository root to scan.
 * @param {(done: number, total: number, relPath: string) => void} [onProgress]
 *   Optional progress callback, invoked once per discovered file (including
 *   files that end up skipped for size or lack of parser).
 * @returns {Promise<Array>} All extracted FunctionInfo objects across files.
 */
async function extractAllFunctions(rootPath, onProgress) {
    const files = await discoverFiles(rootPath);
    const allFunctions = [];
    for (let i = 0; i < files.length; i++) {
        const file = files[i];
        const relPath = path.relative(rootPath, file);
        onProgress?.(i + 1, files.length, relPath);
        // A file can disappear between discovery and stat (build output being
        // rewritten, branch switch mid-scan) — skip it rather than letting a
        // single ENOENT abort the whole extraction.
        let stat;
        try {
            stat = fs.statSync(file);
        }
        catch {
            continue;
        }
        if (stat.size > MAX_FILE_SIZE_BYTES)
            continue;
        const parser = (0, parsers_1.getParserForFile)(file);
        if (!parser)
            continue;
        allFunctions.push(...parser.extractFunctions(file));
    }
    return allFunctions;
}
/**
 * Populate the file_functions index from a pre-extracted FunctionInfo array.
 * Used by the graph pipeline to avoid scanning files twice — extractAllFunctions()
 * already has all the data needed for the index.
 *
 * @param db - Open better-sqlite3 database handle (caller owns lifecycle).
 * @param functions - Flat FunctionInfo list; entries carry their filePath.
 */
function populateFileIndex(db, functions) {
    // Bucket the flat list by owning file so each file's rows can be
    // rebuilt as a unit (delete old rows, insert current ones).
    const byFile = new Map();
    for (const fn of functions) {
        if (!byFile.has(fn.filePath)) {
            byFile.set(fn.filePath, []);
        }
        byFile.get(fn.filePath).push(fn);
    }
    const deleteFileEntries = db.prepare("DELETE FROM file_functions WHERE file_path = ?");
    const insertFileFunction = db.prepare(`INSERT OR REPLACE INTO file_functions (file_path, function_name, content_hash, file_mtime, language, entity_type, start_line)
    VALUES (?, ?, ?, ?, ?, ?, ?)`);
    // One transaction for the whole rebuild: either every file's index rows
    // are refreshed or none are.
    const rebuildIndex = db.transaction(() => {
        for (const [filePath, fns] of byFile.entries()) {
            const mtime = Math.floor(fs.statSync(filePath).mtimeMs);
            deleteFileEntries.run(filePath);
            for (const fn of fns) {
                insertFileFunction.run(filePath, fn.name, fn.contentHash, mtime, fn.language, fn.entityType, fn.startLine);
            }
        }
    });
    rebuildIndex();
}