@ophan/core 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/community-detectors/index.d.ts +20 -0
- package/dist/community-detectors/index.d.ts.map +1 -0
- package/dist/community-detectors/index.js +45 -0
- package/dist/community-detectors/label-prop.d.ts +20 -0
- package/dist/community-detectors/label-prop.d.ts.map +1 -0
- package/dist/community-detectors/label-prop.js +77 -0
- package/dist/community-detectors/leiden.d.ts +22 -0
- package/dist/community-detectors/leiden.d.ts.map +1 -0
- package/dist/community-detectors/leiden.js +312 -0
- package/dist/community-detectors/louvain.d.ts +13 -0
- package/dist/community-detectors/louvain.d.ts.map +1 -0
- package/dist/community-detectors/louvain.js +29 -0
- package/dist/community-detectors/types.d.ts +36 -0
- package/dist/community-detectors/types.d.ts.map +1 -0
- package/dist/{parsers/__fixtures__/no-functions.js → community-detectors/types.js} +0 -2
- package/dist/edge-resolvers/call.d.ts +13 -0
- package/dist/edge-resolvers/call.d.ts.map +1 -0
- package/dist/edge-resolvers/call.js +40 -0
- package/dist/edge-resolvers/co-location.d.ts +16 -0
- package/dist/edge-resolvers/co-location.d.ts.map +1 -0
- package/dist/edge-resolvers/co-location.js +129 -0
- package/dist/edge-resolvers/import.d.ts +16 -0
- package/dist/edge-resolvers/import.d.ts.map +1 -0
- package/dist/edge-resolvers/import.js +118 -0
- package/dist/edge-resolvers/index.d.ts +9 -0
- package/dist/edge-resolvers/index.d.ts.map +1 -0
- package/dist/edge-resolvers/index.js +29 -0
- package/dist/edge-resolvers/jsx-ref.d.ts +13 -0
- package/dist/edge-resolvers/jsx-ref.d.ts.map +1 -0
- package/dist/edge-resolvers/jsx-ref.js +40 -0
- package/dist/edge-resolvers/types.d.ts +40 -0
- package/dist/edge-resolvers/types.d.ts.map +1 -0
- package/dist/edge-resolvers/types.js +2 -0
- package/dist/graph.d.ts +293 -0
- package/dist/graph.d.ts.map +1 -0
- package/dist/graph.js +1295 -0
- package/dist/index.d.ts +37 -8
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +385 -183
- package/dist/migrations.d.ts +25 -0
- package/dist/migrations.d.ts.map +1 -0
- package/dist/migrations.js +323 -0
- package/dist/module-resolvers/index.d.ts +11 -0
- package/dist/module-resolvers/index.d.ts.map +1 -0
- package/dist/module-resolvers/index.js +67 -0
- package/dist/module-resolvers/javascript.d.ts +18 -0
- package/dist/module-resolvers/javascript.d.ts.map +1 -0
- package/dist/module-resolvers/javascript.js +130 -0
- package/dist/module-resolvers/types.d.ts +18 -0
- package/dist/module-resolvers/types.d.ts.map +1 -0
- package/dist/module-resolvers/types.js +2 -0
- package/dist/parsers/python.d.ts.map +1 -1
- package/dist/parsers/python.js +38 -4
- package/dist/parsers/typescript.d.ts.map +1 -1
- package/dist/parsers/typescript.js +133 -0
- package/dist/practices.d.ts +28 -0
- package/dist/practices.d.ts.map +1 -0
- package/dist/practices.js +95 -0
- package/dist/schemas.d.ts +251 -3
- package/dist/schemas.d.ts.map +1 -1
- package/dist/schemas.js +121 -6
- package/dist/shared.d.ts +8 -0
- package/dist/shared.d.ts.map +1 -1
- package/dist/summarize.d.ts +165 -0
- package/dist/summarize.d.ts.map +1 -0
- package/dist/summarize.js +1067 -0
- package/ophan_logo.png +0 -0
- package/package.json +9 -2
- package/dist/parsers/__fixtures__/arrow-functions.d.ts +0 -5
- package/dist/parsers/__fixtures__/arrow-functions.d.ts.map +0 -1
- package/dist/parsers/__fixtures__/arrow-functions.js +0 -16
- package/dist/parsers/__fixtures__/class-methods.d.ts +0 -6
- package/dist/parsers/__fixtures__/class-methods.d.ts.map +0 -1
- package/dist/parsers/__fixtures__/class-methods.js +0 -12
- package/dist/parsers/__fixtures__/no-functions.d.ts +0 -9
- package/dist/parsers/__fixtures__/no-functions.d.ts.map +0 -1
package/dist/index.js
CHANGED
@@ -73,11 +73,12 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.getSupportedExtensions = exports.computeHash = void 0;
+exports.buildL1Context = exports.summarizeCommunities = exports.LeidenDetector = exports.LabelPropDetector = exports.LouvainDetector = exports.listDetectors = exports.registerDetector = exports.getDetector = exports.JavaScriptModuleResolver = exports.getModuleResolverForFile = exports.getModuleResolver = exports.registerModuleResolver = exports.ImportEdgeResolver = exports.CoLocationEdgeResolver = exports.JsxRefEdgeResolver = exports.CallEdgeResolver = exports.getEdgeResolvers = exports.registerEdgeResolver = exports.DEFAULT_COMPARISONS = exports.runComparison = exports.computeComparisonMetrics = exports.rescueDissolvedNodes = exports.addTransitiveEdges = exports.buildModuleResolver = exports.storeEdges = exports.resolveEdges = exports.DEFAULT_EDGE_WEIGHTS = exports.DEFAULT_GRAPH_CONFIG = exports.loadCommunityEdges = exports.storeCommunityEdges = exports.computeCommunityEdges = exports.computeCentrality = exports.detectCommunities = exports.buildGraph = exports.loadCommunities = exports.loadEdges = exports.saveGraphConfig = exports.loadGraphConfig = exports.computeDirectoryDistance = exports.computePackage = exports.detectHierarchicalCommunities = exports.analyzeGraph = exports.MAX_RULE_LENGTH = exports.MAX_PRACTICES = exports.buildPracticesPrompt = exports.validatePractices = exports.importPractices = exports.loadPracticesFromDb = exports.getSupportedExtensions = exports.computeHash = void 0;
+exports.DEFAULT_SUMMARIZE_CONFIG = exports.summarizeCC = exports.computeCCInputHash = exports.detectCrossCuttingConcerns = exports.loadAllSummaries = exports.loadSummary = exports.storeSummary = exports.computeL3InputHash = exports.computeL2InputHash = exports.computeL1InputHash = exports.summarizeL3 = exports.summarizeL2 = exports.summarizeL1 = exports.formatPackageBreakdown = exports.computePackageBreakdown = exports.buildL1RawContext = void 0;
+exports.numberLines = numberLines;
 exports.ensureGitignore = ensureGitignore;
 exports.discoverFiles = discoverFiles;
 exports.initDb = initDb;
-exports.migrateToAnalysisTypes = migrateToAnalysisTypes;
 exports.analyzeFunctions = analyzeFunctions;
 exports.analyzeFiles = analyzeFiles;
 exports.analyzeRepository = analyzeRepository;
@@ -88,12 +89,15 @@ exports.gcAnalysis = gcAnalysis;
 exports.refreshFileIndex = refreshFileIndex;
 exports.findMissingHashes = findMissingHashes;
 exports.importAnalysis = importAnalysis;
+exports.extractAllFunctions = extractAllFunctions;
+exports.populateFileIndex = populateFileIndex;
 const sdk_1 = __importDefault(require("@anthropic-ai/sdk"));
 const better_sqlite3_1 = __importDefault(require("better-sqlite3"));
 const fs = __importStar(require("fs"));
 const path = __importStar(require("path"));
 const child_process_1 = require("child_process");
 const glob_1 = require("glob");
+const p_limit_1 = __importDefault(require("p-limit"));
 const p_retry_1 = __importDefault(require("p-retry"));
 const parsers_1 = require("./parsers");
 // Re-export from shared so downstream consumers (CLI, etc.) can still import from @ophan/core
@@ -102,9 +106,94 @@ Object.defineProperty(exports, "computeHash", { enumerable: true, get: function
 var parsers_2 = require("./parsers");
 Object.defineProperty(exports, "getSupportedExtensions", { enumerable: true, get: function () { return parsers_2.getSupportedExtensions; } });
 __exportStar(require("./schemas"), exports);
+var practices_1 = require("./practices");
+Object.defineProperty(exports, "loadPracticesFromDb", { enumerable: true, get: function () { return practices_1.loadPracticesFromDb; } });
+Object.defineProperty(exports, "importPractices", { enumerable: true, get: function () { return practices_1.importPractices; } });
+Object.defineProperty(exports, "validatePractices", { enumerable: true, get: function () { return practices_1.validatePractices; } });
+Object.defineProperty(exports, "buildPracticesPrompt", { enumerable: true, get: function () { return practices_1.buildPracticesPrompt; } });
+Object.defineProperty(exports, "MAX_PRACTICES", { enumerable: true, get: function () { return practices_1.MAX_PRACTICES; } });
+Object.defineProperty(exports, "MAX_RULE_LENGTH", { enumerable: true, get: function () { return practices_1.MAX_RULE_LENGTH; } });
+var graph_1 = require("./graph");
+Object.defineProperty(exports, "analyzeGraph", { enumerable: true, get: function () { return graph_1.analyzeGraph; } });
+Object.defineProperty(exports, "detectHierarchicalCommunities", { enumerable: true, get: function () { return graph_1.detectHierarchicalCommunities; } });
+Object.defineProperty(exports, "computePackage", { enumerable: true, get: function () { return graph_1.computePackage; } });
+Object.defineProperty(exports, "computeDirectoryDistance", { enumerable: true, get: function () { return graph_1.computeDirectoryDistance; } });
+Object.defineProperty(exports, "loadGraphConfig", { enumerable: true, get: function () { return graph_1.loadGraphConfig; } });
+Object.defineProperty(exports, "saveGraphConfig", { enumerable: true, get: function () { return graph_1.saveGraphConfig; } });
+Object.defineProperty(exports, "loadEdges", { enumerable: true, get: function () { return graph_1.loadEdges; } });
+Object.defineProperty(exports, "loadCommunities", { enumerable: true, get: function () { return graph_1.loadCommunities; } });
+Object.defineProperty(exports, "buildGraph", { enumerable: true, get: function () { return graph_1.buildGraph; } });
+Object.defineProperty(exports, "detectCommunities", { enumerable: true, get: function () { return graph_1.detectCommunities; } });
+Object.defineProperty(exports, "computeCentrality", { enumerable: true, get: function () { return graph_1.computeCentrality; } });
+Object.defineProperty(exports, "computeCommunityEdges", { enumerable: true, get: function () { return graph_1.computeCommunityEdges; } });
+Object.defineProperty(exports, "storeCommunityEdges", { enumerable: true, get: function () { return graph_1.storeCommunityEdges; } });
+Object.defineProperty(exports, "loadCommunityEdges", { enumerable: true, get: function () { return graph_1.loadCommunityEdges; } });
+Object.defineProperty(exports, "DEFAULT_GRAPH_CONFIG", { enumerable: true, get: function () { return graph_1.DEFAULT_GRAPH_CONFIG; } });
+Object.defineProperty(exports, "DEFAULT_EDGE_WEIGHTS", { enumerable: true, get: function () { return graph_1.DEFAULT_EDGE_WEIGHTS; } });
+Object.defineProperty(exports, "resolveEdges", { enumerable: true, get: function () { return graph_1.resolveEdges; } });
+Object.defineProperty(exports, "storeEdges", { enumerable: true, get: function () { return graph_1.storeEdges; } });
+Object.defineProperty(exports, "buildModuleResolver", { enumerable: true, get: function () { return graph_1.buildModuleResolver; } });
+Object.defineProperty(exports, "addTransitiveEdges", { enumerable: true, get: function () { return graph_1.addTransitiveEdges; } });
+Object.defineProperty(exports, "rescueDissolvedNodes", { enumerable: true, get: function () { return graph_1.rescueDissolvedNodes; } });
+Object.defineProperty(exports, "computeComparisonMetrics", { enumerable: true, get: function () { return graph_1.computeComparisonMetrics; } });
+Object.defineProperty(exports, "runComparison", { enumerable: true, get: function () { return graph_1.runComparison; } });
+Object.defineProperty(exports, "DEFAULT_COMPARISONS", { enumerable: true, get: function () { return graph_1.DEFAULT_COMPARISONS; } });
+var edge_resolvers_1 = require("./edge-resolvers");
+Object.defineProperty(exports, "registerEdgeResolver", { enumerable: true, get: function () { return edge_resolvers_1.registerEdgeResolver; } });
+Object.defineProperty(exports, "getEdgeResolvers", { enumerable: true, get: function () { return edge_resolvers_1.getEdgeResolvers; } });
+Object.defineProperty(exports, "CallEdgeResolver", { enumerable: true, get: function () { return edge_resolvers_1.CallEdgeResolver; } });
+Object.defineProperty(exports, "JsxRefEdgeResolver", { enumerable: true, get: function () { return edge_resolvers_1.JsxRefEdgeResolver; } });
+Object.defineProperty(exports, "CoLocationEdgeResolver", { enumerable: true, get: function () { return edge_resolvers_1.CoLocationEdgeResolver; } });
+Object.defineProperty(exports, "ImportEdgeResolver", { enumerable: true, get: function () { return edge_resolvers_1.ImportEdgeResolver; } });
+var module_resolvers_1 = require("./module-resolvers");
+Object.defineProperty(exports, "registerModuleResolver", { enumerable: true, get: function () { return module_resolvers_1.registerModuleResolver; } });
+Object.defineProperty(exports, "getModuleResolver", { enumerable: true, get: function () { return module_resolvers_1.getModuleResolver; } });
+Object.defineProperty(exports, "getModuleResolverForFile", { enumerable: true, get: function () { return module_resolvers_1.getModuleResolverForFile; } });
+Object.defineProperty(exports, "JavaScriptModuleResolver", { enumerable: true, get: function () { return module_resolvers_1.JavaScriptModuleResolver; } });
+var community_detectors_1 = require("./community-detectors");
+Object.defineProperty(exports, "getDetector", { enumerable: true, get: function () { return community_detectors_1.getDetector; } });
+Object.defineProperty(exports, "registerDetector", { enumerable: true, get: function () { return community_detectors_1.registerDetector; } });
+Object.defineProperty(exports, "listDetectors", { enumerable: true, get: function () { return community_detectors_1.listDetectors; } });
+Object.defineProperty(exports, "LouvainDetector", { enumerable: true, get: function () { return community_detectors_1.LouvainDetector; } });
+Object.defineProperty(exports, "LabelPropDetector", { enumerable: true, get: function () { return community_detectors_1.LabelPropDetector; } });
+Object.defineProperty(exports, "LeidenDetector", { enumerable: true, get: function () { return community_detectors_1.LeidenDetector; } });
+var summarize_1 = require("./summarize");
+Object.defineProperty(exports, "summarizeCommunities", { enumerable: true, get: function () { return summarize_1.summarizeCommunities; } });
+Object.defineProperty(exports, "buildL1Context", { enumerable: true, get: function () { return summarize_1.buildL1Context; } });
+Object.defineProperty(exports, "buildL1RawContext", { enumerable: true, get: function () { return summarize_1.buildL1RawContext; } });
+Object.defineProperty(exports, "computePackageBreakdown", { enumerable: true, get: function () { return summarize_1.computePackageBreakdown; } });
+Object.defineProperty(exports, "formatPackageBreakdown", { enumerable: true, get: function () { return summarize_1.formatPackageBreakdown; } });
+Object.defineProperty(exports, "summarizeL1", { enumerable: true, get: function () { return summarize_1.summarizeL1; } });
+Object.defineProperty(exports, "summarizeL2", { enumerable: true, get: function () { return summarize_1.summarizeL2; } });
+Object.defineProperty(exports, "summarizeL3", { enumerable: true, get: function () { return summarize_1.summarizeL3; } });
+Object.defineProperty(exports, "computeL1InputHash", { enumerable: true, get: function () { return summarize_1.computeL1InputHash; } });
+Object.defineProperty(exports, "computeL2InputHash", { enumerable: true, get: function () { return summarize_1.computeL2InputHash; } });
+Object.defineProperty(exports, "computeL3InputHash", { enumerable: true, get: function () { return summarize_1.computeL3InputHash; } });
+Object.defineProperty(exports, "storeSummary", { enumerable: true, get: function () { return summarize_1.storeSummary; } });
+Object.defineProperty(exports, "loadSummary", { enumerable: true, get: function () { return summarize_1.loadSummary; } });
+Object.defineProperty(exports, "loadAllSummaries", { enumerable: true, get: function () { return summarize_1.loadAllSummaries; } });
+Object.defineProperty(exports, "detectCrossCuttingConcerns", { enumerable: true, get: function () { return summarize_1.detectCrossCuttingConcerns; } });
+Object.defineProperty(exports, "computeCCInputHash", { enumerable: true, get: function () { return summarize_1.computeCCInputHash; } });
+Object.defineProperty(exports, "summarizeCC", { enumerable: true, get: function () { return summarize_1.summarizeCC; } });
+Object.defineProperty(exports, "DEFAULT_SUMMARIZE_CONFIG", { enumerable: true, get: function () { return summarize_1.DEFAULT_SUMMARIZE_CONFIG; } });
 const schemas_1 = require("./schemas");
+const practices_2 = require("./practices");
+const migrations_1 = require("./migrations");
 /** Skip files larger than this — catches minified bundles, generated code, etc. */
 const MAX_FILE_SIZE_BYTES = 50 * 1024; // 50 KB
+/**
+ * Pre-process source code with visible line numbers for the Claude prompt.
+ * Claude reads these labels to report issue locations accurately (instead of counting lines).
+ * The numbered output is ONLY used in the prompt — content hashes are computed from
+ * the original unnumbered source.
+ */
+function numberLines(sourceCode) {
+    const lines = sourceCode.split("\n");
+    const pad = String(lines.length).length;
+    return lines
+        .map((line, i) => `${String(i + 1).padStart(pad)} | ${line}`)
+        .join("\n");
+}
 /**
  * Ensure .ophan/ is in .gitignore. Only acts in git repos.
  * Creates .gitignore if the repo has .git/ but no .gitignore.
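The hunk above adds numberLines, which prefixes each source line with a right-padded label before the code is embedded in the Claude prompt. As a quick illustration (this snippet is not part of the published file, it just exercises the function shown above):

// Illustrative only: what numberLines produces for a three-line snippet.
const numbered = numberLines("const a = 1;\nconst b = 2;\nreturn a + b;");
console.log(numbered);
// 1 | const a = 1;
// 2 | const b = 2;
// 3 | return a + b;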
@@ -125,6 +214,7 @@ const HARDCODED_IGNORES = [
     "**/node_modules/**", "**/.ophan/**", "**/dist/**",
     "**/__pycache__/**", "**/.venv/**", "**/venv/**", "**/env/**",
     "**/.tox/**", "**/.eggs/**", "**/build/**",
+    "**/.output/**", "**/.next/**", "**/.nuxt/**", "**/.turbo/**", "**/.cache/**",
 ];
 /**
  * Discover source files using git (respects .gitignore) with glob fallback.
@@ -142,7 +232,7 @@ async function discoverFiles(rootPath) {
             .filter((f) => extSet.has(path.extname(f).toLowerCase()))
             .map((f) => path.resolve(rootPath, f));
         // Still apply hardcoded ignores as safety net (e.g. checked-in node_modules)
-        const ignoreSegments = ["node_modules", ".ophan", "__pycache__", ".venv", "venv", ".tox", ".eggs"];
+        const ignoreSegments = ["node_modules", ".ophan", "__pycache__", ".venv", "venv", ".tox", ".eggs", ".output", ".next", ".nuxt", ".turbo", ".cache"];
         return files.filter((f) => !ignoreSegments.some((seg) => f.includes(`/${seg}/`) || f.includes(`\\${seg}\\`)));
     }
     catch {
@@ -158,43 +248,55 @@ async function discoverFiles(rootPath) {
 // ============ DATABASE ============
 function initDb(dbPath) {
     const db = new better_sqlite3_1.default(dbPath);
-
-
-
-
-
-
-
-    else if (columns.length === 0) {
-        // Fresh DB — create with new schema directly
+    db.pragma("journal_mode = WAL");
+    // Fresh DB: create current schema directly, mark all migrations as applied.
+    // Existing DB: run any pending migrations to bring schema up to date.
+    const tables = db
+        .prepare("SELECT name FROM sqlite_master WHERE type='table'")
+        .all();
+    if (tables.length === 0) {
         createFreshSchema(db);
+        (0, migrations_1.bootstrapMigrations)(db, migrations_1.CORE_MIGRATIONS, "core:bootstrap");
+    }
+    else {
+        (0, migrations_1.runMigrations)(db, migrations_1.CORE_MIGRATIONS, "core");
     }
-
-
+    return db;
+}
+/**
+ * Create the full current schema for a fresh database.
+ * Includes all tables, indexes, and the migration tracking table.
+ */
+function createFreshSchema(db) {
     db.exec(`
-        CREATE TABLE
+        CREATE TABLE function_analysis (
+            content_hash TEXT NOT NULL,
+            analysis_type TEXT NOT NULL,
+            analysis JSON NOT NULL,
+            model_version TEXT NOT NULL,
+            schema_version INTEGER NOT NULL DEFAULT 1,
+            created_at INTEGER NOT NULL,
+            last_seen_at INTEGER NOT NULL,
+            language TEXT NOT NULL DEFAULT 'typescript',
+            entity_type TEXT NOT NULL DEFAULT 'function',
+            synced_at INTEGER,
+            PRIMARY KEY (content_hash, analysis_type)
+        )
+    `);
+    db.exec(`
+        CREATE TABLE file_functions (
            file_path TEXT NOT NULL,
            function_name TEXT NOT NULL,
            content_hash TEXT NOT NULL,
            file_mtime INTEGER NOT NULL,
            language TEXT NOT NULL DEFAULT 'typescript',
-            entity_type TEXT NOT NULL DEFAULT 'function'
+            entity_type TEXT NOT NULL DEFAULT 'function',
+            start_line INTEGER NOT NULL DEFAULT 0,
+            PRIMARY KEY (file_path, function_name)
        )
    `);
-    // Migration: add columns to file_functions for existing databases
-    const ffColumns = db.prepare("PRAGMA table_info(file_functions)").all();
-    if (!ffColumns.some((c) => c.name === "language")) {
-        try {
-            db.exec("ALTER TABLE file_functions ADD COLUMN language TEXT NOT NULL DEFAULT 'typescript'");
-        }
-        catch (_) { }
-        try {
-            db.exec("ALTER TABLE file_functions ADD COLUMN entity_type TEXT NOT NULL DEFAULT 'function'");
-        }
-        catch (_) { }
-    }
     db.exec(`
-        CREATE TABLE
+        CREATE TABLE function_gc (
            content_hash TEXT NOT NULL,
            analysis_type TEXT,
            gc_at INTEGER NOT NULL,
@@ -202,132 +304,88 @@ function initDb(dbPath) {
            PRIMARY KEY (content_hash, analysis_type)
        )
    `);
-    // Migration: add analysis_type to function_gc for existing databases
-    const gcColumns = db.prepare("PRAGMA table_info(function_gc)").all();
-    if (!gcColumns.some((c) => c.name === "analysis_type")) {
-        try {
-            db.exec("ALTER TABLE function_gc ADD COLUMN analysis_type TEXT");
-        }
-        catch (_) { }
-        // Rebuild PK by recreating table
-        db.exec(`
-            CREATE TABLE IF NOT EXISTS function_gc_v2 (
-                content_hash TEXT NOT NULL,
-                analysis_type TEXT,
-                gc_at INTEGER NOT NULL,
-                synced_at INTEGER,
-                PRIMARY KEY (content_hash, analysis_type)
-            )
-        `);
-        db.exec("INSERT OR IGNORE INTO function_gc_v2 SELECT content_hash, NULL, gc_at, synced_at FROM function_gc");
-        db.exec("DROP TABLE function_gc");
-        db.exec("ALTER TABLE function_gc_v2 RENAME TO function_gc");
-    }
     db.exec(`
-        CREATE TABLE
+        CREATE TABLE sync_meta (
            key TEXT PRIMARY KEY,
            value TEXT NOT NULL
        )
    `);
-    db.exec("CREATE INDEX IF NOT EXISTS idx_file_functions_path ON file_functions(file_path)");
-    db.exec("CREATE INDEX IF NOT EXISTS idx_file_functions_hash ON file_functions(content_hash)");
-    db.exec("CREATE INDEX IF NOT EXISTS idx_fa_hash ON function_analysis(content_hash)");
-    db.exec("CREATE INDEX IF NOT EXISTS idx_fa_type ON function_analysis(analysis_type)");
-    return db;
-}
-function createFreshSchema(db) {
     db.exec(`
-        CREATE TABLE
+        CREATE TABLE practices (
+            practice_id TEXT PRIMARY KEY,
+            rule TEXT NOT NULL,
+            severity TEXT NOT NULL DEFAULT 'warning'
+        )
+    `);
+    // Graph tables
+    db.exec(`
+        CREATE TABLE function_edges (
+            source_hash TEXT NOT NULL,
+            target_hash TEXT NOT NULL,
+            edge_type TEXT NOT NULL,
+            weight REAL NOT NULL DEFAULT 1.0,
+            PRIMARY KEY (source_hash, target_hash, edge_type)
+        )
+    `);
+    db.exec(`
+        CREATE TABLE communities (
            content_hash TEXT NOT NULL,
-
-
-
-
+            level INTEGER NOT NULL,
+            community_id TEXT NOT NULL,
+            algorithm TEXT NOT NULL DEFAULT 'louvain',
+            PRIMARY KEY (content_hash, level, algorithm)
+        )
+    `);
+    db.exec(`
+        CREATE TABLE community_summaries (
+            community_id TEXT NOT NULL,
+            level INTEGER NOT NULL,
+            algorithm TEXT NOT NULL DEFAULT 'louvain',
+            input_hash TEXT NOT NULL,
+            summary JSON NOT NULL,
+            model_version TEXT,
            created_at INTEGER NOT NULL,
-
-            language TEXT NOT NULL DEFAULT 'typescript',
-            entity_type TEXT NOT NULL DEFAULT 'function',
-            synced_at INTEGER,
-            PRIMARY KEY (content_hash, analysis_type)
+            PRIMARY KEY (community_id, level, algorithm)
        )
    `);
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            'returns', json_extract(analysis, '$.returns')
-        ),
-        model_version,
-        1,
-        created_at,
-        last_seen_at,
-        COALESCE(language, 'typescript'),
-        COALESCE(entity_type, 'function'),
-        NULL
-        FROM function_analysis
-    `);
-    // Split security fields
-    db.exec(`
-        INSERT OR IGNORE INTO function_analysis_v2
-        (content_hash, analysis_type, analysis, model_version, schema_version, created_at, last_seen_at, language, entity_type, synced_at)
-        SELECT
-        content_hash,
-        'security',
-        json_object(
-            'dataTags', json_extract(analysis, '$.dataTags'),
-            'securityFlags', json_extract(analysis, '$.securityFlags')
-        ),
-        model_version,
-        1,
-        created_at,
-        last_seen_at,
-        COALESCE(language, 'typescript'),
-        COALESCE(entity_type, 'function'),
-        NULL
-        FROM function_analysis
-    `);
-    db.exec("DROP TABLE function_analysis");
-    db.exec("ALTER TABLE function_analysis_v2 RENAME TO function_analysis");
-    db.exec("COMMIT");
-    }
-    catch (e) {
-        db.exec("ROLLBACK");
-        throw e;
-    }
+    db.exec(`
+        CREATE TABLE graph_config (
+            key TEXT PRIMARY KEY,
+            value TEXT NOT NULL
+        )
+    `);
+    db.exec(`
+        CREATE TABLE community_edges (
+            source_community TEXT NOT NULL,
+            target_community TEXT NOT NULL,
+            algorithm TEXT NOT NULL DEFAULT 'louvain',
+            weight REAL NOT NULL DEFAULT 0,
+            edge_count INTEGER NOT NULL DEFAULT 0,
+            PRIMARY KEY (source_community, target_community, algorithm)
+        )
+    `);
+    db.exec(`
+        CREATE TABLE community_signatures (
+            community_id TEXT NOT NULL,
+            algorithm TEXT NOT NULL DEFAULT 'louvain',
+            signatures JSON NOT NULL,
+            PRIMARY KEY (community_id, algorithm)
+        )
+    `);
+    // Indexes
+    db.exec("CREATE INDEX idx_file_functions_path ON file_functions(file_path)");
+    db.exec("CREATE INDEX idx_file_functions_hash ON file_functions(content_hash)");
+    db.exec("CREATE INDEX idx_fa_hash ON function_analysis(content_hash)");
+    db.exec("CREATE INDEX idx_fa_type ON function_analysis(analysis_type)");
+    db.exec("CREATE INDEX idx_edges_source ON function_edges(source_hash)");
+    db.exec("CREATE INDEX idx_edges_target ON function_edges(target_hash)");
+    db.exec("CREATE INDEX idx_communities_id ON communities(community_id)");
+    db.exec("CREATE INDEX idx_ce_source ON community_edges(source_community)");
+    db.exec("CREATE INDEX idx_ce_target ON community_edges(target_community)");
 }
 // ============ LLM ANALYSIS ============
 const anthropic = new sdk_1.default();
-async function analyzeFunctions(functions) {
+async function analyzeFunctions(functions, practices) {
     if (functions.length === 0)
         return [];
     // Use the language from the first function in the batch (batches are always same-file)
@@ -341,6 +399,7 @@ async function analyzeFunctions(functions) {
     };
     const securityHints = securityExamples[lang] || securityExamples.typescript;
     const prompt = `Analyze these ${langLabel} functions. Return JSON array with one object per function.
+Source lines are numbered for reference — when reporting line numbers, read the label at the start of the line, don't count.
 
 Each object must have:
 - name: string (function name, must match input)
@@ -348,17 +407,34 @@
 - params: array of { name, type, description }
 - returns: { type, description }
 - dataTags: array from [user_input, pii, credentials, database, external_api, file_system, config, internal]
-- securityFlags: array of
-
+- securityFlags: array of flag names found (e.g. [${securityHints}]) or empty
+- issues: array of detailed findings (empty if no security concerns), each with:
+  - flag: the security concern identifier (must also appear in securityFlags)
+  - title: short one-line title
+  - description: what's wrong (1-2 sentences)
+  - explanation: why it's a problem and how to fix (2-4 sentences)
+  - line: the line number shown at the start of the problematic line (read the number, don't count)
+  - startText: the first ~30 characters of the problematic code on that line
+  - endText: the last ~20 characters of the problematic code on that line
+  - severity: "error" | "warning" | "info"
+  - confidence: "high" | "medium" | "low"
+    high = the vulnerability is clearly exploitable given the visible code
+    (e.g., user input directly concatenated into SQL, dangerouslySetInnerHTML with user content)
+    medium = the pattern is risky but exploitability depends on how callers
+    use this function (e.g., file path parameter without validation)
+    low = the pattern matches a known vulnerability signature but is common
+    safe practice or requires unusual conditions to exploit
+    (e.g., prop spreading in React, Object.assign with internal data)
+${practices && practices.length > 0 ? (0, practices_2.buildPracticesPrompt)(practices) : ""}
 Functions:
 ${functions
-    .map((fn) => `### ${fn.name}\n\`\`\`${lang}\n${fn.sourceCode}\n\`\`\``)
+    .map((fn) => `### ${fn.name}\n\`\`\`${lang}\n${numberLines(fn.sourceCode)}\n\`\`\``)
     .join("\n\n")}
 
 CRITICAL: Return ONLY the raw JSON array. No markdown, no code fences, no \`\`\`json blocks, no explanation. Just the [ ... ] array directly.`;
     const response = await (0, p_retry_1.default)(() => anthropic.messages.create({
         model: "claude-sonnet-4-20250514",
-        max_tokens:
+        max_tokens: 8192,
         messages: [{ role: "user", content: prompt }],
     }), {
         retries: 4,
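For orientation, an entry of the issues array that the updated prompt asks the model to return would look roughly like this; the concrete values are hypothetical and only illustrate the fields listed above:

// Hypothetical example of one `issues` entry matching the prompt spec above.
const exampleIssue = {
    flag: "sql_injection",          // must also appear in securityFlags
    title: "User input concatenated into SQL",
    description: "The query is built by concatenating the raw userId parameter into the SQL string.",
    explanation: "An attacker can inject arbitrary SQL through userId. Use a parameterized query or prepared statement instead.",
    line: 12,                       // label read from the numbered source line, not counted
    startText: "const query = `SELECT * FROM",
    endText: "WHERE id = ${userId}`;",
    severity: "error",
    confidence: "high",
};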
@@ -384,6 +460,8 @@ CRITICAL: Return ONLY the raw JSON array. No markdown, no code fences, no \`\`\`
             returns: validated.returns,
             dataTags: validated.dataTags,
             securityFlags: validated.securityFlags,
+            issues: validated.issues,
+            practiceViolations: validated.practiceViolations,
         };
     });
 }
@@ -396,6 +474,8 @@ CRITICAL: Return ONLY the raw JSON array. No markdown, no code fences, no \`\`\`
             returns: { type: "unknown", description: "" },
             dataTags: [],
             securityFlags: [],
+            issues: [],
+            practiceViolations: [],
         }));
     }
 }
@@ -405,7 +485,7 @@ CRITICAL: Return ONLY the raw JSON array. No markdown, no code fences, no \`\`\`
  * The caller is responsible for DB lifecycle (open/close).
  */
 async function analyzeFiles(db, rootPath, files, options = {}) {
-    const { pullFn, onProgress } = options;
+    const { pullFn, onProgress, onAnalysisProgress } = options;
     const now = Math.floor(Date.now() / 1000);
     let totalAnalyzed = 0;
     let totalSkippedSize = 0;
@@ -413,23 +493,36 @@ async function analyzeFiles(db, rootPath, files, options = {}) {
     let totalPulled = 0;
     // Prepared statements
     const getFileMtime = db.prepare("SELECT file_mtime FROM file_functions WHERE file_path = ? LIMIT 1");
-    // Check for
-
+    // Check for analysis at current schema versions.
+    // If a row exists but with an older schema_version, it's treated as a cache miss
+    // so the function gets re-analyzed with the current schema.
+    const minSchemaVersion = Math.min(schemas_1.SCHEMA_VERSIONS.documentation, schemas_1.SCHEMA_VERSIONS.security);
+    const checkAnalysis = db.prepare(`SELECT 1 FROM function_analysis
+        WHERE content_hash = ? AND analysis_type = 'documentation'
+        AND schema_version >= ?`);
+    const getFileHashes = db.prepare("SELECT content_hash FROM file_functions WHERE file_path = ?");
+    // INSERT OR REPLACE: overwrites stale rows when schema_version bumps trigger re-analysis.
+    // Previously INSERT OR IGNORE, but that would keep old-version rows forever.
     const insertDocAnalysis = db.prepare(`
-        INSERT OR
+        INSERT OR REPLACE INTO function_analysis
        (content_hash, analysis_type, analysis, model_version, schema_version, created_at, last_seen_at, language, entity_type)
        VALUES (?, 'documentation', ?, ?, ?, ?, ?, ?, ?)
    `);
     const insertSecAnalysis = db.prepare(`
-        INSERT OR
+        INSERT OR REPLACE INTO function_analysis
        (content_hash, analysis_type, analysis, model_version, schema_version, created_at, last_seen_at, language, entity_type)
        VALUES (?, 'security', ?, ?, ?, ?, ?, ?, ?)
+    `);
+    const insertPracAnalysis = db.prepare(`
+        INSERT OR REPLACE INTO function_analysis
+        (content_hash, analysis_type, analysis, model_version, schema_version, created_at, last_seen_at, language, entity_type)
+        VALUES (?, 'practices', ?, ?, ?, ?, ?, ?, ?)
    `);
     const deleteFileEntries = db.prepare("DELETE FROM file_functions WHERE file_path = ?");
     const insertFileFunction = db.prepare(`
-        INSERT INTO file_functions
-        (file_path, function_name, content_hash, file_mtime, language, entity_type)
-        VALUES (?, ?, ?, ?, ?, ?)
+        INSERT OR REPLACE INTO file_functions
+        (file_path, function_name, content_hash, file_mtime, language, entity_type, start_line)
+        VALUES (?, ?, ?, ?, ?, ?, ?)
    `);
     const pendingFiles = [];
     const allMissingHashes = new Set();
@@ -445,7 +538,14 @@ async function analyzeFiles(db, rootPath, files, options = {}) {
         const currentMtime = Math.floor(stat.mtimeMs);
         const storedRow = getFileMtime.get(file);
         if (storedRow && storedRow.file_mtime === currentMtime) {
-
+            // Mtime unchanged — check if all functions already have analysis.
+            // If any lack analysis (e.g. init ran but analyze never did), re-process.
+            const fileHashes = getFileHashes.all(file);
+            const allAnalyzed = fileHashes.length > 0 && fileHashes.every((row) => checkAnalysis.get(row.content_hash, minSchemaVersion));
+            if (allAnalyzed) {
+                totalSkipped += fileHashes.length;
+                continue;
+            }
         }
         const parser = (0, parsers_1.getParserForFile)(file);
         if (!parser)
@@ -457,7 +557,7 @@ async function analyzeFiles(db, rootPath, files, options = {}) {
         }
         const needsAnalysis = [];
         for (const fn of functions) {
-            if (checkAnalysis.get(fn.contentHash)) {
+            if (checkAnalysis.get(fn.contentHash, minSchemaVersion)) {
                 totalSkipped++;
             }
             else {
@@ -469,43 +569,88 @@ async function analyzeFiles(db, rootPath, files, options = {}) {
     }
     // Phase 2: Pull from cloud to avoid redundant Claude calls
     if (pullFn && allMissingHashes.size > 0) {
-
+        try {
+            await pullFn([...allMissingHashes]);
+        }
+        catch {
+            // Pull is a non-critical optimization — continue with local analysis
+        }
     }
-    // Phase 3:
+    // Phase 3: Re-check after pull, compute totals, run Claude analysis
     const modelVersion = "claude-sonnet-4-20250514";
-
-    // Re-check after pull — some hashes may now exist locally
+    const phase3Files = pendingFiles.map((pending) => {
        const stillNeeds = pullFn
-            ? pending.needsAnalysis.filter((fn) => !checkAnalysis.get(fn.contentHash))
+            ? pending.needsAnalysis.filter((fn) => !checkAnalysis.get(fn.contentHash, minSchemaVersion))
            : pending.needsAnalysis;
        const pulled = pending.needsAnalysis.length - stillNeeds.length;
        totalPulled += pulled;
        totalSkipped += pulled;
-
-
-
-
-
-
-
-
-
-
-            dataTags: fn.dataTags,
-            securityFlags: fn.securityFlags,
-        }), modelVersion, schemas_1.SCHEMA_VERSIONS.security, now, now, fn.language, fn.entityType);
+        return { ...pending, stillNeeds };
+    });
+    // Flatten and dedupe all functions needing Claude analysis
+    const allStillNeeds = [];
+    const seenHashes = new Set();
+    for (const pending of phase3Files) {
+        for (const fn of pending.stillNeeds) {
+            if (!seenHashes.has(fn.contentHash)) {
+                seenHashes.add(fn.contentHash);
+                allStillNeeds.push(fn);
            }
-        totalAnalyzed += analyzed.length;
        }
-
+    }
+    const totalNeedsAnalysis = allStillNeeds.length;
+    // Batch into groups of 10, fire with bounded concurrency
+    const analysisBatches = [];
+    for (let i = 0; i < allStillNeeds.length; i += 10) {
+        analysisBatches.push(allStillNeeds.slice(i, i + 10));
+    }
+    const analysisLimit = (0, p_limit_1.default)(5);
+    const { practices } = options;
+    await Promise.all(analysisBatches.map((batch) => analysisLimit(async () => {
+        const analyzed = await analyzeFunctions(batch, practices);
+        for (const fn of analyzed) {
+            // Compute lineText for each issue — trimmed source line at analysis time
+            // Used by the extension for modification detection (see docs/research/issue-remediation-detection.md)
+            const sourceLines = fn.sourceCode.split("\n");
+            for (const issue of fn.issues) {
+                if (issue.line > 0 && issue.line <= sourceLines.length) {
+                    issue.lineText = sourceLines[issue.line - 1].trim();
+                }
+            }
+            for (const pv of fn.practiceViolations) {
+                if (pv.line > 0 && pv.line <= sourceLines.length) {
+                    pv.lineText = sourceLines[pv.line - 1].trim();
+                }
+            }
+            insertDocAnalysis.run(fn.contentHash, JSON.stringify({
+                description: fn.description,
+                params: fn.params,
+                returns: fn.returns,
+            }), modelVersion, schemas_1.SCHEMA_VERSIONS.documentation, now, now, fn.language, fn.entityType);
+            insertSecAnalysis.run(fn.contentHash, JSON.stringify({
+                dataTags: fn.dataTags,
+                securityFlags: fn.securityFlags,
+                issues: fn.issues,
+            }), modelVersion, schemas_1.SCHEMA_VERSIONS.security, now, now, fn.language, fn.entityType);
+            if (practices && practices.length > 0) {
+                insertPracAnalysis.run(fn.contentHash, JSON.stringify({
+                    violations: fn.practiceViolations,
+                }), modelVersion, schemas_1.SCHEMA_VERSIONS.practices, now, now, fn.language, fn.entityType);
+            }
+        }
+        totalAnalyzed += analyzed.length;
+        onAnalysisProgress?.(totalAnalyzed, totalNeedsAnalysis);
+    })));
+    // Rebuild file_functions for all pending files
+    for (const pending of phase3Files) {
         deleteFileEntries.run(pending.file);
         for (const fn of pending.functions) {
-            insertFileFunction.run(pending.file, fn.name, fn.contentHash, pending.mtime, fn.language, fn.entityType);
+            insertFileFunction.run(pending.file, fn.name, fn.contentHash, pending.mtime, fn.language, fn.entityType, fn.startLine);
         }
     }
     return { analyzed: totalAnalyzed, skipped: totalSkipped, skippedSize: totalSkippedSize, pulled: totalPulled };
 }
-async function analyzeRepository(rootPath, onProgress, pullFn) {
+async function analyzeRepository(rootPath, onProgress, pullFn, onAnalysisProgress) {
     const dbPath = path.join(rootPath, ".ophan", "index.db");
     fs.mkdirSync(path.join(rootPath, ".ophan"), { recursive: true });
     // Auto-add .ophan/ to .gitignore (only in git repos)
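The rewritten Phase 3 above splits pending functions into batches of 10 and lets p-limit keep at most five analyzeFunctions calls in flight at once. Stripped of the database writes, the concurrency pattern is roughly the following sketch (the helper name is illustrative; it assumes a CommonJS-compatible p-limit, as the compiled require above implies):

// Sketch of the bounded-concurrency batching used in Phase 3 above.
const pLimit = require("p-limit"); // p-limit 3.x loads via require; 4+ is ESM-only
async function runInBatches(items, worker, batchSize = 10, concurrency = 5) {
    const batches = [];
    for (let i = 0; i < items.length; i += batchSize) {
        batches.push(items.slice(i, i + batchSize));
    }
    const limit = pLimit(concurrency); // at most `concurrency` batches in flight
    return Promise.all(batches.map((batch) => limit(() => worker(batch))));
}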
@@ -513,7 +658,8 @@ async function analyzeRepository(rootPath, onProgress, pullFn) {
     const db = initDb(dbPath);
     const now = Math.floor(Date.now() / 1000);
     const files = await discoverFiles(rootPath);
-    const
+    const practices = (0, practices_2.loadPracticesFromDb)(db);
+    const result = await analyzeFiles(db, rootPath, files, { pullFn, onProgress, onAnalysisProgress, practices });
     // Clean up entries for deleted files
     const fileSet = new Set(files);
     const deleteFileEntries = db.prepare("DELETE FROM file_functions WHERE file_path = ?");
@@ -539,12 +685,15 @@ async function analyzeRepository(rootPath, onProgress, pullFn) {
 function mergeAnalysisRows(rows) {
     let doc = {};
     let sec = {};
+    let prac = {};
     for (const row of rows) {
         const parsed = JSON.parse(row.analysis);
         if (row.analysis_type === "documentation")
             doc = parsed;
         else if (row.analysis_type === "security")
             sec = parsed;
+        else if (row.analysis_type === "practices")
+            prac = parsed;
     }
     return {
         description: doc.description || "",
@@ -552,6 +701,8 @@ function mergeAnalysisRows(rows) {
         returns: doc.returns || { type: "unknown", description: "" },
         dataTags: sec.dataTags || [],
         securityFlags: sec.securityFlags || [],
+        issues: sec.issues || [],
+        practiceViolations: prac.violations || [],
     };
 }
 function getAnalysisForFile(dbPath, filePath) {
@@ -631,9 +782,9 @@ async function refreshFileIndex(rootPath, onProgress) {
     const getFileMtime = db.prepare("SELECT file_mtime FROM file_functions WHERE file_path = ? LIMIT 1");
     const deleteFileEntries = db.prepare("DELETE FROM file_functions WHERE file_path = ?");
     const insertFileFunction = db.prepare(`
-        INSERT INTO file_functions
-        (file_path, function_name, content_hash, file_mtime, language, entity_type)
-        VALUES (?, ?, ?, ?, ?, ?)
+        INSERT OR REPLACE INTO file_functions
+        (file_path, function_name, content_hash, file_mtime, language, entity_type, start_line)
+        VALUES (?, ?, ?, ?, ?, ?, ?)
     `);
     const fileSet = new Set(files);
     for (let i = 0; i < files.length; i++) {
@@ -654,7 +805,7 @@ async function refreshFileIndex(rootPath, onProgress) {
         const functions = parser.extractFunctions(file);
         deleteFileEntries.run(file);
         for (const fn of functions) {
-            insertFileFunction.run(file, fn.name, fn.contentHash, currentMtime, fn.language, fn.entityType);
+            insertFileFunction.run(file, fn.name, fn.contentHash, currentMtime, fn.language, fn.entityType, fn.startLine);
         }
     }
     // Remove entries for deleted files
@@ -713,3 +864,54 @@ function importAnalysis(dbPath, rows) {
     db.close();
     return imported;
 }
+/**
+ * Extract all functions from a repository, returning FunctionInfo arrays
+ * with relationship data (calls, imports, exported). Used by the graph
+ * analysis pipeline to build the relationship graph.
+ *
+ * Respects the same file size limits and parser selection as refreshFileIndex.
+ */
+async function extractAllFunctions(rootPath, onProgress) {
+    const files = await discoverFiles(rootPath);
+    const allFunctions = [];
+    for (let i = 0; i < files.length; i++) {
+        const file = files[i];
+        const relPath = path.relative(rootPath, file);
+        onProgress?.(i + 1, files.length, relPath);
+        const stat = fs.statSync(file);
+        if (stat.size > MAX_FILE_SIZE_BYTES)
+            continue;
+        const parser = (0, parsers_1.getParserForFile)(file);
+        if (!parser)
+            continue;
+        const functions = parser.extractFunctions(file);
+        allFunctions.push(...functions);
+    }
+    return allFunctions;
+}
+/**
+ * Populate file_functions index from pre-extracted FunctionInfo array.
+ * Used by the graph pipeline to avoid scanning files twice — extractAllFunctions()
+ * already has all the data needed for the index.
+ */
+function populateFileIndex(db, functions) {
+    const byFile = new Map();
+    for (const fn of functions) {
+        const existing = byFile.get(fn.filePath) || [];
+        existing.push(fn);
+        byFile.set(fn.filePath, existing);
+    }
+    const deleteFileEntries = db.prepare("DELETE FROM file_functions WHERE file_path = ?");
+    const insertFileFunction = db.prepare(`INSERT OR REPLACE INTO file_functions (file_path, function_name, content_hash, file_mtime, language, entity_type, start_line)
+        VALUES (?, ?, ?, ?, ?, ?, ?)`);
+    const tx = db.transaction(() => {
+        for (const [filePath, fns] of byFile) {
+            const mtime = Math.floor(fs.statSync(filePath).mtimeMs);
+            deleteFileEntries.run(filePath);
+            for (const fn of fns) {
+                insertFileFunction.run(filePath, fn.name, fn.contentHash, mtime, fn.language, fn.entityType, fn.startLine);
+            }
+        }
+    });
+    tx();
+}
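Taken together, the new exports give the graph pipeline a single-scan path: extractAllFunctions walks the repository once, and populateFileIndex reuses that result to fill file_functions without a second pass. A usage sketch (the paths and progress callback here are illustrative, not taken from the package docs):

// Illustrative sketch combining the new exports; assumes .ophan/ already exists.
const { initDb, extractAllFunctions, populateFileIndex } = require("@ophan/core");
const path = require("path");

async function indexRepo(rootPath) {
    const db = initDb(path.join(rootPath, ".ophan", "index.db"));
    const functions = await extractAllFunctions(rootPath, (done, total, file) => {
        console.log(`[${done}/${total}] ${file}`);
    });
    populateFileIndex(db, functions); // single pass, no second file scan
    db.close();
}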
|