@ophan/core 0.0.1
This diff shows the contents of publicly available package versions as they appear in their respective public registries, and is provided for informational purposes only.
- package/README.md +107 -0
- package/dist/index.d.ts +115 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +715 -0
- package/dist/index.test.d.ts +2 -0
- package/dist/index.test.d.ts.map +1 -0
- package/dist/index.test.js +492 -0
- package/dist/parsers/__fixtures__/arrow-functions.d.ts +5 -0
- package/dist/parsers/__fixtures__/arrow-functions.d.ts.map +1 -0
- package/dist/parsers/__fixtures__/arrow-functions.js +16 -0
- package/dist/parsers/__fixtures__/class-methods.d.ts +6 -0
- package/dist/parsers/__fixtures__/class-methods.d.ts.map +1 -0
- package/dist/parsers/__fixtures__/class-methods.js +12 -0
- package/dist/parsers/__fixtures__/no-functions.d.ts +9 -0
- package/dist/parsers/__fixtures__/no-functions.d.ts.map +1 -0
- package/dist/parsers/__fixtures__/no-functions.js +4 -0
- package/dist/parsers/index.d.ts +3 -0
- package/dist/parsers/index.d.ts.map +1 -0
- package/dist/parsers/index.js +18 -0
- package/dist/parsers/python.d.ts +8 -0
- package/dist/parsers/python.d.ts.map +1 -0
- package/dist/parsers/python.js +137 -0
- package/dist/parsers/python.test.d.ts +2 -0
- package/dist/parsers/python.test.d.ts.map +1 -0
- package/dist/parsers/python.test.js +96 -0
- package/dist/parsers/registry.d.ts +8 -0
- package/dist/parsers/registry.d.ts.map +1 -0
- package/dist/parsers/registry.js +68 -0
- package/dist/parsers/types.d.ts +10 -0
- package/dist/parsers/types.d.ts.map +1 -0
- package/dist/parsers/types.js +18 -0
- package/dist/parsers/typescript.d.ts +8 -0
- package/dist/parsers/typescript.d.ts.map +1 -0
- package/dist/parsers/typescript.js +110 -0
- package/dist/parsers/typescript.test.d.ts +2 -0
- package/dist/parsers/typescript.test.d.ts.map +1 -0
- package/dist/parsers/typescript.test.js +106 -0
- package/dist/schemas.d.ts +100 -0
- package/dist/schemas.d.ts.map +1 -0
- package/dist/schemas.js +128 -0
- package/dist/shared.d.ts +12 -0
- package/dist/shared.d.ts.map +1 -0
- package/dist/shared.js +10 -0
- package/dist/test-utils.d.ts +46 -0
- package/dist/test-utils.d.ts.map +1 -0
- package/dist/test-utils.js +141 -0
- package/package.json +37 -0
package/dist/index.js
ADDED
@@ -0,0 +1,715 @@
"use strict";
// @ophan/core — Analysis engine
//
// Architecture:
// Content-addressed storage: Analysis is keyed by SHA256 of function source code,
// not by file path or branch. This makes analysis branch-agnostic, merge-friendly,
// and enables insert-only sync (no conflict resolution).
//
// Two-table split:
// - function_analysis: (content_hash, analysis_type) → analysis JSON. Persistent cache,
//   synced to cloud. Each function produces two rows: 'documentation' and 'security'.
// - file_functions: file_path → content_hash mappings. Ephemeral index, rebuilt on
//   every scan, never synced. Maps the current checkout's files to their analysis entries.
//
// Analysis types:
// Analysis is split into independent types ('documentation', 'security') stored as
// separate rows. This enables independent versioning, selective re-analysis, and
// extensibility (add new types without re-running existing analysis). See schemas.ts
// for the Zod schemas and docs/architecture/analysis-types.md for design rationale.
//
// Language & entity_type columns:
// Both tables carry `language` and `entity_type` fields to support multi-language
// analysis (Python, Go, Java) and multi-entity analysis (classes, methods, modules).
// These are set at parse time from file extension and AST node kind respectively.
//
// Parser abstraction:
// Function extraction is handled by language-specific parsers (see ./parsers/).
// Each parser implements the LanguageParser interface and registers itself by file extension.
// TypeScript/JavaScript uses the TS compiler API. Python uses Python's own ast module via
// subprocess. New languages are added by creating a parser file — the analysis pipeline,
// hashing, DB storage, and sync all work unchanged.
//
// No tree-sitter, no custom LSP:
// Each language uses its own native tooling rather than a universal parser framework.
// The CLI + SQLite DB is the cross-IDE abstraction — any IDE plugin just reads .ophan/index.db.
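// A minimal sketch of that keying (assuming computeHash in ./shared is a plain
// SHA-256 hex digest of the exact source text — the comments above pin the
// algorithm but not the encoding):
//
//   const crypto = require("crypto");
//   const contentHash = crypto.createHash("sha256").update(sourceCode, "utf8").digest("hex");
//   // Renaming the file or switching branches leaves contentHash unchanged;
//   // editing one character of the function body produces a brand-new key.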
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
var __exportStar = (this && this.__exportStar) || function(m, exports) {
    for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
};
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.getSupportedExtensions = exports.computeHash = void 0;
exports.ensureGitignore = ensureGitignore;
exports.discoverFiles = discoverFiles;
exports.initDb = initDb;
exports.migrateToAnalysisTypes = migrateToAnalysisTypes;
exports.analyzeFunctions = analyzeFunctions;
exports.analyzeFiles = analyzeFiles;
exports.analyzeRepository = analyzeRepository;
exports.mergeAnalysisRows = mergeAnalysisRows;
exports.getAnalysisForFile = getAnalysisForFile;
exports.getAnalysisByHash = getAnalysisByHash;
exports.gcAnalysis = gcAnalysis;
exports.refreshFileIndex = refreshFileIndex;
exports.findMissingHashes = findMissingHashes;
exports.importAnalysis = importAnalysis;
const sdk_1 = __importDefault(require("@anthropic-ai/sdk"));
const better_sqlite3_1 = __importDefault(require("better-sqlite3"));
const fs = __importStar(require("fs"));
const path = __importStar(require("path"));
const child_process_1 = require("child_process");
const glob_1 = require("glob");
const p_retry_1 = __importDefault(require("p-retry"));
const parsers_1 = require("./parsers");
// Re-export from shared so downstream consumers (CLI, etc.) can still import from @ophan/core
var shared_1 = require("./shared");
Object.defineProperty(exports, "computeHash", { enumerable: true, get: function () { return shared_1.computeHash; } });
var parsers_2 = require("./parsers");
Object.defineProperty(exports, "getSupportedExtensions", { enumerable: true, get: function () { return parsers_2.getSupportedExtensions; } });
__exportStar(require("./schemas"), exports);
const schemas_1 = require("./schemas");
/** Skip files larger than this — catches minified bundles, generated code, etc. */
const MAX_FILE_SIZE_BYTES = 50 * 1024; // 50 KB
/**
 * Ensure .ophan/ is in .gitignore. Only acts in git repos.
 * Creates .gitignore if the repo has .git/ but no .gitignore.
 */
function ensureGitignore(rootPath) {
    const gitignorePath = path.join(rootPath, ".gitignore");
    if (fs.existsSync(gitignorePath)) {
        const content = fs.readFileSync(gitignorePath, "utf-8");
        if (!content.includes(".ophan")) {
            fs.appendFileSync(gitignorePath, "\n# Ophan analysis cache\n.ophan/\n");
        }
    }
    else if (fs.existsSync(path.join(rootPath, ".git"))) {
        fs.writeFileSync(gitignorePath, "# Ophan analysis cache\n.ophan/\n");
    }
}
const HARDCODED_IGNORES = [
    "**/node_modules/**", "**/.ophan/**", "**/dist/**",
    "**/__pycache__/**", "**/.venv/**", "**/venv/**", "**/env/**",
    "**/.tox/**", "**/.eggs/**", "**/build/**",
];
/**
 * Discover source files using git (respects .gitignore) with glob fallback.
 * Returns absolute paths filtered to supported extensions.
 */
async function discoverFiles(rootPath) {
    const exts = (0, parsers_1.getSupportedExtensions)(); // [".ts", ".tsx", ...]
    const extSet = new Set(exts.map((e) => e.toLowerCase()));
    try {
        // git ls-files: tracked + untracked-but-not-ignored. Respects all .gitignore files.
        const output = (0, child_process_1.execSync)("git ls-files --cached --others --exclude-standard", { cwd: rootPath, encoding: "utf-8", maxBuffer: 10 * 1024 * 1024 });
        const files = output
            .split("\n")
            .filter((f) => f.length > 0)
            .filter((f) => extSet.has(path.extname(f).toLowerCase()))
            .map((f) => path.resolve(rootPath, f));
        // Still apply hardcoded ignores as safety net (e.g. checked-in node_modules)
        const ignoreSegments = ["node_modules", ".ophan", "__pycache__", ".venv", "venv", ".tox", ".eggs"];
        return files.filter((f) => !ignoreSegments.some((seg) => f.includes(`/${seg}/`) || f.includes(`\\${seg}\\`)));
    }
    catch {
        // Not a git repo or git not available — fall back to glob
        const globExts = exts.map((e) => e.slice(1));
        return (0, glob_1.glob)(`**/*.{${globExts.join(",")}}`, {
            cwd: rootPath,
            ignore: HARDCODED_IGNORES,
            absolute: true,
        });
    }
}
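// Worked example (hypothetical checkout): with src/app.ts tracked, scratch.py
// untracked-but-not-ignored, and node_modules/ gitignored,
//
//   git ls-files --cached --others --exclude-standard
//
// prints "src/app.ts\nscratch.py\n..." — both survive the extension filter and
// come back as absolute paths, while node_modules never appears. Outside a git
// repo, execSync throws and the catch branch globs **/*.{ts,tsx,...} with
// HARDCODED_IGNORES instead.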
// ============ DATABASE ============
function initDb(dbPath) {
    const db = new better_sqlite3_1.default(dbPath);
    // Check if we need the analysis_type migration
    const columns = db.prepare("PRAGMA table_info(function_analysis)").all();
    const hasAnalysisType = columns.some((c) => c.name === "analysis_type");
    if (columns.length > 0 && !hasAnalysisType) {
        // Existing DB without analysis_type — run migration
        migrateToAnalysisTypes(db);
    }
    else if (columns.length === 0) {
        // Fresh DB — create with new schema directly
        createFreshSchema(db);
    }
    // else: already migrated, no action needed
    // Ensure supporting tables exist
    db.exec(`
        CREATE TABLE IF NOT EXISTS file_functions (
            file_path TEXT NOT NULL,
            function_name TEXT NOT NULL,
            content_hash TEXT NOT NULL,
            file_mtime INTEGER NOT NULL,
            language TEXT NOT NULL DEFAULT 'typescript',
            entity_type TEXT NOT NULL DEFAULT 'function'
        )
    `);
    // Migration: add columns to file_functions for existing databases
    const ffColumns = db.prepare("PRAGMA table_info(file_functions)").all();
    if (!ffColumns.some((c) => c.name === "language")) {
        try {
            db.exec("ALTER TABLE file_functions ADD COLUMN language TEXT NOT NULL DEFAULT 'typescript'");
        }
        catch (_) { }
        try {
            db.exec("ALTER TABLE file_functions ADD COLUMN entity_type TEXT NOT NULL DEFAULT 'function'");
        }
        catch (_) { }
    }
    db.exec(`
        CREATE TABLE IF NOT EXISTS function_gc (
            content_hash TEXT NOT NULL,
            analysis_type TEXT,
            gc_at INTEGER NOT NULL,
            synced_at INTEGER,
            PRIMARY KEY (content_hash, analysis_type)
        )
    `);
    // Migration: add analysis_type to function_gc for existing databases
    const gcColumns = db.prepare("PRAGMA table_info(function_gc)").all();
    if (!gcColumns.some((c) => c.name === "analysis_type")) {
        try {
            db.exec("ALTER TABLE function_gc ADD COLUMN analysis_type TEXT");
        }
        catch (_) { }
        // Rebuild PK by recreating table
        db.exec(`
            CREATE TABLE IF NOT EXISTS function_gc_v2 (
                content_hash TEXT NOT NULL,
                analysis_type TEXT,
                gc_at INTEGER NOT NULL,
                synced_at INTEGER,
                PRIMARY KEY (content_hash, analysis_type)
            )
        `);
        db.exec("INSERT OR IGNORE INTO function_gc_v2 SELECT content_hash, NULL, gc_at, synced_at FROM function_gc");
        db.exec("DROP TABLE function_gc");
        db.exec("ALTER TABLE function_gc_v2 RENAME TO function_gc");
    }
    db.exec(`
        CREATE TABLE IF NOT EXISTS sync_meta (
            key TEXT PRIMARY KEY,
            value TEXT NOT NULL
        )
    `);
    db.exec("CREATE INDEX IF NOT EXISTS idx_file_functions_path ON file_functions(file_path)");
    db.exec("CREATE INDEX IF NOT EXISTS idx_file_functions_hash ON file_functions(content_hash)");
    db.exec("CREATE INDEX IF NOT EXISTS idx_fa_hash ON function_analysis(content_hash)");
    db.exec("CREATE INDEX IF NOT EXISTS idx_fa_type ON function_analysis(analysis_type)");
    return db;
}
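// The migration probe above works because PRAGMA table_info on a missing table
// returns zero rows rather than throwing. The three cases it distinguishes:
//
//   db.prepare("PRAGMA table_info(function_analysis)").all()
//   // []                                     → fresh DB: createFreshSchema(db)
//   // rows without { name: "analysis_type" } → v1 schema: migrateToAnalysisTypes(db)
//   // rows including { name: "analysis_type" } → already migrated: no-op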
function createFreshSchema(db) {
    db.exec(`
        CREATE TABLE IF NOT EXISTS function_analysis (
            content_hash TEXT NOT NULL,
            analysis_type TEXT NOT NULL,
            analysis JSON NOT NULL,
            model_version TEXT NOT NULL,
            schema_version INTEGER NOT NULL DEFAULT 1,
            created_at INTEGER NOT NULL,
            last_seen_at INTEGER NOT NULL,
            language TEXT NOT NULL DEFAULT 'typescript',
            entity_type TEXT NOT NULL DEFAULT 'function',
            synced_at INTEGER,
            PRIMARY KEY (content_hash, analysis_type)
        )
    `);
}
/**
 * Migrate from single-blob function_analysis to split analysis types.
 * Creates v2 table, splits each existing row into 'documentation' + 'security' rows,
 * drops old table, renames. Sets synced_at = NULL to force full re-sync.
 */
function migrateToAnalysisTypes(db) {
    db.exec("BEGIN TRANSACTION");
    try {
        db.exec(`
            CREATE TABLE function_analysis_v2 (
                content_hash TEXT NOT NULL,
                analysis_type TEXT NOT NULL,
                analysis JSON NOT NULL,
                model_version TEXT NOT NULL,
                schema_version INTEGER NOT NULL DEFAULT 1,
                created_at INTEGER NOT NULL,
                last_seen_at INTEGER NOT NULL,
                language TEXT NOT NULL DEFAULT 'typescript',
                entity_type TEXT NOT NULL DEFAULT 'function',
                synced_at INTEGER,
                PRIMARY KEY (content_hash, analysis_type)
            )
        `);
        // Split documentation fields
        db.exec(`
            INSERT OR IGNORE INTO function_analysis_v2
                (content_hash, analysis_type, analysis, model_version, schema_version, created_at, last_seen_at, language, entity_type, synced_at)
            SELECT
                content_hash,
                'documentation',
                json_object(
                    'description', json_extract(analysis, '$.description'),
                    'params', json_extract(analysis, '$.params'),
                    'returns', json_extract(analysis, '$.returns')
                ),
                model_version,
                1,
                created_at,
                last_seen_at,
                COALESCE(language, 'typescript'),
                COALESCE(entity_type, 'function'),
                NULL
            FROM function_analysis
        `);
        // Split security fields
        db.exec(`
            INSERT OR IGNORE INTO function_analysis_v2
                (content_hash, analysis_type, analysis, model_version, schema_version, created_at, last_seen_at, language, entity_type, synced_at)
            SELECT
                content_hash,
                'security',
                json_object(
                    'dataTags', json_extract(analysis, '$.dataTags'),
                    'securityFlags', json_extract(analysis, '$.securityFlags')
                ),
                model_version,
                1,
                created_at,
                last_seen_at,
                COALESCE(language, 'typescript'),
                COALESCE(entity_type, 'function'),
                NULL
            FROM function_analysis
        `);
        db.exec("DROP TABLE function_analysis");
        db.exec("ALTER TABLE function_analysis_v2 RENAME TO function_analysis");
        db.exec("COMMIT");
    }
    catch (e) {
        db.exec("ROLLBACK");
        throw e;
    }
}
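// Worked example of the split (illustrative values, not taken from a real DB):
// a single v1 blob such as
//
//   {"description":"Hashes a password","params":[...],"returns":{...},
//    "dataTags":["credentials"],"securityFlags":[]}
//
// becomes two v2 rows sharing one content_hash:
//
//   ('documentation', '{"description":"Hashes a password","params":[...],"returns":{...}}')
//   ('security',      '{"dataTags":["credentials"],"securityFlags":[]}')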
// ============ LLM ANALYSIS ============
const anthropic = new sdk_1.default();
async function analyzeFunctions(functions) {
    if (functions.length === 0)
        return [];
    // Use the language from the first function in the batch (batches are always same-file)
    const lang = functions[0].language || "typescript";
    const langLabel = lang.charAt(0).toUpperCase() + lang.slice(1); // "TypeScript", "Python"
    // Language-specific security concerns for the prompt
    const securityExamples = {
        typescript: "sql_injection, xss, hardcoded_secret, unsanitized_input, path_traversal, prototype_pollution",
        javascript: "sql_injection, xss, hardcoded_secret, unsanitized_input, path_traversal, prototype_pollution",
        python: "sql_injection, command_injection, hardcoded_secret, unsanitized_input, path_traversal, pickle_deserialization, eval_exec, insecure_subprocess",
    };
    const securityHints = securityExamples[lang] || securityExamples.typescript;
    const prompt = `Analyze these ${langLabel} functions. Return JSON array with one object per function.

Each object must have:
- name: string (function name, must match input)
- description: string (one sentence)
- params: array of { name, type, description }
- returns: { type, description }
- dataTags: array from [user_input, pii, credentials, database, external_api, file_system, config, internal]
- securityFlags: array of concerns like [${securityHints}] or empty

Functions:
${functions
        .map((fn) => `### ${fn.name}\n\`\`\`${lang}\n${fn.sourceCode}\n\`\`\``)
        .join("\n\n")}

CRITICAL: Return ONLY the raw JSON array. No markdown, no code fences, no \`\`\`json blocks, no explanation. Just the [ ... ] array directly.`;
    const response = await (0, p_retry_1.default)(() => anthropic.messages.create({
        model: "claude-sonnet-4-20250514",
        max_tokens: 4096,
        messages: [{ role: "user", content: prompt }],
    }), {
        retries: 4,
        onFailedAttempt: (err) => {
            if (err.status !== 429)
                throw err;
            console.log(` Rate limited, retrying (${err.attemptNumber}/4)...`);
        },
    });
    const text = response.content[0].type === "text" ? response.content[0].text : "";
    try {
        // Strip markdown code fences if model adds them
        const cleaned = text.replace(/^```(?:json)?\s*\n?/i, "").replace(/\n?```\s*$/i, "").trim();
        const results = JSON.parse(cleaned);
        return functions.map((fn) => {
            const raw = results.find((r) => r.name === fn.name) || {};
            // Validate with Zod, using .catch() defaults for graceful degradation
            const validated = schemas_1.ClaudeAnalysisResponse.parse({ name: fn.name, ...raw });
            return {
                ...fn,
                description: validated.description,
                params: validated.params,
                returns: validated.returns,
                dataTags: validated.dataTags,
                securityFlags: validated.securityFlags,
            };
        });
    }
    catch (e) {
        console.error("Failed to parse LLM response:", text);
        return functions.map((fn) => ({
            ...fn,
            description: "Analysis failed",
            params: [],
            returns: { type: "unknown", description: "" },
            dataTags: [],
            securityFlags: [],
        }));
    }
}
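// For reference, a response element that passes ClaudeAnalysisResponse.parse
// looks like this (an illustrative value — schemas.js holds the authoritative
// Zod shapes, including the .catch() defaults mentioned above):
//
//   { "name": "loadUser", "description": "Fetches a user row by id.",
//     "params": [{ "name": "id", "type": "string", "description": "User id" }],
//     "returns": { "type": "Promise<User>", "description": "The matching user" },
//     "dataTags": ["database"], "securityFlags": [] }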
/**
 * Analyze a specific set of files: parse → hash → pull → Claude → store.
 * Used by `analyzeRepository()` for full scans and `ophan watch` for incremental.
 * The caller is responsible for DB lifecycle (open/close).
 */
async function analyzeFiles(db, rootPath, files, options = {}) {
    const { pullFn, onProgress } = options;
    const now = Math.floor(Date.now() / 1000);
    let totalAnalyzed = 0;
    let totalSkippedSize = 0;
    let totalSkipped = 0;
    let totalPulled = 0;
    // Prepared statements
    const getFileMtime = db.prepare("SELECT file_mtime FROM file_functions WHERE file_path = ? LIMIT 1");
    // Check for documentation type — if it exists, security exists too (always written together)
    const checkAnalysis = db.prepare("SELECT 1 FROM function_analysis WHERE content_hash = ? AND analysis_type = 'documentation'");
    const insertDocAnalysis = db.prepare(`
        INSERT OR IGNORE INTO function_analysis
            (content_hash, analysis_type, analysis, model_version, schema_version, created_at, last_seen_at, language, entity_type)
        VALUES (?, 'documentation', ?, ?, ?, ?, ?, ?, ?)
    `);
    const insertSecAnalysis = db.prepare(`
        INSERT OR IGNORE INTO function_analysis
            (content_hash, analysis_type, analysis, model_version, schema_version, created_at, last_seen_at, language, entity_type)
        VALUES (?, 'security', ?, ?, ?, ?, ?, ?, ?)
    `);
    const deleteFileEntries = db.prepare("DELETE FROM file_functions WHERE file_path = ?");
    const insertFileFunction = db.prepare(`
        INSERT INTO file_functions
            (file_path, function_name, content_hash, file_mtime, language, entity_type)
        VALUES (?, ?, ?, ?, ?, ?)
    `);
    const pendingFiles = [];
    const allMissingHashes = new Set();
    for (let i = 0; i < files.length; i++) {
        const file = files[i];
        const relPath = path.relative(rootPath, file);
        onProgress?.(i + 1, files.length, relPath);
        const stat = fs.statSync(file);
        if (stat.size > MAX_FILE_SIZE_BYTES) {
            totalSkippedSize++;
            continue;
        }
        const currentMtime = Math.floor(stat.mtimeMs);
        const storedRow = getFileMtime.get(file);
        if (storedRow && storedRow.file_mtime === currentMtime) {
            continue;
        }
        const parser = (0, parsers_1.getParserForFile)(file);
        if (!parser)
            continue;
        const functions = parser.extractFunctions(file);
        if (functions.length === 0) {
            deleteFileEntries.run(file);
            continue;
        }
        const needsAnalysis = [];
        for (const fn of functions) {
            if (checkAnalysis.get(fn.contentHash)) {
                totalSkipped++;
            }
            else {
                needsAnalysis.push(fn);
                allMissingHashes.add(fn.contentHash);
            }
        }
        pendingFiles.push({ file, functions, needsAnalysis, mtime: currentMtime });
    }
    // Phase 2: Pull from cloud to avoid redundant Claude calls
    if (pullFn && allMissingHashes.size > 0) {
        await pullFn([...allMissingHashes]);
    }
    // Phase 3: Run Claude analysis on hashes still missing, update file_functions
    const modelVersion = "claude-sonnet-4-20250514";
    for (const pending of pendingFiles) {
        // Re-check after pull — some hashes may now exist locally
        const stillNeeds = pullFn
            ? pending.needsAnalysis.filter((fn) => !checkAnalysis.get(fn.contentHash))
            : pending.needsAnalysis;
        const pulled = pending.needsAnalysis.length - stillNeeds.length;
        totalPulled += pulled;
        totalSkipped += pulled;
        for (let j = 0; j < stillNeeds.length; j += 10) {
            const batch = stillNeeds.slice(j, j + 10);
            const analyzed = await analyzeFunctions(batch);
            for (const fn of analyzed) {
                insertDocAnalysis.run(fn.contentHash, JSON.stringify({
                    description: fn.description,
                    params: fn.params,
                    returns: fn.returns,
                }), modelVersion, schemas_1.SCHEMA_VERSIONS.documentation, now, now, fn.language, fn.entityType);
                insertSecAnalysis.run(fn.contentHash, JSON.stringify({
                    dataTags: fn.dataTags,
                    securityFlags: fn.securityFlags,
                }), modelVersion, schemas_1.SCHEMA_VERSIONS.security, now, now, fn.language, fn.entityType);
            }
            totalAnalyzed += analyzed.length;
        }
        // Rebuild file_functions for this file
        deleteFileEntries.run(pending.file);
        for (const fn of pending.functions) {
            insertFileFunction.run(pending.file, fn.name, fn.contentHash, pending.mtime, fn.language, fn.entityType);
        }
    }
    return { analyzed: totalAnalyzed, skipped: totalSkipped, skippedSize: totalSkippedSize, pulled: totalPulled };
}
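// Usage sketch: the caller owns the DB handle and wires pull sync in through
// pullFn. fetchRowsFromSupabase is hypothetical — the real pull implementation
// lives in the CLI, not in this package:
//
//   const dbPath = path.join(root, ".ophan", "index.db");
//   const db = initDb(dbPath);
//   const stats = await analyzeFiles(db, root, await discoverFiles(root), {
//       pullFn: async (hashes) => {
//           const rows = await fetchRowsFromSupabase(hashes); // hypothetical
//           importAnalysis(dbPath, rows);
//       },
//       onProgress: (i, n, f) => console.log(`${i}/${n} ${f}`),
//   });
//   db.close(); // stats = { analyzed, skipped, skippedSize, pulled }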
async function analyzeRepository(rootPath, onProgress, pullFn) {
    const dbPath = path.join(rootPath, ".ophan", "index.db");
    fs.mkdirSync(path.join(rootPath, ".ophan"), { recursive: true });
    // Auto-add .ophan/ to .gitignore (only in git repos)
    ensureGitignore(rootPath);
    const db = initDb(dbPath);
    const now = Math.floor(Date.now() / 1000);
    const files = await discoverFiles(rootPath);
    const result = await analyzeFiles(db, rootPath, files, { pullFn, onProgress });
    // Clean up entries for deleted files
    const fileSet = new Set(files);
    const deleteFileEntries = db.prepare("DELETE FROM file_functions WHERE file_path = ?");
    const storedPaths = db
        .prepare("SELECT DISTINCT file_path FROM file_functions")
        .all();
    for (const { file_path } of storedPaths) {
        if (!fileSet.has(file_path)) {
            deleteFileEntries.run(file_path);
        }
    }
    // Update last_seen_at for all hashes currently referenced
    db.prepare(`UPDATE function_analysis SET last_seen_at = ?
        WHERE content_hash IN (SELECT DISTINCT content_hash FROM file_functions)`).run(now);
    db.close();
    return { files: files.length, ...result };
}
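// End-to-end sketch. The Anthropic client constructed above reads
// ANTHROPIC_API_KEY from the environment, so that must be set before a scan:
//
//   const { analyzeRepository } = require("@ophan/core");
//   const summary = await analyzeRepository("/path/to/repo",
//       (i, n, f) => process.stdout.write(`\r${i}/${n} ${f}`));
//   console.log(summary); // { files, analyzed, skipped, skippedSize, pulled }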
// ============ QUERY ============
/**
 * Merge documentation + security analysis rows for a content_hash into
 * the unified FunctionAnalysis shape that consumers expect.
 */
function mergeAnalysisRows(rows) {
    let doc = {};
    let sec = {};
    for (const row of rows) {
        const parsed = JSON.parse(row.analysis);
        if (row.analysis_type === "documentation")
            doc = parsed;
        else if (row.analysis_type === "security")
            sec = parsed;
    }
    return {
        description: doc.description || "",
        params: doc.params || [],
        returns: doc.returns || { type: "unknown", description: "" },
        dataTags: sec.dataTags || [],
        securityFlags: sec.securityFlags || [],
    };
}
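// Example: a missing row degrades to defaults instead of throwing, so partially
// synced hashes still merge:
//
//   mergeAnalysisRows([{ analysis_type: "documentation",
//       analysis: '{"description":"Adds two numbers.","params":[],"returns":{"type":"number","description":""}}' }]);
//   // → { description: "Adds two numbers.", params: [], returns: {...},
//   //     dataTags: [], securityFlags: [] }   (no security row → defaults)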
function getAnalysisForFile(dbPath, filePath) {
    const db = new better_sqlite3_1.default(dbPath, { readonly: true });
    const rows = db
        .prepare(`SELECT ff.function_name, ff.content_hash, fa.analysis_type, fa.analysis, ff.language, ff.entity_type
            FROM file_functions ff
            JOIN function_analysis fa ON ff.content_hash = fa.content_hash
            WHERE ff.file_path = ?`)
        .all(filePath);
    db.close();
    // Group by content_hash, merge analysis types
    const grouped = new Map();
    for (const row of rows) {
        const existing = grouped.get(row.content_hash);
        if (existing) {
            existing.analysisRows.push({ analysis_type: row.analysis_type, analysis: row.analysis });
        }
        else {
            grouped.set(row.content_hash, {
                functionName: row.function_name,
                contentHash: row.content_hash,
                language: row.language,
                entityType: row.entity_type,
                analysisRows: [{ analysis_type: row.analysis_type, analysis: row.analysis }],
            });
        }
    }
    return [...grouped.values()].map((entry) => ({
        functionName: entry.functionName,
        contentHash: entry.contentHash,
        analysis: mergeAnalysisRows(entry.analysisRows),
        language: entry.language,
        entityType: entry.entityType,
    }));
}
function getAnalysisByHash(dbPath, hash) {
    const db = new better_sqlite3_1.default(dbPath, { readonly: true });
    const rows = db
        .prepare("SELECT analysis_type, analysis FROM function_analysis WHERE content_hash = ?")
        .all(hash);
    db.close();
    if (rows.length === 0)
        return null;
    return mergeAnalysisRows(rows);
}
// ============ GC ============
function gcAnalysis(dbPath, maxAgeDays = 30) {
    const db = new better_sqlite3_1.default(dbPath);
    const cutoff = Math.floor(Date.now() / 1000) - maxAgeDays * 24 * 60 * 60;
    const now = Math.floor(Date.now() / 1000);
    // Insert tombstones for hashes about to be deleted (for sync to Supabase)
    // One tombstone per (content_hash, analysis_type) pair
    db.prepare(`INSERT OR IGNORE INTO function_gc (content_hash, analysis_type, gc_at)
        SELECT content_hash, analysis_type, ?
        FROM function_analysis
        WHERE content_hash NOT IN (SELECT DISTINCT content_hash FROM file_functions)
          AND last_seen_at < ?`).run(now, cutoff);
    // Also clean up any previously-synced GC entries
    db.prepare("DELETE FROM function_gc WHERE synced_at IS NOT NULL").run();
    const result = db
        .prepare(`DELETE FROM function_analysis
            WHERE content_hash NOT IN (SELECT DISTINCT content_hash FROM file_functions)
              AND last_seen_at < ?`)
        .run(cutoff);
    db.close();
    return { deleted: result.changes };
}
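// Tombstone lifecycle sketch: an unreferenced hash last seen more than
// maxAgeDays ago is tombstoned, then deleted locally:
//
//   gcAnalysis(dbPath, 30);
//   // function_gc gains one row per (content_hash, analysis_type) pair with
//   // synced_at = NULL; after whatever pushes tombstones to Supabase
//   // (presumably the CLI sync) stamps synced_at, the next gcAnalysis run
//   // deletes those tombstones as well.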
// ============ REFRESH INDEX ============
async function refreshFileIndex(rootPath, onProgress) {
    const dbPath = path.join(rootPath, ".ophan", "index.db");
    if (!fs.existsSync(dbPath))
        return;
    const db = initDb(dbPath);
    const now = Math.floor(Date.now() / 1000);
    const files = await discoverFiles(rootPath);
    const getFileMtime = db.prepare("SELECT file_mtime FROM file_functions WHERE file_path = ? LIMIT 1");
    const deleteFileEntries = db.prepare("DELETE FROM file_functions WHERE file_path = ?");
    const insertFileFunction = db.prepare(`
        INSERT INTO file_functions
            (file_path, function_name, content_hash, file_mtime, language, entity_type)
        VALUES (?, ?, ?, ?, ?, ?)
    `);
    const fileSet = new Set(files);
    for (let i = 0; i < files.length; i++) {
        const file = files[i];
        const relPath = path.relative(rootPath, file);
        onProgress?.(i + 1, files.length, relPath);
        const stat = fs.statSync(file);
        if (stat.size > MAX_FILE_SIZE_BYTES)
            continue;
        const currentMtime = Math.floor(stat.mtimeMs);
        const storedRow = getFileMtime.get(file);
        if (storedRow && storedRow.file_mtime === currentMtime) {
            continue;
        }
        const parser = (0, parsers_1.getParserForFile)(file);
        if (!parser)
            continue;
        const functions = parser.extractFunctions(file);
        deleteFileEntries.run(file);
        for (const fn of functions) {
            insertFileFunction.run(file, fn.name, fn.contentHash, currentMtime, fn.language, fn.entityType);
        }
    }
    // Remove entries for deleted files
    const storedPaths = db
        .prepare("SELECT DISTINCT file_path FROM file_functions")
        .all();
    for (const { file_path } of storedPaths) {
        if (!fileSet.has(file_path)) {
            deleteFileEntries.run(file_path);
        }
    }
    // Update last_seen_at for current hashes
    db.prepare(`UPDATE function_analysis SET last_seen_at = ?
        WHERE content_hash IN (SELECT DISTINCT content_hash FROM file_functions)`).run(now);
    db.close();
}
// ============ PULL SYNC HELPERS ============
/**
 * Find content hashes that are referenced in file_functions but have no
 * analysis in function_analysis. Used by CLI to know what to pull from Supabase
 * before running expensive Claude analysis.
 */
function findMissingHashes(dbPath) {
    const db = new better_sqlite3_1.default(dbPath, { readonly: true });
    const rows = db.prepare(`SELECT DISTINCT ff.content_hash
        FROM file_functions ff
        LEFT JOIN function_analysis fa ON ff.content_hash = fa.content_hash AND fa.analysis_type = 'documentation'
        WHERE fa.content_hash IS NULL`).all();
    db.close();
    return rows.map((r) => r.content_hash);
}
/**
 * Import analysis rows pulled from Supabase into the local SQLite database.
 * Used by CLI pull sync to cache remote analysis locally.
 */
function importAnalysis(dbPath, rows) {
    if (rows.length === 0)
        return 0;
    const db = new better_sqlite3_1.default(dbPath);
    const now = Math.floor(Date.now() / 1000);
    const insert = db.prepare(`
        INSERT OR IGNORE INTO function_analysis
            (content_hash, analysis_type, analysis, model_version, schema_version, created_at, last_seen_at, language, entity_type, synced_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `);
    let imported = 0;
    const runInserts = db.transaction(() => {
        for (const r of rows) {
            const result = insert.run(r.content_hash, r.analysis_type, r.analysis, r.model_version, r.schema_version, now, now, r.language, r.entity_type, now // synced_at = now (came from Supabase)
            );
            if (result.changes > 0)
                imported++;
        }
    });
    runInserts();
    db.close();
    return imported;
}
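// Pull round-trip sketch tying the two helpers together (fetchRowsFromSupabase
// is hypothetical, as above):
//
//   const missing = findMissingHashes(dbPath); // referenced but unanalyzed hashes
//   if (missing.length > 0) {
//       const rows = await fetchRowsFromSupabase(missing); // hypothetical
//       const n = importAnalysis(dbPath, rows); // INSERT OR IGNORE, synced_at = now
//       console.log(`pulled ${n} analysis rows`);
//   }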