scai 0.1.116 → 0.1.118
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/MainAgent.js +255 -0
- package/dist/agents/contextReviewStep.js +104 -0
- package/dist/agents/finalPlanGenStep.js +123 -0
- package/dist/agents/infoPlanGenStep.js +126 -0
- package/dist/agents/planGeneratorStep.js +118 -0
- package/dist/agents/planResolverStep.js +95 -0
- package/dist/agents/planTargetFilesStep.js +48 -0
- package/dist/agents/preFileSearchCheckStep.js +95 -0
- package/dist/agents/selectRelevantSourcesStep.js +100 -0
- package/dist/agents/semanticAnalysisStep.js +144 -0
- package/dist/agents/structuralAnalysisStep.js +46 -0
- package/dist/agents/transformPlanGenStep.js +107 -0
- package/dist/agents/understandIntentStep.js +72 -0
- package/dist/agents/validationAnalysisStep.js +87 -0
- package/dist/commands/AskCmd.js +47 -116
- package/dist/commands/ChangeLogUpdateCmd.js +11 -5
- package/dist/commands/CommitSuggesterCmd.js +50 -75
- package/dist/commands/DaemonCmd.js +119 -29
- package/dist/commands/IndexCmd.js +41 -24
- package/dist/commands/InspectCmd.js +0 -1
- package/dist/commands/ReadlineSingleton.js +18 -0
- package/dist/commands/ResetDbCmd.js +20 -21
- package/dist/commands/ReviewCmd.js +89 -54
- package/dist/commands/SummaryCmd.js +12 -18
- package/dist/commands/WorkflowCmd.js +41 -0
- package/dist/commands/factory.js +254 -0
- package/dist/config.js +67 -15
- package/dist/constants.js +20 -4
- package/dist/context.js +10 -11
- package/dist/daemon/daemonQueues.js +63 -0
- package/dist/daemon/daemonWorker.js +40 -63
- package/dist/daemon/generateSummaries.js +58 -0
- package/dist/daemon/runFolderCapsuleBatch.js +247 -0
- package/dist/daemon/runIndexingBatch.js +147 -0
- package/dist/daemon/runKgBatch.js +104 -0
- package/dist/db/fileIndex.js +168 -63
- package/dist/db/functionExtractors/extractFromJava.js +210 -6
- package/dist/db/functionExtractors/extractFromJs.js +186 -198
- package/dist/db/functionExtractors/extractFromTs.js +181 -192
- package/dist/db/functionExtractors/index.js +7 -5
- package/dist/db/schema.js +55 -20
- package/dist/db/sqlTemplates.js +50 -19
- package/dist/fileRules/builtins.js +31 -0
- package/dist/fileRules/codeAllowedExtensions.js +4 -0
- package/dist/fileRules/fileExceptions.js +0 -13
- package/dist/fileRules/ignoredExtensions.js +10 -0
- package/dist/index.js +128 -325
- package/dist/lib/generate.js +37 -14
- package/dist/lib/generateFolderCapsules.js +109 -0
- package/dist/lib/spinner.js +12 -5
- package/dist/modelSetup.js +35 -6
- package/dist/pipeline/modules/changeLogModule.js +16 -19
- package/dist/pipeline/modules/chunkManagerModule.js +24 -0
- package/dist/pipeline/modules/cleanupModule.js +96 -91
- package/dist/pipeline/modules/codeTransformModule.js +208 -0
- package/dist/pipeline/modules/commentModule.js +20 -11
- package/dist/pipeline/modules/commitSuggesterModule.js +36 -14
- package/dist/pipeline/modules/contextReviewModule.js +52 -0
- package/dist/pipeline/modules/fileReaderModule.js +72 -0
- package/dist/pipeline/modules/fileSearchModule.js +136 -0
- package/dist/pipeline/modules/finalAnswerModule.js +53 -0
- package/dist/pipeline/modules/gatherInfoModule.js +176 -0
- package/dist/pipeline/modules/generateTestsModule.js +63 -54
- package/dist/pipeline/modules/kgModule.js +26 -11
- package/dist/pipeline/modules/preserveCodeModule.js +91 -49
- package/dist/pipeline/modules/refactorModule.js +19 -7
- package/dist/pipeline/modules/repairTestsModule.js +44 -36
- package/dist/pipeline/modules/reviewModule.js +23 -13
- package/dist/pipeline/modules/summaryModule.js +27 -35
- package/dist/pipeline/modules/writeFileModule.js +86 -0
- package/dist/pipeline/registry/moduleRegistry.js +38 -93
- package/dist/pipeline/runModulePipeline.js +22 -19
- package/dist/scripts/dbcheck.js +156 -91
- package/dist/utils/buildContextualPrompt.js +245 -164
- package/dist/utils/debugContext.js +24 -0
- package/dist/utils/fileTree.js +16 -6
- package/dist/utils/loadRelevantFolderCapsules.js +64 -0
- package/dist/utils/log.js +2 -0
- package/dist/utils/normalizeData.js +23 -0
- package/dist/utils/planActions.js +60 -0
- package/dist/utils/promptBuilderHelper.js +67 -0
- package/dist/utils/promptLogHelper.js +52 -0
- package/dist/utils/sanitizeQuery.js +20 -8
- package/dist/utils/sharedUtils.js +8 -0
- package/dist/utils/sleep.js +3 -0
- package/dist/utils/splitCodeIntoChunk.js +65 -32
- package/dist/utils/vscode.js +49 -0
- package/dist/workflow/workflowResolver.js +14 -0
- package/dist/workflow/workflowRunner.js +103 -0
- package/package.json +6 -5
- package/dist/agent/agentManager.js +0 -39
- package/dist/agent/workflowManager.js +0 -95
- package/dist/commands/ModulePipelineCmd.js +0 -31
- package/dist/daemon/daemonBatch.js +0 -186
- package/dist/fileRules/scoreFiles.js +0 -71
- package/dist/lib/generateEmbedding.js +0 -22
|
@@ -1,41 +1,33 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
import path from 'path';
|
|
1
|
+
import { generate } from "../../lib/generate.js";
|
|
2
|
+
import { logInputOutput } from "../../utils/promptLogHelper.js";
|
|
4
3
|
export const summaryModule = {
|
|
5
|
-
name:
|
|
6
|
-
description:
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
const
|
|
4
|
+
name: "summary",
|
|
5
|
+
description: "Generates a general summary of any file content.",
|
|
6
|
+
groups: ["analysis"],
|
|
7
|
+
run: async (input) => {
|
|
8
|
+
// ā
Only care about content now
|
|
9
|
+
const contentStr = typeof input.content === "string"
|
|
10
|
+
? input.content
|
|
11
|
+
: JSON.stringify(input.content ?? "", null, 2);
|
|
12
|
+
// ā Removed filepath/ext/filename extraction
|
|
11
13
|
const prompt = `
|
|
12
14
|
You are an assistant specialized in summarizing files.
|
|
13
15
|
|
|
14
|
-
Your task is to summarize the
|
|
16
|
+
Your task is to summarize the following content as clearly and concisely as possible:
|
|
15
17
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
if (response.content) {
|
|
32
|
-
response.summary = response.content;
|
|
33
|
-
console.log('\nš Summary:\n');
|
|
34
|
-
console.log(response.summary);
|
|
35
|
-
}
|
|
36
|
-
else {
|
|
37
|
-
console.warn('ā ļø No summary generated.');
|
|
38
|
-
}
|
|
39
|
-
return response;
|
|
40
|
-
}
|
|
18
|
+
${contentStr}
|
|
19
|
+
`.trim();
|
|
20
|
+
const response = await generate({
|
|
21
|
+
content: prompt,
|
|
22
|
+
query: ""
|
|
23
|
+
});
|
|
24
|
+
const summary = response.data ?? "ā ļø No summary generated.";
|
|
25
|
+
const output = {
|
|
26
|
+
query: input.query, // keep query for context
|
|
27
|
+
content: '',
|
|
28
|
+
data: { summary }, // ā no filepath
|
|
29
|
+
};
|
|
30
|
+
logInputOutput("summary", "output", output.data);
|
|
31
|
+
return output;
|
|
32
|
+
},
|
|
41
33
|
};
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
// File: src/modules/writeFileModule.ts
|
|
2
|
+
import fs from "fs/promises";
|
|
3
|
+
import chalk from "chalk";
|
|
4
|
+
import { normalizePath } from "../../utils/contentUtils.js";
|
|
5
|
+
import { logInputOutput } from "../../utils/promptLogHelper.js";
|
|
6
|
+
export const writeFileModule = {
|
|
7
|
+
name: "writeFile",
|
|
8
|
+
description: "Writes materialized file outputs from codeTransformModule to disk. " +
|
|
9
|
+
"Only writes files specified in the current plan step.",
|
|
10
|
+
groups: ["finalize"],
|
|
11
|
+
run: async (input) => {
|
|
12
|
+
var _a;
|
|
13
|
+
const context = input.context;
|
|
14
|
+
const mode = input.data?.mode ?? "overwrite";
|
|
15
|
+
if (!context) {
|
|
16
|
+
return {
|
|
17
|
+
query: input.query,
|
|
18
|
+
data: { writeMode: mode, writtenFiles: [], errors: ["Missing execution context"] },
|
|
19
|
+
};
|
|
20
|
+
}
|
|
21
|
+
const step = context.currentStep;
|
|
22
|
+
if (!step) {
|
|
23
|
+
return {
|
|
24
|
+
query: input.query,
|
|
25
|
+
data: { writeMode: mode, writtenFiles: [], errors: ["No current step in context"] },
|
|
26
|
+
};
|
|
27
|
+
}
|
|
28
|
+
// Determine target file(s) from the current step
|
|
29
|
+
const targetFiles = [];
|
|
30
|
+
if (step.targetFile)
|
|
31
|
+
targetFiles.push(step.targetFile);
|
|
32
|
+
if (Array.isArray(step.targetFiles))
|
|
33
|
+
targetFiles.push(...step.targetFiles);
|
|
34
|
+
if (!targetFiles.length) {
|
|
35
|
+
return {
|
|
36
|
+
query: input.query,
|
|
37
|
+
data: { writeMode: mode, writtenFiles: [], errors: ["No targetFile(s) specified in current step"] },
|
|
38
|
+
};
|
|
39
|
+
}
|
|
40
|
+
// Filter codeTransformArtifacts to only the target file(s)
|
|
41
|
+
const allFiles = context.execution?.codeTransformArtifacts?.files ?? [];
|
|
42
|
+
const filesToWrite = allFiles.filter(f => targetFiles.includes(f.filePath));
|
|
43
|
+
if (!filesToWrite.length) {
|
|
44
|
+
return {
|
|
45
|
+
query: input.query,
|
|
46
|
+
data: { writeMode: mode, writtenFiles: [], errors: ["No transformed files found for targetFile(s)"] },
|
|
47
|
+
};
|
|
48
|
+
}
|
|
49
|
+
const writtenFiles = [];
|
|
50
|
+
const errors = [];
|
|
51
|
+
for (const f of filesToWrite) {
|
|
52
|
+
const filePath = normalizePath(f.filePath);
|
|
53
|
+
if (!filePath) {
|
|
54
|
+
errors.push(`Invalid filePath: ${f.filePath}`);
|
|
55
|
+
continue;
|
|
56
|
+
}
|
|
57
|
+
if (typeof f.content !== "string" || !f.content.trim()) {
|
|
58
|
+
errors.push(`No content to write for ${filePath}`);
|
|
59
|
+
continue;
|
|
60
|
+
}
|
|
61
|
+
try {
|
|
62
|
+
await fs.writeFile(filePath, f.content, "utf-8");
|
|
63
|
+
console.log(chalk.green(`ā
Written: ${filePath}`));
|
|
64
|
+
writtenFiles.push(filePath);
|
|
65
|
+
}
|
|
66
|
+
catch (err) {
|
|
67
|
+
console.error(chalk.red(`ā Failed writing ${filePath}:`), err);
|
|
68
|
+
errors.push(`${filePath}: ${err.message}`);
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
// Mark only the files we just wrote as touched
|
|
72
|
+
context.plan ?? (context.plan = {});
|
|
73
|
+
(_a = context.plan).touchedFiles ?? (_a.touchedFiles = []);
|
|
74
|
+
for (const file of writtenFiles) {
|
|
75
|
+
if (!context.plan.touchedFiles.includes(file)) {
|
|
76
|
+
context.plan.touchedFiles.push(file);
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
const output = {
|
|
80
|
+
query: input.query,
|
|
81
|
+
data: { writeMode: mode, writtenFiles, errors },
|
|
82
|
+
};
|
|
83
|
+
logInputOutput("writeFile", "output", output.data);
|
|
84
|
+
return output;
|
|
85
|
+
},
|
|
86
|
+
};
|
|
@@ -1,110 +1,55 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
1
|
+
import { contextReviewModule } from "../modules/contextReviewModule.js";
|
|
2
|
+
import { finalAnswerModule } from "../modules/finalAnswerModule.js";
|
|
3
|
+
import { writeFileModule } from "../modules/writeFileModule.js";
|
|
4
|
+
import { fileSearchModule } from "../modules/fileSearchModule.js";
|
|
5
|
+
import { cleanupModule } from "../modules/cleanupModule.js";
|
|
6
|
+
import { summaryModule } from "../modules/summaryModule.js";
|
|
7
|
+
import { addCommentsModule } from "../modules/commentModule.js";
|
|
8
|
+
import { codeTransformModule } from "../modules/codeTransformModule.js";
|
|
9
|
+
/**
|
|
10
|
+
* Active built-in modules — all use ModuleIO for input/output.
|
|
11
|
+
*/
|
|
12
12
|
export const builtInModules = {
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
group: 'documentation',
|
|
23
|
-
},
|
|
24
|
-
cleanup: {
|
|
25
|
-
...cleanupModule,
|
|
26
|
-
group: 'maintenance',
|
|
27
|
-
},
|
|
28
|
-
summary: {
|
|
29
|
-
...summaryModule,
|
|
30
|
-
group: 'documentation',
|
|
31
|
-
},
|
|
32
|
-
tests: {
|
|
33
|
-
...generateTestsModule,
|
|
34
|
-
group: 'testing',
|
|
35
|
-
dependencies: {
|
|
36
|
-
after: ['cleanTests'], // run cleanTests after tests
|
|
37
|
-
},
|
|
38
|
-
},
|
|
39
|
-
cleanTests: {
|
|
40
|
-
...cleanGeneratedTestsModule,
|
|
41
|
-
group: 'testing',
|
|
42
|
-
},
|
|
43
|
-
runTests: {
|
|
44
|
-
...runTestsModule,
|
|
45
|
-
group: 'testing',
|
|
46
|
-
dependencies: {
|
|
47
|
-
before: ['tests'], // must exist after tests are generated
|
|
48
|
-
after: ['cleanTests'], // run after cleaning
|
|
49
|
-
},
|
|
50
|
-
},
|
|
51
|
-
repairTests: {
|
|
52
|
-
...repairTestsModule,
|
|
53
|
-
group: 'testing',
|
|
54
|
-
dependencies: {
|
|
55
|
-
after: ['runTests'], // repair runs after tests have been executed
|
|
56
|
-
},
|
|
57
|
-
},
|
|
58
|
-
suggest: {
|
|
59
|
-
...commitSuggesterModule,
|
|
60
|
-
group: 'git',
|
|
61
|
-
},
|
|
62
|
-
changelog: {
|
|
63
|
-
...changelogModule,
|
|
64
|
-
group: 'git',
|
|
65
|
-
},
|
|
13
|
+
fileSearch: fileSearchModule,
|
|
14
|
+
summary: summaryModule,
|
|
15
|
+
contextReview: contextReviewModule,
|
|
16
|
+
// analysis (as agent options?)
|
|
17
|
+
codeTransform: codeTransformModule,
|
|
18
|
+
cleanup: cleanupModule,
|
|
19
|
+
comments: addCommentsModule,
|
|
20
|
+
writeFile: writeFileModule,
|
|
21
|
+
finalAnswer: finalAnswerModule,
|
|
66
22
|
};
|
|
67
|
-
|
|
23
|
+
/**
|
|
24
|
+
* Get module by name.
|
|
25
|
+
*/
|
|
68
26
|
export function getModuleByName(name) {
|
|
69
27
|
return builtInModules[name];
|
|
70
28
|
}
|
|
71
|
-
|
|
29
|
+
/**
|
|
30
|
+
* List available modules for CLI or UI display.
|
|
31
|
+
*/
|
|
72
32
|
export function listAvailableModules() {
|
|
73
|
-
return Object.
|
|
33
|
+
return Object.entries(builtInModules).map(([name, mod]) => ({
|
|
74
34
|
name,
|
|
75
|
-
description: description
|
|
76
|
-
group,
|
|
77
|
-
dependencies,
|
|
35
|
+
description: mod.description ?? "No description available",
|
|
78
36
|
}));
|
|
79
37
|
}
|
|
80
38
|
/**
|
|
81
|
-
* Resolve
|
|
82
|
-
* Returns a unique ordered array of
|
|
39
|
+
* Resolve modules from a list of names.
|
|
40
|
+
* Returns a unique ordered array of modules.
|
|
83
41
|
*/
|
|
84
|
-
export function
|
|
42
|
+
export function resolveModulesByNames(names) {
|
|
43
|
+
const seen = new Set();
|
|
85
44
|
const resolved = [];
|
|
86
|
-
const
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
return;
|
|
45
|
+
for (const name of names) {
|
|
46
|
+
if (seen.has(name))
|
|
47
|
+
continue;
|
|
90
48
|
const mod = getModuleByName(name);
|
|
91
|
-
if (
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
if (mod.dependencies?.before) {
|
|
95
|
-
for (const dep of mod.dependencies.before)
|
|
96
|
-
visit(dep);
|
|
97
|
-
}
|
|
98
|
-
// Add the module itself
|
|
99
|
-
resolved.push(mod);
|
|
100
|
-
visited.add(name);
|
|
101
|
-
// Handle after dependencies
|
|
102
|
-
if (mod.dependencies?.after) {
|
|
103
|
-
for (const dep of mod.dependencies.after)
|
|
104
|
-
visit(dep);
|
|
49
|
+
if (mod) {
|
|
50
|
+
resolved.push(mod);
|
|
51
|
+
seen.add(name);
|
|
105
52
|
}
|
|
106
53
|
}
|
|
107
|
-
for (const name of moduleNames)
|
|
108
|
-
visit(name);
|
|
109
54
|
return resolved;
|
|
110
55
|
}
|
|
@@ -1,27 +1,30 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
1
|
+
export async function runModulePipeline(modules, initialInput) {
|
|
2
|
+
const logPrefix = `[runModulePipeline]`;
|
|
3
|
+
console.log(`${logPrefix} š Starting pipeline with ${modules.length} module(s)`);
|
|
4
|
+
// Copy initial input
|
|
5
|
+
let currentIO = { ...initialInput };
|
|
5
6
|
for (const mod of modules) {
|
|
6
7
|
try {
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
if (isDebug) {
|
|
12
|
-
console.log(chalk.yellow('ā”ļø Output:', response.content));
|
|
8
|
+
const output = await mod.run(currentIO);
|
|
9
|
+
// Validate shape
|
|
10
|
+
if (!output) {
|
|
11
|
+
throw new Error(`Module '${mod.name}' returned empty output`);
|
|
13
12
|
}
|
|
14
|
-
//
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
13
|
+
// --- FIX: Pipeline semantics ---
|
|
14
|
+
// Next module receives:
|
|
15
|
+
// - same query
|
|
16
|
+
// - content = previous module's data
|
|
17
|
+
// - (data is the last produced data only, never merged)
|
|
18
|
+
currentIO = {
|
|
19
|
+
query: currentIO.query,
|
|
20
|
+
content: output.data, // data -> content handoff
|
|
21
|
+
data: '',
|
|
19
22
|
};
|
|
23
|
+
console.log(`${logPrefix} š '${mod.name}' ā content replaced with data`);
|
|
20
24
|
}
|
|
21
|
-
catch (
|
|
22
|
-
console.error(
|
|
23
|
-
throw new Error(`Pipeline failed at module ${mod.name}`);
|
|
25
|
+
catch (err) {
|
|
26
|
+
console.error(`${logPrefix} ā Module '${mod.name}' failed:`, err instanceof Error ? err.message : err);
|
|
24
27
|
}
|
|
25
28
|
}
|
|
26
|
-
return
|
|
29
|
+
return currentIO;
|
|
27
30
|
}
|
package/dist/scripts/dbcheck.js
CHANGED
|
@@ -1,119 +1,184 @@
|
|
|
1
1
|
import Database from "better-sqlite3";
|
|
2
2
|
import path from "path";
|
|
3
3
|
import os from "os";
|
|
4
|
-
import { Config } from "../config.js";
|
|
5
4
|
import fs from "fs";
|
|
5
|
+
import { execSync } from "child_process";
|
|
6
|
+
import { Config } from "../config.js";
|
|
7
|
+
/* āāāāāāāāāāāāāāāāāāāāāāāāā bootstrap āāāāāāāāāāāāāāāāāāāāāāāāā */
|
|
6
8
|
const cfg = Config.getRaw();
|
|
7
9
|
const repoKey = cfg.activeRepo;
|
|
8
10
|
if (!repoKey) {
|
|
9
|
-
console.error("ā No active repo found. Use `scai set-index
|
|
11
|
+
console.error("ā No active repo found. Use `scai set-index`.");
|
|
10
12
|
process.exit(1);
|
|
11
13
|
}
|
|
12
14
|
const repoName = path.basename(repoKey);
|
|
13
15
|
const scaiRepoRoot = path.join(os.homedir(), ".scai", "repos", repoName);
|
|
14
16
|
const dbPath = path.join(scaiRepoRoot, "db.sqlite");
|
|
15
17
|
if (!fs.existsSync(dbPath)) {
|
|
16
|
-
console.error(`ā
|
|
18
|
+
console.error(`ā DB not found: ${dbPath}`);
|
|
17
19
|
process.exit(1);
|
|
18
20
|
}
|
|
19
21
|
const db = new Database(dbPath);
|
|
20
|
-
|
|
21
|
-
function
|
|
22
|
-
|
|
23
|
-
let extra = "";
|
|
24
|
-
if (column) {
|
|
25
|
-
const nonNull = db.prepare(`SELECT COUNT(*) AS count FROM ${table} WHERE ${column} IS NOT NULL AND ${column} != ''`).get().count;
|
|
26
|
-
extra = ` | Non-null ${column}: ${nonNull}`;
|
|
27
|
-
}
|
|
28
|
-
console.log(`š ${table}: ${total}${extra}`);
|
|
22
|
+
/* āāāāāāāāāāāāāāāāāāāāāāāāā helpers āāāāāāāāāāāāāāāāāāāāāāāāā */
|
|
23
|
+
function tableCount(table) {
|
|
24
|
+
return db.prepare(`SELECT COUNT(*) AS c FROM ${table}`).get().c;
|
|
29
25
|
}
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
tableStats("files", "summary");
|
|
33
|
-
tableStats("files", "embedding");
|
|
34
|
-
const processingStatuses = ["extracted", "skipped", "failed", "unprocessed"];
|
|
35
|
-
for (const status of processingStatuses) {
|
|
36
|
-
const count = db.prepare(`SELECT COUNT(*) AS count FROM files WHERE processing_status = ?`).get(status).count;
|
|
37
|
-
console.log(` Status '${status}': ${count}`);
|
|
26
|
+
function nonEmptyCount(table, col) {
|
|
27
|
+
return db.prepare(`SELECT COUNT(*) AS c FROM ${table} WHERE ${col} IS NOT NULL AND ${col} != ''`).get().c;
|
|
38
28
|
}
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
29
|
+
function header(title) {
|
|
30
|
+
console.log(`\n${title}`);
|
|
31
|
+
}
|
|
32
|
+
/* āāāāāāāāāāāāāāāāāāāāāāāāā files āāāāāāāāāāāāāāāāāāāāāāāāā */
|
|
33
|
+
header("š files");
|
|
34
|
+
const totalFiles = tableCount("files");
|
|
35
|
+
const filesWithContent = nonEmptyCount("files", "content_text");
|
|
36
|
+
console.log(`š total files: ${totalFiles}`);
|
|
37
|
+
console.log(`š files with content: ${filesWithContent}`);
|
|
38
|
+
header("āļø processing_status");
|
|
39
|
+
const statuses = db.prepare(`
|
|
40
|
+
SELECT processing_status, COUNT(*) AS count
|
|
42
41
|
FROM files
|
|
43
|
-
|
|
42
|
+
GROUP BY processing_status
|
|
43
|
+
ORDER BY count DESC
|
|
44
44
|
`).all();
|
|
45
|
-
console.log(
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
const
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
const
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
45
|
+
statuses.forEach(s => console.log(` ${s.processing_status ?? "NULL"}: ${s.count}`));
|
|
46
|
+
/* āāāāāāāāāāāāāāāāāāāāāāāāā FTS āāāāāāāāāāāāāāāāāāāāāāāāā */
|
|
47
|
+
header("š files_fts");
|
|
48
|
+
const ftsExists = db
|
|
49
|
+
.prepare(`SELECT 1 FROM sqlite_master WHERE type='table' AND name='files_fts'`)
|
|
50
|
+
.get();
|
|
51
|
+
if (!ftsExists) {
|
|
52
|
+
console.log("ā files_fts missing ā search is broken");
|
|
53
|
+
}
|
|
54
|
+
else {
|
|
55
|
+
const cols = db.prepare(`PRAGMA table_info(files_fts)`).all();
|
|
56
|
+
const hasContentText = cols.some(c => c.name === "content_text");
|
|
57
|
+
if (!hasContentText) {
|
|
58
|
+
console.log("ā files_fts.content_text missing");
|
|
59
|
+
}
|
|
60
|
+
else {
|
|
61
|
+
const indexed = nonEmptyCount("files_fts", "content_text");
|
|
62
|
+
console.log(`š¦ indexed rows with content_text: ${indexed}`);
|
|
63
|
+
const sample = db.prepare(`
|
|
64
|
+
SELECT filename, substr(content_text,1,80) AS preview
|
|
65
|
+
FROM files_fts
|
|
66
|
+
WHERE files_fts MATCH 'function'
|
|
67
|
+
LIMIT 3
|
|
68
|
+
`).all();
|
|
69
|
+
if (sample.length === 0) {
|
|
70
|
+
console.log("ā ļø no FTS hits for 'function'");
|
|
71
|
+
}
|
|
72
|
+
else {
|
|
73
|
+
console.log("ā
FTS sample:");
|
|
74
|
+
sample.forEach(r => console.log(` ${r.filename} | "${r.preview}"`));
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
/* āāāāāāāāāāāāāāāāāāāāāāāāā folder capsules āāāāāāāāāāāāāāāāāāāāāāāāā */
|
|
79
|
+
header("š folder_capsules");
|
|
80
|
+
console.log(`š total capsules: ${tableCount("folder_capsules")}`);
|
|
81
|
+
const emptyCapsules = db.prepare(`
|
|
82
|
+
SELECT COUNT(*) AS c
|
|
83
|
+
FROM folder_capsules
|
|
84
|
+
WHERE capsule_json IS NULL OR capsule_json = ''
|
|
85
|
+
`).get();
|
|
86
|
+
if (emptyCapsules.c > 0) {
|
|
87
|
+
console.log(`ā ļø ${emptyCapsules.c} capsules have empty JSON`);
|
|
88
|
+
}
|
|
89
|
+
const sampleCaps = db.prepare(`
|
|
90
|
+
SELECT path, depth, confidence, source_file_count, capsule_json
|
|
91
|
+
FROM folder_capsules
|
|
90
92
|
ORDER BY RANDOM()
|
|
91
93
|
LIMIT 5
|
|
92
94
|
`).all();
|
|
93
|
-
|
|
94
|
-
|
|
95
|
+
sampleCaps.forEach(c => {
|
|
96
|
+
let parsed = null;
|
|
97
|
+
try {
|
|
98
|
+
parsed = JSON.parse(c.capsule_json);
|
|
99
|
+
}
|
|
100
|
+
catch { }
|
|
101
|
+
console.log(` ${c.path}`);
|
|
102
|
+
console.log(` depth=${c.depth} confidence=${c.confidence} files=${c.source_file_count}`);
|
|
103
|
+
if (parsed) {
|
|
104
|
+
console.log(` roles=${parsed.roles?.length ?? 0} ` +
|
|
105
|
+
`concerns=${parsed.concerns?.length ?? 0} ` +
|
|
106
|
+
`keyFiles=${parsed.keyFiles?.length ?? 0}`);
|
|
107
|
+
}
|
|
95
108
|
});
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
const
|
|
100
|
-
SELECT id, name
|
|
101
|
-
FROM
|
|
109
|
+
/* āāāāāāāāāāāāāāāāāāāāāāāāā functions āāāāāāāāāāāāāāāāāāāāāāāāā */
|
|
110
|
+
header("š§āš» functions");
|
|
111
|
+
console.log(`š total functions: ${tableCount("functions")}`);
|
|
112
|
+
const funcSamples = db.prepare(`
|
|
113
|
+
SELECT id, name, file_id, substr(content,1,60) AS preview
|
|
114
|
+
FROM functions
|
|
102
115
|
ORDER BY RANDOM()
|
|
103
116
|
LIMIT 5
|
|
104
117
|
`).all();
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
console.log(
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
118
|
+
funcSamples.forEach(f => console.log(` [${f.id}] ${f.name} (file ${f.file_id}) "${f.preview}"`));
|
|
119
|
+
/* āāāāāāāāāāāāāāāāāāāāāāāāā graph (KG) āāāāāāāāāāāāāāāāāāāāāāāāā */
|
|
120
|
+
header("š· graph_classes");
|
|
121
|
+
console.log(`š total classes: ${tableCount("graph_classes")}`);
|
|
122
|
+
header("š graph_edges");
|
|
123
|
+
console.log(`š total edges: ${tableCount("graph_edges")}`);
|
|
124
|
+
const danglingFuncs = db.prepare(`
|
|
125
|
+
SELECT e.id
|
|
126
|
+
FROM graph_edges e
|
|
127
|
+
WHERE e.source_type='function'
|
|
128
|
+
AND NOT EXISTS (SELECT 1 FROM functions f WHERE f.unique_id = e.source_unique_id)
|
|
129
|
+
UNION
|
|
130
|
+
SELECT e.id
|
|
131
|
+
FROM graph_edges e
|
|
132
|
+
WHERE e.target_type='function'
|
|
133
|
+
AND NOT EXISTS (SELECT 1 FROM functions f WHERE f.unique_id = e.target_unique_id)
|
|
134
|
+
LIMIT 10
|
|
115
135
|
`).all();
|
|
116
|
-
|
|
117
|
-
console.log(
|
|
118
|
-
}
|
|
119
|
-
|
|
136
|
+
if (danglingFuncs.length === 0) {
|
|
137
|
+
console.log("ā
no dangling function edges");
|
|
138
|
+
}
|
|
139
|
+
else {
|
|
140
|
+
console.log(`ā dangling function edges: ${danglingFuncs.length}`);
|
|
141
|
+
}
|
|
142
|
+
/* āāāāāāāāāāāāāāāāāāāāāāāāā Graphviz āāāāāāāāāāāāāāāāāāāāāāāāā */
|
|
143
|
+
header("š§© graphviz");
|
|
144
|
+
try {
|
|
145
|
+
const edges = db.prepare(`
|
|
146
|
+
SELECT source_type, source_unique_id, target_type, target_unique_id, relation
|
|
147
|
+
FROM graph_edges
|
|
148
|
+
LIMIT 300
|
|
149
|
+
`).all();
|
|
150
|
+
if (edges.length === 0) {
|
|
151
|
+
console.log("ā ļø no edges ā skipping export");
|
|
152
|
+
}
|
|
153
|
+
else {
|
|
154
|
+
const lines = [
|
|
155
|
+
"digraph G {",
|
|
156
|
+
" rankdir=LR;",
|
|
157
|
+
' node [shape=box, style=filled, color="#eaeaea"];'
|
|
158
|
+
];
|
|
159
|
+
for (const e of edges) {
|
|
160
|
+
const s = `${e.source_type}_${e.source_unique_id}`.replace(/[^a-zA-Z0-9_]/g, "_");
|
|
161
|
+
const t = `${e.target_type}_${e.target_unique_id}`.replace(/[^a-zA-Z0-9_]/g, "_");
|
|
162
|
+
const label = (e.relation ?? "").replace(/"/g, "'");
|
|
163
|
+
lines.push(` ${s} -> ${t} [label="${label}"];`);
|
|
164
|
+
}
|
|
165
|
+
lines.push("}");
|
|
166
|
+
const outDir = path.join(scaiRepoRoot, "graphs");
|
|
167
|
+
fs.mkdirSync(outDir, { recursive: true });
|
|
168
|
+
const dot = path.join(outDir, "graph-overview.dot");
|
|
169
|
+
const png = path.join(outDir, "graph-overview.png");
|
|
170
|
+
fs.writeFileSync(dot, lines.join("\n"));
|
|
171
|
+
console.log(`ā
dot written: ${dot}`);
|
|
172
|
+
try {
|
|
173
|
+
execSync(`dot -Tpng "${dot}" -o "${png}"`);
|
|
174
|
+
console.log(`ā
png written: ${png}`);
|
|
175
|
+
}
|
|
176
|
+
catch {
|
|
177
|
+
console.log("ā ļø graphviz not installed ā png skipped");
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
catch (err) {
|
|
182
|
+
console.error("ā graphviz export failed", err);
|
|
183
|
+
}
|
|
184
|
+
console.log("\nā
DB check complete\n");
|