boxsafe 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.directory +2 -0
- package/.env.example +3 -0
- package/AUDIT_LANG.md +45 -0
- package/BOXSAFE_VERSION_NOTES.md +14 -0
- package/README.md +4 -0
- package/TODO.md +130 -0
- package/adapters/index.ts +27 -0
- package/adapters/primary/cli-adapter.ts +56 -0
- package/adapters/secondary/filesystem/node-filesystem.ts +307 -0
- package/adapters/secondary/system/configuration.ts +147 -0
- package/ai/caller.ts +42 -0
- package/ai/label.ts +33 -0
- package/ai/modelConfig.ts +236 -0
- package/ai/provider.ts +111 -0
- package/boxsafe.config.json +68 -0
- package/core/auth/dasktop/cred/CRED.md +112 -0
- package/core/auth/dasktop/cred/credLinux.ts +82 -0
- package/core/auth/dasktop/cred/credWin.ts +2 -0
- package/core/config/defaults/boxsafeDefaults.ts +67 -0
- package/core/config/defaults/index.ts +1 -0
- package/core/config/loadConfig.ts +133 -0
- package/core/loop/about.md +13 -0
- package/core/loop/boxConfig.ts +20 -0
- package/core/loop/buildExecCommand.ts +76 -0
- package/core/loop/cmd/execode.ts +121 -0
- package/core/loop/cmd/test.js +3 -0
- package/core/loop/execLoop.ts +341 -0
- package/core/loop/git/VERSIONING.md +17 -0
- package/core/loop/git/commands.ts +11 -0
- package/core/loop/git/gitClient.ts +78 -0
- package/core/loop/git/index.ts +99 -0
- package/core/loop/git/runVersionControlRunner.ts +33 -0
- package/core/loop/initNavigator.ts +44 -0
- package/core/loop/initTasksManager.ts +35 -0
- package/core/loop/runValidation.ts +25 -0
- package/core/loop/tasks/AGENT-TASKS.md +36 -0
- package/core/loop/tasks/index.ts +96 -0
- package/core/loop/toolCalls.ts +168 -0
- package/core/loop/toolDispatcher.ts +146 -0
- package/core/loop/traceLogger.ts +106 -0
- package/core/loop/types.ts +26 -0
- package/core/loop/versionControlAdapter.ts +36 -0
- package/core/loop/waterfall.ts +404 -0
- package/core/loop/writeArtifactAtomically.ts +13 -0
- package/core/navigate/NAVIGATE.md +186 -0
- package/core/navigate/about.md +128 -0
- package/core/navigate/examples.ts +367 -0
- package/core/navigate/handler.ts +148 -0
- package/core/navigate/index.ts +32 -0
- package/core/navigate/navigate.test.ts +372 -0
- package/core/navigate/navigator.ts +437 -0
- package/core/navigate/types.ts +132 -0
- package/core/navigate/utils.ts +146 -0
- package/core/paths/paths.ts +33 -0
- package/core/ports/index.ts +271 -0
- package/core/segments/CONVENTIONS.md +30 -0
- package/core/segments/loop/index.ts +18 -0
- package/core/segments/map.ts +56 -0
- package/core/segments/navigate/index.ts +20 -0
- package/core/segments/versionControl/index.ts +18 -0
- package/core/util/logger.ts +128 -0
- package/docs/AGENT-TASKS.md +36 -0
- package/docs/ARQUITETURA_CORRECAO.md +121 -0
- package/docs/CONVENTIONS.md +30 -0
- package/docs/CRED.md +112 -0
- package/docs/L_RAG.md +567 -0
- package/docs/NAVIGATE.md +186 -0
- package/docs/PRIMARY_ACTORS.md +78 -0
- package/docs/SECONDARY_ACTORS.md +174 -0
- package/docs/VERSIONING.md +17 -0
- package/docs/boxsafe.config.md +472 -0
- package/eslint.config.mts +15 -0
- package/main.ts +53 -0
- package/memo/generated/codelog.md +13 -0
- package/memo/state/tasks/state.json +6 -0
- package/memo/state/tasks/tasks/task_001.md +2 -0
- package/memo/states-logs/logs.txt +7 -0
- package/memo/states-logs/trace-mljvrxvi-9g0k4q.jsonl +11 -0
- package/memo/states-logs/trace-mljvvc9j-pe9ekj.jsonl +11 -0
- package/memo/states-logs/trace-mljvvm1c-wbnqzp.jsonl +11 -0
- package/memo/states-logs/trace-mljxecwn-9xh3nw.jsonl +11 -0
- package/memo/states-logs/trace-mljxqkfm-ipijik.jsonl +11 -0
- package/memo/states-logs/trace-mljxwtrw-3fanky.jsonl +11 -0
- package/memo/states-logs/trace-mljxzen3-m8iinh.jsonl +11 -0
- package/memo/states-logs/trace-mljyucef-td6odn.jsonl +11 -0
- package/memo/states-logs/trace-mljyuprw-b1a6f4.jsonl +11 -0
- package/memo/states-logs/trace-mljyvefl-b6yoce.jsonl +11 -0
- package/memo/states-logs/trace-mljyxjo4-n7ibj2.jsonl +13 -0
- package/memo/states-logs/trace-mljziez5-8drqtn.jsonl +13 -0
- package/memo/states-logs/trace-mljziulp-dtd03z.jsonl +13 -0
- package/memo/states-logs/trace-mljzjwrq-1p2krb.jsonl +13 -0
- package/memo/states-logs/trace-mljzl0i7-b1cqa6.jsonl +13 -0
- package/memo/states-logs/trace-mljzmlk6-7kdyls.jsonl +13 -0
- package/memo/states-logs/trace-mlk0oj25-xa3dcu.jsonl +13 -0
- package/memo/states-logs/trace-mlk1x59q-713huj.jsonl +14 -0
- package/memo/states-logs/trace-mlk22dz8-7fd6hq.jsonl +14 -0
- package/memo/states-logs/trace-mlk241uy-wmx907.jsonl +14 -0
- package/memo/states-logs/trace-mlk2bf5r-yoh1vg.jsonl +15 -0
- package/package.json +44 -0
- package/pnpm-workspace.yaml +4 -0
- package/prompt_improvement_example.md +55 -0
- package/remove.txt +1 -0
- package/tests/adapters.test.ts +128 -0
- package/tests/extractCode.test.ts +26 -0
- package/tests/integration.test.ts +83 -0
- package/tests/loadConfig.test.ts +25 -0
- package/tests/navigatorBoundary.test.ts +17 -0
- package/tests/ports.test.ts +84 -0
- package/tests/runAllTests.ts +49 -0
- package/tests/toolCalls.test.ts +149 -0
- package/tests/waterfall.test.ts +52 -0
- package/tsconfig.json +32 -0
- package/tsup.config.ts +17 -0
- package/types.d.ts +96 -0
- package/util/ANSI.ts +29 -0
- package/util/extractCode.ts +217 -0
- package/util/extractToolCalls.ts +80 -0
- package/util/logger.ts +125 -0
|
@@ -0,0 +1,404 @@
|
|
|
1
|
+
import { readFile } from "node:fs/promises";
|
|
2
|
+
import { Logger } from '@core/util/logger';
|
|
3
|
+
|
|
4
|
+
const logger = Logger.createModuleLogger('Waterfall');
|
|
5
|
+
|
|
6
|
+
/**
 * Raw outcome of an executed command, as consumed by the waterfall checks.
 */
export interface ExecResult {
  /** Process exit status; only `0` is treated as success by `checkExitCode`. */
  exitCode: number;
  /** Captured standard output; searched for success contracts. */
  stdout: string;
  /** Captured standard error; scanned for critical and warning patterns. */
  stderr: string;
}
|
|
11
|
+
|
|
12
|
+
/**
 * Input handed to `waterfall`: the execution result plus optional artifacts.
 */
export interface WaterfallContext {
  /** Result of the command whose outcome is being validated. */
  exec: ExecResult;
  /** Optional files produced by the run. */
  artifacts?: {
    /** Path of the output file; when absent, the artifact check awards full points. */
    outputFile?: string;
  };
}
|
|
18
|
+
|
|
19
|
+
/**
 * Failure variant of `WaterfallResult` (discriminated by `ok: false`).
 */
export interface WaterfallFail {
  ok: false;
  /** Check that rejected the run; `waterfall` emits 'exit-code', 'stderr' or 'score-threshold'. */
  layer: string;
  /** Short human-readable explanation of the rejection. */
  reason: string;
  /** Extra context, e.g. the exit code or a truncated stderr excerpt. */
  details?: string;
  /** Points accumulated up to the moment of failure. */
  score?: number;
  /** Per-check results gathered before the failure was reported. */
  breakdown?: ScoreBreakdown;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Success variant of `WaterfallResult` (discriminated by `ok: true`).
 */
export interface WaterfallSuccess {
  ok: true;
  /** Total points obtained; at least the passing threshold when returned by `waterfall`. */
  score: number;
  /** Per-check results that produced `score`. */
  breakdown: ScoreBreakdown;
}
|
|
33
|
+
|
|
34
|
+
/** Outcome of `waterfall`; narrow on `ok` before reading variant-specific fields. */
export type WaterfallResult =
  | WaterfallFail
  | WaterfallSuccess;
|
|
35
|
+
|
|
36
|
+
/**
 * Per-check results plus the aggregate figures computed by `waterfall`.
 */
export interface ScoreBreakdown {
  /** Result of the exit-code check. */
  exitCode: CheckResult;
  /** Result of the stderr pattern scan. */
  stderr: CheckResult;
  /** Result of the success-contract check. */
  outputContract: CheckResult;
  /** Result of the output-artifact check. */
  artifacts: CheckResult;
  /** Sum of the `points` of the four checks. */
  totalScore: number;
  /** Highest reachable score; `waterfall` initialises it to 100. */
  maxScore: number;
  /** Score as a percentage; `waterfall` sets it equal to `totalScore`. */
  percentage: number;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Outcome of a single validation check.
 */
export interface CheckResult {
  /** Whether the check is considered satisfied (possibly with reduced points). */
  passed: boolean;
  /** Points awarded by this check. */
  points: number;
  /** Maximum points this check can award. */
  maxPoints: number;
  /** Severity label; a failed check marked 'critical' makes `waterfall` return immediately. */
  severity: 'critical' | 'high' | 'medium' | 'low';
  /** Optional human-readable summary of the outcome. */
  message?: string;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Tunables of the scoring system used by the waterfall checks.
 */
const SCORING_CONFIG = {
  /** Minimum total score (0-100) required for a run to pass. */
  PASSING_THRESHOLD: 70,

  /** Maximum points per check; the four weights add up to 100. */
  WEIGHTS: {
    exitCode: 40,
    stderr: 20,
    outputContract: 30,
    artifacts: 10,
  },

  STDERR: {
    /** A match on any of these makes the stderr check fail with 'critical' severity. */
    CRITICAL_PATTERNS: [
      /^fatal error/im,
      /^error:.*failed/im,
      /segmentation fault/i,
      /core dumped/i,
      /unhandled exception/i,
      /^traceback \(most recent call last\)/im,
    ],

    /** A match on any of these keeps the check passing but applies WARNING_PENALTY. */
    WARNING_PATTERNS: [
      /warning:/i,
      /deprecated/i,
      /error rate/i,
      /exception.*registered/i,
    ],

    /** Percentage of the stderr weight removed when at least one warning pattern matches. */
    WARNING_PENALTY: 50,
  },

  OUTPUT_CONTRACT: {
    /** Whether a partial set of matched contracts may still earn proportional points. */
    PARTIAL_CREDIT: true,
    /** Minimum percentage of contracts that must match to earn partial credit. */
    PARTIAL_THRESHOLD: 50,
  },
};
|
|
90
|
+
|
|
91
|
+
/**
 * Runs the validation cascade over an execution result and scores it.
 *
 * Checks run in this order: exit code, stderr, output contract, artifacts.
 * A failed check whose severity is 'critical' (non-zero exit code or a
 * critical stderr pattern) ends the evaluation immediately with a failure.
 * The remaining checks only contribute points, and the run passes when the
 * summed points reach `SCORING_CONFIG.PASSING_THRESHOLD`.
 *
 * @param ctx - Execution result plus optional artifact location.
 * @returns A success with score and breakdown, or a failure naming the layer
 *   ('exit-code', 'stderr' or 'score-threshold') that rejected the run.
 */
export async function waterfall(
  ctx: WaterfallContext
): Promise<WaterfallResult> {
  logger.info(`Starting evaluation system`);

  const breakdown: ScoreBreakdown = {
    exitCode: { passed: false, points: 0, maxPoints: 0, severity: 'critical' },
    stderr: { passed: false, points: 0, maxPoints: 0, severity: 'high' },
    outputContract: { passed: false, points: 0, maxPoints: 0, severity: 'high' },
    artifacts: { passed: false, points: 0, maxPoints: 0, severity: 'medium' },
    totalScore: 0,
    maxScore: 100,
    percentage: 0,
  };

  // Check exit code (critical - auto-fails if non-zero)
  breakdown.exitCode = checkExitCode(ctx);

  if (!breakdown.exitCode.passed && breakdown.exitCode.severity === 'critical') {
    logger.error(`FAILED: Non-zero exit code (critical)`);
    // No points have been awarded yet, so the zeroed totals in `breakdown`
    // already agree with the reported score.
    return {
      ok: false,
      layer: 'exit-code',
      reason: breakdown.exitCode.message || 'Process exited with non-zero code',
      details: `exitCode=${ctx.exec.exitCode}`,
      score: 0,
      breakdown,
    };
  }

  // Check stderr (can be critical or partial)
  breakdown.stderr = checkStderr(ctx);

  if (!breakdown.stderr.passed && breakdown.stderr.severity === 'critical') {
    logger.error(`FAILED: Critical error in stderr`);
    // Store the partial total inside the breakdown as well, so that
    // `breakdown.totalScore`/`percentage` never contradict the returned `score`.
    breakdown.totalScore = calculateTotalScore(breakdown);
    breakdown.percentage = breakdown.totalScore;
    return {
      ok: false,
      layer: 'stderr',
      reason: breakdown.stderr.message || 'Critical error detected',
      // Only the first 500 characters of stderr are attached.
      details: ctx.exec.stderr.slice(0, 500),
      score: breakdown.totalScore,
      breakdown,
    };
  }

  // Check output contract (non-critical)
  breakdown.outputContract = await checkOutputContract(ctx);

  // Check artifacts (non-critical)
  breakdown.artifacts = await checkArtifacts(ctx);

  // Calculate final score; `percentage` mirrors it because maxScore is 100.
  breakdown.totalScore = calculateTotalScore(breakdown);
  breakdown.percentage = breakdown.totalScore;

  logger.info(`Final Score: ${breakdown.totalScore.toFixed(1)}/100`);
  logScoreBreakdown(breakdown);

  if (breakdown.totalScore >= SCORING_CONFIG.PASSING_THRESHOLD) {
    logger.info(`PASSED (score: ${breakdown.totalScore.toFixed(1)} >= ${SCORING_CONFIG.PASSING_THRESHOLD})`);
    return {
      ok: true,
      score: breakdown.totalScore,
      breakdown,
    };
  }

  logger.warn(`FAILED (score: ${breakdown.totalScore.toFixed(1)} < ${SCORING_CONFIG.PASSING_THRESHOLD})`);
  return {
    ok: false,
    layer: 'score-threshold',
    reason: 'Insufficient score for approval',
    details: `Obtained: ${breakdown.totalScore.toFixed(1)}, Required: ${SCORING_CONFIG.PASSING_THRESHOLD}`,
    score: breakdown.totalScore,
    breakdown,
  };
}
|
|
173
|
+
|
|
174
|
+
/**
 * Scores the process exit code: full weight for `0`, nothing otherwise.
 *
 * The result always carries 'critical' severity, so a non-zero exit code
 * makes the caller abort the evaluation.
 *
 * @param ctx - Context whose `exec.exitCode` is inspected.
 * @returns The check outcome with an explanatory message.
 */
function checkExitCode(ctx: WaterfallContext): CheckResult {
  const weight = SCORING_CONFIG.WEIGHTS.exitCode;
  const code = ctx.exec.exitCode;
  const succeeded = code === 0;

  return {
    passed: succeeded,
    points: succeeded ? weight : 0,
    maxPoints: weight,
    severity: 'critical',
    message: succeeded ? 'Exit code OK' : `Process exited with code ${code}`,
  };
}
|
|
195
|
+
|
|
196
|
+
/**
 * Scores the captured stderr text.
 *
 * - Any critical pattern match: failed, zero points, 'critical' severity.
 * - Otherwise, any warning pattern match: passed, weight reduced by
 *   `WARNING_PENALTY` percent, 'medium' severity. The reported count is the
 *   number of distinct warning patterns that matched.
 * - Otherwise: passed with the full weight.
 *
 * @param ctx - Context whose `exec.stderr` is scanned (a falsy value is treated as empty).
 * @returns The check outcome.
 */
function checkStderr(ctx: WaterfallContext): CheckResult {
  const { CRITICAL_PATTERNS, WARNING_PATTERNS, WARNING_PENALTY } = SCORING_CONFIG.STDERR;
  const weight = SCORING_CONFIG.WEIGHTS.stderr;
  const text = ctx.exec.stderr || '';

  // Critical patterns take precedence; the first one that matches is reported.
  const fatal = CRITICAL_PATTERNS.find((re) => re.test(text));
  if (fatal !== undefined) {
    logger.error(`Critical error detected: ${fatal}`);
    return {
      passed: false,
      points: 0,
      maxPoints: weight,
      severity: 'critical',
      message: `Critical error pattern matched: ${fatal}`,
    };
  }

  const warningCount = WARNING_PATTERNS.filter((re) => re.test(text)).length;
  if (warningCount === 0) {
    return {
      passed: true,
      points: weight,
      maxPoints: weight,
      severity: 'high',
      message: 'No critical errors in stderr',
    };
  }

  // Warnings do not fail the check; they only cost a fixed share of the weight.
  logger.warn(`${warningCount} warning(s) in stderr - ${WARNING_PENALTY}% penalty`);
  return {
    passed: true,
    points: weight * (1 - WARNING_PENALTY / 100),
    maxPoints: weight,
    severity: 'medium',
    message: `${warningCount} warning(s) detected`,
  };
}
|
|
243
|
+
|
|
244
|
+
/**
 * Tests one success contract against a text.
 *
 * A contract of the form `/pattern/` (at least two characters) is compiled as
 * a regular expression; anything else is matched as a literal substring. A
 * pattern that fails to compile is logged and counts as "not matched" instead
 * of throwing.
 */
function matchesContract(contract: string, text: string): boolean {
  const isRegexForm =
    contract.length >= 2 && contract.startsWith('/') && contract.endsWith('/');
  if (!isRegexForm) {
    return text.includes(contract);
  }

  try {
    return new RegExp(contract.slice(1, -1)).test(text);
  } catch {
    logger.warn(`Invalid regex contract ignored: ${contract}`);
    return false;
  }
}

/**
 * Scores whether the run produced its expected "success" marker.
 *
 * Contracts come from the `SUCCESS_CONTRACTS` environment variable as a
 * comma-separated list (default: `__RESULT__=SUCCESS`). Evaluation order:
 *
 * 1. If stdout parses as JSON with `result` or `status` equal to 'success',
 *    the check passes with full points.
 * 2. Otherwise every contract is tested against stdout.
 * 3. If nothing matched in stdout and an output file is configured, the
 *    contracts are tested against the file content instead.
 *
 * All contracts matched gives full points. With partial credit enabled,
 * matching at least `PARTIAL_THRESHOLD` percent gives proportional points.
 * Anything less fails with zero points.
 *
 * @param ctx - Context providing `exec.stdout` and the optional artifact path.
 * @returns The check outcome.
 */
async function checkOutputContract(ctx: WaterfallContext): Promise<CheckResult> {
  const maxPoints = SCORING_CONFIG.WEIGHTS.outputContract;
  const raw = process.env.SUCCESS_CONTRACTS ?? '__RESULT__=SUCCESS';
  const contracts = raw.split(',').map((s) => s.trim()).filter(Boolean);
  const stdout = ctx.exec.stdout ?? '';

  // Try JSON parse
  try {
    const parsed: unknown = JSON.parse(stdout);
    if (typeof parsed === 'object' && parsed !== null) {
      const fields = parsed as Record<string, unknown>;
      if (fields.result === 'success' || fields.status === 'success') {
        logger.info(`✓ JSON contract detected in stdout`);
        return {
          passed: true,
          points: maxPoints,
          maxPoints,
          severity: 'high',
          message: 'JSON success contract matched',
        };
      }
    }
  } catch {
    // Not JSON, continue with the string/regex contracts
  }

  // Check string/regex contracts in stdout
  let matchedContracts = 0;
  for (const contract of contracts) {
    if (matchesContract(contract, stdout)) {
      matchedContracts++;
      logger.info(`✓ Contract matched: ${contract}`);
    }
  }

  // Check artifact if no matches in stdout
  if (ctx.artifacts?.outputFile && matchedContracts === 0) {
    let content: string | undefined;
    try {
      content = await readFile(ctx.artifacts.outputFile, 'utf-8');
    } catch (err: unknown) {
      // Only genuine read failures are reported here; pattern problems are
      // handled inside matchesContract.
      const reason = err instanceof Error ? err.message : String(err);
      logger.warn(`Could not read artifact: ${reason}`);
    }

    if (content !== undefined) {
      for (const contract of contracts) {
        if (matchesContract(contract, content)) {
          matchedContracts++;
          logger.info(`✓ Contract matched in artifact: ${contract}`);
        }
      }
    }
  }

  // Calculate score based on matched contracts
  const percentage = contracts.length > 0 ? matchedContracts / contracts.length : 0;

  // An empty contract list satisfies this branch too (0 of 0 matched).
  if (matchedContracts === contracts.length) {
    return {
      passed: true,
      points: maxPoints,
      maxPoints,
      severity: 'high',
      message: `All ${contracts.length} contract(s) matched`,
    };
  }

  if (
    SCORING_CONFIG.OUTPUT_CONTRACT.PARTIAL_CREDIT &&
    percentage >= SCORING_CONFIG.OUTPUT_CONTRACT.PARTIAL_THRESHOLD / 100
  ) {
    logger.warn(`Only ${matchedContracts}/${contracts.length} contracts matched`);
    return {
      passed: true,
      points: maxPoints * percentage,
      maxPoints,
      severity: 'medium',
      message: `Partial match: ${matchedContracts}/${contracts.length} contracts`,
    };
  }

  logger.warn(`Insufficient contracts: ${matchedContracts}/${contracts.length}`);
  return {
    passed: false,
    points: 0,
    maxPoints,
    severity: 'high',
    message: `Insufficient contracts: ${matchedContracts}/${contracts.length}`,
  };
}
|
|
339
|
+
|
|
340
|
+
/**
 * Scores the output artifact, when one is configured.
 *
 * - No `artifacts.outputFile`: passes with full points (nothing to verify).
 * - File readable and not blank: passes with full points.
 * - File blank (empty or whitespace only) or unreadable: fails with zero points.
 *
 * @param ctx - Context carrying the optional artifact path.
 * @returns The check outcome; read errors are reported in `message`, never thrown.
 */
async function checkArtifacts(ctx: WaterfallContext): Promise<CheckResult> {
  const maxPoints = SCORING_CONFIG.WEIGHTS.artifacts;
  const outputFile = ctx.artifacts?.outputFile;

  if (!outputFile) {
    return {
      passed: true,
      points: maxPoints,
      maxPoints,
      severity: 'low',
      message: 'No artifact required',
    };
  }

  try {
    const content = await readFile(outputFile, 'utf-8');

    if (content.trim().length === 0) {
      logger.warn('Empty artifact');
      return {
        passed: false,
        points: 0,
        maxPoints,
        severity: 'medium',
        message: 'Output artifact is empty',
      };
    }

    // String length counts UTF-16 code units, so measure the encoded size to
    // make the "bytes" figure in the log accurate for non-ASCII content.
    logger.info(`✓ Artifact OK (${Buffer.byteLength(content, 'utf-8')} bytes)`);
    return {
      passed: true,
      points: maxPoints,
      maxPoints,
      severity: 'low',
      message: 'Artifact valid',
    };
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    logger.warn(`Error reading artifact: ${reason}`);
    return {
      passed: false,
      points: 0,
      maxPoints,
      severity: 'medium',
      message: `Failed to read artifact: ${reason}`,
    };
  }
}
|
|
386
|
+
|
|
387
|
+
/**
 * Adds up the points awarded by the four checks of a breakdown.
 *
 * @param breakdown - Breakdown whose per-check `points` are summed.
 * @returns exitCode + stderr + outputContract + artifacts points.
 */
function calculateTotalScore(breakdown: ScoreBreakdown): number {
  const { exitCode, stderr, outputContract, artifacts } = breakdown;
  return [exitCode, stderr, outputContract, artifacts]
    .map((check) => check.points)
    .reduce((sum, pts) => sum + pts);
}
|
|
395
|
+
|
|
396
|
+
/**
 * Writes a per-check summary of the breakdown to the module logger.
 *
 * @param breakdown - Breakdown to report; `totalScore` must already be set.
 */
function logScoreBreakdown(breakdown: ScoreBreakdown): void {
  // One line per check: "<label>: <points>/<max> <mark>".
  const row = (label: string, check: CheckResult): string => {
    const mark = check.passed ? '✓' : '✗';
    return ` ${label}: ${check.points.toFixed(1)}/${check.maxPoints} ${mark}`;
  };

  logger.info(` Score breakdown:`);
  logger.info(row('Exit Code', breakdown.exitCode));
  logger.info(row('Stderr', breakdown.stderr));
  logger.info(row('Output Contract', breakdown.outputContract));
  logger.info(row('Artifacts', breakdown.artifacts));
  logger.info(` ────────────────────────────`);
  logger.info(` TOTAL: ${breakdown.totalScore.toFixed(1)}/100`);
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { rename, rm, writeFile } from 'node:fs/promises';
|
|
2
|
+
|
|
3
|
+
/**
 * Writes `content` to `tmpPath` and then renames that file onto `pathOutput`.
 *
 * Writing to a scratch path first means `pathOutput` is only replaced once the
 * full content has been written. NOTE(review): the rename is presumably
 * expected to be atomic, which requires both paths to live on the same
 * filesystem — confirm against callers.
 *
 * @param args.tmpPath - Scratch file that receives the content first; it is
 *   removed again if the operation fails.
 * @param args.pathOutput - Final destination of the artifact.
 * @param args.content - Text to write, encoded as UTF-8.
 * @param args.signal - Optional abort signal. It is checked before writing,
 *   forwarded to the write itself, and checked again before the rename.
 * @throws Error('Aborted') when the signal is found aborted at a checkpoint.
 *   Errors from the underlying write/rename (including an abort raised by
 *   the write itself) are rethrown unchanged.
 */
export async function writeArtifactAtomically(args: {
  tmpPath: string;
  pathOutput: string;
  content: string;
  signal?: AbortSignal;
}): Promise<void> {
  const { tmpPath, pathOutput, content, signal } = args;
  if (signal?.aborted) throw new Error('Aborted');

  try {
    await writeFile(tmpPath, content, { encoding: 'utf-8', signal });
    // Do not publish the artifact if the caller aborted while it was being written.
    if (signal?.aborted) throw new Error('Aborted');
    await rename(tmpPath, pathOutput);
  } catch (err) {
    // Best-effort cleanup so a failed run does not leave the scratch file
    // behind; a cleanup failure must not mask the original error.
    await rm(tmpPath, { force: true }).catch(() => undefined);
    throw err;
  }
}
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
# Navigate Module — Documentação Completa
|
|
2
|
+
|
|
3
|
+
Este documento descreve o funcionamento, a API e as melhores práticas do módulo `core/navigate` do projeto BoxSafe. O objetivo é oferecer uma referência completa, clara e utilizável pelo time e por agentes (LLMs).
|
|
4
|
+
|
|
5
|
+
## Visão Geral
|
|
6
|
+
|
|
7
|
+
O módulo `navigate` fornece um conjunto seguro e tipado de operações de sistema de arquivos confinadas a um `workspace` definido. Ele é projetado para permitir que um agente (LLM) ou código do sistema:
|
|
8
|
+
|
|
9
|
+
- Liste diretórios com metadados
|
|
10
|
+
- Leia arquivos com limite de tamanho
|
|
11
|
+
- Escreva/atualize arquivos (criando diretórios pais quando solicitado)
|
|
12
|
+
- Crie diretórios (recursivos)
|
|
13
|
+
- Delete arquivos e diretórios (recursivo opcional)
|
|
14
|
+
- Consulte metadados de arquivos/pastas
|
|
15
|
+
|
|
16
|
+
Características principais:
|
|
17
|
+
|
|
18
|
+
- Boundary enforcement: todas as operações são validadas para permanecer dentro do `workspace` configurado.
|
|
19
|
+
- Permissões: verificação de leitura/escrita antes de executar ações sensíveis.
|
|
20
|
+
- Limits: leitura de arquivos limitada por tamanho configurável (padrão 10MB).
|
|
21
|
+
- Tipos TS explícitos: `Navigator`, `NavigatorHandler` e resultados estruturados.
|
|
22
|
+
|
|
23
|
+
## Arquivos Principais
|
|
24
|
+
|
|
25
|
+
- `navigator.ts` — implementação principal da classe `Navigator`.
|
|
26
|
+
- `handler.ts` — adaptador `NavigatorHandler` para uso por rotas/sgmnt.
|
|
27
|
+
- `utils.ts` — utilitários de segurança e validação de paths (resolução, checagem de permissões, sanitização).
|
|
28
|
+
- `types.ts` — definições de tipos exportadas (resultados, entradas, config).
|
|
29
|
+
- `examples.ts` — exemplos práticos de uso do módulo.
|
|
30
|
+
- `navigate.test.ts` — testes unitários de integração mínima.
|
|
31
|
+
- `NAVIGATE.md` — esta documentação.
|
|
32
|
+
|
|
33
|
+
## Conceitos e Contratos
|
|
34
|
+
|
|
35
|
+
Tipos principais (resumo):
|
|
36
|
+
|
|
37
|
+
- `NavigatorConfig` — { workspace: string; followSymlinks?: boolean; maxFileSize?: number; logger?: Logger }
|
|
38
|
+
- `FileSystemEntry` — { path, name, type: 'file'|'directory', size?, mtime?, readable, writable }
|
|
39
|
+
- `DirectoryListing` — sucesso de listagem: { ok: true, path, entries: FileSystemEntry[], total }
|
|
40
|
+
- `FileReadResult` — leitura: { ok: true, path, content, size, encoding }
|
|
41
|
+
- `FileWriteResult` — escrita: { ok: true, path, size, created }
|
|
42
|
+
- `DirectoryCreateResult` — criação de pasta: { ok: true, path, created }
|
|
43
|
+
- `DeleteResult` — remoção: { ok: true, path, type, deletedAt }
|
|
44
|
+
- `MetadataResult` — metadados: { ok: true, path, stat: { type, size, mtime, isReadable, isWritable } }
|
|
45
|
+
- `OperationError` — falha simples: { ok: false, operation, error }
|
|
46
|
+
|
|
47
|
+
Observação: as respostas são discriminadas por `ok: true | false`. Use type guards (ou checagem de propriedades) para reduzir o tipo retornado antes de acessar campos específicos.
|
|
48
|
+
|
|
49
|
+
## Inicialização
|
|
50
|
+
|
|
51
|
+
Use `createNavigator` quando precisar trabalhar diretamente com a API programática:
|
|
52
|
+
|
|
53
|
+
```ts
|
|
54
|
+
import { createNavigator } from '@core/navigate';
|
|
55
|
+
|
|
56
|
+
const nav = createNavigator({
|
|
57
|
+
workspace: '/home/inky/Development/boxsafe',
|
|
58
|
+
maxFileSize: 10 * 1024 * 1024, // 10MB
|
|
59
|
+
});
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Para integrar com a camada de rotas/segmentos (sgmnt), use `createNavigatorHandler`:
|
|
63
|
+
|
|
64
|
+
```ts
|
|
65
|
+
import { createNavigatorHandler } from '@core/navigate';
|
|
66
|
+
|
|
67
|
+
const handler = createNavigatorHandler('/home/inky/Development/boxsafe');
|
|
68
|
+
await handler.execute({ op: 'list', path: 'src' });
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## API (métodos do `Navigator`)
|
|
72
|
+
|
|
73
|
+
- `listDirectory(dirPath?: string) => Promise<DirectoryListing | OperationError>`
|
|
74
|
+
- Lista conteúdo com `FileSystemEntry[]` e metadados.
|
|
75
|
+
- Ordena: diretórios primeiro, depois arquivos alfabeticamente.
|
|
76
|
+
|
|
77
|
+
- `readFile(filePath: string) => Promise<FileReadResult | OperationError>`
|
|
78
|
+
- Verifica permissões e limite de tamanho antes de ler.
|
|
79
|
+
- Retorna `encoding: 'utf-8'` e `size` (bytes).
|
|
80
|
+
|
|
81
|
+
- `writeFile(filePath: string, content: string, options?: { append?: boolean; createDirs?: boolean }) => Promise<FileWriteResult | OperationError>`
|
|
82
|
+
- Se `createDirs: true`, cria diretórios pai automaticamente.
|
|
83
|
+
- Suporta `append` para anexar conteúdo.
|
|
84
|
+
|
|
85
|
+
- `createDirectory(dirPath: string, options?: { recursive?: boolean }) => Promise<DirectoryCreateResult | OperationError>`
|
|
86
|
+
|
|
87
|
+
- `delete(targetPath: string, options?: { recursive?: boolean }) => Promise<DeleteResult | OperationError>`
|
|
88
|
+
- Para diretórios, `recursive` padrão é `true` no handler; seja cauteloso.
|
|
89
|
+
|
|
90
|
+
- `getMetadata(targetPath: string) => Promise<MetadataResult | OperationError>`
|
|
91
|
+
- Perfeito para decidir se deve ler ou não (tamanho / permissões).
|
|
92
|
+
|
|
93
|
+
### Observação sobre erros
|
|
94
|
+
|
|
95
|
+
O `OperationError` contém `operation` e `error` (mensagem curta). Não expõe paths resolvidos para manter erro simples e consistente — o logger interno registra detalhes quando necessário.
|
|
96
|
+
|
|
97
|
+
## Boas práticas (para humanos e LLMs)
|
|
98
|
+
|
|
99
|
+
1. Sempre verifique `result.ok` ou use type guards antes de acessar propriedades específicas.
|
|
100
|
+
2. Prefira caminhos relativos ao workspace (`src/main.ts`) em vez de absolutos.
|
|
101
|
+
3. Use `createDirs: true` em `writeFile` quando o destino puder não existir.
|
|
102
|
+
4. Evite ler arquivos muito grandes — use `getMetadata` e respeite `maxFileSize`.
|
|
103
|
+
5. Não confie somente em mensagens do agente — valide operações críticas externamente (p.ex. testes automatizados).
|
|
104
|
+
|
|
105
|
+
## Exemplos práticos
|
|
106
|
+
|
|
107
|
+
Listar diretório e ler um arquivo condicionalmente:
|
|
108
|
+
|
|
109
|
+
```ts
|
|
110
|
+
const list = await nav.listDirectory('src');
|
|
111
|
+
if (list.ok) {
|
|
112
|
+
for (const e of list.entries) {
|
|
113
|
+
if (e.type === 'file' && e.size && e.size < 1024 * 1024) {
|
|
114
|
+
const r = await nav.readFile(e.path);
|
|
115
|
+
if (r.ok) console.log(r.content.slice(0, 200));
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Criar estrutura e escrever múltiplos arquivos:
|
|
122
|
+
|
|
123
|
+
```ts
|
|
124
|
+
await nav.createDirectory('output/generated', { recursive: true });
|
|
125
|
+
await nav.writeFile('output/generated/index.ts', "export * from './types'", { createDirs: true });
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
Usando o `NavigatorHandler` (útil para integração com sgmnt):
|
|
129
|
+
|
|
130
|
+
```ts
|
|
131
|
+
const handler = createNavigatorHandler('/home/inky/Development/boxsafe');
|
|
132
|
+
const res = await handler.execute({ op: 'write', path: 'out/result.txt', content: 'ok', writeOptions: { createDirs: true } });
|
|
133
|
+
if (!res.ok) console.error('Handler error', res.error);
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
## Integração com `sgmnt` (map)
|
|
137
|
+
|
|
138
|
+
No `core/segments/map.ts` você pode adicionar uma rota/entry que instancie o handler com o workspace definido em `boxsafe.config.json`:
|
|
139
|
+
|
|
140
|
+
```ts
|
|
141
|
+
navigate: {
|
|
142
|
+
handler: async (params?: any) => {
|
|
143
|
+
const mod = await import('@core/navigate');
|
|
144
|
+
const handler = mod.createNavigatorHandler(BSConfig.project?.workspace ?? './');
|
|
145
|
+
return handler.execute(params);
|
|
146
|
+
},
|
|
147
|
+
meta: { description: 'File navigation with workspace boundary', implemented: true }
|
|
148
|
+
}
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
## Segurança e Limitações
|
|
152
|
+
|
|
153
|
+
- A validação de `workspace` evita directory traversal. Se um path calculado estiver fora do `workspace`, a operação falha com `OperationError`.
|
|
154
|
+
- `followSymlinks` está desligado por padrão; habilite com cautela.
|
|
155
|
+
- O módulo não tenta contornar políticas de OS; ele respeita permissões do usuário que executa o processo.
|
|
156
|
+
|
|
157
|
+
## Testes e Validação
|
|
158
|
+
|
|
159
|
+
Executar a suite de testes localmente (ex.: `navigate.test.ts`) é recomendado após mudanças:
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
# rodar apenas o teste do navigate (exemplo usando tsx/ts-node conforme projeto)
|
|
163
|
+
npx tsx core/navigate/navigate.test.ts
|
|
164
|
+
|
|
165
|
+
# ou rodar verificação TypeScript
|
|
166
|
+
npx tsc --noEmit
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
## Troubleshooting rápido
|
|
170
|
+
|
|
171
|
+
- Erro `Access denied` ao ler/escrever: verifique permissões do processo e se o arquivo está dentro do `workspace`.
|
|
172
|
+
- `File size exceeds limit`: aumente `maxFileSize` no `NavigatorConfig` ou use streaming externo.
|
|
173
|
+
- Mensagens de erro genéricas: confira logs do `logger` (padrão `console`) para detalhes adicionais.
|
|
174
|
+
|
|
175
|
+
## Notas finais
|
|
176
|
+
|
|
177
|
+
O `navigate` foi projetado para ser simples, seguro e fácil de integrar com agentes LLM. A API é propositalmente pequena e previsível para minimizar erros de interpretação e facilitar validação automática em loops (ex.: `execLoop`).
|
|
178
|
+
|
|
179
|
+
Possíveis próximos passos para este módulo:
|
|
180
|
+
|
|
181
|
+
- Adicionar exemplos de prompts para LLMs que utilizam o handler.
|
|
182
|
+
- Gerar snippets de código para integração em `core/segments/map.ts` automaticamente.
|
|
183
|
+
- Criar um CLI leve para operações manuais de navegação para debug.
|
|
184
|
+
|
|
185
|
+
----
|
|
186
|
+
Documento gerado automaticamente — revise e peça ajustes.
|