@agjs/tsforge 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/scripts/sweep.ts +11 -6
- package/src/cli.ts +2 -1
- package/src/detect-gate.ts +16 -0
- package/src/eval/failure-class.ts +40 -9
- package/strict.eslint.config.mjs +24 -1
package/package.json
CHANGED
package/scripts/sweep.ts
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
import { mkdir, readdir, rm, stat } from "node:fs/promises";
|
|
7
7
|
import { join } from "node:path";
|
|
8
8
|
import { parseSpec } from "../src/spec";
|
|
9
|
-
import { buildGate,
|
|
9
|
+
import { buildGate, buildCoreFix } from "../src/detect-gate";
|
|
10
10
|
import { runSpec, qualityRepair } from "../src/loop";
|
|
11
11
|
import { modelAgent } from "../src/agent";
|
|
12
12
|
import { OpenAICompatibleProvider } from "../src/inference";
|
|
@@ -258,17 +258,22 @@ async function runOne(
|
|
|
258
258
|
// (an unguarded index access, an `as any`) slipped through as GREEN. Now
|
|
259
259
|
// every task and the whole-spec verify must clear the strict floor BEFORE
|
|
260
260
|
// its functional tests count.
|
|
261
|
-
//
|
|
262
|
-
//
|
|
263
|
-
|
|
261
|
+
// buildCoreFix (eslint --fix + prettier) runs as task.fix before each gate
|
|
262
|
+
// check — same janitor as the interactive CLI — so padding-line, prefer-const,
|
|
263
|
+
// etc. are squashed without model turns.
|
|
264
|
+
const gateCommand = (await buildGate(runDir)).command;
|
|
265
|
+
const fixCommand = buildCoreFix();
|
|
264
266
|
const gatedSpec = {
|
|
265
267
|
...spec,
|
|
266
268
|
tasks: spec.tasks.map((t) => ({
|
|
267
269
|
...t,
|
|
268
|
-
|
|
270
|
+
fix: fixCommand,
|
|
271
|
+
accept: `${gateCommand} && ${t.accept}`,
|
|
269
272
|
})),
|
|
270
273
|
verify:
|
|
271
|
-
spec.verify.length > 0
|
|
274
|
+
spec.verify.length > 0
|
|
275
|
+
? `${gateCommand} && ${spec.verify}`
|
|
276
|
+
: gateCommand,
|
|
272
277
|
};
|
|
273
278
|
|
|
274
279
|
// Every run gets a full transcript at <runDir>/run.log; stream to the
|
package/src/cli.ts
CHANGED
|
@@ -38,6 +38,7 @@ import {
|
|
|
38
38
|
buildGate,
|
|
39
39
|
buildWebGate,
|
|
40
40
|
buildWebFix,
|
|
41
|
+
buildCoreFix,
|
|
41
42
|
buildWebTypeGate,
|
|
42
43
|
buildWebTscCheck,
|
|
43
44
|
scaffoldWeb,
|
|
@@ -903,7 +904,7 @@ async function repl(args: ICliArgs): Promise<number> {
|
|
|
903
904
|
fix: buildWebFix("react"),
|
|
904
905
|
incrementalCheck: buildWebTscCheck(),
|
|
905
906
|
}
|
|
906
|
-
: { scaffoldWeb: true }),
|
|
907
|
+
: { scaffoldWeb: true, fix: buildCoreFix() }),
|
|
907
908
|
...(thinkingTokenBudget === undefined ? {} : { thinkingTokenBudget }),
|
|
908
909
|
...(autoCompactAt === undefined ? {} : { autoCompactAt }),
|
|
909
910
|
// Thinking OFF for interactive replies so they STREAM immediately instead of
|
package/src/detect-gate.ts
CHANGED
|
@@ -462,6 +462,22 @@ export function buildWebFix(framework: WebFramework): string {
|
|
|
462
462
|
return `${lintFix} ; ${format}`;
|
|
463
463
|
}
|
|
464
464
|
|
|
465
|
+
/**
|
|
466
|
+
* The core (non-web) auto-fix command — same janitor as buildWebFix but uses the
|
|
467
|
+
* bundled strict.eslint.config.mjs. Run BEFORE the gate each cycle so padding-line,
|
|
468
|
+
* prefer-const, curly, etc. are squashed without model turns.
|
|
469
|
+
*/
|
|
470
|
+
export function buildCoreFix(): string {
|
|
471
|
+
const lintFix =
|
|
472
|
+
`"${ESLINT_BIN}" --no-config-lookup -c "${STRICT_CONFIG}" --fix .`.replace(
|
|
473
|
+
/\s+/g,
|
|
474
|
+
" "
|
|
475
|
+
);
|
|
476
|
+
const format = `"${PRETTIER_BIN}" --write .`;
|
|
477
|
+
|
|
478
|
+
return `${lintFix} ; ${format}`;
|
|
479
|
+
}
|
|
480
|
+
|
|
465
481
|
async function ensureFile(
|
|
466
482
|
cwd: string,
|
|
467
483
|
name: string,
|
|
package/src/eval/failure-class.ts
CHANGED
|
@@ -12,7 +12,8 @@ export const FAILURE_CLASS = {
|
|
|
12
12
|
none: "none",
|
|
13
13
|
/** Model emitted tool calls the parser couldn't read (repair L3 / salvage). */
|
|
14
14
|
toolMalformed: "tool-malformed",
|
|
15
|
-
/** Edits
|
|
15
|
+
/** Edits/tool calls were rejected — missing target (missing-file / not-found /
|
|
16
|
+
* ambiguous) or out-of-scope (the dispatcher's tool_rejected). */
|
|
16
17
|
editReject: "edit-reject",
|
|
17
18
|
/** Hit the turn cap or the gate stalled with no decisive error class. */
|
|
18
19
|
noProgress: "no-progress",
|
|
@@ -44,6 +45,8 @@ export interface IFailureSignals {
|
|
|
44
45
|
salvages: number;
|
|
45
46
|
editRejects: number;
|
|
46
47
|
degenerated: boolean;
|
|
48
|
+
timedOut: boolean;
|
|
49
|
+
toolUseFailed: boolean;
|
|
47
50
|
tsErrors: number;
|
|
48
51
|
lintErrors: number;
|
|
49
52
|
missingModule: number;
|
|
@@ -60,10 +63,25 @@ export interface IFailureSummary {
|
|
|
60
63
|
|
|
61
64
|
const TS_CODE = /^TS\d+$/;
|
|
62
65
|
const MISSING_MODULE = /cannot find module/i;
|
|
63
|
-
|
|
66
|
+
// The terminal degeneration stops say "repetition loop" (run.ts, session.ts) —
|
|
67
|
+
// NOT "degenerate". Match both the user-facing phrase and the internal term.
|
|
68
|
+
const DEGENERATE = /repetition loop|degenerat/i;
|
|
69
|
+
// Salvage telemetry on the tool channel ("recovered N malformed tool call(s)").
|
|
64
70
|
const TOOL_MALFORMED = /salvage|recovered|malformed|re-ask/i;
|
|
71
|
+
// Terminal stops where the model never produced usable tool calls: the leaked
|
|
72
|
+
// malformed-tool-call stop and the narrate-instead-of-build stop (session.ts).
|
|
73
|
+
const TOOL_USE_FAILED =
|
|
74
|
+
/malformed tool-call|writing files as chat|instead of creating them/i;
|
|
75
|
+
// Edit/scope rejections surface on TWO channels: model-agent emits a `kind:"edit"`
|
|
76
|
+
// "<file> — rejected (<reason>)"; the tool dispatcher emits `kind:"tool"`
|
|
77
|
+
// "tool_rejected:" / "tool_input_rejected:". Both contain "reject".
|
|
65
78
|
const REJECTED = /reject/i;
|
|
66
|
-
|
|
79
|
+
// The TERMINAL timeout stop ("timed out repeatedly … stopped"), NOT the transient
|
|
80
|
+
// per-turn re-steer ("timed out … re-steering (1/3)") — only the former ends a run.
|
|
81
|
+
const TIMED_OUT = /timed out repeatedly/i;
|
|
82
|
+
// The actual browser-oracle failure strings (oracle.ts): "rendered blank",
|
|
83
|
+
// "app did not mount", "console error:", "uncaught:", "route X failed to load".
|
|
84
|
+
const BROWSER = /blank|did not mount|console error|uncaught|failed to load/i;
|
|
67
85
|
const ROUTE = /route|phantom|stub/i;
|
|
68
86
|
const BUILD = /vite|esbuild|build failed|bundl/i;
|
|
69
87
|
|
|
@@ -146,10 +164,15 @@ function gatherSignals(
|
|
|
146
164
|
salvages: events.filter(
|
|
147
165
|
(e) => e.kind === "tool" && TOOL_MALFORMED.test(e.message)
|
|
148
166
|
).length,
|
|
167
|
+
// Rejections come on both the "edit" channel (model-agent) and the "tool"
|
|
168
|
+
// channel (dispatcher: tool_rejected / tool_input_rejected).
|
|
149
169
|
editRejects: events.filter(
|
|
150
|
-
(e) =>
|
|
170
|
+
(e) =>
|
|
171
|
+
(e.kind === "edit" || e.kind === "tool") && REJECTED.test(e.message)
|
|
151
172
|
).length,
|
|
152
173
|
degenerated: events.some((e) => DEGENERATE.test(e.message)),
|
|
174
|
+
timedOut: events.some((e) => TIMED_OUT.test(e.message)),
|
|
175
|
+
toolUseFailed: events.some((e) => TOOL_USE_FAILED.test(e.message)),
|
|
153
176
|
tsErrors: rules.filter((r) => TS_CODE.test(r) && r !== "TS2307").length,
|
|
154
177
|
lintErrors: rules.filter((r) => !TS_CODE.test(r)).length,
|
|
155
178
|
missingModule,
|
|
@@ -203,11 +226,7 @@ function classifyGateErrors(
|
|
|
203
226
|
|
|
204
227
|
/** Behavioral fallback when no gate-error class dominates. */
|
|
205
228
|
function classifyBehavior(signals: IFailureSignals): FailureClass {
|
|
206
|
-
if (signals.degenerated) {
|
|
207
|
-
return FAILURE_CLASS.degeneration;
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
if (signals.salvages > 0 || signals.repairs > 0) {
|
|
229
|
+
if (signals.toolUseFailed || signals.salvages > 0 || signals.repairs > 0) {
|
|
211
230
|
return FAILURE_CLASS.toolMalformed;
|
|
212
231
|
}
|
|
213
232
|
|
|
@@ -234,6 +253,18 @@ export function classifyRun(
|
|
|
234
253
|
return { failureClass: FAILURE_CLASS.none, signals };
|
|
235
254
|
}
|
|
236
255
|
|
|
256
|
+
// A repeated request timeout is the terminal cause — the model couldn't even
|
|
257
|
+
// respond — so it outranks any stale gate error from an earlier turn.
|
|
258
|
+
if (signals.timedOut) {
|
|
259
|
+
return { failureClass: FAILURE_CLASS.timeout, signals };
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
// Likewise a repetition-loop stop: the run died because generation degenerated,
|
|
263
|
+
// not because of whatever the gate last reported.
|
|
264
|
+
if (signals.degenerated) {
|
|
265
|
+
return { failureClass: FAILURE_CLASS.degeneration, signals };
|
|
266
|
+
}
|
|
267
|
+
|
|
237
268
|
if (signals.missingModule > 0) {
|
|
238
269
|
return { failureClass: FAILURE_CLASS.hallucinatedImport, signals };
|
|
239
270
|
}
|
package/strict.eslint.config.mjs
CHANGED
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
// Rule overrides are loaded via TSFORGE_RULE_OVERRIDES env var (JSON-encoded
|
|
13
13
|
// map of bare rule names to "error" | "warn" | "off").
|
|
14
14
|
import tseslint from "typescript-eslint";
|
|
15
|
+
import stylistic from "@stylistic/eslint-plugin";
|
|
15
16
|
|
|
16
17
|
// Load stack-aware packs if TSFORGE_PACKS env var is set
|
|
17
18
|
let packConfig = [];
|
|
@@ -52,7 +53,10 @@ export default tseslint.config(
|
|
|
52
53
|
{
|
|
53
54
|
files: ["**/*.ts", "**/*.tsx"],
|
|
54
55
|
languageOptions: { parser: tseslint.parser },
|
|
55
|
-
plugins: {
|
|
56
|
+
plugins: {
|
|
57
|
+
"@typescript-eslint": tseslint.plugin,
|
|
58
|
+
"@stylistic": stylistic,
|
|
59
|
+
},
|
|
56
60
|
rules: {
|
|
57
61
|
// The idioms the model habitually violates — all caught WITHOUT type info.
|
|
58
62
|
"@typescript-eslint/consistent-type-assertions": [
|
|
@@ -69,6 +73,25 @@ export default tseslint.config(
|
|
|
69
73
|
"prefer-const": "error",
|
|
70
74
|
"prefer-template": "error",
|
|
71
75
|
"no-var": "error",
|
|
76
|
+
// Blank-line discipline — the model rarely gets spacing right, so
|
|
77
|
+
// eslint --fix + prettier make it free. Uses @stylistic (the rule's
|
|
78
|
+
// maintained home; the core rule is deprecated and spams usedDeprecatedRules
|
|
79
|
+
// into eslint's --format json gate output).
|
|
80
|
+
"@stylistic/padding-line-between-statements": [
|
|
81
|
+
"error",
|
|
82
|
+
{ blankLine: "always", prev: "import", next: "*" },
|
|
83
|
+
{ blankLine: "any", prev: "import", next: "import" },
|
|
84
|
+
{ blankLine: "always", prev: "*", next: "return" },
|
|
85
|
+
{ blankLine: "always", prev: "*", next: "throw" },
|
|
86
|
+
{ blankLine: "always", prev: ["const", "let", "var"], next: "*" },
|
|
87
|
+
{
|
|
88
|
+
blankLine: "any",
|
|
89
|
+
prev: ["const", "let", "var"],
|
|
90
|
+
next: ["const", "let", "var"],
|
|
91
|
+
},
|
|
92
|
+
{ blankLine: "always", prev: "block-like", next: "*" },
|
|
93
|
+
{ blankLine: "always", prev: "*", next: "block-like" },
|
|
94
|
+
],
|
|
72
95
|
eqeqeq: ["error", "always"],
|
|
73
96
|
curly: ["error", "all"],
|
|
74
97
|
"no-restricted-syntax": [
|