muonroi-cli 1.6.6 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/generated/version.d.ts +1 -1
- package/dist/src/generated/version.js +1 -1
- package/dist/src/orchestrator/message-processor.js +1 -1
- package/dist/src/orchestrator/prompts.js +16 -2
- package/dist/src/orchestrator/stream-runner.js +50 -3
- package/dist/src/orchestrator/subagent-compactor.d.ts +1 -1
- package/dist/src/orchestrator/subagent-compactor.js +1 -1
- package/dist/src/pil/__tests__/layer4-gsd.test.js +40 -23
- package/dist/src/pil/__tests__/llm-classify.test.js +40 -3
- package/dist/src/pil/layer1-intent.js +10 -1
- package/dist/src/pil/layer1-intent.test.js +18 -0
- package/dist/src/pil/layer4-gsd.js +43 -19
- package/dist/src/pil/llm-classify.d.ts +36 -0
- package/dist/src/pil/llm-classify.js +84 -18
- package/dist/src/pil/types.d.ts +27 -2
- package/dist/src/{gsd → playbook}/__tests__/directives.test.js +34 -58
- package/dist/src/playbook/complexity.d.ts +17 -0
- package/dist/src/playbook/complexity.js +18 -0
- package/dist/src/{gsd → playbook}/directives.d.ts +20 -13
- package/dist/src/playbook/directives.js +149 -0
- package/dist/src/providers/__tests__/reasoning-roundtrip.test.js +70 -1
- package/dist/src/providers/strategies/deepseek.strategy.js +5 -22
- package/dist/src/providers/strategies/siliconflow.strategy.js +5 -0
- package/dist/src/providers/strategies/thinking-mode.d.ts +35 -0
- package/dist/src/providers/strategies/thinking-mode.js +73 -0
- package/dist/src/tools/registry.js +47 -47
- package/package.json +1 -1
- package/dist/src/gsd/__tests__/complexity.test.d.ts +0 -1
- package/dist/src/gsd/__tests__/complexity.test.js +0 -0
- package/dist/src/gsd/complexity.d.ts +0 -28
- package/dist/src/gsd/complexity.js +0 -103
- package/dist/src/gsd/directives.js +0 -154
- package/dist/src/{gsd → playbook}/__tests__/directives.test.d.ts +0 -0
|
@@ -19,7 +19,8 @@
|
|
|
19
19
|
*/
|
|
20
20
|
import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
|
|
21
21
|
import { streamText } from "ai";
|
|
22
|
-
import { describe, expect, it } from "vitest";
|
|
22
|
+
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
|
23
|
+
import { transformThinkingModeBody } from "../strategies/thinking-mode.js";
|
|
23
24
|
function makeStubProvider(name, capture) {
|
|
24
25
|
return createOpenAICompatible({
|
|
25
26
|
name,
|
|
@@ -113,6 +114,11 @@ describe("reasoning_content round-trip — AI SDK 2.0.42 wire shape", () => {
|
|
|
113
114
|
expect(Array.isArray(assistantMsg.tool_calls)).toBe(true);
|
|
114
115
|
expect(assistantMsg.tool_calls[0]?.id).toBe("c1");
|
|
115
116
|
});
|
|
117
|
+
// NOTE: the bare provider correctly omits reasoning_content when a turn has
|
|
118
|
+
// no reasoning part — but that is exactly the shape SiliconFlow's
|
|
119
|
+
// thinking-mode validator rejects (code 20015) in a mixed history. The
|
|
120
|
+
// strategy's transformRequestBody backfills it; see the dedicated describe
|
|
121
|
+
// block below ("transformThinkingModeBody — backfill / disable").
|
|
116
122
|
it("emits no reasoning_content key when there are no reasoning parts (no false positives)", async () => {
|
|
117
123
|
const capture = { current: null };
|
|
118
124
|
const provider = makeStubProvider("siliconflow", capture);
|
|
@@ -132,4 +138,67 @@ describe("reasoning_content round-trip — AI SDK 2.0.42 wire shape", () => {
|
|
|
132
138
|
expect(assistantMsg.reasoning_content).toBeUndefined();
|
|
133
139
|
});
|
|
134
140
|
});
|
|
141
|
+
describe("transformThinkingModeBody — backfill / disable (code 20015 fix)", () => {
|
|
142
|
+
const ENV = "MUONROI_DEEPSEEK_DISABLE_THINKING";
|
|
143
|
+
let saved;
|
|
144
|
+
beforeEach(() => {
|
|
145
|
+
saved = process.env[ENV];
|
|
146
|
+
delete process.env[ENV];
|
|
147
|
+
});
|
|
148
|
+
afterEach(() => {
|
|
149
|
+
if (saved === undefined)
|
|
150
|
+
delete process.env[ENV];
|
|
151
|
+
else
|
|
152
|
+
process.env[ENV] = saved;
|
|
153
|
+
});
|
|
154
|
+
it("A (default): backfills reasoning_content on a tool-call turn that lacks it", () => {
|
|
155
|
+
const body = {
|
|
156
|
+
messages: [
|
|
157
|
+
{ role: "user", content: "go" },
|
|
158
|
+
// tool-call turn with NO reasoning (the real bug shape)
|
|
159
|
+
{ role: "assistant", content: null, tool_calls: [{ id: "t1", type: "function" }] },
|
|
160
|
+
],
|
|
161
|
+
};
|
|
162
|
+
const out = transformThinkingModeBody(body);
|
|
163
|
+
const asst = out.messages.find((m) => m.role === "assistant");
|
|
164
|
+
expect(asst.reasoning_content).toBe("");
|
|
165
|
+
expect(Array.isArray(asst.tool_calls)).toBe(true); // tool_calls preserved
|
|
166
|
+
expect("thinking" in out).toBe(false); // thinking still ON
|
|
167
|
+
});
|
|
168
|
+
it("A (default): leaves a real reasoning_content untouched and patches only the gap", () => {
|
|
169
|
+
const body = {
|
|
170
|
+
messages: [
|
|
171
|
+
{ role: "user", content: "go" },
|
|
172
|
+
{ role: "assistant", content: null, reasoning_content: "real thought", tool_calls: [{ id: "a" }] },
|
|
173
|
+
{ role: "tool", content: "result" },
|
|
174
|
+
{ role: "assistant", content: null, tool_calls: [{ id: "b" }] }, // gap
|
|
175
|
+
],
|
|
176
|
+
};
|
|
177
|
+
const out = transformThinkingModeBody(body);
|
|
178
|
+
const asst = out.messages.filter((m) => m.role === "assistant");
|
|
179
|
+
expect(asst[0].reasoning_content).toBe("real thought"); // untouched
|
|
180
|
+
expect(asst[1].reasoning_content).toBe(""); // backfilled
|
|
181
|
+
});
|
|
182
|
+
it("A (default): does not touch non-assistant messages", () => {
|
|
183
|
+
const body = {
|
|
184
|
+
messages: [
|
|
185
|
+
{ role: "user", content: "hi" },
|
|
186
|
+
{ role: "tool", content: "r" },
|
|
187
|
+
],
|
|
188
|
+
};
|
|
189
|
+
const out = transformThinkingModeBody(body);
|
|
190
|
+
expect("reasoning_content" in out.messages[0]).toBe(false);
|
|
191
|
+
expect("reasoning_content" in out.messages[1]).toBe(false);
|
|
192
|
+
});
|
|
193
|
+
it("B (env=1): disables thinking and does NOT backfill reasoning_content", () => {
|
|
194
|
+
process.env[ENV] = "1";
|
|
195
|
+
const body = {
|
|
196
|
+
messages: [{ role: "assistant", content: null, tool_calls: [{ id: "t1" }] }],
|
|
197
|
+
};
|
|
198
|
+
const out = transformThinkingModeBody(body);
|
|
199
|
+
expect(out.thinking).toEqual({ type: "disabled" });
|
|
200
|
+
const asst = out.messages.find((m) => m.role === "assistant");
|
|
201
|
+
expect("reasoning_content" in asst).toBe(false);
|
|
202
|
+
});
|
|
203
|
+
});
|
|
135
204
|
//# sourceMappingURL=reasoning-roundtrip.test.js.map
|
|
@@ -7,19 +7,7 @@ import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
|
|
|
7
7
|
import { getProviderCapabilities } from "../capabilities.js";
|
|
8
8
|
import { OPENAI_COMPATIBLE_BASE_URLS } from "../endpoints.js";
|
|
9
9
|
import { BaseProviderStrategy } from "./base.strategy.js";
|
|
10
|
-
|
|
11
|
-
* If MUONROI_DEEPSEEK_DISABLE_THINKING=1 (default for self-qa), inject
|
|
12
|
-
* `extra_body.thinking.type="disabled"` into every DeepSeek request per
|
|
13
|
-
* https://api-docs.deepseek.com/guides/thinking_mode . Cuts response time
|
|
14
|
-
* 30-50% and prevents reasoning prose from leaking into JSON outputs.
|
|
15
|
-
*
|
|
16
|
-
* Set MUONROI_DEEPSEEK_DISABLE_THINKING=0 to keep thinking mode on for
|
|
17
|
-
* chat sessions that actually benefit from reasoning.
|
|
18
|
-
*/
|
|
19
|
-
function shouldDisableThinking() {
|
|
20
|
-
const v = process.env["MUONROI_DEEPSEEK_DISABLE_THINKING"];
|
|
21
|
-
return v === undefined ? false : v === "1" || v.toLowerCase() === "true";
|
|
22
|
-
}
|
|
10
|
+
import { transformThinkingModeBody } from "./thinking-mode.js";
|
|
23
11
|
export class DeepSeekStrategy extends BaseProviderStrategy {
|
|
24
12
|
id = "deepseek";
|
|
25
13
|
capabilities = getProviderCapabilities("deepseek");
|
|
@@ -34,15 +22,10 @@ export class DeepSeekStrategy extends BaseProviderStrategy {
|
|
|
34
22
|
// json_object form for generateObject calls, matching DeepSeek docs:
|
|
35
23
|
// https://api-docs.deepseek.com/guides/json_mode .
|
|
36
24
|
supportsStructuredOutputs: false,
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
thinking: { type: "disabled" },
|
|
42
|
-
};
|
|
43
|
-
}
|
|
44
|
-
return body;
|
|
45
|
-
},
|
|
25
|
+
// Thinking-mode round-trip fix: backfill reasoning_content (default) or
|
|
26
|
+
// disable thinking (MUONROI_DEEPSEEK_DISABLE_THINKING=1). See
|
|
27
|
+
// thinking-mode.ts for the full rationale (code 20015 rejection).
|
|
28
|
+
transformRequestBody: (body) => transformThinkingModeBody(body),
|
|
46
29
|
});
|
|
47
30
|
return (modelId) => p(modelId);
|
|
48
31
|
}
|
|
@@ -8,6 +8,7 @@ import { getProviderCapabilities } from "../capabilities.js";
|
|
|
8
8
|
import { OPENAI_COMPATIBLE_BASE_URLS } from "../endpoints.js";
|
|
9
9
|
import { createSiliconflowRepairFetch } from "../siliconflow-sse-repair.js";
|
|
10
10
|
import { BaseProviderStrategy } from "./base.strategy.js";
|
|
11
|
+
import { transformThinkingModeBody } from "./thinking-mode.js";
|
|
11
12
|
export class SiliconflowStrategy extends BaseProviderStrategy {
|
|
12
13
|
id = "siliconflow";
|
|
13
14
|
capabilities = getProviderCapabilities("siliconflow");
|
|
@@ -17,6 +18,10 @@ export class SiliconflowStrategy extends BaseProviderStrategy {
|
|
|
17
18
|
baseURL: opts.baseURL ?? OPENAI_COMPATIBLE_BASE_URLS.siliconflow,
|
|
18
19
|
apiKey: opts.apiKey,
|
|
19
20
|
fetch: createSiliconflowRepairFetch(),
|
|
21
|
+
// Thinking-mode round-trip fix (code 20015): backfill reasoning_content
|
|
22
|
+
// on every assistant turn, or disable thinking when
|
|
23
|
+
// MUONROI_DEEPSEEK_DISABLE_THINKING=1. See thinking-mode.ts.
|
|
24
|
+
transformRequestBody: (body) => transformThinkingModeBody(body),
|
|
20
25
|
});
|
|
21
26
|
return (modelId) => p(modelId);
|
|
22
27
|
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* src/providers/strategies/thinking-mode.ts
|
|
3
|
+
*
|
|
4
|
+
* Shared `transformRequestBody` logic for DeepSeek-family providers
|
|
5
|
+
* (deepseek + siliconflow) that run a `thinking`/reasoning mode.
|
|
6
|
+
*
|
|
7
|
+
* THE BUG (verified on a live SiliconFlow wire body): DeepSeek-V4-Flash in
|
|
8
|
+
* thinking mode rejects the WHOLE request with HTTP 400 / code 20015
|
|
9
|
+
* ("The reasoning_content in the thinking mode must be passed back to the
|
|
10
|
+
* API") whenever the history contains an assistant message that lacks a
|
|
11
|
+
* `reasoning_content` field. During multi-step tool loops some assistant
|
|
12
|
+
* turns make a tool call WITHOUT emitting a reasoning segment (e.g. a quick
|
|
13
|
+
* `todo_write`), so `@ai-sdk/openai-compatible` serializes them as
|
|
14
|
+
* `{content:null, tool_calls:[...]}` with no `reasoning_content` key — and
|
|
15
|
+
* the next request blows up. The earlier "reasoning round-trips natively"
|
|
16
|
+
* conclusion only held for histories where EVERY assistant turn had reasoning.
|
|
17
|
+
*
|
|
18
|
+
* Two mitigations, selected by `MUONROI_DEEPSEEK_DISABLE_THINKING`:
|
|
19
|
+
*
|
|
20
|
+
* - Default (A): keep thinking ON, but backfill `reasoning_content: ""` onto
|
|
21
|
+
* every assistant message in the wire body that is missing it, so the
|
|
22
|
+
* thinking-mode validator always sees the field.
|
|
23
|
+
* - Fallback (B, env=1 or case-insensitive "true"): disable thinking entirely via
|
|
24
|
+
* `thinking: { type: "disabled" }` (per the DeepSeek thinking_mode guide).
|
|
25
|
+
* Sidesteps the whole class of bug, cuts latency 30-50%, and stops
|
|
26
|
+
* reasoning prose from leaking into JSON outputs — at the cost of reasoning.
|
|
27
|
+
*
|
|
28
|
+
* https://api-docs.deepseek.com/guides/thinking_mode
|
|
29
|
+
*/
|
|
30
|
+
export declare function shouldDisableThinking(): boolean;
|
|
31
|
+
/**
|
|
32
|
+
* The shared `transformRequestBody` for deepseek + siliconflow. Runs on the
|
|
33
|
+
* fully assembled wire body object right before fetch.
|
|
34
|
+
*/
|
|
35
|
+
export declare function transformThinkingModeBody<T extends Record<string, unknown>>(body: T): T;
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* src/providers/strategies/thinking-mode.ts
|
|
3
|
+
*
|
|
4
|
+
* Shared `transformRequestBody` logic for DeepSeek-family providers
|
|
5
|
+
* (deepseek + siliconflow) that run a `thinking`/reasoning mode.
|
|
6
|
+
*
|
|
7
|
+
* THE BUG (verified on a live SiliconFlow wire body): DeepSeek-V4-Flash in
|
|
8
|
+
* thinking mode rejects the WHOLE request with HTTP 400 / code 20015
|
|
9
|
+
* ("The reasoning_content in the thinking mode must be passed back to the
|
|
10
|
+
* API") whenever the history contains an assistant message that lacks a
|
|
11
|
+
* `reasoning_content` field. During multi-step tool loops some assistant
|
|
12
|
+
* turns make a tool call WITHOUT emitting a reasoning segment (e.g. a quick
|
|
13
|
+
* `todo_write`), so `@ai-sdk/openai-compatible` serializes them as
|
|
14
|
+
* `{content:null, tool_calls:[...]}` with no `reasoning_content` key — and
|
|
15
|
+
* the next request blows up. The earlier "reasoning round-trips natively"
|
|
16
|
+
* conclusion only held for histories where EVERY assistant turn had reasoning.
|
|
17
|
+
*
|
|
18
|
+
* Two mitigations, selected by `MUONROI_DEEPSEEK_DISABLE_THINKING`:
|
|
19
|
+
*
|
|
20
|
+
* - Default (A): keep thinking ON, but backfill `reasoning_content: ""` onto
|
|
21
|
+
* every assistant message in the wire body that is missing it, so the
|
|
22
|
+
* thinking-mode validator always sees the field.
|
|
23
|
+
* - Fallback (B, env=1 or case-insensitive "true"): disable thinking entirely via
|
|
24
|
+
* `thinking: { type: "disabled" }` (per the DeepSeek thinking_mode guide).
|
|
25
|
+
* Sidesteps the whole class of bug, cuts latency 30-50%, and stops
|
|
26
|
+
* reasoning prose from leaking into JSON outputs — at the cost of reasoning.
|
|
27
|
+
*
|
|
28
|
+
* https://api-docs.deepseek.com/guides/thinking_mode
|
|
29
|
+
*/
|
|
30
|
+
export function shouldDisableThinking() {
|
|
31
|
+
const v = process.env["MUONROI_DEEPSEEK_DISABLE_THINKING"];
|
|
32
|
+
return v === undefined ? false : v === "1" || v.toLowerCase() === "true";
|
|
33
|
+
}
|
|
34
|
+
/**
|
|
35
|
+
* Backfill `reasoning_content: ""` onto any assistant message that lacks a
|
|
36
|
+
* string-valued one (an existing "" counts as present), so SiliconFlow's thinking-mode validator never
|
|
37
|
+
* sees a reasoning-less assistant turn. Assistant turns that already carry a
|
|
38
|
+
* string `reasoning_content` (even an empty one) are left untouched.
|
|
39
|
+
*/
|
|
40
|
+
function backfillReasoningContent(messages) {
|
|
41
|
+
let mutated = false;
|
|
42
|
+
const next = messages.map((m) => {
|
|
43
|
+
if (m?.role !== "assistant")
|
|
44
|
+
return m;
|
|
45
|
+
const rc = m.reasoning_content;
|
|
46
|
+
if (typeof rc === "string")
|
|
47
|
+
return m; // already present (incl. "")
|
|
48
|
+
mutated = true;
|
|
49
|
+
return { ...m, reasoning_content: "" };
|
|
50
|
+
});
|
|
51
|
+
return mutated ? next : messages;
|
|
52
|
+
}
|
|
53
|
+
/**
|
|
54
|
+
* The shared `transformRequestBody` for deepseek + siliconflow. Runs on the
|
|
55
|
+
* fully assembled wire body object right before fetch.
|
|
56
|
+
*/
|
|
57
|
+
export function transformThinkingModeBody(body) {
|
|
58
|
+
if (shouldDisableThinking()) {
|
|
59
|
+
// Fallback B: turn thinking off. No reasoning is produced, so there is
|
|
60
|
+
// nothing to backfill.
|
|
61
|
+
return { ...body, thinking: { type: "disabled" } };
|
|
62
|
+
}
|
|
63
|
+
// Default A: keep thinking on, but guarantee every assistant message carries
|
|
64
|
+
// a reasoning_content field so the validator is satisfied.
|
|
65
|
+
const messages = body["messages"];
|
|
66
|
+
if (!Array.isArray(messages))
|
|
67
|
+
return body;
|
|
68
|
+
const patched = backfillReasoningContent(messages);
|
|
69
|
+
if (patched === messages)
|
|
70
|
+
return body;
|
|
71
|
+
return { ...body, messages: patched };
|
|
72
|
+
}
|
|
73
|
+
//# sourceMappingURL=thinking-mode.js.map
|
|
@@ -593,58 +593,58 @@ export function createBuiltinTools(bash, mode, opts) {
|
|
|
593
593
|
.join("\n");
|
|
594
594
|
},
|
|
595
595
|
});
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
596
|
+
}
|
|
597
|
+
// todo_write — Claude-Code-style task list. Each call REPLACES the agent's
|
|
598
|
+
// current todo snapshot; the orchestrator post-processes this tool's args
|
|
599
|
+
// into a task_list_update StreamChunk that the UI renders as a sticky
|
|
600
|
+
// checklist panel. Status flow: pending → in_progress → completed; only
|
|
601
|
+
// ONE item should be in_progress at a time. Use this when the user asks
|
|
602
|
+
// for a multi-step task (≥3 distinct steps) so progress is visible.
|
|
603
|
+
tools.todo_write = dynamicTool({
|
|
604
|
+
description: "Write the full current todo list. Replaces the previous list entirely on every call (no partial updates). Use when a user request resolves into ≥3 discrete steps so the UI can show progress. Mark exactly one item as in_progress at a time. Always emit the FULL list, not just the changed items.",
|
|
605
|
+
inputSchema: jsonSchema({
|
|
606
|
+
type: "object",
|
|
607
|
+
properties: {
|
|
608
|
+
todos: {
|
|
609
|
+
type: "array",
|
|
610
|
+
description: "The full ordered list of todo items. Replaces any prior list. Keep order stable across updates so the UI doesn't reshuffle on every call.",
|
|
611
|
+
items: {
|
|
612
|
+
type: "object",
|
|
613
|
+
properties: {
|
|
614
|
+
id: { type: "string", description: "Stable id across updates (e.g. '1','2', or a slug)." },
|
|
615
|
+
subject: { type: "string", description: "Short imperative title shown in the list." },
|
|
616
|
+
activeForm: {
|
|
617
|
+
type: "string",
|
|
618
|
+
description: "Present-continuous form shown while in_progress (e.g. 'Reading files'). Falls back to subject when absent.",
|
|
619
|
+
},
|
|
620
|
+
status: {
|
|
621
|
+
type: "string",
|
|
622
|
+
enum: ["pending", "in_progress", "completed"],
|
|
623
|
+
description: "Item status. Only ONE item should be in_progress at any time.",
|
|
624
624
|
},
|
|
625
|
-
required: ["id", "subject", "status"],
|
|
626
625
|
},
|
|
626
|
+
required: ["id", "subject", "status"],
|
|
627
627
|
},
|
|
628
628
|
},
|
|
629
|
-
required: ["todos"],
|
|
630
|
-
}),
|
|
631
|
-
execute: async (input) => {
|
|
632
|
-
const todos = Array.isArray(input?.todos)
|
|
633
|
-
? input.todos
|
|
634
|
-
: [];
|
|
635
|
-
const counts = { completed: 0, inProgress: 0, pending: 0, total: todos.length };
|
|
636
|
-
for (const t of todos) {
|
|
637
|
-
if (t.status === "completed")
|
|
638
|
-
counts.completed++;
|
|
639
|
-
else if (t.status === "in_progress")
|
|
640
|
-
counts.inProgress++;
|
|
641
|
-
else
|
|
642
|
-
counts.pending++;
|
|
643
|
-
}
|
|
644
|
-
return `Tracking ${counts.total} todo${counts.total !== 1 ? "s" : ""}: ${counts.completed} done · ${counts.inProgress} in progress · ${counts.pending} queued.`;
|
|
645
629
|
},
|
|
646
|
-
|
|
647
|
-
|
|
630
|
+
required: ["todos"],
|
|
631
|
+
}),
|
|
632
|
+
execute: async (input) => {
|
|
633
|
+
const todos = Array.isArray(input?.todos)
|
|
634
|
+
? input.todos
|
|
635
|
+
: [];
|
|
636
|
+
const counts = { completed: 0, inProgress: 0, pending: 0, total: todos.length };
|
|
637
|
+
for (const t of todos) {
|
|
638
|
+
if (t.status === "completed")
|
|
639
|
+
counts.completed++;
|
|
640
|
+
else if (t.status === "in_progress")
|
|
641
|
+
counts.inProgress++;
|
|
642
|
+
else
|
|
643
|
+
counts.pending++;
|
|
644
|
+
}
|
|
645
|
+
return `Tracking ${counts.total} todo${counts.total !== 1 ? "s" : ""}: ${counts.completed} done · ${counts.inProgress} in progress · ${counts.pending} queued.`;
|
|
646
|
+
},
|
|
647
|
+
});
|
|
648
648
|
// Vision-tool gate: drop the 3 vision-proxy tools on turns with no plausible
|
|
649
649
|
// image involvement. Built then deleted (closures are cheap) to avoid
|
|
650
650
|
// re-indenting the tool definitions above. todo_write + core tools untouched.
|
package/package.json
CHANGED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export {};
|
|
Binary file
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* src/gsd/complexity.ts
|
|
3
|
-
*
|
|
4
|
-
* Heuristic complexity scorer for incoming prompts.
|
|
5
|
-
* Maps a raw user prompt to one of three tiers that drive the GSD directive
|
|
6
|
-
* injected by layer4:
|
|
7
|
-
*
|
|
8
|
-
* - "heavy" → multi-file / multi-repo / architectural / "do everything"
|
|
9
|
-
* Triggers the full discuss → research → verify → plan → impl → verify flow.
|
|
10
|
-
* - "standard" → ordinary feature/bugfix work. GSD-quick mindset.
|
|
11
|
-
* - "quick" → trivial single-shot tasks (typo, rename, read-and-explain).
|
|
12
|
-
*
|
|
13
|
-
* The scorer is intentionally cheap: regex + length checks. It runs inside the
|
|
14
|
-
* 200ms PIL budget and must never throw.
|
|
15
|
-
*/
|
|
16
|
-
export type ComplexityTier = "quick" | "standard" | "heavy";
|
|
17
|
-
export interface ComplexitySignal {
|
|
18
|
-
/** Short tag identifying which heuristic fired (e.g. "multi-repo", "wholesale"). */
|
|
19
|
-
tag: string;
|
|
20
|
-
/** Weight contributed to the score. Positive = heavier, negative = lighter. */
|
|
21
|
-
weight: number;
|
|
22
|
-
}
|
|
23
|
-
export interface ComplexityResult {
|
|
24
|
-
tier: ComplexityTier;
|
|
25
|
-
score: number;
|
|
26
|
-
signals: ComplexitySignal[];
|
|
27
|
-
}
|
|
28
|
-
export declare function scoreComplexity(prompt: string): ComplexityResult;
|
|
@@ -1,103 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* src/gsd/complexity.ts
|
|
3
|
-
*
|
|
4
|
-
* Heuristic complexity scorer for incoming prompts.
|
|
5
|
-
* Maps a raw user prompt to one of three tiers that drive the GSD directive
|
|
6
|
-
* injected by layer4:
|
|
7
|
-
*
|
|
8
|
-
* - "heavy" → multi-file / multi-repo / architectural / "do everything"
|
|
9
|
-
* Triggers the full discuss → research → verify → plan → impl → verify flow.
|
|
10
|
-
* - "standard" → ordinary feature/bugfix work. GSD-quick mindset.
|
|
11
|
-
* - "quick" → trivial single-shot tasks (typo, rename, read-and-explain).
|
|
12
|
-
*
|
|
13
|
-
* The scorer is intentionally cheap: regex + length checks. It runs inside the
|
|
14
|
-
* 200ms PIL budget and must never throw.
|
|
15
|
-
*/
|
|
16
|
-
/** Patterns that strongly suggest a large, multi-step undertaking. */
|
|
17
|
-
const HEAVY_PATTERNS = [
|
|
18
|
-
{ tag: "wholesale", pattern: /\b(toàn bộ|all of|entire|whole|everything|tất cả)\b/i, weight: 3 },
|
|
19
|
-
{ tag: "deep-map", pattern: /\b(deep[-\s]?map|repo[-\s]?map|map (the )?(codebase|project|repo))\b/i, weight: 3 },
|
|
20
|
-
{ tag: "redo", pattern: /\b(redo|rewrite|rebuild|migrate (the )?entire|port (the )?(whole|entire))\b/i, weight: 3 },
|
|
21
|
-
{
|
|
22
|
-
tag: "from-scratch",
|
|
23
|
-
pattern: /\b(from[-\s]scratch|greenfield|new project|khởi tạo (project|dự án))\b/i,
|
|
24
|
-
weight: 2,
|
|
25
|
-
},
|
|
26
|
-
{ tag: "architecture", pattern: /\b(architect(?:ure)?|system design|design contract|domain model)\b/i, weight: 2 },
|
|
27
|
-
{ tag: "milestone", pattern: /\b(milestone|roadmap|epic|phase \d+|sprint \d+)\b/i, weight: 2 },
|
|
28
|
-
{ tag: "multi-repo", pattern: /\b(multi[-\s]repo|across repos|every repo|all repos|cross[-\s]repo)\b/i, weight: 3 },
|
|
29
|
-
{ tag: "refactor-wide", pattern: /\b(refactor (the )?(entire|whole|all))\b/i, weight: 3 },
|
|
30
|
-
{ tag: "i18n", pattern: /\b(i18n|internationali[sz]e|localization|translation pipeline)\b/i, weight: 2 },
|
|
31
|
-
{ tag: "auth-system", pattern: /\b(auth(entication)? system|sso|oauth flow|rbac)\b/i, weight: 2 },
|
|
32
|
-
{ tag: "many-files", pattern: /\b(\d{2,})\s+(files?|modules?|services?)\b/i, weight: 2 },
|
|
33
|
-
];
|
|
34
|
-
/** Patterns that suggest a tiny, one-shot task. */
|
|
35
|
-
const QUICK_PATTERNS = [
|
|
36
|
-
{ tag: "typo", pattern: /\b(typo|misspell|spelling)\b/i, weight: -3 },
|
|
37
|
-
{ tag: "rename", pattern: /\b(rename (this|the|a) (var|variable|function|file)|đổi tên)\b/i, weight: -2 },
|
|
38
|
-
{
|
|
39
|
-
tag: "read-explain",
|
|
40
|
-
pattern: /^(what (does|is)|how (does|do|is)|explain|giải thích|là gì|nghĩa là)\b/i,
|
|
41
|
-
weight: -2,
|
|
42
|
-
},
|
|
43
|
-
{ tag: "single-line", pattern: /\b(one[-\s]liner|single line|một dòng)\b/i, weight: -2 },
|
|
44
|
-
{ tag: "lookup", pattern: /\b(where is|find the|locate|tìm)\b/i, weight: -1 },
|
|
45
|
-
];
|
|
46
|
-
/** Words that, when stacked, indicate orchestration vs single task. */
|
|
47
|
-
const COORDINATION_MARKERS = [
|
|
48
|
-
/\b(?:and then|sau đó|tiếp theo|after that|followed by)\b/gi,
|
|
49
|
-
/\b(?:multiple|several|many|nhiều)\b/gi,
|
|
50
|
-
];
|
|
51
|
-
const HEAVY_THRESHOLD = 4;
|
|
52
|
-
const QUICK_THRESHOLD = -2;
|
|
53
|
-
const LONG_PROMPT_CHARS = 500;
|
|
54
|
-
const SHORT_PROMPT_CHARS = 60;
|
|
55
|
-
export function scoreComplexity(prompt) {
|
|
56
|
-
const signals = [];
|
|
57
|
-
let score = 0;
|
|
58
|
-
if (!prompt || prompt.trim().length === 0) {
|
|
59
|
-
return { tier: "quick", score: 0, signals: [{ tag: "empty", weight: 0 }] };
|
|
60
|
-
}
|
|
61
|
-
for (const { tag, pattern, weight } of HEAVY_PATTERNS) {
|
|
62
|
-
if (pattern.test(prompt)) {
|
|
63
|
-
signals.push({ tag, weight });
|
|
64
|
-
score += weight;
|
|
65
|
-
}
|
|
66
|
-
}
|
|
67
|
-
for (const { tag, pattern, weight } of QUICK_PATTERNS) {
|
|
68
|
-
if (pattern.test(prompt)) {
|
|
69
|
-
signals.push({ tag, weight });
|
|
70
|
-
score += weight;
|
|
71
|
-
}
|
|
72
|
-
}
|
|
73
|
-
// Coordination words: each match adds 1 point (capped at +3).
|
|
74
|
-
let coordinationHits = 0;
|
|
75
|
-
for (const re of COORDINATION_MARKERS) {
|
|
76
|
-
const matches = prompt.match(re);
|
|
77
|
-
if (matches)
|
|
78
|
-
coordinationHits += matches.length;
|
|
79
|
-
}
|
|
80
|
-
if (coordinationHits > 0) {
|
|
81
|
-
const weight = Math.min(coordinationHits, 3);
|
|
82
|
-
signals.push({ tag: "coordination", weight });
|
|
83
|
-
score += weight;
|
|
84
|
-
}
|
|
85
|
-
// Length heuristics.
|
|
86
|
-
if (prompt.length >= LONG_PROMPT_CHARS) {
|
|
87
|
-
signals.push({ tag: "long-prompt", weight: 1 });
|
|
88
|
-
score += 1;
|
|
89
|
-
}
|
|
90
|
-
else if (prompt.length <= SHORT_PROMPT_CHARS) {
|
|
91
|
-
signals.push({ tag: "short-prompt", weight: -1 });
|
|
92
|
-
score -= 1;
|
|
93
|
-
}
|
|
94
|
-
let tier;
|
|
95
|
-
if (score >= HEAVY_THRESHOLD)
|
|
96
|
-
tier = "heavy";
|
|
97
|
-
else if (score <= QUICK_THRESHOLD)
|
|
98
|
-
tier = "quick";
|
|
99
|
-
else
|
|
100
|
-
tier = "standard";
|
|
101
|
-
return { tier, score, signals };
|
|
102
|
-
}
|
|
103
|
-
//# sourceMappingURL=complexity.js.map
|