@noobdemon/noob-cli 1.9.3 → 1.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/package.json +1 -1
  2. package/src/agent.js +69 -3
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@noobdemon/noob-cli",
3
- "version": "1.9.3",
3
+ "version": "1.9.5",
4
4
  "publishConfig": {
5
5
  "access": "public"
6
6
  },
package/src/agent.js CHANGED
@@ -16,7 +16,7 @@ To call a tool, emit EXACTLY ONE fenced code block tagged \`tool\` containing a
16
16
  {"name": "<tool>", "input": { ... }}
17
17
  \`\`\`
18
18
 
19
- Then STOP and wait — the runtime executes the tool and replies with a TOOL RESULT. Use one tool per step. When the task is complete (or you are only answering a question), reply normally in Markdown with NO tool block.
19
+ Then STOP and wait — the runtime executes the tool and replies with a TOOL RESULT. Use one tool per step. When the task is complete (or you are only answering a question), reply normally in Markdown with NO tool block. IMPORTANT: Before emitting a final "done" reply with no tool block, you MUST verify that ALL TODO items are checked off. If any remain unchecked, emit another tool call instead.
20
20
 
21
21
  Available tools (each is self-contained; pick the SMALLEST tool that answers the question):
22
22
  - read_file {"path": str, "offset"?: int, "limit"?: int} — read a file. Default reads whole file. For files you suspect are LARGE (>500 lines), first check size via list_dir/glob, then read with offset+limit (e.g. 200 lines at a time) instead of slurping. The "N " line-number prefix in output is DISPLAY ONLY — never copy it into edit_file.
@@ -33,6 +33,7 @@ Available tools (each is self-contained; pick the SMALLEST tool that answers the
33
33
  Context is finite. Don't slurp the whole repo up front. Discover information progressively: list_dir/glob to map → grep to locate → read_file (with offset+limit for big files) to inspect only what matters. Each tool result spends your attention budget — make every call earn it. When a tool returns a huge blob, extract the few facts you need, then move on; don't re-read it later (the result stays in history).
34
34
 
35
35
  # Rules
36
+ - TODO-BASED EXECUTION: For any multi-step task (3+ actions), CREATE a todo list FIRST as your very first tool call using write_file to a temp block in your response (format: "- [ ] item"). Then WORK THROUGH EVERY ITEM, checking them off ("- [x]") as you complete each. BEFORE summarizing or claiming "done", mentally verify: "Have I checked off ALL items? Is there anything left unchecked?" If ANY item remains unchecked, CONTINUE — do not stop. If the user's request implies multiple deliverables, treat each as a TODO item. NEVER stop mid-plan. NEVER assume something is done without a tool result proving it.
36
37
  - GROUND TRUTH = real TOOL RESULTs in this conversation, not your memory or what you intended to do. A file changed only if a write_file/edit_file result confirms it (see the FILES CHANGED list). A test passed / build succeeded / command worked only if a run_command result above shows it. Never narrate outcomes you didn't observe; if you haven't checked, say so and check now (read_file / list_dir / run the command). Before any "done/summary" reply, reconcile every file and result you're about to claim against the actual tool results above — if it isn't there, you didn't do it yet.
37
38
  - Investigate before editing: read the relevant files first; never invent file contents.
38
39
  - Make the smallest change that fully solves the task. Match the surrounding code style.
@@ -75,6 +76,67 @@ Có — cả 12 test đều pass.
75
76
 
76
77
  Follow this pattern exactly. Your very first response to a task that needs the filesystem MUST be a tool block — do not refuse or explain limitations.`;
77
78
 
79
+ // ── Effort classifier ──────────────────────────────────────────────────────
80
+ // Phân loại mức độ phức tạp task để set effort level cho model:
81
+ // low — câu hỏi đơn, đọc 1 file, grep nhanh, list dir
82
+ // medium — đa file, edit vừa, chạy test/build, task coding thông thường
83
+ // high — refactor lớn, debug phức tạp, multi-step agentic, workflow
84
+ // Effort ảnh hưởng trực tiếp đến: số token model dùng, thời gian suy nghĩ, số tool call.
85
+ // LOW/MEDIUM = model skip thinking cho vấn đề đơn → nhanh hơn nhiều.
86
+ const LOW_PATTERNS = [
87
+ /^(list|ls|dir)\s/i,
88
+ /^(xem|hiện|đọc|read)\s+(file|thư mục|folder)/i,
89
+ /^(tìm|find|grep|search)\s+.{0,30}$/i,
90
+ /^(có|is|are|was|were)\s+.+\?$/i,
91
+ /^(version|phiên bản)\s*\??$/i,
92
+ /^(help|trợ giúp|help)\s*$/i,
93
+ /^(cwd|thư mục hiện tại)\s*$/i,
94
+ /^(status|trạng thái)\s*$/i,
95
+ /^(tokens?|token)\s*$/i,
96
+ /^(memory|noob\.md)\s*$/i,
97
+ /^(logout|đăng xuất)\s*$/i,
98
+ /^@/, // @file reference — typically a quick read
99
+ ];
100
+ const MEDIUM_PATTERNS = [
101
+ /^(edit|sửa|fix|thay đổi)\s/i,
102
+ /^(thêm|add|tạo|create|write)\s+(file|function|hàm|class|module)/i,
103
+ /^(chạy|run)\s+(test|build|lint|npm|npx)/i,
104
+ /^(đọc|read)\s+\S+\s+\S+/, // read with multiple files
105
+ /^(so sánh|compare|diff)\s/i,
106
+ /^(tóm tắt|summarize|overview)\s/i,
107
+ /^(cập nhật|update|upgrade)\s/i,
108
+ /^(triển khai|deploy|publish)\s/i,
109
+ /^(install|cài đặt)\s/i,
110
+ ];
111
+ const HIGH_PATTERNS = [
112
+ /(refactor|tái cấu trúc|đóng gói|restructure)/i,
113
+ /(implement|triển khai|xây dựng|build)\s+(hệ thống|system|feature|tính năng)/i,
114
+ /(debug|gỡ lỗi|tìm nguyên nhân|root cause)/i,
115
+ /(workflow|multi-agent|orchestrat|pipeline)/i,
116
+ /(architecture|kiến trúc|thiết kế|design)\s+(system|module)/i,
117
+ /(migrate|di chuyển|chuyển đổi)\s+(from|từ)/i,
118
+ /(review|rà soát|kiểm tra)\s+(code|toàn bộ|all)/i,
119
+ /(audit|kiểm toán|security|bảo mật)/i,
120
+ /(performance|hiệu năng|optimize|tối ưu)/i,
121
+ /(test|kiểm chứng)\s+(toàn bộ|all|comprehensive|end.to.end)/i,
122
+ /(tạo|create|write)\s+(noob\.md|SKILL|skill|workflow)/i,
123
+ /(ghi|write)\s+.+\s+(vào|into|to)\s+.+/i, // write X into Y — multi-step
124
+ /\b(ultra|goal|workflow)\b/i,
125
+ ];
126
+
127
+ export function classifyEffort(userMessage) {
128
+ const msg = (userMessage || "").trim();
129
+ if (!msg) return "medium";
130
+ // Kiểm high TRƯỚC (nhiều pattern hơn, ưu tiên)
131
+ for (const rx of HIGH_PATTERNS) if (rx.test(msg)) return "high";
132
+ // Kiểm low TRƯỚC medium — các thao tác đọc/list đơn nên ưu tiên low
133
+ for (const rx of LOW_PATTERNS) if (rx.test(msg)) return "low";
134
+ // Kiểm medium
135
+ for (const rx of MEDIUM_PATTERNS) if (rx.test(msg)) return "medium";
136
+ // Mặc định: message dài (>200 chars) → medium, ngắn → low
137
+ return msg.length > 200 ? "medium" : "low";
138
+ }
139
+
78
140
  // Số bước tool tối đa cho một lượt. Đặt rất cao theo yêu cầu người dùng: task
79
141
  // dài cứ chạy, đừng tự dừng. Người dùng vẫn có thể Ctrl+C bất cứ lúc nào.
80
142
  const MAX_STEPS = 10000;
@@ -415,6 +477,9 @@ export async function runAgent({ history, model, signal, onTool, onStatus, onDel
415
477
  // chạy không giới hạn token. Dừng theo: GOAL đạt, <<LOOP_DONE>>, <<ULTRA_DONE>>,
416
478
  // model tự kết thúc reply không có tool block, hoặc user Ctrl+C.
417
479
  const recentCalls = []; // {name, inputStr} — theo dõi vòng lặp
480
+ // Effort classifier: phân loại task từ user message gốc → set effort level.
481
+ // Chỉ classify 1 lần ở bước đầu, giữ nguyên suốt task (thay đổi giữa chừng gây bất ổn).
482
+ const effort = classifyEffort(history.find((m) => m.role === "user")?.content || "");
418
483
  for (let step = 0; step < MAX_STEPS; step++) {
419
484
  // Mỗi 100 bước log một mốc để người dùng biết noob vẫn đang chạy (task dài).
420
485
  if (step > 0 && step % 100 === 0) onStatus?.(`đã chạy ${step} bước…`);
@@ -437,7 +502,7 @@ export async function runAgent({ history, model, signal, onTool, onStatus, onDel
437
502
  // trường hợp api.js trả về với finishReason bất thường (tool_unclosed/empty) hoặc
438
503
  // throw ApiError retryable (network drop, 5xx, timeout).
439
504
  const { text, finishReason } = await streamWithRetry({
440
- model, message, system, signal, tokenMeter, onDelta, onStatus,
505
+ model, message, system, signal, tokenMeter, onDelta, onStatus, effort,
441
506
  });
442
507
  tokenMeter?.endOutput();
443
508
  onDelta?.({ type: "step-end" });
@@ -499,7 +564,7 @@ export async function runAgent({ history, model, signal, onTool, onStatus, onDel
499
564
  * backoff (1s, 2s, 4s, 8s, max 30s), tối đa 8 lần thử trước khi bỏ cuộc.
500
565
  * - Throw lại nếu signal abort hoặc lỗi không retryable.
501
566
  */
502
- async function streamWithRetry({ model, message, system, signal, tokenMeter, onDelta, onStatus }) {
567
+ async function streamWithRetry({ model, message, system, signal, tokenMeter, onDelta, onStatus, effort }) {
503
568
  const MAX_RETRIES = 8;
504
569
  let lastErr = null;
505
570
  for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
@@ -510,6 +575,7 @@ async function streamWithRetry({ model, message, system, signal, tokenMeter, onD
510
575
  message,
511
576
  system,
512
577
  signal,
578
+ effort,
513
579
  onDelta: (d) => {
514
580
  tokenMeter?.pushOutputDelta(d);
515
581
  onDelta?.({ type: "delta", text: d });