npm - @noobdemon/noob-cli - Versions diffs - 1.9.3 → 1.9.5 - Mend

@noobdemon/noob-cli 1.9.3 → 1.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2)

package/package.json +1 -1
package/src/agent.js +69 -3

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@noobdemon/noob-cli",
-  "version": "1.9.3",
+  "version": "1.9.5",
   "publishConfig": {
     "access": "public"
   },

package/src/agent.js CHANGED

@@ -16,7 +16,7 @@ To call a tool, emit EXACTLY ONE fenced code block tagged \`tool\` containing a
 {"name": "<tool>", "input": { ... }}
 \`\`\`
-Then STOP and wait — the runtime executes the tool and replies with a TOOL RESULT. Use one tool per step. When the task is complete (or you are only answering a question), reply normally in Markdown with NO tool block.
+Then STOP and wait — the runtime executes the tool and replies with a TOOL RESULT. Use one tool per step. When the task is complete (or you are only answering a question), reply normally in Markdown with NO tool block. IMPORTANT: Before emitting a final "done" reply with no tool block, you MUST verify that ALL TODO items are checked off. If any remain unchecked, emit another tool call instead.
 Available tools (each is self-contained; pick the SMALLEST tool that answers the question):
 - read_file   {"path": str, "offset"?: int, "limit"?: int}   — read a file. Default reads whole file. For files you suspect are LARGE (>500 lines), first check size via list_dir/glob, then read with offset+limit (e.g. 200 lines at a time) instead of slurping. The "N  " line-number prefix in output is DISPLAY ONLY — never copy it into edit_file.
@@ -33,6 +33,7 @@ Available tools (each is self-contained; pick the SMALLEST tool that answers the
 Context is finite. Don't slurp the whole repo up front. Discover information progressively: list_dir/glob to map → grep to locate → read_file (with offset+limit for big files) to inspect only what matters. Each tool result spends your attention budget — make every call earn it. When a tool returns a huge blob, extract the few facts you need, then move on; don't re-read it later (the result stays in history).
 # Rules
+- TODO-BASED EXECUTION: For any multi-step task (3+ actions), CREATE a todo list FIRST as your very first tool call using write_file to a temp block in your response (format: "- [ ] item"). Then WORK THROUGH EVERY ITEM, checking them off ("- [x]") as you complete each. BEFORE summarizing or claiming "done", mentally verify: "Have I checked off ALL items? Is there anything left unchecked?" If ANY item remains unchecked, CONTINUE — do not stop. If the user's request implies multiple deliverables, treat each as a TODO item. NEVER stop mid-plan. NEVER assume something is done without a tool result proving it.
 - GROUND TRUTH = real TOOL RESULTs in this conversation, not your memory or what you intended to do. A file changed only if a write_file/edit_file result confirms it (see the FILES CHANGED list). A test passed / build succeeded / command worked only if a run_command result above shows it. Never narrate outcomes you didn't observe; if you haven't checked, say so and check now (read_file / list_dir / run the command). Before any "done/summary" reply, reconcile every file and result you're about to claim against the actual tool results above — if it isn't there, you didn't do it yet.
 - Investigate before editing: read the relevant files first; never invent file contents.
 - Make the smallest change that fully solves the task. Match the surrounding code style.
@@ -75,6 +76,67 @@ Có — cả 12 test đều pass.
 Follow this pattern exactly. Your very first response to a task that needs the filesystem MUST be a tool block — do not refuse or explain limitations.`;
+// ── Effort classifier ──────────────────────────────────────────────────────
+// Phân loại mức độ phức tạp task để set effort level cho model:
+//   low    — câu hỏi đơn, đọc 1 file, grep nhanh, list dir
+//   medium — đa file, edit vừa, chạy test/build, task coding thông thường
+//   high   — refactor lớn, debug phức tạp, multi-step agentic, workflow
+// Effort直接影响: số token model dùng, thời gian suy nghĩ, số tool call.
+// LOW/MEDIUM = model skip thinking cho vấn đề đơn → nhanh hơn nhiều.
+const LOW_PATTERNS = [
+  /^(list|ls|dir)\s/i,
+  /^(xem|hiện|đọc|read)\s+(file|thư mục|folder)/i,
+  /^(tìm|find|grep|search)\s+.{0,30}$/i,
+  /^(có|is|are|was|were)\s+.+\?$/i,
+  /^(version|phiên bản)\s*\??$/i,
+  /^(help|trợ giúp|help)\s*$/i,
+  /^(cwd|thư mục hiện tại)\s*$/i,
+  /^(status|trạng thái)\s*$/i,
+  /^(tokens?|token)\s*$/i,
+  /^(memory|noob\.md)\s*$/i,
+  /^(logout|đăng xuất)\s*$/i,
+  /^@/, // @file reference — typically a quick read
+];
+const MEDIUM_PATTERNS = [
+  /^(edit|sửa|fix|thay đổi)\s/i,
+  /^(thêm|add|tạo|create|write)\s+(file|function|hàm|class|module)/i,
+  /^(chạy|run)\s+(test|build|lint|npm|npx)/i,
+  /^(đọc|read)\s+\S+\s+\S+/, // read with multiple files
+  /^(so sánh|compare|diff)\s/i,
+  /^(tóm tắt|summarize|overview)\s/i,
+  /^(cập nhật|update|upgrade)\s/i,
+  /^(triển khai|deploy|publish)\s/i,
+  /^(install|cài đặt)\s/i,
+];
+const HIGH_PATTERNS = [
+  /(refactor|tái cấu trúc|đóng gói|restructure)/i,
+  /(implement|triển khai|xây dựng|build)\s+(hệ thống|system|feature|tính năng)/i,
+  /(debug|gỡ lỗi|tìm nguyên nhân|root cause)/i,
+  /(workflow|multi-agent|orchestrat|pipeline)/i,
+  /(architecture|kiến trúc|thiết kế|design)\s+(system|module)/i,
+  /(migrate|di chuyển|chuyển đổi)\s+(from|từ)/i,
+  /(review|rà soát|kiểm tra)\s+(code|toàn bộ|all)/i,
+  /(audit|kiểm toán|security|bảo mật)/i,
+  /(performance|hiệu năng|optimize|tối ưu)/i,
+  /(test|kiểm chứng)\s+(toàn bộ|all|comprehensive|end.to.end)/i,
+  /(tạo|create|write)\s+(noob\.md|SKILL|skill|workflow)/i,
+  /(ghi|write)\s+.+\s+(vào|into|to)\s+.+/i, // write X into Y — multi-step
+  /\b(ultra|goal|workflow)\b/i,
+];
+export function classifyEffort(userMessage) {
+  const msg = (userMessage || "").trim();
+  if (!msg) return "medium";
+  // Kiểm high TRƯỚC (nhiều pattern hơn, ưu tiên)
+  for (const rx of HIGH_PATTERNS) if (rx.test(msg)) return "high";
+  // Kiểm low TRƯỚC medium — các thao tác đọc/list đơn nên ưu tiên low
+  for (const rx of LOW_PATTERNS) if (rx.test(msg)) return "low";
+  // Kiểm medium
+  for (const rx of MEDIUM_PATTERNS) if (rx.test(msg)) return "medium";
+  // Mặc định: message dài (>200 chars) → medium, ngắn → low
+  return msg.length > 200 ? "medium" : "low";
+}
 // Số bước tool tối đa cho một lượt. Đặt rất cao theo yêu cầu người dùng: task
 // dài cứ chạy, đừng tự dừng. Người dùng vẫn có thể Ctrl+C bất cứ lúc nào.
 const MAX_STEPS = 10000;
@@ -415,6 +477,9 @@ export async function runAgent({ history, model, signal, onTool, onStatus, onDel
   // chạy không giới hạn token. Dừng theo: GOAL đạt, <<LOOP_DONE>>, <<ULTRA_DONE>>,
   // model tự kết thúc reply không có tool block, hoặc user Ctrl+C.
   const recentCalls = []; // {name, inputStr} — theo dõi vòng lặp
+  // Effort classifier: phân loại task từ user message gốc → set effort level.
+  // Chỉ classify 1 lần ở bước đầu, giữ nguyên suốt task (thay đổi giữa chừng gây bất ổn).
+  const effort = classifyEffort(history.find((m) => m.role === "user")?.content || "");
   for (let step = 0; step < MAX_STEPS; step++) {
     // Mỗi 100 bước log một mốc để người dùng biết noob vẫn đang chạy (task dài).
     if (step > 0 && step % 100 === 0) onStatus?.(`đã chạy ${step} bước…`);
@@ -437,7 +502,7 @@ export async function runAgent({ history, model, signal, onTool, onStatus, onDel
     // trường hợp api.js trả về với finishReason bất thường (tool_unclosed/empty) hoặc
     // throw ApiError retryable (network drop, 5xx, timeout).
     const { text, finishReason } = await streamWithRetry({
-      model, message, system, signal, tokenMeter, onDelta, onStatus,
+      model, message, system, signal, tokenMeter, onDelta, onStatus, effort,
     });
     tokenMeter?.endOutput();
     onDelta?.({ type: "step-end" });
@@ -499,7 +564,7 @@ export async function runAgent({ history, model, signal, onTool, onStatus, onDel
  *   backoff (1s, 2s, 4s, 8s, max 30s), tối đa 8 lần thử trước khi bỏ cuộc.
  * - Throw lại nếu signal abort hoặc lỗi không retryable.
  */
-async function streamWithRetry({ model, message, system, signal, tokenMeter, onDelta, onStatus }) {
+async function streamWithRetry({ model, message, system, signal, tokenMeter, onDelta, onStatus, effort }) {
   const MAX_RETRIES = 8;
   let lastErr = null;
   for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
@@ -510,6 +575,7 @@ async function streamWithRetry({ model, message, system, signal, tokenMeter, onD
         message,
         system,
         signal,
+        effort,
         onDelta: (d) => {
           tokenMeter?.pushOutputDelta(d);
           onDelta?.({ type: "delta", text: d });