npm - @noobdemon/noob-cli - Versions diffs - 1.5.2 → 1.5.3 - Mend

@noobdemon/noob-cli 1.5.2 → 1.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/package.json +1 -1
package/src/api.js +63 -2

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@noobdemon/noob-cli",
-  "version": "1.5.2",
+  "version": "1.5.3",
   "publishConfig": {
     "access": "public"
   },

package/src/api.js CHANGED Viewed

@@ -41,10 +41,59 @@ async function parseError(resp) {
 /**
  * Stream a chat/merge/search request from the gateway.
+ *
+ * Auto-continue (chat only): the chat upstream runs on Vercel, which kills the
+ * function at ~300s. A long reply gets cut mid-stream — the gateway flags this
+ * with `{truncated:true}` (or the connection just drops with no `{done}`). When
+ * that happens we re-send the SAME transcript plus the partial reply so far and
+ * ask the model to write ONLY the rest, then append it. The caller sees one
+ * seamless stream. Capped by `maxContinues` so a genuinely broken upstream can't
+ * loop forever.
+ *
  * @returns {Promise<{text:string, reasoning:string}>}
  */
-export async function stream({ mode = "chat", message, model, signal, onDelta, onReasoning, onStatus, idleMs = 120000 }) {
+export async function stream({ mode = "chat", message, model, signal, onDelta, onReasoning, onStatus, idleMs = 120000, maxContinues = 6 }) {
   const endpoint = mode === "search" ? "/api/search" : mode === "merge" ? "/api/merge" : "/api/chat";
+  let fullText = "";
+  let reasoning = "";
+  let prompt = message; // prompt to send: first attempt = the original message, later attempts = a continuation prompt
+  for (let attempt = 0; ; attempt++) { // attempt 0 is the initial request; the loop only exits via the break below
+    const r = await streamOnce({ endpoint, mode, message: prompt, model, signal, idleMs, onStatus, onDelta, onReasoning });
+    fullText = mode === "chat" ? fullText + r.text : r.text; // chat: concatenate the continuation segments; other modes: replace
+    if (r.reasoning) reasoning = r.reasoning; // keep the most recent non-empty reasoning
+    // Can we still continue? Only for chat, when the reply was truncated, while attempts
+    // remain, and when this attempt produced real text (empty segment → treat as done, to avoid looping forever).
+    if (!r.truncated || mode !== "chat" || attempt >= maxContinues || !r.text.trim()) break;
+    prompt = continuationPrompt(message, fullText); // always rebuilt from the ORIGINAL message plus everything received so far
+  }
+  return { text: fullText.trim(), reasoning: reasoning.trim() }; // trimmed once, after all segments are joined
+}
+// Build the "continuation" prompt used when a reply was cut off midway: the original message, then the partial
+// reply written so far, then a (Vietnamese) instruction to continue exactly where it stopped without repeating.
+function continuationPrompt(message, partial) {
+  const bar = "=".repeat(60); // 60 "=" characters, used as a separator line between the prompt sections
+  return (
+    message +
+    "\n\n" + bar +
+    "\n## ASSISTANT (bị ngắt giữa chừng — phần trả lời dưới đây CHƯA hoàn tất)\n" +
+    partial +
+    "\n\n" + bar +
+    "\n# SYSTEM: Phần trả lời ngay trên bị mạng/timeout cắt ngang trước khi xong. " +
+    "Hãy VIẾT TIẾP liền mạch từ ĐÚNG ký tự cuối cùng ở trên — KHÔNG lặp lại hay diễn đạt lại bất kỳ chữ nào đã hiện, KHÔNG mở đầu lại, KHÔNG thêm lời dẫn. " +
+    "Chỉ xuất phần CÒN LẠI. Nếu đang viết dở một khối tool thì hoàn tất đúng khối đó. Nếu thật ra đã xong, chỉ xuất một dấu cách rồi dừng."
+  );
+}
+/**
+ * One network attempt of the stream. Returns this attempt's accumulated text +
+ * a `truncated` flag telling the caller whether the reply was cut short.
+ */
+async function streamOnce({ endpoint, mode, message, model, signal, idleMs, onStatus, onDelta, onReasoning }) {
   const body = mode === "search" ? { query: message } : mode === "merge" ? { message } : { message, model };
   // Idle-timeout: nếu KHÔNG nhận được byte nào trong idleMs (kết nối treo), tự
@@ -65,6 +114,8 @@ export async function stream({ mode = "chat", message, model, signal, onDelta, o
   let text = "";
   let reasoning = "";
+  let sawDone = false; // thấy {done} = stream kết thúc tử tế (không bị cắt)
+  let truncated = false; // gateway báo upstream bị cắt giữa chừng (Vercel 300s)
   // Một dòng SSE → cập nhật text/reasoning. Tách ra để dùng lại khi flush dòng cuối.
   const processLine = (rawLine) => {
@@ -88,6 +139,8 @@ export async function stream({ mode = "chat", message, model, signal, onDelta, o
       onReasoning?.(p.reasoning);
       if (p.answer) text = p.answer;
     }
+    if (p.truncated) truncated = true;
+    if (p.done) sawDone = true;
     if (p.error) throw new ApiError(p.error, {});
   };
@@ -118,9 +171,17 @@ export async function stream({ mode = "chat", message, model, signal, onDelta, o
     buf += decoder.decode(); // flush decoder
     if (buf.trim()) processLine(buf); // dòng SSE cuối không có '\n' — đừng bỏ sót
-    return { text: text.trim(), reasoning: reasoning.trim() };
+    // Chat: gateway gửi {done} khi xong sạch. Stream EOF mà chưa thấy {done} dù
+    // đã có chữ → kết nối/edge rớt giữa chừng → coi như bị cắt để nối tiếp.
+    if (mode === "chat" && !sawDone && text) truncated = true;
+    return { text, reasoning, truncated };
   } catch (err) {
+    if (signal?.aborted) throw err; // người dùng bấm Ctrl+C → huỷ thật, không nối tiếp
     if (timedOut) throw new ApiError("Kết nối tới máy chủ quá thời gian chờ (treo).", { code: "timeout" });
+    // Rớt mạng giữa chừng (không phải huỷ, không phải treo): với chat, nếu đã có
+    // chữ thì trả phần đã nhận + cờ truncated để lớp trên nối tiếp.
+    if (mode === "chat" && text) return { text, reasoning, truncated: true };
     throw err;
   } finally {
     clearTimeout(idle);