npm - @poncho-ai/cli - Versions diffs - 0.24.0 → 0.24.1 - Mend

@poncho-ai/cli 0.24.0 → 0.24.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/.turbo/turbo-build.log +5 -5
package/CHANGELOG.md +11 -0
package/dist/{chunk-CX2JHBBS.js → chunk-3ETNDULB.js} +27 -10
package/dist/cli.js +1 -1
package/dist/index.js +1 -1
package/dist/{run-interactive-ink-GA5V5ATO.js → run-interactive-ink-IEB4MZ2C.js} +1 -1
package/package.json +3 -3
package/src/index.ts +40 -18

package/.turbo/turbo-build.log CHANGED

@@ -1,5 +1,5 @@
-> @poncho-ai/cli@0.24.0 build /home/runner/work/poncho-ai/poncho-ai/packages/cli
+> @poncho-ai/cli@0.24.1 build /home/runner/work/poncho-ai/poncho-ai/packages/cli
 > tsup src/index.ts src/cli.ts --format esm --dts
 [34mCLI[39m Building entry: src/cli.ts, src/index.ts
@@ -8,11 +8,11 @@
 [34mCLI[39m Target: es2022
 [34mESM[39m Build start
 [32mESM[39m [1mdist/cli.js                          [22m[32m94.00 B[39m
-[32mESM[39m [1mdist/run-interactive-ink-GA5V5ATO.js [22m[32m56.74 KB[39m
 [32mESM[39m [1mdist/index.js                        [22m[32m857.00 B[39m
-[32mESM[39m [1mdist/chunk-CX2JHBBS.js               [22m[32m399.73 KB[39m
-[32mESM[39m ⚡️ Build success in 65ms
+[32mESM[39m [1mdist/run-interactive-ink-IEB4MZ2C.js [22m[32m56.74 KB[39m
+[32mESM[39m [1mdist/chunk-3ETNDULB.js               [22m[32m400.75 KB[39m
+[32mESM[39m ⚡️ Build success in 57ms
 [34mDTS[39m Build start
-[32mDTS[39m ⚡️ Build success in 3966ms
+[32mDTS[39m ⚡️ Build success in 3876ms
 [32mDTS[39m [1mdist/cli.d.ts   [22m[32m20.00 B[39m
 [32mDTS[39m [1mdist/index.d.ts [22m[32m3.59 KB[39m

package/CHANGELOG.md CHANGED

@@ -1,5 +1,16 @@
 # @poncho-ai/cli
+## 0.24.1
+### Patch Changes
+- [`096953d`](https://github.com/cesr/poncho-ai/commit/096953d5a64a785950ea0a7f09e2183e481afd29) Thanks [@cesr](https://github.com/cesr)! - Improve time-to-first-token by lazy-loading the recall corpus
+  The recall corpus (past conversation summaries) is now fetched on-demand only when the LLM invokes the `conversation_recall` tool, instead of blocking every message with ~1.3s of upfront I/O. Also adds batch `mget` support to Upstash/Redis/DynamoDB conversation stores, parallelizes memory fetch with skill refresh, debounces skill refresh in dev mode, and caches message conversions across multi-step runs.
+- Updated dependencies [[`096953d`](https://github.com/cesr/poncho-ai/commit/096953d5a64a785950ea0a7f09e2183e481afd29)]:
+  - @poncho-ai/harness@0.22.1
 ## 0.24.0
 ### Minor Changes

package/dist/{chunk-CX2JHBBS.js → chunk-3ETNDULB.js} RENAMED

@@ -9331,7 +9331,8 @@ data: ${JSON.stringify(data)}
       response.writeHead(200, {
         "Content-Type": "text/event-stream",
         "Cache-Control": "no-cache",
-        Connection: "keep-alive"
+        Connection: "keep-alive",
+        "X-Accel-Buffering": "no"
       });
       const historyMessages = [...conversation.messages];
       const preRunMessages = [...conversation.messages];
@@ -9392,7 +9393,9 @@ data: ${JSON.stringify(data)}
       try {
         conversation.messages = [...historyMessages, { role: "user", content: userContent }];
         conversation.updatedAt = Date.now();
-        await conversationStore.update(conversation);
+        conversationStore.update(conversation).catch((err) => {
+          console.error("[poncho] Failed to persist user turn:", err);
+        });
         const persistDraftAssistantTurn = async () => {
           const draftSections = [
             ...sections.map((section) => ({
@@ -9425,18 +9428,32 @@ data: ${JSON.stringify(data)}
           conversation.updatedAt = Date.now();
           await conversationStore.update(conversation);
         };
-        const recallCorpus = (await conversationStore.list(ownerId)).filter((item) => item.conversationId !== conversationId && !item.parentConversationId).slice(0, 20).map((item) => ({
-          conversationId: item.conversationId,
-          title: item.title,
-          updatedAt: item.updatedAt,
-          content: item.messages.slice(-6).map((message) => `${message.role}: ${typeof message.content === "string" ? message.content : getTextContent(message)}`).join("\n").slice(0, 2e3)
-        })).filter((item) => item.content.length > 0);
+        let cachedRecallCorpus;
+        const lazyRecallCorpus = async () => {
+          if (cachedRecallCorpus) return cachedRecallCorpus;
+          const _rc0 = performance.now();
+          let recallConversations;
+          if (typeof conversationStore.listSummaries === "function") {
+            const recallSummaries = (await conversationStore.listSummaries(ownerId)).filter((s) => s.conversationId !== conversationId && !s.parentConversationId).slice(0, 20);
+            recallConversations = (await Promise.all(recallSummaries.map((s) => conversationStore.get(s.conversationId)))).filter((c) => c != null);
+          } else {
+            recallConversations = (await conversationStore.list(ownerId)).filter((item) => item.conversationId !== conversationId && !item.parentConversationId).slice(0, 20);
+          }
+          cachedRecallCorpus = recallConversations.map((item) => ({
+            conversationId: item.conversationId,
+            title: item.title,
+            updatedAt: item.updatedAt,
+            content: item.messages.slice(-6).map((message) => `${message.role}: ${typeof message.content === "string" ? message.content : getTextContent(message)}`).join("\n").slice(0, 2e3)
+          })).filter((item) => item.content.length > 0);
+          console.info(`[poncho] recall corpus fetched lazily (${cachedRecallCorpus.length} items, ${(performance.now() - _rc0).toFixed(1)}ms)`);
+          return cachedRecallCorpus;
+        };
         for await (const event of harness.runWithTelemetry({
           task: messageText,
           conversationId,
           parameters: {
             ...bodyParameters ?? {},
-            __conversationRecallCorpus: recallCorpus,
+            __conversationRecallCorpus: lazyRecallCorpus,
             __activeConversationId: conversationId,
             __ownerId: ownerId
           },
@@ -10077,7 +10094,7 @@ var runInteractive = async (workingDir, params) => {
   await harness.initialize();
   const identity = await ensureAgentIdentity2(workingDir);
   try {
-    const { runInteractiveInk } = await import("./run-interactive-ink-GA5V5ATO.js");
+    const { runInteractiveInk } = await import("./run-interactive-ink-IEB4MZ2C.js");
     await runInteractiveInk({
       harness,
       params,

package/dist/cli.js CHANGED

@@ -1,7 +1,7 @@
 #!/usr/bin/env node
 import {
   main
-} from "./chunk-CX2JHBBS.js";
+} from "./chunk-3ETNDULB.js";
 // src/cli.ts
 void main();

package/dist/index.js CHANGED

@@ -23,7 +23,7 @@ import {
   runTests,
   startDevServer,
   updateAgentGuidance
-} from "./chunk-CX2JHBBS.js";
+} from "./chunk-3ETNDULB.js";
 export {
   addSkill,
   buildCli,

package/dist/{run-interactive-ink-GA5V5ATO.js → run-interactive-ink-IEB4MZ2C.js} RENAMED

@@ -2,7 +2,7 @@ import {
   consumeFirstRunIntro,
   inferConversationTitle,
   resolveHarnessEnvironment
-} from "./chunk-CX2JHBBS.js";
+} from "./chunk-3ETNDULB.js";
 // src/run-interactive-ink.ts
 import * as readline from "readline";

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@poncho-ai/cli",
-  "version": "0.24.0",
+  "version": "0.24.1",
   "description": "CLI for building and deploying AI agents",
   "repository": {
     "type": "git",
@@ -27,9 +27,9 @@
     "react": "^19.2.4",
     "react-devtools-core": "^6.1.5",
     "yaml": "^2.8.1",
-    "@poncho-ai/harness": "0.22.0",
     "@poncho-ai/messaging": "0.5.1",
-    "@poncho-ai/sdk": "1.5.0"
+    "@poncho-ai/sdk": "1.5.0",
+    "@poncho-ai/harness": "0.22.1"
   },
   "devDependencies": {
     "@types/busboy": "^1.5.4",

package/src/index.ts CHANGED

@@ -3526,6 +3526,7 @@ export const createRequestHandler = async (options?: {
         "Content-Type": "text/event-stream",
         "Cache-Control": "no-cache",
         Connection: "keep-alive",
+        "X-Accel-Buffering": "no",
       });
       const historyMessages = [...conversation.messages];
       const preRunMessages = [...conversation.messages];
@@ -3584,10 +3585,14 @@ export const createRequestHandler = async (options?: {
       });
       try {
-        // Persist the user turn immediately so refreshing mid-run keeps chat context.
+        // Persist the user turn so refreshing mid-run keeps chat context.
+        // Fire-and-forget: the write chain in the store serializes file ops,
+        // and persistDraftAssistantTurn won't run until LLM events arrive.
         conversation.messages = [...historyMessages, { role: "user", content: userContent }];
         conversation.updatedAt = Date.now();
-        await conversationStore.update(conversation);
+        conversationStore.update(conversation).catch((err) => {
+          console.error("[poncho] Failed to persist user turn:", err);
+        });
         const persistDraftAssistantTurn = async (): Promise<void> => {
           const draftSections: Array<{ type: "text" | "tools"; content: string | string[] }> = [
@@ -3626,27 +3631,45 @@ export const createRequestHandler = async (options?: {
           await conversationStore.update(conversation);
         };
-        const recallCorpus = (await conversationStore.list(ownerId))
-          .filter((item) => item.conversationId !== conversationId && !item.parentConversationId)
-          .slice(0, 20)
-          .map((item) => ({
-            conversationId: item.conversationId,
-            title: item.title,
-            updatedAt: item.updatedAt,
-            content: item.messages
-              .slice(-6)
-              .map((message) => `${message.role}: ${typeof message.content === "string" ? message.content : getTextContent(message)}`)
-              .join("\n")
-              .slice(0, 2000),
-          }))
-          .filter((item) => item.content.length > 0);
+        let cachedRecallCorpus: unknown[] | undefined;
+        const lazyRecallCorpus = async () => {
+          if (cachedRecallCorpus) return cachedRecallCorpus;
+          const _rc0 = performance.now();
+          let recallConversations: Conversation[];
+          if (typeof conversationStore.listSummaries === "function") {
+            const recallSummaries = (await conversationStore.listSummaries(ownerId))
+              .filter((s) => s.conversationId !== conversationId && !s.parentConversationId)
+              .slice(0, 20);
+            recallConversations = (
+              await Promise.all(recallSummaries.map((s) => conversationStore.get(s.conversationId)))
+            ).filter((c): c is NonNullable<typeof c> => c != null);
+          } else {
+            recallConversations = (await conversationStore.list(ownerId))
+              .filter((item) => item.conversationId !== conversationId && !item.parentConversationId)
+              .slice(0, 20);
+          }
+          cachedRecallCorpus = recallConversations
+            .map((item) => ({
+              conversationId: item.conversationId,
+              title: item.title,
+              updatedAt: item.updatedAt,
+              content: item.messages
+                .slice(-6)
+                .map((message) => `${message.role}: ${typeof message.content === "string" ? message.content : getTextContent(message)}`)
+                .join("\n")
+                .slice(0, 2000),
+            }))
+            .filter((item) => item.content.length > 0);
+          console.info(`[poncho] recall corpus fetched lazily (${cachedRecallCorpus.length} items, ${(performance.now() - _rc0).toFixed(1)}ms)`);
+          return cachedRecallCorpus;
+        };
         for await (const event of harness.runWithTelemetry({
           task: messageText,
           conversationId,
           parameters: {
             ...(bodyParameters ?? {}),
-            __conversationRecallCorpus: recallCorpus,
+            __conversationRecallCorpus: lazyRecallCorpus,
             __activeConversationId: conversationId,
             __ownerId: ownerId,
           },
@@ -3675,7 +3698,6 @@ export const createRequestHandler = async (options?: {
             runCancelled = true;
           }
           if (event.type === "model:chunk") {
-            // If we have tools accumulated and text starts again, push tools as a section
             if (currentTools.length > 0) {
               sections.push({ type: "tools", content: currentTools });
               currentTools = [];