@poncho-ai/cli 0.24.0 → 0.24.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- > @poncho-ai/cli@0.24.0 build /home/runner/work/poncho-ai/poncho-ai/packages/cli
2
+ > @poncho-ai/cli@0.24.1 build /home/runner/work/poncho-ai/poncho-ai/packages/cli
3
3
  > tsup src/index.ts src/cli.ts --format esm --dts
4
4
 
5
5
  CLI Building entry: src/cli.ts, src/index.ts
@@ -8,11 +8,11 @@
8
8
  CLI Target: es2022
9
9
  ESM Build start
10
10
  ESM dist/cli.js 94.00 B
11
- ESM dist/run-interactive-ink-GA5V5ATO.js 56.74 KB
12
11
  ESM dist/index.js 857.00 B
13
- ESM dist/chunk-CX2JHBBS.js 399.73 KB
14
- ESM ⚡️ Build success in 65ms
12
+ ESM dist/run-interactive-ink-IEB4MZ2C.js 56.74 KB
13
+ ESM dist/chunk-3ETNDULB.js 400.75 KB
14
+ ESM ⚡️ Build success in 57ms
15
15
  DTS Build start
16
- DTS ⚡️ Build success in 3966ms
16
+ DTS ⚡️ Build success in 3876ms
17
17
  DTS dist/cli.d.ts 20.00 B
18
18
  DTS dist/index.d.ts 3.59 KB
package/CHANGELOG.md CHANGED
@@ -1,5 +1,16 @@
1
1
  # @poncho-ai/cli
2
2
 
3
+ ## 0.24.1
4
+
5
+ ### Patch Changes
6
+
7
+ - [`096953d`](https://github.com/cesr/poncho-ai/commit/096953d5a64a785950ea0a7f09e2183e481afd29) Thanks [@cesr](https://github.com/cesr)! - Improve time-to-first-token by lazy-loading the recall corpus
8
+
9
+ The recall corpus (past conversation summaries) is now fetched on-demand only when the LLM invokes the `conversation_recall` tool, instead of blocking every message with ~1.3s of upfront I/O. Also adds batch `mget` support to Upstash/Redis/DynamoDB conversation stores, parallelizes memory fetch with skill refresh, debounces skill refresh in dev mode, and caches message conversions across multi-step runs.
10
+
11
+ - Updated dependencies [[`096953d`](https://github.com/cesr/poncho-ai/commit/096953d5a64a785950ea0a7f09e2183e481afd29)]:
12
+ - @poncho-ai/harness@0.22.1
13
+
3
14
  ## 0.24.0
4
15
 
5
16
  ### Minor Changes
@@ -9331,7 +9331,8 @@ data: ${JSON.stringify(data)}
9331
9331
  response.writeHead(200, {
9332
9332
  "Content-Type": "text/event-stream",
9333
9333
  "Cache-Control": "no-cache",
9334
- Connection: "keep-alive"
9334
+ Connection: "keep-alive",
9335
+ "X-Accel-Buffering": "no"
9335
9336
  });
9336
9337
  const historyMessages = [...conversation.messages];
9337
9338
  const preRunMessages = [...conversation.messages];
@@ -9392,7 +9393,9 @@ data: ${JSON.stringify(data)}
9392
9393
  try {
9393
9394
  conversation.messages = [...historyMessages, { role: "user", content: userContent }];
9394
9395
  conversation.updatedAt = Date.now();
9395
- await conversationStore.update(conversation);
9396
+ conversationStore.update(conversation).catch((err) => {
9397
+ console.error("[poncho] Failed to persist user turn:", err);
9398
+ });
9396
9399
  const persistDraftAssistantTurn = async () => {
9397
9400
  const draftSections = [
9398
9401
  ...sections.map((section) => ({
@@ -9425,18 +9428,32 @@ data: ${JSON.stringify(data)}
9425
9428
  conversation.updatedAt = Date.now();
9426
9429
  await conversationStore.update(conversation);
9427
9430
  };
9428
- const recallCorpus = (await conversationStore.list(ownerId)).filter((item) => item.conversationId !== conversationId && !item.parentConversationId).slice(0, 20).map((item) => ({
9429
- conversationId: item.conversationId,
9430
- title: item.title,
9431
- updatedAt: item.updatedAt,
9432
- content: item.messages.slice(-6).map((message) => `${message.role}: ${typeof message.content === "string" ? message.content : getTextContent(message)}`).join("\n").slice(0, 2e3)
9433
- })).filter((item) => item.content.length > 0);
9431
+ let cachedRecallCorpus;
9432
+ const lazyRecallCorpus = async () => {
9433
+ if (cachedRecallCorpus) return cachedRecallCorpus;
9434
+ const _rc0 = performance.now();
9435
+ let recallConversations;
9436
+ if (typeof conversationStore.listSummaries === "function") {
9437
+ const recallSummaries = (await conversationStore.listSummaries(ownerId)).filter((s) => s.conversationId !== conversationId && !s.parentConversationId).slice(0, 20);
9438
+ recallConversations = (await Promise.all(recallSummaries.map((s) => conversationStore.get(s.conversationId)))).filter((c) => c != null);
9439
+ } else {
9440
+ recallConversations = (await conversationStore.list(ownerId)).filter((item) => item.conversationId !== conversationId && !item.parentConversationId).slice(0, 20);
9441
+ }
9442
+ cachedRecallCorpus = recallConversations.map((item) => ({
9443
+ conversationId: item.conversationId,
9444
+ title: item.title,
9445
+ updatedAt: item.updatedAt,
9446
+ content: item.messages.slice(-6).map((message) => `${message.role}: ${typeof message.content === "string" ? message.content : getTextContent(message)}`).join("\n").slice(0, 2e3)
9447
+ })).filter((item) => item.content.length > 0);
9448
+ console.info(`[poncho] recall corpus fetched lazily (${cachedRecallCorpus.length} items, ${(performance.now() - _rc0).toFixed(1)}ms)`);
9449
+ return cachedRecallCorpus;
9450
+ };
9434
9451
  for await (const event of harness.runWithTelemetry({
9435
9452
  task: messageText,
9436
9453
  conversationId,
9437
9454
  parameters: {
9438
9455
  ...bodyParameters ?? {},
9439
- __conversationRecallCorpus: recallCorpus,
9456
+ __conversationRecallCorpus: lazyRecallCorpus,
9440
9457
  __activeConversationId: conversationId,
9441
9458
  __ownerId: ownerId
9442
9459
  },
@@ -10077,7 +10094,7 @@ var runInteractive = async (workingDir, params) => {
10077
10094
  await harness.initialize();
10078
10095
  const identity = await ensureAgentIdentity2(workingDir);
10079
10096
  try {
10080
- const { runInteractiveInk } = await import("./run-interactive-ink-GA5V5ATO.js");
10097
+ const { runInteractiveInk } = await import("./run-interactive-ink-IEB4MZ2C.js");
10081
10098
  await runInteractiveInk({
10082
10099
  harness,
10083
10100
  params,
package/dist/cli.js CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
3
  main
4
- } from "./chunk-CX2JHBBS.js";
4
+ } from "./chunk-3ETNDULB.js";
5
5
 
6
6
  // src/cli.ts
7
7
  void main();
package/dist/index.js CHANGED
@@ -23,7 +23,7 @@ import {
23
23
  runTests,
24
24
  startDevServer,
25
25
  updateAgentGuidance
26
- } from "./chunk-CX2JHBBS.js";
26
+ } from "./chunk-3ETNDULB.js";
27
27
  export {
28
28
  addSkill,
29
29
  buildCli,
@@ -2,7 +2,7 @@ import {
2
2
  consumeFirstRunIntro,
3
3
  inferConversationTitle,
4
4
  resolveHarnessEnvironment
5
- } from "./chunk-CX2JHBBS.js";
5
+ } from "./chunk-3ETNDULB.js";
6
6
 
7
7
  // src/run-interactive-ink.ts
8
8
  import * as readline from "readline";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/cli",
3
- "version": "0.24.0",
3
+ "version": "0.24.1",
4
4
  "description": "CLI for building and deploying AI agents",
5
5
  "repository": {
6
6
  "type": "git",
@@ -27,9 +27,9 @@
27
27
  "react": "^19.2.4",
28
28
  "react-devtools-core": "^6.1.5",
29
29
  "yaml": "^2.8.1",
30
- "@poncho-ai/harness": "0.22.0",
31
30
  "@poncho-ai/messaging": "0.5.1",
32
- "@poncho-ai/sdk": "1.5.0"
31
+ "@poncho-ai/sdk": "1.5.0",
32
+ "@poncho-ai/harness": "0.22.1"
33
33
  },
34
34
  "devDependencies": {
35
35
  "@types/busboy": "^1.5.4",
package/src/index.ts CHANGED
@@ -3526,6 +3526,7 @@ export const createRequestHandler = async (options?: {
3526
3526
  "Content-Type": "text/event-stream",
3527
3527
  "Cache-Control": "no-cache",
3528
3528
  Connection: "keep-alive",
3529
+ "X-Accel-Buffering": "no",
3529
3530
  });
3530
3531
  const historyMessages = [...conversation.messages];
3531
3532
  const preRunMessages = [...conversation.messages];
@@ -3584,10 +3585,14 @@ export const createRequestHandler = async (options?: {
3584
3585
  });
3585
3586
 
3586
3587
  try {
3587
- // Persist the user turn immediately so refreshing mid-run keeps chat context.
3588
+ // Persist the user turn so refreshing mid-run keeps chat context.
3589
+ // Fire-and-forget: the write chain in the store serializes file ops,
3590
+ // and persistDraftAssistantTurn won't run until LLM events arrive.
3588
3591
  conversation.messages = [...historyMessages, { role: "user", content: userContent }];
3589
3592
  conversation.updatedAt = Date.now();
3590
- await conversationStore.update(conversation);
3593
+ conversationStore.update(conversation).catch((err) => {
3594
+ console.error("[poncho] Failed to persist user turn:", err);
3595
+ });
3591
3596
 
3592
3597
  const persistDraftAssistantTurn = async (): Promise<void> => {
3593
3598
  const draftSections: Array<{ type: "text" | "tools"; content: string | string[] }> = [
@@ -3626,27 +3631,45 @@ export const createRequestHandler = async (options?: {
3626
3631
  await conversationStore.update(conversation);
3627
3632
  };
3628
3633
 
3629
- const recallCorpus = (await conversationStore.list(ownerId))
3630
- .filter((item) => item.conversationId !== conversationId && !item.parentConversationId)
3631
- .slice(0, 20)
3632
- .map((item) => ({
3633
- conversationId: item.conversationId,
3634
- title: item.title,
3635
- updatedAt: item.updatedAt,
3636
- content: item.messages
3637
- .slice(-6)
3638
- .map((message) => `${message.role}: ${typeof message.content === "string" ? message.content : getTextContent(message)}`)
3639
- .join("\n")
3640
- .slice(0, 2000),
3641
- }))
3642
- .filter((item) => item.content.length > 0);
3634
+ let cachedRecallCorpus: unknown[] | undefined;
3635
+ const lazyRecallCorpus = async () => {
3636
+ if (cachedRecallCorpus) return cachedRecallCorpus;
3637
+ const _rc0 = performance.now();
3638
+ let recallConversations: Conversation[];
3639
+ if (typeof conversationStore.listSummaries === "function") {
3640
+ const recallSummaries = (await conversationStore.listSummaries(ownerId))
3641
+ .filter((s) => s.conversationId !== conversationId && !s.parentConversationId)
3642
+ .slice(0, 20);
3643
+ recallConversations = (
3644
+ await Promise.all(recallSummaries.map((s) => conversationStore.get(s.conversationId)))
3645
+ ).filter((c): c is NonNullable<typeof c> => c != null);
3646
+ } else {
3647
+ recallConversations = (await conversationStore.list(ownerId))
3648
+ .filter((item) => item.conversationId !== conversationId && !item.parentConversationId)
3649
+ .slice(0, 20);
3650
+ }
3651
+ cachedRecallCorpus = recallConversations
3652
+ .map((item) => ({
3653
+ conversationId: item.conversationId,
3654
+ title: item.title,
3655
+ updatedAt: item.updatedAt,
3656
+ content: item.messages
3657
+ .slice(-6)
3658
+ .map((message) => `${message.role}: ${typeof message.content === "string" ? message.content : getTextContent(message)}`)
3659
+ .join("\n")
3660
+ .slice(0, 2000),
3661
+ }))
3662
+ .filter((item) => item.content.length > 0);
3663
+ console.info(`[poncho] recall corpus fetched lazily (${cachedRecallCorpus.length} items, ${(performance.now() - _rc0).toFixed(1)}ms)`);
3664
+ return cachedRecallCorpus;
3665
+ };
3643
3666
 
3644
3667
  for await (const event of harness.runWithTelemetry({
3645
3668
  task: messageText,
3646
3669
  conversationId,
3647
3670
  parameters: {
3648
3671
  ...(bodyParameters ?? {}),
3649
- __conversationRecallCorpus: recallCorpus,
3672
+ __conversationRecallCorpus: lazyRecallCorpus,
3650
3673
  __activeConversationId: conversationId,
3651
3674
  __ownerId: ownerId,
3652
3675
  },
@@ -3675,7 +3698,6 @@ export const createRequestHandler = async (options?: {
3675
3698
  runCancelled = true;
3676
3699
  }
3677
3700
  if (event.type === "model:chunk") {
3678
- // If we have tools accumulated and text starts again, push tools as a section
3679
3701
  if (currentTools.length > 0) {
3680
3702
  sections.push({ type: "tools", content: currentTools });
3681
3703
  currentTools = [];