@poncho-ai/cli 0.24.0 → 0.24.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +5 -5
- package/CHANGELOG.md +11 -0
- package/dist/{chunk-CX2JHBBS.js → chunk-3ETNDULB.js} +27 -10
- package/dist/cli.js +1 -1
- package/dist/index.js +1 -1
- package/dist/{run-interactive-ink-GA5V5ATO.js → run-interactive-ink-IEB4MZ2C.js} +1 -1
- package/package.json +3 -3
- package/src/index.ts +40 -18
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
|
|
2
|
-
> @poncho-ai/cli@0.24.0 build /home/runner/work/poncho-ai/poncho-ai/packages/cli
|
|
2
|
+
> @poncho-ai/cli@0.24.1 build /home/runner/work/poncho-ai/poncho-ai/packages/cli
|
|
3
3
|
> tsup src/index.ts src/cli.ts --format esm --dts
|
|
4
4
|
|
|
5
5
|
[34mCLI[39m Building entry: src/cli.ts, src/index.ts
|
|
@@ -8,11 +8,11 @@
|
|
|
8
8
|
[34mCLI[39m Target: es2022
|
|
9
9
|
[34mESM[39m Build start
|
|
10
10
|
[32mESM[39m [1mdist/cli.js [22m[32m94.00 B[39m
|
|
11
|
-
[32mESM[39m [1mdist/run-interactive-ink-GA5V5ATO.js [22m[32m56.74 KB[39m
|
|
12
11
|
[32mESM[39m [1mdist/index.js [22m[32m857.00 B[39m
|
|
13
|
-
[32mESM[39m [1mdist/
|
|
14
|
-
[32mESM[39m
|
|
12
|
+
[32mESM[39m [1mdist/run-interactive-ink-IEB4MZ2C.js [22m[32m56.74 KB[39m
|
|
13
|
+
[32mESM[39m [1mdist/chunk-3ETNDULB.js [22m[32m400.75 KB[39m
|
|
14
|
+
[32mESM[39m ⚡️ Build success in 57ms
|
|
15
15
|
[34mDTS[39m Build start
|
|
16
|
-
[32mDTS[39m ⚡️ Build success in
|
|
16
|
+
[32mDTS[39m ⚡️ Build success in 3876ms
|
|
17
17
|
[32mDTS[39m [1mdist/cli.d.ts [22m[32m20.00 B[39m
|
|
18
18
|
[32mDTS[39m [1mdist/index.d.ts [22m[32m3.59 KB[39m
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,16 @@
|
|
|
1
1
|
# @poncho-ai/cli
|
|
2
2
|
|
|
3
|
+
## 0.24.1
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- [`096953d`](https://github.com/cesr/poncho-ai/commit/096953d5a64a785950ea0a7f09e2183e481afd29) Thanks [@cesr](https://github.com/cesr)! - Improve time-to-first-token by lazy-loading the recall corpus
|
|
8
|
+
|
|
9
|
+
The recall corpus (past conversation summaries) is now fetched on-demand only when the LLM invokes the `conversation_recall` tool, instead of blocking every message with ~1.3s of upfront I/O. Also adds batch `mget` support to Upstash/Redis/DynamoDB conversation stores, parallelizes memory fetch with skill refresh, debounces skill refresh in dev mode, and caches message conversions across multi-step runs.
|
|
10
|
+
|
|
11
|
+
- Updated dependencies [[`096953d`](https://github.com/cesr/poncho-ai/commit/096953d5a64a785950ea0a7f09e2183e481afd29)]:
|
|
12
|
+
- @poncho-ai/harness@0.22.1
|
|
13
|
+
|
|
3
14
|
## 0.24.0
|
|
4
15
|
|
|
5
16
|
### Minor Changes
|
package/dist/{chunk-CX2JHBBS.js → chunk-3ETNDULB.js}
CHANGED
|
@@ -9331,7 +9331,8 @@ data: ${JSON.stringify(data)}
|
|
|
9331
9331
|
response.writeHead(200, {
|
|
9332
9332
|
"Content-Type": "text/event-stream",
|
|
9333
9333
|
"Cache-Control": "no-cache",
|
|
9334
|
-
Connection: "keep-alive"
|
|
9334
|
+
Connection: "keep-alive",
|
|
9335
|
+
"X-Accel-Buffering": "no"
|
|
9335
9336
|
});
|
|
9336
9337
|
const historyMessages = [...conversation.messages];
|
|
9337
9338
|
const preRunMessages = [...conversation.messages];
|
|
@@ -9392,7 +9393,9 @@ data: ${JSON.stringify(data)}
|
|
|
9392
9393
|
try {
|
|
9393
9394
|
conversation.messages = [...historyMessages, { role: "user", content: userContent }];
|
|
9394
9395
|
conversation.updatedAt = Date.now();
|
|
9395
|
-
|
|
9396
|
+
conversationStore.update(conversation).catch((err) => {
|
|
9397
|
+
console.error("[poncho] Failed to persist user turn:", err);
|
|
9398
|
+
});
|
|
9396
9399
|
const persistDraftAssistantTurn = async () => {
|
|
9397
9400
|
const draftSections = [
|
|
9398
9401
|
...sections.map((section) => ({
|
|
@@ -9425,18 +9428,32 @@ data: ${JSON.stringify(data)}
|
|
|
9425
9428
|
conversation.updatedAt = Date.now();
|
|
9426
9429
|
await conversationStore.update(conversation);
|
|
9427
9430
|
};
|
|
9428
|
-
|
|
9429
|
-
|
|
9430
|
-
|
|
9431
|
-
|
|
9432
|
-
|
|
9433
|
-
|
|
9431
|
+
let cachedRecallCorpus;
|
|
9432
|
+
const lazyRecallCorpus = async () => {
|
|
9433
|
+
if (cachedRecallCorpus) return cachedRecallCorpus;
|
|
9434
|
+
const _rc0 = performance.now();
|
|
9435
|
+
let recallConversations;
|
|
9436
|
+
if (typeof conversationStore.listSummaries === "function") {
|
|
9437
|
+
const recallSummaries = (await conversationStore.listSummaries(ownerId)).filter((s) => s.conversationId !== conversationId && !s.parentConversationId).slice(0, 20);
|
|
9438
|
+
recallConversations = (await Promise.all(recallSummaries.map((s) => conversationStore.get(s.conversationId)))).filter((c) => c != null);
|
|
9439
|
+
} else {
|
|
9440
|
+
recallConversations = (await conversationStore.list(ownerId)).filter((item) => item.conversationId !== conversationId && !item.parentConversationId).slice(0, 20);
|
|
9441
|
+
}
|
|
9442
|
+
cachedRecallCorpus = recallConversations.map((item) => ({
|
|
9443
|
+
conversationId: item.conversationId,
|
|
9444
|
+
title: item.title,
|
|
9445
|
+
updatedAt: item.updatedAt,
|
|
9446
|
+
content: item.messages.slice(-6).map((message) => `${message.role}: ${typeof message.content === "string" ? message.content : getTextContent(message)}`).join("\n").slice(0, 2e3)
|
|
9447
|
+
})).filter((item) => item.content.length > 0);
|
|
9448
|
+
console.info(`[poncho] recall corpus fetched lazily (${cachedRecallCorpus.length} items, ${(performance.now() - _rc0).toFixed(1)}ms)`);
|
|
9449
|
+
return cachedRecallCorpus;
|
|
9450
|
+
};
|
|
9434
9451
|
for await (const event of harness.runWithTelemetry({
|
|
9435
9452
|
task: messageText,
|
|
9436
9453
|
conversationId,
|
|
9437
9454
|
parameters: {
|
|
9438
9455
|
...bodyParameters ?? {},
|
|
9439
|
-
__conversationRecallCorpus:
|
|
9456
|
+
__conversationRecallCorpus: lazyRecallCorpus,
|
|
9440
9457
|
__activeConversationId: conversationId,
|
|
9441
9458
|
__ownerId: ownerId
|
|
9442
9459
|
},
|
|
@@ -10077,7 +10094,7 @@ var runInteractive = async (workingDir, params) => {
|
|
|
10077
10094
|
await harness.initialize();
|
|
10078
10095
|
const identity = await ensureAgentIdentity2(workingDir);
|
|
10079
10096
|
try {
|
|
10080
|
-
const { runInteractiveInk } = await import("./run-interactive-ink-GA5V5ATO.js");
|
|
10097
|
+
const { runInteractiveInk } = await import("./run-interactive-ink-IEB4MZ2C.js");
|
|
10081
10098
|
await runInteractiveInk({
|
|
10082
10099
|
harness,
|
|
10083
10100
|
params,
|
package/dist/cli.js
CHANGED
package/dist/index.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@poncho-ai/cli",
|
|
3
|
-
"version": "0.24.0",
|
|
3
|
+
"version": "0.24.1",
|
|
4
4
|
"description": "CLI for building and deploying AI agents",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -27,9 +27,9 @@
|
|
|
27
27
|
"react": "^19.2.4",
|
|
28
28
|
"react-devtools-core": "^6.1.5",
|
|
29
29
|
"yaml": "^2.8.1",
|
|
30
|
-
"@poncho-ai/harness": "0.22.0",
|
|
31
30
|
"@poncho-ai/messaging": "0.5.1",
|
|
32
|
-
"@poncho-ai/sdk": "1.5.0"
|
|
31
|
+
"@poncho-ai/sdk": "1.5.0",
|
|
32
|
+
"@poncho-ai/harness": "0.22.1"
|
|
33
33
|
},
|
|
34
34
|
"devDependencies": {
|
|
35
35
|
"@types/busboy": "^1.5.4",
|
package/src/index.ts
CHANGED
|
@@ -3526,6 +3526,7 @@ export const createRequestHandler = async (options?: {
|
|
|
3526
3526
|
"Content-Type": "text/event-stream",
|
|
3527
3527
|
"Cache-Control": "no-cache",
|
|
3528
3528
|
Connection: "keep-alive",
|
|
3529
|
+
"X-Accel-Buffering": "no",
|
|
3529
3530
|
});
|
|
3530
3531
|
const historyMessages = [...conversation.messages];
|
|
3531
3532
|
const preRunMessages = [...conversation.messages];
|
|
@@ -3584,10 +3585,14 @@ export const createRequestHandler = async (options?: {
|
|
|
3584
3585
|
});
|
|
3585
3586
|
|
|
3586
3587
|
try {
|
|
3587
|
-
// Persist the user turn
|
|
3588
|
+
// Persist the user turn so refreshing mid-run keeps chat context.
|
|
3589
|
+
// Fire-and-forget: the write chain in the store serializes file ops,
|
|
3590
|
+
// and persistDraftAssistantTurn won't run until LLM events arrive.
|
|
3588
3591
|
conversation.messages = [...historyMessages, { role: "user", content: userContent }];
|
|
3589
3592
|
conversation.updatedAt = Date.now();
|
|
3590
|
-
|
|
3593
|
+
conversationStore.update(conversation).catch((err) => {
|
|
3594
|
+
console.error("[poncho] Failed to persist user turn:", err);
|
|
3595
|
+
});
|
|
3591
3596
|
|
|
3592
3597
|
const persistDraftAssistantTurn = async (): Promise<void> => {
|
|
3593
3598
|
const draftSections: Array<{ type: "text" | "tools"; content: string | string[] }> = [
|
|
@@ -3626,27 +3631,45 @@ export const createRequestHandler = async (options?: {
|
|
|
3626
3631
|
await conversationStore.update(conversation);
|
|
3627
3632
|
};
|
|
3628
3633
|
|
|
3629
|
-
|
|
3630
|
-
|
|
3631
|
-
|
|
3632
|
-
.
|
|
3633
|
-
|
|
3634
|
-
|
|
3635
|
-
|
|
3636
|
-
|
|
3637
|
-
.slice(
|
|
3638
|
-
|
|
3639
|
-
.
|
|
3640
|
-
|
|
3641
|
-
}
|
|
3642
|
-
|
|
3634
|
+
let cachedRecallCorpus: unknown[] | undefined;
|
|
3635
|
+
const lazyRecallCorpus = async () => {
|
|
3636
|
+
if (cachedRecallCorpus) return cachedRecallCorpus;
|
|
3637
|
+
const _rc0 = performance.now();
|
|
3638
|
+
let recallConversations: Conversation[];
|
|
3639
|
+
if (typeof conversationStore.listSummaries === "function") {
|
|
3640
|
+
const recallSummaries = (await conversationStore.listSummaries(ownerId))
|
|
3641
|
+
.filter((s) => s.conversationId !== conversationId && !s.parentConversationId)
|
|
3642
|
+
.slice(0, 20);
|
|
3643
|
+
recallConversations = (
|
|
3644
|
+
await Promise.all(recallSummaries.map((s) => conversationStore.get(s.conversationId)))
|
|
3645
|
+
).filter((c): c is NonNullable<typeof c> => c != null);
|
|
3646
|
+
} else {
|
|
3647
|
+
recallConversations = (await conversationStore.list(ownerId))
|
|
3648
|
+
.filter((item) => item.conversationId !== conversationId && !item.parentConversationId)
|
|
3649
|
+
.slice(0, 20);
|
|
3650
|
+
}
|
|
3651
|
+
cachedRecallCorpus = recallConversations
|
|
3652
|
+
.map((item) => ({
|
|
3653
|
+
conversationId: item.conversationId,
|
|
3654
|
+
title: item.title,
|
|
3655
|
+
updatedAt: item.updatedAt,
|
|
3656
|
+
content: item.messages
|
|
3657
|
+
.slice(-6)
|
|
3658
|
+
.map((message) => `${message.role}: ${typeof message.content === "string" ? message.content : getTextContent(message)}`)
|
|
3659
|
+
.join("\n")
|
|
3660
|
+
.slice(0, 2000),
|
|
3661
|
+
}))
|
|
3662
|
+
.filter((item) => item.content.length > 0);
|
|
3663
|
+
console.info(`[poncho] recall corpus fetched lazily (${cachedRecallCorpus.length} items, ${(performance.now() - _rc0).toFixed(1)}ms)`);
|
|
3664
|
+
return cachedRecallCorpus;
|
|
3665
|
+
};
|
|
3643
3666
|
|
|
3644
3667
|
for await (const event of harness.runWithTelemetry({
|
|
3645
3668
|
task: messageText,
|
|
3646
3669
|
conversationId,
|
|
3647
3670
|
parameters: {
|
|
3648
3671
|
...(bodyParameters ?? {}),
|
|
3649
|
-
__conversationRecallCorpus:
|
|
3672
|
+
__conversationRecallCorpus: lazyRecallCorpus,
|
|
3650
3673
|
__activeConversationId: conversationId,
|
|
3651
3674
|
__ownerId: ownerId,
|
|
3652
3675
|
},
|
|
@@ -3675,7 +3698,6 @@ export const createRequestHandler = async (options?: {
|
|
|
3675
3698
|
runCancelled = true;
|
|
3676
3699
|
}
|
|
3677
3700
|
if (event.type === "model:chunk") {
|
|
3678
|
-
// If we have tools accumulated and text starts again, push tools as a section
|
|
3679
3701
|
if (currentTools.length > 0) {
|
|
3680
3702
|
sections.push({ type: "tools", content: currentTools });
|
|
3681
3703
|
currentTools = [];
|