kimaki 0.4.76 → 0.4.78
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapter-rest-boundary.test.js +34 -0
- package/dist/agent-model.e2e.test.js +2 -20
- package/dist/cli.js +50 -13
- package/dist/commands/channel-ref.js +16 -0
- package/dist/commands/diff.js +20 -85
- package/dist/commands/merge-worktree.js +5 -17
- package/dist/commands/new-worktree.js +5 -9
- package/dist/commands/permissions.js +77 -11
- package/dist/commands/resume.js +5 -9
- package/dist/commands/screenshare.js +295 -0
- package/dist/commands/session.js +6 -17
- package/dist/critique-utils.js +95 -0
- package/dist/diff-patch-plugin.js +314 -0
- package/dist/discord-bot.js +19 -14
- package/dist/discord-js-import-boundary.test.js +62 -0
- package/dist/discord-utils.js +44 -0
- package/dist/event-stream-real-capture.e2e.test.js +2 -20
- package/dist/gateway-proxy.e2e.test.js +2 -5
- package/dist/generated/cloudflare/browser.js +17 -0
- package/dist/generated/cloudflare/client.js +34 -0
- package/dist/generated/cloudflare/commonInputTypes.js +10 -0
- package/dist/generated/cloudflare/enums.js +48 -0
- package/dist/generated/cloudflare/internal/class.js +47 -0
- package/dist/generated/cloudflare/internal/prismaNamespace.js +252 -0
- package/dist/generated/cloudflare/internal/prismaNamespaceBrowser.js +222 -0
- package/dist/generated/cloudflare/internal/query_compiler_fast_bg.js +135 -0
- package/dist/generated/cloudflare/models/bot_api_keys.js +1 -0
- package/dist/generated/cloudflare/models/bot_tokens.js +1 -0
- package/dist/generated/cloudflare/models/channel_agents.js +1 -0
- package/dist/generated/cloudflare/models/channel_directories.js +1 -0
- package/dist/generated/cloudflare/models/channel_mention_mode.js +1 -0
- package/dist/generated/cloudflare/models/channel_models.js +1 -0
- package/dist/generated/cloudflare/models/channel_verbosity.js +1 -0
- package/dist/generated/cloudflare/models/channel_worktrees.js +1 -0
- package/dist/generated/cloudflare/models/forum_sync_configs.js +1 -0
- package/dist/generated/cloudflare/models/global_models.js +1 -0
- package/dist/generated/cloudflare/models/ipc_requests.js +1 -0
- package/dist/generated/cloudflare/models/part_messages.js +1 -0
- package/dist/generated/cloudflare/models/scheduled_tasks.js +1 -0
- package/dist/generated/cloudflare/models/session_agents.js +1 -0
- package/dist/generated/cloudflare/models/session_events.js +1 -0
- package/dist/generated/cloudflare/models/session_models.js +1 -0
- package/dist/generated/cloudflare/models/session_start_sources.js +1 -0
- package/dist/generated/cloudflare/models/thread_sessions.js +1 -0
- package/dist/generated/cloudflare/models/thread_worktrees.js +1 -0
- package/dist/generated/cloudflare/models.js +1 -0
- package/dist/generated/node/browser.js +17 -0
- package/dist/generated/node/client.js +37 -0
- package/dist/generated/node/commonInputTypes.js +10 -0
- package/dist/generated/node/enums.js +48 -0
- package/dist/generated/node/internal/class.js +49 -0
- package/dist/generated/node/internal/prismaNamespace.js +252 -0
- package/dist/generated/node/internal/prismaNamespaceBrowser.js +222 -0
- package/dist/generated/node/models/bot_api_keys.js +1 -0
- package/dist/generated/node/models/bot_tokens.js +1 -0
- package/dist/generated/node/models/channel_agents.js +1 -0
- package/dist/generated/node/models/channel_directories.js +1 -0
- package/dist/generated/node/models/channel_mention_mode.js +1 -0
- package/dist/generated/node/models/channel_models.js +1 -0
- package/dist/generated/node/models/channel_verbosity.js +1 -0
- package/dist/generated/node/models/channel_worktrees.js +1 -0
- package/dist/generated/node/models/forum_sync_configs.js +1 -0
- package/dist/generated/node/models/global_models.js +1 -0
- package/dist/generated/node/models/ipc_requests.js +1 -0
- package/dist/generated/node/models/part_messages.js +1 -0
- package/dist/generated/node/models/scheduled_tasks.js +1 -0
- package/dist/generated/node/models/session_agents.js +1 -0
- package/dist/generated/node/models/session_events.js +1 -0
- package/dist/generated/node/models/session_models.js +1 -0
- package/dist/generated/node/models/session_start_sources.js +1 -0
- package/dist/generated/node/models/thread_sessions.js +1 -0
- package/dist/generated/node/models/thread_worktrees.js +1 -0
- package/dist/generated/node/models.js +1 -0
- package/dist/interaction-handler.js +10 -0
- package/dist/kimaki-digital-twin.e2e.test.js +2 -20
- package/dist/message-flags-boundary.test.js +54 -0
- package/dist/message-formatting.js +3 -62
- package/dist/onboarding-tutorial-plugin.js +1 -1
- package/dist/opencode-command.js +129 -0
- package/dist/opencode-command.test.js +48 -0
- package/dist/opencode-interrupt-plugin.js +19 -1
- package/dist/opencode-interrupt-plugin.test.js +0 -5
- package/dist/opencode-plugin-loading.e2e.test.js +9 -20
- package/dist/opencode-plugin.js +4 -4
- package/dist/opencode.js +150 -27
- package/dist/patch-text-parser.js +97 -0
- package/dist/platform/components-v2.js +20 -0
- package/dist/platform/discord-adapter.js +1440 -0
- package/dist/platform/discord-routes.js +31 -0
- package/dist/platform/message-flags.js +8 -0
- package/dist/platform/platform-value.js +41 -0
- package/dist/platform/slack-adapter.js +872 -0
- package/dist/platform/slack-markdown.js +169 -0
- package/dist/platform/types.js +4 -0
- package/dist/queue-advanced-e2e-setup.js +265 -0
- package/dist/queue-advanced-footer.e2e.test.js +173 -0
- package/dist/queue-advanced-model-switch.e2e.test.js +299 -0
- package/dist/queue-advanced-permissions-typing.e2e.test.js +73 -1
- package/dist/runtime-lifecycle.e2e.test.js +2 -20
- package/dist/session-handler/event-stream-state.js +5 -0
- package/dist/session-handler/event-stream-state.test.js +6 -2
- package/dist/session-handler/thread-session-runtime.js +32 -2
- package/dist/system-message.js +26 -23
- package/dist/test-utils.js +16 -0
- package/dist/thread-message-queue.e2e.test.js +2 -20
- package/dist/utils.js +3 -1
- package/dist/voice-message.e2e.test.js +2 -20
- package/dist/voice.js +122 -9
- package/dist/voice.test.js +17 -2
- package/dist/websockify.js +69 -0
- package/dist/worktree-lifecycle.e2e.test.js +308 -0
- package/package.json +4 -2
- package/skills/critique/SKILL.md +17 -0
- package/skills/egaki/SKILL.md +35 -0
- package/skills/event-sourcing-state/SKILL.md +252 -0
- package/skills/goke/SKILL.md +1 -0
- package/skills/npm-package/SKILL.md +21 -2
- package/skills/playwriter/SKILL.md +1 -1
- package/skills/x-articles/SKILL.md +554 -0
- package/src/agent-model.e2e.test.ts +4 -19
- package/src/cli.ts +60 -13
- package/src/commands/diff.ts +25 -99
- package/src/commands/merge-worktree.ts +5 -21
- package/src/commands/new-worktree.ts +5 -11
- package/src/commands/permissions.ts +100 -15
- package/src/commands/resume.ts +5 -12
- package/src/commands/screenshare.ts +354 -0
- package/src/commands/session.ts +6 -23
- package/src/critique-utils.ts +139 -0
- package/src/discord-bot.ts +20 -15
- package/src/discord-utils.ts +53 -0
- package/src/event-stream-real-capture.e2e.test.ts +4 -20
- package/src/gateway-proxy.e2e.test.ts +2 -5
- package/src/interaction-handler.ts +15 -0
- package/src/kimaki-digital-twin.e2e.test.ts +2 -21
- package/src/message-formatting.ts +3 -68
- package/src/onboarding-tutorial-plugin.ts +1 -1
- package/src/opencode-command.test.ts +70 -0
- package/src/opencode-command.ts +188 -0
- package/src/opencode-interrupt-plugin.test.ts +0 -5
- package/src/opencode-interrupt-plugin.ts +34 -1
- package/src/opencode-plugin-loading.e2e.test.ts +25 -35
- package/src/opencode-plugin.ts +5 -4
- package/src/opencode.ts +199 -32
- package/src/patch-text-parser.ts +107 -0
- package/src/queue-advanced-e2e-setup.ts +273 -0
- package/src/queue-advanced-footer.e2e.test.ts +211 -0
- package/src/queue-advanced-model-switch.e2e.test.ts +383 -0
- package/src/queue-advanced-permissions-typing.e2e.test.ts +92 -0
- package/src/runtime-lifecycle.e2e.test.ts +4 -19
- package/src/session-handler/event-stream-state.test.ts +6 -2
- package/src/session-handler/event-stream-state.ts +5 -0
- package/src/session-handler/thread-session-runtime.ts +45 -2
- package/src/system-message.ts +26 -23
- package/src/test-utils.ts +17 -0
- package/src/thread-message-queue.e2e.test.ts +2 -20
- package/src/utils.ts +3 -1
- package/src/voice-message.e2e.test.ts +3 -20
- package/src/voice.test.ts +26 -2
- package/src/voice.ts +147 -9
- package/src/websockify.ts +101 -0
- package/src/worktree-lifecycle.e2e.test.ts +391 -0
|
@@ -330,6 +330,271 @@ export function createDeterministicMatchers(): DeterministicMatcher[] {
|
|
|
330
330
|
},
|
|
331
331
|
}
|
|
332
332
|
|
|
333
|
+
// Model responds with text + tool call, then after tool result the
|
|
334
|
+
// follow-up matcher responds with text. This creates two assistant messages:
|
|
335
|
+
// first with finish="tool-calls" + completed, second with finish="stop".
|
|
336
|
+
// Exercises footer handling for a first message that completes with
|
|
337
|
+
// finish="tool-calls" (which isAssistantMessageNaturalCompletion rejects):
|
|
338
|
+
// the footer test expects exactly one footer, after the final text reply.
|
|
339
|
+
const toolCallFooterMatcher: DeterministicMatcher = {
|
|
340
|
+
id: 'tool-call-footer',
|
|
341
|
+
priority: 108,
|
|
342
|
+
when: {
|
|
343
|
+
lastMessageRole: 'user',
|
|
344
|
+
latestUserTextIncludes: 'TOOL_CALL_FOOTER_MARKER',
|
|
345
|
+
},
|
|
346
|
+
then: {
|
|
347
|
+
parts: [
|
|
348
|
+
{ type: 'stream-start', warnings: [] },
|
|
349
|
+
{ type: 'text-start', id: 'tool-call-footer-text' },
|
|
350
|
+
{ type: 'text-delta', id: 'tool-call-footer-text', delta: 'running tool' },
|
|
351
|
+
{ type: 'text-end', id: 'tool-call-footer-text' },
|
|
352
|
+
{
|
|
353
|
+
type: 'tool-call',
|
|
354
|
+
toolCallId: 'tool-call-footer-bash',
|
|
355
|
+
toolName: 'bash',
|
|
356
|
+
input: JSON.stringify({
|
|
357
|
+
command: 'echo tool-call-footer-test',
|
|
358
|
+
description: 'Echo for footer test',
|
|
359
|
+
}),
|
|
360
|
+
},
|
|
361
|
+
{
|
|
362
|
+
type: 'finish',
|
|
363
|
+
finishReason: 'tool-calls',
|
|
364
|
+
usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
|
|
365
|
+
},
|
|
366
|
+
],
|
|
367
|
+
},
|
|
368
|
+
}
|
|
369
|
+
|
|
370
|
+
const toolCallFooterFollowupMatcher: DeterministicMatcher = {
|
|
371
|
+
id: 'tool-call-footer-followup',
|
|
372
|
+
priority: 109,
|
|
373
|
+
when: {
|
|
374
|
+
lastMessageRole: 'tool',
|
|
375
|
+
latestUserTextIncludes: 'TOOL_CALL_FOOTER_MARKER',
|
|
376
|
+
},
|
|
377
|
+
then: {
|
|
378
|
+
parts: [
|
|
379
|
+
{ type: 'stream-start', warnings: [] },
|
|
380
|
+
{ type: 'text-start', id: 'tool-call-footer-followup' },
|
|
381
|
+
{ type: 'text-delta', id: 'tool-call-footer-followup', delta: 'tool call completed' },
|
|
382
|
+
{ type: 'text-end', id: 'tool-call-footer-followup' },
|
|
383
|
+
{
|
|
384
|
+
type: 'finish',
|
|
385
|
+
finishReason: 'stop',
|
|
386
|
+
usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
|
|
387
|
+
},
|
|
388
|
+
],
|
|
389
|
+
},
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
// Multi-step tool chain: model emits text + 3 parallel tool calls in one
|
|
393
|
+
// response (finish="tool-calls"). All tools complete, then the follow-up
|
|
394
|
+
// matcher responds with final text (finish="stop"). This creates 2 assistant
|
|
395
|
+
// messages — one with finish="tool-calls" + completed, one with finish="stop".
|
|
396
|
+
// With the naive fix (allowing tool-calls as natural completion), we'd get
|
|
397
|
+
// 2 footers. Only the final text response should get a footer.
|
|
398
|
+
const multiToolMatcher: DeterministicMatcher = {
|
|
399
|
+
id: 'multi-tool',
|
|
400
|
+
priority: 115,
|
|
401
|
+
when: {
|
|
402
|
+
lastMessageRole: 'user',
|
|
403
|
+
latestUserTextIncludes: 'MULTI_TOOL_FOOTER_MARKER',
|
|
404
|
+
},
|
|
405
|
+
then: {
|
|
406
|
+
parts: [
|
|
407
|
+
{ type: 'stream-start', warnings: [] },
|
|
408
|
+
{ type: 'text-start', id: 'multi-tool-text' },
|
|
409
|
+
{ type: 'text-delta', id: 'multi-tool-text', delta: 'investigating the issue' },
|
|
410
|
+
{ type: 'text-end', id: 'multi-tool-text' },
|
|
411
|
+
{
|
|
412
|
+
type: 'tool-call',
|
|
413
|
+
toolCallId: 'multi-tool-bash-1',
|
|
414
|
+
toolName: 'bash',
|
|
415
|
+
input: JSON.stringify({
|
|
416
|
+
command: 'echo search-done',
|
|
417
|
+
description: 'Search codebase',
|
|
418
|
+
}),
|
|
419
|
+
},
|
|
420
|
+
{
|
|
421
|
+
type: 'tool-call',
|
|
422
|
+
toolCallId: 'multi-tool-bash-2',
|
|
423
|
+
toolName: 'bash',
|
|
424
|
+
input: JSON.stringify({
|
|
425
|
+
command: 'echo read-done',
|
|
426
|
+
description: 'Read config file',
|
|
427
|
+
}),
|
|
428
|
+
},
|
|
429
|
+
{
|
|
430
|
+
type: 'tool-call',
|
|
431
|
+
toolCallId: 'multi-tool-bash-3',
|
|
432
|
+
toolName: 'bash',
|
|
433
|
+
input: JSON.stringify({
|
|
434
|
+
command: 'echo fix-done',
|
|
435
|
+
description: 'Apply fix',
|
|
436
|
+
}),
|
|
437
|
+
},
|
|
438
|
+
{
|
|
439
|
+
type: 'finish',
|
|
440
|
+
finishReason: 'tool-calls',
|
|
441
|
+
usage: { inputTokens: 10, outputTokens: 15, totalTokens: 25 },
|
|
442
|
+
},
|
|
443
|
+
],
|
|
444
|
+
},
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
const multiToolFollowupMatcher: DeterministicMatcher = {
|
|
448
|
+
id: 'multi-tool-followup',
|
|
449
|
+
priority: 114,
|
|
450
|
+
when: {
|
|
451
|
+
latestUserTextIncludes: 'MULTI_TOOL_FOOTER_MARKER',
|
|
452
|
+
rawPromptIncludes: 'investigating the issue',
|
|
453
|
+
},
|
|
454
|
+
then: {
|
|
455
|
+
parts: [
|
|
456
|
+
{ type: 'stream-start', warnings: [] },
|
|
457
|
+
{ type: 'text-start', id: 'multi-tool-followup-text' },
|
|
458
|
+
{ type: 'text-delta', id: 'multi-tool-followup-text', delta: 'all done, fixed 3 files' },
|
|
459
|
+
{ type: 'text-end', id: 'multi-tool-followup-text' },
|
|
460
|
+
{
|
|
461
|
+
type: 'finish',
|
|
462
|
+
finishReason: 'stop',
|
|
463
|
+
usage: { inputTokens: 30, outputTokens: 10, totalTokens: 40 },
|
|
464
|
+
},
|
|
465
|
+
],
|
|
466
|
+
},
|
|
467
|
+
}
|
|
468
|
+
|
|
469
|
+
// Multi-step sequential tool chain: 3 separate tool-call steps (each a
|
|
470
|
+
// separate assistant message with finish="tool-calls"), then a final text
|
|
471
|
+
// response. This creates 4 assistant messages total. Without proper
|
|
472
|
+
// deferred footer logic, each tool-call step would emit its own footer,
|
|
473
|
+
// producing 3 spurious footers before the real one.
|
|
474
|
+
//
|
|
475
|
+
// Flow: user → step1 (text + tool-call) → tool result →
|
|
476
|
+
// step2 (text + tool-call) → tool result →
|
|
477
|
+
// step3 (text + tool-call) → tool result →
|
|
478
|
+
// final text (finish="stop")
|
|
479
|
+
//
|
|
480
|
+
// Matcher priority ensures each step fires in order: the highest-priority
|
|
481
|
+
// matcher that matches wins, and each step's rawPromptIncludes check only
|
|
482
|
+
// matches once the previous step's output text is in the conversation.
|
|
483
|
+
const multiStepChainInitMatcher: DeterministicMatcher = {
|
|
484
|
+
id: 'multi-step-chain-init',
|
|
485
|
+
priority: 119,
|
|
486
|
+
when: {
|
|
487
|
+
lastMessageRole: 'user',
|
|
488
|
+
latestUserTextIncludes: 'MULTI_STEP_CHAIN_MARKER',
|
|
489
|
+
},
|
|
490
|
+
then: {
|
|
491
|
+
parts: [
|
|
492
|
+
{ type: 'stream-start', warnings: [] },
|
|
493
|
+
{ type: 'text-start', id: 'chain-step1-text' },
|
|
494
|
+
{ type: 'text-delta', id: 'chain-step1-text', delta: 'chain step 1: reading config' },
|
|
495
|
+
{ type: 'text-end', id: 'chain-step1-text' },
|
|
496
|
+
{
|
|
497
|
+
type: 'tool-call',
|
|
498
|
+
toolCallId: 'chain-step1-bash',
|
|
499
|
+
toolName: 'bash',
|
|
500
|
+
input: JSON.stringify({
|
|
501
|
+
command: 'echo chain-step-1-output',
|
|
502
|
+
description: 'Read config',
|
|
503
|
+
}),
|
|
504
|
+
},
|
|
505
|
+
{
|
|
506
|
+
type: 'finish',
|
|
507
|
+
finishReason: 'tool-calls',
|
|
508
|
+
usage: { inputTokens: 5, outputTokens: 10, totalTokens: 15 },
|
|
509
|
+
},
|
|
510
|
+
],
|
|
511
|
+
},
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
const multiStepChainStep2Matcher: DeterministicMatcher = {
|
|
515
|
+
id: 'multi-step-chain-step2',
|
|
516
|
+
priority: 120,
|
|
517
|
+
when: {
|
|
518
|
+
latestUserTextIncludes: 'MULTI_STEP_CHAIN_MARKER',
|
|
519
|
+
rawPromptIncludes: 'chain step 1: reading config',
|
|
520
|
+
},
|
|
521
|
+
then: {
|
|
522
|
+
parts: [
|
|
523
|
+
{ type: 'stream-start', warnings: [] },
|
|
524
|
+
{ type: 'text-start', id: 'chain-step2-text' },
|
|
525
|
+
{ type: 'text-delta', id: 'chain-step2-text', delta: 'chain step 2: analyzing results' },
|
|
526
|
+
{ type: 'text-end', id: 'chain-step2-text' },
|
|
527
|
+
{
|
|
528
|
+
type: 'tool-call',
|
|
529
|
+
toolCallId: 'chain-step2-bash',
|
|
530
|
+
toolName: 'bash',
|
|
531
|
+
input: JSON.stringify({
|
|
532
|
+
command: 'echo chain-step-2-output',
|
|
533
|
+
description: 'Analyze results',
|
|
534
|
+
}),
|
|
535
|
+
},
|
|
536
|
+
{
|
|
537
|
+
type: 'finish',
|
|
538
|
+
finishReason: 'tool-calls',
|
|
539
|
+
usage: { inputTokens: 15, outputTokens: 10, totalTokens: 25 },
|
|
540
|
+
},
|
|
541
|
+
],
|
|
542
|
+
},
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
const multiStepChainStep3Matcher: DeterministicMatcher = {
|
|
546
|
+
id: 'multi-step-chain-step3',
|
|
547
|
+
priority: 121,
|
|
548
|
+
when: {
|
|
549
|
+
latestUserTextIncludes: 'MULTI_STEP_CHAIN_MARKER',
|
|
550
|
+
rawPromptIncludes: 'chain step 2: analyzing results',
|
|
551
|
+
},
|
|
552
|
+
then: {
|
|
553
|
+
parts: [
|
|
554
|
+
{ type: 'stream-start', warnings: [] },
|
|
555
|
+
{ type: 'text-start', id: 'chain-step3-text' },
|
|
556
|
+
{ type: 'text-delta', id: 'chain-step3-text', delta: 'chain step 3: applying fix' },
|
|
557
|
+
{ type: 'text-end', id: 'chain-step3-text' },
|
|
558
|
+
{
|
|
559
|
+
type: 'tool-call',
|
|
560
|
+
toolCallId: 'chain-step3-bash',
|
|
561
|
+
toolName: 'bash',
|
|
562
|
+
input: JSON.stringify({
|
|
563
|
+
command: 'echo chain-step-3-output',
|
|
564
|
+
description: 'Apply fix',
|
|
565
|
+
}),
|
|
566
|
+
},
|
|
567
|
+
{
|
|
568
|
+
type: 'finish',
|
|
569
|
+
finishReason: 'tool-calls',
|
|
570
|
+
usage: { inputTokens: 25, outputTokens: 10, totalTokens: 35 },
|
|
571
|
+
},
|
|
572
|
+
],
|
|
573
|
+
},
|
|
574
|
+
}
|
|
575
|
+
|
|
576
|
+
const multiStepChainFinalMatcher: DeterministicMatcher = {
|
|
577
|
+
id: 'multi-step-chain-final',
|
|
578
|
+
priority: 122,
|
|
579
|
+
when: {
|
|
580
|
+
latestUserTextIncludes: 'MULTI_STEP_CHAIN_MARKER',
|
|
581
|
+
rawPromptIncludes: 'chain step 3: applying fix',
|
|
582
|
+
},
|
|
583
|
+
then: {
|
|
584
|
+
parts: [
|
|
585
|
+
{ type: 'stream-start', warnings: [] },
|
|
586
|
+
{ type: 'text-start', id: 'chain-final-text' },
|
|
587
|
+
{ type: 'text-delta', id: 'chain-final-text', delta: 'chain complete: all 3 steps done' },
|
|
588
|
+
{ type: 'text-end', id: 'chain-final-text' },
|
|
589
|
+
{
|
|
590
|
+
type: 'finish',
|
|
591
|
+
finishReason: 'stop',
|
|
592
|
+
usage: { inputTokens: 35, outputTokens: 5, totalTokens: 40 },
|
|
593
|
+
},
|
|
594
|
+
],
|
|
595
|
+
},
|
|
596
|
+
}
|
|
597
|
+
|
|
333
598
|
return [
|
|
334
599
|
slowAbortMatcher,
|
|
335
600
|
typingRepulseMatcher,
|
|
@@ -338,7 +603,15 @@ export function createDeterministicMatchers(): DeterministicMatcher[] {
|
|
|
338
603
|
questionToolMatcher,
|
|
339
604
|
permissionTypingMatcher,
|
|
340
605
|
permissionTypingFollowupMatcher,
|
|
606
|
+
multiToolMatcher,
|
|
607
|
+
multiToolFollowupMatcher,
|
|
608
|
+
multiStepChainInitMatcher,
|
|
609
|
+
multiStepChainStep2Matcher,
|
|
610
|
+
multiStepChainStep3Matcher,
|
|
611
|
+
multiStepChainFinalMatcher,
|
|
341
612
|
raceFinalReplyMatcher,
|
|
613
|
+
toolCallFooterMatcher,
|
|
614
|
+
toolCallFooterFollowupMatcher,
|
|
342
615
|
toolFollowupMatcher,
|
|
343
616
|
userReplyMatcher,
|
|
344
617
|
]
|
|
@@ -362,4 +362,215 @@ e2eTest('queue advanced: footer emission', () => {
|
|
|
362
362
|
},
|
|
363
363
|
15_000,
|
|
364
364
|
)
|
|
365
|
+
|
|
366
|
+
test(
|
|
367
|
+
'tool-call assistant message gets footer when it completes normally',
|
|
368
|
+
async () => {
|
|
369
|
+
// Reproduces the bug: model responds with text + tool call,
|
|
370
|
+
// finish="tool-calls", message gets completed timestamp. Then the tool
|
|
371
|
+
// result triggers a follow-up text response in a second assistant message.
|
|
372
|
+
// The second message gets the footer; the first (tool-call) message
|
|
373
|
+
// must NOT get its own footer, because it is a mid-turn step.
|
|
374
|
+
// This matches the real-world scenario where an agent calls a bash tool
|
|
375
|
+
// (e.g. `kimaki send`) and then follows up with a summary text.
|
|
376
|
+
await ctx.discord.channel(TEXT_CHANNEL_ID).user(TEST_USER_ID).sendMessage({
|
|
377
|
+
content: 'TOOL_CALL_FOOTER_MARKER',
|
|
378
|
+
})
|
|
379
|
+
|
|
380
|
+
const thread = await ctx.discord.channel(TEXT_CHANNEL_ID).waitForThread({
|
|
381
|
+
timeout: 4_000,
|
|
382
|
+
predicate: (t) => {
|
|
383
|
+
return t.name === 'TOOL_CALL_FOOTER_MARKER'
|
|
384
|
+
},
|
|
385
|
+
})
|
|
386
|
+
|
|
387
|
+
const th = ctx.discord.thread(thread.id)
|
|
388
|
+
|
|
389
|
+
// Wait for the follow-up text response after tool completion.
|
|
390
|
+
// The tool call completes and the model follows up with a second
|
|
391
|
+
// assistant message containing text.
|
|
392
|
+
await waitForBotReplyAfterUserMessage({
|
|
393
|
+
discord: ctx.discord,
|
|
394
|
+
threadId: thread.id,
|
|
395
|
+
userId: TEST_USER_ID,
|
|
396
|
+
userMessageIncludes: 'TOOL_CALL_FOOTER_MARKER',
|
|
397
|
+
timeout: 4_000,
|
|
398
|
+
})
|
|
399
|
+
|
|
400
|
+
// Wait for at least one footer to appear
|
|
401
|
+
await waitForFooterMessage({
|
|
402
|
+
discord: ctx.discord,
|
|
403
|
+
threadId: thread.id,
|
|
404
|
+
timeout: 4_000,
|
|
405
|
+
})
|
|
406
|
+
|
|
407
|
+
// Watch the thread for up to 4s for a spurious second footer. The loop
|
|
408
|
+
// exits early only if two footers are seen; in the expected case (a
|
|
409
|
+
// single footer) it keeps polling until the deadline, and footerCount
|
|
410
|
+
// then holds the count from the last poll, which is asserted to be 1.
|
|
411
|
+
const deadline = Date.now() + 4_000
|
|
412
|
+
let footerCount = 0
|
|
413
|
+
while (Date.now() < deadline) {
|
|
414
|
+
const msgs = await th.getMessages()
|
|
415
|
+
footerCount = msgs.filter((m) => {
|
|
416
|
+
return m.author.id === ctx.discord.botUserId
|
|
417
|
+
&& m.content.startsWith('*')
|
|
418
|
+
&& m.content.includes('⋅')
|
|
419
|
+
}).length
|
|
420
|
+
if (footerCount >= 2) {
|
|
421
|
+
break
|
|
422
|
+
}
|
|
423
|
+
await new Promise((resolve) => {
|
|
424
|
+
setTimeout(resolve, 100)
|
|
425
|
+
})
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
expect(await th.text()).toMatchInlineSnapshot(`
|
|
429
|
+
"--- from: user (queue-advanced-tester)
|
|
430
|
+
TOOL_CALL_FOOTER_MARKER
|
|
431
|
+
--- from: assistant (TestBot)
|
|
432
|
+
⬥ running tool
|
|
433
|
+
⬥ ok
|
|
434
|
+
*project ⋅ main ⋅ Ns ⋅ N% ⋅ deterministic-v2*"
|
|
435
|
+
`)
|
|
436
|
+
|
|
437
|
+
// Only ONE footer at the end — the tool-call step's footer is NOT
|
|
438
|
+
// emitted mid-turn. The final text follow-up gets the footer.
|
|
439
|
+
expect(footerCount).toBe(1)
|
|
440
|
+
},
|
|
441
|
+
10_000,
|
|
442
|
+
)
|
|
443
|
+
|
|
444
|
+
test(
|
|
445
|
+
'multi-step tool chain should only have one footer at the end',
|
|
446
|
+
async () => {
|
|
447
|
+
// Model emits 3 parallel tool calls in a single assistant message
|
|
448
|
+
// (finish="tool-calls"), then a final text response. Only the final
|
|
449
|
+
// text response should get a footer — the intermediate tool-call step
|
|
450
|
+
// should NOT get a footer since it is mid-turn work.
|
|
451
|
+
await ctx.discord.channel(TEXT_CHANNEL_ID).user(TEST_USER_ID).sendMessage({
|
|
452
|
+
content: 'MULTI_TOOL_FOOTER_MARKER',
|
|
453
|
+
})
|
|
454
|
+
|
|
455
|
+
const thread = await ctx.discord.channel(TEXT_CHANNEL_ID).waitForThread({
|
|
456
|
+
timeout: 4_000,
|
|
457
|
+
predicate: (t) => {
|
|
458
|
+
return t.name === 'MULTI_TOOL_FOOTER_MARKER'
|
|
459
|
+
},
|
|
460
|
+
})
|
|
461
|
+
|
|
462
|
+
const th = ctx.discord.thread(thread.id)
|
|
463
|
+
|
|
464
|
+
// Wait for the final text response after all 3 tool calls complete
|
|
465
|
+
await waitForBotMessageContaining({
|
|
466
|
+
discord: ctx.discord,
|
|
467
|
+
threadId: thread.id,
|
|
468
|
+
userId: TEST_USER_ID,
|
|
469
|
+
text: 'all done, fixed 3 files',
|
|
470
|
+
timeout: 4_000,
|
|
471
|
+
})
|
|
472
|
+
|
|
473
|
+
// Wait for the footer after the final response
|
|
474
|
+
await waitForFooterMessage({
|
|
475
|
+
discord: ctx.discord,
|
|
476
|
+
threadId: thread.id,
|
|
477
|
+
timeout: 4_000,
|
|
478
|
+
})
|
|
479
|
+
|
|
480
|
+
// Give any spurious extra footers time to arrive
|
|
481
|
+
await new Promise((resolve) => {
|
|
482
|
+
setTimeout(resolve, 500)
|
|
483
|
+
})
|
|
484
|
+
|
|
485
|
+
const messages = await th.getMessages()
|
|
486
|
+
const footerCount = messages.filter((m) => {
|
|
487
|
+
return m.author.id === ctx.discord.botUserId
|
|
488
|
+
&& m.content.startsWith('*')
|
|
489
|
+
&& m.content.includes('⋅')
|
|
490
|
+
}).length
|
|
491
|
+
|
|
492
|
+
expect(await th.text()).toMatchInlineSnapshot(`
|
|
493
|
+
"--- from: user (queue-advanced-tester)
|
|
494
|
+
MULTI_TOOL_FOOTER_MARKER
|
|
495
|
+
--- from: assistant (TestBot)
|
|
496
|
+
⬥ investigating the issue
|
|
497
|
+
⬥ all done, fixed 3 files
|
|
498
|
+
*project ⋅ main ⋅ Ns ⋅ N% ⋅ deterministic-v2*"
|
|
499
|
+
`)
|
|
500
|
+
|
|
501
|
+
// Only ONE footer should appear — after the final text response.
|
|
502
|
+
// Intermediate tool-call steps should NOT get footers.
|
|
503
|
+
expect(footerCount).toBe(1)
|
|
504
|
+
},
|
|
505
|
+
10_000,
|
|
506
|
+
)
|
|
507
|
+
|
|
508
|
+
test(
|
|
509
|
+
'3 sequential tool-call steps produce exactly 1 footer, not 3',
|
|
510
|
+
async () => {
|
|
511
|
+
// This is the most obvious reproduction of the multi-footer bug:
|
|
512
|
+
// the model runs 3 sequential tool-call steps (each a SEPARATE
|
|
513
|
+
// assistant message with finish="tool-calls"), then a final text.
|
|
514
|
+
// With a naive fix that treats tool-calls as natural completions,
|
|
515
|
+
// you'd see 4 footers (one per assistant message). Only the final
|
|
516
|
+
// text response should produce a footer.
|
|
517
|
+
await ctx.discord.channel(TEXT_CHANNEL_ID).user(TEST_USER_ID).sendMessage({
|
|
518
|
+
content: 'MULTI_STEP_CHAIN_MARKER',
|
|
519
|
+
})
|
|
520
|
+
|
|
521
|
+
const thread = await ctx.discord.channel(TEXT_CHANNEL_ID).waitForThread({
|
|
522
|
+
timeout: 4_000,
|
|
523
|
+
predicate: (t) => {
|
|
524
|
+
return t.name === 'MULTI_STEP_CHAIN_MARKER'
|
|
525
|
+
},
|
|
526
|
+
})
|
|
527
|
+
|
|
528
|
+
const th = ctx.discord.thread(thread.id)
|
|
529
|
+
|
|
530
|
+
// Wait for the final text after all 3 sequential tool steps
|
|
531
|
+
await waitForBotMessageContaining({
|
|
532
|
+
discord: ctx.discord,
|
|
533
|
+
threadId: thread.id,
|
|
534
|
+
userId: TEST_USER_ID,
|
|
535
|
+
text: 'chain complete: all 3 steps done',
|
|
536
|
+
timeout: 8_000,
|
|
537
|
+
})
|
|
538
|
+
|
|
539
|
+
// Wait for footer
|
|
540
|
+
await waitForFooterMessage({
|
|
541
|
+
discord: ctx.discord,
|
|
542
|
+
threadId: thread.id,
|
|
543
|
+
timeout: 4_000,
|
|
544
|
+
})
|
|
545
|
+
|
|
546
|
+
// Give any spurious extra footers time to arrive
|
|
547
|
+
await new Promise((resolve) => {
|
|
548
|
+
setTimeout(resolve, 500)
|
|
549
|
+
})
|
|
550
|
+
|
|
551
|
+
const messages = await th.getMessages()
|
|
552
|
+
const footerCount = messages.filter((m) => {
|
|
553
|
+
return m.author.id === ctx.discord.botUserId
|
|
554
|
+
&& m.content.startsWith('*')
|
|
555
|
+
&& m.content.includes('⋅')
|
|
556
|
+
}).length
|
|
557
|
+
|
|
558
|
+
expect(await th.text()).toMatchInlineSnapshot(`
|
|
559
|
+
"--- from: user (queue-advanced-tester)
|
|
560
|
+
MULTI_STEP_CHAIN_MARKER
|
|
561
|
+
--- from: assistant (TestBot)
|
|
562
|
+
⬥ chain step 1: reading config
|
|
563
|
+
⬥ chain step 2: analyzing results
|
|
564
|
+
⬥ chain step 3: applying fix
|
|
565
|
+
⬥ chain complete: all 3 steps done
|
|
566
|
+
*project ⋅ main ⋅ Ns ⋅ N% ⋅ deterministic-v2*"
|
|
567
|
+
`)
|
|
568
|
+
|
|
569
|
+
// The critical assertion: only 1 footer at the very end.
|
|
570
|
+
// With the naive "allow tool-calls as natural completion" fix,
|
|
571
|
+
// this would be 4 (one per assistant message). We want 1.
|
|
572
|
+
expect(footerCount).toBe(1)
|
|
573
|
+
},
|
|
574
|
+
15_000,
|
|
575
|
+
)
|
|
365
576
|
})
|