kimaki 0.10.2 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -45,7 +45,7 @@ import { handleScreenshareCommand, handleScreenshareStopCommand, } from './comma
45
45
  import { handleVscodeCommand } from './commands/vscode.js';
46
46
  import { handleModelVariantSelectMenu } from './commands/model.js';
47
47
  import { handleModelVariantCommand, handleVariantQuickSelectMenu, handleVariantScopeSelectMenu, } from './commands/model-variant.js';
48
- import { hasKimakiBotPermission } from './discord-utils.js';
48
+ import { hasKimakiAdminPermission, hasKimakiBotPermission } from './discord-utils.js';
49
49
  import { createLogger, LogPrefix } from './logger.js';
50
50
  import { notifyError } from './sentry.js';
51
51
  const interactionLogger = createLogger(LogPrefix.INTERACTION);
@@ -178,6 +178,13 @@ export function registerInteractionHandler({ discordClient, appId, }) {
178
178
  await handleUnsetModelCommand({ interaction, appId });
179
179
  return;
180
180
  case 'login':
181
+ if (!hasKimakiAdminPermission(interaction.member, interaction.guild)) {
182
+ await interaction.reply({
183
+ content: `Only server admins or users with the **Kimaki** role can configure login credentials.`,
184
+ flags: MessageFlags.Ephemeral,
185
+ });
186
+ return;
187
+ }
181
188
  await handleLoginCommand({ interaction, appId });
182
189
  return;
183
190
  case 'agent':
@@ -223,6 +230,13 @@ export function registerInteractionHandler({ discordClient, appId, }) {
223
230
  });
224
231
  return;
225
232
  case 'transcription-key':
233
+ if (!hasKimakiAdminPermission(interaction.member, interaction.guild)) {
234
+ await interaction.reply({
235
+ content: `Only server admins or users with the **Kimaki** role can configure API keys.`,
236
+ flags: MessageFlags.Ephemeral,
237
+ });
238
+ return;
239
+ }
226
240
  await handleTranscriptionApiKeyCommand({
227
241
  interaction,
228
242
  appId,
@@ -269,6 +283,13 @@ export function registerInteractionHandler({ discordClient, appId, }) {
269
283
  }
270
284
  const customId = interaction.customId;
271
285
  if (customId.startsWith('transcription_apikey:')) {
286
+ if (!hasKimakiAdminPermission(interaction.member, interaction.guild)) {
287
+ await interaction.reply({
288
+ content: `Only server admins or users with the **Kimaki** role can configure API keys.`,
289
+ flags: MessageFlags.Ephemeral,
290
+ });
291
+ return;
292
+ }
272
293
  await handleTranscriptionApiKeyButton(interaction);
273
294
  return;
274
295
  }
@@ -283,14 +304,35 @@ export function registerInteractionHandler({ discordClient, appId, }) {
283
304
  return;
284
305
  }
285
306
  if (customId.startsWith('login_text_btn:')) {
307
+ if (!hasKimakiAdminPermission(interaction.member, interaction.guild)) {
308
+ await interaction.reply({
309
+ content: `Only server admins or users with the **Kimaki** role can configure login credentials.`,
310
+ flags: MessageFlags.Ephemeral,
311
+ });
312
+ return;
313
+ }
286
314
  await handleLoginTextButton(interaction);
287
315
  return;
288
316
  }
289
317
  if (customId.startsWith('login_apikey_btn:')) {
318
+ if (!hasKimakiAdminPermission(interaction.member, interaction.guild)) {
319
+ await interaction.reply({
320
+ content: `Only server admins or users with the **Kimaki** role can configure login credentials.`,
321
+ flags: MessageFlags.Ephemeral,
322
+ });
323
+ return;
324
+ }
290
325
  await handleLoginApiKeyButton(interaction);
291
326
  return;
292
327
  }
293
328
  if (customId.startsWith('login_oauth_code_btn:')) {
329
+ if (!hasKimakiAdminPermission(interaction.member, interaction.guild)) {
330
+ await interaction.reply({
331
+ content: `Only server admins or users with the **Kimaki** role can configure login credentials.`,
332
+ flags: MessageFlags.Ephemeral,
333
+ });
334
+ return;
335
+ }
294
336
  await handleOAuthCodeButton(interaction);
295
337
  return;
296
338
  }
@@ -362,6 +404,13 @@ export function registerInteractionHandler({ discordClient, appId, }) {
362
404
  return;
363
405
  }
364
406
  if (customId.startsWith('login_select:')) {
407
+ if (!hasKimakiAdminPermission(interaction.member, interaction.guild)) {
408
+ await interaction.reply({
409
+ content: `Only server admins or users with the **Kimaki** role can configure login credentials.`,
410
+ flags: MessageFlags.Ephemeral,
411
+ });
412
+ return;
413
+ }
365
414
  await handleLoginSelect(interaction);
366
415
  return;
367
416
  }
@@ -377,18 +426,46 @@ export function registerInteractionHandler({ discordClient, appId, }) {
377
426
  }
378
427
  const customId = interaction.customId;
379
428
  if (customId.startsWith('login_apikey:')) {
429
+ if (!hasKimakiAdminPermission(interaction.member, interaction.guild)) {
430
+ await interaction.reply({
431
+ content: `Only server admins or users with the **Kimaki** role can configure credentials.`,
432
+ flags: MessageFlags.Ephemeral,
433
+ });
434
+ return;
435
+ }
380
436
  await handleApiKeyModalSubmit(interaction);
381
437
  return;
382
438
  }
383
439
  if (customId.startsWith('login_text:')) {
440
+ if (!hasKimakiAdminPermission(interaction.member, interaction.guild)) {
441
+ await interaction.reply({
442
+ content: `Only server admins or users with the **Kimaki** role can configure credentials.`,
443
+ flags: MessageFlags.Ephemeral,
444
+ });
445
+ return;
446
+ }
384
447
  await handleLoginTextModalSubmit(interaction);
385
448
  return;
386
449
  }
387
450
  if (customId.startsWith('login_oauth_code:')) {
451
+ if (!hasKimakiAdminPermission(interaction.member, interaction.guild)) {
452
+ await interaction.reply({
453
+ content: `Only server admins or users with the **Kimaki** role can configure credentials.`,
454
+ flags: MessageFlags.Ephemeral,
455
+ });
456
+ return;
457
+ }
388
458
  await handleOAuthCodeModalSubmit(interaction);
389
459
  return;
390
460
  }
391
461
  if (customId.startsWith('transcription_apikey_modal:')) {
462
+ if (!hasKimakiAdminPermission(interaction.member, interaction.guild)) {
463
+ await interaction.reply({
464
+ content: `Only server admins or users with the **Kimaki** role can configure credentials.`,
465
+ flags: MessageFlags.Ephemeral,
466
+ });
467
+ return;
468
+ }
392
469
  await handleTranscriptionApiKeyModalSubmit(interaction);
393
470
  return;
394
471
  }
@@ -1859,7 +1859,7 @@ export class ThreadSessionRuntime {
1859
1859
  await this.persistEventBufferDebounced.flush();
1860
1860
  return;
1861
1861
  }
1862
- const errorMessage = formatSessionErrorFromProps(properties.error);
1862
+ const errorMessage = truncateSessionErrorMessage(formatSessionErrorFromProps(properties.error));
1863
1863
  logger.error(`Sending error to thread: ${errorMessage}`);
1864
1864
  await sendThreadMessage(this.thread, `✗ opencode session error: ${errorMessage}`, { flags: NOTIFY_MESSAGE_FLAGS });
1865
1865
  await this.persistEventBufferDebounced.flush();
@@ -3541,3 +3541,10 @@ function formatSessionErrorFromProps(error) {
3541
3541
  }
3542
3542
  return parts.length > 0 ? parts.join(' ') : error.name || 'Unknown error';
3543
3543
  }
3544
/**
 * Shorten a formatted session error so it stays compact when posted to a thread.
 *
 * Messages at or below the limit are returned unchanged. Longer messages are
 * cut and suffixed with a single ellipsis character so the result is at most
 * `maxLength` UTF-16 code units long.
 *
 * @param {string} message - Formatted error text.
 * @param {number} [maxLength=400] - Maximum length of the result, in UTF-16 code units.
 * @returns {string} The original message, or a truncated copy ending in `…`.
 */
function truncateSessionErrorMessage(message, maxLength = 400) {
    if (message.length <= maxLength) {
        return message;
    }
    // Reserve one code unit for the ellipsis.
    let end = Math.max(0, maxLength - 1);
    // If the cut lands between the two halves of a surrogate pair (e.g. an
    // emoji), drop the dangling high surrogate instead of emitting a lone one.
    if (end > 0) {
        const lastUnit = message.charCodeAt(end - 1);
        if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
            end -= 1;
        }
    }
    return `${message.slice(0, end)}…`;
}
package/dist/store.js CHANGED
@@ -11,6 +11,7 @@ export const store = createStore(() => ({
11
11
  critiqueEnabled: true,
12
12
  enabledSkills: [],
13
13
  disabledSkills: [],
14
+ allowAllUsers: false,
14
15
  discordBaseUrl: 'https://discord.com',
15
16
  gatewayToken: null,
16
17
  registeredUserCommands: [],
@@ -317,6 +317,22 @@ To upload files to the Discord thread (images, screenshots, long files that woul
317
317
 
318
318
  kimaki upload-to-discord --session ${sessionId} <file1> [file2] ...
319
319
 
320
+ ## generating audio from text
321
+
322
+ When the user asks you to generate audio of some text so they can listen instead of reading, use \`kimaki tts\` to create a speech file and \`kimaki upload-to-discord\` to send it to the thread. Only use this when the user explicitly asks for audio.
323
+
324
+ \`\`\`bash
325
+ # generate audio from inline text
326
+ kimaki tts 'Your summary goes here' -o /tmp/summary.mp3
327
+ kimaki upload-to-discord --session ${sessionId} /tmp/summary.mp3
328
+
329
+ # generate audio from a file (pipe via stdin)
330
+ cat docs/explanation.md | kimaki tts -o /tmp/explanation.mp3
331
+ kimaki upload-to-discord --session ${sessionId} /tmp/explanation.mp3
332
+ \`\`\`
333
+
334
+ see --help for options like voice, speed, etc.
335
+
320
336
  ## requesting files from the user
321
337
 
322
338
  To ask the user to upload files from their device, use the \`kimaki_file_upload\` tool. This shows a native file picker dialog in Discord. The files are downloaded to the project's \`uploads/\` directory and the tool returns the local file paths.
@@ -9,10 +9,11 @@ import path from 'node:path';
9
9
  import { Transform } from 'node:stream';
10
10
  import * as prism from 'prism-media';
11
11
  import dedent from 'string-dedent';
12
- import { Events, ActionRowBuilder, ButtonBuilder, ButtonStyle, } from 'discord.js';
12
+ import { Events, } from 'discord.js';
13
13
  import { createGenAIWorker } from './genai-worker-wrapper.js';
14
14
  import { getVoiceChannelDirectory, getGeminiApiKey, getTranscriptionApiKey, findTextChannelByVoiceChannel, } from './database.js';
15
- import { sendThreadMessage, escapeDiscordFormatting, SILENT_MESSAGE_FLAGS, NOTIFY_MESSAGE_FLAGS, hasKimakiBotPermission, } from './discord-utils.js';
15
+ import { sendThreadMessage, escapeDiscordFormatting, NOTIFY_MESSAGE_FLAGS, hasKimakiBotPermission, } from './discord-utils.js';
16
+ import { showApiKeyRequiredButton } from './commands/gemini-apikey.js';
16
17
  import { transcribeAudio } from './voice.js';
17
18
  import { FetchError } from './errors.js';
18
19
  import { store } from './store.js';
@@ -420,15 +421,10 @@ export async function processVoiceAttachment({ message, thread, projectDirectory
420
421
  }
421
422
  if (!transcriptionApiKey) {
422
423
  if (appId) {
423
- const button = new ButtonBuilder()
424
- .setCustomId(`transcription_apikey:${appId}`)
425
- .setLabel('Set Transcription API Key')
426
- .setStyle(ButtonStyle.Primary);
427
- const row = new ActionRowBuilder().addComponents(button);
428
- await thread.send({
429
- content: 'Voice transcription requires an API key (OpenAI or Gemini). Set one to enable voice message transcription.',
430
- components: [row],
431
- flags: SILENT_MESSAGE_FLAGS,
424
+ await showApiKeyRequiredButton({
425
+ thread,
426
+ appId,
427
+ message: 'Voice transcription requires an API key (OpenAI or Gemini). Set one to enable voice message transcription.',
432
428
  });
433
429
  }
434
430
  else {
package/dist/voice.js CHANGED
@@ -13,7 +13,7 @@ import { Readable } from 'node:stream';
13
13
  import prism from 'prism-media';
14
14
  import * as errore from 'errore';
15
15
  import { createLogger, LogPrefix } from './logger.js';
16
- import { ApiKeyMissingError, InvalidAudioFormatError, TranscriptionError, EmptyTranscriptionError, NoResponseContentError, NoToolResponseError, } from './errors.js';
16
+ import { ApiKeyMissingError, InvalidAudioFormatError, TranscriptionError, EmptyTranscriptionError, NoResponseContentError, NoToolResponseError, SpeechGenerationError, } from './errors.js';
17
17
  const voiceLogger = createLogger(LogPrefix.VOICE);
18
18
  // OpenAI input_audio only supports wav and mp3. Other formats (OGG Opus, etc)
19
19
  // must be converted before sending.
@@ -456,3 +456,128 @@ Note: "critique" is a CLI tool for showing diffs in the browser.`;
456
456
  provider: resolvedProvider,
457
457
  });
458
458
  }
459
/**
 * Fallback voice per provider, used when the caller does not pass `voice`.
 * OpenAI uses short names, Google uses prebuilt voice names.
 * Frozen because it is a shared lookup table that should never be mutated.
 */
const DEFAULT_VOICES = Object.freeze({
    openai: 'alloy',
    gemini: 'Kore',
});
/** OpenAI TTS model ID. gpt-4o-mini-tts supports instructions for style control. */
const OPENAI_TTS_MODEL = 'gpt-4o-mini-tts';
/** Gemini TTS model ID. Uses language model interface with AUDIO response modality. */
const GEMINI_TTS_MODEL = 'gemini-2.5-flash-preview-tts';
468
/**
 * Build the OpenAI speech model used for text-to-speech.
 * The model ID comes from OPENAI_TTS_MODEL (gpt-4o-mini-tts), which supports
 * instructions for voice style control.
 */
function createOpenAISpeechModel({ apiKey }) {
    const provider = createOpenAI({ apiKey });
    const speechModel = provider.speech(OPENAI_TTS_MODEL);
    return speechModel;
}
476
/**
 * Generate speech via the OpenAI speech model and return it as mp3.
 *
 * Failures are returned as error values (SpeechGenerationError), not thrown,
 * including the case where `doGenerate` throws synchronously.
 *
 * @param {object} params
 * @param {string} params.text - Text to synthesize.
 * @param {string} [params.voice] - Voice name; falls back to DEFAULT_VOICES.openai.
 * @param {string} params.apiKey - OpenAI API key.
 * @param {string} [params.instructions] - Optional style instructions forwarded to the model.
 * @param {number} [params.speed] - Optional speed forwarded to the model.
 * @returns {Promise<{ audio: Buffer, mediaType: string } | Error>}
 */
async function generateSpeechOpenAI({ text, voice, apiKey, instructions, speed, }) {
    const model = createOpenAISpeechModel({ apiKey });
    let response;
    try {
        // try/await covers both a rejected promise and a synchronous throw;
        // Promise.resolve(fn()).catch() would let the latter escape.
        response = await model.doGenerate({
            text,
            voice: voice || DEFAULT_VOICES.openai,
            outputFormat: 'mp3',
            instructions,
            speed,
            providerOptions: {
                openai: {
                    ...(instructions ? { instructions } : {}),
                    ...(speed ? { speed } : {}),
                },
            },
        });
    }
    catch (e) {
        return new SpeechGenerationError({ reason: `OpenAI TTS API call failed: ${String(e)}`, cause: e });
    }
    // The call may itself resolve to an error value; pass it through untouched.
    if (response instanceof Error) {
        return response;
    }
    // Audio arrives either base64-encoded or as raw bytes.
    const audioData = typeof response.audio === 'string'
        ? Buffer.from(response.audio, 'base64')
        : Buffer.from(response.audio);
    if (audioData.length === 0) {
        return new SpeechGenerationError({ reason: 'OpenAI TTS returned empty audio' });
    }
    return { audio: audioData, mediaType: 'audio/mp3' };
}
505
/**
 * Generate speech via the Google Gemini TTS model.
 * Uses the language model interface with responseModalities: ['AUDIO'].
 *
 * Failures are returned as error values (SpeechGenerationError), not thrown.
 * When the response is raw PCM (audio/L16 or audio/pcm, with or without
 * parameters such as `;rate=24000`), the bytes are wrapped in a WAV header.
 *
 * @param {object} params
 * @param {string} params.text - Text to synthesize.
 * @param {string} [params.voice] - Prebuilt voice name; falls back to DEFAULT_VOICES.gemini.
 * @param {string} params.apiKey - Gemini API key.
 * @returns {Promise<{ audio: Buffer, mediaType: string } | Error>}
 */
async function generateSpeechGemini({ text, voice, apiKey, }) {
    const google = createGoogleGenerativeAI({ apiKey });
    const model = google(GEMINI_TTS_MODEL);
    const resolvedVoice = voice || DEFAULT_VOICES.gemini;
    const options = {
        prompt: [
            {
                role: 'user',
                content: [{ type: 'text', text }],
            },
        ],
        providerOptions: {
            google: {
                responseModalities: ['AUDIO'],
                speechConfig: {
                    voiceConfig: {
                        prebuiltVoiceConfig: {
                            voiceName: resolvedVoice,
                        },
                    },
                },
            },
        },
    };
    let response;
    try {
        // try/await covers both a rejected promise and a synchronous throw.
        response = await model.doGenerate(options);
    }
    catch (e) {
        return new SpeechGenerationError({ reason: `Gemini TTS API call failed: ${String(e)}`, cause: e });
    }
    // The call may itself resolve to an error value; pass it through untouched.
    if (response instanceof Error) {
        return response;
    }
    // Gemini returns audio as a file part in the response content.
    const filePart = response.content.find((c) => c.type === 'file');
    if (!filePart) {
        return new SpeechGenerationError({ reason: 'Gemini TTS returned no audio content' });
    }
    const audioData = typeof filePart.data === 'string'
        ? Buffer.from(filePart.data, 'base64')
        : Buffer.from(filePart.data);
    if (audioData.length === 0) {
        return new SpeechGenerationError({ reason: 'Gemini TTS returned empty audio' });
    }
    // Gemini TTS returns raw PCM at 24kHz mono 16-bit LE; wrap it in a WAV header
    // so Discord and other players can handle it directly.
    const mediaType = filePart.mediaType || 'audio/wav';
    // MIME types are case-insensitive and may carry parameters
    // (e.g. "audio/L16;codec=pcm;rate=24000"), so compare only the
    // lower-cased type/subtype.
    const baseType = mediaType.split(';', 1)[0].trim().toLowerCase();
    const needsWavHeader = baseType === 'audio/l16' || baseType === 'audio/pcm';
    if (needsWavHeader) {
        const wavHeader = createWavHeader({
            dataLength: audioData.length,
            sampleRate: 24000,
            numChannels: 1,
            bitsPerSample: 16,
        });
        return { audio: Buffer.concat([wavHeader, audioData]), mediaType: 'audio/wav' };
    }
    return { audio: audioData, mediaType };
}
565
/**
 * Generate speech audio from text using OpenAI or Google TTS.
 * Calls the provider's TTS API directly via AI SDK without the `ai` npm package.
 *
 * API key resolution: an explicit `apiKey` always wins. Otherwise, when
 * `provider` is given, only that provider's environment variable is used
 * (OPENAI_API_KEY or GEMINI_API_KEY), so a key for one provider is never sent
 * to the other. With no provider, OPENAI_API_KEY is tried before GEMINI_API_KEY.
 *
 * Provider auto-detection (when `provider` is omitted): sk-* prefix → OpenAI,
 * otherwise → Gemini.
 * OpenAI returns mp3, Gemini returns WAV (24kHz mono).
 *
 * @returns {Promise<{ audio: Buffer, mediaType: string } | Error>} Audio, or an
 *   error value (ApiKeyMissingError / SpeechGenerationError) on failure.
 */
export async function generateSpeech({ text, voice, apiKey: apiKeyParam, provider, instructions, speed, }) {
    let envApiKey;
    let missingKeyService;
    if (provider === 'openai') {
        envApiKey = process.env.OPENAI_API_KEY;
        missingKeyService = 'OpenAI';
    }
    else if (provider === 'gemini') {
        envApiKey = process.env.GEMINI_API_KEY;
        missingKeyService = 'Gemini';
    }
    else {
        envApiKey = process.env.OPENAI_API_KEY || process.env.GEMINI_API_KEY;
        missingKeyService = 'OpenAI or Gemini';
    }
    const apiKey = apiKeyParam || envApiKey;
    if (!apiKey) {
        return new ApiKeyMissingError({ service: missingKeyService });
    }
    const resolvedProvider = provider || (apiKey.startsWith('sk-') ? 'openai' : 'gemini');
    voiceLogger.log(`Generating speech with ${resolvedProvider}, text length: ${text.length}`);
    if (resolvedProvider === 'openai') {
        return generateSpeechOpenAI({ text, voice, apiKey, instructions, speed });
    }
    return generateSpeechGemini({ text, voice, apiKey });
}
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "kimaki",
3
3
  "module": "index.ts",
4
4
  "type": "module",
5
- "version": "0.10.2",
5
+ "version": "0.11.0",
6
6
  "repository": "https://github.com/remorses/kimaki",
7
7
  "bin": "bin.js",
8
8
  "files": [
@@ -26,8 +26,8 @@
26
26
  "undici": "^8.0.2",
27
27
  "discord-digital-twin": "^0.1.0",
28
28
  "opencode-cached-provider": "^0.0.1",
29
- "db": "^0.0.0",
30
- "opencode-deterministic-provider": "^0.0.1"
29
+ "opencode-deterministic-provider": "^0.0.1",
30
+ "db": "^0.0.0"
31
31
  },
32
32
  "dependencies": {
33
33
  "@ai-sdk/google": "^3.0.53",
@@ -63,8 +63,8 @@
63
63
  "zod": "^4.3.6",
64
64
  "zustand": "^5.0.11",
65
65
  "errore": "^0.14.1",
66
- "traforo": "^0.5.0",
67
66
  "libsqlproxy": "^0.1.0",
67
+ "traforo": "^0.5.0",
68
68
  "opencode-injection-guard": "^0.2.1"
69
69
  },
70
70
  "optionalDependencies": {
@@ -38,6 +38,14 @@ npm install goke # or bun, pnpm, etc
38
38
 
39
39
  The README is the source of truth for rules, examples, testing patterns, JustBash integration, and API details.
40
40
 
41
+ If the README or this skill mentions a `goke` export that is missing from the installed package, upgrade `goke` to latest first before adding workarounds or custom local detection code:
42
+
43
+ ```bash
44
+ pnpm update goke --latest
45
+ ```
46
+
47
+ Use the project package manager for the repo you are editing. After upgrading, re-check the export from the installed package and continue with the documented API.
48
+
41
49
  ## Terminal Colors
42
50
 
43
51
  **Never install a separate color library.** goke vendors picocolors and exports it as `colors`:
@@ -75,6 +83,37 @@ if (isAgent || !process.stdin.isTTY) {
75
83
 
76
84
  Supported agents: `cursor`, `claude`, `devin`, `replit`, `gemini`, `codex`, `auggie`, `opencode`, `kiro`, `goose`, `pi`. Set `AI_AGENT` env var to override.
77
85
 
86
+ ## Long-Running Interactive Commands
87
+
88
+ Commands that start a browser/device login flow or any other long-running TTY-only interaction must fail fast in non-TTY shells. Do not start the flow and hope the agent notices the URL. The process must stay alive while the user approves the browser prompt, so agents need to launch it in a persistent terminal session like tuistory or tmux.
89
+
90
+ Always guard these commands with `!process.stdout.isTTY` before making network requests, opening the browser, or starting spinners. Do not fail just because an agent is running the command if the command has a real TTY.
91
+
92
+ ```ts
93
+ import dedent from 'string-dedent'
94
+ import { goke } from 'goke'
95
+
96
+ cli.command('login', 'Authenticate with browser login').action((options, { console, process }) => {
97
+ if (!process.stdout.isTTY) {
98
+ console.error(dedent`
99
+ mycli login needs an interactive terminal and must stay alive while you approve the browser login.
100
+
101
+ Run it in a background terminal session like tuistory or tmux, then wait for the URL/code:
102
+
103
+ bunx tuistory launch "mycli login" -s mycli-login
104
+ bunx tuistory -s mycli-login wait "/code:|https?:\\/\\//i" --timeout 15000
105
+
106
+ The login command exits by itself after successful browser approval.
107
+ `)
108
+ process.exit(1)
109
+ }
110
+
111
+ // Start device/browser login only after the guard.
112
+ })
113
+ ```
114
+
115
+ Use `tuistory wait` for the handoff point that needs user interaction. It returns nearby lines around the match, so agents can show the URL/code without a separate `tuistory read` call. Do not add `tuistory close` to login instructions when the CLI exits by itself after success.
116
+
78
117
  ## Shell Completions
79
118
 
80
119
  **Always add `.completions()` next to `.help()` in every CLI.** This gives users Tab completion for free.
@@ -128,6 +128,7 @@ There is no rigid template. Structure the content in whatever way communicates t
128
128
  - **Use code blocks with language hints.** The agent uses these to generate correct code.
129
129
  - **Keep prose short between code blocks.** One or two sentences of explanation, then an example.
130
130
  - **Call out common mistakes.** If there is a gotcha the agent will likely hit, warn about it explicitly.
131
+ - **Do not add HTML comments.** Skills are instructions, not generated files. Avoid comments like `<!-- Skill instructions for agents using ... -->`.
131
132
 
132
133
  ## What makes a good skill
133
134
 
@@ -293,6 +293,29 @@ Resolution flow when `tsc` sees `import db from '#sqlite'`:
293
293
  - **Bun / browser runtime conditions can still point at `src`**, because
294
294
  those runtimes execute `.ts` directly and skip the build step.
295
295
 
296
+ ### Excluding heavy dependencies from server/client bundles
297
+
298
+ `imports` conditions can swap a heavy client-only dependency for a lightweight
299
+ noop stub on the server (or vice versa). This keeps the SSR bundle small
300
+ without changing application code.
301
+
302
+ ```json
303
+ {
304
+ "imports": {
305
+ "#prism": {
306
+ "types": "./dist/prism.d.ts",
307
+ "ssr": "./dist/prism-noop.js",
308
+ "browser": "./dist/prism.js",
309
+ "default": "./dist/prism.js"
310
+ }
311
+ }
312
+ }
313
+ ```
314
+
315
+ `src/prism.ts` imports the real library (~500KB). `src/prism-noop.ts` exports
316
+ the same interface with stub implementations that return input unchanged.
317
+ Application code imports from `#prism` and gets the right version automatically.
318
+
296
319
  ### Requirements
297
320
 
298
321
  This only works when:
@@ -608,6 +631,28 @@ dist
608
631
 
609
632
  Workspace packages inside a monorepo inherit the root `.gitignore`, so this only applies to standalone packages.
610
633
 
634
+ ## Peer dependencies
635
+
636
+ Peer dependencies are installed by default by npm and pnpm. Use them to prevent a dependency from being duplicated in the consumer's `node_modules` tree. Common examples: `react`, `react-dom`, framework packages that must be singletons.
637
+
638
+ To make peer dependencies optional, add `peerDependenciesMeta`:
639
+
640
+ ```json
641
+ {
642
+ "peerDependencies": {
643
+ "react": "^18.0.0 || ^19.0.0",
644
+ "sass": "^1.70.0",
645
+ "tsx": "^4.8.1"
646
+ },
647
+ "peerDependenciesMeta": {
648
+ "sass": { "optional": true },
649
+ "tsx": { "optional": true }
650
+ }
651
+ }
652
+ ```
653
+
654
+ `react` above is required (no meta entry), so consumers must install it. `sass` and `tsx` are optional; the package works without them but can use them if present.
655
+
611
656
  ## common mistakes
612
657
 
613
658
  - if you need to use zod always use latest version
@@ -35,6 +35,8 @@ Reference examples for real-world usage:
35
35
 
36
36
  Always import and use `Link` from `spiceflow/react` for navigational links in Spiceflow apps. Do not render raw `<a>` elements for links. `Link` enables client-side navigation while preserving normal anchor behavior for external URLs, hashes, `target`, `rel`, styling, and event handlers. `Link` supports external URLs too, so it is fine to use for ambiguous or user-provided links when you do not know ahead of time whether they are internal or external.
37
37
 
38
+ **`Link` auto-prepends the Vite `base` path.** Never manually prepend the base path to `Link` href values. `<Link href="/dashboard" />` automatically renders as `<a href="/my-app/dashboard">` when the Vite base is `/my-app/`. Manually prepending causes double-prefixing. This only applies to `Link`; raw `fetch()` calls, `Response.redirect()`, and other non-Link URL construction still need manual base path handling.
39
+
38
40
  ## OpenTelemetry instrumentation
39
41
 
40
42
  Spiceflow supports automatic route instrumentation when you pass an OpenTelemetry-compatible tracer to the constructor:
@@ -1,23 +1,21 @@
1
1
  ---
2
2
  name: tuistory
3
3
  description: |
4
- Control and monitor terminal applications. Supports running TUI processes in background. TMUX replacement for agents. Can control fully interactive TUI apps like claude or opencode.
4
+ tmux for AI agents. Run dev servers and TUIs in named background sessions that agents can read, wait on, snapshot, and type into. Replaces tmux with reactive waiting instead of blind `sleep`. Projects wrap their dev script with tuistory (`"dev": "tuistory -- next dev"`) so agents get a background session and humans get auto-attached.
5
5
 
6
- Use tuistory and read the skill when you need to:
7
- - Run background processes for agents like dev servers. prefer it over `tmux` because it waits for real output instead of guessing with `sleep`
8
- - Control interactive CLIs and TUIs by typing, pressing keys, clicking, waiting, and taking snapshots
9
- - Write Playwright-style tests for terminal apps with `vitest` or `bun:test`
6
+ Use tuistory when you need to:
7
+ - Run background dev servers or long-lived processes
8
+ - Control interactive CLIs and TUIs (type, press keys, click, wait, snapshot)
9
+ - Write Playwright-style tests for terminal apps
10
10
 
11
- It has **2 modes**:
12
- - **CLI** (`tuistory`) for persistent background sessions and terminal automation. **Run `tuistory --help` first.**
13
- - **JS/TS API** (`launchTerminal`) for writing tests (like playwright for TUIs) and programmatic control in scripts.
11
+ **CLI** (`tuistory`) for background sessions. **JS/TS API** (`launchTerminal`) for programmatic control and tests.
14
12
  ---
15
13
 
16
14
  # tuistory
17
15
 
18
- Playwright for terminal apps. Use it to run background processes for agents, drive interactive TUIs, and write Playwright-style tests for CLIs and TUIs.
16
+ Run dev servers and terminal commands that AI agents can read, wait on, and type into. Wrap any command in a named background session. Humans get auto-attached; agents get a session they can inspect with `read`, `snapshot`, `wait`, and `type`.
19
17
 
20
- Prefer tuistory over `tmux` for agent automation. It is better because it reacts to terminal output with `wait` and `wait-idle` instead of wasting time on blind `sleep` calls. That makes scripts both faster and more reliable.
18
+ Prefer tuistory over `tmux` for agent automation. It reacts to terminal output with `wait` and `wait-idle` instead of wasting time on blind `sleep` calls. That makes scripts both faster and more reliable.
21
19
 
22
20
  Every time you use tuistory, you MUST run these two commands first. NEVER pipe to head/tail, read the full output:
23
21
 
@@ -29,14 +27,45 @@ tuistory --help
29
27
  curl -s https://raw.githubusercontent.com/remorses/tuistory/refs/heads/main/README.md
30
28
  ```
31
29
 
30
+ ## Dev script pattern
31
+
32
+ When a project has `"dev": "tuistory -- next dev"` in package.json, running `pnpm dev` gives agents a background session. The session name is auto-derived from `<cwd-basename>-<command>` in kebab-case. If the session is already running, agents get a message like:
33
+
34
+ ```
35
+ Session "myapp-next-dev" already running
36
+ with command: `next dev`
37
+ read output with: `tuistory read -s myapp-next-dev --all`
38
+ ```
39
+
40
+ Agents can then use `tuistory read -s x`, `tuistory -s x wait "ready"`, etc. to inspect the running process, where `x` stands for the session name shown in that message.
41
+
42
+ ## Dev script convention for projects
43
+
44
+ Projects should wrap their dev server command with `tuistory --` in `package.json` scripts. This ensures agents never hang on an interactive long-lived process, and dev servers started by humans are automatically shared with agents.
45
+
46
+ ```json
47
+ {
48
+ "scripts": {
49
+ "dev": "tuistory -- next dev",
50
+ "dev:api": "tuistory -- node api/server.js"
51
+ }
52
+ }
53
+ ```
54
+
55
+ When a human runs `pnpm dev`, they get auto-attached to the terminal (same experience as running the command directly). When an agent runs `pnpm dev`, the process launches in the background and the command returns immediately. If the session is already running, both humans and agents reuse it instead of fighting over ports.
56
+
57
+ **Agents MUST never stop or close a session started by another user or agent.** Dev server sessions are shared resources. Only close them when the user explicitly asks to stop the dev server. Default to leaving sessions running. Use `read`, `wait`, and `snapshot` to inspect them without disrupting them.
58
+
32
59
  ## Key rules
33
60
 
61
+ - **Options before `--`, command after.** Everything after `--` is passed verbatim to the child process. `tuistory -s myserver --cols 150 -- node server.js` is correct. `tuistory -- node server.js -s myserver` is wrong.
62
+ - Session names are auto-derived from `<cwd-basename>-<command>`. You usually don't need `-s` when launching.
34
63
  - Always run `snapshot --trim` after every CLI action to see the current terminal state
35
64
  - Always set a timeout on `waitForText` for async operations
36
65
  - String patterns are case-sensitive by default. Use regex like `/ready/i` when casing may vary.
37
66
  - Use `trimEnd: true` in `session.text()` to avoid trailing whitespace in snapshots
38
67
  - Close sessions in test teardown to avoid leaked processes
39
- - Use `--cols` and `--rows` to control terminal size affects TUI layout
68
+ - Use `--cols` and `--rows` to control terminal size, they affect TUI layout
40
69
  - Use `--pixel-ratio 2` for sharp screenshot images
41
70
 
42
71
  ## Feedback loop
@@ -46,21 +75,21 @@ Use an **observe → act → observe** loop, like Playwright but for terminals.
46
75
  ### Background process instead of tmux
47
76
 
48
77
  ```bash
49
- # start a server in the background
50
- tuistory -s dev -- bun run dev
78
+ # start a server in the background (session name auto-derived; `x` below is a placeholder for that name)
79
+ tuistory -- bun run dev
51
80
 
52
81
  # wait for actual output instead of sleep 5
53
82
  # use regex so this still matches Ready, READY, etc.
54
- tuistory -s dev wait "/ready/i" --timeout 30000
83
+ tuistory -s x wait "/ready/i" --timeout 30000
55
84
 
56
85
  # read everything the process printed
57
- tuistory read -s dev
86
+ tuistory read -s x
58
87
 
59
88
  # later, read only the new output
60
- tuistory read -s dev
89
+ tuistory read -s x
61
90
 
62
91
  # restart the server (sends Ctrl+C, waits, relaunches same command/cwd/env)
63
- tuistory -s dev restart
92
+ tuistory -s x restart
64
93
  ```
65
94
 
66
95
  Why this is better than `tmux`: