kimaki 0.10.2 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli-commands/misc.js +76 -2
- package/dist/cli.js +5 -0
- package/dist/commands/agent.js +116 -2
- package/dist/commands/gemini-apikey.js +24 -5
- package/dist/database.js +16 -0
- package/dist/discord-command-registration.js +9 -46
- package/dist/discord-utils.js +29 -0
- package/dist/discord-utils.test.js +68 -2
- package/dist/errors.js +5 -0
- package/dist/interaction-handler.js +78 -1
- package/dist/session-handler/thread-session-runtime.js +8 -1
- package/dist/store.js +1 -0
- package/dist/system-message.js +16 -0
- package/dist/voice-handler.js +7 -11
- package/dist/voice.js +126 -1
- package/package.json +4 -4
- package/skills/goke/SKILL.md +39 -0
- package/skills/new-skill/SKILL.md +1 -0
- package/skills/npm-package/SKILL.md +45 -0
- package/skills/spiceflow/SKILL.md +2 -0
- package/skills/tuistory/SKILL.md +46 -17
- package/src/cli-commands/misc.ts +90 -2
- package/src/cli.ts +12 -0
- package/src/commands/agent.ts +147 -1
- package/src/commands/gemini-apikey.ts +38 -6
- package/src/database.ts +16 -0
- package/src/discord-command-registration.ts +11 -71
- package/src/discord-utils.test.ts +82 -2
- package/src/discord-utils.ts +34 -0
- package/src/errors.ts +9 -0
- package/src/interaction-handler.ts +78 -1
- package/src/session-handler/thread-session-runtime.ts +11 -1
- package/src/store.ts +8 -0
- package/src/system-message.ts +16 -0
- package/src/voice-handler.ts +5 -16
- package/src/voice.ts +217 -0
|
@@ -45,7 +45,7 @@ import { handleScreenshareCommand, handleScreenshareStopCommand, } from './comma
|
|
|
45
45
|
import { handleVscodeCommand } from './commands/vscode.js';
|
|
46
46
|
import { handleModelVariantSelectMenu } from './commands/model.js';
|
|
47
47
|
import { handleModelVariantCommand, handleVariantQuickSelectMenu, handleVariantScopeSelectMenu, } from './commands/model-variant.js';
|
|
48
|
-
import { hasKimakiBotPermission } from './discord-utils.js';
|
|
48
|
+
import { hasKimakiAdminPermission, hasKimakiBotPermission } from './discord-utils.js';
|
|
49
49
|
import { createLogger, LogPrefix } from './logger.js';
|
|
50
50
|
import { notifyError } from './sentry.js';
|
|
51
51
|
const interactionLogger = createLogger(LogPrefix.INTERACTION);
|
|
@@ -178,6 +178,13 @@ export function registerInteractionHandler({ discordClient, appId, }) {
|
|
|
178
178
|
await handleUnsetModelCommand({ interaction, appId });
|
|
179
179
|
return;
|
|
180
180
|
case 'login':
|
|
181
|
+
if (!hasKimakiAdminPermission(interaction.member, interaction.guild)) {
|
|
182
|
+
await interaction.reply({
|
|
183
|
+
content: `Only server admins or users with the **Kimaki** role can configure login credentials.`,
|
|
184
|
+
flags: MessageFlags.Ephemeral,
|
|
185
|
+
});
|
|
186
|
+
return;
|
|
187
|
+
}
|
|
181
188
|
await handleLoginCommand({ interaction, appId });
|
|
182
189
|
return;
|
|
183
190
|
case 'agent':
|
|
@@ -223,6 +230,13 @@ export function registerInteractionHandler({ discordClient, appId, }) {
|
|
|
223
230
|
});
|
|
224
231
|
return;
|
|
225
232
|
case 'transcription-key':
|
|
233
|
+
if (!hasKimakiAdminPermission(interaction.member, interaction.guild)) {
|
|
234
|
+
await interaction.reply({
|
|
235
|
+
content: `Only server admins or users with the **Kimaki** role can configure API keys.`,
|
|
236
|
+
flags: MessageFlags.Ephemeral,
|
|
237
|
+
});
|
|
238
|
+
return;
|
|
239
|
+
}
|
|
226
240
|
await handleTranscriptionApiKeyCommand({
|
|
227
241
|
interaction,
|
|
228
242
|
appId,
|
|
@@ -269,6 +283,13 @@ export function registerInteractionHandler({ discordClient, appId, }) {
|
|
|
269
283
|
}
|
|
270
284
|
const customId = interaction.customId;
|
|
271
285
|
if (customId.startsWith('transcription_apikey:')) {
|
|
286
|
+
if (!hasKimakiAdminPermission(interaction.member, interaction.guild)) {
|
|
287
|
+
await interaction.reply({
|
|
288
|
+
content: `Only server admins or users with the **Kimaki** role can configure API keys.`,
|
|
289
|
+
flags: MessageFlags.Ephemeral,
|
|
290
|
+
});
|
|
291
|
+
return;
|
|
292
|
+
}
|
|
272
293
|
await handleTranscriptionApiKeyButton(interaction);
|
|
273
294
|
return;
|
|
274
295
|
}
|
|
@@ -283,14 +304,35 @@ export function registerInteractionHandler({ discordClient, appId, }) {
|
|
|
283
304
|
return;
|
|
284
305
|
}
|
|
285
306
|
if (customId.startsWith('login_text_btn:')) {
|
|
307
|
+
if (!hasKimakiAdminPermission(interaction.member, interaction.guild)) {
|
|
308
|
+
await interaction.reply({
|
|
309
|
+
content: `Only server admins or users with the **Kimaki** role can configure login credentials.`,
|
|
310
|
+
flags: MessageFlags.Ephemeral,
|
|
311
|
+
});
|
|
312
|
+
return;
|
|
313
|
+
}
|
|
286
314
|
await handleLoginTextButton(interaction);
|
|
287
315
|
return;
|
|
288
316
|
}
|
|
289
317
|
if (customId.startsWith('login_apikey_btn:')) {
|
|
318
|
+
if (!hasKimakiAdminPermission(interaction.member, interaction.guild)) {
|
|
319
|
+
await interaction.reply({
|
|
320
|
+
content: `Only server admins or users with the **Kimaki** role can configure login credentials.`,
|
|
321
|
+
flags: MessageFlags.Ephemeral,
|
|
322
|
+
});
|
|
323
|
+
return;
|
|
324
|
+
}
|
|
290
325
|
await handleLoginApiKeyButton(interaction);
|
|
291
326
|
return;
|
|
292
327
|
}
|
|
293
328
|
if (customId.startsWith('login_oauth_code_btn:')) {
|
|
329
|
+
if (!hasKimakiAdminPermission(interaction.member, interaction.guild)) {
|
|
330
|
+
await interaction.reply({
|
|
331
|
+
content: `Only server admins or users with the **Kimaki** role can configure login credentials.`,
|
|
332
|
+
flags: MessageFlags.Ephemeral,
|
|
333
|
+
});
|
|
334
|
+
return;
|
|
335
|
+
}
|
|
294
336
|
await handleOAuthCodeButton(interaction);
|
|
295
337
|
return;
|
|
296
338
|
}
|
|
@@ -362,6 +404,13 @@ export function registerInteractionHandler({ discordClient, appId, }) {
|
|
|
362
404
|
return;
|
|
363
405
|
}
|
|
364
406
|
if (customId.startsWith('login_select:')) {
|
|
407
|
+
if (!hasKimakiAdminPermission(interaction.member, interaction.guild)) {
|
|
408
|
+
await interaction.reply({
|
|
409
|
+
content: `Only server admins or users with the **Kimaki** role can configure login credentials.`,
|
|
410
|
+
flags: MessageFlags.Ephemeral,
|
|
411
|
+
});
|
|
412
|
+
return;
|
|
413
|
+
}
|
|
365
414
|
await handleLoginSelect(interaction);
|
|
366
415
|
return;
|
|
367
416
|
}
|
|
@@ -377,18 +426,46 @@ export function registerInteractionHandler({ discordClient, appId, }) {
|
|
|
377
426
|
}
|
|
378
427
|
const customId = interaction.customId;
|
|
379
428
|
if (customId.startsWith('login_apikey:')) {
|
|
429
|
+
if (!hasKimakiAdminPermission(interaction.member, interaction.guild)) {
|
|
430
|
+
await interaction.reply({
|
|
431
|
+
content: `Only server admins or users with the **Kimaki** role can configure credentials.`,
|
|
432
|
+
flags: MessageFlags.Ephemeral,
|
|
433
|
+
});
|
|
434
|
+
return;
|
|
435
|
+
}
|
|
380
436
|
await handleApiKeyModalSubmit(interaction);
|
|
381
437
|
return;
|
|
382
438
|
}
|
|
383
439
|
if (customId.startsWith('login_text:')) {
|
|
440
|
+
if (!hasKimakiAdminPermission(interaction.member, interaction.guild)) {
|
|
441
|
+
await interaction.reply({
|
|
442
|
+
content: `Only server admins or users with the **Kimaki** role can configure credentials.`,
|
|
443
|
+
flags: MessageFlags.Ephemeral,
|
|
444
|
+
});
|
|
445
|
+
return;
|
|
446
|
+
}
|
|
384
447
|
await handleLoginTextModalSubmit(interaction);
|
|
385
448
|
return;
|
|
386
449
|
}
|
|
387
450
|
if (customId.startsWith('login_oauth_code:')) {
|
|
451
|
+
if (!hasKimakiAdminPermission(interaction.member, interaction.guild)) {
|
|
452
|
+
await interaction.reply({
|
|
453
|
+
content: `Only server admins or users with the **Kimaki** role can configure credentials.`,
|
|
454
|
+
flags: MessageFlags.Ephemeral,
|
|
455
|
+
});
|
|
456
|
+
return;
|
|
457
|
+
}
|
|
388
458
|
await handleOAuthCodeModalSubmit(interaction);
|
|
389
459
|
return;
|
|
390
460
|
}
|
|
391
461
|
if (customId.startsWith('transcription_apikey_modal:')) {
|
|
462
|
+
if (!hasKimakiAdminPermission(interaction.member, interaction.guild)) {
|
|
463
|
+
await interaction.reply({
|
|
464
|
+
content: `Only server admins or users with the **Kimaki** role can configure credentials.`,
|
|
465
|
+
flags: MessageFlags.Ephemeral,
|
|
466
|
+
});
|
|
467
|
+
return;
|
|
468
|
+
}
|
|
392
469
|
await handleTranscriptionApiKeyModalSubmit(interaction);
|
|
393
470
|
return;
|
|
394
471
|
}
|
|
@@ -1859,7 +1859,7 @@ export class ThreadSessionRuntime {
|
|
|
1859
1859
|
await this.persistEventBufferDebounced.flush();
|
|
1860
1860
|
return;
|
|
1861
1861
|
}
|
|
1862
|
-
const errorMessage = formatSessionErrorFromProps(properties.error);
|
|
1862
|
+
const errorMessage = truncateSessionErrorMessage(formatSessionErrorFromProps(properties.error));
|
|
1863
1863
|
logger.error(`Sending error to thread: ${errorMessage}`);
|
|
1864
1864
|
await sendThreadMessage(this.thread, `✗ opencode session error: ${errorMessage}`, { flags: NOTIFY_MESSAGE_FLAGS });
|
|
1865
1865
|
await this.persistEventBufferDebounced.flush();
|
|
@@ -3541,3 +3541,10 @@ function formatSessionErrorFromProps(error) {
|
|
|
3541
3541
|
}
|
|
3542
3542
|
return parts.length > 0 ? parts.join(' ') : error.name || 'Unknown error';
|
|
3543
3543
|
}
|
|
3544
|
+
function truncateSessionErrorMessage(message) {
|
|
3545
|
+
const maxLength = 400;
|
|
3546
|
+
if (message.length <= maxLength) {
|
|
3547
|
+
return message;
|
|
3548
|
+
}
|
|
3549
|
+
return `${message.slice(0, maxLength - 1)}…`;
|
|
3550
|
+
}
|
package/dist/store.js
CHANGED
package/dist/system-message.js
CHANGED
|
@@ -317,6 +317,22 @@ To upload files to the Discord thread (images, screenshots, long files that woul
|
|
|
317
317
|
|
|
318
318
|
kimaki upload-to-discord --session ${sessionId} <file1> [file2] ...
|
|
319
319
|
|
|
320
|
+
## generating audio from text
|
|
321
|
+
|
|
322
|
+
When the user asks you to generate audio of some text so they can listen instead of reading, use \`kimaki tts\` to create a speech file and \`kimaki upload-to-discord\` to send it to the thread. Only use this when the user explicitly asks for audio.
|
|
323
|
+
|
|
324
|
+
\`\`\`bash
|
|
325
|
+
# generate audio from inline text
|
|
326
|
+
kimaki tts 'Your summary goes here' -o /tmp/summary.mp3
|
|
327
|
+
kimaki upload-to-discord --session ${sessionId} /tmp/summary.mp3
|
|
328
|
+
|
|
329
|
+
# generate audio from a file (pipe via stdin)
|
|
330
|
+
cat docs/explanation.md | kimaki tts -o /tmp/explanation.mp3
|
|
331
|
+
kimaki upload-to-discord --session ${sessionId} /tmp/explanation.mp3
|
|
332
|
+
\`\`\`
|
|
333
|
+
|
|
334
|
+
see --help for options like voice, speed, etc.
|
|
335
|
+
|
|
320
336
|
## requesting files from the user
|
|
321
337
|
|
|
322
338
|
To ask the user to upload files from their device, use the \`kimaki_file_upload\` tool. This shows a native file picker dialog in Discord. The files are downloaded to the project's \`uploads/\` directory and the tool returns the local file paths.
|
package/dist/voice-handler.js
CHANGED
|
@@ -9,10 +9,11 @@ import path from 'node:path';
|
|
|
9
9
|
import { Transform } from 'node:stream';
|
|
10
10
|
import * as prism from 'prism-media';
|
|
11
11
|
import dedent from 'string-dedent';
|
|
12
|
-
import { Events,
|
|
12
|
+
import { Events, } from 'discord.js';
|
|
13
13
|
import { createGenAIWorker } from './genai-worker-wrapper.js';
|
|
14
14
|
import { getVoiceChannelDirectory, getGeminiApiKey, getTranscriptionApiKey, findTextChannelByVoiceChannel, } from './database.js';
|
|
15
|
-
import { sendThreadMessage, escapeDiscordFormatting,
|
|
15
|
+
import { sendThreadMessage, escapeDiscordFormatting, NOTIFY_MESSAGE_FLAGS, hasKimakiBotPermission, } from './discord-utils.js';
|
|
16
|
+
import { showApiKeyRequiredButton } from './commands/gemini-apikey.js';
|
|
16
17
|
import { transcribeAudio } from './voice.js';
|
|
17
18
|
import { FetchError } from './errors.js';
|
|
18
19
|
import { store } from './store.js';
|
|
@@ -420,15 +421,10 @@ export async function processVoiceAttachment({ message, thread, projectDirectory
|
|
|
420
421
|
}
|
|
421
422
|
if (!transcriptionApiKey) {
|
|
422
423
|
if (appId) {
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
const row = new ActionRowBuilder().addComponents(button);
|
|
428
|
-
await thread.send({
|
|
429
|
-
content: 'Voice transcription requires an API key (OpenAI or Gemini). Set one to enable voice message transcription.',
|
|
430
|
-
components: [row],
|
|
431
|
-
flags: SILENT_MESSAGE_FLAGS,
|
|
424
|
+
await showApiKeyRequiredButton({
|
|
425
|
+
thread,
|
|
426
|
+
appId,
|
|
427
|
+
message: 'Voice transcription requires an API key (OpenAI or Gemini). Set one to enable voice message transcription.',
|
|
432
428
|
});
|
|
433
429
|
}
|
|
434
430
|
else {
|
package/dist/voice.js
CHANGED
|
@@ -13,7 +13,7 @@ import { Readable } from 'node:stream';
|
|
|
13
13
|
import prism from 'prism-media';
|
|
14
14
|
import * as errore from 'errore';
|
|
15
15
|
import { createLogger, LogPrefix } from './logger.js';
|
|
16
|
-
import { ApiKeyMissingError, InvalidAudioFormatError, TranscriptionError, EmptyTranscriptionError, NoResponseContentError, NoToolResponseError, } from './errors.js';
|
|
16
|
+
import { ApiKeyMissingError, InvalidAudioFormatError, TranscriptionError, EmptyTranscriptionError, NoResponseContentError, NoToolResponseError, SpeechGenerationError, } from './errors.js';
|
|
17
17
|
const voiceLogger = createLogger(LogPrefix.VOICE);
|
|
18
18
|
// OpenAI input_audio only supports wav and mp3. Other formats (OGG Opus, etc)
|
|
19
19
|
// must be converted before sending.
|
|
@@ -456,3 +456,128 @@ Note: "critique" is a CLI tool for showing diffs in the browser.`;
|
|
|
456
456
|
provider: resolvedProvider,
|
|
457
457
|
});
|
|
458
458
|
}
|
|
459
|
+
/** OpenAI TTS model ID used by the OpenAI speech path. */
const OPENAI_TTS_MODEL = 'gpt-4o-mini-tts';
/** Gemini TTS model ID used by the Gemini speech path. */
const GEMINI_TTS_MODEL = 'gemini-2.5-flash-preview-tts';
/** Voice used for each provider when the caller does not pass one. */
const DEFAULT_VOICES = {
    openai: 'alloy',
    gemini: 'Kore',
};
|
|
468
|
+
/**
 * Build the OpenAI speech model used for text-to-speech.
 *
 * @param {{ apiKey: string }} params - API key handed to the OpenAI provider factory.
 * @returns The speech model for `OPENAI_TTS_MODEL`.
 */
function createOpenAISpeechModel({ apiKey }) {
    return createOpenAI({ apiKey }).speech(OPENAI_TTS_MODEL);
}
|
|
476
|
+
/**
 * Generate speech with the OpenAI speech model and return it as mp3 bytes.
 *
 * Failures are returned as `SpeechGenerationError` values rather than thrown:
 * one for a rejected API call, one for an empty audio payload.
 *
 * @param {object} params
 * @param {string} params.text - Text to speak.
 * @param {string} [params.voice] - Voice name; falls back to `DEFAULT_VOICES.openai`.
 * @param {string} params.apiKey - OpenAI API key.
 * @param {string} [params.instructions] - Optional style instructions.
 * @param {number} [params.speed] - Optional playback speed.
 * @returns {Promise<{ audio: Buffer, mediaType: string } | Error>}
 */
async function generateSpeechOpenAI({ text, voice, apiKey, instructions, speed, }) {
    const speechModel = createOpenAISpeechModel({ apiKey });
    // Only truthy values are forwarded in the provider-specific options.
    const openaiOptions = {};
    if (instructions) {
        openaiOptions.instructions = instructions;
    }
    if (speed) {
        openaiOptions.speed = speed;
    }
    const request = {
        text,
        voice: voice || DEFAULT_VOICES.openai,
        outputFormat: 'mp3',
        instructions,
        speed,
        providerOptions: { openai: openaiOptions },
    };
    // A rejected call is turned into an error value so callers can branch on `instanceof Error`.
    const result = await Promise.resolve(speechModel.doGenerate(request)).catch((e) => new SpeechGenerationError({ reason: `OpenAI TTS API call failed: ${String(e)}`, cause: e }));
    if (result instanceof Error) {
        return result;
    }
    // String audio is decoded as base64; any other payload is copied into a Buffer.
    let bytes;
    if (typeof result.audio === 'string') {
        bytes = Buffer.from(result.audio, 'base64');
    }
    else {
        bytes = Buffer.from(result.audio);
    }
    if (bytes.length === 0) {
        return new SpeechGenerationError({ reason: 'OpenAI TTS returned empty audio' });
    }
    return { audio: bytes, mediaType: 'audio/mp3' };
}
|
|
505
|
+
/**
 * Generate speech via the Google Gemini TTS model.
 *
 * Calls the language-model interface with `responseModalities: ['AUDIO']`
 * and reads the first `file` part of the response as the audio payload.
 * Raw PCM payloads (`audio/L16`, `audio/pcm`) are wrapped in a WAV header
 * and returned as `audio/wav`; any other media type is returned unchanged.
 *
 * Failures are returned as `SpeechGenerationError` values rather than thrown:
 * rejected API call, missing audio part, or empty audio payload.
 *
 * @param {object} params
 * @param {string} params.text - Text to speak.
 * @param {string} [params.voice] - Prebuilt voice name; falls back to `DEFAULT_VOICES.gemini`.
 * @param {string} params.apiKey - Gemini API key.
 * @returns {Promise<{ audio: Buffer, mediaType: string } | Error>}
 */
async function generateSpeechGemini({ text, voice, apiKey, }) {
    const google = createGoogleGenerativeAI({ apiKey });
    const model = google(GEMINI_TTS_MODEL);
    const resolvedVoice = voice || DEFAULT_VOICES.gemini;
    const options = {
        prompt: [
            {
                role: 'user',
                content: [{ type: 'text', text }],
            },
        ],
        providerOptions: {
            google: {
                responseModalities: ['AUDIO'],
                speechConfig: {
                    voiceConfig: {
                        prebuiltVoiceConfig: {
                            voiceName: resolvedVoice,
                        },
                    },
                },
            },
        },
    };
    // A rejected call is turned into an error value so callers can branch on `instanceof Error`.
    const response = await Promise.resolve(model.doGenerate(options)).catch((e) => new SpeechGenerationError({ reason: `Gemini TTS API call failed: ${String(e)}`, cause: e }));
    if (response instanceof Error) {
        return response;
    }
    // The audio arrives as a `file` content part.
    const filePart = response.content.find((c) => c.type === 'file');
    if (!filePart) {
        return new SpeechGenerationError({ reason: 'Gemini TTS returned no audio content' });
    }
    // String data is decoded as base64; any other payload is copied into a Buffer.
    const audioData = typeof filePart.data === 'string'
        ? Buffer.from(filePart.data, 'base64')
        : Buffer.from(filePart.data);
    if (audioData.length === 0) {
        return new SpeechGenerationError({ reason: 'Gemini TTS returned empty audio' });
    }
    const mediaType = filePart.mediaType || 'audio/wav';
    // Media types are case-insensitive and may carry parameters
    // (e.g. "audio/L16;codec=pcm;rate=24000"), so compare only the
    // lowercased base type. An exact, case-sensitive match would let
    // parameterised L16 audio through without a WAV header.
    const baseType = mediaType.split(';')[0].trim().toLowerCase();
    const needsWavHeader = baseType === 'audio/l16' || baseType === 'audio/pcm';
    if (needsWavHeader) {
        // Header values assume 24kHz mono 16-bit PCM, as the original code did.
        // NOTE(review): a `rate=` parameter in the media type is not honoured — confirm 24kHz is always returned.
        const wavHeader = createWavHeader({
            dataLength: audioData.length,
            sampleRate: 24000,
            numChannels: 1,
            bitsPerSample: 16,
        });
        return { audio: Buffer.concat([wavHeader, audioData]), mediaType: 'audio/wav' };
    }
    return { audio: audioData, mediaType };
}
|
|
565
|
+
/**
 * Generate speech audio from text using OpenAI or Google Gemini TTS.
 *
 * API key resolution: the explicit `apiKey` argument wins. Otherwise the
 * environment is consulted, preferring the variable that matches an
 * explicitly requested `provider` (`GEMINI_API_KEY` for `'gemini'`,
 * `OPENAI_API_KEY` otherwise) and then falling back to the other one.
 *
 * Provider resolution: an explicit `provider` wins; otherwise a key that
 * starts with `sk-` selects OpenAI and anything else selects Gemini.
 *
 * @param {object} params
 * @param {string} params.text - Text to speak.
 * @param {string} [params.voice] - Provider-specific voice name.
 * @param {string} [params.apiKey] - API key; falls back to environment variables.
 * @param {'openai' | 'gemini'} [params.provider] - Force a provider instead of auto-detecting.
 * @param {string} [params.instructions] - Style instructions (forwarded to OpenAI only).
 * @param {number} [params.speed] - Playback speed (forwarded to OpenAI only).
 * @returns {Promise<{ audio: Buffer, mediaType: string } | Error>} Audio bytes, or an error value
 *   (`ApiKeyMissingError` when no key can be resolved).
 */
export async function generateSpeech({ text, voice, apiKey: apiKeyParam, provider, instructions, speed, }) {
    // When the caller forces Gemini, try the Gemini env var first. Otherwise
    // a set OPENAI_API_KEY would be sent to the Gemini API even though
    // GEMINI_API_KEY is available. All other cases keep the original order.
    const envKeys = provider === 'gemini'
        ? [process.env.GEMINI_API_KEY, process.env.OPENAI_API_KEY]
        : [process.env.OPENAI_API_KEY, process.env.GEMINI_API_KEY];
    const apiKey = apiKeyParam || envKeys.find(Boolean);
    if (!apiKey) {
        return new ApiKeyMissingError({ service: 'OpenAI or Gemini' });
    }
    const resolvedProvider = provider || (apiKey.startsWith('sk-') ? 'openai' : 'gemini');
    voiceLogger.log(`Generating speech with ${resolvedProvider}, text length: ${text.length}`);
    if (resolvedProvider === 'openai') {
        return generateSpeechOpenAI({ text, voice, apiKey, instructions, speed });
    }
    return generateSpeechGemini({ text, voice, apiKey });
}
|
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "kimaki",
|
|
3
3
|
"module": "index.ts",
|
|
4
4
|
"type": "module",
|
|
5
|
-
"version": "0.
|
|
5
|
+
"version": "0.11.0",
|
|
6
6
|
"repository": "https://github.com/remorses/kimaki",
|
|
7
7
|
"bin": "bin.js",
|
|
8
8
|
"files": [
|
|
@@ -26,8 +26,8 @@
|
|
|
26
26
|
"undici": "^8.0.2",
|
|
27
27
|
"discord-digital-twin": "^0.1.0",
|
|
28
28
|
"opencode-cached-provider": "^0.0.1",
|
|
29
|
-
"
|
|
30
|
-
"
|
|
29
|
+
"opencode-deterministic-provider": "^0.0.1",
|
|
30
|
+
"db": "^0.0.0"
|
|
31
31
|
},
|
|
32
32
|
"dependencies": {
|
|
33
33
|
"@ai-sdk/google": "^3.0.53",
|
|
@@ -63,8 +63,8 @@
|
|
|
63
63
|
"zod": "^4.3.6",
|
|
64
64
|
"zustand": "^5.0.11",
|
|
65
65
|
"errore": "^0.14.1",
|
|
66
|
-
"traforo": "^0.5.0",
|
|
67
66
|
"libsqlproxy": "^0.1.0",
|
|
67
|
+
"traforo": "^0.5.0",
|
|
68
68
|
"opencode-injection-guard": "^0.2.1"
|
|
69
69
|
},
|
|
70
70
|
"optionalDependencies": {
|
package/skills/goke/SKILL.md
CHANGED
|
@@ -38,6 +38,14 @@ npm install goke # or bun, pnpm, etc
|
|
|
38
38
|
|
|
39
39
|
The README is the source of truth for rules, examples, testing patterns, JustBash integration, and API details.
|
|
40
40
|
|
|
41
|
+
If the README or this skill mentions a `goke` export that is missing from the installed package, upgrade `goke` to latest first before adding workarounds or custom local detection code:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
pnpm update goke --latest
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Use the project package manager for the repo you are editing. After upgrading, re-check the export from the installed package and continue with the documented API.
|
|
48
|
+
|
|
41
49
|
## Terminal Colors
|
|
42
50
|
|
|
43
51
|
**Never install a separate color library.** goke vendors picocolors and exports it as `colors`:
|
|
@@ -75,6 +83,37 @@ if (isAgent || !process.stdin.isTTY) {
|
|
|
75
83
|
|
|
76
84
|
Supported agents: `cursor`, `claude`, `devin`, `replit`, `gemini`, `codex`, `auggie`, `opencode`, `kiro`, `goose`, `pi`. Set `AI_AGENT` env var to override.
|
|
77
85
|
|
|
86
|
+
## Long-Running Interactive Commands
|
|
87
|
+
|
|
88
|
+
Commands that start a browser/device login flow or any other long-running TTY-only interaction must fail fast in non-TTY shells. Do not start the flow and hope the agent notices the URL. The process must stay alive while the user approves the browser prompt, so agents need to launch it in a persistent terminal session like tuistory or tmux.
|
|
89
|
+
|
|
90
|
+
Always guard these commands with `!process.stdout.isTTY` before making network requests, opening the browser, or starting spinners. Do not fail just because an agent is running the command if the command has a real TTY.
|
|
91
|
+
|
|
92
|
+
```ts
|
|
93
|
+
import dedent from 'string-dedent'
|
|
94
|
+
import { goke } from 'goke'
|
|
95
|
+
|
|
96
|
+
cli.command('login', 'Authenticate with browser login').action((options, { console, process }) => {
|
|
97
|
+
if (!process.stdout.isTTY) {
|
|
98
|
+
console.error(dedent`
|
|
99
|
+
mycli login needs an interactive terminal and must stay alive while you approve the browser login.
|
|
100
|
+
|
|
101
|
+
Run it in a background terminal session like tuistory or tmux, then wait for the URL/code:
|
|
102
|
+
|
|
103
|
+
bunx tuistory launch "mycli login" -s mycli-login
|
|
104
|
+
bunx tuistory -s mycli-login wait "/code:|https?:\\/\\//i" --timeout 15000
|
|
105
|
+
|
|
106
|
+
The login command exits by itself after successful browser approval.
|
|
107
|
+
`)
|
|
108
|
+
process.exit(1)
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
// Start device/browser login only after the guard.
|
|
112
|
+
})
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Use `tuistory wait` for the handoff point that needs user interaction. It returns nearby lines around the match, so agents can show the URL/code without a separate `tuistory read` call. Do not add `tuistory close` to login instructions when the CLI exits by itself after success.
|
|
116
|
+
|
|
78
117
|
## Shell Completions
|
|
79
118
|
|
|
80
119
|
**Always add `.completions()` next to `.help()` in every CLI.** This gives users Tab completion for free.
|
|
@@ -128,6 +128,7 @@ There is no rigid template. Structure the content in whatever way communicates t
|
|
|
128
128
|
- **Use code blocks with language hints.** The agent uses these to generate correct code.
|
|
129
129
|
- **Keep prose short between code blocks.** One or two sentences of explanation, then an example.
|
|
130
130
|
- **Call out common mistakes.** If there is a gotcha the agent will likely hit, warn about it explicitly.
|
|
131
|
+
- **Do not add HTML comments.** Skills are instructions, not generated files. Avoid comments like `<!-- Skill instructions for agents using ... -->`.
|
|
131
132
|
|
|
132
133
|
## What makes a good skill
|
|
133
134
|
|
|
@@ -293,6 +293,29 @@ Resolution flow when `tsc` sees `import db from '#sqlite'`:
|
|
|
293
293
|
- **Bun / browser runtime conditions can still point at `src`**, because
|
|
294
294
|
those runtimes execute `.ts` directly and skip the build step.
|
|
295
295
|
|
|
296
|
+
### Excluding heavy dependencies from server/client bundles
|
|
297
|
+
|
|
298
|
+
`imports` conditions can swap a heavy client-only dependency for a lightweight
|
|
299
|
+
noop stub on the server (or vice versa). This keeps the SSR bundle small
|
|
300
|
+
without changing application code.
|
|
301
|
+
|
|
302
|
+
```json
|
|
303
|
+
{
|
|
304
|
+
"imports": {
|
|
305
|
+
"#prism": {
|
|
306
|
+
"types": "./dist/prism.d.ts",
|
|
307
|
+
"ssr": "./dist/prism-noop.js",
|
|
308
|
+
"browser": "./dist/prism.js",
|
|
309
|
+
"default": "./dist/prism.js"
|
|
310
|
+
}
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
`src/prism.ts` imports the real library (~500KB). `src/prism-noop.ts` exports
|
|
316
|
+
the same interface with stub implementations that return input unchanged.
|
|
317
|
+
Application code imports from `#prism` and gets the right version automatically.
|
|
318
|
+
|
|
296
319
|
### Requirements
|
|
297
320
|
|
|
298
321
|
This only works when:
|
|
@@ -608,6 +631,28 @@ dist
|
|
|
608
631
|
|
|
609
632
|
Workspace packages inside a monorepo inherit the root `.gitignore`, so this only applies to standalone packages.
|
|
610
633
|
|
|
634
|
+
## Peer dependencies
|
|
635
|
+
|
|
636
|
+
Peer dependencies are installed by default by npm and pnpm. Use them to prevent a dependency from being duplicated in the consumer's `node_modules` tree. Common examples: `react`, `react-dom`, framework packages that must be singletons.
|
|
637
|
+
|
|
638
|
+
To make peer dependencies optional, add `peerDependenciesMeta`:
|
|
639
|
+
|
|
640
|
+
```json
|
|
641
|
+
{
|
|
642
|
+
"peerDependencies": {
|
|
643
|
+
"react": "^18.0.0 || ^19.0.0",
|
|
644
|
+
"sass": "^1.70.0",
|
|
645
|
+
"tsx": "^4.8.1"
|
|
646
|
+
},
|
|
647
|
+
"peerDependenciesMeta": {
|
|
648
|
+
"sass": { "optional": true },
|
|
649
|
+
"tsx": { "optional": true }
|
|
650
|
+
}
|
|
651
|
+
}
|
|
652
|
+
```
|
|
653
|
+
|
|
654
|
+
`react` above is required (no meta entry), so consumers must install it. `sass` and `tsx` are optional; the package works without them but can use them if present.
|
|
655
|
+
|
|
611
656
|
## common mistakes
|
|
612
657
|
|
|
613
658
|
- if you need to use zod always use latest version
|
|
@@ -35,6 +35,8 @@ Reference examples for real-world usage:
|
|
|
35
35
|
|
|
36
36
|
Always import and use `Link` from `spiceflow/react` for navigational links in Spiceflow apps. Do not render raw `<a>` elements for links. `Link` enables client-side navigation while preserving normal anchor behavior for external URLs, hashes, `target`, `rel`, styling, and event handlers. `Link` supports external URLs too, so it is fine to use for ambiguous or user-provided links when you do not know ahead of time whether they are internal or external.
|
|
37
37
|
|
|
38
|
+
**`Link` auto-prepends the Vite `base` path.** Never manually prepend the base path to `Link` href values. `<Link href="/dashboard" />` automatically renders as `<a href="/my-app/dashboard">` when the Vite base is `/my-app/`. Manually prepending causes double-prefixing. This only applies to `Link`; raw `fetch()` calls, `Response.redirect()`, and other non-Link URL construction still need manual base path handling.
|
|
39
|
+
|
|
38
40
|
## OpenTelemetry instrumentation
|
|
39
41
|
|
|
40
42
|
Spiceflow supports automatic route instrumentation when you pass an OpenTelemetry-compatible tracer to the constructor:
|
package/skills/tuistory/SKILL.md
CHANGED
|
@@ -1,23 +1,21 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: tuistory
|
|
3
3
|
description: |
|
|
4
|
-
|
|
4
|
+
tmux for AI agents. Run dev servers and TUIs in named background sessions that agents can read, wait on, snapshot, and type into. Replaces tmux with reactive waiting instead of blind `sleep`. Projects wrap their dev script with tuistory (`"dev": "tuistory -- next dev"`) so agents get a background session and humans get auto-attached.
|
|
5
5
|
|
|
6
|
-
Use tuistory
|
|
7
|
-
- Run background
|
|
8
|
-
- Control interactive CLIs and TUIs
|
|
9
|
-
- Write Playwright-style tests for terminal apps
|
|
6
|
+
Use tuistory when you need to:
|
|
7
|
+
- Run background dev servers or long-lived processes
|
|
8
|
+
- Control interactive CLIs and TUIs (type, press keys, click, wait, snapshot)
|
|
9
|
+
- Write Playwright-style tests for terminal apps
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
- **CLI** (`tuistory`) for persistent background sessions and terminal automation. **Run `tuistory --help` first.**
|
|
13
|
-
- **JS/TS API** (`launchTerminal`) for writing tests (like playwright for TUIs) and programmatic control in scripts.
|
|
11
|
+
**CLI** (`tuistory`) for background sessions. **JS/TS API** (`launchTerminal`) for programmatic control and tests.
|
|
14
12
|
---
|
|
15
13
|
|
|
16
14
|
# tuistory
|
|
17
15
|
|
|
18
|
-
|
|
16
|
+
Run dev servers and terminal commands that AI agents can read, wait on, and type into. Wrap any command in a named background session. Humans get auto-attached; agents get a session they can inspect with `read`, `snapshot`, `wait`, and `type`.
|
|
19
17
|
|
|
20
|
-
Prefer tuistory over `tmux` for agent automation. It
|
|
18
|
+
Prefer tuistory over `tmux` for agent automation. It reacts to terminal output with `wait` and `wait-idle` instead of wasting time on blind `sleep` calls. That makes scripts both faster and more reliable.
|
|
21
19
|
|
|
22
20
|
Every time you use tuistory, you MUST run these two commands first. NEVER pipe to head/tail, read the full output:
|
|
23
21
|
|
|
@@ -29,14 +27,45 @@ tuistory --help
|
|
|
29
27
|
curl -s https://raw.githubusercontent.com/remorses/tuistory/refs/heads/main/README.md
|
|
30
28
|
```
|
|
31
29
|
|
|
30
|
+
## Dev script pattern
|
|
31
|
+
|
|
32
|
+
When a project has `"dev": "tuistory -- next dev"` in package.json, running `pnpm dev` gives agents a background session. The session name is auto-derived from `<cwd-basename>-<command>` in kebab-case. If the session is already running, agents get a message like:
|
|
33
|
+
|
|
34
|
+
```
|
|
35
|
+
Session "myapp-next-dev" already running
|
|
36
|
+
with command: `next dev`
|
|
37
|
+
read output with: `tuistory read -s myapp-next-dev --all`
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Agents can then use `tuistory read -s x`, `tuistory -s x wait "ready"`, etc. (where `x` stands for the session name) to inspect the running process.
|
|
41
|
+
|
|
42
|
+
## Dev script convention for projects
|
|
43
|
+
|
|
44
|
+
Projects should wrap their dev server command with `tuistory --` in `package.json` scripts. This ensures agents never hang on an interactive long-lived process, and dev servers started by humans are automatically shared with agents.
|
|
45
|
+
|
|
46
|
+
```json
|
|
47
|
+
{
|
|
48
|
+
"scripts": {
|
|
49
|
+
"dev": "tuistory -- next dev",
|
|
50
|
+
"dev:api": "tuistory -- node api/server.js"
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
When a human runs `pnpm dev`, they get auto-attached to the terminal (same experience as running the command directly). When an agent runs `pnpm dev`, the process launches in the background and the command returns immediately. If the session is already running, both humans and agents reuse it instead of fighting over ports.
|
|
56
|
+
|
|
57
|
+
**Agents MUST never stop or close a session started by another user or agent.** Dev server sessions are shared resources. Only close them when the user explicitly asks to stop the dev server. Default to leaving sessions running. Use `read`, `wait`, and `snapshot` to inspect them without disrupting them.
|
|
58
|
+
|
|
32
59
|
## Key rules
|
|
33
60
|
|
|
61
|
+
- **Options before `--`, command after.** Everything after `--` is passed verbatim to the child process. `tuistory -s myserver --cols 150 -- node server.js` is correct. `tuistory -- node server.js -s myserver` is wrong.
|
|
62
|
+
- Session names are auto-derived from `<cwd-basename>-<command>`. You usually don't need `-s` when launching.
|
|
34
63
|
- Always run `snapshot --trim` after every CLI action to see the current terminal state
|
|
35
64
|
- Always set a timeout on `waitForText` for async operations
|
|
36
65
|
- String patterns are case-sensitive by default. Use regex like `/ready/i` when casing may vary.
|
|
37
66
|
- Use `trimEnd: true` in `session.text()` to avoid trailing whitespace in snapshots
|
|
38
67
|
- Close sessions in test teardown to avoid leaked processes
|
|
39
|
-
- Use `--cols` and `--rows` to control terminal size
|
|
68
|
+
- Use `--cols` and `--rows` to control terminal size; they affect TUI layout
|
|
40
69
|
- Use `--pixel-ratio 2` for sharp screenshot images
|
|
41
70
|
|
|
42
71
|
## Feedback loop
|
|
@@ -46,21 +75,21 @@ Use an **observe → act → observe** loop, like Playwright but for terminals.
|
|
|
46
75
|
### Background process instead of tmux
|
|
47
76
|
|
|
48
77
|
```bash
|
|
49
|
-
# start a server in the background
|
|
50
|
-
tuistory
|
|
78
|
+
# start a server in the background (session name auto-derived)
|
|
79
|
+
tuistory -- bun run dev
|
|
51
80
|
|
|
52
81
|
# wait for actual output instead of sleep 5
|
|
53
82
|
# use regex so this still matches Ready, READY, etc.
|
|
54
|
-
tuistory -s
|
|
83
|
+
tuistory -s x wait "/ready/i" --timeout 30000
|
|
55
84
|
|
|
56
85
|
# read everything the process printed
|
|
57
|
-
tuistory read -s
|
|
86
|
+
tuistory read -s x
|
|
58
87
|
|
|
59
88
|
# later, read only the new output
|
|
60
|
-
tuistory read -s
|
|
89
|
+
tuistory read -s x
|
|
61
90
|
|
|
62
91
|
# restart the server (sends Ctrl+C, waits, relaunches same command/cwd/env)
|
|
63
|
-
tuistory -s
|
|
92
|
+
tuistory -s x restart
|
|
64
93
|
```
|
|
65
94
|
|
|
66
95
|
Why this is better than `tmux`:
|