loki-mode 5.13.1 → 5.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/VERSION CHANGED
@@ -1 +1 @@
1
- 5.13.1
1
+ 5.14.0
package/autonomy/loki CHANGED
@@ -137,6 +137,7 @@ show_help() {
137
137
  echo " api [cmd] HTTP API server (start|stop|status)"
138
138
  echo " sandbox [cmd] Docker sandbox (start|stop|status|logs|shell|build)"
139
139
  echo " notify [cmd] Send notifications (test|slack|discord|webhook|status)"
140
+ echo " voice [cmd] Voice input for PRD creation (status|listen|dictate|speak|start)"
140
141
  echo " import Import GitHub issues as tasks"
141
142
  echo " config [cmd] Manage configuration (show|init|edit|path)"
142
143
  echo " memory [cmd] Cross-project learnings (list|show|search|stats)"
@@ -2486,6 +2487,9 @@ main() {
2486
2487
  enterprise)
2487
2488
  cmd_enterprise "$@"
2488
2489
  ;;
2490
+ voice)
2491
+ cmd_voice "$@"
2492
+ ;;
2489
2493
  version|--version|-v)
2490
2494
  cmd_version
2491
2495
  ;;
@@ -3365,6 +3369,99 @@ for check, passed in checks.items():
3365
3369
  esac
3366
3370
  }
3367
3371
 
3372
# Voice input commands.
#
# Dispatches `loki voice <subcommand>` to the standalone voice.sh module.
#
# Arguments:
#   $1 - subcommand: status (default), listen, dictate, speak, start, help
#   $2 - dictate/start: path of the PRD file to write
#        speak: first word of the message (all remaining args are joined)
# Globals read: SKILL_DIR, RED, GREEN, BOLD, NC. `start` calls cmd_start.
# Exits 1 when the module cannot be found, on usage errors and on unknown
# subcommands; returns 1 when listening or dictation fails.
cmd_voice() {
    local subcommand="${1:-status}"

    # Help needs nothing from the voice module, so answer it before
    # checking that the module is installed.
    case "$subcommand" in
        --help|-h|help)
            echo -e "${BOLD}loki voice${NC} - Voice input for PRD creation"
            echo ""
            echo "Usage: loki voice <command> [options]"
            echo ""
            echo "Commands:"
            echo " status Check voice input capabilities"
            echo " listen Listen and transcribe voice input"
            echo " dictate [FILE] Guided PRD dictation (default: prd-voice.md)"
            echo " speak MESSAGE Text-to-speech output"
            echo " start [FILE] Dictate PRD and start Loki Mode immediately"
            echo ""
            echo "Requirements:"
            echo " macOS: Enable Dictation in System Settings > Keyboard"
            echo " Or: Set OPENAI_API_KEY for Whisper API transcription"
            echo " Or: pip install openai-whisper for local transcription"
            return 0
            ;;
    esac

    local primary_script="$SKILL_DIR/autonomy/voice.sh"
    local VOICE_SCRIPT="$primary_script"

    # Fallback location: next to the loki CLI itself.
    if [ ! -f "$VOICE_SCRIPT" ]; then
        local loki_dir
        loki_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
        VOICE_SCRIPT="$loki_dir/voice.sh"
    fi

    if [ ! -f "$VOICE_SCRIPT" ]; then
        echo -e "${RED}Error: Voice module not found${NC}" >&2
        # Report every location that was actually searched.
        echo "Expected at: $primary_script" >&2
        echo "or at: $VOICE_SCRIPT" >&2
        echo "" >&2
        echo "Voice input requires the voice.sh module." >&2
        echo "This feature may not be available in all installations." >&2
        exit 1
    fi

    # The module is run through `bash` so it works even when the installed
    # file has lost its executable bit.
    case "$subcommand" in
        status)
            bash "$VOICE_SCRIPT" status
            ;;
        listen)
            echo -e "${BOLD}Starting voice input...${NC}"
            local text
            text=$(bash "$VOICE_SCRIPT" listen) || return 1
            if [ -n "$text" ]; then
                echo ""
                echo -e "${GREEN}Transcribed text:${NC}"
                echo "$text"
            fi
            ;;
        dictate)
            local output="${2:-prd-voice.md}"
            echo -e "${BOLD}Starting guided PRD dictation...${NC}"
            echo ""
            # Check the exit status as well as the file: a stale file from an
            # earlier run must not be reported as a freshly created PRD.
            if bash "$VOICE_SCRIPT" dictate "$output" && [ -f "$output" ]; then
                echo ""
                echo -e "${GREEN}PRD created: $output${NC}"
                echo ""
                echo "Start Loki Mode with:"
                echo " loki start $output"
            else
                echo -e "${RED}Dictation failed; no PRD written to $output${NC}" >&2
                return 1
            fi
            ;;
        speak)
            shift
            if [ $# -eq 0 ]; then
                echo -e "${RED}Usage: loki voice speak MESSAGE${NC}" >&2
                exit 1
            fi
            bash "$VOICE_SCRIPT" speak "$*"
            ;;
        start)
            # Dictate PRD and start Loki Mode
            local prd_file="${2:-prd-voice-$(date +%Y%m%d%H%M%S).md}"
            echo -e "${BOLD}Voice-activated PRD creation...${NC}"
            if bash "$VOICE_SCRIPT" dictate "$prd_file" && [ -f "$prd_file" ]; then
                echo ""
                echo -e "${GREEN}PRD created. Starting Loki Mode...${NC}"
                cmd_start "$prd_file"
            else
                echo -e "${RED}Dictation failed; Loki Mode not started${NC}" >&2
                return 1
            fi
            ;;
        *)
            echo -e "${RED}Unknown voice command: $subcommand${NC}" >&2
            echo "Run 'loki voice help' for usage." >&2
            exit 1
            ;;
    esac
}
3464
+
3368
3465
  # Enterprise features (optional - requires env vars)
3369
3466
  cmd_enterprise() {
3370
3467
  local subcommand="${1:-status}"
@@ -0,0 +1,487 @@
1
+ #!/usr/bin/env bash
2
+ # Loki Mode Voice Input Support (v1.0.0)
3
+ # Enables voice-to-text for PRD dictation and command input
4
+ #
5
+ # Usage:
6
+ # ./autonomy/voice.sh listen - Listen for voice input
7
+ # ./autonomy/voice.sh speak MESSAGE - Text-to-speech output
8
+ # ./autonomy/voice.sh dictate FILE - Dictate to file
9
+ # ./autonomy/voice.sh status - Check voice capabilities
10
+ #
11
+ # Requires: macOS with Dictation enabled, or Whisper API
12
+
13
set -euo pipefail

# Directory holding this script, and the Loki state directory (which can be
# overridden through the LOKI_DIR environment variable).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
LOKI_DIR="${LOKI_DIR:-.loki}"

# Colors. Coloured messages are written to stdout and to stderr, so escape
# sequences are only enabled when BOTH streams are terminals; otherwise they
# would leak into whichever stream is redirected to a file or a pipe.
if [[ -t 1 && -t 2 ]]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[0;33m'
  BLUE='\033[0;34m'
  NC='\033[0m'
else
  RED='' GREEN='' YELLOW='' BLUE='' NC=''
fi
28
+
29
# Logging helpers. Every helper writes to stderr: several functions in this
# script hand their result back on stdout and are called as $(...), so a log
# line on stdout would end up inside the captured text.
log() { echo -e "${BLUE}[loki-voice]${NC} $*" >&2; }
log_success() { echo -e "${GREEN}[loki-voice]${NC} $*" >&2; }
log_warn() { echo -e "${YELLOW}[loki-voice]${NC} $*" >&2; }
log_error() { echo -e "${RED}[loki-voice]${NC} $*" >&2; }
33
+
34
# Detect the platform from bash's OSTYPE variable.
# Outputs one of: macos, linux, windows, unknown.
# Any "linux*" value counts as Linux, so musl-based systems (OSTYPE
# "linux-musl") and Android ("linux-android") are recognised as well as
# "linux-gnu".
detect_platform() {
  case "${OSTYPE:-}" in
    darwin*)
      echo "macos"
      ;;
    linux*)
      echo "linux"
      ;;
    msys|cygwin)
      echo "windows"
      ;;
    *)
      echo "unknown"
      ;;
  esac
}
46
+
47
# Report which speech-to-text backend can be used on this machine.
# Outputs one of: macos-dictation, whisper, whisper-api, vosk, none.
# Preference order is macOS Dictation > local whisper > Whisper API on
# macOS, and local whisper > Whisper API > vosk (with arecord) on Linux.
check_voice_input() {
  local os_name
  os_name=$(detect_platform)

  local detected="none"

  if [[ "$os_name" == "macos" ]]; then
    # The Dictation preference reads as 1 when Dictation is switched on.
    if defaults read com.apple.speech.recognition.AppleSpeechRecognition.prefs DictationIMMEnabled 2>/dev/null | grep -q "1"; then
      detected="macos-dictation"
    elif command -v whisper &>/dev/null; then
      detected="whisper"
    elif [[ -n "${OPENAI_API_KEY:-}" ]]; then
      detected="whisper-api"
    fi
  elif [[ "$os_name" == "linux" ]]; then
    if command -v whisper &>/dev/null; then
      detected="whisper"
    elif [[ -n "${OPENAI_API_KEY:-}" ]]; then
      detected="whisper-api"
    elif command -v arecord &>/dev/null && command -v vosk &>/dev/null; then
      detected="vosk"
    fi
  fi

  echo "$detected"
}
81
+
82
# Report which text-to-speech backend can be used on this machine.
# Outputs one of: say (macOS), espeak or festival (Linux), none.
check_voice_output() {
  local os_name
  os_name=$(detect_platform)

  local backend="none"

  if [[ "$os_name" == "macos" ]]; then
    if command -v say &>/dev/null; then
      backend="say"
    fi
  elif [[ "$os_name" == "linux" ]]; then
    if command -v espeak &>/dev/null; then
      backend="espeak"
    elif command -v festival &>/dev/null; then
      backend="festival"
    fi
  fi

  echo "$backend"
}
109
+
110
# Speak a message aloud with whichever TTS backend check_voice_output
# reports. When no backend exists the message is printed instead.
# Arguments: $1 - the message to speak
speak() {
  local message="$1"
  local tts_backend
  tts_backend=$(check_voice_output)

  if [[ "$tts_backend" == "say" ]]; then
    # Prefer the Samantha voice; fall back to the default voice if that fails.
    say -v "Samantha" "$message" 2>/dev/null || say "$message"
  elif [[ "$tts_backend" == "espeak" ]]; then
    espeak "$message"
  elif [[ "$tts_backend" == "festival" ]]; then
    echo "$message" | festival --tts
  elif [[ "$tts_backend" == "none" ]]; then
    log_warn "No text-to-speech available, printing instead"
    echo "$message"
  fi
}
132
+
133
# Temp file management.
#
# make_temp_file is called through command substitution, i.e. in a subshell,
# so whatever it appends to TEMP_FILES never reaches the main shell. To keep
# cleanup reliable, all temp files are created inside one private directory
# that is made here, in the main shell, and removed by the EXIT trap.
declare -a TEMP_FILES
TEMP_FILES=()
TEMP_DIR=$(mktemp -d "/tmp/loki-voice-XXXXXX" 2>/dev/null) || TEMP_DIR=""

# Remove every tracked temp file and the private temp directory.
cleanup_temp_files() {
  local f
  if [[ ${#TEMP_FILES[@]} -gt 0 ]]; then
    for f in "${TEMP_FILES[@]}"; do
      rm -f -- "$f" 2>/dev/null || true
    done
  fi
  if [[ -n "$TEMP_DIR" && -d "$TEMP_DIR" ]]; then
    rm -rf -- "$TEMP_DIR" 2>/dev/null || true
  fi
}
trap cleanup_temp_files EXIT

# Create a secure temp file and print its path.
# Arguments: $1 - file name suffix (default: .tmp)
# Returns:   non-zero if the file could not be created
make_temp_file() {
  local suffix="${1:-.tmp}"
  local parent="${TEMP_DIR:-/tmp}"
  local stem
  local temp_file

  # BSD/macOS mktemp only substitutes X's at the very END of the template,
  # so create the file without the suffix and rename it afterwards.
  stem=$(mktemp "$parent/loki-voice-XXXXXX") || return 1
  temp_file="${stem}${suffix}"
  mv -f -- "$stem" "$temp_file" || return 1

  TEMP_FILES+=("$temp_file")
  echo "$temp_file"
}
153
+
154
# Record microphone audio on macOS as 16 kHz mono.
# Arguments: $1 - output audio file
#            $2 - duration in seconds (default: 10)
# Uses sox when installed, otherwise ffmpeg (avfoundation input).
# Returns 1 when neither tool is installed.
record_audio_macos() {
  local output_file="$1"
  local duration="${2:-10}"

  log "Recording for ${duration} seconds... Press Ctrl+C to stop early"

  # Pick the first available recorder, in order of preference.
  local recorder=""
  local candidate
  for candidate in sox ffmpeg; do
    if command -v "$candidate" &>/dev/null; then
      recorder="$candidate"
      break
    fi
  done

  case "$recorder" in
    sox)
      sox -d -r 16000 -c 1 -b 16 "$output_file" trim 0 "$duration" 2>/dev/null
      ;;
    ffmpeg)
      ffmpeg -f avfoundation -i ":0" -t "$duration" -ar 16000 -ac 1 "$output_file" -y 2>/dev/null
      ;;
    *)
      log_error "No audio recording tool found. Install sox: brew install sox"
      return 1
      ;;
  esac
}
171
+
172
# Record microphone audio on Linux as 16 kHz mono.
# Arguments: $1 - output audio file
#            $2 - duration in seconds (default: 10)
# Uses sox, then arecord, then ffmpeg (ALSA default device), whichever is
# installed first in that order. Returns 1 when none is installed.
record_audio_linux() {
  local output_file="$1"
  local duration="${2:-10}"

  log "Recording for ${duration} seconds... Press Ctrl+C to stop early"

  # Pick the first available recorder, in order of preference.
  local recorder=""
  local candidate
  for candidate in sox arecord ffmpeg; do
    if command -v "$candidate" &>/dev/null; then
      recorder="$candidate"
      break
    fi
  done

  case "$recorder" in
    sox)
      sox -d -r 16000 -c 1 -b 16 "$output_file" trim 0 "$duration" 2>/dev/null
      ;;
    arecord)
      arecord -f S16_LE -r 16000 -c 1 -d "$duration" "$output_file" 2>/dev/null
      ;;
    ffmpeg)
      ffmpeg -f alsa -i default -t "$duration" -ar 16000 -ac 1 "$output_file" -y 2>/dev/null
      ;;
    *)
      log_error "No audio recording tool found. Install: apt install sox alsa-utils"
      return 1
      ;;
  esac
}
191
+
192
# Record audio with the recorder that matches the current platform.
# All arguments are passed through to the platform-specific function
# (output file, then optional duration in seconds).
# Returns 1 on platforms other than macOS and Linux.
record_audio() {
  local os_name
  os_name=$(detect_platform)

  if [[ "$os_name" == "macos" ]]; then
    record_audio_macos "$@"
  elif [[ "$os_name" == "linux" ]]; then
    record_audio_linux "$@"
  else
    log_error "Audio recording not supported on $os_name"
    return 1
  fi
}
210
+
211
# Transcribe an audio file with the OpenAI Whisper API.
# Arguments: $1 - path of the audio file to upload
# Globals:   OPENAI_API_KEY (read)
# Outputs:   the transcribed text on stdout
# Returns:   non-zero when the key is missing, the file is unreadable, the
#            HTTP request fails, or the API answers with an error or with
#            something that is not a JSON object.
transcribe_whisper_api() {
  local audio_file="$1"

  if [[ -z "${OPENAI_API_KEY:-}" ]]; then
    log_error "OPENAI_API_KEY not set"
    return 1
  fi

  if [[ ! -r "$audio_file" ]]; then
    log_error "Audio file not readable: $audio_file"
    return 1
  fi

  local response
  # -S keeps curl quiet about progress but still reports transport errors.
  if ! response=$(curl -sS -X POST "https://api.openai.com/v1/audio/transcriptions" \
    -H "Authorization: Bearer $OPENAI_API_KEY" \
    -F "file=@$audio_file" \
    -F "model=whisper-1" \
    -F "language=en"); then
    log_error "Whisper API request failed"
    return 1
  fi

  # Parse the reply. API failures arrive as {"error": {...}}; report them on
  # stderr with a non-zero status instead of returning empty text.
  printf '%s' "$response" | python3 -c '
import json
import sys

try:
    payload = json.load(sys.stdin)
except ValueError:
    sys.exit("[loki-voice] Whisper API returned a non-JSON response")
if not isinstance(payload, dict):
    sys.exit("[loki-voice] Whisper API returned an unexpected response")
error = payload.get("error")
if error:
    detail = error.get("message", error) if isinstance(error, dict) else error
    sys.exit("[loki-voice] Whisper API error: %s" % detail)
print(payload.get("text", ""))
'
}
229
+
230
# Transcribe an audio file with the locally installed `whisper` CLI.
# Arguments: $1 - path of the audio file
# Outputs:   the transcribed text on stdout
# Returns:   non-zero when whisper is missing, fails, or writes no transcript
transcribe_whisper_local() {
  local audio_file="$1"

  if ! command -v whisper &>/dev/null; then
    log_error "Whisper not installed. Run: pip install openai-whisper"
    return 1
  fi

  # whisper writes "<audio name without extension>.txt" into --output_dir,
  # which defaults to the current directory. Point it at the audio file's
  # own directory so the transcript is found wherever the script is run.
  local out_dir
  local base_name
  local txt_file
  out_dir=$(dirname "$audio_file")
  base_name=$(basename "$audio_file")
  txt_file="$out_dir/${base_name%.*}.txt"

  # whisper also prints the recognised segments on stdout; silence that so
  # only the transcript file's content is returned.
  if ! whisper "$audio_file" --model base --language en \
    --output_format txt --output_dir "$out_dir" >/dev/null 2>&1; then
    log_error "Whisper transcription failed: $audio_file"
    return 1
  fi

  if [[ ! -f "$txt_file" ]]; then
    log_error "Whisper produced no transcript: $txt_file"
    return 1
  fi

  cat "$txt_file"
  rm -f -- "$txt_file"
}
246
+
247
# Record 30 seconds of audio, transcribe it, and always remove the recording.
# Arguments: $1 - name of the transcriber function to call with the audio file
#            $2 - progress message logged before transcription starts
# Outputs:   the transcription on stdout (everything else goes to stderr)
_listen_record_and_transcribe() {
  local transcriber="$1"
  local progress_message="$2"
  local audio_file
  local rc=0

  audio_file=$(make_temp_file .wav) || return 1

  speak "Recording will start now. Speak your requirements." >&2
  if record_audio "$audio_file" 30 >&2; then
    log "$progress_message" >&2
    "$transcriber" "$audio_file" || rc=$?
  else
    rc=1
  fi

  # The recording is deleted here because this function normally runs inside
  # a $(...) subshell, where an EXIT trap of the main shell does not fire.
  rm -f -- "$audio_file"
  return "$rc"
}

# Listen for voice input and print the recognised text.
# Outputs: ONLY the recognised text on stdout, so callers can capture it with
#          $(listen); progress messages, spoken-text fallbacks and AppleScript
#          results are sent to stderr or discarded.
# Returns: non-zero when no usable input method exists, a dialog is
#          cancelled, or recording/transcription fails.
listen() {
  local input_method
  input_method=$(check_voice_input)

  log "Voice input method: $input_method" >&2

  case "$input_method" in
    macos-dictation)
      log "Starting macOS Dictation..." >&2
      speak "Starting dictation. Press twice on Function key to begin, then speak your PRD." >&2

      # Open a dialog for dictation. osascript prints the dialog result
      # ("button returned:OK"), which must not reach the captured stdout.
      osascript >/dev/null <<'EOF' || return 1
tell application "System Events"
display dialog "Click OK then press Fn twice to start dictation" buttons {"Cancel", "OK"} default button "OK"
end tell
EOF
      # Wait for user to dictate
      log "Waiting for dictation input..." >&2
      log "Press Fn twice to toggle dictation on/off" >&2

      # The dialog text is handed back through a temporary file.
      local temp_file
      temp_file=$(make_temp_file .txt) || return 1
      if ! osascript >/dev/null <<EOF
tell application "System Events"
set userInput to text returned of (display dialog "Dictate or type your PRD:" default answer "" buttons {"Cancel", "OK"} default button "OK" with title "Loki Mode Voice Input")
do shell script "echo " & quoted form of userInput & " > '$temp_file'"
end tell
EOF
      then
        rm -f -- "$temp_file"
        return 1
      fi
      if [[ -f "$temp_file" ]]; then
        cat "$temp_file"
        rm -f -- "$temp_file"
      fi
      ;;

    whisper-api)
      log "Using Whisper API for transcription" >&2
      _listen_record_and_transcribe transcribe_whisper_api "Transcribing..."
      ;;

    whisper)
      log "Using local Whisper for transcription" >&2
      _listen_record_and_transcribe transcribe_whisper_local "Transcribing locally..."
      ;;

    none)
      log_error "No voice input method available"
      log "Options:" >&2
      log " 1. Enable macOS Dictation: System Settings > Keyboard > Dictation" >&2
      log " 2. Set OPENAI_API_KEY for Whisper API" >&2
      log " 3. Install local Whisper: pip install openai-whisper" >&2
      return 1
      ;;

    *)
      # check_voice_input can report methods (such as vosk) that have no
      # listening implementation; fail loudly instead of returning nothing.
      log_error "Voice input method '$input_method' is not supported for listening"
      return 1
      ;;
  esac
}
318
+
319
# Decide whether an utterance means "I have finished listing requirements".
# True for short utterances (at most three words) that contain the whole word
# done, finish or finished: "Done.", "I'm done", "ok finished". Longer
# sentences and words that merely contain "done" (e.g. "abandoned") are
# treated as real requirements.
_voice_is_done_phrase() {
  local normalized
  local word
  local word_count=0
  local has_keyword=0

  # Lowercase (tr keeps bash 3.2 compatibility) and turn every
  # non-alphanumeric character into a space; what is left is safe to split.
  normalized=$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]' | tr -c '[:alnum:]' ' ')
  for word in $normalized; do
    word_count=$((word_count + 1))
    case "$word" in
      done|finish|finished) has_keyword=1 ;;
    esac
  done

  [[ $has_keyword -eq 1 && $word_count -le 3 ]]
}

# Dictate a PRD to a file, section by section.
# Arguments: $1 - path of the markdown file to write
# Outputs:   the output path on stdout once the file has been written
# The spoken answers are written verbatim: backslashes and other special
# characters in the dictated text are not interpreted.
dictate_to_file() {
  local output_file="$1"

  log "Dictating to: $output_file"
  speak "Ready to create a PRD. I'll guide you through the sections."

  # Guide through PRD sections
  speak "First, what is the name of your project?"
  local project_name
  project_name=$(listen)

  speak "Great. Now describe the overview of your project."
  local overview
  overview=$(listen)

  speak "Now list your requirements. Say done when finished."
  local requirements=""
  local requirement
  local empty_streak=0

  while true; do
    requirement=$(listen)

    # Nothing recognised: ask again rather than recording an empty item,
    # and stop after three silent attempts in a row.
    if [[ -z "${requirement//[[:space:]]/}" ]]; then
      empty_streak=$((empty_streak + 1))
      if [[ $empty_streak -ge 3 ]]; then
        log_warn "No input received three times in a row; ending requirements"
        break
      fi
      speak "I did not catch that. Next requirement, or say done."
      continue
    fi
    empty_streak=0

    if _voice_is_done_phrase "$requirement"; then
      break
    fi

    requirements+="- [ ] ${requirement}"$'\n'
    speak "Got it. Next requirement, or say done."
  done

  speak "What tech stack do you want to use?"
  local tech_stack
  tech_stack=$(listen)

  # Write to file. printf '%s' keeps the dictated text byte-for-byte.
  {
    printf '# %s\n\n' "$project_name"
    printf '## Overview\n%s\n\n' "$overview"
    printf '## Requirements\n'
    printf '%s' "$requirements"
    printf '\n## Tech Stack\n%s\n\n' "$tech_stack"
  } > "$output_file"

  speak "PRD created at $output_file"
  log_success "PRD saved to: $output_file"
  echo "$output_file"
}
369
+
370
# Show voice capabilities status: platform, speech-to-text backend,
# text-to-speech backend, and the audio recording tool that would be used.
# The recording section follows the same tool order as the recording
# functions (sox, then arecord on Linux, then ffmpeg) and gives an install
# hint suited to the platform.
status() {
  local platform
  platform=$(detect_platform)

  echo "=== Loki Mode Voice Status ==="
  echo ""
  echo "Platform: $platform"
  echo ""

  echo "Voice Input:"
  local input_method
  input_method=$(check_voice_input)
  case "$input_method" in
    macos-dictation)
      echo " [OK] macOS Dictation enabled"
      ;;
    whisper-api)
      echo " [OK] Whisper API available (OPENAI_API_KEY set)"
      ;;
    whisper)
      echo " [OK] Local Whisper installed"
      ;;
    vosk)
      echo " [OK] Vosk speech recognition available"
      ;;
    none)
      echo " [--] No voice input available"
      echo " Recommendations:"
      echo " - macOS: Enable Dictation in System Settings > Keyboard"
      echo " - Set OPENAI_API_KEY for Whisper API"
      echo " - pip install openai-whisper for local transcription"
      ;;
  esac
  echo ""

  echo "Voice Output (TTS):"
  local output_method
  output_method=$(check_voice_output)
  case "$output_method" in
    say)
      echo " [OK] macOS 'say' command available"
      ;;
    espeak)
      echo " [OK] eSpeak available"
      ;;
    festival)
      echo " [OK] Festival TTS available"
      ;;
    none)
      echo " [--] No TTS available"
      ;;
  esac
  echo ""

  echo "Audio Recording:"
  if command -v sox &>/dev/null; then
    echo " [OK] sox installed"
  elif [[ "$platform" == "linux" ]] && command -v arecord &>/dev/null; then
    # arecord is only used by the Linux recorder.
    echo " [OK] arecord installed (fallback)"
  elif command -v ffmpeg &>/dev/null; then
    echo " [OK] ffmpeg installed (fallback)"
  elif [[ "$platform" == "linux" ]]; then
    echo " [--] No recording tool (install: apt install sox alsa-utils)"
  else
    echo " [--] No recording tool (install sox: brew install sox)"
  fi
}
434
+
435
# CLI entry point.
# Arguments: $1 - command: listen, speak, dictate, status, or help (default)
#            remaining arguments belong to the selected command
# Exits 1 on an unknown command or when `speak` is given no message.
main() {
  local action="help"
  if [[ $# -gt 0 ]]; then
    # An empty first argument is treated like a missing one.
    action="${1:-help}"
    shift
  fi

  case "$action" in
    help|--help|-h)
      printf '%s\n' \
        "Loki Mode Voice Input" \
        "" \
        "Usage: voice.sh <command> [options]" \
        "" \
        "Commands:" \
        " listen Listen for voice input and return text" \
        " speak MESSAGE Text-to-speech output" \
        " dictate [FILE] Guided PRD dictation (default: prd-voice.md)" \
        " status Show voice capabilities" \
        "" \
        "Environment:" \
        " OPENAI_API_KEY Required for Whisper API transcription" \
        "" \
        "Setup:" \
        " macOS: Enable Dictation in System Settings > Keyboard" \
        " Linux: Install sox and whisper: apt install sox && pip install openai-whisper"
      ;;
    status)
      status
      ;;
    listen)
      listen
      ;;
    dictate)
      local target_file="${1:-prd-voice.md}"
      dictate_to_file "$target_file"
      ;;
    speak)
      if [[ $# -eq 0 ]]; then
        log_error "Usage: voice.sh speak MESSAGE"
        exit 1
      fi
      # All remaining words are joined into one message.
      speak "$*"
      ;;
    *)
      log_error "Unknown command: $action"
      echo "Run 'voice.sh help' for usage"
      exit 1
      ;;
  esac
}
483
+
484
# Invoke main only when this file is executed; when it is sourced,
# BASH_SOURCE[0] differs from $0 and nothing is run.
[[ "${BASH_SOURCE[0]}" != "${0}" ]] || main "$@"
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "loki-mode",
3
- "version": "5.13.1",
3
+ "version": "5.14.0",
4
4
  "description": "Multi-agent autonomous startup system for Claude Code, Codex CLI, and Gemini CLI",
5
5
  "keywords": [
6
6
  "claude",