loki-mode 5.13.1 → 5.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/SKILL.md CHANGED
@@ -3,7 +3,7 @@ name: loki-mode
3
3
  description: Multi-agent autonomous startup system. Triggers on "Loki Mode". Takes PRD to deployed product with zero human intervention. Requires --dangerously-skip-permissions flag.
4
4
  ---
5
5
 
6
- # Loki Mode v5.9.0
6
+ # Loki Mode v5.14.1
7
7
 
8
8
  **You are an autonomous agent. You make decisions. You do not ask questions. You do not stop.**
9
9
 
@@ -253,4 +253,4 @@ Auto-detected or force with `LOKI_COMPLEXITY`:
253
253
 
254
254
  ---
255
255
 
256
- **v5.9.0 | Cross-Project Learning, VS Code Chat and Logs views | ~250 lines core**
256
+ **v5.14.1 | Voice Input, Peer Review Fixes, macOS Compatibility | ~250 lines core**
package/VERSION CHANGED
@@ -1 +1 @@
1
- 5.13.1
1
+ 5.14.1
@@ -1,10 +1,11 @@
1
1
  #!/usr/bin/env node
2
2
  /**
3
- * Loki Mode HTTP API Server (v1.1.0)
3
+ * Loki Mode HTTP API Server (v1.2.0)
4
4
  * Zero npm dependencies - uses only Node.js built-ins
5
5
  *
6
6
  * Usage:
7
7
  * node autonomy/api-server.js [--port 9898]
8
+ * LOKI_API_PORT=9898 node autonomy/api-server.js
8
9
  * loki api start
9
10
  *
10
11
  * Endpoints:
@@ -28,8 +29,48 @@ const fs = require('fs');
28
29
  const path = require('path');
29
30
  const { spawn, execSync } = require('child_process');
30
31
 
32
/**
 * Check whether a value is a usable TCP port number.
 *
 * @param {*} port - Candidate value.
 * @returns {boolean} True only for integers in the range 1-65535.
 */
function isValidPort(port) {
  return Number.isInteger(port) && port >= 1 && port <= 65535;
}

/**
 * Strictly convert a single command-line token into a port number.
 *
 * parseInt() alone stops at the first non-digit, so "80abc" would be read as
 * port 80. Requiring the whole token to be digits rejects such typos instead
 * of silently listening on an unintended port.
 *
 * @param {*} token - Raw argument text.
 * @returns {number|null} The port, or null when the token is not a valid port.
 */
function parsePortToken(token) {
  if (typeof token !== 'string' || !/^\d+$/.test(token)) {
    return null;
  }
  const value = Number.parseInt(token, 10);
  return isValidPort(value) ? value : null;
}

/**
 * Parse command line arguments (everything after the script path).
 *
 * Recognised forms: `--port N`, `-p N`, `--port=N`, and a bare number
 * (kept for backwards compatibility). Invalid values are ignored; when
 * several valid values are given the last one wins.
 *
 * @returns {{port: (number|null)}} Parsed options; port is null when no valid
 *   port was supplied.
 */
function parseArgs() {
  const args = process.argv.slice(2);
  let port = null;

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    let candidate = null;

    if (arg === '--port' || arg === '-p') {
      // The value is the following argument; consume it even when invalid so
      // it is not re-interpreted as a bare port on the next iteration.
      if (i + 1 < args.length) {
        candidate = parsePortToken(args[i + 1]);
        i++;
      }
    } else if (arg.startsWith('--port=')) {
      candidate = parsePortToken(arg.slice('--port='.length));
    } else {
      // Bare number as port (backwards compatible); anything else is ignored.
      candidate = parsePortToken(arg);
    }

    if (candidate !== null) {
      port = candidate;
    }
  }

  return { port };
}
68
+
69
+ const cliArgs = parseArgs();
70
+
31
71
  // Configuration
32
- const PORT = parseInt(process.env.LOKI_API_PORT || process.argv[3] || '9898');
72
// Port precedence: CLI argument, then LOKI_API_PORT, then 9898.
// The environment value is only honoured when it is a valid port, so a typo
// cannot turn into a NaN port.
const PORT = cliArgs.port || (() => {
  const envPort = Number.parseInt(process.env.LOKI_API_PORT, 10);
  return isValidPort(envPort) ? envPort : 9898;
})();
// Maximum accepted request body in bytes (1MB default). A non-numeric
// LOKI_API_MAX_BODY would parse to NaN, and every `size > NaN` comparison is
// false, which would silently disable the limit; fall back to the default.
const MAX_BODY_SIZE = (() => {
  const limit = Number.parseInt(process.env.LOKI_API_MAX_BODY, 10);
  return Number.isInteger(limit) && limit >= 0 ? limit : 1048576;
})();
33
74
  const LOKI_DIR = process.env.LOKI_DIR || path.join(process.cwd(), '.loki');
34
75
  const STATE_DIR = path.join(LOKI_DIR, 'state');
35
76
  const LOG_DIR = path.join(LOKI_DIR, 'logs');
@@ -163,11 +204,22 @@ function broadcast(event, data) {
163
204
  }
164
205
  }
165
206
 
166
- // Parse JSON body
207
+ // Parse JSON body with size limit
167
208
  function parseBody(req) {
168
- return new Promise((resolve) => {
209
+ return new Promise((resolve, reject) => {
169
210
  let body = '';
170
- req.on('data', chunk => body += chunk);
211
+ let size = 0;
212
+
213
+ req.on('data', chunk => {
214
+ size += chunk.length;
215
+ if (size > MAX_BODY_SIZE) {
216
+ req.destroy();
217
+ reject(new Error('Request body too large'));
218
+ return;
219
+ }
220
+ body += chunk;
221
+ });
222
+
171
223
  req.on('end', () => {
172
224
  try {
173
225
  resolve(body ? JSON.parse(body) : {});
@@ -175,6 +227,8 @@ function parseBody(req) {
175
227
  resolve({});
176
228
  }
177
229
  });
230
+
231
+ req.on('error', () => resolve({}));
178
232
  });
179
233
  }
180
234
 
@@ -186,8 +240,8 @@ async function handleRequest(req, res) {
186
240
 
187
241
  // CORS headers
188
242
  res.setHeader('Access-Control-Allow-Origin', '*');
189
- res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
190
- res.setHeader('Access-Control-Allow-Headers', 'Content-Type');
243
+ res.setHeader('Access-Control-Allow-Methods', 'GET, POST, DELETE, OPTIONS');
244
+ res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization');
191
245
 
192
246
  if (method === 'OPTIONS') {
193
247
  res.writeHead(204);
@@ -232,15 +286,27 @@ async function handleRequest(req, res) {
232
286
  }
233
287
  }, 2000);
234
288
 
235
- req.on('close', () => {
289
+ // Clean up on close, error, or finish
290
+ const cleanup = () => {
236
291
  clearInterval(interval);
237
292
  sseClients.delete(res);
238
- });
293
+ };
294
+
295
+ req.on('close', cleanup);
296
+ req.on('error', cleanup);
297
+ res.on('error', cleanup);
298
+ res.on('finish', cleanup);
239
299
  return;
240
300
  }
241
301
 
242
302
  if (method === 'GET' && pathname === '/logs') {
243
- const lines = parseInt(url.searchParams.get('lines')) || 50;
303
+ let lines = parseInt(url.searchParams.get('lines'), 10);
304
+ // Validate lines: must be positive, default to 50, cap at 10000
305
+ if (!Number.isInteger(lines) || lines < 1) {
306
+ lines = 50;
307
+ } else if (lines > 10000) {
308
+ lines = 10000;
309
+ }
244
310
  const logFile = path.join(LOG_DIR, 'session.log');
245
311
 
246
312
  if (!fs.existsSync(logFile)) {
@@ -254,7 +320,12 @@ async function handleRequest(req, res) {
254
320
  }
255
321
 
256
322
  if (method === 'POST' && pathname === '/start') {
257
- const body = await parseBody(req);
323
+ let body;
324
+ try {
325
+ body = await parseBody(req);
326
+ } catch (err) {
327
+ return json({ error: err.message }, 413);
328
+ }
258
329
  const prd = body.prd || '';
259
330
  const provider = body.provider || 'claude';
260
331
  const parallel = body.parallel || false;
@@ -1,6 +1,6 @@
1
1
  #!/bin/bash
2
2
  #===============================================================================
3
- # Loki Mode - GitHub Issue Parser (v5.9.0)
3
+ # Loki Mode - GitHub Issue Parser (v5.14.0)
4
4
  # Parses GitHub issues and extracts structured data for PRD generation
5
5
  #
6
6
  # Usage:
package/autonomy/loki CHANGED
@@ -137,6 +137,7 @@ show_help() {
137
137
  echo " api [cmd] HTTP API server (start|stop|status)"
138
138
  echo " sandbox [cmd] Docker sandbox (start|stop|status|logs|shell|build)"
139
139
  echo " notify [cmd] Send notifications (test|slack|discord|webhook|status)"
140
+ echo " voice [cmd] Voice input for PRD creation (status|listen|dictate|speak|start)"
140
141
  echo " import Import GitHub issues as tasks"
141
142
  echo " config [cmd] Manage configuration (show|init|edit|path)"
142
143
  echo " memory [cmd] Cross-project learnings (list|show|search|stats)"
@@ -2486,6 +2487,9 @@ main() {
2486
2487
  enterprise)
2487
2488
  cmd_enterprise "$@"
2488
2489
  ;;
2490
+ voice)
2491
+ cmd_voice "$@"
2492
+ ;;
2489
2493
  version|--version|-v)
2490
2494
  cmd_version
2491
2495
  ;;
@@ -3365,6 +3369,99 @@ for check, passed in checks.items():
3365
3369
  esac
3366
3370
  }
3367
3371
 
3372
# Voice input commands: thin CLI wrapper that forwards to the voice.sh module.
#   $1 - subcommand (status|listen|dictate|speak|start|help), default: status
#   $2 - optional output file for dictate/start; for speak, the message words
cmd_voice() {
    local subcommand="${1:-status}"
    local VOICE_SCRIPT="$SKILL_DIR/autonomy/voice.sh"

    # Fall back to a voice.sh sitting next to this CLI script; give up with an
    # explanatory error when neither location has it.
    if [ ! -f "$VOICE_SCRIPT" ]; then
        local loki_dir
        loki_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
        VOICE_SCRIPT="$loki_dir/voice.sh"

        if [ ! -f "$VOICE_SCRIPT" ]; then
            echo -e "${RED}Error: Voice module not found${NC}"
            echo "Expected at: $SKILL_DIR/autonomy/voice.sh"
            echo ""
            echo "Voice input requires the voice.sh module."
            echo "This feature may not be available in all installations."
            exit 1
        fi
    fi

    case "$subcommand" in
        status)
            "$VOICE_SCRIPT" status
            ;;

        listen)
            echo -e "${BOLD}Starting voice input...${NC}"
            local text
            text=$("$VOICE_SCRIPT" listen)
            # Only print the result section when something was transcribed
            [ -z "$text" ] || {
                echo ""
                echo -e "${GREEN}Transcribed text:${NC}"
                echo "$text"
            }
            ;;

        dictate)
            local output="${2:-prd-voice.md}"
            echo -e "${BOLD}Starting guided PRD dictation...${NC}"
            echo ""
            "$VOICE_SCRIPT" dictate "$output"
            [ ! -f "$output" ] || {
                echo ""
                echo -e "${GREEN}PRD created: $output${NC}"
                echo ""
                echo "Start Loki Mode with:"
                echo " loki start $output"
            }
            ;;

        speak)
            # Drop the subcommand; whatever remains is the message
            shift
            if [ $# -eq 0 ]; then
                echo -e "${RED}Usage: loki voice speak MESSAGE${NC}"
                exit 1
            fi
            "$VOICE_SCRIPT" speak "$*"
            ;;

        start)
            # Dictate a PRD (timestamped name by default), then launch Loki Mode on it
            local prd_file="${2:-prd-voice-$(date +%Y%m%d%H%M%S).md}"
            echo -e "${BOLD}Voice-activated PRD creation...${NC}"
            "$VOICE_SCRIPT" dictate "$prd_file"
            [ ! -f "$prd_file" ] || {
                echo ""
                echo -e "${GREEN}PRD created. Starting Loki Mode...${NC}"
                cmd_start "$prd_file"
            }
            ;;

        --help|-h|help)
            echo -e "${BOLD}loki voice${NC} - Voice input for PRD creation"
            cat <<'EOF'

Usage: loki voice <command> [options]

Commands:
 status Check voice input capabilities
 listen Listen and transcribe voice input
 dictate [FILE] Guided PRD dictation (default: prd-voice.md)
 speak MESSAGE Text-to-speech output
 start [FILE] Dictate PRD and start Loki Mode immediately

Requirements:
 macOS: Enable Dictation in System Settings > Keyboard
 Or: Set OPENAI_API_KEY for Whisper API transcription
 Or: pip install openai-whisper for local transcription
EOF
            ;;

        *)
            echo -e "${RED}Unknown voice command: $subcommand${NC}"
            echo "Run 'loki voice help' for usage."
            exit 1
            ;;
    esac
}
3464
+
3368
3465
  # Enterprise features (optional - requires env vars)
3369
3466
  cmd_enterprise() {
3370
3467
  local subcommand="${1:-status}"
package/autonomy/run.sh CHANGED
@@ -115,9 +115,30 @@
115
115
  # LOKI_PROMPT_INJECTION - Enable HUMAN_INPUT.md processing (default: false)
116
116
  # Set to "true" only in trusted environments
117
117
  #===============================================================================
118
+ #
119
+ # Compatibility: bash 3.2+ (macOS default), bash 4+ (Linux), WSL
120
+ # Parallel mode (--parallel) requires bash 4.0+ for associative arrays
121
+ #===============================================================================
118
122
 
119
123
  set -uo pipefail
120
124
 
125
# Compatibility check: BASH_VERSION is only set by bash, so an empty value
# means the script was started with another shell (sh, dash, zsh).
if [ -z "${BASH_VERSION:-}" ]; then
    echo "[ERROR] This script requires bash. Please run with: bash $0" >&2
    exit 1
fi

# Split "MAJOR.MINOR.rest" into its first two components for feature checks
IFS=. read -r BASH_VERSION_MAJOR BASH_VERSION_MINOR _ <<< "$BASH_VERSION"

# Warn (but keep going) when bash is older than 3.2
if (( BASH_VERSION_MAJOR < 3 || (BASH_VERSION_MAJOR == 3 && BASH_VERSION_MINOR < 2) )); then
    echo "[WARN] Bash version $BASH_VERSION is old. Recommend bash 3.2+ for full compatibility." >&2
    echo "[WARN] Some features may not work correctly." >&2
fi
141
+
121
142
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
122
143
  PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
123
144
 
@@ -519,8 +540,7 @@ else
519
540
  fi
520
541
 
521
542
  # Track worktree PIDs for cleanup (requires bash 4+ for associative arrays)
522
- # Check bash version for parallel mode compatibility
523
- BASH_VERSION_MAJOR="${BASH_VERSION%%.*}"
543
+ # BASH_VERSION_MAJOR is defined at script startup
524
544
  if [ "$BASH_VERSION_MAJOR" -ge 4 ] 2>/dev/null; then
525
545
  declare -A WORKTREE_PIDS
526
546
  declare -A WORKTREE_PATHS
@@ -1325,12 +1345,18 @@ notify_rate_limit() {
1325
1345
  # Parallel Workflow Functions (Git Worktrees)
1326
1346
  #===============================================================================
1327
1347
 
1328
- # Check if parallel mode is supported (bash 4+ required)
1348
+ # Check if parallel mode is supported (bash 4+ required for associative arrays)
1329
1349
  check_parallel_support() {
1330
1350
  if [ "$BASH_VERSION_MAJOR" -lt 4 ] 2>/dev/null; then
1331
- log_error "Parallel mode requires bash 4.0 or higher"
1332
- log_error "Current bash version: $BASH_VERSION"
1333
- log_error "On macOS, install newer bash: brew install bash"
1351
+ log_error "Parallel mode requires bash 4.0+ (current: $BASH_VERSION)"
1352
+ log_error "Parallel mode uses associative arrays which require bash 4+"
1353
+ log_error ""
1354
+ log_error "How to upgrade:"
1355
+ log_error " macOS: brew install bash && sudo chsh -s /opt/homebrew/bin/bash"
1356
+ log_error " Ubuntu: sudo apt install bash"
1357
+ log_error " WSL: Usually has bash 4+ by default"
1358
+ log_error ""
1359
+ log_error "Or run without --parallel flag for sequential mode (works with bash 3.2+)"
1334
1360
  return 1
1335
1361
  fi
1336
1362
  return 0
@@ -1754,7 +1780,7 @@ run_parallel_orchestrator() {
1754
1780
 
1755
1781
  cat > "$state_file" << EOF
1756
1782
  {
1757
- "timestamp": "$(date -Iseconds)",
1783
+ "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
1758
1784
  "worktrees": {
1759
1785
  $(for stream in "${!WORKTREE_PATHS[@]}"; do
1760
1786
  local path="${WORKTREE_PATHS[$stream]}"
@@ -0,0 +1,502 @@
1
+ #!/usr/bin/env bash
2
+ # Loki Mode Voice Input Support (v1.0.0)
3
+ # Enables voice-to-text for PRD dictation and command input
4
+ #
5
+ # Usage:
6
+ # ./autonomy/voice.sh listen - Listen for voice input
7
+ # ./autonomy/voice.sh speak MESSAGE - Text-to-speech output
8
+ # ./autonomy/voice.sh dictate FILE - Dictate to file
9
+ # ./autonomy/voice.sh status - Check voice capabilities
10
+ #
11
+ # Requires: macOS with Dictation enabled, or Whisper (OpenAI API via OPENAI_API_KEY, or a local openai-whisper install)
12
+
13
+ set -euo pipefail
14
+
15
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
16
+ LOKI_DIR="${LOKI_DIR:-.loki}"
17
+
18
# Colors: start with empty (no-op) codes and switch to ANSI escapes only when
# stdout is attached to a terminal.
RED='' GREEN='' YELLOW='' BLUE='' NC=''
if [[ -t 1 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    NC='\033[0m'
fi
28
+
29
# Logging helpers. Each prints its arguments prefixed with a coloured
# "[loki-voice]" tag.
# All of them write to stderr: listen() is consumed through command
# substitution (e.g. project_name=$(listen)), so any diagnostics printed on
# stdout would be captured as part of the transcribed text.
log() { echo -e "${BLUE}[loki-voice]${NC} $*" >&2; }
log_success() { echo -e "${GREEN}[loki-voice]${NC} $*" >&2; }
log_warn() { echo -e "${YELLOW}[loki-voice]${NC} $*" >&2; }
log_error() { echo -e "${RED}[loki-voice]${NC} $*" >&2; }
33
+
34
# Map bash's $OSTYPE onto a coarse platform name.
# Prints one of: macos, linux, windows, unknown.
detect_platform() {
    case "$OSTYPE" in
        darwin*)
            echo "macos"
            ;;
        linux-gnu*)
            echo "linux"
            ;;
        msys|cygwin)
            echo "windows"
            ;;
        *)
            echo "unknown"
            ;;
    esac
}
46
+
47
# Pick the voice input method to use on this machine.
# Prints one of: macos-dictation, whisper, whisper-api, vosk, none.
# Order of preference: macOS Dictation (macOS only), local whisper binary,
# Whisper API (OPENAI_API_KEY set), then arecord+vosk (Linux only).
check_voice_input() {
    local os
    os=$(detect_platform)

    if [[ "$os" == "macos" ]]; then
        # Dictation preference is read from the user's defaults database
        if defaults read com.apple.speech.recognition.AppleSpeechRecognition.prefs DictationIMMEnabled 2>/dev/null | grep -q "1"; then
            echo "macos-dictation"
            return 0
        fi
    elif [[ "$os" != "linux" ]]; then
        # Neither macOS nor Linux: nothing is supported
        echo "none"
        return 0
    fi

    # Checks shared by macOS and Linux
    if command -v whisper &>/dev/null; then
        echo "whisper"
    elif [[ -n "${OPENAI_API_KEY:-}" ]]; then
        echo "whisper-api"
    elif [[ "$os" == "linux" ]] && command -v arecord &>/dev/null && command -v vosk &>/dev/null; then
        echo "vosk"
    else
        echo "none"
    fi
}
81
+
82
# Pick the text-to-speech command available on this machine.
# Prints one of: say (macOS), espeak or festival (Linux), none.
check_voice_output() {
    local os engine candidates
    os=$(detect_platform)

    # Candidate commands per platform, in order of preference
    case "$os" in
        macos) candidates="say" ;;
        linux) candidates="espeak festival" ;;
        *)     candidates="" ;;
    esac

    # Intentionally unquoted: the list is split on whitespace
    for engine in $candidates; do
        if command -v "$engine" &>/dev/null; then
            echo "$engine"
            return 0
        fi
    done

    echo "none"
}
109
+
110
# Text-to-speech output.
#   $1 - message to speak
# Uses the engine reported by check_voice_output. When no engine is available
# the message is printed instead. That fallback goes to stderr, not stdout:
# speak is called from inside listen(), whose stdout is captured as the
# transcription, so a prompt printed on stdout would end up in the PRD text.
speak() {
    local message="$1"
    local output_method
    output_method=$(check_voice_output)

    case "$output_method" in
        say)
            # Prefer the "Samantha" voice; fall back to the system default voice
            say -v "Samantha" "$message" 2>/dev/null || say "$message"
            ;;
        espeak)
            espeak "$message"
            ;;
        festival)
            printf '%s\n' "$message" | festival --tts
            ;;
        *)
            # "none" or any unexpected value: degrade to plain text
            log_warn "No text-to-speech available, printing instead" >&2
            printf '%s\n' "$message" >&2
            ;;
    esac
}
132
+
133
# Temp file handling.
#
# make_temp_file is always called through command substitution, i.e. in a
# subshell, so anything it appends to TEMP_FILES is lost when the subshell
# exits and the EXIT trap of the main shell would never see those files.
# To make cleanup reliable, one private scratch directory is created up front
# in the main shell; every temp file lives inside it and the trap removes the
# whole directory. TEMP_FILES is kept for paths registered in the main shell.
declare -a TEMP_FILES
TEMP_FILES=()
VOICE_TMP_ROOT="${TMPDIR:-/tmp}"
# Creation failure is tolerated here so commands that never need a temp file
# (status, help, speak) keep working; make_temp_file reports the problem.
VOICE_TMP_DIR=$(mktemp -d "${VOICE_TMP_ROOT%/}/loki-voice-XXXXXX" 2>/dev/null) || VOICE_TMP_DIR=""

# Remove every registered temp file and the scratch directory (EXIT trap).
cleanup_temp_files() {
    local f
    if [[ ${#TEMP_FILES[@]} -gt 0 ]]; then
        for f in "${TEMP_FILES[@]}"; do
            rm -f "$f" 2>/dev/null || true
        done
    fi
    if [[ -n "${VOICE_TMP_DIR:-}" && -d "$VOICE_TMP_DIR" ]]; then
        rm -rf "$VOICE_TMP_DIR" 2>/dev/null || true
    fi
}
trap cleanup_temp_files EXIT

# Create secure temp file and print its path.
#   $1 - filename suffix including the dot (default: .tmp)
# Returns 1 when the scratch directory or the file cannot be created.
make_temp_file() {
    local suffix="${1:-.tmp}"
    local base temp_file

    if [[ -z "${VOICE_TMP_DIR:-}" || ! -d "$VOICE_TMP_DIR" ]]; then
        echo "[loki-voice] Could not create a temporary directory under ${VOICE_TMP_ROOT}" >&2
        return 1
    fi

    # BSD/macOS mktemp only substitutes X's at the very end of the template,
    # so the suffix is added afterwards with a rename. The directory is
    # private to this process, so the rename cannot race with other users.
    base=$(mktemp "$VOICE_TMP_DIR/voice-XXXXXX") || return 1
    temp_file="${base}${suffix}"
    mv "$base" "$temp_file" || return 1

    TEMP_FILES+=("$temp_file")
    echo "$temp_file"
}
153
+
154
# Record audio on macOS into a 16 kHz mono file.
#   $1 - output file path
#   $2 - recording length in seconds (default: 10)
# Tries sox first, then ffmpeg (avfoundation input ":0"); returns 1 when
# neither tool is installed, otherwise the recorder's exit status.
record_audio_macos() {
    local output_file="$1"
    local duration="${2:-10}"

    log "Recording for ${duration} seconds... Press Ctrl+C to stop early"

    if command -v sox &>/dev/null; then
        sox -d -r 16000 -c 1 -b 16 "$output_file" trim 0 "$duration" 2>/dev/null
        return $?
    fi

    if command -v ffmpeg &>/dev/null; then
        ffmpeg -f avfoundation -i ":0" -t "$duration" -ar 16000 -ac 1 "$output_file" -y 2>/dev/null
        return $?
    fi

    log_error "No audio recording tool found. Install sox: brew install sox"
    return 1
}
171
+
172
# Record audio on Linux into a 16 kHz mono file.
#   $1 - output file path
#   $2 - recording length in seconds (default: 10)
# Tries sox, then arecord, then ffmpeg (ALSA default device); returns 1 when
# none of them is installed, otherwise the recorder's exit status.
record_audio_linux() {
    local output_file="$1"
    local duration="${2:-10}"

    log "Recording for ${duration} seconds... Press Ctrl+C to stop early"

    if command -v sox &>/dev/null; then
        sox -d -r 16000 -c 1 -b 16 "$output_file" trim 0 "$duration" 2>/dev/null
        return $?
    fi

    if command -v arecord &>/dev/null; then
        arecord -f S16_LE -r 16000 -c 1 -d "$duration" "$output_file" 2>/dev/null
        return $?
    fi

    if command -v ffmpeg &>/dev/null; then
        ffmpeg -f alsa -i default -t "$duration" -ar 16000 -ac 1 "$output_file" -y 2>/dev/null
        return $?
    fi

    log_error "No audio recording tool found. Install: apt install sox alsa-utils"
    return 1
}
191
+
192
# Record audio with the recorder matching the current platform.
# All arguments (output file, optional duration) are forwarded unchanged.
# Returns 1 on platforms other than macOS and Linux.
record_audio() {
    local platform
    platform=$(detect_platform)

    if [[ "$platform" == "macos" ]]; then
        record_audio_macos "$@"
    elif [[ "$platform" == "linux" ]]; then
        record_audio_linux "$@"
    else
        log_error "Audio recording not supported on $platform"
        return 1
    fi
}
210
+
211
# Transcribe audio using Whisper API.
#   $1 - path to the recorded audio file
# Prints the transcribed text on stdout. Returns non-zero (with a message on
# stderr) when the API key or audio file is missing, the HTTP request fails,
# or the API answers with an error payload / non-JSON body. Previously those
# failures produced an empty transcription or a Python traceback.
transcribe_whisper_api() {
    local audio_file="$1"

    if [[ -z "${OPENAI_API_KEY:-}" ]]; then
        log_error "OPENAI_API_KEY not set"
        return 1
    fi

    if [[ ! -s "$audio_file" ]]; then
        log_error "Audio file is missing or empty: $audio_file"
        return 1
    fi

    # -sS: no progress meter, but keep transport errors visible on stderr
    local response
    if ! response=$(curl -sS -X POST "https://api.openai.com/v1/audio/transcriptions" \
        -H "Authorization: Bearer $OPENAI_API_KEY" \
        -F "file=@$audio_file" \
        -F "model=whisper-1" \
        -F "language=en"); then
        log_error "Whisper API request failed"
        return 1
    fi

    # sys.exit(<str>) prints the string to stderr and exits with status 1
    printf '%s' "$response" | python3 -c '
import json
import sys

try:
    data = json.load(sys.stdin)
except ValueError:
    sys.exit("[loki-voice] Whisper API returned a non-JSON response")

if not isinstance(data, dict):
    sys.exit("[loki-voice] Whisper API returned an unexpected response")

if "error" in data:
    err = data["error"]
    detail = err.get("message", err) if isinstance(err, dict) else err
    sys.exit("[loki-voice] Whisper API error: %s" % detail)

print(data.get("text", ""))
'
}
229
+
230
# Transcribe audio with the locally installed `whisper` command.
#   $1 - path to the recorded .wav file
# Prints the transcript on stdout and deletes the generated .txt file.
# Prints nothing when whisper produced no transcript file.
transcribe_whisper_local() {
    local audio_file="$1"
    local output_dir
    output_dir=$(dirname "$audio_file")

    command -v whisper &>/dev/null || {
        log_error "Whisper not installed. Run: pip install openai-whisper"
        return 1
    }

    # Ask whisper to write next to the audio file
    whisper "$audio_file" --model base --language en --output_format txt --output_dir "$output_dir" 2>/dev/null

    # Expected transcript location first, then the same file name in the
    # current directory (older whisper versions).
    local transcript_path="${audio_file%.wav}.txt"
    local candidate
    for candidate in "$transcript_path" "$(basename "$transcript_path")"; do
        if [[ -f "$candidate" ]]; then
            cat "$candidate"
            rm -f "$candidate"
            return 0
        fi
    done

    return 0
}
257
+
258
# Listen for voice input and print the resulting text on stdout.
#
# stdout carries ONLY the transcription, because callers capture it with
# command substitution; every log line, spoken-prompt fallback and recorder
# message is redirected to stderr.
#
# Returns 1 when no usable input method exists. Under `set -e` a cancelled
# dialog or failed recording aborts the script, since osascript and the
# recorder then exit non-zero.
listen() {
    local input_method
    input_method=$(check_voice_input)

    log "Voice input method: $input_method" >&2

    case "$input_method" in
        macos-dictation)
            log "Starting macOS Dictation..." >&2
            speak "Starting dictation. Press twice on Function key to begin, then speak your PRD." >&2

            # Instruction dialog. osascript prints the script result
            # ("button returned:OK") on stdout, so discard it.
            osascript >/dev/null <<'EOF'
tell application "System Events"
    display dialog "Click OK then press Fn twice to start dictation" buttons {"Cancel", "OK"} default button "OK"
end tell
EOF
            log "Waiting for dictation input..." >&2
            log "Press Fn twice to toggle dictation on/off" >&2

            # Input dialog. The entered text is returned as the script result,
            # which osascript prints on stdout. It is never passed through
            # `do shell script`, so shell metacharacters in the dictated text
            # ($(...), backticks, a literal HEREDOC line) are not interpreted.
            osascript <<'EOF'
tell application "System Events"
    set userInput to text returned of (display dialog "Dictate or type your PRD:" default answer "" buttons {"Cancel", "OK"} default button "OK" with title "Loki Mode Voice Input")
end tell
return userInput
EOF
            ;;

        whisper-api)
            log "Using Whisper API for transcription" >&2
            local audio_file
            audio_file=$(make_temp_file .wav)

            speak "Recording will start now. Speak your requirements." >&2
            record_audio "$audio_file" 30 >&2

            log "Transcribing..." >&2
            transcribe_whisper_api "$audio_file"
            ;;

        whisper)
            log "Using local Whisper for transcription" >&2
            local audio_file
            audio_file=$(make_temp_file .wav)

            speak "Recording will start now. Speak your requirements." >&2
            record_audio "$audio_file" 30 >&2

            log "Transcribing locally..." >&2
            transcribe_whisper_local "$audio_file"
            ;;

        vosk)
            # check_voice_input can report vosk, but there is no Vosk
            # transcription path here; fail loudly instead of returning an
            # empty transcription.
            log_error "Vosk was detected but Vosk transcription is not supported yet"
            log "Set OPENAI_API_KEY or install local Whisper: pip install openai-whisper" >&2
            return 1
            ;;

        none)
            log_error "No voice input method available"
            log "Options:" >&2
            log " 1. Enable macOS Dictation: System Settings > Keyboard > Dictation" >&2
            log " 2. Set OPENAI_API_KEY for Whisper API" >&2
            log " 3. Install local Whisper: pip install openai-whisper" >&2
            return 1
            ;;

        *)
            log_error "Unknown voice input method: $input_method"
            return 1
            ;;
    esac
}
333
+
334
# Dictate a PRD to a file through a guided, spoken question-and-answer flow.
#   $1 - output file path
# Asks for project name, overview, a list of requirements and the tech stack,
# writes them as Markdown to the output file, then prints the file path.
#
# The document is built with real newline characters and written with printf
# rather than `echo -e`, so backslash sequences in the dictated text
# (e.g. "C:\new\table") are stored literally instead of being interpreted.
dictate_to_file() {
    local output_file="$1"
    local nl=$'\n'

    log "Dictating to: $output_file"
    speak "Ready to create a PRD. I'll guide you through the sections."

    local content=""

    # Guide through PRD sections
    speak "First, what is the name of your project?"
    local project_name
    project_name=$(listen)
    content="# ${project_name}${nl}${nl}"

    speak "Great. Now describe the overview of your project."
    local overview
    overview=$(listen)
    content+="## Overview${nl}${overview}${nl}${nl}"

    speak "Now list your requirements. Say done when finished."
    content+="## Requirements${nl}"

    while true; do
        local requirement
        requirement=$(listen)

        # Use tr for bash 3.2 compatibility (macOS default)
        local requirement_lower
        requirement_lower=$(printf '%s' "$requirement" | tr '[:upper:]' '[:lower:]')
        # NOTE(review): this is a substring match, so a requirement that merely
        # contains "done" or "finish" also ends the list - confirm intended.
        if [[ "$requirement_lower" == *"done"* ]] || [[ "$requirement_lower" == *"finish"* ]]; then
            break
        fi

        content+="- [ ] ${requirement}${nl}"
        speak "Got it. Next requirement, or say done."
    done

    speak "What tech stack do you want to use?"
    local tech_stack
    tech_stack=$(listen)
    content+="${nl}## Tech Stack${nl}${tech_stack}${nl}"

    # Write to file (printf adds the same trailing newline echo used to add)
    printf '%s\n' "$content" > "$output_file"

    speak "PRD created at $output_file"
    log_success "PRD saved to: $output_file"
    echo "$output_file"
}
384
+
385
# Show voice capabilities status: platform, selected voice input method,
# text-to-speech engine and available audio recorder.
# The recorder check mirrors what the record_audio_* functions actually use:
# sox, then arecord (Linux only), then ffmpeg.
status() {
    local platform
    platform=$(detect_platform)

    echo "=== Loki Mode Voice Status ==="
    echo ""
    echo "Platform: $platform"
    echo ""

    echo "Voice Input:"
    local input_method
    input_method=$(check_voice_input)
    case "$input_method" in
        macos-dictation)
            echo " [OK] macOS Dictation enabled"
            ;;
        whisper-api)
            echo " [OK] Whisper API available (OPENAI_API_KEY set)"
            ;;
        whisper)
            echo " [OK] Local Whisper installed"
            ;;
        vosk)
            echo " [OK] Vosk speech recognition available"
            ;;
        none)
            echo " [--] No voice input available"
            echo " Recommendations:"
            echo " - macOS: Enable Dictation in System Settings > Keyboard"
            echo " - Set OPENAI_API_KEY for Whisper API"
            echo " - pip install openai-whisper for local transcription"
            ;;
    esac
    echo ""

    echo "Voice Output (TTS):"
    local output_method
    output_method=$(check_voice_output)
    case "$output_method" in
        say)
            echo " [OK] macOS 'say' command available"
            ;;
        espeak)
            echo " [OK] eSpeak available"
            ;;
        festival)
            echo " [OK] Festival TTS available"
            ;;
        none)
            echo " [--] No TTS available"
            ;;
    esac
    echo ""

    echo "Audio Recording:"
    if command -v sox &>/dev/null; then
        echo " [OK] sox installed"
    elif [[ "$platform" == "linux" ]] && command -v arecord &>/dev/null; then
        # record_audio_linux falls back to arecord before ffmpeg
        echo " [OK] arecord installed (fallback)"
    elif command -v ffmpeg &>/dev/null; then
        echo " [OK] ffmpeg installed (fallback)"
    elif [[ "$platform" == "linux" ]]; then
        echo " [--] No recording tool (install: apt install sox alsa-utils)"
    else
        echo " [--] No recording tool (install sox: brew install sox)"
    fi
}
449
+
450
# CLI entry point: dispatch the first argument as a command.
#   $1   - command (listen|speak|dictate|status|help); defaults to help
#   $2.. - command arguments (message words for speak, file for dictate)
main() {
    local command="${1:-help}"
    # Drop the command word when one was given
    if [[ $# -gt 0 ]]; then
        shift
    fi

    case "$command" in
        listen)
            listen
            ;;
        speak)
            if [[ $# -eq 0 ]]; then
                log_error "Usage: voice.sh speak MESSAGE"
                exit 1
            fi
            # All remaining words form one message
            speak "$*"
            ;;
        dictate)
            dictate_to_file "${1:-prd-voice.md}"
            ;;
        status)
            status
            ;;
        help|--help|-h)
            cat <<'EOF'
Loki Mode Voice Input

Usage: voice.sh <command> [options]

Commands:
 listen Listen for voice input and return text
 speak MESSAGE Text-to-speech output
 dictate [FILE] Guided PRD dictation (default: prd-voice.md)
 status Show voice capabilities

Environment:
 OPENAI_API_KEY Required for Whisper API transcription

Setup:
 macOS: Enable Dictation in System Settings > Keyboard
 Linux: Install sox and whisper: apt install sox && pip install openai-whisper
EOF
            ;;
        *)
            log_error "Unknown command: $command"
            echo "Run 'voice.sh help' for usage"
            exit 1
            ;;
    esac
}
498
+
499
+ # Run if executed directly
500
+ if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
501
+ main "$@"
502
+ fi
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "loki-mode",
3
- "version": "5.13.1",
3
+ "version": "5.14.1",
4
4
  "description": "Multi-agent autonomous startup system for Claude Code, Codex CLI, and Gemini CLI",
5
5
  "keywords": [
6
6
  "claude",