loki-mode 7.5.17 → 7.5.28

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/README.md +10 -9
  2. package/SKILL.md +14 -14
  3. package/VERSION +1 -1
  4. package/autonomy/completion-council.sh +26 -3
  5. package/autonomy/lib/claude-flags.sh +132 -0
  6. package/autonomy/lib/mcp-config.sh +160 -0
  7. package/autonomy/lib/project-graph.sh +685 -0
  8. package/autonomy/lib/voter-agents.sh +356 -0
  9. package/autonomy/loki +108 -111
  10. package/autonomy/run.sh +95 -186
  11. package/bin/loki +12 -1
  12. package/dashboard/__init__.py +1 -1
  13. package/dashboard/requirements.txt +13 -8
  14. package/dashboard/server.py +33 -15
  15. package/dashboard/static/index.html +298 -299
  16. package/docs/INSTALLATION.md +54 -21
  17. package/docs/retrospectives/v7.5.15-fleet-postmortem.md +325 -0
  18. package/docs/retrospectives/v7.5.15-honesty-audit.md +136 -0
  19. package/docs/retrospectives/v7.5.15-llm-failure-modes.md +49 -0
  20. package/loki-ts/data/finding-schema.json +74 -0
  21. package/loki-ts/data/model-pricing.json +12 -0
  22. package/loki-ts/dist/loki.js +198 -172
  23. package/mcp/__init__.py +1 -1
  24. package/mcp/lsp_proxy.py +713 -0
  25. package/mcp/requirements.txt +9 -3
  26. package/mcp/tests/__init__.py +0 -0
  27. package/mcp/tests/test_lsp_proxy.py +377 -0
  28. package/memory/app_graph.py +153 -0
  29. package/memory/storage.py +6 -1
  30. package/memory/tests/test_app_graph.py +134 -0
  31. package/package.json +4 -3
  32. package/providers/claude.sh +115 -4
  33. package/providers/codex.sh +2 -2
  34. package/providers/loader.sh +4 -4
  35. package/providers/model_catalog.json +0 -9
  36. package/providers/models.sh +1 -2
  37. package/references/multi-provider.md +26 -35
  38. package/references/prompt-repetition.md +1 -1
  39. package/references/quality-control.md +1 -1
  40. package/skills/00-index.md +3 -3
  41. package/skills/model-selection.md +11 -14
  42. package/skills/providers.md +17 -57
  43. package/skills/quality-gates.md +2 -2
  44. package/skills/troubleshooting.md +1 -1
  45. package/src/integrations/github/action-handler.js +3 -2
  46. package/src/protocols/tools/start-project.js +1 -1
  47. package/providers/gemini.sh +0 -343
@@ -1,35 +1,34 @@
1
1
  # Multi-Provider Support
2
2
 
3
- Loki Mode v5.0.0 supports five AI providers for autonomous execution.
3
+ Loki Mode supports four AI providers for autonomous execution.
4
4
 
5
5
  ## Provider Comparison
6
6
 
7
7
  > **CLI Flags Verified:** The autonomous mode flags have been verified against actual CLI help output:
8
8
  > - Claude: `--dangerously-skip-permissions` (verified)
9
9
  > - Codex: `--full-auto` (recommended, v0.98.0) or `exec --dangerously-bypass-approvals-and-sandbox` (legacy)
10
- > - Gemini: `--approval-mode=yolo` (v0.27.3+) - Note: `-p` prompt flag is deprecated, using positional prompts
11
-
12
- | Feature | Claude Code | OpenAI Codex | Gemini CLI | Cline CLI | Aider |
13
- |---------|-------------|--------------|------------|-----------|-------|
14
- | **Full Features** | Yes | No (Degraded) | No (Degraded) | Near-Full (Tier 2) | No (Degraded) |
15
- | **Task Tool (Subagents)** | Yes | No | No | Yes (Subagents) | No |
16
- | **Parallel Agents** | Yes (10+) | No | No | No | No |
17
- | **MCP Integration** | Yes | Yes (basic) | No | Yes | No |
18
- | **Context Window** | 200K | 400K | 1M | Varies by provider | Varies by provider |
19
- | **Max Output Tokens** | 128K | 32K | 64K | Varies by provider | Varies by provider |
20
- | **Model Tiers** | 3 (opus/sonnet/haiku) | 1 (effort param) | 1 (thinking param) | 1 (external) | 1 (external) |
21
- | **Multi-Provider** | Claude only | OpenAI only | Gemini only | 12+ providers | 18+ providers |
22
- | **Skill Directory** | ~/.claude/skills | None | None | None | None |
10
+
11
+ | Feature | Claude Code | OpenAI Codex | Cline CLI | Aider |
12
+ |---------|-------------|--------------|-----------|-------|
13
+ | **Full Features** | Yes | No (Degraded) | Near-Full (Tier 2) | No (Degraded) |
14
+ | **Task Tool (Subagents)** | Yes | No | Yes (Subagents) | No |
15
+ | **Parallel Agents** | Yes (10+) | No | No | No |
16
+ | **MCP Integration** | Yes | Yes (basic) | Yes | No |
17
+ | **Context Window** | 200K | 400K | Varies by provider | Varies by provider |
18
+ | **Max Output Tokens** | 128K | 32K | Varies by provider | Varies by provider |
19
+ | **Model Tiers** | 3 (opus/sonnet/haiku) | 1 (effort param) | 1 (external) | 1 (external) |
20
+ | **Multi-Provider** | Claude only | OpenAI only | 12+ providers | 18+ providers |
21
+ | **Skill Directory** | ~/.claude/skills | None | None | None |
23
22
 
24
23
  ## Provider Selection
25
24
 
26
25
  ```bash
27
26
  # Via environment variable
28
- export LOKI_PROVIDER=claude # or codex, gemini
27
+ export LOKI_PROVIDER=claude # or codex, cline, aider
29
28
 
30
29
  # Via CLI flag
31
30
  ./autonomy/run.sh --provider codex ./prd.md
32
- loki start --provider gemini ./prd.md
31
+ loki start --provider cline ./prd.md
33
32
  ```
34
33
 
35
34
  ## Claude Code (Default, Full Features)
@@ -94,42 +93,6 @@ CODEX_MODEL_REASONING_EFFORT=high codex exec --dangerously-bypass-approvals-and-
94
93
 
95
94
  ---
96
95
 
97
- ## Google Gemini CLI (Degraded Mode)
98
-
99
- **Best for:** Teams standardized on Google. Large context needs (1M tokens).
100
-
101
- **Limitations:**
102
- - No Task tool (cannot spawn subagents)
103
- - No parallel execution (sequential only)
104
- - No MCP integration
105
- - Single model with thinking_level parameter
106
- - 1M context window (largest)
107
-
108
- **Invocation:**
109
- ```bash
110
- # Note: -p flag is DEPRECATED. Using positional prompt.
111
- gemini --approval-mode=yolo "$prompt"
112
- ```
113
-
114
- **Model Tiers via Thinking Level (settings.json, not CLI flag):**
115
-
116
- Note: Gemini CLI does not support `--thinking-level` as a CLI flag. Thinking mode must be configured in `~/.gemini/settings.json`.
117
-
118
- ```json
119
- // ~/.gemini/settings.json
120
- {
121
- "thinkingMode": "medium" // high, medium, low
122
- }
123
- ```
124
-
125
- | Tier | Thinking | Use Case |
126
- |------|----------|----------|
127
- | planning | high | Architecture, PRD analysis |
128
- | development | medium | Feature implementation, tests |
129
- | fast | low | Simple fixes, docs |
130
-
131
- ---
132
-
133
96
  ## Cline CLI (Tier 2 - Near-Full, 12+ Providers)
134
97
 
135
98
  **Best for:** Teams wanting Claude Code-like experience with any model provider.
@@ -248,13 +211,12 @@ aider --message "$prompt" --yes-always --no-auto-commits --model model_name
248
211
 
249
212
  ## Degraded Mode Behavior
250
213
 
251
- When running with Codex, Gemini, or Aider (Tier 3):
214
+ When running with Codex or Aider (Tier 3):
252
215
 
253
216
  1. **RARV Cycle executes sequentially** - No parallel agents
254
217
  2. **Task tool calls are skipped** - Main thread handles all work
255
218
  3. **Model tier maps to provider configuration:**
256
219
  - Codex: `CODEX_MODEL_REASONING_EFFORT` env var (xhigh/high/medium/low)
257
- - Gemini: `~/.gemini/settings.json` thinkingMode (high/medium/low)
258
220
  4. **Quality gates run sequentially** - No 3-reviewer parallel review
259
221
  5. **Git worktree parallelism disabled** - `--parallel` flag has no effect
260
222
 
@@ -277,7 +239,6 @@ Provider configs are shell-sourceable files in `providers/`:
277
239
  providers/
278
240
  claude.sh # Full-featured provider (Tier 1)
279
241
  codex.sh # Degraded mode, effort parameter (Tier 3)
280
- gemini.sh # Degraded mode, thinking_level parameter (Tier 3)
281
242
  cline.sh # Near-full mode, 12+ providers (Tier 2)
282
243
  aider.sh # Degraded mode, 18+ providers (Tier 3)
283
244
  loader.sh # Provider loader utility
@@ -303,8 +264,7 @@ PROVIDER_DEGRADED=false
303
264
  | MCP server integration | Claude (full), Cline, or Codex (basic) |
304
265
  | Subagents without Claude subscription | Cline |
305
266
  | OpenAI ecosystem compatibility | Codex |
306
- | Largest context window (1M) | Gemini |
307
267
  | Maximum provider flexibility (18+) | Aider |
308
268
  | Local models (Ollama, free) | Aider or Cline |
309
269
  | Architect mode (dual model) | Aider |
310
- | Sequential-only is acceptable | Codex, Gemini, or Aider |
270
+ | Sequential-only is acceptable | Codex or Aider |
@@ -113,10 +113,10 @@ the Python episodic memory layer via `memory.engine.save_episode`.
113
113
  **Override-judge knobs (v7.5.4+):**
114
114
 
115
115
  ```bash
116
- LOKI_OVERRIDE_JUDGES=claude,gemini # csv of provider names for the
116
+ LOKI_OVERRIDE_JUDGES=claude,codex # csv of provider names for the
117
117
  # 3-judge override council. Defaults
118
118
  # to the available installed providers
119
- # (claude, codex, gemini, cline, aider).
119
+ # (claude, codex, cline, aider).
120
120
  LOKI_OVERRIDE_REAL_JUDGE=0 # force the deterministic stub-judge
121
121
  # path (hermetic CI / cost control).
122
122
  # Default: 1 = real provider-backed
@@ -229,7 +229,7 @@ The circuit breaker prevents cascading failures by temporarily disabling operati
229
229
  "cooldown_until": "2025-01-20T10:40:42Z",
230
230
  "failure_window_start": "2025-01-20T10:34:50Z"
231
231
  },
232
- "api/gemini": {
232
+ "api/codex": {
233
233
  "state": "HALF_OPEN",
234
234
  "failure_count": 0,
235
235
  "success_count": 1,
@@ -12,7 +12,7 @@
12
12
  * Allowed provider values. The workflow_dispatch REST API can supply any
13
13
  * string even when the YAML declares a choice type, so we validate here.
14
14
  */
15
- var ALLOWED_PROVIDERS = ['claude', 'codex', 'gemini'];
15
+ var ALLOWED_PROVIDERS = ['claude', 'codex', 'cline', 'aider'];
16
16
 
17
17
  /**
18
18
  * Label-to-configuration mapping.
@@ -23,7 +23,8 @@ const LABEL_CONFIG_MAP = {
23
23
  'loki-priority-high': { priority: 'high' },
24
24
  'loki-priority-low': { priority: 'low' },
25
25
  'loki-provider-codex': { provider: 'codex' },
26
- 'loki-provider-gemini': { provider: 'gemini' },
26
+ 'loki-provider-cline': { provider: 'cline' },
27
+ 'loki-provider-aider': { provider: 'aider' },
27
28
  'loki-dry-run': { dryRun: true },
28
29
  };
29
30
 
@@ -25,7 +25,7 @@ const schema = {
25
25
  },
26
26
  provider: {
27
27
  type: 'string',
28
- enum: ['claude', 'codex', 'gemini'],
28
+ enum: ['claude', 'codex', 'cline', 'aider'],
29
29
  description: 'AI provider to use (default: claude)',
30
30
  default: 'claude'
31
31
  }
@@ -1,343 +0,0 @@
1
- #!/usr/bin/env bash
2
- # Google Gemini CLI Provider Configuration
3
- # Shell-sourceable config for loki-mode multi-provider support
4
-
5
- # Provider Functions (for external use)
6
- # =====================================
7
- # These functions provide a clean interface for external scripts:
8
- # provider_detect() - Check if CLI is installed
9
- # provider_version() - Get CLI version
10
- # provider_invoke() - Invoke with prompt (autonomous mode)
11
- # provider_invoke_with_tier() - Invoke with tier-specific thinking level
12
- # provider_get_tier_param() - Map tier name to thinking level
13
- #
14
- # Usage:
15
- # source providers/gemini.sh
16
- # if provider_detect; then
17
- # provider_invoke "Your prompt here"
18
- # fi
19
- #
20
- # Note: autonomy/run.sh uses inline invocation for streaming support
21
- # and real-time agent tracking. These functions are intended for
22
- # simpler scripts, wrappers, and external integrations.
23
- # =====================================
24
-
25
- # Provider Identity
26
- PROVIDER_NAME="gemini"
27
- PROVIDER_DISPLAY_NAME="Google Gemini CLI"
28
- PROVIDER_CLI="gemini"
29
-
30
- # CLI Invocation
31
- # VERIFIED: --approval-mode=yolo is the unified approach (replaces legacy --yolo)
32
- # Sandbox enabled by default in yolo mode
33
- PROVIDER_AUTONOMOUS_FLAG="--approval-mode=yolo"
34
- # NOTE: -p flag is DEPRECATED per gemini --help. Using positional prompt instead.
35
- PROVIDER_PROMPT_FLAG=""
36
- PROVIDER_PROMPT_POSITIONAL=true
37
-
38
- # Skill System
39
- # Note: Gemini CLI does not have a native skills system
40
- PROVIDER_SKILL_DIR=""
41
- PROVIDER_SKILL_FORMAT="none"
42
-
43
- # Capability Flags
44
- PROVIDER_HAS_SUBAGENTS=false
45
- PROVIDER_HAS_PARALLEL=false
46
- PROVIDER_HAS_TASK_TOOL=false
47
- PROVIDER_HAS_MCP=false
48
- PROVIDER_MAX_PARALLEL=1
49
-
50
- # Model Configuration
51
- # Gemini CLI supports --model flag to specify model
52
- # Primary: gemini-3-pro-preview (preview names - may change when GA is released)
53
- # Fallback: gemini-3-flash-preview (for rate limit scenarios)
54
- GEMINI_DEFAULT_PRO="gemini-3-pro-preview"
55
- GEMINI_DEFAULT_FLASH="gemini-3-flash-preview"
56
-
57
- # Known valid Gemini model prefixes for validation
58
- GEMINI_KNOWN_MODELS=("gemini-" "models/gemini-")
59
-
60
- # Validate that a model name looks like a Gemini model
61
- _gemini_validate_model() {
62
- local model="$1"
63
- local fallback="$2"
64
- for prefix in "${GEMINI_KNOWN_MODELS[@]}"; do
65
- if [[ "$model" == ${prefix}* ]]; then
66
- echo "$model"
67
- return 0
68
- fi
69
- done
70
- # Not a valid Gemini model name -- fall back
71
- echo "$fallback"
72
- }
73
-
74
- PROVIDER_MODEL_PLANNING="$(_gemini_validate_model "${LOKI_GEMINI_MODEL_PLANNING:-${LOKI_MODEL_PLANNING:-$GEMINI_DEFAULT_PRO}}" "$GEMINI_DEFAULT_PRO")"
75
- PROVIDER_MODEL_DEVELOPMENT="$(_gemini_validate_model "${LOKI_GEMINI_MODEL_DEVELOPMENT:-${LOKI_MODEL_DEVELOPMENT:-$GEMINI_DEFAULT_PRO}}" "$GEMINI_DEFAULT_PRO")"
76
- PROVIDER_MODEL_FAST="$(_gemini_validate_model "${LOKI_GEMINI_MODEL_FAST:-${LOKI_MODEL_FAST:-$GEMINI_DEFAULT_FLASH}}" "$GEMINI_DEFAULT_FLASH")"
77
- PROVIDER_MODEL_FALLBACK="${LOKI_GEMINI_MODEL_FALLBACK:-$GEMINI_DEFAULT_FLASH}"
78
-
79
- # BUG-PROV-006 fix: PROVIDER_MODEL is now a function, not a frozen variable.
80
- # For backward compatibility, set the variable to planning model at load time,
81
- # but callers should use provider_get_current_model() for runtime resolution.
82
- PROVIDER_MODEL="${PROVIDER_MODEL_PLANNING}"
83
-
84
- # Return the model for the current tier at runtime (not frozen at load time)
85
- provider_get_current_model() {
86
- local tier="${LOKI_CURRENT_TIER:-planning}"
87
- resolve_model_for_tier "$tier"
88
- }
89
-
90
- # Thinking levels (Gemini-specific: maps to reasoning depth)
91
- PROVIDER_THINKING_PLANNING="high"
92
- PROVIDER_THINKING_DEVELOPMENT="medium"
93
- PROVIDER_THINKING_FAST="low"
94
-
95
- # No Task tool - thinking level is set via CLI flag
96
- PROVIDER_TASK_MODEL_PARAM=""
97
- PROVIDER_TASK_MODEL_VALUES=()
98
-
99
- # Context and Limits
100
- PROVIDER_CONTEXT_WINDOW=1000000 # Gemini 3 has 1M context
101
- PROVIDER_MAX_OUTPUT_TOKENS=65536
102
- # Rate limit varies by tier: Free=5-15 RPM, Tier1=150+ RPM, Tier2=500+ RPM
103
- # Default to conservative free-tier value; override with LOKI_GEMINI_RPM env var
104
- PROVIDER_RATE_LIMIT_RPM="${LOKI_GEMINI_RPM:-15}"
105
-
106
- # Cost (USD per 1K tokens, approximate for Gemini 3 Pro)
107
- PROVIDER_COST_INPUT_PLANNING=0.00125
108
- PROVIDER_COST_OUTPUT_PLANNING=0.005
109
- PROVIDER_COST_INPUT_DEV=0.00125
110
- PROVIDER_COST_OUTPUT_DEV=0.005
111
- PROVIDER_COST_INPUT_FAST=0.00125
112
- PROVIDER_COST_OUTPUT_FAST=0.005
113
-
114
- # Degraded Mode
115
- PROVIDER_DEGRADED=true
116
- PROVIDER_DEGRADED_REASONS=(
117
- "No Task tool subagent support - cannot spawn parallel agents"
118
- "Single model with thinking_level parameter - no cheap tier for parallelization"
119
- "No native skills system - SKILL.md must be passed via prompt"
120
- "No MCP server integration"
121
- )
122
-
123
- # BUG-PROV-003 fix: API key resolution with fallback and rotation support.
124
- # Gemini CLI accepts GOOGLE_API_KEY or GEMINI_API_KEY env vars.
125
- # If LOKI_GEMINI_API_KEYS is set (comma-separated), rotate through them on auth errors.
126
- # This function sets GOOGLE_API_KEY for the current invocation.
127
- _gemini_resolve_api_key() {
128
- # Already have a key set -- nothing to do
129
- if [ -n "${GOOGLE_API_KEY:-}" ]; then
130
- return 0
131
- fi
132
- # Try GEMINI_API_KEY as alias
133
- if [ -n "${GEMINI_API_KEY:-}" ]; then
134
- export GOOGLE_API_KEY="$GEMINI_API_KEY"
135
- return 0
136
- fi
137
- # Try gcloud ADC (Application Default Credentials) -- gemini CLI supports this natively
138
- if [ -f "${HOME}/.config/gcloud/application_default_credentials.json" ]; then
139
- return 0 # Let gemini CLI handle ADC
140
- fi
141
- return 1
142
- }
143
-
144
- # Rotate to next API key from LOKI_GEMINI_API_KEYS (comma-separated list)
145
- # Called after auth errors (401/403) to try the next key
146
- _gemini_rotate_api_key() {
147
- local keys="${LOKI_GEMINI_API_KEYS:-}"
148
- [ -z "$keys" ] && return 1 # No key list configured
149
-
150
- local current="${GOOGLE_API_KEY:-}"
151
- local IFS=','
152
- local found_current=false
153
- local first_key=""
154
-
155
- for key in $keys; do
156
- key=$(echo "$key" | tr -d ' ') # trim whitespace
157
- [ -z "$key" ] && continue
158
- [ -z "$first_key" ] && first_key="$key"
159
-
160
- if [ "$found_current" = "true" ]; then
161
- export GOOGLE_API_KEY="$key"
162
- return 0
163
- fi
164
- if [ "$key" = "$current" ]; then
165
- found_current=true
166
- fi
167
- done
168
-
169
- # Wrap around to first key (or set first key if current wasn't in list)
170
- if [ -n "$first_key" ] && [ "$first_key" != "$current" ]; then
171
- export GOOGLE_API_KEY="$first_key"
172
- return 0
173
- fi
174
-
175
- return 1 # All keys exhausted or only one key
176
- }
177
-
178
- # Detection function - check if provider CLI is available
179
- provider_detect() {
180
- command -v gemini >/dev/null 2>&1
181
- }
182
-
183
- # Version check function
184
- provider_version() {
185
- gemini --version 2>/dev/null | head -1
186
- }
187
-
188
- # Invocation function with rate limit fallback and API key rotation
189
- # Uses --model flag to specify model, --approval-mode=yolo for autonomous mode
190
- # Falls back to flash model if pro hits rate limit
191
- # BUG-PROV-003 fix: rotates API keys on auth errors (401/403)
192
- # Accepts optional --model <name> as first args to override default model
193
- # BUG-PROV-010 fix: uses tee to stream output while still capturing for rate-limit check
194
- # Note: < /dev/null prevents Gemini from pausing on stdin
195
- provider_invoke() {
196
- # Resolve API key before invocation
197
- _gemini_resolve_api_key || true
198
-
199
- local model
200
- model=$(provider_get_current_model)
201
-
202
- # Allow callers to pass --model <name> to override
203
- if [[ "${1:-}" == "--model" ]] && [[ -n "${2:-}" ]]; then
204
- model="$2"
205
- shift 2
206
- fi
207
-
208
- local prompt="$1"
209
- shift
210
- local exit_code
211
-
212
- # Stream output via tee while capturing for rate-limit check
213
- local output_file stderr_file
214
- output_file=$(mktemp)
215
- stderr_file=$(mktemp)
216
- gemini --approval-mode=yolo --model "$model" "$prompt" "$@" < /dev/null 2>"$stderr_file" | tee "$output_file"
217
- exit_code=${PIPESTATUS[0]}
218
-
219
- # Check for auth errors (401/403) -- try rotating API key
220
- if [[ $exit_code -ne 0 ]] && grep -qiE "(401|403|unauthorized|forbidden|invalid.?api.?key|permission.?denied)" "$stderr_file" 2>/dev/null; then
221
- if _gemini_rotate_api_key; then
222
- echo "[loki] Auth error on Gemini, rotated to next API key" >&2
223
- rm -f "$stderr_file" "$output_file"
224
- output_file=$(mktemp)
225
- stderr_file=$(mktemp)
226
- gemini --approval-mode=yolo --model "$model" "$prompt" "$@" < /dev/null 2>"$stderr_file" | tee "$output_file"
227
- exit_code=${PIPESTATUS[0]}
228
- fi
229
- fi
230
-
231
- # Check for rate limit (429) or quota exceeded (check stderr for error indicators)
232
- if [[ $exit_code -ne 0 ]] && grep -qiE "(rate.?limit|429|quota|resource.?exhausted)" "$stderr_file" 2>/dev/null; then
233
- rm -f "$stderr_file" "$output_file"
234
- echo "[loki] Rate limit hit on $model, falling back to $PROVIDER_MODEL_FALLBACK" >&2
235
- gemini --approval-mode=yolo --model "$PROVIDER_MODEL_FALLBACK" "$prompt" "$@" < /dev/null
236
- else
237
- rm -f "$stderr_file" "$output_file"
238
- return $exit_code
239
- fi
240
- }
241
-
242
- # Model tier to thinking level parameter
243
- provider_get_tier_param() {
244
- local tier="$1"
245
- case "$tier" in
246
- planning) echo "high" ;;
247
- development) echo "medium" ;;
248
- fast) echo "low" ;;
249
- *) echo "medium" ;; # default to development tier
250
- esac
251
- }
252
-
253
- # Dynamic model resolution (v6.0.0)
254
- # Resolves a capability tier to a concrete model name at runtime.
255
- # Respects LOKI_MAX_TIER to cap cost.
256
- resolve_model_for_tier() {
257
- local tier="$1"
258
-
259
- # Handle capability aliases
260
- case "$tier" in
261
- best) tier="planning" ;;
262
- balanced) tier="development" ;;
263
- cheap) tier="fast" ;;
264
- esac
265
-
266
- local max_tier="${LOKI_MAX_TIER:-}"
267
- local model=""
268
-
269
- case "$tier" in
270
- planning) model="$PROVIDER_MODEL_PLANNING" ;;
271
- development) model="$PROVIDER_MODEL_DEVELOPMENT" ;;
272
- fast) model="$PROVIDER_MODEL_FAST" ;;
273
- *) model="$PROVIDER_MODEL_DEVELOPMENT" ;;
274
- esac
275
-
276
- # Apply maxTier ceiling
277
- if [ -n "$max_tier" ]; then
278
- case "$max_tier" in
279
- haiku|flash)
280
- model="$PROVIDER_MODEL_FAST"
281
- ;;
282
- sonnet|pro)
283
- # Cap planning to development (pro)
284
- if [ "$tier" = "planning" ]; then
285
- model="$PROVIDER_MODEL_DEVELOPMENT"
286
- fi
287
- ;;
288
- opus) ;; # No cap
289
- esac
290
- fi
291
-
292
- echo "$model"
293
- }
294
-
295
- # Tier-aware invocation with rate limit fallback and API key rotation
296
- # BUG-PROV-001 fix: uses resolve_model_for_tier to select actual model for the tier
297
- # BUG-PROV-003 fix: rotates API keys on auth errors (401/403)
298
- # BUG-PROV-010 fix: uses tee to stream output while capturing for rate-limit check
299
- # Note: < /dev/null prevents Gemini from pausing on stdin
300
- provider_invoke_with_tier() {
301
- # Resolve API key before invocation
302
- _gemini_resolve_api_key || true
303
-
304
- local tier="$1"
305
- local prompt="$2"
306
- shift 2
307
-
308
- local model
309
- model=$(resolve_model_for_tier "$tier")
310
-
311
- echo "[loki] Using tier: $tier, model: $model" >&2
312
-
313
- local exit_code
314
-
315
- # Stream output via tee while capturing for rate-limit check
316
- local output_file stderr_file
317
- output_file=$(mktemp)
318
- stderr_file=$(mktemp)
319
- gemini --approval-mode=yolo --model "$model" "$prompt" "$@" < /dev/null 2>"$stderr_file" | tee "$output_file"
320
- exit_code=${PIPESTATUS[0]}
321
-
322
- # Check for auth errors (401/403) -- try rotating API key
323
- if [[ $exit_code -ne 0 ]] && grep -qiE "(401|403|unauthorized|forbidden|invalid.?api.?key|permission.?denied)" "$stderr_file" 2>/dev/null; then
324
- if _gemini_rotate_api_key; then
325
- echo "[loki] Auth error on Gemini, rotated to next API key" >&2
326
- rm -f "$stderr_file" "$output_file"
327
- output_file=$(mktemp)
328
- stderr_file=$(mktemp)
329
- gemini --approval-mode=yolo --model "$model" "$prompt" "$@" < /dev/null 2>"$stderr_file" | tee "$output_file"
330
- exit_code=${PIPESTATUS[0]}
331
- fi
332
- fi
333
-
334
- # Check for rate limit (429) or quota exceeded - fallback to flash
335
- if [[ $exit_code -ne 0 ]] && grep -qiE "(rate.?limit|429|quota|resource.?exhausted)" "$stderr_file" 2>/dev/null; then
336
- rm -f "$stderr_file" "$output_file"
337
- echo "[loki] Rate limit hit on $model, falling back to $PROVIDER_MODEL_FALLBACK" >&2
338
- gemini --approval-mode=yolo --model "$PROVIDER_MODEL_FALLBACK" "$prompt" "$@" < /dev/null
339
- else
340
- rm -f "$stderr_file" "$output_file"
341
- return $exit_code
342
- fi
343
- }