npm - opencode-provider-litellm - Versions diffs - 0.5.1 → 0.6.0 - Mend

opencode-provider-litellm 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/README.md CHANGED Viewed

@@ -21,34 +21,39 @@ All models and MCP tools from your LiteLLM proxy appear in OpenCode automaticall
 ### Environment variables
-| Variable | Description |
-|----------|-------------|
-| `LITELLM_URL` | Your LiteLLM proxy base URL |
-| `LITELLM_KEY` | API key for the proxy |
-| `LITELLM_PROVIDER_ID` | Provider ID in OpenCode (defaults to `LiteLLM`) |
-| `LITELLM_GCLOUD_TOKEN_AUTH` | Set to `1` to use Google ADC for auth (makes `LITELLM_KEY` optional) |
-| `GOOGLE_APPLICATION_CREDENTIALS` | Path to a Google ADC JSON file (used when `LITELLM_GCLOUD_TOKEN_AUTH=1`) |
+Environment variables take precedence over all other configuration. Use them to keep secrets out of checked-in files.
-### Inline config
+| Variable | Required | Description |
+|----------|----------|-------------|
+| `LITELLM_URL` | Yes | Your LiteLLM proxy base URL |
+| `LITELLM_KEY` | Yes* | API key for the proxy |
+| `LITELLM_PROVIDER_ID` | No | Provider name in OpenCode (default: `LiteLLM`) |
+| `LITELLM_GCLOUD_TOKEN_AUTH` | No | Set to `1` to use Google ADC for auth — makes `LITELLM_KEY` optional |
+| `GOOGLE_APPLICATION_CREDENTIALS` | No | Path to a Google ADC JSON file (used when `LITELLM_GCLOUD_TOKEN_AUTH=1`) |
-Alternatively, provide `url` and `apiKey` directly in your `opencode.json`:
+*`LITELLM_KEY` is optional when `LITELLM_GCLOUD_TOKEN_AUTH=1`.
+### Plugin options
+All env vars have an equivalent in the plugin options block in `opencode.json`. **Env vars take precedence** — use options for defaults that can be overridden per environment.
 ```jsonc
 {
   "plugin": [
     ["opencode-provider-litellm", {
-      "url": "https://your-litellm-proxy.example.com",
-      "apiKey": "sk-..."
+      "url": "https://your-litellm-proxy.example.com",  // LITELLM_URL
+      "apiKey": "sk-...",                                // LITELLM_KEY
+      "providerName": "MyLiteLLM",                       // LITELLM_PROVIDER_ID
+      "gcloudTokenAuth": true                            // LITELLM_GCLOUD_TOKEN_AUTH=1
+      // apiKey can be omitted when gcloudTokenAuth is true
     }]
   ]
 }
 ```
-> **Tip:** Environment variables take precedence over inline config. Use env vars to keep secrets out of checked-in files.
 ### Google Vertex AI (gcloud token auth)
-When your LiteLLM proxy is backed by Google Vertex AI, you can skip `LITELLM_KEY` and let the plugin automatically fetch a gcloud OAuth token:
+When your LiteLLM proxy is backed by Google Vertex AI, you can skip `LITELLM_KEY` and let the plugin automatically fetch a Google OAuth token:
 ```bash
 # 1. Authenticate with gcloud (creates an ADC JSON file)
@@ -62,9 +67,12 @@ export LITELLM_GCLOUD_TOKEN_AUTH=1
 opencode plugin opencode-provider-litellm
 ```
-The plugin reads your [Application Default Credentials](https://cloud.google.com/docs/authentication/application-default-credentials) JSON file and exchanges the refresh token for an access token before every LLM request. Tokens are cached for 50 minutes.
+The plugin reads your [Application Default Credentials](https://cloud.google.com/docs/authentication/application-default-credentials) JSON file and exchanges the refresh token for an access token. Tokens are cached for 50 minutes and used for both model discovery at startup and every LLM request.
-To use a custom credentials file, set `GOOGLE_APPLICATION_CREDENTIALS` to its path.
+**ADC file locations searched (in order):**
+1. `GOOGLE_APPLICATION_CREDENTIALS` env var (all platforms)
+2. `~/.config/gcloud/application_default_credentials.json` (Linux/macOS)
+3. `%APPDATA%/gcloud/application_default_credentials.json` (Windows)
 > **Note:** Only `authorized_user` credentials (from `gcloud auth application-default login`) are supported. Service account keys are not yet supported.
@@ -109,22 +117,23 @@ Skills appear in OpenCode's `/skills` menu and are loaded natively by the agent.
 ## How it works
-The plugin uses three OpenCode hooks:
+The plugin uses these OpenCode hooks:
 | Hook | Purpose |
 |------|---------|
 | `config` | Discovers models from LiteLLM and injects them into OpenCode |
 | `auth` | Provides a `/connect` entry point for pasting an API key |
 | `tool` | Exposes discovered MCP tools as native OpenCode tools |
-| `chat.headers` | Injects `Authorization: Bearer <token>` when `LITELLM_GCLOUD_TOKEN_AUTH=1` |
+| `chat.headers` | Injects `Authorization: Bearer <token>` when gcloud token auth is enabled |
 ## Troubleshooting
 | Problem | Solution |
 |---------|----------|
-| "Plugin config error" | Set `LITELLM_URL` and `LITELLM_KEY`, or add `url`/`apiKey` to plugin options |
+| "Plugin config error" | Set `LITELLM_URL` and `LITELLM_KEY` (or `LITELLM_GCLOUD_TOKEN_AUTH=1`) |
 | "Access denied" (403) | Verify the API key has access to the LiteLLM proxy |
-| "No models discovered" | Check that the proxy is reachable and the `/health` endpoint responds |
+| "No models discovered" | Check the proxy is reachable and `/health` responds |
+| No models with gcloud auth | Verify `gcloud auth application-default login` has been run and the ADC file exists |
 | Skills not showing | Verify the proxy-sidecar is running and the skills URL is in `opencode.json` |
 ## Development

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "opencode-provider-litellm",
-  "version": "0.5.1",
+  "version": "0.6.0",
   "description": "OpenCode plugin for any LiteLLM proxy — auto-discovers models, auth, and capabilities",
   "type": "module",
   "exports": {

package/src/discovery.test.ts CHANGED Viewed

@@ -83,7 +83,7 @@ describe('discoverModels', () => {
         tool_call: true,
         reasoning: false,
         limit: { context: 8192, output: 8192 },
-        cost: { input: 0.0001, output: 0.0003 },
+        cost: { input: 100, output: 300 },
         modalities: { input: ['text'], output: ['text'] },
       },
       'qwen3-32b': {
@@ -91,7 +91,7 @@ describe('discoverModels', () => {
         tool_call: true,
         reasoning: true,
         limit: { context: 32768, output: 32768 },
-        cost: { input: 0.00005, output: 0.00015 },
+        cost: { input: 50, output: 150 },
         modalities: { input: ['text'], output: ['text'] },
       },
     })
@@ -107,6 +107,51 @@ describe('discoverModels', () => {
     expect(getToken).toHaveBeenCalled()
   })
+  it('converts per-token cost to per-1M tokens with cache costs', async () => {
+    const mockFetch = vi.fn()
+      .mockResolvedValueOnce({
+        ok: true,
+        status: 200,
+        json: async () => ({
+          healthy_endpoints: [
+            { model: 'anthropic/claude-sonnet', model_id: 'uuid-1' },
+          ],
+        }),
+      })
+      .mockResolvedValueOnce({
+        ok: true,
+        status: 200,
+        json: async () => ({
+          data: [{
+            model_name: 'anthropic/claude-sonnet',
+            model_info: {
+              max_input_tokens: 1_000_000,
+              max_output_tokens: 64_000,
+              supports_function_calling: true,
+              supports_reasoning: true,
+              supports_vision: true,
+              supports_pdf_input: true,
+              // Per-token costs (LiteLLM format)
+              input_cost_per_token: 0.000005,
+              output_cost_per_token: 0.000025,
+              cache_read_input_token_cost: 0.0000005,
+              cache_creation_input_token_cost: 0.00000375,
+            },
+          }],
+        }),
+      })
+    vi.stubGlobal('fetch', mockFetch)
+    const result = await discoverModels(config, getToken)
+    expect(result['anthropic/claude-sonnet']?.cost).toEqual({
+      input: 5,         // 0.000005 * 1M
+      output: 25,       // 0.000025 * 1M
+      cache_read: 0.5,  // 0.0000005 * 1M
+      cache_write: 3.75, // 0.00000375 * 1M
+    })
+  })
   it('returns empty object on timeout', async () => {
     vi.useFakeTimers()

package/src/discovery.ts CHANGED Viewed

@@ -117,16 +117,17 @@ export async function discoverModels(
       }
       // Add cost info if available
+      // LiteLLM returns cost per single token; opencode expects cost per 1M tokens
       if (info.input_cost_per_token != null && info.output_cost_per_token != null) {
         modelConfig.cost = {
-          input: info.input_cost_per_token,
-          output: info.output_cost_per_token,
+          input: info.input_cost_per_token * 1_000_000,
+          output: info.output_cost_per_token * 1_000_000,
         }
         if (info.cache_read_input_token_cost != null) {
-          modelConfig.cost.cache_read = info.cache_read_input_token_cost
+          modelConfig.cost.cache_read = info.cache_read_input_token_cost * 1_000_000
         }
         if (info.cache_creation_input_token_cost != null) {
-          modelConfig.cost.cache_write = info.cache_creation_input_token_cost
+          modelConfig.cost.cache_write = info.cache_creation_input_token_cost * 1_000_000
         }
       }

package/src/model-cache.test.ts ADDED Viewed

@@ -0,0 +1,115 @@
+import { describe, it, expect, vi, afterEach } from 'vitest'
+import type { OpencodeModelConfig } from './types.js'
+const mockReadFileSync = vi.hoisted(() => vi.fn())
+const mockWriteFileSync = vi.hoisted(() => vi.fn())
+vi.mock('fs', () => ({
+  existsSync: vi.fn(() => true),
+  readFileSync: mockReadFileSync,
+  writeFileSync: mockWriteFileSync,
+}))
+vi.mock('os', () => ({
+  homedir: () => '/home/test',
+}))
+const { loadModelCache, saveModelCache } = await import('./model-cache.js')
+const sampleModels: Record<string, OpencodeModelConfig> = {
+  'anthropic/claude-sonnet': {
+    name: 'anthropic/claude-sonnet',
+    tool_call: true,
+    reasoning: true,
+    limit: { context: 1_000_000, output: 64_000 },
+    cost: { input: 3, output: 15, cache_read: 0.3, cache_write: 3.75 },
+    modalities: { input: ['text', 'image', 'pdf'], output: ['text'] },
+  },
+  'qwen/qwen3.6-27b': {
+    name: 'qwen/qwen3.6-27b',
+    tool_call: true,
+    reasoning: false,
+    limit: { context: 262144, output: 32768 },
+    modalities: { input: ['text'], output: ['text'] },
+  },
+}
+describe('loadModelCache', () => {
+  afterEach(() => vi.clearAllMocks())
+  it('returns null when file does not exist', () => {
+    mockReadFileSync.mockImplementation(() => { throw new Error('ENOENT') })
+    expect(loadModelCache('protector')).toBeNull()
+  })
+  it('returns null when file contains invalid JSON', () => {
+    mockReadFileSync.mockReturnValue('not valid json{{{')
+    expect(loadModelCache('protector')).toBeNull()
+  })
+  it('returns null when providerId does not match', () => {
+    mockReadFileSync.mockReturnValue(JSON.stringify({
+      savedAt: Date.now(), providerId: 'other', models: sampleModels,
+    }))
+    expect(loadModelCache('protector')).toBeNull()
+  })
+  it('returns null when models field is missing', () => {
+    mockReadFileSync.mockReturnValue(JSON.stringify({
+      savedAt: Date.now(), providerId: 'protector', models: null,
+    }))
+    expect(loadModelCache('protector')).toBeNull()
+  })
+  it('returns models when cache is valid', () => {
+    mockReadFileSync.mockReturnValue(JSON.stringify({
+      savedAt: Date.now(), providerId: 'protector', models: sampleModels,
+    }))
+    expect(loadModelCache('protector')).toEqual(sampleModels)
+  })
+  it('reads from the correct path', () => {
+    mockReadFileSync.mockReturnValue(JSON.stringify({
+      savedAt: Date.now(), providerId: 'protector', models: sampleModels,
+    }))
+    loadModelCache('protector')
+    expect(mockReadFileSync).toHaveBeenCalledWith(
+      expect.stringContaining('opencode-provider-litellm-cache.json'),
+      'utf-8',
+    )
+  })
+})
+describe('saveModelCache', () => {
+  afterEach(() => vi.clearAllMocks())
+  it('writes a valid cache file', () => {
+    saveModelCache('protector', sampleModels)
+    expect(mockWriteFileSync).toHaveBeenCalledOnce()
+    const [filePath, content] = mockWriteFileSync.mock.calls[0] as [string, string, string]
+    expect(filePath).toContain('opencode-provider-litellm-cache.json')
+    const parsed = JSON.parse(content)
+    expect(parsed.providerId).toBe('protector')
+    expect(parsed.models).toEqual(sampleModels)
+    expect(typeof parsed.savedAt).toBe('number')
+  })
+  it('writes to the correct path under ~/.local/share/opencode/', () => {
+    saveModelCache('protector', sampleModels)
+    const [filePath] = mockWriteFileSync.mock.calls[0] as [string, string, string]
+    expect(filePath).toMatch(/\.local[/\\]share[/\\]opencode[/\\]opencode-provider-litellm-cache\.json/)
+  })
+  it('does not throw when writeFileSync fails', () => {
+    mockWriteFileSync.mockImplementation(() => { throw new Error('EACCES') })
+    expect(() => saveModelCache('protector', sampleModels)).not.toThrow()
+  })
+  it('round-trips correctly with loadModelCache', () => {
+    let written = ''
+    mockWriteFileSync.mockImplementation((_p: string, content: string) => { written = content })
+    mockReadFileSync.mockImplementation(() => written)
+    saveModelCache('protector', sampleModels)
+    expect(loadModelCache('protector')).toEqual(sampleModels)
+  })
+})

package/src/model-cache.ts ADDED Viewed

@@ -0,0 +1,49 @@
+import { existsSync, readFileSync, writeFileSync } from 'fs'
+import { join } from 'path'
+import { homedir } from 'os'
+import type { OpencodeModelConfig } from './types.js'
+const CACHE_FILENAME = 'opencode-provider-litellm-cache.json'
+interface ModelCache {
+  savedAt: number
+  providerId: string
+  models: Record<string, OpencodeModelConfig>
+}
+function getCachePath(): string {
+  return join(homedir(), '.local', 'share', 'opencode', CACHE_FILENAME)
+}
+/**
+ * Loads the model cache from disk. Returns null if the file does not exist,
+ * cannot be parsed, or belongs to a different provider.
+ */
+export function loadModelCache(providerId: string): Record<string, OpencodeModelConfig> | null {
+  try {
+    const raw = readFileSync(getCachePath(), 'utf-8')
+    const cache = JSON.parse(raw) as ModelCache
+    if (cache.providerId !== providerId) return null
+    if (!cache.models || typeof cache.models !== 'object') return null
+    return cache.models
+  } catch {
+    return null
+  }
+}
+/**
+ * Saves the discovered models to the cache file on disk. Failures are
+ * non-fatal — discovery already succeeded.
+ */
+export function saveModelCache(providerId: string, models: Record<string, OpencodeModelConfig>): void {
+  try {
+    const cache: ModelCache = {
+      savedAt: Date.now(),
+      providerId,
+      models,
+    }
+    writeFileSync(getCachePath(), JSON.stringify(cache, null, 2), 'utf-8')
+  } catch {
+    // Non-fatal — cache will be written next time
+  }
+}

package/src/plugin.test.ts CHANGED Viewed

@@ -26,6 +26,12 @@ vi.mock('./gcloud-token.js', () => ({
   resetTokenCache: vi.fn(),
 }))
+// Mock the model cache module — no cache by default
+vi.mock('./model-cache.js', () => ({
+  loadModelCache: vi.fn().mockReturnValue(null),
+  saveModelCache: vi.fn(),
+}))
 import { LiteLLMPlugin } from './plugin.js'
 import { discoverModels, injectModelsIntoConfig } from './discovery.js'
 import { resolvePluginConfig } from './utils.js'

package/src/plugin.ts CHANGED Viewed

@@ -3,6 +3,7 @@ import { resolvePluginConfig, getProviderId } from './utils.js'
 import { discoverModels, injectModelsIntoConfig } from './discovery.js'
 import { createMcpToolDefinitions } from './mcp-tools.js'
 import { getGcloudToken } from './gcloud-token.js'
+import { loadModelCache, saveModelCache } from './model-cache.js'
 export const LiteLLMPlugin: Plugin = async (
   input: PluginInput,
@@ -45,11 +46,23 @@ export const LiteLLMPlugin: Plugin = async (
   const result: Record<string, unknown> = {
     config: async (config: Record<string, any>) => {
-      try {
-        const models = await discoverModels(
-          pluginConfig,
-          getToken,
+      // Inject cached models immediately so opencode has something to work
+      // with while live discovery runs.
+      const cachedModels = loadModelCache(providerId)
+      if (cachedModels) {
+        const token = await getToken()
+        injectModelsIntoConfig(
+          config as Parameters<typeof injectModelsIntoConfig>[0],
+          providerId,
+          pluginConfig.url,
+          token,
+          cachedModels,
         )
+      }
+      // Discover live models, update cache, and re-inject with fresh data.
+      try {
+        const models = await discoverModels(pluginConfig, getToken)
         if (Object.keys(models).length === 0) {
           await input.client.app.log({
@@ -60,6 +73,7 @@ export const LiteLLMPlugin: Plugin = async (
             },
           })
         } else {
+          saveModelCache(providerId, models)
           const token = await getToken()
           injectModelsIntoConfig(
             config as Parameters<typeof injectModelsIntoConfig>[0],