@danmademe/pi-provider-litellm 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@danmademe/pi-provider-litellm",
3
- "version": "0.2.0",
3
+ "version": "0.3.0",
4
4
  "description": "Pi agent extension for LiteLLM proxy auto-discovery and model configuration",
5
5
  "type": "module",
6
6
  "main": "./src/index.ts",
@@ -0,0 +1,169 @@
1
+ import { existsSync, readFileSync } from 'fs'
2
+ import { join } from 'path'
3
+
/** How long a fetched access token is served from memory before a new one is requested: 50 minutes, in milliseconds. */
export const CACHE_TTL = 50 * 60 * 1000

/** Most recently fetched access token, or null when nothing is cached. */
let cachedToken: string | null = null

/** `Date.now()` value recorded when `cachedToken` was stored; 0 when nothing is cached. */
let cachedAt: number = 0

/** Token request currently in progress, shared by concurrent callers; null when none is running. */
let inflight: Promise<string | null> | null = null
8
+
/**
 * ADC file contents whose `type` is `authorized_user`.
 * This is the only credential kind this module exchanges for an access token.
 */
interface AuthorizedUserCredentials {
  type: 'authorized_user'
  /** Sent as `client_id` in the refresh-token exchange. */
  client_id: string
  /** Sent as `client_secret` in the refresh-token exchange. */
  client_secret: string
  /** Sent as `refresh_token` in the refresh-token exchange. */
  refresh_token: string
  /** Optional field; not read by this module. */
  account?: string
  /** Optional field; not read by this module. */
  universe_domain?: string
}

/**
 * ADC file contents whose `type` is `service_account`.
 * Only the discriminant is modelled: such files are recognised and rejected with a warning.
 */
interface ServiceAccountCredentials {
  type: 'service_account'
}

/** Credential shapes this module distinguishes, discriminated by `type`. */
type GoogleCredentials = AuthorizedUserCredentials | ServiceAccountCredentials
23
+
const ADC_FILENAME = 'application_default_credentials.json'

/**
 * Locates the Application Default Credentials JSON file.
 *
 * Lookup order:
 *   1. `GOOGLE_APPLICATION_CREDENTIALS` — returned as-is, without checking that the file exists.
 *   2. `$HOME/.config/gcloud/application_default_credentials.json` (Linux / macOS).
 *   3. `%APPDATA%/gcloud/application_default_credentials.json` (Windows).
 *
 * @returns The explicit path, or the first well-known location that exists on disk, or null.
 */
function getAdcPath(): string | null {
  // `process` may be missing outside Node-like runtimes; treat that as "no environment variables".
  const env: Record<string, string | undefined> = typeof process !== 'undefined' ? process.env : {}

  const explicit = env.GOOGLE_APPLICATION_CREDENTIALS
  if (explicit) return explicit

  // Unset (or empty) base directories contribute no candidate.
  const wellKnown: Array<string | null> = [
    env.HOME ? join(env.HOME, '.config', 'gcloud', ADC_FILENAME) : null,
    env.APPDATA ? join(env.APPDATA, 'gcloud', ADC_FILENAME) : null,
  ]

  const found = wellKnown.find(
    (candidate): candidate is string => candidate !== null && existsSync(candidate),
  )
  return found ?? null
}
56
+
/**
 * Reads and parses a credentials JSON file.
 *
 * The parsed value is checked before it is handed back, because a type assertion
 * alone would let malformed files through:
 *   - anything that is not a JSON object (number, string, array, `null`) is rejected;
 *   - an `authorized_user` file must carry non-empty string `client_id`,
 *     `client_secret` and `refresh_token` — otherwise the literal text "undefined"
 *     would be form-encoded into the token request.
 * Objects with any other `type` are returned unchanged so the caller can report
 * the unsupported or unknown type itself.
 *
 * @param path Location of the credentials file.
 * @returns The parsed credentials, or null when the file is missing, unreadable,
 *          not valid JSON, not an object, or an incomplete `authorized_user` file.
 */
function readCredentials(path: string): GoogleCredentials | null {
  let parsed: unknown
  try {
    parsed = JSON.parse(readFileSync(path, 'utf-8'))
  } catch {
    return null
  }

  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    return null
  }

  const fields = parsed as Record<string, unknown>
  if (fields.type === 'authorized_user') {
    const required = ['client_id', 'client_secret', 'refresh_token']
    const complete = required.every((key) => typeof fields[key] === 'string' && fields[key] !== '')
    if (!complete) {
      return null
    }
  }

  return parsed as GoogleCredentials
}
65
+
/**
 * Exchanges an authorized-user refresh token for an access token by POSTing a
 * form-encoded `refresh_token` grant to `https://oauth2.googleapis.com/token`.
 * The request is aborted after 10 seconds.
 *
 * Never throws: every failure is logged with `console.warn` and reported as null.
 *
 * @param credentials Authorized-user credentials supplying the client id, client secret and refresh token.
 * @returns The access token, or null when the request fails, the server answers with a
 *          non-2xx status, or the response carries no non-empty string `access_token`.
 */
async function exchangeRefreshToken(credentials: AuthorizedUserCredentials): Promise<string | null> {
  const form = new URLSearchParams({
    grant_type: 'refresh_token',
    client_id: credentials.client_id,
    client_secret: credentials.client_secret,
    refresh_token: credentials.refresh_token,
  })

  try {
    const response = await fetch('https://oauth2.googleapis.com/token', {
      method: 'POST',
      headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
      body: form.toString(),
      signal: AbortSignal.timeout(10_000),
    })

    if (!response.ok) {
      const text = await response.text()
      console.warn(`[pi-provider-litellm] Token exchange failed (${response.status}): ${text}`)
      return null
    }

    // The body is untrusted JSON: narrow it instead of reading a property off `any`,
    // so a non-object body or a non-string token can never be returned as a token.
    const payload: unknown = await response.json()
    const accessToken =
      typeof payload === 'object' && payload !== null
        ? (payload as Record<string, unknown>).access_token
        : undefined

    if (typeof accessToken !== 'string' || accessToken === '') {
      console.warn('[pi-provider-litellm] Token exchange failed: response did not contain an access_token')
      return null
    }

    return accessToken
  } catch (error) {
    // Network errors, the 10 s timeout and unparseable response bodies all end up here.
    console.warn(`[pi-provider-litellm] Token exchange failed: ${error}`)
    return null
  }
}
95
+
/**
 * Resolves the ADC file to a fresh access token without touching the cache.
 * Each failure path logs one warning and yields null.
 */
async function fetchTokenFromAdc(): Promise<string | null> {
  const adcPath = getAdcPath()
  if (!adcPath) {
    console.warn(
      '[pi-provider-litellm] No Google ADC file found. Set GOOGLE_APPLICATION_CREDENTIALS or run `gcloud auth application-default login`.',
    )
    return null
  }

  const credentials = readCredentials(adcPath)
  if (!credentials) {
    console.warn(`[pi-provider-litellm] Failed to read ADC file: ${adcPath}`)
    return null
  }

  if (credentials.type === 'authorized_user') {
    return exchangeRefreshToken(credentials)
  }

  if (credentials.type === 'service_account') {
    console.warn('[pi-provider-litellm] Service account credentials are not yet supported. Use an authorized_user credential or set GOOGLE_APPLICATION_CREDENTIALS to an authorized_user JSON file.')
    return null
  }

  // eslint-disable-next-line @typescript-eslint/no-unnecessary-type-assertion
  console.warn(`[pi-provider-litellm] Unknown credential type: ${(credentials as { type: string }).type}`)
  return null
}

/**
 * Gets a Google OAuth access token from the ADC JSON file, cached with a 50-minute TTL.
 * Concurrent calls share one in-flight request (request coalescing).
 * Only successful results are cached; a failed lookup is retried on the next call.
 * Logs a warning on failure.
 *
 * @returns The access token, or null if credentials are not available or the token cannot be fetched.
 */
export async function getGcloudToken(): Promise<string | null> {
  // Serve from memory while the cached token is younger than the TTL.
  if (cachedToken && Date.now() - cachedAt < CACHE_TTL) {
    return cachedToken
  }

  // Coalesce concurrent calls onto the request that is already running.
  if (inflight) {
    return inflight
  }

  const request = fetchTokenFromAdc().then((token) => {
    if (token) {
      cachedToken = token
      cachedAt = Date.now()
    }
    return token
  })
  inflight = request

  // The slot is released from a handler attached AFTER the assignment above. The lookup
  // can finish without ever awaiting (no ADC file, unreadable file, unsupported type);
  // clearing the slot from inside the request body would then run before the assignment
  // and leave a settled promise in `inflight`, making every later call return that
  // stale null. The identity check keeps a request that outlived a cache reset from
  // clearing a newer one.
  const release = (): void => {
    if (inflight === request) {
      inflight = null
    }
  }
  void request.then(release, release)

  return request
}
153
+
/**
 * Pre-warms the token cache by fetching a token in the background.
 * Safe to call without awaiting: expected failures are logged by getGcloudToken and
 * resolve to null, and an unexpected rejection is caught and logged here so it
 * cannot surface as an unhandled promise rejection.
 */
export function warmGcloudToken(): void {
  void getGcloudToken().catch((error: unknown) => {
    console.warn(`[pi-provider-litellm] Token pre-warm failed: ${error}`)
  })
}
161
+
/**
 * Clears all module-level token state: the in-flight request slot, the cache
 * timestamp and the cached token. Exported for testing purposes.
 */
export function resetTokenCache(): void {
  inflight = null
  cachedAt = 0
  cachedToken = null
}
package/src/index.ts CHANGED
@@ -1,17 +1,37 @@
1
1
  import type { ExtensionAPI, BeforeAgentStartEvent, BeforeAgentStartEventResult } from '@earendil-works/pi-coding-agent'
2
2
  import { resolvePluginConfig, discoverModels, discoverMcpTools, listSkills, buildProviderConfig } from './litellm-api.js'
3
3
  import { createMcpToolDefinitions, createSkillToolDefinitions, createSkillsInjector } from './tools.js'
4
+ import { getGcloudToken } from './gcloud-token.js'
5
+ import { loadModelCache, saveModelCache } from './model-cache.js'
4
6
  import type { LiteLLMModelInfo, McpTool, PluginConfig } from './types.js'
5
7
 
8
+ const LOG = '[pi-provider-litellm]'
9
+
6
10
  export default async function (pi: ExtensionAPI): Promise<void> {
7
11
  const config = resolvePluginConfig()
8
12
  if (!config) {
13
+ console.warn(`${LOG} No config found — set LITELLM_URL and LITELLM_KEY (or LITELLM_GCLOUD_TOKEN_AUTH=1)`)
9
14
  return
10
15
  }
11
16
 
12
- await discoverAndRegister(pi, config)
17
+ const isGcloudAuth = !!(process.env.LITELLM_GCLOUD_TOKEN_AUTH &&
18
+ process.env.LITELLM_GCLOUD_TOKEN_AUTH !== '' &&
19
+ process.env.LITELLM_GCLOUD_TOKEN_AUTH !== '0')
20
+
21
+ // When gcloud token auth is enabled, fetch a live token instead of using the static apiKey
22
+ const getToken = async (): Promise<string> => {
23
+ if (isGcloudAuth) {
24
+ return (await getGcloudToken()) ?? ''
25
+ }
26
+ return config.apiKey
27
+ }
13
28
 
14
- const injector = createSkillsInjector(config, config.apiKey)
29
+ // Await discovery so PI blocks until models are registered before resolving
30
+ // model patterns. Cache is loaded at the top of discoverAndRegister so the
31
+ // first call returns quickly on subsequent startups.
32
+ await discoverAndRegister(pi, config, getToken)
33
+
34
+ const injector = createSkillsInjector(config, getToken)
15
35
  const setupCompleteSessions = new Set<string>()
16
36
  pi.on('before_agent_start', async (event: BeforeAgentStartEvent, ctx): Promise<BeforeAgentStartEventResult> => {
17
37
  const sessionId = ctx.sessionManager.getSessionFile()
@@ -26,7 +46,7 @@ export default async function (pi: ExtensionAPI): Promise<void> {
26
46
  pi.on('session_start', async (_event, _ctx) => {
27
47
  setupCompleteSessions.clear()
28
48
  injector.clearCache()
29
- await discoverAndRegister(pi, config)
49
+ await discoverAndRegister(pi, config, getToken)
30
50
  })
31
51
 
32
52
  pi.on('session_shutdown', async (_event, _ctx) => {
@@ -34,11 +54,16 @@ export default async function (pi: ExtensionAPI): Promise<void> {
34
54
  })
35
55
  }
36
56
 
37
- export async function discoverAndRegister(pi: ExtensionAPI, config: PluginConfig): Promise<void> {
38
- try {
39
- pi.unregisterProvider(config.providerId)
40
- } catch {
41
- // Provider not yet registered
57
+ export async function discoverAndRegister(pi: ExtensionAPI, config: PluginConfig, getToken: () => Promise<string>): Promise<void> {
58
+ // Register from cache before live discovery so models are visible immediately.
59
+ // Must await getToken() — PI rejects an empty apiKey. In gcloud mode this is
60
+ // one OAuth exchange (~500ms), but subsequent starts within the 50-min TTL
61
+ // return the cached token instantly. On first-ever run there is no cache file,
62
+ // so this block is skipped entirely.
63
+ const cached = loadModelCache(config.providerId)
64
+ if (cached) {
65
+ const token = await getToken()
66
+ pi.registerProvider(config.providerId, buildProviderConfig(config.url, token, cached))
42
67
  }
43
68
 
44
69
  const DISCOVERY_TIMEOUT_MS = 30_000
@@ -51,11 +76,12 @@ export async function discoverAndRegister(pi: ExtensionAPI, config: PluginConfig
51
76
  })
52
77
 
53
78
  try {
79
+ const token = await getToken()
54
80
  const results = await Promise.race([
55
81
  Promise.allSettled([
56
- discoverModels(config, config.apiKey),
57
- discoverMcpTools(config, config.apiKey),
58
- listSkills(config, config.apiKey),
82
+ discoverModels(config, token),
83
+ discoverMcpTools(config, token),
84
+ listSkills(config, token),
59
85
  ]),
60
86
  timeoutPromise,
61
87
  ])
@@ -71,19 +97,30 @@ export async function discoverAndRegister(pi: ExtensionAPI, config: PluginConfig
71
97
  mcpResult = { status: 'rejected', reason: error as Error }
72
98
  }
73
99
 
74
- if (modelsResult.status === 'fulfilled' && Object.keys(modelsResult.value).length > 0) {
75
- const providerConfig = buildProviderConfig(config.url, config.apiKey, modelsResult.value)
76
- pi.registerProvider(config.providerId, providerConfig)
100
+ if (modelsResult.status === 'fulfilled') {
101
+ const modelCount = Object.keys(modelsResult.value).length
102
+ if (modelCount > 0) {
103
+ saveModelCache(config.providerId, modelsResult.value)
104
+ const token = await getToken()
105
+ const providerConfig = buildProviderConfig(config.url, token, modelsResult.value)
106
+ pi.registerProvider(config.providerId, providerConfig)
107
+ } else {
108
+ console.warn(`${LOG} No models discovered — check LiteLLM /health endpoint`)
109
+ }
110
+ } else {
111
+ console.error(`${LOG} Model discovery error: ${modelsResult.reason}`)
77
112
  }
78
113
 
79
114
  if (mcpResult.status === 'fulfilled') {
80
- const mcpTools = createMcpToolDefinitions(config, config.apiKey, mcpResult.value)
115
+ const mcpTools = createMcpToolDefinitions(config, getToken, mcpResult.value)
81
116
  for (const tool of mcpTools) {
82
117
  pi.registerTool(tool)
83
118
  }
119
+ } else {
120
+ console.warn(`${LOG} MCP tool discovery failed: ${mcpResult.reason}`)
84
121
  }
85
122
 
86
- const skillTools = createSkillToolDefinitions(config, config.apiKey)
123
+ const skillTools = createSkillToolDefinitions(config, getToken)
87
124
  for (const tool of skillTools) {
88
125
  pi.registerTool(tool)
89
126
  }
@@ -318,11 +318,17 @@ export function resolvePluginConfig(): PluginConfig | null {
318
318
  // Check env vars first
319
319
  const envUrl = process.env.LITELLM_URL
320
320
  const envKey = process.env.LITELLM_KEY
321
+ const envGcloudAuth = process.env.LITELLM_GCLOUD_TOKEN_AUTH
321
322
 
322
323
  if (envUrl && envKey) {
323
324
  return { url: envUrl, apiKey: envKey, providerId: process.env.LITELLM_PROVIDER_ID ?? 'litellm' }
324
325
  }
325
326
 
327
+ // Allow missing LITELLM_KEY when gcloud token auth is enabled
328
+ if (envUrl && envGcloudAuth && envGcloudAuth !== '' && envGcloudAuth !== '0') {
329
+ return { url: envUrl, apiKey: envKey ?? '', providerId: process.env.LITELLM_PROVIDER_ID ?? 'litellm' }
330
+ }
331
+
326
332
  // Check settings.json
327
333
  try {
328
334
  const settingsPath = path.join(os.homedir(), '.pi', 'agent', 'settings.json')
@@ -0,0 +1,51 @@
1
+ import os from 'node:os'
2
+ import path from 'node:path'
3
+ import fs from 'node:fs'
4
+ import type { LiteLLMModelInfo } from './types.js'
5
+
/** Name of the cache file inside the `.pi/agent` directory under the user's home directory. */
const CACHE_FILENAME = 'pi-provider-litellm-cache.json'

/** On-disk layout of the model cache file. */
interface ModelCache {
  /** `Date.now()` value at the moment the cache was written. */
  savedAt: number
  /** Provider the cached models were saved for. */
  providerId: string
  /** Cached model entries, keyed by string. */
  models: Record<string, LiteLLMModelInfo>
}

/**
 * Builds the absolute path of the model cache file.
 *
 * @returns `<home>/.pi/agent/pi-provider-litellm-cache.json`, where `<home>` is `os.homedir()`.
 */
function getCachePath(): string {
  const agentDir = path.join(os.homedir(), '.pi', 'agent')
  return path.join(agentDir, CACHE_FILENAME)
}
17
+
/**
 * Loads the model cache from disk.
 *
 * The parsed file is validated rather than trusted: the top level must be an object,
 * its `providerId` must equal the requested one, and `models` must be a plain
 * (non-null, non-array) object.
 *
 * @param providerId Provider whose cached models are wanted.
 * @returns The cached models, or null if the file does not exist, cannot be parsed,
 *          was written for a different provider, or does not have the expected shape.
 */
export function loadModelCache(providerId: string): Record<string, LiteLLMModelInfo> | null {
  let parsed: unknown
  try {
    parsed = JSON.parse(fs.readFileSync(getCachePath(), 'utf-8'))
  } catch {
    return null
  }

  if (typeof parsed !== 'object' || parsed === null) return null

  const { providerId: cachedFor, models } = parsed as Partial<ModelCache>

  // Ignore a cache written for a different provider.
  if (cachedFor !== providerId) return null

  // `typeof [] === 'object'`, so arrays need an explicit rejection.
  if (typeof models !== 'object' || models === null || Array.isArray(models)) return null

  return models
}
36
+
/**
 * Saves the discovered models to the cache file on disk, stamped with the current
 * time and the provider they belong to.
 *
 * The cache directory is created first when it is missing; without that the write
 * fails on a machine where the directory has never been created and the cache is
 * silently never written. Any failure is swallowed on purpose: caching is best-effort.
 *
 * @param providerId Provider the models were discovered for.
 * @param models Discovered models to persist.
 */
export function saveModelCache(providerId: string, models: Record<string, LiteLLMModelInfo>): void {
  try {
    const target = getCachePath()
    fs.mkdirSync(path.dirname(target), { recursive: true })

    const cache: ModelCache = {
      savedAt: Date.now(),
      providerId,
      models,
    }
    fs.writeFileSync(target, JSON.stringify(cache, null, 2), 'utf-8')
  } catch {
    // Non-fatal — discovery still succeeded, cache will be written next time
  }
}
package/src/tools.ts CHANGED
@@ -114,7 +114,7 @@ export function buildTypeBoxSchema(inputSchema: Record<string, unknown>): TSchem
114
114
 
115
115
  export function createMcpToolDefinitions(
116
116
  config: PluginConfig,
117
- token: string,
117
+ getToken: () => Promise<string>,
118
118
  mcpTools: McpTool[],
119
119
  ): ToolDefinition[] {
120
120
  return mcpTools.map((tool) => {
@@ -141,6 +141,7 @@ export function createMcpToolDefinitions(
141
141
  ? (typeof params.args === 'string' ? (() => { try { return JSON.parse(params.args) } catch { return params.args } })() : params.args)
142
142
  : params
143
143
 
144
+ const token = await getToken()
144
145
  const result = await executeMcpTool(config, token, server, toolName, args as Record<string, unknown>)
145
146
  return {
146
147
  content: [{ type: 'text', text: result }],
@@ -153,7 +154,7 @@ export function createMcpToolDefinitions(
153
154
 
154
155
  export function createSkillToolDefinitions(
155
156
  config: PluginConfig,
156
- token: string,
157
+ getToken: () => Promise<string>,
157
158
  ): ToolDefinition[] {
158
159
  return [
159
160
  {
@@ -168,6 +169,7 @@ export function createSkillToolDefinitions(
168
169
  _onUpdate: AgentToolUpdateCallback<unknown> | undefined,
169
170
  _ctx: ExtensionContext,
170
171
  ): Promise<AgentToolResult<undefined>> {
172
+ const token = await getToken()
171
173
  const skills = await listSkills(config, token)
172
174
  if (!skills.length) {
173
175
  return { content: [{ type: 'text', text: 'No skills found.' }], details: undefined }
@@ -195,6 +197,7 @@ export function createSkillToolDefinitions(
195
197
  _onUpdate: AgentToolUpdateCallback<unknown> | undefined,
196
198
  _ctx: ExtensionContext,
197
199
  ): Promise<AgentToolResult<undefined>> {
200
+ const token = await getToken()
198
201
  const skills = await listSkills(config, token)
199
202
  const skill = skills.find((s) => s.name === params.name)
200
203
  if (!skill) {
@@ -225,6 +228,7 @@ export function createSkillToolDefinitions(
225
228
  _onUpdate: AgentToolUpdateCallback<unknown> | undefined,
226
229
  _ctx: ExtensionContext,
227
230
  ): Promise<AgentToolResult<undefined>> {
231
+ const token = await getToken()
228
232
  const result = await registerSkill(
229
233
  config,
230
234
  token,
@@ -249,6 +253,7 @@ export function createSkillToolDefinitions(
249
253
  _onUpdate: AgentToolUpdateCallback<unknown> | undefined,
250
254
  _ctx: ExtensionContext,
251
255
  ): Promise<AgentToolResult<undefined>> {
256
+ const token = await getToken()
252
257
  const result = await enableSkill(config, token, params.name)
253
258
  return { content: [{ type: 'text', text: result }], details: undefined }
254
259
  },
@@ -265,6 +270,7 @@ export function createSkillToolDefinitions(
265
270
  _onUpdate: AgentToolUpdateCallback<unknown> | undefined,
266
271
  _ctx: ExtensionContext,
267
272
  ): Promise<AgentToolResult<undefined>> {
273
+ const token = await getToken()
268
274
  const result = await disableSkill(config, token, params.name)
269
275
  return { content: [{ type: 'text', text: result }], details: undefined }
270
276
  },
@@ -279,7 +285,7 @@ export interface SkillsInjector {
279
285
 
280
286
  export function createSkillsInjector(
281
287
  config: PluginConfig,
282
- token: string,
288
+ getToken: () => Promise<string>,
283
289
  ): SkillsInjector {
284
290
  let cache: { skills: Skill[]; timestamp: number } | null = null
285
291
  const TTL = 60_000 // 60 seconds
@@ -289,6 +295,7 @@ export function createSkillsInjector(
289
295
  if (cache && now - cache.timestamp < TTL) {
290
296
  return cache.skills
291
297
  }
298
+ const token = await getToken()
292
299
  const skills = await listSkills(config, token)
293
300
  cache = { skills, timestamp: now }
294
301
  return skills