@poolzin/pool-bot 2026.2.19 → 2026.2.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. package/CHANGELOG.md +17 -0
  2. package/dist/agents/model-auth.js +12 -0
  3. package/dist/agents/model-fallback.js +24 -0
  4. package/dist/agents/models-config.providers.js +85 -0
  5. package/dist/agents/openclaw-tools.js +16 -0
  6. package/dist/agents/pi-embedded-runner/run/attempt.js +15 -0
  7. package/dist/agents/poolbot-tools.js +16 -0
  8. package/dist/agents/provider/config-loader.js +76 -0
  9. package/dist/agents/provider/index.js +15 -0
  10. package/dist/agents/provider/integration.js +136 -0
  11. package/dist/agents/provider/models-dev.js +129 -0
  12. package/dist/agents/provider/rate-limits.js +458 -0
  13. package/dist/agents/provider/request-monitor.js +449 -0
  14. package/dist/agents/provider/session-binding.js +376 -0
  15. package/dist/agents/provider/token-pool.js +541 -0
  16. package/dist/agents/tools/deep-research-tool.js +225 -0
  17. package/dist/agents/tools/image-generate-tool.js +235 -0
  18. package/dist/build-info.json +3 -3
  19. package/package.json +1 -1
  20. package/skills/plcode-controller/SKILL.md +156 -0
  21. package/skills/plcode-controller/assets/operator-prompts.md +65 -0
  22. package/skills/plcode-controller/references/command-cheatsheet.md +53 -0
  23. package/skills/plcode-controller/references/failure-handling.md +60 -0
  24. package/skills/plcode-controller/references/model-selection.md +57 -0
  25. package/skills/plcode-controller/references/plan-vs-build.md +52 -0
  26. package/skills/plcode-controller/references/question-handling.md +40 -0
  27. package/skills/plcode-controller/references/session-management.md +63 -0
  28. package/skills/plcode-controller/references/workflow.md +35 -0
package/CHANGELOG.md CHANGED
@@ -1,3 +1,20 @@
1
+ ## v2026.2.21 (2026-02-18)
2
+
3
+ ### Features
4
+ - **PLCODE Controller Skill:** native bundled skill for controlling PLCODE via CLI commands, slash commands, and multi-agent orchestration — includes session management, model selection, agent delegation (plan/code/review), operator prompts, failure handling, and workflow references
5
+
6
+ ---
7
+
8
+ ## v2026.2.20 (2026-02-18)
9
+
10
+ ### Features
11
+ - **Z.AI Provider:** added ZhipuAI (GLM) as first-class provider with 5 models — GLM-4.7-Flash (free), GLM-4.5-Flash (free), GLM-4.6V-Flash (free vision), GLM-4.7, GLM-5
12
+ - **Image Generation Tool:** `image_generate` tool powered by Z.AI CogView-4, CogView-4-Flash, and GLM-Image — generates images from text prompts, downloads to disk, returns media path
13
+ - **Deep Research Tool:** `deep_research` tool powered by Z.AI GLM-4.7-Flash + web_search — configurable depth (shallow/standard/deep), language, and max sources; returns structured report with sources and metadata; cost ~$0.01–$0.05 per query
14
+ - **Provider Infrastructure:** multi-key token pool with 4 scheduling strategies (round-robin, least-used, priority, random), proactive rate limit tracking with header parsing, request monitoring with ring-buffer observability and percentile stats, sticky session binding, and models.dev catalog — integrated into the LLM pipeline as an opt-in layer that coexists with existing auth/cooldown paths
15
+
16
+ ---
17
+
1
18
  ## v2026.2.19 (2026-02-17)
2
19
 
3
20
  ### Fixes
@@ -4,6 +4,7 @@ import { getShellEnvAppliedKeys } from "../infra/shell-env.js";
4
4
  import { formatCliCommand } from "../cli/command-format.js";
5
5
  import { ensureAuthProfileStore, listProfilesForProvider, resolveApiKeyForProfile, resolveAuthProfileOrder, resolveAuthStorePathForDisplay, } from "./auth-profiles.js";
6
6
  import { normalizeProviderId } from "./model-selection.js";
7
+ import { resolveTokenFromPool } from "./provider/integration.js";
7
8
  export { ensureAuthProfileStore, resolveAuthProfileOrder } from "./auth-profiles.js";
8
9
  const AWS_BEARER_ENV = "AWS_BEARER_TOKEN_BEDROCK";
9
10
  const AWS_ACCESS_KEY_ENV = "AWS_ACCESS_KEY_ID";
@@ -135,6 +136,17 @@ export async function resolveApiKeyForProvider(params) {
135
136
  }
136
137
  catch { }
137
138
  }
139
+ // Token pool rotation: if a pool is configured for this provider,
140
+ // try to get a key from the pool (with rate-limit-aware scheduling).
141
+ // Falls through to existing env/config resolution if no pool or no tokens.
142
+ const poolResult = await resolveTokenFromPool(provider);
143
+ if (poolResult) {
144
+ return {
145
+ apiKey: poolResult.apiKey,
146
+ source: poolResult.source,
147
+ mode: "api-key",
148
+ };
149
+ }
138
150
  const envResolved = resolveEnvApiKey(provider);
139
151
  if (envResolved) {
140
152
  return {
@@ -2,6 +2,7 @@ import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "./defaults.js";
2
2
  import { coerceToFailoverError, describeFailoverError, isFailoverError, isTimeoutError, } from "./failover-error.js";
3
3
  import { buildModelAliasIndex, modelKey, parseModelRef, resolveConfiguredModelRef, resolveModelRefFromString, } from "./model-selection.js";
4
4
  import { ensureAuthProfileStore, getSoonestCooldownExpiry, isProfileInCooldown, resolveAuthProfileOrder, } from "./auth-profiles.js";
5
+ import { isProviderRateLimited, recordFallbackError } from "./provider/integration.js";
5
6
  function isAbortError(err) {
6
7
  if (!err || typeof err !== "object")
7
8
  return false;
@@ -218,6 +219,21 @@ export async function runWithModelFallback(params) {
218
219
  lastProbeAttempt.set(probeThrottleKey, now);
219
220
  }
220
221
  }
222
+ // Provider-level rate limit check (from token pool / rate limit tracking).
223
+ // Complements the per-profile cooldown above — skips candidates when all
224
+ // API keys in the pool are known to be rate-limited.
225
+ const poolLimit = isProviderRateLimited(candidate.provider);
226
+ if (poolLimit.isLimited && hasFallbackCandidates && i > 0) {
227
+ // Skip non-primary candidates that are pool-rate-limited.
228
+ // Primary (i === 0) is never skipped here — it gets a chance to probe.
229
+ attempts.push({
230
+ provider: candidate.provider,
231
+ model: candidate.model,
232
+ error: `Provider ${candidate.provider} pool is rate-limited (wait ${poolLimit.waitTimeMs}ms)`,
233
+ reason: "rate_limit",
234
+ });
235
+ continue;
236
+ }
221
237
  try {
222
238
  const result = await params.run(candidate.provider, candidate.model);
223
239
  return {
@@ -246,6 +262,14 @@ export async function runWithModelFallback(params) {
246
262
  status: described.status,
247
263
  code: described.code,
248
264
  });
265
+ // Feed error into provider infrastructure (rate limits + monitoring)
266
+ recordFallbackError({
267
+ provider: candidate.provider,
268
+ model: candidate.model,
269
+ status: described.status,
270
+ reason: described.reason,
271
+ error: described.message,
272
+ });
249
273
  await params.onError?.({
250
274
  provider: candidate.provider,
251
275
  model: candidate.model,
@@ -31,6 +31,33 @@ const MOONSHOT_DEFAULT_COST = {
31
31
  cacheRead: 0,
32
32
  cacheWrite: 0,
33
33
  };
34
+ // Z.AI (ZhipuAI / GLM) — OpenAI-compatible API
35
+ // Docs: https://docs.z.ai/ | Base: https://open.bigmodel.cn/api/paas/v4/
36
+ const ZAI_BASE_URL = "https://open.bigmodel.cn/api/paas/v4";
37
+ const ZAI_DEFAULT_MODEL_ID = "GLM-4.7-Flash";
38
+ const ZAI_DEFAULT_CONTEXT_WINDOW = 128000;
39
+ const ZAI_DEFAULT_MAX_TOKENS = 8192;
40
+ // GLM-4.7-Flash, GLM-4.5-Flash, and GLM-4.6V-Flash are completely free
41
+ const ZAI_FREE_COST = {
42
+ input: 0,
43
+ output: 0,
44
+ cacheRead: 0,
45
+ cacheWrite: 0,
46
+ };
47
+ // GLM-4.7-FlashX: $0.07/$0.4 per 1M tokens
48
+ const ZAI_FLASHX_COST = {
49
+ input: 0.07,
50
+ output: 0.4,
51
+ cacheRead: 0,
52
+ cacheWrite: 0,
53
+ };
54
+ // GLM-4.7: $0.6/$2.2 per 1M tokens
55
+ const ZAI_PREMIUM_COST = {
56
+ input: 0.6,
57
+ output: 2.2,
58
+ cacheRead: 0,
59
+ cacheWrite: 0,
60
+ };
34
61
  const XIAOMI_BASE_URL = "https://api.xiaomimimo.com/anthropic";
35
62
  export const XIAOMI_DEFAULT_MODEL_ID = "mimo-v2-flash";
36
63
  const XIAOMI_DEFAULT_CONTEXT_WINDOW = 262144;
@@ -256,6 +283,59 @@ function buildMoonshotProvider() {
256
283
  ],
257
284
  };
258
285
  }
286
/**
 * Builds the provider definition for Z.AI (ZhipuAI / GLM).
 *
 * The provider is exposed through the "openai-completions" API at
 * `ZAI_BASE_URL`. Every model shares the same context window and
 * max-token limits and is flagged as non-reasoning; models differ only in
 * id, display name, accepted input modalities, and cost table.
 *
 * @returns Provider object with `baseUrl`, `api`, and `models` (no API key).
 */
function buildZaiProvider() {
    // One row per model: [id, display name, input modalities, cost table].
    const catalog = [
        [ZAI_DEFAULT_MODEL_ID, "GLM 4.7 Flash", ["text"], ZAI_FREE_COST],
        ["GLM-4.5-Flash", "GLM 4.5 Flash", ["text"], ZAI_FREE_COST],
        ["GLM-4.6V-Flash", "GLM 4.6V Flash (Vision)", ["text", "image"], ZAI_FREE_COST],
        ["GLM-4.7-FlashX", "GLM 4.7 FlashX", ["text"], ZAI_FLASHX_COST],
        ["GLM-4.7", "GLM 4.7", ["text"], ZAI_PREMIUM_COST],
    ];
    const models = catalog.map(([id, name, input, cost]) => ({
        id,
        name,
        reasoning: false,
        input,
        cost,
        contextWindow: ZAI_DEFAULT_CONTEXT_WINDOW,
        maxTokens: ZAI_DEFAULT_MAX_TOKENS,
    }));
    return {
        baseUrl: ZAI_BASE_URL,
        api: "openai-completions",
        models,
    };
}
259
339
  function buildQwenPortalProvider() {
260
340
  return {
261
341
  baseUrl: QWEN_PORTAL_BASE_URL,
@@ -402,6 +482,11 @@ export async function resolveImplicitProviders(params) {
402
482
  if (moonshotKey) {
403
483
  providers.moonshot = { ...buildMoonshotProvider(), apiKey: moonshotKey };
404
484
  }
485
+ const zaiKey = resolveEnvApiKeyVarName("zai") ??
486
+ resolveApiKeyFromProfiles({ provider: "zai", store: authStore });
487
+ if (zaiKey) {
488
+ providers.zai = { ...buildZaiProvider(), apiKey: zaiKey };
489
+ }
405
490
  const syntheticKey = resolveEnvApiKeyVarName("synthetic") ??
406
491
  resolveApiKeyFromProfiles({ provider: "synthetic", store: authStore });
407
492
  if (syntheticKey) {
@@ -5,6 +5,8 @@ import { createBrowserTool } from "./tools/browser-tool.js";
5
5
  import { createCanvasTool } from "./tools/canvas-tool.js";
6
6
  import { createCronTool } from "./tools/cron-tool.js";
7
7
  import { createGatewayTool } from "./tools/gateway-tool.js";
8
+ import { createDeepResearchTool } from "./tools/deep-research-tool.js";
9
+ import { createImageGenerateTool } from "./tools/image-generate-tool.js";
8
10
  import { createImageTool } from "./tools/image-tool.js";
9
11
  import { createMessageTool } from "./tools/message-tool.js";
10
12
  import { createNodesTool } from "./tools/nodes-tool.js";
@@ -30,6 +32,11 @@ export function createOpenClawTools(options) {
30
32
  modelHasVision: options?.modelHasVision,
31
33
  })
32
34
  : null;
35
+ const imageGenerateTool = createImageGenerateTool({
36
+ config: options?.config,
37
+ agentDir: options?.agentDir,
38
+ sandboxRoot: options?.sandboxRoot,
39
+ });
33
40
  const webSearchTool = createWebSearchTool({
34
41
  config: options?.config,
35
42
  sandboxed: options?.sandboxed,
@@ -113,7 +120,16 @@ export function createOpenClawTools(options) {
113
120
  ...(webSearchTool ? [webSearchTool] : []),
114
121
  ...(webFetchTool ? [webFetchTool] : []),
115
122
  ...(imageTool ? [imageTool] : []),
123
+ ...(imageGenerateTool ? [imageGenerateTool] : []),
116
124
  ];
125
+ // Z.AI-powered research tool (gracefully absent when no key configured)
126
+ const deepResearchTool = createDeepResearchTool({
127
+ config: options?.config,
128
+ agentDir: options?.agentDir,
129
+ sandboxRoot: options?.sandboxRoot,
130
+ });
131
+ if (deepResearchTool)
132
+ tools.push(deepResearchTool);
117
133
  const pluginTools = resolvePluginTools({
118
134
  context: {
119
135
  config: options?.config,
@@ -20,6 +20,7 @@ import { resolveSessionAgentIds } from "../../agent-scope.js";
20
20
  import { makeBootstrapWarn, resolveBootstrapContextForRun } from "../../bootstrap-files.js";
21
21
  import { resolvePoolbotDocsPath } from "../../docs-path.js";
22
22
  import { resolveModelAuthMode } from "../../model-auth.js";
23
+ import { recordRequestOutcome } from "../../provider/integration.js";
23
24
  import { isCloudCodeAssistFormatError, resolveBootstrapMaxChars, validateAnthropicTurns, validateGeminiTurns, } from "../../pi-embedded-helpers.js";
24
25
  import { subscribeEmbeddedPiSession } from "../../pi-embedded-subscribe.js";
25
26
  import { ensurePiCompactionReserveTokens, resolveCompactionReserveTokensFloor, } from "../../pi-settings.js";
@@ -694,6 +695,20 @@ export async function runEmbeddedAttempt(params) {
694
695
  note: promptError ? "prompt error" : undefined,
695
696
  });
696
697
  anthropicPayloadLogger?.recordUsage(messagesSnapshot, promptError);
698
+ // Record request outcome into provider infrastructure (monitoring + rate limits).
699
+ // Fire-and-forget — never blocks or throws.
700
+ try {
701
+ recordRequestOutcome({
702
+ provider: params.provider,
703
+ model: params.modelId,
704
+ status: promptError ? 500 : 200,
705
+ latencyMs: Date.now() - promptStartedAt,
706
+ streaming: true,
707
+ });
708
+ }
709
+ catch {
710
+ // Observability must never break the run
711
+ }
697
712
  // Run agent_end hooks to allow plugins to analyze the conversation
698
713
  // This is fire-and-forget, so we don't await
699
714
  if (hookRunner?.hasHooks("agent_end")) {
@@ -5,6 +5,8 @@ import { createBrowserTool } from "./tools/browser-tool.js";
5
5
  import { createCanvasTool } from "./tools/canvas-tool.js";
6
6
  import { createCronTool } from "./tools/cron-tool.js";
7
7
  import { createGatewayTool } from "./tools/gateway-tool.js";
8
+ import { createDeepResearchTool } from "./tools/deep-research-tool.js";
9
+ import { createImageGenerateTool } from "./tools/image-generate-tool.js";
8
10
  import { createImageTool } from "./tools/image-tool.js";
9
11
  import { createMessageTool } from "./tools/message-tool.js";
10
12
  import { createNodesTool } from "./tools/nodes-tool.js";
@@ -24,6 +26,11 @@ export function createPoolBotTools(options) {
24
26
  modelHasVision: options?.modelHasVision,
25
27
  })
26
28
  : null;
29
+ const imageGenerateTool = createImageGenerateTool({
30
+ config: options?.config,
31
+ agentDir: options?.agentDir,
32
+ sandboxRoot: options?.sandboxRoot,
33
+ });
27
34
  const webSearchTool = createWebSearchTool({
28
35
  config: options?.config,
29
36
  sandboxed: options?.sandboxed,
@@ -104,7 +111,16 @@ export function createPoolBotTools(options) {
104
111
  ...(webSearchTool ? [webSearchTool] : []),
105
112
  ...(webFetchTool ? [webFetchTool] : []),
106
113
  ...(imageTool ? [imageTool] : []),
114
+ ...(imageGenerateTool ? [imageGenerateTool] : []),
107
115
  ];
116
+ // Z.AI-powered research tool (gracefully absent when no key configured)
117
+ const deepResearchTool = createDeepResearchTool({
118
+ config: options?.config,
119
+ agentDir: options?.agentDir,
120
+ sandboxRoot: options?.sandboxRoot,
121
+ });
122
+ if (deepResearchTool)
123
+ tools.push(deepResearchTool);
108
124
  const pluginTools = resolvePluginTools({
109
125
  context: {
110
126
  config: options?.config,
@@ -0,0 +1,76 @@
1
+ /**
2
+ * Provider Pool — Config Loader
3
+ *
4
+ * Reads pool configuration from `models.providers.<id>.pool` in the
5
+ * PoolBot config and initializes the TokenPool with the configured tokens.
6
+ *
7
+ * Called once during gateway/agent startup. Tokens are registered through
8
+ * `TokenPool.addToken()`; this loader does not itself clear existing pool state.
9
+ *
10
+ * @module provider/config-loader
11
+ */
12
+ import { createSubsystemLogger } from "../../logging/subsystem.js";
13
+ import { TokenPool } from "./token-pool.js";
14
+ const log = createSubsystemLogger("provider/config-loader");
15
/**
 * Load token pool configuration from the PoolBot config.
 *
 * Walks `cfg.models.providers` and, for every provider whose `pool.tokens`
 * list is non-empty:
 *  1. forwards the pool options (`scheduling`, `maxWaitMs`, `autoDisable`,
 *     `autoDisableThreshold`) to `TokenPool.configure()` when at least one
 *     of them is set, and
 *  2. registers each token through `TokenPool.addToken()`.
 *
 * Token entries without an `id` or `key` are logged and skipped. A token
 * whose `addToken()` call throws is logged and skipped as well, so one bad
 * entry never prevents the remaining tokens from loading.
 *
 * @param cfg PoolBot config object. A missing config, or one without
 *   `models.providers`, yields 0.
 * @returns Number of providers for which at least one token was added
 */
export function loadPoolConfig(cfg) {
    const providers = cfg?.models?.providers;
    if (!providers)
        return 0;
    let loadedProviders = 0;
    for (const [providerID, providerCfg] of Object.entries(providers)) {
        // A provider entry may be null/undefined in hand-edited configs.
        const pool = providerCfg?.pool;
        if (!pool?.tokens?.length)
            continue;
        // Configure pool scheduling before adding tokens. Presence checks
        // (not truthiness) so that `maxWaitMs: 0` and a lone
        // `autoDisableThreshold` are honored instead of silently dropped.
        const hasPoolOptions = Boolean(pool.scheduling) ||
            (pool.maxWaitMs !== undefined && pool.maxWaitMs !== null) ||
            pool.autoDisable !== undefined ||
            (pool.autoDisableThreshold !== undefined && pool.autoDisableThreshold !== null);
        if (hasPoolOptions) {
            TokenPool.configure(providerID, {
                scheduling: pool.scheduling,
                maxWaitMs: pool.maxWaitMs,
                autoDisable: pool.autoDisable,
                autoDisableThreshold: pool.autoDisableThreshold,
            });
        }
        let tokenCount = 0;
        for (const tokenCfg of pool.tokens) {
            // Optional chaining guards against null entries in the list.
            if (!tokenCfg?.id || !tokenCfg?.key) {
                log.info("skipped-invalid-token", { providerID, tokenID: tokenCfg?.id ?? "(missing)" });
                continue;
            }
            try {
                TokenPool.addToken(providerID, tokenCfg.id, tokenCfg.key, {
                    tier: tokenCfg.tier ?? "paid",
                    label: tokenCfg.label,
                    enabled: tokenCfg.enabled ?? true,
                });
                tokenCount++;
            }
            catch (e) {
                log.info("token-add-failed", {
                    providerID,
                    tokenID: tokenCfg.id,
                    error: String(e),
                });
            }
        }
        if (tokenCount > 0) {
            loadedProviders++;
            log.info("pool-loaded", {
                providerID,
                tokens: tokenCount,
                scheduling: pool.scheduling ?? "priority",
            });
        }
    }
    if (loadedProviders > 0) {
        log.info("config-loaded", { providers: loadedProviders });
    }
    return loadedProviders;
}
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Provider Infrastructure
3
+ *
4
+ * Operational modules for token rotation, rate limiting,
5
+ * request monitoring, session management, and model catalog.
6
+ *
7
+ * @module provider
8
+ */
9
+ export { RateLimits } from "./rate-limits.js";
10
+ export { RequestMonitor } from "./request-monitor.js";
11
+ export { SessionManager } from "./session-binding.js";
12
+ export { TokenPool } from "./token-pool.js";
13
+ export { ModelsDev } from "./models-dev.js";
14
+ export { resolveTokenFromPool, isProviderRateLimited, recordRequestOutcome, recordFallbackError, } from "./integration.js";
15
+ export { loadPoolConfig } from "./config-loader.js";
@@ -0,0 +1,136 @@
1
+ /**
2
+ * Provider Infrastructure — Integration Facade
3
+ *
4
+ * Thin integration layer that bridges the provider infrastructure modules
5
+ * (TokenPool, RateLimits, RequestMonitor) into the existing LLM pipeline
6
+ * (model-auth.ts, model-fallback.ts, attempt.ts).
7
+ *
8
+ * Design principles:
9
+ * - **Opt-in**: Only activates when TokenPool has tokens for a provider
10
+ * - **Fallthrough**: Returns null/undefined when not configured, letting existing paths run
11
+ * - **Error-isolated**: Never throws — all failures are caught and silently ignored
12
+ * - **Zero behavior change by default**: Without pool configuration, the system is identical
13
+ *
14
+ * @module provider/integration
15
+ */
16
+ import { RateLimits } from "./rate-limits.js";
17
+ import { RequestMonitor } from "./request-monitor.js";
18
+ import { TokenPool } from "./token-pool.js";
19
/**
 * Ask the token pool for an API key for `provider`.
 *
 * Resolves to `null` when `TokenPool.hasPool()` reports no pool, when
 * `TokenPool.getToken()` yields nothing, or when anything along the way
 * throws, so the caller can continue with its regular auth resolution.
 *
 * @param provider Provider id to look up in the pool.
 * @returns `{ apiKey, tokenID, source, waited }` on success, otherwise `null`.
 */
export async function resolveTokenFromPool(provider) {
    try {
        const picked = TokenPool.hasPool(provider)
            ? await TokenPool.getToken(provider)
            : null;
        if (!picked) {
            return null;
        }
        const { token, waited } = picked;
        const { key, id } = token;
        return {
            apiKey: key,
            tokenID: id,
            source: `pool:${provider}/${id}`,
            waited,
        };
    }
    catch {
        // The auth chain must keep going: treat any pool failure as "no token".
        return null;
    }
}
43
/**
 * Report whether `provider` is currently rate-limited.
 *
 * Delegates to `RateLimits.isLimited(provider, keyID)` and copies its
 * `isLimited` / `waitTimeMs` fields into a fresh object. `keyID` is
 * forwarded unchanged. If the lookup throws, the provider is reported
 * as not limited with a zero wait.
 *
 * @param provider Provider id to check.
 * @param keyID Optional key id passed through to `RateLimits.isLimited`.
 * @returns `{ isLimited, waitTimeMs }`
 */
export function isProviderRateLimited(provider, keyID) {
    let verdict;
    try {
        const { isLimited, waitTimeMs } = RateLimits.isLimited(provider, keyID);
        verdict = { isLimited, waitTimeMs };
    }
    catch {
        // Safe default: never block a candidate because the tracker failed.
        verdict = { isLimited: false, waitTimeMs: 0 };
    }
    return verdict;
}
61
/**
 * Fan a finished provider request out to the provider infrastructure.
 *
 * Three independent steps run in order; a failure in one never prevents the
 * next from running and never reaches the caller:
 *  1. `RequestMonitor.logRequest()` receives every outcome.
 *  2. `RateLimits.markLimitedFromResponse()` is called for status 429 only.
 *  3. When `outcome.keyID` is set and `TokenPool.hasPool()` is true for the
 *     provider, a 2xx/3xx status is reported via `TokenPool.recordSuccess()`
 *     and a status >= 400 via `TokenPool.recordFailure()`.
 *
 * @param outcome Request outcome: `provider`, `model`, `status`, `latencyMs`,
 *   `streaming`, plus optional `keyID`, token counts, `error`, and `headers`.
 */
export function recordRequestOutcome(outcome) {
    // Runs one step and discards anything it throws — monitoring, rate-limit
    // tracking and pool feedback must never break the request pipeline.
    const guarded = (step) => {
        try {
            step();
        }
        catch {
            // intentionally ignored
        }
    };
    // 1. Observability: every outcome goes to the request monitor.
    guarded(() => {
        RequestMonitor.logRequest({
            providerID: outcome.provider,
            modelID: outcome.model,
            method: "chat",
            status: outcome.status,
            latencyMs: outcome.latencyMs,
            streaming: outcome.streaming,
            keyID: outcome.keyID,
            inputTokens: outcome.inputTokens,
            outputTokens: outcome.outputTokens,
            cacheReadTokens: outcome.cacheReadTokens,
            cacheWriteTokens: outcome.cacheWriteTokens,
            error: outcome.error,
        });
    });
    // 2. Rate-limit tracking: only 429 responses are of interest.
    guarded(() => {
        if (outcome.status !== 429) {
            return;
        }
        RateLimits.markLimitedFromResponse(outcome.provider, outcome.status, outcome.headers ?? {}, outcome.keyID);
    });
    // 3. Scheduling feedback: needs a concrete key and a configured pool.
    guarded(() => {
        if (!outcome.keyID || !TokenPool.hasPool(outcome.provider)) {
            return;
        }
        const succeeded = outcome.status >= 200 && outcome.status < 400;
        if (succeeded) {
            TokenPool.recordSuccess(outcome.provider, outcome.keyID, {
                inputTokens: outcome.inputTokens ?? 0,
                outputTokens: outcome.outputTokens ?? 0,
            });
            return;
        }
        if (outcome.status >= 400) {
            TokenPool.recordFailure(outcome.provider, outcome.keyID, outcome.status, outcome.headers);
        }
    });
}
118
+ // ---------------------------------------------------------------------------
119
+ // Convenience: record a fallback error
120
+ // ---------------------------------------------------------------------------
121
/**
 * Record a fallback error (e.g., from the model-fallback catch block).
 *
 * Translates the error description into a request outcome and hands it to
 * `recordRequestOutcome()`: a missing `status` is reported as 500, a missing
 * `latencyMs` as 0, `streaming` is always false, and `error` falls back to
 * `reason` when absent.
 *
 * Never throws: this runs on the error path of the fallback loop, so a
 * nullish or malformed `params` must not mask the original failure.
 *
 * @param params `{ provider, model, status?, latencyMs?, keyID?, error?, reason?, headers? }`
 */
export function recordFallbackError(params) {
    try {
        recordRequestOutcome({
            provider: params.provider,
            model: params.model,
            status: params.status ?? 500,
            latencyMs: params.latencyMs ?? 0,
            streaming: false,
            keyID: params.keyID,
            error: params.error ?? params.reason,
            headers: params.headers,
        });
    }
    catch {
        // Monitoring must never break model fallback; drop the record.
    }
}
@@ -0,0 +1,129 @@
1
+ /**
2
+ * Models.dev Catalog
3
+ *
4
+ * Fetches and caches the model catalog from https://models.dev/api.json.
5
+ * Provides typed schemas for providers and models via Zod.
6
+ *
7
+ * @module provider/models-dev
8
+ */
9
+ import { readFile, writeFile, mkdir } from "node:fs/promises";
10
+ import path from "node:path";
11
+ import z from "zod";
12
+ import { createSubsystemLogger } from "../../logging/subsystem.js";
13
+ import { resolveStateDir } from "../../config/paths.js";
14
/**
 * Namespace holding the models.dev catalog schemas plus the cache
 * read (`get`) and network refresh (`refresh`) entry points.
 */
export var ModelsDev;
(function (ModelsDev) {
    const log = createSubsystemLogger("provider/models-dev");
    /** Cache directory: `<state dir>/cache` (presumably ~/.poolbot/cache/ — confirm against resolveStateDir). */
    const cacheDir = path.join(resolveStateDir(), "cache");
    const filepath = path.join(cacheDir, "models.json");
    /** User-Agent for fetching */
    const USER_AGENT = "pool-bot";
    /**
     * Refresh currently in flight, or null when idle. Shared so that
     * overlapping `get()` / `refresh()` calls trigger a single download and a
     * single write instead of racing on the same cache file.
     */
    let inflightRefresh = null;
    /** Schema of a single model entry in the models.dev catalog. */
    ModelsDev.Model = z.object({
        id: z.string(),
        name: z.string(),
        family: z.string().optional(),
        release_date: z.string(),
        attachment: z.boolean(),
        reasoning: z.boolean(),
        temperature: z.boolean(),
        tool_call: z.boolean(),
        interleaved: z
            .union([
            z.literal(true),
            z
                .object({
                field: z.enum(["reasoning_content", "reasoning_details"]),
            })
                .strict(),
        ])
            .optional(),
        cost: z
            .object({
            input: z.number(),
            output: z.number(),
            cache_read: z.number().optional(),
            cache_write: z.number().optional(),
            context_over_200k: z
                .object({
                input: z.number(),
                output: z.number(),
                cache_read: z.number().optional(),
                cache_write: z.number().optional(),
            })
                .optional(),
        })
            .optional(),
        limit: z.object({
            context: z.number(),
            output: z.number(),
        }),
        modalities: z
            .object({
            input: z.array(z.enum(["text", "audio", "image", "video", "pdf"])),
            output: z.array(z.enum(["text", "audio", "image", "video", "pdf"])),
        })
            .optional(),
        experimental: z.boolean().optional(),
        status: z.enum(["alpha", "beta", "deprecated"]).optional(),
        options: z.record(z.string(), z.any()),
        headers: z.record(z.string(), z.string()).optional(),
        provider: z.object({ npm: z.string() }).optional(),
    });
    /** Schema of a provider entry, keyed models included. */
    ModelsDev.Provider = z.object({
        api: z.string().optional(),
        name: z.string(),
        env: z.array(z.string()),
        id: z.string(),
        npm: z.string().optional(),
        models: z.record(z.string(), ModelsDev.Model),
    });
    /**
     * Gets the cached model catalog, triggering a background refresh.
     *
     * The refresh is not awaited, so the value returned reflects whatever is
     * on disk at the time of the read. Returns an empty object when the cache
     * file is missing, unreadable, not valid JSON, or not a JSON object.
     */
    async function get() {
        // Fire-and-forget background refresh
        refresh().catch((e) => log.error("background refresh failed", { error: String(e) }));
        // Try reading from cache
        try {
            const content = await readFile(filepath, "utf-8");
            const result = JSON.parse(content);
            // Callers index into the catalog; never hand back null or a primitive.
            return result !== null && typeof result === "object" ? result : {};
        }
        catch {
            // No usable cached file yet — return empty catalog
            return {};
        }
    }
    ModelsDev.get = get;
    /**
     * Fetches the latest model catalog from models.dev and caches it.
     *
     * Concurrent calls share one in-flight refresh. Does nothing when
     * `POOLBOT_DISABLE_MODELS_FETCH` is set.
     *
     * @returns Promise that settles when the (shared) refresh has finished.
     */
    function refresh() {
        if (inflightRefresh === null) {
            inflightRefresh = fetchAndCache().finally(() => {
                inflightRefresh = null;
            });
        }
        return inflightRefresh;
    }
    ModelsDev.refresh = refresh;
    /** Downloads the catalog and, if it is valid JSON, writes it to the cache file. */
    async function fetchAndCache() {
        if (process.env.POOLBOT_DISABLE_MODELS_FETCH)
            return;
        log.info("refreshing", { filepath });
        const result = await fetch("https://models.dev/api.json", {
            headers: {
                "User-Agent": USER_AGENT,
            },
            signal: AbortSignal.timeout(10 * 1000),
        }).catch((e) => {
            log.error("failed to fetch models.dev", { error: String(e) });
        });
        if (!result || !result.ok)
            return;
        let text;
        try {
            text = await result.text();
            // Validate before touching the cache so a truncated or non-JSON
            // body (e.g. an HTML error page) cannot replace a good catalog.
            JSON.parse(text);
        }
        catch (e) {
            log.error("invalid response from models.dev", { error: String(e) });
            return;
        }
        try {
            await mkdir(cacheDir, { recursive: true });
            await writeFile(filepath, text, "utf-8");
        }
        catch (e) {
            log.error("failed to write models.json", { error: String(e) });
        }
    }
})(ModelsDev || (ModelsDev = {}));
// Background refresh every hour; unref() so the timer never keeps the process alive
setInterval(() => ModelsDev.refresh().catch(() => { }), 60 * 60 * 1000).unref();