@poolzin/pool-bot 2026.2.20 → 2026.2.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,9 +1,17 @@
1
+ ## v2026.2.21 (2026-02-18)
2
+
3
+ ### Features
4
+ - **PLCODE Controller Skill:** native bundled skill for controlling PLCODE via CLI commands, slash commands, and multi-agent orchestration — includes session management, model selection, agent delegation (plan/code/review), operator prompts, failure handling, and workflow references
5
+
6
+ ---
7
+
1
8
  ## v2026.2.20 (2026-02-18)
2
9
 
3
10
  ### Features
4
11
  - **Z.AI Provider:** added ZhipuAI (GLM) as first-class provider with 5 models — GLM-4.7-Flash (free), GLM-4.5-Flash (free), GLM-4.6V-Flash (free vision), GLM-4.7, GLM-5
5
12
  - **Image Generation Tool:** `image_generate` tool powered by Z.AI CogView-4, CogView-4-Flash, and GLM-Image — generates images from text prompts, downloads to disk, returns media path
6
13
  - **Deep Research Tool:** `deep_research` tool powered by Z.AI GLM-4.7-Flash + web_search — configurable depth (shallow/standard/deep), language, and max sources; returns structured report with sources and metadata; cost ~$0.01–$0.05 per query
14
+ - **Provider Infrastructure:** multi-key token pool with 4 scheduling strategies (round-robin, least-used, priority, random), proactive rate limit tracking with header parsing, request monitoring with ring-buffer observability and percentile stats, sticky session binding, and models.dev catalog — integrated into the LLM pipeline as an opt-in layer that coexists with existing auth/cooldown paths
7
15
 
8
16
  ---
9
17
 
@@ -4,6 +4,7 @@ import { getShellEnvAppliedKeys } from "../infra/shell-env.js";
4
4
  import { formatCliCommand } from "../cli/command-format.js";
5
5
  import { ensureAuthProfileStore, listProfilesForProvider, resolveApiKeyForProfile, resolveAuthProfileOrder, resolveAuthStorePathForDisplay, } from "./auth-profiles.js";
6
6
  import { normalizeProviderId } from "./model-selection.js";
7
+ import { resolveTokenFromPool } from "./provider/integration.js";
7
8
  export { ensureAuthProfileStore, resolveAuthProfileOrder } from "./auth-profiles.js";
8
9
  const AWS_BEARER_ENV = "AWS_BEARER_TOKEN_BEDROCK";
9
10
  const AWS_ACCESS_KEY_ENV = "AWS_ACCESS_KEY_ID";
@@ -135,6 +136,17 @@ export async function resolveApiKeyForProvider(params) {
135
136
  }
136
137
  catch { }
137
138
  }
139
+ // Token pool rotation: if a pool is configured for this provider,
140
+ // try to get a key from the pool (with rate-limit-aware scheduling).
141
+ // Falls through to existing env/config resolution if no pool or no tokens.
142
+ const poolResult = await resolveTokenFromPool(provider);
143
+ if (poolResult) {
144
+ return {
145
+ apiKey: poolResult.apiKey,
146
+ source: poolResult.source,
147
+ mode: "api-key",
148
+ };
149
+ }
138
150
  const envResolved = resolveEnvApiKey(provider);
139
151
  if (envResolved) {
140
152
  return {
@@ -2,6 +2,7 @@ import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "./defaults.js";
2
2
  import { coerceToFailoverError, describeFailoverError, isFailoverError, isTimeoutError, } from "./failover-error.js";
3
3
  import { buildModelAliasIndex, modelKey, parseModelRef, resolveConfiguredModelRef, resolveModelRefFromString, } from "./model-selection.js";
4
4
  import { ensureAuthProfileStore, getSoonestCooldownExpiry, isProfileInCooldown, resolveAuthProfileOrder, } from "./auth-profiles.js";
5
+ import { isProviderRateLimited, recordFallbackError } from "./provider/integration.js";
5
6
  function isAbortError(err) {
6
7
  if (!err || typeof err !== "object")
7
8
  return false;
@@ -218,6 +219,21 @@ export async function runWithModelFallback(params) {
218
219
  lastProbeAttempt.set(probeThrottleKey, now);
219
220
  }
220
221
  }
222
+ // Provider-level rate limit check (from token pool / rate limit tracking).
223
+ // Complements the per-profile cooldown above — skips candidates when all
224
+ // API keys in the pool are known to be rate-limited.
225
+ const poolLimit = isProviderRateLimited(candidate.provider);
226
+ if (poolLimit.isLimited && hasFallbackCandidates && i > 0) {
227
+ // Skip non-primary candidates that are pool-rate-limited.
228
+ // Primary (i === 0) is never skipped here — it gets a chance to probe.
229
+ attempts.push({
230
+ provider: candidate.provider,
231
+ model: candidate.model,
232
+ error: `Provider ${candidate.provider} pool is rate-limited (wait ${poolLimit.waitTimeMs}ms)`,
233
+ reason: "rate_limit",
234
+ });
235
+ continue;
236
+ }
221
237
  try {
222
238
  const result = await params.run(candidate.provider, candidate.model);
223
239
  return {
@@ -246,6 +262,14 @@ export async function runWithModelFallback(params) {
246
262
  status: described.status,
247
263
  code: described.code,
248
264
  });
265
+ // Feed error into provider infrastructure (rate limits + monitoring)
266
+ recordFallbackError({
267
+ provider: candidate.provider,
268
+ model: candidate.model,
269
+ status: described.status,
270
+ reason: described.reason,
271
+ error: described.message,
272
+ });
249
273
  await params.onError?.({
250
274
  provider: candidate.provider,
251
275
  model: candidate.model,
@@ -20,6 +20,7 @@ import { resolveSessionAgentIds } from "../../agent-scope.js";
20
20
  import { makeBootstrapWarn, resolveBootstrapContextForRun } from "../../bootstrap-files.js";
21
21
  import { resolvePoolbotDocsPath } from "../../docs-path.js";
22
22
  import { resolveModelAuthMode } from "../../model-auth.js";
23
+ import { recordRequestOutcome } from "../../provider/integration.js";
23
24
  import { isCloudCodeAssistFormatError, resolveBootstrapMaxChars, validateAnthropicTurns, validateGeminiTurns, } from "../../pi-embedded-helpers.js";
24
25
  import { subscribeEmbeddedPiSession } from "../../pi-embedded-subscribe.js";
25
26
  import { ensurePiCompactionReserveTokens, resolveCompactionReserveTokensFloor, } from "../../pi-settings.js";
@@ -694,6 +695,20 @@ export async function runEmbeddedAttempt(params) {
694
695
  note: promptError ? "prompt error" : undefined,
695
696
  });
696
697
  anthropicPayloadLogger?.recordUsage(messagesSnapshot, promptError);
698
+ // Record request outcome into provider infrastructure (monitoring + rate limits).
699
+ // Fire-and-forget — never blocks or throws.
700
+ try {
701
+ recordRequestOutcome({
702
+ provider: params.provider,
703
+ model: params.modelId,
704
+ status: promptError ? 500 : 200,
705
+ latencyMs: Date.now() - promptStartedAt,
706
+ streaming: true,
707
+ });
708
+ }
709
+ catch {
710
+ // Observability must never break the run
711
+ }
697
712
  // Run agent_end hooks to allow plugins to analyze the conversation
698
713
  // This is fire-and-forget, so we don't await
699
714
  if (hookRunner?.hasHooks("agent_end")) {
@@ -0,0 +1,76 @@
1
+ /**
2
+ * Provider Pool — Config Loader
3
+ *
4
+ * Reads pool configuration from `models.providers.<id>.pool` in the
5
+ * PoolBot config and initializes the TokenPool with the configured tokens.
6
+ *
7
+ * Called once during gateway/agent startup. Idempotent — clears
8
+ * existing pool state before loading.
9
+ *
10
+ * @module provider/config-loader
11
+ */
12
+ import { createSubsystemLogger } from "../../logging/subsystem.js";
13
+ import { TokenPool } from "./token-pool.js";
14
+ const log = createSubsystemLogger("provider/config-loader");
15
/**
 * Load token pool configuration from the PoolBot config.
 *
 * Reads `models.providers.<providerID>.pool` entries and calls
 * `TokenPool.addToken()` for each configured token.
 *
 * Fix: the scheduling-options gate previously used truthiness for
 * `maxWaitMs` (so an explicit `maxWaitMs: 0` was ignored) and did not
 * consider `autoDisableThreshold`, silently dropping threshold-only
 * configurations. All options are now checked with `!== undefined`.
 *
 * @param {object} cfg - PoolBot config root (reads `cfg.models.providers`)
 * @returns {number} Number of providers with pool configuration loaded
 */
export function loadPoolConfig(cfg) {
    const providers = cfg.models?.providers;
    if (!providers)
        return 0;
    let loadedProviders = 0;
    for (const [providerID, providerCfg] of Object.entries(providers)) {
        const pool = providerCfg.pool;
        if (!pool?.tokens?.length)
            continue;
        // Configure pool scheduling before adding tokens.
        // Explicit `!== undefined` checks so falsy-but-valid values
        // (e.g. `maxWaitMs: 0`) and threshold-only configs are applied.
        if (pool.scheduling !== undefined ||
            pool.maxWaitMs !== undefined ||
            pool.autoDisable !== undefined ||
            pool.autoDisableThreshold !== undefined) {
            TokenPool.configure(providerID, {
                scheduling: pool.scheduling,
                maxWaitMs: pool.maxWaitMs,
                autoDisable: pool.autoDisable,
                autoDisableThreshold: pool.autoDisableThreshold,
            });
        }
        let tokenCount = 0;
        for (const tokenCfg of pool.tokens) {
            // A token entry needs both an id and a key; skip (and log) otherwise.
            if (!tokenCfg.id || !tokenCfg.key) {
                log.info("skipped-invalid-token", { providerID, tokenID: tokenCfg.id ?? "(missing)" });
                continue;
            }
            try {
                TokenPool.addToken(providerID, tokenCfg.id, tokenCfg.key, {
                    tier: tokenCfg.tier ?? "paid",
                    label: tokenCfg.label,
                    enabled: tokenCfg.enabled ?? true,
                });
                tokenCount++;
            }
            catch (e) {
                // A bad token must not prevent the rest of the pool from loading.
                log.info("token-add-failed", {
                    providerID,
                    tokenID: tokenCfg.id,
                    error: String(e),
                });
            }
        }
        if (tokenCount > 0) {
            loadedProviders++;
            log.info("pool-loaded", {
                providerID,
                tokens: tokenCount,
                scheduling: pool.scheduling ?? "priority",
            });
        }
    }
    if (loadedProviders > 0) {
        log.info("config-loaded", { providers: loadedProviders });
    }
    return loadedProviders;
}
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Provider Infrastructure
3
+ *
4
+ * Operational modules for token rotation, rate limiting,
5
+ * request monitoring, session management, and model catalog.
6
+ *
7
+ * @module provider
8
+ */
9
+ export { RateLimits } from "./rate-limits.js";
10
+ export { RequestMonitor } from "./request-monitor.js";
11
+ export { SessionManager } from "./session-binding.js";
12
+ export { TokenPool } from "./token-pool.js";
13
+ export { ModelsDev } from "./models-dev.js";
14
+ export { resolveTokenFromPool, isProviderRateLimited, recordRequestOutcome, recordFallbackError, } from "./integration.js";
15
+ export { loadPoolConfig } from "./config-loader.js";
@@ -0,0 +1,136 @@
1
+ /**
2
+ * Provider Infrastructure — Integration Facade
3
+ *
4
+ * Thin integration layer that bridges the provider infrastructure modules
5
+ * (TokenPool, RateLimits, RequestMonitor) into the existing LLM pipeline
6
+ * (model-auth.ts, model-fallback.ts, attempt.ts).
7
+ *
8
+ * Design principles:
9
+ * - **Opt-in**: Only activates when TokenPool has tokens for a provider
10
+ * - **Fallthrough**: Returns null/undefined when not configured, letting existing paths run
11
+ * - **Error-isolated**: Never throws — all failures are caught and logged
12
+ * - **Zero behavior change by default**: Without pool configuration, the system is identical
13
+ *
14
+ * @module provider/integration
15
+ */
16
+ import { RateLimits } from "./rate-limits.js";
17
+ import { RequestMonitor } from "./request-monitor.js";
18
+ import { TokenPool } from "./token-pool.js";
19
/**
 * Try to resolve an API key from the token pool for a provider.
 *
 * Returns `null` when no pool is configured, no token is available, or the
 * pool lookup fails for any reason — the caller then falls back to the
 * existing auth resolution path.
 */
export async function resolveTokenFromPool(provider) {
    try {
        if (!TokenPool.hasPool(provider)) {
            return null;
        }
        const picked = await TokenPool.getToken(provider);
        if (!picked) {
            return null;
        }
        const { token, waited } = picked;
        return {
            apiKey: token.key,
            tokenID: token.id,
            source: `pool:${provider}/${token.id}`,
            waited,
        };
    }
    catch {
        // Never break the auth chain — fall back to existing resolution.
        return null;
    }
}
43
/**
 * Check whether a provider is rate-limited across all its pool keys.
 *
 * When `keyID` is given, only that specific key is consulted. If rate-limit
 * state is unavailable (no pool configured, lookup throws), reports
 * not-limited as the safe default.
 */
export function isProviderRateLimited(provider, keyID) {
    try {
        const { isLimited, waitTimeMs } = RateLimits.isLimited(provider, keyID);
        return { isLimited, waitTimeMs };
    }
    catch {
        return { isLimited: false, waitTimeMs: 0 };
    }
}
61
/**
 * Record the outcome of a provider request into RequestMonitor, RateLimits,
 * and the TokenPool.
 *
 * - Every outcome is logged in RequestMonitor for observability.
 * - 429 responses mark the provider/key as rate-limited (with header parsing).
 * - 2xx/3xx outcomes record a success in TokenPool (for scheduling);
 *   4xx and above record a failure.
 *
 * Each sink is isolated in its own try/catch so instrumentation can never
 * break the request pipeline. Safe to call unconditionally — does nothing
 * harmful if the provider modules aren't configured.
 */
export function recordRequestOutcome(outcome) {
    const { provider, model, status, keyID } = outcome;
    // 1) Observability: always log the request.
    try {
        RequestMonitor.logRequest({
            providerID: provider,
            modelID: model,
            method: "chat",
            status,
            latencyMs: outcome.latencyMs,
            streaming: outcome.streaming,
            keyID,
            inputTokens: outcome.inputTokens,
            outputTokens: outcome.outputTokens,
            cacheReadTokens: outcome.cacheReadTokens,
            cacheWriteTokens: outcome.cacheWriteTokens,
            error: outcome.error,
        });
    }
    catch {
        // Observability should never break the pipeline
    }
    // 2) Rate limits: only explicit 429s are fed into the tracker.
    try {
        if (status === 429) {
            RateLimits.markLimitedFromResponse(provider, status, outcome.headers ?? {}, keyID);
        }
    }
    catch {
        // Rate limit tracking should never break the pipeline
    }
    // 3) Scheduling feedback: tell the pool how this key performed.
    try {
        if (keyID && TokenPool.hasPool(provider)) {
            if (status >= 200 && status < 400) {
                TokenPool.recordSuccess(provider, keyID, {
                    inputTokens: outcome.inputTokens ?? 0,
                    outputTokens: outcome.outputTokens ?? 0,
                });
            }
            else if (status >= 400) {
                TokenPool.recordFailure(provider, keyID, status, outcome.headers);
            }
        }
    }
    catch {
        // Token pool feedback should never break the pipeline
    }
}
118
// ---------------------------------------------------------------------------
// Convenience: record a fallback error
// ---------------------------------------------------------------------------
/**
 * Record a fallback error (e.g., from the model-fallback catch block).
 *
 * Normalizes the error into a request outcome — status defaults to 500,
 * latency to 0, non-streaming, message falls back from `error` to `reason` —
 * and forwards it to `recordRequestOutcome` so it reaches monitoring,
 * rate limits, and the token pool.
 */
export function recordFallbackError(params) {
    const { provider, model, keyID, headers } = params;
    recordRequestOutcome({
        provider,
        model,
        status: params.status ?? 500,
        latencyMs: params.latencyMs ?? 0,
        streaming: false,
        keyID,
        error: params.error ?? params.reason,
        headers,
    });
}
@@ -0,0 +1,129 @@
1
+ /**
2
+ * Models.dev Catalog
3
+ *
4
+ * Fetches and caches the model catalog from https://models.dev/api.json.
5
+ * Provides typed schemas for providers and models via Zod.
6
+ *
7
+ * @module provider/models-dev
8
+ */
9
+ import { readFile, writeFile, mkdir } from "node:fs/promises";
10
+ import path from "node:path";
11
+ import z from "zod";
12
+ import { createSubsystemLogger } from "../../logging/subsystem.js";
13
+ import { resolveStateDir } from "../../config/paths.js";
14
/**
 * Models.dev catalog namespace.
 *
 * Exposed members:
 * - `Model` / `Provider`: Zod schemas describing catalog entries
 * - `get()`: read the cached catalog (kicks off a background refresh)
 * - `refresh()`: fetch the latest catalog from models.dev and cache it
 */
export var ModelsDev;
(function (ModelsDev) {
    const log = createSubsystemLogger("provider/models-dev");
    // Catalog cache lives under the state dir: <state>/cache/models.json
    const cacheDir = path.join(resolveStateDir(), "cache");
    const filepath = path.join(cacheDir, "models.json");
    // User-Agent sent when fetching the public catalog.
    const USER_AGENT = "pool-bot";
    // Schema for a single model entry in the catalog.
    ModelsDev.Model = z.object({
        id: z.string(),
        name: z.string(),
        family: z.string().optional(),
        release_date: z.string(),
        attachment: z.boolean(),
        reasoning: z.boolean(),
        temperature: z.boolean(),
        tool_call: z.boolean(),
        // Either literal `true` or an object naming the reasoning field used.
        interleaved: z
            .union([
                z.literal(true),
                z.object({ field: z.enum(["reasoning_content", "reasoning_details"]) }).strict(),
            ])
            .optional(),
        cost: z
            .object({
                input: z.number(),
                output: z.number(),
                cache_read: z.number().optional(),
                cache_write: z.number().optional(),
                // Separate pricing tier for requests past 200k context.
                context_over_200k: z
                    .object({
                        input: z.number(),
                        output: z.number(),
                        cache_read: z.number().optional(),
                        cache_write: z.number().optional(),
                    })
                    .optional(),
            })
            .optional(),
        limit: z.object({ context: z.number(), output: z.number() }),
        modalities: z
            .object({
                input: z.array(z.enum(["text", "audio", "image", "video", "pdf"])),
                output: z.array(z.enum(["text", "audio", "image", "video", "pdf"])),
            })
            .optional(),
        experimental: z.boolean().optional(),
        status: z.enum(["alpha", "beta", "deprecated"]).optional(),
        options: z.record(z.string(), z.any()),
        headers: z.record(z.string(), z.string()).optional(),
        provider: z.object({ npm: z.string() }).optional(),
    });
    // Schema for a provider entry; `models` is a keyed map of Model entries.
    ModelsDev.Provider = z.object({
        api: z.string().optional(),
        name: z.string(),
        env: z.array(z.string()),
        id: z.string(),
        npm: z.string().optional(),
        models: z.record(z.string(), ModelsDev.Model),
    });
    /**
     * Get the cached model catalog, triggering a fire-and-forget background
     * refresh. Returns an empty object when no cached data exists yet.
     */
    async function get() {
        // Kick off a refresh in the background; never await or propagate it.
        refresh().catch((e) => log.error("background refresh failed", { error: String(e) }));
        try {
            const raw = await readFile(filepath, "utf-8");
            return JSON.parse(raw);
        }
        catch {
            // Cache miss (e.g. first run) — serve an empty catalog.
            return {};
        }
    }
    ModelsDev.get = get;
    /**
     * Fetch the latest catalog from models.dev and write it to the cache.
     * No-op when POOLBOT_DISABLE_MODELS_FETCH is set; fetch and write
     * failures are logged and swallowed.
     */
    async function refresh() {
        if (process.env.POOLBOT_DISABLE_MODELS_FETCH)
            return;
        log.info("refreshing", { filepath });
        const response = await fetch("https://models.dev/api.json", {
            headers: { "User-Agent": USER_AGENT },
            signal: AbortSignal.timeout(10 * 1000),
        }).catch((e) => {
            log.error("failed to fetch models.dev", { error: String(e) });
        });
        if (!response || !response.ok)
            return;
        try {
            const body = await response.text();
            await mkdir(cacheDir, { recursive: true });
            await writeFile(filepath, body, "utf-8");
        }
        catch (e) {
            log.error("failed to write models.json", { error: String(e) });
        }
    }
    ModelsDev.refresh = refresh;
})(ModelsDev || (ModelsDev = {}));
// Hourly background refresh; unref() so the timer never keeps the process alive.
setInterval(() => ModelsDev.refresh().catch(() => { }), 60 * 60 * 1000).unref();