@poolzin/pool-bot 2026.3.4 → 2026.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. package/CHANGELOG.md +10 -0
  2. package/assets/pool-bot-icon-dark.png +0 -0
  3. package/assets/pool-bot-logo-1.png +0 -0
  4. package/assets/pool-bot-mascot.png +0 -0
  5. package/dist/agents/pi-embedded-runner/tool-result-truncation.js +62 -7
  6. package/dist/agents/pi-tools.js +32 -2
  7. package/dist/agents/poolbot-tools.js +12 -0
  8. package/dist/agents/session-write-lock.js +93 -8
  9. package/dist/agents/tools/pdf-native-providers.js +102 -0
  10. package/dist/agents/tools/pdf-tool.helpers.js +86 -0
  11. package/dist/agents/tools/pdf-tool.js +508 -0
  12. package/dist/auto-reply/reply/get-reply.js +6 -0
  13. package/dist/auto-reply/reply/message-preprocess-hooks.js +17 -0
  14. package/dist/build-info.json +3 -3
  15. package/dist/cli/banner.js +20 -1
  16. package/dist/cli/security-cli.js +211 -2
  17. package/dist/cli/tagline.js +7 -0
  18. package/dist/config/types.cli.js +1 -0
  19. package/dist/config/types.security.js +33 -0
  20. package/dist/config/zod-schema.js +15 -0
  21. package/dist/config/zod-schema.providers-core.js +1 -0
  22. package/dist/config/zod-schema.security.js +113 -0
  23. package/dist/cron/normalize.js +3 -0
  24. package/dist/cron/service/jobs.js +48 -0
  25. package/dist/discord/monitor/message-handler.preflight.js +11 -2
  26. package/dist/gateway/http-common.js +6 -1
  27. package/dist/gateway/protocol/schema/cron.js +3 -0
  28. package/dist/gateway/server-channels.js +99 -14
  29. package/dist/gateway/server-cron.js +89 -0
  30. package/dist/gateway/server-health-probes.js +55 -0
  31. package/dist/gateway/server-http.js +5 -0
  32. package/dist/hooks/bundled/session-memory/handler.js +8 -2
  33. package/dist/hooks/fire-and-forget.js +6 -0
  34. package/dist/hooks/internal-hooks.js +64 -19
  35. package/dist/hooks/message-hook-mappers.js +179 -0
  36. package/dist/infra/abort-signal.js +12 -0
  37. package/dist/infra/boundary-file-read.js +118 -0
  38. package/dist/infra/boundary-path.js +594 -0
  39. package/dist/infra/file-identity.js +12 -0
  40. package/dist/infra/fs-safe.js +377 -12
  41. package/dist/infra/hardlink-guards.js +30 -0
  42. package/dist/infra/json-utf8-bytes.js +8 -0
  43. package/dist/infra/net/fetch-guard.js +63 -13
  44. package/dist/infra/net/proxy-env.js +17 -0
  45. package/dist/infra/net/ssrf.js +74 -272
  46. package/dist/infra/path-alias-guards.js +21 -0
  47. package/dist/infra/path-guards.js +13 -1
  48. package/dist/infra/ports-probe.js +19 -0
  49. package/dist/infra/prototype-keys.js +4 -0
  50. package/dist/infra/restart-stale-pids.js +254 -0
  51. package/dist/infra/safe-open-sync.js +71 -0
  52. package/dist/infra/secure-random.js +7 -0
  53. package/dist/media/ffmpeg-limits.js +4 -0
  54. package/dist/media/input-files.js +6 -2
  55. package/dist/media/temp-files.js +12 -0
  56. package/dist/memory/embedding-chunk-limits.js +5 -2
  57. package/dist/memory/embeddings-ollama.js +91 -138
  58. package/dist/memory/embeddings-remote-fetch.js +11 -10
  59. package/dist/memory/embeddings.js +25 -9
  60. package/dist/memory/manager-embedding-ops.js +1 -1
  61. package/dist/memory/post-json.js +23 -0
  62. package/dist/memory/qmd-manager.js +272 -77
  63. package/dist/memory/remote-http.js +33 -0
  64. package/dist/plugin-sdk/windows-spawn.js +214 -0
  65. package/dist/security/capability-guards.js +89 -0
  66. package/dist/security/capability-manager.js +76 -0
  67. package/dist/security/capability.js +147 -0
  68. package/dist/security/index.js +7 -0
  69. package/dist/security/middleware.js +105 -0
  70. package/dist/shared/net/ip-test-fixtures.js +1 -0
  71. package/dist/shared/net/ip.js +303 -0
  72. package/dist/shared/net/ipv4.js +8 -11
  73. package/dist/shared/pid-alive.js +59 -2
  74. package/dist/slack/monitor/context.js +1 -0
  75. package/dist/slack/monitor/message-handler/dispatch.js +14 -1
  76. package/dist/slack/monitor/provider.js +2 -0
  77. package/dist/test-helpers/ssrf.js +13 -0
  78. package/dist/tui/tui.js +9 -4
  79. package/dist/utils/fetch-timeout.js +12 -1
  80. package/docs/adr/003-feature-gap-analysis.md +112 -0
  81. package/package.json +10 -4
package/CHANGELOG.md CHANGED
@@ -1,3 +1,13 @@
1
+ ## v2026.3.6 (2026-03-06)
2
+
3
+ ### Features
4
+ - **PDF Analysis Tool:** new `pdf` tool for Pool Bot agents — supports native PDF processing for Anthropic (via `pdfs-2024-09-25` beta header) and Google Gemini (via `inline_data`), with automatic text-extraction + page-image fallback for other providers; configurable via `pdfModel` in agent defaults
5
+ - **Cron onFailure Alerts:** `onFailure` config on cron jobs — triggers webhook or announce alert when a job fails (`status="error"`); supports `bestEffort` flag to silence delivery errors
6
+ - **Gateway Health Probes:** `/health`, `/healthz` (liveness) and `/ready`, `/readyz` (readiness) endpoints — run before auth middleware for Docker/K8s orchestrator compatibility
7
+ - **Tool Result Truncation:** head+tail truncation strategy preserving important tail content (errors, JSON closing braces, summaries) via `hasImportantTail()` heuristic and `MIDDLE_OMISSION_MARKER`
8
+
9
+ ---
10
+
1
11
  ## v2026.3.4 (2026-03-04)
2
12
 
3
13
  ### Features
Binary file
Binary file
Binary file
@@ -26,20 +26,75 @@ const TRUNCATION_SUFFIX = "\n\n⚠️ [Content truncated — original was too la
26
26
  "The content above is a partial view. If you need more, request specific sections or use " +
27
27
  "offset/limit parameters to read smaller chunks.]";
28
28
  /**
29
- * Truncate a single text string to fit within maxChars, preserving the beginning.
29
+ * Marker inserted between head and tail when both ends are preserved.
30
30
  */
31
- export function truncateToolResultText(text, maxChars) {
31
+ export const MIDDLE_OMISSION_MARKER = "\n\n⚠️ [... middle content omitted — showing head and tail ...]\n\n";
32
+ /**
33
+ * Check whether the tail of a text block contains important content that
34
+ * should be preserved (error messages, JSON closing braces, summaries).
35
+ */
36
+ export function hasImportantTail(text) {
37
+ const tailSlice = text.slice(-2000).toLowerCase();
38
+ const errorPatterns = [
39
+ "error",
40
+ "exception",
41
+ "failed",
42
+ "fatal",
43
+ "traceback",
44
+ "panic",
45
+ "stack trace",
46
+ "errno",
47
+ "exit code",
48
+ ];
49
+ if (errorPatterns.some((p) => tailSlice.includes(p)))
50
+ return true;
51
+ // JSON closing brace — likely structured output
52
+ if (tailSlice.trimEnd().endsWith("}"))
53
+ return true;
54
+ // Summary-like keywords
55
+ const summaryPatterns = ["total", "summary", "result", "complete", "finished", "done"];
56
+ return summaryPatterns.some((p) => tailSlice.includes(p));
57
+ }
58
+ /**
59
+ * Truncate a single text string to fit within maxChars.
60
+ *
61
+ * When the tail contains important content (errors, JSON, summaries) and the
62
+ * budget allows, the function preserves both the head and the tail of the
63
+ * text with a middle-omission marker. Otherwise it falls back to head-only
64
+ * truncation.
65
+ */
66
+ export function truncateToolResultText(text, maxChars, options) {
32
67
  if (text.length <= maxChars) {
33
68
  return text;
34
69
  }
35
- const keepChars = Math.max(MIN_KEEP_CHARS, maxChars - TRUNCATION_SUFFIX.length);
70
+ const suffix = options?.suffix ?? TRUNCATION_SUFFIX;
71
+ const minKeep = options?.minKeepChars ?? MIN_KEEP_CHARS;
72
+ // --- head + tail strategy ---
73
+ if (hasImportantTail(text) && maxChars > minKeep + MIDDLE_OMISSION_MARKER.length + 200) {
74
+ const budget = maxChars - MIDDLE_OMISSION_MARKER.length;
75
+ const tailBudget = Math.min(Math.floor(budget * 0.3), 4000);
76
+ const headBudget = budget - tailBudget;
77
+ // snap head to newline boundary
78
+ let headEnd = headBudget;
79
+ const headNl = text.lastIndexOf("\n", headBudget);
80
+ if (headNl > headBudget * 0.8)
81
+ headEnd = headNl;
82
+ // snap tail to newline boundary
83
+ let tailStart = text.length - tailBudget;
84
+ const tailNl = text.indexOf("\n", tailStart);
85
+ if (tailNl !== -1 && tailNl < tailStart + tailBudget * 0.2)
86
+ tailStart = tailNl + 1;
87
+ return text.slice(0, headEnd) + MIDDLE_OMISSION_MARKER + text.slice(tailStart);
88
+ }
89
+ // --- head-only fallback ---
90
+ const keepChars = Math.max(minKeep, maxChars - suffix.length);
36
91
  // Try to break at a newline boundary to avoid cutting mid-line
37
92
  let cutPoint = keepChars;
38
93
  const lastNewline = text.lastIndexOf("\n", keepChars);
39
94
  if (lastNewline > keepChars * 0.8) {
40
95
  cutPoint = lastNewline;
41
96
  }
42
- return text.slice(0, cutPoint) + TRUNCATION_SUFFIX;
97
+ return text.slice(0, cutPoint) + suffix;
43
98
  }
44
99
  /**
45
100
  * Calculate the maximum allowed characters for a single tool result
@@ -57,7 +112,7 @@ export function calculateMaxToolResultChars(contextWindowTokens) {
57
112
  /**
58
113
  * Get the total character count of text content blocks in a tool result message.
59
114
  */
60
- function getToolResultTextLength(msg) {
115
+ export function getToolResultTextLength(msg) {
61
116
  if (!msg || msg.role !== "toolResult") {
62
117
  return 0;
63
118
  }
@@ -80,7 +135,7 @@ function getToolResultTextLength(msg) {
80
135
  * Truncate a tool result message's text content blocks to fit within maxChars.
81
136
  * Returns a new message (does not mutate the original).
82
137
  */
83
- function truncateToolResultMessage(msg, maxChars) {
138
+ export function truncateToolResultMessage(msg, maxChars, options) {
84
139
  const content = msg.content;
85
140
  if (!Array.isArray(content)) {
86
141
  return msg;
@@ -104,7 +159,7 @@ function truncateToolResultMessage(msg, maxChars) {
104
159
  const blockBudget = Math.max(MIN_KEEP_CHARS, Math.floor(maxChars * blockShare));
105
160
  return {
106
161
  ...textBlock,
107
- text: truncateToolResultText(textBlock.text, blockBudget),
162
+ text: truncateToolResultText(textBlock.text, blockBudget, options),
108
163
  };
109
164
  });
110
165
  return { ...msg, content: newContent };
@@ -16,8 +16,10 @@ import { assertRequiredParams, CLAUDE_PARAM_GROUPS, createPoolbotReadTool, creat
16
16
  import { cleanToolSchemaForGemini, normalizeToolParameters } from "./pi-tools.schema.js";
17
17
  import { getSubagentDepthFromSessionStore } from "./subagent-depth.js";
18
18
  import { applyToolPolicyPipeline, buildDefaultToolPolicyPipelineSteps, } from "./tool-policy-pipeline.js";
19
- import { applyOwnerOnlyToolPolicy, collectExplicitAllowlist, mergeAlsoAllowPolicy, resolveToolProfilePolicy, } from "./tool-policy.js";
19
+ import { applyOwnerOnlyToolPolicy, collectExplicitAllowlist, mergeAlsoAllowPolicy, normalizeToolName, resolveToolProfilePolicy, } from "./tool-policy.js";
20
20
  import { resolveWorkspaceRoot } from "./workspace-dir.js";
21
+ import { CapabilityError } from "../security/capability-guards.js";
22
+ import { createDefaultSecurityMiddleware } from "../security/middleware.js";
21
23
  function isOpenAIProvider(provider) {
22
24
  const normalized = provider?.trim().toLowerCase();
23
25
  return normalized === "openai" || normalized === "openai-codex";
@@ -339,8 +341,36 @@ export function createPoolbotCodingTools(options) {
339
341
  const withAbort = options?.abortSignal
340
342
  ? withHooks.map((tool) => wrapToolWithAbortSignal(tool, options.abortSignal))
341
343
  : withHooks;
344
+ // Apply capability-based security middleware if enabled
345
+ const withCapabilities = options?.config?.security?.enabled && agentId
346
+ ? withAbort.map((tool) => wrapToolWithCapabilityCheck(tool, agentId))
347
+ : withAbort;
342
348
  // NOTE: Keep canonical (lowercase) tool names here.
343
349
  // pi-ai's Anthropic OAuth transport remaps tool names to Claude Code-style names
344
350
  // on the wire and maps them back for tool dispatch.
345
- return withAbort;
351
+ return withCapabilities;
352
+ }
353
+ /**
354
+ * Wraps a tool with capability-based security checks.
355
+ * This enforces fine-grained permissions for tool invocation.
356
+ */
357
+ function wrapToolWithCapabilityCheck(tool, agentId) {
358
+ const middleware = createDefaultSecurityMiddleware();
359
+ return {
360
+ ...tool,
361
+ execute: async (toolCallId, args, signal, onUpdate) => {
362
+ const ctx = { agentId };
363
+ const toolId = normalizeToolName(tool.name);
364
+ const result = await middleware(ctx, toolId, (args ?? {}), async () => {
365
+ if (!tool.execute) {
366
+ throw new CapabilityError(`Tool ${tool.name} has no execute function`, agentId, {
367
+ type: "tool:invoke",
368
+ toolId,
369
+ });
370
+ }
371
+ return await tool.execute(toolCallId, args, signal, onUpdate);
372
+ });
373
+ return result;
374
+ },
375
+ };
346
376
  }
@@ -9,6 +9,7 @@ import { createGatewayTool } from "./tools/gateway-tool.js";
9
9
  import { createImageGenerateTool } from "./tools/image-generate-tool.js";
10
10
  import { createImageTool } from "./tools/image-tool.js";
11
11
  import { createMessageTool } from "./tools/message-tool.js";
12
+ import { createPdfTool } from "./tools/pdf-tool.js";
12
13
  import { createNodesTool } from "./tools/nodes-tool.js";
13
14
  import { createSessionStatusTool } from "./tools/session-status-tool.js";
14
15
  import { createSessionsHistoryTool } from "./tools/sessions-history-tool.js";
@@ -32,6 +33,16 @@ export function createPoolBotTools(options) {
32
33
  modelHasVision: options?.modelHasVision,
33
34
  })
34
35
  : null;
36
+ const pdfTool = options?.agentDir?.trim()
37
+ ? createPdfTool({
38
+ config: options?.config,
39
+ agentDir: options.agentDir,
40
+ workspaceDir,
41
+ sandbox: options?.sandboxRoot && options?.sandboxFsBridge
42
+ ? { root: options.sandboxRoot, bridge: options.sandboxFsBridge }
43
+ : undefined,
44
+ })
45
+ : null;
35
46
  const imageGenerateTool = createImageGenerateTool({
36
47
  config: options?.config,
37
48
  agentDir: options?.agentDir,
@@ -121,6 +132,7 @@ export function createPoolBotTools(options) {
121
132
  ...(webFetchTool ? [webFetchTool] : []),
122
133
  ...(imageTool ? [imageTool] : []),
123
134
  ...(imageGenerateTool ? [imageGenerateTool] : []),
135
+ ...(pdfTool ? [pdfTool] : []),
124
136
  ];
125
137
  // Z.AI-powered research tool (gracefully absent when no key configured)
126
138
  const deepResearchTool = createDeepResearchTool({
@@ -1,8 +1,11 @@
1
1
  import fsSync from "node:fs";
2
2
  import fs from "node:fs/promises";
3
3
  import path from "node:path";
4
- import { isPidAlive } from "../shared/pid-alive.js";
4
+ import { getProcessStartTime, isPidAlive } from "../shared/pid-alive.js";
5
5
  import { resolveProcessScopedMap } from "../shared/process-scoped-map.js";
6
+ function isValidLockNumber(value) {
7
+ return typeof value === "number" && Number.isInteger(value) && value >= 0;
8
+ }
6
9
  const CLEANUP_SIGNALS = ["SIGINT", "SIGTERM", "SIGQUIT", "SIGABRT"];
7
10
  const CLEANUP_STATE_KEY = Symbol.for("poolbot.sessionWriteLockCleanupState");
8
11
  const HELD_LOCKS_KEY = Symbol.for("poolbot.sessionWriteLockHeldLocks");
@@ -196,12 +199,15 @@ async function readLockPayload(lockPath) {
196
199
  const raw = await fs.readFile(lockPath, "utf8");
197
200
  const parsed = JSON.parse(raw);
198
201
  const payload = {};
199
- if (typeof parsed.pid === "number") {
202
+ if (isValidLockNumber(parsed.pid) && parsed.pid > 0) {
200
203
  payload.pid = parsed.pid;
201
204
  }
202
205
  if (typeof parsed.createdAt === "string") {
203
206
  payload.createdAt = parsed.createdAt;
204
207
  }
208
+ if (isValidLockNumber(parsed.starttime)) {
209
+ payload.starttime = parsed.starttime;
210
+ }
205
211
  return payload;
206
212
  }
207
213
  catch {
@@ -209,11 +215,21 @@ async function readLockPayload(lockPath) {
209
215
  }
210
216
  }
211
217
  function inspectLockPayload(payload, staleMs, nowMs) {
212
- const pid = typeof payload?.pid === "number" ? payload.pid : null;
218
+ const pid = isValidLockNumber(payload?.pid) && payload.pid > 0 ? payload.pid : null;
213
219
  const pidAlive = pid !== null ? isPidAlive(pid) : false;
214
220
  const createdAt = typeof payload?.createdAt === "string" ? payload.createdAt : null;
215
221
  const createdAtMs = createdAt ? Date.parse(createdAt) : Number.NaN;
216
222
  const ageMs = Number.isFinite(createdAtMs) ? Math.max(0, nowMs - createdAtMs) : null;
223
+ // Detect PID recycling: if the PID is alive but its start time differs from
224
+ // what was recorded in the lock file, the original process died and the OS
225
+ // reassigned the same PID to a different process.
226
+ const storedStarttime = isValidLockNumber(payload?.starttime) ? payload.starttime : null;
227
+ const pidRecycled = pidAlive && pid !== null && storedStarttime !== null
228
+ ? (() => {
229
+ const currentStarttime = getProcessStartTime(pid);
230
+ return currentStarttime !== null && currentStarttime !== storedStarttime;
231
+ })()
232
+ : false;
217
233
  const staleReasons = [];
218
234
  if (pid === null) {
219
235
  staleReasons.push("missing-pid");
@@ -221,6 +237,9 @@ function inspectLockPayload(payload, staleMs, nowMs) {
221
237
  else if (!pidAlive) {
222
238
  staleReasons.push("dead-pid");
223
239
  }
240
+ else if (pidRecycled) {
241
+ staleReasons.push("recycled-pid");
242
+ }
224
243
  if (ageMs === null) {
225
244
  staleReasons.push("invalid-createdAt");
226
245
  }
@@ -236,6 +255,38 @@ function inspectLockPayload(payload, staleMs, nowMs) {
236
255
  staleReasons,
237
256
  };
238
257
  }
258
+ function lockInspectionNeedsMtimeStaleFallback(details) {
259
+ return (details.stale &&
260
+ details.staleReasons.every((reason) => reason === "missing-pid" || reason === "invalid-createdAt"));
261
+ }
262
+ async function shouldReclaimContendedLockFile(lockPath, details, staleMs, nowMs) {
263
+ if (!details.stale) {
264
+ return false;
265
+ }
266
+ if (!lockInspectionNeedsMtimeStaleFallback(details)) {
267
+ return true;
268
+ }
269
+ try {
270
+ const stat = await fs.stat(lockPath);
271
+ const ageMs = Math.max(0, nowMs - stat.mtimeMs);
272
+ return ageMs > staleMs;
273
+ }
274
+ catch (error) {
275
+ const code = error?.code;
276
+ return code !== "ENOENT";
277
+ }
278
+ }
279
+ function shouldTreatAsOrphanSelfLock(params) {
280
+ const pid = isValidLockNumber(params.payload?.pid) ? params.payload.pid : null;
281
+ if (pid !== process.pid) {
282
+ return false;
283
+ }
284
+ const hasValidStarttime = isValidLockNumber(params.payload?.starttime);
285
+ if (hasValidStarttime) {
286
+ return false;
287
+ }
288
+ return !HELD_LOCKS.has(params.normalizedSessionFile);
289
+ }
239
290
  export async function cleanStaleLockFiles(params) {
240
291
  const sessionsDir = path.resolve(params.sessionsDir);
241
292
  const staleMs = resolvePositiveMs(params.staleMs, DEFAULT_STALE_MS);
@@ -256,7 +307,7 @@ export async function cleanStaleLockFiles(params) {
256
307
  const cleaned = [];
257
308
  const lockEntries = entries
258
309
  .filter((entry) => entry.name.endsWith(".jsonl.lock"))
259
- .toSorted((a, b) => a.name.localeCompare(b.name));
310
+ .sort((a, b) => a.name.localeCompare(b.name));
260
311
  for (const entry of lockEntries) {
261
312
  const lockPath = path.join(sessionsDir, entry.name);
262
313
  const payload = await readLockPayload(lockPath);
@@ -307,10 +358,16 @@ export async function acquireSessionWriteLock(params) {
307
358
  let attempt = 0;
308
359
  while (Date.now() - startedAt < timeoutMs) {
309
360
  attempt += 1;
361
+ let handle = null;
310
362
  try {
311
- const handle = await fs.open(lockPath, "wx");
363
+ handle = await fs.open(lockPath, "wx");
312
364
  const createdAt = new Date().toISOString();
313
- await handle.writeFile(JSON.stringify({ pid: process.pid, createdAt }, null, 2), "utf8");
365
+ const starttime = getProcessStartTime(process.pid);
366
+ const lockPayload = { pid: process.pid, createdAt };
367
+ if (starttime !== null) {
368
+ lockPayload.starttime = starttime;
369
+ }
370
+ await handle.writeFile(JSON.stringify(lockPayload, null, 2), "utf8");
314
371
  const createdHeld = {
315
372
  count: 1,
316
373
  handle,
@@ -326,13 +383,41 @@ export async function acquireSessionWriteLock(params) {
326
383
  };
327
384
  }
328
385
  catch (err) {
386
+ if (handle) {
387
+ try {
388
+ await handle.close();
389
+ }
390
+ catch {
391
+ // Ignore cleanup errors on failed lock initialization.
392
+ }
393
+ try {
394
+ await fs.rm(lockPath, { force: true });
395
+ }
396
+ catch {
397
+ // Ignore cleanup errors on failed lock initialization.
398
+ }
399
+ }
329
400
  const code = err.code;
330
401
  if (code !== "EEXIST") {
331
402
  throw err;
332
403
  }
333
404
  const payload = await readLockPayload(lockPath);
334
- const inspected = inspectLockPayload(payload, staleMs, Date.now());
335
- if (inspected.stale) {
405
+ const nowMs = Date.now();
406
+ const inspected = inspectLockPayload(payload, staleMs, nowMs);
407
+ const orphanSelfLock = shouldTreatAsOrphanSelfLock({
408
+ payload,
409
+ normalizedSessionFile,
410
+ });
411
+ const reclaimDetails = orphanSelfLock
412
+ ? {
413
+ ...inspected,
414
+ stale: true,
415
+ staleReasons: inspected.staleReasons.includes("orphan-self-pid")
416
+ ? inspected.staleReasons
417
+ : [...inspected.staleReasons, "orphan-self-pid"],
418
+ }
419
+ : inspected;
420
+ if (await shouldReclaimContendedLockFile(lockPath, reclaimDetails, staleMs, nowMs)) {
336
421
  await fs.rm(lockPath, { force: true });
337
422
  continue;
338
423
  }
@@ -0,0 +1,102 @@
1
+ /**
2
+ * Direct SDK/HTTP calls for providers that support native PDF document input.
3
+ * This bypasses pi-ai's content type system which does not have a "document" type.
4
+ */
5
+ import { isRecord } from "../../utils.js";
6
+ import { normalizeSecretInput } from "../../utils/normalize-secret-input.js";
7
+ export async function anthropicAnalyzePdf(params) {
8
+ const apiKey = normalizeSecretInput(params.apiKey);
9
+ if (!apiKey) {
10
+ throw new Error("Anthropic PDF: apiKey required");
11
+ }
12
+ const content = [];
13
+ for (const pdf of params.pdfs) {
14
+ content.push({
15
+ type: "document",
16
+ source: {
17
+ type: "base64",
18
+ media_type: "application/pdf",
19
+ data: pdf.base64,
20
+ },
21
+ });
22
+ }
23
+ content.push({ type: "text", text: params.prompt });
24
+ const baseUrl = (params.baseUrl ?? "https://api.anthropic.com").replace(/\/+$/, "");
25
+ const res = await fetch(`${baseUrl}/v1/messages`, {
26
+ method: "POST",
27
+ headers: {
28
+ "Content-Type": "application/json",
29
+ "x-api-key": apiKey,
30
+ "anthropic-version": "2023-06-01",
31
+ "anthropic-beta": "pdfs-2024-09-25",
32
+ },
33
+ body: JSON.stringify({
34
+ model: params.modelId,
35
+ max_tokens: params.maxTokens ?? 4096,
36
+ messages: [{ role: "user", content }],
37
+ }),
38
+ });
39
+ if (!res.ok) {
40
+ const body = await res.text().catch(() => "");
41
+ throw new Error(`Anthropic PDF request failed (${res.status} ${res.statusText})${body ? `: ${body.slice(0, 400)}` : ""}`);
42
+ }
43
+ const json = (await res.json().catch(() => null));
44
+ if (!isRecord(json)) {
45
+ throw new Error("Anthropic PDF response was not JSON.");
46
+ }
47
+ const responseContent = json.content;
48
+ if (!Array.isArray(responseContent)) {
49
+ throw new Error("Anthropic PDF response missing content array.");
50
+ }
51
+ const text = responseContent
52
+ .filter((block) => block.type === "text" && typeof block.text === "string")
53
+ .map((block) => block.text)
54
+ .join("");
55
+ if (!text.trim()) {
56
+ throw new Error("Anthropic PDF returned no text.");
57
+ }
58
+ return text.trim();
59
+ }
60
+ export async function geminiAnalyzePdf(params) {
61
+ const apiKey = normalizeSecretInput(params.apiKey);
62
+ if (!apiKey) {
63
+ throw new Error("Gemini PDF: apiKey required");
64
+ }
65
+ const parts = [];
66
+ for (const pdf of params.pdfs) {
67
+ parts.push({
68
+ inline_data: {
69
+ mime_type: "application/pdf",
70
+ data: pdf.base64,
71
+ },
72
+ });
73
+ }
74
+ parts.push({ text: params.prompt });
75
+ const baseUrl = (params.baseUrl ?? "https://generativelanguage.googleapis.com").replace(/\/+$/, "");
76
+ const url = `${baseUrl}/v1beta/models/${encodeURIComponent(params.modelId)}:generateContent?key=${encodeURIComponent(apiKey)}`;
77
+ const res = await fetch(url, {
78
+ method: "POST",
79
+ headers: { "Content-Type": "application/json" },
80
+ body: JSON.stringify({
81
+ contents: [{ role: "user", parts }],
82
+ }),
83
+ });
84
+ if (!res.ok) {
85
+ const body = await res.text().catch(() => "");
86
+ throw new Error(`Gemini PDF request failed (${res.status} ${res.statusText})${body ? `: ${body.slice(0, 400)}` : ""}`);
87
+ }
88
+ const json = (await res.json().catch(() => null));
89
+ if (!isRecord(json)) {
90
+ throw new Error("Gemini PDF response was not JSON.");
91
+ }
92
+ const candidates = json.candidates;
93
+ if (!Array.isArray(candidates) || candidates.length === 0) {
94
+ throw new Error("Gemini PDF returned no candidates.");
95
+ }
96
+ const textParts = candidates[0].content?.parts?.filter((p) => typeof p.text === "string") ?? [];
97
+ const text = textParts.map((p) => p.text).join("");
98
+ if (!text.trim()) {
99
+ throw new Error("Gemini PDF returned no text.");
100
+ }
101
+ return text.trim();
102
+ }
@@ -0,0 +1,86 @@
1
+ import { extractAssistantText } from "../pi-embedded-utils.js";
2
+ /**
3
+ * Providers known to support native PDF document input.
4
+ * When the model's provider is in this set, the tool sends raw PDF bytes
5
+ * via provider-specific API calls instead of extracting text/images first.
6
+ */
7
+ export const NATIVE_PDF_PROVIDERS = new Set(["anthropic", "google"]);
8
+ /**
9
+ * Check whether a provider supports native PDF document input.
10
+ */
11
+ export function providerSupportsNativePdf(provider) {
12
+ return NATIVE_PDF_PROVIDERS.has(provider.toLowerCase().trim());
13
+ }
14
+ /**
15
+ * Parse a page range string (e.g. "1-5", "3", "1-3,7-9") into an array of 1-based page numbers.
16
+ */
17
+ export function parsePageRange(range, maxPages) {
18
+ const pages = new Set();
19
+ const parts = range.split(",").map((p) => p.trim());
20
+ for (const part of parts) {
21
+ if (!part) {
22
+ continue;
23
+ }
24
+ const dashMatch = /^(\d+)\s*-\s*(\d+)$/.exec(part);
25
+ if (dashMatch) {
26
+ const start = Number(dashMatch[1]);
27
+ const end = Number(dashMatch[2]);
28
+ if (!Number.isFinite(start) || !Number.isFinite(end) || start < 1 || end < start) {
29
+ throw new Error(`Invalid page range: "${part}"`);
30
+ }
31
+ for (let i = start; i <= Math.min(end, maxPages); i++) {
32
+ pages.add(i);
33
+ }
34
+ }
35
+ else {
36
+ const num = Number(part);
37
+ if (!Number.isFinite(num) || num < 1) {
38
+ throw new Error(`Invalid page number: "${part}"`);
39
+ }
40
+ if (num <= maxPages) {
41
+ pages.add(num);
42
+ }
43
+ }
44
+ }
45
+ return Array.from(pages).toSorted((a, b) => a - b);
46
+ }
47
+ export function coercePdfAssistantText(params) {
48
+ const label = `${params.provider}/${params.model}`;
49
+ const errorMessage = params.message.errorMessage?.trim();
50
+ const fail = (message) => {
51
+ throw new Error(message ? `PDF model failed (${label}): ${message}` : `PDF model failed (${label})`);
52
+ };
53
+ if (params.message.stopReason === "error" || params.message.stopReason === "aborted") {
54
+ fail(errorMessage);
55
+ }
56
+ if (errorMessage) {
57
+ fail(errorMessage);
58
+ }
59
+ const text = extractAssistantText(params.message);
60
+ const trimmed = text.trim();
61
+ if (trimmed) {
62
+ return trimmed;
63
+ }
64
+ throw new Error(`PDF model returned no text (${label}).`);
65
+ }
66
+ /**
67
+ * Coerce the `agents.defaults.pdfModel` config value into a PdfModelConfig.
68
+ * Follows the same manual pattern as coerceImageModelConfig.
69
+ */
70
+ export function coercePdfModelConfig(cfg) {
71
+ const pdfModel = cfg?.agents?.defaults?.pdfModel;
72
+ const primary = typeof pdfModel === "string" ? pdfModel.trim() : pdfModel?.primary;
73
+ const fallbacks = typeof pdfModel === "object" ? (pdfModel?.fallbacks ?? []) : [];
74
+ return {
75
+ ...(primary?.trim() ? { primary: primary.trim() } : {}),
76
+ ...(fallbacks.length > 0 ? { fallbacks } : {}),
77
+ };
78
+ }
79
+ export function resolvePdfToolMaxTokens(modelMaxTokens, requestedMaxTokens = 4096) {
80
+ if (typeof modelMaxTokens !== "number" ||
81
+ !Number.isFinite(modelMaxTokens) ||
82
+ modelMaxTokens <= 0) {
83
+ return requestedMaxTokens;
84
+ }
85
+ return Math.min(requestedMaxTokens, modelMaxTokens);
86
+ }