botholomew 0.11.4 → 0.11.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "botholomew",
3
- "version": "0.11.4",
3
+ "version": "0.11.6",
4
4
  "description": "An autonomous AI agent for knowledge work — works your task queue while you sleep.",
5
5
  "type": "module",
6
6
  "bin": {
package/src/chat/agent.ts CHANGED
@@ -56,6 +56,7 @@ const CHAT_TOOL_NAMES = new Set([
56
56
  "mcp_info",
57
57
  "mcp_exec",
58
58
  "read_large_result",
59
+ "pipe_to_context",
59
60
  "spawn_worker",
60
61
  "skill_list",
61
62
  "skill_read",
@@ -47,11 +47,13 @@ function isModelCached(model: string): boolean {
47
47
  async function getPipeline(model: string): Promise<FeatureExtractionPipeline> {
48
48
  let p = pipelinePromises.get(model);
49
49
  if (!p) {
50
- logger.info(
51
- isModelCached(model)
52
- ? `Loading embedding model ${model}`
53
- : `Loading embedding model ${model} (first run, downloading weights)`,
54
- );
50
+ if (isModelCached(model)) {
51
+ logger.debug(`Loading embedding model ${model}`);
52
+ } else {
53
+ logger.info(
54
+ `Loading embedding model ${model} (first run, downloading weights)`,
55
+ );
56
+ }
55
57
  p = pipeline("feature-extraction", model);
56
58
  pipelinePromises.set(model, p);
57
59
  }
@@ -1,5 +1,5 @@
1
1
  import type { SkillDefinition } from "./parser.ts";
2
- import { renderSkill, validateSkillArgs } from "./parser.ts";
2
+ import { renderSkill, tokenizeForSkill, validateSkillArgs } from "./parser.ts";
3
3
 
4
4
  export interface SlashCommand {
5
5
  name: string;
@@ -40,6 +40,58 @@ export function formatSkillUsage(skill: SkillDefinition): string {
40
40
  return parts.join(" ");
41
41
  }
42
42
 
43
/**
 * Decide whether slash-command input should be bounced back to the user
 * because its split across argument slots is likely not what they meant.
 *
 * The check only applies to skills declaring two or more arguments and to
 * input containing no quote characters at all. In that situation the
 * schema-aware tokenizer hands every leftover word to the final slot; if
 * that final token contains whitespace, the split is reported as ambiguous
 * so the caller can show a breakdown rather than pick one reading silently.
 *
 * @param skill   Skill whose declared arguments drive the split.
 * @param rawArgs Argument text exactly as typed after the command name.
 * @returns The parsed tokens when the split is ambiguous, otherwise null
 *          (meaning the input may proceed).
 */
export function detectAmbiguousSplit(
  skill: SkillDefinition,
  rawArgs: string,
): { tokens: string[] } | null {
  const isMultiArg = skill.arguments.length >= 2;
  if (!isMultiArg) return null;

  // Any quote means the user already grouped words deliberately.
  const hasQuote = rawArgs.indexOf('"') !== -1 || rawArgs.indexOf("'") !== -1;
  if (hasQuote) return null;

  const tokens = tokenizeForSkill(rawArgs, skill);
  const [finalSlot] = tokens.slice(-1);
  const packedWithWords = Boolean(finalSlot) && /\s/.test(finalSlot as string);
  return packedWithWords ? { tokens } : null;
}
64
+
65
/**
 * Build the system message shown when a slash command's input was judged
 * ambiguous.
 *
 * The message lists every declared argument with the value it would
 * receive (the parsed token, else the declared default, else "(unset)"),
 * with names padded to a common width, followed by two quoting examples
 * the user can copy: everything as one argument, or the first word and the
 * rest as two arguments.
 *
 * @param skill  Skill whose name and declared arguments are displayed.
 * @param tokens Tokens produced for the ambiguous input, in slot order.
 * @returns The multi-line hint, lines joined with "\n".
 */
function formatAmbiguityHint(skill: SkillDefinition, tokens: string[]): string {
  // Widest declared argument name, used to align the "=" column.
  let widest = 0;
  for (const declared of skill.arguments) {
    if (declared.name.length > widest) widest = declared.name.length;
  }

  const breakdown = skill.arguments.map((declared, slot) => {
    const supplied = tokens[slot];
    let shown: string;
    if (supplied !== undefined) {
      shown = `"${supplied}"`;
    } else if (declared.default !== undefined) {
      shown = `"${declared.default}" (default)`;
    } else {
      shown = "(unset)";
    }
    return ` ${declared.name.padEnd(widest)} = ${shown}`;
  });

  const [head = "", ...others] = tokens;
  const rest = others.join(" ");
  // Join head and rest with a space only when both are non-empty.
  const whole = head && rest ? `${head} ${rest}` : head || rest;

  const lines: string[] = [`/${skill.name}: ambiguous input. Parsed as:`];
  lines.push(...breakdown);
  lines.push("");
  lines.push("Quote the multi-word argument to confirm, e.g.:");
  lines.push(` /${skill.name} "${whole}"`);
  lines.push(` /${skill.name} '${head}' '${rest}'`);
  return lines.join("\n");
}
94
+
43
95
  /**
44
96
  * Handle a slash-command input. Returns true if the command was consumed
45
97
  * (recognized or errored), false if it should fall through.
@@ -96,6 +148,11 @@ export function handleSlashCommand(
96
148
  );
97
149
  return true;
98
150
  }
151
+ const ambiguous = detectAmbiguousSplit(skill, rawArgs);
152
+ if (ambiguous) {
153
+ ctx.addSystemMessage(formatAmbiguityHint(skill, ambiguous.tokens));
154
+ return true;
155
+ }
99
156
  const rendered = renderSkill(skill, rawArgs);
100
157
  ctx.queueUserMessage(rendered, { display: input });
101
158
  return true;
@@ -52,18 +52,22 @@ export function parseSkillFile(raw: string, filePath: string): SkillDefinition {
52
52
  }
53
53
 
54
54
  /**
55
- * Split a raw argument string into positional tokens,
56
- * respecting double-quoted strings.
55
+ * Split a raw argument string into positional tokens, respecting both
56
+ * single- and double-quoted strings. A closing quote must match the
57
+ * opening quote; the other quote character is treated as a literal
58
+ * inside the run.
57
59
  */
58
60
  export function tokenize(raw: string): string[] {
59
61
  const tokens: string[] = [];
60
62
  let current = "";
61
- let inQuote = false;
63
+ let quoteChar: '"' | "'" | null = null;
62
64
 
63
65
  for (const ch of raw) {
64
- if (ch === '"') {
65
- inQuote = !inQuote;
66
- } else if (!inQuote && /\s/.test(ch)) {
66
+ if (quoteChar === null && (ch === '"' || ch === "'")) {
67
+ quoteChar = ch;
68
+ } else if (quoteChar !== null && ch === quoteChar) {
69
+ quoteChar = null;
70
+ } else if (quoteChar === null && /\s/.test(ch)) {
67
71
  if (current) {
68
72
  tokens.push(current);
69
73
  current = "";
@@ -77,12 +81,75 @@ export function tokenize(raw: string): string[] {
77
81
  return tokens;
78
82
  }
79
83
 
84
/**
 * Tokenize slash-command arguments using the skill's declared arity.
 *
 * For a skill declaring N >= 1 positional arguments, at most N - 1 leading
 * tokens are split on unquoted whitespace (single and double quotes group
 * words; the quote characters themselves are dropped). Whatever text
 * follows becomes the final token verbatim: it is trimmed, and one
 * enclosing pair of matching quotes is removed when the text between them
 * contains no further occurrence of that quote character. An unquoted
 * multi-word final argument therefore lands in the last slot as-is.
 *
 * A skill with no declared arguments is delegated to `tokenize()`.
 *
 * @param raw   Argument text as typed after the command name.
 * @param skill Skill definition; only the number of declared arguments is used.
 * @returns Tokens in slot order; never longer than the declared arity
 *          when that arity is at least one.
 */
export function tokenizeForSkill(
  raw: string,
  skill: SkillDefinition,
): string[] {
  const declared = skill.arguments.length;
  if (declared === 0) return tokenize(raw);

  const whitespace = /\s/;
  const leadingCap = declared - 1;
  const out: string[] = [];
  let pending = "";
  let openQuote: string | null = null;
  let pos = 0;

  // Consume characters until the N-1 leading tokens are collected or the
  // input runs out; `pos` ends up just past the last character consumed.
  while (pos < raw.length && out.length < leadingCap) {
    const ch = raw.charAt(pos);
    pos += 1;

    if (openQuote !== null) {
      // Inside a quoted run only the matching quote is special.
      if (ch === openQuote) openQuote = null;
      else pending += ch;
      continue;
    }
    if (ch === '"' || ch === "'") {
      openQuote = ch;
      continue;
    }
    if (!whitespace.test(ch)) {
      pending += ch;
      continue;
    }
    if (pending !== "") {
      out.push(pending);
      pending = "";
    }
  }

  // Input ended while a leading token was still being built.
  if (pending !== "") {
    out.push(pending);
    pending = "";
  }

  let tail = raw.slice(pos).trim();
  const opener = tail.charAt(0);
  const isWrapped =
    tail.length >= 2 &&
    (opener === '"' || opener === "'") &&
    tail.charAt(tail.length - 1) === opener;
  if (isWrapped) {
    // Unwrap only a single quoted string, not `"a" "b"`-style input.
    const inner = tail.slice(1, -1);
    if (inner.indexOf(opener) === -1) tail = inner;
  }
  if (tail !== "") out.push(tail);

  return out;
}
146
+
80
147
  function escapeRegex(s: string): string {
81
148
  return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
82
149
  }
83
150
 
84
151
  export function renderSkill(skill: SkillDefinition, rawArgs: string): string {
85
- const tokens = tokenize(rawArgs);
152
+ const tokens = tokenizeForSkill(rawArgs, skill);
86
153
  let result = skill.body;
87
154
 
88
155
  // Replace $<argName> placeholders first, longest names first so a `$start`
@@ -123,7 +190,7 @@ export function validateSkillArgs(
123
190
  skill: SkillDefinition,
124
191
  rawArgs: string,
125
192
  ): { missing: string[] } {
126
- const tokens = tokenize(rawArgs);
193
+ const tokens = tokenizeForSkill(rawArgs, skill);
127
194
  const missing: string[] = [];
128
195
  skill.arguments.forEach((argDef, i) => {
129
196
  if (!argDef.required) return;
@@ -0,0 +1,228 @@
1
+ import { isText } from "istextorbinary";
2
+ import { z } from "zod";
3
+ import { formatDriveRef } from "../../context/drives.ts";
4
+ import { ingestByPath } from "../../context/ingest.ts";
5
+ import {
6
+ createContextItemStrict,
7
+ PathConflictError,
8
+ upsertContextItem,
9
+ } from "../../db/context.ts";
10
+ import { getTool, type ToolDefinition } from "../tool.ts";
11
+
12
+ const PREVIEW_CHARS = 200; // Number of leading characters of the stored content echoed back in the `preview` field.
13
+ const ERROR_MESSAGE_CAP = 2000; // Length cap passed to `truncate` for inner-tool error messages.
14
+ const TOOL_NAME = "pipe_to_context"; // This tool's own name; also compared against the inner tool to reject self-piping.
15
+
16
/**
 * Derive a bare MIME type for `path` from the type Bun reports for it,
 * discarding any parameters after the first ";" (e.g. a charset suffix).
 *
 * @param path Path whose type should be looked up.
 * @returns The part of the reported type before the first ";", or
 *          "application/octet-stream" if that part is absent.
 */
function mimeFromPath(path: string): string {
  const reported = Bun.file(path).type;
  const [essence] = reported.split(";");
  return essence ?? "application/octet-stream";
}
20
+
21
/**
 * Classify a path as textual based on its final "/"-separated segment.
 *
 * The segment is passed to `isText`; anything other than an explicit
 * `false` from that call counts as textual.
 *
 * @param path Path within a drive.
 * @returns false only when `isText` returns false for the filename.
 */
function isTextualPath(path: string): boolean {
  const segments = path.split("/");
  const filename = segments[segments.length - 1] ?? path;
  const verdict = isText(filename);
  return verdict !== false;
}
25
+
26
/**
 * Cap `s` at `cap` UTF-16 code units, appending a marker that reports how
 * many code units were dropped. Strings at or under the cap are returned
 * unchanged.
 *
 * The cut never lands between the two halves of a surrogate pair: when it
 * would, it moves back one unit so the whole character is dropped rather
 * than leaving a lone surrogate at the end of the kept text.
 *
 * @param s   Text to shorten.
 * @param cap Maximum number of code units of `s` to keep.
 * @returns `s` itself, or its kept prefix followed by
 *          "…[truncated, N more chars]".
 */
function truncate(s: string, cap: number): string {
  if (s.length <= cap) return s;

  let end = cap;
  if (end > 0) {
    const before = s.charCodeAt(end - 1);
    const after = s.charCodeAt(end);
    const splitsPair =
      before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    if (splitsPair) end -= 1;
  }
  return `${s.slice(0, end)}…[truncated, ${s.length - end} more chars]`;
}
30
+
31
// Input accepted by pipe_to_context: which tool to run, the arguments to
// hand it, and where in the context store its output should be written.
+ const inputSchema = z.object({
32
+ tool_name: z // looked up with getTool() in execute
33
+ .string()
34
+ .describe(
35
+ "Name of the tool to dispatch. Its full output is piped into a context item; you (the LLM) will only see the storage acknowledgment, never the raw bytes.",
36
+ ),
37
+ tool_input: z // free-form here; execute validates it against the inner tool's own inputSchema
38
+ .record(z.string(), z.unknown())
39
+ .describe(
40
+ "Arguments to pass to the inner tool (same shape as a normal call).",
41
+ ),
42
+ drive: z
43
+ .string()
44
+ .default("agent")
45
+ .describe(
46
+ "Drive to write to (defaults to 'agent', the agent's scratch drive).",
47
+ ),
48
+ path: z.string().describe("Path within the drive (starts with /)"), // the leading "/" is only stated in the description; this schema does not check it
49
+ title: z // execute falls back to the last non-empty path segment when omitted
50
+ .string()
51
+ .optional()
52
+ .describe("Title for the file (defaults to filename)"),
53
+ description: z.string().optional().describe("Description of the file"),
54
+ on_conflict: z // no schema-level default; execute applies "error" when this is omitted
55
+ .enum(["error", "overwrite"])
56
+ .optional()
57
+ .describe(
58
+ "What to do if a file already exists at this (drive, path). Defaults to 'error'. Pass 'overwrite' to replace.",
59
+ ),
60
+ });
61
+
62
// Result returned by pipe_to_context. Only `is_error` is always present:
// success results carry the storage fields (id, drive, path, ref,
// bytes_written, preview); failure results carry error_type, message and
// next_action_hint, plus drive/path/ref for a path conflict.
+ const outputSchema = z.object({
63
+ is_error: z.boolean(),
64
+ id: z.string().optional(),
65
+ drive: z.string().optional(),
66
+ path: z.string().optional(),
67
+ ref: z.string().optional(), // produced by formatDriveRef() in execute
68
+ bytes_written: z.number().optional(),
69
+ preview: z
70
+ .string()
71
+ .optional()
72
+ .describe(
73
+ `First ${PREVIEW_CHARS} characters of the stored content so you can sanity-check what was captured.`,
74
+ ),
75
+ inner_tool_is_error: z.boolean().optional(), // set to true by execute when the inner tool threw or returned is_error
76
+ error_type: z // one value per failure branch in execute
77
+ .enum([
78
+ "unknown_tool",
79
+ "forbidden_tool",
80
+ "invalid_input",
81
+ "inner_tool_error",
82
+ "path_conflict",
83
+ ])
84
+ .optional(),
85
+ message: z.string().optional(),
86
+ next_action_hint: z.string().optional(),
87
+ });
88
+
89
/**
 * `pipe_to_context` tool: runs another registered tool and stores its full
 * output as a context item instead of returning it to the conversation.
 *
 * Steps performed by `execute`, in order:
 *  1. Resolve the inner tool with `getTool`; an unknown name returns
 *     `unknown_tool`.
 *  2. Refuse this tool itself and any tool flagged `terminal`
 *     (`forbidden_tool`).
 *  3. Validate `tool_input` against the inner tool's own input schema
 *     (`invalid_input`).
 *  4. Run the inner tool. A thrown error, an `is_error` result, or a result
 *     that cannot be serialized to text returns `inner_tool_error`; nothing
 *     is written in those cases.
 *  5. Write the serialized output (strict create, or upsert when
 *     `on_conflict` is "overwrite"), ingest the written path, and return
 *     the storage acknowledgment. A `PathConflictError` from the write is
 *     reported as `path_conflict`; any other error is rethrown.
 *
 * NOTE(review): the inner tool is taken from the global registry, so any
 * allowlist applied to direct tool calls is not consulted here — confirm
 * that this is intended.
 */
export const pipeToContextTool = {
  name: TOOL_NAME,
  description:
    "[[ bash equivalent command: cmd > file ]] Run another tool and pipe its full output directly into a context item, without the result flowing through the conversation. Use this when you need a large tool output (web pages, search dumps, big mcp_exec results) to be searchable/embedded for later but you do NOT need to read the bytes yourself. You'll only see the storage ack (drive, path, id, size, short preview).",
  group: "context",
  inputSchema,
  outputSchema,
  execute: async (input, ctx) => {
    const inner = getTool(input.tool_name);
    if (!inner) {
      return {
        is_error: true,
        error_type: "unknown_tool",
        message: `No tool named "${input.tool_name}".`,
        next_action_hint:
          "Check the tool name spelling, or call the inner tool directly if you do need to see its output.",
      };
    }

    if (inner.name === TOOL_NAME || inner.terminal) {
      return {
        is_error: true,
        error_type: "forbidden_tool",
        message: `Tool "${inner.name}" cannot be piped (terminal tools and pipe_to_context itself are not allowed).`,
        next_action_hint:
          "Pipe a non-terminal tool (search_grep, mcp_exec, context_refresh, etc.) instead.",
      };
    }

    const parsedInner = inner.inputSchema.safeParse(input.tool_input);
    if (!parsedInner.success) {
      const issues = parsedInner.error.issues
        .map((i) => `${i.path.join(".")}: ${i.message}`)
        .join("; ");
      return {
        is_error: true,
        error_type: "invalid_input",
        message: `Invalid input for ${inner.name}: ${issues}.`,
        next_action_hint:
          "Fix tool_input to match the inner tool's schema and retry.",
      };
    }

    let innerResult: unknown;
    try {
      innerResult = await inner.execute(parsedInner.data, ctx);
    } catch (err) {
      return {
        is_error: true,
        error_type: "inner_tool_error",
        inner_tool_is_error: true,
        message: truncate(
          `Tool ${inner.name} threw: ${err instanceof Error ? err.message : String(err)}`,
          ERROR_MESSAGE_CAP,
        ),
        next_action_hint:
          "Retry with different arguments, or call the tool directly to see the full error.",
      };
    }

    const innerIsError =
      typeof innerResult === "object" &&
      innerResult !== null &&
      "is_error" in innerResult
        ? (innerResult as { is_error: boolean }).is_error
        : false;

    // JSON.stringify returns undefined (not a string) for undefined,
    // functions and symbols, so the serialized form may be absent.
    const serialized: string | undefined =
      typeof innerResult === "string"
        ? innerResult
        : JSON.stringify(innerResult);

    if (serialized === undefined) {
      // Stop before touching storage: there is no text to write, and the
      // size/preview computations below need a real string.
      return {
        is_error: true,
        error_type: "inner_tool_error",
        message: `Tool ${inner.name} returned no serializable output, so nothing was written.`,
        next_action_hint:
          "Call the tool directly to inspect what it returns, or pipe a different tool.",
      };
    }
    const innerOutput = serialized;

    if (innerIsError) {
      return {
        is_error: true,
        error_type: "inner_tool_error",
        inner_tool_is_error: true,
        message: truncate(innerOutput, ERROR_MESSAGE_CAP),
        next_action_hint:
          "The inner tool returned an error and nothing was written. Fix the inputs and retry, or pipe a different tool.",
      };
    }

    const mimeType = mimeFromPath(input.path);
    const isTextual = isTextualPath(input.path);
    const title =
      input.title ?? input.path.split("/").filter(Boolean).pop() ?? input.path;
    const onConflict = input.on_conflict ?? "error";
    const target = { drive: input.drive, path: input.path };

    try {
      // Both write paths take the same fields; only the conflict policy differs.
      const payload = {
        title,
        description: input.description,
        content: innerOutput,
        drive: target.drive,
        path: target.path,
        mimeType,
        isTextual,
      };
      const item =
        onConflict === "overwrite"
          ? await upsertContextItem(ctx.conn, payload)
          : await createContextItemStrict(ctx.conn, payload);

      await ingestByPath(ctx.conn, target, ctx.config);

      return {
        is_error: false,
        id: item.id,
        drive: item.drive,
        path: item.path,
        ref: formatDriveRef(item),
        // Encoded size in bytes; String.length would count UTF-16 code
        // units and under-report any non-ASCII content.
        bytes_written: Buffer.byteLength(innerOutput, "utf8"),
        preview: innerOutput.slice(0, PREVIEW_CHARS),
      };
    } catch (err) {
      if (err instanceof PathConflictError) {
        return {
          is_error: true,
          error_type: "path_conflict",
          drive: err.drive,
          path: err.path,
          ref: formatDriveRef({ drive: err.drive, path: err.path }),
          message: `A file already exists at ${formatDriveRef({ drive: err.drive, path: err.path })} (id: ${err.existingId}). The inner tool ran but its output was discarded.`,
          next_action_hint:
            "Retry with on_conflict='overwrite' to replace, or pick a different path.",
        };
      }
      throw err;
    }
  },
} satisfies ToolDefinition<typeof inputSchema, typeof outputSchema>;
@@ -2,6 +2,7 @@
2
2
  import { capabilitiesRefreshTool } from "./capabilities/refresh.ts";
3
3
  // Context tools
4
4
  import { contextListDrivesTool } from "./context/list-drives.ts";
5
+ import { pipeToContextTool } from "./context/pipe.ts";
5
6
  import { readLargeResultTool } from "./context/read-large-result.ts";
6
7
  import { contextRefreshTool } from "./context/refresh.ts";
7
8
  import { contextSearchTool } from "./context/search.ts";
@@ -85,6 +86,7 @@ export function registerAllTools(): void {
85
86
  registerTool(updateBeliefsTool);
86
87
  registerTool(updateGoalsTool);
87
88
  registerTool(readLargeResultTool);
89
+ registerTool(pipeToContextTool);
88
90
 
89
91
  // Capabilities
90
92
  registerTool(capabilitiesRefreshTool);