@agentprojectcontext/apx 1.30.1 → 1.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,6 +28,7 @@ import transcribeAudio from "./tools/transcribe-audio.js";
28
28
  import askQuestions from "./tools/ask-questions.js";
29
29
  import createTask from "./tools/create-task.js";
30
30
  import listTasks from "./tools/list-tasks.js";
31
+ import discoverTools from "./tools/discover-tools.js";
31
32
  import { createPermissionGuard } from "./helpers.js";
32
33
  import { buildBridgedTools, DEFAULT_CATEGORIES } from "./registry-bridge.js";
33
34
 
@@ -62,6 +63,7 @@ const NATIVE_TOOLS = [
62
63
  askQuestions,
63
64
  createTask,
64
65
  listTasks,
66
+ discoverTools,
65
67
  ];
66
68
 
67
69
  // Registry-backed bridges. Categories can be overridden per-process via env
@@ -78,67 +80,254 @@ const TOOLS = [...NATIVE_TOOLS, ...BRIDGED_TOOLS];
78
80
 
79
81
  export const TOOL_SCHEMAS = TOOLS.map((tool) => tool.schema);
80
82
 
81
- // "Core" tools always sent to the model. The rest are pulled in on-demand via
82
- // load_skill or by switching to a heavier channel. Picked to fit cheap cloud
83
- // tiers: full TOOL_SCHEMAS is ~22 KB / ~5.5 K tokens — too much when Groq
84
- // free tier caps you at 6-12 K TPM. CORE_TOOL_NAMES is ~3 KB / ~700 tokens.
85
- // See spec/done/backlog item 12 for the underlying motivation.
86
- const CORE_TOOL_NAMES = new Set([
87
- // Inventory the model NEEDS to call these to know what's there.
83
+ // ---------------------------------------------------------------------------
84
+ // Lazy tools: base set (always loaded) + on-demand set (revealed via
85
+ // discover_tools). Motivation: full TOOL_SCHEMAS is ~25 KB / ~6.3 K tokens —
86
+ // too much when Groq's free tier caps you at 6-12 K TPM. The base set is
87
+ // ~24 tools (the ones a Telegram chat actually reaches for); everything else
88
+ // (browser/Puppeteer, fetch, web_search, runtime delegation, voice, …) stays
89
+ // off the wire until the model asks for it with discover_tools().
90
+ // ---------------------------------------------------------------------------
91
+
92
// Names of the tools whose schemas are sent up front on lightweight channels.
// Groups: messages, files, memory, sessions, projects/inventory, basic shell,
// tasks, skills, and discovery. Any tool not named here stays unloaded until
// the model activates it through discover_tools.
export const BASE_TOOL_NAMES = new Set(
  [
    // Discovery — the gateway to every tool that is not preloaded.
    ["discover_tools"],
    // Inventory + skills — how the model learns what exists.
    ["list_projects", "list_agents", "list_mcps", "list_skills", "load_skill"],
    // Memory + identity.
    ["read_agent_memory", "read_self_memory", "remember", "set_identity"],
    // Sessions + messages (self-recall and channel history).
    ["search_sessions", "search_messages", "tail_messages"],
    // Channels, conversation control, lightweight delegation.
    ["send_telegram", "ask_questions", "call_agent"],
    // Tasks — a very common request over chat.
    ["create_task", "list_tasks"],
    // Files + basic shell — used often enough on chat to keep preloaded.
    ["read_file", "write_file", "edit_file", "list_files", "search_files", "run_shell"],
  ].flat(),
);
111
127
 
112
- export const CORE_TOOL_SCHEMAS = TOOLS
113
- .filter((t) => CORE_TOOL_NAMES.has(t.name))
128
// Channels that receive the FULL registry up front: deliberate surfaces on a
// user-picked model, with no cheap-tier TPM cap. Every other channel counts as
// "lightweight" and starts on BASE_TOOL_NAMES, expanding via discover_tools.
const FULL_CHANNELS = new Set([
  "routine", // scheduled / autonomous runs
  "api", // generic HTTP / `apx exec`
  "web", // the big web chat
  "code", // the web Code module
  "terminal", // the `apx code` / `apx sys` TUI
]);
132
+
133
// Category labels used to group tools in the discover_tools catalog. Native
// tools have no registry category, so one is assigned here; bridged tools carry
// their own (browser/fetch/search/file) from registry-bridge.js.
const NATIVE_CATEGORY = {
  discover_tools: "system",
  set_permission_mode: "system",
  list_projects: "inventory",
  list_agents: "inventory",
  list_vault_agents: "inventory",
  list_mcps: "inventory",
  list_skills: "inventory",
  load_skill: "skills",
  import_agent: "agents",
  add_project: "projects",
  call_agent: "agents",
  call_runtime: "runtime",
  call_mcp: "mcp",
  read_agent_memory: "memory",
  read_self_memory: "memory",
  remember: "memory",
  set_identity: "identity",
  search_sessions: "sessions",
  search_messages: "messages",
  tail_messages: "messages",
  send_telegram: "messages",
  ask_questions: "conversation",
  create_task: "tasks",
  list_tasks: "tasks",
  transcribe_audio: "voice",
  read_file: "files",
  write_file: "files",
  edit_file: "files",
  list_files: "files",
  search_files: "files",
  run_shell: "shell",
};

/**
 * Resolve the catalog category for a tool.
 *
 * Precedence: the tool's own `category` (set on bridged tools), then the
 * NATIVE_CATEGORY table keyed by tool name, then the fallback "other".
 *
 * @param {{ name: string, category?: string }} tool
 * @returns {string} category label
 */
function categoryOf(tool) {
  if (tool.category) return tool.category;
  // Own-property lookup only: a tool named "constructor" or "toString" must not
  // pick up a member inherited from Object.prototype as its category.
  const native = Object.hasOwn(NATIVE_CATEGORY, tool.name)
    ? NATIVE_CATEGORY[tool.name]
    : undefined;
  return native || "other";
}
173
+
174
/**
 * Flatten a description into a single-line blurb for the tool catalog.
 *
 * Whitespace runs (including newlines) collapse to one space and the result is
 * trimmed. Text longer than 120 UTF-16 units is cut to at most 117 units plus
 * a trailing "…".
 *
 * @param {unknown} [desc=""] description text; coerced with String()
 * @returns {string} the flattened, possibly truncated blurb
 */
function oneLine(desc = "") {
  const flat = String(desc).replace(/\s+/g, " ").trim();
  if (flat.length <= 120) return flat;

  let head = flat.slice(0, 117);
  // The cut can land between the two halves of a surrogate pair (emoji, etc.).
  // Drop a dangling high surrogate so the blurb never carries a lone,
  // ill-formed code unit.
  const last = head.charCodeAt(head.length - 1);
  if (last >= 0xd800 && last <= 0xdbff) head = head.slice(0, -1);

  return head.trimEnd() + "…";
}
179
+
180
// Static metadata for every registered tool: its name, full schema, catalog
// category and a short one-line blurb taken from the schema description. The
// per-turn tool session reads it to build the catalog and to resolve
// activations.
const TOOL_META = TOOLS.map((tool) => {
  const { name, schema } = tool;
  return {
    name,
    schema,
    category: categoryOf(tool),
    description: oneLine(schema?.function?.description),
  };
});

// Name → metadata lookup, used when tools are activated by exact name.
const META_BY_NAME = new Map();
for (const meta of TOOL_META) META_BY_NAME.set(meta.name, meta);
189
+
190
// Schemas of the base tools, kept in TOOLS order.
export const BASE_TOOL_SCHEMAS = TOOLS.flatMap((tool) =>
  BASE_TOOL_NAMES.has(tool.name) ? [tool.schema] : [],
);

// Back-compat alias: some callers/tests historically referenced the "core"
// subset, which the base set supersedes. Same array instance, not a copy.
export const CORE_TOOL_SCHEMAS = BASE_TOOL_SCHEMAS;
197
+
198
// Tool name carried by a schema: the nested `function.name` when it is truthy,
// otherwise the top-level `name`. Yields undefined for a nullish schema.
const schemaName = (schema) => {
  const nested = schema?.function?.name;
  return nested ? nested : schema?.name;
};
199
+
116
200
  /**
117
- * Choose the tool schema list for a given channel. Telegram / desktop / api
118
- * (chit-chat) get the "core" subset to stay under cheap-tier TPM limits;
119
- * routines get the full list because they're deliberate, scheduled, and the
120
- * user has chosen the model. Override with the explicit `full: true` opt.
201
+ * Choose the INITIAL tool schema list for a channel. Full channels get the
202
+ * whole registry; lightweight channels (telegram/desktop/deck/web_sidebar) get
203
+ * the base set and expand on demand via discover_tools. `full: true` forces the
204
+ * complete registry regardless of channel.
121
205
  */
122
206
  export function schemasForChannel(channel, { full = false } = {}) {
123
- if (full) return TOOL_SCHEMAS;
124
- // Full registry for deliberate, local surfaces running on a user-picked model
125
- // (not subject to the cheap-tier TPM caps that motivate the "core" subset):
126
- // routine — scheduled/autonomous · api — generic HTTP / `apx exec`
127
- // web — the big web chat (long-form workspace)
128
- // code — the web Code module (needs read/write/edit/run_shell/grep/glob)
129
- // terminal the `apx code`/`apx sys` TUI: same coding surface as web Code,
130
- // so it needs the full read/write/edit/run_shell registry too.
131
- if (
132
- channel === "routine" ||
133
- channel === "api" ||
134
- channel === "web" ||
135
- channel === "code" ||
136
- channel === "terminal"
137
- )
138
- return TOOL_SCHEMAS;
139
- // Lightweight surfaces stay on the small subset to fit cheap cloud TPM limits
140
- // and keep replies snappy: telegram, web_sidebar, deck, desktop.
141
- return CORE_TOOL_SCHEMAS;
207
+ if (full || FULL_CHANNELS.has(channel)) return TOOL_SCHEMAS;
208
+ return BASE_TOOL_SCHEMAS;
209
+ }
210
+
211
/**
 * Per-turn tool session: tracks which tools are live, exposes the catalog of
 * not-yet-loaded tools, and activates more on demand. Newly activated schemas
 * are pushed onto `pending`; per the original design note, the agent loop reads
 * `pending` after each iteration and merges them into the live set, so they
 * become callable on the model's next step.
 *
 * `allowedTools` mirrors the role gate: "*" = unrestricted, [] = nothing, an
 * array = allowlist. Any other value fails closed (no tool is permitted). The
 * initial set, the catalog and every activation respect the gate, so a limited
 * sender can neither activate nor enumerate tools outside its allowlist.
 *
 * @param {string} channel channel identifier, passed to schemasForChannel
 * @param {{ full?: boolean, allowedTools?: "*" | string[] }} [opts]
 * @returns {{
 *   channel: string,
 *   initialSchemas: Array,
 *   pending: Array,
 *   activeNames: Set<string>,
 *   notLoaded: Function,
 *   catalogResponse: Function,
 *   activate: Function,
 * }} the session object
 */
export function createToolSession(channel, { full = false, allowedTools = "*" } = {}) {
  const allowAll = allowedTools === "*";
  const allow = allowAll || !Array.isArray(allowedTools) ? null : new Set(allowedTools);
  const permits = (name) => allowAll || (allow ? allow.has(name) : false);

  // If the role gate is "[]" (no tools), start empty and stay empty.
  const gateEmpty = Array.isArray(allowedTools) && allowedTools.length === 0;

  const initial = (gateEmpty ? [] : schemasForChannel(channel, { full }))
    .filter((s) => permits(schemaName(s)));
  const activeNames = new Set(initial.map(schemaName));

  const session = {
    channel,
    initialSchemas: initial,
    pending: [],
    activeNames,

    // Tools that exist but aren't loaded yet (and are permitted by the gate).
    notLoaded() {
      return TOOL_META.filter((m) => !activeNames.has(m.name) && permits(m.name));
    },

    // Catalog response for discover_tools() with no args: grouped by category.
    catalogResponse() {
      const pool = session.notLoaded();
      const byCategory = {};
      for (const m of pool) {
        (byCategory[m.category] ||= []).push({ name: m.name, description: m.description });
      }
      return {
        ok: true,
        loaded_count: activeNames.size,
        available_count: pool.length,
        categories: byCategory,
        hint:
          "Activá lo que necesites con discover_tools({ category: \"<cat>\" }) o " +
          "discover_tools({ names: [\"tool_a\", \"tool_b\"] }). Quedan disponibles desde tu próximo paso.",
      };
    },

    // Activate by exact names and/or a whole category. New schemas are pushed
    // onto `pending` for the agent loop to merge.
    activate({ names, category } = {}) {
      const targets = new Set();
      if (Array.isArray(names)) for (const n of names) targets.add(n);

      // Category expansion only considers tools the gate permits. Tools outside
      // the allowlist are skipped silently instead of being echoed back under
      // `denied`, which would reveal names the catalog hides on purpose.
      let unknownCategory = null;
      if (typeof category === "string" && category.trim()) {
        const cat = category.trim();
        let matched = 0;
        for (const m of TOOL_META) {
          if (m.category !== cat || !permits(m.name)) continue;
          targets.add(m.name);
          matched += 1;
        }
        // Nothing matched: a typo, or a category with no permitted tools.
        // Report it rather than returning a silent success.
        if (matched === 0) unknownCategory = cat;
      }

      const activated = [];
      const alreadyLoaded = [];
      const unknown = [];
      const denied = [];
      for (const name of targets) {
        const meta = META_BY_NAME.get(name);
        if (!meta) { unknown.push(name); continue; }
        if (!permits(name)) { denied.push(name); continue; }
        if (activeNames.has(name)) { alreadyLoaded.push(name); continue; }
        activeNames.add(name);
        session.pending.push(meta.schema);
        activated.push(name);
      }

      const failed = unknown.length > 0 || denied.length > 0 || unknownCategory !== null;
      return {
        ok: activated.length > 0 || !failed,
        activated,
        already_loaded: alreadyLoaded,
        ...(unknown.length ? { unknown } : {}),
        ...(denied.length ? { denied } : {}),
        ...(unknownCategory !== null ? { unknown_category: unknownCategory } : {}),
        note: activated.length
          ? `Activé ${activated.length} tool(s): ${activated.join(", ")}. Ya las podés usar desde tu próximo paso.`
          : "No se activó ninguna tool nueva.",
      };
    },
  };

  return session;
}
301
+
302
/**
 * Render the compact "tools you can activate" section of the system prompt:
 * usage instructions followed by the NAMES only (no schemas) of the tools the
 * session has not loaded, one line per category, categories sorted.
 *
 * @param {{ notLoaded: Function } | null | undefined} session per-turn tool session
 * @returns {string} the block, or "" when there is no session or nothing is
 *   left to load, so callers can omit it from the prompt
 */
export function buildLazyToolsBlock(session) {
  const pool = session ? session.notLoaded() : [];
  if (pool.length === 0) return "";

  // Group tool names under their category label.
  const namesByCategory = pool.reduce((groups, meta) => {
    if (!groups[meta.category]) groups[meta.category] = [];
    groups[meta.category].push(meta.name);
    return groups;
  }, {});

  const categoryLines = [];
  for (const cat of Object.keys(namesByCategory).sort()) {
    categoryLines.push(`- ${cat}: ${namesByCategory[cat].join(", ")}`);
  }

  const header = [
    "# Tools adicionales (activación on-demand)",
    "Tenés las tools base siempre cargadas. Estas otras EXISTEN pero no están",
    "cargadas (para ahorrar tokens). Activalas cuando las necesites con",
    "discover_tools — quedan disponibles desde tu próximo paso:",
    ' • discover_tools() → catálogo completo (nombre + descripción)',
    ' • discover_tools({ category: "browser" }) → activa toda una categoría',
    ' • discover_tools({ names: ["browser_navigate"] })→ activa tools puntuales',
    "Si no encontrás la tool que buscás, llamá discover_tools() sin argumentos.",
    "",
    `Tools no cargadas (solo nombres, ${pool.length} en total):`,
  ];

  return header.concat(categoryLines).join("\n");
}
143
332
 
144
333
  export function makeToolHandlers(ctx) {
@@ -18,7 +18,29 @@
18
18
  // Net result: adding a tool = adding one entry to registry.js. No file in
19
19
  // super-agent-tools/tools/, no import in index.js.
20
20
 
21
+ import fs from "node:fs";
21
22
  import { TOOL_DEFINITIONS } from "../../../core/tools/registry.js";
23
+ import { TOKEN_PATH } from "../../../core/config.js";
24
+
25
// The bridge POSTs to the daemon's OWN HTTP server, which is behind the bearer
// auth middleware (see api/shared.js). Without a token every bridged tool call
// (web_search, browser_*, http_*, glob, grep) comes back 401 "unauthorized".
// The token comes from the APX_TOKEN environment variable when set, otherwise
// from the daemon's master token file at TOKEN_PATH (~/.apx/daemon.token, the
// same file the CLI authenticates with).
let cachedToken = null;

/**
 * Return the bearer token for daemon-local HTTP calls, or "" when none is
 * available.
 *
 * Only a non-empty token is cached. An empty result is looked up again on the
 * next call, so a token file (or env var) that appears after the first bridged
 * call is still picked up instead of leaving every later call unauthenticated.
 *
 * @returns {string} the token, or "" if it could not be resolved
 */
function daemonToken() {
  if (cachedToken) return cachedToken;

  let token = process.env.APX_TOKEN || "";
  if (!token) {
    try {
      token = fs.readFileSync(TOKEN_PATH, "utf8").trim();
    } catch {
      // Best effort: a missing or unreadable token file means "no token"; the
      // request then goes out without an authorization header.
      token = "";
    }
  }

  if (token) cachedToken = token;
  return token;
}
22
44
 
23
45
  // Native handlers in super-agent-tools/tools/ that own these names. The bridge
24
46
  // MUST skip them or the registry version (HTTP roundtrip) would shadow the
@@ -56,9 +78,13 @@ function buildHandler(entry) {
56
78
  const method = String(entry.endpoint?.method || "POST").toUpperCase();
57
79
  let url = `http://127.0.0.1:${port}${entry.endpoint?.path || ""}`;
58
80
 
81
+ const token = daemonToken();
59
82
  const opts = {
60
83
  method,
61
- headers: { "content-type": "application/json" },
84
+ headers: {
85
+ "content-type": "application/json",
86
+ ...(token ? { authorization: `Bearer ${token}` } : {}),
87
+ },
62
88
  };
63
89
 
64
90
  if (method === "GET" || method === "HEAD") {
@@ -114,6 +140,9 @@ export function buildBridgedTools(opts = {}) {
114
140
  .filter(e => e.endpoint?.path)
115
141
  .map(entry => ({
116
142
  name: entry.name,
143
+ // Carried through so the lazy-tools catalog can group on-demand tools by
144
+ // their registry category (browser/fetch/search/file) for discover_tools.
145
+ category: entry.category,
117
146
  schema: buildSchema(entry),
118
147
  makeHandler: buildHandler(entry),
119
148
  }));
@@ -0,0 +1,67 @@
1
+ // discover_tools — lazy tool discovery + activation.
2
+ //
3
+ // Roby (and any super-agent surface) only carries a small "base" set of tool
4
+ // schemas on lightweight channels (Telegram/desktop/deck) to stay under
5
+ // cheap-tier TPM caps. The rest (browser/Puppeteer, fetch, web_search, runtime,
6
+ // voice, …) exist but are NOT sent to the model by default. This tool is how
7
+ // the model reveals and activates them on demand:
8
+ //
9
+ // discover_tools() → catalog of NOT-loaded tools
10
+ // discover_tools({ category: "browser" }) → activate a whole category
11
+ // discover_tools({ names: ["browser_navigate"] })→ activate specific tools
12
+ //
13
+ // Activation pushes the requested schemas into the per-turn tool session; the
14
+ // agent loop (run-agent.js) merges them into the live schema set so the NEXT
15
+ // model call can actually invoke them. Handlers for every tool already exist —
16
+ // gating is purely about which schemas the model sees.
17
+
18
// Normalize the `names` argument into a clean list of tool names. Models
// sometimes send a single string where the schema asks for an array; that is
// accepted as a one-element list. Non-string and blank entries are dropped.
function normalizeNames(names) {
  const list = typeof names === "string" ? [names] : Array.isArray(names) ? names : [];
  return list
    .filter((n) => typeof n === "string")
    .map((n) => n.trim())
    .filter((n) => n !== "");
}

// Tool definition: name, model-facing schema, and a handler factory bound to
// the per-turn context (`ctx.toolSession`).
const discoverTools = {
  name: "discover_tools",
  schema: {
    type: "function",
    function: {
      name: "discover_tools",
      description:
        "Discover and activate additional tools that are not loaded by default. " +
        "Call with NO arguments to get the catalog of available-but-not-loaded tools " +
        "(name + 1-line description, grouped by category). Call with `category` (e.g. " +
        "\"browser\", \"fetch\") or `names` (exact tool names) to ACTIVATE those tools — " +
        "they become callable starting on your next step. Use this whenever the tool you " +
        "need (browser automation, HTTP fetch, web search, runtime delegation, voice, …) " +
        "isn't in your current tool list.",
      parameters: {
        type: "object",
        properties: {
          category: {
            type: "string",
            description:
              "Activate every not-loaded tool in this category (e.g. \"browser\", \"fetch\", \"search\").",
          },
          names: {
            type: "array",
            items: { type: "string" },
            description:
              "Exact tool names to activate, e.g. [\"browser_navigate\", \"browser_screenshot\"].",
          },
        },
      },
    },
  },
  /**
   * Build the handler for one turn.
   *
   * With no usable `names`/`category` the handler returns the session catalog;
   * otherwise it asks the session to activate the requested tools.
   *
   * @param {{ toolSession?: object }} ctx per-turn handler context
   * @returns {Function} handler taking `{ category?, names? }` (nullish allowed)
   */
  makeHandler: (ctx) => (args) => {
    // Tolerate null/undefined args (a default parameter only covers undefined).
    const { category, names } = args ?? {};
    const session = ctx?.toolSession;
    // No lazy session (full channels, or direct handler use in tests): every
    // tool is already exposed, so there's nothing to discover or activate.
    if (!session) {
      return {
        ok: true,
        loaded_all: true,
        note: "En este canal todas las tools ya están cargadas; no hace falta discover_tools.",
      };
    }
    const wanted = normalizeNames(names);
    const hasCategory = typeof category === "string" && category.trim() !== "";
    if (wanted.length === 0 && !hasCategory) return session.catalogResponse();
    return session.activate({ names: wanted, category });
  },
};

export default discoverTools;
@@ -1,5 +1,5 @@
1
1
  // Super-agent: daemon-level action agent for Telegram, TUI, desktop, routines.
2
- import { schemasForChannel, makeToolHandlers } from "./super-agent-tools/index.js";
2
+ import { createToolSession, buildLazyToolsBlock, makeToolHandlers } from "./super-agent-tools/index.js";
3
3
  import { listSkills } from "./skills-loader.js";
4
4
  import {
5
5
  runAgent,
@@ -79,6 +79,15 @@ export async function runSuperAgent({
79
79
  }
80
80
  }
81
81
 
82
+ // Per-turn tool session. Lightweight channels (telegram/desktop/deck) start
83
+ // on the small "base" set and expand on demand via discover_tools; full
84
+ // channels (routine/api/web/code/terminal) get the whole registry up front.
85
+ // The session also enforces role gating ("*" = unrestricted, [] = none,
86
+ // array = allowlist) on BOTH the initial set and any later activation, so a
87
+ // limited sender can't discover its way past the gate.
88
+ // noTools callers (summarize/ask) get no session — text only.
89
+ const toolSession = noTools ? null : createToolSession(channel, { allowedTools });
90
+
82
91
  const system = buildSuperAgentSystem({
83
92
  globalConfig,
84
93
  projects,
@@ -90,23 +99,12 @@ export async function runSuperAgent({
90
99
  systemSuffix,
91
100
  memoryBlock,
92
101
  activeThreadsBlock,
102
+ // Compact "tools you can activate" block (names only, no schemas). Empty on
103
+ // full channels and tool-free callers, where it's omitted from the prompt.
104
+ lazyToolsBlock: buildLazyToolsBlock(toolSession),
93
105
  });
94
106
 
95
- // Pick the schema subset for this channel: chit-chat surfaces get a small
96
- // "core" set (~700 tokens) to fit cheap-tier TPM caps; routines get the
97
- // full registry. The model can still call load_skill / read more on demand.
98
- // noTools callers (summarize/ask) get an empty set — text only.
99
- let toolSchemas = noTools ? [] : schemasForChannel(channel);
100
- // Role gating: restrict the visible tools for limited senders (e.g. guests
101
- // on Telegram). "*" = unrestricted; [] = no tools; array = allowlist.
102
- if (allowedTools !== "*" && Array.isArray(allowedTools)) {
103
- if (allowedTools.length === 0) {
104
- toolSchemas = [];
105
- } else {
106
- const allow = new Set(allowedTools);
107
- toolSchemas = toolSchemas.filter((t) => allow.has(t?.function?.name || t?.name));
108
- }
109
- }
107
+ const toolSchemas = noTools ? [] : toolSession.initialSchemas;
110
108
 
111
109
  return runAgent({
112
110
  globalConfig,
@@ -116,7 +114,7 @@ export async function runSuperAgent({
116
114
  overrideModel,
117
115
  toolSchemas,
118
116
  makeToolHandlers,
119
- toolHandlerCtx: { projects, plugins, registries, globalConfig, channel },
117
+ toolHandlerCtx: { projects, plugins, registries, globalConfig, channel, toolSession },
120
118
  onEvent,
121
119
  signal,
122
120
  onToken,