npm - @agentprojectcontext/apx - Versions diffs - 1.30.1 → 1.31.0 - Mend

@agentprojectcontext/apx 1.30.1 → 1.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/package.json +1 -1
package/src/core/agent/prompt-builder.js +6 -0
package/src/core/agent/run-agent.js +21 -0
package/src/core/tools/browser.js +169 -75
package/src/core/tools/registry.js +6 -1
package/src/core/tools/search.js +35 -7
package/src/host/daemon/plugins/telegram.js +5 -0
package/src/host/daemon/super-agent-tools/index.js +232 -43
package/src/host/daemon/super-agent-tools/registry-bridge.js +30 -1
package/src/host/daemon/super-agent-tools/tools/discover-tools.js +67 -0
package/src/host/daemon/super-agent.js +15 -17
package/src/interfaces/web/package-lock.json +100 -211

package/src/host/daemon/super-agent-tools/index.js CHANGED Viewed

@@ -28,6 +28,7 @@ import transcribeAudio from "./tools/transcribe-audio.js";
 import askQuestions from "./tools/ask-questions.js";
 import createTask from "./tools/create-task.js";
 import listTasks from "./tools/list-tasks.js";
+import discoverTools from "./tools/discover-tools.js";
 import { createPermissionGuard } from "./helpers.js";
 import { buildBridgedTools, DEFAULT_CATEGORIES } from "./registry-bridge.js";
@@ -62,6 +63,7 @@ const NATIVE_TOOLS = [
   askQuestions,
   createTask,
   listTasks,
+  discoverTools,
 ];
 // Registry-backed bridges. Categories can be overridden per-process via env
@@ -78,67 +80,254 @@ const TOOLS = [...NATIVE_TOOLS, ...BRIDGED_TOOLS];
 export const TOOL_SCHEMAS = TOOLS.map((tool) => tool.schema);
-// "Core" tools always sent to the model. The rest are pulled in on-demand via
-// load_skill or by switching to a heavier channel. Picked to fit cheap cloud
-// tiers: full TOOL_SCHEMAS is ~22 KB / ~5.5 K tokens — too much when Groq
-// free tier caps you at 6-12 K TPM. CORE_TOOL_NAMES is ~3 KB / ~700 tokens.
-// See spec/done/backlog item 12 for the underlying motivation.
-const CORE_TOOL_NAMES = new Set([
-  // Inventory — the model NEEDS to call these to know what's there.
+// ---------------------------------------------------------------------------
+// Lazy tools: base set (always loaded) + on-demand set (revealed via
+// discover_tools). Motivation: full TOOL_SCHEMAS is ~25 KB / ~6.3 K tokens —
+// too much when Groq's free tier caps you at 6-12 K TPM. The base set is
+// ~24 tools (the ones a Telegram chat actually reaches for); everything else
+// (browser/Puppeteer, fetch, web_search, runtime delegation, voice, …) stays
+// off the wire until the model asks for it with discover_tools().
+// ---------------------------------------------------------------------------
+// Always loaded on lightweight channels. Covers messages, files, memory,
+// sessions, projects/inventory, basic shell, tasks, skills, and discovery.
+export const BASE_TOOL_NAMES = new Set([
+  // Discovery — the entry point to everything not loaded here.
+  "discover_tools",
+  // Inventory — the model needs these to know what exists.
   "list_projects",
   "list_agents",
   "list_mcps",
   "list_skills",
-  // Memory + identity — used during identity / config conversations.
+  "load_skill",
+  // Memory + identity.
   "read_agent_memory",
-  "set_identity",
-  // Self-memory: jot durable facts so they survive across sessions.
+  "read_self_memory",
   "remember",
-  // Self-recall: "what did we do / last session" must work on every channel.
+  "set_identity",
+  // Sessions + messages (self-recall + channel history).
   "search_sessions",
-  // Conversation control.
-  "ask_questions",
-  // On-demand expansion: this is how the model loads the rest of the surface.
-  "load_skill",
-  // Channels the user expects out of any super-agent turn.
+  "search_messages",
+  "tail_messages",
+  // Channels + conversation control + lightweight delegation.
   "send_telegram",
-  // Lightweight delegation (no spawn).
+  "ask_questions",
   "call_agent",
-  // Routine creation (very common ask via chat).
+  // Tasks (very common ask via chat).
   "create_task",
   "list_tasks",
+  // Files + basic shell — frequent enough on chat to keep hot.
+  "read_file",
+  "write_file",
+  "edit_file",
+  "list_files",
+  "search_files",
+  "run_shell",
 ]);
-export const CORE_TOOL_SCHEMAS = TOOLS
-  .filter((t) => CORE_TOOL_NAMES.has(t.name))
+// Channels that get the FULL registry up front (deliberate, user-picked model,
+// no cheap-tier TPM cap). Everything else is a "lightweight" channel and starts
+// on BASE_TOOL_NAMES with discover_tools to expand.
+const FULL_CHANNELS = new Set(["routine", "api", "web", "code", "terminal"]);
+// Category labels for grouping the discover_tools catalog. Native tools have no
+// registry category, so we assign one here; bridged tools carry their own
+// (browser/fetch/search/file) from registry-bridge.js.
+const NATIVE_CATEGORY = {
+  discover_tools: "system",
+  set_permission_mode: "system",
+  list_projects: "inventory",
+  list_agents: "inventory",
+  list_vault_agents: "inventory",
+  list_mcps: "inventory",
+  list_skills: "inventory",
+  load_skill: "skills",
+  import_agent: "agents",
+  add_project: "projects",
+  call_agent: "agents",
+  call_runtime: "runtime",
+  call_mcp: "mcp",
+  read_agent_memory: "memory",
+  read_self_memory: "memory",
+  remember: "memory",
+  set_identity: "identity",
+  search_sessions: "sessions",
+  search_messages: "messages",
+  tail_messages: "messages",
+  send_telegram: "messages",
+  ask_questions: "conversation",
+  create_task: "tasks",
+  list_tasks: "tasks",
+  transcribe_audio: "voice",
+  read_file: "files",
+  write_file: "files",
+  edit_file: "files",
+  list_files: "files",
+  search_files: "files",
+  run_shell: "shell",
+};
+// Resolve the catalog category for a tool: its own `category` wins, then the
+// NATIVE_CATEGORY entry for its name, and finally the "other" bucket.
+function categoryOf(tool) {
+  if (tool.category) return tool.category;
+  const nativeLabel = NATIVE_CATEGORY[tool.name];
+  return nativeLabel ? nativeLabel : "other";
+}
+// Collapse a tool description into a single trimmed line of at most 120
+// characters: whitespace runs become one space, and longer text is cut to 117
+// characters (trailing space dropped) plus an ellipsis.
+function oneLine(desc = "") {
+  // The default parameter only covers `undefined`; without `??` a null
+  // description would be stringified to the literal blurb "null".
+  const flat = String(desc ?? "").replace(/\s+/g, " ").trim();
+  if (flat.length <= 120) return flat;
+  return `${flat.slice(0, 117).trimEnd()}…`;
+}
+// Static per-tool metadata (name, schema, category, one-line blurb), built
+// once from TOOLS. The tool session reads it for the catalog and for
+// activation lookups; META_BY_NAME indexes the same records by tool name.
+const TOOL_META = TOOLS.map((tool) => {
+  const { name, schema } = tool;
+  return {
+    name,
+    schema,
+    category: categoryOf(tool),
+    description: oneLine(schema?.function?.description),
+  };
+});
+const META_BY_NAME = new Map();
+for (const meta of TOOL_META) META_BY_NAME.set(meta.name, meta);
+export const BASE_TOOL_SCHEMAS = TOOLS
+  .filter((t) => BASE_TOOL_NAMES.has(t.name))
   .map((t) => t.schema);
+// Back-compat alias: a few callers/tests historically referenced the "core"
+// subset. The base set supersedes it.
+export const CORE_TOOL_SCHEMAS = BASE_TOOL_SCHEMAS;
+const schemaName = (s) => s?.function?.name || s?.name;
 /**
- * Choose the tool schema list for a given channel. Telegram / desktop / api
- * (chit-chat) get the "core" subset to stay under cheap-tier TPM limits;
- * routines get the full list because they're deliberate, scheduled, and the
- * user has chosen the model. Override with the explicit `full: true` opt.
+ * Choose the INITIAL tool schema list for a channel. Full channels get the
+ * whole registry; lightweight channels (telegram/desktop/deck/web_sidebar) get
+ * the base set and expand on demand via discover_tools. `full: true` forces the
+ * complete registry regardless of channel.
  */
 export function schemasForChannel(channel, { full = false } = {}) {
-  if (full) return TOOL_SCHEMAS;
-  // Full registry for deliberate, local surfaces running on a user-picked model
-  // (not subject to the cheap-tier TPM caps that motivate the "core" subset):
-  //   routine — scheduled/autonomous · api — generic HTTP / `apx exec`
-  //   web     — the big web chat (long-form workspace)
-  //   code    — the web Code module (needs read/write/edit/run_shell/grep/glob)
-  //   terminal — the `apx code`/`apx sys` TUI: same coding surface as web Code,
-  //              so it needs the full read/write/edit/run_shell registry too.
-  if (
-    channel === "routine" ||
-    channel === "api" ||
-    channel === "web" ||
-    channel === "code" ||
-    channel === "terminal"
-  )
-    return TOOL_SCHEMAS;
-  // Lightweight surfaces stay on the small subset to fit cheap cloud TPM limits
-  // and keep replies snappy: telegram, web_sidebar, deck, desktop.
-  return CORE_TOOL_SCHEMAS;
+  if (full || FULL_CHANNELS.has(channel)) return TOOL_SCHEMAS;
+  return BASE_TOOL_SCHEMAS;
+}
+/**
+ * Create the per-turn tool session for a channel.
+ *
+ * The session tracks which tools are live, exposes the catalog of tools that
+ * exist but are not loaded yet, and activates more on demand. Newly activated
+ * schemas are appended to `pending`; the agent loop is expected to read
+ * `pending` after each iteration and merge them into the live set, so
+ * activated tools become callable on the model's next step.
+ *
+ * `allowedTools` mirrors the role gate: "*" = unrestricted, [] = nothing, an
+ * array = allowlist. Any other value permits nothing (fail closed). Both the
+ * initial set AND any activation respect the gate, so a limited sender can't
+ * discover its way past it — including via `category`, which only ever
+ * expands to tools the gate permits.
+ *
+ * @param {string} channel - Channel name, forwarded to schemasForChannel.
+ * @param {object} [opts]
+ * @param {boolean} [opts.full=false] - Force the complete registry.
+ * @param {"*"|string[]} [opts.allowedTools="*"] - Role gate.
+ * @returns {object} Session exposing `channel`, `initialSchemas`, `pending`,
+ *   `activeNames`, `notLoaded()`, `catalogResponse()` and `activate()`.
+ */
+export function createToolSession(channel, { full = false, allowedTools = "*" } = {}) {
+  const allowAll = allowedTools === "*";
+  // Neither "*" nor an array leaves `allow` null, so `permits` rejects all.
+  const allow = allowAll || !Array.isArray(allowedTools) ? null : new Set(allowedTools);
+  const permits = (name) => allowAll || (allow ? allow.has(name) : false);
+  // If the role gate is "[]" (no tools), start empty and stay empty.
+  const gateEmpty = Array.isArray(allowedTools) && allowedTools.length === 0;
+  const initial = (gateEmpty ? [] : schemasForChannel(channel, { full }))
+    .filter((s) => permits(schemaName(s)));
+  const activeNames = new Set(initial.map(schemaName));
+  const session = {
+    channel,
+    initialSchemas: initial,
+    pending: [],
+    activeNames,
+    // Tools that exist but aren't loaded yet (and are permitted by the gate).
+    notLoaded() {
+      return TOOL_META.filter((m) => !activeNames.has(m.name) && permits(m.name));
+    },
+    // Catalog response for discover_tools() with no args: grouped by category.
+    catalogResponse() {
+      const pool = session.notLoaded();
+      const byCategory = {};
+      for (const m of pool) {
+        (byCategory[m.category] ||= []).push({ name: m.name, description: m.description });
+      }
+      return {
+        ok: true,
+        loaded_count: activeNames.size,
+        available_count: pool.length,
+        categories: byCategory,
+        hint:
+          "Activá lo que necesites con discover_tools({ category: \"<cat>\" }) o " +
+          "discover_tools({ names: [\"tool_a\", \"tool_b\"] }). Quedan disponibles desde tu próximo paso.",
+      };
+    },
+    // Activate by exact names and/or whole category. Pushes new schemas to
+    // `pending` for the agent loop to merge.
+    activate({ names, category } = {}) {
+      const targets = new Set();
+      if (Array.isArray(names)) for (const n of names) targets.add(n);
+      // A category expands only to tools the gate permits — the same view the
+      // catalog gives. Expanding to forbidden tools would leak their names via
+      // `denied` and flip `ok` to false for tools the caller never named.
+      let unknownCategory = null;
+      if (typeof category === "string" && category.trim()) {
+        const cat = category.trim();
+        let matched = false;
+        for (const m of TOOL_META) {
+          if (m.category !== cat || !permits(m.name)) continue;
+          matched = true;
+          targets.add(m.name);
+        }
+        // A typo'd (or fully gated) category must not look like a success.
+        if (!matched) unknownCategory = cat;
+      }
+      const activated = [];
+      const alreadyLoaded = [];
+      const unknown = [];
+      const denied = [];
+      for (const name of targets) {
+        const meta = META_BY_NAME.get(name);
+        if (!meta) { unknown.push(name); continue; }
+        if (!permits(name)) { denied.push(name); continue; }
+        if (activeNames.has(name)) { alreadyLoaded.push(name); continue; }
+        activeNames.add(name);
+        session.pending.push(meta.schema);
+        activated.push(name);
+      }
+      const nothingRejected =
+        unknown.length === 0 && denied.length === 0 && unknownCategory === null;
+      return {
+        ok: activated.length > 0 || nothingRejected,
+        activated,
+        already_loaded: alreadyLoaded,
+        ...(unknown.length ? { unknown } : {}),
+        ...(denied.length ? { denied } : {}),
+        ...(unknownCategory !== null ? { unknown_category: unknownCategory } : {}),
+        note: activated.length
+          ? `Activé ${activated.length} tool(s): ${activated.join(", ")}. Ya las podés usar desde tu próximo paso.`
+          : "No se activó ninguna tool nueva.",
+      };
+    },
+  };
+  return session;
+}
+/**
+ * Render the system-prompt section that advertises on-demand tools: usage
+ * instructions for discover_tools followed by the NAMES only (no schemas) of
+ * every not-loaded tool, one line per category in sorted category order.
+ *
+ * @param {object|null} session - Tool session exposing `notLoaded()`.
+ * @returns {string} The block, or "" when there is no session or nothing is
+ *   left to load, so the caller can omit it from the prompt.
+ */
+export function buildLazyToolsBlock(session) {
+  const pool = session ? session.notLoaded() : [];
+  if (pool.length === 0) return "";
+  // Bucket tool names under their category label.
+  const namesByCategory = {};
+  for (const { category, name } of pool) {
+    if (!namesByCategory[category]) namesByCategory[category] = [];
+    namesByCategory[category].push(name);
+  }
+  const categoryLines = [];
+  for (const cat of Object.keys(namesByCategory).sort()) {
+    categoryLines.push(`- ${cat}: ${namesByCategory[cat].join(", ")}`);
+  }
+  const header = [
+    "# Tools adicionales (activación on-demand)",
+    "Tenés las tools base siempre cargadas. Estas otras EXISTEN pero no están",
+    "cargadas (para ahorrar tokens). Activalas cuando las necesites con",
+    "discover_tools — quedan disponibles desde tu próximo paso:",
+    '  • discover_tools()                              → catálogo completo (nombre + descripción)',
+    '  • discover_tools({ category: "browser" })       → activa toda una categoría',
+    '  • discover_tools({ names: ["browser_navigate"] })→ activa tools puntuales',
+    "Si no encontrás la tool que buscás, llamá discover_tools() sin argumentos.",
+    "",
+    `Tools no cargadas (solo nombres, ${pool.length} en total):`,
+  ];
+  return header.concat(categoryLines).join("\n");
 }
 export function makeToolHandlers(ctx) {

package/src/host/daemon/super-agent-tools/registry-bridge.js CHANGED Viewed

@@ -18,7 +18,29 @@
 // Net result: adding a tool = adding one entry to registry.js. No file in
 // super-agent-tools/tools/, no import in index.js.
+import fs from "node:fs";
 import { TOOL_DEFINITIONS } from "../../../core/tools/registry.js";
+import { TOKEN_PATH } from "../../../core/config.js";
+// The bridge POSTs to the daemon's OWN HTTP server, which is behind the bearer
+// auth middleware (see api/shared.js). Without a token every bridged tool call
+// (web_search, browser_*, http_*, glob, grep) comes back 401 "unauthorized".
+// The token comes from APX_TOKEN or, failing that, from the file at TOKEN_PATH
+// (the daemon's master token file, which the CLI also authenticates with).
+//
+// Only a NON-EMPTY token is cached. Caching the empty fallback would pin the
+// process to unauthenticated calls if the first lookup ran before the token
+// file existed; an empty result is therefore retried on the next call.
+let cachedToken = null;
+// Returns the bearer token for bridged calls, or "" when none is available.
+function daemonToken() {
+  if (cachedToken) return cachedToken;
+  let token = process.env.APX_TOKEN || "";
+  if (!token) {
+    try {
+      token = fs.readFileSync(TOKEN_PATH, "utf8").trim();
+    } catch {
+      // Best effort: a missing/unreadable token file means "no token yet".
+      token = "";
+    }
+  }
+  if (token) cachedToken = token;
+  return token;
+}
 // Native handlers in super-agent-tools/tools/ that own these names. The bridge
 // MUST skip them or the registry version (HTTP roundtrip) would shadow the
@@ -56,9 +78,13 @@ function buildHandler(entry) {
     const method = String(entry.endpoint?.method || "POST").toUpperCase();
     let url = `http://127.0.0.1:${port}${entry.endpoint?.path || ""}`;
+    const token = daemonToken();
     const opts = {
       method,
-      headers: { "content-type": "application/json" },
+      headers: {
+        "content-type": "application/json",
+        ...(token ? { authorization: `Bearer ${token}` } : {}),
+      },
     };
     if (method === "GET" || method === "HEAD") {
@@ -114,6 +140,9 @@ export function buildBridgedTools(opts = {}) {
     .filter(e => e.endpoint?.path)
     .map(entry => ({
       name: entry.name,
+      // Carried through so the lazy-tools catalog can group on-demand tools by
+      // their registry category (browser/fetch/search/file) for discover_tools.
+      category: entry.category,
       schema: buildSchema(entry),
       makeHandler: buildHandler(entry),
     }));

package/src/host/daemon/super-agent-tools/tools/discover-tools.js ADDED Viewed

@@ -0,0 +1,67 @@
+// discover_tools — on-demand tool discovery and activation.
+//
+// Lightweight channels only ship a small "base" set of tool schemas to the
+// model; the remaining tools exist but are not sent by default. This tool lets
+// the model list and switch them on:
+//
+//   discover_tools()                               → catalog of NOT-loaded tools
+//   discover_tools({ category: "browser" })        → activate a whole category
+//   discover_tools({ names: ["browser_navigate"] })→ activate specific tools
+//
+// The handler delegates to the per-turn tool session found on
+// `ctx.toolSession`: `catalogResponse()` when no activation argument is given,
+// `activate()` otherwise. When there is no session it reports that everything
+// is already loaded.
+export default {
+  name: "discover_tools",
+  schema: {
+    type: "function",
+    function: {
+      name: "discover_tools",
+      description:
+        "Discover and activate additional tools that are not loaded by default. " +
+        "Call with NO arguments to get the catalog of available-but-not-loaded tools " +
+        "(name + 1-line description, grouped by category). Call with `category` (e.g. " +
+        "\"browser\", \"fetch\") or `names` (exact tool names) to ACTIVATE those tools — " +
+        "they become callable starting on your next step. Use this whenever the tool you " +
+        "need (browser automation, HTTP fetch, web search, runtime delegation, voice, …) " +
+        "isn't in your current tool list.",
+      parameters: {
+        type: "object",
+        properties: {
+          category: {
+            type: "string",
+            description:
+              "Activate every not-loaded tool in this category (e.g. \"browser\", \"fetch\", \"search\").",
+          },
+          names: {
+            type: "array",
+            items: { type: "string" },
+            description:
+              "Exact tool names to activate, e.g. [\"browser_navigate\", \"browser_screenshot\"].",
+          },
+        },
+      },
+    },
+  },
+  makeHandler: (ctx) => ({ category, names } = {}) => {
+    const session = ctx?.toolSession;
+    // Without a session (full channels, or the handler used directly in
+    // tests) there is nothing left to discover or activate.
+    if (!session) {
+      return {
+        ok: true,
+        loaded_all: true,
+        note: "En este canal todas las tools ya están cargadas; no hace falta discover_tools.",
+      };
+    }
+    const hasNames = Array.isArray(names) && names.length > 0;
+    const hasCategory = typeof category === "string" && category.trim() !== "";
+    // Any usable argument means "activate"; otherwise return the catalog.
+    return hasNames || hasCategory
+      ? session.activate({ names, category })
+      : session.catalogResponse();
+  },
+};

package/src/host/daemon/super-agent.js CHANGED Viewed

@@ -1,5 +1,5 @@
 // Super-agent: daemon-level action agent for Telegram, TUI, desktop, routines.
-import { schemasForChannel, makeToolHandlers } from "./super-agent-tools/index.js";
+import { createToolSession, buildLazyToolsBlock, makeToolHandlers } from "./super-agent-tools/index.js";
 import { listSkills } from "./skills-loader.js";
 import {
   runAgent,
@@ -79,6 +79,15 @@ export async function runSuperAgent({
     }
   }
+  // Per-turn tool session. Lightweight channels (telegram/desktop/deck/
+  // web_sidebar) start on the small "base" set and expand on demand via
+  // discover_tools; full channels (routine/api/web/code/terminal) get the
+  // whole registry up front.
+  // The session also enforces role gating ("*" = unrestricted, [] = none,
+  // array = allowlist) on BOTH the initial set and any later activation, so a
+  // limited sender can't discover its way past the gate.
+  // noTools callers (summarize/ask) get no session — text only.
+  const toolSession = noTools ? null : createToolSession(channel, { allowedTools });
   const system = buildSuperAgentSystem({
     globalConfig,
     projects,
@@ -90,23 +99,12 @@ export async function runSuperAgent({
     systemSuffix,
     memoryBlock,
     activeThreadsBlock,
+    // Compact "tools you can activate" block (names only, no schemas). Empty on
+    // full channels and tool-free callers, where it's omitted from the prompt.
+    lazyToolsBlock: buildLazyToolsBlock(toolSession),
   });
-  // Pick the schema subset for this channel: chit-chat surfaces get a small
-  // "core" set (~700 tokens) to fit cheap-tier TPM caps; routines get the
-  // full registry. The model can still call load_skill / read more on demand.
-  // noTools callers (summarize/ask) get an empty set — text only.
-  let toolSchemas = noTools ? [] : schemasForChannel(channel);
-  // Role gating: restrict the visible tools for limited senders (e.g. guests
-  // on Telegram). "*" = unrestricted; [] = no tools; array = allowlist.
-  if (allowedTools !== "*" && Array.isArray(allowedTools)) {
-    if (allowedTools.length === 0) {
-      toolSchemas = [];
-    } else {
-      const allow = new Set(allowedTools);
-      toolSchemas = toolSchemas.filter((t) => allow.has(t?.function?.name || t?.name));
-    }
-  }
+  const toolSchemas = noTools ? [] : toolSession.initialSchemas;
   return runAgent({
     globalConfig,
@@ -116,7 +114,7 @@ export async function runSuperAgent({
     overrideModel,
     toolSchemas,
     makeToolHandlers,
-    toolHandlerCtx: { projects, plugins, registries, globalConfig, channel },
+    toolHandlerCtx: { projects, plugins, registries, globalConfig, channel, toolSession },
     onEvent,
     signal,
     onToken,