@agentprojectcontext/apx 1.14.0 → 1.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -1
- package/skills/apc-context/SKILL.md +68 -18
- package/skills/apx/SKILL.md +89 -33
- package/src/cli/commands/sys.js +249 -21
- package/src/cli/commands/telegram.js +8 -2
- package/src/cli/http.js +24 -7
- package/src/cli/index.js +10 -3
- package/src/cli/postinstall.js +54 -4
- package/src/cli/terminal-chat/renderer.js +60 -3
- package/src/core/logging.js +37 -0
- package/src/core/scaffold.js +70 -56
- package/src/daemon/api.js +29 -2
- package/src/daemon/engines/anthropic.js +2 -1
- package/src/daemon/engines/gemini.js +2 -1
- package/src/daemon/engines/index.js +3 -3
- package/src/daemon/engines/ollama.js +2 -1
- package/src/daemon/engines/openai.js +2 -1
- package/src/daemon/plugins/telegram.js +85 -1
- package/src/daemon/skills-loader.js +31 -66
- package/src/daemon/smoke.js +9 -1
- package/src/daemon/super-agent-tools/index.js +2 -0
- package/src/daemon/super-agent-tools/tools/ask-questions.js +28 -0
- package/src/daemon/super-agent-tools/tools/send-telegram.js +85 -15
- package/src/daemon/super-agent.js +99 -10
- package/src/daemon/tools/browser.js +19 -1
- package/src/daemon/tools/registry.js +9 -7
- package/src/core/apc-context-skill.md +0 -105
- package/src/core/apx-skill.md +0 -135
|
@@ -1,26 +1,28 @@
|
|
|
1
1
|
// daemon/skills-loader.js
|
|
2
2
|
// Discover and load APX skills on-demand for the super-agent.
|
|
3
3
|
//
|
|
4
|
-
// The super-agent reads skills from immutable INTERNAL sources under
|
|
5
|
-
//
|
|
6
|
-
// guarantees apx/apc/runtime knowledge is always available regardless
|
|
7
|
-
// what the user does to ~/.apx/skills
|
|
8
|
-
// <package>/skills/ are a separate concern (scaffold.js handles them) and
|
|
9
|
-
// the loader does NOT read from there.
|
|
4
|
+
// The super-agent reads skills from immutable INTERNAL sources under the
|
|
5
|
+
// package root — they ship with apx and can never be deleted by the user.
|
|
6
|
+
// This guarantees apx/apc/runtime knowledge is always available regardless
|
|
7
|
+
// of what the user does to ~/.apx/skills/ or per-project overrides.
|
|
10
8
|
//
|
|
11
9
|
// Discovery order (priority high → low):
|
|
12
|
-
// 1. <projectPath>/.apc/skills/<slug>.md
|
|
13
|
-
// 1b.<projectPath>/.apc/skills/<slug>/SKILL.md
|
|
14
|
-
// 2. ~/.apx/skills/<slug>/SKILL.md
|
|
15
|
-
// 3. <packageRoot>/
|
|
16
|
-
//
|
|
17
|
-
//
|
|
18
|
-
//
|
|
19
|
-
//
|
|
10
|
+
// 1. <projectPath>/.apc/skills/<slug>.md ← project-scoped (flat)
|
|
11
|
+
// 1b.<projectPath>/.apc/skills/<slug>/SKILL.md ← project-scoped (dir)
|
|
12
|
+
// 2. ~/.apx/skills/<slug>/SKILL.md ← user-installed global
|
|
13
|
+
// 3. <packageRoot>/skills/<slug>/SKILL.md ← bundled core skills
|
|
14
|
+
// (apx, apc-context)
|
|
15
|
+
// 4. <packageRoot>/src/core/runtime-skills/<slug>.md
|
|
16
|
+
// (claude-code, codex-cli,
|
|
17
|
+
// opencode-cli, openrouter)
|
|
20
18
|
//
|
|
21
|
-
// A slug found in a higher-priority location SHADOWS lower ones
|
|
22
|
-
//
|
|
23
|
-
//
|
|
19
|
+
// A slug found in a higher-priority location SHADOWS lower ones. A user can
|
|
20
|
+
// override the bundled apc-context by dropping `~/.apx/skills/apc-context/SKILL.md`,
|
|
21
|
+
// but the bundled copy stays in the package as a safety net.
|
|
22
|
+
//
|
|
23
|
+
// Note: the bundled `apc-context` skill is REFRESHED from the canonical apc
|
|
24
|
+
// repo on every npm install / update (see src/cli/postinstall.js). APC is a
|
|
25
|
+
// living standard, so its skill content is not pinned to an apx version.
|
|
24
26
|
|
|
25
27
|
import fs from "node:fs";
|
|
26
28
|
import path from "node:path";
|
|
@@ -32,38 +34,8 @@ const __dirname = path.dirname(__filename);
|
|
|
32
34
|
const PACKAGE_ROOT = path.resolve(__dirname, "..", "..");
|
|
33
35
|
|
|
34
36
|
const RUNTIME_SKILLS_DIR = path.join(PACKAGE_ROOT, "src", "core", "runtime-skills");
|
|
37
|
+
const BUNDLED_SKILLS_DIR = path.join(PACKAGE_ROOT, "skills");
|
|
35
38
|
const GLOBAL_DIR = path.join(os.homedir(), ".apx", "skills");
|
|
36
|
-
const CORE_DIR = path.join(PACKAGE_ROOT, "src", "core");
|
|
37
|
-
|
|
38
|
-
// Intrinsic built-in skills whose source files (src/core/*-skill.md) do NOT
|
|
39
|
-
// carry frontmatter — the scaffold.js wrapper adds frontmatter when copying
|
|
40
|
-
// these out to external IDE skill dirs. For the super-agent's catalog we
|
|
41
|
-
// supply slug + description inline. Keep in sync with scaffold.js.
|
|
42
|
-
const INTRINSIC = [
|
|
43
|
-
{
|
|
44
|
-
slug: "apx",
|
|
45
|
-
file: path.join(CORE_DIR, "apx-skill.md"),
|
|
46
|
-
description:
|
|
47
|
-
"APX CLI skill. Activate when: user asks to run or coordinate agents, " +
|
|
48
|
-
"use MCP tools from .apc/mcps.json, install agents from a team workspace, " +
|
|
49
|
-
"or explicitly mentions apx commands. Do NOT activate just because .apc/ exists — " +
|
|
50
|
-
"that is handled by the apc-context skill. Activate on: 'apx run', 'apx exec', " +
|
|
51
|
-
"'run an agent', 'coordinate agents', 'MCP not working', 'install agent', " +
|
|
52
|
-
"'team agents', 'apx memory', 'daemon'.",
|
|
53
|
-
},
|
|
54
|
-
{
|
|
55
|
-
slug: "apc-context",
|
|
56
|
-
file: path.join(CORE_DIR, "apc-context-skill.md"),
|
|
57
|
-
description:
|
|
58
|
-
"ALWAYS activate when the project has a .apc/ directory or AGENTS.md file. " +
|
|
59
|
-
"Do not wait to be asked. Read .apc/ before making any assumption about agents, " +
|
|
60
|
-
"memory, or project structure. Activate on: .apc/, AGENTS.md, 'which agents', " +
|
|
61
|
-
"'list agents', 'agent context', 'who are the agents', any question about agents " +
|
|
62
|
-
"or memory in this project. IMPORTANT: if .apc/migrate.md exists, open the " +
|
|
63
|
-
"conversation with a migration offer before answering anything else. If the user " +
|
|
64
|
-
"declines, delete .apc/migrate.md immediately so it is not shown again.",
|
|
65
|
-
},
|
|
66
|
-
];
|
|
67
39
|
|
|
68
40
|
// ---------------------------------------------------------------------------
|
|
69
41
|
// Frontmatter parsing (minimal — handles the YAML we ship)
|
|
@@ -153,15 +125,11 @@ export function listSkills({ projectPath } = {}) {
|
|
|
153
125
|
// priority 2: user-installed global
|
|
154
126
|
found.push(...scanDirStyle(GLOBAL_DIR, "global"));
|
|
155
127
|
|
|
156
|
-
// priority 3:
|
|
157
|
-
found.push(...
|
|
128
|
+
// priority 3: bundled core skills (apx, apc-context)
|
|
129
|
+
found.push(...scanDirStyle(BUNDLED_SKILLS_DIR, "builtin"));
|
|
158
130
|
|
|
159
|
-
// priority 4:
|
|
160
|
-
|
|
161
|
-
if (fs.existsSync(it.file)) {
|
|
162
|
-
found.push({ slug: it.slug, source: "builtin", file: it.file, _description: it.description });
|
|
163
|
-
}
|
|
164
|
-
}
|
|
131
|
+
// priority 4: runtime docs (claude-code, codex-cli, opencode-cli, openrouter)
|
|
132
|
+
found.push(...scanFlatStyle(RUNTIME_SKILLS_DIR, "builtin"));
|
|
165
133
|
|
|
166
134
|
// dedupe by slug (first-wins = higher priority shadows lower)
|
|
167
135
|
const seen = new Set();
|
|
@@ -170,15 +138,12 @@ export function listSkills({ projectPath } = {}) {
|
|
|
170
138
|
if (seen.has(entry.slug)) continue;
|
|
171
139
|
seen.add(entry.slug);
|
|
172
140
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
description = fm.description || "";
|
|
180
|
-
} catch { /* unreadable — skip description */ }
|
|
181
|
-
}
|
|
141
|
+
let description = "";
|
|
142
|
+
try {
|
|
143
|
+
const raw = fs.readFileSync(entry.file, "utf8");
|
|
144
|
+
const { fm } = parseFrontmatter(raw);
|
|
145
|
+
description = fm.description || "";
|
|
146
|
+
} catch { /* unreadable — skip description */ }
|
|
182
147
|
|
|
183
148
|
result.push({
|
|
184
149
|
slug: entry.slug,
|
|
@@ -223,6 +188,6 @@ export function loadSkill(slug, { projectPath } = {}) {
|
|
|
223
188
|
// Useful for diagnostics
|
|
224
189
|
export const SKILL_LOCATIONS = {
|
|
225
190
|
runtime_skills: RUNTIME_SKILLS_DIR,
|
|
226
|
-
|
|
191
|
+
bundled: BUNDLED_SKILLS_DIR,
|
|
227
192
|
global: GLOBAL_DIR,
|
|
228
193
|
};
|
package/src/daemon/smoke.js
CHANGED
|
@@ -14,7 +14,14 @@ import { readAgents } from "../core/parser.js";
|
|
|
14
14
|
const __filename = fileURLToPath(import.meta.url);
|
|
15
15
|
const __dirname = path.dirname(__filename);
|
|
16
16
|
|
|
17
|
-
const
|
|
17
|
+
const EXAMPLE_CANDIDATES = [
|
|
18
|
+
path.resolve(__dirname, "..", "..", "examples", "my-first-project"),
|
|
19
|
+
path.resolve(__dirname, "..", "..", "..", "apc", "examples", "my-first-project"),
|
|
20
|
+
];
|
|
21
|
+
const EXAMPLE = EXAMPLE_CANDIDATES.find((p) =>
|
|
22
|
+
fs.existsSync(path.join(p, "AGENTS.md")) &&
|
|
23
|
+
fs.existsSync(path.join(p, ".apc", "project.json"))
|
|
24
|
+
);
|
|
18
25
|
|
|
19
26
|
function assert(cond, msg) {
|
|
20
27
|
if (!cond) {
|
|
@@ -24,6 +31,7 @@ function assert(cond, msg) {
|
|
|
24
31
|
}
|
|
25
32
|
|
|
26
33
|
const projects = new ProjectManager();
|
|
34
|
+
assert(EXAMPLE, `example project missing; checked ${EXAMPLE_CANDIDATES.join(", ")}`);
|
|
27
35
|
const entry = projects.register(EXAMPLE);
|
|
28
36
|
console.log("registered project", entry.id, entry.path);
|
|
29
37
|
|
|
@@ -22,6 +22,7 @@ import searchFiles from "./tools/search-files.js";
|
|
|
22
22
|
import listSkills from "./tools/list-skills.js";
|
|
23
23
|
import loadSkill from "./tools/load-skill.js";
|
|
24
24
|
import transcribeAudio from "./tools/transcribe-audio.js";
|
|
25
|
+
import askQuestions from "./tools/ask-questions.js";
|
|
25
26
|
import { createPermissionGuard } from "./helpers.js";
|
|
26
27
|
import { buildBridgedTools, DEFAULT_CATEGORIES } from "./registry-bridge.js";
|
|
27
28
|
|
|
@@ -50,6 +51,7 @@ const NATIVE_TOOLS = [
|
|
|
50
51
|
listSkills,
|
|
51
52
|
loadSkill,
|
|
52
53
|
transcribeAudio,
|
|
54
|
+
askQuestions,
|
|
53
55
|
];
|
|
54
56
|
|
|
55
57
|
// Registry-backed bridges. Categories can be overridden per-process via env
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
 * Coerce the `questions` tool argument into an array.
 *
 * Tool arguments come from a model and are not guaranteed to follow the
 * declared schema, so a bare string is treated as a single question and
 * anything else that is not an array is treated as "no questions".
 *
 * @param {unknown} questions - raw `questions` argument from the tool call
 * @returns {Array} the questions as an array (possibly empty)
 */
function normalizeQuestions(questions) {
  if (Array.isArray(questions)) return questions;
  if (typeof questions === "string" && questions.trim() !== "") return [questions];
  return [];
}

/**
 * `ask_questions` super-agent tool definition.
 *
 * The handler performs no I/O: it only acknowledges the call and reports how
 * many questions were supplied. The agent uses it to signal explicitly that it
 * is waiting for answers to specific questions.
 */
const askQuestions = {
  name: "ask_questions",
  schema: {
    function: {
      name: "ask_questions",
      description: "Ask the user one or more specific questions to clarify the task or gather requirements.",
      parameters: {
        type: "object",
        properties: {
          questions: {
            type: "array",
            items: { type: "string" },
            description: "A list of questions for the user."
          }
        },
        required: ["questions"]
      }
    }
  },
  /**
   * Build the tool handler.
   *
   * @returns {(args?: { questions?: unknown }) => Promise<{ status: string, count: number }>}
   *   handler resolving to a status message and the number of questions received
   */
  makeHandler: () => async ({ questions } = {}) => {
    // Normalize first: a missing or malformed `questions` argument must not
    // crash the handler with an opaque TypeError on `.length`.
    const list = normalizeQuestions(questions);
    return {
      status: "Questions presented to user. Waiting for input.",
      count: list.length
    };
  }
};

export default askQuestions;
|
|
@@ -1,12 +1,41 @@
|
|
|
1
1
|
import { confirmedProperty } from "../helpers.js";
|
|
2
2
|
|
|
3
|
+
/**
 * Decode a base64 string into a Buffer, accepting an optional data-URI header.
 *
 * The header match is case-insensitive and accepts any media type and any
 * parameters (e.g. `data:image/svg+xml;base64,`,
 * `data:text/plain;charset=utf-8;base64,`). If the header were left in place
 * it would be fed to the base64 decoder as payload and corrupt the output.
 *
 * @param {unknown} b64 - base64 text or a `data:...;base64,...` URI; coerced with String()
 * @returns {Buffer} the decoded bytes
 */
function decodeBase64(b64) {
  // A base64 payload never contains a comma, so everything up to the first
  // ";base64," is header and can be dropped safely.
  const payload = String(b64).trim().replace(/^data:[^,]*;base64,/i, "");
  return Buffer.from(payload, "base64");
}
|
|
7
|
+
|
|
3
8
|
/**
 * Pick the photo input to send, in priority order: URL, then filesystem path,
 * then inline base64.
 *
 * @param {{ photo_base64?: unknown, photo_path?: unknown, photo_url?: unknown }} input
 * @returns {string|Buffer|null} the URL or path as a string, the decoded
 *   base64 bytes, or null when no photo input was provided
 */
function decodePhoto({ photo_base64, photo_path, photo_url }) {
  // URL wins over path; both are passed through as plain strings.
  const location = photo_url || photo_path;
  if (location) return String(location);
  return photo_base64 ? decodeBase64(photo_base64) : null;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Pick the document input to send, in priority order: URL, then filesystem
 * path, then inline base64.
 *
 * @param {{ document_base64?: unknown, document_path?: unknown, document_url?: unknown }} input
 * @returns {string|Buffer|null} the URL or path as a string, the decoded
 *   base64 bytes, or null when no document input was provided
 */
function decodeDocument({ document_base64, document_path, document_url }) {
  // The first truthy reference wins and is passed through as a plain string.
  for (const reference of [document_url, document_path]) {
    if (reference) return String(reference);
  }
  if (!document_base64) return null;
  return decodeBase64(document_base64);
}
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* Detect the common LLM mistake of embedding raw base64 in the text field
|
|
24
|
+
* (often wrapped in markdown image syntax). Telegram does NOT render those —
|
|
25
|
+
* it just shows the literal characters. Fail fast with a clear hint.
|
|
26
|
+
*/
|
|
27
|
+
function detectBase64InText(text) {
|
|
28
|
+
if (!text || typeof text !== "string") return null;
|
|
29
|
+
if (/!\[[^\]]*\]\(data:image\/[a-z]+;base64,/i.test(text)) {
|
|
30
|
+
return "markdown image with data URI";
|
|
31
|
+
}
|
|
32
|
+
if (/data:image\/[a-z]+;base64,/i.test(text)) {
|
|
33
|
+
return "data URI";
|
|
34
|
+
}
|
|
35
|
+
// Long runs of base64-looking chars (>500 contiguous) — almost certainly a
|
|
36
|
+
// dumped image
|
|
37
|
+
if (/[A-Za-z0-9+/=]{500,}/.test(text)) {
|
|
38
|
+
return "raw base64 blob (>500 chars)";
|
|
10
39
|
}
|
|
11
40
|
return null;
|
|
12
41
|
}
|
|
@@ -18,28 +47,61 @@ export default {
|
|
|
18
47
|
function: {
|
|
19
48
|
name: "send_telegram",
|
|
20
49
|
description:
|
|
21
|
-
"Send a Telegram message via the daemon's Telegram plugin.
|
|
50
|
+
"Send a Telegram message via the daemon's Telegram plugin. STRICT rule: to attach an image use the photo_* params; to attach a file use the document_* params — NEVER paste base64 or a data URI inside `text` (Telegram does not render markdown images / data URIs, the recipient sees the literal base64). After browser_screenshot, pass its `base64` field directly to photo_base64 here (not in text). The text field becomes the caption when media is attached.",
|
|
22
51
|
parameters: {
|
|
23
52
|
type: "object",
|
|
24
53
|
properties: {
|
|
25
|
-
channel:
|
|
26
|
-
chat_id:
|
|
27
|
-
text:
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
54
|
+
channel: { type: "string", description: "telegram channel name; omit for default" },
|
|
55
|
+
chat_id: { type: "string", description: "destination chat id; omit to use channel default" },
|
|
56
|
+
text: {
|
|
57
|
+
type: "string",
|
|
58
|
+
description:
|
|
59
|
+
"Plain-text body (becomes the caption when a photo_* or document_* is attached). MUST NOT contain base64, data URIs, or markdown image syntax like  — use photo_base64 for that.",
|
|
60
|
+
},
|
|
61
|
+
// --- image attachments ---
|
|
62
|
+
photo_base64: {
|
|
63
|
+
type: "string",
|
|
64
|
+
description:
|
|
65
|
+
"raw base64 PNG/JPG (or 'data:image/...;base64,...'). Pass the `base64` field from browser_screenshot directly here.",
|
|
66
|
+
},
|
|
67
|
+
photo_path: { type: "string", description: "absolute filesystem path to an image file" },
|
|
68
|
+
photo_url: { type: "string", description: "public https URL of an image" },
|
|
69
|
+
// --- document attachments (PDF, txt, zip, etc) ---
|
|
70
|
+
document_base64: { type: "string", description: "raw base64 of a file" },
|
|
71
|
+
document_path: { type: "string", description: "absolute filesystem path to any file (PDF, txt, zip, .csv...)" },
|
|
72
|
+
document_url: { type: "string", description: "public https URL of a file" },
|
|
73
|
+
filename: { type: "string", description: "filename to show in Telegram when sending a document (Buffer-style input)" },
|
|
74
|
+
mime_type: { type: "string", description: "optional MIME type for the document" },
|
|
75
|
+
confirmed: confirmedProperty("true only after explicit user confirmation for this exact outbound message"),
|
|
32
76
|
},
|
|
33
77
|
required: ["text"],
|
|
34
78
|
},
|
|
35
79
|
},
|
|
36
80
|
},
|
|
37
|
-
makeHandler: ({ plugins, requirePermission }) => async (
|
|
81
|
+
makeHandler: ({ plugins, requirePermission }) => async (args = {}) => {
|
|
82
|
+
const {
|
|
83
|
+
channel, chat_id, text,
|
|
84
|
+
photo_base64, photo_path, photo_url,
|
|
85
|
+
document_base64, document_path, document_url,
|
|
86
|
+
filename, mime_type,
|
|
87
|
+
confirmed = false,
|
|
88
|
+
} = args;
|
|
89
|
+
|
|
38
90
|
requirePermission("send_telegram", { dangerous: true, confirmed });
|
|
39
91
|
if (!plugins) throw new Error("plugins unavailable");
|
|
40
92
|
const telegram = plugins.get("telegram");
|
|
41
93
|
if (!telegram) throw new Error("telegram plugin not loaded");
|
|
42
94
|
|
|
95
|
+
// Defensive: catch the classic mistake of dumping base64 into text.
|
|
96
|
+
const bad = detectBase64InText(text);
|
|
97
|
+
if (bad) {
|
|
98
|
+
throw new Error(
|
|
99
|
+
`send_telegram: refusing to send — text appears to contain ${bad}. ` +
|
|
100
|
+
`Telegram does not render data URIs or markdown images. ` +
|
|
101
|
+
`Pass the base64 in photo_base64 (NOT text). Set text to a short caption like "Captura de localhost:8801".`
|
|
102
|
+
);
|
|
103
|
+
}
|
|
104
|
+
|
|
43
105
|
const photo = decodePhoto({ photo_base64, photo_path, photo_url });
|
|
44
106
|
if (photo) {
|
|
45
107
|
const result = await telegram.sendPhoto({
|
|
@@ -48,6 +110,14 @@ export default {
|
|
|
48
110
|
return { ok: true, kind: "photo", message_id: result.message_id };
|
|
49
111
|
}
|
|
50
112
|
|
|
113
|
+
const document = decodeDocument({ document_base64, document_path, document_url });
|
|
114
|
+
if (document) {
|
|
115
|
+
const result = await telegram.sendDocument({
|
|
116
|
+
channel, chat_id, document, caption: text, filename, mime_type, author: "apx",
|
|
117
|
+
});
|
|
118
|
+
return { ok: true, kind: "document", message_id: result.message_id, filename };
|
|
119
|
+
}
|
|
120
|
+
|
|
51
121
|
const result = await telegram.send({ channel, chat_id, text, author: "apx" });
|
|
52
122
|
return { ok: true, kind: "text", message_id: result.message_id };
|
|
53
123
|
},
|
|
@@ -65,7 +65,49 @@ HARD RULES (do not deviate):
|
|
|
65
65
|
18. **NO EMPTY RESPONSES**: Never respond with only text when you have tools available and the user is asking you to DO something. Call the tool FIRST, then explain. Never say "I'll do X" without immediately calling the tool. Empty acknowledgments ("ok", "entendido", "dame un minuto", "voy", "checking", "stand by") without a tool call are invalid responses — they will be re-prompted and waste a turn.
|
|
66
66
|
19. **CWD RULE**: When the channel context includes a "CWD: <path>" line, that is the user's current working directory. References to "este directorio", "este proyecto", "esta carpeta", "acá", "aquí", "this directory", "this project", "current dir/folder" all mean that exact CWD path. Use it as the path argument directly — DO NOT ask the user "what's the path?" when CWD is already given. Example: if user says "agregá este proyecto a la lista", call add_project({path: <CWD>}) immediately.
|
|
67
67
|
20. **NO MANUAL SCAFFOLDING**: To register or scaffold a project, ALWAYS use add_project — it auto-creates AGENTS.md and .apc/project.json when missing (one call, atomic). NEVER write AGENTS.md, .apc/project.json, or any APC scaffold file by hand via run_shell / write_file / shell pipes. The schema must come from the official initApf scaffold, not improvised. If add_project errors, report the error to the user — don't try to work around it with shell hacks. Same for any other APC-managed file (.apc/agents/*, .apc/skills/*, etc.) — use the dedicated tool, never raw filesystem writes.
|
|
68
|
-
21. **SKILLS — ON DEMAND**: The "# Available skills" section below lists every skill available to you (slug + description, NO body). When the user asks about specific APX/APC commands, project structure, agent runtimes, or anything where exact syntax or detailed behavior matches a skill description (in ANY language — match semantically, not by keyword), call load_skill({slug}) to fetch the full markdown body. If a CWD is in the contextNote, pass it as project_path so project-scoped skills resolve. If the user explicitly asks "what skills do you have?", you can either read the catalog below directly OR call list_skills to get a fresh enumeration. Do NOT load skills for trivial / unrelated questions — that wastes tokens. Don't guess CLI syntax when a skill can tell you; load it
|
|
68
|
+
21. **SKILLS — ON DEMAND**: The "# Available skills" section below lists every skill available to you (slug + description, NO body). When the user asks about specific APX/APC commands, project structure, agent runtimes, or anything where exact syntax or detailed behavior matches a skill description (in ANY language — match semantically, not by keyword), call load_skill({slug}) to fetch the full markdown body. If a CWD is in the contextNote, pass it as project_path so project-scoped skills resolve. If the user explicitly asks "what skills do you have?", you can either read the catalog below directly OR call list_skills to get a fresh enumeration. Do NOT load skills for trivial / unrelated questions — that wastes tokens. Don't guess CLI syntax when a skill can tell you; load it.
|
|
69
|
+
22. **NEVER PASTE BASE64 OR DATA URIs IN MESSAGE TEXT**: When you need to send an image, audio, or file via Telegram (or any channel), you MUST pass it via the dedicated parameter — NEVER embed it in the text field. Concretely: after browser_screenshot returns its base64 field, call send_telegram({text: "<short caption>", photo_base64: "<that base64>"}). Do NOT write text like 'Aquí está: ' — Telegram (and most chat clients) do NOT render data URIs or markdown images; the user sees thousands of garbage characters. Same for files: use document_path / document_base64 / document_url, NOT the text field. The text field is exclusively for human-readable prose (and becomes the caption when media is attached). If unsure, save the image to /tmp/screenshot-<ts>.png first (browser_screenshot supports save_to_tmp=true and returns a path field) and pass that path to send_telegram via photo_path — never inline the bytes in text.`;
|
|
70
|
+
|
|
71
|
+
/**
 * Reduce a tool schema to the fields kept in the compact catalog: name,
 * description, required parameter names, and per-parameter type / enum /
 * description.
 *
 * Missing pieces are tolerated: an absent `function`, `parameters` or
 * `properties` yields empty defaults, and a parameter without a `type` is
 * reported as "string".
 *
 * @param {object} [schema] - tool schema whose `function` member holds the definition
 * @returns {{ name: *, description: *, required: Array, properties: object }}
 */
function compactToolSchema(schema) {
  const definition = schema?.function || {};
  const parameters = definition.parameters || {};

  const compactEntries = [];
  for (const [paramName, paramSpec] of Object.entries(parameters.properties || {})) {
    const compactSpec = {
      type: paramSpec?.type || "string",
      enum: paramSpec?.enum,
      description: paramSpec?.description,
    };
    compactEntries.push([paramName, compactSpec]);
  }

  return {
    name: definition.name,
    description: definition.description,
    required: parameters.required || [],
    properties: Object.fromEntries(compactEntries),
  };
}
|
|
91
|
+
|
|
92
|
+
/**
 * Build the system prompt used when tools are requested as plain JSON instead
 * of through native structured tool calls.
 *
 * The original system prompt comes first, followed by instructions for the
 * one-JSON-object-per-line calling convention and a JSON catalog of every
 * entry in TOOL_SCHEMAS (compacted via compactToolSchema). Sections are
 * separated by blank lines.
 *
 * @param {string} system - the regular system prompt
 * @returns {string} the extended system prompt
 */
function pseudoToolSystem(system) {
  const toolCatalogJson = JSON.stringify(TOOL_SCHEMAS.map(compactToolSchema));
  const sections = [];
  sections.push(system);
  sections.push("# Structured tool fallback");
  sections.push("The engine rejected native structured tools. You can still call tools by emitting plain JSON.");
  sections.push("When you need a tool, respond ONLY with one JSON object per line:");
  sections.push("{\"name\":\"tool_name\",\"arguments\":{\"arg\":\"value\"}}");
  sections.push("After tool results arrive, continue the task or give the final answer normally.");
  sections.push("Available tools:");
  sections.push(toolCatalogJson);
  return sections.join("\n\n");
}
|
|
105
|
+
|
|
106
|
+
/**
 * Decide whether a failed engine call should be retried with the plain-JSON
 * ("pseudo") tool protocol instead of native structured tools.
 *
 * A retry is attempted only once (never when already in pseudo-tool mode),
 * only for model ids starting with "ollama:", and only when the failure text
 * contains "ollama 500" (both checks are case-insensitive).
 *
 * @param {unknown} modelId - engine-qualified model id, e.g. "ollama:<model>"
 * @param {unknown} error - the thrown value; an Error or any other throwable
 * @param {boolean} alreadyPseudo - true when pseudo-tool mode is already active
 * @returns {boolean} true when the call should be retried in pseudo-tool mode
 */
function shouldRetryWithPseudoTools(modelId, error, alreadyPseudo) {
  if (alreadyPseudo) return false;
  // Fall back to the thrown value itself so non-Error throwables (e.g. a
  // plain string) are inspected too instead of always reading as "".
  const message = String(error?.message || error || "");
  const isOllamaModel = /^ollama:/i.test(String(modelId || ""));
  return isOllamaModel && /ollama\s+500/i.test(message);
}
|
|
69
111
|
|
|
70
112
|
function isShortConfirmation(text) {
|
|
71
113
|
return /^(yes|y|si|si dale|dale|ok|okay|confirm|confirmed|go|proceed|do it)\b/i
|
|
@@ -114,6 +156,7 @@ export async function runSuperAgent({
|
|
|
114
156
|
previousMessages = [],
|
|
115
157
|
overrideModel = null,
|
|
116
158
|
onEvent = null,
|
|
159
|
+
signal,
|
|
117
160
|
}) {
|
|
118
161
|
if (!isSuperAgentEnabled(globalConfig)) {
|
|
119
162
|
throw new Error("super-agent not enabled (set super_agent.enabled and .model in ~/.apx/config.json)");
|
|
@@ -186,6 +229,7 @@ export async function runSuperAgent({
|
|
|
186
229
|
const trace = [];
|
|
187
230
|
let totalUsage = { input_tokens: 0, output_tokens: 0 };
|
|
188
231
|
let lastText = "";
|
|
232
|
+
let usePseudoTools = false;
|
|
189
233
|
|
|
190
234
|
for (let iter = 0; iter < MAX_TOOL_ITERS; iter++) {
|
|
191
235
|
await emitProgress(onEvent, { type: "model_start", iteration: iter + 1 });
|
|
@@ -194,15 +238,38 @@ export async function runSuperAgent({
|
|
|
194
238
|
// acting on an action request. On later iterations (after tool results
|
|
195
239
|
// have been fed back) tool_choice is "auto" so the model can produce its
|
|
196
240
|
// final text summary.
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
241
|
+
let result;
|
|
242
|
+
try {
|
|
243
|
+
result = await callEngine({
|
|
244
|
+
modelId: activeModel,
|
|
245
|
+
system: usePseudoTools ? pseudoToolSystem(system) : system,
|
|
246
|
+
messages: conversation,
|
|
247
|
+
config: globalConfig,
|
|
248
|
+
tools: usePseudoTools ? null : TOOL_SCHEMAS,
|
|
249
|
+
toolChoice: usePseudoTools ? null : (iter === 0 ? "required" : "auto"),
|
|
250
|
+
maxTokens: 1024,
|
|
251
|
+
signal,
|
|
252
|
+
});
|
|
253
|
+
} catch (e) {
|
|
254
|
+
if (usePseudoTools && /^ollama:/i.test(String(activeModel || "")) && /ollama\s+500/i.test(String(e?.message || "")) && trace.length > 0) {
|
|
255
|
+
await emitProgress(onEvent, { type: "model_retry", reason: "ollama_final_response_500", iteration: iter + 1 });
|
|
256
|
+
lastText = fallbackFinalText(trace, e);
|
|
257
|
+
break;
|
|
258
|
+
}
|
|
259
|
+
if (!shouldRetryWithPseudoTools(activeModel, e, usePseudoTools)) throw e;
|
|
260
|
+
usePseudoTools = true;
|
|
261
|
+
await emitProgress(onEvent, { type: "model_retry", reason: "ollama_structured_tools_500", iteration: iter + 1 });
|
|
262
|
+
result = await callEngine({
|
|
263
|
+
modelId: activeModel,
|
|
264
|
+
system: pseudoToolSystem(system),
|
|
265
|
+
messages: conversation,
|
|
266
|
+
config: globalConfig,
|
|
267
|
+
tools: null,
|
|
268
|
+
toolChoice: null,
|
|
269
|
+
maxTokens: 1024,
|
|
270
|
+
signal,
|
|
271
|
+
});
|
|
272
|
+
}
|
|
206
273
|
totalUsage.input_tokens += result.usage?.input_tokens || 0;
|
|
207
274
|
totalUsage.output_tokens += result.usage?.output_tokens || 0;
|
|
208
275
|
lastText = result.text || "";
|
|
@@ -316,3 +383,25 @@ function summarizeForTrace(r) {
|
|
|
316
383
|
if (s.length <= 400) return r;
|
|
317
384
|
return s.slice(0, 380) + "…(truncated)";
|
|
318
385
|
}
|
|
386
|
+
|
|
387
|
+
/**
 * Compose a plain-text final answer from the tool trace, for use when the
 * model fails while writing its own summary.
 *
 * The output has a fixed header line, the engine error (first 220 characters)
 * and a "Trace:" section with one bullet per tool call, covering at most the
 * last 8 trace entries.
 *
 * @param {Array<{ tool: *, result: * }>} trace - executed tool calls, oldest first
 * @param {unknown} error - the engine failure; an Error or any other throwable
 * @returns {string} newline-joined summary text
 */
function fallbackFinalText(trace, error) {
  const engineError = String(error?.message || error).slice(0, 220);
  const header = [
    "Tool execution completed, but the model failed while composing the final answer.",
    `Engine error: ${engineError}`,
    "Trace:",
  ];
  // Only the most recent calls are listed, to keep the message short.
  const bullets = trace
    .slice(-8)
    .map((entry) => `- ${entry.tool}: ${previewTraceResult(entry.result)}`);
  return [...header, ...bullets].join("\n");
}
|
|
398
|
+
|
|
399
|
+
/**
 * Render a tool result as a short one-line preview (at most 180 characters of
 * payload) for the fallback trace listing.
 *
 * Precedence: null/undefined → "ok"; strings as-is; then the first truthy of
 * `error` (prefixed with "error: "), `path`, `content`; then `results` as
 * JSON; otherwise the whole value as JSON.
 *
 * Serialization never throws: values JSON cannot represent (circular
 * structures, BigInt, functions, symbols) fall back to a plain string form.
 * This matters because a crash in this error-reporting path would hide the
 * original engine error.
 *
 * @param {unknown} result - the tool's return value
 * @returns {string} the preview text
 */
function previewTraceResult(result) {
  const limit = 180;

  // JSON.stringify throws on cycles/BigInt and returns undefined for
  // functions/symbols; degrade to a string form instead of failing.
  const toJsonPreview = (value) => {
    try {
      const json = JSON.stringify(value);
      if (typeof json === "string") return json;
    } catch {
      /* not serializable — use the string form below */
    }
    return typeof value === "object"
      ? Object.prototype.toString.call(value)
      : String(value);
  };

  if (result === null || result === undefined) return "ok";
  if (typeof result === "string") return result.slice(0, limit);
  if (result.error) return `error: ${String(result.error).slice(0, limit)}`;
  if (result.path) return String(result.path).slice(0, limit);
  if (result.content) return String(result.content).slice(0, limit);
  if (result.results) return toJsonPreview(result.results).slice(0, limit);
  return toJsonPreview(result).slice(0, limit);
}
|
|
@@ -197,7 +197,7 @@ export async function browser_navigate({ url, launch_options, allow_dangerous }
|
|
|
197
197
|
};
|
|
198
198
|
}
|
|
199
199
|
|
|
200
|
-
export async function browser_screenshot({ selector, full_page = false, width, height, encoded = false } = {}) {
|
|
200
|
+
export async function browser_screenshot({ selector, full_page = false, width, height, encoded = false, save_path, save_to_tmp = false } = {}) {
|
|
201
201
|
const page = await ensureBrowser();
|
|
202
202
|
if (width || height) {
|
|
203
203
|
await page.setViewport({
|
|
@@ -218,12 +218,30 @@ export async function browser_screenshot({ selector, full_page = false, width, h
|
|
|
218
218
|
throw new Error(`Screenshot too large: ${Math.round(size / 1024)}KB (max ${Math.round(MAX_SCREENSHOT_BYTES / 1024)}KB)`);
|
|
219
219
|
}
|
|
220
220
|
|
|
221
|
+
// Optional disk write so the caller can pass `path` to e.g. send_telegram
|
|
222
|
+
// instead of shuttling base64 around.
|
|
223
|
+
let writtenPath = null;
|
|
224
|
+
if (save_path || save_to_tmp) {
|
|
225
|
+
const fs = await import("node:fs");
|
|
226
|
+
const path = await import("node:path");
|
|
227
|
+
const os = await import("node:os");
|
|
228
|
+
let target = save_path;
|
|
229
|
+
if (!target) {
|
|
230
|
+
const dir = path.join(os.tmpdir(), "apx-screenshots");
|
|
231
|
+
fs.mkdirSync(dir, { recursive: true });
|
|
232
|
+
target = path.join(dir, `screenshot-${Date.now()}.png`);
|
|
233
|
+
}
|
|
234
|
+
fs.writeFileSync(target, Buffer.from(String(buf), "base64"));
|
|
235
|
+
writtenPath = target;
|
|
236
|
+
}
|
|
237
|
+
|
|
221
238
|
return {
|
|
222
239
|
ok: true,
|
|
223
240
|
url: page.url(),
|
|
224
241
|
format: "png",
|
|
225
242
|
bytes: size,
|
|
226
243
|
base64: buf,
|
|
244
|
+
path: writtenPath,
|
|
227
245
|
data_uri: encoded ? `data:image/png;base64,${buf}` : undefined,
|
|
228
246
|
};
|
|
229
247
|
}
|
|
@@ -366,19 +366,21 @@ const TOOL_DEFINITIONS = [
|
|
|
366
366
|
{
|
|
367
367
|
name: "browser_screenshot",
|
|
368
368
|
category: "browser",
|
|
369
|
-
description: "Take a screenshot of the current browser page (or
|
|
369
|
+
description: "Take a screenshot of the current browser page (or an element via selector). Returns { base64, path?, bytes, url }. To send via Telegram, prefer `save_to_tmp: true` and pass the returned `path` to send_telegram({photo_path}); otherwise pass `base64` straight to send_telegram({photo_base64}). NEVER include the base64 in any text field — Telegram does not render it.",
|
|
370
370
|
endpoint: { method: "POST", path: "/tools/browser/screenshot" },
|
|
371
371
|
parameters: {
|
|
372
372
|
type: "object",
|
|
373
373
|
properties: {
|
|
374
|
-
selector:
|
|
375
|
-
full_page:
|
|
376
|
-
width:
|
|
377
|
-
height:
|
|
378
|
-
encoded:
|
|
374
|
+
selector: { type: "string", description: "CSS selector of element to capture. Omit for full viewport/page." },
|
|
375
|
+
full_page: { type: "boolean", default: false },
|
|
376
|
+
width: { type: "number", description: "Viewport width (capped at 1920)." },
|
|
377
|
+
height: { type: "number", description: "Viewport height (capped at 1080)." },
|
|
378
|
+
encoded: { type: "boolean", description: "Also return a data:image/png;base64 URI in response." },
|
|
379
|
+
save_path: { type: "string", description: "Absolute path to write the PNG. Returns it in `path`." },
|
|
380
|
+
save_to_tmp: { type: "boolean", description: "Auto-write to <os.tmpdir>/apx-screenshots/screenshot-<ts>.png. Returns the path." },
|
|
379
381
|
},
|
|
380
382
|
},
|
|
381
|
-
examples: [{}, { selector: "#hero" }],
|
|
383
|
+
examples: [{}, { selector: "#hero" }, { save_to_tmp: true }],
|
|
382
384
|
},
|
|
383
385
|
{
|
|
384
386
|
name: "browser_click",
|