@agentprojectcontext/apx 1.42.0 → 1.42.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@agentprojectcontext/apx",
3
- "version": "1.42.0",
3
+ "version": "1.42.2",
4
4
  "description": "APX — unified CLI + daemon for the Agent Project Context (APC) standard.",
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -5,6 +5,16 @@
5
5
  // Coding surfaces (web Code / terminal Build) raise this via maxIters and use
6
6
  // the finish-tool completionContract instead.
7
7
  export const MAX_TOOL_ITERS = 10;
8
+ // Telegram is the "do real work for me" conversational surface (the super-agent
9
+ // Roby): it needs to chain explore→edit→verify→close autonomously, not stop
10
+ // after ~9 actions and ask "want me to continue?". A budget of 10 left only one
11
+ // usable action step before the reserved wrap-up, so multi-step tasks routinely
12
+ // cut off mid-job. We give it a real autonomy budget (mirroring the TUI Code
13
+ // surface's maxIters:40) while keeping it below the coding surfaces. The
14
+ // reserved final-step wrap-up still applies, but now only fires when a task
15
+ // genuinely exhausts this budget — a rare safety floor, not the default close.
16
+ // Overridable per-deployment via config.super_agent.telegram_max_iters.
17
+ export const TELEGRAM_TOOL_ITERS = 24;
8
18
  export const ACK_ONLY_TOOLS = new Set(["send_telegram"]);
9
19
  export const MAX_CONSECUTIVE_ACKS = 2;
10
20
  // Tools whose semantics REQUIRE handing control back to the user. After the
@@ -84,20 +84,32 @@ export const FINISH_TOOL_SCHEMA = {
84
84
  },
85
85
  };
86
86
 
87
- // Behavioral nudge appended to the system prompt for the ONE tool-free wrap-up
88
- // step at the end of a turn (see the loop's `isFinalWrapUp`). This shapes
89
- // BEHAVIOR only — it never dictates wording or supplies a canned/templated
90
- // sentence. The reply the user sees is 100% model-authored and varies with
91
- // what the model actually did this turn. We do NOT mention any "tool limit":
92
- // the model just speaks from where it is. Critically it must not claim work it
93
- // didn't do (weak models otherwise fabricate "all done").
94
- const WRAPUP_NUDGE =
95
- "\n\n[Internal note last step of this turn. No more tools will run now. " +
96
- "Reply in plain prose, in the user's language, from your own context: briefly " +
97
- "say what you actually accomplished so far (check the tool results above — do " +
98
- "NOT claim anything you didn't do), and if work is still pending, name what's " +
99
- "left and ask the user whether you should continue. Do not mention limits, " +
100
- "steps, or iterations just talk naturally.]";
87
+ // In-band signal injected as a CONVERSATION turn (not a system suffix) for the
88
+ // ONE tool-free wrap-up step at the end of a turn (see the loop's
89
+ // `isFinalWrapUp`). Delivering it through the message channel the way a tool
90
+ // result arrives makes weak models reliably author a reply instead of
91
+ // returning empty, because they always answer the latest turn. It shapes
92
+ // BEHAVIOR only: it never dictates wording or supplies a canned sentence. The
93
+ // reply the user sees is 100% model-authored and varies with what the model
94
+ // actually did this turn. Critically it must not claim work it didn't do (weak
95
+ // models otherwise fabricate "all done").
96
+ //
97
+ // Unlike a hard "iteration limit" message, it asks the model to surface the
98
+ // situation NATURALLY ("this is taking more steps than I expected") plus a
99
+ // concrete recap of what it found and did NOT find so the closing reads like
100
+ // a human status update, never robotic system jargon.
101
+ const WRAPUP_SIGNAL =
102
+ "[Internal turn note — this is NOT from the user. You've taken several tool " +
103
+ "steps this turn and the task isn't finished; no more tools will run now. " +
104
+ "Write the user ONE short, natural closing message, in their language, " +
105
+ "entirely in your own words:\n" +
106
+ "- Concretely recap what you actually did and what you found so far — and be " +
107
+ "honest about what you did NOT find or couldn't resolve yet. Read the tool " +
108
+ "results above; do not claim anything you didn't do.\n" +
109
+ "- Mention plainly that this is taking more steps than expected and isn't done.\n" +
110
+ "- Ask whether they want you to keep going.\n" +
111
+ "Talk like a person giving a quick status update. Do NOT emit a tool call, " +
112
+ "JSON, or system jargon like \"iteration\" or \"limit\".]";
101
113
 
102
114
  /**
103
115
  * Shared tool-calling agent loop used by super-agent and future surfaces.
@@ -301,8 +313,8 @@ export async function runAgent({
301
313
  // Rather than cut off silently mid-tool-call, we run ONE tool-free step so
302
314
  // the model writes a natural closing in its OWN words — what it did, what's
303
315
  // left, and (if anything remains) whether to continue. We change only the
304
- // STRUCTURE (no tools this step) + a behavioral nudge; the wording is
305
- // entirely the model's. Coding surfaces keep their finish-tool flow, so
316
+ // STRUCTURE (no tools this step) + an in-band directive turn (WRAPUP_SIGNAL);
317
+ // the wording is entirely the model's. Coding surfaces keep their finish-tool flow, so
306
318
  // this never applies under completionContract.
307
319
  const isFinalWrapUp =
308
320
  !useContract && effectiveSchemas.length > 0 && iter === maxIters - 1;
@@ -322,8 +334,14 @@ export async function runAgent({
322
334
  let result;
323
335
  try {
324
336
  result = await tryCallEngine({
325
- system: isFinalWrapUp ? baseSystem + WRAPUP_NUDGE : baseSystem,
326
- messages: conversation,
337
+ system: baseSystem,
338
+ // Wrap-up: deliver the "you're out of steps, summarize + ask" directive
339
+ // as the latest CONVERSATION turn so the model treats it like any other
340
+ // turn it must answer — far more reliable than a system suffix on weak
341
+ // models. Ephemeral: built fresh here, never persisted to history.
342
+ messages: isFinalWrapUp
343
+ ? [...conversation, { role: "user", content: WRAPUP_SIGNAL }]
344
+ : conversation,
327
345
  config: globalConfig,
328
346
  // On the wrap-up step we withhold tools entirely so the model must
329
347
  // answer in prose — same as a real engine called with tools omitted.
@@ -0,0 +1,62 @@
1
+ // Low-level Telegram Bot API client — the single place the raw JSON endpoints
2
+ // are called. Higher layers (the poller's send/typing/keyboard methods, the
3
+ // confirmation adapter, the ask flow) compose these instead of hand-rolling
4
+ // fetch boilerplate, so each endpoint's quirks live in exactly one spot. These
5
+ // used to be duplicated across the poller AND the confirm adapter.
6
+ //
7
+ // Every call is token-explicit (no channel/config coupling) so it's reusable
8
+ // from any surface — poller, adapter, routines, tests. Media uploads (multipart
9
+ // FormData) stay in ./media.js; this module owns the JSON endpoints.
10
+ import { API_BASE } from "./media.js";
11
+
12
+ /**
13
+ * POST a JSON body to a Bot API method. Returns the parsed `result` on success;
14
+ * throws on transport failure or a non-ok Telegram response. Best-effort callers
15
+ * (typing, keyboard edits, callback acks) wrap this in their own try/catch.
16
+ */
17
+ async function apiCall(token, method, body) {
18
+ const res = await fetch(`${API_BASE}/bot${token}/${method}`, {
19
+ method: "POST",
20
+ headers: { "content-type": "application/json" },
21
+ body: JSON.stringify(body),
22
+ });
23
+ const json = await res.json().catch(() => ({}));
24
+ if (!json.ok) throw new Error(json.description || `${method} failed (${res.status})`);
25
+ return json.result;
26
+ }
27
+
28
+ /** sendMessage: the plain text reply (optionally with an inline keyboard). */
29
+ export function sendMessage(token, chatId, { text, reply_markup, parse_mode } = {}) {
30
+ const body = { chat_id: chatId, text };
31
+ if (reply_markup) body.reply_markup = reply_markup;
32
+ if (parse_mode) body.parse_mode = parse_mode;
33
+ return apiCall(token, "sendMessage", body);
34
+ }
35
+
36
+ /** sendChatAction: the "typing…" indicator (auto-clears after ~5s). */
37
+ export function sendChatAction(token, chatId, action = "typing") {
38
+ return apiCall(token, "sendChatAction", { chat_id: chatId, action });
39
+ }
40
+
41
+ /** editMessageReplyMarkup: swap/clear the inline keyboard on a sent message. */
42
+ export function editMessageReplyMarkup(token, chatId, messageId, reply_markup) {
43
+ const body = { chat_id: chatId, message_id: messageId };
44
+ if (reply_markup) body.reply_markup = reply_markup;
45
+ return apiCall(token, "editMessageReplyMarkup", body);
46
+ }
47
+
48
+ /** answerCallbackQuery: clear the spinner on a tapped inline button (+ toast). */
49
+ export function answerCallbackQuery(token, callbackQueryId, text) {
50
+ const body = { callback_query_id: callbackQueryId };
51
+ if (text) body.text = text;
52
+ return apiCall(token, "answerCallbackQuery", body);
53
+ }
54
+
55
+ /** getUpdates: long-poll for inbound updates from a given offset. */
56
+ export async function getUpdates(token, { offset = 0, timeout = 25 } = {}) {
57
+ const res = await fetch(`${API_BASE}/bot${token}/getUpdates?timeout=${timeout}&offset=${offset}`);
58
+ if (!res.ok) throw new Error(`getUpdates ${res.status}`);
59
+ const json = await res.json();
60
+ if (!json.ok) throw new Error(json.description || "telegram error");
61
+ return json.result || [];
62
+ }
@@ -0,0 +1,238 @@
1
+ // ask_questions flow orchestration for Telegram, extracted from the host poller
2
+ // so that file stays focused on process lifecycle. Like dispatch.js, every
3
+ // function takes the poller instance (`self`) and reaches its I/O surface
4
+ // (self._send / _editKeyboard / _answerCallback / _startTyping) and config
5
+ // through it. The flow's own state machine lives in ./ask.js; this is the glue
6
+ // that turns its decisions into Telegram messages and re-enters the reply path.
7
+ import * as askFlow from "./ask.js";
8
+ import { resolveBotToken } from "./helpers.js";
9
+ import { buildStreamHandler, runTelegramSuperAgent, telegramErrorText, sendFinalReply } from "./reply.js";
10
+ import { createTelegramConfirmAdapter } from "#core/confirmation/adapters/telegram.js";
11
+ import { getConfirmationStore as getConfirmStore } from "#core/confirmation/pending-store.js";
12
+ import { getRecentTelegramTurnsFromFs, appendGlobalMessage } from "#core/stores/messages.js";
13
+ import { CHANNELS } from "#core/constants/channels.js";
14
+ import { SUPERAGENT_ACTOR_ID } from "#core/identity/index.js";
15
+
16
+ /**
17
+ * Route an inbound callback_query. ask_questions button presses are handled
18
+ * here; everything else falls through to the confirmation adapter. Both use
19
+ * `apx:<verb>:...` namespacing but the ask flow owns its own state.
20
+ */
21
+ export async function handleCallbackQuery(self, callbackQuery) {
22
+ const data = callbackQuery.data || "";
23
+ if (data.startsWith("apx:ask:")) {
24
+ await handleAskCallback(self, callbackQuery);
25
+ return;
26
+ }
27
+ const adapter = createTelegramConfirmAdapter({
28
+ token: resolveBotToken(self.channel),
29
+ chatId: callbackQuery.message?.chat?.id,
30
+ pendingStore: getConfirmStore(),
31
+ });
32
+ const handled = await adapter.handleCallbackQuery(callbackQuery);
33
+ if (!handled) {
34
+ self.log(`telegram[${self.channel.name}] unhandled callback_query: ${callbackQuery.data}`);
35
+ }
36
+ }
37
+
38
+ /**
39
+ * Draw the current question as a fresh message with its inline keyboard, wiping
40
+ * the previous question's keyboard so the chat reads as a clean history.
41
+ */
42
+ export async function renderQuestion(self, state) {
43
+ const text = askFlow.formatQuestionText(state);
44
+ const reply_markup = askFlow.buildKeyboard(state);
45
+ if (state.messageId) {
46
+ try {
47
+ await self._editKeyboard({
48
+ chat_id: state.chatId,
49
+ message_id: state.messageId,
50
+ reply_markup: { inline_keyboard: [] },
51
+ });
52
+ } catch { /* best-effort */ }
53
+ }
54
+ const sent = await self._send({ chat_id: state.chatId, text, reply_markup, parse_mode: "Markdown" });
55
+ state.messageId = sent?.message_id || null;
56
+ askFlow.saveState(state.chatId, state);
57
+ }
58
+
59
+ /**
60
+ * Kick off a brand-new ask flow after the super-agent called ask_questions. The
61
+ * flow's `resume` callback captures the per-turn context so when the compiled
62
+ * answer arrives we run another super-agent turn without retyping the inputs.
63
+ */
64
+ export async function startAskFlow(self, ctx) {
65
+ const state = askFlow.startFlow({
66
+ chatId: ctx.chat_id,
67
+ projectId: ctx.projectId,
68
+ authorId: ctx.authorId,
69
+ questions: ctx.questions,
70
+ resume: async (compiled) => {
71
+ await runResumedTurn(self, { ...ctx, compiled });
72
+ },
73
+ });
74
+ await renderQuestion(self, state);
75
+ }
76
+
77
+ /** Apply an inline-keyboard press, then react: redraw, advance, cancel or finish. */
78
+ export async function handleAskCallback(self, callbackQuery) {
79
+ const chatId = callbackQuery.message?.chat?.id;
80
+ if (!chatId) return;
81
+ const result = askFlow.applyCallback(chatId, callbackQuery.data || "");
82
+ // Ack the press regardless — keeps the spinner from hanging client-side.
83
+ await self._answerCallback({ callback_query_id: callbackQuery.id });
84
+ if (!result) return; // stale or unknown — adapter already ack'd.
85
+
86
+ if (result.action === "redraw") {
87
+ // Multi-select toggle: refresh the keyboard on the SAME message.
88
+ try {
89
+ await self._editKeyboard({
90
+ chat_id: chatId,
91
+ message_id: callbackQuery.message?.message_id,
92
+ reply_markup: askFlow.buildKeyboard(result.state),
93
+ });
94
+ } catch (e) {
95
+ self.log(`telegram[${self.channel.name}] redraw failed: ${e.message}`);
96
+ }
97
+ return;
98
+ }
99
+ if (result.action === "advance") {
100
+ await renderQuestion(self, result.state);
101
+ return;
102
+ }
103
+ if (result.action === "cancel") {
104
+ try {
105
+ await self._editKeyboard({
106
+ chat_id: chatId,
107
+ message_id: callbackQuery.message?.message_id,
108
+ reply_markup: { inline_keyboard: [] },
109
+ });
110
+ await self._send({ chat_id: chatId, text: "Pregunta cancelada." });
111
+ } catch { /* best-effort */ }
112
+ return;
113
+ }
114
+ if (result.action === "done") {
115
+ try {
116
+ await self._editKeyboard({
117
+ chat_id: chatId,
118
+ message_id: callbackQuery.message?.message_id,
119
+ reply_markup: { inline_keyboard: [] },
120
+ });
121
+ } catch { /* best-effort */ }
122
+ // Feed the compiled answer back as a synthetic user turn.
123
+ if (typeof result.state.resume === "function") {
124
+ await result.state.resume(result.compiled);
125
+ }
126
+ }
127
+ }
128
+
129
+ /**
130
+ * Apply a free-text user reply when there's a pending free-text question.
131
+ * Returns true iff the message was consumed by the ask flow (so the normal
132
+ * super-agent path should be skipped for this update).
133
+ */
134
+ export async function maybeConsumeAskTextAnswer(self, { chat_id, text }) {
135
+ if (!chat_id || !text) return false;
136
+ if (!askFlow.hasPendingFreeText(chat_id)) return false;
137
+ const state = askFlow.applyTextAnswer(chat_id, text);
138
+ if (!state) return false;
139
+ // Advance: emit a synthetic "next" to move past this question.
140
+ const next = askFlow.applyCallback(chat_id, `apx:ask:${state.correlationId}:next`);
141
+ if (!next) return true;
142
+ if (next.action === "advance") {
143
+ await renderQuestion(self, next.state);
144
+ return true;
145
+ }
146
+ if (next.action === "done") {
147
+ if (typeof next.state.resume === "function") {
148
+ await next.state.resume(next.compiled);
149
+ }
150
+ return true;
151
+ }
152
+ return true;
153
+ }
154
+
155
+ /**
156
+ * Run a follow-up super-agent turn with the compiled answers as the user prompt.
157
+ * Shares the exact reply path as a normal inbound turn (./reply.js) — only the
158
+ * photo/audio/reset preamble is skipped. Re-enters the ask flow if the model
159
+ * decides to ask again.
160
+ */
161
+ export async function runResumedTurn(self, ctx) {
162
+ const { chat_id, compiled, target, relationshipBlock, allowedTools, author, agentDisplay, update_id, sender, authorId } = ctx;
163
+ if (!chat_id) return;
164
+ // Log the synthetic user message so getRecentTelegramTurnsFromFs picks it up
165
+ // on the NEXT inbound. Mirrors how a normal text reply would be recorded.
166
+ appendGlobalMessage({
167
+ channel: CHANNELS.TELEGRAM,
168
+ direction: "in",
169
+ type: "user",
170
+ actor_id: authorId ? String(authorId) : (author || "ask_flow"),
171
+ external_id: `ask-${Date.now()}`,
172
+ author: author || "user",
173
+ body: compiled,
174
+ meta: { chat_id, user_id: authorId || null, tg_channel: self.channel.name, ask_flow: true },
175
+ });
176
+
177
+ const previousMessages = getRecentTelegramTurnsFromFs({ chat_id, keepRecent: 40, max_age_hours: 24 });
178
+
179
+ const { onEvent, state } = buildStreamHandler(self, { chat_id, update_id, agentDisplay });
180
+ const stopTyping = self._startTyping(chat_id);
181
+ let replyText;
182
+ let replyAuthor;
183
+ let saUsage = null;
184
+ try {
185
+ const sa = await runTelegramSuperAgent(self, {
186
+ chat_id,
187
+ prompt: compiled,
188
+ previousMessages,
189
+ target,
190
+ author,
191
+ relationshipBlock,
192
+ allowedTools,
193
+ onEvent,
194
+ });
195
+
196
+ // Did the model ask again? Restart the flow instead of replying.
197
+ const followupAsk = askFlow.extractAskQuestionsFromTrace(sa.trace);
198
+ if (followupAsk) {
199
+ stopTyping();
200
+ await startAskFlow(self, {
201
+ chat_id,
202
+ projectId: target?.id,
203
+ authorId,
204
+ questions: followupAsk,
205
+ author,
206
+ agentDisplay,
207
+ relationshipBlock,
208
+ allowedTools,
209
+ target,
210
+ sender,
211
+ update_id,
212
+ });
213
+ return;
214
+ }
215
+ replyText = sa.text;
216
+ replyAuthor = sa.name || agentDisplay;
217
+ saUsage = sa.usage;
218
+ } catch (e) {
219
+ self.log(`telegram[${self.channel.name}] ask resume failed: ${e.message}`);
220
+ replyText = telegramErrorText(self, e);
221
+ replyAuthor = agentDisplay;
222
+ }
223
+
224
+ stopTyping();
225
+ await sendFinalReply(self, {
226
+ chat_id,
227
+ update_id,
228
+ replyText,
229
+ replyAuthor,
230
+ replyActorId: SUPERAGENT_ACTOR_ID,
231
+ replyKind: "superagent",
232
+ saUsage,
233
+ streamedCount: state.streamedCount,
234
+ lastStreamedText: state.lastStreamedText,
235
+ agentDisplay,
236
+ extraMeta: { ask_resume: true },
237
+ });
238
+ }