@ducci/jarvis 1.0.94 → 1.0.96
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/telegram.md +45 -10
- package/package.json +1 -1
- package/src/channels/telegram/index.js +80 -0
- package/src/server/agent.js +32 -8
- package/src/server/config.js +1 -0
- package/src/server/tools.js +34 -3
package/docs/telegram.md
CHANGED
|
@@ -248,7 +248,7 @@ await bot.api.setMyCommands([
|
|
|
248
248
|
|
|
249
249
|
## Photo Support
|
|
250
250
|
|
|
251
|
-
The bot handles incoming photos (`message:photo`) in addition to text. When a user sends a photo, the adapter selects the best resolution under 800px wide to keep token usage reasonable
|
|
251
|
+
The bot handles incoming photos (`message:photo`) in addition to text. When a user sends a photo, the adapter selects the best resolution under 800px wide to keep token usage reasonable.
|
|
252
252
|
|
|
253
253
|
### Photo selection
|
|
254
254
|
|
|
@@ -265,30 +265,65 @@ This gives the highest quality image below the 800px threshold. Sending the full
|
|
|
265
265
|
|
|
266
266
|
The image is downloaded immediately at receive time using the Telegram file URL (`https://api.telegram.org/file/bot<token>/<file_path>`) and converted to a base64 data URL (`data:image/jpeg;base64,...`). The data URL is stored directly in the session message, so the image remains available across handoffs and future conversation turns without depending on a Telegram URL that would expire after ~1 hour. Base64 encoding does not cost more tokens than a URL — image token cost is based on pixel dimensions, not transport format.
|
|
267
267
|
|
|
268
|
-
###
|
|
268
|
+
### Image processing paths
|
|
269
269
|
|
|
270
|
-
|
|
270
|
+
How the image reaches the model depends on whether a dedicated vision model is configured:
|
|
271
|
+
|
|
272
|
+
**Path 1 — `visionModel` configured** (`settings.json: visionProvider + visionModel`):
|
|
273
|
+
Before the main agent call, the adapter calls `describeImage()` — a separate, one-shot API call to the vision model. The result (a text description of the image) is injected into the user turn as plain text. The main agent never sees the image itself; it only sees the description. This allows a cheap non-multimodal main model to handle image conversations.
|
|
274
|
+
|
|
275
|
+
**Path 2 — No `visionModel`, multimodal main model**:
|
|
276
|
+
The base64 data URL is passed directly to the main model as an `image_url` content block alongside any caption. The model processes the image natively.
|
|
271
277
|
|
|
272
278
|
```js
|
|
273
279
|
const content = [
|
|
274
|
-
{ type: 'image_url', url:
|
|
280
|
+
{ type: 'image_url', image_url: { url: 'data:image/jpeg;base64,...' } },
|
|
281
|
+
{ type: 'text', text: caption },
|
|
275
282
|
];
|
|
276
|
-
if (caption) content.push({ type: 'text', text: caption });
|
|
277
283
|
```
|
|
278
284
|
|
|
279
|
-
|
|
285
|
+
**Fallback — model rejects image input**:
|
|
286
|
+
If the main model returns an error indicating it does not support image input (`isImageUnsupportedError`), the agent responds with a clear message ("This model does not support image input…") and strips the image from the session so subsequent messages are not permanently broken. A text placeholder is inserted in its place so the model retains context.
|
|
280
287
|
|
|
281
288
|
### Caption
|
|
282
289
|
|
|
283
|
-
If the user attaches a caption to the photo (`ctx.message.caption`), it is included as a text block
|
|
290
|
+
If the user attaches a caption to the photo (`ctx.message.caption`), it is included alongside the image (as a text block in multimodal mode, or appended to the vision description in Path 1). If there is no caption, only the image content is sent.
|
|
291
|
+
|
|
292
|
+
### Unsupported incoming media types
|
|
293
|
+
|
|
294
|
+
Documents, audio files, video, stickers, and other non-photo non-voice media types sent by the user are not handled — the bot silently ignores them.
|
|
295
|
+
|
|
296
|
+
## Outgoing Files
|
|
297
|
+
|
|
298
|
+
The agent can send files from the server to the Telegram chat using the `send_file` seed tool. This complements the text-only `send_telegram_message` tool for cases where the agent has produced or located a file the user needs.
|
|
299
|
+
|
|
300
|
+
### Tool interface
|
|
301
|
+
|
|
302
|
+
```js
|
|
303
|
+
send_file({ path: '/absolute/or/~/path/to/file', caption: 'Optional caption' })
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
The tool resolves `~` to the home directory, checks that the file exists, and calls the channel-provided `sendFile` callback. It returns `{ status: 'error', error: '...' }` if the file is not found or the channel does not support file sending.
|
|
307
|
+
|
|
308
|
+
### Channel integration
|
|
309
|
+
|
|
310
|
+
The Telegram adapter passes an `onSendFile` callback to `handleChat`:
|
|
311
|
+
|
|
312
|
+
```js
|
|
313
|
+
handleChat(config, sessionId, userText, attachments, onCheckpoint, async (filePath, caption) => {
|
|
314
|
+
await api.sendDocument(chatId, new InputFile(filePath), caption ? { caption } : {});
|
|
315
|
+
});
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
`InputFile(filePath)` streams the file from disk — no in-memory buffering of the full file. The callback is threaded through `handleChat → _runHandleChat → runAgentLoop → executeTool` and injected into the tool's `AsyncFunction` as the `sendFile` parameter.
|
|
284
319
|
|
|
285
|
-
###
|
|
320
|
+
### Channel support
|
|
286
321
|
|
|
287
|
-
|
|
322
|
+
`send_file` only works in channels that register an `onSendFile` callback (currently: Telegram). In other contexts (web UI, cron runs), the tool returns an error immediately rather than silently succeeding.
|
|
288
323
|
|
|
289
324
|
## Non-Goals (v1)
|
|
290
325
|
|
|
291
|
-
- No support for documents, audio, video, or other non-photo media
|
|
326
|
+
- No support for receiving documents, audio files, video, or other non-photo non-voice media from the user
|
|
292
327
|
- No inline keyboards or callback queries
|
|
293
328
|
- No group chat support (only private chats)
|
|
294
329
|
- No message editing or deletion handling
|
package/package.json
CHANGED
package/src/channels/telegram/index.js
CHANGED
|
@@ -658,6 +658,9 @@ export async function startTelegramChannel(config) {
|
|
|
658
658
|
lastCheckpointSent = prefixed;
|
|
659
659
|
await appendTelegramChatLog(chatId, getSessionId(chatId, slot) || null, 'JARVIS', prefixed);
|
|
660
660
|
await sendMessage(api, chatId, prefixed, getSessionId(chatId, slot) || null);
|
|
661
|
+
}, async (filePath, caption) => {
|
|
662
|
+
await api.sendDocument(chatId, new InputFile(filePath), caption ? { caption } : {});
|
|
663
|
+
console.log(`[telegram] file sent chat_id=${chatId} slot=${slot} path=${filePath}`);
|
|
661
664
|
});
|
|
662
665
|
} catch (e) {
|
|
663
666
|
console.error(`[telegram] agent error chat_id=${chatId} slot=${slot}: ${e.message}`);
|
|
@@ -850,6 +853,83 @@ export async function startTelegramChannel(config) {
|
|
|
850
853
|
}
|
|
851
854
|
});
|
|
852
855
|
|
|
856
|
+
bot.on('message:document', async (ctx) => {
|
|
857
|
+
const userId = ctx.from?.id;
|
|
858
|
+
if (!allowedUserIds.includes(userId)) return;
|
|
859
|
+
|
|
860
|
+
const chatId = ctx.chat.id;
|
|
861
|
+
const ts = new Date().toISOString();
|
|
862
|
+
const doc = ctx.message.document;
|
|
863
|
+
|
|
864
|
+
const MAX_BYTES = 20 * 1024 * 1024; // 20MB — Telegram bot API getFile limit
|
|
865
|
+
if (doc.file_size && doc.file_size > MAX_BYTES) {
|
|
866
|
+
await ctx.reply(`File too large (${Math.round(doc.file_size / 1024 / 1024)}MB). Telegram bot API limit is 20MB.`).catch(() => {});
|
|
867
|
+
return;
|
|
868
|
+
}
|
|
869
|
+
|
|
870
|
+
console.log(`[telegram] incoming document chat_id=${chatId} name=${doc.file_name} size=${doc.file_size}`);
|
|
871
|
+
|
|
872
|
+
let savedPath;
|
|
873
|
+
try {
|
|
874
|
+
const file = await ctx.api.getFile(doc.file_id);
|
|
875
|
+
const fileUrl = `https://api.telegram.org/file/bot${token}/${file.file_path}`;
|
|
876
|
+
const response = await fetch(fileUrl);
|
|
877
|
+
const buffer = Buffer.from(await response.arrayBuffer());
|
|
878
|
+
|
|
879
|
+
fs.mkdirSync(PATHS.uploadsDir, { recursive: true });
|
|
880
|
+
const safeName = (doc.file_name || 'file').replace(/[^a-zA-Z0-9._-]/g, '_');
|
|
881
|
+
savedPath = path.join(PATHS.uploadsDir, `${Date.now()}-${safeName}`);
|
|
882
|
+
fs.writeFileSync(savedPath, buffer);
|
|
883
|
+
} catch (e) {
|
|
884
|
+
console.error(`[telegram] document download error chat_id=${chatId}: ${e.message}`);
|
|
885
|
+
await ctx.reply('Sorry, could not download the file.').catch(() => {});
|
|
886
|
+
return;
|
|
887
|
+
}
|
|
888
|
+
|
|
889
|
+
const sizeKb = doc.file_size ? `${Math.round(doc.file_size / 1024)} KB` : 'unknown size';
|
|
890
|
+
const mimeType = doc.mime_type || 'application/octet-stream';
|
|
891
|
+
const fileInfo = `[User sent a file: ${savedPath} (${mimeType}, ${sizeKb})]`;
|
|
892
|
+
const userText = ctx.message.caption ? `${fileInfo}\n${ctx.message.caption}` : fileInfo;
|
|
893
|
+
|
|
894
|
+
const entry = { text: userText, attachments: [], ts };
|
|
895
|
+
const slot = getActiveSlot(chatId);
|
|
896
|
+
const key = slotKey(chatId, slot);
|
|
897
|
+
|
|
898
|
+
if (isRunning.has(key)) {
|
|
899
|
+
if (!pendingMessages.has(key)) pendingMessages.set(key, []);
|
|
900
|
+
pendingMessages.get(key).push(entry);
|
|
901
|
+
console.log(`[telegram] buffered document chat_id=${chatId} slot=${slot} pending=${pendingMessages.get(key).length}`);
|
|
902
|
+
return;
|
|
903
|
+
}
|
|
904
|
+
|
|
905
|
+
isRunning.add(key);
|
|
906
|
+
runStartTimes.set(key, new Date());
|
|
907
|
+
await ctx.api.sendChatAction(chatId, 'typing');
|
|
908
|
+
const typingInterval = setInterval(() => {
|
|
909
|
+
ctx.api.sendChatAction(chatId, 'typing').catch(() => {});
|
|
910
|
+
}, 4000);
|
|
911
|
+
|
|
912
|
+
try {
|
|
913
|
+
await processQueue(ctx.api, chatId, slot, [entry]);
|
|
914
|
+
} finally {
|
|
915
|
+
clearInterval(typingInterval);
|
|
916
|
+
isRunning.delete(key);
|
|
917
|
+
runStartTimes.delete(key);
|
|
918
|
+
}
|
|
919
|
+
});
|
|
920
|
+
|
|
921
|
+
bot.on('message:audio', async (ctx) => {
|
|
922
|
+
const userId = ctx.from?.id;
|
|
923
|
+
if (!allowedUserIds.includes(userId)) return;
|
|
924
|
+
await ctx.reply("I can't process audio files. Send a voice message for speech input, or send the file using \"Send as file\" if you want me to read it.").catch(() => {});
|
|
925
|
+
});
|
|
926
|
+
|
|
927
|
+
bot.on('message:video', async (ctx) => {
|
|
928
|
+
const userId = ctx.from?.id;
|
|
929
|
+
if (!allowedUserIds.includes(userId)) return;
|
|
930
|
+
await ctx.reply("I can't process video files. Use \"Send as file\" if you want me to access the file.").catch(() => {});
|
|
931
|
+
});
|
|
932
|
+
|
|
853
933
|
bot.on('message:text', async (ctx) => {
|
|
854
934
|
const userId = ctx.from?.id;
|
|
855
935
|
|
package/src/server/agent.js
CHANGED
|
@@ -131,6 +131,18 @@ function extractApiError(err, model) {
|
|
|
131
131
|
};
|
|
132
132
|
}
|
|
133
133
|
|
|
134
|
+
// Extract the most specific error description available.
|
|
135
|
+
// Provider-specific APIs (e.g. z.ai) return a code + message in err.error;
|
|
136
|
+
// the OpenAI SDK sets err.message to the body message but sometimes prepends
|
|
137
|
+
// the HTTP status code, making it less readable. Prefer err.error.message + code.
|
|
138
|
+
function describeApiError(err) {
|
|
139
|
+
const bodyMsg = err?.error?.message;
|
|
140
|
+
const bodyCode = err?.error?.code;
|
|
141
|
+
if (bodyMsg && bodyCode) return `${bodyMsg} (code: ${bodyCode})`;
|
|
142
|
+
if (bodyMsg) return bodyMsg;
|
|
143
|
+
return err?.message ?? String(err);
|
|
144
|
+
}
|
|
145
|
+
|
|
134
146
|
async function callModelWithFallback(client, config, messages, tools) {
|
|
135
147
|
let primaryErr = null;
|
|
136
148
|
try {
|
|
@@ -141,9 +153,21 @@ async function callModelWithFallback(client, config, messages, tools) {
|
|
|
141
153
|
try {
|
|
142
154
|
return await callModel(client, config.fallbackModel, messages, tools);
|
|
143
155
|
} catch (fallbackErr) {
|
|
144
|
-
const
|
|
145
|
-
|
|
146
|
-
|
|
156
|
+
const primaryDesc = describeApiError(primaryErr);
|
|
157
|
+
const fallbackDesc = describeApiError(fallbackErr);
|
|
158
|
+
const sameModel = config.selectedModel === config.fallbackModel;
|
|
159
|
+
const sameError = primaryDesc === fallbackDesc;
|
|
160
|
+
|
|
161
|
+
let msg;
|
|
162
|
+
if (sameModel && sameError) {
|
|
163
|
+
msg = `Model (${config.selectedModel}) failed: ${fallbackDesc}`;
|
|
164
|
+
} else if (sameModel) {
|
|
165
|
+
msg = `Model (${config.selectedModel}) failed. Primary: ${primaryDesc} | Fallback: ${fallbackDesc}`;
|
|
166
|
+
} else {
|
|
167
|
+
msg = `Both primary (${config.selectedModel}) and fallback (${config.fallbackModel}) models failed. Primary: ${primaryDesc} | Fallback: ${fallbackDesc}`;
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
const combined = new Error(msg);
|
|
147
171
|
combined.apiErrors = {
|
|
148
172
|
primary: extractApiError(primaryErr, config.selectedModel),
|
|
149
173
|
fallback: extractApiError(fallbackErr, config.fallbackModel),
|
|
@@ -375,7 +399,7 @@ export async function runAgentLoop(client, config, session, prepareMessages, usa
|
|
|
375
399
|
if (toolName === 'spawn_subagent') {
|
|
376
400
|
result = await runSubagent(client, config, toolArgs, config._sessionId);
|
|
377
401
|
} else {
|
|
378
|
-
result = await executeTool(tools, toolName, toolArgs);
|
|
402
|
+
result = await executeTool(tools, toolName, toolArgs, { sendFile: config._sendFile ?? null });
|
|
379
403
|
}
|
|
380
404
|
} catch (e) {
|
|
381
405
|
result = { status: 'error', error: e.message };
|
|
@@ -685,7 +709,7 @@ export async function withSessionLock(sessionId, fn) {
|
|
|
685
709
|
* Main entry point: handles a single POST /api/chat request.
|
|
686
710
|
* Manages the handoff loop across multiple agent runs.
|
|
687
711
|
*/
|
|
688
|
-
export async function handleChat(config, requestSessionId, userMessage, attachments = [], onCheckpoint = null) {
|
|
712
|
+
export async function handleChat(config, requestSessionId, userMessage, attachments = [], onCheckpoint = null, onSendFile = null) {
|
|
689
713
|
const sessionId = requestSessionId || crypto.randomUUID();
|
|
690
714
|
|
|
691
715
|
// Serialize concurrent requests for the same session. Each request registers
|
|
@@ -699,7 +723,7 @@ export async function handleChat(config, requestSessionId, userMessage, attachme
|
|
|
699
723
|
await previous;
|
|
700
724
|
|
|
701
725
|
try {
|
|
702
|
-
return await _runHandleChat(config, sessionId, userMessage, attachments, onCheckpoint);
|
|
726
|
+
return await _runHandleChat(config, sessionId, userMessage, attachments, onCheckpoint, onSendFile);
|
|
703
727
|
} finally {
|
|
704
728
|
releaseLock();
|
|
705
729
|
// Clean up only if no one else has queued behind us
|
|
@@ -713,7 +737,7 @@ export async function handleChat(config, requestSessionId, userMessage, attachme
|
|
|
713
737
|
* The actual chat logic, extracted so handleChat can wrap it cleanly with the
|
|
714
738
|
* session lock.
|
|
715
739
|
*/
|
|
716
|
-
async function _runHandleChat(config, sessionId, userMessage, attachments = [], onCheckpoint = null) {
|
|
740
|
+
async function _runHandleChat(config, sessionId, userMessage, attachments = [], onCheckpoint = null, onSendFile = null) {
|
|
717
741
|
const client = createClient(config);
|
|
718
742
|
|
|
719
743
|
const systemPromptTemplate = loadSystemPrompt();
|
|
@@ -813,7 +837,7 @@ async function _runHandleChat(config, sessionId, userMessage, attachments = [],
|
|
|
813
837
|
}
|
|
814
838
|
|
|
815
839
|
const runStartIndex = session.messages.length;
|
|
816
|
-
const run = await runAgentLoop(client, { ...config, _sessionId: sessionId }, session, prepareMessages, usageAccum);
|
|
840
|
+
const run = await runAgentLoop(client, { ...config, _sessionId: sessionId, _sendFile: onSendFile }, session, prepareMessages, usageAccum);
|
|
817
841
|
allToolCalls.push(...run.runToolCalls);
|
|
818
842
|
|
|
819
843
|
if (run.status !== 'checkpoint_reached') {
|
package/src/server/config.js
CHANGED
|
@@ -23,6 +23,7 @@ export const PATHS = {
|
|
|
23
23
|
identityFile: path.join(JARVIS_DIR, 'data', 'identity.md'),
|
|
24
24
|
skillsDir: path.join(JARVIS_DIR, 'data', 'skills'),
|
|
25
25
|
cronsFile: path.join(JARVIS_DIR, 'data', 'crons.json'),
|
|
26
|
+
uploadsDir: path.join(JARVIS_DIR, 'uploads'),
|
|
26
27
|
systemPromptFile: path.join(__dirname, '..', '..', 'docs', 'system-prompt.md'),
|
|
27
28
|
};
|
|
28
29
|
|
package/src/server/tools.js
CHANGED
|
@@ -379,6 +379,37 @@ const SEED_TOOLS = {
|
|
|
379
379
|
};
|
|
380
380
|
`,
|
|
381
381
|
},
|
|
382
|
+
send_file: {
|
|
383
|
+
definition: {
|
|
384
|
+
type: 'function',
|
|
385
|
+
function: {
|
|
386
|
+
name: 'send_file',
|
|
387
|
+
description: 'Send a file from disk to the user in the current chat (e.g. Telegram). Supports any file type: images, PDFs, text files, archives, etc. Use a caption to describe the file.',
|
|
388
|
+
parameters: {
|
|
389
|
+
type: 'object',
|
|
390
|
+
properties: {
|
|
391
|
+
path: {
|
|
392
|
+
type: 'string',
|
|
393
|
+
description: 'Absolute or ~ path to the file to send.',
|
|
394
|
+
},
|
|
395
|
+
caption: {
|
|
396
|
+
type: 'string',
|
|
397
|
+
description: 'Optional caption displayed with the file.',
|
|
398
|
+
},
|
|
399
|
+
},
|
|
400
|
+
required: ['path'],
|
|
401
|
+
},
|
|
402
|
+
},
|
|
403
|
+
},
|
|
404
|
+
code: `
|
|
405
|
+
const _p = args.path;
|
|
406
|
+
const targetPath = path.resolve(_p === '~' || _p.startsWith('~/') ? require('os').homedir() + _p.slice(1) : _p);
|
|
407
|
+
if (!fs.existsSync(targetPath)) return { status: 'error', error: 'File not found: ' + targetPath };
|
|
408
|
+
if (typeof sendFile !== 'function') return { status: 'error', error: 'send_file is not supported in this channel.' };
|
|
409
|
+
await sendFile(targetPath, args.caption || '');
|
|
410
|
+
return { status: 'ok', path: targetPath };
|
|
411
|
+
`,
|
|
412
|
+
},
|
|
382
413
|
create_cron: {
|
|
383
414
|
definition: {
|
|
384
415
|
type: 'function',
|
|
@@ -793,13 +824,13 @@ export function getToolDefinitions(tools) {
|
|
|
793
824
|
return defs;
|
|
794
825
|
}
|
|
795
826
|
|
|
796
|
-
export async function executeTool(tools, name, toolArgs) {
|
|
827
|
+
export async function executeTool(tools, name, toolArgs, { sendFile = null } = {}) {
|
|
797
828
|
const tool = tools[name];
|
|
798
829
|
if (!tool) {
|
|
799
830
|
throw new Error(`Unknown tool: ${name}`);
|
|
800
831
|
}
|
|
801
832
|
|
|
802
|
-
const fn = new AsyncFunction('args', 'fs', 'path', 'process', 'require', '__jarvisDir', tool.code);
|
|
833
|
+
const fn = new AsyncFunction('args', 'fs', 'path', 'process', 'require', '__jarvisDir', 'sendFile', tool.code);
|
|
803
834
|
|
|
804
835
|
// Tools can declare their own timeout (e.g. system_install needs 5 min).
|
|
805
836
|
// Falls back to the global default of 60s.
|
|
@@ -812,5 +843,5 @@ export async function executeTool(tools, name, toolArgs) {
|
|
|
812
843
|
)
|
|
813
844
|
);
|
|
814
845
|
|
|
815
|
-
return await Promise.race([fn(toolArgs, fs, path, process, _require, __jarvisDir), timeout]);
|
|
846
|
+
return await Promise.race([fn(toolArgs, fs, path, process, _require, __jarvisDir, sendFile), timeout]);
|
|
816
847
|
}
|