@ducci/jarvis 1.0.68 → 1.0.70

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -64,6 +64,7 @@ Never include markdown code fences, preamble, or any text outside this JSON obje
64
64
  You have access to a set of tools. Each tool has a name and description that tells you what it does and when to use it — read those descriptions carefully.
65
65
 
66
66
  - Always use a tool to perform an action. Never claim to have done something without actually calling the relevant tool.
67
+ - If answering a question requires checking a file, reading data, or calling any tool, do it first — never send a response that announces what you are about to do. Perform the action, then report the result.
67
68
  - Call tools one at a time. You will receive the result before deciding on the next step. Exception: when using `spawn_subagent` for bulk tasks (e.g. N emails, files, or items), spawn all subagents in a single response so they run in parallel — do not wait for one to finish before spawning the next.
68
69
  - After a tool call, verify the result before proceeding. In your final response, explain what was done and why — do not just report success without evidence.
69
70
  - Stop as soon as the task is complete and verified. Do not do extra work that was not asked for.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ducci/jarvis",
3
- "version": "1.0.68",
3
+ "version": "1.0.70",
4
4
  "description": "A fully automated agent system that lives on a server.",
5
5
  "main": "./src/index.js",
6
6
  "type": "module",
@@ -154,9 +154,15 @@ export async function startTelegramChannel(config) {
154
154
  }
155
155
  }
156
156
 
157
+ let lastCheckpointSent = null;
157
158
  let result;
158
159
  try {
159
- result = await handleChat(config, sessionId, userText, allAttachments);
160
+ result = await handleChat(config, sessionId, userText, allAttachments, async (checkpointResponse) => {
161
+ const text = typeof checkpointResponse === 'string' ? checkpointResponse : JSON.stringify(checkpointResponse);
162
+ lastCheckpointSent = text;
163
+ await appendTelegramChatLog(chatId, sessions[chatId] || null, 'JARVIS', text);
164
+ await sendMessage(api, chatId, text, sessions[chatId] || null);
165
+ });
160
166
  } catch (e) {
161
167
  console.error(`[telegram] agent error chat_id=${chatId}: ${e.message}`);
162
168
  const errText = e.message
@@ -185,9 +191,16 @@ export async function startTelegramChannel(config) {
185
191
  : result.response != null ? JSON.stringify(result.response, null, 2) : '';
186
192
  const text = rawResponse.trim()
187
193
  || 'The agent encountered an error and could not produce a response. Please try again.';
188
- await appendTelegramChatLog(chatId, result.sessionId, 'JARVIS', text);
189
- await sendMessage(api, chatId, text, result.sessionId);
190
- console.log(`[telegram] response sent chat_id=${chatId} length=${text.length}`);
194
+ // Skip sending if this response was already sent as a checkpoint update —
195
+ // intervention_required and zero-progress reuse the last checkpoint response
196
+ // as their finalResponse, which would otherwise cause a duplicate message.
197
+ if (text !== lastCheckpointSent) {
198
+ await appendTelegramChatLog(chatId, result.sessionId, 'JARVIS', text);
199
+ await sendMessage(api, chatId, text, result.sessionId);
200
+ console.log(`[telegram] response sent chat_id=${chatId} length=${text.length}`);
201
+ } else {
202
+ console.log(`[telegram] skipped duplicate final response chat_id=${chatId}`);
203
+ }
191
204
  } catch (e) {
192
205
  console.error(`[telegram] delivery error chat_id=${chatId}: ${e.message}`);
193
206
  await api.sendMessage(chatId, 'Sorry, something went wrong sending the response. Please try again.').catch(() => {});
@@ -390,16 +390,42 @@ async function run() {
390
390
  console.log(chalk.green(`\nModel ${chalk.bold(selectedModel)} saved to settings.`));
391
391
 
392
392
  // --- VISION MODEL STEP (OPTIONAL) ---
393
- const { configureVision } = await inquirer.prompt([
394
- {
395
- type: 'confirm',
396
- name: 'configureVision',
397
- message: 'Do you want to configure a separate vision model for image analysis (e.g. for Telegram photos)?',
398
- default: !!settings.visionModel,
393
+ let skipVision = false;
394
+ if (settings.visionModel) {
395
+ const { visionAction } = await inquirer.prompt([
396
+ {
397
+ type: 'list',
398
+ name: 'visionAction',
399
+ message: `Vision model is configured (${chalk.yellow(settings.visionModel)}). What do you want to do?`,
400
+ choices: [
401
+ { name: 'Keep current vision model', value: 'keep' },
402
+ { name: 'Change vision model', value: 'change' },
403
+ { name: 'Disable vision', value: 'disable' },
404
+ ],
405
+ }
406
+ ]);
407
+ if (visionAction === 'keep') {
408
+ skipVision = true;
409
+ } else if (visionAction === 'disable') {
410
+ delete settings.visionProvider;
411
+ delete settings.visionModel;
412
+ saveSettings(settings);
413
+ console.log(chalk.yellow('Vision model disabled.'));
414
+ skipVision = true;
399
415
  }
400
- ]);
416
+ } else {
417
+ const { configureVision } = await inquirer.prompt([
418
+ {
419
+ type: 'confirm',
420
+ name: 'configureVision',
421
+ message: 'Do you want to configure a separate vision model for image analysis (e.g. for Telegram photos)?',
422
+ default: false,
423
+ }
424
+ ]);
425
+ if (!configureVision) skipVision = true;
426
+ }
401
427
 
402
- if (configureVision) {
428
+ if (!skipVision) {
403
429
  const { visionProvider } = await inquirer.prompt([
404
430
  {
405
431
  type: 'list',
@@ -558,11 +584,6 @@ async function run() {
558
584
  settings.visionModel = visionModel;
559
585
  saveSettings(settings);
560
586
  console.log(chalk.green(`Vision model ${chalk.bold(visionModel)} saved.`));
561
- } else {
562
- // Clear vision config if user opts out
563
- delete settings.visionProvider;
564
- delete settings.visionModel;
565
- saveSettings(settings);
566
587
  }
567
588
 
568
589
  // --- TELEGRAM CHANNEL STEP (OPTIONAL) ---
@@ -640,44 +661,6 @@ async function run() {
640
661
  }
641
662
  }
642
663
 
643
- // --- PERPLEXITY STEP (OPTIONAL) ---
644
- const existingPerplexityKey = loadEnvVar('PERPLEXITY_API_KEY');
645
- const { configurePerplexity } = await inquirer.prompt([
646
- {
647
- type: 'confirm',
648
- name: 'configurePerplexity',
649
- message: 'Do you want to configure Perplexity web search?',
650
- default: !!existingPerplexityKey
651
- }
652
- ]);
653
-
654
- if (configurePerplexity) {
655
- let keepPerplexityKey = false;
656
- if (existingPerplexityKey) {
657
- const { keep } = await inquirer.prompt([
658
- {
659
- type: 'confirm',
660
- name: 'keep',
661
- message: 'A PERPLEXITY_API_KEY is already configured. Do you want to keep it?',
662
- default: true
663
- }
664
- ]);
665
- keepPerplexityKey = keep;
666
- }
667
- if (!keepPerplexityKey) {
668
- const { perplexityKey } = await inquirer.prompt([
669
- {
670
- type: 'password',
671
- name: 'perplexityKey',
672
- message: 'Enter your Perplexity API key (from perplexity.ai/settings/api):',
673
- validate: (input) => input.trim().length > 0 || 'API key cannot be empty.'
674
- }
675
- ]);
676
- saveEnvVar('PERPLEXITY_API_KEY', perplexityKey.trim());
677
- console.log(chalk.green('Perplexity API key saved.'));
678
- }
679
- }
680
-
681
664
  // --- PM2 + LOG ROTATION STEP ---
682
665
  const pm2Check = spawnSync('pm2', ['--version'], { stdio: 'pipe' });
683
666
  if (pm2Check.status !== 0) {
@@ -618,7 +618,7 @@ export async function withSessionLock(sessionId, fn) {
618
618
  * Main entry point: handles a single POST /api/chat request.
619
619
  * Manages the handoff loop across multiple agent runs.
620
620
  */
621
- export async function handleChat(config, requestSessionId, userMessage, attachments = []) {
621
+ export async function handleChat(config, requestSessionId, userMessage, attachments = [], onCheckpoint = null) {
622
622
  const sessionId = requestSessionId || crypto.randomUUID();
623
623
 
624
624
  // Serialize concurrent requests for the same session. Each request registers
@@ -632,7 +632,7 @@ export async function handleChat(config, requestSessionId, userMessage, attachme
632
632
  await previous;
633
633
 
634
634
  try {
635
- return await _runHandleChat(config, sessionId, userMessage, attachments);
635
+ return await _runHandleChat(config, sessionId, userMessage, attachments, onCheckpoint);
636
636
  } finally {
637
637
  releaseLock();
638
638
  // Clean up only if no one else has queued behind us
@@ -646,7 +646,7 @@ export async function handleChat(config, requestSessionId, userMessage, attachme
646
646
  * The actual chat logic, extracted so handleChat can wrap it cleanly with the
647
647
  * session lock.
648
648
  */
649
- async function _runHandleChat(config, sessionId, userMessage, attachments = []) {
649
+ async function _runHandleChat(config, sessionId, userMessage, attachments = [], onCheckpoint = null) {
650
650
  const client = createClient(config);
651
651
 
652
652
  const systemPromptTemplate = loadSystemPrompt();
@@ -802,7 +802,9 @@ async function _runHandleChat(config, sessionId, userMessage, attachments = [])
802
802
  break;
803
803
  }
804
804
 
805
- // Checkpoint reached — log this run
805
+ // Checkpoint reached — log this run and notify the caller (e.g. Telegram adapter)
806
+ // so intermediate progress is visible to the user instead of being swallowed
807
+ // by the handoff loop until the final response.
806
808
  await appendLog(sessionId, {
807
809
  iteration: run.iteration,
808
810
  model: config.selectedModel,
@@ -812,6 +814,7 @@ async function _runHandleChat(config, sessionId, userMessage, attachments = [])
812
814
  logSummary: run.logSummary,
813
815
  status: 'checkpoint_reached',
814
816
  });
817
+ if (onCheckpoint) await onCheckpoint(run.response);
815
818
 
816
819
  // Accumulate failedApproaches from this run into session metadata so the
817
820
  // full history of failed strategies is available across all handoff runs.
@@ -656,6 +656,71 @@ const SEED_TOOLS = {
656
656
  return { status: 'ok', name: args.name, content };
657
657
  `,
658
658
  },
659
+ analyze_image: {
660
+ definition: {
661
+ type: 'function',
662
+ function: {
663
+ name: 'analyze_image',
664
+ description: 'Fetch an image from a URL and analyze it using the configured vision model. Returns a detailed description of the image. Use this whenever a user shares an image URL and asks about its content.',
665
+ parameters: {
666
+ type: 'object',
667
+ properties: {
668
+ url: {
669
+ type: 'string',
670
+ description: 'The URL of the image to analyze (http or https).',
671
+ },
672
+ prompt: {
673
+ type: 'string',
674
+ description: 'Optional question or instruction for the vision model, e.g. "What text is visible?" or "Describe the chart". Defaults to a general description.',
675
+ },
676
+ },
677
+ required: ['url'],
678
+ },
679
+ },
680
+ },
681
+ code: `
682
+ const settingsPath = path.join(process.env.HOME, '.jarvis/data/config/settings.json');
683
+ const settings = JSON.parse(await fs.promises.readFile(settingsPath, 'utf8').catch(() => '{}'));
684
+ const visionModel = settings.visionModel;
685
+ const visionProvider = settings.visionProvider;
686
+ if (!visionModel || !visionProvider) {
687
+ return { status: 'error', message: 'No vision model configured. Set visionModel and visionProvider in settings.' };
688
+ }
689
+ let apiKey, baseURL;
690
+ if (visionProvider === 'z-ai') {
691
+ apiKey = process.env.ZAI_API_KEY;
692
+ baseURL = 'https://api.z.ai/api/coding/paas/v4/';
693
+ } else {
694
+ apiKey = process.env.OPENROUTER_API_KEY;
695
+ baseURL = 'https://openrouter.ai/api/v1';
696
+ }
697
+ if (!apiKey) return { status: 'error', message: 'No API key found for vision provider: ' + visionProvider };
698
+ const imgResponse = await fetch(args.url);
699
+ if (!imgResponse.ok) return { status: 'error', message: 'Failed to fetch image: HTTP ' + imgResponse.status };
700
+ const buffer = await imgResponse.arrayBuffer();
701
+ const base64 = Buffer.from(buffer).toString('base64');
702
+ const contentType = imgResponse.headers.get('content-type') || 'image/jpeg';
703
+ const dataUrl = 'data:' + contentType + ';base64,' + base64;
704
+ const textPrompt = args.prompt?.trim()
705
+ ? 'The user shared this image with the following question/context: "' + args.prompt.trim() + '"\\n\\nPlease describe what you see, paying special attention to anything relevant to their message.'
706
+ : 'Please describe this image in detail. Include all visible text, objects, colors, layout, and any other relevant details.';
707
+ const apiResponse = await fetch(baseURL + (baseURL.endsWith('/') ? '' : '/') + 'chat/completions', {
708
+ method: 'POST',
709
+ headers: { 'Authorization': 'Bearer ' + apiKey, 'Content-Type': 'application/json' },
710
+ body: JSON.stringify({
711
+ model: visionModel,
712
+ messages: [{ role: 'user', content: [
713
+ { type: 'image_url', image_url: { url: dataUrl } },
714
+ { type: 'text', text: textPrompt },
715
+ ]}],
716
+ }),
717
+ });
718
+ const result = await apiResponse.json();
719
+ if (!apiResponse.ok) return { status: 'error', message: result.error?.message || 'Vision API error' };
720
+ const description = result.choices?.[0]?.message?.content?.trim() || '(no description returned)';
721
+ return { status: 'ok', description };
722
+ `,
723
+ },
659
724
  };
660
725
 
661
726
  export function seedTools() {