@ducci/jarvis 1.0.27 → 1.0.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/findings/011-empty-model-response.md +157 -0
- package/package.json +1 -1
- package/src/server/agent.js +36 -17
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
# Finding 011: Empty Model Response Causes Generic Telegram Error
|
|
2
|
+
|
|
3
|
+
**Date:** 2026-03-01
|
|
4
|
+
**Severity:** High — user sees generic "please try again" with no actionable information
|
|
5
|
+
**Status:** Fixed
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Observed Session
|
|
10
|
+
|
|
11
|
+
Session `33a50dfe-38ea-4972-adac-498ef0525b0c`, run 16 of 17 (session.jsonl line 16):
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
status=format_error
|
|
15
|
+
model=nvidia/nemotron-3-nano-30b-a3b:free
|
|
16
|
+
iteration=4
|
|
17
|
+
userInput='Ok. Kannst du bitte jetzt das shell script ausführen mit der domain...'
|
|
18
|
+
logSummary='Model returned non-JSON final response after recovery attempts.'
|
|
19
|
+
rawResponse=''
|
|
20
|
+
response=''
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
The Telegram user received:
|
|
24
|
+
|
|
25
|
+
```
|
|
26
|
+
The agent encountered an error and could not produce a response. Please try again.
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## What Happened
|
|
32
|
+
|
|
33
|
+
The agent executed a ZAP scan (`./scan.sh juice-shop.herokuapp.com`). The tool result was a large ZAP startup log, truncated at 4000 characters. Two subsequent tool calls failed:
|
|
34
|
+
|
|
35
|
+
- `pkill -f zaproxy || true` → exit 1 (no process to kill)
|
|
36
|
+
- `zaproxy -help | grep -i shutdown -A5` → failed (`libtiff.so.5` missing)
|
|
37
|
+
|
|
38
|
+
On iteration 4, the model returned `assistantMessage.content = null` with no `tool_calls`. This is the "went silent" case: the model produced neither a response nor another tool call.
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## Bug Chain
|
|
43
|
+
|
|
44
|
+
### Step 1 — Model returns null content
|
|
45
|
+
|
|
46
|
+
```js
|
|
47
|
+
let content = assistantMessage.content || '';
|
|
48
|
+
// content = ''
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Step 2 — Recovery chain falls through on empty content
|
|
52
|
+
|
|
53
|
+
The existing format recovery chain was designed for *non-empty, non-JSON* responses:
|
|
54
|
+
|
|
55
|
+
1. `JSON.parse('')` → throws
|
|
56
|
+
2. Retry with fallback model (same messages, no nudge) → also `''`
|
|
57
|
+
3. Retry with nudge "Your previous response was not valid JSON" → technically wrong for empty content; model still returns `''`
|
|
58
|
+
4. Give up
|
|
59
|
+
|
|
60
|
+
### Step 3 — Empty `response` propagates to Telegram
|
|
61
|
+
|
|
62
|
+
```js
|
|
63
|
+
response = content; // ''
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
`handleChat` returns `{ response: '', ... }`. In `telegram/index.js`:
|
|
67
|
+
|
|
68
|
+
```js
|
|
69
|
+
const rawResponse = typeof result.response === 'string' ? result.response : ...;
|
|
70
|
+
// rawResponse = ''
|
|
71
|
+
const text = rawResponse.trim() || 'The agent encountered an error...';
|
|
72
|
+
// '' → fallback shown
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
The user sees the generic Telegram fallback instead of any information about what happened or what to do.
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
## Root Causes
|
|
80
|
+
|
|
81
|
+
**Primary**: The `format_error` path set `response = content` without a fallback for the empty string case. An empty `response` triggers the Telegram handler's last-resort fallback message, giving the user no context.
|
|
82
|
+
|
|
83
|
+
**Secondary**: The format recovery chain was designed for non-empty non-JSON responses. When `content` is empty, the nudge message "Your previous response was not valid JSON" is inaccurate — the model produced nothing, not invalid JSON. A targeted nudge for the empty case increases the chance of recovery.
|
|
84
|
+
|
|
85
|
+
**Model-level cause**: The free model `nvidia/nemotron-3-nano-30b-a3b:free` can fail to produce any output after processing a heavily truncated tool result followed by consecutive tool failures. This is a model quality limitation that the recovery layer must account for.
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
## Difference from Findings 009 and 010
|
|
90
|
+
|
|
91
|
+
| Finding | Model produces... | Bug manifests at... |
|
|
92
|
+
|---------|-------------------|---------------------|
|
|
93
|
+
| 009 | Non-string `response` field (array/object) | Telegram `.trim()` crash |
|
|
94
|
+
| 010 | Non-string `checkpoint.remaining` | Zero-progress `.trim()` crash |
|
|
95
|
+
| 011 | Empty/null content (no text, no tool calls) | Telegram generic fallback (no crash, but useless to user) |
|
|
96
|
+
|
|
97
|
+
Finding 011 is the third in the same class: model output type does not match what the system expects.
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## Fix
|
|
102
|
+
|
|
103
|
+
### `src/server/agent.js` — two changes
|
|
104
|
+
|
|
105
|
+
**1. Empty-content detection with targeted nudge**
|
|
106
|
+
|
|
107
|
+
When `content` is empty, skip the standard recovery chain (designed for non-JSON text) and apply a targeted nudge that accurately describes the situation:
|
|
108
|
+
|
|
109
|
+
```js
|
|
110
|
+
if (!content.trim()) {
|
|
111
|
+
// Model returned no content at all — use a targeted nudge instead of the
|
|
112
|
+
// standard JSON recovery chain (designed for non-empty non-JSON responses).
|
|
113
|
+
try {
|
|
114
|
+
const emptyNudge = [
|
|
115
|
+
...preparedMessages,
|
|
116
|
+
{ role: 'user', content: 'You returned an empty response. ' + FORMAT_NUDGE },
|
|
117
|
+
];
|
|
118
|
+
const nudgeResult = await callModelWithFallback(client, config, emptyNudge, toolDefs);
|
|
119
|
+
const nudgeContent = nudgeResult.choices[0]?.message?.content || '';
|
|
120
|
+
parsed = JSON.parse(nudgeContent);
|
|
121
|
+
content = nudgeContent;
|
|
122
|
+
} catch {
|
|
123
|
+
// Give up — fall through to !parsed handler below
|
|
124
|
+
}
|
|
125
|
+
} else {
|
|
126
|
+
// Non-empty content — use the existing 3-step JSON recovery chain
|
|
127
|
+
try { parsed = JSON.parse(content); } catch {
|
|
128
|
+
// Step 1: fallback model...
|
|
129
|
+
// Step 2: nudge...
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
**2. Non-empty fallback on format_error**
|
|
135
|
+
|
|
136
|
+
```js
|
|
137
|
+
if (!parsed) {
|
|
138
|
+
// Ensure response is never empty so the delivery layer can show something
|
|
139
|
+
// meaningful rather than its generic fallback message.
|
|
140
|
+
response = content.trim() || 'The model did not produce a response. Please try again.';
|
|
141
|
+
logSummary = 'Model returned non-JSON final response after recovery attempts.';
|
|
142
|
+
status = 'format_error';
|
|
143
|
+
return { ... };
|
|
144
|
+
}
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
---
|
|
148
|
+
|
|
149
|
+
## Outcome
|
|
150
|
+
|
|
151
|
+
| Scenario | Before | After |
|
|
152
|
+
|----------|--------|-------|
|
|
153
|
+
| Model returns empty content, nudge succeeds | format_error (3 wasted API calls) | Clean recovery (1 targeted API call) |
|
|
154
|
+
| Model returns empty content, nudge fails | Telegram generic fallback | "The model did not produce a response. Please try again." |
|
|
155
|
+
| Model returns non-JSON text, all recovery fails | Raw model output shown to user (Telegram generic fallback only if the text was blank) | Raw model output shown to user |
|
|
156
|
+
|
|
157
|
+
**Effect on the debugging session**: instead of the generic Telegram fallback, the user would have received "The model did not produce a response. Please try again." — a clear signal that the model failed, not their message. In the best case, the new targeted nudge would have elicited a valid JSON response.
|
package/package.json
CHANGED
package/src/server/agent.js
CHANGED
|
@@ -215,32 +215,51 @@ async function runAgentLoop(client, config, session, prepareMessages) {
|
|
|
215
215
|
let content = assistantMessage.content || '';
|
|
216
216
|
let parsed = null;
|
|
217
217
|
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
218
|
+
if (!content.trim()) {
|
|
219
|
+
// Model returned no content at all — use a targeted nudge instead of the
|
|
220
|
+
// standard JSON recovery chain (designed for non-empty non-JSON responses).
|
|
221
|
+
try {
|
|
222
|
+
const emptyNudge = [
|
|
223
|
+
...preparedMessages,
|
|
224
|
+
{ role: 'user', content: 'You returned an empty response. ' + FORMAT_NUDGE },
|
|
225
|
+
];
|
|
226
|
+
const nudgeResult = await callModelWithFallback(client, config, emptyNudge, toolDefs);
|
|
227
|
+
const nudgeContent = nudgeResult.choices[0]?.message?.content || '';
|
|
228
|
+
parsed = JSON.parse(nudgeContent);
|
|
229
|
+
content = nudgeContent;
|
|
230
|
+
} catch {
|
|
231
|
+
// Give up — fall through to !parsed handler below
|
|
232
|
+
}
|
|
233
|
+
} else {
|
|
222
234
|
try {
|
|
223
|
-
|
|
224
|
-
const fallbackContent = fallbackResult.choices[0]?.message?.content || '';
|
|
225
|
-
parsed = JSON.parse(fallbackContent);
|
|
226
|
-
content = fallbackContent;
|
|
235
|
+
parsed = JSON.parse(content);
|
|
227
236
|
} catch {
|
|
228
|
-
// Step
|
|
237
|
+
// Step 1: retry with fallback model
|
|
229
238
|
try {
|
|
230
|
-
const
|
|
231
|
-
const
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
content = nudgeContent;
|
|
239
|
+
const fallbackResult = await callModel(client, config.fallbackModel, preparedMessages, toolDefs);
|
|
240
|
+
const fallbackContent = fallbackResult.choices[0]?.message?.content || '';
|
|
241
|
+
parsed = JSON.parse(fallbackContent);
|
|
242
|
+
content = fallbackContent;
|
|
235
243
|
} catch {
|
|
236
|
-
//
|
|
244
|
+
// Step 2: nudge retry via both models
|
|
245
|
+
try {
|
|
246
|
+
const nudgeMessages = [...preparedMessages, { role: 'user', content: FORMAT_NUDGE }];
|
|
247
|
+
const nudgeResult = await callModelWithFallback(client, config, nudgeMessages, toolDefs);
|
|
248
|
+
const nudgeContent = nudgeResult.choices[0]?.message?.content || '';
|
|
249
|
+
parsed = JSON.parse(nudgeContent);
|
|
250
|
+
content = nudgeContent;
|
|
251
|
+
} catch {
|
|
252
|
+
// Give up
|
|
253
|
+
}
|
|
237
254
|
}
|
|
238
255
|
}
|
|
239
256
|
}
|
|
240
257
|
|
|
241
258
|
if (!parsed) {
|
|
242
|
-
// Don't push bad content — handleChat will inject a synthetic error note
|
|
243
|
-
response
|
|
259
|
+
// Don't push bad content — handleChat will inject a synthetic error note.
|
|
260
|
+
// Ensure response is never empty so the delivery layer (e.g. Telegram) can
|
|
261
|
+
// show the user something meaningful rather than its generic fallback message.
|
|
262
|
+
response = content.trim() || 'The model did not produce a response. Please try again.';
|
|
244
263
|
logSummary = 'Model returned non-JSON final response after recovery attempts.';
|
|
245
264
|
status = 'format_error';
|
|
246
265
|
return { iteration, response, logSummary, status, runToolCalls, checkpoint: null, rawResponse: content };
|