@visorcraft/idlehands 1.0.9 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -13,6 +13,7 @@ Idle Hands is built for people who want an agent that can actually ship work, no
13
13
 
14
14
  - **TUI-first UX** for real daily use (streaming output, slash commands, approvals)
15
15
  - **Runtime orchestration** (hosts/backends/models) for local + remote model stacks
16
+ - **Size-aware runtime probes** so very large GGUF/RPC models get sane startup timeouts by default
16
17
  - **Safety + approvals** with explicit modes (`plan`, `reject`, `default`, `auto-edit`, `yolo`)
17
18
  - **Headless mode** for CI and scripts (`json`, `stream-json`, `--fail-on-error`, `--diff-only`)
18
19
  - **Bot frontends** (Telegram + Discord) with service management
@@ -158,6 +159,24 @@ If you use a dedicated `idlehands` account, install/manage the service while log
158
159
 
159
160
  ---
160
161
 
162
+
163
+ ## Runtime probe defaults (size-aware)
164
+
165
+ When a model does not explicitly set a probe timeout and probe interval, Idle Hands derives defaults from the estimated model size on the target host.
166
+
167
+ Default tiers used by the `idlehands select` command:
168
+
169
+ | Model size (GiB) | probe timeout | probe interval |
170
+ |---:|---:|---:|
171
+ | <= 10 | 120s | 1000ms |
172
+ | <= 40 | 300s | 1200ms |
173
+ | <= 80 | 900s | 2000ms |
174
+ | <= 140 | 3600s | 5000ms |
175
+ | > 140 | 5400s | 5000ms |
176
+
177
+ Per-model overrides remain available in `runtimes.json` under `models.launch`.
178
+ Explicit per-model values always take precedence.
179
+
161
180
  ## Documentation map
162
181
 
163
182
  - [Getting Started](https://visorcraft.github.io/IdleHands/guide/getting-started)
package/dist/agent.js CHANGED
@@ -592,6 +592,14 @@ export function parseToolCallsFromContent(content) {
592
592
  const xmlCalls = parseXmlToolCalls(trimmed);
593
593
  if (xmlCalls?.length)
594
594
  return xmlCalls;
595
+ // Case 5: Lightweight function-tag calls (seen in some Qwen content-mode outputs):
596
+ // <function=tool_name>
597
+ // {...json args...}
598
+ // </function>
599
+ // or single-line <function=tool_name>{...}</function>
600
+ const fnTagCalls = parseFunctionTagToolCalls(trimmed);
601
+ if (fnTagCalls?.length)
602
+ return fnTagCalls;
595
603
  return null;
596
604
  }
597
605
  /**
@@ -1006,8 +1014,51 @@ export async function createSession(opts) {
1006
1014
  sessionMeta += `\n\n[Sub-agents] spawn_task is available (isolated context, sequential queue, default max_iterations=${subMaxIter}).`;
1007
1015
  }
1008
1016
  // Harness-driven suffix: append to first user message (NOT system prompt — §9b KV cache rule)
1017
+ // Check if model needs content-mode tool calls (known incompatible templates)
1018
+ // This runs before harness checks so it works regardless of quirk flags.
1019
+ {
1020
+ const modelName = cfg.model ?? '';
1021
+ const { OpenAIClient: OAIClient } = await import('./client.js');
1022
+ if (!client.contentModeToolCalls && OAIClient.needsContentMode(modelName)) {
1023
+ client.contentModeToolCalls = true;
1024
+ client.recordKnownPatternMatch();
1025
+ if (cfg.verbose) {
1026
+ console.warn(`[info] Model "${modelName}" matched known content-mode pattern — using content-based tool calls`);
1027
+ }
1028
+ }
1029
+ }
1009
1030
  if (harness.quirks.needsExplicitToolCallFormatReminder) {
1010
- sessionMeta += '\n\nIMPORTANT: Use the tool_calls mechanism to invoke tools. Do NOT write JSON tool invocations in your message text.';
1031
+ if (client.contentModeToolCalls) {
1032
+ // In content mode, tell the model to use JSON tool calls in its output
1033
+ sessionMeta += '\n\nYou have access to the following tools. To call a tool, output a JSON block in your response like this:\n```json\n{"name": "tool_name", "arguments": {"param": "value"}}\n```\nAvailable tools:\n';
1034
+ const toolSchemas = getToolsSchema();
1035
+ for (const t of toolSchemas) {
1036
+ const fn = t.function;
1037
+ if (fn) {
1038
+ const params = fn.parameters?.properties
1039
+ ? Object.entries(fn.parameters.properties).map(([k, v]) => `${k}: ${v.type ?? 'any'}`).join(', ')
1040
+ : '';
1041
+ sessionMeta += `- ${fn.name}(${params}): ${fn.description ?? ''}\n`;
1042
+ }
1043
+ }
1044
+ sessionMeta += '\nIMPORTANT: Output tool calls as JSON blocks in your message. Do NOT use the tool_calls API mechanism.\nIf you use XML/function tags (e.g. <function=name>), include a full JSON object of arguments between braces.';
1045
+ }
1046
+ else {
1047
+ sessionMeta += '\n\nIMPORTANT: Use the tool_calls mechanism to invoke tools. Do NOT write JSON tool invocations in your message text.';
1048
+ }
1049
+ // One-time tool-call template smoke test (first ask() call only, skip in content mode)
1050
+ if (!client.contentModeToolCalls && !client.__toolCallSmokeTested) {
1051
+ client.__toolCallSmokeTested = true;
1052
+ try {
1053
+ const smokeErr = await client.smokeTestToolCalls(cfg.model ?? 'default');
1054
+ if (smokeErr) {
1055
+ console.error(`\x1b[33m[warn] Tool-call smoke test failed: ${smokeErr}\x1b[0m`);
1056
+ console.error(`\x1b[33m This model/server may not support tool-call replay correctly.\x1b[0m`);
1057
+ console.error(`\x1b[33m Consider using a different model or updating llama.cpp.\x1b[0m`);
1058
+ }
1059
+ }
1060
+ catch { }
1061
+ }
1011
1062
  }
1012
1063
  if (harness.systemPromptSuffix) {
1013
1064
  sessionMeta += '\n\n' + harness.systemPromptSuffix;
@@ -1357,6 +1408,41 @@ export async function createSession(opts) {
1357
1408
  const clearPlan = () => {
1358
1409
  planSteps = [];
1359
1410
  };
1411
+ // Session-level vault context injection: search vault for entries relevant to
1412
+ // the last user message and inject them into the conversation. Used after any
1413
+ // compaction to restore context the model lost when messages were dropped.
1414
+ let lastVaultInjectionQuery = '';
1415
+ const injectVaultContext = async () => {
1416
+ if (!vault)
1417
+ return;
1418
+ let lastUser = null;
1419
+ for (let j = messages.length - 1; j >= 0; j--) {
1420
+ if (messages[j].role === 'user') {
1421
+ lastUser = messages[j];
1422
+ break;
1423
+ }
1424
+ }
1425
+ const userText = userContentToText((lastUser?.content ?? '')).trim();
1426
+ if (!userText)
1427
+ return;
1428
+ const query = userText.slice(0, 200);
1429
+ if (query === lastVaultInjectionQuery)
1430
+ return;
1431
+ const hits = await vault.search(query, 4);
1432
+ if (!hits.length)
1433
+ return;
1434
+ const lines = hits.map((r) => `${r.updatedAt} ${r.kind} ${r.key ?? r.tool ?? r.id} ${String(r.value ?? r.snippet ?? '').replace(/\s+/g, ' ').slice(0, 180)}`);
1435
+ if (!lines.length)
1436
+ return;
1437
+ lastVaultInjectionQuery = query;
1438
+ const vaultContextHeader = vaultMode === 'passive'
1439
+ ? '[Trifecta Vault (passive)]'
1440
+ : '[Vault context after compaction]';
1441
+ messages.push({
1442
+ role: 'user',
1443
+ content: `${vaultContextHeader} Relevant entries for "${query}":\n${lines.join('\n')}`
1444
+ });
1445
+ };
1360
1446
  const compactHistory = async (opts) => {
1361
1447
  const beforeMessages = messages.length;
1362
1448
  const beforeTokens = estimateTokensFromMessages(messages);
@@ -1401,6 +1487,7 @@ export async function createSession(opts) {
1401
1487
  messages = compacted;
1402
1488
  if (dropped.length) {
1403
1489
  messages.push({ role: 'system', content: `[compacted: ${dropped.length} messages archived to Vault - vault_search to recall]` });
1490
+ await injectVaultContext().catch(() => { });
1404
1491
  }
1405
1492
  }
1406
1493
  return {
@@ -1759,7 +1846,6 @@ export async function createSession(opts) {
1759
1846
  // that happen back-to-back with no other tool calls in between.
1760
1847
  let lastTurnSigs = new Set();
1761
1848
  const consecutiveCounts = new Map();
1762
- let lastPassiveVaultQuery = '';
1763
1849
  let malformedCount = 0;
1764
1850
  let noProgressTurns = 0;
1765
1851
  const NO_PROGRESS_TURN_CAP = 3;
@@ -1772,34 +1858,6 @@ export async function createSession(opts) {
1772
1858
  let lastSuccessfulTestRun = null;
1773
1859
  // One-time nudge to prevent post-success churn after green test runs.
1774
1860
  let finalizeAfterTestsNudgeUsed = false;
1775
- const maybeInjectVaultContext = async () => {
1776
- if (!vault || vaultMode !== 'passive')
1777
- return;
1778
- let lastUser = null;
1779
- for (let j = messages.length - 1; j >= 0; j--) {
1780
- if (messages[j].role === 'user') {
1781
- lastUser = messages[j];
1782
- break;
1783
- }
1784
- }
1785
- const userText = userContentToText((lastUser?.content ?? '')).trim();
1786
- if (!userText)
1787
- return;
1788
- const query = userText.slice(0, 200);
1789
- if (query === lastPassiveVaultQuery)
1790
- return;
1791
- const hits = await vault.search(query, 4);
1792
- if (!hits.length)
1793
- return;
1794
- const lines = hits.map((r) => `${r.updatedAt} ${r.kind} ${r.key ?? r.tool ?? r.id} ${String(r.value ?? r.snippet ?? '').replace(/\s+/g, ' ').slice(0, 180)}`);
1795
- if (!lines.length)
1796
- return;
1797
- lastPassiveVaultQuery = query;
1798
- messages.push({
1799
- role: 'user',
1800
- content: `[Trifecta Vault (passive)] Relevant entries for "${query}":\n${lines.join('\n')}`
1801
- });
1802
- };
1803
1861
  const archiveToolOutputForVault = async (msg) => {
1804
1862
  if (!lens || !vault || msg.role !== 'tool' || typeof msg.content !== 'string')
1805
1863
  return msg;
@@ -1901,8 +1959,9 @@ export async function createSession(opts) {
1901
1959
  }
1902
1960
  }
1903
1961
  messages = compacted;
1904
- if (vaultMode === 'passive' && compactedDropped) {
1905
- await maybeInjectVaultContext().catch(() => { });
1962
+ if (dropped.length) {
1963
+ messages.push({ role: 'system', content: `[auto-compacted: ${dropped.length} old messages dropped to stay within context budget. Do NOT re-read files or re-run commands you have already seen — use vault_search to recall prior results if needed.]` });
1964
+ await injectVaultContext().catch(() => { });
1906
1965
  }
1907
1966
  const ac = makeAbortController();
1908
1967
  inFlight = ac;
@@ -2189,9 +2248,13 @@ export async function createSession(opts) {
2189
2248
  // Update to "now" for next turn.
2190
2249
  mutationVersionBySig.set(sig, mutationVersion);
2191
2250
  if (!hasMutatedSince) {
2192
- // Allow a few more repeats for exec since "run tests" loops are common.
2251
+ const count = sigCounts.get(sig) ?? 0;
2193
2252
  const loopThreshold = harness.quirks.loopsOnToolError ? 3 : 6;
2194
- if ((sigCounts.get(sig) ?? 0) >= loopThreshold) {
2253
+ // At 3x, inject vault context so the model gets the data it needs
2254
+ if (count >= 3 && count < loopThreshold) {
2255
+ await injectVaultContext().catch(() => { });
2256
+ }
2257
+ if (count >= loopThreshold) {
2195
2258
  const args = sig.slice(toolName.length + 1);
2196
2259
  const argsPreview = args.length > 220 ? args.slice(0, 220) + '…' : args;
2197
2260
  throw new Error(`tool ${toolName}: identical call repeated ${loopThreshold}x across turns; breaking loop. ` +
@@ -2212,13 +2275,13 @@ export async function createSession(opts) {
2212
2275
  consecutiveCounts.set(sig, 1);
2213
2276
  }
2214
2277
  const consec = consecutiveCounts.get(sig) ?? 1;
2215
- if (consec >= 4) {
2216
- const args = sig.slice(toolName.length + 1);
2217
- const argsPreview = args.length > 220 ? args.slice(0, 220) + '…' : args;
2218
- messages.push({
2219
- role: 'user',
2220
- content: `[System] You have read the same resource ${consec} consecutive times (${toolName} ${argsPreview}). The content has not changed. Please proceed with your task using the information you already have.`,
2221
- });
2278
+ if (consec >= 3) {
2279
+ await injectVaultContext().catch(() => { });
2280
+ }
2281
+ // Hard-break: after 6 consecutive identical reads, stop the session
2282
+ if (consec >= 6) {
2283
+ throw new Error(`tool ${toolName}: identical read repeated ${consec}x consecutively; breaking loop. ` +
2284
+ `The resource content has not changed between reads.`);
2222
2285
  }
2223
2286
  continue;
2224
2287
  }
@@ -2821,4 +2884,30 @@ async function autoPickModel(client, cached) {
2821
2884
  clearTimeout(timer);
2822
2885
  }
2823
2886
  }
2887
+ function parseFunctionTagToolCalls(content) {
2888
+ const m = content.match(/<function=([\w.-]+)>([\s\S]*?)<\/function>/i);
2889
+ if (!m)
2890
+ return null;
2891
+ const name = m[1];
2892
+ const body = (m[2] ?? '').trim();
2893
+ // If body contains JSON object, use it as arguments; else empty object.
2894
+ let args = '{}';
2895
+ const jsonStart = body.indexOf('{');
2896
+ const jsonEnd = body.lastIndexOf('}');
2897
+ if (jsonStart !== -1 && jsonEnd > jsonStart) {
2898
+ const sub = body.slice(jsonStart, jsonEnd + 1);
2899
+ try {
2900
+ JSON.parse(sub);
2901
+ args = sub;
2902
+ }
2903
+ catch {
2904
+ // keep {}
2905
+ }
2906
+ }
2907
+ return [{
2908
+ id: 'call_0',
2909
+ type: 'function',
2910
+ function: { name, arguments: args }
2911
+ }];
2912
+ }
2824
2913
  //# sourceMappingURL=agent.js.map