miii-agent 0.1.18 → 0.1.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/cli.js +256 -12
  2. package/package.json +1 -1
package/dist/cli.js CHANGED
@@ -158,6 +158,30 @@ async function modelContext(entry, model) {
158
158
  throw err;
159
159
  }
160
160
  }
161
/**
 * Look up a model's parameter count, expressed in billions.
 *
 * Asks the client built by `makeClient(entry)` for the model's `show` info and
 * tries two sources in order:
 *   1. `details.parameter_size` — a number followed by "B" or "M"
 *      (case-insensitive); an "M" value is divided by 1e3 to yield billions.
 *   2. the first `model_info` key ending in "parameter_count" — a raw count
 *      that is divided by 1e9.
 *
 * @param {object} entry - Provider entry handed to `makeClient`.
 * @param {string} model - Model name passed to `show`.
 * @returns {Promise<number|null>} Positive parameter count in billions, or
 *   `null` when neither source yields one or the lookup throws.
 */
async function paramCountB(entry, model) {
  try {
    const info = await makeClient(entry).show({ model });
    const size = info.details?.parameter_size;
    // Only pattern-match real strings: a non-string value would throw on
    // `.match`, land in the catch below and skip the model_info fallback.
    if (typeof size === "string") {
      const m = size.match(/([\d.]+)\s*([BM])/i);
      if (m) {
        const n = Number.parseFloat(m[1]);
        // Require a positive size, matching the `val > 0` rule of the fallback.
        if (Number.isFinite(n) && n > 0) {
          return m[2].toUpperCase() === "M" ? n / 1e3 : n;
        }
      }
    }
    const modelInfo = info.model_info;
    if (modelInfo) {
      const key = Object.keys(modelInfo).find((k) => k.endsWith("parameter_count"));
      if (key) {
        const val = Number(modelInfo[key]);
        if (Number.isFinite(val) && val > 0) return val / 1e9;
      }
    }
    return null;
  } catch {
    // Best-effort lookup: any failure is reported as "unknown" rather than
    // propagated to the caller.
    return null;
  }
}
161
185
  async function* chat(entry, model, messages, tools, opts) {
162
186
  if (opts?.signal?.aborted) return;
163
187
  const signal = opts?.signal;
@@ -177,15 +201,19 @@ async function* chat(entry, model, messages, tools, opts) {
177
201
  num_ctx: opts?.num_ctx ?? 8192
178
202
  };
179
203
  if (numPredict !== void 0 && numPredict > 0) options.num_predict = numPredict;
180
- stream = await client.chat({
204
+ const req = {
181
205
  model,
182
206
  messages,
183
- tools,
184
207
  stream: true,
185
208
  think: true,
186
209
  keep_alive: opts?.keep_alive ?? "10m",
187
210
  options
188
- });
211
+ };
212
+ if (opts?.format) req.format = opts.format;
213
+ else if (tools) req.tools = tools;
214
+ stream = await client.chat(
215
+ req
216
+ );
189
217
  } catch (err) {
190
218
  if (signal?.aborted) return;
191
219
  if (isConnectionError(err)) {
@@ -452,6 +480,9 @@ var init_openai = __esm({
452
480
  function active() {
453
481
  return resolveProvider();
454
482
  }
483
/**
 * Report the `name` of whichever provider `active()` currently resolves to.
 *
 * @returns {*} The `name` property of the object returned by `active()`.
 */
function providerName() {
  const { name } = active();
  return name;
}
455
486
  function isAvailable3() {
456
487
  const { entry } = active();
457
488
  return entry.type === "ollama" ? isAvailable(entry) : isAvailable2(entry);
@@ -468,6 +499,16 @@ async function modelContext3(model) {
468
499
  const { entry } = active();
469
500
  return entry.type === "ollama" ? modelContext(entry, model) : modelContext2(entry, model);
470
501
  }
502
/**
 * Parameter count (in billions) of `model` on the active provider, memoised in
 * `paramCountCache` under the key `<entry.baseUrl>:<model>`.
 *
 * @param {string} model - Model name to look up.
 * @returns {Promise<number|null>} The value produced by `paramCountB`, or
 *   `null` when the active entry's type is not "ollama" or the count is unknown.
 */
async function modelParamCountB(model) {
  const { entry } = active();
  if (entry.type !== "ollama") return null;
  const key = `${entry.baseUrl}:${model}`;
  const cached = paramCountCache.get(key);
  if (cached != null) return cached;
  const params = await paramCountB(entry, model);
  // Remember only definite answers. `paramCountB` also reports `null` when the
  // lookup itself throws, and caching that would pin a transient failure for
  // the rest of the process; an unknown count is simply looked up again.
  if (params != null) paramCountCache.set(key, params);
  return params;
}
471
512
  async function* chat3(model, messages, tools, opts) {
472
513
  const { entry } = active();
473
514
  if (entry.type === "ollama") {
@@ -476,12 +517,70 @@ async function* chat3(model, messages, tools, opts) {
476
517
  yield* chat2(entry, model, messages, tools, opts);
477
518
  }
478
519
  }
520
+ var paramCountCache;
479
521
  var init_client = __esm({
480
522
  "src/llm/client.ts"() {
481
523
  "use strict";
482
524
  init_config();
483
525
  init_ollama();
484
526
  init_openai();
527
+ paramCountCache = /* @__PURE__ */ new Map();
528
+ }
529
+ });
530
+
531
+ // src/llm/grammar.ts
532
/**
 * Reduce a map of argument specs to minimal schema nodes.
 *
 * Each output node keeps the spec's `type`, plus its `enum` when that is a
 * non-empty list; every other field of the spec is left out.
 *
 * @param {object} props - Argument name -> spec (`type`, optional `enum`).
 * @returns {object} Argument name -> `{ type }` or `{ type, enum }`.
 */
function argProperties(props) {
  return Object.entries(props).reduce((schema, [argName, argSpec]) => {
    const { type, enum: choices } = argSpec;
    schema[argName] = choices && choices.length ? { type, enum: choices } : { type };
    return schema;
  }, {});
}
541
/**
 * Build the schema branch describing one tool-call action.
 *
 * The branch is a closed object with exactly `name` (pinned to `tool.name` via
 * `const`) and `arguments` (a closed object whose properties come from
 * `argProperties(tool.input_schema.properties)`). The tool's `required` list is
 * carried over only when it is present and non-empty.
 *
 * @param {object} tool - Tool definition with `name` and `input_schema`.
 * @returns {object} Schema object for `{ name, arguments }`.
 */
function toolBranch(tool) {
  const schema = tool.input_schema;
  const argumentsSchema = {
    type: "object",
    additionalProperties: false,
    properties: argProperties(schema.properties),
    ...(schema.required && schema.required.length ? { required: schema.required } : {})
  };
  const properties = { name: { const: tool.name }, arguments: argumentsSchema };
  return {
    type: "object",
    additionalProperties: false,
    required: ["name", "arguments"],
    properties
  };
}
560
/**
 * Build the schema branch for the final-answer action.
 *
 * The action's `name` is pinned to `RESPOND_ACTION` and its `arguments` must be
 * a closed object holding exactly one required string, `message`.
 *
 * @returns {object} A freshly built schema object on every call.
 */
function respondBranch() {
  const messageArguments = {
    type: "object",
    additionalProperties: false,
    required: ["message"],
    properties: { message: { type: "string" } }
  };
  const properties = {
    name: { const: RESPOND_ACTION },
    arguments: messageArguments
  };
  return {
    type: "object",
    additionalProperties: false,
    required: ["name", "arguments"],
    properties
  };
}
576
/**
 * Assemble the action schema offered to the model: one `toolBranch` per tool,
 * in the order given, followed by the `respondBranch`, all under `oneOf`.
 *
 * @param {object[]} tools - Tool definitions accepted by `toolBranch`.
 * @returns {{ oneOf: object[] }} Schema listing every allowed action.
 */
function buildToolGrammar(tools) {
  const branches = tools.map((tool) => toolBranch(tool));
  branches.push(respondBranch());
  return { oneOf: branches };
}
579
+ var RESPOND_ACTION;
580
+ var init_grammar = __esm({
581
+ "src/llm/grammar.ts"() {
582
+ "use strict";
583
+ RESPOND_ACTION = "respond";
485
584
  }
486
585
  });
487
586
 
@@ -1149,8 +1248,15 @@ var init_context = __esm({
1149
1248
  });
1150
1249
 
1151
1250
  // src/prompt/system.ts
1152
- function buildSystemPrompt(tools, cwd, project) {
1251
+ function buildSystemPrompt(tools, cwd, project, grammarMode = false) {
1153
1252
  const toolLines = tools.map((t) => `- ${t.name}: ${t.description}`).join("\n");
1253
+ const actionProtocol = grammarMode ? `
1254
+ # Action protocol (strict)
1255
+ Every reply is exactly ONE JSON action object, nothing else \u2014 no prose outside it, no markdown, no fences. Decoding is grammar-constrained, so malformed output is impossible; your only job is to choose the right action.
1256
+ To use a tool: {"name": "<tool_name>", "arguments": { ...that tool's args }}
1257
+ To give your final answer to the user: {"name": "respond", "arguments": {"message": "<your full answer here>"}}
1258
+ Call tools until the GOAL is met, then emit a single "respond" action with the complete answer. The "respond" action is the ONLY way to end the turn and talk to the user \u2014 never put your final answer in a tool call.
1259
+ ` : "";
1154
1260
  const projectSection = project && project.content.trim() ? `
1155
1261
  # ${CONTEXT_FILENAME} \u2014 project instructions (authoritative, read first)
1156
1262
  The user maintains ${CONTEXT_FILENAME} at ${project.source} to steer how you work in this project: conventions, commands, architecture, do's and don'ts. Treat it as direct instruction from the user, higher priority than your defaults. When it conflicts with a default rule below, ${CONTEXT_FILENAME} wins (except permissions and safety, which you never override).${project.truncated ? `
@@ -1175,6 +1281,17 @@ If GAPS is non-empty, ask the minimum questions needed to fill them \u2014 one m
1175
1281
 
1176
1282
  Re-read GOAL before every tool call. If a tool call does not move toward GOAL, skip it.
1177
1283
 
1284
+ # Plan summary before acting (conditional)
1285
+ Write a brief plan BEFORE the first tool call ONLY when the work is non-trivial:
1286
+ - Multi-step, OR touches multiple files, OR is destructive/hard to reverse, OR mixes investigation + change.
1287
+ SKIP the plan for trivial work \u2014 a single read, one small edit, a quick search, a direct question. Just act.
1288
+ When you do write one:
1289
+ - One or two plain-text sentences naming what you will do and in what order.
1290
+ - State the intent (the bug/feature/fix and the steps), not a tool-by-tool narration.
1291
+ - Keep it short \u2014 the user reads this to follow along, not a spec.
1292
+ Then begin immediately with the first tool call. Do not wait for approval unless GAPS was non-empty or the work is destructive.
1293
+ This summary is the ONE allowed preamble. It does not override the Tool calls rule below: after this plan, emit tool calls directly with no further narration between them.
1294
+
1178
1295
  # Attention: re-attend to goal at each step
1179
1296
  After each tool result, answer silently: "Does this result move me toward GOAL?"
1180
1297
  YES \u2192 continue
@@ -1224,7 +1341,7 @@ Ask in a numbered list. One round of questions per turn. Then wait.
1224
1341
  # Tools
1225
1342
  You have access to the following tools. Call them via the function-calling interface.
1226
1343
  ${toolLines}
1227
-
1344
+ ${actionProtocol}
1228
1345
  # Loop semantics
1229
1346
  - When you need to act on the filesystem or run a command, emit a tool call.
1230
1347
  - After each tool result, decide: more tool calls, or a final plain-text answer.
@@ -1289,6 +1406,17 @@ function subjectFor(toolName, input) {
1289
1406
  if (typeof obj.path === "string") return obj.path;
1290
1407
  return "";
1291
1408
  }
1409
/**
 * Widen a shell command into the glob pattern stored for it.
 *
 * A simple command becomes `<program> *`, or `<program> <subcommand> *` when
 * the program is listed in `WRAPPER_PROGRAMS` and a second token exists.
 * Blank commands are returned unchanged.
 *
 * Commands that use shell control operators, command substitution or
 * redirection are returned verbatim. A glob built from their first token would
 * cover far more than the command itself (e.g. "cd app && rm -rf build" would
 * become "cd *").
 *
 * NOTE(review): how a stored glob is matched against later commands is decided
 * outside this function — confirm that a pattern such as "ls *" cannot match a
 * later compound command like "ls; rm -rf x".
 *
 * @param {string} command - The shell command to generalize.
 * @returns {string} The pattern to persist for this command.
 */
function generalizeCommand(command) {
  const trimmed = command.trim();
  if (trimmed === "") return command;
  // ; & | ` < > newline and $( all let one command line do more than run the
  // program named by its first token.
  if (/[;&|`<>\n]|\$\(/.test(trimmed)) return command;
  const tokens = trimmed.split(/\s+/);
  const prefixLen = WRAPPER_PROGRAMS.has(tokens[0]) && tokens.length > 1 ? 2 : 1;
  const prefix = tokens.slice(0, prefixLen).join(" ");
  return `${prefix} *`;
}
1417
/**
 * Choose the pattern to store for a tool and subject.
 *
 * Subjects of the "run_bash" tool are passed through `generalizeCommand`;
 * subjects of every other tool are returned as they are.
 *
 * @param {string} toolName - Name of the tool.
 * @param {string} subject - The subject (command or path) for that tool.
 * @returns {string} Pattern to persist.
 */
function patternToPersist(toolName, subject) {
  if (toolName !== "run_bash") return subject;
  return generalizeCommand(subject);
}
1292
1420
  function globToRegExp(glob2) {
1293
1421
  const escaped = glob2.replace(/[.+^${}()|[\]\\]/g, "\\$&");
1294
1422
  const pattern = escaped.replace(/\*/g, ".*").replace(/\?/g, ".");
@@ -1309,15 +1437,28 @@ async function check(toolName, input, ctx) {
1309
1437
  if (rules.some((r) => matches(r, toolName, subject))) return "allow";
1310
1438
  const answer = await ctx.ask(toolName, input);
1311
1439
  if (answer === "no") return "deny";
1312
- if (answer === "always") addRule(toolName, subject);
1440
+ if (answer === "always") addRule(toolName, patternToPersist(toolName, subject));
1313
1441
  return "allow";
1314
1442
  }
1315
- var RULES_DIR, RULES_PATH, ALWAYS_ALLOW;
1443
+ var RULES_DIR, RULES_PATH, WRAPPER_PROGRAMS, ALWAYS_ALLOW;
1316
1444
  var init_policy = __esm({
1317
1445
  "src/permissions/policy.ts"() {
1318
1446
  "use strict";
1319
1447
  RULES_DIR = join7(homedir5(), ".miii");
1320
1448
  RULES_PATH = join7(RULES_DIR, "permissions.json");
1449
+ WRAPPER_PROGRAMS = /* @__PURE__ */ new Set([
1450
+ "npm",
1451
+ "npx",
1452
+ "pnpm",
1453
+ "yarn",
1454
+ "brew",
1455
+ "pip",
1456
+ "pip3",
1457
+ "cargo",
1458
+ "docker",
1459
+ "kubectl",
1460
+ "go"
1461
+ ]);
1321
1462
  ALWAYS_ALLOW = /* @__PURE__ */ new Set(["read_file", "grep", "glob"]);
1322
1463
  }
1323
1464
  });
@@ -1430,6 +1571,75 @@ function extractFirstJsonObject(s) {
1430
1571
  }
1431
1572
  return null;
1432
1573
  }
1574
/**
 * Interpret a model reply as a single JSON action.
 *
 * The reply is trimmed and parsed as JSON. When it does not start with "{", or
 * parsing fails, `extractFirstJsonObject` is used to pull an object out of the
 * text and parsing is retried once on that.
 *
 * @param {string} content - Raw reply text.
 * @param {string[]} knownToolNames - Tool names accepted as tool actions.
 * @returns {{kind: "respond", message: string} | {kind: "tool", name: string, arguments: *} | null}
 *   A respond action (`message` is "" when `arguments.message` is not a
 *   string), a tool action for a known tool, or `null` when the reply is empty,
 *   unparseable, lacks a non-empty string `name`, or names an unknown tool.
 */
function parseGrammarAction(content, knownToolNames) {
  if (!content) return null;

  let candidate = content.trim();
  if (!candidate.startsWith("{")) {
    const located = extractFirstJsonObject(candidate);
    if (!located) return null;
    candidate = located.json;
  }

  // Parse without throwing so the retry below reads as straight-line code.
  const tryParse = (source) => {
    try {
      return { ok: true, value: JSON.parse(source) };
    } catch {
      return { ok: false };
    }
  };

  let attempt = tryParse(candidate);
  if (!attempt.ok) {
    const salvaged = extractFirstJsonObject(candidate);
    if (!salvaged) return null;
    attempt = tryParse(salvaged.json);
    if (!attempt.ok) return null;
  }

  const action = attempt.value;
  const actionName = action.name;
  const actionArgs = action.arguments ?? {};
  if (typeof actionName !== "string" || actionName === "") return null;

  if (actionName === "respond") {
    const message = typeof actionArgs.message === "string" ? actionArgs.message : "";
    return { kind: "respond", message };
  }

  return knownToolNames.includes(actionName)
    ? { kind: "tool", name: actionName, arguments: actionArgs }
    : null;
}
1604
/**
 * Decode as much as possible of the "message" string from a partially
 * streamed `{"name": "respond", ...}` action.
 *
 * The text is scanned from the start on every call, so callers pass the whole
 * accumulated reply each time and diff the returned `message` themselves.
 *
 * @param {string} text - The reply text received so far.
 * @returns {{ message: string, complete: boolean } | null} `null` when the text
 *   does not (yet) contain `"name": "respond"` and an opened `"message": "`
 *   string; otherwise the decoded message so far, with `complete` true once
 *   the closing quote has been seen.
 */
function streamRespondMessage(text) {
  if (!/"name"\s*:\s*"respond"/.test(text)) return null;
  const opener = text.match(/"message"\s*:\s*"/);
  if (!opener || opener.index == null) return null;

  // Single-character JSON string escapes (RFC 8259, section 7).
  const escapes = {
    n: "\n",
    t: "\t",
    r: "\r",
    b: "\b",
    f: "\f",
    '"': '"',
    "\\": "\\",
    "/": "/"
  };

  let out = "";
  let i = opener.index + opener[0].length;
  while (i < text.length) {
    const ch = text[i];
    if (ch === '"') return { message: out, complete: true };
    if (ch === "\\") {
      const nx = text[i + 1];
      // A trailing backslash means the escape has not fully arrived yet.
      if (nx === void 0) break;
      if (nx === "u") {
        const hex = text.slice(i + 2, i + 6);
        // Wait for all four hex digits before decoding.
        if (hex.length < 4) break;
        // Malformed digits would decode to NUL or a wrong character through
        // parseInt; emit U+FFFD instead.
        out += /^[0-9a-fA-F]{4}$/.test(hex) ? String.fromCharCode(parseInt(hex, 16)) : "\uFFFD";
        i += 6;
        continue;
      }
      out += escapes[nx] ?? nx;
      i += 2;
      continue;
    }
    out += ch;
    i++;
  }

  // The string is still open. If it currently ends in a high surrogate, its
  // low half has not arrived yet; hold it back so no lone surrogate is emitted.
  const last = out.charCodeAt(out.length - 1);
  if (last >= 0xd800 && last <= 0xdbff) out = out.slice(0, -1);
  return { message: out, complete: false };
}
1433
1643
  function blocksFromOllama(text, tool_calls, knownToolNames = []) {
1434
1644
  const blocks = [];
1435
1645
  let finalText = text;
@@ -1487,8 +1697,15 @@ function markSeen(name, input, seen) {
1487
1697
  async function* runAgent(opts) {
1488
1698
  const { model, cwd, permissions, hooks, signal, num_ctx } = opts;
1489
1699
  const startTime = Date.now();
1490
- const system = buildSystemPrompt(TOOLS, cwd, loadProjectContext(cwd));
1700
+ let useGrammar = false;
1701
+ if (providerName() === "ollama") {
1702
+ const params = await modelParamCountB(model);
1703
+ useGrammar = params == null || params <= GRAMMAR_MAX_PARAMS_B;
1704
+ }
1705
+ const system = buildSystemPrompt(TOOLS, cwd, loadProjectContext(cwd), useGrammar);
1706
+ const grammar = useGrammar ? buildToolGrammar(TOOLS) : void 0;
1491
1707
  const ollamaTools = toOllamaTools(TOOLS);
1708
+ const toolNames = TOOLS.map((t) => t.name);
1492
1709
  const effort = EFFORT_OPTIONS[loadConfig().effort ?? "medium"];
1493
1710
  const history = [
1494
1711
  ...opts.history,
@@ -1502,6 +1719,8 @@ async function* runAgent(opts) {
1502
1719
  for (let turn = 0; turn < MAX_TURNS; turn++) {
1503
1720
  let text = "";
1504
1721
  let tool_calls;
1722
+ let respondEmitted = 0;
1723
+ let streamedRespond = false;
1505
1724
  let lastTail = "";
1506
1725
  let tailRepeats = 0;
1507
1726
  let streamLooped = false;
@@ -1509,11 +1728,22 @@ async function* runAgent(opts) {
1509
1728
  const composedSignal = signal ? AbortSignal.any ? AbortSignal.any([signal, ac.signal]) : ac.signal : ac.signal;
1510
1729
  if (signal) signal.addEventListener("abort", () => ac.abort(), { once: true });
1511
1730
  try {
1512
- for await (const chunk of chat3(model, toOllamaMessages(history, system), ollamaTools, { signal: composedSignal, num_ctx, num_predict: effort.num_predict, temperature: effort.temperature })) {
1731
+ for await (const chunk of chat3(model, toOllamaMessages(history, system), useGrammar ? void 0 : ollamaTools, { signal: composedSignal, num_ctx, num_predict: effort.num_predict, temperature: effort.temperature, format: grammar })) {
1513
1732
  if (signal?.aborted) break;
1514
1733
  if (chunk.content) {
1515
1734
  text += chunk.content;
1516
- yield { type: "text-delta", text: chunk.content };
1735
+ if (!useGrammar) {
1736
+ yield { type: "text-delta", text: chunk.content };
1737
+ } else {
1738
+ const r = streamRespondMessage(text);
1739
+ if (r) {
1740
+ streamedRespond = true;
1741
+ if (r.message.length > respondEmitted) {
1742
+ yield { type: "text-delta", text: r.message.slice(respondEmitted) };
1743
+ respondEmitted = r.message.length;
1744
+ }
1745
+ }
1746
+ }
1517
1747
  if (text.length >= REPEAT_TAIL) {
1518
1748
  const tail = text.slice(-REPEAT_TAIL);
1519
1749
  if (tail === lastTail) {
@@ -1561,7 +1791,19 @@ async function* runAgent(opts) {
1561
1791
  };
1562
1792
  return history;
1563
1793
  }
1564
- const blocks = blocksFromOllama(text, tool_calls, TOOLS.map((t) => t.name));
1794
+ let blocks;
1795
+ if (useGrammar) {
1796
+ const action = parseGrammarAction(text, toolNames);
1797
+ if (action?.kind === "tool") {
1798
+ blocks = [{ type: "tool_use", id: mintToolUseId(), name: action.name, input: action.arguments }];
1799
+ } else {
1800
+ const message = action?.kind === "respond" ? action.message : text.trim();
1801
+ if (message && !streamedRespond) yield { type: "text-delta", text: message };
1802
+ blocks = message ? [{ type: "text", text: message }] : [];
1803
+ }
1804
+ } else {
1805
+ blocks = blocksFromOllama(text, tool_calls, toolNames);
1806
+ }
1565
1807
  const tool_uses = blocks.filter((b) => b.type === "tool_use");
1566
1808
  history.push({ role: "assistant", content: blocks });
1567
1809
  if (tool_uses.length === 0) {
@@ -1670,11 +1912,12 @@ async function* runAgent(opts) {
1670
1912
  yield { type: "done", prompt_tokens: promptTokens, eval_tokens: evalTokens };
1671
1913
  return history;
1672
1914
  }
1673
- var MAX_TURNS, REPEAT_TAIL, REPEAT_KILL;
1915
+ var MAX_TURNS, REPEAT_TAIL, REPEAT_KILL, GRAMMAR_MAX_PARAMS_B;
1674
1916
  var init_loop = __esm({
1675
1917
  "src/agent/loop.ts"() {
1676
1918
  "use strict";
1677
1919
  init_client();
1920
+ init_grammar();
1678
1921
  init_paths();
1679
1922
  init_registry();
1680
1923
  init_validate();
@@ -1686,6 +1929,7 @@ var init_loop = __esm({
1686
1929
  MAX_TURNS = 25;
1687
1930
  REPEAT_TAIL = 120;
1688
1931
  REPEAT_KILL = 4;
1932
+ GRAMMAR_MAX_PARAMS_B = 14;
1689
1933
  }
1690
1934
  });
1691
1935
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "miii-agent",
3
- "version": "0.1.18",
3
+ "version": "0.1.19",
4
4
  "description": "Terminal AI coding agent powered by Ollama",
5
5
  "type": "module",
6
6
  "bin": {