npm - miii-agent - Versions diffs - 0.1.18 → 0.1.19 - Mend

miii-agent 0.1.18 → 0.1.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2)

package/dist/cli.js +256 -12
package/package.json +1 -1

package/dist/cli.js CHANGED

@@ -158,6 +158,30 @@ async function modelContext(entry, model) {
     throw err;
   }
 }
+async function paramCountB(entry, model) { // Resolves the model's parameter count in billions from makeClient(entry).show({ model }); resolves to null when it cannot be determined.
+  try {
+    const info = await makeClient(entry).show({ model });
+    const details = info.details;
+    if (details?.parameter_size) {
+      const m = details.parameter_size.match(/([\d.]+)\s*([BM])/i); // first number followed by a B or M suffix, case-insensitive
+      if (m) {
+        const n = parseFloat(m[1]);
+        if (!isNaN(n)) return m[2].toUpperCase() === "M" ? n / 1e3 : n; // "M" values are divided by 1e3 so the result is always in billions
+      }
+    } // NOTE(review): a parameter_size with any other unit suffix does not match above and falls through to model_info
+    const modelInfo = info.model_info;
+    if (modelInfo) {
+      const key = Object.keys(modelInfo).find((k) => k.endsWith("parameter_count")); // first model_info key ending in "parameter_count"
+      if (key) {
+        const val = Number(modelInfo[key]);
+        if (!isNaN(val) && val > 0) return val / 1e9; // raw count -> billions
+      }
+    }
+    return null;
+  } catch { // NOTE(review): every failure, including one thrown by show(), is collapsed to null - callers cannot tell "unknown size" from "request failed"
+    return null;
+  }
+}
 async function* chat(entry, model, messages, tools, opts) {
   if (opts?.signal?.aborted) return;
   const signal = opts?.signal;
@@ -177,15 +201,19 @@ async function* chat(entry, model, messages, tools, opts) {
       num_ctx: opts?.num_ctx ?? 8192
     };
     if (numPredict !== void 0 && numPredict > 0) options.num_predict = numPredict;
-    stream = await client.chat({
+    const req = {
       model,
       messages,
-      tools,
       stream: true,
       think: true,
       keep_alive: opts?.keep_alive ?? "10m",
       options
-    });
+    };
+    if (opts?.format) req.format = opts.format;
+    else if (tools) req.tools = tools;
+    stream = await client.chat(
+      req
+    );
   } catch (err) {
     if (signal?.aborted) return;
     if (isConnectionError(err)) {
@@ -452,6 +480,9 @@ var init_openai = __esm({
 function active() { // Returns whatever resolveProvider() returns; callers in this file read .name and .entry from it.
   return resolveProvider();
 }
+function providerName() { // Returns the name field of the provider object returned by active().
+  return active().name;
+}
 function isAvailable3() { // Availability check for the active provider entry.
   const { entry } = active();
   return entry.type === "ollama" ? isAvailable(entry) : isAvailable2(entry); // "ollama" entries go to isAvailable, every other type to isAvailable2
 }
@@ -468,6 +499,16 @@ async function modelContext3(model) {
   const { entry } = active();
   return entry.type === "ollama" ? modelContext(entry, model) : modelContext2(entry, model);
 }
+async function modelParamCountB(model) { // Cached parameter count (in billions) of model on the active provider entry; resolves to null for non-"ollama" entries.
+  const { entry } = active();
+  if (entry.type !== "ollama") return null;
+  const key = `${entry.baseUrl}:${model}`; // cache key combines the entry's baseUrl with the model name
+  const cached = paramCountCache.get(key);
+  if (cached !== void 0) return cached; // a cached null is returned as-is; only undefined means "not looked up yet"
+  const params = await paramCountB(entry, model);
+  paramCountCache.set(key, params); // NOTE(review): null is cached too, and paramCountB also returns null on errors, so a transient lookup failure is never retried for this key - confirm this is intended
+  return params;
+}
 async function* chat3(model, messages, tools, opts) {
   const { entry } = active();
   if (entry.type === "ollama") {
@@ -476,12 +517,70 @@ async function* chat3(model, messages, tools, opts) {
     yield* chat2(entry, model, messages, tools, opts);
   }
 }
+var paramCountCache; // declared here, assigned a Map inside the init_client initializer
 var init_client = __esm({
   "src/llm/client.ts"() {
     "use strict";
     init_config();
     init_ollama();
     init_openai();
+    paramCountCache = /* @__PURE__ */ new Map(); // "<baseUrl>:<model>" -> parameter count in billions, or null; read and written by modelParamCountB
+  }
+});
+// src/llm/grammar.ts
+function argProperties(props) { // Maps each property spec to a reduced node: { type } plus enum when the spec has a non-empty enum.
+  const out = {};
+  for (const [key, spec] of Object.entries(props)) {
+    const node = { type: spec.type }; // every other field of the spec is dropped
+    if (spec.enum && spec.enum.length) node.enum = spec.enum;
+    out[key] = node;
+  }
+  return out;
+}
+function toolBranch(tool) { // Builds the schema branch for one tool: an object with exactly { name: <const tool.name>, arguments: <object built from the tool's input_schema> }.
+  const args2 = {
+    type: "object",
+    additionalProperties: false,
+    properties: argProperties(tool.input_schema.properties) // NOTE(review): assumes input_schema.properties is always an object; Object.entries in argProperties throws on undefined - confirm every tool defines it
+  };
+  if (tool.input_schema.required && tool.input_schema.required.length) { // "required" is attached only when the tool lists at least one required argument
+    args2.required = tool.input_schema.required;
+  }
+  return {
+    type: "object",
+    additionalProperties: false,
+    required: ["name", "arguments"],
+    properties: {
+      name: { const: tool.name },
+      arguments: args2
+    }
+  };
+}
+function respondBranch() { // Schema branch for the RESPOND_ACTION action: arguments must be an object holding only a required string "message".
+  return {
+    type: "object",
+    additionalProperties: false,
+    required: ["name", "arguments"],
+    properties: {
+      name: { const: RESPOND_ACTION },
+      arguments: {
+        type: "object",
+        additionalProperties: false,
+        required: ["message"],
+        properties: { message: { type: "string" } }
+      }
+    }
+  };
+}
+function buildToolGrammar(tools) { // Returns a oneOf schema with one toolBranch per tool followed by the respond branch.
+  return { oneOf: [...tools.map(toolBranch), respondBranch()] };
+}
+var RESPOND_ACTION; // assigned inside the init_grammar initializer
+var init_grammar = __esm({
+  "src/llm/grammar.ts"() {
+    "use strict";
+    RESPOND_ACTION = "respond"; // NOTE(review): parseGrammarAction and streamRespondMessage hard-code the literal "respond" instead of reading this constant - keep them in sync
   }
 });
@@ -1149,8 +1248,15 @@ var init_context = __esm({
 });
 // src/prompt/system.ts
-function buildSystemPrompt(tools, cwd, project) {
+function buildSystemPrompt(tools, cwd, project, grammarMode = false) {
   const toolLines = tools.map((t) => `- ${t.name}: ${t.description}`).join("\n");
+  const actionProtocol = grammarMode ? `
+# Action protocol (strict)
+Every reply is exactly ONE JSON action object, nothing else \u2014 no prose outside it, no markdown, no fences. Decoding is grammar-constrained, so malformed output is impossible; your only job is to choose the right action.
+  To use a tool: {"name": "<tool_name>", "arguments": { ...that tool's args }}
+  To give your final answer to the user: {"name": "respond", "arguments": {"message": "<your full answer here>"}}
+Call tools until the GOAL is met, then emit a single "respond" action with the complete answer. The "respond" action is the ONLY way to end the turn and talk to the user \u2014 never put your final answer in a tool call.
+` : "";
   const projectSection = project && project.content.trim() ? `
 # ${CONTEXT_FILENAME} \u2014 project instructions (authoritative, read first)
 The user maintains ${CONTEXT_FILENAME} at ${project.source} to steer how you work in this project: conventions, commands, architecture, do's and don'ts. Treat it as direct instruction from the user, higher priority than your defaults. When it conflicts with a default rule below, ${CONTEXT_FILENAME} wins (except permissions and safety, which you never override).${project.truncated ? `
@@ -1175,6 +1281,17 @@ If GAPS is non-empty, ask the minimum questions needed to fill them \u2014 one m
 Re-read GOAL before every tool call. If a tool call does not move toward GOAL, skip it.
+# Plan summary before acting (conditional)
+Write a brief plan BEFORE the first tool call ONLY when the work is non-trivial:
+  - Multi-step, OR touches multiple files, OR is destructive/hard to reverse, OR mixes investigation + change.
+SKIP the plan for trivial work \u2014 a single read, one small edit, a quick search, a direct question. Just act.
+When you do write one:
+  - One or two plain-text sentences naming what you will do and in what order.
+  - State the intent (the bug/feature/fix and the steps), not a tool-by-tool narration.
+  - Keep it short \u2014 the user reads this to follow along, not a spec.
+Then begin immediately with the first tool call. Do not wait for approval unless GAPS was non-empty or the work is destructive.
+This summary is the ONE allowed preamble. It does not override the Tool calls rule below: after this plan, emit tool calls directly with no further narration between them.
 # Attention: re-attend to goal at each step
 After each tool result, answer silently: "Does this result move me toward GOAL?"
   YES \u2192 continue
@@ -1224,7 +1341,7 @@ Ask in a numbered list. One round of questions per turn. Then wait.
 # Tools
 You have access to the following tools. Call them via the function-calling interface.
 ${toolLines}
+${actionProtocol}
 # Loop semantics
 - When you need to act on the filesystem or run a command, emit a tool call.
 - After each tool result, decide: more tool calls, or a final plain-text answer.
@@ -1289,6 +1406,17 @@ function subjectFor(toolName, input) {
   if (typeof obj.path === "string") return obj.path;
   return "";
 }
+function generalizeCommand(command) { // Turns a concrete command into a "<prefix> *" pattern; the prefix is the first token, or the first two tokens when the first is in WRAPPER_PROGRAMS.
+  const tokens = command.trim().split(/\s+/);
+  if (tokens.length === 0 || tokens[0] === "") return command; // split() never returns an empty array, so the tokens[0] === "" test is what catches blank input
+  const prog = tokens[0];
+  const prefixLen = WRAPPER_PROGRAMS.has(prog) && tokens.length > 1 ? 2 : 1; // e.g. "npm test --watch" keeps "npm test"
+  const prefix = tokens.slice(0, prefixLen).join(" ");
+  return `${prefix} *`; // NOTE(review): only the leading token(s) are inspected, so a compound command such as "cd x && rm -rf y" becomes "cd *" - the persisted rule is far broader than what the user approved; consider returning the command unchanged when shell operators are present
+}
+function patternToPersist(toolName, subject) { // Chooses what to store for an "always" answer: run_bash subjects are generalized, every other tool keeps its subject unchanged.
+  return toolName === "run_bash" ? generalizeCommand(subject) : subject;
+}
 function globToRegExp(glob2) {
   const escaped = glob2.replace(/[.+^${}()|[\]\\]/g, "\\$&");
   const pattern = escaped.replace(/\*/g, ".*").replace(/\?/g, ".");
@@ -1309,15 +1437,28 @@ async function check(toolName, input, ctx) {
   if (rules.some((r) => matches(r, toolName, subject))) return "allow";
   const answer = await ctx.ask(toolName, input);
   if (answer === "no") return "deny";
-  if (answer === "always") addRule(toolName, subject);
+  if (answer === "always") addRule(toolName, patternToPersist(toolName, subject));
   return "allow";
 }
-var RULES_DIR, RULES_PATH, ALWAYS_ALLOW;
+var RULES_DIR, RULES_PATH, WRAPPER_PROGRAMS, ALWAYS_ALLOW; // all four are assigned inside the init_policy initializer
 var init_policy = __esm({
   "src/permissions/policy.ts"() {
     "use strict";
     RULES_DIR = join7(homedir5(), ".miii");
     RULES_PATH = join7(RULES_DIR, "permissions.json");
+    WRAPPER_PROGRAMS = /* @__PURE__ */ new Set([ // programs for which generalizeCommand keeps two leading tokens (program + first argument) instead of one
+      "npm",
+      "npx",
+      "pnpm",
+      "yarn",
+      "brew",
+      "pip",
+      "pip3",
+      "cargo",
+      "docker",
+      "kubectl",
+      "go"
+    ]);
     ALWAYS_ALLOW = /* @__PURE__ */ new Set(["read_file", "grep", "glob"]);
   }
 });
@@ -1430,6 +1571,75 @@ function extractFirstJsonObject(s) {
   }
   return null;
 }
+function parseGrammarAction(content, knownToolNames) { // Parses model output into { kind: "respond", message } or { kind: "tool", name, arguments }; returns null when no usable action is found.
+  if (!content) return null;
+  let raw = content.trim();
+  if (!raw.startsWith("{")) { // text does not begin with an object: fall back to the first JSON object extractFirstJsonObject can find
+    const found = extractFirstJsonObject(raw);
+    if (!found) return null;
+    raw = found.json;
+  }
+  let obj;
+  try {
+    obj = JSON.parse(raw);
+  } catch { // direct parse failed: retry on the first extractable JSON object inside raw
+    const found = extractFirstJsonObject(raw);
+    if (!found) return null;
+    try {
+      obj = JSON.parse(found.json);
+    } catch {
+      return null;
+    }
+  }
+  const name = typeof obj.name === "string" ? obj.name : void 0;
+  const args2 = obj.arguments ?? {}; // missing or null arguments default to {}; NOTE(review): a non-object value (e.g. a string) is passed through unchecked
+  if (!name) return null;
+  if (name === "respond") { // NOTE(review): literal duplicates the RESPOND_ACTION constant
+    const message = typeof args2.message === "string" ? args2.message : ""; // a non-string message becomes ""
+    return { kind: "respond", message };
+  }
+  if (!knownToolNames.includes(name)) return null; // names outside knownToolNames are rejected
+  return { kind: "tool", name, arguments: args2 };
+}
+function streamRespondMessage(text) { // Decodes the "message" string of a possibly partial respond action; returns { message, complete }, or null when text does not yet contain a respond action with an opened message string.
+  if (!/"name"\s*:\s*"respond"/.test(text)) return null; // only applies once the text contains "name": "respond"
+  const m = text.match(/"message"\s*:\s*"/);
+  if (!m || m.index == null) return null;
+  const start = m.index + m[0].length; // index of the first character after the opening quote of the message string
+  const escapes = { // single-character escapes mapped to the character they decode to
+    n: "\n",
+    t: "	",
+    r: "\r",
+    b: "\b",
+    f: "\f",
+    '"': '"',
+    "\\": "\\",
+    "/": "/"
+  };
+  let out = "";
+  let i = start;
+  while (i < text.length) {
+    const ch = text[i];
+    if (ch === '"') return { message: out, complete: true }; // an unescaped quote closes the string
+    if (ch === "\\") {
+      const nx = text[i + 1];
+      if (nx === void 0) break; // dangling backslash at the end of the partial text: stop here
+      if (nx === "u") {
+        const hex = text.slice(i + 2, i + 6);
+        if (hex.length < 4) break; // incomplete \uXXXX escape: stop here
+        out += String.fromCharCode(parseInt(hex, 16)); // NOTE(review): when hex has no valid leading hex digit parseInt returns NaN and fromCharCode(NaN) yields "\u0000" - confirm the input is always valid JSON
+        i += 6;
+        continue;
+      }
+      out += escapes[nx] ?? nx; // escapes not in the table keep the escaped character itself
+      i += 2;
+      continue;
+    }
+    out += ch;
+    i++;
+  }
+  return { message: out, complete: false }; // ran out of text before a closing quote
+}
 function blocksFromOllama(text, tool_calls, knownToolNames = []) {
   const blocks = [];
   let finalText = text;
@@ -1487,8 +1697,15 @@ function markSeen(name, input, seen) {
 async function* runAgent(opts) {
   const { model, cwd, permissions, hooks, signal, num_ctx } = opts;
   const startTime = Date.now();
-  const system = buildSystemPrompt(TOOLS, cwd, loadProjectContext(cwd));
+  let useGrammar = false;
+  if (providerName() === "ollama") {
+    const params = await modelParamCountB(model);
+    useGrammar = params == null || params <= GRAMMAR_MAX_PARAMS_B;
+  }
+  const system = buildSystemPrompt(TOOLS, cwd, loadProjectContext(cwd), useGrammar);
+  const grammar = useGrammar ? buildToolGrammar(TOOLS) : void 0;
   const ollamaTools = toOllamaTools(TOOLS);
+  const toolNames = TOOLS.map((t) => t.name);
   const effort = EFFORT_OPTIONS[loadConfig().effort ?? "medium"];
   const history = [
     ...opts.history,
@@ -1502,6 +1719,8 @@ async function* runAgent(opts) {
   for (let turn = 0; turn < MAX_TURNS; turn++) {
     let text = "";
     let tool_calls;
+    let respondEmitted = 0;
+    let streamedRespond = false;
     let lastTail = "";
     let tailRepeats = 0;
     let streamLooped = false;
@@ -1509,11 +1728,22 @@ async function* runAgent(opts) {
     const composedSignal = signal ? AbortSignal.any ? AbortSignal.any([signal, ac.signal]) : ac.signal : ac.signal;
     if (signal) signal.addEventListener("abort", () => ac.abort(), { once: true });
     try {
-      for await (const chunk of chat3(model, toOllamaMessages(history, system), ollamaTools, { signal: composedSignal, num_ctx, num_predict: effort.num_predict, temperature: effort.temperature })) {
+      for await (const chunk of chat3(model, toOllamaMessages(history, system), useGrammar ? void 0 : ollamaTools, { signal: composedSignal, num_ctx, num_predict: effort.num_predict, temperature: effort.temperature, format: grammar })) {
         if (signal?.aborted) break;
         if (chunk.content) {
           text += chunk.content;
-          yield { type: "text-delta", text: chunk.content };
+          if (!useGrammar) {
+            yield { type: "text-delta", text: chunk.content };
+          } else {
+            const r = streamRespondMessage(text);
+            if (r) {
+              streamedRespond = true;
+              if (r.message.length > respondEmitted) {
+                yield { type: "text-delta", text: r.message.slice(respondEmitted) };
+                respondEmitted = r.message.length;
+              }
+            }
+          }
           if (text.length >= REPEAT_TAIL) {
             const tail = text.slice(-REPEAT_TAIL);
             if (tail === lastTail) {
@@ -1561,7 +1791,19 @@ async function* runAgent(opts) {
       };
       return history;
     }
-    const blocks = blocksFromOllama(text, tool_calls, TOOLS.map((t) => t.name));
+    let blocks;
+    if (useGrammar) {
+      const action = parseGrammarAction(text, toolNames);
+      if (action?.kind === "tool") {
+        blocks = [{ type: "tool_use", id: mintToolUseId(), name: action.name, input: action.arguments }];
+      } else {
+        const message = action?.kind === "respond" ? action.message : text.trim();
+        if (message && !streamedRespond) yield { type: "text-delta", text: message };
+        blocks = message ? [{ type: "text", text: message }] : [];
+      }
+    } else {
+      blocks = blocksFromOllama(text, tool_calls, toolNames);
+    }
     const tool_uses = blocks.filter((b) => b.type === "tool_use");
     history.push({ role: "assistant", content: blocks });
     if (tool_uses.length === 0) {
@@ -1670,11 +1912,12 @@ async function* runAgent(opts) {
   yield { type: "done", prompt_tokens: promptTokens, eval_tokens: evalTokens };
   return history;
 }
-var MAX_TURNS, REPEAT_TAIL, REPEAT_KILL;
+var MAX_TURNS, REPEAT_TAIL, REPEAT_KILL, GRAMMAR_MAX_PARAMS_B;
 var init_loop = __esm({
   "src/agent/loop.ts"() {
     "use strict";
     init_client();
+    init_grammar();
     init_paths();
     init_registry();
     init_validate();
@@ -1686,6 +1929,7 @@ var init_loop = __esm({
     MAX_TURNS = 25;
     REPEAT_TAIL = 120;
     REPEAT_KILL = 4;
+    GRAMMAR_MAX_PARAMS_B = 14;
   }
 });

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "miii-agent",
-  "version": "0.1.18",
+  "version": "0.1.19",
   "description": "Terminal AI coding agent powered by Ollama",
   "type": "module",
   "bin": {