npm - shmakk - Versions diffs - 1.2.3 → 1.2.5 - Mend

shmakk 1.2.3 → 1.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

package/.env.example +11 -0
package/README.md +75 -1
package/docs/index.html +154 -16
package/docs/mcp.md +78 -0
package/docs/ssh.md +82 -0
package/docs/vibedit-analysis.md +375 -0
package/docs/vim.md +110 -0
package/docs/voice.md +4 -0
package/package.json +9 -5
package/scripts/test-vibedit.js +45 -0
package/scripts/vibedit-demo.sh +52 -0
package/skills/shmakk-skill-creator.md +269 -0
package/src/_check.js +7 -0
package/src/_check_schema.js +5 -0
package/src/_cleanup.js +18 -0
package/src/_fix.js +9 -0
package/src/_test_import.js +15 -0
package/src/agent.js +11 -4
package/src/browser-daemon.js +209 -0
package/src/browser.js +10 -0
package/src/cli/browserDaemon.js +60 -0
package/src/cli/connectBrowser.js +137 -0
package/src/cli.js +235 -8
package/src/completions.js +8 -0
package/src/control.js +273 -1
package/src/core/browserConnector.js +523 -0
package/src/correction.js +6 -0
package/src/electron.js +305 -0
package/src/endpoints.js +74 -9
package/src/index.js +24 -1
package/src/llm.js +501 -61
package/src/mobile.js +307 -0
package/src/notify.js +51 -3
package/src/orchestrator.js +35 -1
package/src/pty.js +11 -6
package/src/review.js +45 -11
package/src/self-commands.js +153 -0
package/src/session-convert.js +508 -0
package/src/session-search.js +31 -0
package/src/session.js +392 -46
package/src/skills/browserActions.ts +984 -0
package/src/skills.js +451 -24
package/src/system-prompt.js +31 -25
package/src/tools.js +81 -0
package/src/vibedit/control.js +534 -0
package/src/vibedit/electron.js +108 -0
package/src/vibedit/files.js +171 -0
package/src/vibedit/index.js +298 -0
package/src/vibedit/overlay.js +1482 -0
package/src/vibedit/prompts.js +245 -0
package/src/vibedit/state.js +32 -0
package/src/vim.js +410 -0

package/skills/shmakk-skill-creator.md ADDED Viewed

@@ -0,0 +1,269 @@
+---
+name: shmakk-skill-creator
+description: "Create or convert skills for shmakk. Two modes: (1) CREATE — guided authoring of a new shmakk skill from a plain description; (2) CONVERT — take a Claude Code .skill zip and produce a shmakk-native skill directory. Use CREATE when the user describes a new workflow or capability they want to package. Use CONVERT when the user drops a .skill file or references a .skill path. Both modes output an installable shmakk skill directory ready for ~/.config/shmakk/skills/ and shmakk-desktop."
+category: workflow
+---
+# Shmakk Skill Creator
+Two entry points. Read the user's message and pick one:
+| Signal | Mode |
+|---|---|
+| User describes a new capability/workflow in plain language | → **CREATE** |
+| User provides a `.skill` file path or drops a `.skill` zip | → **CONVERT** |
+---
+## MODE A — CREATE
+Build a new shmakk skill from scratch. The output is a single `SKILL.md` (or a directory if the
+skill has sub-agents) that works with the shmakk runtime and renders correctly in shmakk-desktop's
+Skills Browser and Workflows view.
+### Step 1 — Capture intent
+Ask (or infer from context) only what changes the output meaningfully:
+1. **Name** — short kebab-case identifier (`campaign-planner`, `pr-reviewer`)
+2. **What it does** — a one-sentence trigger description
+3. **Single-step or multi-agent?** — does this need parallel/pipeline sub-agents, or is it one focused prompt?
+4. **Category** — `dev`, `workflow`, `backend`, `frontend`, `media`, `docs`, `system`, `business`, `productivity`, `security`, `planning`, `research`, `general`
+5. **Argument hint** — what the user passes when invoking (`<business description>`, `<PR number>`, etc.)
+Do not ask for things you can infer. If the description clearly implies multi-agent (research → synthesis,
+audit → fix → verify, plan → execute → check), call it multi-agent without asking.
+### Step 2 — Skill shape decision
+**Single-step skill** — one `SKILL.md` with a strong system-prompt body. Use when the task is
+self-contained and doesn't need separate agents for sub-phases.
+**Multi-agent skill** — a directory with `SKILL.md` as the orchestrator plus agent role files. Use
+when phases need isolation (independent research branches, synthesis agents that must not see each other's
+drafts mid-flight, a verification step that shouldn't share context with the execution step).
+```
+<name>/
+  SKILL.md                    ← orchestrator prompt + workflow metadata
+  agents/
+    <phase>/
+      <NN>-<role>.md          ← one role file per sub-agent; NN = execution order
+  references/                 ← shared docs injected into sub-agents as context
+  assets/                     ← templates, schemas, examples
+```
+### Step 3 — Write the SKILL.md
+Every SKILL.md starts with this frontmatter:
+```yaml
+---
+name: <kebab-case-name>
+description: '<trigger description — when to use this skill, what it does, key phrases that should load it>'
+category: <category>
+argument-hint: '<what the user passes>'
+# For multi-agent skills only:
+skill-type: orchestration
+workflow:
+  topology: <parallel|pipeline|staged>
+  phases:
+    - name: <phase-name>
+      topology: <parallel|pipeline>
+      agents: [<role-file-paths>]
+---
+```
+The body of SKILL.md is the **orchestration prompt** — instructions for the agent that runs this
+skill. Write it to the same standard as the rest of the shmakk skill library:
+- Lead with a one-paragraph summary of what the skill produces.
+- Describe each phase: what agents run, in what order, what each receives and returns.
+- State quality gates explicitly (what a failing output looks like and what to do — bounce back, not
+  silently accept).
+- Name the final deliverable precisely: file path, shape, content contract.
+- End with anti-patterns: the most common ways this skill produces bad output.
+For sub-agent role files (`agents/<phase>/<NN>-<role>.md`), write each as a focused brief:
+- Role in one line
+- Inputs (what the orchestrator passes)
+- What to produce (file path, format)
+- Effort floor or quality bar
+- Anti-fluff rule specific to this role
+### Step 4 — Output the skill
+For a **single-step skill**: write `SKILL.md` directly to the target path.
+For a **multi-agent skill**: write the full directory tree. Then print:
+```
+✓ Skill created: <name>/
+  Install:  cp -r <name>/ ~/.config/shmakk/skills/<category>/<name>/
+  Or via:   shmakk install <name>.skill   (after packaging with: zip -r <name>.skill <name>/)
+```
+---
+## MODE B — CONVERT
+Take a Claude Code `.skill` zip and produce a shmakk-native skill that:
+- Works with `shmakk run skill <name>`
+- Renders in shmakk-desktop's Skills Browser (card with name, description, category, status)
+- Shows phases and steps in shmakk-desktop's Workflows view
+### Step 1 — Ingest the zip
+```bash
+unzip <path>.skill -d /tmp/skill-convert/
+```
+Read the extracted tree. Expected shape:
+```
+<name>/
+  SKILL.md                    ← Claude Code orchestration prompt (frontmatter + prose)
+  agents/
+    research/                 ← parallel research sub-agents (optional)
+    synthesis/                ← synthesis sub-agents; last one = assembler (optional)
+    <other-phase>/            ← any other phase name
+  references/                 ← shared context docs
+  assets/                     ← templates, schemas
+```
+If the shape differs (flat directory, non-standard phase names, etc.) — adapt rather than fail.
+The structure is a convention, not a contract.
+### Step 2 — Analyse phases
+Read `SKILL.md` body to understand the workflow. Then read each agent file header (first 20 lines)
+to understand its role. Build a phase map:
+| Phase dir | Topology | Notes |
+|---|---|---|
+| `agents/research/` | **parallel** | All research agents run concurrently |
+| `agents/synthesis/` | **staged** | All except last run in parallel; last = assembler runs after |
+| Single `agents/` flat | infer from filenames | NN- prefix → pipeline order; no prefix → parallel |
+| Custom phase dirs | read SKILL.md | The orchestrator prose describes the order |
+### Step 3 — Map Claude Code → shmakk conventions
+Apply these substitutions throughout the orchestration prompt and agent role files:
+| Claude Code | shmakk equivalent | Notes |
+|---|---|---|
+| `Task(prompt, ...)` / "spawn a sub-agent" | `subagent(role, task, context)` | shmakk's team.js dispatch |
+| `WebSearch(query)` | `WebSearch` | Same name, keep as-is |
+| `WebFetch(url)` / `web_fetch` | `WebFetch` | Same name, keep as-is |
+| `Write(path, content)` | `Write` | Same |
+| `Read(path)` | `Read` | Same |
+| `Bash(cmd)` | `Bash` | Same |
+| "read `references/X.md` first" | inject as `context` field in subagent call | shmakk passes context docs to sub-agents explicitly |
+| "run in parallel in the same turn" | `topology: parallel` in workflow metadata | shmakk team.js runs parallel steps via Promise.all |
+| "run sequentially, each sees prior output" | `topology: pipeline` | shmakk passes prior step output to next |
+| `fallback.py` / stdlib fallback | note in SKILL.md as optional; shmakk uses LLM fallback | Strip if it references Claude-specific APIs |
+Do **not** strip role files, references, or assets — they carry domain knowledge. Only touch the
+tool-call syntax and the spawn patterns.
+### Step 4 — Rewrite the orchestration SKILL.md
+Keep the original prose and domain logic. Change only:
+1. Replace the frontmatter entirely with shmakk frontmatter (see Step 3 of CREATE mode).
+   - Detect `name` from the folder name or original frontmatter.
+   - Detect `category` from the content (marketing → `business`; code → `dev`; etc.).
+   - Copy `description` from the original; trim it if it exceeds 400 characters.
+   - Add `skill-type: orchestration` and a `workflow:` block derived from the phase map.
+2. In the body, replace every Claude Code spawn pattern with shmakk's:
+   **Before (Claude Code):**
+   ```
+   Spawn three sub-agents in the same turn. Pass each: the path to its role file, references/research-standards.md, the brief.
+   ```
+   **After (shmakk):**
+   ```
+   Run three sub-agents in parallel (topology: parallel). For each, pass: its role file content, references/research-standards.md as context, the brief. shmakk will run these concurrently via the team runner.
+   ```
+3. Replace any reference to Claude Code tools by name (`claude`, `claude-code`, `/skill`, slash
+   commands) with shmakk equivalents (`shmakk run skill`, `shmakk`). Keep all domain logic intact.
+### Step 5 — Generate workflow.json
+This file drives shmakk-desktop's Workflows view. One JSON object per workflow (most skills have one).
+```json
+{
+  "id": "<name>",
+  "description": "<one-line description>",
+  "topology": "staged",
+  "stages": [
+    {
+      "name": "<phase-name>",
+      "topology": "parallel",
+      "steps": [
+        { "role": "<role>", "task": "<one-line task description>", "agentFile": "agents/<phase>/<file>.md" }
+      ]
+    }
+  ]
+}
+```
+For a simple pipeline (no parallel phases), use a flat `steps` array instead of `stages`, matching
+the format in `src/workflows.js`.
+### Step 6 — Write the output directory
+```
+<name>/                            ← drop-in shmakk skill directory
+  SKILL.md                         ← rewritten orchestration prompt
+  workflow.json                    ← desktop Workflows view descriptor
+  agents/                          ← agent role files (kept verbatim, paths unchanged)
+  references/                      ← reference docs (kept verbatim)
+  assets/                          ← asset templates (kept verbatim)
+```
+Then print:
+```
+✓ Converted: <original-name>.skill → <name>/
+  What changed:
+  - Frontmatter: replaced with shmakk format
+  - Spawn patterns: Claude Code Task() → shmakk subagent dispatch
+  - workflow.json: generated for shmakk-desktop Workflows view
+  - Tool names: <list any that were remapped>
+  - Kept intact: agent role files, references, assets
+  Install:
+    cp -r <name>/ ~/.config/shmakk/skills/<category>/<name>/
+  Or package and install:
+    zip -r <name>.skill <name>/
+    shmakk install <name>.skill
+```
+If anything couldn't be cleanly mapped (custom tool calls, Claude-specific APIs, platform-specific
+slash commands), list them explicitly under **Manual review needed** so the user knows what to check.
+---
+## Output quality bar (both modes)
+A skill is ready to ship when:
+- `SKILL.md` frontmatter is valid YAML with `name`, `description`, `category`.
+- The description is specific enough that the shmakk dispatcher will match it correctly — not "does stuff", but the actual trigger phrases.
+- Every sub-agent file has a clear role, explicit inputs, and a defined output (file path + format).
+- Quality gates are named: what failing output looks like and what the orchestrator does about it.
+- `workflow.json` (if present) is valid JSON and its `agentFile` paths resolve in the directory.
+- The skill installs cleanly: `cp -r` to `~/.config/shmakk/skills/<category>/<name>/` and `shmakk run skill <name>` finds it.
+A skill is **not** ready when:
+- The description would match the wrong user intent (too broad) or never match (too narrow/technical).
+- Sub-agent role files reference paths that don't exist in the directory.
+- The orchestration prompt tells agents to "just do their best" with no quality gate — this produces variable-quality output that can't be improved systematically.
+- `workflow.json` has hardcoded absolute paths or references files outside the skill directory.

package/src/_check.js ADDED Viewed

@@ -0,0 +1,7 @@
+// Debug helper: print the session row(s) and the turn count for one session
+// id, so an import can be checked by hand.
+//
+// Usage: node _check.js [sessionId]   (defaults to 'import-2026-06-28')
+const os = require('os');
+const path = require('path');
+const D = require('better-sqlite3');
+// Resolve the database under the current user's home directory instead of a
+// hard-coded /home/marcus path, so the script runs on any machine.
+const db = new D(path.join(os.homedir(), '.config', 'shmakk', 'sessions.db'));
+const sessionId = process.argv[2] || 'import-2026-06-28';
+try {
+  // Bound parameters keep the id out of the SQL text.
+  const s = db.prepare('SELECT * FROM sessions WHERE id = ?').all(sessionId);
+  console.log('Sessions:', JSON.stringify(s));
+  const t = db.prepare('SELECT COUNT(*) as c FROM turns WHERE session_id = ?').get(sessionId);
+  console.log('Turns:', t.c);
+} finally {
+  // Release the handle even when a query throws.
+  db.close();
+}

package/src/_check_schema.js ADDED Viewed

@@ -0,0 +1,5 @@
+// Debug helper: print the CREATE statement of every table recorded in
+// sqlite_master for the shmakk sessions database.
+const os = require('os');
+const path = require('path');
+const D = require('better-sqlite3');
+// Per-user path rather than a hard-coded /home/marcus location.
+const db = new D(path.join(os.homedir(), '.config', 'shmakk', 'sessions.db'));
+try {
+  const tables = db.prepare("SELECT sql FROM sqlite_master WHERE type='table'").all();
+  for (const t of tables) {
+    console.log(t.sql);
+  }
+} finally {
+  // Release the handle even when the query throws.
+  db.close();
+}

package/src/_cleanup.js ADDED Viewed

@@ -0,0 +1,18 @@
+// Debug helper: remove a fixed list of leftover debug/import sessions from
+// the shmakk sessions database (turns, sessions, files_touched and
+// project_sessions rows).
+const os = require('os');
+const path = require('path');
+const D = require('better-sqlite3');
+// Per-user path rather than a hard-coded /home/marcus location.
+const db = new D(path.join(os.homedir(), '.config', 'shmakk', 'sessions.db'));
+const toDelete = ['debug-cli', 'debug-cli2', 'debug-cli3', 'debug-cli4', 'debug-import', 'import-2026-06-28'];
+try {
+  // Prepare each statement once instead of re-compiling it per session id.
+  const deleteTurns = db.prepare('DELETE FROM turns WHERE session_id = ?');
+  const deleteSession = db.prepare('DELETE FROM sessions WHERE id = ?');
+  const deleteFilesTouched = db.prepare('DELETE FROM files_touched WHERE session_id = ?');
+  const deleteProjectSessions = db.prepare('DELETE FROM project_sessions WHERE session_id = ?');
+  // Run the whole purge in one transaction so a failure part-way through
+  // cannot leave a session deleted from some tables but not others.
+  const purge = db.transaction((ids) => ids.map((sid) => {
+    // Delete from turns first (FTS will auto-update via triggers)
+    const t = deleteTurns.run(sid);
+    const s = deleteSession.run(sid);
+    deleteFilesTouched.run(sid);
+    deleteProjectSessions.run(sid);
+    return { sid, turns: t.changes, sessions: s.changes };
+  }));
+  // Report only after the commit so the log never describes rolled-back work.
+  for (const r of purge(toDelete)) {
+    console.log('Deleted ' + r.sid + ': ' + r.turns + ' turns, ' + r.sessions + ' sessions');
+  }
+} finally {
+  db.close();
+}
+console.log('Done.');

package/src/_fix.js ADDED Viewed

@@ -0,0 +1,9 @@
+// Debug helper: delete the stale 'import-2026-06-28' row from the sessions
+// table and report how many rows were removed.
+const os = require('os');
+const path = require('path');
+const D = require('better-sqlite3');
+// Per-user path rather than a hard-coded /home/marcus location.
+const db = new D(path.join(os.homedir(), '.config', 'shmakk', 'sessions.db'));
+try {
+  // Delete stale session record
+  const info = db.prepare('DELETE FROM sessions WHERE id = ?').run('import-2026-06-28');
+  console.log('Deleted sessions row:', info.changes);
+  // TODO: the original note says to also check for a sessions.jsonl record;
+  // no such check is implemented here.
+} finally {
+  // Release the handle even when the delete throws.
+  db.close();
+}

package/src/_test_import.js ADDED Viewed

@@ -0,0 +1,15 @@
+// Debug helper: call claude2shmakk() once and report whether it succeeded.
+//
+// Usage: node _test_import.js [sourcePath] [sessionId]
+// Both arguments are optional and default to the values the script was
+// originally written with.
+const { claude2shmakk } = require('./session-convert');
+const DEFAULT_SOURCE = "/home/marcus/.config/Claude/local-agent-mode-sessions/2cebf85a-f27d-41a6-888e-6cff059551a8/f7d639de-d492-4e65-a14b-4d6cd8dfab7a/local_2a11b98e-e761-4401-8b43-0c0ab1fb04f1";
+const DEFAULT_SESSION_ID = "import-2026-06-28";
+async function main() {
+  const source = process.argv[2] || DEFAULT_SOURCE;
+  const sessionId = process.argv[3] || DEFAULT_SESSION_ID;
+  try {
+    await claude2shmakk(source, sessionId);
+    console.log("SUCCESS");
+  } catch(e) {
+    console.error("FAILED:", e.message);
+    console.error(e.stack);
+    // Surface the failure to the calling shell; without this the script
+    // exits with status 0 even when the import threw.
+    process.exitCode = 1;
+  }
+}
+main();

package/src/agent.js CHANGED Viewed

@@ -7,7 +7,7 @@
 const fs = require('fs');
 const path = require('path');
-const { makeClient, modelFor, isConfigured, getDeepSeekOptions, supportsVision } = require('./llm');
+const { makeClient, modelFor, isConfigured, getDeepSeekOptions, supportsVision, describeImages } = require('./llm');
 const {
   sanitizeAssistantContent,
   isLeakedToolMarkup,
@@ -561,9 +561,16 @@ async function runAgent({ input, roots, glossary, confirmTool, write, signal, hi
       );
       let toolContent = (toolText + (Object.keys(toolMeta).length ? ' ' + JSON.stringify(toolMeta) : '')).trim();
       if (toolImages.length > 0 && !supportsVision()) {
-        // Endpoint doesn't support vision — include image metadata as text
-        const imgDesc = toolImages.map((img, i) => `[Image #${i + 1}: ${img.mimeType}, base64=${img.dataLength} chars${img.truncated ? ', truncated' : ''}]`).join(', ');
-        toolContent = toolContent ? `${toolContent} ${imgDesc}` : imgDesc;
+        // Endpoint doesn't support vision — call a vision-capable endpoint
+        // to describe the images as text for the non-vision model.
+        const visionDesc = await describeImages(toolImages, signal);
+        if (visionDesc) {
+          toolContent = toolContent ? `${toolContent}\n${visionDesc}` : visionDesc;
+        } else {
+          // Fallback: include image metadata as text
+          const imgDesc = toolImages.map((img, i) => `[Image #${i + 1}: ${img.mimeType}, base64=${img.dataLength} chars${img.truncated ? ', truncated' : ''}]`).join(', ');
+          toolContent = toolContent ? `${toolContent} ${imgDesc}` : imgDesc;
+        }
       }
       messages.push({ role: 'tool', tool_call_id: c.id, content: toolContent.slice(0, 8000) });
       if (toolImages.length > 0 && supportsVision()) {

package/src/browser-daemon.js ADDED Viewed

@@ -0,0 +1,209 @@
+const fs = require('fs');
+const http = require('http');
+const os = require('os');
+const path = require('path');
+const { WebSocketServer } = require('ws');
+const llm = require('./llm');
+const { getModelRegistry, getVisionSupport } = require('./endpoints');
+const { automationSystem, automationUser } = require('./vibedit/prompts');
+// Port startBrowserDaemon() listens on when the caller supplies no usable port.
+const DEFAULT_PORT = 3947;
+// JSON file under the user's home directory that saveState() writes daemon status to.
+const STATE_PATH = path.join(os.homedir(), '.config', 'shmakk', 'browser-daemon.json');
+/**
+ * Reduce a model reply to the text that should be handed to JSON.parse.
+ *
+ * If the reply contains a "{ ... }" span, that span (first "{" through the
+ * last "}") is returned. Otherwise a leading and a trailing Markdown code
+ * fence are removed and the remainder is trimmed.
+ *
+ * @param {*} s - Raw reply; falsy values are treated as an empty string.
+ * @returns {string} The extracted text.
+ */
+function stripFences(s) {
+  const text = String(s || '');
+  // Greedy on purpose: spans from the first "{" to the last "}".
+  const braced = /\{[\s\S]*\}/.exec(text);
+  if (braced !== null) {
+    return braced[0].trim();
+  }
+  const withoutOpeningFence = text.replace(/^\s*```(?:json)?\s*/i, '');
+  const withoutClosingFence = withoutOpeningFence.replace(/\s*```\s*$/, '');
+  return withoutClosingFence.trim();
+}
+/**
+ * Persist a daemon status snapshot to STATE_PATH as pretty-printed JSON,
+ * stamped with the current time in `updatedAt`.
+ *
+ * @param {object} state - Fields to store.
+ */
+function saveState(state) {
+  try {
+    const dir = path.dirname(STATE_PATH);
+    fs.mkdirSync(dir, { recursive: true });
+    const snapshot = { ...state, updatedAt: Date.now() };
+    const json = JSON.stringify(snapshot, null, 2);
+    fs.writeFileSync(STATE_PATH, json);
+  } catch {
+    // Best effort: any failure to create the directory or write the file is ignored.
+  }
+}
+/**
+ * Pick a client for a vision-capable endpoint.
+ *
+ * The first registry model whose config has a truthy `vision` flag wins;
+ * otherwise the top-level `visionSupport` key is used when
+ * getVisionSupport() reports one.
+ *
+ * @returns {*} Whatever llm.makeClientForEndpoint() returns for the chosen
+ *   endpoint, or null when no vision endpoint is found.
+ */
+function findVisionClient() {
+  const { models } = getModelRegistry();
+  const visionEntry = Object.entries(models).find(([, cfg]) => cfg.vision);
+  if (visionEntry) {
+    const [endpointName] = visionEntry;
+    return llm.makeClientForEndpoint(endpointName);
+  }
+  // Fall back to top-level visionSupport key
+  return getVisionSupport() ? llm.makeClientForEndpoint('visionSupport') : null;
+}
+/**
+ * Choose the client used for automation requests, in priority order:
+ * the supplied vision client, the 'fast' endpoint, then the default client.
+ *
+ * @param {{client: *}|null} visionClient - Result of findVisionClient(), if any.
+ * @returns {Promise<*>} The chosen client, or null when no 'fast' endpoint
+ *   exists and llm.isConfigured() is false.
+ */
+async function getClient(visionClient) {
+  if (visionClient) {
+    return visionClient.client;
+  }
+  const fastEndpoint = llm.makeClientForEndpoint('fast');
+  if (fastEndpoint) {
+    return fastEndpoint.client;
+  }
+  return llm.isConfigured() ? llm.makeClient() : null;
+}
+/**
+ * Send one chat-completion request and return the reply text.
+ *
+ * @param {object} client - Object exposing `chat.completions.create(request)`.
+ * @param {Array<object>} messages - Chat messages to send.
+ * @param {string} model - Model name to request.
+ * @param {{temperature?: number, maxTokens?: number}} [opts] - temperature
+ *   defaults to 0.2 when null/undefined; maxTokens defaults to 2048 when falsy.
+ * @returns {Promise<string>} Trimmed content of the first choice, or '' when absent.
+ */
+async function chatCompletion(client, messages, model, opts = {}) {
+  const request = {
+    model,
+    messages,
+    temperature: opts.temperature ?? 0.2,
+    max_tokens: opts.maxTokens || 2048,
+  };
+  const response = await client.chat.completions.create(request);
+  const content = response?.choices?.[0]?.message?.content;
+  return (content || '').trim();
+}
+/**
+ * Serialize a message as JSON and send it over a WebSocket.
+ * Any error raised while serializing or sending is ignored.
+ *
+ * @param {{send: Function}} ws - Socket to write to.
+ * @param {*} msg - Value to JSON-encode.
+ */
+function send(ws, msg) {
+  try {
+    const payload = JSON.stringify(msg);
+    ws.send(payload);
+  } catch {
+    // Best effort: a failed send is dropped silently.
+  }
+}
+/**
+ * Build the system prompt for daemon automation requests: the base prompt
+ * from automationSystem() followed by the list of tab and tab-group actions
+ * the browser extension accepts.
+ *
+ * @returns {string} The combined system prompt.
+ */
+function daemonAutomationSystem() {
+  const tabActions = `
+Additional browser-extension actions are allowed:
+- "newTab": { "action": "newTab", "url": "https://...", "active": true, "description": "..." }
+- "reload": { "action": "reload", "description": "..." }
+- "closeTab": { "action": "closeTab", "description": "..." }
+- "switchTab": { "action": "switchTab", "tabId": 123, "description": "..." }
+- "createGroup": { "action": "createGroup", "title": "Group name", "color": "blue", "description": "..." }
+- "moveToGroup": { "action": "moveToGroup", "groupId": 123, "description": "..." }
+- "ungroup": { "action": "ungroup", "description": "..." }
+Use these only when the user asks for tab or tab-group management.`;
+  return automationSystem() + tabActions;
+}
+/**
+ * Handle one "automation" message from the extension.
+ *
+ * - If the message carries recorded `directActions`, they are sent straight
+ *   back as an `executeActions` message and no model is called.
+ * - Otherwise a model is asked for actions. When vision is enabled and
+ *   screenshots are present, the screenshots are attached; if that request
+ *   throws, the same request is retried as text only.
+ * - The reply is parsed as JSON. A parse failure, a reply with actions and a
+ *   reply without actions each produce a different message to the socket.
+ *
+ * @param {object} ws - WebSocket to reply on.
+ * @param {object} msg - Incoming message (`directActions`, `screenshots`, plus
+ *   whatever automationUser() reads).
+ * @param {{visionClient: *, visionModel: ?string, visionEnabled: boolean}} runtime
+ * @returns {Promise<void>}
+ */
+async function handleAutomation(ws, msg, runtime) {
+  const replay = Array.isArray(msg.directActions) ? msg.directActions : [];
+  if (replay.length > 0) {
+    const plural = replay.length === 1 ? '' : 's';
+    send(ws, {
+      type: 'executeActions',
+      actions: replay,
+      summary: `Replaying ${replay.length} recorded action${plural}`,
+      notes: '',
+    });
+    return;
+  }
+
+  const client = await getClient(runtime.visionClient);
+  if (!client) {
+    send(ws, { type: 'error', text: 'LLM not configured' });
+    return;
+  }
+
+  const fastEndpoint = llm.makeClientForEndpoint('fast');
+  const model = runtime.visionModel || (fastEndpoint ? fastEndpoint.model : null) || llm.modelFor();
+  send(ws, { type: 'status', text: `Building browser automation with ${model}...` });
+
+  const shots = msg.screenshots && msg.screenshots.length ? msg.screenshots : [];
+  const userContent = automationUser(msg);
+
+  // Text-only request; used directly and as the fallback for a failed vision call.
+  const askTextOnly = () => chatCompletion(client, [
+    { role: 'system', content: daemonAutomationSystem() },
+    { role: 'user', content: userContent },
+  ], model);
+
+  let raw;
+  if (runtime.visionEnabled && shots.length) {
+    const imageParts = shots.map((shot) => ({
+      type: 'image_url',
+      image_url: { url: `data:image/jpeg;base64,${shot}`, detail: 'high' },
+    }));
+    try {
+      raw = await chatCompletion(client, [
+        { role: 'system', content: daemonAutomationSystem() },
+        { role: 'user', content: [{ type: 'text', text: userContent }, ...imageParts] },
+      ], model);
+    } catch {
+      // Any failure of the image request falls back to the text-only request.
+      raw = await askTextOnly();
+    }
+  } else {
+    raw = await askTextOnly();
+  }
+
+  let parsed;
+  try {
+    parsed = JSON.parse(stripFences(raw));
+  } catch {
+    send(ws, {
+      type: 'automationResult',
+      ok: false,
+      summary: 'Failed to parse automation response.',
+      modelOutput: raw.slice(0, 1500),
+    });
+    return;
+  }
+
+  const actions = Array.isArray(parsed.actions) ? parsed.actions : [];
+  if (actions.length === 0) {
+    send(ws, {
+      type: 'automationResult',
+      ok: true,
+      summary: parsed.summary || 'No executable actions were produced.',
+      notes: parsed.notes || '',
+      hasActions: false,
+    });
+    return;
+  }
+  send(ws, {
+    type: 'executeActions',
+    actions,
+    summary: parsed.summary || '',
+    notes: parsed.notes || '',
+  });
+}
+/**
+ * Start the browser-extension automation daemon: an HTTP server bound to
+ * 127.0.0.1 that answers `/status` with JSON (404 for anything else) and
+ * hosts a WebSocket server handling "automation", "status" and "tabStatus"
+ * messages. Status snapshots are written through saveState().
+ *
+ * @param {{port?: number|string}} [opts] - `port` falls back to DEFAULT_PORT
+ *   when it is missing or does not convert to a non-zero number.
+ * @returns {Promise<{port: number, statePath: string, close: Function}>}
+ *   Resolves once the server is listening. Rejects when the server cannot
+ *   bind (for example EADDRINUSE) instead of leaving an unhandled 'error'
+ *   event that would crash the process.
+ */
+async function startBrowserDaemon(opts = {}) {
+  const port = Number(opts.port) || DEFAULT_PORT;
+  const visionClient = findVisionClient();
+  const fast = llm.makeClientForEndpoint('fast');
+  const runtime = {
+    visionClient,
+    visionModel: visionClient ? visionClient.model : null,
+    visionEnabled: !!visionClient,
+  };
+  // Model name reported to clients and in the state file.
+  const model = runtime.visionModel || (fast ? fast.model : null) || (llm.modelFor?.() || 'unknown');
+  const httpServer = http.createServer((req, res) => {
+    if (req.url === '/status') {
+      res.writeHead(200, { 'Content-Type': 'application/json' });
+      res.end(JSON.stringify({ ok: true, port, model, vision: runtime.visionEnabled }));
+      return;
+    }
+    res.writeHead(404);
+    res.end();
+  });
+  const wss = new WebSocketServer({ server: httpServer });
+  wss.on('connection', (ws) => {
+    saveState({ running: true, port, model, vision: runtime.visionEnabled, connectedAt: Date.now() });
+    send(ws, { type: 'hello', model, vision: runtime.visionEnabled, daemon: true });
+    ws.on('message', async (data) => {
+      let msg;
+      // Frames that are not valid JSON are ignored.
+      try { msg = JSON.parse(data.toString()); } catch { return; }
+      try {
+        if (msg.type === 'automation') await handleAutomation(ws, msg, runtime);
+        else if (msg.type === 'status') send(ws, { type: 'status', text: 'Browser daemon connected.' });
+        else if (msg.type === 'tabStatus') saveState({ running: true, port, model, vision: runtime.visionEnabled, activeTab: msg.tab || null });
+      } catch (err) {
+        send(ws, { type: 'error', text: err.message });
+      }
+    });
+  });
+  await new Promise((resolve, reject) => {
+    // The WebSocket server re-emits errors of the underlying HTTP server, so
+    // both emitters need a listener while binding; otherwise a bind failure
+    // is thrown as an unhandled 'error' event and this promise never settles.
+    const onListenError = (err) => reject(err);
+    wss.once('error', onListenError);
+    httpServer.once('error', onListenError);
+    httpServer.listen(port, '127.0.0.1', () => {
+      wss.off('error', onListenError);
+      httpServer.off('error', onListenError);
+      resolve();
+    });
+  });
+  saveState({ running: true, port, model, vision: runtime.visionEnabled, pid: process.pid, startedAt: Date.now() });
+  return {
+    port,
+    statePath: STATE_PATH,
+    close: () => {
+      saveState({ running: false, port, stoppedAt: Date.now() });
+      wss.close();
+      httpServer.close();
+    },
+  };
+}
+module.exports = {
+  DEFAULT_PORT,
+  STATE_PATH,
+  startBrowserDaemon,
+};

package/src/browser.js CHANGED Viewed

@@ -184,13 +184,23 @@ async function screenshot(args) {
     const name = `screenshot-${Date.now()}.png`;
     const filePath = path.join(SCREENSHOT_DIR, name);
     await p.screenshot({ path: filePath, fullPage: false });
+    const buf = fs.readFileSync(filePath);
+    const b64 = buf.toString('base64');
     const stats = fs.statSync(filePath);
     return {
       ok: true,
       path: filePath,
       size: stats.size,
       url: p.url(),
       title: await p.title(),
+      images: [{
+        mimeType: 'image/png',
+        data: b64,
+        dataLength: b64.length,
+        truncated: false,
+      }],
     };
   } catch (e) {
     return { error: `screenshot failed: ${e.message}` };

package/src/cli/browserDaemon.js ADDED Viewed

@@ -0,0 +1,60 @@
+const { startBrowserDaemon, DEFAULT_PORT, STATE_PATH } = require('../browser-daemon');
+/**
+ * Parse `shmakk browser-daemon` command-line arguments.
+ *
+ * Recognised: `--help`/`-h`, `--port <n>`/`-p <n>`. A literal
+ * `browser-daemon` token is skipped. An unknown option is reported on stderr
+ * and turns on `help`.
+ *
+ * The port must consist of decimal digits only and lie in 1..65535. Values
+ * with trailing garbage (e.g. "80abc") or a missing value are rejected:
+ * the problem is written to stderr and the process exits with status 2.
+ *
+ * @param {string[]} argv - Arguments to parse.
+ * @returns {{port: number, help: boolean}}
+ */
+function parseArgs(argv) {
+  const args = { port: DEFAULT_PORT, help: false };
+  for (let i = 0; i < argv.length; i++) {
+    const a = argv[i];
+    if (a === 'browser-daemon') continue;
+    if (a === '--help' || a === '-h') {
+      args.help = true;
+    } else if (a === '--port' || a === '-p') {
+      const raw = argv[++i];
+      if (raw === undefined) {
+        process.stderr.write(`[shmakk] browser-daemon: missing value for ${a}\n`);
+        process.exit(2);
+      }
+      // Digits only: Number.parseInt alone would accept "80abc" as 80.
+      const port = /^\d+$/.test(String(raw).trim()) ? Number.parseInt(raw, 10) : Number.NaN;
+      if (Number.isNaN(port) || port < 1 || port > 65535) {
+        process.stderr.write(`[shmakk] browser-daemon: invalid port: ${raw}\n`);
+        process.exit(2);
+      }
+      args.port = port;
+    } else {
+      process.stderr.write(`[shmakk] browser-daemon: unknown option: ${a}\n`);
+      args.help = true;
+    }
+  }
+  return args;
+}
+// Usage text that main() prints when parseArgs() sets the help flag; it
+// embeds STATE_PATH so the user can see where daemon state is written.
+const HELP = `shmakk browser-daemon — extension automation backend
+Usage:
+  shmakk browser-daemon [--port 3947]
+Runs a single global WebSocket backend for the Chrome extension. State is
+written to ${STATE_PATH}.
+`;
+/**
+ * CLI entry point for `shmakk browser-daemon`.
+ *
+ * Prints HELP and returns 0 when help is requested. Otherwise starts the
+ * daemon, prints where it listens and where its state file lives, and
+ * installs SIGINT/SIGTERM handlers that close the daemon and exit with 0.
+ *
+ * @param {string[]} [argv] - Arguments; defaults to process.argv.slice(2).
+ * @returns {Promise<number>} 0 after printing help; otherwise a promise that
+ *   never settles (the process ends through the signal handlers).
+ */
+async function main(argv = process.argv.slice(2)) {
+  const { help, port } = parseArgs(argv);
+  if (help) {
+    process.stdout.write(HELP);
+    return 0;
+  }
+
+  const daemon = await startBrowserDaemon({ port });
+  process.stdout.write(`[shmakk browser-daemon] listening on ws://127.0.0.1:${daemon.port}\n`);
+  process.stdout.write(`[shmakk browser-daemon] state: ${daemon.statePath}\n`);
+
+  const onSignal = () => {
+    process.stdout.write('\n[shmakk browser-daemon] shutting down\n');
+    daemon.close();
+    process.exit(0);
+  };
+  for (const signalName of ['SIGINT', 'SIGTERM']) {
+    process.on(signalName, onSignal);
+  }
+
+  return new Promise(() => {});
+}
+// Run main() only when this file is executed directly, not when it is
+// required as a module. A rejected main() is reported on stderr and the
+// process exits with status 1.
+if (require.main === module) {
+  main().catch((err) => {
+    process.stderr.write(`[shmakk browser-daemon] fatal: ${err && err.stack || err}\n`);
+    process.exit(1);
+  });
+}
+module.exports = { main, HELP };