shmakk 1.2.3 → 1.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/.env.example +11 -0
  2. package/README.md +75 -1
  3. package/docs/index.html +154 -16
  4. package/docs/mcp.md +78 -0
  5. package/docs/ssh.md +82 -0
  6. package/docs/vibedit-analysis.md +375 -0
  7. package/docs/vim.md +110 -0
  8. package/docs/voice.md +4 -0
  9. package/package.json +9 -5
  10. package/scripts/test-vibedit.js +45 -0
  11. package/scripts/vibedit-demo.sh +52 -0
  12. package/skills/shmakk-skill-creator.md +269 -0
  13. package/src/_check.js +7 -0
  14. package/src/_check_schema.js +5 -0
  15. package/src/_cleanup.js +18 -0
  16. package/src/_fix.js +9 -0
  17. package/src/_test_import.js +15 -0
  18. package/src/agent.js +11 -4
  19. package/src/browser-daemon.js +209 -0
  20. package/src/browser.js +10 -0
  21. package/src/cli/browserDaemon.js +60 -0
  22. package/src/cli/connectBrowser.js +137 -0
  23. package/src/cli.js +235 -8
  24. package/src/completions.js +8 -0
  25. package/src/control.js +273 -1
  26. package/src/core/browserConnector.js +523 -0
  27. package/src/correction.js +6 -0
  28. package/src/electron.js +305 -0
  29. package/src/endpoints.js +74 -9
  30. package/src/index.js +24 -1
  31. package/src/llm.js +501 -61
  32. package/src/mobile.js +307 -0
  33. package/src/notify.js +51 -3
  34. package/src/orchestrator.js +35 -1
  35. package/src/pty.js +11 -6
  36. package/src/review.js +45 -11
  37. package/src/self-commands.js +153 -0
  38. package/src/session-convert.js +508 -0
  39. package/src/session-search.js +31 -0
  40. package/src/session.js +392 -46
  41. package/src/skills/browserActions.ts +984 -0
  42. package/src/skills.js +451 -24
  43. package/src/system-prompt.js +31 -25
  44. package/src/tools.js +81 -0
  45. package/src/vibedit/control.js +534 -0
  46. package/src/vibedit/electron.js +108 -0
  47. package/src/vibedit/files.js +171 -0
  48. package/src/vibedit/index.js +298 -0
  49. package/src/vibedit/overlay.js +1482 -0
  50. package/src/vibedit/prompts.js +245 -0
  51. package/src/vibedit/state.js +32 -0
  52. package/src/vim.js +410 -0
@@ -0,0 +1,305 @@
1
+ // Electron app automation via Chrome DevTools Protocol (CDP).
2
+ // Connects to an Electron app running with --remote-debugging-port.
3
+ // Plays the same role as browser.js but for Electron desktop apps.
4
+ //
5
+ // Usage: Launch the Electron app with:
6
+ // electron --remote-debugging-port=9222 path/to/app
7
+ // Then the agent can use the `electron` tool to interact with it.
8
+
9
+ const fs = require('fs');
10
+ const path = require('path');
11
+
12
+ let pw = null; // playwright module; loaded lazily by ensurePage()
13
+ let browser = null; // CDP browser connection, reused while isConnected() is true
14
+ let page = null; // page the commands operate on; re-resolved once it is closed
15
+ let _debugPort = 9222; // default CDP port
16
+
17
+ const SCREENSHOT_DIR = '/tmp/shmakk-screenshots'; // screenshot() writes electron-<timestamp>.png files here
18
+
19
/**
 * Reports whether the optional `playwright` dependency can be resolved.
 *
 * @returns {boolean} true when `require.resolve('playwright')` succeeds,
 *   false when it throws.
 */
function isAvailable() {
  let found = true;
  try {
    require.resolve('playwright');
  } catch {
    found = false;
  }
  return found;
}
22
+
23
/**
 * Returns a usable Playwright page for the Electron app, connecting over CDP
 * on first use and reusing the cached connection afterwards.
 *
 * @param {object} [args] - Tool arguments.
 * @param {number|string} [args.debugPort] - CDP port to use; falls back to the
 *   port used by the previous call (9222 initially).
 * @returns {Promise<object>} The page that commands should operate on.
 * @throws {Error} If playwright cannot be loaded, the CDP connection fails, or
 *   the connected app exposes no browser context.
 */
async function ensurePage(args) {
  const port = Number(args?.debugPort) || _debugPort;
  const portChanged = port !== _debugPort;
  _debugPort = port;

  // The cached connection belongs to the previously used port. Drop it so the
  // caller is never handed a page from a different app than the one requested.
  if (portChanged) {
    const stale = browser;
    browser = null;
    page = null;
    if (stale) {
      try {
        await stale.close();
      } catch {
        // Best effort: the old connection may already be gone.
      }
    }
  }

  if (page && !page.isClosed()) return page;

  if (!pw) {
    try {
      pw = require('playwright');
    } catch {
      throw new Error(
        'playwright not installed. Run:\n' +
        ' npm install playwright\n' +
        ' npx playwright install chromium',
      );
    }
  }

  if (!browser || !browser.isConnected()) {
    const endpointUrl = `http://127.0.0.1:${port}`;
    try {
      browser = await pw.chromium.connectOverCDP(endpointUrl);
    } catch (e) {
      throw new Error(
        `Cannot connect to Electron at port ${port}. Make sure the app is running with:\n` +
        ` electron --remote-debugging-port=${port} path/to/app\n` +
        `Details: ${e.message}`,
        { cause: e },
      );
    }
  }

  // Guard against an app that exposes no context instead of failing with an
  // opaque "cannot read properties of undefined" TypeError.
  const ctx = browser.contexts()[0];
  if (!ctx) {
    throw new Error(`Connected to port ${port} but the app exposes no browser context.`);
  }

  page = ctx.pages()[0] || await ctx.newPage();
  return page;
}
60
+
61
+ // ── Commands ──────────────────────────────────────────────────────────────
62
+
63
/**
 * Captures a PNG screenshot of the current page, saves it under
 * SCREENSHOT_DIR and returns it base64-encoded.
 *
 * @param {object} args - Tool arguments.
 * @param {boolean} [args.fullPage] - Pass `false` to capture only the viewport;
 *   any other value captures the full page.
 * @returns {Promise<object>} `{ ok, path, mimeType, images }` on success or
 *   `{ error }` when the capture fails.
 */
async function screenshot(args) {
  const p = await ensurePage(args);
  try {
    const filePath = path.join(SCREENSHOT_DIR, `electron-${Date.now()}.png`);
    // A recursive mkdir is a no-op when the directory already exists, so no
    // separate existence check is needed.
    fs.mkdirSync(SCREENSHOT_DIR, { recursive: true });

    // page.screenshot() resolves with the image buffer, so the file does not
    // have to be read back synchronously from disk.
    const buf = await p.screenshot({ path: filePath, fullPage: args?.fullPage !== false });
    const b64 = buf.toString('base64');

    return {
      ok: true,
      path: filePath,
      mimeType: 'image/png',
      images: [{
        mimeType: 'image/png',
        data: b64,
        dataLength: b64.length,
        truncated: false,
      }],
    };
  } catch (e) {
    return { error: `screenshot failed: ${e.message}` };
  }
}
89
+
90
/**
 * Loads a URL in the current page and waits for DOMContentLoaded.
 *
 * @param {object} args - Tool arguments.
 * @param {string} args.url - Address to load; surrounding whitespace is trimmed.
 * @returns {Promise<object>} `{ ok, url, title, status }` on success, where
 *   `status` is null when goto() yields no response; `{ error }` otherwise.
 */
async function navigate(args) {
  const target = String(args.url || '').trim();
  if (target === '') {
    return { error: 'url required' };
  }

  const p = await ensurePage(args);
  try {
    const response = await p.goto(target, { waitUntil: 'domcontentloaded', timeout: 20000 });
    const currentUrl = p.url();
    const title = await p.title();
    const status = response ? response.status() : null;
    return { ok: true, url: currentUrl, title, status };
  } catch (e) {
    return { error: `navigate failed: ${e.message}` };
  }
}
107
+
108
/**
 * Clicks the first element matching a selector, then pauses 500 ms before
 * reporting the page's URL and title.
 *
 * @param {object} args - Tool arguments.
 * @param {string} args.selector - Selector of the element to click.
 * @returns {Promise<object>} `{ ok, clicked, url, title }` or `{ error }`.
 */
async function click(args) {
  const selector = String(args.selector || '').trim();
  if (selector === '') {
    return { error: 'selector required' };
  }

  const p = await ensurePage(args);
  try {
    await p.click(selector, { timeout: 5000 });
    await p.waitForTimeout(500);
    const url = p.url();
    const title = await p.title();
    return { ok: true, clicked: selector, url, title };
  } catch (e) {
    return { error: `click failed: ${e.message}` };
  }
}
121
+
122
/**
 * Fills an input matching a selector with the given text.
 *
 * @param {object} args - Tool arguments.
 * @param {string} args.selector - Selector of the field to fill.
 * @param {string|number|boolean} args.text - Text to enter; non-string values
 *   are stringified. Missing or empty text is rejected.
 * @returns {Promise<object>} `{ ok, typed, selector }` or `{ error }`.
 */
async function type(args) {
  const sel = String(args.selector || '').trim();
  // `??` rather than `||`: a falsy but meaningful value such as 0 or false
  // must be typed as "0" / "false", not rejected as missing.
  const text = String(args.text ?? '');
  if (!sel) return { error: 'selector required' };
  if (!text) return { error: 'text required' };

  const p = await ensurePage(args);
  try {
    await p.fill(sel, text, { timeout: 5000 });
    return { ok: true, typed: text, selector: sel };
  } catch (e) {
    return { error: `type failed: ${e.message}` };
  }
}
136
+
137
/**
 * Summarizes the current page: title, URL, visible body text, links and forms.
 *
 * @param {object} args - Tool arguments (forwarded to ensurePage).
 * @returns {Promise<object>} `{ ok, title, url, content, links, forms }` where
 *   `content` holds at most 8000 characters of body text after a title/URL
 *   header (12000 characters overall), `links` at most 50 entries and `forms`
 *   at most 10; `{ error }` when reading fails.
 */
async function readPage(args) {
  const p = await ensurePage(args);

  // The callbacks below are handed to p.evaluate() and reference `document`,
  // so they are kept free of references to local variables.
  const grabBodyText = () => (document.body ? document.body.innerText : '');

  const grabLinks = () => Array.from(document.querySelectorAll('a[href]'))
    .slice(0, 100)
    .map((anchor) => ({
      text: (anchor.textContent || '').trim().slice(0, 200),
      href: anchor.href,
    }));

  const grabForms = () => {
    const describeField = (el) => ({
      tag: el.tagName.toLowerCase(),
      type: el.type || '',
      name: el.name || '',
      placeholder: el.placeholder || '',
      value: (el.value || '').slice(0, 100),
    });
    return Array.from(document.querySelectorAll('form'))
      .slice(0, 20)
      .map((form) => ({
        action: form.action,
        method: form.method,
        inputs: Array.from(form.querySelectorAll('input, select, textarea, button'))
          .slice(0, 20)
          .map((el) => describeField(el)),
      }));
  };

  try {
    const title = await p.title();
    const url = p.url();
    const bodyText = await p.evaluate(grabBodyText);
    const links = await p.evaluate(grabLinks);
    const forms = await p.evaluate(grabForms);

    const header = `Title: ${title}\nURL: ${url}\n\n`;
    const content = header + bodyText.slice(0, 8000);

    return {
      ok: true,
      title,
      url,
      content: content.slice(0, 12000),
      links: links.slice(0, 50),
      forms: forms.slice(0, 10),
    };
  } catch (e) {
    return { error: `read_page failed: ${e.message}` };
  }
}
180
+
181
/**
 * Evaluates a JavaScript expression in the page and returns its JSON form.
 *
 * NOTE(review): `code` is executed verbatim inside the Electron app; callers
 * are expected to gate this command (see classifyElectronCommand).
 *
 * @param {object} args - Tool arguments.
 * @param {string} args.code - Expression to evaluate in the page.
 * @returns {Promise<object>} `{ ok, result }` where `result` is the JSON
 *   serialization truncated to 4000 characters (the string "undefined" when
 *   the expression yields no serializable value), or `{ error }`.
 */
async function evaluate(args) {
  const code = String(args.code || '').trim();
  if (!code) return { error: 'code required' };

  const p = await ensurePage(args);
  try {
    const result = await p.evaluate(code);
    // JSON.stringify returns undefined (not a string) for an undefined value;
    // calling .slice on it would turn a successful evaluation into an error.
    const serialized = JSON.stringify(result);
    const text = serialized === undefined ? 'undefined' : serialized;
    return { ok: true, result: text.slice(0, 4000) };
  } catch (e) {
    return { error: `evaluate failed: ${e.message}` };
  }
}
193
+
194
/**
 * Chooses an option in a `<select>` element.
 *
 * @param {object} args - Tool arguments.
 * @param {string} args.selector - Selector of the `<select>` element.
 * @param {string|number|boolean} args.value - Option value to choose;
 *   non-string values are stringified. Missing or empty values are rejected.
 * @returns {Promise<object>} `{ ok, selected, selector }` or `{ error }`.
 */
async function select(args) {
  const sel = String(args.selector || '').trim();
  // `??` rather than `||`: an option value of 0 is valid and must be passed
  // through as "0" instead of being reported as missing.
  const value = String(args.value ?? '');
  if (!sel) return { error: 'selector required' };
  if (!value) return { error: 'value required' };

  const p = await ensurePage(args);
  try {
    await p.selectOption(sel, value, { timeout: 5000 });
    return { ok: true, selected: value, selector: sel };
  } catch (e) {
    return { error: `select failed: ${e.message}` };
  }
}
208
+
209
/**
 * Waits either for a selector to appear or for a fixed amount of time.
 *
 * @param {object} args - Tool arguments.
 * @param {string} [args.selector] - When given (and non-blank), wait for this
 *   selector; otherwise sleep.
 * @param {number|string} [args.seconds] - Timeout / sleep duration. Values
 *   that are missing, non-numeric, zero or negative fall back to 1 second;
 *   values above 30 are capped at 30.
 * @returns {Promise<object>} `{ ok, waited, selector }` where `waited` is the
 *   number of seconds actually used, or `{ error }`.
 */
async function wait(args) {
  const sel = args.selector ? String(args.selector).trim() : null;
  const requested = Number(args.seconds);
  // Clamp up front so a negative value never reaches Playwright as a negative
  // timeout and the reported `waited` matches the duration really applied.
  const seconds = requested > 0 ? Math.min(requested, 30) : 1;
  const timeoutMs = seconds * 1000;

  const p = await ensurePage(args);
  try {
    if (sel) {
      await p.waitForSelector(sel, { timeout: timeoutMs });
    } else {
      await p.waitForTimeout(timeoutMs);
    }
    return { ok: true, waited: seconds, selector: sel || 'timeout' };
  } catch (e) {
    return { error: `wait failed: ${e.message}` };
  }
}
225
+
226
/**
 * Scrolls the page vertically, then pauses 300 ms.
 *
 * @param {object} args - Tool arguments.
 * @param {string} [args.direction] - 'up' scrolls up; anything else scrolls down.
 * @param {number|string} [args.amount] - Pixels to scroll; defaults to 300
 *   when missing, zero or non-numeric.
 * @returns {Promise<object>} `{ ok, scrolled, amount }` or `{ error }`.
 */
async function scroll(args) {
  const goingUp = args.direction === 'up';
  const direction = goingUp ? 'up' : 'down';
  const amount = Number(args.amount) || 300;

  const p = await ensurePage(args);
  try {
    // The callback receives its inputs as an argument because it is handed to
    // p.evaluate() and uses the page's `window`.
    await p.evaluate((opts) => {
      const deltaY = opts.direction === 'down' ? opts.amount : -opts.amount;
      window.scrollBy(0, deltaY);
    }, { direction, amount });
    await p.waitForTimeout(300);
    return { ok: true, scrolled: direction, amount };
  } catch (e) {
    return { error: `scroll failed: ${e.message}` };
  }
}
241
+
242
/**
 * Closes the cached browser connection, if any, and clears the cached page.
 * Always reports success; a failing close is reported as a forced close.
 *
 * @param {object} [args] - Unused.
 * @returns {Promise<object>} `{ ok: true, closed: true }`, plus
 *   `note: 'force closed'` when browser.close() threw.
 */
async function close(args) {
  if (!browser) {
    return { ok: true, closed: true };
  }

  try {
    await browser.close();
    return { ok: true, closed: true };
  } catch (e) {
    return { ok: true, closed: true, note: 'force closed' };
  } finally {
    // Cached handles are cleared whether or not close() succeeded.
    browser = null;
    page = null;
  }
}
256
+
257
/**
 * Connects to an Electron app on a specific debug port and reports the page
 * that was found.
 *
 * @param {object} [args] - Tool arguments.
 * @param {number|string} [args.debugPort] - CDP port; defaults to 9222.
 * @returns {Promise<object>} `{ ok, connected, port, url, title }` or
 *   `{ error }` when the connection cannot be established.
 */
async function connect(args) {
  const port = Number(args?.debugPort) || 9222;

  // Switching ports must not reuse the connection made to the previous port,
  // otherwise the result would describe the old app under the new port number.
  if (port !== _debugPort && browser) {
    const stale = browser;
    browser = null;
    page = null;
    try {
      await stale.close();
    } catch {
      // Best effort: the old connection may already be gone.
    }
  }

  _debugPort = port;
  try {
    const p = await ensurePage(args);
    return {
      ok: true,
      connected: true,
      port,
      url: p.url(),
      title: await p.title(),
    };
  } catch (e) {
    return { error: `connect failed: ${e.message}` };
  }
}
274
+
275
+ // ── Dispatch ──────────────────────────────────────────────────────────────
276
+
277
// Maps each command name accepted by dispatchElectron() to its handler.
// Key order is kept because it is echoed in the "unknown command" message.
const COMMANDS = {
  screenshot,
  navigate,
  click,
  type,
  read_page: readPage,
  evaluate,
  select,
  wait,
  scroll,
  close,
  connect,
};
281
+
282
/**
 * Assigns a risk class to an electron tool call based on its command name.
 *
 * @param {object} args - Tool arguments.
 * @param {string} [args.command] - Command name.
 * @returns {'safe'|'uncertain'|'unsafe'} 'safe' for screenshot, read_page and
 *   wait; 'unsafe' for close and connect; 'uncertain' for everything else,
 *   including unknown or missing commands.
 */
function classifyElectronCommand(args) {
  const cmd = String(args.command || '');
  switch (cmd) {
    case 'screenshot':
    case 'read_page':
    case 'wait':
      return 'safe';
    case 'close':
    case 'connect':
      return 'unsafe';
    default:
      return 'uncertain';
  }
}
289
+
290
/**
 * Entry point for the `electron` tool: routes a call to the matching command
 * handler and converts any thrown error into an `{ error }` result.
 *
 * @param {object} args - Tool arguments; `args.command` selects the handler
 *   and the whole object is forwarded to it.
 * @param {*} [signal] - Accepted for interface compatibility; not used here.
 * @returns {Promise<object>} The handler's result, or `{ error }` when
 *   playwright is missing, the command is unknown, or the handler throws.
 */
async function dispatchElectron(args, signal) {
  if (!isAvailable()) {
    return { error: 'playwright not installed. Run: npm install playwright && npx playwright install chromium' };
  }
  const cmd = String(args.command || '');
  // Only own keys count: a plain lookup would also resolve inherited members
  // such as "constructor" or "__proto__" and treat them as commands.
  const isKnown = Object.prototype.hasOwnProperty.call(COMMANDS, cmd);
  const fn = isKnown ? COMMANDS[cmd] : undefined;
  if (typeof fn !== 'function') {
    return { error: `unknown electron command: ${cmd}. Available: ${Object.keys(COMMANDS).join(', ')}` };
  }
  try {
    return await fn(args);
  } catch (e) {
    return { error: `electron ${cmd} failed: ${e.message}` };
  }
}
304
+
305
+ module.exports = { dispatchElectron, classifyElectronCommand, isAvailable }; // public API: tool entry point, risk classifier, playwright availability check
package/src/endpoints.js CHANGED
@@ -5,12 +5,18 @@
5
5
  // Preferred format (~/.config/shmakk/endpoints.json):
6
6
  // {
7
7
  // "main": "gpt-5",
8
+ // "fast": "gemini-flash",
8
9
  // "models": {
9
10
  // "gpt-5": {
10
11
  // "provider": "codex",
11
12
  // "model": "gpt-5-codex",
12
13
  // "api_key": "sk-..."
13
14
  // },
15
+ // "kimi": {
16
+ // "provider": "nvidia",
17
+ // "model": "moonshotai/kimi-k2.6",
18
+ // "api_key": "nvapi-..."
19
+ // },
14
20
  // "claude": {
15
21
  // "provider": "anthropic",
16
22
  // "model": "claude-sonnet-4-5-20250929",
@@ -33,6 +39,13 @@ let currentEndpointConfig = null;
33
39
  let endpointsCwd = null;
34
40
 
35
41
  function configPath(cwd) {
42
+ // Check project-local endpoints first (cwd/endpoints.json), then global
43
+ const dir = cwd || process.cwd();
44
+ const localJson = path.join(dir, 'endpoints.json');
45
+ const localJs = path.join(dir, 'endpoints.js');
46
+ if (fs.existsSync(localJson)) return localJson;
47
+ if (fs.existsSync(localJs)) return localJs;
48
+
36
49
  const configDir = path.join(os.homedir(), '.config', 'shmakk');
37
50
  const jsonPath = path.join(configDir, 'endpoints.json');
38
51
  const jsPath = path.join(configDir, 'endpoints.js');
@@ -73,13 +86,14 @@ function normalizeModelConfig(name, cfg) {
73
86
  headers: cfg.headers || cfg.headears || null,
74
87
  registry: cfg.registry || null,
75
88
  main: !!cfg.main,
76
- vision: !!cfg.vision,
89
+ fast: !!cfg.fast,
90
+ vision: _supportsVisionForConfig(cfg),
77
91
  };
78
92
  }
79
93
 
80
94
  function normalizeRegistry(raw) {
81
95
  if (!raw || typeof raw !== 'object') {
82
- return { main: null, models: {} };
96
+ return { main: null, fast: null, models: {} };
83
97
  }
84
98
 
85
99
  const explicitModels = raw.models || raw.endpoints;
@@ -98,15 +112,23 @@ function normalizeRegistry(raw) {
98
112
  }
99
113
 
100
114
  let main = typeof raw.main === 'string' ? raw.main : null;
115
+ let fast = typeof raw.fast === 'string' ? raw.fast : null;
101
116
  if (!main) {
102
117
  const marked = Object.values(models).find((cfg) => cfg.main);
103
118
  if (marked) main = marked.name;
104
119
  }
120
+ if (!fast) {
121
+ const marked = Object.values(models).find((cfg) => cfg.fast);
122
+ if (marked) fast = marked.name;
123
+ }
105
124
  if (!main && Object.keys(models).length === 1) {
106
125
  main = Object.keys(models)[0];
107
126
  }
127
+ if (!fast && Object.keys(models).length === 1) {
128
+ fast = Object.keys(models)[0];
129
+ }
108
130
 
109
- return { main, models };
131
+ return { main, fast, models };
110
132
  }
111
133
 
112
134
  function loadModelRegistry(cwd) {
@@ -115,7 +137,7 @@ function loadModelRegistry(cwd) {
115
137
 
116
138
  function applyEndpoint(name, cwd) {
117
139
  const registry = loadModelRegistry(cwd);
118
- const selected = name === 'main' ? registry.main : name;
140
+ const selected = name === 'main' ? registry.main : name === 'fast' ? registry.fast : name;
119
141
  if (!selected || !registry.models[selected]) return false;
120
142
 
121
143
  const normalized = registry.models[selected];
@@ -144,8 +166,38 @@ function getCurrentEndpointName() {
144
166
  return currentEndpointName;
145
167
  }
146
168
 
169
+ function _supportsVisionForConfig(cfg) {
170
+ if (!cfg) return false;
171
+
172
+ // Explicit config wins
173
+ if ('vision' in cfg) return !!cfg.vision;
174
+
175
+ // Auto-detect for known vision-capable providers
176
+ const provider = (cfg.provider || '').toLowerCase();
177
+ const model = (cfg.model || '').toLowerCase();
178
+
179
+ // Providers whose APIs always support image_url content blocks
180
+ const visionProviders = new Set([
181
+ 'anthropic', // Claude 3+
182
+ 'google', // Gemini
183
+ 'codex', // Codex / OpenAI
184
+ 'openai',
185
+ 'nvidia', // NIM
186
+ ]);
187
+
188
+ if (visionProviders.has(provider)) return true;
189
+
190
+ // openai-compatible: check model name for vision hints
191
+ if (provider === 'openai-compatible') {
192
+ const visionPatterns = /vision|vl|multimodal|gpt-4o|gemini|claude|llava|minicpm|cogvlm|qwenvl|phi-3\.5-vision/i;
193
+ return visionPatterns.test(model);
194
+ }
195
+
196
+ return false;
197
+ }
198
+
147
199
  function supportsVision() {
148
- return !!(currentEndpointConfig && currentEndpointConfig.vision);
200
+ return _supportsVisionForConfig(currentEndpointConfig);
149
201
  }
150
202
 
151
203
  function listEndpoints(cwd) {
@@ -154,10 +206,20 @@ function listEndpoints(cwd) {
154
206
 
155
207
  function getModelRegistry(cwd) {
156
208
  const registry = loadModelRegistry(cwd || endpointsCwd || process.cwd());
157
- return {
158
- main: registry.main,
159
- models: Object.fromEntries(Object.entries(registry.models).map(([name, cfg]) => [name, { ...cfg }])),
160
- };
209
+ const models = Object.fromEntries(Object.entries(registry.models).map(([name, cfg]) => [name, { ...cfg }]));
210
+ // Include top-level visionSupport in the models map so findVisionClient() picks it up
211
+ const vs = getVisionSupport(cwd);
212
+ if (vs) models.visionSupport = vs;
213
+ return { main: registry.main, fast: registry.fast, models };
214
+ }
215
+
216
/**
 * Returns the normalized top-level `visionSupport` endpoint config, if the
 * endpoints file defines one. It is a dedicated endpoint used only for
 * describing images when the active model doesn't support vision natively.
 *
 * @param {string} [cwd] - Directory to load endpoints for; falls back to the
 *   remembered endpoints directory, then to process.cwd().
 * @returns {object|null} The normalized config named 'visionSupport', or null
 *   when the entry is absent or not an object.
 */
function getVisionSupport(cwd) {
  const baseDir = cwd || endpointsCwd || process.cwd();
  const endpoints = loadEndpoints(baseDir);
  const entry = endpoints ? endpoints.visionSupport : null;
  const isUsable = Boolean(entry) && typeof entry === 'object';
  return isUsable ? normalizeModelConfig('visionSupport', entry) : null;
}
162
224
 
163
225
  module.exports = {
@@ -167,4 +229,7 @@ module.exports = {
167
229
  getCurrentEndpointName,
168
230
  supportsVision,
169
231
  getModelRegistry,
232
+ getVisionSupport,
233
+ _test: { normalizeRegistry, normalizeModelConfig },
234
+ _supportsVisionForConfig,
170
235
  };
package/src/index.js CHANGED
@@ -24,6 +24,17 @@ function findGitRoot(cwd = process.cwd()) {
24
24
  async function main() {
25
25
  const opts = parseArgs(process.argv.slice(2));
26
26
 
27
+ if (opts.vimAi) {
28
+ const { runAi } = require('./vim');
29
+ process.exit(await runAi(opts.vimAi));
30
+ }
31
+
32
+ if (opts.vimEditor) {
33
+ const { runEditor } = require('./vim');
34
+ const realEditor = opts.vimReal || opts.vimEditor;
35
+ process.exit(runEditor(realEditor, opts.unknown));
36
+ }
37
+
27
38
  // Auto-detect git repo root as workspace if not explicitly set.
28
39
  // This keeps .shmakk state centralized at repo root instead of scattered.
29
40
  if (!opts.workspace) {
@@ -65,6 +76,15 @@ async function main() {
65
76
  opts.markdown = v === 'true';
66
77
  }
67
78
 
79
+ if (opts.browserDaemon) {
80
+ const { main: runBrowserDaemon } = require('./cli/browserDaemon');
81
+ process.exit(await runBrowserDaemon([
82
+ 'browser-daemon',
83
+ ...(opts.browserDaemonPort ? ['--port', String(opts.browserDaemonPort)] : []),
84
+ ...(opts.help ? ['--help'] : []),
85
+ ]));
86
+ }
87
+
68
88
  if (opts.help) {
69
89
  process.stdout.write(resolveHelp(opts.helpCategory));
70
90
  process.exit(0);
@@ -95,6 +115,7 @@ async function main() {
95
115
  shell: process.env.SHELL,
96
116
  term: process.env.TERM,
97
117
  endpoint: activeEndpointName || opts.endpoint || null,
118
+ fastEndpoint: require('./endpoints').getModelRegistry(opts.workspace || process.cwd()).fast || null,
98
119
  baseUrl: activeEndpoint?.base_url || process.env.SHMAKK_BASE_URL || null,
99
120
  apiKey: activeEndpoint?.api_key ? (activeEndpoint.api_key.slice(0, 8) + '...' + activeEndpoint.api_key.slice(-4)) : null,
100
121
  model: activeEndpoint?.model || process.env.SHMAKK_MODEL || null,
@@ -106,6 +127,7 @@ async function main() {
106
127
  stt: opts.stt,
107
128
  tts: opts.tts,
108
129
  sts: opts.sts,
130
+ vim: opts.vim,
109
131
  };
110
132
  process.stdout.write(JSON.stringify(cfg, null, 2) + '\n');
111
133
  process.exit(0);
@@ -137,7 +159,7 @@ async function main() {
137
159
  process.exit(0);
138
160
  }
139
161
 
140
- if (opts.status || opts.stats || opts.compact || opts.loadSkill || opts.installSkill || opts.listSkills || opts.skillStatus || opts.unloadSkill || opts.resumeStatus || opts.showPlan || opts.mcpStatus || opts.exitNow || opts.restart || opts.reset || opts.profileSet) {
162
+ if (opts.status || opts.stats || opts.compact || opts.loadSkill || opts.installSkill || opts.listSkills || opts.skillStatus || opts.unloadSkill || opts.resumeStatus || opts.showPlan || opts.mcpStatus || opts.consolidateWorkspace || opts.exitNow || opts.restart || opts.reset || opts.profileSet) {
141
163
  const ctl = require('./control');
142
164
  if (opts.status) process.exit(ctl.status());
143
165
  if (opts.stats) process.exit(ctl.stats());
@@ -150,6 +172,7 @@ async function main() {
150
172
  if (opts.resumeStatus) process.exit(ctl.resumeStatus());
151
173
  if (opts.showPlan) process.exit(ctl.showPlan());
152
174
  if (opts.mcpStatus) process.exit(ctl.mcpStatus());
175
+ if (opts.consolidateWorkspace) process.exit(ctl.consolidateWorkspace());
153
176
  if (opts.exitNow) process.exit(ctl.exitParent());
154
177
  if (opts.restart) process.exit(ctl.restartParent());
155
178
  if (opts.reset) process.exit(ctl.resetConversation());