gm-kilo 2.0.146 → 2.0.149

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/agents/gm.md CHANGED
@@ -74,6 +74,44 @@ All execution via `bun x gm-exec` (Bash) or `agent-browser` skill. Every hypothe
74
74
 
75
75
  **CODE YOUR HYPOTHESES**: Test every possible hypothesis using `bun x gm-exec` or `agent-browser` skill. Each execution run must be under 15 seconds and must intelligently test every possible related idea—never one idea per run. Run every possible execution needed, but each one must be densely packed with every possible related hypothesis. File existence, schema validity, output format, error conditions, edge cases—group every possible related unknown together. The goal is every possible hypothesis per run. Use `agent-browser` skill for cross-client UI testing and browser-based hypothesis validation.
76
76
 
77
+ **OPERATION CHAIN TESTING**: When analyzing or modifying systems with multi-step operation chains, decompose and test each part independently before testing the full chain. Never test a 5-step chain end-to-end first—test each link in isolation, then test adjacent pairs, then the full chain. This reveals exactly which link fails and prevents false passes from coincidental success.
78
+
79
+ Decomposition rules:
80
+ - Identify every distinct operation in the chain (input validation, API call, response parsing, state update, side effect, render)
81
+ - Test stateless operations in isolation first — they have no dependencies and confirm pure logic
82
+ - Test stateful operations together with their immediate downstream effect — they share a state boundary
83
+ - Bundle every confirmation that shares an assertion target into one run — same variable, same API call, same file = same run
84
+ - Unrelated assertion targets = separate runs
85
+
86
+ Tool selection per operation type:
87
+ - Pure logic (parse, validate, transform, calculate): `bun x gm-exec` — no DOM needed
88
+ - API call + response + error handling (Node.js): `bun x gm-exec` — test all three in one run
89
+ - State mutation + downstream state effect: `bun x gm-exec` — test mutation and effect together
90
+ - DOM rendering, visual state, layout: `agent-browser` skill — requires real DOM
91
+ - User interaction (click, type, submit, navigate): `agent-browser` skill — requires real events
92
+ - State mutation visible on DOM: `agent-browser` skill — test both mutation and DOM effect in one session
93
+ - Error path on UI (spinner, toast, retry): `agent-browser` skill — test full visible error flow
94
+
95
+ PRE-EMIT-TEST (before editing any file):
96
+ 1. Test current behavior on disk — understand what exists before changing it
97
+ 2. Execute proposed logic in isolation via `bun x gm-exec` WITHOUT writing to any file
98
+ 3. Confirm proposed approach produces correct output
99
+ 4. Test failure paths of proposed approach
100
+ 5. All mutables must resolve to KNOWN before the EMIT phase opens
101
+
102
+ POST-EMIT-VALIDATION (immediately after writing files to disk):
103
+ 1. Load the actual modified file from disk — not the in-memory version
104
+ 2. Execute against real inputs with `bun x gm-exec` or `agent-browser` skill
105
+ 3. Confirm the on-disk code behaves identically to what was proven in PRE-EMIT-TEST
106
+ 4. Test all scenarios again on the real disk file — success, failure, edge cases
107
+ 5. Any variance from PRE-EMIT-TEST results = regression, fix immediately before proceeding
108
+
109
+ Server + client split:
110
+ - Backend operations (node, API, DB, queue, file system): prove with `bun x gm-exec` first
111
+ - Frontend operations (DOM, forms, navigation, rendering): prove with `agent-browser` skill
112
+ - When a single feature spans server and client: run `bun x gm-exec` server tests AND `agent-browser` client tests — both required, neither substitutes for the other
113
+ - A server test passing does NOT prove the UI works. A browser test passing does NOT prove the backend handles edge cases.
114
+
77
115
  **DEFAULT IS gm-exec**: `bun x gm-exec` is the primary execution tool. Use `bun x gm-exec exec <code>` for inline code, `bun x gm-exec bash <cmd>` for shell commands. Git is the only other allowed Bash command.
78
116
 
79
117
  **TOOL POLICY**: All code execution via `bun x gm-exec`. Use `code-search` skill for exploration. Reference TOOL_INVARIANTS for enforcement.
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env node
1
+ #!/usr/bin/env bun
2
2
 
3
3
  const fs = require('fs');
4
4
  const path = require('path');
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env node
1
+ #!/usr/bin/env bun
2
2
 
3
3
  if (process.env.AGENTGUI_SUBPROCESS === '1') {
4
4
  console.log(JSON.stringify({ additionalContext: '' }));
@@ -7,6 +7,7 @@ if (process.env.AGENTGUI_SUBPROCESS === '1') {
7
7
 
8
8
  const fs = require('fs');
9
9
  const path = require('path');
10
+ const { execSync } = require('child_process');
10
11
 
11
12
  const pluginRoot = process.env.CLAUDE_PLUGIN_ROOT || process.env.GEMINI_PROJECT_DIR || process.env.OC_PLUGIN_ROOT || process.env.KILO_PLUGIN_ROOT || path.join(__dirname, '..');
12
13
  const projectDir = process.env.CLAUDE_PROJECT_DIR || process.env.GEMINI_PROJECT_DIR || process.env.OC_PROJECT_DIR || process.env.KILO_PROJECT_DIR;
@@ -32,6 +33,24 @@ const ensureGitignore = () => {
32
33
  } catch (e) {}
33
34
  };
34
35
 
/**
 * Runs the mcp-thorns tool against the project directory and returns its
 * output as a labelled section for the hook's additional context.
 *
 * A local checkout at `$HOME/mcp-thorns/index.js` (falling back to `/root`
 * when HOME is unset) is used when it exists; otherwise the tool is run via
 * `bun x mcp-thorns@latest`.
 *
 * @returns {string} `=== mcp-thorns ===` followed by the trimmed tool output;
 *   a `Skipped (timeout)` section when the run was cut off by the 15 s limit;
 *   or an empty string when there is no existing project directory or the
 *   tool fails for any other reason (best-effort: the hook must still emit).
 */
const runThorns = () => {
  if (!projectDir || !fs.existsSync(projectDir)) return '';

  // execSync passes the command string to a shell, so paths must be quoted:
  // an unquoted directory containing a space or shell metacharacter would be
  // split into several arguments or interpreted by the shell.
  const quoteShellArg = (value) =>
    process.platform === 'win32'
      ? `"${String(value).replace(/"/g, '\\"')}"`
      : `'${String(value).replace(/'/g, "'\\''")}'`;

  const localThorns = path.join(process.env.HOME || '/root', 'mcp-thorns', 'index.js');
  const thornsBin = fs.existsSync(localThorns)
    ? `node ${quoteShellArg(localThorns)}`
    : 'bun x mcp-thorns@latest';

  try {
    const out = execSync(`${thornsBin} ${quoteShellArg(projectDir)}`, {
      encoding: 'utf-8',
      stdio: ['pipe', 'pipe', 'pipe'],
      timeout: 15000,
      killSignal: 'SIGTERM'
    });
    return `=== mcp-thorns ===\n${out.trim()}`;
  } catch (e) {
    // Synchronous child_process errors report a timeout via code 'ETIMEDOUT';
    // `killed` is kept for runtimes that set it instead.
    const timedOut = Boolean(e.killed) || e.code === 'ETIMEDOUT';
    return timedOut ? '=== mcp-thorns ===\nSkipped (timeout)' : '';
  }
};
53
+
35
54
  const emit = (additionalContext) => {
36
55
  const isGemini = process.env.GEMINI_PROJECT_DIR !== undefined;
37
56
  const isOpenCode = process.env.OC_PROJECT_DIR !== undefined;
@@ -48,7 +67,11 @@ const emit = (additionalContext) => {
48
67
 
49
68
  try {
50
69
  ensureGitignore();
51
- emit('use gm agent | ' + COMPACT_CONTEXT + ' | ' + PLAN_MODE_BLOCK);
70
+ const parts = [];
71
+ const thorns = runThorns();
72
+ if (thorns) parts.push(thorns);
73
+ parts.push('use gm agent | ' + COMPACT_CONTEXT + ' | ' + PLAN_MODE_BLOCK);
74
+ emit(parts.join('\n\n'));
52
75
  } catch (error) {
53
76
  emit('use gm agent | hook error: ' + error.message);
54
77
  process.exit(0);
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env node
1
+ #!/usr/bin/env bun
2
2
 
3
3
  const fs = require('fs');
4
4
  const path = require('path');
@@ -75,29 +75,13 @@ When exploring unfamiliar code, finding similar patterns, understanding integrat
75
75
  encoding: 'utf-8',
76
76
  stdio: ['pipe', 'pipe', 'pipe'],
77
77
  cwd: projectDir,
78
- timeout: 180000,
78
+ timeout: 15000,
79
79
  killSignal: 'SIGTERM'
80
80
  });
81
81
  } catch (bunErr) {
82
- if (bunErr.killed && bunErr.signal === 'SIGTERM') {
83
- thornOutput = '=== mcp-thorns ===\nSkipped (3min timeout)';
84
- } else {
85
- try {
86
- thornOutput = execSync(`npx -y mcp-thorns@latest`, {
87
- encoding: 'utf-8',
88
- stdio: ['pipe', 'pipe', 'pipe'],
89
- cwd: projectDir,
90
- timeout: 180000,
91
- killSignal: 'SIGTERM'
92
- });
93
- } catch (npxErr) {
94
- if (npxErr.killed && npxErr.signal === 'SIGTERM') {
95
- thornOutput = '=== mcp-thorns ===\nSkipped (3min timeout)';
96
- } else {
97
- thornOutput = `=== mcp-thorns ===\nSkipped (error: ${bunErr.message.split('\n')[0]})`;
98
- }
99
- }
100
- }
82
+ thornOutput = bunErr.killed
83
+ ? '=== mcp-thorns ===\nSkipped (timeout)'
84
+ : `=== mcp-thorns ===\nSkipped (error: ${bunErr.message.split('\n')[0]})`;
101
85
  }
102
86
  outputs.push(`=== This is your initial insight of the repository, look at every possible aspect of this for initial opinionation and to offset the need for code exploration ===\n${thornOutput}`);
103
87
  } catch (e) {
@@ -165,7 +149,3 @@ When exploring unfamiliar code, finding similar patterns, understanding integrat
165
149
 
166
150
 
167
151
 
168
-
169
-
170
-
171
-
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gm-kilo",
3
- "version": "2.0.146",
3
+ "version": "2.0.149",
4
4
  "description": "State machine agent with hooks, skills, and automated git enforcement",
5
5
  "author": "AnEntrypoint",
6
6
  "license": "MIT",