@covibes/zeroshot 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/CHANGELOG.md +167 -0
  2. package/LICENSE +21 -0
  3. package/README.md +364 -0
  4. package/cli/index.js +3990 -0
  5. package/cluster-templates/base-templates/debug-workflow.json +181 -0
  6. package/cluster-templates/base-templates/full-workflow.json +455 -0
  7. package/cluster-templates/base-templates/single-worker.json +48 -0
  8. package/cluster-templates/base-templates/worker-validator.json +131 -0
  9. package/cluster-templates/conductor-bootstrap.json +122 -0
  10. package/cluster-templates/conductor-junior-bootstrap.json +69 -0
  11. package/docker/zeroshot-cluster/Dockerfile +132 -0
  12. package/lib/completion.js +174 -0
  13. package/lib/id-detector.js +53 -0
  14. package/lib/settings.js +97 -0
  15. package/lib/stream-json-parser.js +236 -0
  16. package/package.json +121 -0
  17. package/src/agent/agent-config.js +121 -0
  18. package/src/agent/agent-context-builder.js +241 -0
  19. package/src/agent/agent-hook-executor.js +329 -0
  20. package/src/agent/agent-lifecycle.js +555 -0
  21. package/src/agent/agent-stuck-detector.js +256 -0
  22. package/src/agent/agent-task-executor.js +1034 -0
  23. package/src/agent/agent-trigger-evaluator.js +67 -0
  24. package/src/agent-wrapper.js +459 -0
  25. package/src/agents/git-pusher-agent.json +20 -0
  26. package/src/attach/attach-client.js +438 -0
  27. package/src/attach/attach-server.js +543 -0
  28. package/src/attach/index.js +35 -0
  29. package/src/attach/protocol.js +220 -0
  30. package/src/attach/ring-buffer.js +121 -0
  31. package/src/attach/socket-discovery.js +242 -0
  32. package/src/claude-task-runner.js +468 -0
  33. package/src/config-router.js +80 -0
  34. package/src/config-validator.js +598 -0
  35. package/src/github.js +103 -0
  36. package/src/isolation-manager.js +1042 -0
  37. package/src/ledger.js +429 -0
  38. package/src/logic-engine.js +223 -0
  39. package/src/message-bus-bridge.js +139 -0
  40. package/src/message-bus.js +202 -0
  41. package/src/name-generator.js +232 -0
  42. package/src/orchestrator.js +1938 -0
  43. package/src/schemas/sub-cluster.js +156 -0
  44. package/src/sub-cluster-wrapper.js +545 -0
  45. package/src/task-runner.js +28 -0
  46. package/src/template-resolver.js +347 -0
  47. package/src/tui/CHANGES.txt +133 -0
  48. package/src/tui/LAYOUT.md +261 -0
  49. package/src/tui/README.txt +192 -0
  50. package/src/tui/TWO-LEVEL-NAVIGATION.md +186 -0
  51. package/src/tui/data-poller.js +325 -0
  52. package/src/tui/demo.js +208 -0
  53. package/src/tui/formatters.js +123 -0
  54. package/src/tui/index.js +193 -0
  55. package/src/tui/keybindings.js +383 -0
  56. package/src/tui/layout.js +317 -0
  57. package/src/tui/renderer.js +194 -0
@@ -0,0 +1,256 @@
1
+ /**
2
+ * AgentStuckDetector - Multi-indicator process health analysis
3
+ *
4
+ * Detects stuck Claude processes using multiple indicators:
5
+ * - Process state (S=sleeping vs R=running)
6
+ * - Wait channel (ep_poll = blocked on epoll_wait)
7
+ * - CPU usage over sample period
8
+ * - Context switches (activity indicator)
9
+ * - Network socket state (data in flight)
10
+ *
11
+ * CRITICAL: Single-indicator detection (just output freshness) has HIGH false positive risk.
12
+ * Multi-indicator approach ONLY flags processes whose combined weighted score across the indicators reaches the threshold (not every indicator has to fire).
13
+ *
14
+ * Scoring system:
15
+ * - isSleeping: +1
16
+ * - isBlockedOnPoll: +1
17
+ * - lowCpuUsage: +1
18
+ * - lowCtxSwitches: +1
19
+ * - noDataInFlight: +0.5 (secondary signal)
20
+ * - hasSynSent: +1 (stuck trying to connect)
21
+ * - hasDataInFlight: -2 (active I/O = working)
22
+ *
23
+ * Threshold: stuckScore >= 3.5 = likely stuck
24
+ */
25
+
26
+ const { execSync } = require('child_process');
27
+ const fs = require('fs');
28
+
29
+ // Stuck detection thresholds
30
+ const STUCK_THRESHOLD = 3.5; // Score at which we consider process stuck
31
+ const HIGH_CONFIDENCE_THRESHOLD = 4.5; // Score at or above which confidence is reported as 'high'
32
+ const CPU_LOW_THRESHOLD = 1; // Percent - below this is considered "low"
33
+ const CTX_SWITCHES_LOW_THRESHOLD = 10; // Voluntary context switches during the sample period - below this is considered "inactive"
34
+
35
/**
 * Read a point-in-time snapshot of a process from the /proc filesystem.
 *
 * Reads /proc/<pid>/stat (state and CPU ticks), /proc/<pid>/wchan (wait channel,
 * best-effort) and /proc/<pid>/status (thread count, voluntary context switches).
 *
 * @param {number} pid - Process ID
 * @returns {object} One of:
 *   - `{ exists: false }` when /proc/<pid>/stat is absent
 *   - `{ exists: false, error }` when stat cannot be parsed or a read throws
 *   - `{ exists: true, state, wchan, cpuTicks, threads, volCtxSwitches }` otherwise
 */
function getProcessState(pid) {
  try {
    const statPath = `/proc/${pid}/stat`;
    if (!fs.existsSync(statPath)) {
      return { exists: false };
    }

    const stat = fs.readFileSync(statPath, 'utf8');

    // Field 2 (comm) is wrapped in parentheses and may itself contain spaces or
    // parentheses, so a plain split(' ') shifts every later field. Everything
    // after the LAST ')' is reliably space-separated.
    const commEnd = stat.lastIndexOf(')');
    if (commEnd === -1) {
      return { exists: false, error: `Unrecognized format in ${statPath}` };
    }
    const fields = stat
      .slice(commEnd + 1)
      .trim()
      .split(/\s+/);

    // `fields` starts at stat field 3, so stat field N lives at index N - 3.
    // State (field 3): R=running, S=sleeping, D=disk sleep, Z=zombie
    const state = fields[0];

    // utime (field 14) + stime (field 15) = total CPU ticks
    const utime = parseInt(fields[11], 10);
    const stime = parseInt(fields[12], 10);

    // Wait channel: what the process is blocked on in the kernel, if anything
    let wchan = '';
    try {
      wchan = fs.readFileSync(`/proc/${pid}/wchan`, 'utf8').trim();
    } catch {
      // wchan may not be readable; an empty string means "unknown"
    }

    const status = fs.readFileSync(`/proc/${pid}/status`, 'utf8');
    const threads = status.match(/Threads:\s+(\d+)/)?.[1] || '1';
    const volCtxSwitches = status.match(/voluntary_ctxt_switches:\s+(\d+)/)?.[1] || '0';

    return {
      exists: true,
      state,
      wchan,
      cpuTicks: utime + stime,
      threads: parseInt(threads, 10),
      volCtxSwitches: parseInt(volCtxSwitches, 10),
    };
  } catch (err) {
    // The process can exit between existsSync and the reads; report it as gone.
    return { exists: false, error: err.message };
  }
}
84
+
85
/**
 * Summarize the TCP/UDP sockets owned by a process.
 *
 * Runs `ss -tunp`, keeps the rows attributed to `pid`, and parses each row's
 * state and queue sizes.
 *
 * @param {number} pid - Process ID (only a plain positive integer is accepted,
 *   because the value is interpolated into a shell command)
 * @returns {object} Always contains `hasNetwork`. When socket rows were found it
 *   also contains `connections` (array of `{ state, recvQ, sendQ, local, peer }`),
 *   `establishedCount`, `hasDataInFlight` (any non-empty Recv-Q/Send-Q) and
 *   `hasSynSent`. `error` is set when an unexpected exception occurred.
 */
function getNetworkState(pid) {
  try {
    // Reject anything that is not a bare decimal PID before it reaches the shell.
    const pidText = String(pid);
    if (!/^[1-9]\d*$/.test(pidText)) {
      return { hasNetwork: false };
    }

    const fdPath = `/proc/${pidText}/fd`;
    if (!fs.existsSync(fdPath)) {
      return { hasNetwork: false };
    }

    // Use ss to get socket states for this process
    let ssOutput = '';
    try {
      ssOutput = execSync(`ss -tunp 2>/dev/null | grep ",pid=${pidText}," || true`, {
        encoding: 'utf8',
        timeout: 5000,
      });
    } catch {
      // ss missing, timed out, or otherwise failed: treat as "no network info"
      return { hasNetwork: false };
    }

    if (!ssOutput.trim()) {
      return { hasNetwork: false, connections: [] };
    }

    // Row layout: [Netid] State Recv-Q Send-Q Local:Port Peer:Port Process
    // ss prints the Netid column (tcp/udp) when more than one socket family is
    // queried, which is the case for -tu, so it must be tolerated as a prefix.
    const rowPattern = /^(?:[a-z][a-z0-9_]*\s+)?([A-Z][A-Z0-9-]*)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)/;

    const connections = [];
    for (const line of ssOutput.trim().split('\n')) {
      const match = line.match(rowPattern);
      if (match) {
        connections.push({
          state: match[1],
          recvQ: parseInt(match[2], 10),
          sendQ: parseInt(match[3], 10),
          local: match[4],
          peer: match[5],
        });
      }
    }

    // Analyze connection health
    const establishedCount = connections.filter((c) => c.state === 'ESTAB').length;
    const hasDataInFlight = connections.some((c) => c.recvQ > 0 || c.sendQ > 0);
    const hasSynSent = connections.some((c) => c.state === 'SYN-SENT');

    return {
      hasNetwork: connections.length > 0,
      connections,
      establishedCount,
      hasDataInFlight,
      hasSynSent,
    };
  } catch (err) {
    return { hasNetwork: false, error: err.message };
  }
}
145
+
146
/**
 * Sample a process twice and score how likely it is to be stuck.
 *
 * Takes a getProcessState() snapshot, waits `samplePeriodMs`, takes a second
 * snapshot, then combines the CPU and context-switch deltas with
 * getNetworkState() into a weighted score.
 *
 * @param {number} pid - Process ID
 * @param {number} samplePeriodMs - Delay between the two snapshots (default 5000ms)
 * @returns {Promise<object>} `{ isLikelyStuck: null, reason, pid }` when the process
 *   is missing at either snapshot; otherwise the full analysis including
 *   `isLikelyStuck`, `stuckScore`, `confidence` and `indicators`
 */
async function analyzeProcessHealth(pid, samplePeriodMs = 5000) {
  const before = getProcessState(pid);
  if (!before.exists) {
    return { isLikelyStuck: null, reason: 'Process does not exist', pid };
  }

  // Let the sample window elapse before taking the second snapshot
  await new Promise((resolve) => {
    setTimeout(resolve, samplePeriodMs);
  });

  const after = getProcessState(pid);
  if (!after.exists) {
    return { isLikelyStuck: null, reason: 'Process died during analysis', pid };
  }

  const { state, wchan, threads } = after;

  // Activity observed between the two snapshots
  const ctxSwitchesDelta = after.volCtxSwitches - before.volCtxSwitches;

  // Tick rate is fixed at 100 per second here rather than queried from the system
  const ticksPerSecond = 100;
  const busySeconds = (after.cpuTicks - before.cpuTicks) / ticksPerSecond;
  const windowSeconds = samplePeriodMs / 1000;
  const cpuPercent = (busySeconds / windowSeconds) * 100;

  const network = getNetworkState(pid);

  const indicators = {
    isSleeping: state === 'S',
    isBlockedOnPoll: ['poll', 'wait'].some((needle) => wchan.includes(needle)),
    lowCpuUsage: cpuPercent < CPU_LOW_THRESHOLD,
    lowCtxSwitches: ctxSwitchesDelta < CTX_SWITCHES_LOW_THRESHOLD,
    // Network indicators only count when the process actually has connections
    noDataInFlight: network.hasNetwork && !network.hasDataInFlight,
    hasSynSent: network.hasSynSent, // Stuck trying to connect
  };

  // Weight contributed by each indicator that fired
  const weights = [
    ['isSleeping', 1],
    ['isBlockedOnPoll', 1],
    ['lowCpuUsage', 1],
    ['lowCtxSwitches', 1],
    ['noDataInFlight', 0.5], // Secondary signal
    ['hasSynSent', 1], // Strong signal - stuck connecting
  ];

  let stuckScore = 0;
  for (const [indicatorName, weight] of weights) {
    if (indicators[indicatorName]) {
      stuckScore += weight;
    }
  }

  // Queued socket data means real I/O is happening, so pull the score back down
  if (network.hasDataInFlight) {
    stuckScore = Math.max(0, stuckScore - 2);
  }

  const isLikelyStuck = stuckScore >= STUCK_THRESHOLD;

  let confidence = 'low';
  if (stuckScore >= HIGH_CONFIDENCE_THRESHOLD) {
    confidence = 'high';
  } else if (isLikelyStuck) {
    confidence = 'medium';
  }

  const cpuLabel = cpuPercent.toFixed(1);
  let analysis;
  if (isLikelyStuck) {
    analysis = `Process appears STUCK: sleeping on ${wchan}, ${cpuLabel}% CPU, ${ctxSwitchesDelta} ctx switches`;
  } else {
    analysis = `Process appears WORKING: ${cpuLabel}% CPU, ${ctxSwitchesDelta} ctx switches, state=${state}`;
  }

  return {
    pid,
    state,
    wchan,
    cpuPercent: parseFloat(cpuPercent.toFixed(2)),
    ctxSwitchesDelta,
    threads,
    network: {
      hasConnections: network.hasNetwork,
      establishedCount: network.establishedCount || 0,
      hasDataInFlight: network.hasDataInFlight || false,
      hasSynSent: network.hasSynSent || false,
    },
    indicators,
    stuckScore: parseFloat(stuckScore.toFixed(1)),
    isLikelyStuck,
    confidence,
    analysis,
  };
}
237
+
238
/**
 * Report whether stuck detection can run on this host.
 *
 * Requires `process.platform` to be 'linux' and the /proc directory to exist.
 * @returns {boolean}
 */
function isPlatformSupported() {
  if (process.platform !== 'linux') {
    return false;
  }
  return fs.existsSync('/proc');
}
245
+
246
+ module.exports = {
247
+ analyzeProcessHealth,
248
+ getProcessState,
249
+ getNetworkState,
250
+ isPlatformSupported,
251
+ // Thresholds are exported so tests can reference the same values used for scoring
252
+ STUCK_THRESHOLD,
253
+ HIGH_CONFIDENCE_THRESHOLD,
254
+ CPU_LOW_THRESHOLD,
255
+ CTX_SWITCHES_LOW_THRESHOLD,
256
+ };