claude-flow 3.5.23 → 3.5.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/v3/@claude-flow/cli/dist/src/commands/daemon.js +54 -7
- package/v3/@claude-flow/cli/dist/src/commands/index.js +2 -0
- package/v3/@claude-flow/cli/dist/src/init/executor.js +17 -17
- package/v3/@claude-flow/cli/dist/src/init/helpers-generator.js +10 -10
- package/v3/@claude-flow/cli/dist/src/mcp-tools/browser-tools.js +2 -2
- package/v3/@claude-flow/cli/dist/src/mcp-tools/config-tools.js +10 -1
- package/v3/@claude-flow/cli/dist/src/mcp-tools/hooks-tools.js +150 -7
- package/v3/@claude-flow/cli/dist/src/mcp-tools/memory-tools.js +2 -0
- package/v3/@claude-flow/cli/dist/src/mcp-tools/swarm-tools.d.ts +2 -1
- package/v3/@claude-flow/cli/dist/src/mcp-tools/swarm-tools.js +216 -30
- package/v3/@claude-flow/cli/dist/src/services/index.d.ts +1 -1
- package/v3/@claude-flow/cli/dist/src/services/ruvector-training.js +11 -4
- package/v3/@claude-flow/cli/dist/src/services/worker-daemon.d.ts +24 -3
- package/v3/@claude-flow/cli/dist/src/services/worker-daemon.js +123 -12
- package/v3/@claude-flow/cli/dist/src/transfer/storage/gcs.js +22 -6
- package/v3/@claude-flow/cli/package.json +1 -1
|
@@ -1,82 +1,212 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Swarm MCP Tools for CLI
|
|
3
3
|
*
|
|
4
|
-
* Tool definitions for swarm coordination.
|
|
4
|
+
* Tool definitions for swarm coordination with file-based state persistence.
|
|
5
|
+
* Replaces previous stub implementations with real state tracking.
|
|
5
6
|
*/
|
|
7
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
|
8
|
+
import { join } from 'node:path';
|
|
9
|
+
// Swarm state persistence
|
|
10
|
+
const SWARM_DIR = '.claude-flow/swarm';
|
|
11
|
+
const SWARM_STATE_FILE = 'swarm-state.json';
|
|
12
|
+
/**
 * Resolve the directory that holds persisted swarm state.
 *
 * @returns {string} SWARM_DIR joined onto the process's current working directory.
 */
function getSwarmDir() {
    const workingDir = process.cwd();
    return join(workingDir, SWARM_DIR);
}
|
|
15
|
+
/**
 * Resolve the full path of the swarm state file.
 *
 * @returns {string} SWARM_STATE_FILE joined onto the directory from getSwarmDir().
 */
function getSwarmStatePath() {
    const stateDir = getSwarmDir();
    return join(stateDir, SWARM_STATE_FILE);
}
|
|
18
|
+
/**
 * Create the swarm state directory if it is not already present.
 *
 * Missing parent directories are created as well (recursive), and the
 * directory is requested with mode 0o700.
 */
function ensureSwarmDir() {
    const swarmDir = getSwarmDir();
    if (existsSync(swarmDir)) {
        return; // already present, nothing to create
    }
    mkdirSync(swarmDir, { recursive: true, mode: 0o700 });
}
|
|
24
|
+
/**
 * Load the persisted swarm store from disk.
 *
 * Best-effort: a missing, unreadable, or malformed state file yields a fresh
 * default store instead of throwing. The parsed value is also shape-checked,
 * because callers index `store.swarms[...]` directly; a file containing valid
 * JSON of the wrong shape (e.g. `null`, `{}`, `[]`) would otherwise make every
 * caller throw.
 *
 * @returns {{ swarms: Object, version: string }} The persisted store, or
 *   `{ swarms: {}, version: '3.0.0' }` when no usable store exists.
 */
function loadSwarmStore() {
    const emptyStore = () => ({ swarms: {}, version: '3.0.0' });
    try {
        const path = getSwarmStatePath();
        if (!existsSync(path)) {
            return emptyStore();
        }
        const parsed = JSON.parse(readFileSync(path, 'utf-8'));
        // typeof null === 'object', so null must be excluded explicitly.
        const hasSwarmMap = parsed !== null
            && typeof parsed === 'object'
            && parsed.swarms !== null
            && typeof parsed.swarms === 'object'
            && !Array.isArray(parsed.swarms);
        if (hasSwarmMap) {
            return parsed;
        }
    }
    catch { /* unreadable or malformed state file: fall through to the default */ }
    return emptyStore();
}
|
|
34
|
+
/**
 * Persist the swarm store to the state file as pretty-printed JSON.
 *
 * Ensures the swarm directory exists before writing.
 *
 * @param {Object} store - The store object to serialize (2-space indentation).
 */
function saveSwarmStore(store) {
    ensureSwarmDir();
    const statePath = getSwarmStatePath();
    const serialized = JSON.stringify(store, null, 2);
    writeFileSync(statePath, serialized, 'utf-8');
}
|
|
38
|
+
// Input validation
|
|
39
|
+
const VALID_TOPOLOGIES = new Set([
|
|
40
|
+
'hierarchical', 'mesh', 'hierarchical-mesh', 'ring', 'star', 'hybrid', 'adaptive',
|
|
41
|
+
]);
|
|
6
42
|
export const swarmTools = [
|
|
7
43
|
{
|
|
8
44
|
name: 'swarm_init',
|
|
9
|
-
description: 'Initialize a swarm',
|
|
45
|
+
description: 'Initialize a swarm with persistent state tracking',
|
|
10
46
|
category: 'swarm',
|
|
11
47
|
inputSchema: {
|
|
12
48
|
type: 'object',
|
|
13
49
|
properties: {
|
|
14
|
-
topology: { type: 'string', description: 'Swarm topology type' },
|
|
15
|
-
maxAgents: { type: 'number', description: 'Maximum number of agents' },
|
|
16
|
-
|
|
50
|
+
topology: { type: 'string', description: 'Swarm topology type (hierarchical, mesh, hierarchical-mesh, ring, star, hybrid, adaptive)' },
|
|
51
|
+
maxAgents: { type: 'number', description: 'Maximum number of agents (1-50)' },
|
|
52
|
+
strategy: { type: 'string', description: 'Agent strategy (specialized, balanced, adaptive)' },
|
|
53
|
+
config: { type: 'object', description: 'Additional swarm configuration' },
|
|
17
54
|
},
|
|
18
55
|
},
|
|
19
56
|
handler: async (input) => {
|
|
20
57
|
const topology = input.topology || 'hierarchical-mesh';
|
|
21
|
-
const maxAgents = input.maxAgents || 15;
|
|
58
|
+
const maxAgents = Math.min(Math.max(input.maxAgents || 15, 1), 50);
|
|
59
|
+
const strategy = input.strategy || 'specialized';
|
|
22
60
|
const config = (input.config || {});
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
61
|
+
if (!VALID_TOPOLOGIES.has(topology)) {
|
|
62
|
+
return {
|
|
63
|
+
success: false,
|
|
64
|
+
error: `Invalid topology: ${topology}. Valid: ${[...VALID_TOPOLOGIES].join(', ')}`,
|
|
65
|
+
};
|
|
66
|
+
}
|
|
67
|
+
const swarmId = `swarm-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
|
|
68
|
+
const now = new Date().toISOString();
|
|
69
|
+
const swarmState = {
|
|
70
|
+
swarmId,
|
|
26
71
|
topology,
|
|
27
|
-
|
|
72
|
+
maxAgents,
|
|
73
|
+
status: 'running',
|
|
74
|
+
agents: [],
|
|
75
|
+
tasks: [],
|
|
28
76
|
config: {
|
|
29
77
|
topology,
|
|
30
78
|
maxAgents,
|
|
31
|
-
|
|
79
|
+
strategy,
|
|
32
80
|
communicationProtocol: config.communicationProtocol || 'message-bus',
|
|
33
81
|
autoScaling: config.autoScaling ?? true,
|
|
34
82
|
consensusMechanism: config.consensusMechanism || 'majority',
|
|
35
83
|
},
|
|
84
|
+
createdAt: now,
|
|
85
|
+
updatedAt: now,
|
|
86
|
+
};
|
|
87
|
+
const store = loadSwarmStore();
|
|
88
|
+
store.swarms[swarmId] = swarmState;
|
|
89
|
+
saveSwarmStore(store);
|
|
90
|
+
return {
|
|
91
|
+
success: true,
|
|
92
|
+
swarmId,
|
|
93
|
+
topology,
|
|
94
|
+
strategy,
|
|
95
|
+
maxAgents,
|
|
96
|
+
initializedAt: now,
|
|
97
|
+
config: swarmState.config,
|
|
98
|
+
persisted: true,
|
|
36
99
|
};
|
|
37
100
|
},
|
|
38
101
|
},
|
|
39
102
|
{
|
|
40
103
|
name: 'swarm_status',
|
|
41
|
-
description: 'Get swarm status',
|
|
104
|
+
description: 'Get swarm status from persistent state',
|
|
42
105
|
category: 'swarm',
|
|
43
106
|
inputSchema: {
|
|
44
107
|
type: 'object',
|
|
45
108
|
properties: {
|
|
46
|
-
swarmId: { type: 'string', description: 'Swarm ID' },
|
|
109
|
+
swarmId: { type: 'string', description: 'Swarm ID (omit for most recent)' },
|
|
47
110
|
},
|
|
48
111
|
},
|
|
49
112
|
handler: async (input) => {
|
|
113
|
+
const store = loadSwarmStore();
|
|
114
|
+
const swarmId = input.swarmId;
|
|
115
|
+
if (swarmId && store.swarms[swarmId]) {
|
|
116
|
+
const swarm = store.swarms[swarmId];
|
|
117
|
+
return {
|
|
118
|
+
swarmId: swarm.swarmId,
|
|
119
|
+
status: swarm.status,
|
|
120
|
+
topology: swarm.topology,
|
|
121
|
+
maxAgents: swarm.maxAgents,
|
|
122
|
+
agentCount: swarm.agents.length,
|
|
123
|
+
taskCount: swarm.tasks.length,
|
|
124
|
+
config: swarm.config,
|
|
125
|
+
createdAt: swarm.createdAt,
|
|
126
|
+
updatedAt: swarm.updatedAt,
|
|
127
|
+
};
|
|
128
|
+
}
|
|
129
|
+
// Return most recent swarm if no ID specified
|
|
130
|
+
const swarmIds = Object.keys(store.swarms);
|
|
131
|
+
if (swarmIds.length === 0) {
|
|
132
|
+
return {
|
|
133
|
+
status: 'no_swarm',
|
|
134
|
+
message: 'No active swarms. Use swarm_init to create one.',
|
|
135
|
+
totalSwarms: 0,
|
|
136
|
+
};
|
|
137
|
+
}
|
|
138
|
+
const latest = swarmIds
|
|
139
|
+
.map(id => store.swarms[id])
|
|
140
|
+
.sort((a, b) => new Date(b.updatedAt).getTime() - new Date(a.updatedAt).getTime())[0];
|
|
50
141
|
return {
|
|
51
|
-
swarmId:
|
|
52
|
-
status:
|
|
53
|
-
|
|
54
|
-
|
|
142
|
+
swarmId: latest.swarmId,
|
|
143
|
+
status: latest.status,
|
|
144
|
+
topology: latest.topology,
|
|
145
|
+
maxAgents: latest.maxAgents,
|
|
146
|
+
agentCount: latest.agents.length,
|
|
147
|
+
taskCount: latest.tasks.length,
|
|
148
|
+
config: latest.config,
|
|
149
|
+
createdAt: latest.createdAt,
|
|
150
|
+
updatedAt: latest.updatedAt,
|
|
151
|
+
totalSwarms: swarmIds.length,
|
|
55
152
|
};
|
|
56
153
|
},
|
|
57
154
|
},
|
|
58
155
|
{
|
|
59
156
|
name: 'swarm_shutdown',
|
|
60
|
-
description: 'Shutdown a swarm',
|
|
157
|
+
description: 'Shutdown a swarm and update persistent state',
|
|
61
158
|
category: 'swarm',
|
|
62
159
|
inputSchema: {
|
|
63
160
|
type: 'object',
|
|
64
161
|
properties: {
|
|
65
|
-
swarmId: { type: 'string', description: 'Swarm ID' },
|
|
66
|
-
graceful: { type: 'boolean', description: 'Graceful shutdown' },
|
|
162
|
+
swarmId: { type: 'string', description: 'Swarm ID to shutdown' },
|
|
163
|
+
graceful: { type: 'boolean', description: 'Graceful shutdown (default: true)' },
|
|
67
164
|
},
|
|
68
165
|
},
|
|
69
166
|
handler: async (input) => {
|
|
167
|
+
const store = loadSwarmStore();
|
|
168
|
+
const swarmId = input.swarmId;
|
|
169
|
+
// Find the swarm
|
|
170
|
+
let target;
|
|
171
|
+
if (swarmId && store.swarms[swarmId]) {
|
|
172
|
+
target = store.swarms[swarmId];
|
|
173
|
+
}
|
|
174
|
+
else {
|
|
175
|
+
// Shutdown most recent running swarm
|
|
176
|
+
const running = Object.values(store.swarms)
|
|
177
|
+
.filter(s => s.status === 'running')
|
|
178
|
+
.sort((a, b) => new Date(b.updatedAt).getTime() - new Date(a.updatedAt).getTime());
|
|
179
|
+
target = running[0];
|
|
180
|
+
}
|
|
181
|
+
if (!target) {
|
|
182
|
+
return {
|
|
183
|
+
success: false,
|
|
184
|
+
error: swarmId ? `Swarm ${swarmId} not found` : 'No running swarms to shutdown',
|
|
185
|
+
};
|
|
186
|
+
}
|
|
187
|
+
if (target.status === 'terminated') {
|
|
188
|
+
return {
|
|
189
|
+
success: false,
|
|
190
|
+
swarmId: target.swarmId,
|
|
191
|
+
error: 'Swarm already terminated',
|
|
192
|
+
};
|
|
193
|
+
}
|
|
194
|
+
target.status = 'terminated';
|
|
195
|
+
target.updatedAt = new Date().toISOString();
|
|
196
|
+
saveSwarmStore(store);
|
|
70
197
|
return {
|
|
71
198
|
success: true,
|
|
72
|
-
swarmId:
|
|
199
|
+
swarmId: target.swarmId,
|
|
73
200
|
terminated: true,
|
|
201
|
+
graceful: input.graceful ?? true,
|
|
202
|
+
agentsTerminated: target.agents.length,
|
|
203
|
+
terminatedAt: target.updatedAt,
|
|
74
204
|
};
|
|
75
205
|
},
|
|
76
206
|
},
|
|
77
207
|
{
|
|
78
208
|
name: 'swarm_health',
|
|
79
|
-
description: 'Check swarm health status',
|
|
209
|
+
description: 'Check swarm health status with real state inspection',
|
|
80
210
|
category: 'swarm',
|
|
81
211
|
inputSchema: {
|
|
82
212
|
type: 'object',
|
|
@@ -85,15 +215,71 @@ export const swarmTools = [
|
|
|
85
215
|
},
|
|
86
216
|
},
|
|
87
217
|
handler: async (input) => {
|
|
218
|
+
const store = loadSwarmStore();
|
|
219
|
+
const swarmId = input.swarmId;
|
|
220
|
+
// Find the swarm
|
|
221
|
+
let target;
|
|
222
|
+
if (swarmId) {
|
|
223
|
+
target = store.swarms[swarmId];
|
|
224
|
+
if (!target) {
|
|
225
|
+
return {
|
|
226
|
+
status: 'not_found',
|
|
227
|
+
healthy: false,
|
|
228
|
+
checks: [
|
|
229
|
+
{ name: 'swarm_exists', status: 'fail', message: `Swarm ${swarmId} not found` },
|
|
230
|
+
],
|
|
231
|
+
checkedAt: new Date().toISOString(),
|
|
232
|
+
};
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
else {
|
|
236
|
+
const running = Object.values(store.swarms)
|
|
237
|
+
.filter(s => s.status === 'running')
|
|
238
|
+
.sort((a, b) => new Date(b.updatedAt).getTime() - new Date(a.updatedAt).getTime());
|
|
239
|
+
target = running[0];
|
|
240
|
+
}
|
|
241
|
+
if (!target) {
|
|
242
|
+
return {
|
|
243
|
+
status: 'no_swarm',
|
|
244
|
+
healthy: false,
|
|
245
|
+
checks: [
|
|
246
|
+
{ name: 'swarm_exists', status: 'fail', message: 'No active swarm found' },
|
|
247
|
+
],
|
|
248
|
+
checkedAt: new Date().toISOString(),
|
|
249
|
+
};
|
|
250
|
+
}
|
|
251
|
+
const isRunning = target.status === 'running';
|
|
252
|
+
const stateFileExists = existsSync(getSwarmStatePath());
|
|
253
|
+
const checks = [
|
|
254
|
+
{
|
|
255
|
+
name: 'coordinator',
|
|
256
|
+
status: isRunning ? 'ok' : 'warn',
|
|
257
|
+
message: isRunning ? 'Coordinator active' : `Swarm status: ${target.status}`,
|
|
258
|
+
},
|
|
259
|
+
{
|
|
260
|
+
name: 'agents',
|
|
261
|
+
status: target.agents.length > 0 ? 'ok' : 'info',
|
|
262
|
+
message: `${target.agents.length} agents registered (max: ${target.maxAgents})`,
|
|
263
|
+
},
|
|
264
|
+
{
|
|
265
|
+
name: 'persistence',
|
|
266
|
+
status: stateFileExists ? 'ok' : 'warn',
|
|
267
|
+
message: stateFileExists ? 'State file persisted' : 'State file missing',
|
|
268
|
+
},
|
|
269
|
+
{
|
|
270
|
+
name: 'topology',
|
|
271
|
+
status: 'ok',
|
|
272
|
+
message: `Topology: ${target.topology}`,
|
|
273
|
+
},
|
|
274
|
+
];
|
|
275
|
+
const healthy = isRunning && stateFileExists;
|
|
88
276
|
return {
|
|
89
|
-
status: 'healthy',
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
{ name: 'messaging', status: 'ok', message: 'Message bus active' },
|
|
96
|
-
],
|
|
277
|
+
status: healthy ? 'healthy' : 'degraded',
|
|
278
|
+
healthy,
|
|
279
|
+
swarmId: target.swarmId,
|
|
280
|
+
topology: target.topology,
|
|
281
|
+
agentCount: target.agents.length,
|
|
282
|
+
checks,
|
|
97
283
|
checkedAt: new Date().toISOString(),
|
|
98
284
|
};
|
|
99
285
|
},
|
|
@@ -6,7 +6,7 @@ export { WorkerDaemon, getDaemon, startDaemon, stopDaemon, type WorkerType, } fr
|
|
|
6
6
|
export { HeadlessWorkerExecutor, HEADLESS_WORKER_TYPES, HEADLESS_WORKER_CONFIGS, LOCAL_WORKER_TYPES, LOCAL_WORKER_CONFIGS, ALL_WORKER_CONFIGS, isHeadlessWorker, isLocalWorker, getModelId, getWorkerConfig, type HeadlessWorkerType, type LocalWorkerType, type HeadlessWorkerConfig, type HeadlessExecutionResult, type HeadlessExecutorConfig, type HeadlessOptions, type PoolStatus, type SandboxMode, type ModelType, type OutputFormat, type ExecutionMode, type WorkerPriority, type WorkerConfig, } from './headless-worker-executor.js';
|
|
7
7
|
export { ContainerWorkerPool, type ContainerInfo, type ContainerPoolConfig, type ContainerExecutionOptions, type ContainerPoolStatus, type ContainerState, } from './container-worker-pool.js';
|
|
8
8
|
export { WorkerQueue, type QueueTask, type WorkerQueueConfig, type QueueStats, type WorkerRegistration, type TaskStatus, } from './worker-queue.js';
|
|
9
|
-
export type { default as WorkerDaemonType } from './worker-daemon.js';
|
|
9
|
+
export type { default as WorkerDaemonType, DaemonConfig } from './worker-daemon.js';
|
|
10
10
|
export type { default as HeadlessWorkerExecutorType } from './headless-worker-executor.js';
|
|
11
11
|
export type { default as ContainerWorkerPoolType } from './container-worker-pool.js';
|
|
12
12
|
export type { default as WorkerQueueType } from './worker-queue.js';
|
|
@@ -13,6 +13,14 @@
|
|
|
13
13
|
*
|
|
14
14
|
* Created with ❤️ by ruv.io
|
|
15
15
|
*/
|
|
16
|
+
/**
 * ESM/CJS interop helper for dynamic imports.
 *
 * Dynamically imports `packageName` and unwraps the module namespace: when the
 * namespace has a `default` export, that value is returned; otherwise the
 * namespace itself is returned. The check is `'default' in mod`, not
 * `mod.default || mod`, so a falsy default export is still returned.
 *
 * @param {string} packageName - Module specifier passed to `import()`.
 * @returns {Promise<*>} The default export if present, else the module namespace.
 */
async function importWithInterop(packageName) {
    const loaded = await import(packageName);
    if ('default' in loaded) {
        return loaded.default;
    }
    return loaded;
}
|
|
16
24
|
// Lazy-loaded WASM modules
|
|
17
25
|
let microLoRA = null;
|
|
18
26
|
let scopedLoRA = null;
|
|
@@ -252,7 +260,7 @@ export async function initializeTraining(config = {}) {
|
|
|
252
260
|
}
|
|
253
261
|
// --- Attention mechanisms (optional, independent of WASM) ---
|
|
254
262
|
try {
|
|
255
|
-
const attention = await
|
|
263
|
+
const attention = await importWithInterop('@ruvector/attention');
|
|
256
264
|
if (config.useFlashAttention !== false) {
|
|
257
265
|
flashAttention = new attention.FlashAttention(dim, 64);
|
|
258
266
|
features.push('FlashAttention');
|
|
@@ -289,9 +297,8 @@ export async function initializeTraining(config = {}) {
|
|
|
289
297
|
// --- SONA (optional, backward compatible) ---
|
|
290
298
|
if (config.useSona !== false) {
|
|
291
299
|
try {
|
|
292
|
-
const sona = await
|
|
300
|
+
const sona = await importWithInterop('@ruvector/sona');
|
|
293
301
|
const sonaRank = config.sonaRank || 4;
|
|
294
|
-
// @ts-expect-error - SonaEngine accepts 4 positional args but types say 1
|
|
295
302
|
sonaEngine = new sona.SonaEngine(dim, sonaRank, alpha, lr);
|
|
296
303
|
sonaAvailable = true;
|
|
297
304
|
features.push(`SONA (${dim}-dim, rank-${sonaRank}, 624k learn/s)`);
|
|
@@ -473,7 +480,7 @@ export function mineHardNegatives(anchor, candidates) {
|
|
|
473
480
|
* Benchmark the training system
|
|
474
481
|
*/
|
|
475
482
|
export async function benchmarkTraining(dim, iterations) {
|
|
476
|
-
const attention = await
|
|
483
|
+
const attention = await importWithInterop('@ruvector/attention');
|
|
477
484
|
lastBenchmark = attention.benchmarkAttention(dim || 256, 100, iterations || 1000);
|
|
478
485
|
return lastBenchmark ?? [];
|
|
479
486
|
}
|
|
@@ -44,7 +44,7 @@ interface DaemonStatus {
|
|
|
44
44
|
workers: Map<WorkerType, WorkerState>;
|
|
45
45
|
config: DaemonConfig;
|
|
46
46
|
}
|
|
47
|
-
interface DaemonConfig {
|
|
47
|
+
export interface DaemonConfig {
|
|
48
48
|
autoStart: boolean;
|
|
49
49
|
logDir: string;
|
|
50
50
|
stateFile: string;
|
|
@@ -70,6 +70,7 @@ export declare class WorkerDaemon extends EventEmitter {
|
|
|
70
70
|
private pendingWorkers;
|
|
71
71
|
private headlessExecutor;
|
|
72
72
|
private headlessAvailable;
|
|
73
|
+
private originalConfig?;
|
|
73
74
|
constructor(projectRoot: string, config?: Partial<DaemonConfig>);
|
|
74
75
|
/**
|
|
75
76
|
* Initialize headless executor if Claude Code is available
|
|
@@ -83,6 +84,20 @@ export declare class WorkerDaemon extends EventEmitter {
|
|
|
83
84
|
* Get headless executor instance
|
|
84
85
|
*/
|
|
85
86
|
getHeadlessExecutor(): HeadlessWorkerExecutor | null;
|
|
87
|
+
/**
|
|
88
|
+
* Detect effective CPU count for the current environment.
|
|
89
|
+
*
|
|
90
|
+
* Inside Docker / K8s containers, os.cpus().length reports the HOST cpu
|
|
91
|
+
* count, not the container limit (Node.js #28762 — wontfix). We read
|
|
92
|
+
* cgroup v2 / v1 quota files first so the maxCpuLoad threshold stays
|
|
93
|
+
* meaningful under resource-limited containers.
|
|
94
|
+
*/
|
|
95
|
+
static getEffectiveCpuCount(): number;
|
|
96
|
+
/**
|
|
97
|
+
* Read daemon-specific config from .claude-flow/config.json
|
|
98
|
+
* Supports dot-notation keys like 'daemon.resourceThresholds.maxCpuLoad'
|
|
99
|
+
*/
|
|
100
|
+
private readDaemonConfigFromFile;
|
|
86
101
|
/**
|
|
87
102
|
* Setup graceful shutdown handlers
|
|
88
103
|
*/
|
|
@@ -93,6 +108,12 @@ export declare class WorkerDaemon extends EventEmitter {
|
|
|
93
108
|
private canRunWorker;
|
|
94
109
|
/**
|
|
95
110
|
* Process pending workers queue
|
|
111
|
+
*
|
|
112
|
+
* When executeWorkerWithConcurrencyControl defers a worker (returns null),
|
|
113
|
+
* we break immediately to avoid a busy-wait loop — the deferred worker is
|
|
114
|
+
* already back on the pendingWorkers queue by that point. If no workers are
|
|
115
|
+
* currently running when we break, we schedule a backoff retry so the queue
|
|
116
|
+
* does not get permanently stuck.
|
|
96
117
|
*/
|
|
97
118
|
private processPendingWorkers;
|
|
98
119
|
private initializeWorkerStates;
|
|
@@ -194,11 +215,11 @@ export declare class WorkerDaemon extends EventEmitter {
|
|
|
194
215
|
/**
|
|
195
216
|
* Get or create daemon instance
|
|
196
217
|
*/
|
|
197
|
-
export declare function getDaemon(projectRoot?: string): WorkerDaemon;
|
|
218
|
+
export declare function getDaemon(projectRoot?: string, config?: Partial<DaemonConfig>): WorkerDaemon;
|
|
198
219
|
/**
|
|
199
220
|
* Start daemon (for use in session-start hook)
|
|
200
221
|
*/
|
|
201
|
-
export declare function startDaemon(projectRoot: string): Promise<WorkerDaemon>;
|
|
222
|
+
export declare function startDaemon(projectRoot: string, config?: Partial<DaemonConfig>): Promise<WorkerDaemon>;
|
|
202
223
|
/**
|
|
203
224
|
* Stop daemon
|
|
204
225
|
*/
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
*/
|
|
12
12
|
import { EventEmitter } from 'events';
|
|
13
13
|
import { existsSync, mkdirSync, writeFileSync, readFileSync, appendFileSync } from 'fs';
|
|
14
|
+
import { cpus } from 'os';
|
|
14
15
|
import { join } from 'path';
|
|
15
16
|
import { HeadlessWorkerExecutor, isHeadlessWorker, } from './headless-worker-executor.js';
|
|
16
17
|
// Default worker configurations with improved intervals (P0 fix: map 5min -> 15min)
|
|
@@ -40,19 +41,35 @@ export class WorkerDaemon extends EventEmitter {
|
|
|
40
41
|
// Headless execution support
|
|
41
42
|
headlessExecutor = null;
|
|
42
43
|
headlessAvailable = false;
|
|
44
|
+
// Preserve the original constructor config so we can detect explicit overrides
|
|
45
|
+
// during state restoration (R1: constructor config takes priority over stale state)
|
|
46
|
+
originalConfig;
|
|
43
47
|
constructor(projectRoot, config) {
|
|
44
48
|
super();
|
|
45
49
|
this.projectRoot = projectRoot;
|
|
50
|
+
this.originalConfig = config;
|
|
46
51
|
const claudeFlowDir = join(projectRoot, '.claude-flow');
|
|
52
|
+
// Read daemon config from .claude-flow/config.json (Layer B)
|
|
53
|
+
const fileConfig = this.readDaemonConfigFromFile(claudeFlowDir);
|
|
54
|
+
// CPU-proportional smart default instead of hardcoded 2.0
|
|
55
|
+
const cpuCount = WorkerDaemon.getEffectiveCpuCount();
|
|
56
|
+
const smartMaxCpuLoad = Math.max(cpuCount * 0.8, 2.0); // Floor of 2.0 for single-CPU machines
|
|
57
|
+
// Platform-aware default: macOS os.freemem() excludes reclaimable file cache,
|
|
58
|
+
// so reported "free" is much lower than actually available memory.
|
|
59
|
+
// Linux reports available memory (including reclaimable cache) more accurately.
|
|
60
|
+
const defaultMinFreeMemory = process.platform === 'darwin' ? 5 : 10;
|
|
61
|
+
// Priority: constructor arg > config.json > smart default
|
|
62
|
+
// For resourceThresholds, merge field-by-field so partial overrides
|
|
63
|
+
// (e.g. only --max-cpu-load) still pick up defaults for other fields.
|
|
47
64
|
this.config = {
|
|
48
|
-
autoStart: config?.autoStart ??
|
|
65
|
+
autoStart: config?.autoStart ?? fileConfig.autoStart ?? false,
|
|
49
66
|
logDir: config?.logDir ?? join(claudeFlowDir, 'logs'),
|
|
50
67
|
stateFile: config?.stateFile ?? join(claudeFlowDir, 'daemon-state.json'),
|
|
51
|
-
maxConcurrent: config?.maxConcurrent ?? 2,
|
|
52
|
-
workerTimeoutMs: config?.workerTimeoutMs ?? DEFAULT_WORKER_TIMEOUT_MS,
|
|
53
|
-
resourceThresholds:
|
|
54
|
-
maxCpuLoad:
|
|
55
|
-
minFreeMemoryPercent:
|
|
68
|
+
maxConcurrent: config?.maxConcurrent ?? fileConfig.maxConcurrent ?? 2,
|
|
69
|
+
workerTimeoutMs: config?.workerTimeoutMs ?? fileConfig.workerTimeoutMs ?? DEFAULT_WORKER_TIMEOUT_MS,
|
|
70
|
+
resourceThresholds: {
|
|
71
|
+
maxCpuLoad: config?.resourceThresholds?.maxCpuLoad ?? fileConfig.maxCpuLoad ?? smartMaxCpuLoad,
|
|
72
|
+
minFreeMemoryPercent: config?.resourceThresholds?.minFreeMemoryPercent ?? fileConfig.minFreeMemoryPercent ?? defaultMinFreeMemory,
|
|
56
73
|
},
|
|
57
74
|
workers: config?.workers ?? DEFAULT_WORKERS,
|
|
58
75
|
};
|
|
@@ -118,6 +135,66 @@ export class WorkerDaemon extends EventEmitter {
|
|
|
118
135
|
getHeadlessExecutor() {
|
|
119
136
|
return this.headlessExecutor;
|
|
120
137
|
}
|
|
138
|
+
/**
|
|
139
|
+
* Detect effective CPU count for the current environment.
|
|
140
|
+
*
|
|
141
|
+
* Inside Docker / K8s containers, os.cpus().length reports the HOST cpu
|
|
142
|
+
* count, not the container limit (Node.js #28762 — wontfix). We read
|
|
143
|
+
* cgroup v2 / v1 quota files first so the maxCpuLoad threshold stays
|
|
144
|
+
* meaningful under resource-limited containers.
|
|
145
|
+
*/
|
|
146
|
+
static getEffectiveCpuCount() {
|
|
147
|
+
// 1. Try cgroup v2: /sys/fs/cgroup/cpu.max
|
|
148
|
+
try {
|
|
149
|
+
const cpuMax = readFileSync('/sys/fs/cgroup/cpu.max', 'utf8').trim();
|
|
150
|
+
const [quotaStr, periodStr] = cpuMax.split(' ');
|
|
151
|
+
if (quotaStr !== 'max') {
|
|
152
|
+
const quota = parseInt(quotaStr, 10);
|
|
153
|
+
const period = parseInt(periodStr, 10);
|
|
154
|
+
if (quota > 0 && period > 0)
|
|
155
|
+
return Math.ceil(quota / period);
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
catch { /* not in cgroup v2 */ }
|
|
159
|
+
// 2. Try cgroup v1: /sys/fs/cgroup/cpu/cpu.cfs_quota_us
|
|
160
|
+
try {
|
|
161
|
+
const quota = parseInt(readFileSync('/sys/fs/cgroup/cpu/cpu.cfs_quota_us', 'utf8').trim(), 10);
|
|
162
|
+
const period = parseInt(readFileSync('/sys/fs/cgroup/cpu/cpu.cfs_period_us', 'utf8').trim(), 10);
|
|
163
|
+
if (quota > 0 && period > 0)
|
|
164
|
+
return Math.ceil(quota / period);
|
|
165
|
+
}
|
|
166
|
+
catch { /* not in cgroup v1 */ }
|
|
167
|
+
// 3. Fallback to os.cpus().length
|
|
168
|
+
return cpus().length || 1;
|
|
169
|
+
}
|
|
170
|
+
/**
|
|
171
|
+
* Read daemon-specific config from .claude-flow/config.json
|
|
172
|
+
* Supports dot-notation keys like 'daemon.resourceThresholds.maxCpuLoad'
|
|
173
|
+
*/
|
|
174
|
+
readDaemonConfigFromFile(claudeFlowDir) {
|
|
175
|
+
const configPath = join(claudeFlowDir, 'config.json');
|
|
176
|
+
if (!existsSync(configPath))
|
|
177
|
+
return {};
|
|
178
|
+
try {
|
|
179
|
+
const raw = JSON.parse(readFileSync(configPath, 'utf-8'));
|
|
180
|
+
// Support both flat keys at root and nested under scopes.project
|
|
181
|
+
const cfg = raw?.scopes?.project ?? raw;
|
|
182
|
+
const rawCpuLoad = cfg['daemon.resourceThresholds.maxCpuLoad'] ?? raw['daemon.resourceThresholds.maxCpuLoad'];
|
|
183
|
+
const rawMinMem = cfg['daemon.resourceThresholds.minFreeMemoryPercent'] ?? raw['daemon.resourceThresholds.minFreeMemoryPercent'];
|
|
184
|
+
const rawMaxConcurrent = cfg['daemon.maxConcurrent'] ?? raw['daemon.maxConcurrent'];
|
|
185
|
+
const rawTimeout = cfg['daemon.workerTimeoutMs'] ?? raw['daemon.workerTimeoutMs'];
|
|
186
|
+
return {
|
|
187
|
+
autoStart: typeof raw['daemon.autoStart'] === 'boolean' ? raw['daemon.autoStart'] : undefined,
|
|
188
|
+
maxConcurrent: (typeof rawMaxConcurrent === 'number' && rawMaxConcurrent > 0) ? rawMaxConcurrent : undefined,
|
|
189
|
+
workerTimeoutMs: (typeof rawTimeout === 'number' && rawTimeout > 0) ? rawTimeout : undefined,
|
|
190
|
+
maxCpuLoad: (typeof rawCpuLoad === 'number' && rawCpuLoad > 0 && rawCpuLoad < 1000) ? rawCpuLoad : undefined,
|
|
191
|
+
minFreeMemoryPercent: (typeof rawMinMem === 'number' && rawMinMem >= 0 && rawMinMem <= 100) ? rawMinMem : undefined,
|
|
192
|
+
};
|
|
193
|
+
}
|
|
194
|
+
catch {
|
|
195
|
+
return {};
|
|
196
|
+
}
|
|
197
|
+
}
|
|
121
198
|
/**
|
|
122
199
|
* Setup graceful shutdown handlers
|
|
123
200
|
*/
|
|
@@ -150,13 +227,30 @@ export class WorkerDaemon extends EventEmitter {
|
|
|
150
227
|
}
|
|
151
228
|
/**
|
|
152
229
|
* Process pending workers queue
|
|
230
|
+
*
|
|
231
|
+
* When executeWorkerWithConcurrencyControl defers a worker (returns null),
|
|
232
|
+
* we break immediately to avoid a busy-wait loop — the deferred worker is
|
|
233
|
+
* already back on the pendingWorkers queue by that point. If no workers are
|
|
234
|
+
* currently running when we break, we schedule a backoff retry so the queue
|
|
235
|
+
* does not get permanently stuck.
|
|
153
236
|
*/
|
|
154
237
|
async processPendingWorkers() {
|
|
155
238
|
while (this.pendingWorkers.length > 0 && this.runningWorkers.size < this.config.maxConcurrent) {
|
|
156
239
|
const workerType = this.pendingWorkers.shift();
|
|
157
240
|
const workerConfig = this.config.workers.find(w => w.type === workerType);
|
|
158
241
|
if (workerConfig) {
|
|
159
|
-
await this.executeWorkerWithConcurrencyControl(workerConfig);
|
|
242
|
+
const result = await this.executeWorkerWithConcurrencyControl(workerConfig);
|
|
243
|
+
if (result === null) {
|
|
244
|
+
// Worker was deferred (resource pressure or concurrency limit).
|
|
245
|
+
// Break to avoid tight-looping — the next executeWorker() completion
|
|
246
|
+
// will call processPendingWorkers() again via the finally block.
|
|
247
|
+
if (this.runningWorkers.size === 0) {
|
|
248
|
+
// No workers running means nobody will trigger the finally-block
|
|
249
|
+
// callback, so schedule a backoff retry to avoid a stuck queue.
|
|
250
|
+
setTimeout(() => this.processPendingWorkers(), 30_000).unref();
|
|
251
|
+
}
|
|
252
|
+
break;
|
|
253
|
+
}
|
|
160
254
|
}
|
|
161
255
|
}
|
|
162
256
|
}
|
|
@@ -175,6 +269,23 @@ export class WorkerDaemon extends EventEmitter {
|
|
|
175
269
|
}
|
|
176
270
|
}
|
|
177
271
|
}
|
|
272
|
+
// Restore resourceThresholds, maxConcurrent, workerTimeoutMs from saved state
|
|
273
|
+
// Only restore if valid numeric values within sane ranges
|
|
274
|
+
if (saved.config?.resourceThresholds && !this.originalConfig?.resourceThresholds) {
|
|
275
|
+
const rt = saved.config.resourceThresholds;
|
|
276
|
+
if (typeof rt.maxCpuLoad === 'number' && rt.maxCpuLoad > 0 && rt.maxCpuLoad < 1000) {
|
|
277
|
+
this.config.resourceThresholds.maxCpuLoad = rt.maxCpuLoad;
|
|
278
|
+
}
|
|
279
|
+
if (typeof rt.minFreeMemoryPercent === 'number' && rt.minFreeMemoryPercent >= 0 && rt.minFreeMemoryPercent <= 100) {
|
|
280
|
+
this.config.resourceThresholds.minFreeMemoryPercent = rt.minFreeMemoryPercent;
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
if (typeof saved.config?.maxConcurrent === 'number' && saved.config.maxConcurrent > 0) {
|
|
284
|
+
this.config.maxConcurrent = saved.config.maxConcurrent;
|
|
285
|
+
}
|
|
286
|
+
if (typeof saved.config?.workerTimeoutMs === 'number' && saved.config.workerTimeoutMs > 0) {
|
|
287
|
+
this.config.workerTimeoutMs = saved.config.workerTimeoutMs;
|
|
288
|
+
}
|
|
178
289
|
// Restore worker runtime states (runCount, successCount, etc.)
|
|
179
290
|
if (saved.workers) {
|
|
180
291
|
for (const [type, state] of Object.entries(saved.workers)) {
|
|
@@ -228,7 +339,7 @@ export class WorkerDaemon extends EventEmitter {
|
|
|
228
339
|
}
|
|
229
340
|
// Save state
|
|
230
341
|
this.saveState();
|
|
231
|
-
this.log('info', `Daemon started
|
|
342
|
+
this.log('info', `Daemon started (PID: ${process.pid}, CPUs: ${cpus().length}, workers: ${this.config.workers.filter(w => w.enabled).length}, maxCpuLoad: ${this.config.resourceThresholds.maxCpuLoad}, minFreeMemoryPercent: ${this.config.resourceThresholds.minFreeMemoryPercent}%)`);
|
|
232
343
|
}
|
|
233
344
|
/**
|
|
234
345
|
* Stop the daemon and all workers
|
|
@@ -755,9 +866,9 @@ let daemonInstance = null;
|
|
|
755
866
|
/**
|
|
756
867
|
* Get or create daemon instance
|
|
757
868
|
*/
|
|
758
|
-
export function getDaemon(projectRoot) {
|
|
869
|
+
export function getDaemon(projectRoot, config) {
|
|
759
870
|
if (!daemonInstance && projectRoot) {
|
|
760
|
-
daemonInstance = new WorkerDaemon(projectRoot);
|
|
871
|
+
daemonInstance = new WorkerDaemon(projectRoot, config);
|
|
761
872
|
}
|
|
762
873
|
if (!daemonInstance) {
|
|
763
874
|
throw new Error('Daemon not initialized. Provide projectRoot on first call.');
|
|
@@ -767,8 +878,8 @@ export function getDaemon(projectRoot) {
|
|
|
767
878
|
/**
 * Start daemon (for use in session-start hook)
 *
 * Obtains the daemon through getDaemon(projectRoot, config), awaits its
 * start(), and hands the same instance back.
 *
 * @param {string} projectRoot - Project root forwarded to getDaemon.
 * @param {Object} [config] - Optional daemon config forwarded to getDaemon.
 * @returns {Promise<Object>} The started daemon instance.
 */
export async function startDaemon(projectRoot, config) {
    const instance = getDaemon(projectRoot, config);
    await instance.start();
    return instance;
}
|