claude-flow 3.5.23 → 3.5.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,82 +1,212 @@
1
1
  /**
2
2
  * Swarm MCP Tools for CLI
3
3
  *
4
- * Tool definitions for swarm coordination.
4
+ * Tool definitions for swarm coordination with file-based state persistence.
5
+ * Replaces previous stub implementations with real state tracking.
5
6
  */
7
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
8
+ import { join } from 'node:path';
9
+ // Swarm state persistence
10
+ const SWARM_DIR = '.claude-flow/swarm';
11
+ const SWARM_STATE_FILE = 'swarm-state.json';
12
+ function getSwarmDir() {
13
+ return join(process.cwd(), SWARM_DIR);
14
+ }
15
+ function getSwarmStatePath() {
16
+ return join(getSwarmDir(), SWARM_STATE_FILE);
17
+ }
18
+ function ensureSwarmDir() {
19
+ const dir = getSwarmDir();
20
+ if (!existsSync(dir)) {
21
+ mkdirSync(dir, { recursive: true, mode: 0o700 });
22
+ }
23
+ }
24
+ function loadSwarmStore() {
25
+ try {
26
+ const path = getSwarmStatePath();
27
+ if (existsSync(path)) {
28
+ return JSON.parse(readFileSync(path, 'utf-8'));
29
+ }
30
+ }
31
+ catch { /* return default */ }
32
+ return { swarms: {}, version: '3.0.0' };
33
+ }
34
+ function saveSwarmStore(store) {
35
+ ensureSwarmDir();
36
+ writeFileSync(getSwarmStatePath(), JSON.stringify(store, null, 2), 'utf-8');
37
+ }
38
+ // Input validation
39
+ const VALID_TOPOLOGIES = new Set([
40
+ 'hierarchical', 'mesh', 'hierarchical-mesh', 'ring', 'star', 'hybrid', 'adaptive',
41
+ ]);
6
42
  export const swarmTools = [
7
43
  {
8
44
  name: 'swarm_init',
9
- description: 'Initialize a swarm',
45
+ description: 'Initialize a swarm with persistent state tracking',
10
46
  category: 'swarm',
11
47
  inputSchema: {
12
48
  type: 'object',
13
49
  properties: {
14
- topology: { type: 'string', description: 'Swarm topology type' },
15
- maxAgents: { type: 'number', description: 'Maximum number of agents' },
16
- config: { type: 'object', description: 'Swarm configuration' },
50
+ topology: { type: 'string', description: 'Swarm topology type (hierarchical, mesh, hierarchical-mesh, ring, star, hybrid, adaptive)' },
51
+ maxAgents: { type: 'number', description: 'Maximum number of agents (1-50)' },
52
+ strategy: { type: 'string', description: 'Agent strategy (specialized, balanced, adaptive)' },
53
+ config: { type: 'object', description: 'Additional swarm configuration' },
17
54
  },
18
55
  },
19
56
  handler: async (input) => {
20
57
  const topology = input.topology || 'hierarchical-mesh';
21
- const maxAgents = input.maxAgents || 15;
58
+ const maxAgents = Math.min(Math.max(input.maxAgents || 15, 1), 50);
59
+ const strategy = input.strategy || 'specialized';
22
60
  const config = (input.config || {});
23
- return {
24
- success: true,
25
- swarmId: `swarm-${Date.now()}`,
61
+ if (!VALID_TOPOLOGIES.has(topology)) {
62
+ return {
63
+ success: false,
64
+ error: `Invalid topology: ${topology}. Valid: ${[...VALID_TOPOLOGIES].join(', ')}`,
65
+ };
66
+ }
67
+ const swarmId = `swarm-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
68
+ const now = new Date().toISOString();
69
+ const swarmState = {
70
+ swarmId,
26
71
  topology,
27
- initializedAt: new Date().toISOString(),
72
+ maxAgents,
73
+ status: 'running',
74
+ agents: [],
75
+ tasks: [],
28
76
  config: {
29
77
  topology,
30
78
  maxAgents,
31
- currentAgents: 0,
79
+ strategy,
32
80
  communicationProtocol: config.communicationProtocol || 'message-bus',
33
81
  autoScaling: config.autoScaling ?? true,
34
82
  consensusMechanism: config.consensusMechanism || 'majority',
35
83
  },
84
+ createdAt: now,
85
+ updatedAt: now,
86
+ };
87
+ const store = loadSwarmStore();
88
+ store.swarms[swarmId] = swarmState;
89
+ saveSwarmStore(store);
90
+ return {
91
+ success: true,
92
+ swarmId,
93
+ topology,
94
+ strategy,
95
+ maxAgents,
96
+ initializedAt: now,
97
+ config: swarmState.config,
98
+ persisted: true,
36
99
  };
37
100
  },
38
101
  },
39
102
  {
40
103
  name: 'swarm_status',
41
- description: 'Get swarm status',
104
+ description: 'Get swarm status from persistent state',
42
105
  category: 'swarm',
43
106
  inputSchema: {
44
107
  type: 'object',
45
108
  properties: {
46
- swarmId: { type: 'string', description: 'Swarm ID' },
109
+ swarmId: { type: 'string', description: 'Swarm ID (omit for most recent)' },
47
110
  },
48
111
  },
49
112
  handler: async (input) => {
113
+ const store = loadSwarmStore();
114
+ const swarmId = input.swarmId;
115
+ if (swarmId && store.swarms[swarmId]) {
116
+ const swarm = store.swarms[swarmId];
117
+ return {
118
+ swarmId: swarm.swarmId,
119
+ status: swarm.status,
120
+ topology: swarm.topology,
121
+ maxAgents: swarm.maxAgents,
122
+ agentCount: swarm.agents.length,
123
+ taskCount: swarm.tasks.length,
124
+ config: swarm.config,
125
+ createdAt: swarm.createdAt,
126
+ updatedAt: swarm.updatedAt,
127
+ };
128
+ }
129
+ // Fall back to the most recent swarm when no ID is given or the given ID is not in the store
130
+ const swarmIds = Object.keys(store.swarms);
131
+ if (swarmIds.length === 0) {
132
+ return {
133
+ status: 'no_swarm',
134
+ message: 'No active swarms. Use swarm_init to create one.',
135
+ totalSwarms: 0,
136
+ };
137
+ }
138
+ const latest = swarmIds
139
+ .map(id => store.swarms[id])
140
+ .sort((a, b) => new Date(b.updatedAt).getTime() - new Date(a.updatedAt).getTime())[0];
50
141
  return {
51
- swarmId: input.swarmId,
52
- status: 'running',
53
- agentCount: 0,
54
- taskCount: 0,
142
+ swarmId: latest.swarmId,
143
+ status: latest.status,
144
+ topology: latest.topology,
145
+ maxAgents: latest.maxAgents,
146
+ agentCount: latest.agents.length,
147
+ taskCount: latest.tasks.length,
148
+ config: latest.config,
149
+ createdAt: latest.createdAt,
150
+ updatedAt: latest.updatedAt,
151
+ totalSwarms: swarmIds.length,
55
152
  };
56
153
  },
57
154
  },
58
155
  {
59
156
  name: 'swarm_shutdown',
60
- description: 'Shutdown a swarm',
157
+ description: 'Shutdown a swarm and update persistent state',
61
158
  category: 'swarm',
62
159
  inputSchema: {
63
160
  type: 'object',
64
161
  properties: {
65
- swarmId: { type: 'string', description: 'Swarm ID' },
66
- graceful: { type: 'boolean', description: 'Graceful shutdown' },
162
+ swarmId: { type: 'string', description: 'Swarm ID to shutdown' },
163
+ graceful: { type: 'boolean', description: 'Graceful shutdown (default: true)' },
67
164
  },
68
165
  },
69
166
  handler: async (input) => {
167
+ const store = loadSwarmStore();
168
+ const swarmId = input.swarmId;
169
+ // Find the swarm
170
+ let target;
171
+ if (swarmId && store.swarms[swarmId]) {
172
+ target = store.swarms[swarmId];
173
+ }
174
+ else {
175
+ // Shut down the most recent running swarm (also reached when the given ID is not in the store)
176
+ const running = Object.values(store.swarms)
177
+ .filter(s => s.status === 'running')
178
+ .sort((a, b) => new Date(b.updatedAt).getTime() - new Date(a.updatedAt).getTime());
179
+ target = running[0];
180
+ }
181
+ if (!target) {
182
+ return {
183
+ success: false,
184
+ error: swarmId ? `Swarm ${swarmId} not found` : 'No running swarms to shutdown',
185
+ };
186
+ }
187
+ if (target.status === 'terminated') {
188
+ return {
189
+ success: false,
190
+ swarmId: target.swarmId,
191
+ error: 'Swarm already terminated',
192
+ };
193
+ }
194
+ target.status = 'terminated';
195
+ target.updatedAt = new Date().toISOString();
196
+ saveSwarmStore(store);
70
197
  return {
71
198
  success: true,
72
- swarmId: input.swarmId,
199
+ swarmId: target.swarmId,
73
200
  terminated: true,
201
+ graceful: input.graceful ?? true,
202
+ agentsTerminated: target.agents.length,
203
+ terminatedAt: target.updatedAt,
74
204
  };
75
205
  },
76
206
  },
77
207
  {
78
208
  name: 'swarm_health',
79
- description: 'Check swarm health status',
209
+ description: 'Check swarm health status with real state inspection',
80
210
  category: 'swarm',
81
211
  inputSchema: {
82
212
  type: 'object',
@@ -85,15 +215,71 @@ export const swarmTools = [
85
215
  },
86
216
  },
87
217
  handler: async (input) => {
218
+ const store = loadSwarmStore();
219
+ const swarmId = input.swarmId;
220
+ // Find the swarm
221
+ let target;
222
+ if (swarmId) {
223
+ target = store.swarms[swarmId];
224
+ if (!target) {
225
+ return {
226
+ status: 'not_found',
227
+ healthy: false,
228
+ checks: [
229
+ { name: 'swarm_exists', status: 'fail', message: `Swarm ${swarmId} not found` },
230
+ ],
231
+ checkedAt: new Date().toISOString(),
232
+ };
233
+ }
234
+ }
235
+ else {
236
+ const running = Object.values(store.swarms)
237
+ .filter(s => s.status === 'running')
238
+ .sort((a, b) => new Date(b.updatedAt).getTime() - new Date(a.updatedAt).getTime());
239
+ target = running[0];
240
+ }
241
+ if (!target) {
242
+ return {
243
+ status: 'no_swarm',
244
+ healthy: false,
245
+ checks: [
246
+ { name: 'swarm_exists', status: 'fail', message: 'No active swarm found' },
247
+ ],
248
+ checkedAt: new Date().toISOString(),
249
+ };
250
+ }
251
+ const isRunning = target.status === 'running';
252
+ const stateFileExists = existsSync(getSwarmStatePath());
253
+ const checks = [
254
+ {
255
+ name: 'coordinator',
256
+ status: isRunning ? 'ok' : 'warn',
257
+ message: isRunning ? 'Coordinator active' : `Swarm status: ${target.status}`,
258
+ },
259
+ {
260
+ name: 'agents',
261
+ status: target.agents.length > 0 ? 'ok' : 'info',
262
+ message: `${target.agents.length} agents registered (max: ${target.maxAgents})`,
263
+ },
264
+ {
265
+ name: 'persistence',
266
+ status: stateFileExists ? 'ok' : 'warn',
267
+ message: stateFileExists ? 'State file persisted' : 'State file missing',
268
+ },
269
+ {
270
+ name: 'topology',
271
+ status: 'ok',
272
+ message: `Topology: ${target.topology}`,
273
+ },
274
+ ];
275
+ const healthy = isRunning && stateFileExists;
88
276
  return {
89
- status: 'healthy',
90
- swarmId: input.swarmId || 'default',
91
- checks: [
92
- { name: 'coordinator', status: 'ok', message: 'Coordinator responding' },
93
- { name: 'agents', status: 'ok', message: 'Agent pool healthy' },
94
- { name: 'memory', status: 'ok', message: 'Memory backend connected' },
95
- { name: 'messaging', status: 'ok', message: 'Message bus active' },
96
- ],
277
+ status: healthy ? 'healthy' : 'degraded',
278
+ healthy,
279
+ swarmId: target.swarmId,
280
+ topology: target.topology,
281
+ agentCount: target.agents.length,
282
+ checks,
97
283
  checkedAt: new Date().toISOString(),
98
284
  };
99
285
  },
@@ -6,7 +6,7 @@ export { WorkerDaemon, getDaemon, startDaemon, stopDaemon, type WorkerType, } fr
6
6
  export { HeadlessWorkerExecutor, HEADLESS_WORKER_TYPES, HEADLESS_WORKER_CONFIGS, LOCAL_WORKER_TYPES, LOCAL_WORKER_CONFIGS, ALL_WORKER_CONFIGS, isHeadlessWorker, isLocalWorker, getModelId, getWorkerConfig, type HeadlessWorkerType, type LocalWorkerType, type HeadlessWorkerConfig, type HeadlessExecutionResult, type HeadlessExecutorConfig, type HeadlessOptions, type PoolStatus, type SandboxMode, type ModelType, type OutputFormat, type ExecutionMode, type WorkerPriority, type WorkerConfig, } from './headless-worker-executor.js';
7
7
  export { ContainerWorkerPool, type ContainerInfo, type ContainerPoolConfig, type ContainerExecutionOptions, type ContainerPoolStatus, type ContainerState, } from './container-worker-pool.js';
8
8
  export { WorkerQueue, type QueueTask, type WorkerQueueConfig, type QueueStats, type WorkerRegistration, type TaskStatus, } from './worker-queue.js';
9
- export type { default as WorkerDaemonType } from './worker-daemon.js';
9
+ export type { default as WorkerDaemonType, DaemonConfig } from './worker-daemon.js';
10
10
  export type { default as HeadlessWorkerExecutorType } from './headless-worker-executor.js';
11
11
  export type { default as ContainerWorkerPoolType } from './container-worker-pool.js';
12
12
  export type { default as WorkerQueueType } from './worker-queue.js';
@@ -13,6 +13,14 @@
13
13
  *
14
14
  * Created with ❤️ by ruv.io
15
15
  */
16
+ /**
17
+ * ESM/CJS interop helper — handles `.default` for CJS modules.
18
+ * Uses `'default' in mod` check which is safer than `mod.default || mod`.
19
+ */
20
+ async function importWithInterop(packageName) {
21
+ const mod = await import(packageName);
22
+ return ('default' in mod) ? mod.default : mod;
23
+ }
16
24
  // Lazy-loaded WASM modules
17
25
  let microLoRA = null;
18
26
  let scopedLoRA = null;
@@ -252,7 +260,7 @@ export async function initializeTraining(config = {}) {
252
260
  }
253
261
  // --- Attention mechanisms (optional, independent of WASM) ---
254
262
  try {
255
- const attention = await import('@ruvector/attention');
263
+ const attention = await importWithInterop('@ruvector/attention');
256
264
  if (config.useFlashAttention !== false) {
257
265
  flashAttention = new attention.FlashAttention(dim, 64);
258
266
  features.push('FlashAttention');
@@ -289,9 +297,8 @@ export async function initializeTraining(config = {}) {
289
297
  // --- SONA (optional, backward compatible) ---
290
298
  if (config.useSona !== false) {
291
299
  try {
292
- const sona = await import('@ruvector/sona');
300
+ const sona = await importWithInterop('@ruvector/sona');
293
301
  const sonaRank = config.sonaRank || 4;
294
- // @ts-expect-error - SonaEngine accepts 4 positional args but types say 1
295
302
  sonaEngine = new sona.SonaEngine(dim, sonaRank, alpha, lr);
296
303
  sonaAvailable = true;
297
304
  features.push(`SONA (${dim}-dim, rank-${sonaRank}, 624k learn/s)`);
@@ -473,7 +480,7 @@ export function mineHardNegatives(anchor, candidates) {
473
480
  * Benchmark the training system
474
481
  */
475
482
  export async function benchmarkTraining(dim, iterations) {
476
- const attention = await import('@ruvector/attention');
483
+ const attention = await importWithInterop('@ruvector/attention');
477
484
  lastBenchmark = attention.benchmarkAttention(dim || 256, 100, iterations || 1000);
478
485
  return lastBenchmark ?? [];
479
486
  }
@@ -44,7 +44,7 @@ interface DaemonStatus {
44
44
  workers: Map<WorkerType, WorkerState>;
45
45
  config: DaemonConfig;
46
46
  }
47
- interface DaemonConfig {
47
+ export interface DaemonConfig {
48
48
  autoStart: boolean;
49
49
  logDir: string;
50
50
  stateFile: string;
@@ -70,6 +70,7 @@ export declare class WorkerDaemon extends EventEmitter {
70
70
  private pendingWorkers;
71
71
  private headlessExecutor;
72
72
  private headlessAvailable;
73
+ private originalConfig?;
73
74
  constructor(projectRoot: string, config?: Partial<DaemonConfig>);
74
75
  /**
75
76
  * Initialize headless executor if Claude Code is available
@@ -83,6 +84,20 @@ export declare class WorkerDaemon extends EventEmitter {
83
84
  * Get headless executor instance
84
85
  */
85
86
  getHeadlessExecutor(): HeadlessWorkerExecutor | null;
87
+ /**
88
+ * Detect effective CPU count for the current environment.
89
+ *
90
+ * Inside Docker / K8s containers, os.cpus().length reports the HOST cpu
91
+ * count, not the container limit (Node.js #28762 — wontfix). We read
92
+ * cgroup v2 / v1 quota files first so the maxCpuLoad threshold stays
93
+ * meaningful under resource-limited containers.
94
+ */
95
+ static getEffectiveCpuCount(): number;
96
+ /**
97
+ * Read daemon-specific config from .claude-flow/config.json
98
+ * Supports dot-notation keys like 'daemon.resourceThresholds.maxCpuLoad'
99
+ */
100
+ private readDaemonConfigFromFile;
86
101
  /**
87
102
  * Setup graceful shutdown handlers
88
103
  */
@@ -93,6 +108,12 @@ export declare class WorkerDaemon extends EventEmitter {
93
108
  private canRunWorker;
94
109
  /**
95
110
  * Process pending workers queue
111
+ *
112
+ * When executeWorkerWithConcurrencyControl defers a worker (returns null),
113
+ * we break immediately to avoid a busy-wait loop — the deferred worker is
114
+ * already back on the pendingWorkers queue by that point. If no workers are
115
+ * currently running when we break, we schedule a backoff retry so the queue
116
+ * does not get permanently stuck.
96
117
  */
97
118
  private processPendingWorkers;
98
119
  private initializeWorkerStates;
@@ -194,11 +215,11 @@ export declare class WorkerDaemon extends EventEmitter {
194
215
  /**
195
216
  * Get or create daemon instance
196
217
  */
197
- export declare function getDaemon(projectRoot?: string): WorkerDaemon;
218
+ export declare function getDaemon(projectRoot?: string, config?: Partial<DaemonConfig>): WorkerDaemon;
198
219
  /**
199
220
  * Start daemon (for use in session-start hook)
200
221
  */
201
- export declare function startDaemon(projectRoot: string): Promise<WorkerDaemon>;
222
+ export declare function startDaemon(projectRoot: string, config?: Partial<DaemonConfig>): Promise<WorkerDaemon>;
202
223
  /**
203
224
  * Stop daemon
204
225
  */
@@ -11,6 +11,7 @@
11
11
  */
12
12
  import { EventEmitter } from 'events';
13
13
  import { existsSync, mkdirSync, writeFileSync, readFileSync, appendFileSync } from 'fs';
14
+ import { cpus } from 'os';
14
15
  import { join } from 'path';
15
16
  import { HeadlessWorkerExecutor, isHeadlessWorker, } from './headless-worker-executor.js';
16
17
  // Default worker configurations with improved intervals (P0 fix: map 5min -> 15min)
@@ -40,19 +41,35 @@ export class WorkerDaemon extends EventEmitter {
40
41
  // Headless execution support
41
42
  headlessExecutor = null;
42
43
  headlessAvailable = false;
44
+ // Preserve the original constructor config so we can detect explicit overrides
45
+ // during state restoration (R1: constructor config takes priority over stale state)
46
+ originalConfig;
43
47
  constructor(projectRoot, config) {
44
48
  super();
45
49
  this.projectRoot = projectRoot;
50
+ this.originalConfig = config;
46
51
  const claudeFlowDir = join(projectRoot, '.claude-flow');
52
+ // Read daemon config from .claude-flow/config.json (Layer B)
53
+ const fileConfig = this.readDaemonConfigFromFile(claudeFlowDir);
54
+ // CPU-proportional smart default instead of hardcoded 2.0
55
+ const cpuCount = WorkerDaemon.getEffectiveCpuCount();
56
+ const smartMaxCpuLoad = Math.max(cpuCount * 0.8, 2.0); // Floor of 2.0 for single-CPU machines
57
+ // Platform-aware default: macOS os.freemem() excludes reclaimable file cache,
58
+ // so reported "free" is much lower than actually available memory.
59
+ // Linux reports available memory (including reclaimable cache) more accurately.
60
+ const defaultMinFreeMemory = process.platform === 'darwin' ? 5 : 10;
61
+ // Priority: constructor arg > config.json > smart default
62
+ // For resourceThresholds, merge field-by-field so partial overrides
63
+ // (e.g. only --max-cpu-load) still pick up defaults for other fields.
47
64
  this.config = {
48
- autoStart: config?.autoStart ?? false, // P1 fix: Default to false for explicit consent
65
+ autoStart: config?.autoStart ?? fileConfig.autoStart ?? false,
49
66
  logDir: config?.logDir ?? join(claudeFlowDir, 'logs'),
50
67
  stateFile: config?.stateFile ?? join(claudeFlowDir, 'daemon-state.json'),
51
- maxConcurrent: config?.maxConcurrent ?? 2, // P0 fix: Limit concurrent workers
52
- workerTimeoutMs: config?.workerTimeoutMs ?? DEFAULT_WORKER_TIMEOUT_MS,
53
- resourceThresholds: config?.resourceThresholds ?? {
54
- maxCpuLoad: 2.0,
55
- minFreeMemoryPercent: 20,
68
+ maxConcurrent: config?.maxConcurrent ?? fileConfig.maxConcurrent ?? 2,
69
+ workerTimeoutMs: config?.workerTimeoutMs ?? fileConfig.workerTimeoutMs ?? DEFAULT_WORKER_TIMEOUT_MS,
70
+ resourceThresholds: {
71
+ maxCpuLoad: config?.resourceThresholds?.maxCpuLoad ?? fileConfig.maxCpuLoad ?? smartMaxCpuLoad,
72
+ minFreeMemoryPercent: config?.resourceThresholds?.minFreeMemoryPercent ?? fileConfig.minFreeMemoryPercent ?? defaultMinFreeMemory,
56
73
  },
57
74
  workers: config?.workers ?? DEFAULT_WORKERS,
58
75
  };
@@ -118,6 +135,66 @@ export class WorkerDaemon extends EventEmitter {
118
135
  getHeadlessExecutor() {
119
136
  return this.headlessExecutor;
120
137
  }
138
+ /**
139
+ * Detect effective CPU count for the current environment.
140
+ *
141
+ * Inside Docker / K8s containers, os.cpus().length reports the HOST cpu
142
+ * count, not the container limit (Node.js #28762 — wontfix). We read
143
+ * cgroup v2 / v1 quota files first so the maxCpuLoad threshold stays
144
+ * meaningful under resource-limited containers.
145
+ */
146
+ static getEffectiveCpuCount() {
147
+ // 1. Try cgroup v2: /sys/fs/cgroup/cpu.max
148
+ try {
149
+ const cpuMax = readFileSync('/sys/fs/cgroup/cpu.max', 'utf8').trim();
150
+ const [quotaStr, periodStr] = cpuMax.split(' ');
151
+ if (quotaStr !== 'max') {
152
+ const quota = parseInt(quotaStr, 10);
153
+ const period = parseInt(periodStr, 10);
154
+ if (quota > 0 && period > 0)
155
+ return Math.ceil(quota / period);
156
+ }
157
+ }
158
+ catch { /* not in cgroup v2 */ }
159
+ // 2. Try cgroup v1: /sys/fs/cgroup/cpu/cpu.cfs_quota_us
160
+ try {
161
+ const quota = parseInt(readFileSync('/sys/fs/cgroup/cpu/cpu.cfs_quota_us', 'utf8').trim(), 10);
162
+ const period = parseInt(readFileSync('/sys/fs/cgroup/cpu/cpu.cfs_period_us', 'utf8').trim(), 10);
163
+ if (quota > 0 && period > 0)
164
+ return Math.ceil(quota / period);
165
+ }
166
+ catch { /* not in cgroup v1 */ }
167
+ // 3. Fall back to os.cpus().length
168
+ return cpus().length || 1;
169
+ }
170
+ /**
171
+ * Read daemon-specific config from .claude-flow/config.json
172
+ * Supports dot-notation keys like 'daemon.resourceThresholds.maxCpuLoad'
173
+ */
174
+ readDaemonConfigFromFile(claudeFlowDir) {
175
+ const configPath = join(claudeFlowDir, 'config.json');
176
+ if (!existsSync(configPath))
177
+ return {};
178
+ try {
179
+ const raw = JSON.parse(readFileSync(configPath, 'utf-8'));
180
+ // Support both flat keys at root and nested under scopes.project
181
+ const cfg = raw?.scopes?.project ?? raw;
182
+ const rawCpuLoad = cfg['daemon.resourceThresholds.maxCpuLoad'] ?? raw['daemon.resourceThresholds.maxCpuLoad'];
183
+ const rawMinMem = cfg['daemon.resourceThresholds.minFreeMemoryPercent'] ?? raw['daemon.resourceThresholds.minFreeMemoryPercent'];
184
+ const rawMaxConcurrent = cfg['daemon.maxConcurrent'] ?? raw['daemon.maxConcurrent'];
185
+ const rawTimeout = cfg['daemon.workerTimeoutMs'] ?? raw['daemon.workerTimeoutMs'];
186
+ return {
187
+ autoStart: typeof raw['daemon.autoStart'] === 'boolean' ? raw['daemon.autoStart'] : undefined,
188
+ maxConcurrent: (typeof rawMaxConcurrent === 'number' && rawMaxConcurrent > 0) ? rawMaxConcurrent : undefined,
189
+ workerTimeoutMs: (typeof rawTimeout === 'number' && rawTimeout > 0) ? rawTimeout : undefined,
190
+ maxCpuLoad: (typeof rawCpuLoad === 'number' && rawCpuLoad > 0 && rawCpuLoad < 1000) ? rawCpuLoad : undefined,
191
+ minFreeMemoryPercent: (typeof rawMinMem === 'number' && rawMinMem >= 0 && rawMinMem <= 100) ? rawMinMem : undefined,
192
+ };
193
+ }
194
+ catch {
195
+ return {};
196
+ }
197
+ }
121
198
  /**
122
199
  * Setup graceful shutdown handlers
123
200
  */
@@ -150,13 +227,30 @@ export class WorkerDaemon extends EventEmitter {
150
227
  }
151
228
  /**
152
229
  * Process pending workers queue
230
+ *
231
+ * When executeWorkerWithConcurrencyControl defers a worker (returns null),
232
+ * we break immediately to avoid a busy-wait loop — the deferred worker is
233
+ * already back on the pendingWorkers queue by that point. If no workers are
234
+ * currently running when we break, we schedule a backoff retry so the queue
235
+ * does not get permanently stuck.
153
236
  */
154
237
  async processPendingWorkers() {
155
238
  while (this.pendingWorkers.length > 0 && this.runningWorkers.size < this.config.maxConcurrent) {
156
239
  const workerType = this.pendingWorkers.shift();
157
240
  const workerConfig = this.config.workers.find(w => w.type === workerType);
158
241
  if (workerConfig) {
159
- await this.executeWorkerWithConcurrencyControl(workerConfig);
242
+ const result = await this.executeWorkerWithConcurrencyControl(workerConfig);
243
+ if (result === null) {
244
+ // Worker was deferred (resource pressure or concurrency limit).
245
+ // Break to avoid tight-looping — the next executeWorker() completion
246
+ // will call processPendingWorkers() again via the finally block.
247
+ if (this.runningWorkers.size === 0) {
248
+ // No workers running means nobody will trigger the finally-block
249
+ // callback, so schedule a backoff retry to avoid a stuck queue.
250
+ setTimeout(() => this.processPendingWorkers(), 30_000).unref();
251
+ }
252
+ break;
253
+ }
160
254
  }
161
255
  }
162
256
  }
@@ -175,6 +269,23 @@ export class WorkerDaemon extends EventEmitter {
175
269
  }
176
270
  }
177
271
  }
272
+ // Restore resourceThresholds (skipped when the constructor supplied them), maxConcurrent, and workerTimeoutMs from saved state
273
+ // Only restore values that are valid numbers within sane ranges
274
+ if (saved.config?.resourceThresholds && !this.originalConfig?.resourceThresholds) {
275
+ const rt = saved.config.resourceThresholds;
276
+ if (typeof rt.maxCpuLoad === 'number' && rt.maxCpuLoad > 0 && rt.maxCpuLoad < 1000) {
277
+ this.config.resourceThresholds.maxCpuLoad = rt.maxCpuLoad;
278
+ }
279
+ if (typeof rt.minFreeMemoryPercent === 'number' && rt.minFreeMemoryPercent >= 0 && rt.minFreeMemoryPercent <= 100) {
280
+ this.config.resourceThresholds.minFreeMemoryPercent = rt.minFreeMemoryPercent;
281
+ }
282
+ }
283
+ if (typeof saved.config?.maxConcurrent === 'number' && saved.config.maxConcurrent > 0) {
284
+ this.config.maxConcurrent = saved.config.maxConcurrent;
285
+ }
286
+ if (typeof saved.config?.workerTimeoutMs === 'number' && saved.config.workerTimeoutMs > 0) {
287
+ this.config.workerTimeoutMs = saved.config.workerTimeoutMs;
288
+ }
178
289
  // Restore worker runtime states (runCount, successCount, etc.)
179
290
  if (saved.workers) {
180
291
  for (const [type, state] of Object.entries(saved.workers)) {
@@ -228,7 +339,7 @@ export class WorkerDaemon extends EventEmitter {
228
339
  }
229
340
  // Save state
230
341
  this.saveState();
231
- this.log('info', `Daemon started with ${this.config.workers.filter(w => w.enabled).length} workers`);
342
+ this.log('info', `Daemon started (PID: ${process.pid}, CPUs: ${cpus().length}, workers: ${this.config.workers.filter(w => w.enabled).length}, maxCpuLoad: ${this.config.resourceThresholds.maxCpuLoad}, minFreeMemoryPercent: ${this.config.resourceThresholds.minFreeMemoryPercent}%)`);
232
343
  }
233
344
  /**
234
345
  * Stop the daemon and all workers
@@ -755,9 +866,9 @@ let daemonInstance = null;
755
866
  /**
756
867
  * Get or create daemon instance
757
868
  */
758
- export function getDaemon(projectRoot) {
869
+ export function getDaemon(projectRoot, config) {
759
870
  if (!daemonInstance && projectRoot) {
760
- daemonInstance = new WorkerDaemon(projectRoot);
871
+ daemonInstance = new WorkerDaemon(projectRoot, config);
761
872
  }
762
873
  if (!daemonInstance) {
763
874
  throw new Error('Daemon not initialized. Provide projectRoot on first call.');
@@ -767,8 +878,8 @@ export function getDaemon(projectRoot) {
767
878
  /**
768
879
  * Start daemon (for use in session-start hook)
769
880
  */
770
- export async function startDaemon(projectRoot) {
771
- const daemon = getDaemon(projectRoot);
881
+ export async function startDaemon(projectRoot, config) {
882
+ const daemon = getDaemon(projectRoot, config);
772
883
  await daemon.start();
773
884
  return daemon;
774
885
  }