prjct-cli 1.19.0 → 1.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +74 -2
- package/core/__tests__/utils/retry.test.ts +381 -0
- package/core/agentic/tool-registry.ts +40 -12
- package/core/services/agent-generator.ts +35 -8
- package/core/services/agent-service.ts +17 -12
- package/core/utils/retry.ts +318 -0
- package/dist/bin/prjct.mjs +249 -18
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
@@ -1,11 +1,83 @@
 # Changelog

-## [1.
+## [1.20.0] - 2026-02-10

 ### Features

--
+- add retry with exponential backoff for agent and tool operations (#162)
+
+
+## [1.20.0] - 2026-02-09
+
+### Features
+
+- **Retry with exponential backoff for agent and tool operations** (PRJ-271): Comprehensive retry infrastructure with error classification and circuit breaker
+  - RetryPolicy utility with configurable attempts, delays, and exponential backoff (1s→2s→4s)
+  - Automatic error classification: transient (EBUSY, EAGAIN, ETIMEDOUT) vs permanent (ENOENT, EPERM)
+  - Circuit breaker protection: opens after 5 consecutive failures, auto-closes after 60s
+  - Agent initialization retries (3 attempts with 1s base delay)
+  - Tool operations retry (Read/Write/Bash with 2 attempts)
+  - Resilient parallel agent generation using Promise.allSettled()
+
+### Implementation Details
+
+Built RetryPolicy utility with exponential backoff, error classification, and circuit breaker. Integrated across agent initialization, tool operations, and parallel agent generation. The system now automatically retries transient failures while failing fast on permanent errors.
+
+**New modules:**
+- `core/utils/retry.ts` (320 lines) — Core retry infrastructure with RetryPolicy class, error classification, circuit breaker
+- `core/__tests__/utils/retry.test.ts` (380 lines) — 21 comprehensive tests with 53 assertions
+- `ACCEPTANCE-PRJ-271.md` — Full acceptance criteria verification (22 criteria verified)
+
+**Modified modules:**
+- `core/services/agent-service.ts` — Wrapped initialize() with retry policy (3 attempts)
+- `core/agentic/tool-registry.ts` — Added retry to Read/Write/Bash tools (2 attempts each)
+- `core/services/agent-generator.ts` — Changed to Promise.allSettled() with per-agent retry
+
+**Key features:**
+- Exponential backoff: 1s, 2s, 4s (configurable base/max)
+- Error classification: automatic transient vs permanent detection
+- Circuit breaker: per-operation tracking, 5 failure threshold, 60s cooldown
+- Two default policies: defaultAgentRetryPolicy (3 attempts), defaultToolRetryPolicy (2 attempts)
+- Zero breaking changes: all 968 existing tests pass
+
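For orientation, here is a minimal usage sketch of the RetryPolicy API these notes describe. The constructor options and `execute()` signature come from `core/utils/retry.ts` later in this diff; the wrapped file read, the operation ID, and the import path are illustrative placeholders, not part of the package.

```typescript
// Hypothetical consumer code; RetryPolicy matches the API in core/utils/retry.ts below.
import fs from 'node:fs/promises'
import { RetryPolicy, isPermanentError } from './core/utils/retry' // assumed path to the vendored module

// Same settings as defaultAgentRetryPolicy: 3 attempts, 1s base delay, 8s cap
const policy = new RetryPolicy({ maxAttempts: 3, baseDelayMs: 1000, maxDelayMs: 8000 })

async function loadConfig(path: string): Promise<string | null> {
  try {
    // Transient errors (EBUSY, EAGAIN, ETIMEDOUT, ...) are retried with exponential backoff;
    // permanent errors (ENOENT, EPERM, ...) and open-circuit errors are rethrown immediately.
    return await policy.execute(async () => fs.readFile(path, 'utf-8'), `load-config-${path}`)
  } catch (error) {
    if (isPermanentError(error)) return null // e.g. the file genuinely does not exist
    throw error
  }
}
```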
+### Learnings
+
+- **RetryPolicy pattern:** Wrapping operations with retry execution provides clean separation of retry logic from business logic
+- **Error classification strategies:** Using error code sets (EBUSY, EAGAIN) for transient vs (ENOENT, EPERM) for permanent enables automatic decision-making
+- **Promise.allSettled() for resilient parallel operations:** Prevents one failure from blocking other operations, enables partial success
+- **Circuit breaker implementation:** Per-operation state tracking prevents cascading failures while allowing recovery
+
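The Promise.allSettled() learning is the pattern agent-generator.ts adopts later in this diff; as a generic sketch under assumed names (`tasks` and `runTask` are illustrative and not part of the package):

```typescript
import { RetryPolicy } from './core/utils/retry' // assumed path to the vendored module

const retry = new RetryPolicy({ maxAttempts: 2, baseDelayMs: 500, maxDelayMs: 2000 })

async function runAll(tasks: string[], runTask: (name: string) => Promise<void>) {
  // Each task gets its own retry wrapper; allSettled keeps one rejection from aborting the batch.
  const results = await Promise.allSettled(
    tasks.map((name) => retry.execute(() => runTask(name), `task-${name}`))
  )
  const failed = tasks.filter((_, i) => results[i].status === 'rejected')
  return { succeeded: tasks.length - failed.length, failed }
}
```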
+### Test Plan
+
+#### For QA
+
+1. **Agent Initialization Retry**
+   - Temporarily make file system busy during agent initialization
+   - Verify agent initialization retries up to 3 times
+   - Confirm permanent errors (unsupported agent) fail immediately
+
+2. **Tool Operations Retry**
+   - Test Read/Write/Bash with transient errors (EBUSY, ETIMEDOUT)
+   - Verify operations retry automatically (2 attempts)
+   - Confirm permanent errors (ENOENT, EPERM) return null/false without retry
+
+3. **Circuit Breaker**
+   - Trigger 5 consecutive failures on same operation
+   - Verify circuit breaker opens and blocks further attempts
+   - Wait 60 seconds and verify circuit closes automatically
+
+4. **Parallel Agent Generation**
+   - Simulate one agent generation failure during sync
+   - Verify other agents generate successfully (Promise.allSettled behavior)
+   - Check logs for failure warnings
+
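One way QA could exercise the transient-retry path without touching the real file system is to feed RetryPolicy a fake operation that fails with EBUSY before succeeding. A sketch using the bun:test APIs already used by this package's test suite; the test name, operation ID, and timing values are assumptions for illustration:

```typescript
import { describe, expect, it } from 'bun:test'
import { RetryPolicy } from '../../utils/retry' // path as used by the package's own tests

describe('QA: transient failure simulation', () => {
  it('retries EBUSY and eventually succeeds', async () => {
    const policy = new RetryPolicy({ maxAttempts: 3, baseDelayMs: 10, maxDelayMs: 40 })
    let attempts = 0
    const flaky = async () => {
      attempts++
      if (attempts < 3) throw { code: 'EBUSY' } // simulated "file system busy"
      return 'ok'
    }
    expect(await policy.execute(flaky, 'qa-sim')).toBe('ok')
    expect(attempts).toBe(3)
  })
})
```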
+#### For Users
+
+**What changed:** The system is now more resilient against transient failures. Operations like agent initialization, file reads/writes, and command execution will automatically retry when they encounter temporary errors (disk busy, timeouts, etc).
+
+**How to use:** No action required - retry logic works automatically. Users will experience fewer random failures during normal operations.

+**Breaking changes:** None. All changes are backward compatible. Existing tests (968 total) all pass.

 ## [1.19.0] - 2026-02-09


package/core/__tests__/utils/retry.test.ts
ADDED

/**
 * Retry Policy Tests
 * Tests for exponential backoff, error classification, and circuit breaker
 */

import { afterEach, beforeEach, describe, expect, it } from 'bun:test'
import {
  defaultAgentRetryPolicy,
  defaultToolRetryPolicy,
  isPermanentError,
  isTransientError,
  RetryPolicy,
} from '../../utils/retry'

describe('RetryPolicy', () => {
  let policy: RetryPolicy

  beforeEach(() => {
    policy = new RetryPolicy({
      maxAttempts: 3,
      baseDelayMs: 100, // Shorter delays for tests
      maxDelayMs: 400,
      circuitBreakerThreshold: 5,
      circuitBreakerTimeoutMs: 1000,
    })
    // Reset all circuits before each test
    policy.resetAllCircuits()
  })

  afterEach(() => {
    policy.resetAllCircuits()
  })

  describe('Error Classification', () => {
    it('should identify transient errors correctly', () => {
      const transientErrors = [
        { code: 'EBUSY' },
        { code: 'EAGAIN' },
        { code: 'ETIMEDOUT' },
        { code: 'ECONNRESET' },
        { code: 'ECONNREFUSED' },
        { message: 'Operation timed out' },
        { message: 'Request timeout' },
      ]

      for (const error of transientErrors) {
        expect(isTransientError(error)).toBe(true)
      }
    })

    it('should identify permanent errors correctly', () => {
      const permanentErrors = [
        { code: 'ENOENT' },
        { code: 'EACCES' },
        { code: 'EPERM' },
        { code: 'EISDIR' },
        { code: 'ENOTDIR' },
        { code: 'EINVAL' },
      ]

      for (const error of permanentErrors) {
        expect(isPermanentError(error)).toBe(true)
        expect(isTransientError(error)).toBe(false)
      }
    })

    it('should not classify unknown errors as transient', () => {
      const unknownErrors = [
        { code: 'UNKNOWN' },
        { message: 'Unknown error' },
        new Error('Generic error'),
      ]

      for (const error of unknownErrors) {
        expect(isTransientError(error)).toBe(false)
      }
    })
  })

  describe('Successful Operations', () => {
    it('should execute successful operation without retry', async () => {
      let attempts = 0
      const operation = async () => {
        attempts++
        return 'success'
      }

      const result = await policy.execute(operation, 'test-op')

      expect(result).toBe('success')
      expect(attempts).toBe(1)
    })

    it('should reset circuit breaker after success', async () => {
      // Force some failures to increment circuit state
      let failCount = 0
      const failOperation = async () => {
        failCount++
        throw { code: 'EBUSY' }
      }

      try {
        await policy.execute(failOperation, 'test-op')
      } catch {
        // Expected to fail
      }

      expect(failCount).toBe(3) // maxAttempts

      // Now succeed - should reset circuit
      const successOperation = async () => 'success'
      await policy.execute(successOperation, 'test-op')

      const circuitState = policy.getCircuitState('test-op')
      expect(circuitState).toBeUndefined()
    })
  })

  describe('Transient Error Retry', () => {
    it('should retry transient errors and succeed', async () => {
      let attempts = 0
      const operation = async () => {
        attempts++
        if (attempts < 3) {
          throw { code: 'EBUSY' } // Transient error
        }
        return 'success'
      }

      const result = await policy.execute(operation, 'test-op')

      expect(result).toBe('success')
      expect(attempts).toBe(3)
    })

    it('should apply exponential backoff between retries', async () => {
      const timestamps: number[] = []
      let attempts = 0

      const operation = async () => {
        timestamps.push(Date.now())
        attempts++
        if (attempts < 3) {
          throw { code: 'ETIMEDOUT' }
        }
        return 'success'
      }

      await policy.execute(operation, 'test-op')

      // Check delays: should be ~100ms, ~200ms (with some tolerance)
      const delay1 = timestamps[1] - timestamps[0]
      const delay2 = timestamps[2] - timestamps[1]

      expect(delay1).toBeGreaterThanOrEqual(90) // 100ms with tolerance
      expect(delay1).toBeLessThan(150)

      expect(delay2).toBeGreaterThanOrEqual(180) // 200ms with tolerance
      expect(delay2).toBeLessThan(250)
    })

    it('should respect maxDelayMs cap', async () => {
      const policy = new RetryPolicy({
        maxAttempts: 5,
        baseDelayMs: 100,
        maxDelayMs: 200,
      })

      const timestamps: number[] = []
      let attempts = 0

      const operation = async () => {
        timestamps.push(Date.now())
        attempts++
        if (attempts < 5) {
          throw { code: 'EBUSY' }
        }
        return 'success'
      }

      await policy.execute(operation, 'test-op')

      // Last delay should not exceed maxDelayMs (check delay between attempt 3 and 4)
      // Attempt 1: no delay
      // Attempt 2: 100ms delay (baseDelayMs * 2^0)
      // Attempt 3: 200ms delay (baseDelayMs * 2^1, capped at maxDelayMs)
      // Attempt 4: 200ms delay (baseDelayMs * 2^2 = 400ms, capped at 200ms)
      const delay3 = timestamps[3] - timestamps[2]
      expect(delay3).toBeLessThanOrEqual(250) // 200ms + tolerance
      expect(delay3).toBeGreaterThanOrEqual(180)
    })

    it('should throw if all retry attempts fail with transient error', async () => {
      let attempts = 0
      const operation = async () => {
        attempts++
        throw { code: 'EAGAIN' }
      }

      await expect(policy.execute(operation, 'test-op')).rejects.toMatchObject({
        code: 'EAGAIN',
      })

      expect(attempts).toBe(3) // maxAttempts
    })
  })

  describe('Permanent Error Handling', () => {
    it('should fail fast on permanent errors without retry', async () => {
      let attempts = 0
      const operation = async () => {
        attempts++
        throw { code: 'ENOENT' } // Permanent error
      }

      await expect(policy.execute(operation, 'test-op')).rejects.toMatchObject({
        code: 'ENOENT',
      })

      expect(attempts).toBe(1) // No retry
    })

    it('should fail fast on permission denied', async () => {
      let attempts = 0
      const operation = async () => {
        attempts++
        throw { code: 'EPERM' }
      }

      await expect(policy.execute(operation, 'test-op')).rejects.toMatchObject({
        code: 'EPERM',
      })

      expect(attempts).toBe(1)
    })

    it('should record failure for permanent errors', async () => {
      const operation = async () => {
        throw { code: 'ENOENT' }
      }

      try {
        await policy.execute(operation, 'perm-op')
      } catch {
        // Expected
      }

      const state = policy.getCircuitState('perm-op')
      expect(state?.consecutiveFailures).toBe(1)
    })
  })

  describe('Circuit Breaker', () => {
    it('should open circuit after threshold failures', async () => {
      const operation = async () => {
        throw { code: 'EBUSY' }
      }

      // Execute 5 times to reach threshold (each attempt counts as 1 failure)
      for (let i = 0; i < 5; i++) {
        try {
          await policy.execute(operation, 'circuit-op')
        } catch {
          // Expected
        }
      }

      // Circuit should now be open
      expect(policy.isCircuitOpen('circuit-op')).toBe(true)

      // Next call should fail immediately with circuit breaker error
      await expect(policy.execute(operation, 'circuit-op')).rejects.toThrow(
        /Circuit breaker is open/
      )
    })

    it('should close circuit after timeout', async () => {
      const policy = new RetryPolicy({
        maxAttempts: 3,
        baseDelayMs: 10,
        maxDelayMs: 50,
        circuitBreakerThreshold: 3,
        circuitBreakerTimeoutMs: 100, // Short timeout for test
      })

      const operation = async () => {
        throw { code: 'ETIMEDOUT' }
      }

      // Trigger circuit breaker
      for (let i = 0; i < 3; i++) {
        try {
          await policy.execute(operation, 'timeout-op')
        } catch {
          // Expected
        }
      }

      expect(policy.isCircuitOpen('timeout-op')).toBe(true)

      // Wait for timeout
      await new Promise((resolve) => setTimeout(resolve, 150))

      // Circuit should be closed now
      expect(policy.isCircuitOpen('timeout-op')).toBe(false)
    })

    it('should track failures per operation independently', async () => {
      const operation = async () => {
        throw { code: 'EAGAIN' }
      }

      // Fail operation A multiple times
      for (let i = 0; i < 3; i++) {
        try {
          await policy.execute(operation, 'op-a')
        } catch {
          // Expected
        }
      }

      const stateA = policy.getCircuitState('op-a')
      const stateB = policy.getCircuitState('op-b')

      expect(stateA?.consecutiveFailures).toBe(3)
      expect(stateB).toBeUndefined()
    })
  })

  describe('Default Policies', () => {
    it('should have agent retry policy with correct defaults', () => {
      // Test that default agent policy is configured correctly
      expect(defaultAgentRetryPolicy).toBeInstanceOf(RetryPolicy)
      // We can't directly inspect options, but we can test behavior
    })

    it('should have tool retry policy with correct defaults', () => {
      expect(defaultToolRetryPolicy).toBeInstanceOf(RetryPolicy)
    })

    it('should retry agent operations 3 times', async () => {
      let attempts = 0
      const operation = async () => {
        attempts++
        if (attempts < 3) {
          throw { code: 'EBUSY' }
        }
        return 'success'
      }

      await defaultAgentRetryPolicy.execute(operation, 'agent-test')
      expect(attempts).toBe(3)
    })
  })

  describe('Edge Cases', () => {
    it('should handle non-Error objects', async () => {
      const operation = async () => {
        throw 'string error'
      }

      await expect(policy.execute(operation, 'edge-op')).rejects.toBe('string error')
    })

    it('should handle null/undefined errors', async () => {
      const operation = async () => {
        throw null
      }

      await expect(policy.execute(operation, 'null-op')).rejects.toBeNull()
    })

    it('should handle errors without code property', async () => {
      const operation = async () => {
        throw new Error('Generic error')
      }

      await expect(policy.execute(operation, 'generic-op')).rejects.toThrow('Generic error')
    })
  })
})

package/core/agentic/tool-registry.ts
CHANGED
@@ -10,6 +10,7 @@ import { exec } from 'node:child_process'
 import fs from 'node:fs/promises'
 import { promisify } from 'node:util'
 import type { ToolFunction, ToolRegistryInterface } from '../types'
+import { defaultToolRetryPolicy, isPermanentError, isTransientError } from '../utils/retry'

 // Re-export types for convenience
 export type { ToolFunction, ToolRegistryInterface } from '../types'
@@ -61,36 +62,63 @@ const toolRegistry: ToolRegistryInterface = {

 // Register built-in tools

-// Read file
+// Read file with retry for transient errors
 toolRegistry.register('Read', async (filePath: unknown): Promise<string | null> => {
   try {
-    return await
-
-
+    return await defaultToolRetryPolicy.execute(
+      async () => await fs.readFile(filePath as string, 'utf-8'),
+      `read-${filePath}`
+    )
+  } catch (error) {
+    // Permanent errors (ENOENT, EPERM) - return null (expected)
+    if (isPermanentError(error)) {
+      return null
+    }
+    // Transient errors exhausted retries - return null
+    if (isTransientError(error)) {
+      return null
+    }
+    // Unknown errors - return null (fail gracefully)
     return null
   }
 })

-// Write file
+// Write file with retry for transient errors
 toolRegistry.register('Write', async (filePath: unknown, content: unknown): Promise<boolean> => {
   try {
-    await
+    await defaultToolRetryPolicy.execute(
+      async () => await fs.writeFile(filePath as string, content as string, 'utf-8'),
+      `write-${filePath}`
+    )
     return true
-  } catch (
-    //
+  } catch (error) {
+    // Permanent errors (EPERM, EISDIR) - return false (expected)
+    if (isPermanentError(error)) {
+      return false
+    }
+    // Transient errors exhausted retries - return false
+    if (isTransientError(error)) {
+      return false
+    }
+    // Unknown errors - return false (fail gracefully)
     return false
   }
 })

-// Execute bash command
+// Execute bash command with retry for transient errors
 toolRegistry.register(
   'Bash',
   async (command: unknown): Promise<{ stdout: string; stderr: string }> => {
     try {
-
-
+      return await defaultToolRetryPolicy.execute(
+        async () => await execAsync(command as string),
+        `bash-${command}`
+      )
     } catch (error) {
-      const err = error as { stdout?: string; stderr?: string; message?: string }
+      const err = error as { stdout?: string; stderr?: string; message?: string; code?: string }
+
+      // For command execution errors, return output with error in stderr
+      // This maintains the existing behavior while adding retry for transient errors
       return {
         stdout: err.stdout || '',
         stderr: err.stderr || err.message || 'Command failed',

package/core/services/agent-generator.ts
CHANGED
@@ -14,6 +14,7 @@ import {
   mergePreservedSections,
   validatePreserveBlocks,
 } from '../utils/preserve-sections'
+import { defaultToolRetryPolicy } from '../utils/retry'
 import type { StackDetection } from './stack-detector'

 // ============================================================================
@@ -169,16 +170,42 @@ export class AgentGenerator {
       agentsToGenerate.push({ name: 'devops', skill: 'developer-kit' })
     }

-    // Generate all domain agents IN PARALLEL
-
-
+    // Generate all domain agents IN PARALLEL with individual retry
+    // Using Promise.allSettled() so one failure doesn't block others
+    const results = await Promise.allSettled(
+      agentsToGenerate.map((agent) =>
+        defaultToolRetryPolicy.execute(
+          async () => await this.generateDomainAgent(agent.name, stats, stack),
+          `generate-agent-${agent.name}`
+        )
+      )
     )

-
-
-
-
-
+    // Track which agents succeeded and which failed
+    const successfulAgents: AgentInfo[] = []
+    const failedAgents: string[] = []
+
+    for (let i = 0; i < results.length; i++) {
+      const result = results[i]
+      const agent = agentsToGenerate[i]
+
+      if (result.status === 'fulfilled') {
+        successfulAgents.push({
+          name: agent.name,
+          type: 'domain' as const,
+          skill: agent.skill,
+        })
+      } else {
+        failedAgents.push(agent.name)
+        // Log failure but continue (don't throw)
+        console.warn(`[prjct] Warning: Failed to generate agent: ${agent.name}`)
+        if (result.reason) {
+          console.warn(`[prjct] Reason: ${result.reason.message || result.reason}`)
+        }
+      }
+    }
+
+    return successfulAgents
   }

   /**

package/core/services/agent-service.ts
CHANGED
@@ -8,6 +8,7 @@ import AgentRouter from '../agentic/agent-router'
 import { AgentError } from '../errors'
 import * as agentDetector from '../infrastructure/agent-detector'
 import type { AgentAssignmentResult, AgentInfo, ProjectContext } from '../types'
+import { defaultAgentRetryPolicy } from '../utils/retry'

 // Valid agent types - whitelist for security (prevents path traversal)
 const VALID_AGENT_TYPES = ['claude'] as const
@@ -24,26 +25,30 @@ export class AgentService {

   /**
    * Initialize agent (Claude Code, Desktop, or Terminal)
+   * Wrapped with retry policy to handle transient failures
    */
   async initialize(): Promise<unknown> {
     if (this.agent) return this.agent

-
+    // Wrap initialization with retry policy (3 attempts, exponential backoff)
+    return await defaultAgentRetryPolicy.execute(async () => {
+      this.agentInfo = await agentDetector.detect()

-
-
-
+      if (!this.agentInfo?.isSupported) {
+        throw AgentError.notSupported(this.agentInfo?.type ?? 'unknown')
+      }

-
-
-
-
-
+      // Security: validate agent type against whitelist to prevent path traversal
+      const agentType = this.agentInfo.type as ValidAgentType
+      if (!agentType || !VALID_AGENT_TYPES.includes(agentType)) {
+        throw AgentError.notSupported(this.agentInfo?.type ?? 'unknown')
+      }

-
-
+      const { default: Agent } = await import(`../infrastructure/${agentType}-agent`)
+      this.agent = new Agent()

-
+      return this.agent
+    }, 'agent-initialization')
   }

   /**

package/core/utils/retry.ts
ADDED

/**
 * Retry Policy Utility
 *
 * Provides exponential backoff retry logic with error classification and circuit breaker.
 * Used to make agent and tool operations resilient against transient failures.
 *
 * @module utils/retry
 * @version 1.0.0
 */

// =============================================================================
// Types
// =============================================================================

export interface RetryOptions {
  /** Maximum number of retry attempts (default: 3) */
  maxAttempts: number

  /** Base delay in milliseconds for exponential backoff (default: 1000) */
  baseDelayMs: number

  /** Maximum delay in milliseconds (default: 8000) */
  maxDelayMs: number

  /** Number of consecutive failures before opening circuit (default: 5) */
  circuitBreakerThreshold?: number

  /** Time in milliseconds to keep circuit open (default: 60000) */
  circuitBreakerTimeoutMs?: number
}

export interface CircuitState {
  consecutiveFailures: number
  openedAt: number | null
}

// =============================================================================
// Error Classification
// =============================================================================

/**
 * Node.js error codes that indicate transient failures worth retrying
 */
const TRANSIENT_ERROR_CODES = new Set([
  'EBUSY', // Resource busy
  'EAGAIN', // Resource temporarily unavailable
  'ETIMEDOUT', // Operation timed out
  'ECONNRESET', // Connection reset by peer
  'ECONNREFUSED', // Connection refused (may be temporary)
  'ENOTFOUND', // DNS lookup failed (may be temporary)
  'EAI_AGAIN', // DNS temporary failure
])

/**
 * Node.js error codes that indicate permanent failures (fail fast)
 */
const PERMANENT_ERROR_CODES = new Set([
  'ENOENT', // No such file or directory
  'EACCES', // Permission denied
  'EPERM', // Operation not permitted
  'EISDIR', // Is a directory
  'ENOTDIR', // Not a directory
  'EINVAL', // Invalid argument
])

/**
 * Check if an error is transient (worth retrying)
 */
export function isTransientError(error: unknown): boolean {
  if (!error || typeof error !== 'object') {
    return false
  }

  const err = error as { code?: string; errno?: number; message?: string }

  // Check error code
  if (err.code && TRANSIENT_ERROR_CODES.has(err.code)) {
    return true
  }

  // Permanent errors should never be retried
  if (err.code && PERMANENT_ERROR_CODES.has(err.code)) {
    return false
  }

  // Check message for timeout indicators
  if (err.message) {
    const msg = err.message.toLowerCase()
    if (msg.includes('timeout') || msg.includes('timed out')) {
      return true
    }
  }

  // Unknown errors are not retried by default (fail fast)
  return false
}

/**
 * Check if an error is permanent (should not retry)
 */
export function isPermanentError(error: unknown): boolean {
  if (!error || typeof error !== 'object') {
    return false
  }

  const err = error as { code?: string }
  return !!(err.code && PERMANENT_ERROR_CODES.has(err.code))
}

// =============================================================================
// Circuit Breaker
// =============================================================================

/**
 * Circuit breaker state registry (per operation ID)
 */
const circuitStates = new Map<string, CircuitState>()

/**
 * Check if circuit is open for a given operation
 */
function isCircuitOpen(operationId: string, threshold: number, timeoutMs: number): boolean {
  const state = circuitStates.get(operationId)
  if (!state) {
    return false
  }

  // Circuit is open if threshold exceeded
  if (state.consecutiveFailures >= threshold && state.openedAt) {
    const elapsed = Date.now() - state.openedAt
    // Circuit closes after timeout
    if (elapsed >= timeoutMs) {
      // Reset circuit
      circuitStates.delete(operationId)
      return false
    }
    return true
  }

  return false
}

/**
 * Record a failure for circuit breaker
 */
function recordFailure(operationId: string, threshold: number): void {
  const state = circuitStates.get(operationId) || {
    consecutiveFailures: 0,
    openedAt: null,
  }

  state.consecutiveFailures++

  // Open circuit if threshold reached
  if (state.consecutiveFailures >= threshold && !state.openedAt) {
    state.openedAt = Date.now()
  }

  circuitStates.set(operationId, state)
}

/**
 * Record a success (reset circuit breaker)
 */
function recordSuccess(operationId: string): void {
  circuitStates.delete(operationId)
}

// =============================================================================
// Retry Policy
// =============================================================================

export class RetryPolicy {
  private options: Required<RetryOptions>

  constructor(options: Partial<RetryOptions> = {}) {
    this.options = {
      maxAttempts: options.maxAttempts ?? 3,
      baseDelayMs: options.baseDelayMs ?? 1000,
      maxDelayMs: options.maxDelayMs ?? 8000,
      circuitBreakerThreshold: options.circuitBreakerThreshold ?? 5,
      circuitBreakerTimeoutMs: options.circuitBreakerTimeoutMs ?? 60000,
    }
  }

  /**
   * Execute an operation with retry logic
   *
   * @param operation - Async function to execute
   * @param operationId - Optional ID for circuit breaker tracking
   * @returns Result of the operation
   * @throws Error if all attempts fail or circuit is open
   */
  async execute<T>(operation: () => Promise<T>, operationId: string = 'default'): Promise<T> {
    // Check circuit breaker
    if (
      isCircuitOpen(
        operationId,
        this.options.circuitBreakerThreshold,
        this.options.circuitBreakerTimeoutMs
      )
    ) {
      throw new Error(
        `Circuit breaker is open for operation: ${operationId}. Too many consecutive failures.`
      )
    }

    let lastError: unknown
    let attempt = 0

    while (attempt < this.options.maxAttempts) {
      try {
        const result = await operation()
        // Success - reset circuit breaker
        recordSuccess(operationId)
        return result
      } catch (error) {
        lastError = error
        attempt++

        // Check if error is permanent (fail fast)
        if (isPermanentError(error)) {
          recordFailure(operationId, this.options.circuitBreakerThreshold)
          throw error
        }

        // Check if error is transient and we have attempts left
        const shouldRetry = isTransientError(error) && attempt < this.options.maxAttempts

        if (!shouldRetry) {
          // Not transient or out of attempts
          recordFailure(operationId, this.options.circuitBreakerThreshold)
          throw error
        }

        // Calculate delay with exponential backoff
        const delay = Math.min(
          this.options.baseDelayMs * 2 ** (attempt - 1),
          this.options.maxDelayMs
        )

        // Wait before retry
        await new Promise((resolve) => setTimeout(resolve, delay))
      }
    }

    // All attempts failed
    recordFailure(operationId, this.options.circuitBreakerThreshold)
    throw lastError
  }

  /**
   * Check if an error is transient (exposed for testing)
   */
  isTransientError(error: unknown): boolean {
    return isTransientError(error)
  }

  /**
   * Check if circuit is open for an operation (exposed for testing)
   */
  isCircuitOpen(operationId: string): boolean {
    return isCircuitOpen(
      operationId,
      this.options.circuitBreakerThreshold,
      this.options.circuitBreakerTimeoutMs
    )
  }

  /**
   * Get current circuit state for an operation (exposed for testing)
   */
  getCircuitState(operationId: string): CircuitState | undefined {
    return circuitStates.get(operationId)
  }

  /**
   * Reset circuit breaker for an operation (exposed for testing)
   */
  resetCircuit(operationId: string): void {
    circuitStates.delete(operationId)
  }

  /**
   * Reset all circuit breakers (exposed for testing)
   */
  resetAllCircuits(): void {
    circuitStates.clear()
  }
}

// =============================================================================
// Exports
// =============================================================================

/**
 * Default retry policy for agent operations
 * - 3 attempts
 * - 1s base delay
 * - Up to 8s max delay
 */
export const defaultAgentRetryPolicy = new RetryPolicy({
  maxAttempts: 3,
  baseDelayMs: 1000,
  maxDelayMs: 8000,
})

/**
 * Retry policy for tool operations (less aggressive)
 * - 2 attempts
 * - 500ms base delay
 * - Up to 2s max delay
 */
export const defaultToolRetryPolicy = new RetryPolicy({
  maxAttempts: 2,
  baseDelayMs: 500,
  maxDelayMs: 2000,
})
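To make the backoff schedule above concrete: the delay before retry n is min(baseDelayMs * 2^(n-1), maxDelayMs). A small sketch evaluating that formula for the two default policies (the helper below is illustrative, not part of the package):

```typescript
// Worked example of the delay formula used in RetryPolicy.execute() above.
const delay = (attempt: number, baseDelayMs: number, maxDelayMs: number) =>
  Math.min(baseDelayMs * 2 ** (attempt - 1), maxDelayMs)

// defaultAgentRetryPolicy (base 1000ms, cap 8000ms): 1000, 2000, 4000, 8000, 8000, ...
console.log([1, 2, 3, 4, 5].map((n) => delay(n, 1000, 8000)))

// defaultToolRetryPolicy (base 500ms, cap 2000ms): 500, 1000, 2000, 2000, ...
console.log([1, 2, 3, 4].map((n) => delay(n, 500, 2000)))
```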
package/dist/bin/prjct.mjs
CHANGED
@@ -18857,6 +18857,211 @@ When fragmenting tasks:
   }
 });

+// core/utils/retry.ts
+function isTransientError(error) {
+  if (!error || typeof error !== "object") {
+    return false;
+  }
+  const err = error;
+  if (err.code && TRANSIENT_ERROR_CODES.has(err.code)) {
+    return true;
+  }
+  if (err.code && PERMANENT_ERROR_CODES.has(err.code)) {
+    return false;
+  }
+  if (err.message) {
+    const msg = err.message.toLowerCase();
+    if (msg.includes("timeout") || msg.includes("timed out")) {
+      return true;
+    }
+  }
+  return false;
+}
+function isPermanentError(error) {
+  if (!error || typeof error !== "object") {
+    return false;
+  }
+  const err = error;
+  return !!(err.code && PERMANENT_ERROR_CODES.has(err.code));
+}
+function isCircuitOpen(operationId, threshold, timeoutMs) {
+  const state = circuitStates.get(operationId);
+  if (!state) {
+    return false;
+  }
+  if (state.consecutiveFailures >= threshold && state.openedAt) {
+    const elapsed = Date.now() - state.openedAt;
+    if (elapsed >= timeoutMs) {
+      circuitStates.delete(operationId);
+      return false;
+    }
+    return true;
+  }
+  return false;
+}
+function recordFailure(operationId, threshold) {
+  const state = circuitStates.get(operationId) || {
+    consecutiveFailures: 0,
+    openedAt: null
+  };
+  state.consecutiveFailures++;
+  if (state.consecutiveFailures >= threshold && !state.openedAt) {
+    state.openedAt = Date.now();
+  }
+  circuitStates.set(operationId, state);
+}
+function recordSuccess(operationId) {
+  circuitStates.delete(operationId);
+}
+var TRANSIENT_ERROR_CODES, PERMANENT_ERROR_CODES, circuitStates, RetryPolicy, defaultAgentRetryPolicy, defaultToolRetryPolicy;
+var init_retry = __esm({
+  "core/utils/retry.ts"() {
+    "use strict";
+    TRANSIENT_ERROR_CODES = /* @__PURE__ */ new Set([
+      "EBUSY",
+      // Resource busy
+      "EAGAIN",
+      // Resource temporarily unavailable
+      "ETIMEDOUT",
+      // Operation timed out
+      "ECONNRESET",
+      // Connection reset by peer
+      "ECONNREFUSED",
+      // Connection refused (may be temporary)
+      "ENOTFOUND",
+      // DNS lookup failed (may be temporary)
+      "EAI_AGAIN"
+      // DNS temporary failure
+    ]);
+    PERMANENT_ERROR_CODES = /* @__PURE__ */ new Set([
+      "ENOENT",
+      // No such file or directory
+      "EACCES",
+      // Permission denied
+      "EPERM",
+      // Operation not permitted
+      "EISDIR",
+      // Is a directory
+      "ENOTDIR",
+      // Not a directory
+      "EINVAL"
+      // Invalid argument
+    ]);
+    __name(isTransientError, "isTransientError");
+    __name(isPermanentError, "isPermanentError");
+    circuitStates = /* @__PURE__ */ new Map();
+    __name(isCircuitOpen, "isCircuitOpen");
+    __name(recordFailure, "recordFailure");
+    __name(recordSuccess, "recordSuccess");
+    RetryPolicy = class {
+      static {
+        __name(this, "RetryPolicy");
+      }
+      options;
+      constructor(options = {}) {
+        this.options = {
+          maxAttempts: options.maxAttempts ?? 3,
+          baseDelayMs: options.baseDelayMs ?? 1e3,
+          maxDelayMs: options.maxDelayMs ?? 8e3,
+          circuitBreakerThreshold: options.circuitBreakerThreshold ?? 5,
+          circuitBreakerTimeoutMs: options.circuitBreakerTimeoutMs ?? 6e4
+        };
+      }
+      /**
+       * Execute an operation with retry logic
+       *
+       * @param operation - Async function to execute
+       * @param operationId - Optional ID for circuit breaker tracking
+       * @returns Result of the operation
+       * @throws Error if all attempts fail or circuit is open
+       */
+      async execute(operation, operationId = "default") {
+        if (isCircuitOpen(
+          operationId,
+          this.options.circuitBreakerThreshold,
+          this.options.circuitBreakerTimeoutMs
+        )) {
+          throw new Error(
+            `Circuit breaker is open for operation: ${operationId}. Too many consecutive failures.`
+          );
+        }
+        let lastError;
+        let attempt = 0;
+        while (attempt < this.options.maxAttempts) {
+          try {
+            const result = await operation();
+            recordSuccess(operationId);
+            return result;
+          } catch (error) {
+            lastError = error;
+            attempt++;
+            if (isPermanentError(error)) {
+              recordFailure(operationId, this.options.circuitBreakerThreshold);
+              throw error;
+            }
+            const shouldRetry = isTransientError(error) && attempt < this.options.maxAttempts;
+            if (!shouldRetry) {
+              recordFailure(operationId, this.options.circuitBreakerThreshold);
+              throw error;
+            }
+            const delay = Math.min(
+              this.options.baseDelayMs * 2 ** (attempt - 1),
+              this.options.maxDelayMs
+            );
+            await new Promise((resolve) => setTimeout(resolve, delay));
+          }
+        }
+        recordFailure(operationId, this.options.circuitBreakerThreshold);
+        throw lastError;
+      }
+      /**
+       * Check if an error is transient (exposed for testing)
+       */
+      isTransientError(error) {
+        return isTransientError(error);
+      }
+      /**
+       * Check if circuit is open for an operation (exposed for testing)
+       */
+      isCircuitOpen(operationId) {
+        return isCircuitOpen(
+          operationId,
+          this.options.circuitBreakerThreshold,
+          this.options.circuitBreakerTimeoutMs
+        );
+      }
+      /**
+       * Get current circuit state for an operation (exposed for testing)
+       */
+      getCircuitState(operationId) {
+        return circuitStates.get(operationId);
+      }
+      /**
+       * Reset circuit breaker for an operation (exposed for testing)
+       */
+      resetCircuit(operationId) {
+        circuitStates.delete(operationId);
+      }
+      /**
+       * Reset all circuit breakers (exposed for testing)
+       */
+      resetAllCircuits() {
+        circuitStates.clear();
+      }
+    };
+    defaultAgentRetryPolicy = new RetryPolicy({
+      maxAttempts: 3,
+      baseDelayMs: 1e3,
+      maxDelayMs: 8e3
+    });
+    defaultToolRetryPolicy = new RetryPolicy({
+      maxAttempts: 2,
+      baseDelayMs: 500,
+      maxDelayMs: 2e3
+    });
+  }
+});
+
 // core/agentic/tool-registry.ts
 import { exec as exec7 } from "node:child_process";
 import fs36 from "node:fs/promises";
@@ -18865,6 +19070,7 @@ var execAsync4, toolRegistry, tool_registry_default;
 var init_tool_registry = __esm({
   "core/agentic/tool-registry.ts"() {
     "use strict";
+    init_retry();
     execAsync4 = promisify8(exec7);
     toolRegistry = {
       tools: /* @__PURE__ */ new Map(),
@@ -18903,16 +19109,34 @@ var init_tool_registry = __esm({
     };
     toolRegistry.register("Read", async (filePath) => {
       try {
-        return await
-
+        return await defaultToolRetryPolicy.execute(
+          async () => await fs36.readFile(filePath, "utf-8"),
+          `read-${filePath}`
+        );
+      } catch (error) {
+        if (isPermanentError(error)) {
+          return null;
+        }
+        if (isTransientError(error)) {
+          return null;
+        }
        return null;
      }
    });
    toolRegistry.register("Write", async (filePath, content) => {
      try {
-        await
+        await defaultToolRetryPolicy.execute(
+          async () => await fs36.writeFile(filePath, content, "utf-8"),
+          `write-${filePath}`
+        );
        return true;
-      } catch (
+      } catch (error) {
+        if (isPermanentError(error)) {
+          return false;
+        }
+        if (isTransientError(error)) {
+          return false;
+        }
        return false;
      }
    });
@@ -18920,8 +19144,10 @@ var init_tool_registry = __esm({
       "Bash",
       async (command) => {
         try {
-
-
+          return await defaultToolRetryPolicy.execute(
+            async () => await execAsync4(command),
+            `bash-${command}`
+          );
         } catch (error) {
           const err = error;
           return {
@@ -19564,6 +19790,7 @@ var init_agent_generator = __esm({
   "core/services/agent-generator.ts"() {
     "use strict";
     init_preserve_sections();
+    init_retry();
   }
 });

@@ -19792,6 +20019,7 @@ var init_agent_service = __esm({
     init_agent_router();
     init_errors();
     init_agent_detector();
+    init_retry();
     init_();
     VALID_AGENT_TYPES = ["claude"];
     AgentService = class {
@@ -19806,20 +20034,23 @@ var init_agent_service = __esm({
       }
       /**
        * Initialize agent (Claude Code, Desktop, or Terminal)
+       * Wrapped with retry policy to handle transient failures
        */
       async initialize() {
         if (this.agent) return this.agent;
-
-
-
-
-
-
-
-
-
-
-
+        return await defaultAgentRetryPolicy.execute(async () => {
+          this.agentInfo = await detect2();
+          if (!this.agentInfo?.isSupported) {
+            throw AgentError.notSupported(this.agentInfo?.type ?? "unknown");
+          }
+          const agentType = this.agentInfo.type;
+          if (!agentType || !VALID_AGENT_TYPES.includes(agentType)) {
+            throw AgentError.notSupported(this.agentInfo?.type ?? "unknown");
+          }
+          const { default: Agent } = await globImport_infrastructure_agent(`../infrastructure/${agentType}-agent`);
+          this.agent = new Agent();
+          return this.agent;
+        }, "agent-initialization");
       }
       /**
        * Get current agent info
@@ -34241,7 +34472,7 @@ var require_package = __commonJS({
   "package.json"(exports, module) {
     module.exports = {
       name: "prjct-cli",
-      version: "1.
+      version: "1.20.0",
      description: "Context layer for AI agents. Project context for Claude Code, Gemini CLI, and more.",
      main: "core/index.ts",
      bin: {