prjct-cli 1.9.0 → 1.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +114 -1
- package/core/__tests__/agentic/prompt-assembly.test.ts +298 -0
- package/core/__tests__/agentic/prompt-builder.test.ts +2 -2
- package/core/__tests__/agentic/token-budget.test.ts +294 -0
- package/core/__tests__/storage/analysis-storage.test.ts +277 -0
- package/core/agentic/anti-hallucination.ts +124 -0
- package/core/agentic/environment-block.ts +102 -0
- package/core/agentic/injection-validator.ts +16 -0
- package/core/agentic/prompt-builder.ts +339 -167
- package/core/agentic/token-budget.ts +226 -0
- package/core/commands/analysis.ts +117 -2
- package/core/commands/command-data.ts +29 -0
- package/core/commands/commands.ts +14 -0
- package/core/commands/register.ts +2 -0
- package/core/index.ts +2 -0
- package/core/schemas/analysis.ts +69 -25
- package/core/services/context-selector.ts +8 -2
- package/core/services/sync-service.ts +35 -0
- package/core/storage/analysis-storage.ts +328 -0
- package/core/storage/index.ts +2 -0
- package/dist/bin/prjct.mjs +1357 -664
- package/package.json +1 -1
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token Budget Coordinator Tests
|
|
3
|
+
*
|
|
4
|
+
* Tests for:
|
|
5
|
+
* - Budget calculation from model context windows
|
|
6
|
+
* - Priority-based allocation (state > injection > files)
|
|
7
|
+
* - Token request/record tracking
|
|
8
|
+
* - Overflow detection and prevention
|
|
9
|
+
* - Different model allocations
|
|
10
|
+
*
|
|
11
|
+
* @see PRJ-266
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { describe, expect, it } from 'bun:test'
|
|
15
|
+
import { budgetsFromCoordinator } from '../../agentic/injection-validator'
|
|
16
|
+
import {
|
|
17
|
+
calculateInputBudget,
|
|
18
|
+
calculateOutputReserve,
|
|
19
|
+
getContextWindow,
|
|
20
|
+
INPUT_RATIO,
|
|
21
|
+
MODEL_CONTEXT_WINDOWS,
|
|
22
|
+
TokenBudgetCoordinator,
|
|
23
|
+
} from '../../agentic/token-budget'
|
|
24
|
+
|
|
25
|
+
// =============================================================================
|
|
26
|
+
// getContextWindow
|
|
27
|
+
// =============================================================================
|
|
28
|
+
|
|
29
|
+
describe('getContextWindow', () => {
  // Window sizes the assertions in this suite are pinned to.
  const CLAUDE_WINDOW = 200_000
  const GEMINI_WINDOW = 1_000_000

  it('should return 200K for Claude models', () => {
    for (const alias of ['opus', 'sonnet', 'haiku']) {
      expect(getContextWindow(alias)).toBe(CLAUDE_WINDOW)
    }
  })

  it('should return 1M for Gemini models', () => {
    for (const alias of ['2.5-pro', '2.5-flash']) {
      expect(getContextWindow(alias)).toBe(GEMINI_WINDOW)
    }
  })

  it('should return default for unknown models', () => {
    const fallback = MODEL_CONTEXT_WINDOWS.default
    expect(getContextWindow('unknown-model')).toBe(fallback)
  })

  it('should return default when no model specified', () => {
    // Both the omitted argument and an explicit undefined must hit the fallback.
    const fallback = MODEL_CONTEXT_WINDOWS.default
    expect(getContextWindow()).toBe(fallback)
    expect(getContextWindow(undefined)).toBe(fallback)
  })

  it('should support full model IDs', () => {
    for (const fullId of ['claude-opus-4-6', 'claude-sonnet-4.5']) {
      expect(getContextWindow(fullId)).toBe(CLAUDE_WINDOW)
    }
  })
})
|
|
55
|
+
|
|
56
|
+
// =============================================================================
|
|
57
|
+
// calculateInputBudget / calculateOutputReserve
|
|
58
|
+
// =============================================================================
|
|
59
|
+
|
|
60
|
+
describe('calculateInputBudget', () => {
  it('should return 65% of context window', () => {
    // Checked twice: once against the exported ratio, once against the literal.
    const sonnetInput = calculateInputBudget('sonnet')
    const fromRatio = Math.floor(200_000 * INPUT_RATIO)

    expect(sonnetInput).toBe(fromRatio)
    expect(sonnetInput).toBe(130_000)
  })

  it('should return larger budget for Gemini models', () => {
    const geminiInput = calculateInputBudget('2.5-pro')
    const fromRatio = Math.floor(1_000_000 * INPUT_RATIO)

    expect(geminiInput).toBe(fromRatio)
    expect(geminiInput).toBe(650_000)
  })
})
|
|
73
|
+
|
|
74
|
+
describe('calculateOutputReserve', () => {
  it('should reserve 35% for output', () => {
    // The reserve is whatever the 200K window has left after the 130K input budget.
    const sonnetReserve = calculateOutputReserve('sonnet')
    const windowMinusInput = 200_000 - 130_000

    expect(sonnetReserve).toBe(windowMinusInput)
    expect(sonnetReserve).toBe(70_000)
  })
})
|
|
81
|
+
|
|
82
|
+
// =============================================================================
|
|
83
|
+
// TokenBudgetCoordinator — Allocation
|
|
84
|
+
// =============================================================================
|
|
85
|
+
|
|
86
|
+
describe('TokenBudgetCoordinator allocation', () => {
  // Each call builds a brand-new coordinator so tests never share state.
  const sonnetAllocation = () => new TokenBudgetCoordinator('sonnet').getAllocation()

  it('should create coordinator with default model', () => {
    const { contextWindow, inputBudget, outputReserve } = new TokenBudgetCoordinator().getAllocation()

    expect(contextWindow).toBe(200_000)
    expect(inputBudget).toBe(130_000)
    expect(outputReserve).toBe(70_000)
  })

  it('should distribute budget across three categories', () => {
    const alloc = sonnetAllocation()

    expect(alloc.state).toBeGreaterThan(0)
    expect(alloc.injection).toBeGreaterThan(0)
    expect(alloc.files).toBeGreaterThan(0)
  })

  it('should allocate all input budget (no waste)', () => {
    const alloc = sonnetAllocation()

    // The three category shares must add up to the whole input budget.
    const sumOfShares = alloc.state + alloc.injection + alloc.files
    expect(sumOfShares).toBe(alloc.inputBudget)
  })

  it('should give files the largest allocation', () => {
    const alloc = sonnetAllocation()

    expect(alloc.files).toBeGreaterThan(alloc.state)
    expect(alloc.files).toBeGreaterThan(alloc.injection)
  })

  it('should give state minimum 1500 tokens', () => {
    const alloc = sonnetAllocation()

    expect(alloc.state).toBeGreaterThanOrEqual(1_500)
  })

  it('should give injection minimum 8000 tokens', () => {
    const alloc = sonnetAllocation()

    expect(alloc.injection).toBeGreaterThanOrEqual(8_000)
  })

  it('should give larger allocations for Gemini models', () => {
    const claudeAlloc = sonnetAllocation()
    const geminiAlloc = new TokenBudgetCoordinator('2.5-pro').getAllocation()

    expect(geminiAlloc.files).toBeGreaterThan(claudeAlloc.files)
    expect(geminiAlloc.inputBudget).toBeGreaterThan(claudeAlloc.inputBudget)
  })
})
|
|
146
|
+
|
|
147
|
+
// =============================================================================
|
|
148
|
+
// TokenBudgetCoordinator — Request/Record
|
|
149
|
+
// =============================================================================
|
|
150
|
+
|
|
151
|
+
describe('TokenBudgetCoordinator request/record', () => {
  // Fresh coordinator per test; usage counters must start from zero every time.
  const freshSonnet = () => new TokenBudgetCoordinator('sonnet')

  it('should grant tokens up to allocation', () => {
    const budget = freshSonnet()
    const stateCap = budget.getAllocationFor('state')

    expect(budget.request('state', 500)).toBe(500)

    const { used, remaining } = budget.getUsage('state')
    expect(used).toBe(500)
    expect(remaining).toBe(stateCap - 500)
  })

  it('should cap grants at available budget', () => {
    const budget = freshSonnet()
    const stateCap = budget.getAllocationFor('state')

    // Asking for more than the category holds yields only the category cap.
    const received = budget.request('state', stateCap + 1000)
    expect(received).toBe(stateCap)
  })

  it('should track cumulative usage', () => {
    const budget = freshSonnet()

    budget.request('state', 500)
    budget.request('state', 300)

    expect(budget.getUsage('state').used).toBe(800)
  })

  it('should return 0 when budget exhausted', () => {
    const budget = freshSonnet()
    const stateCap = budget.getAllocationFor('state')

    // Drain the category first, then ask again.
    budget.request('state', stateCap)
    const received = budget.request('state', 100)

    expect(received).toBe(0)
  })

  it('should record usage independently', () => {
    const budget = freshSonnet()

    budget.record('files', 5000)

    expect(budget.getUsage('files').used).toBe(5000)
  })

  it('should track total remaining across categories', () => {
    const budget = freshSonnet()
    const startingInput = budget.inputBudget

    budget.request('state', 500)
    budget.request('injection', 2000)
    budget.request('files', 10000)

    expect(budget.totalRemaining).toBe(startingInput - 500 - 2000 - 10000)
  })
})
|
|
211
|
+
|
|
212
|
+
// =============================================================================
|
|
213
|
+
// TokenBudgetCoordinator — Overflow Detection
|
|
214
|
+
// =============================================================================
|
|
215
|
+
|
|
216
|
+
describe('TokenBudgetCoordinator overflow detection', () => {
  it('should not be over budget initially', () => {
    const budget = new TokenBudgetCoordinator('sonnet')

    expect(budget.isOverBudget).toBe(false)
  })

  it('should detect overflow when usage exceeds input budget', () => {
    const budget = new TokenBudgetCoordinator('sonnet')
    const oneTooMany = budget.inputBudget + 1

    // record() is used here because, unlike request(), it is not capped by the allocation.
    budget.record('files', oneTooMany)

    expect(budget.isOverBudget).toBe(true)
  })

  it('should prevent overflow via request mechanism', () => {
    const budget = new TokenBudgetCoordinator('sonnet')

    // Ask for far more than the state category holds.
    const stateCap = budget.getAllocationFor('state')
    const received = budget.request('state', stateCap + 50000)

    // Only the allocated amount is handed out, so the total stays within budget.
    expect(received).toBe(stateCap)
    expect(budget.isOverBudget).toBe(false)
  })
})
|
|
243
|
+
|
|
244
|
+
// =============================================================================
|
|
245
|
+
// budgetsFromCoordinator integration
|
|
246
|
+
// =============================================================================
|
|
247
|
+
|
|
248
|
+
describe('budgetsFromCoordinator', () => {
  it('should create injection budgets from coordinator', () => {
    const source = new TokenBudgetCoordinator('sonnet')
    const { totalPrompt, autoContext, agentContent, skillContent, stateData, memories } =
      budgetsFromCoordinator(source)

    // The overall prompt budget mirrors the coordinator's injection share.
    expect(totalPrompt).toBe(source.getAllocationFor('injection'))

    // The individual section budgets keep their default values.
    expect(autoContext).toBe(500)
    expect(agentContent).toBe(400)
    expect(skillContent).toBe(500)
    expect(stateData).toBe(1000)
    expect(memories).toBe(600)
  })

  it('should give larger injection budget for Gemini models', () => {
    const promptBudgetFor = (model: string) =>
      budgetsFromCoordinator(new TokenBudgetCoordinator(model)).totalPrompt

    const claudePrompt = promptBudgetFor('sonnet')
    const geminiPrompt = promptBudgetFor('2.5-pro')

    expect(geminiPrompt).toBeGreaterThan(claudePrompt)
  })
})
|
|
269
|
+
|
|
270
|
+
// =============================================================================
|
|
271
|
+
// Different models get different allocations
|
|
272
|
+
// =============================================================================
|
|
273
|
+
|
|
274
|
+
describe('model-specific allocations', () => {
  it('should give Gemini 5x the file budget of Claude', () => {
    const fileBudgetOf = (model: string) => new TokenBudgetCoordinator(model).getAllocationFor('files')

    const claudeFiles = fileBudgetOf('sonnet')
    const geminiFiles = fileBudgetOf('2.5-pro')

    // 1M (Gemini) over 200K (Claude) context is a factor of five.
    const factor = geminiFiles / claudeFiles
    expect(factor).toBeCloseTo(5, 0)
  })

  it('should maintain 65/35 split across all models', () => {
    const everyModel = ['opus', 'sonnet', 'haiku', '2.5-pro', '2.5-flash']

    everyModel.forEach((model) => {
      const { inputBudget, contextWindow } = new TokenBudgetCoordinator(model).getAllocation()
      expect(inputBudget / contextWindow).toBeCloseTo(INPUT_RATIO, 2)
    })
  })
})
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Analysis Storage Tests (PRJ-263)
|
|
3
|
+
*
|
|
4
|
+
* Tests for sealable analysis lifecycle:
|
|
5
|
+
* - Schema validation (draft/verified/sealed)
|
|
6
|
+
* - Signature computation and verification
|
|
7
|
+
* - Staleness detection
|
|
8
|
+
* - Draft preservation on re-sync
|
|
9
|
+
* - Backward compatibility
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { describe, expect, it } from 'bun:test'
|
|
13
|
+
import {
|
|
14
|
+
AnalysisItemSchema,
|
|
15
|
+
AnalysisStatusSchema,
|
|
16
|
+
parseAnalysis,
|
|
17
|
+
safeParseAnalysis,
|
|
18
|
+
} from '../../schemas/analysis'
|
|
19
|
+
|
|
20
|
+
// =============================================================================
|
|
21
|
+
// Schema Validation
|
|
22
|
+
// =============================================================================
|
|
23
|
+
|
|
24
|
+
describe('AnalysisStatusSchema', () => {
  it('should accept valid statuses', () => {
    // Every lifecycle status must round-trip through the schema unchanged.
    for (const status of ['draft', 'verified', 'sealed']) {
      expect(AnalysisStatusSchema.parse(status)).toBe(status)
    }
  })

  it('should reject invalid statuses', () => {
    for (const bogus of ['pending', 'active', '']) {
      expect(() => AnalysisStatusSchema.parse(bogus)).toThrow()
    }
  })
})
|
|
37
|
+
|
|
38
|
+
describe('AnalysisItemSchema', () => {
  // Fully populated draft fixture shared by the cases below.
  const validDraft = {
    projectId: 'test-project-123',
    languages: ['TypeScript'],
    frameworks: ['Hono'],
    configFiles: ['tsconfig.json'],
    fileCount: 295,
    patterns: [{ name: 'Service pattern', description: 'Classes with dependency injection' }],
    antiPatterns: [],
    analyzedAt: '2026-02-08T20:00:00.000Z',
    status: 'draft' as const,
    commitHash: 'abc1234',
  }

  it('should parse a valid draft analysis', () => {
    const parsed = AnalysisItemSchema.parse(validDraft)

    expect(parsed.status).toBe('draft')
    expect(parsed.commitHash).toBe('abc1234')
    expect(parsed.projectId).toBe('test-project-123')
  })

  it('should default status to draft when not provided', () => {
    // Strip the status key so the schema default has to supply it.
    const { status: _dropped, ...statusless } = validDraft

    expect(AnalysisItemSchema.parse(statusless).status).toBe('draft')
  })

  it('should parse a sealed analysis with signature', () => {
    const parsed = AnalysisItemSchema.parse({
      ...validDraft,
      status: 'sealed' as const,
      signature: 'sha256-abc123def456',
      sealedAt: '2026-02-08T21:00:00.000Z',
    })

    expect(parsed.status).toBe('sealed')
    expect(parsed.signature).toBe('sha256-abc123def456')
    expect(parsed.sealedAt).toBe('2026-02-08T21:00:00.000Z')
  })

  it('should accept optional fields as undefined', () => {
    // Only the required keys are present.
    const parsed = AnalysisItemSchema.parse({
      projectId: 'test',
      languages: [],
      frameworks: [],
      configFiles: [],
      fileCount: 0,
      patterns: [],
      antiPatterns: [],
      analyzedAt: '2026-02-08T20:00:00.000Z',
    })

    expect(parsed.status).toBe('draft')
    expect(parsed.commitHash).toBeUndefined()
    expect(parsed.signature).toBeUndefined()
    expect(parsed.sealedAt).toBeUndefined()
    expect(parsed.modelMetadata).toBeUndefined()
  })

  it('should reject missing required fields', () => {
    const parseEmpty = () => AnalysisItemSchema.parse({})
    const parseIdOnly = () => AnalysisItemSchema.parse({ projectId: 'test' })

    expect(parseEmpty).toThrow()
    expect(parseIdOnly).toThrow()
  })
})
|
|
102
|
+
|
|
103
|
+
describe('parseAnalysis / safeParseAnalysis', () => {
  it('should parse valid data', () => {
    const parsed = parseAnalysis({
      projectId: 'test',
      languages: ['TypeScript'],
      frameworks: [],
      configFiles: [],
      fileCount: 10,
      patterns: [],
      antiPatterns: [],
      analyzedAt: '2026-02-08T20:00:00.000Z',
    })

    expect(parsed.projectId).toBe('test')
    expect(parsed.status).toBe('draft')
  })

  it('should return success for safeParseAnalysis with valid data', () => {
    const outcome = safeParseAnalysis({
      projectId: 'test',
      languages: [],
      frameworks: [],
      configFiles: [],
      fileCount: 0,
      patterns: [],
      antiPatterns: [],
      analyzedAt: '2026-02-08T20:00:00.000Z',
    })

    expect(outcome.success).toBe(true)
  })

  it('should return failure for safeParseAnalysis with invalid data', () => {
    // The safe variant reports the failure through the result instead of throwing.
    const outcome = safeParseAnalysis({ invalid: true })

    expect(outcome.success).toBe(false)
  })
})
|
|
140
|
+
|
|
141
|
+
// =============================================================================
|
|
142
|
+
// Staleness Detection (pure function tests)
|
|
143
|
+
// =============================================================================
|
|
144
|
+
|
|
145
|
+
describe('staleness detection', () => {
  // Exercises AnalysisStorage.checkStaleness. The storage module is required
  // lazily, at the moment each test runs, rather than at file load.
  const loadStorage = () => require('../../storage/analysis-storage').analysisStorage

  it('should detect stale analysis when commits differ', () => {
    const verdict = loadStorage().checkStaleness('abc1234', 'def5678')

    expect(verdict.isStale).toBe(true)
    expect(verdict.sealedCommit).toBe('abc1234')
    expect(verdict.currentCommit).toBe('def5678')
  })

  it('should detect fresh analysis when commits match', () => {
    const verdict = loadStorage().checkStaleness('abc1234', 'abc1234')

    expect(verdict.isStale).toBe(false)
  })

  it('should handle null sealed commit', () => {
    const verdict = loadStorage().checkStaleness(null, 'abc1234')

    expect(verdict.isStale).toBe(false)
    expect(verdict.message).toContain('No sealed analysis')
  })

  it('should handle null current commit', () => {
    const verdict = loadStorage().checkStaleness('abc1234', null)

    expect(verdict.isStale).toBe(true)
    expect(verdict.message).toContain('Cannot determine')
  })
})
|
|
175
|
+
|
|
176
|
+
// =============================================================================
|
|
177
|
+
// Signature Computation (determinism test)
|
|
178
|
+
// =============================================================================
|
|
179
|
+
|
|
180
|
+
describe('signature computation', () => {
  // Hex-encoded SHA-256 digest of a string; node:crypto is required lazily on each call.
  const sha256Hex = (payload: string): string => {
    const { createHash } = require('node:crypto')
    return createHash('sha256').update(payload).digest('hex')
  }

  it('should produce deterministic signatures for same input', () => {
    const analysis = {
      projectId: 'test',
      languages: ['TypeScript'],
      frameworks: ['Hono'],
      configFiles: [],
      fileCount: 100,
      patterns: [],
      antiPatterns: [],
      analyzedAt: '2026-02-08T20:00:00.000Z',
      commitHash: 'abc1234',
    }
    const {
      projectId,
      languages,
      frameworks,
      configFiles,
      fileCount,
      patterns,
      antiPatterns,
      analyzedAt,
      commitHash,
    } = analysis

    // Fixed key order; the undefined entries are omitted by JSON.stringify.
    const canonical = {
      projectId,
      languages,
      frameworks,
      packageManager: undefined,
      sourceDir: undefined,
      testDir: undefined,
      configFiles,
      fileCount,
      patterns,
      antiPatterns,
      analyzedAt,
      commitHash,
    }

    const firstDigest = sha256Hex(JSON.stringify(canonical))
    const secondDigest = sha256Hex(JSON.stringify(canonical))

    expect(firstDigest).toBe(secondDigest)
    expect(firstDigest).toHaveLength(64) // SHA-256 hex = 64 chars
  })

  it('should produce different signatures for different inputs', () => {
    const digestA = sha256Hex(JSON.stringify({ projectId: 'a', fileCount: 1 }))
    const digestB = sha256Hex(JSON.stringify({ projectId: 'b', fileCount: 2 }))

    expect(digestA).not.toBe(digestB)
  })
})
|
|
230
|
+
|
|
231
|
+
// =============================================================================
|
|
232
|
+
// Backward Compatibility
|
|
233
|
+
// =============================================================================
|
|
234
|
+
|
|
235
|
+
describe('backward compatibility', () => {
  it('should parse old analysis.json without seal fields', () => {
    // Legacy shape: no status, commitHash, signature or sealedAt keys at all.
    const parsed = AnalysisItemSchema.parse({
      projectId: 'old-project',
      languages: ['JavaScript'],
      frameworks: ['Express'],
      configFiles: ['package.json'],
      fileCount: 50,
      patterns: [],
      antiPatterns: [],
      analyzedAt: '2025-12-01T00:00:00.000Z',
    })

    // status falls back to the schema default; the seal fields stay unset.
    expect(parsed.status).toBe('draft')
    expect(parsed.commitHash).toBeUndefined()
    expect(parsed.signature).toBeUndefined()
    expect(parsed.sealedAt).toBeUndefined()
  })

  it('should parse old analysis with modelMetadata but no seal fields', () => {
    const legacyTimestamp = '2026-01-15T00:00:00.000Z'

    const parsed = AnalysisItemSchema.parse({
      projectId: 'old-project',
      languages: ['TypeScript'],
      frameworks: [],
      configFiles: [],
      fileCount: 10,
      patterns: [],
      antiPatterns: [],
      analyzedAt: legacyTimestamp,
      modelMetadata: {
        provider: 'claude',
        model: 'sonnet',
        recordedAt: legacyTimestamp,
      },
    })

    expect(parsed.status).toBe('draft')
    expect(parsed.modelMetadata?.provider).toBe('claude')
  })
})
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Anti-Hallucination Block Generator
|
|
3
|
+
*
|
|
4
|
+
* Generates constraint blocks that ground the LLM in project reality.
|
|
5
|
+
* Based on research of 25+ system prompts (Claude Code, Gemini, ChatGPT),
|
|
6
|
+
* the following techniques reduce hallucinations 25-40%:
|
|
7
|
+
*
|
|
8
|
+
* 1. Explicit availability — what IS in the project
|
|
9
|
+
* 2. Explicit unavailability — what is NOT in the project
|
|
10
|
+
* 3. Visibility grounding — only use paths shown in context
|
|
11
|
+
* 4. Library verification — check package.json before assuming
|
|
12
|
+
* 5. Scope clarification — limit to project directory
|
|
13
|
+
*
|
|
14
|
+
* This block should be injected BEFORE task context (position 5 in prompt order)
|
|
15
|
+
* so the LLM has constraints loaded before reading code.
|
|
16
|
+
*
|
|
17
|
+
* @module agentic/anti-hallucination
|
|
18
|
+
* @see PRJ-301
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import { z } from 'zod'
|
|
22
|
+
|
|
23
|
+
// =============================================================================
|
|
24
|
+
// Schema
|
|
25
|
+
// =============================================================================
|
|
26
|
+
|
|
27
|
+
/**
 * Zod schema for the facts the constraints block is built from.
 *
 * Only `projectPath` is required. The list fields default to empty arrays and
 * every domain flag defaults to `false` once the value is parsed.
 */
export const ProjectGroundTruthSchema = z.object({
  // Root path of the project
  projectPath: z.string(),

  // Programming language, e.g. 'TypeScript', 'JavaScript', 'Python'
  language: z.string().optional(),

  // Primary framework, e.g. 'Hono', 'Next.js', 'Express'
  framework: z.string().optional(),

  // Technology stack items, e.g. ['Hono', 'Zod', 'Vitest']
  techStack: z.array(z.string()).default([]),

  // Domain flags from sealed analysis; the whole group may be left out
  domains: z
    .object({
      hasFrontend: z.boolean().default(false),
      hasBackend: z.boolean().default(false),
      hasDatabase: z.boolean().default(false),
      hasTesting: z.boolean().default(false),
      hasDocker: z.boolean().default(false),
    })
    .optional(),

  // Total number of files in the project
  fileCount: z.number().optional(),

  // Names of the available agents, e.g. ['backend', 'testing']
  availableAgents: z.array(z.string()).default([]),
})
|
|
51
|
+
|
|
52
|
+
export type ProjectGroundTruth = z.input<typeof ProjectGroundTruthSchema>
|
|
53
|
+
|
|
54
|
+
// =============================================================================
|
|
55
|
+
// Domain Mapping
|
|
56
|
+
// =============================================================================
|
|
57
|
+
|
|
58
|
+
/**
 * Human-readable technology category for each domain flag name.
 * Flag names without an entry here yield `undefined` on lookup.
 */
const DOMAIN_LABELS: Record<string, string> = {
  'hasFrontend': 'Frontend (UI/components)',
  'hasBackend': 'Backend (APIs/servers)',
  'hasDatabase': 'Database (SQL/ORM)',
  'hasTesting': 'Testing (unit/integration)',
  'hasDocker': 'Docker/containers',
}
|
|
66
|
+
|
|
67
|
+
// =============================================================================
|
|
68
|
+
// Generator
|
|
69
|
+
// =============================================================================
|
|
70
|
+
|
|
71
|
+
/**
 * Build the anti-hallucination constraints block.
 *
 * The returned markdown section contains, in this order:
 * - the `## CONSTRAINTS (Read Before Acting)` heading,
 * - an `AVAILABLE in this project:` line listing language, framework and tech
 *   stack, each name once, in first-seen order (omitted when nothing is known),
 * - a `NOT PRESENT:` line naming every falsy domain flag (only when `domains`
 *   is supplied; only keys actually present on the object are considered),
 * - an `AGENTS:` line (omitted when no agents are listed),
 * - the fixed scope and grounding rules,
 * - a trailing `Context: N files in project.` note when `fileCount` is truthy
 *   (so a count of 0 produces no note).
 *
 * @param truth - Ground-truth facts about the project. This is the un-parsed
 *   schema input, so `techStack` and `availableAgents` may be missing and are
 *   then treated as empty.
 * @returns The block as one newline-joined string.
 */
export function buildAntiHallucinationBlock(truth: ProjectGroundTruth): string {
  const parts: string[] = []

  parts.push('## CONSTRAINTS (Read Before Acting)\n')

  // 1. Explicit availability.
  // The tech stack may repeat the language or framework (or itself), and may
  // carry empty entries; drop the blanks and let the Set keep only the first
  // occurrence of each name so nothing is listed twice.
  const candidates = [truth.language, truth.framework, ...(truth.techStack ?? [])]
  const available = [...new Set(candidates.filter((name): name is string => Boolean(name)))]

  if (available.length > 0) {
    parts.push(`AVAILABLE in this project: ${available.join(', ')}`)
  }

  // 2. Explicit unavailability from domain flags
  if (truth.domains) {
    const absent = Object.entries(truth.domains)
      .filter(([, hasIt]) => !hasIt)
      .map(([key]) => DOMAIN_LABELS[key])
      // Flags without a known label are skipped rather than printed as "undefined".
      .filter((label): label is string => Boolean(label))

    if (absent.length > 0) {
      parts.push(`NOT PRESENT: ${absent.join(', ')}`)
    }
  }

  // 3. Available agents
  const availableAgents = truth.availableAgents ?? []
  if (availableAgents.length > 0) {
    parts.push(`AGENTS: ${availableAgents.join(', ')}`)
  }

  // 4. Scope and grounding rules (the empty entry yields a blank separator line)
  parts.push('')
  parts.push(`SCOPE: Only files in \`${truth.projectPath}\` are accessible.`)
  parts.push('RULE: Use ONLY file paths explicitly shown in context. Do NOT infer or guess paths.')
  parts.push('RULE: NEVER assume a library is available. Check package.json/imports first.')
  parts.push('RULE: If previous context contradicts this section, trust this section.')
  parts.push('RULE: Read files BEFORE modifying. Never assume code structure.')

  if (truth.fileCount) {
    parts.push(`\nContext: ${truth.fileCount} files in project.`)
  }

  return parts.join('\n')
}
|