prjct-cli 1.7.1 → 1.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,64 @@
1
1
  # Changelog
2
2
 
3
+ ## [1.7.3] - 2026-02-07
4
+
5
+ ### Bug Fixes
6
+
7
+ - add Zod validation and token budgets for prompt injection (PRJ-282) (#142)
8
+
9
+
10
+ ## [1.7.3] - 2026-02-07
11
+
12
+ ### Bug Fixes
13
+ - **Validate auto-injected state in prompt builder (PRJ-282)**: Added `safeInject()` validation utility, token-aware truncation via `InjectionBudgetTracker`, and domain-based skill filtering to prevent oversized or irrelevant content in LLM prompts. Replaced hardcoded character limits with configurable token budgets.
14
+
15
+ ### Implementation Details
16
+ - Created `core/agentic/injection-validator.ts` with `safeInject()`, `safeInjectString()`, `truncateToTokenBudget()`, `estimateTokens()`, `filterSkillsByDomains()`, and `InjectionBudgetTracker` class
17
+ - Wired validation into `prompt-builder.ts`: auto-context truncation, agent/skill token budgets, cumulative state budget tracking
18
+ - Skills filtered by detected task domains before injection to reduce token waste
19
+ - 33 new unit tests covering all validation, filtering, and truncation paths
20
+
21
+ ### Test Plan
22
+
23
+ #### For QA
24
+ 1. Run `bun test` — all 526 tests pass (33 new)
25
+ 2. Verify `safeInject()` returns fallback on corrupt data
26
+ 3. Verify `filterSkillsByDomains()` excludes irrelevant skills
27
+ 4. Verify `InjectionBudgetTracker` enforces cumulative limits
28
+
29
+ #### For Users
30
+ - No user-facing changes — validation is automatic
31
+ - Breaking changes: None
32
+
33
+ ## [1.7.2] - 2026-02-07
34
+
35
+ ### Bug Fixes
36
+
37
+ - add missing state machine transitions and dead-end states (PRJ-280) (#141)
38
+
39
+
40
+ ## [1.7.2] - 2026-02-07
41
+
42
+ ### Bug Fixes
43
+ - **Fix state machine completeness: missing transitions and dead-end states (PRJ-280)**: Added missing transitions (`completed → pause`, `paused → ship`, `completed → reopen`), subtask states (`skipped`, `blocked` with reason tracking), migrated `previousTask` to `pausedTasks[]` array with max limit (5) and staleness detection (30 days), and enforced all transitions through the state machine at the storage level.
44
+
45
+ ### Implementation Details
46
+ Added `reopen` command to `WorkflowCommand` type. Updated `getCurrentState()` to detect paused state from `pausedTasks[]` array and legacy `previousTask`. `failSubtask()` now advances to the next subtask instead of halting. New `skipSubtask(reason)` and `blockSubtask(blocker)` methods mark subtasks and advance. `pauseTask()` pushes onto a `pausedTasks[]` array (max 5); `resumeTask()` pops from the array or resumes a task by ID. `getPausedTasksFromState()` handles backward compatibility by migrating the legacy `previousTask` format. All storage mutation methods (`startTask`, `completeTask`, `pauseTask`, `resumeTask`) validate transitions through the state machine before executing.
47
+
48
+ ### Test Plan
49
+
50
+ #### For QA
51
+ 1. Verify `completed → pause`, `paused → ship`, and `completed → reopen` transitions work
52
+ 2. Start a task with subtasks, call `failSubtask()` — verify it records reason AND advances to next subtask
53
+ 3. Call `skipSubtask(reason)` and `blockSubtask(blocker)` — verify they record reasons and advance
54
+ 4. Pause 3+ tasks — verify `pausedTasks[]` array stores all, respects max limit of 5
55
+ 5. Load a `state.json` that uses the old `previousTask` format — verify auto-migration into the `pausedTasks[]` array
56
+ 6. Attempt invalid transition (e.g., `done` from `idle`) — verify error thrown at storage level
57
+
58
+ #### For Users
59
+ **What changed:** Workflow supports reopening completed tasks, shipping paused tasks directly, and multiple paused tasks. Subtask failures auto-advance instead of halting.
60
+ **Breaking changes:** `previousTask` deprecated in favor of `pausedTasks[]`. Backward compat maintained via auto-migration.
61
+
3
62
  ## [1.7.1] - 2026-02-07
4
63
 
5
64
  ### Bug Fixes
@@ -0,0 +1,255 @@
1
+ /**
2
+ * Injection Validator Tests
3
+ * Tests for safeInject, truncation, skill filtering, and budget tracking.
4
+ */
5
+
6
+ import { describe, expect, it } from 'bun:test'
7
+ import { z } from 'zod'
8
+ import {
9
+ DEFAULT_BUDGETS,
10
+ estimateTokens,
11
+ filterSkillsByDomains,
12
+ InjectionBudgetTracker,
13
+ safeInject,
14
+ safeInjectString,
15
+ truncateToTokenBudget,
16
+ } from '../../agentic/injection-validator'
17
+
18
+ // =============================================================================
19
+ // safeInject
20
+ // =============================================================================
21
+
22
+ describe('safeInject', () => {
23
+ const schema = z.object({ name: z.string(), value: z.number() })
24
+ const fallback = { name: 'unknown', value: 0 }
25
+
26
+ it('returns validated data on valid input', () => {
27
+ const data = { name: 'test', value: 42 }
28
+ expect(safeInject(data, schema, fallback)).toEqual(data)
29
+ })
30
+
31
+ it('returns fallback on invalid input', () => {
32
+ const data = { name: 123, value: 'bad' }
33
+ expect(safeInject(data, schema, fallback)).toEqual(fallback)
34
+ })
35
+
36
+ it('returns fallback on null input', () => {
37
+ expect(safeInject(null, schema, fallback)).toEqual(fallback)
38
+ })
39
+
40
+ it('returns fallback on undefined input', () => {
41
+ expect(safeInject(undefined, schema, fallback)).toEqual(fallback)
42
+ })
43
+
44
+ it('strips extra fields via Zod', () => {
45
+ const data = { name: 'test', value: 42, extra: 'ignored' }
46
+ const result = safeInject(data, schema, fallback)
47
+ expect(result.name).toBe('test')
48
+ expect(result.value).toBe(42)
49
+ })
50
+ })
51
+
52
+ // =============================================================================
53
+ // safeInjectString
54
+ // =============================================================================
55
+
56
+ describe('safeInjectString', () => {
57
+ const schema = z.object({ count: z.number() })
58
+ const formatter = (d: { count: number }) => `Items: ${d.count}`
59
+
60
+ it('returns formatted string on valid input', () => {
61
+ expect(safeInjectString({ count: 5 }, schema, formatter, 'N/A')).toBe('Items: 5')
62
+ })
63
+
64
+ it('returns fallback string on invalid input', () => {
65
+ expect(safeInjectString({ count: 'bad' }, schema, formatter, 'N/A')).toBe('N/A')
66
+ })
67
+
68
+ it('returns fallback string on null', () => {
69
+ expect(safeInjectString(null, schema, formatter, 'no data')).toBe('no data')
70
+ })
71
+ })
72
+
73
+ // =============================================================================
74
+ // truncateToTokenBudget
75
+ // =============================================================================
76
+
77
+ describe('truncateToTokenBudget', () => {
78
+ it('returns text unchanged if within budget', () => {
79
+ const text = 'short text'
80
+ expect(truncateToTokenBudget(text, 100)).toBe(text)
81
+ })
82
+
83
+ it('truncates text that exceeds budget', () => {
84
+ const text = 'a'.repeat(500) // 500 chars = ~125 tokens
85
+ const result = truncateToTokenBudget(text, 50) // 50 tokens = 200 chars
86
+ expect(result.length).toBeLessThan(500)
87
+ expect(result).toContain('truncated')
88
+ expect(result).toContain('~50 tokens')
89
+ })
90
+
91
+ it('truncates to exact char limit', () => {
92
+ const text = 'a'.repeat(100)
93
+ const result = truncateToTokenBudget(text, 10) // 10 tokens = 40 chars
94
+ expect(result.startsWith('a'.repeat(40))).toBe(true)
95
+ })
96
+
97
+ it('handles empty string', () => {
98
+ expect(truncateToTokenBudget('', 100)).toBe('')
99
+ })
100
+
101
+ it('handles zero budget', () => {
102
+ const result = truncateToTokenBudget('some text', 0)
103
+ expect(result).toContain('truncated')
104
+ })
105
+ })
106
+
107
+ // =============================================================================
108
+ // estimateTokens
109
+ // =============================================================================
110
+
111
+ describe('estimateTokens', () => {
112
+ it('estimates tokens at ~4 chars per token', () => {
113
+ expect(estimateTokens('a'.repeat(100))).toBe(25)
114
+ })
115
+
116
+ it('rounds up partial tokens', () => {
117
+ expect(estimateTokens('abc')).toBe(1) // 3/4 = 0.75, ceil = 1
118
+ })
119
+
120
+ it('handles empty string', () => {
121
+ expect(estimateTokens('')).toBe(0)
122
+ })
123
+ })
124
+
125
+ // =============================================================================
126
+ // filterSkillsByDomains
127
+ // =============================================================================
128
+
129
+ describe('filterSkillsByDomains', () => {
130
+ const skills = [
131
+ { name: 'react-patterns', content: 'React component patterns and hooks' },
132
+ { name: 'api-design', content: 'RESTful API design and endpoint patterns' },
133
+ { name: 'jest-testing', content: 'Jest test patterns and assertions' },
134
+ { name: 'docker-deploy', content: 'Docker and Kubernetes deployment' },
135
+ { name: 'general-coding', content: 'General coding best practices' },
136
+ ]
137
+
138
+ it('returns all skills when no domains detected', () => {
139
+ expect(filterSkillsByDomains(skills, [])).toEqual(skills)
140
+ })
141
+
142
+ it('returns all skills when skills array is empty', () => {
143
+ expect(filterSkillsByDomains([], ['frontend'])).toEqual([])
144
+ })
145
+
146
+ it('filters to frontend-relevant skills', () => {
147
+ const result = filterSkillsByDomains(skills, ['frontend'])
148
+ expect(result.some((s) => s.name === 'react-patterns')).toBe(true)
149
+ expect(result.some((s) => s.name === 'docker-deploy')).toBe(false)
150
+ })
151
+
152
+ it('filters to backend-relevant skills', () => {
153
+ const result = filterSkillsByDomains(skills, ['backend'])
154
+ expect(result.some((s) => s.name === 'api-design')).toBe(true)
155
+ expect(result.some((s) => s.name === 'react-patterns')).toBe(false)
156
+ })
157
+
158
+ it('filters to testing-relevant skills', () => {
159
+ const result = filterSkillsByDomains(skills, ['testing'])
160
+ expect(result.some((s) => s.name === 'jest-testing')).toBe(true)
161
+ })
162
+
163
+ it('supports multiple domains', () => {
164
+ const result = filterSkillsByDomains(skills, ['frontend', 'testing'])
165
+ expect(result.some((s) => s.name === 'react-patterns')).toBe(true)
166
+ expect(result.some((s) => s.name === 'jest-testing')).toBe(true)
167
+ expect(result.some((s) => s.name === 'docker-deploy')).toBe(false)
168
+ })
169
+
170
+ it('matches domain name itself as keyword', () => {
171
+ const customSkills = [{ name: 'devops-helper', content: 'general devops tools' }]
172
+ const result = filterSkillsByDomains(customSkills, ['devops'])
173
+ expect(result).toHaveLength(1)
174
+ })
175
+
176
+ it('is case insensitive', () => {
177
+ const result = filterSkillsByDomains(skills, ['Frontend'])
178
+ expect(result.some((s) => s.name === 'react-patterns')).toBe(true)
179
+ })
180
+ })
181
+
182
+ // =============================================================================
183
+ // InjectionBudgetTracker
184
+ // =============================================================================
185
+
186
+ describe('InjectionBudgetTracker', () => {
187
+ it('tracks cumulative token usage', () => {
188
+ const tracker = new InjectionBudgetTracker({ totalPrompt: 100 })
189
+ tracker.addSection('a'.repeat(200), 100) // 200 chars = 50 tokens
190
+ expect(tracker.totalUsed).toBe(50)
191
+ expect(tracker.remaining).toBe(50)
192
+ })
193
+
194
+ it('truncates sections to per-section budget', () => {
195
+ const tracker = new InjectionBudgetTracker({ totalPrompt: 1000 })
196
+ const result = tracker.addSection('a'.repeat(500), 50) // budget: 50 tokens = 200 chars
197
+ expect(result.length).toBeLessThan(500)
198
+ })
199
+
200
+ it('returns empty string when total budget exhausted', () => {
201
+ const tracker = new InjectionBudgetTracker({ totalPrompt: 10 })
202
+ tracker.addSection('a'.repeat(100), 50) // uses all 10 tokens of total budget
203
+ const result = tracker.addSection('more content', 50)
204
+ expect(result).toBe('')
205
+ })
206
+
207
+ it('fits content to remaining total budget', () => {
208
+ const tracker = new InjectionBudgetTracker({ totalPrompt: 30 })
209
+ tracker.addSection('a'.repeat(80), 30) // 80 chars = 20 tokens
210
+ // Remaining: 10 tokens
211
+ const result = tracker.addSection('b'.repeat(200), 100) // wants 100 tokens, only 10 left
212
+ expect(result.length).toBeLessThan(200)
213
+ expect(tracker.remaining).toBe(0)
214
+ })
215
+
216
+ it('uses default budgets when none provided', () => {
217
+ const tracker = new InjectionBudgetTracker()
218
+ expect(tracker.config.totalPrompt).toBe(DEFAULT_BUDGETS.totalPrompt)
219
+ expect(tracker.config.autoContext).toBe(DEFAULT_BUDGETS.autoContext)
220
+ })
221
+
222
+ it('allows partial budget overrides', () => {
223
+ const tracker = new InjectionBudgetTracker({ totalPrompt: 5000 })
224
+ expect(tracker.config.totalPrompt).toBe(5000)
225
+ expect(tracker.config.autoContext).toBe(DEFAULT_BUDGETS.autoContext) // unchanged
226
+ })
227
+
228
+ it('remaining never goes negative', () => {
229
+ const tracker = new InjectionBudgetTracker({ totalPrompt: 5 })
230
+ tracker.addSection('a'.repeat(1000), 500)
231
+ expect(tracker.remaining).toBe(0)
232
+ })
233
+ })
234
+
235
+ // =============================================================================
236
+ // DEFAULT_BUDGETS
237
+ // =============================================================================
238
+
239
+ describe('DEFAULT_BUDGETS', () => {
240
+ it('has all required fields', () => {
241
+ expect(DEFAULT_BUDGETS.autoContext).toBeGreaterThan(0)
242
+ expect(DEFAULT_BUDGETS.agentContent).toBeGreaterThan(0)
243
+ expect(DEFAULT_BUDGETS.skillContent).toBeGreaterThan(0)
244
+ expect(DEFAULT_BUDGETS.stateData).toBeGreaterThan(0)
245
+ expect(DEFAULT_BUDGETS.memories).toBeGreaterThan(0)
246
+ expect(DEFAULT_BUDGETS.totalPrompt).toBeGreaterThan(0)
247
+ })
248
+
249
+ it('totalPrompt is larger than individual budgets', () => {
250
+ expect(DEFAULT_BUDGETS.totalPrompt).toBeGreaterThan(DEFAULT_BUDGETS.autoContext)
251
+ expect(DEFAULT_BUDGETS.totalPrompt).toBeGreaterThan(DEFAULT_BUDGETS.agentContent)
252
+ expect(DEFAULT_BUDGETS.totalPrompt).toBeGreaterThan(DEFAULT_BUDGETS.skillContent)
253
+ expect(DEFAULT_BUDGETS.totalPrompt).toBeGreaterThan(DEFAULT_BUDGETS.stateData)
254
+ })
255
+ })
@@ -0,0 +1,216 @@
1
+ /**
2
+ * State Machine Tests
3
+ *
4
+ * Tests for workflow state machine transitions:
5
+ * - All valid transitions work
6
+ * - Invalid transitions are rejected
7
+ * - New transitions (state→command): completed→pause, paused→ship, completed→reopen
8
+ * - getCurrentState detects paused tasks from pausedTasks array
9
+ */
10
+
11
+ import { describe, expect, it } from 'bun:test'
12
+ import type { WorkflowCommand, WorkflowState } from '../../workflow/state-machine'
13
+ import { WorkflowStateMachine } from '../../workflow/state-machine'
14
+
15
+ const sm = new WorkflowStateMachine()
16
+
17
+ // =============================================================================
18
+ // getCurrentState
19
+ // =============================================================================
20
+
21
+ describe('getCurrentState', () => {
22
+ it('returns idle when no task and no paused tasks', () => {
23
+ expect(sm.getCurrentState({ currentTask: null })).toBe('idle')
24
+ expect(sm.getCurrentState({})).toBe('idle')
25
+ })
26
+
27
+ it('returns working for in_progress status', () => {
28
+ expect(sm.getCurrentState({ currentTask: { status: 'in_progress' } })).toBe('working')
29
+ expect(sm.getCurrentState({ currentTask: { status: 'working' } })).toBe('working')
30
+ })
31
+
32
+ it('returns completed for completed/done status', () => {
33
+ expect(sm.getCurrentState({ currentTask: { status: 'completed' } })).toBe('completed')
34
+ expect(sm.getCurrentState({ currentTask: { status: 'done' } })).toBe('completed')
35
+ })
36
+
37
+ it('returns shipped for shipped status', () => {
38
+ expect(sm.getCurrentState({ currentTask: { status: 'shipped' } })).toBe('shipped')
39
+ })
40
+
41
+ it('returns paused when currentTask has paused status', () => {
42
+ expect(sm.getCurrentState({ currentTask: { status: 'paused' } })).toBe('paused')
43
+ })
44
+
45
+ it('returns paused when no currentTask but pausedTasks array has entries', () => {
46
+ expect(sm.getCurrentState({ currentTask: null, pausedTasks: [{ id: '1' }] })).toBe('paused')
47
+ })
48
+
49
+ it('returns paused when no currentTask but legacy previousTask is paused', () => {
50
+ expect(sm.getCurrentState({ currentTask: null, previousTask: { status: 'paused' } })).toBe(
51
+ 'paused'
52
+ )
53
+ })
54
+
55
+ it('returns idle when no currentTask and empty pausedTasks', () => {
56
+ expect(sm.getCurrentState({ currentTask: null, pausedTasks: [] })).toBe('idle')
57
+ })
58
+
59
+ it('returns working for unknown status when task exists', () => {
60
+ expect(sm.getCurrentState({ currentTask: { status: 'active' } })).toBe('working')
61
+ expect(sm.getCurrentState({ currentTask: {} })).toBe('working')
62
+ })
63
+ })
64
+
65
+ // =============================================================================
66
+ // canTransition - valid transitions
67
+ // =============================================================================
68
+
69
+ describe('canTransition - valid', () => {
70
+ const validTransitions: [WorkflowState, WorkflowCommand][] = [
71
+ // idle
72
+ ['idle', 'task'],
73
+ ['idle', 'next'],
74
+ // working
75
+ ['working', 'done'],
76
+ ['working', 'pause'],
77
+ // paused
78
+ ['paused', 'resume'],
79
+ ['paused', 'task'],
80
+ ['paused', 'ship'], // NEW: fast-track ship
81
+ // completed
82
+ ['completed', 'ship'],
83
+ ['completed', 'task'],
84
+ ['completed', 'next'],
85
+ ['completed', 'pause'], // NEW: reopen for review
86
+ ['completed', 'reopen'], // NEW: reopen for rework
87
+ // shipped
88
+ ['shipped', 'task'],
89
+ ['shipped', 'next'],
90
+ ]
91
+
92
+ for (const [state, command] of validTransitions) {
93
+ it(`${state} → ${command} is valid`, () => {
94
+ const result = sm.canTransition(state, command)
95
+ expect(result.valid).toBe(true)
96
+ expect(result.error).toBeUndefined()
97
+ })
98
+ }
99
+ })
100
+
101
+ // =============================================================================
102
+ // canTransition - invalid transitions
103
+ // =============================================================================
104
+
105
+ describe('canTransition - invalid', () => {
106
+ const invalidTransitions: [WorkflowState, WorkflowCommand][] = [
107
+ ['idle', 'done'],
108
+ ['idle', 'pause'],
109
+ ['idle', 'resume'],
110
+ ['idle', 'ship'],
111
+ ['idle', 'reopen'],
112
+ ['working', 'task'],
113
+ ['working', 'ship'],
114
+ ['working', 'resume'],
115
+ ['working', 'next'],
116
+ ['working', 'reopen'],
117
+ ['paused', 'done'],
118
+ ['paused', 'pause'],
119
+ ['paused', 'reopen'],
120
+ ['shipped', 'done'],
121
+ ['shipped', 'pause'],
122
+ ['shipped', 'resume'],
123
+ ['shipped', 'ship'],
124
+ ['shipped', 'reopen'],
125
+ ]
126
+
127
+ for (const [state, command] of invalidTransitions) {
128
+ it(`${state} → ${command} is invalid`, () => {
129
+ const result = sm.canTransition(state, command)
130
+ expect(result.valid).toBe(false)
131
+ expect(result.error).toBeDefined()
132
+ expect(result.suggestion).toBeDefined()
133
+ })
134
+ }
135
+ })
136
+
137
+ // =============================================================================
138
+ // getNextState
139
+ // =============================================================================
140
+
141
+ describe('getNextState', () => {
142
+ it('task → working', () => {
143
+ expect(sm.getNextState('idle', 'task')).toBe('working')
144
+ expect(sm.getNextState('paused', 'task')).toBe('working')
145
+ expect(sm.getNextState('completed', 'task')).toBe('working')
146
+ })
147
+
148
+ it('done → completed', () => {
149
+ expect(sm.getNextState('working', 'done')).toBe('completed')
150
+ })
151
+
152
+ it('pause → paused', () => {
153
+ expect(sm.getNextState('working', 'pause')).toBe('paused')
154
+ expect(sm.getNextState('completed', 'pause')).toBe('paused')
155
+ })
156
+
157
+ it('resume → working', () => {
158
+ expect(sm.getNextState('paused', 'resume')).toBe('working')
159
+ })
160
+
161
+ it('ship → shipped', () => {
162
+ expect(sm.getNextState('completed', 'ship')).toBe('shipped')
163
+ expect(sm.getNextState('paused', 'ship')).toBe('shipped')
164
+ })
165
+
166
+ it('reopen → working', () => {
167
+ expect(sm.getNextState('completed', 'reopen')).toBe('working')
168
+ })
169
+
170
+ it('next preserves current state', () => {
171
+ expect(sm.getNextState('idle', 'next')).toBe('idle')
172
+ expect(sm.getNextState('completed', 'next')).toBe('completed')
173
+ })
174
+ })
175
+
176
+ // =============================================================================
177
+ // getValidCommands
178
+ // =============================================================================
179
+
180
+ describe('getValidCommands', () => {
181
+ it('idle allows task, next', () => {
182
+ expect(sm.getValidCommands('idle')).toEqual(['task', 'next'])
183
+ })
184
+
185
+ it('working allows done, pause', () => {
186
+ expect(sm.getValidCommands('working')).toEqual(['done', 'pause'])
187
+ })
188
+
189
+ it('paused allows resume, task, ship', () => {
190
+ expect(sm.getValidCommands('paused')).toEqual(['resume', 'task', 'ship'])
191
+ })
192
+
193
+ it('completed allows ship, task, next, pause, reopen', () => {
194
+ expect(sm.getValidCommands('completed')).toEqual(['ship', 'task', 'next', 'pause', 'reopen'])
195
+ })
196
+
197
+ it('shipped allows task, next', () => {
198
+ expect(sm.getValidCommands('shipped')).toEqual(['task', 'next'])
199
+ })
200
+ })
201
+
202
+ // =============================================================================
203
+ // formatNextSteps
204
+ // =============================================================================
205
+
206
+ describe('formatNextSteps', () => {
207
+ it('includes reopen in completed state steps', () => {
208
+ const steps = sm.formatNextSteps('completed')
209
+ expect(steps.some((s) => s.includes('reopen'))).toBe(true)
210
+ })
211
+
212
+ it('includes ship in paused state steps', () => {
213
+ const steps = sm.formatNextSteps('paused')
214
+ expect(steps.some((s) => s.includes('ship'))).toBe(true)
215
+ })
216
+ })