prjct-cli 1.7.5 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/CHANGELOG.md +205 -1
  2. package/bin/prjct.ts +14 -0
  3. package/core/__tests__/agentic/command-context.test.ts +281 -0
  4. package/core/__tests__/agentic/domain-classifier.test.ts +330 -0
  5. package/core/__tests__/agentic/response-validator.test.ts +263 -0
  6. package/core/__tests__/agentic/smart-context.test.ts +3 -3
  7. package/core/__tests__/domain/fibonacci.test.ts +113 -0
  8. package/core/__tests__/infrastructure/performance-tracker.test.ts +328 -0
  9. package/core/__tests__/schemas/model.test.ts +272 -0
  10. package/core/agentic/command-classifier.ts +141 -0
  11. package/core/agentic/command-context.ts +168 -0
  12. package/core/agentic/domain-classifier.ts +525 -0
  13. package/core/agentic/index.ts +1 -0
  14. package/core/agentic/orchestrator-executor.ts +43 -199
  15. package/core/agentic/prompt-builder.ts +50 -55
  16. package/core/agentic/response-validator.ts +98 -0
  17. package/core/agentic/smart-context.ts +60 -144
  18. package/core/commands/command-data.ts +17 -0
  19. package/core/commands/commands.ts +9 -0
  20. package/core/commands/performance.ts +114 -0
  21. package/core/commands/register.ts +6 -0
  22. package/core/commands/workflow.ts +87 -4
  23. package/core/config/command-context.config.json +66 -0
  24. package/core/domain/fibonacci.ts +128 -0
  25. package/core/index.ts +25 -1
  26. package/core/infrastructure/ai-provider.ts +35 -0
  27. package/core/infrastructure/performance-tracker.ts +326 -0
  28. package/core/schemas/analysis.ts +4 -0
  29. package/core/schemas/classification.ts +91 -0
  30. package/core/schemas/command-context.ts +29 -0
  31. package/core/schemas/index.ts +6 -0
  32. package/core/schemas/llm-output.ts +170 -0
  33. package/core/schemas/model.ts +153 -0
  34. package/core/schemas/performance.ts +128 -0
  35. package/core/schemas/state.ts +9 -0
  36. package/core/storage/state-storage.ts +21 -0
  37. package/core/types/config.ts +2 -0
  38. package/core/types/provider.ts +12 -0
  39. package/dist/bin/prjct.mjs +3184 -1945
  40. package/dist/core/infrastructure/command-installer.js +78 -7
  41. package/dist/core/infrastructure/setup.js +78 -7
  42. package/package.json +1 -1
package/CHANGELOG.md CHANGED
@@ -1,12 +1,216 @@
  # Changelog

+ ## [1.9.0] - 2026-02-07
+
+ ### Features
+
+ - add structured output schema to all LLM prompts (PRJ-264) (#150)
+ - add mandatory model specification to AI provider (PRJ-265) (#149)
+
+ ### Bug Fixes
+
+ - replace keyword domain detection with LLM semantic classification (PRJ-299) (#148)
+
+
+ ## [1.10.0] - 2026-02-07
+
+ ### Features
+ - **Add structured output schema to all LLM prompts (PRJ-264)**: LLM prompts now include explicit JSON output schemas. Responses are validated with Zod before use. Invalid responses trigger a re-prompt with structured error feedback.
+
+ ### Implementation Details
+ - New `core/schemas/llm-output.ts`: Zod schemas for task classification, agent assignment, and subtask breakdown responses. Schema registry (`OUTPUT_SCHEMAS`) with examples that self-validate. `renderSchemaForPrompt()` serializes schemas as markdown format instructions for prompt injection.
+ - New `core/agentic/response-validator.ts`: `validateLLMResponse()` handles JSON parsing (plain and markdown-wrapped `\`\`\`json` fences), Zod validation, and typed results. `buildReprompt()` generates retry messages with specific validation errors.
+ - Replaced manual field-by-field validation in `domain-classifier.ts` with `TaskClassificationSchema.safeParse()` — the schema existed (PRJ-299) but was unused.
+ - Added output schema injection to `prompt-builder.ts` `build()` method with `getSchemaTypeForCommand()` mapping commands to schemas.
+ - 20 new unit tests in `core/__tests__/agentic/response-validator.test.ts`
+
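The validate-then-re-prompt flow described above can be sketched in a few lines. This is a minimal illustration only — the schema shape and the exact signatures of `validateLLMResponse()` / `buildReprompt()` are assumptions, not the shipped implementation:

```typescript
import { z } from 'zod'

// Stand-in schema for illustration; the real schemas live in core/schemas/llm-output.ts
const TaskClassificationSchema = z.object({
  domain: z.enum(['frontend', 'backend', 'general']),
  confidence: z.number().min(0).max(1),
})

type ValidationResult<T> = { ok: true; data: T } | { ok: false; errors: string[] }

function validateLLMResponse<T>(raw: string, schema: z.ZodType<T>): ValidationResult<T> {
  // Accept plain JSON or a markdown-fenced ```json block
  const fenced = raw.match(/```(?:json)?\s*([\s\S]*?)```/)
  const candidate = (fenced ? fenced[1] : raw).trim()

  let parsed: unknown
  try {
    parsed = JSON.parse(candidate)
  } catch {
    return { ok: false, errors: ['response is not valid JSON'] }
  }

  const result = schema.safeParse(parsed)
  return result.success
    ? { ok: true, data: result.data }
    : { ok: false, errors: result.error.issues.map(i => `${i.path.join('.')}: ${i.message}`) }
}

// Re-prompt message built from the specific validation errors
function buildReprompt(errors: string[]): string {
  return `Your previous response failed validation:\n- ${errors.join('\n- ')}\nReturn ONLY valid JSON matching the required output schema.`
}
```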
+ ### Test Plan
+
+ #### For QA
+ 1. Run `bun test core/__tests__/agentic/response-validator.test.ts` — all 20 tests pass
+ 2. Run `bun test` — full suite (677 tests) passes with no regressions
+ 3. Run `bun run build` — build succeeds cleanly
+ 4. Verify `renderSchemaForPrompt('classification')` returns markdown with an OUTPUT FORMAT header
+ 5. Verify `validateLLMResponse()` handles plain JSON, markdown-wrapped JSON, and rejects non-JSON
+ 6. Verify OUTPUT_SCHEMAS registry examples validate against their own schemas
+
+ #### For Users
+ **What changed:** LLM prompts include explicit JSON output schemas. The domain classifier uses Zod validation. The response validator provides structured error handling with re-prompting.
+ **How to use:** Automatic — schemas are injected into prompts and validation runs transparently.
+ **Breaking changes:** None — all changes are additive.
+
+ ## [1.9.0] - 2026-02-07
+
+ ### Features
+ - **Add mandatory model specification to AI provider (PRJ-265)**: Provider configs now include `defaultModel`, `supportedModels`, and `minCliVersion` fields. Analysis and task metadata can record which model was used, enabling consistency tracking and mismatch warnings.
+
+ ### Implementation Details
+ - New `core/schemas/model.ts`: Zod schemas defining supported models per provider (Claude: opus/sonnet/haiku, Gemini: 2.5-pro/2.5-flash/2.0-flash), default model resolution, semver comparison utilities, minimum CLI version validation, and model mismatch detection
+ - Extended the `AIProviderConfig` interface in `core/types/provider.ts` with `defaultModel`, `supportedModels`, and `minCliVersion` fields
+ - All 5 provider configs (Claude, Gemini, Cursor, Windsurf, Antigravity) updated with model specification fields
+ - Added `modelMetadata` (optional) to `CurrentTaskSchema` in `core/schemas/state.ts` and `AnalysisSchema` in `core/schemas/analysis.ts`
+ - Added `preferredModel` to `ProjectSettings` in `core/types/config.ts`
+ - Added `validateCliVersion()` to `core/infrastructure/ai-provider.ts` with version warning integration into `detectProvider()`
+ - Added `versionWarning` field to `ProviderDetectionResult`
+ - 32 new unit tests in `core/__tests__/schemas/model.test.ts`
+
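A rough sketch of what the new provider fields and the CLI-version check amount to — field names and the QA values below come from this changelog, but the interface layout, the `minCliVersion` value, and the simplified semver comparison are illustrative assumptions:

```typescript
// Shape approximating the extended AIProviderConfig described above (not the actual source)
interface AIProviderConfig {
  name: string
  defaultModel: string | null     // null for multi-model IDEs such as Cursor/Windsurf
  supportedModels: string[]
  minCliVersion?: string
}

const claudeProvider: AIProviderConfig = {
  name: 'claude',
  defaultModel: 'sonnet',                        // per the QA checklist below
  supportedModels: ['opus', 'sonnet', 'haiku'],
  minCliVersion: '1.0.0',                        // assumed value for illustration
}

// Simplified semver comparison: returns a warning string when the detected CLI is too old
function validateCliVersion(detected: string, min: string): string | undefined {
  const parse = (v: string) => v.split('.').map(n => Number.parseInt(n, 10) || 0)
  const [a, b] = [parse(detected), parse(min)]
  for (let i = 0; i < 3; i++) {
    if ((a[i] ?? 0) > (b[i] ?? 0)) return undefined
    if ((a[i] ?? 0) < (b[i] ?? 0)) return `CLI ${detected} is below the minimum supported version ${min}`
  }
  return undefined
}
```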
57
+ ### Test Plan
58
+
59
+ #### For QA
60
+ 1. Verify `ClaudeProvider.defaultModel` is `'sonnet'` and `supportedModels` includes `['opus', 'sonnet', 'haiku']`
61
+ 2. Verify `GeminiProvider.defaultModel` is `'2.5-flash'` and `supportedModels` includes `['2.5-pro', '2.5-flash', '2.0-flash']`
62
+ 3. Verify multi-model IDEs (Cursor, Windsurf) have `null` defaultModel and empty supportedModels
63
+ 4. Run `bun test core/__tests__/schemas/model.test.ts` — all 32 tests pass
64
+ 5. Run `bun test` — full suite (657 tests) passes with no regressions
65
+ 6. Run `bun run build` — build succeeds cleanly
66
+
67
+ #### For Users
68
+ **What changed:** Provider configs now include model specification fields. Analysis and task metadata can record which model was used. Version validation warns if CLI is outdated.
69
+ **How to use:** Existing configs work unchanged — model fields have sensible defaults. New `preferredModel` setting available in project settings.
70
+ **Breaking changes:** None — all new fields are optional or have defaults.
71
+
72
+ ## [1.8.1] - 2026-02-07
73
+
74
+ ### Bug Fixes
75
+ - **Replace keyword domain detection with LLM semantic classification (PRJ-299)**: Eliminated substring false positives in domain classification. "author" no longer matches "auth" → backend, "Build responsive dashboard" correctly routes to frontend.
76
+
77
+ ### Implementation Details
78
+ - New `core/agentic/domain-classifier.ts`: LLM-based classifier with 4-level fallback chain (cache → confirmed history → Claude Haiku API → word-boundary heuristic)
79
+ - New `core/schemas/classification.ts`: Zod schemas for TaskClassification, cache entries, and confirmed patterns
80
+ - Replaced substring `includes()` matching in `smart-context.ts` and `orchestrator-executor.ts` with word-boundary regex (`\b`)
81
+ - Removed ~230 lines of hardcoded keyword lists from both files
82
+ - Classification results cached per (project + description hash) with 1-hour TTL
83
+ - Successful classifications auto-persisted as confirmed patterns via `confirmClassification()`
84
+
85
+ ### Learnings
86
+ - Word-boundary regex (`\b`) correctly rejects "author" matching "auth" because there's no boundary between "auth" and "or" in "author"
87
+ - Using raw `fetch` to Claude API avoids adding `@anthropic-ai/sdk` dependency while keeping vendor-neutral design
88
+ - Centralized classifier in `domain-classifier.ts` consumed by both `smart-context.ts` and `orchestrator-executor.ts` eliminates duplication
89
+
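The word-boundary behaviour called out in the Learnings is easy to demonstrate. A minimal sketch (the helper name is illustrative, and keywords are assumed to be plain words needing no regex escaping):

```typescript
// `\bauth\b` requires a word boundary on both sides of "auth", so "author" cannot match:
// the trailing \b fails because "h" and "o" are both word characters.
function matchesKeyword(description: string, keyword: string): boolean {
  return new RegExp(`\\b${keyword}\\b`, 'i').test(description)
}

matchesKeyword('Fix the auth middleware', 'auth')                  // true  → backend signal
matchesKeyword('Fix the author display on profile page', 'auth')  // false → no false positive
```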
+ ### Test Plan
+
+ #### For QA
+ 1. Run `bun test` — all 625 tests should pass
+ 2. Verify `detectDomain('Fix the author display on profile page')` returns `frontend` (not `backend`)
+ 3. Verify `detectDomain('Build responsive dashboard')` returns `frontend` (not `general`)
+ 4. Verify `detectDomain('Fix the auth middleware')` returns `backend` (standalone "auth" still works)
+ 5. Verify `classifyWithHeuristic` returns `general` with confidence 0.3 for unrecognizable tasks
+ 6. Run `bun run build` — build should succeed
+
+ #### For Users
+ **What changed:** Domain classification uses smarter word-boundary matching, eliminating false positives.
+ **How to use:** No user-facing changes — classification happens automatically during `p. task`.
+ **Breaking changes:** None for end users.
+
+ ## [1.8.0] - 2026-02-07
+
+ ### Features
+
+ - add Fibonacci estimation with variance tracking (PRJ-295) (#145)
+ - add PerformanceTracker for CLI metrics (PRJ-297) (#146)
+
+ ### Bug Fixes
+
+ - replace hardcoded command lists with config-driven context (PRJ-298) (#147)
+
+
+ ## [1.8.0] - 2026-02-07
+
+ ### Features
+ - **Fibonacci estimation with variance tracking (PRJ-295)**: Capture Fibonacci point estimates (1, 2, 3, 5, 8, 13, 21) on task start with automatic points-to-time conversion, record the actual duration on `done`, and display the estimation variance.
+
+ ### Implementation Details
+ - New `core/domain/fibonacci.ts` module: `FIBONACCI_POINTS`, `pointsToMinutes()`, `pointsToTimeRange()`, `findClosestPoint()`, `suggestFromHistory()`
+ - Added `estimatedPoints` and `estimatedMinutes` optional fields to `CurrentTaskSchema` and `SubtaskSchema`
+ - Added an `updateCurrentTask()` partial update method to `StateStorage`
+ - `now()` handler returns a `fibonacci` helper object with `storeEstimate(points)` for template use
+ - `done()` handler records outcomes via `outcomeRecorder.record()` and displays variance: `est: 5pt (1h 30m) → +50%`
+
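The points-to-time conversion and variance display can be pictured as follows. Only the 5-point → 90-minute mapping and the `est: 5pt (1h 30m) → +X%` display are confirmed by this changelog; the remaining point-to-minute values and the variance formula are assumptions for illustration:

```typescript
const FIBONACCI_POINTS = [1, 2, 3, 5, 8, 13, 21] as const
type FibonacciPoint = (typeof FIBONACCI_POINTS)[number]

// Hypothetical mapping — only 5 → 90 minutes is documented in the test plan below
const POINTS_TO_MINUTES: Record<FibonacciPoint, number> = {
  1: 15, 2: 30, 3: 45, 5: 90, 8: 180, 13: 360, 21: 720,
}

function pointsToMinutes(points: FibonacciPoint): number {
  return POINTS_TO_MINUTES[points]
}

// Variance as shown on completion, e.g. est: 5pt (1h 30m) → +50%
function formatVariance(estimatedMinutes: number, actualMinutes: number): string {
  const pct = Math.round(((actualMinutes - estimatedMinutes) / estimatedMinutes) * 100)
  return `${pct >= 0 ? '+' : ''}${pct}%`
}

formatVariance(pointsToMinutes(5), 135) // "+50%"
```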
+ ### Test Plan
+
+ #### For QA
+ 1. Start a task — verify the `fibonacci` helper is returned with `storeEstimate()`, `pointsToMinutes()`, `pointsToTimeRange()`
+ 2. Call `storeEstimate(5)` — verify `estimatedPoints: 5` and `estimatedMinutes: 90` in state.json
+ 3. Complete the task with `p. done` — verify the outcome is recorded to `outcomes/outcomes.jsonl`
+ 4. Verify the variance display shows `est: 5pt (1h 30m) → +X%`
+ 5. Run `bun test` — 552 tests pass
+
+ #### For Users
+ **What changed:** Tasks now support Fibonacci point estimation with automatic time conversion and variance tracking on completion.
+ **How to use:** Estimation is stored via `storeEstimate(points)` during task start; variance is auto-displayed on `p. done`.
+ **Breaking changes:** None — estimation fields are optional.
+
+ ## [1.7.7] - 2026-02-07
+
+ ### Bug Fixes
+ - **Config-driven command context (PRJ-298)**: Replaced 4 hardcoded command lists in `prompt-builder.ts` with a single `command-context.config.json` config file. New commands no longer silently get zero context — the wildcard `*` entry provides sensible defaults, and a heuristic classifier handles unknown commands.
+ - **Quality checklists for ship/done**: `ship` and `done` commands now receive quality checklists (previously excluded from the hardcoded list).
+
+ ### Implementation Details
+ - Created `core/config/command-context.config.json` mapping 25 commands + wildcard to context sections (agents, patterns, checklists, modules)
+ - Zod schema in `core/schemas/command-context.ts` validates the config at load time
+ - `core/agentic/command-context.ts` provides `resolveCommandContextFull()` with a fallback chain: config → cache → heuristic classification → wildcard
+ - `core/agentic/command-classifier.ts` uses word-boundary keyword matching with score-based priority to classify unknown commands from template metadata
+ - Auto-learn (Phase 3): after 3 identical heuristic classifications, the result is persisted to the config file via a fire-and-forget write
+
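The config → cache → classify → wildcard fallback reads naturally as a small resolver. A sketch only — the real `resolveCommandContextFull()` signature differs (see the tests later in this diff); types and plumbing here are simplified assumptions:

```typescript
type Source = 'config' | 'cache' | 'classified' | 'wildcard'

interface CommandContextEntry {
  agents: boolean
  patterns: boolean
  checklist: boolean
  modules: string[]
}

interface CommandContextConfig {
  commands: Record<string, CommandContextEntry> // includes a '*' wildcard entry
}

function resolveFull(
  config: CommandContextConfig,
  command: string,
  cache: Map<string, CommandContextEntry>,
  classify?: () => CommandContextEntry,
): { entry: CommandContextEntry; source: Source } {
  // 1. Explicit config entry wins
  if (config.commands[command]) return { entry: config.commands[command], source: 'config' }
  // 2. Previously classified commands come from the cache
  const cached = cache.get(command)
  if (cached) return { entry: cached, source: 'cache' }
  // 3. Heuristic classification when template metadata is available
  if (classify) {
    const entry = classify()
    cache.set(command, entry)
    return { entry, source: 'classified' }
  }
  // 4. Otherwise fall back to the wildcard defaults
  return { entry: config.commands['*'], source: 'wildcard' }
}
```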
+ ### Learnings
+ - Keyword substring matching causes false positives (e.g., "check" matching inside "checks") — word boundaries via `\b` regex are essential
+ - When quality and info keywords overlap, score-based priority (higher count wins) is more robust than boolean exclusion
+
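The score-based priority mentioned above boils down to counting word-boundary keyword hits per category and letting the higher score win. A sketch with made-up keyword lists (the shipped lists and the tie-breaking rule are not shown in this diff):

```typescript
const QUALITY_KEYWORDS = ['verify', 'validate', 'lint', 'test', 'release']
const INFO_KEYWORDS = ['show', 'display', 'summary', 'status', 'metrics']

function score(text: string, keywords: string[]): number {
  return keywords.filter(k => new RegExp(`\\b${k}\\b`, 'i').test(text)).length
}

function classifyKind(text: string): 'quality' | 'info' {
  // Higher count wins; ties resolve toward 'quality' here purely for illustration
  return score(text, QUALITY_KEYWORDS) >= score(text, INFO_KEYWORDS) ? 'quality' : 'info'
}

classifyKind('Validate all tests pass and lint checks succeed before release') // 'quality'
classifyKind('Display a summary of the project status and metrics')            // 'info'
```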
+ ### Test Plan
+
+ #### For QA
+ 1. Run `bun test ./core/__tests__/agentic/command-context.test.ts` — all 20 tests pass
+ 2. Run `bun test ./core/__tests__/agentic/prompt-builder.test.ts` — all 16 existing tests pass
+ 3. Run `bun run build` — compiles without errors
+ 4. Verify the `ship` and `done` commands have `checklist: true` in the config
+ 5. Verify unknown commands get wildcard defaults (agents: true, patterns: true)
+
+ #### For Users
+ **What changed:** Commands like `ship` and `done` now receive quality checklists. New commands automatically get sensible context instead of nothing.
+ **How to use:** No user action needed — works automatically.
+ **Breaking changes:** None
+
+ ## [1.7.6] - 2026-02-07
+
+ ### Features
+ - **PerformanceTracker service (PRJ-297)**: New `core/infrastructure/performance-tracker.ts` singleton that automatically measures startup time, memory usage, and command durations on every CLI invocation. Data is stored in append-only JSONL with 5MB rotation.
+ - **`prjct perf` dashboard command**: Shows performance metrics vs targets for the last N days (default 7). Displays startup time, heap/RSS memory, context correctness rate, subtask handoff rate, and a per-command duration breakdown.
+ - **Zod schemas for performance metrics**: `core/schemas/performance.ts` with typed schemas for all metric types (timing, memory, context correctness, subtask handoff, analysis state).
+
+ ### Implementation Details
+ - PerformanceTracker uses `process.hrtime.bigint()` for nanosecond-precision timing and `process.memoryUsage()` for memory snapshots
+ - Startup time is captured at the top of `bin/prjct.ts` via `globalThis.__perfStartNs` and recorded in `core/index.ts` after command execution
+ - All instrumentation is wrapped in a non-critical try/catch to prevent perf tracking from breaking CLI functionality
+ - Uses the existing `jsonl-helper.appendJsonLineWithRotation` for storage (5MB rotation limit)
+ - 17 unit tests covering timing, memory, recording, context correctness, handoff, and report generation
+
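The measurement side of this can be sketched with the two Node APIs named above. Persistence via `jsonl-helper.appendJsonLineWithRotation` is left out (its signature is not shown in this diff), and the record shape below is an assumption rather than the shipped schema:

```typescript
// Nanosecond-precision elapsed time via process.hrtime.bigint()
function elapsedMs(sinceNs: bigint): number {
  return Number(process.hrtime.bigint() - sinceNs) / 1_000_000
}

// Memory snapshot via process.memoryUsage(). Note: heapUsed can momentarily
// exceed heapTotal during GC, so tests should not assert heapUsed <= heapTotal.
function memorySnapshot() {
  const { heapUsed, heapTotal, rss } = process.memoryUsage()
  const toMb = (bytes: number) => Math.round((bytes / 1024 / 1024) * 10) / 10
  return { heapUsedMb: toMb(heapUsed), heapTotalMb: toMb(heapTotal), rssMb: toMb(rss) }
}

// Instrumentation is non-critical: never let metrics collection break the CLI itself
const startNs = process.hrtime.bigint()
try {
  const record = { ts: Date.now(), startupMs: elapsedMs(startNs), ...memorySnapshot() }
  console.log(JSON.stringify(record)) // the real tracker appends this kind of record to performance.jsonl
} catch {
  // swallow — perf tracking must not affect command execution
}
```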
+ ### Learnings
+ - The JSONL append-only pattern with rotation is ideal for time-series metrics (vs JSON write-through for stateful data)
+ - `globalThis` works well for passing data between the `bin/` entry point and `core/` modules without import coupling
+ - `process.memoryUsage().heapUsed` can momentarily exceed `heapTotal` during GC — don't assert `<=`
+
+ ### Test Plan
+
+ #### For QA
+ 1. Run `prjct status` then `prjct perf` — verify metrics appear (startup time, memory, command duration)
+ 2. Run multiple commands then `prjct perf 1` — verify all commands show in the dashboard
+ 3. Check that `~/.prjct-cli/projects/{id}/storage/performance.jsonl` exists with valid JSONL entries
+ 4. Verify `prjct perf` with no data shows the "No performance data yet" message
+ 5. Verify target indicators: startup `<500ms` green, `>500ms` yellow warning
+
+ #### For Users
+ **What changed:** A new `prjct perf` command shows a performance dashboard with startup time, memory usage, and command duration metrics.
+ **How to use:** Run `prjct perf` (default 7 days) or `prjct perf 30` for a 30-day view. Metrics are collected automatically.
+ **Breaking changes:** None
+
+
  ## [1.7.5] - 2026-02-07

  ### Refactoring

  - remove unused deps and lazy-load @linear/sdk (PRJ-291) (#144)

-
  ## [1.7.5] - 2026-02-07

  ### Changed
package/bin/prjct.ts CHANGED
@@ -8,6 +8,10 @@
  * auto-install on first CLI use. This is the reliable path.
  */

+ // Performance: capture process start time (nanosecond precision)
+ // Exposed via globalThis so core/index.ts can read it for startup time metrics
+ ;(globalThis as Record<string, unknown>).__perfStartNs = process.hrtime.bigint()
+
  import os from 'node:os'
  import path from 'node:path'
  import chalk from 'chalk'
@@ -85,6 +89,16 @@ async function trackSession(command: string): Promise<() => void> {
      return () => {
        const durationMs = Date.now() - start
        sessionTracker.trackCommand(projectId, command, durationMs).catch(() => {})
+
+       // Performance tracking (non-critical, lazy-loaded)
+       import('../core/infrastructure/performance-tracker')
+         .then(({ performanceTracker }) => {
+           performanceTracker
+             .recordTiming(projectId, 'command_duration', durationMs, { command })
+             .catch(() => {})
+           performanceTracker.recordMemory(projectId, { command }).catch(() => {})
+         })
+         .catch(() => {})
      }
    }
  } catch {
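The read side of the `globalThis.__perfStartNs` handoff lives in `core/index.ts` per the changelog and is not shown in this diff. A sketch of how the startup time could be derived from that value — the metric name and the tracker call are assumptions:

```typescript
// Somewhere after command execution in core/ — illustrative only
const startNs = (globalThis as Record<string, unknown>).__perfStartNs

if (typeof startNs === 'bigint') {
  const startupMs = Number(process.hrtime.bigint() - startNs) / 1_000_000
  // e.g. performanceTracker.recordTiming(projectId, 'startup', startupMs).catch(() => {})
  console.log(`startup: ${startupMs.toFixed(1)}ms`)
}
```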
package/core/__tests__/agentic/command-context.test.ts CHANGED
@@ -0,0 +1,281 @@
+ /**
+  * Command Context Tests
+  *
+  * Tests for config-driven command context resolution,
+  * classification, caching, and auto-learn.
+  *
+  * @see PRJ-298
+  */
+
+ import { describe, expect, it } from 'bun:test'
+ import { classifyCommand } from '../../agentic/command-classifier'
+ import {
+   cacheClassification,
+   getCachedClassification,
+   loadCommandContextConfig,
+   resolveCommandContext,
+   resolveCommandContextFull,
+   trackClassification,
+ } from '../../agentic/command-context'
+ import type { CommandContextEntry } from '../../schemas/command-context'
+ import type { Template } from '../../types'
+
+ // =============================================================================
+ // Config Loading
+ // =============================================================================
+
+ describe('Command Context Config', () => {
+   it('should load and validate the config file', async () => {
+     const config = await loadCommandContextConfig()
+
+     expect(config.version).toBe('1.0.0')
+     expect(config.commands).toBeDefined()
+     expect(config.commands['*']).toBeDefined()
+   })
+
+   it('should have wildcard entry with sensible defaults', async () => {
+     const config = await loadCommandContextConfig()
+     const wildcard = config.commands['*']
+
+     expect(wildcard.agents).toBe(true)
+     expect(wildcard.patterns).toBe(true)
+     expect(wildcard.checklist).toBe(false)
+     expect(wildcard.modules).toEqual([])
+   })
+
+   it('should have explicit entries for known commands', async () => {
+     const config = await loadCommandContextConfig()
+
+     expect(config.commands.task).toBeDefined()
+     expect(config.commands.ship).toBeDefined()
+     expect(config.commands.bug).toBeDefined()
+     expect(config.commands.done).toBeDefined()
+     expect(config.commands.sync).toBeDefined()
+   })
+ })
+
+ // =============================================================================
+ // Config Resolution
+ // =============================================================================
+
+ describe('resolveCommandContext', () => {
+   it('should return explicit config for known commands', async () => {
+     const config = await loadCommandContextConfig()
+     const entry = resolveCommandContext(config, 'task')
+
+     expect(entry.modules).toContain('CLAUDE-intelligence.md')
+     expect(entry.modules).toContain('CLAUDE-storage.md')
+   })
+
+   it('should return wildcard for unknown commands', async () => {
+     const config = await loadCommandContextConfig()
+     const entry = resolveCommandContext(config, 'nonexistent-command')
+     const wildcard = config.commands['*']
+
+     expect(entry).toEqual(wildcard)
+   })
+
+   it('should give ship command patterns and checklist', async () => {
+     const config = await loadCommandContextConfig()
+     const entry = resolveCommandContext(config, 'ship')
+
+     expect(entry.patterns).toBe(true)
+     expect(entry.checklist).toBe(true)
+   })
+
+   it('should give done command checklist', async () => {
+     const config = await loadCommandContextConfig()
+     const entry = resolveCommandContext(config, 'done')
+
+     expect(entry.checklist).toBe(true)
+   })
+
+   it('should give sync command no context sections', async () => {
+     const config = await loadCommandContextConfig()
+     const entry = resolveCommandContext(config, 'sync')
+
+     expect(entry.agents).toBe(false)
+     expect(entry.patterns).toBe(false)
+     expect(entry.checklist).toBe(false)
+     expect(entry.modules).toEqual([])
+   })
+ })
+
+ // =============================================================================
+ // Full Resolution with Classification
+ // =============================================================================
+
+ describe('resolveCommandContextFull', () => {
+   it('should return source=config for known commands', async () => {
+     const config = await loadCommandContextConfig()
+     const result = resolveCommandContextFull(config, 'bug')
+
+     expect(result.source).toBe('config')
+     expect(result.entry.agents).toBe(true)
+   })
+
+   it('should classify unknown commands from template', async () => {
+     const config = await loadCommandContextConfig()
+     const template: Template = {
+       frontmatter: {
+         name: 'p:deploy',
+         description: 'Deploy the application to production',
+         'allowed-tools': ['Bash', 'Read'],
+       },
+       content: 'Build and deploy the project. Verify deployment status.',
+     }
+
+     const result = resolveCommandContextFull(config, 'deploy', template)
+     expect(result.source).toBe('classified')
+   })
+
+   it('should return source=cache for previously classified commands', async () => {
+     const config = await loadCommandContextConfig()
+     const entry: CommandContextEntry = {
+       agents: true,
+       patterns: false,
+       checklist: false,
+       modules: [],
+     }
+     cacheClassification('cached-cmd', entry)
+
+     const result = resolveCommandContextFull(config, 'cached-cmd')
+     expect(result.source).toBe('cache')
+     expect(result.entry).toEqual(entry)
+   })
+
+   it('should return source=wildcard when no template provided for unknown command', async () => {
+     const config = await loadCommandContextConfig()
+     const result = resolveCommandContextFull(config, 'truly-unknown-no-template')
+
+     expect(result.source).toBe('wildcard')
+   })
+ })
+
+ // =============================================================================
+ // Command Classifier
+ // =============================================================================
+
+ describe('classifyCommand', () => {
+   it('should classify code-modifying commands with Write tool', () => {
+     const template: Template = {
+       frontmatter: {
+         name: 'p:scaffold',
+         description: 'Scaffold a new component',
+         'allowed-tools': ['Write', 'Read', 'Bash'],
+       },
+       content: 'Create the component files and implement the structure.',
+     }
+
+     const result = classifyCommand('scaffold', template)
+     expect(result.agents).toBe(true)
+     expect(result.patterns).toBe(true)
+   })
+
+   it('should classify info commands as needing no context', () => {
+     const template: Template = {
+       frontmatter: {
+         name: 'p:stats',
+         description: 'Show project statistics',
+         'allowed-tools': ['Read'],
+       },
+       content: 'Display a summary of the project status and metrics.',
+     }
+
+     const result = classifyCommand('stats', template)
+     expect(result.agents).toBe(false)
+     expect(result.patterns).toBe(false)
+   })
+
+   it('should classify quality commands with checklists', () => {
+     const template: Template = {
+       frontmatter: {
+         name: 'p:verify',
+         description: 'Verify project integrity',
+         'allowed-tools': ['Read', 'Bash'],
+       },
+       content: 'Validate all tests pass and lint checks succeed before release.',
+     }
+
+     const result = classifyCommand('verify', template)
+     expect(result.checklist).toBe(true)
+   })
+ })
+
+ // =============================================================================
+ // Classification Cache
+ // =============================================================================
+
+ describe('Classification Cache', () => {
+   it('should cache and retrieve classifications', () => {
+     const entry: CommandContextEntry = {
+       agents: true,
+       patterns: true,
+       checklist: false,
+       modules: ['test.md'],
+     }
+     cacheClassification('test-cache', entry)
+
+     const cached = getCachedClassification('test-cache')
+     expect(cached).toEqual(entry)
+   })
+
+   it('should return undefined for uncached commands', () => {
+     const cached = getCachedClassification('never-cached')
+     expect(cached).toBeUndefined()
+   })
+ })
+
+ // =============================================================================
+ // Auto-Learn Tracking
+ // =============================================================================
+
+ describe('Auto-Learn (trackClassification)', () => {
+   it('should not trigger persist on first classification', () => {
+     const entry: CommandContextEntry = {
+       agents: true,
+       patterns: true,
+       checklist: false,
+       modules: [],
+     }
+     const shouldPersist = trackClassification('learn-test-1', entry)
+
+     expect(shouldPersist).toBe(false)
+   })
+
+   it('should trigger persist after threshold reached', () => {
+     const entry: CommandContextEntry = {
+       agents: false,
+       patterns: true,
+       checklist: true,
+       modules: [],
+     }
+
+     trackClassification('learn-test-2', entry) // 1
+     trackClassification('learn-test-2', entry) // 2
+     const shouldPersist = trackClassification('learn-test-2', entry) // 3
+
+     expect(shouldPersist).toBe(true)
+   })
+
+   it('should reset count when classification changes', () => {
+     const entry1: CommandContextEntry = {
+       agents: true,
+       patterns: true,
+       checklist: false,
+       modules: [],
+     }
+     const entry2: CommandContextEntry = {
+       agents: false,
+       patterns: false,
+       checklist: true,
+       modules: [],
+     }
+
+     trackClassification('learn-test-3', entry1) // 1
+     trackClassification('learn-test-3', entry1) // 2
+     const shouldPersist = trackClassification('learn-test-3', entry2) // reset to 1
+
+     expect(shouldPersist).toBe(false)
+   })
+ })
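For readers skimming these tests, the `CommandContextEntry` type they exercise maps onto a small Zod schema in `core/schemas/command-context.ts`. The sketch below is inferred from how the fields are used above and from the changelog; it is an approximation, not the actual source:

```typescript
import { z } from 'zod'

export const CommandContextEntrySchema = z.object({
  agents: z.boolean(),
  patterns: z.boolean(),
  checklist: z.boolean(),
  modules: z.array(z.string()),
})

// The config file maps command names (including a '*' wildcard) to entries
export const CommandContextConfigSchema = z.object({
  version: z.string(),
  commands: z.record(z.string(), CommandContextEntrySchema),
})

export type CommandContextEntry = z.infer<typeof CommandContextEntrySchema>
```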