rafcode 2.1.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +4 -1
- package/CLAUDE.md +59 -11
- package/RAF/ahslfe-config-wizard/decisions.md +34 -0
- package/RAF/ahslfe-config-wizard/input.md +1 -0
- package/RAF/ahslfe-config-wizard/outcomes/01-define-config-schema.md +38 -0
- package/RAF/ahslfe-config-wizard/outcomes/02-refactor-codebase-to-use-config.md +67 -0
- package/RAF/ahslfe-config-wizard/outcomes/03-create-config-documentation.md +37 -0
- package/RAF/ahslfe-config-wizard/outcomes/04-implement-raf-config-command.md +47 -0
- package/RAF/ahslfe-config-wizard/outcomes/05-update-claude-md.md +26 -0
- package/RAF/ahslfe-config-wizard/plans/01-define-config-schema.md +73 -0
- package/RAF/ahslfe-config-wizard/plans/02-refactor-codebase-to-use-config.md +74 -0
- package/RAF/ahslfe-config-wizard/plans/03-create-config-documentation.md +57 -0
- package/RAF/ahslfe-config-wizard/plans/04-implement-raf-config-command.md +66 -0
- package/RAF/ahslfe-config-wizard/plans/05-update-claude-md.md +60 -0
- package/RAF/ahstvo-token-tracker/decisions.md +44 -0
- package/RAF/ahstvo-token-tracker/input.md +3 -0
- package/RAF/ahstvo-token-tracker/outcomes/01-full-model-id-support.md +43 -0
- package/RAF/ahstvo-token-tracker/outcomes/02-name-generation-no-session.md +33 -0
- package/RAF/ahstvo-token-tracker/outcomes/03-unify-stream-json-execution.md +48 -0
- package/RAF/ahstvo-token-tracker/outcomes/04-token-tracking-cost-calculation.md +53 -0
- package/RAF/ahstvo-token-tracker/outcomes/05-token-cost-console-reporting.md +57 -0
- package/RAF/ahstvo-token-tracker/outcomes/06-runtime-verbose-toggle.md +53 -0
- package/RAF/ahstvo-token-tracker/outcomes/07-readme-config-docs.md +36 -0
- package/RAF/ahstvo-token-tracker/plans/01-full-model-id-support.md +35 -0
- package/RAF/ahstvo-token-tracker/plans/02-name-generation-no-session.md +36 -0
- package/RAF/ahstvo-token-tracker/plans/03-unify-stream-json-execution.md +44 -0
- package/RAF/ahstvo-token-tracker/plans/04-token-tracking-cost-calculation.md +56 -0
- package/RAF/ahstvo-token-tracker/plans/05-token-cost-console-reporting.md +55 -0
- package/RAF/ahstvo-token-tracker/plans/06-runtime-verbose-toggle.md +48 -0
- package/RAF/ahstvo-token-tracker/plans/07-readme-config-docs.md +44 -0
- package/RAF/ahtahs-token-reaper/decisions.md +37 -0
- package/RAF/ahtahs-token-reaper/input.md +20 -0
- package/RAF/ahtahs-token-reaper/outcomes/01-extend-token-tracker-data-model.md +42 -0
- package/RAF/ahtahs-token-reaper/outcomes/02-accumulate-usage-in-retry-loop.md +31 -0
- package/RAF/ahtahs-token-reaper/outcomes/03-per-attempt-display-formatting.md +60 -0
- package/RAF/ahtahs-token-reaper/outcomes/04-add-model-name-to-claude-call-logs.md +57 -0
- package/RAF/ahtahs-token-reaper/outcomes/05-handle-invalid-config-in-raf-config.md +46 -0
- package/RAF/ahtahs-token-reaper/outcomes/06-fix-verbose-toggle-timer-display.md +38 -0
- package/RAF/ahtahs-token-reaper/plans/01-extend-token-tracker-data-model.md +36 -0
- package/RAF/ahtahs-token-reaper/plans/02-accumulate-usage-in-retry-loop.md +36 -0
- package/RAF/ahtahs-token-reaper/plans/03-per-attempt-display-formatting.md +43 -0
- package/RAF/ahtahs-token-reaper/plans/04-add-model-name-to-claude-call-logs.md +38 -0
- package/RAF/ahtahs-token-reaper/plans/05-handle-invalid-config-in-raf-config.md +36 -0
- package/RAF/ahtahs-token-reaper/plans/06-fix-verbose-toggle-timer-display.md +40 -0
- package/README.md +34 -0
- package/dist/commands/config.d.ts +3 -0
- package/dist/commands/config.d.ts.map +1 -0
- package/dist/commands/config.js +195 -0
- package/dist/commands/config.js.map +1 -0
- package/dist/commands/do.d.ts.map +1 -1
- package/dist/commands/do.js +55 -7
- package/dist/commands/do.js.map +1 -1
- package/dist/commands/plan.d.ts.map +1 -1
- package/dist/commands/plan.js +5 -3
- package/dist/commands/plan.js.map +1 -1
- package/dist/core/claude-runner.d.ts +19 -2
- package/dist/core/claude-runner.d.ts.map +1 -1
- package/dist/core/claude-runner.js +43 -96
- package/dist/core/claude-runner.js.map +1 -1
- package/dist/core/failure-analyzer.d.ts.map +1 -1
- package/dist/core/failure-analyzer.js +6 -3
- package/dist/core/failure-analyzer.js.map +1 -1
- package/dist/core/git.d.ts.map +1 -1
- package/dist/core/git.js +10 -3
- package/dist/core/git.js.map +1 -1
- package/dist/core/pull-request.d.ts +1 -1
- package/dist/core/pull-request.d.ts.map +1 -1
- package/dist/core/pull-request.js +9 -4
- package/dist/core/pull-request.js.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/parsers/stream-renderer.d.ts +16 -1
- package/dist/parsers/stream-renderer.d.ts.map +1 -1
- package/dist/parsers/stream-renderer.js +34 -4
- package/dist/parsers/stream-renderer.js.map +1 -1
- package/dist/prompts/execution.d.ts.map +1 -1
- package/dist/prompts/execution.js +11 -1
- package/dist/prompts/execution.js.map +1 -1
- package/dist/types/config.d.ts +95 -4
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/config.js +63 -3
- package/dist/types/config.js.map +1 -1
- package/dist/utils/config.d.ts +65 -7
- package/dist/utils/config.d.ts.map +1 -1
- package/dist/utils/config.js +297 -21
- package/dist/utils/config.js.map +1 -1
- package/dist/utils/name-generator.d.ts +3 -7
- package/dist/utils/name-generator.d.ts.map +1 -1
- package/dist/utils/name-generator.js +75 -61
- package/dist/utils/name-generator.js.map +1 -1
- package/dist/utils/terminal-symbols.d.ts +25 -0
- package/dist/utils/terminal-symbols.d.ts.map +1 -1
- package/dist/utils/terminal-symbols.js +87 -0
- package/dist/utils/terminal-symbols.js.map +1 -1
- package/dist/utils/token-tracker.d.ts +55 -0
- package/dist/utils/token-tracker.d.ts.map +1 -0
- package/dist/utils/token-tracker.js +142 -0
- package/dist/utils/token-tracker.js.map +1 -0
- package/dist/utils/validation.d.ts +5 -5
- package/dist/utils/validation.d.ts.map +1 -1
- package/dist/utils/validation.js +10 -6
- package/dist/utils/validation.js.map +1 -1
- package/dist/utils/verbose-toggle.d.ts +33 -0
- package/dist/utils/verbose-toggle.d.ts.map +1 -0
- package/dist/utils/verbose-toggle.js +94 -0
- package/dist/utils/verbose-toggle.js.map +1 -0
- package/package.json +1 -1
- package/src/commands/config.ts +230 -0
- package/src/commands/do.ts +64 -6
- package/src/commands/plan.ts +5 -3
- package/src/core/claude-runner.ts +59 -115
- package/src/core/failure-analyzer.ts +6 -3
- package/src/core/git.ts +10 -3
- package/src/core/pull-request.ts +9 -4
- package/src/index.ts +2 -0
- package/src/parsers/stream-renderer.ts +54 -4
- package/src/prompts/config-docs.md +331 -0
- package/src/prompts/execution.ts +13 -1
- package/src/types/config.ts +156 -7
- package/src/utils/config.ts +357 -21
- package/src/utils/name-generator.ts +84 -71
- package/src/utils/terminal-symbols.ts +103 -0
- package/src/utils/token-tracker.ts +177 -0
- package/src/utils/validation.ts +15 -10
- package/src/utils/verbose-toggle.ts +103 -0
- package/tests/unit/claude-runner.test.ts +171 -7
- package/tests/unit/config-command.test.ts +242 -0
- package/tests/unit/config.test.ts +632 -30
- package/tests/unit/name-generator.test.ts +99 -75
- package/tests/unit/pull-request.test.ts +2 -0
- package/tests/unit/stream-renderer.test.ts +83 -0
- package/tests/unit/terminal-symbols.test.ts +245 -0
- package/tests/unit/timer-verbose-integration.test.ts +170 -0
- package/tests/unit/token-tracker.test.ts +685 -0
- package/tests/unit/verbose-toggle.test.ts +204 -0
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# Task: Add Token Tracking and Cost Calculation
|
|
2
|
+
|
|
3
|
+
## Objective
|
|
4
|
+
Implement token usage accumulation across tasks and cost calculation using configurable per-model pricing.
|
|
5
|
+
|
|
6
|
+
## Context
|
|
7
|
+
After task execution is unified to stream-json (task 03), each task returns `UsageData` with token counts. This task adds the infrastructure to accumulate usage across multiple tasks and calculate estimated costs based on configurable pricing.
|
|
8
|
+
|
|
9
|
+
## Dependencies
|
|
10
|
+
01, 03
|
|
11
|
+
|
|
12
|
+
## Requirements
|
|
13
|
+
- Add pricing config to the RAF config schema with current prices as defaults
|
|
14
|
+
- Pricing is per-model, per-direction (input vs output), in dollars per million tokens
|
|
15
|
+
- Support pricing tiers: standard (prompts <= 200K tokens) and extended (prompts > 200K tokens) for Opus and Sonnet; flat rate for Haiku
|
|
16
|
+
- Cache read tokens should use a discounted price (typically 90% off input price — check Claude pricing page)
|
|
17
|
+
- Cache creation tokens should use standard input price
|
|
18
|
+
- Accumulate usage data across all tasks in a project execution run
|
|
19
|
+
- Calculate estimated cost from token counts × configurable prices
|
|
20
|
+
|
|
21
|
+
## Implementation Steps
|
|
22
|
+
1. Define pricing types and add pricing config to the config schema in `src/types/config.ts` with default values:
|
|
23
|
+
- Opus: $15/MTok input, $75/MTok output (standard tier)
|
|
24
|
+
- Sonnet: $3/MTok input, $15/MTok output
|
|
25
|
+
- Haiku: $1/MTok input, $5/MTok output
|
|
26
|
+
- Cache read discount: 90% off input price
|
|
27
|
+
- Cache creation: same as input price
|
|
28
|
+
2. Add config validation for pricing fields in `src/utils/config.ts`
|
|
29
|
+
3. Add pricing accessor helpers (e.g., `getPricing(model)`)
|
|
30
|
+
4. Create a `TokenTracker` utility (e.g., `src/utils/token-tracker.ts`) that:
|
|
31
|
+
- Accepts `UsageData` from each task execution
|
|
32
|
+
- Accumulates totals (input, output, cache read, cache creation tokens per model)
|
|
33
|
+
- Calculates cost from token counts × configured prices
|
|
34
|
+
- Provides per-task summaries and a grand total
|
|
35
|
+
5. Map model IDs from CLI output (e.g., `claude-opus-4-6`) to pricing tiers — the modelUsage keys are full model IDs, so a mapping from full ID to pricing category (opus/sonnet/haiku) is needed
|
|
36
|
+
6. Update config docs in `src/prompts/config-docs.md`
|
|
37
|
+
7. Add tests for cost calculation and token accumulation
|
|
38
|
+
|
|
39
|
+
## Acceptance Criteria
|
|
40
|
+
- [ ] Pricing config added with sensible defaults matching current Claude API pricing
|
|
41
|
+
- [ ] `TokenTracker` accumulates usage across multiple tasks correctly
|
|
42
|
+
- [ ] Cost calculation is accurate: `tokens × price_per_token` for each category
|
|
43
|
+
- [ ] Per-model pricing works (different costs for opus vs sonnet vs haiku)
|
|
44
|
+
- [ ] Cache tokens use discounted pricing
|
|
45
|
+
- [ ] Config validation accepts valid pricing, rejects invalid
|
|
46
|
+
- [ ] All tests pass
|
|
47
|
+
|
|
48
|
+
## Notes
|
|
49
|
+
- Current pricing (as of 2026-02-10):
|
|
50
|
+
- Opus 4.6: $15/MTok input, $75/MTok output (standard ≤200K context)
|
|
51
|
+
- Sonnet 4.5: $3/MTok input, $15/MTok output
|
|
52
|
+
- Haiku 4.5: $1/MTok input, $5/MTok output
|
|
53
|
+
- Cache read is typically 10% of input price
|
|
54
|
+
- Cache creation is typically 25% more than input price
|
|
55
|
+
- Model ID mapping: `claude-opus-4-6` → opus pricing, `claude-sonnet-4-5-*` → sonnet pricing, etc. Use a pattern-based lookup
|
|
56
|
+
- Consider whether to track extended context pricing (>200K tokens) — may not be worth the complexity initially since RAF tasks rarely exceed 200K
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# Task: Add Token/Cost Reporting to Console Output
|
|
2
|
+
|
|
3
|
+
## Objective
|
|
4
|
+
Display per-task token usage and cost estimates after each task, and a grand total after all tasks complete.
|
|
5
|
+
|
|
6
|
+
## Context
|
|
7
|
+
With token tracking infrastructure in place (task 04), this task wires it into the `raf do` execution flow to display usage reports in the terminal. This is the user-facing part of the feature.
|
|
8
|
+
|
|
9
|
+
## Dependencies
|
|
10
|
+
04
|
|
11
|
+
|
|
12
|
+
## Requirements
|
|
13
|
+
- After each task completes, print a concise usage summary line to the console
|
|
14
|
+
- After all tasks finish (or project completes), print a grand total summary
|
|
15
|
+
- Show: input tokens, output tokens, cache tokens (combined read+creation), estimated cost in USD
|
|
16
|
+
- Format numbers with thousands separators for readability (e.g., `12,345`)
|
|
17
|
+
- Format cost as USD with 2-4 decimal places (e.g., `$1.23` or `$0.0042`)
|
|
18
|
+
- Keep the output compact — avoid verbose multi-line reports for each task
|
|
19
|
+
- Respect the existing console output style (chalk colors, log levels, etc.)
|
|
20
|
+
|
|
21
|
+
## Implementation Steps
|
|
22
|
+
1. In `src/commands/do.ts`, instantiate a `TokenTracker` at the start of project execution
|
|
23
|
+
2. After each task execution, feed the returned `UsageData` into the tracker
|
|
24
|
+
3. Print a per-task summary line after each task completes (e.g., ` Tokens: 5,234 in / 1,023 out | Cache: 18,500 read | Est. cost: $0.42`)
|
|
25
|
+
4. After all tasks complete, print a total summary section with aggregated stats
|
|
26
|
+
5. Handle edge cases: tasks that fail before returning usage data, context overflow, timeouts
|
|
27
|
+
6. Use existing logging/formatting utilities (chalk, logger) for consistent styling
|
|
28
|
+
7. Update CLAUDE.md and README.md to document the token tracking feature
|
|
29
|
+
|
|
30
|
+
## Acceptance Criteria
|
|
31
|
+
- [ ] Per-task token summary displayed after each task completion
|
|
32
|
+
- [ ] Grand total displayed after all tasks finish
|
|
33
|
+
- [ ] Numbers are formatted with thousands separators
|
|
34
|
+
- [ ] Cost displayed in USD format
|
|
35
|
+
- [ ] Failed tasks that have partial usage data are still included in totals
|
|
36
|
+
- [ ] Tasks with no usage data (timeout, crash) are handled gracefully
|
|
37
|
+
- [ ] Output is compact and doesn't overwhelm the console
|
|
38
|
+
- [ ] CLAUDE.md updated with token tracking documentation
|
|
39
|
+
- [ ] All tests pass
|
|
40
|
+
|
|
41
|
+
## Notes
|
|
42
|
+
- Example per-task output format:
|
|
43
|
+
```
|
|
44
|
+
Task 01 complete ✓
|
|
45
|
+
Tokens: 5,234 in / 1,023 out | Cache: 18,500 read | Est. cost: $0.42
|
|
46
|
+
```
|
|
47
|
+
- Example total output format:
|
|
48
|
+
```
|
|
49
|
+
── Token Usage Summary ──────────────────
|
|
50
|
+
Total tokens: 45,678 in / 12,345 out
|
|
51
|
+
Cache: 125,000 read / 8,000 created
|
|
52
|
+
Estimated cost: $3.75
|
|
53
|
+
─────────────────────────────────────────
|
|
54
|
+
```
|
|
55
|
+
- These are just examples — the implementing agent should match the existing output style
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# Task: Add Runtime Verbose Toggle During Task Execution
|
|
2
|
+
|
|
3
|
+
## Objective
|
|
4
|
+
Allow users to press Tab during task execution to toggle verbose mode on/off in real-time, showing or hiding tool-use activity lines.
|
|
5
|
+
|
|
6
|
+
## Context
|
|
7
|
+
After task 03 unifies all execution to stream-json, the underlying data stream always includes tool-use events. Verbose mode becomes purely a display concern — whether to print tool descriptions (e.g., "Reading src/file.ts", "Running: npm test") to the console. This makes runtime toggling straightforward: listen for Tab key on stdin and flip a boolean that controls display output.
|
|
8
|
+
|
|
9
|
+
## Dependencies
|
|
10
|
+
03
|
|
11
|
+
|
|
12
|
+
## Requirements
|
|
13
|
+
- During non-interactive task execution (`raf do`), listen for Tab keypress on `process.stdin`
|
|
14
|
+
- Pressing Tab toggles verbose display: tool-use lines are shown or hidden
|
|
15
|
+
- The initial state matches the `--verbose` flag (off by default, on if `--verbose` was passed)
|
|
16
|
+
- Display a brief indicator when toggling (e.g., `[verbose: on]` / `[verbose: off]`)
|
|
17
|
+
- stdin must be in raw mode to capture individual keypresses without requiring Enter
|
|
18
|
+
- Restore stdin to normal mode after task execution completes (or on process exit)
|
|
19
|
+
- Do not interfere with Ctrl+C signal handling (SIGINT must still work)
|
|
20
|
+
- Do not interfere with the child process — stdin is already `'ignore'` for spawned Claude
|
|
21
|
+
|
|
22
|
+
## Implementation Steps
|
|
23
|
+
1. Before starting task execution in `do.ts`, set up `process.stdin` in raw mode to capture keypresses
|
|
24
|
+
2. Listen for the Tab key (character code `\t` or `0x09`) on the stdin `data` event
|
|
25
|
+
3. Maintain a `verboseDisplay` boolean that the stream-json renderer checks before printing tool-use lines
|
|
26
|
+
4. On Tab press, flip the boolean and print a brief status indicator
|
|
27
|
+
5. Pass the `verboseDisplay` reference (or a callback/event emitter) to the stream renderer so it can check the current state for each event
|
|
28
|
+
6. On task completion (or process exit/error), restore stdin to cooked mode and remove the listener
|
|
29
|
+
7. Integrate with the shutdown handler to ensure clean terminal state on Ctrl+C
|
|
30
|
+
8. Add tests for the toggle mechanism
|
|
31
|
+
|
|
32
|
+
## Acceptance Criteria
|
|
33
|
+
- [ ] Tab key toggles verbose display during task execution
|
|
34
|
+
- [ ] Initial verbose state matches the `--verbose` CLI flag
|
|
35
|
+
- [ ] Tool-use lines appear/disappear immediately on toggle
|
|
36
|
+
- [ ] Brief status indicator shown on toggle
|
|
37
|
+
- [ ] Ctrl+C still works for graceful shutdown
|
|
38
|
+
- [ ] Terminal state is properly restored after execution
|
|
39
|
+
- [ ] No interference with child process stdin
|
|
40
|
+
- [ ] Works correctly across multiple sequential tasks
|
|
41
|
+
- [ ] All existing tests pass
|
|
42
|
+
|
|
43
|
+
## Notes
|
|
44
|
+
- Node.js `process.stdin.setRawMode(true)` is already used in `runInteractive()` so the pattern is familiar in this codebase
|
|
45
|
+
- The shutdown handler in `src/core/shutdown-handler.ts` already manages terminal cleanup — coordinate with it
|
|
46
|
+
- Since the child process has `stdio: ['ignore', 'pipe', 'pipe']`, parent stdin is free to use for keypress detection
|
|
47
|
+
- Consider showing the toggle hint at the start of execution: `Press Tab to toggle verbose mode`
|
|
48
|
+
- Edge case: if stdin is not a TTY (piped input), skip the keypress listener entirely
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# Task: Document raf config in README and Strengthen README Update Policy
|
|
2
|
+
|
|
3
|
+
## Objective
|
|
4
|
+
Add `raf config` documentation to README.md and add explicit guidance in CLAUDE.md about keeping the README updated when CLI commands or important features change.
|
|
5
|
+
|
|
6
|
+
## Context
|
|
7
|
+
The `raf config` command is fully implemented but completely missing from the user-facing README. Users have no way to discover that configuration is possible, how to use it, or what can be configured. Additionally, CLAUDE.md's "Important Reminders" section should be more explicit about when README updates are required to prevent this gap from recurring.
|
|
8
|
+
|
|
9
|
+
## Requirements
|
|
10
|
+
|
|
11
|
+
### README updates
|
|
12
|
+
- Add a `raf config` section alongside the existing command documentation sections (`raf plan`, `raf do`, `raf status`)
|
|
13
|
+
- Keep it brief and consistent with the existing README style
|
|
14
|
+
- Include: command usage, what it does (interactive Claude session for config), basic config file example, 1-2 common use cases
|
|
15
|
+
- Add `raf config` to the Command Reference table at the bottom
|
|
16
|
+
- Mention the config file location (`~/.raf/raf.config.json`) and three-tier precedence (CLI flag > config > defaults)
|
|
17
|
+
- Do NOT duplicate the full config schema — reference that `raf config` itself provides interactive help
|
|
18
|
+
|
|
19
|
+
### CLAUDE.md updates
|
|
20
|
+
- Expand the "Important Reminders" section to explicitly state that README must be updated when:
|
|
21
|
+
- New CLI commands are added
|
|
22
|
+
- Existing command APIs change (new flags, changed behavior)
|
|
23
|
+
- Important features are added (like worktrees, config, token tracking)
|
|
24
|
+
- Keep the reminder actionable and specific, not vague
|
|
25
|
+
|
|
26
|
+
## Implementation Steps
|
|
27
|
+
1. Read the current README.md to understand section ordering, style, and formatting conventions
|
|
28
|
+
2. Add a `### raf config` section after the existing `raf status` section, following the same format
|
|
29
|
+
3. Include a minimal config example showing 2-3 common settings (e.g., changing default model, setting worktree default)
|
|
30
|
+
4. Add `raf config` entries to the Command Reference tables
|
|
31
|
+
5. Update CLAUDE.md's "Important Reminders" with explicit README update policy
|
|
32
|
+
|
|
33
|
+
## Acceptance Criteria
|
|
34
|
+
- [ ] README has a `raf config` section with usage and basic example
|
|
35
|
+
- [ ] `raf config` appears in the Command Reference table
|
|
36
|
+
- [ ] Config file location and precedence rules are mentioned
|
|
37
|
+
- [ ] CLAUDE.md has explicit guidance about when to update README
|
|
38
|
+
- [ ] No existing README content is broken or removed
|
|
39
|
+
- [ ] Documentation tone and style match the rest of the README
|
|
40
|
+
|
|
41
|
+
## Notes
|
|
42
|
+
- The existing `raf config` command supports two options: the `--reset` flag and an inline prompt (`raf config "use haiku"`) — both should be mentioned in the README
|
|
43
|
+
- Look at `src/prompts/config-docs.md` for the full config reference — pick 2-3 of the most common/useful settings for the README example
|
|
44
|
+
- The README currently has a "Features" section that lists 7 features — consider adding "Configurable" to that list if not present
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Project Decisions
|
|
2
|
+
|
|
3
|
+
## For the per-task token summary, should it show accumulated total or per-attempt breakdown?
|
|
4
|
+
Per-attempt breakdown — show token usage for each attempt individually, plus a combined total.
|
|
5
|
+
|
|
6
|
+
## Should per-attempt breakdown appear in normal output or only with --verbose?
|
|
7
|
+
Always show breakdown — per-attempt details shown regardless of verbose flag for full cost transparency.
|
|
8
|
+
|
|
9
|
+
## Should TokenTracker store per-attempt data, or should accumulation happen in do.ts?
|
|
10
|
+
Tracker stores attempts — TokenTracker gains a richer data model with per-attempt entries. addTask accepts an array of UsageData. Centralized logic.
|
|
11
|
+
|
|
12
|
+
## Should the grand total summary also show per-attempt breakdown?
|
|
13
|
+
Grand total only — the final summary shows combined totals. Per-attempt detail is available in individual task summaries above.
|
|
14
|
+
|
|
15
|
+
## What format for the model name in log messages?
|
|
16
|
+
"...with sonnet" style — append 'with <model>' before the ellipsis, e.g., "Generating project name suggestions with sonnet..."
|
|
17
|
+
|
|
18
|
+
## Should the model name be the short alias or full model ID?
|
|
19
|
+
Short alias — display friendly names like 'sonnet', 'haiku', 'opus'. Cleaner output.
|
|
20
|
+
|
|
21
|
+
## Should model-in-log apply only to name generation or all Claude calls?
|
|
22
|
+
All Claude calls — add model names to all log messages where RAF invokes Claude (name generation, failure analysis, PR generation, config session).
|
|
23
|
+
|
|
24
|
+
## When config is invalid, should `raf config` silently fall back or warn?
|
|
25
|
+
Warn then continue — show a warning about the invalid config, then launch the interactive session normally with defaults.
|
|
26
|
+
|
|
27
|
+
## Should config resilience apply to all commands or only `raf config`?
|
|
28
|
+
Only `raf config` — it's the recovery tool. Other commands can still fail fast on invalid config.
|
|
29
|
+
|
|
30
|
+
## When verbose is ON, should the task name and elapsed time be shown as a header?
|
|
31
|
+
No header at all — when verbose is ON, only show Claude's raw output and tool descriptions. No task name or timer.
|
|
32
|
+
|
|
33
|
+
## When toggling back to verbose OFF, should the timer resume or reset?
|
|
34
|
+
Resume counting — timer continues from actual elapsed time since task start.
|
|
35
|
+
|
|
36
|
+
## When verbose is ON, should tool use descriptions still be shown?
|
|
37
|
+
Show both — show Claude's text AND tool use descriptions (→ Reading file.ts, → Running: npm test, etc.).
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
- [ ] **Accumulate token usage across retry attempts** — When a task retries, the usage-data assignment in the retry loop overwrites the prior `usageData`, and the tracker is only updated once after the retry loop, so tokens/cost from earlier failed attempts are dropped. In any task that takes multiple attempts, the per-task and total summaries underreport actual consumption, which skews cost reporting for long or flaky runs.
|
|
2
|
+
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
When I switch to verbose mode, I see output together with the timer and task name repeating on each line. Could you remove the interactive timer when verbose mode is ON, and put it back when verbose mode is OFF? Also, don't put the task name on each line when verbose mode is ON. See log: ```● 01-extend-token-tracker-data-model 34s [verbose: on]
|
|
6
|
+
● 01-extend-token-tracker-data-model 37s → Updating task list
|
|
7
|
+
|
|
8
|
+
● 01-extend-token-tracker-data-model 39sNow let me add the `accumulateUsage()` function. I'll add it before the TokenTracker class.
|
|
9
|
+
|
|
10
|
+
● 01-extend-token-tracker-data-model 46s → Editing /Users/eremeev/.raf/worktrees/RAF/ahtahs-token-reaper/src/utils/token-tracker.ts
|
|
11
|
+
|
|
12
|
+
● 01-extend-token-tracker-data-model 50s → Updating task list
|
|
13
|
+
|
|
14
|
+
● 01-extend-token-tracker-data-model 52sNow let me update the `addTask()` method to accept an array.
|
|
15
|
+
|
|
16
|
+
● 01-extend-token-tracker-data-model 53s → Reading /Users/eremeev/.raf/worktrees/RAF/ahtahs-token-reaper/src/utils/token-tracker.ts
|
|
17
|
+
|
|
18
|
+
● 01-extend-token-tracker-data-model 55sNow let me update the `addTask()` method to accept an array of UsageData.
|
|
19
|
+
|
|
20
|
+
● 01-extend-token-tracker-data-model 56s [verbose: off]```
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Task 01: Extend TokenTracker Data Model
|
|
2
|
+
|
|
3
|
+
## Summary
|
|
4
|
+
|
|
5
|
+
Refactored TokenTracker to accept and store per-attempt UsageData entries per task, enabling accurate token tracking across retries.
|
|
6
|
+
|
|
7
|
+
## Changes Made
|
|
8
|
+
|
|
9
|
+
### src/utils/token-tracker.ts
|
|
10
|
+
- Added `attempts: UsageData[]` field to `TaskUsageEntry` interface
|
|
11
|
+
- Created `accumulateUsage()` utility function that merges multiple UsageData objects into one, summing all token fields and merging modelUsage maps (handles different models across attempts)
|
|
12
|
+
- Updated `addTask()` signature to accept `UsageData[]` instead of single `UsageData`
|
|
13
|
+
- `addTask()` now calls `accumulateUsage()` to compute combined usage and stores raw attempts for future display breakdowns
|
|
14
|
+
|
|
15
|
+
### src/commands/do.ts
|
|
16
|
+
- Updated two call sites to wrap single `lastUsageData` in array `[lastUsageData]`
|
|
17
|
+
- Added TODO comments indicating these should pass all attempt data once retry loop accumulates them
|
|
18
|
+
|
|
19
|
+
### tests/unit/token-tracker.test.ts
|
|
20
|
+
- Updated all existing test calls to use array syntax `[usage]`
|
|
21
|
+
- Added new tests for:
|
|
22
|
+
- `accumulateUsage()` function (empty array, single element, multi-element, multi-model merging, non-mutation)
|
|
23
|
+
- Multi-attempt accumulation in `addTask()`
|
|
24
|
+
- Cost calculation for multi-model retry scenarios
|
|
25
|
+
- `attempts` array storage in entries
|
|
26
|
+
|
|
27
|
+
## Acceptance Criteria Verification
|
|
28
|
+
|
|
29
|
+
- [x] `TaskUsageEntry` has an `attempts: UsageData[]` field
|
|
30
|
+
- [x] `addTask()` accepts an array and correctly accumulates tokens across attempts
|
|
31
|
+
- [x] `accumulateUsage()` correctly sums all token fields including per-model breakdowns
|
|
32
|
+
- [x] `getTotals()` returns correct grand totals when tasks have multiple attempts
|
|
33
|
+
- [x] Single-attempt tasks behave identically to before
|
|
34
|
+
- [x] All existing and new token-tracker tests pass (27 tests)
|
|
35
|
+
|
|
36
|
+
## Notes
|
|
37
|
+
|
|
38
|
+
- The `accumulateUsage()` function handles the case where different attempts use different models (e.g., Opus on first attempt, Sonnet on retry due to fallback)
|
|
39
|
+
- `calculateCost()` was left unchanged as designed - it operates on the accumulated UsageData
|
|
40
|
+
- Pre-existing test failures in validation.test.ts and claude-runner-interactive.test.ts are unrelated to this task
|
|
41
|
+
|
|
42
|
+
<promise>COMPLETE</promise>
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Task 02: Accumulate Usage in Retry Loop
|
|
2
|
+
|
|
3
|
+
## Summary
|
|
4
|
+
|
|
5
|
+
Modified the retry loop in `do.ts` to collect usage data from every attempt instead of overwriting it, and pass the full array to TokenTracker for accurate token tracking across retries.
|
|
6
|
+
|
|
7
|
+
## Changes Made
|
|
8
|
+
|
|
9
|
+
### src/commands/do.ts
|
|
10
|
+
- Replaced `let lastUsageData: UsageData | undefined` with `const attemptUsageData: UsageData[] = []`
|
|
11
|
+
- Changed from overwriting `lastUsageData = result.usageData` to `attemptUsageData.push(result.usageData)` when usage data is present
|
|
12
|
+
- Updated success path (lines ~1091-1095): now checks `attemptUsageData.length > 0` and passes the full array to `tokenTracker.addTask()`
|
|
13
|
+
- Updated failure path (lines ~1118-1122): same change, passes full array for partial data tracking
|
|
14
|
+
- Removed TODO comments that were added in Task 01 as placeholders
|
|
15
|
+
|
|
16
|
+
## Acceptance Criteria Verification
|
|
17
|
+
|
|
18
|
+
- [x] Usage data from all retry attempts is collected in an array
|
|
19
|
+
- [x] The full array is passed to `tokenTracker.addTask()`
|
|
20
|
+
- [x] Attempts with no usage data (timeout/crash) are excluded from the array (only push when `result.usageData` is defined)
|
|
21
|
+
- [x] Single-attempt tasks still work correctly (array of length 1)
|
|
22
|
+
- [x] All tests pass (token-tracker: 27 tests, do-*: 44 tests)
|
|
23
|
+
|
|
24
|
+
## Notes
|
|
25
|
+
|
|
26
|
+
- The `lastOutput` variable remains unchanged as designed - only final output matters for result parsing
|
|
27
|
+
- The existing tests from Task 01 already cover the accumulation logic in `TokenTracker` and `accumulateUsage()`
|
|
28
|
+
- The change is minimal and surgical - only the usage data collection mechanism was updated
|
|
29
|
+
- Edge cases (timeouts, crashes, context overflow) correctly result in no usage data being pushed for that attempt
|
|
30
|
+
|
|
31
|
+
<promise>COMPLETE</promise>
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# Task 03: Per-Attempt Display Formatting
|
|
2
|
+
|
|
3
|
+
## Summary
|
|
4
|
+
|
|
5
|
+
Updated `formatTaskTokenSummary()` to display a per-attempt breakdown when a task took multiple attempts, while keeping single-attempt output unchanged.
|
|
6
|
+
|
|
7
|
+
## Changes Made
|
|
8
|
+
|
|
9
|
+
### src/utils/terminal-symbols.ts
|
|
10
|
+
- Added import for `TaskUsageEntry` type from token-tracker
|
|
11
|
+
- Created internal `formatTokenLine()` helper function that formats a single line of token usage (used for both attempts and totals)
|
|
12
|
+
- Updated `formatTaskTokenSummary()` signature to accept:
|
|
13
|
+
- `entry: TaskUsageEntry` (replaces separate `usage` and `cost` parameters)
|
|
14
|
+
- `calculateAttemptCost?: (usage: UsageData) => CostBreakdown` (optional callback for per-attempt cost calculation)
|
|
15
|
+
- Single-attempt behavior: When `entry.attempts.length <= 1`, output is identical to the previous format: `"  Tokens: X in / Y out | Cache: ... | Est. cost: $X.XX"`
|
|
16
|
+
- Multi-attempt behavior: Shows per-attempt breakdown with:
|
|
17
|
+
- Each attempt on its own line: `"    Attempt N: X in / Y out | Cache: ... | Est. cost: $X.XX"`
|
|
18
|
+
- Total line at the end: `" Total: X in / Y out | Cache: ... | Est. cost: $X.XX"`
|
|
19
|
+
|
|
20
|
+
### src/commands/do.ts
|
|
21
|
+
- Updated both call sites (success and failure paths) to pass the full `TaskUsageEntry` and the `calculateCost` callback:
|
|
22
|
+
- `logger.dim(formatTaskTokenSummary(entry, (u) => tokenTracker.calculateCost(u)))`
|
|
23
|
+
|
|
24
|
+
### tests/unit/terminal-symbols.test.ts
|
|
25
|
+
- Added import for `TaskUsageEntry` type
|
|
26
|
+
- Created `makeEntry()` helper to construct `TaskUsageEntry` objects for testing
|
|
27
|
+
- Reorganized `formatTaskTokenSummary` tests into two describe blocks:
|
|
28
|
+
- `single-attempt tasks`: 6 tests verifying unchanged behavior for single-attempt scenarios
|
|
29
|
+
- `multi-attempt tasks`: 4 tests covering multi-attempt formatting, cost calculation, cache tokens, and 3+ attempts
|
|
30
|
+
|
|
31
|
+
## Example Output
|
|
32
|
+
|
|
33
|
+
**Single-attempt (unchanged):**
|
|
34
|
+
```
|
|
35
|
+
Tokens: 5,234 in / 1,023 out | Cache: 18,500 read | Est. cost: $0.42
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
**Multi-attempt (new):**
|
|
39
|
+
```
|
|
40
|
+
Attempt 1: 1,234 in / 567 out | Est. cost: $0.02
|
|
41
|
+
Attempt 2: 2,345 in / 890 out | Est. cost: $0.04
|
|
42
|
+
Total: 3,579 in / 1,457 out | Est. cost: $0.06
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Acceptance Criteria Verification
|
|
46
|
+
|
|
47
|
+
- [x] Single-attempt tasks display identically to current format
|
|
48
|
+
- [x] Multi-attempt tasks show per-attempt lines plus a total
|
|
49
|
+
- [x] Formatting is clean and readable in terminal output
|
|
50
|
+
- [x] `formatTokenTotalSummary()` is unchanged
|
|
51
|
+
- [x] All call sites updated
|
|
52
|
+
- [x] All tests pass (135 tests including 10 new tests for this feature)
|
|
53
|
+
|
|
54
|
+
## Notes
|
|
55
|
+
|
|
56
|
+
- The `calculateAttemptCost` callback is optional; when not provided, per-attempt costs show `$0.00` (the total still shows accurate accumulated cost)
|
|
57
|
+
- Per-attempt lines use 4-space indent to visually nest under the task, while single-attempt uses 2-space indent
|
|
58
|
+
- Cache tokens are included in per-attempt breakdowns when present
|
|
59
|
+
|
|
60
|
+
<promise>COMPLETE</promise>
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# Task 04: Add Model Name to Claude Invocation Logs
|
|
2
|
+
|
|
3
|
+
## Summary
|
|
4
|
+
|
|
5
|
+
Added a `getModelShortName()` utility function and updated all four Claude invocation log messages to display the short model alias (e.g., "sonnet", "haiku", "opus").
|
|
6
|
+
|
|
7
|
+
## Changes Made
|
|
8
|
+
|
|
9
|
+
### src/utils/config.ts
|
|
10
|
+
- Added `getModelShortName(modelId: string)` utility function that:
|
|
11
|
+
- Returns short aliases (`opus`, `sonnet`, `haiku`) as-is
|
|
12
|
+
- Extracts family from full model IDs (e.g., `claude-sonnet-4-5-20250929` → `sonnet`)
|
|
13
|
+
- Returns unknown model IDs as-is for graceful fallback
|
|
14
|
+
|
|
15
|
+
### src/commands/plan.ts
|
|
16
|
+
- Added import for `getModel` and `getModelShortName`
|
|
17
|
+
- Updated name generation log: `"Generating project name suggestions with ${nameModel}..."`
|
|
18
|
+
|
|
19
|
+
### src/commands/do.ts
|
|
20
|
+
- Added import for `getModel` and `getModelShortName`
|
|
21
|
+
- Updated failure analysis log: `"Analyzing failure with ${analysisModel}..."`
|
|
22
|
+
|
|
23
|
+
### src/core/pull-request.ts
|
|
24
|
+
- Added import for `getModelShortName`
|
|
25
|
+
- Added new log message in `generatePrBody()`: `"Generating PR with ${prModel}..."`
|
|
26
|
+
|
|
27
|
+
### src/commands/config.ts
|
|
28
|
+
- Added import for `getModelShortName`
|
|
29
|
+
- Consolidated two log lines into one: `"Starting config session with ${configModel}..."`
|
|
30
|
+
- Previously: "Starting config session with Claude..." + "Using model: ${model}"
|
|
31
|
+
- Now: single line with short model name
|
|
32
|
+
|
|
33
|
+
### tests/unit/config.test.ts
|
|
34
|
+
- Added import for `getModelShortName`
|
|
35
|
+
- Added test suite with 3 test cases:
|
|
36
|
+
- `should return short aliases as-is`
|
|
37
|
+
- `should extract family from full model IDs`
|
|
38
|
+
- `should return unknown model IDs as-is`
|
|
39
|
+
|
|
40
|
+
## Acceptance Criteria Verification
|
|
41
|
+
|
|
42
|
+
- [x] All four Claude invocation points show the model short name in their log messages
|
|
43
|
+
- Name generation: `"Generating project name suggestions with sonnet..."`
|
|
44
|
+
- Failure analysis: `"Analyzing failure with haiku..."`
|
|
45
|
+
- PR generation: `"Generating PR with sonnet..."`
|
|
46
|
+
- Config session: `"Starting config session with sonnet..."`
|
|
47
|
+
- [x] Short name extraction works for full model IDs and already-short names
|
|
48
|
+
- [x] Log format follows the `"...with <model>..."` pattern
|
|
49
|
+
- [x] Unit tests cover the short name utility (3 tests)
|
|
50
|
+
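- [x] All tests pass (95 config tests, 1156 total passing; pre-existing failures in `validation.test.ts` and `claude-runner-interactive.test.ts` are unrelated)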
|
|
51
|
+
|
|
52
|
+
## Notes
|
|
53
|
+
|
|
54
|
+
- Pre-existing test failures in `validation.test.ts` and `claude-runner-interactive.test.ts` are unrelated to this task
|
|
55
|
+
- The `getModelShortName()` function reuses logic similar to `resolveModelPricingCategory()` but returns the original string for unknown models instead of `null`
|
|
56
|
+
|
|
57
|
+
<promise>COMPLETE</promise>
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Task 05: Handle Invalid Config Gracefully in raf config Command
|
|
2
|
+
|
|
3
|
+
## Summary
|
|
4
|
+
|
|
5
|
+
Made `raf config` resilient to invalid or corrupt config files so it can serve as the recovery path for broken configurations. Previously, if `~/.raf/raf.config.json` contained invalid JSON or failed schema validation, `raf config` would crash before the interactive session could launch, blocking users from fixing the issue.
|
|
6
|
+
|
|
7
|
+
## Changes Made
|
|
8
|
+
|
|
9
|
+
### src/commands/config.ts
|
|
10
|
+
- Added import for `resetConfigCache` from config utilities
|
|
11
|
+
- Added import for `DEFAULT_CONFIG` from types/config
|
|
12
|
+
- Wrapped `getModel('config')` and `getEffort('config')` calls in try-catch block
|
|
13
|
+
- On error, falls back to `DEFAULT_CONFIG.models.config` ('sonnet') and `DEFAULT_CONFIG.effort.config` ('medium')
|
|
14
|
+
- Displays warning message with the specific error: "Config file has errors, using defaults: {message}"
|
|
15
|
+
- Provides guidance: "Fix the config in this session or run `raf config --reset` to start fresh."
|
|
16
|
+
- Calls `resetConfigCache()` to clear any broken cached config
|
|
17
|
+
- The interactive Claude session still receives the broken config file contents via `getCurrentConfigState()`, so the user can see and fix the issue
|
|
18
|
+
|
|
19
|
+
### tests/unit/config-command.test.ts
|
|
20
|
+
- Added imports for `resolveConfig`, `getModel`, `getEffort`, `resetConfigCache`, and `DEFAULT_CONFIG`
|
|
21
|
+
- Added `resetConfigCache()` calls to beforeEach/afterEach for test isolation
|
|
22
|
+
- Added new test suite "Error recovery - invalid config fallback" with 6 tests:
|
|
23
|
+
- Throws on invalid JSON when resolving config
|
|
24
|
+
- Throws on schema validation failure when resolving config
|
|
25
|
+
- Default fallback values are correct for config scenario
|
|
26
|
+
- Raw file contents readable even with invalid JSON
|
|
27
|
+
- Raw file contents readable even with schema validation failure
|
|
28
|
+
- resetConfigCache clears the cached config
|
|
29
|
+
|
|
30
|
+
## Acceptance Criteria Verification
|
|
31
|
+
|
|
32
|
+
- [x] `raf config` launches successfully even when `~/.raf/raf.config.json` is invalid JSON
|
|
33
|
+
- [x] `raf config` launches successfully even when config fails schema validation
|
|
34
|
+
- [x] A clear warning is displayed to the user about the config error
|
|
35
|
+
- [x] The interactive session uses default model/effort values as fallback
|
|
36
|
+
- [x] The broken config content is still visible in the session for the user to fix
|
|
37
|
+
- [x] Other commands (`raf plan`, `raf do`, `raf status`) still fail fast on invalid config
|
|
38
|
+
- [x] All tests pass (121 config-related tests, 1162 total passing; pre-existing failures in validation.test.ts and claude-runner-interactive.test.ts are unrelated)
|
|
39
|
+
|
|
40
|
+
## Notes
|
|
41
|
+
|
|
42
|
+
- The error handling is specific to `raf config` - other commands continue to fail fast on invalid config as expected
|
|
43
|
+
- The `getCurrentConfigState()` function reads raw file content without JSON parsing, so broken content is always available to Claude in the session
|
|
44
|
+
- The `--reset` option doesn't need this fix since it deletes the file without loading it
|
|
45
|
+
|
|
46
|
+
<promise>COMPLETE</promise>
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Task 06: Fix Verbose Toggle Timer Display
|
|
2
|
+
|
|
3
|
+
## Summary
|
|
4
|
+
|
|
5
|
+
Modified the timer callback in `do.ts` to check the verbose toggle state on each tick. When verbose is toggled ON at runtime, the status line is immediately cleared and updates are skipped. When toggled back OFF, the timer resumes displaying with the accurate elapsed time.
|
|
6
|
+
|
|
7
|
+
## Changes Made
|
|
8
|
+
|
|
9
|
+
### src/commands/do.ts
|
|
10
|
+
- Updated the `onTick` callback (lines 915-923) to check `verboseToggle.isVerbose` on every tick
|
|
11
|
+
- When verbose is ON: calls `statusLine.clear()` and returns early (skipping the update)
|
|
12
|
+
- When verbose is OFF: updates the status line as normal with task progress
|
|
13
|
+
- The timer continues tracking elapsed time internally regardless of display state
|
|
14
|
+
|
|
15
|
+
### tests/unit/timer-verbose-integration.test.ts (new file)
|
|
16
|
+
- Created new test file with 5 tests covering the timer-verbose integration:
|
|
17
|
+
- `should update status line when verbose is off`
|
|
18
|
+
- `should clear status line and skip update when verbose is toggled on`
|
|
19
|
+
- `should resume updating status line when verbose is toggled back off`
|
|
20
|
+
- `should track elapsed time correctly regardless of verbose state`
|
|
21
|
+
- `should not create timer callback when started with verbose flag`
|
|
22
|
+
|
|
23
|
+
## Acceptance Criteria Verification
|
|
24
|
+
|
|
25
|
+
- [x] Toggling verbose ON clears the status line and stops timer/task-name display
|
|
26
|
+
- [x] Toggling verbose OFF resumes the timer/status line with correct elapsed time
|
|
27
|
+
- [x] No task name prefix appears on verbose output lines (status line cleared immediately)
|
|
28
|
+
- [x] Starting with `--verbose` flag still works as before (no timer callback created)
|
|
29
|
+
- [x] Timer internally tracks elapsed time correctly regardless of display state
|
|
30
|
+
- [x] All existing tests pass (1167 passing; 3 pre-existing failures in validation.test.ts and claude-runner-interactive.test.ts are unrelated)
|
|
31
|
+
|
|
32
|
+
## Notes
|
|
33
|
+
|
|
34
|
+
- The fix is minimal: just 4 lines added to the existing `onTick` callback
|
|
35
|
+
- The `statusLine.clear()` call happens on every tick while verbose is on, which is safe because the clear operation is idempotent
|
|
36
|
+
- The next tick after toggling verbose OFF will immediately show the correct elapsed time since the timer tracks time independently
|
|
37
|
+
|
|
38
|
+
<promise>COMPLETE</promise>
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Task: Extend TokenTracker to store per-attempt usage data
|
|
2
|
+
|
|
3
|
+
## Objective
|
|
4
|
+
Refactor TokenTracker to accept and store an array of per-attempt UsageData entries per task, instead of a single UsageData.
|
|
5
|
+
|
|
6
|
+
## Context
|
|
7
|
+
Currently TokenTracker stores one `UsageData` per task via `addTask(taskId, usage)`. When a task retries, only the last attempt's data reaches the tracker. To fix underreporting, the tracker needs to accept multiple attempt entries per task and compute totals from all of them.
|
|
8
|
+
|
|
9
|
+
## Requirements
|
|
10
|
+
- Change `TaskUsageEntry` to hold an array of attempt `UsageData` entries alongside the aggregated totals
|
|
11
|
+
- Update `addTask()` to accept an array of `UsageData` (one per attempt) instead of a single `UsageData`
|
|
12
|
+
- The per-entry `usage` field should be the sum of all attempts (for backward compatibility with `getTotals()`)
|
|
13
|
+
- The per-entry `cost` field should be the sum of all attempts' costs
|
|
14
|
+
- Store the raw per-attempt data so formatting functions can display breakdowns
|
|
15
|
+
- `getTotals()` should continue to work correctly — it already sums across entries, so as long as each entry's `usage` is the accumulated total, no changes are needed there
|
|
16
|
+
- Add a helper method or utility to merge/accumulate multiple `UsageData` objects into one
|
|
17
|
+
- Maintain backward compatibility: if only one attempt occurred, behavior is identical to today
|
|
18
|
+
- Cover changes with unit tests
|
|
19
|
+
|
|
20
|
+
## Implementation Steps
|
|
21
|
+
1. Add an `attempts` field to `TaskUsageEntry` that stores the array of individual `UsageData` objects
|
|
22
|
+
2. Create an `accumulateUsage()` utility that merges multiple `UsageData` into a single combined `UsageData` (summing all token fields and merging `modelUsage` maps)
|
|
23
|
+
3. Update the `addTask()` signature to accept `UsageData[]` — it calls `accumulateUsage()` to compute the combined `usage`, then calls `calculateCost()` on that combined result
|
|
24
|
+
4. Update existing tests and add new tests for multi-attempt accumulation
|
|
25
|
+
|
|
26
|
+
## Acceptance Criteria
|
|
27
|
+
- [ ] `TaskUsageEntry` has an `attempts: UsageData[]` field
|
|
28
|
+
- [ ] `addTask()` accepts an array and correctly accumulates tokens across attempts
|
|
29
|
+
- [ ] `accumulateUsage()` correctly sums all token fields including per-model breakdowns
|
|
30
|
+
- [ ] `getTotals()` returns correct grand totals when tasks have multiple attempts
|
|
31
|
+
- [ ] Single-attempt tasks behave identically to before
|
|
32
|
+
- [ ] All existing and new tests pass
|
|
33
|
+
|
|
34
|
+
## Notes
|
|
35
|
+
- The `accumulateUsage()` helper should handle merging `modelUsage` maps where different attempts may use different models (e.g., attempt 1 uses Opus, retry uses Sonnet via fallback)
|
|
36
|
+
- Keep `calculateCost()` unchanged — it operates on a single `UsageData` which is the accumulated total
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Task: Accumulate usage data across retry attempts in the retry loop
|
|
2
|
+
|
|
3
|
+
## Objective
|
|
4
|
+
Change the retry loop in `do.ts` to collect usage data from every attempt instead of overwriting it, and pass the full array to TokenTracker.
|
|
5
|
+
|
|
6
|
+
## Context
|
|
7
|
+
The retry loop in `src/commands/do.ts` (around lines 908-1021) currently declares a single `lastUsageData` variable that gets overwritten on each retry attempt. After the loop, only the final attempt's data is passed to `tokenTracker.addTask()`. This must change to collect all attempts' data.
|
|
8
|
+
|
|
9
|
+
## Dependencies
|
|
10
|
+
01
|
|
11
|
+
|
|
12
|
+
## Requirements
|
|
13
|
+
- Replace the single `lastUsageData` variable with an array that collects `UsageData` from each attempt
|
|
14
|
+
- Push each attempt's `usageData` into the array (when present) instead of overwriting
|
|
15
|
+
- After the retry loop, pass the full array to `tokenTracker.addTask()` (using the new signature from task 01)
|
|
16
|
+
- Both success and failure paths (lines ~1090 and ~1117) should pass the array
|
|
17
|
+
- Handle edge case: some attempts may not produce `usageData` (timeout, crash) — skip those entries
|
|
18
|
+
- Cover changes with tests
|
|
19
|
+
|
|
20
|
+
## Implementation Steps
|
|
21
|
+
1. Replace `let lastUsageData: UsageData | undefined` with `const attemptUsageData: UsageData[] = []`
|
|
22
|
+
2. Inside the retry loop, change the overwrite (`lastUsageData = result.usageData`) to a push (`attemptUsageData.push(result.usageData)`) when `result.usageData` is defined
|
|
23
|
+
3. Update the success path: call `tokenTracker.addTask(task.id, attemptUsageData)` when the array is non-empty
|
|
24
|
+
4. Update the failure path: same change
|
|
25
|
+
5. Add/update tests to verify accumulation across retries
|
|
26
|
+
|
|
27
|
+
## Acceptance Criteria
|
|
28
|
+
- [ ] Usage data from all retry attempts is collected in an array
|
|
29
|
+
- [ ] The full array is passed to `tokenTracker.addTask()`
|
|
30
|
+
- [ ] Attempts with no usage data (timeout/crash) are excluded from the array
|
|
31
|
+
- [ ] Single-attempt tasks still work correctly (array of length 1)
|
|
32
|
+
- [ ] All tests pass
|
|
33
|
+
|
|
34
|
+
## Notes
|
|
35
|
+
- The variable `lastOutput` should remain as-is (overwritten each attempt) since only the final output matters for result parsing
|
|
36
|
+
- Look at the `result.output` fallback path (lines 971-974) — the old code had a fallback that assigned `lastUsageData = result.output`, which looks like a type error; clean this up if it's not needed
|