wogiflow 2.4.2 → 2.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210) hide show
  1. package/.claude/commands/wogi-start.md +124 -0
  2. package/.claude/docs/claude-code-compatibility.md +51 -0
  3. package/.claude/docs/explore-agents.md +11 -0
  4. package/.claude/settings.json +12 -1
  5. package/.workflow/models/registry.json +1 -1
  6. package/bin/flow +11 -1
  7. package/lib/workspace-contracts.js +599 -0
  8. package/lib/workspace-intelligence.js +600 -0
  9. package/lib/workspace-messages.js +441 -0
  10. package/lib/workspace-routing.js +485 -0
  11. package/lib/workspace-sync.js +339 -0
  12. package/lib/workspace.js +1073 -0
  13. package/package.json +4 -4
  14. package/scripts/MEMORY-ARCHITECTURE.md +1 -1
  15. package/scripts/base-workflow-step.js +136 -0
  16. package/scripts/flow-adaptive-learning.js +8 -9
  17. package/scripts/flow-aggregate.js +11 -6
  18. package/scripts/flow-api-index.js +4 -6
  19. package/scripts/flow-assumption-detector.js +0 -2
  20. package/scripts/flow-audit.js +15 -2
  21. package/scripts/flow-auto-context.js +8 -12
  22. package/scripts/flow-auto-learn.js +49 -49
  23. package/scripts/flow-background.js +5 -6
  24. package/scripts/flow-bridge-state.js +8 -10
  25. package/scripts/flow-bulk-loop.js +1 -3
  26. package/scripts/flow-bulk-orchestrator.js +1 -3
  27. package/scripts/flow-cascade-completion.js +0 -2
  28. package/scripts/flow-cascade.js +4 -4
  29. package/scripts/flow-checkpoint.js +10 -13
  30. package/scripts/flow-code-intelligence.js +10 -12
  31. package/scripts/flow-community-sync.js +4 -4
  32. package/scripts/flow-community.js +12 -20
  33. package/scripts/flow-config-defaults.js +28 -2
  34. package/scripts/flow-config-interactive.js +9 -5
  35. package/scripts/flow-config-loader.js +49 -92
  36. package/scripts/flow-config-substitution.js +0 -2
  37. package/scripts/flow-context-estimator.js +4 -4
  38. package/scripts/flow-context-init.js +10 -12
  39. package/scripts/flow-context-manager.js +0 -2
  40. package/scripts/flow-context-scoring.js +2 -2
  41. package/scripts/flow-contract-scan.js +6 -9
  42. package/scripts/flow-correct.js +29 -27
  43. package/scripts/flow-correction-detector.js +5 -1
  44. package/scripts/flow-damage-control.js +47 -54
  45. package/scripts/flow-decisions-merge.js +4 -14
  46. package/scripts/flow-diff.js +5 -8
  47. package/scripts/flow-done-gates.js +786 -0
  48. package/scripts/flow-done-report.js +123 -0
  49. package/scripts/flow-done.js +71 -717
  50. package/scripts/flow-entropy-monitor.js +1 -3
  51. package/scripts/flow-eval-calibration.js +257 -0
  52. package/scripts/flow-eval-judge.js +10 -1
  53. package/scripts/flow-eval.js +14 -5
  54. package/scripts/flow-extraction-review.js +1 -0
  55. package/scripts/flow-failure-categories.js +0 -2
  56. package/scripts/flow-figma-confirm.js +5 -9
  57. package/scripts/flow-figma-generate.js +8 -10
  58. package/scripts/flow-figma-index.js +8 -10
  59. package/scripts/flow-figma-match.js +3 -5
  60. package/scripts/flow-figma-mcp-server.js +2 -4
  61. package/scripts/flow-figma-orchestrator.js +2 -3
  62. package/scripts/flow-figma-registry.js +2 -3
  63. package/scripts/flow-framework-resolver.js +0 -2
  64. package/scripts/flow-function-index.js +4 -6
  65. package/scripts/flow-gate-confidence.js +2 -2
  66. package/scripts/flow-gitignore.js +0 -2
  67. package/scripts/flow-guided-edit.js +5 -6
  68. package/scripts/flow-health.js +5 -6
  69. package/scripts/flow-hook-errors.js +6 -0
  70. package/scripts/flow-hook-status.js +263 -0
  71. package/scripts/flow-hooks.js +17 -29
  72. package/scripts/flow-http-client.js +9 -8
  73. package/scripts/flow-hybrid-interactive.js +7 -12
  74. package/scripts/flow-hybrid-test.js +12 -13
  75. package/scripts/flow-instruction-richness.js +1 -1
  76. package/scripts/flow-io.js +21 -4
  77. package/scripts/flow-knowledge-router.js +9 -3
  78. package/scripts/flow-learning-orchestrator.js +318 -13
  79. package/scripts/flow-links.js +5 -7
  80. package/scripts/flow-long-input-association.js +275 -0
  81. package/scripts/flow-long-input-chunking.js +1 -0
  82. package/scripts/flow-long-input-cli.js +0 -2
  83. package/scripts/flow-long-input-complexity.js +0 -2
  84. package/scripts/flow-long-input-constants.js +0 -2
  85. package/scripts/flow-long-input-contradictions.js +351 -0
  86. package/scripts/flow-long-input-detection.js +0 -2
  87. package/scripts/flow-long-input-passes.js +885 -0
  88. package/scripts/flow-long-input-stories.js +1 -1
  89. package/scripts/flow-long-input-voice.js +0 -2
  90. package/scripts/flow-long-input.js +425 -3005
  91. package/scripts/flow-loop-retry-learning.js +2 -3
  92. package/scripts/flow-lsp.js +3 -3
  93. package/scripts/flow-mcp-docs.js +3 -4
  94. package/scripts/flow-memory-db.js +6 -8
  95. package/scripts/flow-memory-sync.js +18 -11
  96. package/scripts/flow-metrics.js +1 -2
  97. package/scripts/flow-model-adapter.js +2 -3
  98. package/scripts/flow-model-config.js +72 -104
  99. package/scripts/flow-model-router.js +2 -2
  100. package/scripts/flow-model-types.js +0 -2
  101. package/scripts/flow-multi-approach.js +5 -6
  102. package/scripts/flow-orchestrate-context.js +3 -7
  103. package/scripts/flow-orchestrate-rollback.js +3 -8
  104. package/scripts/flow-orchestrate-state.js +8 -14
  105. package/scripts/flow-orchestrate-templates.js +2 -6
  106. package/scripts/flow-orchestrate-validator.js +5 -9
  107. package/scripts/flow-orchestrate.js +126 -103
  108. package/scripts/flow-output.js +0 -2
  109. package/scripts/flow-parallel.js +1 -1
  110. package/scripts/flow-paths.js +23 -2
  111. package/scripts/flow-pattern-enforcer.js +30 -28
  112. package/scripts/flow-pattern-extractor.js +3 -4
  113. package/scripts/flow-pending.js +0 -2
  114. package/scripts/flow-permissions.js +2 -3
  115. package/scripts/flow-plugin-registry.js +10 -12
  116. package/scripts/flow-prd-manager.js +1 -1
  117. package/scripts/flow-progress.js +7 -9
  118. package/scripts/flow-prompt-composer.js +3 -3
  119. package/scripts/flow-prompt-template.js +2 -2
  120. package/scripts/flow-providers.js +7 -4
  121. package/scripts/flow-registry-manager.js +7 -12
  122. package/scripts/flow-regression.js +9 -11
  123. package/scripts/flow-roadmap.js +2 -2
  124. package/scripts/flow-run-trace.js +16 -15
  125. package/scripts/flow-safety.js +2 -5
  126. package/scripts/flow-scanner-base.js +5 -7
  127. package/scripts/flow-scenario-engine.js +1 -5
  128. package/scripts/flow-security.js +29 -0
  129. package/scripts/flow-session-end.js +32 -41
  130. package/scripts/flow-session-learning.js +53 -49
  131. package/scripts/flow-setup-hooks.js +2 -3
  132. package/scripts/flow-skill-create.js +7 -12
  133. package/scripts/flow-skill-generator.js +12 -16
  134. package/scripts/flow-skill-learn.js +17 -8
  135. package/scripts/flow-skill-matcher.js +1 -2
  136. package/scripts/flow-spec-generator.js +2 -4
  137. package/scripts/flow-stack-wizard.js +5 -7
  138. package/scripts/flow-standards-learner.js +35 -16
  139. package/scripts/flow-start.js +2 -0
  140. package/scripts/flow-stats-collector.js +2 -2
  141. package/scripts/flow-status.js +10 -10
  142. package/scripts/flow-statusline-setup.js +2 -2
  143. package/scripts/flow-step-changelog.js +2 -3
  144. package/scripts/flow-step-comments.js +66 -81
  145. package/scripts/flow-step-complexity.js +50 -70
  146. package/scripts/flow-step-coverage.js +3 -5
  147. package/scripts/flow-step-knowledge.js +2 -3
  148. package/scripts/flow-step-pr-tests.js +64 -74
  149. package/scripts/flow-step-regression.js +3 -5
  150. package/scripts/flow-step-review.js +86 -103
  151. package/scripts/flow-step-security.js +111 -121
  152. package/scripts/flow-step-silent-failures.js +56 -83
  153. package/scripts/flow-step-simplifier.js +52 -70
  154. package/scripts/flow-story.js +4 -7
  155. package/scripts/flow-strict-adherence.js +3 -4
  156. package/scripts/flow-task-checkpoint.js +36 -5
  157. package/scripts/flow-task-enforcer.js +2 -24
  158. package/scripts/flow-tech-debt.js +1 -1
  159. package/scripts/flow-template-extractor.js +1 -0
  160. package/scripts/flow-templates.js +11 -13
  161. package/scripts/flow-test-api.js +9 -13
  162. package/scripts/flow-test-discovery.js +1 -1
  163. package/scripts/flow-test-generate.js +5 -9
  164. package/scripts/flow-test-integrity.js +3 -7
  165. package/scripts/flow-test-ui.js +5 -9
  166. package/scripts/flow-testing-deps.js +1 -3
  167. package/scripts/flow-tiered-learning.js +4 -4
  168. package/scripts/flow-todowrite-sync.js +1 -1
  169. package/scripts/flow-tokens.js +0 -2
  170. package/scripts/flow-verification-profile.js +6 -10
  171. package/scripts/flow-verify.js +12 -16
  172. package/scripts/flow-version-check.js +4 -12
  173. package/scripts/flow-webmcp-generator.js +3 -5
  174. package/scripts/flow-workflow-steps.js +0 -2
  175. package/scripts/flow-workflow.js +9 -11
  176. package/scripts/hooks/adapters/claude-code.js +31 -0
  177. package/scripts/hooks/core/config-change.js +1 -0
  178. package/scripts/hooks/core/extension-registry.js +0 -2
  179. package/scripts/hooks/core/instructions-loaded.js +1 -1
  180. package/scripts/hooks/core/observation-capture.js +5 -5
  181. package/scripts/hooks/core/phase-gate.js +5 -0
  182. package/scripts/hooks/core/post-compact.js +1 -12
  183. package/scripts/hooks/core/research-gate.js +2 -12
  184. package/scripts/hooks/core/routing-gate.js +6 -0
  185. package/scripts/hooks/core/task-completed.js +12 -0
  186. package/scripts/hooks/core/task-created.js +83 -0
  187. package/scripts/hooks/core/worktree-lifecycle.js +1 -1
  188. package/scripts/hooks/entry/claude-code/config-change.js +6 -29
  189. package/scripts/hooks/entry/claude-code/instructions-loaded.js +5 -30
  190. package/scripts/hooks/entry/claude-code/post-compact.js +4 -31
  191. package/scripts/hooks/entry/claude-code/post-tool-use.js +121 -172
  192. package/scripts/hooks/entry/claude-code/pre-tool-use.js +260 -361
  193. package/scripts/hooks/entry/claude-code/session-end.js +4 -28
  194. package/scripts/hooks/entry/claude-code/session-start.js +205 -243
  195. package/scripts/hooks/entry/claude-code/setup.js +8 -49
  196. package/scripts/hooks/entry/claude-code/stop.js +40 -72
  197. package/scripts/hooks/entry/claude-code/task-completed.js +4 -28
  198. package/scripts/hooks/entry/claude-code/task-created.js +15 -0
  199. package/scripts/hooks/entry/claude-code/user-prompt-submit.js +113 -195
  200. package/scripts/hooks/entry/claude-code/worktree-create.js +6 -25
  201. package/scripts/hooks/entry/claude-code/worktree-remove.js +6 -25
  202. package/scripts/hooks/entry/shared/hook-runner.js +99 -0
  203. package/scripts/hooks/entry/shared/read-stdin.js +0 -2
  204. package/scripts/postinstall.js +2 -0
  205. package/scripts/registries/api-registry.js +0 -2
  206. package/scripts/registries/component-registry.js +5 -9
  207. package/scripts/registries/contract-scanner.js +2 -9
  208. package/scripts/registries/function-registry.js +0 -2
  209. package/scripts/registries/schema-registry.js +14 -18
  210. package/scripts/registries/service-registry.js +23 -27
@@ -305,6 +305,45 @@ Test framework auto-detected from package.json: jest, vitest, mocha, tap, or fal
305
305
  4. If failing: debug, fix, retry (max 5 attempts)
306
306
  5. Mark completed only when verification passes
307
307
 
308
+ ### Step 3.05: Sprint-Based Context Reset (L1+ tasks with 5+ criteria)
309
+
310
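+ **Activates when**: `config.sprintReset.enabled` (default: true) AND the task has at least `config.sprintReset.minTaskCriteria` (default: 5) acceptance criteria AND the number of the criterion just completed is a multiple of `config.sprintReset.criteriaPerSprint` (default: 3) AND at least one criterion remains.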
311
+
312
+ **The problem this solves**: For large tasks, context fills with implementation details from early criteria. By criterion 6+, the AI is working with degraded context — old diffs, stale tool results, and exploration artifacts crowd out what matters for the current criterion. The Anthropic harness design research found that full context resets with structured file-based handoffs produce higher quality output than continuous context for long-running tasks.
313
+
314
+ **Procedure** (runs automatically at sprint boundaries):
315
+
316
+ 1. After completing criterion N (where N % `criteriaPerSprint` === 0 AND remaining criteria > 0):
317
+ 2. **Commit progress**: `git add -A && git commit -m "sprint: criteria 1-N of M complete"`
318
+ 3. **Save sprint checkpoint** to `.workflow/state/task-checkpoint.json`:
319
+ - Task ID, spec path, completed criteria indices, changed files, remaining criteria
320
+ 4. **Output sprint summary** (visible to user):
321
+ ```
322
+ ━━━ SPRINT BOUNDARY ━━━
323
+ Completed criteria 1-N of M. Committing and resetting context.
324
+ Remaining: criteria (N+1)-M
325
+ ```
326
+ 5. **Compact context** — this triggers a full compaction. The PostCompact hook restores:
327
+ - Active task ID and spec reference
328
+ - Which criteria are done vs pending (from checkpoint)
329
+ - Changed files list
330
+ 6. **Resume from checkpoint** — read the spec fresh, skip completed criteria, continue with criterion N+1
331
+
332
+ **Why this is different from normal compaction**: Normal compaction summarizes the conversation. Sprint reset goes further — it commits work, saves a structured checkpoint, and compacts. The next sprint starts with a clean slate + the checkpoint file, not a compressed summary of everything that happened. The AI reads the spec fresh rather than relying on a summarized memory of it.
333
+
334
+ **Configuration**:
335
+ ```json
336
+ {
337
+ "sprintReset": {
338
+ "enabled": true,
339
+ "criteriaPerSprint": 3,
340
+ "minTaskCriteria": 5
341
+ }
342
+ }
343
+ ```
344
+
345
+ **Skip when**: Task has fewer than `sprintReset.minTaskCriteria` criteria (default: 5), TDD mode is active (TDD has its own rhythm), or `sprintReset.enabled` is false.
346
+
308
347
  ### Step 3.5: Criteria Completion Verification (MANDATORY)
309
348
 
310
349
  After implementing all scenarios, BEFORE quality gates:
@@ -416,6 +455,91 @@ After implementing all scenarios, BEFORE quality gates:
416
455
 
417
456
  **Skip conditions**: Tasks that target a specific file or a small known set (e.g., "remove the mock import in Dashboard.tsx") don't need the full inventory — they're scoped enough already. The inventory is for "all X" / "every X" / "clean up X everywhere" tasks.
418
457
 
458
+ ### Step 3.56: Skeptical Evaluator Gate (L2+ tasks, when `config.skepticalEvaluator.enabled`)
459
+
460
+ **The problem this solves**: The same agent that wrote the code verifies its own work in Step 3.5. Anthropic's harness design research found that "separating the agent doing the work from the agent judging it proves to be a strong lever" and that "tuning standalone evaluators toward skepticism is far more tractable than making a generator critical of its own work." This is "confident praise bias" — the implementer always thinks it did a good job.
461
+
462
+ **Activates when**: `config.skepticalEvaluator.enabled` (default: true) AND task level is L2 or higher (not L3 trivial tasks).
463
+
464
+ **Procedure**:
465
+
466
+ 1. **Spawn a skeptical evaluator sub-agent** (separate from the implementation agent):
467
+ ```
468
+ Agent({
469
+ subagent_type: "code-reviewer",
470
+ model: "sonnet", // Use a different model for diversity
471
+ prompt: <see below>
472
+ })
473
+ ```
474
+
475
+ 2. **Evaluator prompt** (tuned toward skepticism):
476
+ ```
477
+ You are a SKEPTICAL code evaluator. Your job is to find problems, not praise.
478
+ Assume the implementation has gaps until proven otherwise.
479
+
480
+ ## Task Specification
481
+ <read and paste the spec from .workflow/specs/wf-XXXXXXXX.md>
482
+
483
+ ## Implementation Diff
484
+ <git diff of all changed files>
485
+
486
+ ## Your Job
487
+
488
+ For EACH acceptance criterion in the spec:
489
+ 1. Read the criterion carefully
490
+ 2. Find the EXACT code that implements it (cite file:line)
491
+ 3. Grade: PASS (fully works), PARTIAL (code exists but incomplete), FAIL (not implemented)
492
+ 4. If PARTIAL or FAIL: explain exactly what's missing
493
+
494
+ IMPORTANT: "Code exists" is NOT the same as "criterion is met."
495
+ A service that exists but is never called = FAIL.
496
+ A component that renders but doesn't handle the specified edge case = PARTIAL.
497
+ Only grade PASS when the criterion is FULLY satisfied end-to-end.
498
+
499
+ ## Output Format
500
+ Return JSON:
501
+ {
502
+ "criteria": [
503
+ { "criterion": "...", "grade": "PASS|PARTIAL|FAIL", "evidence": "file:line", "issue": "..." }
504
+ ],
505
+ "overallPass": true/false,
506
+ "criticalIssues": ["..."]
507
+ }
508
+ ```
509
+
510
+ 3. **Process evaluator results**:
511
+ - If `overallPass: true` → proceed to Step 3.6
512
+ - If `overallPass: false` → **iteration loop** (see below)
513
+
514
+ 4. **Generator-Evaluator Iteration Loop** (when evaluator finds issues):
515
+ - Feed the evaluator's `criticalIssues` and failed criteria back to the implementation context
516
+ - Fix the identified issues (targeted fixes, not re-implementation)
517
+ - Re-run the evaluator on the updated diff
518
+ - **Max iterations**: `config.skepticalEvaluator.maxIterations` (default: 3)
519
+ - If still failing after max iterations → proceed to Step 3.6 anyway but **flag the unresolved issues** in the completion report
520
+
521
+ 5. **Calibration** (when `config.skepticalEvaluator.calibration` is true):
522
+ - Before spawning the evaluator, check `.workflow/state/eval-calibration.json` for calibration examples
523
+ - If examples exist, inject 2-3 into the evaluator prompt as few-shot examples, including at least:
524
+ - One high-scoring example (what a PASS looks like)
525
+ - One low-scoring example (what a FAIL looks like)
526
+ - This prevents score drift — the evaluator is anchored to concrete examples
527
+
528
+ **Configuration**:
529
+ ```json
530
+ {
531
+ "skepticalEvaluator": {
532
+ "enabled": true,
533
+ "maxIterations": 3,
534
+ "model": "sonnet",
535
+ "calibration": true,
536
+ "skipForL3": true
537
+ }
538
+ }
539
+ ```
540
+
541
+ **Why this works**: The evaluator has NO emotional investment in the code. It reads the spec and the diff cold. It's explicitly prompted to be skeptical. And because it's a separate sub-agent, it has a fresh context — no accumulated "I already know this works" bias from the implementation phase.
542
+
419
543
  ### Step 3.6: Integration Wiring Validation (MANDATORY)
420
544
 
421
545
  Run `node node_modules/wogiflow/scripts/flow-wiring-verifier.js wf-XXXXXXXX`
@@ -70,6 +70,8 @@ flow parallel check # See available parallel tasks
70
70
  | 1.9.5+ | 2.1.74+ | SessionEnd timeout fix, managed policy ask rules, autoMemoryDirectory, Agent tool routing gate fix |
71
71
  | 2.0.0+ | 2.1.76+ | PostCompact hook, Elicitation/ElicitationResult events, deferred tool schema fix |
72
72
  | 2.1.0+ | 2.1.77+ | PreToolUse allow/deny separation, 128k output tokens, worktree sparse checkout, compaction circuit breaker |
73
+ | 2.4.0+ | 2.1.83+ | managed-settings.d/, CwdChanged/FileChanged hooks, ENV_SCRUB, --channels limitations, MEMORY.md 25KB cap |
74
+ | 2.5.0+ | 2.1.84+ | TaskCreated hook, YAML glob lists in rules, CLAUDE_STREAM_IDLE_TIMEOUT_MS, WorktreeCreate HTTP transport, idle-return prompt, MCP 2KB cap |
73
75
 
74
76
  ### Environment Variables (2.1.19+)
75
77
 
@@ -162,6 +164,7 @@ await cancelTask('wf-123', 'superseded', false);
162
164
  | Stop | stop.js | Session cleanup |
163
165
  | SessionEnd | session-end.js | Request logging, progress update |
164
166
  | TaskCompleted | task-completed.js | Move task to recentlyCompleted |
167
+ | TaskCreated | task-created.js | Link native tasks to active WogiFlow task (2.1.84+) |
165
168
  | ConfigChange | config-change.js | Re-sync bridge on mid-session config changes |
166
169
  | InstructionsLoaded | instructions-loaded.js | Package check, rule conflicts, auto-onboard |
167
170
  | PostCompact | post-compact.js | Re-inject state after context compaction (2.1.76+) |
@@ -236,6 +239,54 @@ await cancelTask('wf-123', 'superseded', false);
236
239
  - **Memory growth fix**: Fixed progress messages surviving compaction in long-running sessions. Reduces memory pressure during long WogiFlow bulk-loop sessions.
237
240
  - **Faster startup on macOS**: ~60ms faster by reading keychain credentials in parallel. Faster `--resume` on fork-heavy sessions — up to 45% faster loading and ~100-150MB less peak memory. Benefits WogiFlow sessions with heavy hook context.
238
241
 
242
+ ### Features in 2.1.83+
243
+
244
+ - **managed-settings.d/ drop-in directory**: A `managed-settings.d/` directory alongside `managed-settings.json` allows separate teams/tools to deploy independent policy fragments that merge alphabetically. WogiFlow currently generates `settings.local.json` — for wogiflow-cloud teams, this opens the door to deploying team policies as individual fragments (e.g., `00-wogiflow-hooks.json`, `50-team-policy.json`). No code change needed yet; tracked as cloud opportunity.
245
+
246
+ - **CwdChanged and FileChanged hook events**: Two new hook events. `CwdChanged` fires when the working directory changes (useful for direnv-style setups). `FileChanged` fires when watched files change on disk — WogiFlow could use this to detect external changes to `.workflow/state/` files and auto-rescan. Added to `UNUSED_SUPPORTED_EVENTS` in `claude-code.js`. Implementation deferred to a future task.
247
+
248
+ - **CLAUDE_CODE_SUBPROCESS_ENV_SCRUB=1**: Strips Anthropic and cloud provider credentials from subprocess environments (Bash tool, hooks, MCP stdio servers). **Impact on WogiFlow**: Hooks are subprocesses, so any hook that needs API keys (e.g., `flow-correction-detector.js` spawning a child process for async correction detection) will not have credentials available. The correction detector now gracefully degrades (returns `isCorrection: false`) when no API key is available. `flow-providers.js` makes direct `https.request()` calls from the hook process itself (not a subprocess of the subprocess), so it reads `process.env` before scrubbing — but if ENV_SCRUB applies transitively to hook processes, provider calls would also be affected. For wogiflow-cloud: if cloud sync hooks need API keys, they must use an alternative credential mechanism (file-based, keychain, or passed via hook input JSON).
249
+
250
+ - **Agents can declare initialPrompt in frontmatter**: Agents can now auto-submit a first turn without the AI composing it. WogiFlow's 11 persona agents in `agents/` could use this for standardized opening probes. No code change needed; optimization opportunity.
251
+
252
+ - **Background subagent fixes**: (1) Fixed subagents becoming invisible after context compaction — that bug could lead to duplicate agent spawns in WogiFlow's parallel explore phase, which the fix now prevents. (2) Fixed agents staying stuck in "running" state when git/API calls hang during cleanup. Both fixes improve reliability of the `/wogi-start` explore phase and `/wogi-bulk-loop`.
253
+
254
+ - **--channels disables AskUserQuestion and plan mode**: When `--channels` is active (remote/SDK), `AskUserQuestion` and plan-mode tools are disabled. **Impact on WogiFlow**: WogiFlow uses `AskUserQuestion` extensively for approval gates, clarifying questions, and interactive decisions. In `--channels` mode, these will silently fail or be unavailable. WogiFlow should detect channels mode and fall back to non-interactive patterns: auto-approve with defaults, skip clarifying questions, use best-effort decisions. Documented in CLAUDE.md template and wogi-start command.
255
+
256
+ - **TaskOutput deprecated**: `TaskOutput` tool is deprecated in favor of using `Read` on the background task's output file path. WogiFlow does not use `TaskOutput` directly (confirmed by codebase search). No change needed.
257
+
258
+ - **MEMORY.md index truncation**: Now truncates at **25KB** as well as 200 lines (previously only 200 lines). WogiFlow's MEMORY.md enforcement block at the top consumes space from this budget. Projects with large MEMORY.md files may lose entries silently. The CLAUDE.md template's auto-memory section already mentions 200 lines; the 25KB limit is enforced by Claude Code's system prompt and does not need to be duplicated in the template.
259
+
260
+ - **Plugin manifest.userConfig**: Plugins can now prompt for configuration at enable time, with `sensitive: true` values stored in keychain (macOS) or protected credentials file. If WogiFlow becomes a Claude Code plugin, this provides native credential storage for cloud API tokens and model API keys — replacing `wogi login`'s file-based token storage. Tracked as cloud opportunity.
261
+
262
+ - **WebFetch identifies as Claude-User**: `WebFetch` now sends a `Claude-User` user agent so site operators can recognize and allowlist/block Claude Code traffic via `robots.txt`. WogiFlow's explore agents (Agent 2: Best Practices, Agent 3: Version Verifier) use `WebFetch` for research. If sites block `Claude-User`, research agents will get empty results. Agents should treat unexpectedly empty WebFetch results as potentially blocked and log a warning.
263
+
264
+ - **--mcp-config bypass fix**: Fixed `--mcp-config` CLI flag bypassing `allowedMcpServers`/`deniedMcpServers` managed policy enforcement. Security improvement — no WogiFlow code change needed.
265
+
266
+ - **Uninstalled plugin hooks fix**: Fixed uninstalled plugin hooks continuing to fire until the next session. Improves hook hygiene for WogiFlow plugin management.
267
+
268
+ ### Features in 2.1.84+
269
+
270
+ - **TaskCreated hook event**: New hook event fired when a task is created via TaskCreate. WogiFlow uses this to link native Claude Code tasks to the active WogiFlow task in `session-state.json`, enabling cross-system task tracking. Implemented in `scripts/hooks/core/task-created.js`.
271
+
272
+ - **YAML glob lists in rules/skills frontmatter**: Rules and skills `globs:` field now accepts YAML lists in addition to single strings. WogiFlow's `flow-rules-sync.js` currently generates single-string globs with brace expansion (`"**/*.{js,ts}"`). This opens the door to cleaner multi-pattern rules without brace expansion hacks. No immediate code change — tracked as improvement.
273
+
274
+ - **CLAUDE_STREAM_IDLE_TIMEOUT_MS**: New env var to configure the streaming idle watchdog threshold (default 90s). WogiFlow's explore phase launches 5-6 parallel agents — if an agent takes >90s without streaming output, the watchdog may kill the connection. Users experiencing timeouts during explore should set this higher (e.g., `CLAUDE_STREAM_IDLE_TIMEOUT_MS=180000` for 3 minutes).
275
+
276
+ - **WorktreeCreate hook HTTP transport**: WorktreeCreate now supports `type: "http"` — return the created worktree path via `hookSpecificOutput.worktreePath`. WogiFlow continues to use command transport locally. HTTP transport enables wogiflow-cloud to receive worktree events server-side for team task tracking. Listed in `UNUSED_SUPPORTED_EVENTS` as a cloud opportunity.
277
+
278
+ - **Idle-return prompt**: Users returning after 75+ minutes are nudged to `/clear`. WogiFlow's PostCompact hook handles `/clear` correctly — it fires on compaction, re-injects state (active task, workflow phase, durable session progress), and re-arms routing. Session restore tested and working via the same PostCompact pathway.
279
+
280
+ - **MCP tool descriptions capped at 2KB**: MCP tool descriptions and server instructions now capped at 2KB to prevent OpenAPI-generated servers from bloating context. WogiFlow's plugin system registers MCP servers — plugins with verbose OpenAPI specs may have descriptions silently truncated. Plugin docs should note this limit.
281
+
282
+ - **System-prompt caching with ToolSearch**: Global system-prompt caching now works when ToolSearch is enabled. WogiFlow sessions use ToolSearch for deferred MCP tools — this reduces input token costs automatically. No code change needed.
283
+
284
+ - **Subagent JSON-schema fix**: Fixed workflow subagents failing with API 400 when the outer session uses `--json-schema` and the subagent also specifies a schema. Improves reliability of WogiFlow explore agents in structured-output sessions.
285
+
286
+ - **allowedChannelPlugins managed setting**: Enterprise admins can define a channel plugin allowlist. Relevant for wogiflow-cloud teams product — team admins could control which wogi plugins are allowed across the team. Tracked as cloud opportunity.
287
+
288
+ - **ANTHROPIC_DEFAULT_{OPUS,SONNET,HAIKU}_MODEL_SUPPORTS**: New env vars to override effort/thinking capability detection for pinned default models on Bedrock/Vertex/Foundry. WogiFlow's hybrid mode routes to different models — users on these third-party (3P) platforms who pin models can now declare their capabilities properly.
289
+
239
290
  ### Simple Mode Naming Distinction
240
291
 
241
292
  Claude Code's `CLAUDE_CODE_SIMPLE` environment variable (which enables a simplified tool set) is **unrelated** to WogiFlow's `loops.simpleMode` (a lightweight task completion loop using string detection). They are separate features that happen to share the word "simple":
@@ -111,12 +111,23 @@ Planned files: [FILES_TO_CHANGE]
111
111
  4. If a memory database exists (.workflow/memory/local.db or via MCP):
112
112
  - Query for rejected approaches from past tasks touching the same files
113
113
  - Surface any "approach X was tried and failed" warnings
114
+ 5. **Eval trend analysis** (NEW — from Anthropic harness design research):
115
+ - Read `.workflow/evals/` directory for the last 5-10 eval results
116
+ - Calculate average score per dimension (completeness, accuracy, workflowCompliance, tokenEfficiency, quality)
117
+ - If any dimension averages below 6/10 across recent evals:
118
+ - Flag it as a RECURRING WEAKNESS
119
+ - Suggest a mitigation for the spec (e.g., "tokenEfficiency averaging 4/10 → add context budgeting hints")
120
+ - If eval calibration exists (`.workflow/state/eval-calibration.json`):
121
+ - Compare the current task type against high/low calibration examples
122
+ - Warn if this task type historically scores low
114
123
 
115
124
  Return:
116
125
  - Known risks for this task type (from feedback-patterns)
117
126
  - Past corrections in this area (from corrections/)
118
127
  - Promoted rules that apply (from decisions.md, count >= 3)
119
128
  - Rejected approaches from similar past work (from memory-db)
129
+ - **Eval trend warnings** (dimensions scoring below 6/10 in recent evals)
130
+ - **Recommended spec hints** (based on eval trends — inject into spec generation)
120
131
  - Confidence: HIGH (many data points) / MEDIUM / LOW (no history)
121
132
  ```
122
133
 
@@ -132,9 +132,20 @@
132
132
  }
133
133
  ]
134
134
  }
135
+ ],
136
+ "TaskCreated": [
137
+ {
138
+ "hooks": [
139
+ {
140
+ "type": "command",
141
+ "command": "node scripts/hooks/entry/claude-code/task-created.js",
142
+ "timeout": 5
143
+ }
144
+ ]
145
+ }
135
146
  ]
136
147
  },
137
148
  "_wogiFlowManaged": true,
138
- "_wogiFlowVersion": "1.9.10",
149
+ "_wogiFlowVersion": "2.4.2",
139
150
  "_comment": "Shared WogiFlow hook configuration. Committed to repo for team use. User-specific overrides go in settings.local.json."
140
151
  }
@@ -100,7 +100,7 @@
100
100
  "displayName": "Claude Sonnet 4.6",
101
101
  "contextWindow": 200000,
102
102
  "contextWindowBeta": 1000000,
103
- "maxOutputTokens": 64000,
103
+ "maxOutputTokens": 128000,
104
104
  "costTier": "standard",
105
105
  "pricing": {
106
106
  "inputPer1kTokens": 0.003,
package/bin/flow CHANGED
@@ -23,7 +23,7 @@ const packageJson = require('../package.json');
23
23
  const VERSION = packageJson.version;
24
24
 
25
25
  // Global commands that don't require a project context
26
- const GLOBAL_COMMANDS = ['init', 'upgrade', 'version', '--version', '-v', '--help', '-h', 'skill', 'channel', 'login', 'logout'];
26
+ const GLOBAL_COMMANDS = ['init', 'upgrade', 'version', '--version', '-v', '--help', '-h', 'skill', 'channel', 'login', 'logout', 'workspace'];
27
27
 
28
28
  /**
29
29
  * Find the project root by looking for .workflow directory
@@ -97,6 +97,7 @@ Usage: flow <command> [options]
97
97
 
98
98
  Global Commands:
99
99
  init Initialize Wogi Flow in a new project
100
+ workspace init Initialize a multi-repo workspace
100
101
  upgrade Upgrade an existing project to latest version
101
102
  login Connect to WogiFlow Teams
102
103
  logout Disconnect from WogiFlow Teams
@@ -208,6 +209,15 @@ function main() {
208
209
  return;
209
210
  }
210
211
 
212
+ if (command === 'workspace') {
213
+ const { workspace } = require('../lib/workspace');
214
+ workspace(args.slice(1)).catch(err => {
215
+ console.error(`Workspace error: ${err.message}`);
216
+ process.exit(1);
217
+ });
218
+ return;
219
+ }
220
+
211
221
  // For all other commands, try to find project context
212
222
  const projectRoot = findProjectRoot();
213
223