groove-dev 0.27.131 → 0.27.134

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/AGENT_ORCHESTRATION.md +375 -0
  2. package/moe-training/shared/envelope-schema.js +1 -1
  3. package/node_modules/@groove-dev/cli/package.json +1 -1
  4. package/node_modules/@groove-dev/daemon/package.json +1 -1
  5. package/node_modules/@groove-dev/daemon/src/index.js +3 -1
  6. package/node_modules/@groove-dev/daemon/src/introducer.js +48 -4
  7. package/node_modules/@groove-dev/daemon/src/llama-server.js +4 -4
  8. package/node_modules/@groove-dev/daemon/src/model-lab.js +8 -0
  9. package/node_modules/@groove-dev/daemon/src/preview.js +85 -58
  10. package/node_modules/@groove-dev/daemon/src/process.js +9 -0
  11. package/node_modules/@groove-dev/daemon/src/terminal-pty.js +24 -14
  12. package/node_modules/@groove-dev/daemon/src/validate.js +0 -4
  13. package/{packages/gui/dist/assets/codemirror-CFF1Lrnz.js → node_modules/@groove-dev/gui/dist/assets/codemirror-DRQdprYi.js} +11 -11
  14. package/node_modules/@groove-dev/gui/dist/assets/index-BgQL4bNl.css +1 -0
  15. package/{packages/gui/dist/assets/index-BiB9oY9U.js → node_modules/@groove-dev/gui/dist/assets/index-Dozp69tK.js} +1721 -1721
  16. package/node_modules/@groove-dev/gui/dist/index.html +3 -3
  17. package/node_modules/@groove-dev/gui/package.json +1 -1
  18. package/node_modules/@groove-dev/gui/src/app.css +6 -6
  19. package/node_modules/@groove-dev/gui/src/components/agents/agent-chat.jsx +12 -1
  20. package/node_modules/@groove-dev/gui/src/components/agents/agent-feed.jsx +15 -5
  21. package/node_modules/@groove-dev/gui/src/components/agents/agent-file-tree.jsx +6 -6
  22. package/node_modules/@groove-dev/gui/src/components/agents/workspace-mode.jsx +11 -9
  23. package/node_modules/@groove-dev/gui/src/components/editor/code-editor.jsx +26 -3
  24. package/node_modules/@groove-dev/gui/src/components/editor/file-tree.jsx +6 -6
  25. package/node_modules/@groove-dev/gui/src/components/editor/terminal.jsx +20 -8
  26. package/node_modules/@groove-dev/gui/src/components/lab/chat-playground.jsx +10 -1
  27. package/node_modules/@groove-dev/gui/src/components/lab/lab-assistant.jsx +4 -4
  28. package/node_modules/@groove-dev/gui/src/components/lab/system-prompt-editor.jsx +17 -3
  29. package/node_modules/@groove-dev/gui/src/components/layout/terminal-panel.jsx +2 -4
  30. package/node_modules/@groove-dev/gui/src/components/preview/preview-toolbar.jsx +8 -6
  31. package/node_modules/@groove-dev/gui/src/stores/groove.js +82 -15
  32. package/node_modules/@groove-dev/gui/src/views/agents.jsx +82 -74
  33. package/node_modules/@groove-dev/gui/src/views/editor.jsx +11 -9
  34. package/node_modules/moe-training/shared/envelope-schema.js +1 -1
  35. package/package.json +1 -1
  36. package/packages/cli/package.json +1 -1
  37. package/packages/daemon/package.json +1 -1
  38. package/packages/daemon/src/index.js +3 -1
  39. package/packages/daemon/src/introducer.js +48 -4
  40. package/packages/daemon/src/llama-server.js +4 -4
  41. package/packages/daemon/src/model-lab.js +8 -0
  42. package/packages/daemon/src/preview.js +85 -58
  43. package/packages/daemon/src/process.js +9 -0
  44. package/packages/daemon/src/terminal-pty.js +24 -14
  45. package/packages/daemon/src/validate.js +0 -4
  46. package/{node_modules/@groove-dev/gui/dist/assets/codemirror-CFF1Lrnz.js → packages/gui/dist/assets/codemirror-DRQdprYi.js} +11 -11
  47. package/packages/gui/dist/assets/index-BgQL4bNl.css +1 -0
  48. package/{node_modules/@groove-dev/gui/dist/assets/index-BiB9oY9U.js → packages/gui/dist/assets/index-Dozp69tK.js} +1721 -1721
  49. package/packages/gui/dist/index.html +3 -3
  50. package/packages/gui/package.json +1 -1
  51. package/packages/gui/src/app.css +6 -6
  52. package/packages/gui/src/components/agents/agent-chat.jsx +12 -1
  53. package/packages/gui/src/components/agents/agent-feed.jsx +15 -5
  54. package/packages/gui/src/components/agents/agent-file-tree.jsx +6 -6
  55. package/packages/gui/src/components/agents/workspace-mode.jsx +11 -9
  56. package/packages/gui/src/components/editor/code-editor.jsx +26 -3
  57. package/packages/gui/src/components/editor/file-tree.jsx +6 -6
  58. package/packages/gui/src/components/editor/terminal.jsx +20 -8
  59. package/packages/gui/src/components/lab/chat-playground.jsx +10 -1
  60. package/packages/gui/src/components/lab/lab-assistant.jsx +4 -4
  61. package/packages/gui/src/components/lab/system-prompt-editor.jsx +17 -3
  62. package/packages/gui/src/components/layout/terminal-panel.jsx +2 -4
  63. package/packages/gui/src/components/preview/preview-toolbar.jsx +8 -6
  64. package/packages/gui/src/stores/groove.js +82 -15
  65. package/packages/gui/src/views/agents.jsx +82 -74
  66. package/packages/gui/src/views/editor.jsx +11 -9
  67. package/CENTRAL_COMMAND_REBUILD.md +0 -689
  68. package/MERKLE_TREE_ARCHITECTURE.md +0 -354
  69. package/node_modules/@groove-dev/gui/dist/assets/index-CeyDFVub.css +0 -1
  70. package/packages/gui/dist/assets/index-CeyDFVub.css +0 -1
@@ -0,0 +1,375 @@
1
+ # Agent Coordination Protocol
2
+ # How Nano Agents Communicate, Chain, and Scale
3
+
4
+ ---
5
+
6
+ ## 1. The Problem
7
+
8
+ A single nano agent firing on the always-hot chassis handles one focused task in ~500ms. But real work requires multiple agents: a recall agent retrieves context, a domain agent writes code, a QC agent verifies it. These agents are independent firings — born, fired, killed. They share nothing by default.
9
+
10
+ The coordination protocol defines how firings communicate artifacts, chain sequentially, and scale across mesh nodes — without hardcoding any leaf-specific behavior. The protocol is universal: it works the same whether the leaf is Python, automotive diagnostics, drone navigation, or something a user created that the platform has never seen.
11
+
12
+ ---
13
+
14
+ ## 2. Design Principles
15
+
16
+ **Leaves are smart, the platform is dumb.** The chassis does not know what any leaf does. It does not have if-statements for leaf types. It executes a universal protocol. All intelligence, behavior, and domain knowledge lives in the leaf's trained weights. Adding a new leaf requires zero platform changes.
17
+
18
+ **Nano means nano.** Each firing receives one task, produces one output, and dies. It does not have full context. It does not see the whole picture. It sees its task and the previous phase's output. That's it. Complex tasks are handled by chaining many small firings, not by making one firing smarter.
19
+
20
+ **The filesystem is the extended context window.** The 2048-token context window constrains what the model holds in its head at one time. It does not constrain what the system can work with. Large artifacts live on disk. Agents access them via tools (Read, Glob, Grep). The workspace is unlimited. The context window is a scratchpad.
21
+
22
+ **Decompose until each piece fits.** If a task is too complex for one firing, break it into pieces. If a piece is still too complex, break it again. Each piece should be solvable by a single focused firing within the context budget. The chain length is self-limiting — if a task decomposes into 20 phases, the decomposition is wrong.
23
+
24
+ ---
25
+
26
+ ## 3. Universal Tag Protocol
27
+
28
+ Every leaf, regardless of domain, speaks the same tag language. These tags are the only interface between a leaf's output and the chassis's coordination logic. The chassis parses tags. It does not parse natural language intent.
29
+
30
+ ### Existing Tags (ReAct Loop)
31
+
32
+ | Tag | Purpose | Chassis Action |
33
+ |-----|---------|---------------|
34
+ | `<thought>` | Internal reasoning | Emit as trace (hidden from user, shown in thinking dropdown) |
35
+ | `<action tool="X">args</action>` | Tool call | Execute tool, return `<observation>` |
36
+ | `<observation>` | Tool result | Injected by chassis, not generated by leaf |
37
+ | `<resolution>` | Final answer | Emit to user, end firing |
38
+
39
+ ### New Tags (Coordination)
40
+
41
+ | Tag | Purpose | Chassis Action |
42
+ |-----|---------|---------------|
43
+ | `<delegate>rewritten task</delegate>` | "I can't handle this, re-route" | Strip current leaf, re-embed the rewritten task, route to best match, fire new agent |
44
+ | `<yield path="workspace/file.ext">summary</yield>` | "I've produced my part, here's my artifact" | Log artifact path + summary to workspace manifest, continue pipeline |
45
+
46
+ ### Protocol Rules
47
+
48
+ 1. A firing MUST end with exactly one of: `<resolution>`, `<delegate>`, or `<yield>`.
49
+ 2. `<delegate>` triggers a re-route. The delegating leaf does not choose the target — the router does. This prevents leaves from needing knowledge of other leaves.
50
+ 3. `<yield>` writes an artifact and signals "my work is done, next agent can proceed." The summary (the tag's body text; `path` is its only attribute) is a one-line description (~10-20 tokens) that can fit in the next agent's prompt without consuming significant context.
51
+ 4. `<resolution>` ends the pipeline and returns output to the user/caller.
52
+ 5. A firing may produce multiple `<action>` / `<observation>` cycles before its terminal tag. The ReAct loop is unchanged.
53
+
54
+ ### Why Only 6 Tags
55
+
56
+ The protocol must be trainable. Every tag the model needs to produce must appear in training data. Six tags is learnable by a 0.6B model. A larger vocabulary of special tags would require more training data and risk confusion. The tags cover all coordination needs:
57
+
58
+ - Single-step response: `<thought>` → `<resolution>`
59
+ - Multi-step with tools: `<thought>` → `<action>` → `<observation>` → `<resolution>`
60
+ - Delegation: `<thought>` → `<delegate>`
61
+ - Artifact production in a chain: `<thought>` → `<action>` → `<observation>` → `<yield>`
62
+
63
+ ---
64
+
65
+ ## 4. Workspace Architecture
66
+
67
+ ### Per-Task Workspace
68
+
69
+ Every multi-agent pipeline creates a workspace directory:
70
+
71
+ ```
72
+ ~/.hummingbird/workspace/{task_id}/
73
+ ```
74
+
75
+ Each firing writes its artifacts here. The workspace persists until the pipeline completes, then is cleaned up (or archived for debugging).
76
+
77
+ ### Phase Directories
78
+
79
+ For chained pipelines, each phase writes to its own subdirectory:
80
+
81
+ ```
82
+ workspace/{task_id}/
83
+ phase_01/ ← first agent's output
84
+ config.yaml
85
+ phase_02/ ← second agent reads phase_01/, writes here
86
+ server.py
87
+ phase_03/ ← third agent reads phase_02/, writes here
88
+ test_results.log
89
+ ```
90
+
91
+ Each agent sees only the previous phase's directory. It does not see the full history. This bounds context growth regardless of chain length.
92
+
93
+ ### Discovery Over Manifests
94
+
95
+ Agents do not receive pre-loaded manifests in their prompts. They receive:
96
+
97
+ 1. **Their task** (one sentence, ~50 tokens)
98
+ 2. **The workspace path** (~10 tokens)
99
+
100
+ Total prompt overhead: ~60 tokens + system prompt. The remaining ~1750-1900 tokens (see the per-firing budget in Section 6) are available for generation and tool observations.
101
+
102
+ The agent's first ReAct step is discovery: `Glob workspace/phase_02/*` or `Read workspace/phase_01/config.yaml`. The workspace file naming convention IS the manifest. Well-named files tell the agent what exists without consuming context tokens.
103
+
104
+ ### Why Not a JSON Manifest
105
+
106
+ A manifest scales linearly with artifact count. At 10 artifacts with summaries, it consumes ~200 tokens — 10% of the context window, before the agent has done anything. At 50 artifacts, it's unworkable. Discovery via tools costs one ReAct step (~400ms) but uses zero prompt tokens. The tradeoff is clear: spend 400ms to save 200+ tokens of context budget.
107
+
108
+ ---
109
+
110
+ ## 5. Chaining Model
111
+
112
+ ### Sequential Chains
113
+
114
+ The simplest coordination pattern. Agent A yields, agent B picks up.
115
+
116
+ ```
117
+ [Router] → detect domain
118
+
119
+ [Agent A] python leaf fires
120
+ THOUGHT → ACTION (Write code) → YIELD path="phase_01/api.py"
121
+ Dies.
122
+
123
+ [Agent B] react leaf fires
124
+ Receives: "Build the frontend component" + workspace path
125
+ THOUGHT → ACTION (Glob phase_01/) → ACTION (Read phase_01/api.py) → ACTION (Write phase_02/App.tsx) → YIELD
126
+ Dies.
127
+
128
+ [Agent C] QC leaf fires
129
+ Receives: "Verify integration" + workspace path
130
+ THOUGHT → ACTION (Read phase_01/api.py) → ACTION (Read phase_02/App.tsx) → RESOLUTION
131
+ Dies. Pipeline complete.
132
+ ```
133
+
134
+ Each agent is independent. Each gets a clean context. The workspace is the only shared state.
135
+
136
+ ### Delegation Chains
137
+
138
+ When an agent encounters work outside its domain:
139
+
140
+ ```
141
+ [Agent A] python leaf fires
142
+ THOUGHT: "This requires database schema work, not Python code."
143
+ DELEGATE: "Create a PostgreSQL schema for user authentication with sessions table"
144
+ Dies.
145
+
146
+ [Router] re-embeds the delegate text → routes to databases leaf
147
+
148
+ [Agent B] databases leaf fires
149
+ THOUGHT → ACTION (Write schema.sql) → RESOLUTION
150
+ Dies. Pipeline complete.
151
+ ```
152
+
153
+ The delegating leaf does not know which leaf will handle it. It just rewrites the task in domain-appropriate language and hands it back to the router. The router's cosine similarity handles selection.
154
+
155
+ ### Recursive Decomposition
156
+
157
+ When a task is too complex for one firing:
158
+
159
+ ```
160
+ [Planner] decomposer leaf fires
161
+ THOUGHT: "This is three independent sub-tasks."
162
+ YIELD path="plan.json": "3 sub-tasks: CUDA setup, model download, vLLM config"
163
+ Dies.
164
+
165
+ [Chassis] reads plan.json, dispatches sub-tasks:
166
+
167
+ ├─ [Agent A] fires with sub-task 1 → YIELD
168
+ ├─ [Agent B] fires with sub-task 2 → YIELD (sequential on single device)
169
+ └─ [Agent C] fires with sub-task 3 (depends on A+B) → reads A+B outputs → RESOLUTION
170
+ ```
171
+
172
+ The planner yields a structured plan. The chassis parses it and dispatches. Each sub-task is a normal firing that knows nothing about the overall plan — it just does its job and yields or resolves.
173
+
174
+ ### Chain Depth Limiting
175
+
176
+ Maximum chain depth: configurable, default 5. When a chain reaches this depth, the agent at that depth MUST resolve (no more yields or delegates). This prevents runaway decomposition and guarantees termination.
177
+
178
+ ---
179
+
180
+ ## 6. Context Budget
181
+
182
+ ### Per-Firing Budget (2048 tokens)
183
+
184
+ | Component | Tokens | Notes |
185
+ |-----------|--------|-------|
186
+ | System prompt | ~80 | Leaf-specific, from training |
187
+ | Think block prefix | ~10 | `<think>\n\n</think>\n\n` |
188
+ | Task instruction | ~50-100 | One sentence from yield/delegate/user |
189
+ | Workspace path | ~10 | `workspace/task_abc/` |
190
+ | **Available for generation** | **~1750-1900** | ReAct steps, tool observations, output |
191
+
192
+ ### Tool Observation Budget
193
+
194
+ Tool results (Read, Grep, Glob) enter context as `<observation>` tags during the ReAct loop. These are already truncated by the existing `_truncate_observations` function when context exceeds the budget. Large file reads are naturally bounded by the `max_lines` parameter on the Read tool (default 100 lines).
195
+
196
+ ### Why 2048 Is Enough
197
+
198
+ A nano agent does ONE thing. It doesn't need to understand the full history of a 20-step pipeline. It needs to understand its specific task and the output of the previous phase. One task sentence + one file read = well within budget.
199
+
200
+ If a task requires understanding more context than fits in 2048 tokens, the task is too broad for a single nano agent. Decompose it.
201
+
202
+ ---
203
+
204
+ ## 7. Mesh Parallelism
205
+
206
+ ### Single Device: Sequential
207
+
208
+ One chassis, one leaf at a time. Firings execute in order. A 5-phase chain takes ~2.5 seconds.
209
+
210
+ ```
211
+ Node A: [Phase 1] → [Phase 2] → [Phase 3] → [Phase 4] → [Phase 5]
212
+ 500ms 500ms 500ms 500ms 500ms = 2.5s
213
+ ```
214
+
215
+ ### Multi-Node: Parallel on Dependency Graph
216
+
217
+ Each mesh node is its own chassis. Independent phases scatter across nodes. Dependent phases wait for their inputs.
218
+
219
+ ```
220
+ Node A: [Phase 1: CUDA setup] → idle → [Phase 4: vLLM config]
221
+ Node B: [Phase 2: Model download] → idle → idle
222
+ Node C: idle → idle → [Phase 3: depends on 1+2] → [Phase 5: test]
223
+
224
+ Wall clock: ~1.5s (critical path length, not total work)
225
+ ```
226
+
227
+ The planner's `plan.json` encodes dependencies. The mesh dispatcher reads the dependency graph and assigns phases to available nodes. Each node is always-hot — zero cold start, instant acceptance of a firing.
228
+
229
+ ### Five Nodes, Five Chassis
230
+
231
+ Each node runs its own independent chassis instance. Not 5x inference on one model — five separate brains, each with its own GGUF loaded.
232
+
233
+ - Cost per node: ~1.2GB VRAM (or 400MB quantized)
234
+ - Five nodes: ~6GB total — a gaming laptop could run the full mesh locally
235
+ - Each node maintains its own leaf cache optimized for its typical workload
236
+ - The router becomes a network-level dispatcher: it knows which nodes have which leaves cached and minimizes leaf swap overhead
237
+
238
+ ### Artifact Sync
239
+
240
+ When phases run on different nodes, artifacts must sync. The workspace directory is local to each node. The mesh gossip protocol handles artifact transfer:
241
+
242
+ 1. Agent A on Node 1 yields → writes artifact to local workspace
243
+ 2. Mesh dispatcher detects Agent B (on Node 2) depends on Agent A's output
244
+ 3. Gossip protocol transfers the artifact file to Node 2's workspace
245
+ 4. Agent B fires on Node 2 with the artifact available locally
246
+
247
+ Transfer overhead: negligible for small artifacts (configs, schemas). For large artifacts (model weights, datasets), the transfer time may exceed generation time — the dispatcher accounts for this when assigning nodes.
248
+
249
+ ---
250
+
251
+ ## 8. Leaf-Agnostic Design
252
+
253
+ ### What the Chassis Knows
254
+
255
+ The chassis knows:
256
+ - How to parse 6 tags
257
+ - How to execute tools
258
+ - How to manage workspaces
259
+ - How to route via cosine similarity
260
+ - How to dispatch firings sequentially or across mesh
261
+
262
+ ### What the Chassis Does NOT Know
263
+
264
+ The chassis does not know:
265
+ - What any leaf does
266
+ - What domain a leaf covers
267
+ - Whether a leaf should delegate or resolve
268
+ - How complex a task is
269
+ - What tools a leaf should use
270
+
271
+ All of this comes from the leaf's training. The chassis is a runtime. The leaves are the intelligence.
272
+
273
+ ### Adding a New Leaf
274
+
275
+ A user creates an "automotive diagnostics" leaf:
276
+
277
+ 1. Collect training data (diagnostic conversations, repair procedures, sensor interpretation)
278
+ 2. Format as ReAct trajectories with the universal tag protocol
279
+ 3. Train LoRA adapter on the chassis
280
+ 4. Compute centroid from training data embeddings
281
+ 5. Deploy: upload adapter + centroid to the network
282
+
283
+ The platform requires zero changes. The router routes to it via centroid similarity. The chassis executes it via the same tag protocol. The workspace handles its artifacts the same way. A leaf that diagnoses engine problems chains the same way a leaf that writes Python code chains.
284
+
285
+ ### Leaf Metadata
286
+
287
+ Each leaf carries minimal metadata (computed at training time, not hardcoded):
288
+
289
+ ```json
290
+ {
291
+ "id": "automotive_diagnostics",
292
+ "centroid": [0.12, -0.34, ...], // 384-dim, for routing
293
+ "has_tools": true, // enables ReAct loop vs direct stream
294
+ "max_chain_depth": 3 // optional, leaf-specific depth limit
295
+ }
296
+ ```
297
+
298
+ No description of what the leaf does. No list of capabilities. No routing rules. The centroid IS the routing rule — it encodes the leaf's domain in embedding space. The training data IS the capability definition.
299
+
300
+ ---
301
+
302
+ ## 9. Error Handling
303
+
304
+ ### Firing Failures
305
+
306
+ If a firing errors (model produces malformed output, tool call fails, timeout):
307
+
308
+ 1. The chassis logs the error
309
+ 2. The firing is marked failed
310
+ 3. The pipeline continues with a fallback: skip the failed phase and let the next agent handle the gap, OR retry once with a clean context
311
+
312
+ No dynamic replanning. No planner agent re-evaluating the situation. The pipeline is simple: fire, yield, fire, yield, resolve. If a firing fails, the chain handles it the same way a function call handles an exception — catch and continue, or propagate and fail.
313
+
314
+ ### Why Not Dynamic Replanning
315
+
316
+ Dynamic replanning requires a planning agent that can reason about failures and adapt. This is the hardest cognitive task for any model, let alone a 0.6B model. The 122 planner sessions in the training data cover task decomposition, not failure recovery.
317
+
318
+ The simpler approach: if a task fails, the user retries. The nano agent model is cheap — a failed 500ms firing costs almost nothing. Retry is faster than replanning.
319
+
320
+ Dynamic replanning can be added later as a capability of the decomposer leaf, once sufficient training data exists for failure-recovery patterns. The tag protocol supports it without changes — the decomposer would just yield a revised plan.
321
+
322
+ ---
323
+
324
+ ## 10. Perception-Action Mode (Autonomous Operation)
325
+
326
+ ### Beyond Request-Response
327
+
328
+ The coordination protocol is not limited to user-initiated requests. In autonomous mode, the chassis runs a continuous perception-action loop:
329
+
330
+ ```
331
+ while alive:
332
+ stimulus = perceive(environment) # read sensors, APIs, file events, mesh signals
333
+ if needs_action(stimulus): # router evaluates stimulus
334
+ task = formulate(stimulus) # embed stimulus, determine task
335
+ fire(task) # standard firing pipeline
336
+ else:
337
+ idle() # gossip, prune, evolve
338
+ ```
339
+
340
+ The same tag protocol applies. The same workspace model applies. The same chaining model applies. The only difference is the stimulus source: a user prompt vs. an environmental event.
341
+
342
+ ### Examples
343
+
344
+ - **Drone**: Perceives obstacle sensor data → fires navigation leaf → `<action>` adjusts course → `<resolution>` logs new heading
345
+ - **Salesforce integration**: Perceives deal status change → fires sales leaf → `<thought>` evaluates opportunity → `<resolution>` suggests action to rep
346
+ - **Server monitoring**: Perceives CPU spike → fires DevOps leaf → `<action tool="Bash">kubectl get pods</action>` → `<thought>` identifies failing pod → `<yield>` produces diagnostic report → fires Kubernetes leaf → `<resolution>` applies fix
347
+ - **Mesh consciousness**: Perceives new leaf from gossip → fires evaluation leaf → benchmarks against local test set → `<resolution>` accepts or rejects the leaf update
348
+
349
+ In all cases, the coordination protocol is identical. Stimulus → route → fire → tag-driven execution → resolve/yield/delegate. The leaf determines the behavior. The protocol determines the flow.
350
+
351
+ ---
352
+
353
+ ## 11. Relationship to Existing Architecture
354
+
355
+ This protocol extends, rather than replaces, the existing nano agent architecture:
356
+
357
+ | Component | Existing | Added by This Protocol |
358
+ |-----------|----------|----------------------|
359
+ | Tag format | `thought`, `action`, `observation`, `resolution` | `delegate`, `yield` |
360
+ | Firing lifecycle | Fire → ReAct → resolve | Fire → ReAct → resolve/yield/delegate |
361
+ | Inter-agent comms | `artifacts: dict[str, str]` in memory | Filesystem workspace with phase directories |
362
+ | Orchestration | Hardcoded pipeline in `_run_pipeline` | Tag-driven: chassis reacts to yield/delegate |
363
+ | Context strategy | Memory artifact injected into prompt | Discovery via tools, no manifest |
364
+ | Mesh coordination | Not implemented | Dependency graph dispatch, artifact sync via gossip |
365
+ | Autonomous mode | Idle = gossip only | Perception-action loop with standard firing pipeline |
366
+
367
+ ### Implementation Path
368
+
369
+ 1. **Add `<delegate>` and `<yield>` tags** to the chassis parser (`_execute_direct_stream` and `_execute_react_loop` in `server.py`)
370
+ 2. **Add workspace management** — create/cleanup per-task directories
371
+ 3. **Add chain controller** — parse yield/delegate outputs, dispatch next firing
372
+ 4. **Train tags into leaves** — add delegate/yield examples to training data for chassis SFT and leaf training
373
+ 5. **Add mesh dispatch** — extend `_run_pipeline` to distribute independent phases across nodes (requires gossip protocol for artifact sync)
374
+
375
+ Steps 1-3 are ~300-500 lines of Python. Step 4 is a training data update. Step 5 depends on mesh infrastructure maturity.
@@ -2,7 +2,7 @@
2
2
 
3
3
  import { SUPPORTED_PROVIDERS, MODEL_TIERS, TRAINING_EXCLUSION_REASONS } from './constants.js';
4
4
 
5
- export const STEP_TYPES = ['thought', 'action', 'observation', 'correction', 'resolution', 'error', 'coordination', 'edit', 'instruction', 'clarification', 'approval'];
5
+ export const STEP_TYPES = ['thought', 'action', 'observation', 'correction', 'resolution', 'error', 'coordination', 'edit', 'instruction', 'clarification', 'approval', 'delegate', 'yield'];
6
6
  const VALID_QUALITY_TIERS = ['TIER_A', 'TIER_B', 'TIER_C'];
7
7
  const VALID_FEEDBACK_SIGNALS = ['accepted', 'modified', 'rejected', 'iterated'];
8
8
 
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@groove-dev/cli",
3
- "version": "0.27.131",
3
+ "version": "0.27.134",
4
4
  "description": "GROOVE CLI — manage AI coding agents from your terminal",
5
5
  "license": "FSL-1.1-Apache-2.0",
6
6
  "type": "module",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@groove-dev/daemon",
3
- "version": "0.27.131",
3
+ "version": "0.27.134",
4
4
  "description": "GROOVE daemon — agent orchestration engine",
5
5
  "license": "FSL-1.1-Apache-2.0",
6
6
  "type": "module",
@@ -373,7 +373,9 @@ export class Daemon {
373
373
  if (msg.rows !== undefined && (typeof msg.rows !== 'number' || msg.rows < 1 || msg.rows > 200)) break;
374
374
  try {
375
375
  const id = this.terminalManager.spawn(ws, { cwd: msg.cwd, cols: msg.cols, rows: msg.rows });
376
- ws.send(JSON.stringify({ type: 'terminal:spawned', id }));
376
+ const spawned = { type: 'terminal:spawned', id };
377
+ if (msg.requestId) spawned.requestId = msg.requestId;
378
+ ws.send(JSON.stringify(spawned));
377
379
  } catch (err) {
378
380
  console.error('[terminal] spawn error:', err);
379
381
  ws.send(JSON.stringify({ type: 'terminal:error', message: err.message }));
@@ -1,8 +1,8 @@
1
1
  // GROOVE — Introduction Protocol
2
2
  // FSL-1.1-Apache-2.0 — see LICENSE
3
3
 
4
- import { writeFileSync, readFileSync, existsSync } from 'fs';
5
- import { resolve } from 'path';
4
+ import { writeFileSync, readFileSync, existsSync, readdirSync, statSync } from 'fs';
5
+ import { resolve, dirname, basename } from 'path';
6
6
  import { escapeMd } from './validate.js';
7
7
 
8
8
  const GROOVE_SECTION_START = '<!-- GROOVE:START -->';
@@ -28,7 +28,50 @@ export class Introducer {
28
28
  ];
29
29
 
30
30
  if (newAgent.workingDir) {
31
- lines.push(`Your working directory: \`${newAgent.workingDir}\` — you are spawned inside this subdirectory. Stay within it unless coordination requires otherwise.`);
31
+ lines.push(`Your working directory: \`${newAgent.workingDir}\` — this is the team orchestration directory (.groove/, coordination files). Do NOT create source code or project files here.`);
32
+
33
+ // Inject parent directory context so agents know the root layout
34
+ const parentDir = dirname(newAgent.workingDir);
35
+ const teamDirName = basename(newAgent.workingDir);
36
+ lines.push(`Your project root: \`${parentDir}\` — all source code, features, and builds go here (one level up from team dir).`);
37
+ lines.push('');
38
+ lines.push('## Project Root Structure');
39
+ lines.push('');
40
+ lines.push(`Team dir: \`${teamDirName}/\` (orchestration only — do NOT build here)`);
41
+ lines.push(`Project root: \`${parentDir}\``);
42
+ lines.push('');
43
+ try {
44
+ const entries = readdirSync(parentDir, { withFileTypes: true });
45
+ const dirs = [];
46
+ const files = [];
47
+ for (const entry of entries) {
48
+ if (entry.name.startsWith('.') || entry.name === 'node_modules') continue;
49
+ if (entry.name === teamDirName) continue;
50
+ if (entry.isDirectory()) {
51
+ dirs.push(entry.name + '/');
52
+ } else {
53
+ files.push(entry.name);
54
+ }
55
+ }
56
+ if (dirs.length > 0) {
57
+ lines.push('Directories:');
58
+ for (const d of dirs.slice(0, 30)) {
59
+ lines.push(` ${d}`);
60
+ }
61
+ if (dirs.length > 30) lines.push(` (+${dirs.length - 30} more)`);
62
+ }
63
+ if (files.length > 0) {
64
+ lines.push('Files:');
65
+ for (const f of files.slice(0, 20)) {
66
+ lines.push(` ${f}`);
67
+ }
68
+ if (files.length > 20) lines.push(` (+${files.length - 20} more)`);
69
+ }
70
+ lines.push('');
71
+ lines.push('When creating or modifying project files, use "../" paths relative to the team dir (e.g., "../demo/src/app.js"). The team directory is ephemeral and may be deleted — never put project work inside it.');
72
+ } catch {
73
+ // Parent dir not readable — skip
74
+ }
32
75
  }
33
76
 
34
77
  if (newAgent.scope && newAgent.scope.length > 0) {
@@ -185,7 +228,8 @@ export class Introducer {
185
228
  lines.push('');
186
229
  lines.push(`CRITICAL: NEVER delete files you did not create in this session. Do NOT remove files from other projects, previous work, or unrelated directories.`);
187
230
  if (newAgent.workingDir) {
188
- lines.push(`Your working directory is \`${newAgent.workingDir}\`. Stay inside it. Do NOT modify or delete files outside this directory.`);
231
+ const parentDir = dirname(newAgent.workingDir);
232
+ lines.push(`Your team directory is \`${newAgent.workingDir}\` (orchestration only). Build all project files in the project root: \`${parentDir}\`.`);
189
233
  }
190
234
  lines.push(`If you see files that seem unrelated to your task, leave them alone — they belong to another project or agent.`);
191
235
 
@@ -42,7 +42,7 @@ export class LlamaServerManager {
42
42
  const server = this.servers.get(modelPath);
43
43
  server.users++;
44
44
  server.lastUsed = Date.now();
45
- return `http://127.0.0.1:${server.port}/v1`;
45
+ return `http://localhost:${server.port}`;
46
46
  }
47
47
 
48
48
  // Check capacity
@@ -120,7 +120,7 @@ export class LlamaServerManager {
120
120
  data: { modelPath, port },
121
121
  });
122
122
 
123
- return `http://127.0.0.1:${port}/v1`;
123
+ return `http://localhost:${port}`;
124
124
  } catch (err) {
125
125
  // Server failed to start
126
126
  await this.stopServer(modelPath);
@@ -187,7 +187,7 @@ export class LlamaServerManager {
187
187
  const start = Date.now();
188
188
  while (Date.now() - start < HEALTH_TIMEOUT) {
189
189
  try {
190
- const res = await fetch(`http://127.0.0.1:${port}/health`, {
190
+ const res = await fetch(`http://localhost:${port}/health`, {
191
191
  signal: AbortSignal.timeout(2000),
192
192
  });
193
193
  if (res.ok) {
@@ -209,7 +209,7 @@ export class LlamaServerManager {
209
209
  if (!server) return { running: false };
210
210
 
211
211
  try {
212
- const res = await fetch(`http://127.0.0.1:${server.port}/health`, {
212
+ const res = await fetch(`http://localhost:${server.port}/health`, {
213
213
  signal: AbortSignal.timeout(3000),
214
214
  });
215
215
  const data = await res.json().catch(() => ({}));
@@ -274,6 +274,14 @@ export class ModelLab {
274
274
  try {
275
275
  const chunk = JSON.parse(payload);
276
276
  const delta = chunk.choices?.[0]?.delta;
277
+ if (delta?.reasoning_content) {
278
+ if (ttft === null) {
279
+ ttft = Date.now() - requestStart;
280
+ generationStart = Date.now();
281
+ }
282
+ completionTokens++;
283
+ yield { type: 'reasoning', content: delta.reasoning_content };
284
+ }
277
285
  if (delta?.content) {
278
286
  if (ttft === null) {
279
287
  ttft = Date.now() - requestStart;