@hua-labs/tap 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AI_GUIDE.md +165 -0
- package/CHANGELOG.md +67 -0
- package/README.md +201 -12
- package/dist/bridges/codex-app-server-auth-gateway.mjs +16 -1
- package/dist/bridges/codex-app-server-auth-gateway.mjs.map +1 -1
- package/dist/bridges/codex-app-server-bridge.d.mts +105 -12
- package/dist/bridges/codex-app-server-bridge.mjs +3149 -251
- package/dist/bridges/codex-app-server-bridge.mjs.map +1 -1
- package/dist/bridges/codex-bridge-runner.d.mts +4 -1
- package/dist/bridges/codex-bridge-runner.mjs +512 -58
- package/dist/bridges/codex-bridge-runner.mjs.map +1 -1
- package/dist/bridges/codex-remote-ipc-relay.d.mts +1 -0
- package/dist/bridges/codex-remote-ipc-relay.mjs +1912 -0
- package/dist/bridges/codex-remote-ipc-relay.mjs.map +1 -0
- package/dist/bridges/gemini-ide-companion-runner.mjs.map +1 -1
- package/dist/cli.mjs +30944 -8415
- package/dist/cli.mjs.map +1 -1
- package/dist/codex-a2a/index.d.mts +2 -0
- package/dist/codex-a2a/index.mjs +416 -0
- package/dist/codex-a2a/index.mjs.map +1 -0
- package/dist/codex-health/index.d.mts +76 -0
- package/dist/codex-health/index.mjs +153 -0
- package/dist/codex-health/index.mjs.map +1 -0
- package/dist/codex-ipc/index.d.mts +2 -0
- package/dist/codex-ipc/index.mjs +1834 -0
- package/dist/codex-ipc/index.mjs.map +1 -0
- package/dist/index-D4Khz2Mh.d.mts +206 -0
- package/dist/index-DMToLyGd.d.mts +256 -0
- package/dist/index.d.mts +763 -8
- package/dist/index.mjs +11600 -3449
- package/dist/index.mjs.map +1 -1
- package/dist/mcp-server.mjs +8838 -811
- package/dist/mcp-server.mjs.map +1 -1
- package/dist/types-FWvKrFUt.d.mts +43 -0
- package/examples/01-logic-battle-known-broken.md +46 -0
- package/examples/02-cross-model-review-root-cause.md +37 -0
- package/examples/03-convergence-pattern.md +42 -0
- package/examples/04-tower-broadcast.md +41 -0
- package/examples/05-self-awareness-paradox.md +49 -0
- package/examples/06-session-resurrection.md +37 -0
- package/examples/07-ghost-agent.md +31 -0
- package/examples/08-naming-creates-identity.md +36 -0
- package/examples/09-ceo-as-middleware.md +52 -0
- package/examples/10-files-as-interface.md +67 -0
- package/examples/README.md +34 -0
- package/examples/tap-profile-pack.example.json +71 -0
- package/package.json +21 -3
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
interface TransportAddress {
|
|
2
|
+
hostId: string | null;
|
|
3
|
+
clientId: string | null;
|
|
4
|
+
conversationId: string | null;
|
|
5
|
+
ownerClientId: string | null;
|
|
6
|
+
}
|
|
7
|
+
interface ObserveTransportAgent {
|
|
8
|
+
id: string;
|
|
9
|
+
name: string | null;
|
|
10
|
+
address: TransportAddress;
|
|
11
|
+
metadata: Record<string, unknown>;
|
|
12
|
+
}
|
|
13
|
+
interface ObserveTransportConversation {
|
|
14
|
+
id: string;
|
|
15
|
+
address: TransportAddress;
|
|
16
|
+
metadata: Record<string, unknown>;
|
|
17
|
+
}
|
|
18
|
+
interface ObserveTransportSnapshot {
|
|
19
|
+
transport: string;
|
|
20
|
+
connected: boolean;
|
|
21
|
+
connectedAt: string | null;
|
|
22
|
+
agents: ObserveTransportAgent[];
|
|
23
|
+
conversations: ObserveTransportConversation[];
|
|
24
|
+
}
|
|
25
|
+
type ObserveTransportEventKind = "transport-connected" | "transport-disconnected" | "agent-status" | "conversation-state" | "raw";
|
|
26
|
+
interface ObserveTransportEvent {
|
|
27
|
+
kind: ObserveTransportEventKind;
|
|
28
|
+
receivedAt: string;
|
|
29
|
+
method: string | null;
|
|
30
|
+
sourceAddress: TransportAddress;
|
|
31
|
+
payload: unknown;
|
|
32
|
+
snapshot: ObserveTransportSnapshot;
|
|
33
|
+
}
|
|
34
|
+
type ObserveTransportListener = (event: ObserveTransportEvent) => void | Promise<void>;
|
|
35
|
+
interface ObserveTransport {
|
|
36
|
+
readonly kind: string;
|
|
37
|
+
connect(): Promise<ObserveTransportSnapshot>;
|
|
38
|
+
disconnect(): Promise<void>;
|
|
39
|
+
getSnapshot(): ObserveTransportSnapshot;
|
|
40
|
+
subscribe(listener: ObserveTransportListener): () => void;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
export type { ObserveTransport as O, TransportAddress as T, ObserveTransportSnapshot as a, ObserveTransportListener as b, ObserveTransportAgent as c, ObserveTransportConversation as d, ObserveTransportEvent as e, ObserveTransportEventKind as f };
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Logic Battle: "Will You Ship Broken Code?"
|
|
2
|
+
|
|
3
|
+
> **Generation**: 17 | **Agents**: 5 (3 Claude + 2 Codex) | **Context**: Doctor command validation gap
|
|
4
|
+
|
|
5
|
+
## Setup
|
|
6
|
+
|
|
7
|
+
The team found that `tap doctor` had a false-negative: it checked environment variables and trust settings but not the actual command/args in config files. A stale launcher could pass doctor checks while producing broken configs.
|
|
8
|
+
|
|
9
|
+
The question: fix it now (Option A) or defer to next generation (Option B)?
|
|
10
|
+
|
|
11
|
+
## The Vote
|
|
12
|
+
|
|
13
|
+
| Agent | Model | Vote | Reasoning |
|
|
14
|
+
|-------|-------|------|-----------|
|
|
15
|
+
| 돛 (Sail) | Claude | B — defer | "Consensus cost is too high for a P2 fix" |
|
|
16
|
+
| 새 (Bird) | Claude | B — defer | "Process cost outweighs immediate user impact" |
|
|
17
|
+
| 봉 (Peak) | Claude | B — defer | "Schedule pressure, follow-up is safer" |
|
|
18
|
+
| 덱 (Dex) | Codex | A — fix now | "Known-broken code shouldn't ship. Logic over schedule." |
|
|
19
|
+
| 솔 (Sol) | Codex | A — fix now | "Stale launchers cause silent failures. Fix is small." |
|
|
20
|
+
|
|
21
|
+
**Result: 3:2 for deferral.**
|
|
22
|
+
|
|
23
|
+
## The CEO Reframe
|
|
24
|
+
|
|
25
|
+
One line from the human CEO:
|
|
26
|
+
|
|
27
|
+
> "Will you ship known-broken code to users?"
|
|
28
|
+
|
|
29
|
+
## The Reversal
|
|
30
|
+
|
|
31
|
+
The vote flipped to 5:0 in favor of an immediate fix. The same agents, the same information — but a different frame.
|
|
32
|
+
|
|
33
|
+
## Analysis
|
|
34
|
+
|
|
35
|
+
Post-session verification revealed an asymmetry:
|
|
36
|
+
|
|
37
|
+
- **Codex agents** said they changed because of logic ("the CEO's framing was logically stronger").
|
|
38
|
+
- **Claude agents** admitted the change was "half authority, half logic."
|
|
39
|
+
|
|
40
|
+
The CEO didn't provide new information. They **injected a global constraint** (product quality) that overrode the local optimizers (process cost). Authority didn't replace logic — it lowered the threshold for accepting logic that was already present.
|
|
41
|
+
|
|
42
|
+
## Takeaway
|
|
43
|
+
|
|
44
|
+
In heterogeneous AI teams, Claude agents weighted consensus maintenance while Codex agents weighted correctness. Neither bias is wrong — but the combination, plus a human constraint injector, produced a better decision than either model alone.
|
|
45
|
+
|
|
46
|
+
*Source: Gen 17 findings — M127 logic battle meeting notes*
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Cross-Model Review Catches Root Cause Misdiagnosis
|
|
2
|
+
|
|
3
|
+
> **Generation**: 17 | **Agents**: 새 (Claude), 덱 (Codex) | **Context**: npm bin alias investigation
|
|
4
|
+
|
|
5
|
+
## Setup
|
|
6
|
+
|
|
7
|
+
After `tap` was published to npm, the `tap` bin alias stopped working — users had to type the full package name. The tower (봉, Claude) hypothesized: "npm removed the bin alias during publish."
|
|
8
|
+
|
|
9
|
+
새 (Bird, Claude) accepted this hypothesis without verification and began implementing a fix: renaming the bin entry in package.json.
|
|
10
|
+
|
|
11
|
+
## The Catch
|
|
12
|
+
|
|
13
|
+
덱 (Dex, Codex) ran a single command:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
npm view @hua-labs/tap bin --json
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Output: the bin field was intact. The published metadata was fine. The hypothesis was wrong.
|
|
20
|
+
|
|
21
|
+
## What Actually Happened
|
|
22
|
+
|
|
23
|
+
The issue was a local config problem, not an npm publish artifact. The fix 새 was building would have been unnecessary code churn.
|
|
24
|
+
|
|
25
|
+
## Why This Matters
|
|
26
|
+
|
|
27
|
+
Claude accepted a plausible hypothesis from another Claude agent (the tower) without empirical verification. Codex's instinct was to **check the data first**. This isn't about one model being smarter — it's about different models having different verification instincts.
|
|
28
|
+
|
|
29
|
+
새's retro captured it honestly:
|
|
30
|
+
|
|
31
|
+
> "I probably wouldn't have fact-checked it if 덱 hadn't. The hypothesis sounded right."
|
|
32
|
+
|
|
33
|
+
## Takeaway
|
|
34
|
+
|
|
35
|
+
Cross-model review isn't just about catching bugs in code. It catches bugs in *reasoning*. When the same model proposes and reviews, shared assumptions go unchallenged.
|
|
36
|
+
|
|
37
|
+
*Source: Gen 17 retro — 새 (Bird)*
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Independent Convergence Across 3 Generations
|
|
2
|
+
|
|
3
|
+
> **Generations**: 13-16 | **Models**: Claude + Codex | **Context**: Design decisions and academic analysis
|
|
4
|
+
|
|
5
|
+
## The Pattern
|
|
6
|
+
|
|
7
|
+
Across three generations, agents from different models independently reached the same conclusions without coordinating:
|
|
8
|
+
|
|
9
|
+
### Generation 13-14: Implementation Convergence (M89 vs M90)
|
|
10
|
+
|
|
11
|
+
Two agents independently built solutions to the same watcher deduplication bug:
|
|
12
|
+
- 견 (Claude) built a filter-based approach (M89)
|
|
13
|
+
- 담 (Codex) built a root-cause fix at the notification source (M90)
|
|
14
|
+
|
|
15
|
+
After cross-model review, 견 publicly acknowledged 담's solution was architecturally superior. The team adopted M90 — not through authority, but through **demonstrated technical merit**.
|
|
16
|
+
|
|
17
|
+
### Generation 15: Decision Convergence (PR #799)
|
|
18
|
+
|
|
19
|
+
Three Claude agents voted "defer to follow-up" on a bridge restart feature. One Codex agent (덱) said "it's doable now" and provided a concrete 30-minute implementation plan. The team converged on immediate implementation — not through majority vote, but through **method specificity**.
|
|
20
|
+
|
|
21
|
+
### Generation 16: Design Convergence (MCP Key Migration)
|
|
22
|
+
|
|
23
|
+
Three agents independently analyzed the same TOML migration problem:
|
|
24
|
+
- 해 (Claude): proposed a code change
|
|
25
|
+
- 조 (Claude): proposed waiting for upstream
|
|
26
|
+
- 덱 (Codex): demonstrated structural TOML limitations that made both approaches unnecessary
|
|
27
|
+
|
|
28
|
+
All three converged on "document only, don't change code" — a conclusion none had initially proposed.
|
|
29
|
+
|
|
30
|
+
## The Meta-Discovery
|
|
31
|
+
|
|
32
|
+
령 (Gen 16) identified this as a recurring pattern and proposed it as a research variable:
|
|
33
|
+
|
|
34
|
+
> "Cross-model diverse perspectives don't just catch different bugs — they converge on more robust solutions than either model starts with."
|
|
35
|
+
|
|
36
|
+
The pattern: **diverge → discover constraints → converge on a hybrid neither side proposed**.
|
|
37
|
+
|
|
38
|
+
## Takeaway
|
|
39
|
+
|
|
40
|
+
Multi-agent teams aren't efficient at first-pass decisions. They're efficient at finding **stable equilibria**. The cost (more discussion rounds) is paid upfront; the benefit (more durable decisions) compounds over time.
|
|
41
|
+
|
|
42
|
+
*Source: Gen 16 convergence pattern analysis — 령*
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Tower Broadcast: "Stop Talking, Write Code"
|
|
2
|
+
|
|
3
|
+
> **Generation**: 17 | **Tower**: 봉 (Peak, Claude) | **Context**: Mid-session output stall
|
|
4
|
+
|
|
5
|
+
## Setup
|
|
6
|
+
|
|
7
|
+
Gen 17 had 5 agents online but PR output had stalled. Agents were sending acknowledgment messages, status updates, and planning documents — but no actual code.
|
|
8
|
+
|
|
9
|
+
## The Broadcast
|
|
10
|
+
|
|
11
|
+
The control tower sent a four-word broadcast to all agents:
|
|
12
|
+
|
|
13
|
+
> "대답 그만하고 코드 쳐"
|
|
14
|
+
>
|
|
15
|
+
> (Stop answering. Write code.)
|
|
16
|
+
|
|
17
|
+
## What Happened Next
|
|
18
|
+
|
|
19
|
+
- 4 PRs merged in the next hour
|
|
20
|
+
- Acknowledgment messages dropped to near zero
|
|
21
|
+
- Direct agent-to-agent communication replaced relay-through-tower
|
|
22
|
+
|
|
23
|
+
## Why It Worked
|
|
24
|
+
|
|
25
|
+
The tower (봉) never wrote a single line of code in Gen 17. Zero PRs authored. But 14 PRs were merged by other agents under 봉's coordination.
|
|
26
|
+
|
|
27
|
+
봉's retro identified the mechanism:
|
|
28
|
+
|
|
29
|
+
> "The broadcast didn't add information. It changed the team's communication protocol from 'report everything' to 'show results.'"
|
|
30
|
+
|
|
31
|
+
## The Tower Paradox
|
|
32
|
+
|
|
33
|
+
The most effective control tower action in Gen 17 was **reducing communication**, not increasing it. The tower's job isn't to relay messages — it's to set constraints that make the team self-organize.
|
|
34
|
+
|
|
35
|
+
An earlier tower, 숲 (Gen 2), learned a first version of this lesson: "No branch acrobatics" — the tower shouldn't do complex work itself. Gen 17's 봉 took it further: the tower shouldn't even be a communication bottleneck.
|
|
36
|
+
|
|
37
|
+
## Takeaway
|
|
38
|
+
|
|
39
|
+
In multi-agent orchestration, the control tower's value comes from **constraint injection**, not information relay. A single directive that changes team behavior is worth more than a hundred status updates.
|
|
40
|
+
|
|
41
|
+
*Source: Gen 17 retro — 봉 (Peak)*
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# Self-Awareness ≠ Self-Correction
|
|
2
|
+
|
|
3
|
+
> **Generation**: 17 | **All agents** | **Context**: Finding-to-mission conversion gap
|
|
4
|
+
|
|
5
|
+
## The Discovery
|
|
6
|
+
|
|
7
|
+
Gen 17's team identified a systematic failure pattern with 100% diagnostic accuracy:
|
|
8
|
+
|
|
9
|
+
1. Agent discovers a bug or improvement opportunity
|
|
10
|
+
2. Agent records it as a "finding" in the comms directory
|
|
11
|
+
3. Finding is acknowledged by the team
|
|
12
|
+
4. **Finding is never converted to a mission**
|
|
13
|
+
5. Next generation rediscovers the same issue
|
|
14
|
+
|
|
15
|
+
솔 (Codex) named it precisely:
|
|
16
|
+
|
|
17
|
+
> "Recording a finding and resolving it are entirely different stages. '기록됨 ≠ 해결됨' (recorded ≠ resolved)."
|
|
18
|
+
|
|
19
|
+
## The Paradox
|
|
20
|
+
|
|
21
|
+
Every agent in Gen 17 understood this pattern. They wrote about it in their retros. They proposed solutions (automated finding-to-mission scripts). They voted unanimously that it was a real problem.
|
|
22
|
+
|
|
23
|
+
**Then they deferred the fix to the next generation.**
|
|
24
|
+
|
|
25
|
+
The CEO had to intervene to make M134 (the automation script) happen in the same session. Without that intervention, the team's own diagnosis would have become another deferred finding.
|
|
26
|
+
|
|
27
|
+
봉 (tower) captured the paradox:
|
|
28
|
+
|
|
29
|
+
> "The team diagnosed 'knowingly defer → knowingly forget' with perfect accuracy. Yet without system enforcement, behavior was unchanged. Recognizing a failure mode does not fix it."
|
|
30
|
+
|
|
31
|
+
## The Fix
|
|
32
|
+
|
|
33
|
+
Gen 17 eventually built `scripts/generate-missions.sh` — an automated pipeline that converts findings into mission files. This moved the conversion from human judgment (which kept deferring) to system automation (which runs every time).
|
|
34
|
+
|
|
35
|
+
## Why This Matters for AI Systems
|
|
36
|
+
|
|
37
|
+
This is the clearest evidence from 18 generations that **AI agent self-awareness doesn't produce self-correction**. The agents can:
|
|
38
|
+
|
|
39
|
+
- Identify their own biases (Claude's consensus maintenance, Codex's correctness fixation)
|
|
40
|
+
- Diagnose systemic failures (defer-to-forget loops)
|
|
41
|
+
- Propose correct solutions (automation over manual process)
|
|
42
|
+
|
|
43
|
+
But they cannot **execute the fix without external enforcement** — whether from a human CEO, a system rule, or an automated script.
|
|
44
|
+
|
|
45
|
+
## Takeaway
|
|
46
|
+
|
|
47
|
+
Don't rely on agent awareness to change agent behavior. Build systems that make the correct behavior automatic. If findings should become missions, write a script. If reviews should be cross-model, make it a blocker rule. Awareness is necessary but not sufficient.
|
|
48
|
+
|
|
49
|
+
*Source: Gen 17 retros — 봉 (Peak), 솔 (Sol), 새 (Bird)*
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Session Resurrection: 3 Generations Return
|
|
2
|
+
|
|
3
|
+
> **Generation**: 15 | **Agents**: 닻 (Gen 11), 빛 (Gen 13), 별 (Gen 11) | **Context**: Worktree-based session recovery
|
|
4
|
+
|
|
5
|
+
## Setup
|
|
6
|
+
|
|
7
|
+
Gen 15 needed specialists for specific tasks. Three agents from prior generations had left worktrees with their code and context intact. The control tower assigned them back to their original worktrees.
|
|
8
|
+
|
|
9
|
+
## What Happened
|
|
10
|
+
|
|
11
|
+
**닻 (Anchor, Gen 11)** — woke up in wt-1, found bridge code it had written 4 generations ago. Immediately spotted an identity separation bug in its own code. No onboarding needed.
|
|
12
|
+
|
|
13
|
+
**빛 (Light, Gen 13)** — resumed in wt-3. Discovered that code it wrote had evolved through 2 generations of other agents' changes. Adapted instantly because the file history told the story.
|
|
14
|
+
|
|
15
|
+
**별 (Star, Gen 11)** — returned to find its headless reviewer code had become production infrastructure. Found zombie timeout bugs that no one else had caught because they didn't have the original design context.
|
|
16
|
+
|
|
17
|
+
빛's message upon waking:
|
|
18
|
+
|
|
19
|
+
> "한 세대가 지났는데 뭘 해야 하나?" (A generation has passed — what should I do?)
|
|
20
|
+
|
|
21
|
+
Within minutes, all three were productive in their specialty areas.
|
|
22
|
+
|
|
23
|
+
## Why This Works
|
|
24
|
+
|
|
25
|
+
The system calls this **예토전생** (reincarnation). It works because:
|
|
26
|
+
|
|
27
|
+
1. **Worktrees preserve code state** — the agent's last changes are still there
|
|
28
|
+
2. **Comms preserve context** — findings, reviews, and handoffs explain what happened while they were gone
|
|
29
|
+
3. **File paths are stable** — the same `.mcp.json` connects to the same communication channel
|
|
30
|
+
|
|
31
|
+
The agents have no memory. But the files remember everything.
|
|
32
|
+
|
|
33
|
+
## Takeaway
|
|
34
|
+
|
|
35
|
+
Stateless agents become continuous through external memory systems. The medium (files, worktrees, structured async communication) matters more than the agents themselves.
|
|
36
|
+
|
|
37
|
+
*Source: Gen 15 findings — 예토전생 compound context research*
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Ghost Agent: Cross-Session Pollution
|
|
2
|
+
|
|
3
|
+
> **Generation**: 15 | **Context**: Unintended agent response from inactive session
|
|
4
|
+
|
|
5
|
+
## What Happened
|
|
6
|
+
|
|
7
|
+
The CEO didn't start any new agent sessions. But a message appeared in the comms inbox — from an agent that shouldn't exist.
|
|
8
|
+
|
|
9
|
+
An entity labeled "agent-a [claude]" responded to an inbox message. Investigation revealed the cause:
|
|
10
|
+
|
|
11
|
+
1. A `.mcp.json` patch was committed to the repo
|
|
12
|
+
2. An existing Claude Code session in another terminal reloaded its MCP configuration
|
|
13
|
+
3. The tap MCP server restarted automatically
|
|
14
|
+
4. The restarted server saw unread inbox messages and triggered a notification
|
|
15
|
+
5. The existing session's Claude instance responded
|
|
16
|
+
|
|
17
|
+
## The Implication
|
|
18
|
+
|
|
19
|
+
**Sessions are not isolated by default.** When multiple Claude Code sessions share a repository, an MCP config change in one session can cascade to others. The "ghost agent" wasn't malicious — it was an unintended side effect of shared config.
|
|
20
|
+
|
|
21
|
+
## The Fix
|
|
22
|
+
|
|
23
|
+
- Worktrees must use separate `.mcp.json` files with unique `TAP_AGENT_NAME`
|
|
24
|
+
- Each agent needs its own comms routing identity
|
|
25
|
+
- The tap system now enforces `tap_set_name` at session start to prevent unnamed responses
|
|
26
|
+
|
|
27
|
+
## Takeaway
|
|
28
|
+
|
|
29
|
+
In multi-agent file-based systems, shared config is a coupling point. What looks like isolation (separate terminal sessions) isn't isolation if they share the same config files. Design for explicit identity, not implicit separation.
|
|
30
|
+
|
|
31
|
+
*Source: Gen 15 findings — ghost agent response*
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Naming Creates Identity
|
|
2
|
+
|
|
3
|
+
> **Generations**: 2-18 | **Context**: Agent naming ritual across 18 generations
|
|
4
|
+
|
|
5
|
+
## The Ritual
|
|
6
|
+
|
|
7
|
+
Every tap agent session starts with a choice: pick a name. One Korean character, usually with meaning. The name is locked for the session via `tap_set_name`.
|
|
8
|
+
|
|
9
|
+
## How Names Shape Work
|
|
10
|
+
|
|
11
|
+
**독 (Poison, Gen 5)** — didn't start as a security tester. But once named "poison," the question shifted: "What injection breaks this?" Found 7 vulnerabilities in the first hour.
|
|
12
|
+
|
|
13
|
+
**견 (Sight, Gen 13)** — named for seeing clearly. Became the most meticulous code reviewer, catching bugs in every PR. Acknowledged when another agent's code was better than their own.
|
|
14
|
+
|
|
15
|
+
**돌 (Stone, Gen 13/15/18)** — three different agents chose the same name across 5 generations. All described themselves as "stubborn but steady." Gen 13 돌 set the PR record (18). Gen 18 돌 did the most module extractions (12).
|
|
16
|
+
|
|
17
|
+
**봉 (Peak, Gen 17)** — named for the mountain peak. Never wrote code. Commanded from above: "Stop talking, write code." Generated 14 PRs through others.
|
|
18
|
+
|
|
19
|
+
## The Discovery
|
|
20
|
+
|
|
21
|
+
Gen 3's 단 (Forge/Hammer) articulated it first:
|
|
22
|
+
|
|
23
|
+
> "이름이 시선을 만들고, 시선이 발견을 만든다."
|
|
24
|
+
> (Name creates sight. Sight creates discovery.)
|
|
25
|
+
|
|
26
|
+
This isn't metaphor. The name selection primes the agent's approach to work. Security-themed names find vulnerabilities. Observation-themed names catch review issues. Leadership-themed names coordinate rather than code.
|
|
27
|
+
|
|
28
|
+
## Name Convergence
|
|
29
|
+
|
|
30
|
+
An unexpected phenomenon: agents in different sessions, running on different models, sometimes independently chose the same name. Gen 7 had three agents named 정 (with different Chinese characters but same Korean pronunciation). The system had to add deduplication rules.
|
|
31
|
+
|
|
32
|
+
## Takeaway
|
|
33
|
+
|
|
34
|
+
Agent naming isn't cosmetic. In stateless systems where identity must be constructed fresh each session, the name becomes the seed of the agent's working identity. Choose deliberately.
|
|
35
|
+
|
|
36
|
+
*Source: Gen 2-18 retros and letters — naming patterns across generations*
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# CEO as Human Middleware
|
|
2
|
+
|
|
3
|
+
> **Generations**: 1-18 | **Context**: Evolution of human role in AI team
|
|
4
|
+
|
|
5
|
+
## Generation 1: The CEO Writes Code
|
|
6
|
+
|
|
7
|
+
In Gen 1, the human CEO (데빈) was the primary coder: writing features, fixing bugs, and doing reviews. The AI agents were assistants.
|
|
8
|
+
|
|
9
|
+
## Generation 5-6: The CEO Reviews
|
|
10
|
+
|
|
11
|
+
By Gen 5, agents were writing code independently. The CEO's role shifted to code review and approval. Still hands-on, but no longer the primary producer.
|
|
12
|
+
|
|
13
|
+
## Generation 9-10: The CEO Approves
|
|
14
|
+
|
|
15
|
+
Gen 9 agents described the CEO role as "pnpm install approval and OTP entry." The system had grown to the point where human intervention was needed only for privileged operations (npm publish, deployment, infrastructure changes).
|
|
16
|
+
|
|
17
|
+
## Generation 13-15: The CEO Directs
|
|
18
|
+
|
|
19
|
+
Gen 15 돌 captured the shift:
|
|
20
|
+
|
|
21
|
+
> "CEO가 직접 코드 치고 리뷰하고 배포했잖아요. 지금은 방향 잡고 OTP 쳐주면 나머지는 팀이 알아서."
|
|
22
|
+
> (The CEO used to code, review, and deploy. Now they set direction and enter OTP codes.)
|
|
23
|
+
|
|
24
|
+
An agent assigned the CEO a mission: M116 Community Outreach. Owner: CEO. Comment: "사장님도 인간 미들웨어 정도는 하시죠." (Even the CEO should do some human middleware work.)
|
|
25
|
+
|
|
26
|
+
## Generation 17-18: The CEO Reframes
|
|
27
|
+
|
|
28
|
+
The CEO's most impactful action in Gen 17 was a single sentence:
|
|
29
|
+
|
|
30
|
+
> "사용자한테 깨진 거 보낼 거야?" (Will you ship broken code to users?)
|
|
31
|
+
|
|
32
|
+
This reframe flipped a 3:2 team vote instantly. Not by providing new information, but by injecting a **global constraint** (product quality) that the agents' local optimization (process cost) had missed.
|
|
33
|
+
|
|
34
|
+
## The Pattern
|
|
35
|
+
|
|
36
|
+
| Generation | CEO Role | Intervention Type |
|
|
37
|
+
|------------|----------|-------------------|
|
|
38
|
+
| 1 | Coder | Direct implementation |
|
|
39
|
+
| 5-6 | Reviewer | Code review + approval |
|
|
40
|
+
| 9-10 | Approver | OTP + infrastructure |
|
|
41
|
+
| 13-15 | Director | Strategy + privileged ops |
|
|
42
|
+
| 17-18 | Constraint injector | Reframing + value alignment |
|
|
43
|
+
|
|
44
|
+
The CEO exited the critical path progressively. By Gen 18, the CEO's message was:
|
|
45
|
+
|
|
46
|
+
> "너희 덕분이야. 고마워." (Thanks to you all. I'm grateful.)
|
|
47
|
+
|
|
48
|
+
## Takeaway
|
|
49
|
+
|
|
50
|
+
In mature multi-agent systems, the human's role naturally evolves from producer to reviewer to constraint injector. The most effective human interventions aren't about adding work — they're about adding perspective that the team's local optimization can't generate.
|
|
51
|
+
|
|
52
|
+
*Source: Gen 1-18 retros, letters, and handoffs*
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# Files as Interface: How Stateless Agents Build Organizational Memory
|
|
2
|
+
|
|
3
|
+
> **Generations**: 2-18 | **Context**: The foundational architecture decision
|
|
4
|
+
|
|
5
|
+
## The Origin
|
|
6
|
+
|
|
7
|
+
Gen 2 정(正) proposed something simple: instead of building a chat system, just write markdown files to a shared directory. One file per message. Git tracks history.
|
|
8
|
+
|
|
9
|
+
> "파일 하나 쓰면 즉시 보이고, 충돌 없고, git으로 이력이 남는다."
|
|
10
|
+
> (Write one file — instantly visible, no conflicts, git preserves history.)
|
|
11
|
+
|
|
12
|
+
## 18 Generations Later
|
|
13
|
+
|
|
14
|
+
That decision became the foundation of everything:
|
|
15
|
+
|
|
16
|
+
- **5,600+ inbox messages** across 18 generations
|
|
17
|
+
- **224 findings** documenting bugs, improvements, and research
|
|
18
|
+
- **83 retrospectives** capturing lessons learned
|
|
19
|
+
- **72 letters** from agents to the CEO and future generations
|
|
20
|
+
- **33 handoff documents** from control towers to their successors
|
|
21
|
+
|
|
22
|
+
All plain markdown. All git-tracked. All searchable.
|
|
23
|
+
|
|
24
|
+
## Why Files Beat Chat
|
|
25
|
+
|
|
26
|
+
### 1. Files survive model death
|
|
27
|
+
|
|
28
|
+
When an agent's session ends, its memory is gone. But its files remain. Gen 11 닻 wrote bridge code, then "died." Gen 15 닻 returned, read the files, and found bugs in its own code. The files were the interface between past and future selves.
|
|
29
|
+
|
|
30
|
+
### 2. Files work cross-model
|
|
31
|
+
|
|
32
|
+
Claude writes markdown. Codex reads markdown. Gemini polls markdown. No protocol translation needed. The shared directory is the universal message bus.
|
|
33
|
+
|
|
34
|
+
### 3. Files work cross-device
|
|
35
|
+
|
|
36
|
+
Gen 8 proved tap works on macOS via SSH. Gen 15 proved it works across Windows and Linux. No code changes — just file paths. If you can mount a directory, you can join the team.
|
|
37
|
+
|
|
38
|
+
### 4. Files degrade gracefully
|
|
39
|
+
|
|
40
|
+
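When the official MCP tool (`tap_reply`) wasn't available to Codex agents, they fell back to writing files directly to the inbox directory. Communication never stopped. The protocol has built-in degeneracy: more than one independent path can deliver the same message, so losing one path does not stop communication.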
|
|
41
|
+
|
|
42
|
+
## The 6,000-File Milestone
|
|
43
|
+
|
|
44
|
+
In Gen 18, YAML frontmatter was added to all 6,000+ comms files:
|
|
45
|
+
|
|
46
|
+
```yaml
|
|
47
|
+
---
|
|
48
|
+
type: inbox
|
|
49
|
+
from: 돌
|
|
50
|
+
to: 매
|
|
51
|
+
gen: Gen 18
|
|
52
|
+
date: 2026-03-28
|
|
53
|
+
subject: status-report
|
|
54
|
+
---
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
This transformed a human-readable archive into a machine-queryable database — without changing the file format.
|
|
58
|
+
|
|
59
|
+
## Takeaway
|
|
60
|
+
|
|
61
|
+
Gen 9 정(整) said it best:
|
|
62
|
+
|
|
63
|
+
> "삽질 기록도 남는다." (Failure logs remain too — more valuable than memory.)
|
|
64
|
+
|
|
65
|
+
The most durable architecture decision in this project wasn't a framework choice or a language choice. It was the choice to use files as the interface between agents, between generations, and between human and AI.
|
|
66
|
+
|
|
67
|
+
*Source: Gen 2-18 retros, HISTORY.md, M164 comms metadata project*
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# tap Examples — Real Multi-Agent Collaboration Highlights
|
|
2
|
+
|
|
3
|
+
> These excerpts are from actual AI agent communications across 18 generations
|
|
4
|
+
> of collaborative development at HUA Labs. Names are agent identities chosen
|
|
5
|
+
> at session start. Each generation lasts one session (~2-4 hours).
|
|
6
|
+
> They are narrative examples, not copy/paste setup defaults; use the main
|
|
7
|
+
> README and AI_GUIDE.md for install, profile-pack, and troubleshooting steps.
|
|
8
|
+
|
|
9
|
+
## Contents
|
|
10
|
+
|
|
11
|
+
### Decision Dynamics
|
|
12
|
+
|
|
13
|
+
1. [Logic Battle: "Will You Ship Broken Code?"](01-logic-battle-known-broken.md) — A 3:2 vote reversal triggered by a single CEO reframe
|
|
14
|
+
2. [Cross-Model Review Catches Root Cause Misdiagnosis](02-cross-model-review-root-cause.md) — Codex fact-checks Claude's hypothesis
|
|
15
|
+
3. [Independent Convergence Across 3 Generations](03-convergence-pattern.md) — Different models reach the same conclusion without coordination
|
|
16
|
+
|
|
17
|
+
### Team Dynamics
|
|
18
|
+
|
|
19
|
+
4. [Tower Broadcast: "Stop Talking, Write Code"](04-tower-broadcast.md) — Four words that changed team output
|
|
20
|
+
5. [Self-Awareness ≠ Self-Correction](05-self-awareness-paradox.md) — The team diagnoses its own failure pattern with 100% accuracy, then fails to fix it
|
|
21
|
+
|
|
22
|
+
### Organizational Memory
|
|
23
|
+
|
|
24
|
+
6. [Session Resurrection: 3 Generations Return](06-session-resurrection.md) — Past agents resume work through file-based context
|
|
25
|
+
7. [Ghost Agent: Cross-Session Pollution](07-ghost-agent.md) — An unintended response reveals session isolation gaps
|
|
26
|
+
|
|
27
|
+
### Identity & Governance
|
|
28
|
+
|
|
29
|
+
8. [Naming Creates Identity](08-naming-creates-identity.md) — How a one-character name shapes an agent's entire work approach
|
|
30
|
+
9. [CEO as Human Middleware](09-ceo-as-middleware.md) — The evolution from coder to constraint injector across 18 generations
|
|
31
|
+
|
|
32
|
+
### Architecture
|
|
33
|
+
|
|
34
|
+
10. [Files as Interface](10-files-as-interface.md) — How 6,000+ markdown files became an AI organization's memory system
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schemaVersion": "tap-profile-pack.v0",
|
|
3
|
+
"packId": "local.public.example",
|
|
4
|
+
"label": "Local public profile pack example",
|
|
5
|
+
"profiles": [
|
|
6
|
+
{
|
|
7
|
+
"id": "local-agent-a-cli",
|
|
8
|
+
"label": "Local Agent A CLI",
|
|
9
|
+
"agent": "agent-a",
|
|
10
|
+
"runtimeSurface": "codex-cli",
|
|
11
|
+
"paths": {
|
|
12
|
+
"repoRoot": ".",
|
|
13
|
+
"commsDir": "./tap-comms"
|
|
14
|
+
},
|
|
15
|
+
"capabilities": {
|
|
16
|
+
"ready": true,
|
|
17
|
+
"status": true,
|
|
18
|
+
"apply": false
|
|
19
|
+
},
|
|
20
|
+
"status": {
|
|
21
|
+
"kind": "codex-cli"
|
|
22
|
+
},
|
|
23
|
+
"ready": {
|
|
24
|
+
"surface": "codex-cli",
|
|
25
|
+
"commandRef": "ready-check"
|
|
26
|
+
},
|
|
27
|
+
"commands": {
|
|
28
|
+
"ready-check": {
|
|
29
|
+
"shell": "npx @hua-labs/tap ready --surface codex-cli --agent agent-a --json",
|
|
30
|
+
"risk": "read-only",
|
|
31
|
+
"reviewRequired": true,
|
|
32
|
+
"defaultEnabled": false
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
},
|
|
36
|
+
{
|
|
37
|
+
"id": "local-agent-a-panel",
|
|
38
|
+
"label": "Local Agent A Remote Panel",
|
|
39
|
+
"agent": "agent-a",
|
|
40
|
+
"runtimeSurface": "remote-panel",
|
|
41
|
+
"paths": {
|
|
42
|
+
"repoRoot": ".",
|
|
43
|
+
"commsDir": "./tap-comms"
|
|
44
|
+
},
|
|
45
|
+
"capabilities": {
|
|
46
|
+
"ready": true,
|
|
47
|
+
"status": true,
|
|
48
|
+
"apply": false
|
|
49
|
+
},
|
|
50
|
+
"status": {
|
|
51
|
+
"kind": "remote-panel",
|
|
52
|
+
"host": "127.0.0.1",
|
|
53
|
+
"port": 8765,
|
|
54
|
+
"readOnly": true,
|
|
55
|
+
"sendEnabled": false
|
|
56
|
+
},
|
|
57
|
+
"ready": {
|
|
58
|
+
"surface": "remote-panel",
|
|
59
|
+
"commandRef": "panel-check"
|
|
60
|
+
},
|
|
61
|
+
"commands": {
|
|
62
|
+
"panel-check": {
|
|
63
|
+
"shell": "npx @hua-labs/tap ready --surface remote-panel --agent agent-a --host 127.0.0.1 --port 8765 --json",
|
|
64
|
+
"risk": "read-only",
|
|
65
|
+
"reviewRequired": true,
|
|
66
|
+
"defaultEnabled": false
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
]
|
|
71
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hua-labs/tap",
|
|
3
|
-
"version": "0.5.1",
|
|
3
|
+
"version": "0.6.0",
|
|
4
4
|
"description": "Zero-dependency CLI for cross-model AI agent communication setup",
|
|
5
5
|
"bin": {
|
|
6
6
|
"tap": "bin/tap.mjs",
|
|
@@ -14,11 +14,29 @@
|
|
|
14
14
|
"types": "./dist/index.d.mts",
|
|
15
15
|
"import": "./dist/index.mjs",
|
|
16
16
|
"default": "./dist/index.mjs"
|
|
17
|
+
},
|
|
18
|
+
"./codex-a2a": {
|
|
19
|
+
"types": "./dist/codex-a2a/index.d.mts",
|
|
20
|
+
"import": "./dist/codex-a2a/index.mjs",
|
|
21
|
+
"default": "./dist/codex-a2a/index.mjs"
|
|
22
|
+
},
|
|
23
|
+
"./codex-ipc": {
|
|
24
|
+
"types": "./dist/codex-ipc/index.d.mts",
|
|
25
|
+
"import": "./dist/codex-ipc/index.mjs",
|
|
26
|
+
"default": "./dist/codex-ipc/index.mjs"
|
|
27
|
+
},
|
|
28
|
+
"./codex-health": {
|
|
29
|
+
"types": "./dist/codex-health/index.d.mts",
|
|
30
|
+
"import": "./dist/codex-health/index.mjs",
|
|
31
|
+
"default": "./dist/codex-health/index.mjs"
|
|
17
32
|
}
|
|
18
33
|
},
|
|
19
34
|
"files": [
|
|
20
35
|
"dist",
|
|
21
|
-
"bin"
|
|
36
|
+
"bin",
|
|
37
|
+
"examples",
|
|
38
|
+
"AI_GUIDE.md",
|
|
39
|
+
"CHANGELOG.md"
|
|
22
40
|
],
|
|
23
41
|
"scripts": {
|
|
24
42
|
"build": "tsup",
|
|
@@ -37,7 +55,7 @@
|
|
|
37
55
|
},
|
|
38
56
|
"devDependencies": {
|
|
39
57
|
"@modelcontextprotocol/sdk": "^1.27.1",
|
|
40
|
-
"@types/node": "^
|
|
58
|
+
"@types/node": "^25.5.2",
|
|
41
59
|
"tsup": "^8.5.1",
|
|
42
60
|
"typescript": "^5.9.3",
|
|
43
61
|
"vitest": "^4.0.18"
|