@pdhaku0/gemini-cli-agent-sdk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/README.md +109 -0
  2. package/client/index.d.ts +1 -0
  3. package/client/index.js +1 -0
  4. package/client/package.json +1 -0
  5. package/dist/client.d.ts +5 -0
  6. package/dist/client.js +5 -0
  7. package/dist/client.js.map +1 -0
  8. package/dist/common/types.d.ts +191 -0
  9. package/dist/common/types.js +18 -0
  10. package/dist/common/types.js.map +1 -0
  11. package/dist/core/AcpWebSocketTransport.d.ts +25 -0
  12. package/dist/core/AcpWebSocketTransport.js +222 -0
  13. package/dist/core/AcpWebSocketTransport.js.map +1 -0
  14. package/dist/core/AgentChatClient.d.ts +75 -0
  15. package/dist/core/AgentChatClient.js +679 -0
  16. package/dist/core/AgentChatClient.js.map +1 -0
  17. package/dist/core/ToolPermissionManager.d.ts +26 -0
  18. package/dist/core/ToolPermissionManager.js +88 -0
  19. package/dist/core/ToolPermissionManager.js.map +1 -0
  20. package/dist/core/diff-utils.d.ts +1 -0
  21. package/dist/core/diff-utils.js +7 -0
  22. package/dist/core/diff-utils.js.map +1 -0
  23. package/dist/core/stream-utils.d.ts +14 -0
  24. package/dist/core/stream-utils.js +57 -0
  25. package/dist/core/stream-utils.js.map +1 -0
  26. package/dist/extras/index.d.ts +1 -0
  27. package/dist/extras/index.js +2 -0
  28. package/dist/extras/index.js.map +1 -0
  29. package/dist/extras/sys-tags.d.ts +38 -0
  30. package/dist/extras/sys-tags.js +150 -0
  31. package/dist/extras/sys-tags.js.map +1 -0
  32. package/dist/index.d.ts +1 -0
  33. package/dist/index.js +2 -0
  34. package/dist/index.js.map +1 -0
  35. package/dist/server/GeminiBridge.d.ts +50 -0
  36. package/dist/server/GeminiBridge.js +500 -0
  37. package/dist/server/GeminiBridge.js.map +1 -0
  38. package/dist/server.d.ts +7 -0
  39. package/dist/server.js +7 -0
  40. package/dist/server.js.map +1 -0
  41. package/dist/ui/AgentChatStore.d.ts +16 -0
  42. package/dist/ui/AgentChatStore.js +59 -0
  43. package/dist/ui/AgentChatStore.js.map +1 -0
  44. package/docs/API.md +100 -0
  45. package/docs/EVENTS.md +100 -0
  46. package/docs/INTEGRATION.md +109 -0
  47. package/docs/SPECIFICATION.md +93 -0
  48. package/docs/TROUBLESHOOTING.md +44 -0
  49. package/docs/USAGE.md +270 -0
  50. package/docs/design.md +62 -0
  51. package/package.json +71 -0
  52. package/server/index.d.ts +1 -0
  53. package/server/index.js +1 -0
  54. package/server/package.json +1 -0
package/docs/API.md ADDED
@@ -0,0 +1,100 @@
1
+ # API Reference
2
+
3
+ This document defines the public API surface for the SDK.
4
+
5
+ ## Module Entrypoints
6
+
7
+ - `@pdhaku0/gemini-cli-agent-sdk/client`
8
+ - Browser/Next.js client usage
9
+ - `@pdhaku0/gemini-cli-agent-sdk/server`
10
+ - Node.js bridge usage
11
+ - `@pdhaku0/gemini-cli-agent-sdk/extras`
12
+ - Optional helpers (SYS tag capture, etc.)
13
+
14
+ ## AgentChatClient
15
+
16
+ ```ts
17
+ new AgentChatClient(options: AgentChatClientOptions)
18
+ ```
19
+
20
+ ### AgentChatClientOptions
21
+
22
+ - `url: string` (required)
23
+ - `model?: string`
24
+ - `cwd?: string`
25
+ - `diffContextLines?: number`
26
+ - `sessionId?: string`
27
+ - `replay?: { limit?: number; since?: number; before?: number }`
28
+
29
+ ### Methods
30
+
31
+ - `connect(options?: { autoSession?: boolean }): Promise<void>`
32
+ - Connects the WebSocket.
33
+ - If `sessionId` is already set, the client reuses it and **does not** call `session/new`.
34
+ - If `autoSession` is `true` (default) and no session exists, sends `session/new`.
35
+ - `sendMessage(text: string): Promise<void>`
36
+ - `sendMessage(text: string, options?: { hidden?: HiddenMode }): Promise<void>`
37
+ - `submitAuthCode(code: string): Promise<void>`
38
+ - `approveTool(optionId: string): Promise<void>`
39
+ - `cancel(): Promise<void>`
40
+ - `getMessages(): ChatMessage[]`
41
+ - `getMessages(options?: { includeHidden?: boolean }): ChatMessage[]`
42
+ - `getAuthUrl(): string | null`
43
+ - `getPendingApproval(): PendingApproval | null`
44
+ - `getConnectionState(): ConnectionState`
45
+ - `prependMessages(messages: ChatMessage[]): void`
46
+ - Prepends messages (for replay/infinite scroll).
47
+ - `setSessionId(sessionId: string | null): void`
48
+ - `getSessionId(): string | null`
49
+ - `dispose(): void`
50
+
51
+ ### Static
52
+
53
+ - `AgentChatClient.fetchReplay(url, replay, options?): Promise<ChatMessage[]>`
54
+ - Uses bridge replay query params to fetch older messages.
55
+ - `options.idleMs` controls the inactivity timeout once the first replay message arrives.
56
+
57
+ ## AgentChatStore
58
+
59
+ ```ts
60
+ new AgentChatStore(client: AgentChatClient)
61
+ ```
62
+
63
+ ### Methods
64
+
65
+ - `subscribe(listener): () => void`
66
+ - `getState(): AgentChatState`
67
+ - `dispose(): void`
68
+
69
+ ## GeminiBridge (server)
70
+
71
+ ```ts
72
+ new GeminiBridge(options?: GeminiBridgeOptions)
73
+ ```
74
+
75
+ ### GeminiBridgeOptions
76
+
77
+ - `model?: string`
78
+ - `port?: number` (default 4444)
79
+ - `approvalMode?: string`
80
+ - `geminiBin?: string`
81
+ - `cliPackage?: string`
82
+ - `hostApiUrl?: string`
83
+ - `sessionId?: string`
84
+ - `bridgeSecret?: string`
85
+ - `projectRoot?: string`
86
+ - `outgoingTransform?: (msg) => { forward?: any | null; extra?: any[] } | null`
87
+
88
+ ### Methods
89
+
90
+ - `start(): void`
91
+ - `stop(): void`
92
+
93
+ ### Events
94
+
95
+ - `gemini:message` (message from Gemini CLI)
96
+ - `client:message` (message from a WebSocket client)
97
+
98
+ ## Types
99
+
100
+ See `src/common/types.ts` for canonical type definitions (shipped in the published package as `dist/common/types.d.ts`).
package/docs/EVENTS.md ADDED
@@ -0,0 +1,100 @@
1
+ # Events and Rendering Rules
2
+
3
+ This document defines the event model and how to render messages correctly without ordering bugs.
4
+
5
+ ## Core Events (AgentChatClient)
6
+
7
+ ### Connection
8
+
9
+ - `connection_state_changed`
10
+ - payload: `{ state: ConnectionState }`
11
+ - values: `connecting | connected | reconnecting | disconnected`
12
+ - `session_ready`
13
+ - payload: `sessionId: string`
14
+ - `error`
15
+ - payload: `unknown`
16
+
17
+ ### Messages
18
+
19
+ - `message`
20
+ - emitted when a **new message object** is created (user or assistant)
21
+ - `message_update`
22
+ - emitted when streaming text/thought/tool updates modify a message
23
+
24
+ ### Streaming deltas
25
+
26
+ - `text_delta`
27
+ - `assistant_text_delta`
28
+ - `thought_delta`
29
+ - `assistant_thought_delta`
30
+ - payload: `{ messageId, delta, text|thought }`
31
+
32
+ ### Tools
33
+
34
+ - `tool_update`
35
+ - `tool_call_started`
36
+ - `tool_call_updated`
37
+ - `tool_call_completed`
38
+ - payload: `{ messageId, toolCall }`
39
+
40
+ ### Turn lifecycle
41
+
42
+ - `turn_started`
43
+ - payload: `{ userMessageId }`
44
+ - `turn_completed`
45
+ - payload: `stopReason` (string)
46
+
47
+ ### Auth / Permission
48
+
49
+ - `auth_required` (string URL)
50
+ - `auth_resolved`
51
+ - `permission_required` (PendingApproval)
52
+ - `approval_required` (PendingApproval)
53
+ - `approval_resolved`
54
+
55
+ ### Replay
56
+
57
+ - `messages_replayed`
58
+ - payload: `{ count }`
59
+ - emitted after `prependMessages()`
60
+
61
+ ### Structured events (optional)
62
+
63
+ - `bridge/structured_event`
64
+ - payload: `{ type, payload, raw, error? }`
65
+ - emitted when SYS tags are captured by the bridge (see `docs/USAGE.md`)
66
+
67
+ ## Rendering Rules (Important)
68
+
69
+ ### 1) Always render assistant content using `content[]`
70
+
71
+ The SDK maintains a `content` array on assistant messages that preserves the **true order** of:
72
+
73
+ - text
74
+ - thought
75
+ - tool calls
76
+
77
+ If you render `m.text` + `m.toolCalls` separately, tool calls can appear out of order.
78
+
79
+ ### 2) Render tool approvals next to the tool call
80
+
81
+ Tool approvals are tied to a specific tool call via `toolCallId`. Put the approval UI inside the matching tool block.
82
+
83
+ ### 3) User messages are local
84
+
85
+ The server does **not** echo user messages. You must render them from SDK state (`message` event or store state).
86
+
87
+ ## Suggested UI Pattern
88
+
89
+ ```ts
90
+ messages.map((msg) => {
91
+ if (msg.role === 'user') renderUser(msg.text);
92
+ if (msg.role === 'assistant') {
93
+ msg.content.map((part) => {
94
+ if (part.type === 'text') renderText(part.text);
95
+ if (part.type === 'thought') renderThought(part.thought);
96
+ if (part.type === 'tool_call') renderTool(part.call);
97
+ });
98
+ }
99
+ });
100
+ ```
package/docs/INTEGRATION.md ADDED
@@ -0,0 +1,109 @@
1
+ # Integration Guide
2
+
3
+ This guide focuses on real-world integration details and common pitfalls.
4
+
5
+ ## Use the right entrypoint
6
+
7
+ - **Browser/Next.js**: `@pdhaku0/gemini-cli-agent-sdk/client`
8
+ - **Node.js Bridge**: `@pdhaku0/gemini-cli-agent-sdk/server`
9
+
10
+ If you import the root package in a client component, Next.js will try to bundle server code (`fs`) and fail.
11
+
12
+ ## Next.js (App Router)
13
+
14
+ ### Use a singleton client/store
15
+
16
+ React Strict Mode mounts components twice in dev, which can create **multiple WebSocket connections** and inconsistent state.
17
+ Use a module-level singleton or a guard to ensure you connect only once.
18
+
19
+ ### Use `use client`
20
+
21
+ Instantiate `AgentChatClient` in client components only.
22
+
23
+ ### Persist session across reloads
24
+
25
+ If you want to keep the same ACP session across a page reload, store the session ID and pass it back to the client:
26
+
27
+ ```ts
28
+ const sessionId = localStorage.getItem('agentchat_session_id') || undefined;
29
+ const client = new AgentChatClient({ url: wsUrl, sessionId });
30
+
31
+ client.on('session_ready', (id) => localStorage.setItem('agentchat_session_id', id));
32
+ await client.connect();
33
+ ```
34
+
35
+ ### Example reference
36
+
37
+ See `examples/next-app` in the SDK source repository (it is not included in the published package) for a working App Router implementation (auth, approvals, replay, session persistence).
38
+
39
+ ### Working directory (cwd)
40
+
41
+ The client sends a `cwd` in `session/new`. For Next apps, you can set:
42
+
43
+ ```bash
44
+ NEXT_PUBLIC_GEMINI_CWD=/path/to/project
45
+ ```
46
+
47
+ ## WebSocket URL in remote/SSH setups
48
+
49
+ If the bridge runs on a remote host that you reach over SSH, `localhost` on that host refers to **the remote machine**, while `localhost` in your browser refers to your local machine.
50
+ Set the WebSocket URL accordingly, for example:
51
+
52
+ ```bash
53
+ NEXT_PUBLIC_GEMINI_WS_URL=ws://<host>:4444
54
+ ```
55
+
56
+ ## Auth flow
57
+
58
+ When `auth_required` fires, you must call `submitAuthCode()` before the CLI will process prompts.
59
+
60
+ ## Tool approvals
61
+
62
+ Use `pendingApproval.toolCall.toolCallId` to attach permission options to the correct tool block.
63
+
64
+ ## Replay performance tips
65
+
66
+ - `limit` is in **turns**, not messages.
67
+ - Use a small `limit` on connect, then fetch older as needed.
68
+ - Replay is in-memory only; restarting the bridge clears history.
69
+
70
+ ## Optional SYS tag capture
71
+
72
+ If you want to extract structured JSON from assistant output, use the extras helper:
73
+
74
+ ```ts
75
+ import { createSysTagTransform } from '@pdhaku0/gemini-cli-agent-sdk/extras';
76
+
77
+ const bridge = new GeminiBridge({
78
+ outgoingTransform: createSysTagTransform({ mode: 'event' }),
79
+ });
80
+ ```
81
+
82
+ ### Recommended priming prompt
83
+
84
+ Instruct the agent to use SYS tags for structured data so the bridge can capture it:
85
+
86
+ ```text
87
+ When you need to emit machine-readable JSON, wrap it in <SYS_JSON>...</SYS_JSON>.
88
+ When you want to group work, use <SYS_BLOCK>{"type":"start"...}</SYS_BLOCK> and
89
+ <SYS_BLOCK>{"type":"end"...}</SYS_BLOCK>.
90
+ ```
91
+
92
+ ## Structured events → backend tools
93
+
94
+ If you want the assistant to trigger backend tools, use SYS tags and process
95
+ `bridge/structured_event` on the bridge:
96
+
97
+ ```ts
98
+ bridge.on('client:message', (msg) => {
99
+ if (msg?.method !== 'bridge/structured_event') return;
100
+
101
+ const { type, payload } = msg.params || {};
102
+ if (type !== 'sys_json') return;
103
+
104
+ if (payload?.type === 'tool.invoke') {
105
+ // Example: run a custom backend tool
106
+ runTool(payload.payload);
107
+ }
108
+ });
109
+ ```
package/docs/SPECIFICATION.md ADDED
@@ -0,0 +1,93 @@
1
+ # Technical Specification
2
+
3
+ This document details the internal architecture, protocol specifications, and custom logic implementation of the SDK and bridge.
4
+
5
+ ## Architecture Overview
6
+
7
+ ```
8
+ [ Client Application (SDK) ]
9
+ |
10
+ | WebSocket (JSON-RPC 2.0)
11
+ |
12
+ [ Node.js Bridge (gemini-bridge.cjs) ]
13
+ |
14
+ | Stdio (Pipe)
15
+ |
16
+ [ Gemini CLI Binary ]
17
+ ```
18
+
19
+ ### 1. Client SDK (`src/core/`)
20
+
21
+ - **AgentChatClient**: Manages the WebSocket connection, session state, and event emission.
22
+ - **Event System**: Emits normalized events (`text_delta`, `thought_delta`, `tool_update`) to the UI.
23
+ - **Tool Parsing**: Recovers structured tool data from Gemini CLI output.
24
+
25
+ ### 2. Bridge (`scripts/gemini-bridge.cjs`)
26
+
27
+ - **Process Management**: Spawns the `gemini` binary with `--experimental-acp`.
28
+ - **Protocol Translation**: Forwards JSON-RPC between WebSocket and stdio.
29
+ - **Log Management**: Handles `gemini-acp.log` rotation (max 2MB).
30
+ - **History Replay**: Maintains a small in-memory history for late-joiners.
31
+
32
+ ## ACP Protocol & Extensions
33
+
34
+ ### Session Handshake
35
+
36
+ 1. Client connects via WebSocket.
37
+ 2. Bridge spawns Gemini CLI.
38
+ 3. Client sends `session/new` (unless reusing a stored session ID).
39
+ 4. Bridge relays response with `sessionId`.
40
+
41
+ ### Session Reuse
42
+
43
+ Clients may reuse an existing session by supplying a known `sessionId` and skipping `session/new`.
44
+ This is useful for page reloads. It only works while the bridge/CLI process remains alive.
45
+
46
+ ### Message Flow
47
+
48
+ - **User Input**: Client sends `session/prompt` with text and `sessionId`.
49
+ - **Streaming Response**: CLI sends `session/update` events.
50
+ - `agent_thought_chunk`: Internal reasoning text.
51
+ - `agent_message_chunk`: User-facing assistant text.
52
+ - `tool_call`: Request to execute a tool.
53
+
54
+ ## Backend Event Hooks
55
+
56
+ `GeminiBridge` extends `EventEmitter` and emits:
57
+
58
+ - `gemini:message`: JSON-RPC messages from Gemini CLI.
59
+ - `client:message`: JSON-RPC messages from WebSocket clients.
60
+
61
+ ## Custom Logic: Tool Description Parsing
62
+
63
+ The Gemini CLI does not consistently emit a `description` field in the `tool_call` object. The SDK implements:
64
+
65
+ 1. **Title Parsing**: Analyze the tool title string.
66
+ 2. **Nested Parentheses**: Extract the *last balanced parentheses group*.
67
+ - Example: `"ls -F [cwd] (List files (detailed))"` => `"List files (detailed)"`
68
+ 3. **CWD Extraction**: Capture `[current working directory ...]` as `workingDir`.
69
+ 4. **Fallback**: Leave `description` empty if not found.
70
+
71
+ ## Operational Details
72
+
73
+ ### Log Rotation
74
+
75
+ The `gemini-bridge.cjs` script checks the size of `gemini-acp.log` on startup.
76
+
77
+ - **Limit**: 2MB (2 * 1024 * 1024 bytes)
78
+ - **Action**: Renames the current log to `.old` if the limit is exceeded
79
+
80
+ ### History Replay (Late Joiners)
81
+
82
+ The bridge keeps an in-memory ring buffer of recent JSON-RPC messages (max 2000).
83
+ Clients can request a replay using WebSocket query params:
84
+
85
+ - `limit`: last N **turns** (to avoid slicing a response mid-stream)
86
+ - `since`: only messages after this UNIX timestamp (ms)
87
+ - `before`: only messages before this UNIX timestamp (ms)
88
+
89
+ ### Environment Variables
90
+
91
+ - `GEMINI_PORT`: WebSocket port (default 4444)
92
+ - `GEMINI_MODEL`: Model ID (default `gemini-3-flash-preview`)
93
+ - `GEMINI_APPROVAL_MODE`: Tool approval mode (default `default`)
package/docs/TROUBLESHOOTING.md ADDED
@@ -0,0 +1,44 @@
1
+ # Troubleshooting
2
+
3
+ ## Connected but no UI messages
4
+
5
+ - Ensure you render using `content[]` for assistant messages.
6
+ - Ensure you listen to `message_update` (or use `AgentChatStore`).
7
+ - User messages are **not echoed** from the server; render them locally.
8
+
9
+ ## WebSocket connects but session never initializes
10
+
11
+ - Verify `session/new` is being sent (unless you provide `sessionId`).
12
+ - Confirm Gemini CLI is running with `--experimental-acp`.
13
+ - Check bridge logs for JSON-RPC errors.
14
+
15
+ ## New session after page reload
16
+
17
+ - A full page reload reinitializes the client and creates a new session.
18
+ - Persist `sessionId` (localStorage) and pass it back to the client to reuse a session.
19
+ - If the bridge restarts, the previous session ID is invalid and a new session is expected.
20
+ - If you keep reusing an invalid session, clear the stored session ID and refresh.
21
+
22
+ ## History replay not working
23
+
24
+ - `limit` is counted in **turns**, not messages.
25
+ - Make sure you pass `before` as a UNIX timestamp in **ms**.
26
+ - Replay relies on bridge in-memory history; restarting the bridge clears it.
27
+
28
+ ## Replay returns empty even though bridge says "Replaying"
29
+
30
+ - Ensure you are running the updated SDK build (rebuild after local changes if you are using a `file:` dependency).
31
+ - Increase `idleMs` if the bridge or browser is slow.
32
+
33
+ ## Tool approval UI does not show
34
+
35
+ - Approvals are per tool call. Match on `toolCallId` and render inside that tool's block.
36
+
37
+ ## "WebSocket constructor not found"
38
+
39
+ - In Node, ensure that `ws` is installed and that you are running Node >= 18.
40
+ - In Next.js, instantiate the client in a `use client` component.
41
+
42
+ ## "Blocked message during pending auth"
43
+
44
+ - Gemini CLI requires auth. Use the URL from `auth_required` and call `submitAuthCode()`.