@pdhaku0/gemini-cli-agent-sdk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/README.md +109 -0
  2. package/client/index.d.ts +1 -0
  3. package/client/index.js +1 -0
  4. package/client/package.json +1 -0
  5. package/dist/client.d.ts +5 -0
  6. package/dist/client.js +5 -0
  7. package/dist/client.js.map +1 -0
  8. package/dist/common/types.d.ts +191 -0
  9. package/dist/common/types.js +18 -0
  10. package/dist/common/types.js.map +1 -0
  11. package/dist/core/AcpWebSocketTransport.d.ts +25 -0
  12. package/dist/core/AcpWebSocketTransport.js +222 -0
  13. package/dist/core/AcpWebSocketTransport.js.map +1 -0
  14. package/dist/core/AgentChatClient.d.ts +75 -0
  15. package/dist/core/AgentChatClient.js +679 -0
  16. package/dist/core/AgentChatClient.js.map +1 -0
  17. package/dist/core/ToolPermissionManager.d.ts +26 -0
  18. package/dist/core/ToolPermissionManager.js +88 -0
  19. package/dist/core/ToolPermissionManager.js.map +1 -0
  20. package/dist/core/diff-utils.d.ts +1 -0
  21. package/dist/core/diff-utils.js +7 -0
  22. package/dist/core/diff-utils.js.map +1 -0
  23. package/dist/core/stream-utils.d.ts +14 -0
  24. package/dist/core/stream-utils.js +57 -0
  25. package/dist/core/stream-utils.js.map +1 -0
  26. package/dist/extras/index.d.ts +1 -0
  27. package/dist/extras/index.js +2 -0
  28. package/dist/extras/index.js.map +1 -0
  29. package/dist/extras/sys-tags.d.ts +38 -0
  30. package/dist/extras/sys-tags.js +150 -0
  31. package/dist/extras/sys-tags.js.map +1 -0
  32. package/dist/index.d.ts +1 -0
  33. package/dist/index.js +2 -0
  34. package/dist/index.js.map +1 -0
  35. package/dist/server/GeminiBridge.d.ts +50 -0
  36. package/dist/server/GeminiBridge.js +500 -0
  37. package/dist/server/GeminiBridge.js.map +1 -0
  38. package/dist/server.d.ts +7 -0
  39. package/dist/server.js +7 -0
  40. package/dist/server.js.map +1 -0
  41. package/dist/ui/AgentChatStore.d.ts +16 -0
  42. package/dist/ui/AgentChatStore.js +59 -0
  43. package/dist/ui/AgentChatStore.js.map +1 -0
  44. package/docs/API.md +100 -0
  45. package/docs/EVENTS.md +100 -0
  46. package/docs/INTEGRATION.md +109 -0
  47. package/docs/SPECIFICATION.md +93 -0
  48. package/docs/TROUBLESHOOTING.md +44 -0
  49. package/docs/USAGE.md +270 -0
  50. package/docs/design.md +62 -0
  51. package/package.json +71 -0
  52. package/server/index.d.ts +1 -0
  53. package/server/index.js +1 -0
  54. package/server/package.json +1 -0
package/docs/USAGE.md ADDED
@@ -0,0 +1,270 @@
1
+ # Usage Guide
2
+
3
+ This guide aims for a complete, no-surprises integration: bridge + client + UI. It assumes Node 18+.
4
+
5
+ ## 0) Prerequisites
6
+
7
+ - Gemini CLI installed and working with `--experimental-acp`
8
+ - Node 18+ for the bridge
9
+ - A WebSocket-capable runtime for the client (browser or Node with `ws`)
10
+
11
+ ## 1) Start the bridge
12
+
13
+ ```bash
14
+ npm run start:bridge
15
+ ```
16
+
17
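+ The `start:bridge` script runs `scripts/gemini-bridge.cjs`, which is not included in the published package, so it only works from a checkout of the SDK repository. When the SDK is installed as a dependency, use the class directly: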
18
+
19
+ ```ts
20
+ import { GeminiBridge } from '@pdhaku0/gemini-cli-agent-sdk/server';
21
+
22
+ const bridge = new GeminiBridge({ port: 4444 });
23
+ bridge.start();
24
+ ```
25
+
26
+ ## 2) Minimal client
27
+
28
+ ```ts
29
+ import { AgentChatClient } from '@pdhaku0/gemini-cli-agent-sdk/client';
30
+
31
+ const client = new AgentChatClient({
32
+ url: 'ws://localhost:4444',
33
+ cwd: '/path/to/project',
34
+ });
35
+
36
+ client.on('text_delta', ({ delta }) => process.stdout.write(delta));
37
+
38
+ await client.connect();
39
+ await client.sendMessage('Hello!');
40
+ ```
41
+
42
+ ## 3) Render messages correctly (important)
43
+
44
+ The server **does not echo user messages**. You must render them from local state.
45
+ For assistant messages, always use `content[]` to preserve the correct order of
46
+ text, thoughts, and tool calls.
47
+
48
+ Recommended UI pattern:
49
+
50
+ ```ts
51
+ messages.map((m) => {
52
+ if (m.role === 'user') return renderUser(m.text);
53
+ return m.content.map((part) => {
54
+ if (part.type === 'text') return renderText(part.text);
55
+ if (part.type === 'thought') return renderThought(part.thought);
56
+ if (part.type === 'tool_call') return renderTool(part.call);
57
+ });
58
+ });
59
+ ```
60
+
61
+ For React UIs, use `AgentChatStore` to receive `message_update` events automatically.
62
+
63
+ ```ts
64
+ import { AgentChatStore } from '@pdhaku0/gemini-cli-agent-sdk/client';
65
+
66
+ const store = new AgentChatStore(client);
67
+ store.subscribe((state) => {
68
+ // state.messages, state.isStreaming, state.pendingApproval, etc
69
+ });
70
+ ```
71
+
72
+ ## 4) Session persistence (page reloads)
73
+
74
+ If the page reloads, a new session is created unless you restore the old session ID.
75
+ You can store it and pass it back to the client:
76
+
77
+ ```ts
78
+ const client = new AgentChatClient({
79
+ url: 'ws://localhost:4444',
80
+ sessionId: localStorage.getItem('agentchat_session_id') || undefined,
81
+ });
82
+
83
+ client.on('session_ready', (sessionId) => {
84
+ localStorage.setItem('agentchat_session_id', sessionId);
85
+ });
86
+
87
+ await client.connect();
88
+ ```
89
+
90
+ Notes:
91
+ - The session only survives while the **bridge and CLI process stay alive**.
92
+ - If the bridge restarts, the stored session becomes invalid; clear it to create a new session.
93
+
94
+ ## 5) Auth flow
95
+
96
+ When Gemini CLI requires auth, the SDK emits `auth_required` with a URL.
97
+ You must obtain the code and call `submitAuthCode` before any prompts are processed.
98
+
99
+ ```ts
100
+ client.on('auth_required', (url) => openAuthWindow(url));
101
+ await client.submitAuthCode(code);
102
+ ```
103
+
104
+ ## 6) Tool approvals
105
+
106
+ Approvals are tied to a tool call via `toolCallId`. Render permission UI next to the tool entry.
107
+
108
+ ```ts
109
+ client.on('permission_required', (approval) => {
110
+ // approval.options contains allow/deny choices
111
+ });
112
+ ```
113
+
114
+ ## 7) Replay / infinite scroll
115
+
116
+ The bridge keeps a small in-memory history. You can replay on connect:
117
+
118
+ ```ts
119
+ const client = new AgentChatClient({
120
+ url: 'ws://localhost:4444',
121
+ replay: { limit: 15 }, // turns, not messages
122
+ });
123
+ await client.connect();
124
+ ```
125
+
126
+ Or fetch older:
127
+
128
+ ```ts
129
+ const older = await AgentChatClient.fetchReplay('ws://localhost:4444', {
130
+ before: oldestTimestampMs,
131
+ limit: 10, // turns, not messages
132
+ });
133
+ client.prependMessages(older);
134
+ ```
135
+
136
+ Notes:
137
+ - `limit` is **turns**, not messages.
138
+ - `before`/`since` are UNIX timestamps in **ms**.
139
+ - Restarting the bridge clears history.
140
+ - If replay feels empty on slow networks, increase `idleMs`.
141
+
142
+ ## 8) Diff handling
143
+
144
+ Tool results may include diffs. The SDK normalizes those into `toolCall.diff.unified`.
145
+ In UI, prefer:
146
+
147
+ 1) `toolCall.diff.unified` (best)
148
+ 2) `toolCall.result`
149
+
150
+ ## 9) Hidden messages
151
+
152
+ You can send prompts that should not appear in the UI:
153
+
154
+ ```ts
155
+ await client.sendMessage('System priming...', { hidden: 'turn' });
156
+ ```
157
+
158
+ Hidden modes:
159
+
160
+ - `none` (default): show everything
161
+ - `user`: hide the user message only
162
+ - `assistant`: hide the assistant response (including tool/thought)
163
+ - `turn`: hide both user and assistant for the turn
164
+
165
+ If a hidden assistant turn requests tool approval, the SDK automatically rejects the request.
166
+
167
+ ### Initial system priming (recommended)
168
+
169
+ If you want to give the agent a long initial prompt but **never show it in UI**:
170
+
171
+ ```ts
172
+ await client.sendMessage('You are a long-running agent. Use SYS tags for structured events...', {
173
+ hidden: 'turn',
174
+ });
175
+ ```
176
+
177
+ ## 10) Reconnect behavior
178
+
179
+ The WebSocket transport reconnects automatically. If a page reloads, use session persistence
180
+ (section 4) to reuse the same session ID.
181
+
182
+ ## 11) Optional SYS tags (structured capture)
183
+
184
+ If you want the assistant to emit structured JSON that should **not** be shown
185
+ in the UI, you can wrap it in SYS tags and parse them on the bridge.
186
+
187
+ Example:
188
+
189
+ ```
190
+ <SYS_JSON>{"type":"tool.invoke","payload":{"name":"ping"}}</SYS_JSON>
191
+ <SYS_BLOCK>{"type":"start","id":"b1","title":"Data Collection"}</SYS_BLOCK>
192
+ ```
193
+
194
+ Use the optional extras helper on the bridge:
195
+
196
+ ```ts
197
+ import { GeminiBridge } from '@pdhaku0/gemini-cli-agent-sdk/server';
198
+ import { createSysTagTransform } from '@pdhaku0/gemini-cli-agent-sdk/extras';
199
+
200
+ const bridge = new GeminiBridge({
201
+ outgoingTransform: createSysTagTransform({ mode: 'event' }),
202
+ });
203
+
204
+ bridge.start();
205
+ ```
206
+
207
+ `mode` can be:
208
+ - `event`: strip SYS tags from UI and emit `bridge/structured_event`
209
+ - `raw`: do nothing (no capture)
210
+ - `both`: keep text and emit `bridge/structured_event`
211
+
212
+ ### Pattern: JSON tools without UI leakage
213
+
214
+ Use SYS tags for machine-readable JSON, while keeping normal assistant text visible:
215
+
216
+ ```
217
+ I will fetch the data now.
218
+ <SYS_JSON>{"type":"tool.invoke","payload":{"tool":"fetch","args":{"url":"..."}}}</SYS_JSON>
219
+ ```
220
+
221
+ On the bridge, capture SYS_JSON and execute your backend tool using the structured event payload.
222
+
223
+ ## 12) Example UI
224
+
225
+ A complete Next.js App Router implementation is provided:
226
+
227
+ - `examples/next-app`
228
+
229
+ A minimal CLI example is also available:
230
+
231
+ - `examples/cli`
232
+
233
+ The Next.js example includes auth UI, tool approvals, replay, and session persistence.
234
+
235
+ ## 13) Structured events: backend tool execution
236
+
237
+ When you use SYS tags, the bridge emits `bridge/structured_event`. You can use it to
238
+ run backend tools **without leaking JSON to the UI**.
239
+
240
+ Example (pseudo):
241
+
242
+ ```ts
243
+ bridge.on('client:message', (msg) => {
244
+ if (msg?.method !== 'bridge/structured_event') return;
245
+ const { type, payload } = msg.params || {};
246
+
247
+ if (type === 'sys_json' && payload?.type === 'tool.invoke') {
248
+ // Run your tool here
249
+ runTool(payload.payload);
250
+ }
251
+ });
252
+ ```
253
+
254
+ ## 14) Block UI pattern (long-running agents)
255
+
256
+ For long-running agents, you can group output into collapsible blocks.
257
+ Ask the agent to emit SYS blocks:
258
+
259
+ ```
260
+ <SYS_BLOCK>{"type":"start","id":"b1","title":"Data Collection"}</SYS_BLOCK>
261
+ ...normal text...
262
+ <SYS_BLOCK>{"type":"end","id":"b1","summary":"Collected 120 items"}</SYS_BLOCK>
263
+ ```
264
+
265
+ UI behavior:
266
+ - On `start`, open a block with the title.
267
+ - Append subsequent text to that block.
268
+ - On `end`, close it and show the summary.
269
+
270
+ Use `bridge/structured_event` to receive these block signals and update the UI state.
package/docs/design.md ADDED
@@ -0,0 +1,62 @@
1
+ # Gemini ACP SDK Design Specification
2
+
3
+ ## Overview
4
+
5
+ This document describes the architecture and design goals for the SDK. It is meant to stay aligned with the current implementation.
6
+
7
+ ## Portability & Multi-Environment Support
8
+
9
+ ### 1) Framework-agnostic core
10
+
11
+ - Core logic (transport, session, parsing, auth/approval handling) lives in `AgentChatClient` and related core modules.
12
+ - The core is UI-framework agnostic and works in browsers or Node (with `ws`).
13
+
14
+ ### 2) UI integration
15
+
16
+ - `AgentChatStore` provides a minimal subscribe/getState interface for React and other UI frameworks.
17
+ - Additional framework-specific hooks are **not implemented yet**; they may be added later.
18
+
19
+ ### 3) Distribution
20
+
21
+ - ESM output in `dist/` for modern bundlers.
22
+ - The package exports `client` and `server` entrypoints to keep browser builds clean.
23
+
24
+ ## Tool Execution Approval Flow
25
+
26
+ - Listens for `session/request_permission`.
27
+ - Exposes `pendingApproval` to the UI.
28
+ - UI selects an option and calls `approveTool(optionId)`.
29
+
30
+ ## Authentication Flow
31
+
32
+ - Captures auth URL from the bridge and notifies the UI.
33
+ - CLI blocks prompts until `submitAuthCode(code)` is called.
34
+
35
+ ## Configuration & Model Selection
36
+
37
+ - Bridge can be configured via env vars (`GEMINI_MODEL`, `GEMINI_APPROVAL_MODE`).
38
+ - Client can pass `model` in `session/new` if supported by the bridge/CLI.
39
+
40
+ ## Communication Pattern
41
+
42
+ ```mermaid
43
+ sequenceDiagram
44
+ participant UI as Frontend
45
+ participant SDK as SDK Client
46
+ participant Bridge as Gemini Bridge (WS Server)
47
+ participant CLI as Gemini CLI (Process)
48
+
49
+ UI->>SDK: sendMessage("Hello")
50
+ SDK->>Bridge: session/prompt
51
+ Bridge->>CLI: Write to stdin
52
+ CLI-->>Bridge: session/update/tool_call
53
+ Bridge-->>SDK: session/update / session/request_permission
54
+ SDK-->>UI: Emit events / update store
55
+ ```
56
+
57
+ ## Design Principles
58
+
59
+ - **WebSocket-first**: UI <-> bridge is event-driven, JSON-RPC over WS.
60
+ - **Framework-agnostic**: Core logic is independent from UI frameworks.
61
+ - **Deterministic rendering**: `content[]` preserves interleaving of text/thought/tool calls.
62
+ - **Resilience**: Automatic WS reconnect; robust parsing of tool metadata.
package/package.json ADDED
@@ -0,0 +1,71 @@
1
+ {
2
+ "name": "@pdhaku0/gemini-cli-agent-sdk",
3
+ "version": "0.1.0",
4
+ "type": "module",
5
+ "description": "A high-level SDK for building AI Agent chat interfaces using Gemini ACP",
6
+ "main": "dist/index.js",
7
+ "types": "dist/index.d.ts",
8
+ "files": [
9
+ "dist",
10
+ "client",
11
+ "server",
12
+ "docs"
13
+ ],
14
+ "exports": {
15
+ ".": {
16
+ "types": "./dist/index.d.ts",
17
+ "default": "./dist/index.js"
18
+ },
19
+ "./client": {
20
+ "types": "./dist/client.d.ts",
21
+ "default": "./dist/client.js"
22
+ },
23
+ "./server": {
24
+ "types": "./dist/server.d.ts",
25
+ "default": "./dist/server.js"
26
+ },
27
+ "./extras": {
28
+ "types": "./dist/extras/index.d.ts",
29
+ "default": "./dist/extras/index.js"
30
+ },
31
+ "./package.json": "./package.json"
32
+ },
33
+ "scripts": {
34
+ "build": "rimraf dist && tsc && node scripts/postbuild.mjs",
35
+ "postbuild": "node scripts/postbuild.mjs",
36
+ "dev": "tsc -w",
37
+ "test": "jest",
38
+ "lint": "eslint src/**/*.ts",
39
+ "start:bridge": "export GEMINI_PORT=4444 && node scripts/gemini-bridge.cjs",
40
+ "prepare": "npm run build"
41
+ },
42
+ "keywords": [
43
+ "gemini",
44
+ "acp",
45
+ "agent",
46
+ "chat",
47
+ "sdk",
48
+ "ai"
49
+ ],
50
+ "author": "薄明色の忘れ路",
51
+ "license": "MIT",
52
+ "engines": {
53
+ "node": ">=18"
54
+ },
55
+ "dependencies": {
56
+ "diff": "^5.2.0",
57
+ "events": "^3.3.0",
58
+ "ws": "^8.19.0",
59
+ "zod": "^3.23.0"
60
+ },
61
+ "devDependencies": {
62
+ "@types/diff": "^5.2.0",
63
+ "@types/node": "^20.0.0",
64
+ "@types/ws": "^8.18.1",
65
+ "eslint": "^8.57.0",
66
+ "jest": "^29.7.0",
67
+ "rimraf": "^5.0.0",
68
+ "ts-jest": "^29.1.0",
69
+ "typescript": "^5.4.0"
70
+ }
71
+ }
@@ -0,0 +1 @@
1
+ export * from '../dist/server';
@@ -0,0 +1 @@
1
+ export * from '../dist/server.js';
@@ -0,0 +1 @@
1
+ {"type":"module"}