sigit-code 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. sigit_code-0.1.1/.agents/AGENTS.md +0 -0
  2. sigit_code-0.1.1/.agents/skills/agent-client-protocol/SKILL.md +314 -0
  3. sigit_code-0.1.1/.agents/skills/ai-assisted-coding/SKILL.md +361 -0
  4. sigit_code-0.1.1/.agents/skills/tool-calling/SKILL.md +283 -0
  5. sigit_code-0.1.1/.github/workflows/ci.yml +113 -0
  6. sigit_code-0.1.1/.github/workflows/release-github.yml +187 -0
  7. sigit_code-0.1.1/.github/workflows/release-homebrew.yml +116 -0
  8. sigit_code-0.1.1/.github/workflows/release-npm.yml +212 -0
  9. sigit_code-0.1.1/.github/workflows/release-pypi.yml +187 -0
  10. sigit_code-0.1.1/.gitignore +1 -0
  11. sigit_code-0.1.1/.nvmrc +1 -0
  12. sigit_code-0.1.1/Cargo.lock +7909 -0
  13. sigit_code-0.1.1/Cargo.toml +44 -0
  14. sigit_code-0.1.1/LICENSE +190 -0
  15. sigit_code-0.1.1/PKG-INFO +134 -0
  16. sigit_code-0.1.1/README.md +55 -0
  17. sigit_code-0.1.1/npm/README.md.tmpl +22 -0
  18. sigit_code-0.1.1/npm/package-main.json.tmpl +41 -0
  19. sigit_code-0.1.1/npm/package.json.tmpl +14 -0
  20. sigit_code-0.1.1/npm/scripts/render-main-package.cjs +34 -0
  21. sigit_code-0.1.1/npm/scripts/render-platform-package.cjs +56 -0
  22. sigit_code-0.1.1/npm/sigit/.gitignore +2 -0
  23. sigit_code-0.1.1/npm/sigit/README.md +85 -0
  24. sigit_code-0.1.1/npm/sigit/package.json +44 -0
  25. sigit_code-0.1.1/npm/sigit/src/index.ts +43 -0
  26. sigit_code-0.1.1/npm/sigit/tsconfig.json +12 -0
  27. sigit_code-0.1.1/pypi/README.md +107 -0
  28. sigit_code-0.1.1/pypi/pyproject.toml +38 -0
  29. sigit_code-0.1.1/pyproject.toml +38 -0
  30. sigit_code-0.1.1/rust-toolchain.toml +2 -0
  31. sigit_code-0.1.1/src/chat.rs +1162 -0
  32. sigit_code-0.1.1/src/main.rs +527 -0
  33. sigit_code-0.1.1/src/setup.rs +89 -0
  34. sigit_code-0.1.1/src/tools.rs +1136 -0
File without changes
@@ -0,0 +1,314 @@
1
+ # Skill: Agent Client Protocol (ACP) — Rust Implementation
2
+
3
+ ## Overview
4
+
5
+ ACP is a JSON-RPC 2.0 protocol over **stdio** for integrating AI coding agents
6
+ with editors (Zed, JetBrains, Neovim, etc.). The agent runs as a subprocess;
7
+ the editor is the client. Communication is newline-delimited JSON on stdin/stdout.
8
+
9
+ Crate: `agent-client-protocol = "0.10.4"` (latest as of 2025)
10
+ Docs: https://docs.rs/agent-client-protocol
11
+ Spec: https://agentclientprotocol.com
12
+
13
+ ---
14
+
15
+ ## Dependency setup
16
+
17
+ ```toml
18
+ [dependencies]
19
+ agent-client-protocol = "0.10.4"
20
+ async-trait = "0.1"
21
+ tokio = { version = "1", features = ["rt", "rt-multi-thread", "macros", "io-std", "io-util", "sync"] }
22
+ tokio-util = { version = "0.7", features = ["compat"] }
23
+ futures = "0.3"
24
+ ```
25
+
26
+ ---
27
+
28
+ ## The `Agent` trait
29
+
30
+ Declared `#[async_trait::async_trait(?Send)]` — futures are `!Send`.
31
+ Your impl needs the same annotation:
32
+
33
+ ```rust
34
+ #[async_trait::async_trait(?Send)]
35
+ impl Agent for MyAgent {
36
+ async fn initialize(&self, args: InitializeRequest) -> Result<InitializeResponse> { ... }
37
+ async fn authenticate(&self, args: AuthenticateRequest) -> Result<AuthenticateResponse> { ... }
38
+ async fn new_session(&self, args: NewSessionRequest) -> Result<NewSessionResponse> { ... }
39
+ async fn prompt(&self, args: PromptRequest) -> Result<PromptResponse> { ... }
40
+ async fn cancel(&self, args: CancelNotification) -> Result<()> { ... }
41
+ // All other methods have default impls that return Error::method_not_found()
42
+ }
43
+ ```
44
+
45
+ You must implement `initialize`, `authenticate`, `new_session`, `prompt`, and `cancel`.
46
+ Everything else (`load_session`, `set_session_mode`, etc.) defaults to `Err(method_not_found)`.
47
+
48
+ ---
49
+
50
+ ## Types and their builders
51
+
52
+ All `#[non_exhaustive]` structs require builder methods — struct literal syntax won't compile.
53
+
54
+ ### `InitializeRequest` / `InitializeResponse`
55
+
56
+ ```rust
57
+ // Response builder — use ProtocolVersion::V1, NOT args.protocol_version:
58
+ InitializeResponse::new(ProtocolVersion::V1)
59
+ .agent_info(
60
+ Implementation::new("my-agent", env!("CARGO_PKG_VERSION"))
61
+ .title("My Agent"),
62
+ )
63
+ .auth_methods(vec![AuthMethod::Agent(AuthMethodAgent::new(
64
+ "my-agent", "My Agent",
65
+ ))])
66
+ .agent_capabilities(AgentCapabilities::default())
67
+ ```
68
+
69
+ `auth_methods` must include at least one `AuthMethod::Agent` or Zed hangs on
70
+ "Loading…" forever. Import `AuthMethod`, `AuthMethodAgent`, and `ProtocolVersion`
71
+ from the crate.
72
+
73
+ ### `AuthenticateResponse`
74
+
75
+ ```rust
76
+ Ok(AuthenticateResponse::default()) // No auth = just return default
77
+ ```
78
+
79
+ ### `NewSessionResponse`
80
+
81
+ ```rust
82
+ let session_id = SessionId::new(uuid::Uuid::new_v4().to_string());
83
+ Ok(NewSessionResponse::new(session_id))
84
+ ```
85
+
86
+ `SessionId` is a newtype with `Clone`, `PartialEq`, `Display`, `Into<String>`,
87
+ and `AsRef<str>`. Store it as-is (not as `String`) so `==` works directly.
88
+
89
+ ### `PromptRequest`
90
+
91
+ ```rust
92
+ args.session_id // type: SessionId
93
+ args.prompt // type: Vec<ContentBlock>
94
+ ```
95
+
96
+ Extract user text from the prompt:
97
+ ```rust
98
+ let user_text: String = args.prompt.iter()
99
+ .filter_map(|block| match block {
100
+ ContentBlock::Text(t) => Some(t.text.as_str()),
101
+ _ => None,
102
+ })
103
+ .collect::<Vec<_>>()
104
+ .join("\n");
105
+ ```
106
+
107
+ ### `PromptResponse`
108
+
109
+ ```rust
110
+ Ok(PromptResponse::new(StopReason::EndTurn))
111
+ // Other reasons: MaxTokens, Cancelled, MaxTurnRequests, Refusal
112
+ ```
113
+
114
+ ### `ContentBlock`
115
+
116
+ ```rust
117
+ // Text block — use the From impl:
118
+ ContentBlock::from("some text") // impl<T: Into<String>> From<T> for ContentBlock
119
+
120
+ // Pattern-match incoming blocks:
121
+ match block {
122
+ ContentBlock::Text(t) => t.text.as_str(),
123
+ ContentBlock::ResourceLink(_) => ...,
124
+ ContentBlock::Resource(_) => ...,
125
+ _ => ..., // non_exhaustive — always need a wildcard
126
+ }
127
+ ```
128
+
129
+ ### `ContentChunk` + `SessionUpdate` — streaming
130
+
131
+ ```rust
132
+ let chunk = ContentChunk::new(ContentBlock::from(delta_text));
133
+ let update = SessionUpdate::AgentMessageChunk(chunk);
134
+ // Other variants: UserMessageChunk, AgentThoughtChunk, ToolCall, Plan, ...
135
+ ```
136
+
137
+ ### `SessionNotification` — send streaming content to client
138
+
139
+ ```rust
140
+ let notification = SessionNotification::new(session_id.clone(), update);
141
+ // Deliver via AgentSideConnection::session_notification()
142
+ ```
143
+
144
+ ### `Error`
145
+
146
+ ```rust
147
+ // There is NO Error::internal(msg) method — use:
148
+ agent_client_protocol::Error::new(-32603, "your message here")
149
+
150
+ // For invalid params:
151
+ agent_client_protocol::Error::invalid_params()
152
+
153
+ // For method not found (already the trait default):
154
+ agent_client_protocol::Error::method_not_found()
155
+ ```
156
+
157
+ ---
158
+
159
+ ## Running the agent — `AgentSideConnection`
160
+
161
+ Wraps stdin/stdout with JSON-RPC machinery.
162
+
163
+ ```rust
164
+ use futures::future::LocalBoxFuture;
165
+ use tokio_util::compat::{TokioAsyncReadCompatExt, TokioAsyncWriteCompatExt};
166
+
167
+ // Adapt tokio I/O to futures AsyncRead/AsyncWrite (the SDK expects these)
168
+ let stdin = tokio::io::stdin().compat();
169
+ let stdout = tokio::io::stdout().compat_write();
170
+
171
+ // Must run inside a LocalSet — the spawn fn takes LocalBoxFuture (!Send)
172
+ let local = tokio::task::LocalSet::new();
173
+ local.run_until(async move {
174
+ let (conn, io_task) = AgentSideConnection::new(
175
+ agent,
176
+ stdout,
177
+ stdin,
178
+ |fut: LocalBoxFuture<'static, ()>| {
179
+ tokio::task::spawn_local(fut); // requires LocalSet context
180
+ },
181
+ );
182
+
183
+ // ... set up forwarder task using conn ...
184
+
185
+ io_task.await // drives JSON-RPC until client disconnects
186
+ }).await;
187
+ ```
188
+
189
+ `AgentSideConnection::new` returns `(conn, io_task)` — you need both. `io_task`
190
+ drives the actual IO; `conn` sends notifications. The spawn closure gets
191
+ `LocalBoxFuture<'static, ()>` (not Send), so use `tokio::task::spawn_local`,
192
+ not `tokio::spawn`. Everything must sit inside
193
+ `tokio::task::LocalSet::new().run_until(...)`.
194
+
195
+ ---
196
+
197
+ ## Streaming — circular dependency pattern
198
+
199
+ `Agent::prompt()` needs to send `SessionNotification` through the connection,
200
+ but the connection is built *from* the agent. Break the cycle with an mpsc channel:
201
+
202
+ ```rust
203
+ // 1. Create channel BEFORE the agent
204
+ let (notification_tx, mut notification_rx) = mpsc::channel::<SessionNotification>(256);
205
+
206
+ // 2. Pass sender into agent
207
+ let agent = MyAgent { notification_tx, ... };
208
+
209
+ // 3. Create connection
210
+ let (conn, io_task) = AgentSideConnection::new(agent, stdout, stdin, |fut| {
211
+ tokio::task::spawn_local(fut);
212
+ });
213
+
214
+ // 4. Spawn forwarder that holds `conn`
215
+ tokio::task::spawn_local(async move {
216
+ while let Some(notification) = notification_rx.recv().await {
217
+ conn.session_notification(notification).await.ok();
218
+ }
219
+ });
220
+
221
+ // 5. Run IO
222
+ io_task.await;
223
+ ```
224
+
225
+ Inside `prompt()`, push chunks through the channel:
226
+ ```rust
227
+ self.notification_tx.send(SessionNotification::new(
228
+ session_id.clone(),
229
+ SessionUpdate::AgentMessageChunk(ContentChunk::new(ContentBlock::from(delta))),
230
+ )).await.ok(); // ignore send errors (channel closed = client gone)
231
+ ```
232
+
233
+ ---
234
+
235
+ ## Logging
236
+
237
+ Log to **stderr** — stdout is the ACP JSON-RPC wire:
238
+
239
+ ```rust
240
+ env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
241
+ .target(env_logger::Target::Stderr)
242
+ .init();
243
+ ```
244
+
245
+ ---
246
+
247
+ ## Protocol flow
248
+
249
+ ```
250
+ Editor Agent
251
+ │ │
252
+ │── initialize ────────────────►│ (negotiate version + capabilities)
253
+ │◄─ InitializeResponse ─────────│
254
+ │ │
255
+ │── authenticate ──────────────►│ (method_id from authMethods)
256
+ │◄─ AuthenticateResponse ───────│
257
+ │ │
258
+ │── session/new ───────────────►│ (create session, load model)
259
+ │◄─ NewSessionResponse ─────────│
260
+ │ │
261
+ │── session/prompt ────────────►│ (user message)
262
+ │◄─ session/update (N times) ───│ (streaming tokens via notification)
263
+ │◄─ PromptResponse ─────────────│ (stop_reason = EndTurn when done)
264
+ │ │
265
+ │── session/cancel (optional) ──►│
266
+ │ │
267
+ │── [disconnect] ───────────────►│ (io_task future resolves → shutdown)
268
+ ```
269
+
270
+ ---
271
+
272
+ ## Zed configuration
273
+
274
+ ```json
275
+ {
276
+ "agent_servers": {
277
+ "MyAgent": {
278
+ "type": "custom",
279
+ "command": "/path/to/binary"
280
+ }
281
+ }
282
+ }
283
+ ```
284
+
285
+ ---
286
+
287
+ ## Gotchas
288
+
289
+ 1. **`Error::internal()` doesn't exist** — use `Error::new(-32603, msg)`.
290
+ 2. **All protocol structs are `#[non_exhaustive]`** — use builder methods,
291
+ never struct literals. Add `_ => ...` wildcards when matching.
292
+ 3. **`LocalBoxFuture` is `!Send`** — `tokio::spawn` won't work; use
293
+ `tokio::task::spawn_local` inside a `LocalSet`.
294
+ 4. **`tokio::task::spawn_local` panics outside a `LocalSet`** — wrap with
295
+ `LocalSet::new().run_until(async { ... }).await`.
296
+ 5. **Store `SessionId` as `SessionId`**, not `String` — otherwise `==`
297
+ comparisons get annoying.
298
+ 6. **One session per connection is fine for MVP** — reuse the model with
299
+ `clear_history()` instead of reloading.
300
+ 7. **`AgentCapabilities::default()` exists** — all capabilities None/false.
301
+ 8. **`block_in_place` panics inside `spawn_local`** — dependencies that call
302
+ `tokio::task::block_in_place` internally (e.g. `mistralrs`) will blow up
303
+ with "can call blocking only when running on the multi-threaded runtime"
304
+ from a `spawn_local` task. Fix: do the blocking work *before* entering
305
+ the `LocalSet`, while you're still on a normal multi-thread worker, then
306
+ pass the result into your agent struct.
307
+ 9. **Empty `authMethods` hangs Zed** — `InitializeResponse` with an empty
308
+ `auth_methods` vec makes Zed show "Loading…" forever. Always include at
309
+ least one `AuthMethod::Agent(AuthMethodAgent::new("id", "Name"))`.
310
+ Import `AuthMethod`, `AuthMethodAgent`, and `ProtocolVersion` from the crate.
311
+ 10. **Never write to stdout except JSON-RPC** — any library that prints to
312
+ stdout (`mistralrs` model metadata, stray `println!`, whatever) will
313
+ corrupt the wire. Redirect diagnostics to stderr. If a dependency writes
314
+ to stdout internally, fix it or suppress it before shipping.
@@ -0,0 +1,361 @@
1
+ # Skill: AI-Assisted Coding Agents — Onde Inference Integration
2
+
3
+ ## Overview
4
+
5
+ Building a local AI coding agent in Rust using Onde Inference as the LLM backend.
6
+ Onde wraps mistral.rs with a clean API for model loading, history management, and
7
+ streaming inference across macOS (Metal), iOS, Android, Linux, and Windows.
8
+
9
+ Crate: `onde = { path = "../onde" }` or from crates.io when published
10
+ Repo: https://github.com/ondeinference/onde
11
+ Docs: https://ondeinference.com
12
+
13
+ ---
14
+
15
+ ## Onde `ChatEngine` API
16
+
17
+ ### Construction and lifecycle
18
+
19
+ ```rust
20
+ use onde::inference::{ChatEngine, GgufModelConfig, SamplingConfig};
21
+
22
+ let engine = ChatEngine::new(); // starts unloaded
23
+ engine.is_loaded().await // -> bool
24
+ engine.unload_model().await // -> ()
25
+ ```
26
+
27
+ ### Loading a model
28
+
29
+ ```rust
30
+ // Platform-aware default (Qwen 2.5 3B on macOS/Windows/Linux, 1.5B on iOS/tvOS/Android)
31
+ let config = GgufModelConfig::platform_default();
32
+
33
+ // Load — blocks until model is in memory and on GPU
34
+ engine
35
+ .load_gguf_model(
36
+ config,
37
+ Some("You are a helpful assistant.".to_string()), // system prompt
38
+ None, // sampling config (uses SamplingConfig::default() internally)
39
+ )
40
+ .await?;
41
+
42
+ // AlreadyLoaded error if called twice — check first:
43
+ if !engine.is_loaded().await {
44
+ engine.load_gguf_model(...).await?;
45
+ }
46
+ ```
47
+
48
+ **Model sizes (macOS/Windows/Linux default — Qwen 2.5 3B Q4_K_M):** ~1.93 GB
49
+ **Model sizes (iOS/tvOS/Android default — Qwen 2.5 1.5B Q4_K_M):** ~941 MB
50
+ First run downloads from HuggingFace Hub into `~/.cache/huggingface/`.
51
+
52
+ ### Blocking (non-streaming) inference
53
+
54
+ ```rust
55
+ let result = engine.send_message("What is Rust's ownership model?").await?;
56
+ // result: InferenceResult
57
+ println!("{}", result.text);
58
+ println!("took {}", result.duration_display); // e.g. "3.2s"
59
+ ```
60
+
61
+ `send_message` appends both the user message and assistant reply to conversation
62
+ history automatically.
63
+
64
+ ### Streaming inference
65
+
66
+ ```rust
67
+ let mut rx: tokio::sync::mpsc::Receiver<StreamChunk> =
68
+ engine.stream_message("Tell me a story.").await?;
69
+
70
+ while let Some(chunk) = rx.recv().await {
71
+ if !chunk.delta.is_empty() {
72
+ print!("{}", chunk.delta); // partial token text
73
+ }
74
+ if chunk.done {
75
+ // chunk.finish_reason: Option<String> — e.g. "stop", "length"
76
+ break;
77
+ }
78
+ }
79
+ ```
80
+
81
+ `StreamChunk` fields:
82
+ - `delta: String` — the new token(s) in this chunk
83
+ - `done: bool` — true on the last chunk
84
+ - `finish_reason: Option<String>` — present on final chunk only
85
+
86
+ History is updated automatically after the stream completes.
87
+
88
+ ### One-shot generation (no history side-effects)
89
+
90
+ ```rust
91
+ use onde::inference::ChatMessage;
92
+
93
+ let result = engine.generate(
94
+ vec![ChatMessage::user("Expand: a cat in space")],
95
+ Some(SamplingConfig::deterministic()),
96
+ ).await?;
97
+ println!("{}", result.text);
98
+ // Does NOT modify conversation history
99
+ ```
100
+
101
+ ### History management
102
+
103
+ ```rust
104
+ let history: Vec<ChatMessage> = engine.history().await;
105
+ let removed: usize = engine.clear_history().await; // returns count cleared
106
+ engine.push_history(ChatMessage::user("context")).await;
107
+ engine.set_system_prompt("new system prompt").await;
108
+ engine.clear_system_prompt().await;
109
+ ```
110
+
111
+ ### Engine status
112
+
113
+ ```rust
114
+ let info: EngineInfo = engine.info().await;
115
+ // info.status: EngineStatus (Unloaded | Loading | Ready | Generating | Error)
116
+ // info.model_name: Option<String>
117
+ // info.approx_memory: Option<String> e.g. "~1.93 GB"
118
+ // info.history_length: u64
119
+ ```
120
+
121
+ ---
122
+
123
+ ## `InferenceError` variants
124
+
125
+ ```rust
126
+ match err {
127
+ InferenceError::NoModelLoaded => { /* load model first */ }
128
+ InferenceError::AlreadyLoaded { model_name } => { /* already loaded */ }
129
+ InferenceError::ModelBuild { reason } => { /* load failure */ }
130
+ InferenceError::Inference { reason } => { /* runtime inference error */ }
131
+ InferenceError::Cancelled => { /* was cancelled */ }
132
+ InferenceError::Other { reason } => { /* unexpected */ }
133
+ }
134
+ ```
135
+
136
+ Map to ACP errors:
137
+ ```rust
138
+ .map_err(|e| agent_client_protocol::Error::new(-32603, e.to_string()))?
139
+ ```
140
+
141
+ ---
142
+
143
+ ## `SamplingConfig` presets
144
+
145
+ | Preset | temp | top_p | max_tokens | Use case |
146
+ |--------|------|-------|------------|----------|
147
+ | `SamplingConfig::default()` | 0.7 | 0.95 | 512 | General chat |
148
+ | `SamplingConfig::deterministic()` | 0.0 | — | 512 | Code / reproducible |
149
+ | `SamplingConfig::mobile()` | 0.7 | 0.95 | 128 | Memory-constrained |
150
+ | `SamplingConfig::coding()` | 0.0 | — | 512 | Code generation |
151
+ | `SamplingConfig::coding_mobile()` | 0.0 | — | 128 | Code on mobile |
152
+
153
+ ---
154
+
155
+ ## `GgufModelConfig` constructors
156
+
157
+ ```rust
158
+ GgufModelConfig::platform_default() // auto-selects based on target_os
159
+ GgufModelConfig::qwen25_1_5b() // force 1.5B
160
+ GgufModelConfig::qwen25_3b() // force 3B
161
+ GgufModelConfig::qwen25_coder_1_5b() // coder variant 1.5B
162
+ GgufModelConfig::qwen25_coder_3b() // coder variant 3B
163
+ ```
164
+
165
+ ---
166
+
167
+ ## Adding onde as a Rust library dependency
168
+
169
+ ```toml
170
+ # In your crate's Cargo.toml — onde is a path dep since it's not on crates.io yet
171
+ onde = { path = "../onde" }
172
+ ```
173
+
174
+ **Important:** `onde` declares `crate-type = ["lib", "cdylib", "staticlib"]`.
175
+ When used as a Rust library dep, only the `lib` target is compiled. The
176
+ `cdylib`/`staticlib` targets (used for Swift/Kotlin FFI) are not built. The
177
+ `uniffi::setup_scaffolding!()` macro generates `#[no_mangle] extern "C"` symbols
178
+ but these are harmless in a binary context.
179
+
180
+ **The `[patch.crates-io]` in onde's Cargo.toml does NOT propagate** to dependents
181
+ unless they are in the same workspace. The `sysctl` patch is only needed for
182
+ watchOS; macOS/iOS/Linux work without it.
183
+
184
+ **GPU feature selection is automatic** via `target_os` cfg flags in onde's
185
+ Cargo.toml — you get Metal on macOS/iOS without any extra features in your crate.
186
+
187
+ ---
188
+
189
+ ## Patterns for coding agents
190
+
191
+ ### Single-engine, multi-session via history reset
192
+
193
+ For a simple MVP where one session is active at a time:
194
+
195
+ ```rust
196
+ struct MyAgent {
197
+ engine: Arc<ChatEngine>,
198
+ active_session: Arc<Mutex<Option<SessionId>>>,
199
+ }
200
+
201
+ // new_session handler:
202
+ if self.engine.is_loaded().await {
203
+ self.engine.clear_history().await; // reuse model, fresh conversation
204
+ } else {
205
+ self.engine
206
+ .load_gguf_model(GgufModelConfig::platform_default(), Some(SYSTEM_PROMPT.into()), None)
207
+ .await?;
208
+ }
209
+ ```
210
+
211
+ **Why:** Loading the model is expensive (seconds + GB of RAM). Reloading for each
212
+ session would make the agent feel broken. `clear_history()` resets context in
213
+ microseconds.
214
+
215
+ ### Per-session engines (multiple concurrent sessions)
216
+
217
+ When you need truly isolated parallel sessions:
218
+
219
+ ```rust
220
+ use std::collections::HashMap;
221
+
222
+ struct MultiSessionAgent {
223
+ sessions: Arc<Mutex<HashMap<String, Arc<ChatEngine>>>>,
224
+ }
225
+
226
+ // new_session: create and load a new engine per session
227
+ // prompt: look up session engine, call send_message or stream_message
228
+ // CAVEAT: each engine holds a separate model copy in GPU memory — expensive!
229
+ ```
230
+
231
+ Better approach for shared GPU memory: use `engine.generate()` (no history
232
+ side-effects) with an explicitly managed message vec per session.
233
+
234
+ ### System prompt design for coding agents
235
+
236
+ ```rust
237
+ const SYSTEM_PROMPT: &str = "\
238
+ You are <AgentName>, an expert AI coding agent integrated into your editor \
239
+ via the Agent Client Protocol. You specialize in:
240
+
241
+ - Code analysis, writing, and refactoring
242
+ - Bug hunting and debugging
243
+ - Git workflows and commit messages
244
+ - Software architecture and design patterns
245
+ - Code review and best practices
246
+
247
+ Be concise, precise, and practical. Write clean, idiomatic code with brief \
248
+ explanations. Identify root causes when debugging. Prefer correctness over brevity.";
249
+ ```
250
+
251
+ Key principles:
252
+ - State the agent's role and name clearly (models respond better to named personas)
253
+ - List specializations explicitly (influences which parts of training are activated)
254
+ - Set tone expectations: "concise", "practical", "idiomatic"
255
+ - Avoid verbose instruction lists — they cost tokens on every turn
256
+
257
+ ### Streaming tokens to ACP (connecting onde → ACP)
258
+
259
+ ```rust
260
+ // In Agent::prompt():
261
+ let mut rx = self.engine.stream_message(user_text).await
262
+ .map_err(|e| Error::new(-32603, e.to_string()))?;
263
+
264
+ while let Some(chunk) = rx.recv().await {
265
+ if !chunk.delta.is_empty() {
266
+ self.notification_tx.send(
267
+ SessionNotification::new(
268
+ session_id.clone(),
269
+ SessionUpdate::AgentMessageChunk(
270
+ ContentChunk::new(ContentBlock::from(chunk.delta)),
271
+ ),
272
+ )
273
+ ).await.ok(); // .ok() — ignore if forwarder is gone
274
+ }
275
+ if chunk.done { break; }
276
+ }
277
+
278
+ Ok(PromptResponse::new(StopReason::EndTurn))
279
+ ```
280
+
281
+ The `PromptResponse` is returned AFTER the stream finishes. The client receives
282
+ streaming tokens via `session/update` notifications while awaiting the
283
+ `session/prompt` response.
284
+
285
+ ---
286
+
287
+ ## Extracting text from ACP `PromptRequest`
288
+
289
+ ACP prompts can contain text, images, resource links, etc. For a text-only
290
+ coding agent:
291
+
292
+ ```rust
293
+ let user_text: String = args.prompt.iter()
294
+ .filter_map(|block| match block {
295
+ ContentBlock::Text(t) => Some(t.text.as_str()),
296
+ // Skip images, resource links, embedded resources for now
297
+ _ => None,
298
+ })
299
+ .collect::<Vec<_>>()
300
+ .join("\n");
301
+ ```
302
+
303
+ For future resource context (e.g. open files provided by Zed):
304
+ ```rust
305
+ ContentBlock::Resource(r) => match &r.resource {
306
+ EmbeddedResourceResource::Text(t) => Some(t.text.as_str()),
307
+ _ => None,
308
+ },
309
+ ```
310
+
311
+ ---
312
+
313
+ ## `ChatEngine` threading model
314
+
315
+ - Internally uses `Arc<tokio::sync::Mutex<Option<LoadedModel>>>` — `Send + Sync`.
316
+ - Safe to wrap in `Arc<ChatEngine>` and share across tasks.
317
+ - `stream_message()` spawns a background task via `tokio::spawn` internally — the
318
+ mistralrs model must be `Send`, which it is on all supported platforms.
319
+ - Calling `stream_message()` from a `!Send` future (e.g. inside a `LocalSet`) is
320
+ fine — the future itself doesn't hold a `!Send` value across `.await`.
321
+
322
+ ---
323
+
324
+ ## First-run model download
325
+
326
+ On first use, onde downloads the GGUF model from HuggingFace Hub:
327
+ - Requires internet connectivity
328
+ - Cached at `~/.cache/huggingface/` (or `HF_HUB_CACHE` env var)
329
+ - `HF_TOKEN` env var needed for gated models (public Qwen models don't need it)
330
+ - Subsequent runs load from disk cache — fast
331
+
332
+ For sandboxed environments (iOS, tvOS, Android):
333
+ - Set `HF_HOME` and `HF_HUB_CACHE` to a path inside the app container
334
+ - Do this BEFORE calling any ChatEngine method
335
+ - See `onde/docs/swift-package.md` for `setupInferenceEnvironment()` pattern
336
+
337
+ ---
338
+
339
+ ## Common mistakes
340
+
341
+ 1. **Calling `load_gguf_model` twice** without checking `is_loaded()` first →
342
+ `InferenceError::AlreadyLoaded`. Always guard with `is_loaded().await`.
343
+
344
+ 2. **Blocking on the stream after the channel is closed** → the stream naturally
345
+ ends when the `done` flag is true. Don't `recv()` after `done`.
346
+
347
+ 3. **Forwarding `StreamChunk`s with an empty `delta`** → always check
348
+ `!chunk.delta.is_empty()` before sending to avoid empty notifications that
349
+ waste bandwidth. Whitespace-only deltas are not empty and must still be sent.
350
+
351
+ 4. **Sharing one `ChatEngine` across parallel prompts** without coordination →
352
+ the internal Mutex serializes inference, so concurrent prompts queue up.
353
+ Design for sequential access per engine instance.
354
+
355
+ 5. **Using `SamplingConfig::default()` for code generation** → prefer
356
+ `SamplingConfig::coding()` (deterministic, temp=0) for more reliable code output.
357
+
358
+ 6. **Forgetting that `generate()` doesn't update history** — use it for
359
+ one-shot enhancements (prompt expansion, code review) that shouldn't pollute
360
+ the main conversation. Use `send_message()` / `stream_message()` for the
361
+ primary turn loop.