@namzu/sdk 0.1.5-rc.1-fix → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224)
  1. package/CHANGELOG.md +25 -0
  2. package/README.md +279 -129
  3. package/dist/bridge/tools/connector/adapter.d.ts +2 -2
  4. package/dist/bridge/tools/connector/adapter.d.ts.map +1 -1
  5. package/dist/bridge/tools/connector/adapter.js +3 -1
  6. package/dist/bridge/tools/connector/adapter.js.map +1 -1
  7. package/dist/connector/BaseConnector.d.ts +2 -1
  8. package/dist/connector/BaseConnector.d.ts.map +1 -1
  9. package/dist/connector/BaseConnector.js.map +1 -1
  10. package/dist/connector/builtins/http.d.ts +1 -1
  11. package/dist/connector/builtins/http.d.ts.map +1 -1
  12. package/dist/connector/builtins/http.js +1 -1
  13. package/dist/connector/builtins/http.js.map +1 -1
  14. package/dist/connector/builtins/webhook.d.ts +1 -1
  15. package/dist/connector/builtins/webhook.d.ts.map +1 -1
  16. package/dist/connector/builtins/webhook.js +1 -1
  17. package/dist/connector/builtins/webhook.js.map +1 -1
  18. package/dist/contracts/api.d.ts.map +1 -1
  19. package/dist/contracts/index.d.ts +1 -0
  20. package/dist/contracts/index.d.ts.map +1 -1
  21. package/dist/contracts/index.js +7 -0
  22. package/dist/contracts/index.js.map +1 -1
  23. package/dist/index.d.ts +5 -33
  24. package/dist/index.d.ts.map +1 -1
  25. package/dist/index.js +5 -23
  26. package/dist/index.js.map +1 -1
  27. package/dist/manager/connector/environment.d.ts +4 -4
  28. package/dist/manager/connector/environment.d.ts.map +1 -1
  29. package/dist/manager/connector/environment.js.map +1 -1
  30. package/dist/manager/connector/lifecycle.d.ts +2 -2
  31. package/dist/manager/connector/lifecycle.d.ts.map +1 -1
  32. package/dist/manager/connector/lifecycle.js.map +1 -1
  33. package/dist/manager/connector/tenant.d.ts +3 -3
  34. package/dist/manager/connector/tenant.d.ts.map +1 -1
  35. package/dist/manager/connector/tenant.js.map +1 -1
  36. package/dist/manager/index.d.ts +2 -1
  37. package/dist/manager/index.d.ts.map +1 -1
  38. package/dist/manager/index.js +2 -1
  39. package/dist/manager/index.js.map +1 -1
  40. package/dist/manager/run/emergency.d.ts.map +1 -1
  41. package/dist/manager/run/emergency.js +44 -12
  42. package/dist/manager/run/emergency.js.map +1 -1
  43. package/dist/manager/run/persistence.d.ts +0 -2
  44. package/dist/manager/run/persistence.d.ts.map +1 -1
  45. package/dist/manager/run/persistence.js +0 -1
  46. package/dist/manager/run/persistence.js.map +1 -1
  47. package/dist/plugin/__tests__/enable-contributions.test.d.ts +2 -0
  48. package/dist/plugin/__tests__/enable-contributions.test.d.ts.map +1 -0
  49. package/dist/plugin/__tests__/enable-contributions.test.js +265 -0
  50. package/dist/plugin/__tests__/enable-contributions.test.js.map +1 -0
  51. package/dist/plugin/__tests__/lifecycle.test.js +30 -0
  52. package/dist/plugin/__tests__/lifecycle.test.js.map +1 -1
  53. package/dist/plugin/lifecycle.d.ts +4 -1
  54. package/dist/plugin/lifecycle.d.ts.map +1 -1
  55. package/dist/plugin/lifecycle.js +157 -49
  56. package/dist/plugin/lifecycle.js.map +1 -1
  57. package/dist/rag/vector-store.d.ts +2 -2
  58. package/dist/rag/vector-store.d.ts.map +1 -1
  59. package/dist/rag/vector-store.js.map +1 -1
  60. package/dist/registry/ManagedRegistry.d.ts +6 -0
  61. package/dist/registry/ManagedRegistry.d.ts.map +1 -1
  62. package/dist/registry/ManagedRegistry.js +9 -3
  63. package/dist/registry/ManagedRegistry.js.map +1 -1
  64. package/dist/registry/agent/definitions.d.ts +2 -7
  65. package/dist/registry/agent/definitions.d.ts.map +1 -1
  66. package/dist/registry/agent/definitions.js +3 -41
  67. package/dist/registry/agent/definitions.js.map +1 -1
  68. package/dist/registry/connector/definitions.d.ts +2 -7
  69. package/dist/registry/connector/definitions.d.ts.map +1 -1
  70. package/dist/registry/connector/definitions.js +3 -41
  71. package/dist/registry/connector/definitions.js.map +1 -1
  72. package/dist/registry/connector/scoped.d.ts +5 -4
  73. package/dist/registry/connector/scoped.d.ts.map +1 -1
  74. package/dist/registry/connector/scoped.js.map +1 -1
  75. package/dist/registry/index.d.ts +1 -0
  76. package/dist/registry/index.d.ts.map +1 -1
  77. package/dist/registry/index.js +1 -0
  78. package/dist/registry/index.js.map +1 -1
  79. package/dist/registry/tool/execute.d.ts +2 -4
  80. package/dist/registry/tool/execute.d.ts.map +1 -1
  81. package/dist/registry/tool/execute.js +4 -21
  82. package/dist/registry/tool/execute.js.map +1 -1
  83. package/dist/run/index.d.ts +4 -4
  84. package/dist/run/index.d.ts.map +1 -1
  85. package/dist/run/index.js +3 -3
  86. package/dist/run/index.js.map +1 -1
  87. package/dist/run/reporter.d.ts +0 -2
  88. package/dist/run/reporter.d.ts.map +1 -1
  89. package/dist/run/reporter.js +0 -1
  90. package/dist/run/reporter.js.map +1 -1
  91. package/dist/runtime/query/__tests__/executor-plugin-hooks.test.d.ts +2 -0
  92. package/dist/runtime/query/__tests__/executor-plugin-hooks.test.d.ts.map +1 -0
  93. package/dist/runtime/query/__tests__/executor-plugin-hooks.test.js +236 -0
  94. package/dist/runtime/query/__tests__/executor-plugin-hooks.test.js.map +1 -0
  95. package/dist/runtime/query/__tests__/plugin-hooks.test.d.ts +2 -0
  96. package/dist/runtime/query/__tests__/plugin-hooks.test.d.ts.map +1 -0
  97. package/dist/runtime/query/__tests__/plugin-hooks.test.js +21 -0
  98. package/dist/runtime/query/__tests__/plugin-hooks.test.js.map +1 -0
  99. package/dist/runtime/query/executor.d.ts +6 -0
  100. package/dist/runtime/query/executor.d.ts.map +1 -1
  101. package/dist/runtime/query/executor.js +110 -6
  102. package/dist/runtime/query/executor.js.map +1 -1
  103. package/dist/runtime/query/index.d.ts.map +1 -1
  104. package/dist/runtime/query/index.js +8 -6
  105. package/dist/runtime/query/index.js.map +1 -1
  106. package/dist/runtime/query/iteration/index.d.ts.map +1 -1
  107. package/dist/runtime/query/iteration/index.js +13 -16
  108. package/dist/runtime/query/iteration/index.js.map +1 -1
  109. package/dist/runtime/query/plugin-hooks.d.ts +11 -0
  110. package/dist/runtime/query/plugin-hooks.d.ts.map +1 -0
  111. package/dist/runtime/query/plugin-hooks.js +28 -0
  112. package/dist/runtime/query/plugin-hooks.js.map +1 -0
  113. package/dist/runtime/query/tooling.d.ts +2 -0
  114. package/dist/runtime/query/tooling.d.ts.map +1 -1
  115. package/dist/runtime/query/tooling.js +1 -0
  116. package/dist/runtime/query/tooling.js.map +1 -1
  117. package/dist/store/index.d.ts +5 -1
  118. package/dist/store/index.d.ts.map +1 -1
  119. package/dist/store/index.js +4 -1
  120. package/dist/store/index.js.map +1 -1
  121. package/dist/store/run/disk.d.ts +0 -2
  122. package/dist/store/run/disk.d.ts.map +1 -1
  123. package/dist/store/run/disk.js +0 -1
  124. package/dist/store/run/disk.js.map +1 -1
  125. package/dist/store/task/__tests__/disk-concurrency.test.d.ts +2 -0
  126. package/dist/store/task/__tests__/disk-concurrency.test.d.ts.map +1 -0
  127. package/dist/store/task/__tests__/disk-concurrency.test.js +91 -0
  128. package/dist/store/task/__tests__/disk-concurrency.test.js.map +1 -0
  129. package/dist/store/task/disk.d.ts +6 -0
  130. package/dist/store/task/disk.d.ts.map +1 -1
  131. package/dist/store/task/disk.js +150 -36
  132. package/dist/store/task/disk.js.map +1 -1
  133. package/dist/types/connector/core.d.ts +2 -2
  134. package/dist/types/connector/core.d.ts.map +1 -1
  135. package/dist/types/connector/definition.d.ts +7 -7
  136. package/dist/types/connector/definition.d.ts.map +1 -1
  137. package/dist/types/connector/mcp.d.ts +4 -4
  138. package/dist/types/connector/mcp.d.ts.map +1 -1
  139. package/dist/types/connector/scope.d.ts +3 -2
  140. package/dist/types/connector/scope.d.ts.map +1 -1
  141. package/dist/types/connector/scope.js.map +1 -1
  142. package/dist/types/connector/tenant.d.ts +4 -4
  143. package/dist/types/connector/tenant.d.ts.map +1 -1
  144. package/dist/types/plugin/index.d.ts +4 -21
  145. package/dist/types/plugin/index.d.ts.map +1 -1
  146. package/dist/types/plugin/index.js +1 -10
  147. package/dist/types/plugin/index.js.map +1 -1
  148. package/dist/types/rag/knowledge-base.d.ts +2 -2
  149. package/dist/types/rag/knowledge-base.d.ts.map +1 -1
  150. package/dist/types/rag/scope.d.ts +2 -1
  151. package/dist/types/rag/scope.d.ts.map +1 -1
  152. package/dist/types/rag/storage.d.ts +3 -3
  153. package/dist/types/rag/storage.d.ts.map +1 -1
  154. package/dist/types/rag/vector.d.ts +3 -3
  155. package/dist/types/rag/vector.d.ts.map +1 -1
  156. package/dist/types/run/config.d.ts +0 -6
  157. package/dist/types/run/config.d.ts.map +1 -1
  158. package/dist/vault/InMemoryCredentialVault.d.ts +3 -3
  159. package/dist/vault/InMemoryCredentialVault.d.ts.map +1 -1
  160. package/dist/vault/InMemoryCredentialVault.js.map +1 -1
  161. package/package.json +1 -1
  162. package/src/bridge/tools/connector/adapter.ts +5 -3
  163. package/src/connector/BaseConnector.ts +2 -1
  164. package/src/connector/builtins/http.ts +1 -1
  165. package/src/connector/builtins/webhook.ts +1 -1
  166. package/src/contracts/api.ts +3 -0
  167. package/src/contracts/index.ts +10 -0
  168. package/src/index.ts +46 -41
  169. package/src/manager/connector/environment.ts +5 -5
  170. package/src/manager/connector/lifecycle.ts +2 -2
  171. package/src/manager/connector/tenant.ts +8 -3
  172. package/src/manager/index.ts +2 -1
  173. package/src/manager/run/emergency.ts +45 -16
  174. package/src/manager/run/persistence.ts +0 -4
  175. package/src/plugin/__tests__/enable-contributions.test.ts +295 -0
  176. package/src/plugin/__tests__/lifecycle.test.ts +44 -0
  177. package/src/plugin/lifecycle.ts +186 -57
  178. package/src/rag/vector-store.ts +2 -2
  179. package/src/registry/ManagedRegistry.ts +14 -3
  180. package/src/registry/agent/definitions.ts +3 -53
  181. package/src/registry/connector/definitions.ts +3 -53
  182. package/src/registry/connector/scoped.ts +7 -6
  183. package/src/registry/index.ts +1 -0
  184. package/src/registry/tool/execute.ts +4 -22
  185. package/src/run/index.ts +4 -4
  186. package/src/run/reporter.ts +0 -4
  187. package/src/runtime/query/__tests__/executor-plugin-hooks.test.ts +305 -0
  188. package/src/runtime/query/__tests__/plugin-hooks.test.ts +28 -0
  189. package/src/runtime/query/executor.ts +148 -7
  190. package/src/runtime/query/index.ts +16 -6
  191. package/src/runtime/query/iteration/index.ts +29 -16
  192. package/src/runtime/query/plugin-hooks.ts +34 -0
  193. package/src/runtime/query/tooling.ts +3 -0
  194. package/src/store/index.ts +6 -1
  195. package/src/store/run/disk.ts +0 -4
  196. package/src/store/task/__tests__/disk-concurrency.test.ts +118 -0
  197. package/src/store/task/disk.ts +150 -37
  198. package/src/types/connector/core.ts +2 -2
  199. package/src/types/connector/definition.ts +7 -7
  200. package/src/types/connector/mcp.ts +4 -4
  201. package/src/types/connector/scope.ts +3 -2
  202. package/src/types/connector/tenant.ts +4 -4
  203. package/src/types/plugin/index.ts +0 -14
  204. package/src/types/rag/knowledge-base.ts +2 -2
  205. package/src/types/rag/scope.ts +3 -1
  206. package/src/types/rag/storage.ts +3 -3
  207. package/src/types/rag/vector.ts +3 -3
  208. package/src/types/run/config.ts +0 -9
  209. package/src/vault/InMemoryCredentialVault.ts +3 -3
  210. package/dist/manager/agent/index.d.ts +0 -2
  211. package/dist/manager/agent/index.d.ts.map +0 -1
  212. package/dist/manager/agent/index.js +0 -2
  213. package/dist/manager/agent/index.js.map +0 -1
  214. package/dist/registry/agent/index.d.ts +0 -2
  215. package/dist/registry/agent/index.d.ts.map +0 -1
  216. package/dist/registry/agent/index.js +0 -2
  217. package/dist/registry/agent/index.js.map +0 -1
  218. package/dist/router/index.d.ts +0 -2
  219. package/dist/router/index.d.ts.map +0 -1
  220. package/dist/router/index.js +0 -2
  221. package/dist/router/index.js.map +0 -1
  222. package/src/manager/agent/index.ts +0 -1
  223. package/src/registry/agent/index.ts +0 -1
  224. package/src/router/index.ts +0 -1
package/README.md CHANGED
@@ -14,7 +14,7 @@ Open-source AI agent SDK with a built-in runtime. Nothing between you and your a
14
14
 
15
15
  There are great agent frameworks out there — LangChain, CrewAI, AutoGen, Vercel AI SDK, OpenAI Agents SDK. Each solves a real problem. Namzu exists because we think some things are still missing.
16
16
 
17
- **Sandboxed execution.** Agents execute tools inside process-level sandboxes macOS Seatbelt (SBPL) profiles and Linux namespaces. File I/O, shell commands, and code execution are isolated to the agent's workspace by default. No Docker required, no container overhead. Deny-default, allow-back for what the agent actually needs.
17
+ **Sandboxed execution.** Agents execute tools inside process-level sandboxes. macOS uses Seatbelt (SBPL) profiles for deny-default file-I/O and process isolation. Linux uses lightweight mount + PID namespace isolation for process scoping, with resource limits (memory, timeout, max processes) enforced by the runtime. No Docker, no containers.
18
18
 
19
19
  **True provider independence.** Most frameworks say they're provider-agnostic but are optimized for one vendor. Namzu treats every provider as a first-class citizen through BYOK (Bring Your Own Key). Switch from OpenRouter to Bedrock by changing one line. No performance penalties, no second-class APIs.
20
20
 
@@ -125,7 +125,7 @@ npm install @namzu/sdk
125
125
  ## Quick Start
126
126
 
127
127
  ```typescript
128
- import { defineAgent, defineTool, ProviderFactory } from '@namzu/sdk'
128
+ import { defineTool, ProviderFactory, ReactiveAgent, ToolRegistry } from '@namzu/sdk'
129
129
  import { z } from 'zod'
130
130
 
131
131
  // Define a tool
@@ -133,32 +133,39 @@ const searchWeb = defineTool({
133
133
  name: 'search_web',
134
134
  description: 'Search the web for information',
135
135
  inputSchema: z.object({ query: z.string() }),
136
+ category: 'network',
137
+ permissions: ['network_access'],
138
+ readOnly: true,
139
+ destructive: false,
140
+ concurrencySafe: true,
136
141
  execute: async ({ query }) => {
137
142
  const results = await fetch(`https://api.search.com?q=${query}`)
138
143
  return { success: true, output: await results.text() }
139
144
  },
140
145
  })
141
146
 
142
- // Create a provider
147
+ // Create a provider (model is chosen per-run, not on the provider)
143
148
  const provider = ProviderFactory.createProvider({
144
149
  type: 'openrouter',
145
150
  apiKey: process.env.OPENROUTER_KEY!,
146
- model: 'anthropic/claude-sonnet-4-20250514',
147
151
  })
148
152
 
149
- // Define an agent
150
- const agent = defineAgent({
151
- info: {
152
- id: 'researcher',
153
- name: 'Research Assistant',
154
- version: '1.0.0',
155
- category: 'research',
156
- description: 'Finds and synthesizes information from the web',
157
- tools: ['search_web'],
158
- defaults: { model: 'anthropic/claude-sonnet-4-20250514', tokenBudget: 8192 },
159
- },
160
- tools: [searchWeb],
153
+ // Register tools and build the agent
154
+ const tools = new ToolRegistry()
155
+ tools.register(searchWeb)
156
+
157
+ const agent = new ReactiveAgent({
158
+ id: 'researcher',
159
+ name: 'Research Assistant',
160
+ version: '1.0.0',
161
+ category: 'research',
162
+ description: 'Finds and synthesizes information from the web',
161
163
  })
164
+
165
+ const result = await agent.run(
166
+ { messages: [{ role: 'user', content: 'Summarize the latest LLM benchmarks' }], workingDirectory: process.cwd() },
167
+ { model: 'anthropic/claude-sonnet-4-20250514', tokenBudget: 8192, timeoutMs: 600_000, provider, tools },
168
+ )
162
169
  ```
163
170
 
164
171
  ## Agent Types
@@ -172,10 +179,24 @@ The core agentic loop. Sends messages to an LLM, executes tool calls, and iterat
172
179
  ```typescript
173
180
  import { ReactiveAgent } from '@namzu/sdk'
174
181
 
175
- const agent = new ReactiveAgent({ id: 'solver', name: 'Problem Solver' })
182
+ const agent = new ReactiveAgent({
183
+ id: 'solver',
184
+ name: 'Problem Solver',
185
+ version: '1.0.0',
186
+ category: 'analysis',
187
+ description: 'Analyzes data with LLM + tools',
188
+ })
189
+
176
190
  const result = await agent.run(
177
- { messages: [{ role: 'user', content: 'Analyze this dataset and find trends' }] },
178
- { provider, tools, systemPrompt: 'You are a data analyst.' },
191
+ { messages: [{ role: 'user', content: 'Analyze this dataset and find trends' }], workingDirectory: process.cwd() },
192
+ {
193
+ model: 'anthropic/claude-sonnet-4-20250514',
194
+ tokenBudget: 8192,
195
+ timeoutMs: 600_000,
196
+ provider,
197
+ tools, // ToolRegistry
198
+ systemPrompt: 'You are a data analyst.',
199
+ },
179
200
  )
180
201
  ```
181
202
 
@@ -186,14 +207,27 @@ Deterministic, sequential step execution. Each step receives the output of the p
186
207
  ```typescript
187
208
  import { PipelineAgent } from '@namzu/sdk'
188
209
 
189
- const etl = new PipelineAgent({ id: 'etl', name: 'ETL Pipeline' })
190
- const result = await etl.run(input, {
191
- steps: [
192
- { name: 'extract', execute: async (inp) => await readSource(inp.path) },
193
- { name: 'transform', execute: async (data) => normalize(data) },
194
- { name: 'load', execute: async (data) => await writeToDb(data) },
195
- ],
210
+ const etl = new PipelineAgent({
211
+ id: 'etl',
212
+ name: 'ETL Pipeline',
213
+ version: '1.0.0',
214
+ category: 'pipeline',
215
+ description: 'Extract transform load',
196
216
  })
217
+
218
+ const result = await etl.run(
219
+ { messages: [], workingDirectory: process.cwd() },
220
+ {
221
+ model: 'anthropic/claude-sonnet-4-20250514',
222
+ tokenBudget: 8192,
223
+ timeoutMs: 600_000,
224
+ steps: [
225
+ { name: 'extract', execute: async (inp, ctx) => await readSource('./data') },
226
+ { name: 'transform', execute: async (data, ctx) => normalize(data) },
227
+ { name: 'load', execute: async (data, ctx) => await writeToDb(data) },
228
+ ],
229
+ },
230
+ )
197
231
  ```
198
232
 
199
233
  ### Router Agent
@@ -203,15 +237,28 @@ Intelligent delegation. An LLM analyzes the input and routes it to the best-suit
203
237
  ```typescript
204
238
  import { RouterAgent } from '@namzu/sdk'
205
239
 
206
- const router = new RouterAgent({ id: 'dispatcher', name: 'Task Router' })
207
- const result = await router.run(input, {
208
- provider,
209
- routes: [
210
- { agentId: 'math-solver', agent: mathAgent, description: 'Solves equations' },
211
- { agentId: 'writer', agent: writerAgent, description: 'Writes content' },
212
- ],
213
- fallbackAgentId: 'writer',
240
+ const router = new RouterAgent({
241
+ id: 'dispatcher',
242
+ name: 'Task Router',
243
+ version: '1.0.0',
244
+ category: 'routing',
245
+ description: 'Routes an input to the best-fit agent',
214
246
  })
247
+
248
+ const result = await router.run(
249
+ { messages: [{ role: 'user', content: 'Solve 2x + 3 = 11' }], workingDirectory: process.cwd() },
250
+ {
251
+ model: 'anthropic/claude-sonnet-4-20250514',
252
+ tokenBudget: 4096,
253
+ timeoutMs: 600_000,
254
+ provider,
255
+ routes: [
256
+ { agentId: 'math-solver', agent: mathAgent, description: 'Solves equations' },
257
+ { agentId: 'writer', agent: writerAgent, description: 'Writes content' },
258
+ ],
259
+ fallbackAgentId: 'writer',
260
+ },
261
+ )
215
262
  ```
216
263
 
217
264
  ### Supervisor Agent
@@ -221,12 +268,26 @@ Multi-agent coordinator. Manages child agents, delegates tasks, aggregates resul
221
268
  ```typescript
222
269
  import { SupervisorAgent, AgentManager } from '@namzu/sdk'
223
270
 
224
- const supervisor = new SupervisorAgent({ id: 'lead', name: 'Project Lead' })
225
- const result = await supervisor.run(input, {
226
- provider,
227
- agentManager,
228
- agentDefinitions: [researcherDef, writerDef, reviewerDef],
271
+ const supervisor = new SupervisorAgent({
272
+ id: 'lead',
273
+ name: 'Project Lead',
274
+ version: '1.0.0',
275
+ category: 'coordination',
276
+ description: 'Delegates sub-tasks to specialized agents',
229
277
  })
278
+
279
+ const result = await supervisor.run(
280
+ { messages: [{ role: 'user', content: 'Research, write, and review a Q3 report' }], workingDirectory: process.cwd() },
281
+ {
282
+ model: 'anthropic/claude-sonnet-4-20250514',
283
+ tokenBudget: 32_768,
284
+ timeoutMs: 1_800_000,
285
+ provider,
286
+ agentManager, // resolves agent ids → implementations
287
+ agentIds: ['researcher', 'writer', 'reviewer'],
288
+ systemPrompt: 'You coordinate specialists. Decompose tasks, delegate, and synthesize results.',
289
+ },
290
+ )
230
291
  // Child runs tracked via parent_run_id and depth
231
292
  ```
232
293
 
@@ -246,6 +307,7 @@ const fetchApi = defineTool({
246
307
  permissions: ['network_access'],
247
308
  readOnly: true,
248
309
  destructive: false,
310
+ concurrencySafe: true,
249
311
  execute: async ({ url, method }) => {
250
312
  const resp = await fetch(url, { method })
251
313
  return { success: true, output: await resp.text() }
@@ -264,16 +326,41 @@ const llmTools = registry.toLLMTools() // Only active + suspended tools
264
326
 
265
327
  Built-in tools: `ReadFileTool`, `WriteFileTool`, `EditTool`, `BashTool`, `GlobTool`, `GrepTool`, `LsTool`, `SearchToolsTool`
266
328
 
329
+ ### Plugin Contributions
330
+
331
+ Plugins extend the runtime with tools, hooks, and MCP servers via a manifest. `PluginLifecycleManager.enable()` loads contributions on demand and rolls back cleanly on failure.
332
+
333
+ ```typescript
334
+ import { PluginLifecycleManager } from '@namzu/sdk'
335
+
336
+ const manager = new PluginLifecycleManager({ pluginRegistry, toolRegistry, log })
337
+ const plugin = await manager.install('/path/to/plugin', 'project')
338
+ await manager.enable(plugin.id)
339
+ // → manifest.tools registered as `${plugin}:${tool}` (deferred)
340
+ // → manifest.hooks attached for run_start/end, iteration_start/end,
341
+ // pre/post_llm_call, pre/post_tool_use
342
+ // → manifest.mcpServers connected via stdio; their tools registered as
343
+ // `${plugin}:mcp__${server}__${tool}` (deferred)
344
+ ```
345
+
346
+ Hook handlers can return `continue`, `modify` (rewrite tool input), `skip` (synthesize a tool result), or `error` (fail the run). Modify actions compose — chained hooks each see the previous hook's modified input. The runtime emits `plugin_hook_executing` / `plugin_hook_completed` events around every handler.
347
+
267
348
  ### Sandbox-Aware Execution
268
349
 
269
- All built-in tools are sandbox-aware. When a sandbox is present in the execution context, tools automatically route through `sandbox.exec()`, `sandbox.readFile()`, and `sandbox.writeFile()`. When no sandbox is present, they fall back to native operations zero config required.
350
+ File and shell built-ins (`ReadFileTool`, `WriteFileTool`, `EditTool`, `BashTool`) route through `sandbox.exec()` / `sandbox.readFile()` / `sandbox.writeFile()` when a sandbox is present in the execution context, and fall back to native operations when not. Use `query()` (streaming generator) with a `ToolRegistry` and a `sandboxProvider`:
270
351
 
271
352
  ```typescript
272
- // With sandbox: tool calls are isolated to agent workspace
273
- const result = await query({ agent, provider, tools: getBuiltinTools(), messages, sandboxProvider })
353
+ import { drainQuery, ToolRegistry, getBuiltinTools } from '@namzu/sdk'
354
+
355
+ const tools = new ToolRegistry()
356
+ tools.register(getBuiltinTools(), 'active')
274
357
 
275
- // Without sandbox: same tools, same API, native execution
276
- const result = await query({ agent, provider, tools: getBuiltinTools(), messages })
358
+ // With sandbox: file + shell tool calls are isolated to the agent workspace
359
+ const result = await drainQuery({
360
+ agentId: 'solver', agentName: 'Solver', threadId,
361
+ provider, tools, runConfig, messages, resumeHandler,
362
+ sandboxProvider,
363
+ })
277
364
  ```
278
365
 
279
366
  ## Sandbox
@@ -281,16 +368,22 @@ const result = await query({ agent, provider, tools: getBuiltinTools(), messages
281
368
  Process-level isolation for agent tool execution. No Docker, no containers — native OS mechanisms.
282
369
 
283
370
  ```typescript
284
- import { query, SandboxProviderFactory, getBuiltinTools } from '@namzu/sdk'
371
+ import { drainQuery, SandboxProviderFactory, ToolRegistry, getBuiltinTools, getRootLogger } from '@namzu/sdk'
285
372
 
286
- const sandboxProvider = SandboxProviderFactory.create({ provider: 'local' })
373
+ const sandboxProvider = SandboxProviderFactory.create(
374
+ { enabled: true, provider: 'local', timeoutMs: 60_000, memoryLimitMb: 512, maxProcesses: 16, cleanupOnDestroy: true },
375
+ getRootLogger(),
376
+ )
377
+
378
+ const tools = new ToolRegistry()
379
+ tools.register(getBuiltinTools(), 'active')
287
380
 
288
- const result = await query({
289
- agent,
290
- provider,
291
- tools: getBuiltinTools(),
381
+ const result = await drainQuery({
382
+ agentId: 'coder', agentName: 'Coder', threadId,
383
+ provider, tools, runConfig,
292
384
  messages: [{ role: 'user', content: 'Write a Python script and run it' }],
293
- sandboxProvider, // All tool calls execute inside the sandbox
385
+ resumeHandler,
386
+ sandboxProvider, // sandbox-aware tools opt in here
294
387
  })
295
388
  ```
296
389
 
@@ -311,9 +404,14 @@ The sandbox creates a temporary workspace directory, restricts file I/O to that
311
404
 
312
405
  ```typescript
313
406
  // Direct sandbox API (low-level)
314
- const sandbox = await sandboxProvider.create({ agentId: 'agt_coder' })
407
+ const sandbox = await sandboxProvider.create({
408
+ workingDirectory: process.cwd(),
409
+ timeoutMs: 30_000,
410
+ memoryLimitMb: 512,
411
+ maxProcesses: 16,
412
+ })
315
413
 
316
- const result = await sandbox.exec('/bin/sh', ['-c', 'echo hello'], { timeout: 5000 })
414
+ const result = await sandbox.exec('/bin/sh', ['-c', 'echo hello'], { timeoutMs: 5_000 })
317
415
  console.log(result.stdout) // "hello\n"
318
416
  console.log(result.exitCode) // 0
319
417
 
@@ -327,30 +425,33 @@ await sandbox.destroy() // Cleanup workspace
327
425
 
328
426
  Pluggable LLM backends with a unified interface for chat, streaming, and model discovery.
329
427
 
428
+ The provider is constructed once with credentials; the model is selected per chat/run so you can swap models without rebuilding the client.
429
+
330
430
  ```typescript
331
- import { ProviderFactory, OpenRouterProvider, BedrockProvider } from '@namzu/sdk'
431
+ import { ProviderFactory } from '@namzu/sdk'
332
432
 
333
433
  // OpenRouter (BYOK)
334
434
  const openrouter = ProviderFactory.createProvider({
335
435
  type: 'openrouter',
336
436
  apiKey: process.env.OPENROUTER_KEY!,
337
- model: 'anthropic/claude-sonnet-4-20250514',
338
437
  })
339
438
 
340
439
  // AWS Bedrock
341
440
  const bedrock = ProviderFactory.createProvider({
342
441
  type: 'bedrock',
343
442
  region: 'us-east-1',
344
- model: 'anthropic.claude-3-sonnet-20240229-v1:0',
345
443
  })
346
444
 
347
- // Streaming
348
- for await (const chunk of provider.chatStream(params)) {
445
+ // Streaming — model is part of the per-call params
446
+ for await (const chunk of openrouter.chatStream({
447
+ model: 'anthropic/claude-sonnet-4-20250514',
448
+ messages: [{ role: 'user', content: 'hi' }],
449
+ })) {
349
450
  process.stdout.write(chunk.delta?.content ?? '')
350
451
  }
351
452
 
352
453
  // Model discovery
353
- const models = await provider.listModels()
454
+ const models = await openrouter.listModels()
354
455
  ```
355
456
 
356
457
  ## RAG
@@ -363,7 +464,6 @@ import {
363
464
  OpenRouterEmbeddingProvider,
364
465
  InMemoryVectorStore,
365
466
  DefaultRetriever,
366
- DefaultIngestionPipeline,
367
467
  DefaultKnowledgeBase,
368
468
  createRAGTool,
369
469
  } from '@namzu/sdk'
@@ -384,11 +484,15 @@ const embedder = new OpenRouterEmbeddingProvider({
384
484
 
385
485
  // Vector store and retriever
386
486
  const vectorStore = new InMemoryVectorStore()
387
- const retriever = new DefaultRetriever(vectorStore)
487
+ const retriever = new DefaultRetriever(vectorStore, embedder)
388
488
 
389
- // Knowledge base
390
- const kb = new DefaultKnowledgeBase({ retriever, ingestionPipeline })
391
- await kb.ingest({ id: 'doc-1', title: 'API Guide', content: apiDoc })
489
+ // Knowledge base — pass (config, vectorStore, embeddingProvider)
490
+ const kb = new DefaultKnowledgeBase(
491
+ { id: 'docs', name: 'API Guides', tenantId: 'default' },
492
+ vectorStore,
493
+ embedder,
494
+ )
495
+ await kb.ingest(apiDoc, { title: 'API Guide', source: 'doc-1' })
392
496
  const results = await kb.query({ text: 'How do I authenticate?', config: { topK: 5 } })
393
497
 
394
498
  // Attach to agent as a tool
@@ -405,20 +509,19 @@ Unified framework for integrating external services — HTTP APIs, webhooks, and
405
509
  ```typescript
406
510
  import {
407
511
  HttpConnector,
512
+ ConnectorManager,
408
513
  ConnectorRegistry,
409
514
  MCPClient,
410
515
  MCPConnectorBridge,
411
516
  TenantConnectorManager,
412
517
  } from '@namzu/sdk'
413
518
 
414
- // HTTP connector
415
- const slack = new HttpConnector({
416
- id: 'slack',
417
- baseUrl: 'https://slack.com/api',
418
- methods: [
419
- { name: 'send_message', path: '/chat.postMessage', httpMethod: 'POST' },
420
- ],
421
- })
519
+ // HTTP connector — configure via connect()
520
+ const slack = new HttpConnector()
521
+ await slack.connect(
522
+ { id: 'slack', baseUrl: 'https://slack.com/api' },
523
+ { type: 'bearer', token: process.env.SLACK_TOKEN! },
524
+ )
422
525
 
423
526
  // MCP client (stdio or HTTP-SSE transport)
424
527
  const mcpClient = new MCPClient({
@@ -429,45 +532,57 @@ await mcpClient.connect()
429
532
  const tools = await mcpClient.listTools()
430
533
  const result = await mcpClient.callTool('my_tool', { input: 'value' })
431
534
 
432
- // Bridge MCP tools into Namzu tool system
433
- const bridge = new MCPConnectorBridge(mcpClient)
434
- const namzuTools = bridge.toToolDefinitions()
535
+ // Bridge MCP as a connector so connector-based code paths can reach it
536
+ const connectorManager = new ConnectorManager({ registry: new ConnectorRegistry() })
537
+ const mcpBridge = new MCPConnectorBridge({ manager: connectorManager })
538
+ const discoveredTools = await mcpBridge.listTools()
539
+ await mcpBridge.callTool('my_tool', { input: 'value' })
435
540
 
436
541
  // Multi-tenant isolation
437
- const tenantManager = new TenantConnectorManager({
438
- connectorRegistry,
439
- tenantId: 'org-123',
440
- })
542
+ const tenantManager = new TenantConnectorManager({ registry: new ConnectorRegistry() })
543
+ tenantManager.registerTenant({ tenantId: 'org-123', name: 'Org 123' })
441
544
  ```
442
545
 
546
+ MCP servers can also be declared in a plugin manifest (`mcpServers: [{ name, command, args, env }]`). The plugin lifecycle starts each server on enable, discovers its tools, and registers them under the plugin namespace. Disabling the plugin disconnects the clients before unregistering the tools.
547
+
443
548
  ## Human-in-the-Loop
444
549
 
445
550
  Pause agent execution for human review of plans and tool calls. Checkpoint and resume runs across sessions.
446
551
 
552
+ Plan approval and tool review are separate handlers wired at different points:
553
+
447
554
  ```typescript
448
- import { PlanManager } from '@namzu/sdk'
555
+ import { PlanManager, drainQuery, autoApproveHandler } from '@namzu/sdk'
556
+ import type { ResumeHandler } from '@namzu/sdk'
449
557
 
558
+ // 1. Plan approval — runs when the agent produces a plan
450
559
  const planManager = new PlanManager(runId, async (request) => {
451
- if (request.type === 'plan_approval') {
452
- // Present plan to user, get approval
453
- const userDecision = await showPlanUI(request.plan)
454
- return { approved: userDecision.approved }
560
+ const decision = await showPlanUI(request)
561
+ return {
562
+ approved: decision.approved,
563
+ feedback: decision.feedback,
564
+ modifiedSteps: decision.editedSteps,
455
565
  }
566
+ })
456
567
 
568
+ // 2. Tool review — runs for every pending tool call (required by query/drainQuery)
569
+ const resumeHandler: ResumeHandler = async (request) => {
457
570
  if (request.type === 'tool_review') {
458
- // Review tool calls before execution
459
571
  const hasDestructive = request.toolCalls.some((t) => t.isDestructive)
460
- if (hasDestructive) {
461
- return { approved: false, feedback: 'Destructive tool blocked' }
462
- }
463
- return { approved: true }
572
+ return hasDestructive
573
+ ? { action: 'reject_tools', feedback: 'Destructive tool blocked' }
574
+ : { action: 'approve_tools' }
575
+ }
576
+ if (request.type === 'plan_approval') {
577
+ return { action: 'approve_plan' }
464
578
  }
465
-
466
579
  return { action: 'continue' }
467
- })
580
+ }
581
+
582
+ await drainQuery({ /* ...runConfig, provider, tools, messages, */ resumeHandler })
468
583
  ```
469
584
 
470
- Checkpoint/resume enables long-running agents to pause and restart without losing state.
585
+ Checkpoint/resume enables long-running agents to pause and restart without losing state (see `CheckpointManager` and `checkpointId` in `QueryParams`).
471
586
 
472
587
  ## A2A Protocol
473
588
 
@@ -484,27 +599,35 @@ const card = buildAgentCard(agentInfo, {
484
599
  })
485
600
  // Serve at /.well-known/agent-card.json
486
601
 
487
- // Convert inbound A2A message to a Namzu run
488
- const runParams = a2aMessageToCreateRun(agentId, a2aMessage)
602
+ // Convert an inbound A2A message-send into run creation params
603
+ const runParams = a2aMessageToCreateRun(agentId, {
604
+ message: a2aMessage,
605
+ contextId: a2aMessage.contextId,
606
+ metadata: { model: 'anthropic/claude-sonnet-4-20250514', tokenBudget: 8192 },
607
+ })
489
608
 
490
- // Convert completed run to A2A task response
491
- const a2aTask = runToA2ATask(run, messages)
609
+ // Convert a persisted Run (wire type) + thread messages into an A2A task response
610
+ const a2aTask = runToA2ATask(run, threadMessages)
492
611
  ```
493
612
 
494
613
  ## Streaming (SSE)
495
614
 
496
615
  Map internal agent execution events to Server-Sent Events for real-time client updates.
497
616
 
617
+ Agents emit `RunEvent`s through the listener passed to `run()` / `drainQuery()`. `mapRunToStreamEvent` translates each event into an SSE-ready `{ wire, data }` object, or returns `null` for events without a wire mapping (skip those):
618
+
498
619
  ```typescript
499
- import { mapRunToStreamEvent } from '@namzu/sdk'
620
+ import { mapRunToStreamEvent, drainQuery } from '@namzu/sdk'
621
+
622
+ // Event families: run.*, iteration.*, tool.*, token.*, message.*, review.*,
623
+ // checkpoint.*, activity.*, plan.*, agent.*, task.*, plugin.*, sandbox.*
624
+ const listener = (event) => {
625
+ const mapped = mapRunToStreamEvent(event, runId)
626
+ if (!mapped) return
627
+ response.write(`event: ${mapped.wire}\ndata: ${JSON.stringify(mapped.data)}\n\n`)
628
+ }
500
629
 
501
- // 28 event types: run.*, iteration.*, tool.*, message.*, plan.*, agent.*, task.*
502
- agent.on('event', (event) => {
503
- const sseEvent = mapRunToStreamEvent(event, runId)
504
- if (sseEvent) {
505
- response.write(`event: ${sseEvent.wire}\ndata: ${JSON.stringify(sseEvent.data)}\n\n`)
506
- }
507
- })
630
+ await drainQuery({ /* ...runConfig, provider, tools, messages */ }, listener)
508
631
  ```
509
632
 
510
633
  ## Persona System
@@ -540,10 +663,11 @@ import { SkillRegistry, resolveSkillChain } from '@namzu/sdk'
540
663
  const registry = new SkillRegistry()
541
664
  await registry.registerAll('/path/to/skills', 'metadata')
542
665
 
543
- // Load full skill content on demand
544
- const skill = await registry.load('web-search', 'full')
666
+ // Load full skill content on demand — returns SkillLoadResult | undefined
667
+ const loaded = await registry.load('web-search', 'full')
668
+ const skill = loaded?.skill
545
669
 
546
- // Resolve inheritance: category skills + agent-specific overrides
670
+ // Resolve inheritance: shared skills + agent-specific overrides
547
671
  const chain = await resolveSkillChain(
548
672
  '/skills/shared',
549
673
  '/skills/agent-specific',
@@ -572,27 +696,39 @@ const history = store.loadMessages('thd_abc123')
572
696
  // → [{ role: 'user', content: '...' }, { role: 'assistant', content: '...' }]
573
697
  ```
574
698
 
575
- The `ConversationStore` interface is pluggable — swap in SQLite, Postgres, or any backend. The SDK ships with `InMemoryConversationStore` as the default.
699
+ The `ConversationStore` interface is pluggable — swap in SQLite, Postgres, or any backend. `InMemoryConversationStore` is bundled for non-persistent use; applications wire it into the runtime themselves.
576
700
 
577
701
  ## Persistence
578
702
 
579
703
  In-memory and disk-backed stores for runs, tasks, conversations, and activities.
580
704
 
581
705
  ```typescript
582
- import { RunPersistence, DiskTaskStore, InMemoryConversationStore } from '@namzu/sdk'
706
+ import { RunPersistence, DiskTaskStore, getRootLogger } from '@namzu/sdk'
583
707
 
584
708
  // Run persistence with token/cost tracking
585
709
  const persistence = new RunPersistence({
586
710
  runId,
587
- agentId,
711
+ agentId: 'researcher',
712
+ agentName: 'Research Assistant',
713
+ providerId: 'openrouter',
588
714
  outputDir: './runs',
589
- runConfig: { model: 'claude-3', temperature: 0.7 },
715
+ runConfig: {
716
+ model: 'anthropic/claude-sonnet-4-20250514',
717
+ tokenBudget: 8192,
718
+ timeoutMs: 600_000,
719
+ temperature: 0.7,
720
+ },
721
+ log: getRootLogger(),
590
722
  })
591
723
  await persistence.init()
592
- persistence.accumulateUsage({ promptTokens: 100, completionTokens: 50 })
724
+ persistence.accumulateUsage({
725
+ promptTokens: 100,
726
+ completionTokens: 50,
727
+ totalTokens: 150,
728
+ })
593
729
  await persistence.persist()
594
730
 
595
- // Multi-tenant task store with atomic writes
731
+ // Task store with atomic writes (tenant-aware)
596
732
  const taskStore = new DiskTaskStore({
597
733
  baseDir: './tasks',
598
734
  defaultRunId: runId,
@@ -605,46 +741,60 @@ const taskStore = new DiskTaskStore({
605
741
  OpenTelemetry integration for distributed tracing and metrics across agents, tools, and providers.
606
742
 
607
743
  ```typescript
608
- import { initTelemetry, getTracer, getMeter, createPlatformMetrics } from '@namzu/sdk'
744
+ import { initTelemetry, getTracer, createPlatformMetrics } from '@namzu/sdk'
609
745
 
610
- initTelemetry({
746
+ const telemetry = initTelemetry({
611
747
  serviceName: 'agent-platform',
612
- traceExporter: { endpoint: 'http://localhost:4318/v1/traces' },
613
- metricsExporter: { endpoint: 'http://localhost:4318/v1/metrics' },
748
+ exporterType: 'otlp',
749
+ otlpEndpoint: 'http://localhost:4318',
750
+ otlpHeaders: { authorization: `Bearer ${process.env.OTLP_TOKEN!}` },
614
751
  })
752
+ await telemetry.start()
615
753
 
616
754
  const tracer = getTracer()
617
755
  const metrics = createPlatformMetrics()
618
756
 
619
757
  const span = tracer.startSpan('agent.run')
620
- metrics.tokenCounter.add(150, { agent_id: 'researcher', model: 'claude-3' })
758
+ metrics.recordTokenUsage('anthropic/claude-sonnet-4-20250514', 100, 50)
759
+ metrics.recordToolCall('search_web', true)
621
760
  span.end()
761
+
762
+ metrics.recordRunDuration('completed', 12.4)
622
763
  ```
623
764
 
624
765
  ## Architecture
625
766
 
626
767
  ```
627
768
  @namzu/sdk
769
+ ├── advisory/ Advisor registry, execution, trigger evaluation
628
770
  ├── agents/ Reactive, Pipeline, Router, Supervisor
629
- ├── bridge/ A2A protocol, SSE mapping, connector tools
771
+ ├── bridge/ A2A, SSE, connector→tool adapters
772
+ ├── bus/ Agent bus and coordination primitives
773
+ ├── compaction/ WorkingState extraction and conversation compaction
630
774
  ├── config/ Runtime configuration with Zod schemas
631
775
  ├── connector/ HTTP, webhook, MCP client/server, tenant isolation
632
- ├── sandbox/ Process-level isolation (Seatbelt, namespace)
633
- ├── contracts/ API wire types and validation schemas
776
+ ├── constants/ Shared SDK constants
777
+ ├── contracts/ External wire types and validation schemas (HTTP/A2A/SSE)
778
+ ├── execution/ Base and local execution contexts
634
779
  ├── gateway/ Local task gateway
635
- ├── manager/ Plan lifecycle, agent coordination, run persistence
780
+ ├── manager/ Plan, agent, connector, run lifecycle
636
781
  ├── persona/ System prompt assembly and merging
782
+ ├── plugin/ Manifest discovery, lifecycle, contributions, hooks
637
783
  ├── provider/ OpenRouter, Bedrock, Mock LLM providers
638
784
  ├── rag/ Chunking, embedding, vector store, retrieval
639
- ├── registry/ Agent, tool, and managed registries
640
- ├── runtime/ Query engine, decision parser, context cache
785
+ ├── registry/ Base, managed, agent, connector, tool, plugin registries
786
+ ├── router/ Task→model routing
787
+ ├── run/ Reporters and limit checking
788
+ ├── runtime/ Query engine, iteration phases, decision parser
789
+ ├── sandbox/ Process-level isolation (Seatbelt, namespace)
641
790
  ├── skills/ Skill registry, discovery, and chaining
642
- ├── store/ In-memory, disk, conversation, activity stores
791
+ ├── store/ In-memory, disk, conversation, activity, task, memory
643
792
  ├── telemetry/ OpenTelemetry tracing and metrics
644
- ├── tools/ defineTool, built-in tools, task tools
645
- ├── types/ Full type system (57 files)
646
- ├── utils/ ID generation, cost calc, hashing, logging
647
- └── vault/ Credential management
793
+ ├── tools/ defineTool, built-ins, task / advisory / memory tools
794
+ ├── types/ Domain model and internal type definitions
795
+ ├── utils/ ID generation, cost calc, hashing, logging, shell
796
+ ├── vault/ Credential management
797
+ └── verification/ Verification gate and rules
648
798
  ```
649
799
 
650
800
  ## Vision