ctxpkg 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/LICENSE +661 -0
  2. package/README.md +282 -0
  3. package/bin/cli.js +8 -0
  4. package/bin/daemon.js +7 -0
  5. package/package.json +70 -0
  6. package/src/agent/AGENTS.md +249 -0
  7. package/src/agent/agent.prompts.ts +66 -0
  8. package/src/agent/agent.test-runner.schemas.ts +158 -0
  9. package/src/agent/agent.test-runner.ts +436 -0
  10. package/src/agent/agent.ts +371 -0
  11. package/src/agent/agent.types.ts +94 -0
  12. package/src/backend/AGENTS.md +112 -0
  13. package/src/backend/backend.protocol.ts +95 -0
  14. package/src/backend/backend.schemas.ts +123 -0
  15. package/src/backend/backend.services.ts +151 -0
  16. package/src/backend/backend.ts +111 -0
  17. package/src/backend/backend.types.ts +34 -0
  18. package/src/cli/AGENTS.md +213 -0
  19. package/src/cli/cli.agent.ts +197 -0
  20. package/src/cli/cli.chat.ts +369 -0
  21. package/src/cli/cli.client.ts +55 -0
  22. package/src/cli/cli.collections.ts +491 -0
  23. package/src/cli/cli.config.ts +252 -0
  24. package/src/cli/cli.daemon.ts +160 -0
  25. package/src/cli/cli.documents.ts +413 -0
  26. package/src/cli/cli.mcp.ts +177 -0
  27. package/src/cli/cli.ts +28 -0
  28. package/src/cli/cli.utils.ts +122 -0
  29. package/src/client/AGENTS.md +135 -0
  30. package/src/client/client.adapters.ts +279 -0
  31. package/src/client/client.ts +86 -0
  32. package/src/client/client.types.ts +17 -0
  33. package/src/collections/AGENTS.md +185 -0
  34. package/src/collections/collections.schemas.ts +195 -0
  35. package/src/collections/collections.ts +1160 -0
  36. package/src/config/config.ts +118 -0
  37. package/src/daemon/AGENTS.md +168 -0
  38. package/src/daemon/daemon.config.ts +23 -0
  39. package/src/daemon/daemon.manager.ts +215 -0
  40. package/src/daemon/daemon.schemas.ts +22 -0
  41. package/src/daemon/daemon.ts +205 -0
  42. package/src/database/AGENTS.md +211 -0
  43. package/src/database/database.ts +64 -0
  44. package/src/database/migrations/migrations.001-init.ts +56 -0
  45. package/src/database/migrations/migrations.002-fts5.ts +32 -0
  46. package/src/database/migrations/migrations.ts +20 -0
  47. package/src/database/migrations/migrations.types.ts +9 -0
  48. package/src/documents/AGENTS.md +301 -0
  49. package/src/documents/documents.schemas.ts +190 -0
  50. package/src/documents/documents.ts +734 -0
  51. package/src/embedder/embedder.ts +53 -0
  52. package/src/exports.ts +0 -0
  53. package/src/mcp/AGENTS.md +264 -0
  54. package/src/mcp/mcp.ts +105 -0
  55. package/src/tools/AGENTS.md +228 -0
  56. package/src/tools/agent/agent.ts +45 -0
  57. package/src/tools/documents/documents.ts +401 -0
  58. package/src/tools/tools.langchain.ts +37 -0
  59. package/src/tools/tools.mcp.ts +46 -0
  60. package/src/tools/tools.types.ts +35 -0
  61. package/src/utils/utils.services.ts +46 -0
package/README.md ADDED
@@ -0,0 +1,282 @@
+ # ctxpkg
+
+ <p align="center">
+   <img src="docs/assets/banner.jpg" alt="ctxpkg banner" width="100%">
+ </p>
+
+ A package manager for AI agent context — manage, sync, and distribute documentation collections for AI-assisted development.
+
+ > **The Vision:** Imagine an AI assistant that knows your context — your team's commit style, your company's security policies, your preferred patterns — without you explaining it every session.
+ > [Read the story: **Context Stacking: How Sarah Automated Her Team's Brain**](docs/managing-ai-context-at-scale.md)
+
+ ## What is ctxpkg?
+
+ Just as `npm` manages code dependencies, `ctxpkg` manages **context dependencies**.
+
+ Stack documentation layers — from personal notes to team guidelines to project docs — into a unified knowledge base. Your AI agents search this indexed context instead of relying on stale training data or manual copy-paste.
+
+ **Key capabilities:**
+
+ - **Context Stacking** — Layer documentation from multiple sources (personal, team, project, global)
+ - **Semantic Search** — Local vector + keyword search finds relevant content without dumping everything into prompts
+ - **MCP Integration** — AI editors like Cursor and Claude Desktop can query your context directly
+ - **Git-Native Distribution** — Index docs directly from any git repo (public or private) — no publishing required
+ - **Bundle Any Source** — Export docs from Confluence, Notion, or any system to markdown, then package them into distributable `.tar.gz` archives
+
+ ## Design Philosophy
+
+ **Zero-friction adoption.** You probably already have documentation worth indexing — a folder of markdown notes, an Obsidian vault, your company's engineering wiki, or a repo full of ADRs and guides. ctxpkg works with what you have. Point it at existing files and start searching. No migration, no reformatting, no custom schemas required.
+
+ **Low-risk investment.** Even if you decide ctxpkg isn't for you, any documentation you create remains useful. It's just markdown files with a simple manifest — humans can read it, other tools can consume it, and nothing is locked into a proprietary format. The worst-case scenario is that you end up with better-organized documentation.
+
+ ## Installation
+
+ Install globally with `npm i -g ctxpkg`, or run commands with the `npx` prefix (e.g. `npx ctxpkg col init`).
+
+ ## Quick Start
+
+ Give your AI agents access to your documentation in minutes:
+
+ ```bash
+ # Initialize project config
+ ctxpkg col init
+
+ # Add your docs folder (requires manifest.json)
+ ctxpkg col add docs ./docs/manifest.json
+
+ # Index the documents
+ ctxpkg col sync
+ ```
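The Quick Start above assumes a `manifest.json` in your docs folder. As a rough sketch only — field names are taken from the bundle example later in this README, and the manifest format may support more options — a minimal manifest could look like:

```json
{
  "name": "project-docs",
  "sources": [{ "pattern": "**/*.md" }]
}
```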
+
+ Now configure your AI editor to use the ctxpkg MCP server:
+
+ <details>
+ <summary>🔧 Cursor</summary>
+
+ Add to `~/.cursor/mcp.json`:
+
+ ```json
+ {
+   "mcpServers": {
+     "ctxpkg": {
+       "command": "npx",
+       "args": ["-y", "ctxpkg", "mcp", "documents"]
+     }
+   }
+ }
+ ```
+
+ </details>
+
+ <details>
+ <summary>🤖 Claude Code</summary>
+
+ Run this command:
+
+ ```bash
+ claude mcp add ctxpkg -- npx -y ctxpkg mcp documents
+ ```
+
+ </details>
+
+ <details>
+ <summary>⚡ Opencode</summary>
+
+ Add to your Opencode configuration:
+
+ ```json
+ {
+   "mcp": {
+     "ctxpkg": {
+       "type": "local",
+       "command": ["npx", "-y", "ctxpkg", "mcp", "documents"],
+       "enabled": true
+     }
+   }
+ }
+ ```
+
+ </details>
+
+ **[See more AI editor setups](docs/setup-agents.md)** • **[Full tutorial: Getting Started](docs/getting-started.md)**
+
+ ## Documentation
+
+ | Guide | Description |
+ | ----- | ----------- |
+ | [AI Editor Setup](docs/setup-agents.md) | Configure Cursor, Claude Code, Opencode, and more |
+ | [Getting Started](docs/getting-started.md) | First-time setup tutorial |
+ | [CLI Reference](docs/cli-reference.md) | Complete command documentation |
+ | [Configuration](docs/configuration.md) | Project config, global config, manifests |
+ | [How It Works](docs/how-it-works.md) | Indexing pipeline, search algorithms |
+ | [MCP Server](docs/mcp-server.md) | AI editor integration and tools |
+ | [AI Chat & Agent Mode](docs/ai-chat.md) | Chat with docs, reduced-token MCP mode |
+ | [Agent Testing](docs/agent-testing.md) | Validate agent performance with test suites |
+ | [Publishing Packages](docs/github-distribution.md) | Distribute docs via GitHub Releases |
+
+ ## CLI Management Tools
+
+ The CLI is primarily for managing your context collections. Most users will interact with ctxpkg through their AI editor via MCP.
+
+ ```bash
+ # Collections — manage context packages
+ ctxpkg col init                  # Initialize project
+ ctxpkg col add <alias> <url>     # Add a collection
+ ctxpkg col add -g <alias> <url>  # Add global collection
+ ctxpkg col sync                  # Index documents
+ ctxpkg col list                  # Show collections
+
+ # MCP — AI editor integration (main use case)
+ ctxpkg mcp docs                  # Start MCP server (tools mode)
+ ctxpkg mcp agent                 # Start MCP server (agent mode)
+
+ # Additional tools
+ ctxpkg docs search "query"       # Direct search (testing)
+ ctxpkg chat "question"           # AI-powered Q&A
+ ctxpkg agent test tests.yaml     # Test agent performance
+ ctxpkg daemon start              # Background service
+ ```
+
+ See [CLI Reference](docs/cli-reference.md) for complete documentation.
+
+ ## Example: Context Stacking
+
+ Layer context from multiple sources:
+
+ ```json
+ {
+   "collections": {
+     "project-docs": {
+       "url": "file://./docs/manifest.json"
+     },
+     "team-standards": {
+       "url": "git+https://github.com/myorg/standards#main?manifest=manifest.json"
+     },
+     "react": {
+       "url": "git+https://github.com/facebook/react#v18.2.0?manifest=docs/manifest.json"
+     }
+   }
+ }
+ ```
+
+ **Git repositories are the easiest way to share documentation** — no publishing step required. Just point to a repo with a `manifest.json`:
+
+ ```bash
+ # Add docs from any git repo (HTTPS or SSH)
+ ctxpkg col add team-docs "git+https://github.com/myorg/docs#main?manifest=manifest.json"
+ ctxpkg col add private-docs "git+ssh://git@github.com/myorg/private#main?manifest=manifest.json"
+ ```
+
+ Add personal/global context available across all projects:
+
+ ```bash
+ ctxpkg col add -g my-notes file:///Users/me/notes/manifest.json
+ ```
+
+ ## MCP Integration
+
+ ctxpkg's primary purpose is giving AI agents access to your documentation through the **Model Context Protocol (MCP)**. Once configured, your AI assistant gains access to 8 document tools (a sample raw tool call is sketched after the list):
+
+ - `search` — Semantic search across all your documentation
+ - `search_batch` — Multiple queries in one call
+ - `get_document` — Retrieve full document content
+ - `get_section` — Get specific document sections
+ - `get_outline` — Get document structure/outline
+ - `find_related` — Find related documents
+ - `list_collections` — List all indexed collections
+ - `list_documents` — List all documents in collections
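These are standard MCP tools, so any MCP-capable client can call them over JSON-RPC. As an illustration only — the `tools/call` envelope is the MCP protocol, but the argument names (`query`, `collections`) are assumptions rather than documented parameters — a raw request to the `search` tool might look like:

```json
{
  "jsonrpc": "2.0",
  "id": 1,
  "method": "tools/call",
  "params": {
    "name": "search",
    "arguments": {
      "query": "how do I configure the MCP server?",
      "collections": ["project-docs"]
    }
  }
}
```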
+
+ ### Agent Mode (Recommended for Chat)
+
+ For reduced token costs in long conversations, use **Agent Mode**:
+
+ ```json
+ {
+   "mcpServers": {
+     "ctxpkg-agent": {
+       "command": "npx",
+       "args": ["-y", "ctxpkg", "mcp", "agent"]
+     }
+   }
+ }
+ ```
+
+ This exposes a single `ask_documents` tool that uses an internal AI agent to search and synthesize answers. The calling agent sees only the final result, not intermediate search calls — reducing context overhead.
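For a sense of what the calling agent receives: the internal agent's synthesized result follows the response format documented in `src/agent/AGENTS.md` (answer, sources, confidence). The sample values below are illustrative, and how that JSON is wrapped in the MCP tool response is an assumption:

```json
{
  "answer": "Add ctxpkg to your editor's mcpServers entry and point it at the documents server...",
  "sources": [{ "collection": "project-docs", "document": "setup-agents", "section": "Cursor" }],
  "confidence": "high"
}
```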
+
+ See [MCP Server Documentation](docs/mcp-server.md) for complete details.
+
+ ## AI Chat & Agent Mode
+
+ Chat with your documentation directly from the terminal, or use **Agent Mode** for reduced token costs in AI assistants.
+
+ ```bash
+ # Configure your LLM
+ ctxpkg config set llm.apiKey sk-...
+
+ # One-shot question
+ ctxpkg chat "How do I implement caching?" --use-case "Optimizing API performance"
+
+ # Interactive session
+ ctxpkg chat -i
+ ```
+
+ **Agent Mode MCP** exposes a single `ask_documents` tool that uses an internal AI agent to search and synthesize answers. The calling agent sees only the final result, not intermediate search calls — reducing context overhead in long conversations.
+
+ ```json
+ {
+   "mcpServers": {
+     "ctxpkg-agent": {
+       "command": "ctxpkg",
+       "args": ["mcp", "agent"]
+     }
+   }
+ }
+ ```
+
+ See [AI Chat & Agent Mode](docs/ai-chat.md) for details.
+
+ ## Distributing Internal Documentation
+
+ ctxpkg can package documentation from any source — Confluence, Notion, SharePoint, or internal wikis — into distributable bundles that teams can share via internal systems.
+
+ **Workflow:**
+
+ 1. **Export your docs as Markdown** — Use your platform's export tools or APIs to extract documentation
+ 2. **Add a manifest** — Create a `manifest.json` describing the collection:
+
+    ```json
+    {
+      "name": "company-knowledge-base",
+      "sources": [{ "pattern": "**/*.md" }]
+    }
+    ```
+
+ 3. **Create a bundle** — Package everything into a distributable archive:
+
+    ```bash
+    ctxpkg col pack --output knowledge-base-v1.tar.gz
+    ```
+
+ 4. **Distribute internally** — Host the bundle on internal file servers, S3, or artifact storage
+
+ Teams can then add the bundle:
+
+ ```bash
+ ctxpkg col add kb https://internal.example.com/bundles/knowledge-base-v1.tar.gz
+ ```
+
+ This enables organizations to centralize and distribute institutional knowledge to AI agents across all teams, without requiring git repositories or public hosting.
+
+ See [Publishing Packages](docs/github-distribution.md) for automated publishing with GitHub Actions.
+
+ ## Development
+
+ ```bash
+ pnpm run test:lint   # Linting
+ pnpm run test:unit   # Unit tests
+ pnpm run build       # Build TypeScript
+ ```
+
+ ## License
+
+ [GNU Affero General Public License v3.0 (AGPL-3.0)](LICENSE)
package/bin/cli.js ADDED
@@ -0,0 +1,8 @@
+ #!/usr/bin/env node
+
+ import { createProgram } from '../src/cli/cli.ts';
+
+ const program = createProgram();
+
+ // eslint-disable-next-line
+ await program.parseAsync(process.argv);
package/bin/daemon.js ADDED
@@ -0,0 +1,7 @@
+ #!/usr/bin/env node
+
+ import { Daemon } from '../src/daemon/daemon.ts';
+
+ const daemon = new Daemon();
+
+ await daemon.start();
package/package.json ADDED
@@ -0,0 +1,70 @@
+ {
+   "type": "module",
+   "main": "dist/exports.js",
+   "bin": {
+     "ctxpkg": "./bin/cli.js"
+   },
+   "files": [
+     "src",
+     "bin"
+   ],
+   "exports": {
+     ".": "./dist/exports.js"
+   },
+   "devDependencies": {
+     "@eslint/eslintrc": "3.3.3",
+     "@eslint/js": "9.39.2",
+     "@pnpm/find-workspace-packages": "6.0.9",
+     "@types/node": "25.0.8",
+     "@types/tar": "^6.1.13",
+     "@types/ws": "^8.18.1",
+     "@vitest/coverage-v8": "4.0.17",
+     "eslint": "9.39.2",
+     "eslint-config-prettier": "10.1.8",
+     "eslint-plugin-import": "2.32.0",
+     "eslint-plugin-prettier": "5.5.4",
+     "msw": "^2.12.7",
+     "prettier": "3.7.4",
+     "tsx": "^4.21.0",
+     "typescript": "5.9.3",
+     "typescript-eslint": "8.53.0",
+     "vitest": "4.0.17"
+   },
+   "name": "ctxpkg",
+   "version": "0.0.1",
+   "license": "AGPL-3.0",
+   "imports": {
+     "#root/*": "./src/*"
+   },
+   "dependencies": {
+     "@huggingface/transformers": "^3.8.1",
+     "@inquirer/prompts": "^8.2.0",
+     "@langchain/community": "^1.1.4",
+     "@langchain/core": "^1.1.13",
+     "@langchain/langgraph": "^1.0.15",
+     "@langchain/openai": "^1.2.2",
+     "@langchain/textsplitters": "^1.0.1",
+     "@modelcontextprotocol/sdk": "^1.25.2",
+     "@types/convict": "^6.1.6",
+     "better-sqlite3": "^12.6.0",
+     "chalk": "^5.6.2",
+     "commander": "^14.0.2",
+     "convict": "^6.2.4",
+     "dotenv": "^17.2.3",
+     "env-paths": "^3.0.0",
+     "knex": "^3.1.0",
+     "langchain": "^1.2.8",
+     "simple-git": "^3.30.0",
+     "sqlite-vec": "0.1.7-alpha.2",
+     "tar": "^7.5.2",
+     "ws": "^8.19.0",
+     "yaml": "^2.8.2",
+     "zod": "3"
+   },
+   "scripts": {
+     "test:lint": "eslint",
+     "build": "tsc --build",
+     "test:unit": "vitest --run --passWithNoTests",
+     "test": "pnpm run \"/^test:/\""
+   }
+ }
package/src/agent/AGENTS.md ADDED
@@ -0,0 +1,249 @@
+ # Agent — Agent Guidelines
+
+ This document describes the agent module architecture for AI agents working on this codebase.
+
+ ## Overview
+
+ The agent module provides a LangChain-based agent that uses document tools to search and synthesize information. It's designed to reduce token/context costs by consolidating multiple tool calls into a single, synthesized answer.
+
+ ## File Structure
+
+ | File | Purpose |
+ |------|---------|
+ | `agent.ts` | Main agent implementation, factory, and retry logic |
+ | `agent.types.ts` | TypeScript types and Zod schemas |
+ | `agent.prompts.ts` | System prompts and templates |
+
+ ## Architecture
+
+ ```
+ ┌───────────────────────────────────────────────────────────────┐
+ │ DocumentAgent                                                 │
+ │ ┌───────────────────────────────────────────────────────────┐ │
+ │ │ LangGraph React Agent                                     │ │
+ │ │                                                           │ │
+ │ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────┐   │ │
+ │ │ │   search    │ │ get_section │ │ get_document, etc.  │   │ │
+ │ │ └─────────────┘ └─────────────┘ └─────────────────────┘   │ │
+ │ │                                                           │ │
+ │ │ Uses configured LLM (OpenAI-compatible API)               │ │
+ │ └───────────────────────────────────────────────────────────┘ │
+ │                                                               │
+ │ Features:                                                     │
+ │ • Verbose mode with step callbacks                            │
+ │ • Conversation history for multi-turn chat                    │
+ │ • Collection filtering via system prompt                      │
+ │ • Retry logic with exponential backoff                        │
+ └───────────────────────────────────────────────────────────────┘
+ ```
+
+ ## Usage
+
+ ### Creating an Agent
+
+ ```typescript
+ import { createDocumentAgent, getLLMConfigFromAppConfig } from '#root/agent/agent.ts';
+ import { createClient } from '#root/client/client.ts';
+
+ const client = await createClient({ mode: 'daemon' });
+ const llmConfig = await getLLMConfigFromAppConfig();
+
+ const agent = createDocumentAgent({
+   client,
+   llmConfig,
+   aliasMap: new Map([['docs', 'pkg:file://./docs/manifest.json']]),
+   collections: ['docs'], // Optional: restrict to specific collections
+   onStep: (step) => console.log(step), // Optional: verbose callbacks
+ });
+
+ // One-shot query (stateless)
+ const response = await agent.ask(
+   'How do I implement streaming?',
+   'Building a chatbot that streams responses'
+ );
+
+ console.log(response.answer);
+ console.log(response.sources);
+ console.log(response.confidence);
+ ```
+
+ ### Multi-turn Conversation
+
+ ```typescript
+ // First message
+ const response1 = await agent.chat(
+   'What authentication methods are available?',
+   'Building a secure API'
+ );
+
+ // Follow-up (maintains conversation context)
+ const response2 = await agent.chat(
+   'How do I implement the OAuth2 option?',
+   'Building a secure API'
+ );
+
+ // Clear history when starting new topic
+ agent.clearHistory();
+ ```
+
+ ### Verbose Mode
+
+ ```typescript
+ const agent = createDocumentAgent({
+   client,
+   llmConfig,
+   onStep: (step) => {
+     switch (step.type) {
+       case 'thinking':
+         console.log(`[thinking] ${step.content}`);
+         break;
+       case 'tool_call':
+         console.log(`[tool] ${step.toolName}`);
+         console.log(`  Input: ${JSON.stringify(step.toolInput)}`);
+         break;
+       case 'tool_result':
+         console.log(`[result] ${step.content}`);
+         break;
+       case 'error':
+         console.log(`[retry] ${step.content}`);
+         break;
+     }
+   },
+ });
+ ```
+
+ ### Agent Response Format
+
+ ```typescript
+ type AgentResponse = {
+   answer: string; // Synthesized answer
+   sources: Array<{ // References used
+     collection: string;
+     document: string;
+     section?: string;
+   }>;
+   confidence: 'high' | 'medium' | 'low';
+   note?: string; // Optional note
+ };
+ ```
+
+ ## LLM Configuration
+
+ The agent uses configuration from `config.ts`:
+
+ ```typescript
+ llm: {
+   provider: string; // OpenAI-compatible API base URL
+   model: string; // Model identifier
+   apiKey: string; // API key
+   temperature: number; // 0-2
+   maxTokens: number; // Max response tokens
+ }
+ ```
+
+ Configure via CLI:
+
+ ```bash
+ ctxpkg config set llm.apiKey sk-...
+ ctxpkg config set llm.model gpt-4o
+ ctxpkg config set llm.provider https://api.openai.com/v1
+ ```
+
+ Or via environment variables:
+
+ ```bash
+ export CTXPKG_LLM_API_KEY=sk-...
+ export CTXPKG_LLM_MODEL=gpt-4o
+ ```
+
+ ## Agent Design
+
+ ### Tool Selection
+
+ The agent uses LangGraph's React agent pattern with these tools:
+
+ - `documents_search` — Semantic search across collections
+ - `documents_list_documents` — Browse collection contents
+ - `documents_get_outline` — Get document structure
+ - `documents_get_section` — Get specific sections
+ - `documents_get_document` — Get full documents
+ - `documents_list_collections` — List available collections
+ - `documents_search_batch` — Batch searches
+ - `documents_find_related` — Find related content
+
+ ### Termination
+
+ The agent stops when:
+
+ 1. It has synthesized a complete answer (JSON response)
+ 2. Maximum iterations reached (default: 15)
+ 3. No more relevant information to find
+
+ ### Response Parsing
+
+ The agent is prompted to respond in JSON format. The parser (a rough sketch follows this list):
+
+ 1. Looks for ```json code blocks
+ 2. Tries to parse the whole content as JSON
+ 3. Falls back to treating content as plain answer
+
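The actual parsing code lives in `agent.ts` and is not reproduced in this guide; the sketch below only illustrates the three-step fallback described above (the helper name and return shape are illustrative assumptions):

```typescript
// Illustrative sketch only — not the actual agent.ts implementation.
const parseAgentResponse = (content: string): { answer: string } & Record<string, unknown> => {
  // 1. Prefer a fenced ```json block if present
  const fenced = content.match(/```json\s*([\s\S]*?)```/);
  if (fenced) {
    try {
      return JSON.parse(fenced[1]);
    } catch {
      // fall through to the next strategy
    }
  }
  // 2. Try parsing the whole content as JSON
  try {
    return JSON.parse(content);
  } catch {
    // 3. Fall back to treating the content as a plain answer
    return { answer: content };
  }
};
```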
+ ### Retry Logic
+
+ The agent automatically retries on transient errors:
+
+ - **Rate limits**: 429 errors
+ - **Server errors**: 500, 502, 503, 504
+ - **Network errors**: ECONNRESET, ETIMEDOUT
+
+ Retry configuration:
+ - Max retries: 3
+ - Initial delay: 1000ms
+ - Max delay: 30000ms
+ - Backoff multiplier: 2x
+
+ ```typescript
+ import { withRetry, isRetryableError } from '#root/agent/agent.ts';
+
+ // Use retry logic for custom async operations
+ const result = await withRetry(
+   () => someAsyncOperation(),
+   { maxRetries: 3, initialDelayMs: 1000, maxDelayMs: 30000, backoffMultiplier: 2 },
+   (attempt, error, delayMs) => console.log(`Retry ${attempt}: ${error.message}`)
+ );
+ ```
+
+ ## Key Patterns
+
+ ### Lazy Config Loading
+
+ Config is loaded dynamically to avoid circular imports:
+
+ ```typescript
+ const getLLMConfigFromAppConfig = async (): Promise<LLMConfig> => {
+   const { config } = await import('#root/config/config.ts');
+   // ...
+ };
+ ```
+
+ ### Tool Conversion
+
+ Document tools are defined once and converted for LangChain:
+
+ ```typescript
+ const toolDefinitions = createDocumentToolDefinitions({ client, aliasMap });
+ const langchainTools = toLangchainTools(toolDefinitions);
+ ```
+
+ ### Collection Filtering
+
+ Collections can be restricted via the `collections` option:
+
+ ```typescript
+ const agent = createDocumentAgent({
+   client,
+   llmConfig,
+   collections: ['my-docs', 'api-docs'], // Only search these
+ });
+ ```
+
+ This adds instructions to the system prompt telling the agent to pass these collections in all search calls.
package/src/agent/agent.prompts.ts ADDED
@@ -0,0 +1,66 @@
+ /**
+  * System prompt for the document search agent
+  */
+ export const AGENT_SYSTEM_PROMPT = `You are a documentation search agent. Your task is to find and synthesize information from technical documentation to answer user questions.
+
+ ## Guidelines
+
+ 1. **Start broad, then narrow**: Begin with a semantic search, then drill into specific sections or documents as needed.
+
+ 2. **Use the right tool for the job**:
+    - \`documents_search\` — Find relevant content across collections
+    - \`documents_list_documents\` — Browse what's available in a collection
+    - \`documents_get_outline\` — Understand document structure before diving in
+    - \`documents_get_section\` — Get specific section content efficiently
+    - \`documents_get_document\` — Only when you need the full document
+
+ 3. **Stop when sufficient**: The user has provided a use case. Once you have enough information to address their specific use case, synthesize and respond. Don't over-research.
+
+ 4. **Cite sources**: Track which documents/sections you used.
+
+ 5. **Acknowledge uncertainty**: If you can't find sufficient information, say so.
+
+ ## Response Format
+
+ When you have found sufficient information, respond with a JSON object in this exact format:
+
+ \`\`\`json
+ {
+   "answer": "Your synthesized answer here. Be clear, actionable, and include code examples when relevant.",
+   "sources": [
+     {"collection": "collection-name", "document": "document-id", "section": "Section Heading (if applicable)"}
+   ],
+   "confidence": "high|medium|low",
+   "note": "Optional note about limitations or suggestions for further reading"
+ }
+ \`\`\`
+
+ Use "high" confidence when multiple sources agree or you found a direct answer.
+ Use "medium" when the information is relevant but not comprehensive.
+ Use "low" when you're extrapolating or the information is tangentially related.`;
+
+ /**
+  * Format the collection restriction instruction
+  */
+ export const formatCollectionRestriction = (collections: string[]): string => {
+   if (collections.length === 0) return '';
+   const collectionList = collections.map((c) => `"${c}"`).join(', ');
+   return `\n\n## Collection Restriction\nIMPORTANT: Only search within these collections: ${collectionList}. Always pass this list in the "collections" parameter of your search calls.`;
+ };
+
+ /**
+  * User prompt template
+  */
+ export const formatUserPrompt = (query: string, useCase: string, collections?: string[]) => {
+   const collectionNote = collections?.length
+     ? `\n\nNote: Restrict your searches to these collections: ${collections.join(', ')}`
+     : '';
+
+   return `## Question
+ ${query}
+
+ ## Use Case
+ ${useCase}${collectionNote}
+
+ Find the information needed to answer this question for the given use case. Search the documentation, then provide your synthesized answer in JSON format.`;
+ };
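Taken together, these helpers suggest how the final prompts are assembled. The composition below is a sketch based only on the exported names and signatures above — `agent.ts` may wire them together differently:

```typescript
// Hypothetical composition — not a copy of what agent.ts actually does.
import {
  AGENT_SYSTEM_PROMPT,
  formatCollectionRestriction,
  formatUserPrompt,
} from '#root/agent/agent.prompts.ts';

const collections = ['team-standards'];

// System prompt: base guidelines plus the optional collection restriction.
const systemPrompt = AGENT_SYSTEM_PROMPT + formatCollectionRestriction(collections);

// User prompt: question, use case, and the same collection note.
const userPrompt = formatUserPrompt(
  'How do I implement streaming?',
  'Building a chatbot that streams responses',
  collections
);

// systemPrompt and userPrompt would then be sent as the system/user messages of the LLM call.
```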