ctxpkg 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +661 -0
- package/README.md +282 -0
- package/bin/cli.js +8 -0
- package/bin/daemon.js +7 -0
- package/package.json +70 -0
- package/src/agent/AGENTS.md +249 -0
- package/src/agent/agent.prompts.ts +66 -0
- package/src/agent/agent.test-runner.schemas.ts +158 -0
- package/src/agent/agent.test-runner.ts +436 -0
- package/src/agent/agent.ts +371 -0
- package/src/agent/agent.types.ts +94 -0
- package/src/backend/AGENTS.md +112 -0
- package/src/backend/backend.protocol.ts +95 -0
- package/src/backend/backend.schemas.ts +123 -0
- package/src/backend/backend.services.ts +151 -0
- package/src/backend/backend.ts +111 -0
- package/src/backend/backend.types.ts +34 -0
- package/src/cli/AGENTS.md +213 -0
- package/src/cli/cli.agent.ts +197 -0
- package/src/cli/cli.chat.ts +369 -0
- package/src/cli/cli.client.ts +55 -0
- package/src/cli/cli.collections.ts +491 -0
- package/src/cli/cli.config.ts +252 -0
- package/src/cli/cli.daemon.ts +160 -0
- package/src/cli/cli.documents.ts +413 -0
- package/src/cli/cli.mcp.ts +177 -0
- package/src/cli/cli.ts +28 -0
- package/src/cli/cli.utils.ts +122 -0
- package/src/client/AGENTS.md +135 -0
- package/src/client/client.adapters.ts +279 -0
- package/src/client/client.ts +86 -0
- package/src/client/client.types.ts +17 -0
- package/src/collections/AGENTS.md +185 -0
- package/src/collections/collections.schemas.ts +195 -0
- package/src/collections/collections.ts +1160 -0
- package/src/config/config.ts +118 -0
- package/src/daemon/AGENTS.md +168 -0
- package/src/daemon/daemon.config.ts +23 -0
- package/src/daemon/daemon.manager.ts +215 -0
- package/src/daemon/daemon.schemas.ts +22 -0
- package/src/daemon/daemon.ts +205 -0
- package/src/database/AGENTS.md +211 -0
- package/src/database/database.ts +64 -0
- package/src/database/migrations/migrations.001-init.ts +56 -0
- package/src/database/migrations/migrations.002-fts5.ts +32 -0
- package/src/database/migrations/migrations.ts +20 -0
- package/src/database/migrations/migrations.types.ts +9 -0
- package/src/documents/AGENTS.md +301 -0
- package/src/documents/documents.schemas.ts +190 -0
- package/src/documents/documents.ts +734 -0
- package/src/embedder/embedder.ts +53 -0
- package/src/exports.ts +0 -0
- package/src/mcp/AGENTS.md +264 -0
- package/src/mcp/mcp.ts +105 -0
- package/src/tools/AGENTS.md +228 -0
- package/src/tools/agent/agent.ts +45 -0
- package/src/tools/documents/documents.ts +401 -0
- package/src/tools/tools.langchain.ts +37 -0
- package/src/tools/tools.mcp.ts +46 -0
- package/src/tools/tools.types.ts +35 -0
- package/src/utils/utils.services.ts +46 -0
package/README.md
ADDED
@@ -0,0 +1,282 @@

# ctxpkg

<p align="center">
  <img src="docs/assets/banner.jpg" alt="ctxpkg banner" width="100%">
</p>

A package manager for AI agent context — manage, sync, and distribute documentation collections for AI-assisted development.

> **The Vision:** Imagine an AI assistant that knows your context — your team's commit style, your company's security policies, your preferred patterns — without you explaining it every session.
> [Read the story: **Context Stacking: How Sarah Automated Her Team's Brain**](docs/managing-ai-context-at-scale.md)

## What is ctxpkg?

Just as `npm` manages code dependencies, `ctxpkg` manages **context dependencies**.

Stack documentation layers — from personal notes to team guidelines to project docs — into a unified knowledge base. Your AI agents search this indexed context instead of relying on stale training data or manual copy-paste.

**Key capabilities:**

- **Context Stacking** — Layer documentation from multiple sources (personal, team, project, global)
- **Semantic Search** — Local vector + keyword search finds relevant content without dumping everything into prompts
- **MCP Integration** — AI editors like Cursor and Claude Desktop can query your context directly
- **Git-Native Distribution** — Index docs directly from any git repo (public or private) — no publishing required
- **Bundle Any Source** — Export docs from Confluence, Notion, or any system to markdown, then package into distributable `.tar.gz` archives

## Design Philosophy

**Zero-friction adoption.** You probably already have documentation worth indexing — a folder of markdown notes, an Obsidian vault, your company's engineering wiki, or a repo full of ADRs and guides. ctxpkg works with what you have. Point it at existing files and start searching. No migration, no reformatting, no custom schemas required.

**Low-risk investment.** Even if you decide ctxpkg isn't for you, any documentation you create remains useful. It's just markdown files with a simple manifest — humans can read it, other tools can consume it, and nothing is locked into a proprietary format. The worst-case scenario is that you end up with better-organized documentation.

## Installation

Install globally with `npm i -g ctxpkg`, or run commands with the `npx` prefix (`npx ctxpkg col init`).

## Quick Start

Give your AI agents access to your documentation in minutes:

```bash
# Initialize project config
ctxpkg col init

# Add your docs folder (requires manifest.json)
ctxpkg col add docs ./docs/manifest.json

# Index the documents
ctxpkg col sync
```
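
The `col add` step expects a `manifest.json` describing which files belong to the collection. A minimal sketch, reusing the fields from the manifest example later in this README (treat the values as illustrative):

```json
{
  "name": "my-project-docs",
  "sources": [{ "pattern": "**/*.md" }]
}
```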

Now configure your AI editor to use the ctxpkg MCP server:

<details>
<summary>🔧 Cursor</summary>

Add to `~/.cursor/mcp.json`:

```json
{
  "mcpServers": {
    "ctxpkg": {
      "command": "npx",
      "args": ["-y", "ctxpkg", "mcp", "documents"]
    }
  }
}
```

</details>

<details>
<summary>🤖 Claude Code</summary>

Run this command:

```bash
claude mcp add ctxpkg -- npx -y ctxpkg mcp documents
```

</details>

<details>
<summary>⚡ Opencode</summary>

Add to your Opencode configuration:

```json
{
  "mcp": {
    "ctxpkg": {
      "type": "local",
      "command": ["npx", "-y", "ctxpkg", "mcp", "documents"],
      "enabled": true
    }
  }
}
```

</details>

**[See more AI editor setups](docs/setup-agents.md)** • **[Full tutorial: Getting Started](docs/getting-started.md)**

## Documentation

| Guide                                               | Description                                        |
| --------------------------------------------------- | -------------------------------------------------- |
| [AI Editor Setup](docs/setup-agents.md)             | Configure Cursor, Claude Code, Opencode, and more  |
| [Getting Started](docs/getting-started.md)          | First-time setup tutorial                          |
| [CLI Reference](docs/cli-reference.md)              | Complete command documentation                     |
| [Configuration](docs/configuration.md)              | Project config, global config, manifests           |
| [How It Works](docs/how-it-works.md)                | Indexing pipeline, search algorithms               |
| [MCP Server](docs/mcp-server.md)                    | AI editor integration and tools                    |
| [AI Chat & Agent Mode](docs/ai-chat.md)             | Chat with docs, reduced-token MCP mode             |
| [Agent Testing](docs/agent-testing.md)              | Validate agent performance with test suites        |
| [Publishing Packages](docs/github-distribution.md)  | Distribute docs via GitHub Releases                |

## CLI Management Tools

The CLI is primarily for managing your context collections. Most users will interact with ctxpkg through their AI editor via MCP.

```bash
# Collections — manage context packages
ctxpkg col init                  # Initialize project
ctxpkg col add <alias> <url>     # Add a collection
ctxpkg col add -g <alias> <url>  # Add global collection
ctxpkg col sync                  # Index documents
ctxpkg col list                  # Show collections

# MCP — AI editor integration (main use case)
ctxpkg mcp docs                  # Start MCP server (tools mode)
ctxpkg mcp agent                 # Start MCP server (agent mode)

# Additional tools
ctxpkg docs search "query"       # Direct search (testing)
ctxpkg chat "question"           # AI-powered Q&A
ctxpkg agent test tests.yaml     # Test agent performance
ctxpkg daemon start              # Background service
```

See [CLI Reference](docs/cli-reference.md) for complete documentation.

## Example: Context Stacking

Layer context from multiple sources:

```json
{
  "collections": {
    "project-docs": {
      "url": "file://./docs/manifest.json"
    },
    "team-standards": {
      "url": "git+https://github.com/myorg/standards#main?manifest=manifest.json"
    },
    "react": {
      "url": "git+https://github.com/facebook/react#v18.2.0?manifest=docs/manifest.json"
    }
  }
}
```

**Git repositories are the easiest way to share documentation** — no publishing step required. Just point to a repo with a `manifest.json`:

```bash
# Add docs from any git repo (HTTPS or SSH)
ctxpkg col add team-docs "git+https://github.com/myorg/docs#main?manifest=manifest.json"
ctxpkg col add private-docs "git+ssh://git@github.com/myorg/private#main?manifest=manifest.json"
```

Add personal/global context available across all projects:

```bash
ctxpkg col add -g my-notes file:///Users/me/notes/manifest.json
```

## MCP Integration

ctxpkg's primary purpose is giving AI agents access to your documentation through the **Model Context Protocol (MCP)**. Once configured, your AI assistant gains access to 8 document tools:

- `search` - Semantic search across all your documentation
- `search_batch` - Multiple queries in one call
- `get_document` - Retrieve full document content
- `get_section` - Get specific document sections
- `get_outline` - Get document structure/outline
- `find_related` - Find related documents
- `list_collections` - List all indexed collections
- `list_documents` - List all documents in collections
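
Because these are standard MCP tools, any MCP client can call them with the usual `tools/call` request. A hypothetical call to the `search` tool (the envelope is standard MCP JSON-RPC; the argument name shown here is illustrative, not taken from this package):

```json
{
  "jsonrpc": "2.0",
  "id": 1,
  "method": "tools/call",
  "params": {
    "name": "search",
    "arguments": { "query": "How do we handle authentication?" }
  }
}
```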

### Agent Mode (Recommended for Chat)

For reduced token costs in long conversations, use **Agent Mode**:

```json
{
  "mcpServers": {
    "ctxpkg-agent": {
      "command": "npx",
      "args": ["-y", "ctxpkg", "mcp", "agent"]
    }
  }
}
```

This exposes a single `ask_documents` tool that uses an internal AI agent to search and synthesize answers. The calling agent sees only the final result, not intermediate search calls — reducing context overhead.

See [MCP Server Documentation](docs/mcp-server.md) for complete details.

## AI Chat & Agent Mode

Chat with your documentation directly from the terminal, or use **Agent Mode** for reduced token costs in AI assistants.

```bash
# Configure your LLM
ctxpkg config set llm.apiKey sk-...

# One-shot question
ctxpkg chat "How do I implement caching?" --use-case "Optimizing API performance"

# Interactive session
ctxpkg chat -i
```

**Agent Mode MCP** exposes a single `ask_documents` tool that uses an internal AI agent to search and synthesize answers. The calling agent sees only the final result, not intermediate search calls — reducing context overhead in long conversations.

```json
{
  "mcpServers": {
    "ctxpkg-agent": {
      "command": "ctxpkg",
      "args": ["mcp", "agent"]
    }
  }
}
```

See [AI Chat & Agent Mode](docs/ai-chat.md) for details.

## Distributing Internal Documentation

ctxpkg can package documentation from any source — Confluence, Notion, SharePoint, or internal wikis — into distributable bundles that teams can share via internal systems.

**Workflow:**

1. **Export your docs as Markdown** — Use your platform's export tools or APIs to extract documentation
2. **Add a manifest** — Create a `manifest.json` describing the collection:

   ```json
   {
     "name": "company-knowledge-base",
     "sources": [{ "pattern": "**/*.md" }]
   }
   ```

3. **Create a bundle** — Package everything into a distributable archive:

   ```bash
   ctxpkg col pack --output knowledge-base-v1.tar.gz
   ```

4. **Distribute internally** — Host the bundle on internal file servers, S3, or artifact storage

Teams can then add the bundle:

```bash
ctxpkg col add kb https://internal.example.com/bundles/knowledge-base-v1.tar.gz
```

This enables organizations to centralize and distribute institutional knowledge to AI agents across all teams, without requiring git repositories or public hosting.

See [Publishing Packages](docs/github-distribution.md) for automated publishing with GitHub Actions.

## Development

```bash
pnpm run test:lint  # Linting
pnpm run test:unit  # Unit tests
pnpm run build      # Build TypeScript
```

## License

[GNU Affero General Public License v3.0 (AGPL-3.0)](LICENSE)

package/bin/cli.js
ADDED
package/bin/daemon.js
ADDED
package/package.json
ADDED
@@ -0,0 +1,70 @@

{
  "type": "module",
  "main": "dist/exports.js",
  "bin": {
    "ctxpkg": "./bin/cli.js"
  },
  "files": [
    "src",
    "bin"
  ],
  "exports": {
    ".": "./dist/exports.js"
  },
  "devDependencies": {
    "@eslint/eslintrc": "3.3.3",
    "@eslint/js": "9.39.2",
    "@pnpm/find-workspace-packages": "6.0.9",
    "@types/node": "25.0.8",
    "@types/tar": "^6.1.13",
    "@types/ws": "^8.18.1",
    "@vitest/coverage-v8": "4.0.17",
    "eslint": "9.39.2",
    "eslint-config-prettier": "10.1.8",
    "eslint-plugin-import": "2.32.0",
    "eslint-plugin-prettier": "5.5.4",
    "msw": "^2.12.7",
    "prettier": "3.7.4",
    "tsx": "^4.21.0",
    "typescript": "5.9.3",
    "typescript-eslint": "8.53.0",
    "vitest": "4.0.17"
  },
  "name": "ctxpkg",
  "version": "0.0.1",
  "license": "AGPL-3.0",
  "imports": {
    "#root/*": "./src/*"
  },
  "dependencies": {
    "@huggingface/transformers": "^3.8.1",
    "@inquirer/prompts": "^8.2.0",
    "@langchain/community": "^1.1.4",
    "@langchain/core": "^1.1.13",
    "@langchain/langgraph": "^1.0.15",
    "@langchain/openai": "^1.2.2",
    "@langchain/textsplitters": "^1.0.1",
    "@modelcontextprotocol/sdk": "^1.25.2",
    "@types/convict": "^6.1.6",
    "better-sqlite3": "^12.6.0",
    "chalk": "^5.6.2",
    "commander": "^14.0.2",
    "convict": "^6.2.4",
    "dotenv": "^17.2.3",
    "env-paths": "^3.0.0",
    "knex": "^3.1.0",
    "langchain": "^1.2.8",
    "simple-git": "^3.30.0",
    "sqlite-vec": "0.1.7-alpha.2",
    "tar": "^7.5.2",
    "ws": "^8.19.0",
    "yaml": "^2.8.2",
    "zod": "3"
  },
  "scripts": {
    "test:lint": "eslint",
    "build": "tsc --build",
    "test:unit": "vitest --run --passWithNoTests",
    "test": "pnpm run \"/^test:/\""
  }
}

package/src/agent/AGENTS.md
ADDED

@@ -0,0 +1,249 @@

# Agent — Agent Guidelines

This document describes the agent module architecture for AI agents working on this codebase.

## Overview

The agent module provides a LangChain-based agent that uses document tools to search and synthesize information. It's designed to reduce token/context costs by consolidating multiple tool calls into a single, synthesized answer.

## File Structure

| File | Purpose |
| ---- | ------- |
| `agent.ts` | Main agent implementation, factory, and retry logic |
| `agent.types.ts` | TypeScript types and Zod schemas |
| `agent.prompts.ts` | System prompts and templates |

## Architecture

```
┌──────────────────────────────────────────────────────────────────────┐
│                            DocumentAgent                              │
│  ┌────────────────────────────────────────────────────────────────┐  │
│  │                     LangGraph React Agent                      │  │
│  │                                                                 │  │
│  │  ┌─────────────┐  ┌─────────────┐  ┌─────────────────────┐     │  │
│  │  │   search    │  │ get_section │  │ get_document, etc.  │     │  │
│  │  └─────────────┘  └─────────────┘  └─────────────────────┘     │  │
│  │                                                                 │  │
│  │  Uses configured LLM (OpenAI-compatible API)                    │  │
│  └────────────────────────────────────────────────────────────────┘  │
│                                                                       │
│  Features:                                                            │
│  • Verbose mode with step callbacks                                   │
│  • Conversation history for multi-turn chat                           │
│  • Collection filtering via system prompt                             │
│  • Retry logic with exponential backoff                               │
└──────────────────────────────────────────────────────────────────────┘
```

## Usage

### Creating an Agent

```typescript
import { createDocumentAgent, getLLMConfigFromAppConfig } from '#root/agent/agent.ts';
import { createClient } from '#root/client/client.ts';

const client = await createClient({ mode: 'daemon' });
const llmConfig = await getLLMConfigFromAppConfig();

const agent = createDocumentAgent({
  client,
  llmConfig,
  aliasMap: new Map([['docs', 'pkg:file://./docs/manifest.json']]),
  collections: ['docs'], // Optional: restrict to specific collections
  onStep: (step) => console.log(step), // Optional: verbose callbacks
});

// One-shot query (stateless)
const response = await agent.ask(
  'How do I implement streaming?',
  'Building a chatbot that streams responses'
);

console.log(response.answer);
console.log(response.sources);
console.log(response.confidence);
```

### Multi-turn Conversation

```typescript
// First message
const response1 = await agent.chat(
  'What authentication methods are available?',
  'Building a secure API'
);

// Follow-up (maintains conversation context)
const response2 = await agent.chat(
  'How do I implement the OAuth2 option?',
  'Building a secure API'
);

// Clear history when starting new topic
agent.clearHistory();
```

### Verbose Mode

```typescript
const agent = createDocumentAgent({
  client,
  llmConfig,
  onStep: (step) => {
    switch (step.type) {
      case 'thinking':
        console.log(`[thinking] ${step.content}`);
        break;
      case 'tool_call':
        console.log(`[tool] ${step.toolName}`);
        console.log(`  Input: ${JSON.stringify(step.toolInput)}`);
        break;
      case 'tool_result':
        console.log(`[result] ${step.content}`);
        break;
      case 'error':
        console.log(`[retry] ${step.content}`);
        break;
    }
  },
});
```

### Agent Response Format

```typescript
type AgentResponse = {
  answer: string;            // Synthesized answer
  sources: Array<{           // References used
    collection: string;
    document: string;
    section?: string;
  }>;
  confidence: 'high' | 'medium' | 'low';
  note?: string;             // Optional note
};
```

## LLM Configuration

The agent uses configuration from `config.ts`:

```typescript
llm: {
  provider: string;    // OpenAI-compatible API base URL
  model: string;       // Model identifier
  apiKey: string;      // API key
  temperature: number; // 0-2
  maxTokens: number;   // Max response tokens
}
```

Configure via CLI:

```bash
ctxpkg config set llm.apiKey sk-...
ctxpkg config set llm.model gpt-4o
ctxpkg config set llm.provider https://api.openai.com/v1
```

Or via environment variables:

```bash
export CTXPKG_LLM_API_KEY=sk-...
export CTXPKG_LLM_MODEL=gpt-4o
```

## Agent Design

### Tool Selection

The agent uses LangGraph's React agent pattern with these tools (a wiring sketch follows the list):

- `documents_search` — Semantic search across collections
- `documents_list_documents` — Browse collection contents
- `documents_get_outline` — Get document structure
- `documents_get_section` — Get specific sections
- `documents_get_document` — Get full documents
- `documents_list_collections` — List available collections
- `documents_search_batch` — Batch searches
- `documents_find_related` — Find related content
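
A rough sketch of how this pattern can be wired together, assuming `createReactAgent` from `@langchain/langgraph/prebuilt` and the tool conversion shown under Key Patterns below; names like `llmConfig` and `langchainTools` reuse earlier snippets, and the actual factory in `agent.ts` may differ in detail:

```typescript
// Assumed wiring for illustration only; see agent.ts for the real implementation.
import { ChatOpenAI } from '@langchain/openai';
import { createReactAgent } from '@langchain/langgraph/prebuilt';
import { AGENT_SYSTEM_PROMPT, formatUserPrompt } from '#root/agent/agent.prompts.ts';

const llm = new ChatOpenAI({
  model: llmConfig.model,
  apiKey: llmConfig.apiKey,
  configuration: { baseURL: llmConfig.provider }, // OpenAI-compatible endpoint
});

// langchainTools comes from toLangchainTools(createDocumentToolDefinitions(...))
const reactAgent = createReactAgent({ llm, tools: langchainTools });

const result = await reactAgent.invoke({
  messages: [
    { role: 'system', content: AGENT_SYSTEM_PROMPT },
    { role: 'user', content: formatUserPrompt(query, useCase) },
  ],
});
```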

### Termination

The agent stops when:

1. It has synthesized a complete answer (JSON response)
2. Maximum iterations reached (default: 15)
3. No more relevant information to find

### Response Parsing

The agent is prompted to respond in JSON format. The parser works through the following fallback chain (see the sketch after the list):

1. Looks for ```json code blocks
2. Tries to parse the whole content as JSON
3. Falls back to treating content as plain answer
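
A minimal sketch of that chain (a hypothetical helper, not the actual `agent.ts` code; `AgentResponse` is the type shown under Agent Response Format above):

```typescript
// Hypothetical parser sketch; the real implementation lives in agent.ts.
const parseAgentResponse = (content: string): AgentResponse => {
  // 1. Prefer a fenced ```json block if the model emitted one
  const fenced = content.match(/```json\s*([\s\S]*?)```/);
  // 2. Otherwise try the whole message as JSON
  const candidate = fenced ? fenced[1] : content;
  try {
    return JSON.parse(candidate) as AgentResponse;
  } catch {
    // 3. Fall back to treating the content as a plain-text answer
    return { answer: content, sources: [], confidence: 'low' };
  }
};
```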

### Retry Logic

The agent automatically retries on transient errors:

- **Rate limits**: 429 errors
- **Server errors**: 500, 502, 503, 504
- **Network errors**: ECONNRESET, ETIMEDOUT

Retry configuration:

- Max retries: 3
- Initial delay: 1000ms
- Max delay: 30000ms
- Backoff multiplier: 2x

```typescript
import { withRetry, isRetryableError } from '#root/agent/agent.ts';

// Use retry logic for custom async operations
const result = await withRetry(
  () => someAsyncOperation(),
  { maxRetries: 3, initialDelayMs: 1000, maxDelayMs: 30000, backoffMultiplier: 2 },
  (attempt, error, delayMs) => console.log(`Retry ${attempt}: ${error.message}`)
);
```

## Key Patterns

### Lazy Config Loading

Config is loaded dynamically to avoid circular imports:

```typescript
const getLLMConfigFromAppConfig = async (): Promise<LLMConfig> => {
  const { config } = await import('#root/config/config.ts');
  // ...
};
```

### Tool Conversion

Document tools are defined once and converted for LangChain:

```typescript
const toolDefinitions = createDocumentToolDefinitions({ client, aliasMap });
const langchainTools = toLangchainTools(toolDefinitions);
```

### Collection Filtering

Collections can be restricted via the `collections` option:

```typescript
const agent = createDocumentAgent({
  client,
  llmConfig,
  collections: ['my-docs', 'api-docs'], // Only search these
});
```

This adds instructions to the system prompt telling the agent to pass these collections in all search calls.
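
For reference, the prompt helpers in `agent.prompts.ts` (shown below) compose roughly like this; a hypothetical assembly, not a quote from `agent.ts`:

```typescript
// Hypothetical prompt assembly mirroring the description above.
import {
  AGENT_SYSTEM_PROMPT,
  formatCollectionRestriction,
  formatUserPrompt,
} from '#root/agent/agent.prompts.ts';

const collections = ['my-docs', 'api-docs'];
const systemPrompt = AGENT_SYSTEM_PROMPT + formatCollectionRestriction(collections);
const userPrompt = formatUserPrompt('How do I enable caching?', 'Optimizing API responses', collections);
```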

package/src/agent/agent.prompts.ts
ADDED

@@ -0,0 +1,66 @@

/**
 * System prompt for the document search agent
 */
export const AGENT_SYSTEM_PROMPT = `You are a documentation search agent. Your task is to find and synthesize information from technical documentation to answer user questions.

## Guidelines

1. **Start broad, then narrow**: Begin with a semantic search, then drill into specific sections or documents as needed.

2. **Use the right tool for the job**:
   - \`documents_search\` — Find relevant content across collections
   - \`documents_list_documents\` — Browse what's available in a collection
   - \`documents_get_outline\` — Understand document structure before diving in
   - \`documents_get_section\` — Get specific section content efficiently
   - \`documents_get_document\` — Only when you need the full document

3. **Stop when sufficient**: The user has provided a use case. Once you have enough information to address their specific use case, synthesize and respond. Don't over-research.

4. **Cite sources**: Track which documents/sections you used.

5. **Acknowledge uncertainty**: If you can't find sufficient information, say so.

## Response Format

When you have found sufficient information, respond with a JSON object in this exact format:

\`\`\`json
{
  "answer": "Your synthesized answer here. Be clear, actionable, and include code examples when relevant.",
  "sources": [
    {"collection": "collection-name", "document": "document-id", "section": "Section Heading (if applicable)"}
  ],
  "confidence": "high|medium|low",
  "note": "Optional note about limitations or suggestions for further reading"
}
\`\`\`

Use "high" confidence when multiple sources agree or you found a direct answer.
Use "medium" when the information is relevant but not comprehensive.
Use "low" when you're extrapolating or the information is tangentially related.`;

/**
 * Format the collection restriction instruction
 */
export const formatCollectionRestriction = (collections: string[]): string => {
  if (collections.length === 0) return '';
  const collectionList = collections.map((c) => `"${c}"`).join(', ');
  return `\n\n## Collection Restriction\nIMPORTANT: Only search within these collections: ${collectionList}. Always pass this list in the "collections" parameter of your search calls.`;
};

/**
 * User prompt template
 */
export const formatUserPrompt = (query: string, useCase: string, collections?: string[]) => {
  const collectionNote = collections?.length
    ? `\n\nNote: Restrict your searches to these collections: ${collections.join(', ')}`
    : '';

  return `## Question
${query}

## Use Case
${useCase}${collectionNote}

Find the information needed to answer this question for the given use case. Search the documentation, then provide your synthesized answer in JSON format.`;
};