@pentatonic-ai/ai-agent-sdk 0.5.10 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +233 -163
- package/bin/__tests__/callback-server.test.js +67 -0
- package/bin/__tests__/credentials.test.js +58 -0
- package/bin/__tests__/login.test.js +210 -0
- package/bin/__tests__/pkce.test.js +39 -0
- package/bin/__tests__/whoami.test.js +77 -0
- package/bin/cli.js +101 -309
- package/bin/commands/login.js +219 -0
- package/bin/commands/whoami.js +41 -0
- package/bin/lib/callback-server.js +137 -0
- package/bin/lib/credentials.js +100 -0
- package/bin/lib/pkce.js +26 -0
- package/package.json +3 -2
- package/packages/memory/src/__tests__/api-contract.test.js +122 -13
- package/packages/memory/src/__tests__/corpus-chunkers.test.js +143 -0
- package/packages/memory/src/__tests__/corpus-discover.test.js +175 -0
- package/packages/memory/src/__tests__/corpus-ingest.test.js +236 -0
- package/packages/memory/src/__tests__/corpus-signatures.test.js +175 -0
- package/packages/memory/src/__tests__/corpus-state.test.js +161 -0
- package/packages/memory/src/__tests__/ingest-corpus-opts.test.js +129 -0
- package/packages/memory/src/__tests__/search-kind.test.js +108 -0
- package/packages/memory/src/corpus/adapters.js +294 -0
- package/packages/memory/src/corpus/chunkers.js +328 -0
- package/packages/memory/src/corpus/cli.js +548 -0
- package/packages/memory/src/corpus/discover.js +379 -0
- package/packages/memory/src/corpus/index.js +68 -0
- package/packages/memory/src/corpus/ingest.js +356 -0
- package/packages/memory/src/corpus/signatures.js +280 -0
- package/packages/memory/src/corpus/state.js +134 -0
- package/packages/memory/src/index.js +18 -0
- package/packages/memory/src/ingest.js +83 -31
- package/packages/memory/src/openclaw/index.js +39 -1
- package/packages/memory/src/search.js +30 -7
package/README.md
CHANGED
|
@@ -6,11 +6,11 @@
|
|
|
6
6
|
</picture>
|
|
7
7
|
</p>
|
|
8
8
|
|
|
9
|
-
<h3 align="center">AI Agent SDK</h3>
|
|
9
|
+
<h3 align="center">Pentatonic AI Agent SDK</h3>
|
|
10
10
|
|
|
11
11
|
<p align="center">
|
|
12
|
-
|
|
13
|
-
|
|
12
|
+
Memory and observability for AI agents.<br>
|
|
13
|
+
Two products on one platform (TES). One install. JavaScript & Python.
|
|
14
14
|
</p>
|
|
15
15
|
|
|
16
16
|
<p align="center">
|
|
@@ -21,69 +21,106 @@
|
|
|
21
21
|
|
|
22
22
|
---
|
|
23
23
|
|
|
24
|
+
## What's in this SDK
|
|
25
|
+
|
|
26
|
+
Two products that share one TES account, one install line, and one dashboard:
|
|
27
|
+
|
|
28
|
+
| Product | What it does | When you want it |
|
|
29
|
+
|---|---|---|
|
|
30
|
+
| **Memory** | Persistent, searchable memory for your AI agent — semantic + keyword retrieval, distillation, decay, repo onboarding. Runs locally (Docker) or hosted (TES). | You want your agent to remember conversations, preferences, and codebase context across sessions. |
|
|
31
|
+
| **Observability** | Wrap your LLM client and capture every call — tokens, tool calls, latency, content. Events flow to TES for the dashboard, analytics, and search attribution. | You want to know what your agent is actually doing in production. |
|
|
32
|
+
|
|
33
|
+
The two products are sold separately, and you can adopt either one on its own or both together. Plugins for **Claude Code** and **OpenClaw** install everything at once if you'd rather skip the SDK glue.
|
|
34
|
+
|
|
35
|
+
## Pick your path
|
|
36
|
+
|
|
37
|
+
- 🧠 **I want memory in my agent** → [Memory](#memory)
|
|
38
|
+
- 📊 **I want to instrument my LLM calls** → [Observability](#observability)
|
|
39
|
+
- 🔌 **I'm using Claude Code or OpenClaw** → [Plugins](#plugins)
|
|
40
|
+
- 📂 **I want to seed memory from my codebase or docs** → [Repository onboarding](#repository-onboarding-corpus-ingest)
|
|
41
|
+
- 🩺 **I want to check my install** → [Health checks (`doctor`)](#health-checks-doctor)
|
|
42
|
+
|
|
24
43
|
## Table of Contents
|
|
25
44
|
|
|
26
|
-
- [
|
|
27
|
-
- [
|
|
28
|
-
- [Hosted
|
|
29
|
-
- [
|
|
30
|
-
- [
|
|
31
|
-
- [
|
|
32
|
-
- [
|
|
45
|
+
- [TES — the platform](#tes--the-platform)
|
|
46
|
+
- [Memory](#memory)
|
|
47
|
+
- [Hosted (cloud)](#hosted-cloud)
|
|
48
|
+
- [Local (self-hosted)](#local-self-hosted)
|
|
49
|
+
- [Use as a library](#use-as-a-library)
|
|
50
|
+
- [Distilled memory](#distilled-memory)
|
|
51
|
+
- [Observability](#observability)
|
|
52
|
+
- [Wrap your LLM client](#wrap-your-llm-client)
|
|
53
|
+
- [Supported providers](#supported-providers)
|
|
54
|
+
- [Plugins](#plugins)
|
|
55
|
+
- [Claude Code](#claude-code)
|
|
56
|
+
- [OpenClaw](#openclaw)
|
|
57
|
+
- [Repository Onboarding (corpus ingest)](#repository-onboarding-corpus-ingest)
|
|
33
58
|
- [API Reference](#api-reference)
|
|
34
59
|
- [Health Checks (`doctor`)](#health-checks-doctor)
|
|
35
60
|
- [Architecture](#architecture)
|
|
36
61
|
|
|
37
|
-
|
|
62
|
+
---
|
|
38
63
|
|
|
39
|
-
|
|
64
|
+
## TES — the platform
|
|
40
65
|
|
|
41
|
-
**
|
|
66
|
+
**TES** (Thing Event System) is Pentatonic's account-and-events backbone. Both products in this SDK run on it: memory writes/queries land in TES, observability events stream to it, and the dashboard reads from it.
|
|
42
67
|
|
|
43
|
-
|
|
68
|
+
You only need a TES account if you're using **hosted memory** or **observability** (observability always sends events to TES). **Local memory** runs entirely on your machine and needs no TES account.
|
|
44
69
|
|
|
45
|
-
|
|
70
|
+
```bash
|
|
71
|
+
# One-time: open browser, sign in or sign up, get API keys
|
|
72
|
+
npx @pentatonic-ai/ai-agent-sdk login
|
|
73
|
+
```
|
|
46
74
|
|
|
47
|
-
|
|
75
|
+
`login` opens your browser at the hosted sign-in page. New users click "Sign up" to create a tenant (clientId + region + email + password). After verification the CLI writes credentials to `~/.config/tes/credentials.json` (mode 0600). The Claude Code plugin, OpenClaw plugin, hooks, and corpus CLI all auto-discover this file — no manual paste step.
|
|
48
76
|
|
|
49
|
-
|
|
77
|
+
```
|
|
78
|
+
✓ Connected as you@example.com on tenant `your-clientid`
|
|
79
|
+
✓ Credentials written to ~/.config/tes/credentials.json
|
|
80
|
+
```
|
|
50
81
|
|
|
51
|
-
|
|
82
|
+
To check connection state later: `npx @pentatonic-ai/ai-agent-sdk whoami`. To point at a local TES dev instance: `npx @pentatonic-ai/ai-agent-sdk login --endpoint http://localhost:8788`.
|
|
52
83
|
|
|
53
|
-
|
|
54
|
-
npx @pentatonic-ai/ai-agent-sdk memory
|
|
55
|
-
```
|
|
84
|
+
(`init` still works as a deprecated alias for `login`; it will be kept for one major release before removal.)
|
|
56
85
|
|
|
57
|
-
|
|
86
|
+
---
|
|
58
87
|
|
|
59
|
-
|
|
88
|
+
## Memory
|
|
60
89
|
|
|
61
|
-
|
|
62
|
-
/plugin marketplace add Pentatonic-Ltd/ai-agent-sdk
|
|
63
|
-
/plugin install tes-memory@pentatonic-ai
|
|
64
|
-
```
|
|
90
|
+
Persistent, searchable memory for AI agents. Multi-signal retrieval (vector + BM25 + recency + frequency), HyDE query expansion, atomic-fact distillation, and four memory layers (episodic, semantic, procedural, working).
|
|
65
91
|
|
|
66
|
-
|
|
92
|
+
Two deployment modes — same API, same plugins, same library:
|
|
67
93
|
|
|
68
|
-
###
|
|
94
|
+
### Hosted (cloud)
|
|
69
95
|
|
|
70
|
-
|
|
71
|
-
- **Semantic search** -- multi-signal retrieval combining vector similarity, BM25 full-text, recency decay, and access frequency
|
|
72
|
-
- **Memory layers** -- episodic (recent), semantic (consolidated), procedural (how-to), working (temporary)
|
|
73
|
-
- **Distilled memory** -- a background LLM pass extracts atomic facts from each raw turn and stores each as its own node in the semantic layer, linked back to the source. A query like *"what does Phil drink?"* matches *"Phil drinks cortado"* more reliably than a mixed paragraph covering food, drinks, and hobbies. Default-on; the raw turn is still preserved.
|
|
74
|
-
- **Decay and consolidation** -- memories fade over time; frequently accessed ones get promoted
|
|
96
|
+
Run on Pentatonic's infrastructure. Higher-dimensional embeddings (NV-Embed-v2, 4096d), per-tenant Postgres, team-wide shared memory, the dashboard.
|
|
75
97
|
|
|
76
|
-
|
|
98
|
+
```bash
|
|
99
|
+
# 1. Get a TES account (see the "TES — the platform" section above)
|
|
100
|
+
npx @pentatonic-ai/ai-agent-sdk login
|
|
101
|
+
|
|
102
|
+
# 2. Install the SDK
|
|
103
|
+
npm install @pentatonic-ai/ai-agent-sdk
|
|
104
|
+
# or: pip install pentatonic-ai-agent-sdk
|
|
105
|
+
```
|
|
77
106
|
|
|
78
|
-
|
|
107
|
+
That's it — memory operations now go through TES.
|
|
108
|
+
|
|
109
|
+
### Local (self-hosted)
|
|
110
|
+
|
|
111
|
+
Run the full stack on your own machine: PostgreSQL + pgvector + Ollama in Docker. No API keys, no cloud. A Raspberry Pi 5 with 8 GB of RAM works fine (`nomic-embed-text` ~300MB + `llama3.2:3b` ~2GB).
|
|
79
112
|
|
|
80
113
|
```bash
|
|
81
|
-
|
|
114
|
+
npx @pentatonic-ai/ai-agent-sdk memory
|
|
82
115
|
```
|
|
83
116
|
|
|
84
|
-
|
|
117
|
+
This starts Postgres + pgvector, Ollama, and the memory server. It pulls embedding and chat models, and writes the local config.
|
|
85
118
|
|
|
86
|
-
|
|
119
|
+
Change models:
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
EMBEDDING_MODEL=mxbai-embed-large LLM_MODEL=qwen2.5:7b npx @pentatonic-ai/ai-agent-sdk memory
|
|
123
|
+
```
|
|
87
124
|
|
|
88
125
|
### Use as a library
|
|
89
126
|
|
|
@@ -102,85 +139,103 @@ await memory.ingest('User prefers dark mode', { clientId: 'my-app' });
|
|
|
102
139
|
const results = await memory.search('preferences', { clientId: 'my-app' });
|
|
103
140
|
```
|
|
104
141
|
|
|
105
|
-
|
|
142
|
+
### Distilled memory
|
|
106
143
|
|
|
107
|
-
|
|
144
|
+
A background LLM pass extracts atomic facts from each raw turn and stores each as its own node in the semantic layer, linked back to the source. A query like *"what does Phil drink?"* matches *"Phil drinks cortado"* more reliably than a mixed paragraph covering food, drinks, and hobbies. Default-on; the raw turn is still preserved.
|
|
108
145
|
|
|
109
|
-
|
|
146
|
+
> **Store latency note (v0.5.4+):** on the local memory server, `store_memory` now awaits distillation before returning instead of running it fire-and-forget. This fixed a bug where distillation was being killed mid-flight (atoms never got embeddings, so they were unreachable by semantic search), but it means stores now take as long as your configured LLM takes to produce atoms — typically 5–30s on `llama3.2:3b`, up to the `chat()` timeout ceiling (60s default, overridable via `opts.timeout`). Cloudflare Worker deployments pass `ctx.waitUntil` and still return fast. Set `opts.distill: false` on the ingest call if you want the old fast-return behaviour at the cost of no atoms.
|
|
110
147
|
|
|
111
|
-
|
|
112
|
-
npx @pentatonic-ai/ai-agent-sdk init
|
|
113
|
-
```
|
|
148
|
+
---
|
|
114
149
|
|
|
115
|
-
|
|
150
|
+
## Observability
|
|
116
151
|
|
|
117
|
-
|
|
118
|
-
TES_ENDPOINT=https://your-company.api.pentatonic.com
|
|
119
|
-
TES_CLIENT_ID=your-company
|
|
120
|
-
TES_API_KEY=tes_your-company_xxxxx
|
|
121
|
-
```
|
|
152
|
+
Wrap your LLM client and every call automatically emits a `CHAT_TURN` event to TES — input/output tokens, tool calls, model, latency, content. Events flow into the TES dashboard, where you get session metrics, search attribution, dead-end detection, and full-text + semantic search across conversations.
|
|
122
153
|
|
|
123
|
-
|
|
154
|
+
Observability requires a TES account (hosted or self-hosted Pentatonic platform). Events have nowhere to go without one.
|
|
124
155
|
|
|
125
|
-
|
|
126
|
-
|
|
156
|
+
### Wrap your LLM client
|
|
157
|
+
|
|
158
|
+
**JavaScript**
|
|
159
|
+
|
|
160
|
+
```js
|
|
161
|
+
import { TESClient } from "@pentatonic-ai/ai-agent-sdk";
|
|
162
|
+
|
|
163
|
+
const tes = new TESClient({
|
|
164
|
+
clientId: process.env.TES_CLIENT_ID,
|
|
165
|
+
apiKey: process.env.TES_API_KEY,
|
|
166
|
+
endpoint: process.env.TES_ENDPOINT,
|
|
167
|
+
});
|
|
168
|
+
|
|
169
|
+
const ai = tes.wrap(new OpenAI(), { sessionId: "conv-123" });
|
|
170
|
+
const result = await ai.chat.completions.create({
|
|
171
|
+
model: "gpt-4o",
|
|
172
|
+
messages: [{ role: "user", content: "Hello!" }],
|
|
173
|
+
});
|
|
127
174
|
```
|
|
128
175
|
|
|
129
|
-
|
|
130
|
-
|
|
176
|
+
**Python**
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
from pentatonic_agent_events import TESClient
|
|
180
|
+
|
|
181
|
+
tes = TESClient(
|
|
182
|
+
client_id=os.environ["TES_CLIENT_ID"],
|
|
183
|
+
api_key=os.environ["TES_API_KEY"],
|
|
184
|
+
endpoint=os.environ["TES_ENDPOINT"],
|
|
185
|
+
)
|
|
186
|
+
|
|
187
|
+
ai = tes.wrap(OpenAI(), session_id="conv-123")
|
|
188
|
+
result = ai.chat.completions.create(
|
|
189
|
+
model="gpt-4o",
|
|
190
|
+
messages=[{"role": "user", "content": "Hello!"}],
|
|
191
|
+
)
|
|
131
192
|
```
|
|
132
193
|
|
|
133
|
-
###
|
|
194
|
+
### Supported providers
|
|
195
|
+
|
|
196
|
+
| Provider | Detection | Intercepted Method |
|
|
197
|
+
|----------|-----------|-------------------|
|
|
198
|
+
| OpenAI | `client.chat.completions.create` | `chat.completions.create()` |
|
|
199
|
+
| Anthropic | `client.messages.create` | `messages.create()` |
|
|
200
|
+
| Workers AI | `client.run` (JS only) | `run()` |
|
|
201
|
+
|
|
202
|
+
All other methods pass through unchanged.
|
|
203
|
+
|
|
204
|
+
---
|
|
134
205
|
|
|
135
|
-
|
|
136
|
-
- **Conversation analytics** -- session metrics, search attribution, dead-end detection
|
|
137
|
-
- **Team-wide shared memory** -- semantic search across your team's AI interactions
|
|
138
|
-
- **Admin dashboard** -- visualize conversations, token usage, and memory explorer
|
|
139
|
-
- **Multi-tenancy** -- isolated databases per client
|
|
206
|
+
## Plugins
|
|
140
207
|
|
|
141
|
-
|
|
208
|
+
If you use Claude Code or OpenClaw, the plugin gives you both products at once — every conversation turn is captured (observability) AND searched/stored as memory. No SDK glue to write.
|
|
142
209
|
|
|
143
|
-
|
|
210
|
+
### Claude Code
|
|
144
211
|
|
|
145
|
-
|
|
212
|
+
Works with both local and hosted memory. Install once, switch modes via config.
|
|
146
213
|
|
|
147
214
|
```
|
|
148
215
|
/plugin marketplace add Pentatonic-Ltd/ai-agent-sdk
|
|
149
216
|
/plugin install tes-memory@pentatonic-ai
|
|
150
217
|
```
|
|
151
218
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
For hosted TES:
|
|
155
|
-
```
|
|
156
|
-
/tes-memory:tes-setup
|
|
157
|
-
```
|
|
219
|
+
For hosted TES, run `npx @pentatonic-ai/ai-agent-sdk login` once in your terminal — the plugin's MCP server, hooks, and tools all auto-discover the credentials written to `~/.config/tes/credentials.json`. To verify the connection later, ask Claude `/tes-memory:tes-status`.
|
|
158
220
|
|
|
159
221
|
For local memory:
|
|
160
222
|
```bash
|
|
161
223
|
npx @pentatonic-ai/ai-agent-sdk memory
|
|
162
224
|
```
|
|
163
225
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
-
|
|
167
|
-
-
|
|
168
|
-
-
|
|
169
|
-
- **Token usage** -- input, output, cache read, cache creation tokens per turn
|
|
170
|
-
|
|
171
|
-
## OpenClaw Plugin
|
|
226
|
+
**What it tracks:**
|
|
227
|
+
- Every conversation turn — user messages, assistant responses, tool calls, duration
|
|
228
|
+
- Automatic memory search — relevant memories injected as context on every prompt
|
|
229
|
+
- Automatic memory storage — every turn stored with embeddings and HyDE queries
|
|
230
|
+
- Token usage — input, output, cache read, cache creation tokens per turn
|
|
172
231
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
### Install
|
|
232
|
+
### OpenClaw
|
|
176
233
|
|
|
177
234
|
```bash
|
|
178
235
|
openclaw plugins install @pentatonic-ai/openclaw-memory-plugin
|
|
179
236
|
```
|
|
180
237
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
Tell OpenClaw:
|
|
238
|
+
Then tell OpenClaw:
|
|
184
239
|
|
|
185
240
|
```
|
|
186
241
|
Set up pentatonic memory
|
|
@@ -194,18 +249,7 @@ Or use the CLI directly:
|
|
|
194
249
|
openclaw pentatonic-memory local
|
|
195
250
|
```
|
|
196
251
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
OpenClaw's context engine hooks fire on every lifecycle event:
|
|
200
|
-
|
|
201
|
-
- **Ingest** -- every user and assistant message is stored with embeddings and HyDE query expansion, then distilled into atomic facts in the background (see [Distilled memory](#what-you-get))
|
|
202
|
-
- **Assemble** -- relevant memories are injected as system prompt context before every model run
|
|
203
|
-
- **Compact** -- decay cycle runs when the context window fills
|
|
204
|
-
- **After turn** -- high-access memories get consolidated to the semantic layer
|
|
205
|
-
|
|
206
|
-
Plus agent-callable tools: `memory_search`, `memory_store`, `memory_layers`.
|
|
207
|
-
|
|
208
|
-
### Configuration
|
|
252
|
+
**What it does:** OpenClaw's context engine hooks fire on every lifecycle event — `ingest` stores user/assistant messages with embeddings + HyDE + distillation; `assemble` injects relevant memories as system-prompt context before every model run; `compact` runs the decay cycle when the context window fills; `after-turn` consolidates high-access memories into the semantic layer. Plus agent-callable tools: `memory_search`, `memory_store`, `memory_layers`.
|
|
209
253
|
|
|
210
254
|
After setup, config lives in `~/.openclaw/pentatonic-memory.json`. To switch modes, run setup again or edit directly.
|
|
211
255
|
|
|
@@ -241,57 +285,80 @@ For hosted mode, replace the config block with:
|
|
|
241
285
|
}
|
|
242
286
|
```
|
|
243
287
|
|
|
244
|
-
|
|
288
|
+
---
|
|
245
289
|
|
|
246
|
-
|
|
290
|
+
## Repository Onboarding (corpus ingest)
|
|
247
291
|
|
|
248
|
-
|
|
249
|
-
import { TESClient } from "@pentatonic-ai/ai-agent-sdk";
|
|
292
|
+
The memory layer starts empty. To avoid the cold-start problem where retrieval has nothing useful to return for the first days of use, you can ingest your repos (or any folder of docs) on day one:
|
|
250
293
|
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
});
|
|
294
|
+
```bash
|
|
295
|
+
# Interactive — picks paths, shows a cost preview, ingests, offers
|
|
296
|
+
# to install a git post-commit hook so memory stays current
|
|
297
|
+
npx @pentatonic-ai/ai-agent-sdk onboard
|
|
256
298
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
299
|
+
# One-shot ingest of a single path
|
|
300
|
+
npx @pentatonic-ai/ai-agent-sdk ingest ~/code/my-app
|
|
301
|
+
npx @pentatonic-ai/ai-agent-sdk ingest ~/Documents/design-notes # any folder works
|
|
302
|
+
|
|
303
|
+
# See what's tracked and how big the corpus is
|
|
304
|
+
npx @pentatonic-ai/ai-agent-sdk status
|
|
305
|
+
|
|
306
|
+
# Delta-resync everything that's tracked (or one path)
|
|
307
|
+
npx @pentatonic-ai/ai-agent-sdk resync
|
|
308
|
+
|
|
309
|
+
# Manage the tracked-paths list
|
|
310
|
+
npx @pentatonic-ai/ai-agent-sdk corpus list
|
|
311
|
+
npx @pentatonic-ai/ai-agent-sdk corpus remove ~/code/old-project
|
|
312
|
+
npx @pentatonic-ai/ai-agent-sdk corpus reset
|
|
262
313
|
```
|
|
263
314
|
|
|
264
|
-
|
|
315
|
+
Tenant credentials come from env vars (`TES_ENDPOINT`, `TES_CLIENT_ID`, `TES_API_KEY`) or `~/.config/tes/credentials.json` if you used `npx @pentatonic-ai/ai-agent-sdk login`. To point at a TES instance running on `localhost`, set `TES_ENDPOINT=http://localhost:8788`.
|
|
265
316
|
|
|
266
|
-
|
|
267
|
-
from pentatonic_agent_events import TESClient
|
|
317
|
+
### What gets stored: references, not content
|
|
268
318
|
|
|
269
|
-
|
|
270
|
-
client_id=os.environ["TES_CLIENT_ID"],
|
|
271
|
-
api_key=os.environ["TES_API_KEY"],
|
|
272
|
-
endpoint=os.environ["TES_ENDPOINT"],
|
|
273
|
-
)
|
|
319
|
+
By default, ingest stores **pointers to source content** (path + line range + a short signature/summary), not full chunk content. Per-language strategies:
|
|
274
320
|
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
```
|
|
321
|
+
- **Markdown** — one reference per H1/H2 section
|
|
322
|
+
- **JS / TS** — one per top-level `function` / `class` / `const` / `export`
|
|
323
|
+
- **Python** — one per top-level `def` / `class`
|
|
324
|
+
- **JSON / YAML** — collapsed top-level keys
|
|
325
|
+
- **Other** — single file-level reference
|
|
281
326
|
|
|
282
|
-
|
|
327
|
+
Why pointers? **Code mutates between ingests.** Embedded chunks of old source rot silently — the LLM keeps confidently citing functions you've since rewritten, with retrieval evidence to back it up. Pointers rot loudly: when a file moves or changes, `Read` fails or returns different content, and the agent observes and adjusts. Stale-but-confident is the worst class of memory bug; loud-and-self-correcting is qualitatively better for source code.
|
|
283
328
|
|
|
284
|
-
|
|
285
|
-
|----------|-----------|-------------------|
|
|
286
|
-
| OpenAI | `client.chat.completions.create` | `chat.completions.create()` |
|
|
287
|
-
| Anthropic | `client.messages.create` | `messages.create()` |
|
|
288
|
-
| Workers AI | `client.run` (JS only) | `run()` |
|
|
329
|
+
It also means proprietary source never leaves your machine — only the index (path + line range + short signature/summary) is sent to hosted TES, and the agent reads actual file contents at query time on its own.
|
|
289
330
|
|
|
290
|
-
|
|
331
|
+
If you need a self-contained index (e.g. for air-gapped retrieval where the source isn't available at query time), opt into legacy chunk-content storage by passing `mode: "content"` to `ingestCorpus` when using the SDK as a library.
|
|
332
|
+
|
|
333
|
+
### What gets ingested, what doesn't
|
|
334
|
+
|
|
335
|
+
Any folder works — git is not required. The walker honors `.gitignore` and `.tesignore` if present, plus a hard-exclude list for secrets and credentials that **cannot be overridden** even with `!pattern` rules:
|
|
336
|
+
|
|
337
|
+
- `.env*` (any environment file)
|
|
338
|
+
- `*.pem`, `*.key`, `*.crt`, `*.p12`, `*.pfx`, `*.jks`
|
|
339
|
+
- `id_rsa`, `id_ed25519`, `id_ecdsa`, `id_dsa` (SSH private keys)
|
|
340
|
+
- `.ssh/`, `.aws/`, `.gcp/`, `.azure/` (whole directories)
|
|
341
|
+
- `.npmrc`, `.pypirc`, `.netrc`
|
|
342
|
+
- `secrets/`, `credentials/`, `service-account.*`
|
|
343
|
+
- `*_secret*`, `*_token*`, `*_password*`
|
|
344
|
+
|
|
345
|
+
The walker also skips these directories outright: `.git`, `node_modules`, `dist`, `build`, `.next`, `venv`, `__pycache__`, `target`, `.terraform`, etc. It additionally skips binaries, lockfiles, and minified output by file extension. Files larger than 512 KB are skipped by default (override with adapter options if you need to).
|
|
346
|
+
|
|
347
|
+
### How it stays current
|
|
348
|
+
|
|
349
|
+
For git repos, accepting the prompt during `onboard` installs a post-commit hook at `.git/hooks/post-commit` that re-ingests files changed in each commit. The hook is non-fatal — it never blocks a commit. Install manually any time with:
|
|
350
|
+
|
|
351
|
+
```bash
|
|
352
|
+
npx @pentatonic-ai/ai-agent-sdk install-git-hook
|
|
353
|
+
```
|
|
354
|
+
|
|
355
|
+
For non-git folders, re-run `ingest` or `resync` whenever the source changes. Re-ingest is cheap: the SDK keeps a content-hash per file and skips anything that hasn't changed since the last run.
|
|
356
|
+
|
|
357
|
+
---
|
|
291
358
|
|
|
292
359
|
## API Reference
|
|
293
360
|
|
|
294
|
-
### `TESClient(config)`
|
|
361
|
+
### `TESClient(config)` — Observability
|
|
295
362
|
|
|
296
363
|
| Param | Type | Default | Description |
|
|
297
364
|
|-------|------|---------|-------------|
|
|
@@ -329,6 +396,12 @@ import { normalizeResponse } from "@pentatonic-ai/ai-agent-sdk";
|
|
|
329
396
|
const { content, model, usage, toolCalls } = normalizeResponse(openaiResponse);
|
|
330
397
|
```
|
|
331
398
|
|
|
399
|
+
### `createMemorySystem(deps)` — Memory
|
|
400
|
+
|
|
401
|
+
Returns a memory instance with `.migrate()`, `.ensureLayers(clientId)`, `.ingest(content, opts)`, `.search(query, opts)`, and more. See [Use as a library](#use-as-a-library).
|
|
402
|
+
|
|
403
|
+
---
|
|
404
|
+
|
|
332
405
|
## Health Checks (`doctor`)
|
|
333
406
|
|
|
334
407
|
Run a full health check of your SDK install at any time:
|
|
@@ -337,9 +410,7 @@ Run a full health check of your SDK install at any time:
|
|
|
337
410
|
npx @pentatonic-ai/ai-agent-sdk doctor
|
|
338
411
|
```
|
|
339
412
|
|
|
340
|
-
`doctor` auto-detects which install path you're on (Local Memory, Hosted
|
|
341
|
-
TES, or self-hosted Pentatonic platform) and runs only the checks that
|
|
342
|
-
apply. Exit code is `0` for all-clear, `1` for warnings, `2` for critical.
|
|
413
|
+
`doctor` auto-detects which install path you're on (Local Memory, Hosted TES, or self-hosted Pentatonic platform) and runs only the checks that apply. Exit code is `0` for all-clear, `1` for warnings, `2` for critical.
|
|
343
414
|
|
|
344
415
|
Common flags:
|
|
345
416
|
|
|
@@ -353,17 +424,13 @@ npx @pentatonic-ai/ai-agent-sdk doctor --path local
|
|
|
353
424
|
What gets checked:
|
|
354
425
|
|
|
355
426
|
- **Universal** — Node version, disk space, SDK config-file permissions
|
|
356
|
-
- **Local Memory** — Postgres + pgvector + migrations, embedding/LLM
|
|
357
|
-
endpoints, memory server port
|
|
427
|
+
- **Local Memory** — Postgres + pgvector + migrations, embedding/LLM endpoints, memory server port
|
|
358
428
|
- **Hosted TES** — endpoint reachable, API key authenticates
|
|
359
|
-
- **Self-hosted platform** — HybridRAG, Qdrant, Neo4j, vLLM (each
|
|
360
|
-
optional, skipped when its env var is unset)
|
|
429
|
+
- **Self-hosted platform** — HybridRAG, Qdrant, Neo4j, vLLM (each optional, skipped when its env var is unset)
|
|
361
430
|
|
|
362
431
|
### Plugins
|
|
363
432
|
|
|
364
|
-
Drop a `.mjs` file into `~/.config/pentatonic-ai/doctor-plugins/` to add
|
|
365
|
-
your own checks. Useful for app-specific things — internal APIs, ingest
|
|
366
|
-
freshness, custom infrastructure — without forking the SDK.
|
|
433
|
+
Drop a `.mjs` file into `~/.config/pentatonic-ai/doctor-plugins/` to add your own checks. Useful for app-specific things — internal APIs, ingest freshness, custom infrastructure — without forking the SDK.
|
|
367
434
|
|
|
368
435
|
```js
|
|
369
436
|
// ~/.config/pentatonic-ai/doctor-plugins/my-app.mjs
|
|
@@ -384,32 +451,35 @@ export default {
|
|
|
384
451
|
};
|
|
385
452
|
```
|
|
386
453
|
|
|
387
|
-
See [`packages/doctor/README.md`](packages/doctor/README.md) for the full
|
|
388
|
-
|
|
454
|
+
See [`packages/doctor/README.md`](packages/doctor/README.md) for the full plugin contract and programmatic API.
|
|
455
|
+
|
|
456
|
+
---
|
|
389
457
|
|
|
390
458
|
## Architecture
|
|
391
459
|
|
|
392
460
|
```
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
461
|
+
Your code
|
|
462
|
+
|
|
|
463
|
+
+---------------+---------------+
|
|
464
|
+
| |
|
|
465
|
+
Memory product Observability product
|
|
466
|
+
(createMemorySystem) (TESClient.wrap)
|
|
467
|
+
| |
|
|
468
|
+
| |
|
|
469
|
+
+----+----+ |
|
|
470
|
+
| | |
|
|
471
|
+
Local Hosted ---------------------- TES
|
|
472
|
+
(Docker) (Cloudflare cloud)
|
|
473
|
+
| |
|
|
474
|
+
PG+pgvector PG, R2, Queues,
|
|
475
|
+
+ Ollama Workers, Modules
|
|
476
|
+
(deep-memory,
|
|
477
|
+
conversation-
|
|
478
|
+
analytics, …)
|
|
411
479
|
```
|
|
412
480
|
|
|
481
|
+
Plugins (Claude Code, OpenClaw) are lightweight integrations on top of both products — they call into memory and emit observability events on the user's behalf.
|
|
482
|
+
|
|
413
483
|
## License
|
|
414
484
|
|
|
415
485
|
MIT
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { startCallbackServer } from "../lib/callback-server.js";
|
|
2
|
+
|
|
3
|
+
async function fetchCallback(port, qs) {
|
|
4
|
+
const url = `http://localhost:${port}/callback?${qs}`;
|
|
5
|
+
const res = await fetch(url);
|
|
6
|
+
return { status: res.status, text: await res.text() };
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
describe("startCallbackServer", () => {
|
|
10
|
+
it("resolves with {code, state} when callback hits with matching state", async () => {
|
|
11
|
+
const expectedState = "abc123";
|
|
12
|
+
const { port, result } = await startCallbackServer({
|
|
13
|
+
ports: [0],
|
|
14
|
+
state: expectedState,
|
|
15
|
+
timeoutMs: 5000,
|
|
16
|
+
});
|
|
17
|
+
const fetchPromise = fetchCallback(
|
|
18
|
+
port,
|
|
19
|
+
`code=AUTH_CODE_XYZ&state=${expectedState}`
|
|
20
|
+
);
|
|
21
|
+
const callback = await result;
|
|
22
|
+
const httpRes = await fetchPromise;
|
|
23
|
+
expect(callback.code).toBe("AUTH_CODE_XYZ");
|
|
24
|
+
expect(callback.state).toBe(expectedState);
|
|
25
|
+
expect(httpRes.status).toBe(200);
|
|
26
|
+
expect(httpRes.text).toMatch(/close this tab/i);
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
it("rejects when state does not match", async () => {
|
|
30
|
+
const { port, result } = await startCallbackServer({
|
|
31
|
+
ports: [0],
|
|
32
|
+
state: "EXPECTED",
|
|
33
|
+
timeoutMs: 5000,
|
|
34
|
+
});
|
|
35
|
+
fetchCallback(port, "code=ANY&state=ATTACKER").catch(() => {});
|
|
36
|
+
await expect(result).rejects.toThrow(/state/i);
|
|
37
|
+
});
|
|
38
|
+
|
|
39
|
+
it("rejects on timeout", async () => {
|
|
40
|
+
const { result } = await startCallbackServer({
|
|
41
|
+
ports: [0],
|
|
42
|
+
state: "S",
|
|
43
|
+
timeoutMs: 100,
|
|
44
|
+
});
|
|
45
|
+
await expect(result).rejects.toThrow(/timeout|timed out/i);
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
it("uses the first available port from the list", async () => {
|
|
49
|
+
// Bind one server to a known port to force the next attempt.
|
|
50
|
+
const blocker = await startCallbackServer({
|
|
51
|
+
ports: [0],
|
|
52
|
+
state: "BLOCKER",
|
|
53
|
+
timeoutMs: 30000,
|
|
54
|
+
});
|
|
55
|
+
const blockedPort = blocker.port;
|
|
56
|
+
// Now ask the second server to try the blocked port first, then fall
|
|
57
|
+
// through to OS-assigned. We expect it to land on a different port.
|
|
58
|
+
const second = await startCallbackServer({
|
|
59
|
+
ports: [blockedPort, 0],
|
|
60
|
+
state: "S",
|
|
61
|
+
timeoutMs: 30000,
|
|
62
|
+
});
|
|
63
|
+
expect(second.port).not.toBe(blockedPort);
|
|
64
|
+
second.cancel();
|
|
65
|
+
blocker.cancel();
|
|
66
|
+
});
|
|
67
|
+
});
|