@pentatonic-ai/ai-agent-sdk 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/README.md +178 -69
  2. package/bin/__tests__/callback-server.test.js +4 -1
  3. package/bin/cli.js +41 -164
  4. package/bin/commands/config.js +251 -0
  5. package/bin/commands/login.js +10 -3
  6. package/package.json +2 -1
  7. package/packages/doctor/__tests__/detect.test.js +2 -6
  8. package/packages/doctor/src/checks/local-memory.js +164 -196
  9. package/packages/doctor/src/detect.js +11 -3
  10. package/packages/memory/src/corpus/adapters.js +104 -0
  11. package/packages/memory/src/corpus/cli.js +72 -7
  12. package/packages/memory/src/corpus/index.js +1 -1
  13. package/packages/memory-engine/.env.example +13 -0
  14. package/packages/memory-engine/README.md +131 -0
  15. package/packages/memory-engine/bench/README.md +99 -0
  16. package/packages/memory-engine/bench/scorecards-engine/agent-coding__pentatonic-baseline__20260427-142523.json +1115 -0
  17. package/packages/memory-engine/bench/scorecards-engine/chat-recall__pentatonic-baseline__20260427-142648.json +819 -0
  18. package/packages/memory-engine/bench/scorecards-engine/circular-economy__pentatonic-baseline__20260427-142757.json +1278 -0
  19. package/packages/memory-engine/bench/scorecards-engine/customer-support__pentatonic-baseline__20260427-142900.json +1018 -0
  20. package/packages/memory-engine/bench/scorecards-engine/marketplace-ops__pentatonic-baseline__20260427-142957.json +1038 -0
  21. package/packages/memory-engine/bench/scorecards-engine/product-catalogue__pentatonic-baseline__20260427-143122.json +961 -0
  22. package/packages/memory-engine/bench/scorecards-engine-via-docker/agent-coding__pentatonic-memory__20260427-161812.json +1115 -0
  23. package/packages/memory-engine/bench/scorecards-engine-via-docker/chat-recall__pentatonic-memory__20260427-161701.json +819 -0
  24. package/packages/memory-engine/bench/scorecards-engine-via-docker/circular-economy__pentatonic-memory__20260427-161713.json +1278 -0
  25. package/packages/memory-engine/bench/scorecards-engine-via-docker/customer-support__pentatonic-memory__20260427-161723.json +1018 -0
  26. package/packages/memory-engine/bench/scorecards-engine-via-docker/marketplace-ops__pentatonic-memory__20260427-161732.json +1038 -0
  27. package/packages/memory-engine/bench/scorecards-engine-via-docker/product-catalogue__pentatonic-memory__20260427-161741.json +937 -0
  28. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/agent-coding__pentatonic-memory__20260427-184718.json +1115 -0
  29. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/chat-recall__pentatonic-memory__20260427-184614.json +819 -0
  30. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/circular-economy__pentatonic-memory__20260427-184809.json +1278 -0
  31. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/customer-support__pentatonic-memory__20260427-184854.json +1018 -0
  32. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/marketplace-ops__pentatonic-memory__20260427-184929.json +1038 -0
  33. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/product-catalogue__pentatonic-memory__20260427-185015.json +961 -0
  34. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/agent-coding__pentatonic-memory__20260427-175252.json +1115 -0
  35. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/chat-recall__pentatonic-memory__20260427-175312.json +819 -0
  36. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/circular-economy__pentatonic-memory__20260427-175335.json +1278 -0
  37. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/customer-support__pentatonic-memory__20260427-175355.json +1018 -0
  38. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/marketplace-ops__pentatonic-memory__20260427-175413.json +1038 -0
  39. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/product-catalogue__pentatonic-memory__20260427-175430.json +883 -0
  40. package/packages/memory-engine/bench/scorecards-engine-via-shim/agent-coding__pentatonic-memory__20260427-155409.json +1115 -0
  41. package/packages/memory-engine/bench/scorecards-engine-via-shim/chat-recall__pentatonic-memory__20260427-155421.json +819 -0
  42. package/packages/memory-engine/bench/scorecards-engine-via-shim/circular-economy__pentatonic-memory__20260427-155433.json +1278 -0
  43. package/packages/memory-engine/bench/scorecards-engine-via-shim/customer-support__pentatonic-memory__20260427-155443.json +1018 -0
  44. package/packages/memory-engine/bench/scorecards-engine-via-shim/marketplace-ops__pentatonic-memory__20260427-155453.json +1038 -0
  45. package/packages/memory-engine/bench/scorecards-engine-via-shim/product-catalogue__pentatonic-memory__20260427-155503.json +937 -0
  46. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory-latest__20260427-145103.json +1115 -0
  47. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory__20260427-144909.json +1115 -0
  48. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory-latest__20260427-145153.json +819 -0
  49. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory__20260427-145120.json +542 -0
  50. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory-latest__20260427-145313.json +1278 -0
  51. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory__20260427-145207.json +894 -0
  52. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory-latest__20260427-145412.json +1018 -0
  53. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory__20260427-145327.json +680 -0
  54. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory-latest__20260427-145517.json +1038 -0
  55. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory__20260427-145422.json +693 -0
  56. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory-latest__20260427-145616.json +961 -0
  57. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory__20260427-145528.json +727 -0
  58. package/packages/memory-engine/compat/Dockerfile +11 -0
  59. package/packages/memory-engine/compat/server.py +680 -0
  60. package/packages/memory-engine/docker-compose.yml +243 -0
  61. package/packages/memory-engine/engine/README.md +52 -0
  62. package/packages/memory-engine/engine/l2-hybridrag-proxy.py +1543 -0
  63. package/packages/memory-engine/engine/l5-comms-layer.py +663 -0
  64. package/packages/memory-engine/engine/l6-document-store.py +1018 -0
  65. package/packages/memory-engine/engine/services/l2/Dockerfile +41 -0
  66. package/packages/memory-engine/engine/services/l2/init_databases.py +81 -0
  67. package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +1543 -0
  68. package/packages/memory-engine/engine/services/l4/Dockerfile +15 -0
  69. package/packages/memory-engine/engine/services/l4/server.py +265 -0
  70. package/packages/memory-engine/engine/services/l5/Dockerfile +9 -0
  71. package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +696 -0
  72. package/packages/memory-engine/engine/services/l6/Dockerfile +11 -0
  73. package/packages/memory-engine/engine/services/l6/l6-document-store.py +1035 -0
  74. package/packages/memory-engine/engine/services/nv-embed/Dockerfile +28 -0
  75. package/packages/memory-engine/engine/services/nv-embed/server.py +152 -0
  76. package/packages/memory-engine/pme_memory/__init__.py +0 -0
  77. package/packages/memory-engine/pme_memory/__main__.py +129 -0
  78. package/packages/memory-engine/pme_memory/artifacts.py +95 -0
  79. package/packages/memory-engine/pme_memory/embed.py +74 -0
  80. package/packages/memory-engine/pme_memory/health.py +36 -0
  81. package/packages/memory-engine/pme_memory/hygiene.py +159 -0
  82. package/packages/memory-engine/pme_memory/indexer.py +200 -0
  83. package/packages/memory-engine/pme_memory/needs.py +55 -0
  84. package/packages/memory-engine/pme_memory/provenance.py +80 -0
  85. package/packages/memory-engine/pme_memory/scoring.py +168 -0
  86. package/packages/memory-engine/pme_memory/search.py +52 -0
  87. package/packages/memory-engine/pme_memory/store.py +86 -0
  88. package/packages/memory-engine/pme_memory/synthesis.py +114 -0
  89. package/packages/memory-engine/pyproject.toml +65 -0
  90. package/packages/memory-engine/scripts/kg-extractor.py +557 -0
  91. package/packages/memory-engine/scripts/kg-preflexor-v2.py +738 -0
  92. package/packages/memory-engine/tests/test_api_contract.sh +57 -0
package/README.md CHANGED
@@ -27,7 +27,7 @@ Two products that share one TES account, one install line, and one dashboard:
27
27
 
28
28
  | Product | What it does | When you want it |
29
29
  |---|---|---|
30
- | **Memory** | Persistent, searchable memory for your AI agent — semantic + keyword retrieval, distillation, decay, repo onboarding. Runs locally (Docker) or hosted (TES). | You want your agent to remember conversations, preferences, and codebase context across sessions. |
30
+ | **Memory** | Persistent, searchable memory for your AI agent — 7-layer hybrid retrieval (BM25 + vector + KG + reranker), repo onboarding via references. Runs locally (Docker) or hosted (TES). | You want your agent to remember conversations, preferences, and codebase context across sessions. |
31
31
  | **Observability** | Wrap your LLM client and capture every call — tokens, tool calls, latency, content. Events flow to TES for the dashboard, analytics, and search attribution. | You want to know what your agent is actually doing in production. |
32
32
 
33
33
  Both products are sold separately, but you can use either, both, or neither. Plugins for **Claude Code** and **OpenClaw** install everything at once if you'd rather skip the SDK glue.
@@ -44,10 +44,9 @@ Both products are sold separately, but you can use either, both, or neither. Plu
44
44
 
45
45
  - [TES — the platform](#tes--the-platform)
46
46
  - [Memory](#memory)
47
- - [Hosted (cloud)](#hosted-cloud)
48
47
  - [Local (self-hosted)](#local-self-hosted)
48
+ - [Hosted (cloud)](#hosted-cloud)
49
49
  - [Use as a library](#use-as-a-library)
50
- - [Distilled memory](#distilled-memory)
51
50
  - [Observability](#observability)
52
51
  - [Wrap your LLM client](#wrap-your-llm-client)
53
52
  - [Supported providers](#supported-providers)
@@ -87,63 +86,159 @@ To check connection state later: `npx @pentatonic-ai/ai-agent-sdk whoami`. To po
87
86
 
88
87
  ## Memory
89
88
 
90
- Persistent, searchable memory for AI agents. Multi-signal retrieval (vector + BM25 + recency + frequency), HyDE query expansion, atomic-fact distillation, and four memory layers (episodic, semantic, procedural, working).
89
+ Persistent, searchable memory for AI agents. Backed by a 7-layer hybrid retrieval engine — BM25 keyword (L0), core files (L1), HybridRAG orchestrator (L2), Knowledge Graph entities (L3), vector index (L4), comms-namespace vectors (L5), and a document store with cross-encoder reranker (L6). Reciprocal Rank Fusion stitches them at query time.
91
90
 
92
- Two deployment modes same API, same plugins, same library:
91
+ Same engine, same wire format (`/store`, `/search`, `/forget`, `/store-batch`, `/health`), two deployment modes:
93
92
 
94
- ### Hosted (cloud)
93
+ ### Local (self-hosted)
95
94
 
96
- Run on Pentatonic's infrastructure. Higher-dimensional embeddings (NV-Embed-v2, 4096d), per-tenant Postgres, team-wide shared memory, the dashboard.
95
+ Run the full engine stack on your own machine via Docker. No API keys, no cloud, fully offline. Embeddings come from your local Ollama; quality depends on the model you pull (768d `nomic-embed-text` is the default and works fine on a laptop).
96
+
97
+ **Prerequisites**
98
+
99
+ - Docker + Docker Compose v2
100
+ - Ollama installed on the host (https://ollama.com)
101
+ - A pulled embedding model: `ollama pull nomic-embed-text`
102
+
103
+ If you'll run Claude Code (or anything else) inside a Docker container that needs to reach the engine, **make Ollama listen on all interfaces** so containers can reach it via `host.docker.internal`:
97
104
 
98
105
  ```bash
99
- # 1. Get a TES account (see [TES — the platform](#tes--the-platform))
100
- npx @pentatonic-ai/ai-agent-sdk login
106
+ sudo mkdir -p /etc/systemd/system/ollama.service.d
107
+ echo -e '[Service]\nEnvironment="OLLAMA_HOST=0.0.0.0:11434"' \
108
+ | sudo tee /etc/systemd/system/ollama.service.d/override.conf
109
+ sudo systemctl daemon-reload
110
+ sudo systemctl restart ollama
111
+ ```
101
112
 
102
- # 2. Install the SDK
103
- npm install @pentatonic-ai/ai-agent-sdk
104
- # or: pip install pentatonic-ai-agent-sdk
113
+ **Bring up the engine**
114
+
115
+ ```bash
116
+ git clone https://github.com/Pentatonic-Ltd/ai-agent-sdk.git
117
+ cd ai-agent-sdk/packages/memory-engine
118
+
119
+ # Default .env points at Ollama on the host. Edit if your Ollama is
120
+ # elsewhere or you want to use a higher-quality model (e.g. mxbai-embed-large
121
+ # at 1024d → set EMBED_DIM=1024 and EMBED_MODEL_NAME=mxbai-embed-large).
122
+ cat > .env <<'EOF'
123
+ PME_NV_EMBED_ENABLED=false
124
+ NV_EMBED_URL=http://host.docker.internal:11434/v1/embeddings
125
+ EMBED_MODEL_NAME=nomic-embed-text
126
+ EMBED_DIM=768
127
+ OLLAMA_DIM=768
128
+ PME_OLLAMA_URL=http://host.docker.internal:11434/api/embeddings
129
+ PME_EMBED_MODEL=nomic-embed-text
130
+ L5_OLLAMA_EMBED_URL=http://host.docker.internal:11434/api/embed
131
+ L5_OLLAMA_EMBED_MODEL=nomic-embed-text
132
+ PME_HYDE_ENABLED=false
133
+ PME_RERANK_ENABLED=true
134
+ PME_PORT=8099
135
+ CLIENT_ID=local
136
+ NEO4J_AUTH=neo4j/local-dev-pw
137
+ NEO4J_PASSWORD=local-dev-pw
138
+ EOF
139
+
140
+ docker compose up -d --scale nv-embed=0
105
141
  ```
106
142
 
107
- That's itmemory operations now go through TES.
143
+ First run pulls images and builds engine containers — ~10–15 min. Subsequent restarts are seconds.
108
144
 
109
- ### Local (self-hosted)
145
+ **Verify**
110
146
 
111
- Run the full stack on your own machine. PostgreSQL + pgvector + Ollama in Docker. No API keys, no cloud. Pi 5 with 8GB RAM works fine (`nomic-embed-text` ~300MB + `llama3.2:3b` ~2GB).
147
+ ```bash
148
+ curl -s http://localhost:8099/health | jq
149
+ # Status should be "ok" or "degraded" with most layers reporting ok.
150
+
151
+ curl -sX POST http://localhost:8099/store \
152
+ -H "content-type: application/json" \
153
+ -d '{"content":"hello memory","metadata":{"arena":"local"}}' | jq
154
+
155
+ curl -sX POST http://localhost:8099/search \
156
+ -H "content-type: application/json" \
157
+ -d '{"query":"hello","limit":3,"min_score":0.001}' | jq
158
+ ```
159
+
160
+ If `/search` returns the row from `/store`, the engine is live.
161
+
162
+ **Connect Claude Code**
163
+
164
+ The `tes-memory` plugin's hooks already speak the engine's wire format. Three steps:
165
+
166
+ 1. Install the plugin (once):
167
+ ```
168
+ /plugin marketplace add Pentatonic-Ltd/ai-agent-sdk
169
+ /plugin install tes-memory@pentatonic-ai
170
+ ```
171
+ 2. Point it at your local engine — one command writes the plugin config:
172
+ ```bash
173
+ npx @pentatonic-ai/ai-agent-sdk config local
174
+ ```
175
+ This writes `~/.claude-pentatonic/tes-memory.local.md` with `mode: local` and `memory_url: http://localhost:8099`. If you want a different URL, pass `--engine-url <url>`. To switch back to hosted later, run `tes config hosted` (delegates to `login`).
176
+ 3. Reload: `/reload-plugins` (or restart Claude Code if status reports stale state — MCP server processes need a full restart to pick up plugin updates).
177
+
178
+ Inspect what's currently configured at any time:
112
179
 
113
180
  ```bash
114
- npx @pentatonic-ai/ai-agent-sdk memory
181
+ npx @pentatonic-ai/ai-agent-sdk config show
115
182
  ```
116
183
 
117
- This starts Postgres + pgvector, Ollama, and the memory server. It pulls embedding and chat models, and writes the local config.
184
+ Verify:
185
+
186
+ ```
187
+ /tes-memory:tes-status
188
+ ```
118
189
 
119
- Change models:
190
+ Should report `✓ Connected to local memory engine`. Now every prompt auto-searches engine memory and every turn auto-stores. The footer `🧠 Matched N memories from Pentatonic Memory` shows hits.
191
+
192
+ **Seed memory from your codebase or docs (optional)**
193
+
194
+ Drop the cold-start problem on day one by pre-populating the engine with references to your code/docs:
120
195
 
121
196
  ```bash
122
- EMBEDDING_MODEL=mxbai-embed-large LLM_MODEL=qwen2.5:7b npx @pentatonic-ai/ai-agent-sdk memory
197
+ MEMORY_ENGINE_URL=http://localhost:8099 \
198
+ npx @pentatonic-ai/ai-agent-sdk ingest ~/code/my-project
123
199
  ```
124
200
 
125
- ### Use as a library
201
+ References mode (the default) stores path + signature pointers, not full file contents. See [Repository Onboarding](#repository-onboarding-corpus-ingest) for details.
126
202
 
127
- ```javascript
128
- import { createMemorySystem } from '@pentatonic-ai/ai-agent-sdk/memory';
203
+ **Tuning**
129
204
 
130
- const memory = createMemorySystem({
131
- db: pgPool,
132
- embedding: { url: 'http://localhost:11434/v1', model: 'nomic-embed-text' },
133
- llm: { url: 'http://localhost:11434/v1', model: 'llama3.2:3b' },
134
- });
205
+ Change embedding model: pull a different one, edit `EMBED_MODEL_NAME` + `EMBED_DIM` in `.env`, then `docker compose down -v && docker compose up -d --scale nv-embed=0` (the `-v` is required because Milvus collections are dim-locked at creation; switching dims means recreating).
206
+
207
+ | Model | Dim | Notes |
208
+ |---|---|---|
209
+ | `nomic-embed-text` (default) | 768 | Smallest; works on any laptop |
210
+ | `mxbai-embed-large` | 1024 | Better recall; ~600 MB download |
211
+ | `nv-embed-v2` (via gateway) | 4096 | Production-grade; needs a hosted endpoint or GPU |
135
212
 
136
- await memory.migrate();
137
- await memory.ensureLayers('my-app');
138
- await memory.ingest('User prefers dark mode', { clientId: 'my-app' });
139
- const results = await memory.search('preferences', { clientId: 'my-app' });
213
+ ### Hosted (cloud)
214
+
215
+ Run on Pentatonic's infrastructure. NV-Embed-v2 (4096d) embeddings via the AI gateway, managed Postgres/Neo4j/Qdrant/Milvus, dashboard. The engine still ships in this repo — hosted just deploys it for you.
216
+
217
+ ```bash
218
+ # 1. Get a TES account
219
+ npx @pentatonic-ai/ai-agent-sdk login
220
+
221
+ # 2. Install the SDK
222
+ npm install @pentatonic-ai/ai-agent-sdk
223
+ # or: pip install pentatonic-ai-agent-sdk
140
224
  ```
141
225
 
142
- ### Distilled memory
226
+ Memory operations route through TES → engine. No client-side change between local and hosted.
227
+
228
+ ### Use as a library
229
+
230
+ ```javascript
231
+ import { engineAdapter, ingestCorpus } from '@pentatonic-ai/ai-agent-sdk/memory/corpus';
143
232
 
144
- A background LLM pass extracts atomic facts from each raw turn and stores each as its own node in the semantic layer, linked back to the source. A query like *"what does Phil drink?"* matches *"Phil drinks cortado"* more reliably than a mixed paragraph covering food, drinks, and hobbies. Default-on; the raw turn is still preserved.
233
+ const adapter = engineAdapter({
234
+ engineUrl: 'http://localhost:8099',
235
+ arena: 'my-app',
236
+ });
237
+ await adapter.init();
238
+ await adapter.ingestChunk('User prefers dark mode', { kind: 'note' });
239
+ ```
145
240
 
146
- > **Store latency note (v0.5.4+):** on the local memory server, `store_memory` now awaits distillation before returning instead of running it fire-and-forget. This fixed a bug where distillation was being killed mid-flight (atoms never got embeddings, so they were unreachable by semantic search), but it means stores now take as long as your configured LLM takes to produce atoms — typically 5–30s on `llama3.2:3b`, up to the `chat()` timeout ceiling (60s default, overridable via `opts.timeout`). Cloudflare Worker deployments pass `ctx.waitUntil` and still return fast. Set `opts.distill: false` on the ingest call if you want the old fast-return behaviour at the cost of no atoms.
241
+ For raw `/search` and `/store`, just `fetch()` against `${engineUrl}/search` etc. The wire format is documented in `packages/memory-engine/docs/MIGRATION.md`.
147
242
 
148
243
  ---
149
244
 
@@ -216,17 +311,30 @@ Works with both local and hosted memory. Install once, switch modes via config.
216
311
  /plugin install tes-memory@pentatonic-ai
217
312
  ```
218
313
 
219
- For hosted TES, run `npx @pentatonic-ai/ai-agent-sdk login` once in your terminal — the plugin's MCP server, hooks, and tools all auto-discover the credentials written to `~/.config/tes/credentials.json`. To verify the connection later, ask Claude `/tes-memory:tes-status`.
314
+ **Local engine** — bring up the engine first ([Memory > Local](#local-self-hosted)), then write the plugin config:
220
315
 
221
- For local memory:
222
316
  ```bash
223
- npx @pentatonic-ai/ai-agent-sdk memory
317
+ npx @pentatonic-ai/ai-agent-sdk config local
224
318
  ```
225
319
 
226
- **What it tracks:**
227
- - Every conversation turn — user messages, assistant responses, tool calls, duration
228
- - Automatic memory search — relevant memories injected as context on every prompt
229
- - Automatic memory storage — every turn stored with embeddings and HyDE queries
320
+ **Hosted TES** — run `login` once, the plugin auto-discovers `~/.config/tes/credentials.json`:
321
+
322
+ ```bash
323
+ npx @pentatonic-ai/ai-agent-sdk login
324
+ # equivalent: npx @pentatonic-ai/ai-agent-sdk config hosted
325
+ ```
326
+
327
+ Either way, verify with `/tes-memory:tes-status` in Claude Code, or from the shell:
328
+
329
+ ```bash
330
+ npx @pentatonic-ai/ai-agent-sdk config show
331
+ ```
332
+
333
+ The plugin's MCP server, hooks, and tools all read the same config — switching modes is a single CLI call away.
334
+
335
+ **What it tracks (auto, every turn):**
336
+ - Memory search at prompt time — relevant memories injected as context
337
+ - Memory store at turn end — every conversation turn persisted
230
338
  - Token usage — input, output, cache read, cache creation tokens per turn
231
339
 
232
340
  ### OpenClaw
@@ -249,7 +357,7 @@ Or use the CLI directly:
249
357
  openclaw pentatonic-memory local
250
358
  ```
251
359
 
252
- **What it does:** OpenClaw's context engine hooks fire on every lifecycle event — `ingest` stores user/assistant messages with embeddings + HyDE + distillation; `assemble` injects relevant memories as system-prompt context before every model run; `compact` runs the decay cycle when the context window fills; `after-turn` consolidates high-access memories into the semantic layer. Plus agent-callable tools: `memory_search`, `memory_store`, `memory_layers`.
360
+ **What it does:** OpenClaw's context engine hooks fire on every lifecycle event — `ingest` stores user/assistant messages via the engine's `/store` endpoint (BM25 + vector + KG indexing in parallel); `assemble` calls `/search` to inject relevant memories as system-prompt context; `compact` and `after-turn` are managed by the engine's own decay/consolidation. Plus agent-callable tools: `memory_search`, `memory_store`, `memory_layers`.
253
361
 
254
362
  After setup, config lives in `~/.openclaw/pentatonic-memory.json`. To switch modes, run setup again or edit directly.
255
363
 
@@ -263,11 +371,7 @@ You can also configure via `openclaw.json`:
263
371
  "pentatonic-memory": {
264
372
  "enabled": true,
265
373
  "config": {
266
- "database_url": "postgres://memory:memory@localhost:5433/memory",
267
- "embedding_url": "http://localhost:11435/v1",
268
- "embedding_model": "nomic-embed-text",
269
- "llm_url": "http://localhost:11435/v1",
270
- "llm_model": "llama3.2:3b"
374
+ "memory_url": "http://localhost:8099"
271
375
  }
272
376
  }
273
377
  }
@@ -396,9 +500,11 @@ import { normalizeResponse } from "@pentatonic-ai/ai-agent-sdk";
396
500
  const { content, model, usage, toolCalls } = normalizeResponse(openaiResponse);
397
501
  ```
398
502
 
399
- ### `createMemorySystem(deps)` — Memory
503
+ ### `engineAdapter(config)` — Memory
504
+
505
+ Thin HTTP client for the memory engine. `config = { engineUrl, arena, apiKey? }`. Returns `{ ingestChunk(content, metadata), deleteByCorpusFile(repoAbs, relPath), init() }`. See [Use as a library](#use-as-a-library).
400
506
 
401
- Returns a memory instance with `.migrate()`, `.ensureLayers(clientId)`, `.ingest(content, opts)`, `.search(query, opts)`, and more. See [Use as a library](#use-as-a-library).
507
+ For raw `/store` / `/search` calls, just `fetch()` against `${engineUrl}` directly — the wire format is documented in `packages/memory-engine/docs/MIGRATION.md`.
402
508
 
403
509
  ---
404
510
 
@@ -424,9 +530,9 @@ npx @pentatonic-ai/ai-agent-sdk doctor --path local
424
530
  What gets checked:
425
531
 
426
532
  - **Universal** — Node version, disk space, SDK config-file permissions
427
- - **Local Memory** — Postgres + pgvector + migrations, embedding/LLM endpoints, memory server port
533
+ - **Local engine** — engine `/health`, per-layer health (L0–L6), embedding endpoint reachability
428
534
  - **Hosted TES** — endpoint reachable, API key authenticates
429
- - **Self-hosted platform** — HybridRAG, Qdrant, Neo4j, vLLM (each optional, skipped when its env var is unset)
535
+ - **Plugin config** — `tes-memory.local.md` parses, `memory_url` reachable
430
536
 
431
537
  ### Plugins
432
538
 
@@ -458,24 +564,27 @@ See [`packages/doctor/README.md`](packages/doctor/README.md) for the full plugin
458
564
  ## Architecture
459
565
 
460
566
  ```
461
- Your code
462
- |
463
- +---------------+---------------+
464
- | |
465
- Memory product Observability product
466
- (createMemorySystem) (TESClient.wrap)
467
- | |
468
- | |
469
- +----+----+ |
470
- | | |
471
- Local Hosted ---------------------- TES
472
- (Docker) (Cloudflare cloud)
473
- | |
474
- PG+pgvector PG, R2, Queues,
475
- + Ollama Workers, Modules
476
- (deep-memory,
477
- conversation-
478
- analytics, )
567
+ Your code / Claude Code plugin / OpenClaw plugin
568
+ |
569
+ +-------------------+--------------------+
570
+ | |
571
+ Memory product Observability product
572
+ (engine HTTP API) (TESClient.wrap)
573
+ | |
574
+ | POST /store /search /forget | CHAT_TURN events
575
+ ▼ ▼
576
+ +----------------+ +-----------------+
577
+ | memory engine | | TES |
578
+ | (compat shim) | | (Cloudflare) |
579
+ +----------------+ | Workers, R2, |
580
+ | | Queues, Pages |
581
+ +----------+----------+ +--------+--------+
582
+ | | |
583
+ Local Hosted ---------------------------+
584
+ (your machine) (Pentatonic-managed)
585
+ | |
586
+ docker compose AWS/GCP container cluster
587
+ + host Ollama + AI gateway (NV-Embed-v2)
479
588
  ```
480
589
 
481
590
  Plugins (Claude Code, OpenClaw) are lightweight integrations on top of both products — they call into memory and emit observability events on the user's behalf.
@@ -1,7 +1,10 @@
1
1
  import { startCallbackServer } from "../lib/callback-server.js";
2
2
 
3
3
  async function fetchCallback(port, qs) {
4
- const url = `http://localhost:${port}/callback?${qs}`;
4
+ // Use 127.0.0.1 not "localhost" — undici (Node 18+) resolves localhost to
5
+ // ::1 first, but the server binds to 127.0.0.1 only, so on IPv6-preferring
6
+ // hosts (GitHub Actions runners) the IPv6 attempt ECONNREFUSEs.
7
+ const url = `http://127.0.0.1:${port}/callback?${qs}`;
5
8
  const res = await fetch(url);
6
9
  return { status: res.status, text: await res.text() };
7
10
  }
package/bin/cli.js CHANGED
@@ -1,10 +1,6 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  import { createInterface } from "readline";
4
- import { execFileSync } from "child_process";
5
- import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
6
- import { join } from "path";
7
- import { homedir } from "os";
8
4
 
9
5
  const DEFAULT_ENDPOINT = "https://api.pentatonic.com";
10
6
 
@@ -31,10 +27,10 @@ function parseArgs() {
31
27
  flags.alert = true;
32
28
  } else if (a === "--no-plugins") {
33
29
  flags.noPlugins = true;
34
- } else if (a === "--local") {
35
- flags.local = true;
36
- } else if (a === "--remote") {
37
- flags.remote = true;
30
+ } else if (a === "--engine-url" && args[i + 1]) {
31
+ flags.engineUrl = args[++i];
32
+ } else if (a.startsWith("--engine-url=")) {
33
+ flags.engineUrl = a.split("=")[1];
38
34
  } else if (!a.startsWith("--")) {
39
35
  // First non-flag arg is the command; subsequent ones are subcommand
40
36
  // arguments handled by the dispatched cmd (e.g. `ingest <path>`).
@@ -77,124 +73,14 @@ function ask(question) {
77
73
  return new Promise((resolve) => rl.question(question, resolve));
78
74
  }
79
75
 
80
- function spinner(text) {
81
- const frames = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];
82
- let i = 0;
83
- const id = setInterval(() => {
84
- process.stdout.write(`\r${frames[i++ % frames.length]} ${text}`);
85
- }, 80);
86
- return {
87
- stop(result) {
88
- clearInterval(id);
89
- process.stdout.write(`\r✓ ${result}\n`);
90
- },
91
- fail(msg) {
92
- clearInterval(id);
93
- process.stdout.write(`\r✗ ${msg}\n`);
94
- },
95
- };
96
- }
97
-
98
- async function setupLocalMemory() {
99
- console.log(`\n Local Memory Setup\n`);
100
-
101
- // Check Docker
102
- try {
103
- execFileSync("docker", ["info"], { stdio: "pipe" });
104
- } catch {
105
- console.error(" Error: Docker is required. Install it from https://docker.com\n");
106
- process.exit(1);
107
- }
108
-
109
- const memoryDir = new URL("../packages/memory", import.meta.url).pathname;
110
-
111
- // Start infrastructure + memory server
112
- const infraSpinner = spinner("Starting memory server + PostgreSQL + Ollama...");
113
- try {
114
- execFileSync("docker", ["compose", "up", "-d", "memory", "postgres", "ollama"], {
115
- cwd: memoryDir,
116
- stdio: "pipe",
117
- });
118
- infraSpinner.stop("Memory stack running!");
119
- } catch (err) {
120
- infraSpinner.fail(`Failed to start: ${err.message}`);
121
- process.exit(1);
122
- }
123
-
124
- // Pull models
125
- const embModel = process.env.EMBEDDING_MODEL || "nomic-embed-text";
126
- const llmModel = process.env.LLM_MODEL || "llama3.2:3b";
127
-
128
- const embSpinner = spinner(`Pulling ${embModel}...`);
129
- try {
130
- execFileSync("docker", ["compose", "exec", "ollama", "ollama", "pull", embModel], {
131
- cwd: memoryDir,
132
- stdio: "pipe",
133
- });
134
- embSpinner.stop(`${embModel} ready!`);
135
- } catch {
136
- embSpinner.fail(`Failed to pull ${embModel}. Run manually: docker compose exec ollama ollama pull ${embModel}`);
137
- }
138
-
139
- const llmSpinner = spinner(`Pulling ${llmModel}...`);
140
- try {
141
- execFileSync("docker", ["compose", "exec", "ollama", "ollama", "pull", llmModel], {
142
- cwd: memoryDir,
143
- stdio: "pipe",
144
- });
145
- llmSpinner.stop(`${llmModel} ready!`);
146
- } catch {
147
- llmSpinner.fail(`Failed to pull ${llmModel}. Run manually: docker compose exec ollama ollama pull ${llmModel}`);
148
- }
149
-
150
- // Write local config (warn if hosted config exists)
151
- const configDir = join(homedir(), ".claude-pentatonic");
152
- if (!existsSync(configDir)) {
153
- mkdirSync(configDir, { recursive: true });
154
- }
155
-
156
- const configPath = join(configDir, "tes-memory.local.md");
157
- if (existsSync(configPath)) {
158
- const existing = readFileSync(configPath, "utf-8");
159
- if (existing.includes("tes_endpoint") && !existing.includes("mode: local")) {
160
- console.log("\n ⚠ Hosted TES config detected. Switching to local mode will");
161
- console.log(" disable hosted memory. To restore, run: npx @pentatonic-ai/ai-agent-sdk init\n");
162
- const confirm = await ask(" Switch to local mode? (y/n): ");
163
- if (confirm.toLowerCase() !== "y") {
164
- console.log(" Cancelled. Hosted config unchanged.\n");
165
- rl.close();
166
- return;
167
- }
168
- }
169
- }
170
-
171
- writeFileSync(
172
- configPath,
173
- `---
174
- mode: local
175
- memory_url: http://localhost:3333
176
- ---
177
- `
178
- );
179
-
180
- console.log(`\n Config written to ${configPath}`);
181
-
182
- const sdkDir = new URL("..", import.meta.url).pathname;
183
-
184
- console.log(`
185
- Memory server: http://localhost:3333
186
- Hooks are auto-configured to use local memory.
187
-
188
- Install the plugin in Claude Code:
189
- /plugin marketplace add Pentatonic-Ltd/ai-agent-sdk
190
- /plugin install tes-memory@pentatonic-ai
191
-
192
- You're ready! Every prompt auto-searches memory,
193
- every turn auto-stores. No MCP setup needed.
194
- `);
195
-
196
- rl.close();
197
- }
76
+ // setupLocalMemory + its `spinner` helper were the legacy "bring up
77
+ // Postgres + Ollama" wrapper for the in-process memory server. Removed
78
+ // in favour of:
79
+ // - `tes config local` → writes the plugin config + prints engine
80
+ // bring-up instructions
81
+ // - `cd packages/memory-engine && docker compose up -d` → runs the
82
+ // actual engine
83
+ // `ask` is kept for any future interactive prompts.
198
84
 
199
85
 
200
86
  async function main() {
@@ -233,11 +119,23 @@ async function main() {
233
119
  process.exit(exitCode);
234
120
  }
235
121
 
236
- // `memory` is kept as a shortcut to skip the local-or-remote question
237
- // for users with that command in scripts/docs. New users should use init.
238
- if (flags.command === "memory") {
239
- await setupLocalMemory();
240
- return;
122
+ // tes config <local|hosted|show> point Claude Code's tes-memory
123
+ // plugin at a memory backend, or inspect what's configured. Each
124
+ // subcommand is a thin scaffold:
125
+ // local → write mode: local + memory_url; print engine bring-up steps
126
+ // hosted → run the login flow (delegates to runLoginCommand)
127
+ // show → read and print the current plugin config
128
+ // Future: `tes config set <key> <value>` for engine env-var tweaks.
129
+ if (flags.command === "config") {
130
+ const sub = process.argv.slice(3).find((a) => !a.startsWith("--"));
131
+ const { runConfigCommand } = await import("./commands/config.js");
132
+ const { exitCode } = await runConfigCommand({
133
+ sub,
134
+ endpoint: TES_ENDPOINT,
135
+ engineUrl: flags.engineUrl,
136
+ });
137
+ rl.close();
138
+ process.exit(exitCode);
241
139
  }
242
140
 
243
141
  // Corpus subcommands — onboarding/repo ingest (spec 01)
@@ -268,18 +166,20 @@ async function main() {
268
166
  process.exit(code);
269
167
  }
270
168
 
271
- if (flags.command !== "init") {
272
- console.log(`
169
+ console.log(`
273
170
  @pentatonic-ai/ai-agent-sdk
274
171
 
275
172
  Usage:
276
- npx @pentatonic-ai/ai-agent-sdk login Sign in with TES (browser-based OAuth)
173
+ npx @pentatonic-ai/ai-agent-sdk login First-time hosted setup: browser sign-in + writes credentials
277
174
  npx @pentatonic-ai/ai-agent-sdk whoami Show current login identity
278
- npx @pentatonic-ai/ai-agent-sdk init [deprecated] Alias for 'login'
279
- npx @pentatonic-ai/ai-agent-sdk init --local Set up local Docker memory stack
280
- npx @pentatonic-ai/ai-agent-sdk memory Shortcut for 'init --local'
175
+ npx @pentatonic-ai/ai-agent-sdk config <sub> Configure memory backend; see 'config --help'
281
176
  npx @pentatonic-ai/ai-agent-sdk doctor Run health checks (exit 0/1/2)
282
177
 
178
+ config subcommands:
179
+ config local Point plugin at a local memory engine
180
+ config hosted Switch to hosted (delegates to login)
181
+ config show Print current plugin config + creds
182
+
283
183
  Memory corpus (onboarding):
284
184
  npx @pentatonic-ai/ai-agent-sdk onboard Interactive: pick paths, ingest, install hooks
285
185
  npx @pentatonic-ai/ai-agent-sdk ingest <path> One-shot ingest of a path (any folder works)
@@ -290,8 +190,8 @@ Memory corpus (onboarding):
290
190
  npx @pentatonic-ai/ai-agent-sdk corpus reset Wipe local corpus state
291
191
  npx @pentatonic-ai/ai-agent-sdk install-git-hook Install post-commit hook in cwd
292
192
 
293
- Tenant for corpus commands is read from these env vars:
294
- TES_ENDPOINT, TES_CLIENT_ID, TES_API_KEY
193
+ Corpus commands route to the backend configured via 'config' (local engine
194
+ or hosted TES). Override with env vars: MEMORY_ENGINE_URL, TES_ENDPOINT,
295
195
 
296
196
  doctor flags:
297
197
  --json Emit a JSON report
@@ -301,31 +201,8 @@ doctor flags:
301
201
  --timeout <ms> Per-check timeout (default 10000)
302
202
 
303
203
  For docs, see https://api.pentatonic.com
304
- `);
305
- process.exit(0);
306
- }
307
-
308
- // init: --local still routes to setupLocalMemory (Docker stack —
309
- // separate concern). Anything else (no flag, --remote, mode prompt)
310
- // delegates to login via runInitAlias which emits a one-line
311
- // deprecation warning. setupHostedTes (the old form-based hosted
312
- // flow) is gone; init has been replaced by `login` for one major
313
- // release, then `init` itself goes away.
314
- if (flags.local && flags.remote) {
315
- console.error("\n Error: --local and --remote are mutually exclusive\n");
316
- process.exit(1);
317
- }
318
- if (flags.local) {
319
- await setupLocalMemory();
320
- return;
321
- }
322
- // Non-local path → login alias.
323
- const { runInitAlias } = await import("./commands/login.js");
324
- const { exitCode } = await runInitAlias({
325
- endpoint: TES_ENDPOINT,
326
- });
327
- rl.close();
328
- process.exit(exitCode);
204
+ `);
205
+ process.exit(0);
329
206
  }
330
207
 
331
208
  main().catch((err) => {