@pentatonic-ai/ai-agent-sdk 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/README.md +170 -69
  2. package/bin/__tests__/callback-server.test.js +4 -1
  3. package/bin/cli.js +41 -164
  4. package/bin/commands/config.js +251 -0
  5. package/package.json +2 -1
  6. package/packages/doctor/__tests__/detect.test.js +2 -6
  7. package/packages/doctor/src/checks/local-memory.js +164 -196
  8. package/packages/doctor/src/detect.js +11 -3
  9. package/packages/memory/src/corpus/adapters.js +104 -0
  10. package/packages/memory/src/corpus/cli.js +72 -7
  11. package/packages/memory/src/corpus/index.js +1 -1
  12. package/packages/memory-engine/.env.example +13 -0
  13. package/packages/memory-engine/README.md +131 -0
  14. package/packages/memory-engine/bench/README.md +99 -0
  15. package/packages/memory-engine/bench/scorecards-engine/agent-coding__pentatonic-baseline__20260427-142523.json +1115 -0
  16. package/packages/memory-engine/bench/scorecards-engine/chat-recall__pentatonic-baseline__20260427-142648.json +819 -0
  17. package/packages/memory-engine/bench/scorecards-engine/circular-economy__pentatonic-baseline__20260427-142757.json +1278 -0
  18. package/packages/memory-engine/bench/scorecards-engine/customer-support__pentatonic-baseline__20260427-142900.json +1018 -0
  19. package/packages/memory-engine/bench/scorecards-engine/marketplace-ops__pentatonic-baseline__20260427-142957.json +1038 -0
  20. package/packages/memory-engine/bench/scorecards-engine/product-catalogue__pentatonic-baseline__20260427-143122.json +961 -0
  21. package/packages/memory-engine/bench/scorecards-engine-via-docker/agent-coding__pentatonic-memory__20260427-161812.json +1115 -0
  22. package/packages/memory-engine/bench/scorecards-engine-via-docker/chat-recall__pentatonic-memory__20260427-161701.json +819 -0
  23. package/packages/memory-engine/bench/scorecards-engine-via-docker/circular-economy__pentatonic-memory__20260427-161713.json +1278 -0
  24. package/packages/memory-engine/bench/scorecards-engine-via-docker/customer-support__pentatonic-memory__20260427-161723.json +1018 -0
  25. package/packages/memory-engine/bench/scorecards-engine-via-docker/marketplace-ops__pentatonic-memory__20260427-161732.json +1038 -0
  26. package/packages/memory-engine/bench/scorecards-engine-via-docker/product-catalogue__pentatonic-memory__20260427-161741.json +937 -0
  27. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/agent-coding__pentatonic-memory__20260427-184718.json +1115 -0
  28. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/chat-recall__pentatonic-memory__20260427-184614.json +819 -0
  29. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/circular-economy__pentatonic-memory__20260427-184809.json +1278 -0
  30. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/customer-support__pentatonic-memory__20260427-184854.json +1018 -0
  31. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/marketplace-ops__pentatonic-memory__20260427-184929.json +1038 -0
  32. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/product-catalogue__pentatonic-memory__20260427-185015.json +961 -0
  33. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/agent-coding__pentatonic-memory__20260427-175252.json +1115 -0
  34. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/chat-recall__pentatonic-memory__20260427-175312.json +819 -0
  35. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/circular-economy__pentatonic-memory__20260427-175335.json +1278 -0
  36. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/customer-support__pentatonic-memory__20260427-175355.json +1018 -0
  37. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/marketplace-ops__pentatonic-memory__20260427-175413.json +1038 -0
  38. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/product-catalogue__pentatonic-memory__20260427-175430.json +883 -0
  39. package/packages/memory-engine/bench/scorecards-engine-via-shim/agent-coding__pentatonic-memory__20260427-155409.json +1115 -0
  40. package/packages/memory-engine/bench/scorecards-engine-via-shim/chat-recall__pentatonic-memory__20260427-155421.json +819 -0
  41. package/packages/memory-engine/bench/scorecards-engine-via-shim/circular-economy__pentatonic-memory__20260427-155433.json +1278 -0
  42. package/packages/memory-engine/bench/scorecards-engine-via-shim/customer-support__pentatonic-memory__20260427-155443.json +1018 -0
  43. package/packages/memory-engine/bench/scorecards-engine-via-shim/marketplace-ops__pentatonic-memory__20260427-155453.json +1038 -0
  44. package/packages/memory-engine/bench/scorecards-engine-via-shim/product-catalogue__pentatonic-memory__20260427-155503.json +937 -0
  45. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory-latest__20260427-145103.json +1115 -0
  46. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory__20260427-144909.json +1115 -0
  47. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory-latest__20260427-145153.json +819 -0
  48. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory__20260427-145120.json +542 -0
  49. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory-latest__20260427-145313.json +1278 -0
  50. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory__20260427-145207.json +894 -0
  51. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory-latest__20260427-145412.json +1018 -0
  52. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory__20260427-145327.json +680 -0
  53. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory-latest__20260427-145517.json +1038 -0
  54. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory__20260427-145422.json +693 -0
  55. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory-latest__20260427-145616.json +961 -0
  56. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory__20260427-145528.json +727 -0
  57. package/packages/memory-engine/compat/Dockerfile +11 -0
  58. package/packages/memory-engine/compat/server.py +680 -0
  59. package/packages/memory-engine/docker-compose.yml +243 -0
  60. package/packages/memory-engine/docs/MIGRATION.md +178 -0
  61. package/packages/memory-engine/docs/RUNBOOK-AWS.md +375 -0
  62. package/packages/memory-engine/docs/why-v05-underperforms.md +138 -0
  63. package/packages/memory-engine/engine/README.md +52 -0
  64. package/packages/memory-engine/engine/l2-hybridrag-proxy.py +1543 -0
  65. package/packages/memory-engine/engine/l5-comms-layer.py +663 -0
  66. package/packages/memory-engine/engine/l6-document-store.py +1018 -0
  67. package/packages/memory-engine/engine/services/l2/Dockerfile +41 -0
  68. package/packages/memory-engine/engine/services/l2/init_databases.py +81 -0
  69. package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +1543 -0
  70. package/packages/memory-engine/engine/services/l4/Dockerfile +15 -0
  71. package/packages/memory-engine/engine/services/l4/server.py +235 -0
  72. package/packages/memory-engine/engine/services/l5/Dockerfile +9 -0
  73. package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +678 -0
  74. package/packages/memory-engine/engine/services/l6/Dockerfile +11 -0
  75. package/packages/memory-engine/engine/services/l6/l6-document-store.py +1016 -0
  76. package/packages/memory-engine/engine/services/nv-embed/Dockerfile +28 -0
  77. package/packages/memory-engine/engine/services/nv-embed/server.py +152 -0
  78. package/packages/memory-engine/pme_memory/__init__.py +0 -0
  79. package/packages/memory-engine/pme_memory/__main__.py +129 -0
  80. package/packages/memory-engine/pme_memory/artifacts.py +95 -0
  81. package/packages/memory-engine/pme_memory/embed.py +74 -0
  82. package/packages/memory-engine/pme_memory/health.py +36 -0
  83. package/packages/memory-engine/pme_memory/hygiene.py +159 -0
  84. package/packages/memory-engine/pme_memory/indexer.py +200 -0
  85. package/packages/memory-engine/pme_memory/needs.py +55 -0
  86. package/packages/memory-engine/pme_memory/provenance.py +80 -0
  87. package/packages/memory-engine/pme_memory/scoring.py +168 -0
  88. package/packages/memory-engine/pme_memory/search.py +52 -0
  89. package/packages/memory-engine/pme_memory/store.py +86 -0
  90. package/packages/memory-engine/pme_memory/synthesis.py +114 -0
  91. package/packages/memory-engine/pyproject.toml +65 -0
  92. package/packages/memory-engine/scripts/kg-extractor.py +557 -0
  93. package/packages/memory-engine/scripts/kg-preflexor-v2.py +738 -0
  94. package/packages/memory-engine/tests/test_api_contract.sh +57 -0
package/README.md CHANGED
@@ -27,7 +27,7 @@ Two products that share one TES account, one install line, and one dashboard:
27
27
 
28
28
  | Product | What it does | When you want it |
29
29
  |---|---|---|
30
- | **Memory** | Persistent, searchable memory for your AI agent — semantic + keyword retrieval, distillation, decay, repo onboarding. Runs locally (Docker) or hosted (TES). | You want your agent to remember conversations, preferences, and codebase context across sessions. |
30
+ | **Memory** | Persistent, searchable memory for your AI agent — 7-layer hybrid retrieval (BM25 + vector + KG + reranker), repo onboarding via references. Runs locally (Docker) or hosted (TES). | You want your agent to remember conversations, preferences, and codebase context across sessions. |
31
31
  | **Observability** | Wrap your LLM client and capture every call — tokens, tool calls, latency, content. Events flow to TES for the dashboard, analytics, and search attribution. | You want to know what your agent is actually doing in production. |
32
32
 
33
33
  Both products are sold separately, but you can use either, both, or neither. Plugins for **Claude Code** and **OpenClaw** install everything at once if you'd rather skip the SDK glue.
@@ -44,10 +44,9 @@ Both products are sold separately, but you can use either, both, or neither. Plu
44
44
 
45
45
  - [TES — the platform](#tes--the-platform)
46
46
  - [Memory](#memory)
47
- - [Hosted (cloud)](#hosted-cloud)
48
47
  - [Local (self-hosted)](#local-self-hosted)
48
+ - [Hosted (cloud)](#hosted-cloud)
49
49
  - [Use as a library](#use-as-a-library)
50
- - [Distilled memory](#distilled-memory)
51
50
  - [Observability](#observability)
52
51
  - [Wrap your LLM client](#wrap-your-llm-client)
53
52
  - [Supported providers](#supported-providers)
@@ -87,63 +86,155 @@ To check connection state later: `npx @pentatonic-ai/ai-agent-sdk whoami`. To po
87
86
 
88
87
  ## Memory
89
88
 
90
- Persistent, searchable memory for AI agents. Multi-signal retrieval (vector + BM25 + recency + frequency), HyDE query expansion, atomic-fact distillation, and four memory layers (episodic, semantic, procedural, working).
89
+ Persistent, searchable memory for AI agents. Backed by a 7-layer hybrid retrieval engine — BM25 keyword (L0), core files (L1), HybridRAG orchestrator (L2), Knowledge Graph entities (L3), vector index (L4), comms-namespace vectors (L5), and a document store with cross-encoder reranker (L6). Reciprocal Rank Fusion stitches them at query time.
91
90
 
92
- Two deployment modes same API, same plugins, same library:
91
+ Same engine, same wire format (`/store`, `/search`, `/forget`, `/store-batch`, `/health`), two deployment modes:
93
92
 
94
- ### Hosted (cloud)
93
+ ### Local (self-hosted)
94
+
95
+ Run the full engine stack on your own machine via Docker. No API keys, no cloud, fully offline. Embeddings come from your local Ollama; quality depends on the model you pull (768d `nomic-embed-text` is the default and works fine on a laptop).
96
+
97
+ **Prerequisites**
98
+
99
+ - Docker + Docker Compose v2
100
+ - Ollama installed on the host (https://ollama.com)
101
+ - A pulled embedding model: `ollama pull nomic-embed-text`
95
102
 
96
- Run on Pentatonic's infrastructure. Higher-dimensional embeddings (NV-Embed-v2, 4096d), per-tenant Postgres, team-wide shared memory, the dashboard.
103
+ If you'll run Claude Code (or anything else) inside a Docker container that needs to reach the engine, **make Ollama listen on all interfaces** so containers can reach it via `host.docker.internal`:
97
104
 
98
105
  ```bash
99
- # 1. Get a TES account (see [TES — the platform](#tes--the-platform))
100
- npx @pentatonic-ai/ai-agent-sdk login
106
+ sudo mkdir -p /etc/systemd/system/ollama.service.d
107
+ echo -e '[Service]\nEnvironment="OLLAMA_HOST=0.0.0.0:11434"' \
108
+ | sudo tee /etc/systemd/system/ollama.service.d/override.conf
109
+ sudo systemctl daemon-reload
110
+ sudo systemctl restart ollama
111
+ ```
101
112
 
102
- # 2. Install the SDK
103
- npm install @pentatonic-ai/ai-agent-sdk
104
- # or: pip install pentatonic-ai-agent-sdk
113
+ **Bring up the engine**
114
+
115
+ ```bash
116
+ git clone https://github.com/Pentatonic-Ltd/ai-agent-sdk.git
117
+ cd ai-agent-sdk/packages/memory-engine
118
+
119
+ # Default .env points at Ollama on the host. Edit if your Ollama is
120
+ # elsewhere or you want to use a higher-quality model (e.g. mxbai-embed-large
121
+ # at 1024d → set EMBED_DIM=1024 and EMBED_MODEL_NAME=mxbai-embed-large).
122
+ cat > .env <<'EOF'
123
+ PME_NV_EMBED_ENABLED=false
124
+ NV_EMBED_URL=http://host.docker.internal:11434/v1/embeddings
125
+ EMBED_MODEL_NAME=nomic-embed-text
126
+ EMBED_DIM=768
127
+ OLLAMA_DIM=768
128
+ PME_OLLAMA_URL=http://host.docker.internal:11434/api/embeddings
129
+ PME_EMBED_MODEL=nomic-embed-text
130
+ L5_OLLAMA_EMBED_URL=http://host.docker.internal:11434/api/embed
131
+ L5_OLLAMA_EMBED_MODEL=nomic-embed-text
132
+ PME_HYDE_ENABLED=false
133
+ PME_RERANK_ENABLED=true
134
+ PME_PORT=8099
135
+ CLIENT_ID=local
136
+ NEO4J_AUTH=neo4j/local-dev-pw
137
+ NEO4J_PASSWORD=local-dev-pw
138
+ EOF
139
+
140
+ docker compose up -d --scale nv-embed=0
105
141
  ```
106
142
 
107
- That's itmemory operations now go through TES.
143
+ First run pulls images and builds engine containers — ~10–15 min. Subsequent restarts take seconds.
108
144
 
109
- ### Local (self-hosted)
145
+ **Verify**
146
+
147
+ ```bash
148
+ curl -s http://localhost:8099/health | jq
149
+ # Status should be "ok" or "degraded" with most layers reporting ok.
150
+
151
+ curl -sX POST http://localhost:8099/store \
152
+ -H "content-type: application/json" \
153
+ -d '{"content":"hello memory","metadata":{"arena":"local"}}' | jq
154
+
155
+ curl -sX POST http://localhost:8099/search \
156
+ -H "content-type: application/json" \
157
+ -d '{"query":"hello","limit":3,"min_score":0.001}' | jq
158
+ ```
159
+
160
+ If `/search` returns the row from `/store`, the engine is live.
110
161
 
111
- Run the full stack on your own machine. PostgreSQL + pgvector + Ollama in Docker. No API keys, no cloud. Pi 5 with 8GB RAM works fine (`nomic-embed-text` ~300MB + `llama3.2:3b` ~2GB).
162
+ **Connect Claude Code**
163
+
164
+ The `tes-memory` plugin's hooks already speak the engine's wire format. Three steps:
165
+
166
+ 1. Install the plugin (once):
167
+ ```
168
+ /plugin marketplace add Pentatonic-Ltd/ai-agent-sdk
169
+ /plugin install tes-memory@pentatonic-ai
170
+ ```
171
+ 2. Point it at your local engine. Edit `~/.claude-pentatonic/tes-memory.local.md` (create if missing):
172
+ ```yaml
173
+ ---
174
+ mode: local
175
+ memory_url: http://localhost:8099
176
+ ---
177
+ ```
178
+ 3. Reload: `/reload-plugins` (or restart Claude Code if status reports stale state — MCP server processes need a full restart to pick up plugin updates).
179
+
180
+ Verify:
181
+
182
+ ```
183
+ /tes-memory:tes-status
184
+ ```
185
+
186
+ Should report `✓ Connected to local memory engine`. Now every prompt auto-searches engine memory and every turn auto-stores. The footer `🧠 Matched N memories from Pentatonic Memory` shows hits.
187
+
188
+ **Seed memory from your codebase or docs (optional)**
189
+
190
+ Drop the cold-start problem on day one by pre-populating the engine with references to your code/docs:
112
191
 
113
192
  ```bash
114
- npx @pentatonic-ai/ai-agent-sdk memory
193
+ MEMORY_ENGINE_URL=http://localhost:8099 \
194
+ npx @pentatonic-ai/ai-agent-sdk ingest ~/code/my-project
115
195
  ```
116
196
 
117
- This starts Postgres + pgvector, Ollama, and the memory server. It pulls embedding and chat models, and writes the local config.
197
+ References-mode by default — stores path + signature pointers, not full file contents. See [Repository Onboarding](#repository-onboarding-corpus-ingest) for details.
198
+
199
+ **Tuning**
118
200
 
119
- Change models:
201
+ Change embedding model: pull a different one, edit `EMBED_MODEL_NAME` + `EMBED_DIM` in `.env`, then `docker compose down -v && docker compose up -d --scale nv-embed=0` (the `-v` is required because Milvus collections are dim-locked at creation; switching dims means recreating).
202
+
203
+ | Model | Dim | Notes |
204
+ |---|---|---|
205
+ | `nomic-embed-text` (default) | 768 | Smallest; works on any laptop |
206
+ | `mxbai-embed-large` | 1024 | Better recall; ~600 MB download |
207
+ | `nv-embed-v2` (via gateway) | 4096 | Production-grade; needs a hosted endpoint or GPU |
208
+
209
+ ### Hosted (cloud)
210
+
211
+ Run on Pentatonic's infrastructure. NV-Embed-v2 (4096d) embeddings via the AI gateway, managed Postgres/Neo4j/Qdrant/Milvus, dashboard. The engine still ships in this repo — hosted just deploys it for you.
120
212
 
121
213
  ```bash
122
- EMBEDDING_MODEL=mxbai-embed-large LLM_MODEL=qwen2.5:7b npx @pentatonic-ai/ai-agent-sdk memory
214
+ # 1. Get a TES account
215
+ npx @pentatonic-ai/ai-agent-sdk login
216
+
217
+ # 2. Install the SDK
218
+ npm install @pentatonic-ai/ai-agent-sdk
219
+ # or: pip install pentatonic-ai-agent-sdk
123
220
  ```
124
221
 
222
+ Memory operations route through TES → engine. No client-side change between local and hosted.
223
+
125
224
  ### Use as a library
126
225
 
127
226
  ```javascript
128
- import { createMemorySystem } from '@pentatonic-ai/ai-agent-sdk/memory';
227
+ import { engineAdapter, ingestCorpus } from '@pentatonic-ai/ai-agent-sdk/memory/corpus';
129
228
 
130
- const memory = createMemorySystem({
131
- db: pgPool,
132
- embedding: { url: 'http://localhost:11434/v1', model: 'nomic-embed-text' },
133
- llm: { url: 'http://localhost:11434/v1', model: 'llama3.2:3b' },
229
+ const adapter = engineAdapter({
230
+ engineUrl: 'http://localhost:8099',
231
+ arena: 'my-app',
134
232
  });
135
-
136
- await memory.migrate();
137
- await memory.ensureLayers('my-app');
138
- await memory.ingest('User prefers dark mode', { clientId: 'my-app' });
139
- const results = await memory.search('preferences', { clientId: 'my-app' });
233
+ await adapter.init();
234
+ await adapter.ingestChunk('User prefers dark mode', { kind: 'note' });
140
235
  ```
141
236
 
142
- ### Distilled memory
143
-
144
- A background LLM pass extracts atomic facts from each raw turn and stores each as its own node in the semantic layer, linked back to the source. A query like *"what does Phil drink?"* matches *"Phil drinks cortado"* more reliably than a mixed paragraph covering food, drinks, and hobbies. Default-on; the raw turn is still preserved.
145
-
146
- > **Store latency note (v0.5.4+):** on the local memory server, `store_memory` now awaits distillation before returning instead of running it fire-and-forget. This fixed a bug where distillation was being killed mid-flight (atoms never got embeddings, so they were unreachable by semantic search), but it means stores now take as long as your configured LLM takes to produce atoms — typically 5–30s on `llama3.2:3b`, up to the `chat()` timeout ceiling (60s default, overridable via `opts.timeout`). Cloudflare Worker deployments pass `ctx.waitUntil` and still return fast. Set `opts.distill: false` on the ingest call if you want the old fast-return behaviour at the cost of no atoms.
237
+ For raw `/search` and `/store`, just `fetch()` against `${engineUrl}/search` etc. The wire format is documented in `packages/memory-engine/docs/MIGRATION.md`.
147
238
 
148
239
  ---
149
240
 
@@ -216,17 +307,26 @@ Works with both local and hosted memory. Install once, switch modes via config.
216
307
  /plugin install tes-memory@pentatonic-ai
217
308
  ```
218
309
 
219
- For hosted TES, run `npx @pentatonic-ai/ai-agent-sdk login` once in your terminal the plugin's MCP server, hooks, and tools all auto-discover the credentials written to `~/.config/tes/credentials.json`. To verify the connection later, ask Claude `/tes-memory:tes-status`.
310
+ **Local engine** — bring up the engine first ([Memory > Local](#local-self-hosted)), then point the plugin at it. Edit `~/.claude-pentatonic/tes-memory.local.md`:
311
+
312
+ ```yaml
313
+ ---
314
+ mode: local
315
+ memory_url: http://localhost:8099
316
+ ---
317
+ ```
318
+
319
+ **Hosted TES** — run `login` once, the plugin auto-discovers `~/.config/tes/credentials.json`:
220
320
 
221
- For local memory:
222
321
  ```bash
223
- npx @pentatonic-ai/ai-agent-sdk memory
322
+ npx @pentatonic-ai/ai-agent-sdk login
224
323
  ```
225
324
 
226
- **What it tracks:**
227
- - Every conversation turn — user messages, assistant responses, tool calls, duration
228
- - Automatic memory search — relevant memories injected as context on every prompt
229
- - Automatic memory storage every turn stored with embeddings and HyDE queries
325
+ Either way, verify with `/tes-memory:tes-status` in Claude Code. The plugin's MCP server, hooks, and tools all read the same config.
326
+
327
+ **What it tracks (auto, every turn):**
328
+ - Memory search at prompt time — relevant memories injected as context
329
+ - Memory store at turn end — every conversation turn persisted
230
330
  - Token usage — input, output, cache read, cache creation tokens per turn
231
331
 
232
332
  ### OpenClaw
@@ -249,7 +349,7 @@ Or use the CLI directly:
249
349
  openclaw pentatonic-memory local
250
350
  ```
251
351
 
252
- **What it does:** OpenClaw's context engine hooks fire on every lifecycle event — `ingest` stores user/assistant messages with embeddings + HyDE + distillation; `assemble` injects relevant memories as system-prompt context before every model run; `compact` runs the decay cycle when the context window fills; `after-turn` consolidates high-access memories into the semantic layer. Plus agent-callable tools: `memory_search`, `memory_store`, `memory_layers`.
352
+ **What it does:** OpenClaw's context engine hooks fire on every lifecycle event — `ingest` stores user/assistant messages via the engine's `/store` endpoint (BM25 + vector + KG indexing in parallel); `assemble` calls `/search` to inject relevant memories as system-prompt context; `compact` and `after-turn` are managed by the engine's own decay/consolidation. Plus agent-callable tools: `memory_search`, `memory_store`, `memory_layers`.
253
353
 
254
354
  After setup, config lives in `~/.openclaw/pentatonic-memory.json`. To switch modes, run setup again or edit directly.
255
355
 
@@ -263,11 +363,7 @@ You can also configure via `openclaw.json`:
263
363
  "pentatonic-memory": {
264
364
  "enabled": true,
265
365
  "config": {
266
- "database_url": "postgres://memory:memory@localhost:5433/memory",
267
- "embedding_url": "http://localhost:11435/v1",
268
- "embedding_model": "nomic-embed-text",
269
- "llm_url": "http://localhost:11435/v1",
270
- "llm_model": "llama3.2:3b"
366
+ "memory_url": "http://localhost:8099"
271
367
  }
272
368
  }
273
369
  }
@@ -396,9 +492,11 @@ import { normalizeResponse } from "@pentatonic-ai/ai-agent-sdk";
396
492
  const { content, model, usage, toolCalls } = normalizeResponse(openaiResponse);
397
493
  ```
398
494
 
399
- ### `createMemorySystem(deps)` — Memory
495
+ ### `engineAdapter(config)` — Memory
496
+
497
+ Thin HTTP client for the memory engine. `config = { engineUrl, arena, apiKey? }`. Returns `{ ingestChunk(content, metadata), deleteByCorpusFile(repoAbs, relPath), init() }`. See [Use as a library](#use-as-a-library).
400
498
 
401
- Returns a memory instance with `.migrate()`, `.ensureLayers(clientId)`, `.ingest(content, opts)`, `.search(query, opts)`, and more. See [Use as a library](#use-as-a-library).
499
+ For raw `/store` / `/search` calls, just `fetch()` against `${engineUrl}` directly — the wire format is documented in `packages/memory-engine/docs/MIGRATION.md`.
402
500
 
403
501
  ---
404
502
 
@@ -424,9 +522,9 @@ npx @pentatonic-ai/ai-agent-sdk doctor --path local
424
522
  What gets checked:
425
523
 
426
524
  - **Universal** — Node version, disk space, SDK config-file permissions
427
- - **Local Memory** — Postgres + pgvector + migrations, embedding/LLM endpoints, memory server port
525
+ - **Local engine** — engine `/health`, per-layer health (L0–L6), embedding endpoint reachability
428
526
  - **Hosted TES** — endpoint reachable, API key authenticates
429
- - **Self-hosted platform** — HybridRAG, Qdrant, Neo4j, vLLM (each optional, skipped when its env var is unset)
527
+ - **Plugin config** — `tes-memory.local.md` parses, `memory_url` reachable
430
528
 
431
529
  ### Plugins
432
530
 
@@ -458,24 +556,27 @@ See [`packages/doctor/README.md`](packages/doctor/README.md) for the full plugin
458
556
  ## Architecture
459
557
 
460
558
  ```
461
- Your code
462
- |
463
- +---------------+---------------+
464
- | |
465
- Memory product Observability product
466
- (createMemorySystem) (TESClient.wrap)
467
- | |
468
- | |
469
- +----+----+ |
470
- | | |
471
- Local Hosted ---------------------- TES
472
- (Docker) (Cloudflare cloud)
473
- | |
474
- PG+pgvector PG, R2, Queues,
475
- + Ollama Workers, Modules
476
- (deep-memory,
477
- conversation-
478
- analytics, )
559
+ Your code / Claude Code plugin / OpenClaw plugin
560
+ |
561
+ +-------------------+--------------------+
562
+ | |
563
+ Memory product Observability product
564
+ (engine HTTP API) (TESClient.wrap)
565
+ | |
566
+ | POST /store /search /forget | CHAT_TURN events
567
+ ▼ ▼
568
+ +----------------+ +-----------------+
569
+ | memory engine | | TES |
570
+ | (compat shim) | | (Cloudflare) |
571
+ +----------------+ | Workers, R2, |
572
+ | | Queues, Pages |
573
+ +----------+----------+ +--------+--------+
574
+ | | |
575
+ Local Hosted ---------------------------+
576
+ (your machine) (Pentatonic-managed)
577
+ | |
578
+ docker compose AWS/GCP container cluster
579
+ + host Ollama + AI gateway (NV-Embed-v2)
479
580
  ```
480
581
 
481
582
  Plugins (Claude Code, OpenClaw) are lightweight integrations on top of both products — they call into memory and emit observability events on the user's behalf.
@@ -1,7 +1,10 @@
1
1
  import { startCallbackServer } from "../lib/callback-server.js";
2
2
 
3
3
  async function fetchCallback(port, qs) {
4
- const url = `http://localhost:${port}/callback?${qs}`;
4
+ // Use 127.0.0.1 not "localhost" — undici (Node 18+) resolves localhost to
5
+ // ::1 first, but the server binds to 127.0.0.1 only, so on IPv6-preferring
6
+ // hosts (GitHub Actions runners) the IPv6 attempt ECONNREFUSEs.
7
+ const url = `http://127.0.0.1:${port}/callback?${qs}`;
5
8
  const res = await fetch(url);
6
9
  return { status: res.status, text: await res.text() };
7
10
  }
package/bin/cli.js CHANGED
@@ -1,10 +1,6 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  import { createInterface } from "readline";
4
- import { execFileSync } from "child_process";
5
- import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
6
- import { join } from "path";
7
- import { homedir } from "os";
8
4
 
9
5
  const DEFAULT_ENDPOINT = "https://api.pentatonic.com";
10
6
 
@@ -31,10 +27,10 @@ function parseArgs() {
31
27
  flags.alert = true;
32
28
  } else if (a === "--no-plugins") {
33
29
  flags.noPlugins = true;
34
- } else if (a === "--local") {
35
- flags.local = true;
36
- } else if (a === "--remote") {
37
- flags.remote = true;
30
+ } else if (a === "--engine-url" && args[i + 1]) {
31
+ flags.engineUrl = args[++i];
32
+ } else if (a.startsWith("--engine-url=")) {
33
+ flags.engineUrl = a.split("=")[1];
38
34
  } else if (!a.startsWith("--")) {
39
35
  // First non-flag arg is the command; subsequent ones are subcommand
40
36
  // arguments handled by the dispatched cmd (e.g. `ingest <path>`).
@@ -77,124 +73,14 @@ function ask(question) {
77
73
  return new Promise((resolve) => rl.question(question, resolve));
78
74
  }
79
75
 
80
- function spinner(text) {
81
- const frames = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];
82
- let i = 0;
83
- const id = setInterval(() => {
84
- process.stdout.write(`\r${frames[i++ % frames.length]} ${text}`);
85
- }, 80);
86
- return {
87
- stop(result) {
88
- clearInterval(id);
89
- process.stdout.write(`\r✓ ${result}\n`);
90
- },
91
- fail(msg) {
92
- clearInterval(id);
93
- process.stdout.write(`\r✗ ${msg}\n`);
94
- },
95
- };
96
- }
97
-
98
- async function setupLocalMemory() {
99
- console.log(`\n Local Memory Setup\n`);
100
-
101
- // Check Docker
102
- try {
103
- execFileSync("docker", ["info"], { stdio: "pipe" });
104
- } catch {
105
- console.error(" Error: Docker is required. Install it from https://docker.com\n");
106
- process.exit(1);
107
- }
108
-
109
- const memoryDir = new URL("../packages/memory", import.meta.url).pathname;
110
-
111
- // Start infrastructure + memory server
112
- const infraSpinner = spinner("Starting memory server + PostgreSQL + Ollama...");
113
- try {
114
- execFileSync("docker", ["compose", "up", "-d", "memory", "postgres", "ollama"], {
115
- cwd: memoryDir,
116
- stdio: "pipe",
117
- });
118
- infraSpinner.stop("Memory stack running!");
119
- } catch (err) {
120
- infraSpinner.fail(`Failed to start: ${err.message}`);
121
- process.exit(1);
122
- }
123
-
124
- // Pull models
125
- const embModel = process.env.EMBEDDING_MODEL || "nomic-embed-text";
126
- const llmModel = process.env.LLM_MODEL || "llama3.2:3b";
127
-
128
- const embSpinner = spinner(`Pulling ${embModel}...`);
129
- try {
130
- execFileSync("docker", ["compose", "exec", "ollama", "ollama", "pull", embModel], {
131
- cwd: memoryDir,
132
- stdio: "pipe",
133
- });
134
- embSpinner.stop(`${embModel} ready!`);
135
- } catch {
136
- embSpinner.fail(`Failed to pull ${embModel}. Run manually: docker compose exec ollama ollama pull ${embModel}`);
137
- }
138
-
139
- const llmSpinner = spinner(`Pulling ${llmModel}...`);
140
- try {
141
- execFileSync("docker", ["compose", "exec", "ollama", "ollama", "pull", llmModel], {
142
- cwd: memoryDir,
143
- stdio: "pipe",
144
- });
145
- llmSpinner.stop(`${llmModel} ready!`);
146
- } catch {
147
- llmSpinner.fail(`Failed to pull ${llmModel}. Run manually: docker compose exec ollama ollama pull ${llmModel}`);
148
- }
149
-
150
- // Write local config (warn if hosted config exists)
151
- const configDir = join(homedir(), ".claude-pentatonic");
152
- if (!existsSync(configDir)) {
153
- mkdirSync(configDir, { recursive: true });
154
- }
155
-
156
- const configPath = join(configDir, "tes-memory.local.md");
157
- if (existsSync(configPath)) {
158
- const existing = readFileSync(configPath, "utf-8");
159
- if (existing.includes("tes_endpoint") && !existing.includes("mode: local")) {
160
- console.log("\n ⚠ Hosted TES config detected. Switching to local mode will");
161
- console.log(" disable hosted memory. To restore, run: npx @pentatonic-ai/ai-agent-sdk init\n");
162
- const confirm = await ask(" Switch to local mode? (y/n): ");
163
- if (confirm.toLowerCase() !== "y") {
164
- console.log(" Cancelled. Hosted config unchanged.\n");
165
- rl.close();
166
- return;
167
- }
168
- }
169
- }
170
-
171
- writeFileSync(
172
- configPath,
173
- `---
174
- mode: local
175
- memory_url: http://localhost:3333
176
- ---
177
- `
178
- );
179
-
180
- console.log(`\n Config written to ${configPath}`);
181
-
182
- const sdkDir = new URL("..", import.meta.url).pathname;
183
-
184
- console.log(`
185
- Memory server: http://localhost:3333
186
- Hooks are auto-configured to use local memory.
187
-
188
- Install the plugin in Claude Code:
189
- /plugin marketplace add Pentatonic-Ltd/ai-agent-sdk
190
- /plugin install tes-memory@pentatonic-ai
191
-
192
- You're ready! Every prompt auto-searches memory,
193
- every turn auto-stores. No MCP setup needed.
194
- `);
195
-
196
- rl.close();
197
- }
76
+ // setupLocalMemory + its `spinner` helper were the legacy "bring up
77
+ // Postgres + Ollama" wrapper for the in-process memory server. Removed
78
+ // in favour of:
79
+ // - `tes config local` — writes the plugin config + prints engine
80
+ // bring-up instructions
81
+ // - `cd packages/memory-engine && docker compose up -d` → runs the
82
+ // actual engine
83
+ // `ask` is kept for any future interactive prompts.
198
84
 
199
85
 
200
86
  async function main() {
@@ -233,11 +119,23 @@ async function main() {
233
119
  process.exit(exitCode);
234
120
  }
235
121
 
236
- // `memory` is kept as a shortcut to skip the local-or-remote question
237
- // for users with that command in scripts/docs. New users should use init.
238
- if (flags.command === "memory") {
239
- await setupLocalMemory();
240
- return;
122
+ // tes config <local|hosted|show> point Claude Code's tes-memory
123
+ // plugin at a memory backend, or inspect what's configured. Each
124
+ // subcommand is a thin scaffold:
125
+ // local → write mode: local + memory_url; print engine bring-up steps
126
+ // hosted → run the login flow (delegates to runLoginCommand)
127
+ // show → read and print the current plugin config
128
+ // Future: `tes config set <key> <value>` for engine env-var tweaks.
129
+ if (flags.command === "config") {
130
+ const sub = process.argv.slice(3).find((a) => !a.startsWith("--"));
131
+ const { runConfigCommand } = await import("./commands/config.js");
132
+ const { exitCode } = await runConfigCommand({
133
+ sub,
134
+ endpoint: TES_ENDPOINT,
135
+ engineUrl: flags.engineUrl,
136
+ });
137
+ rl.close();
138
+ process.exit(exitCode);
241
139
  }
242
140
 
243
141
  // Corpus subcommands — onboarding/repo ingest (spec 01)
@@ -268,18 +166,20 @@ async function main() {
268
166
  process.exit(code);
269
167
  }
270
168
 
271
- if (flags.command !== "init") {
272
- console.log(`
169
+ console.log(`
273
170
  @pentatonic-ai/ai-agent-sdk
274
171
 
275
172
  Usage:
276
- npx @pentatonic-ai/ai-agent-sdk login Sign in with TES (browser-based OAuth)
173
+ npx @pentatonic-ai/ai-agent-sdk login First-time hosted setup: browser sign-in + writes credentials
277
174
  npx @pentatonic-ai/ai-agent-sdk whoami Show current login identity
278
- npx @pentatonic-ai/ai-agent-sdk init [deprecated] Alias for 'login'
279
- npx @pentatonic-ai/ai-agent-sdk init --local Set up local Docker memory stack
280
- npx @pentatonic-ai/ai-agent-sdk memory Shortcut for 'init --local'
175
+ npx @pentatonic-ai/ai-agent-sdk config <sub> Configure memory backend; see 'config --help'
281
176
  npx @pentatonic-ai/ai-agent-sdk doctor Run health checks (exit 0/1/2)
282
177
 
178
+ config subcommands:
179
+ config local Point plugin at a local memory engine
180
+ config hosted Switch to hosted (delegates to login)
181
+ config show Print current plugin config + creds
182
+
283
183
  Memory corpus (onboarding):
284
184
  npx @pentatonic-ai/ai-agent-sdk onboard Interactive: pick paths, ingest, install hooks
285
185
  npx @pentatonic-ai/ai-agent-sdk ingest <path> One-shot ingest of a path (any folder works)
@@ -290,8 +190,8 @@ Memory corpus (onboarding):
290
190
  npx @pentatonic-ai/ai-agent-sdk corpus reset Wipe local corpus state
291
191
  npx @pentatonic-ai/ai-agent-sdk install-git-hook Install post-commit hook in cwd
292
192
 
293
- Tenant for corpus commands is read from these env vars:
294
- TES_ENDPOINT, TES_CLIENT_ID, TES_API_KEY
193
+ Corpus commands route to the backend configured via 'config' (local engine
194
+ or hosted TES). Override with env vars: MEMORY_ENGINE_URL, TES_ENDPOINT,
295
195
 
296
196
  doctor flags:
297
197
  --json Emit a JSON report
@@ -301,31 +201,8 @@ doctor flags:
301
201
  --timeout <ms> Per-check timeout (default 10000)
302
202
 
303
203
  For docs, see https://api.pentatonic.com
304
- `);
305
- process.exit(0);
306
- }
307
-
308
- // init: --local still routes to setupLocalMemory (Docker stack —
309
- // separate concern). Anything else (no flag, --remote, mode prompt)
310
- // delegates to login via runInitAlias which emits a one-line
311
- // deprecation warning. setupHostedTes (the old form-based hosted
312
- // flow) is gone; init has been replaced by `login` for one major
313
- // release, then `init` itself goes away.
314
- if (flags.local && flags.remote) {
315
- console.error("\n Error: --local and --remote are mutually exclusive\n");
316
- process.exit(1);
317
- }
318
- if (flags.local) {
319
- await setupLocalMemory();
320
- return;
321
- }
322
- // Non-local path → login alias.
323
- const { runInitAlias } = await import("./commands/login.js");
324
- const { exitCode } = await runInitAlias({
325
- endpoint: TES_ENDPOINT,
326
- });
327
- rl.close();
328
- process.exit(exitCode);
204
+ `);
205
+ process.exit(0);
329
206
  }
330
207
 
331
208
  main().catch((err) => {