@pentatonic-ai/ai-agent-sdk 0.8.6 → 0.8.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/index.cjs +1 -1
- package/dist/index.js +1 -1
- package/package.json +1 -1
- package/packages/memory/README.md +33 -0
- package/packages/memory/openclaw-plugin/README.md +25 -0
- package/packages/memory/openclaw-plugin/openclaw.plugin.json +15 -8
- package/packages/memory/openclaw-plugin/package.json +1 -1
- package/packages/memory/src/server.js +16 -0
- package/packages/memory-engine/MIGRATION.md +219 -0
- package/packages/memory-engine/README.md +20 -6
package/README.md
CHANGED
|
@@ -238,7 +238,7 @@ await adapter.init();
|
|
|
238
238
|
await adapter.ingestChunk('User prefers dark mode', { kind: 'note' });
|
|
239
239
|
```
|
|
240
240
|
|
|
241
|
-
For raw `/search` and `/store`, just `fetch()` against `${engineUrl}/search` etc. The wire format is documented in `packages/memory-engine/
|
|
241
|
+
For raw `/search` and `/store`, just `fetch()` against `${engineUrl}/search` etc. The wire format is documented in `packages/memory-engine/MIGRATION.md`.
|
|
242
242
|
|
|
243
243
|
---
|
|
244
244
|
|
|
@@ -504,7 +504,7 @@ const { content, model, usage, toolCalls } = normalizeResponse(openaiResponse);
|
|
|
504
504
|
|
|
505
505
|
Thin HTTP client for the memory engine. `config = { engineUrl, arena, apiKey? }`. Returns `{ ingestChunk(content, metadata), deleteByCorpusFile(repoAbs, relPath), init() }`. See [Use as a library](#use-as-a-library).
|
|
506
506
|
|
|
507
|
-
For raw `/store` / `/search` calls, just `fetch()` against `${engineUrl}` directly — the wire format is documented in `packages/memory-engine/
|
|
507
|
+
For raw `/store` / `/search` calls, just `fetch()` against `${engineUrl}` directly — the wire format is documented in `packages/memory-engine/MIGRATION.md`.
|
|
508
508
|
|
|
509
509
|
---
|
|
510
510
|
|
package/dist/index.cjs
CHANGED
|
@@ -906,7 +906,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
|
|
|
906
906
|
}
|
|
907
907
|
|
|
908
908
|
// src/telemetry.js
|
|
909
|
-
var VERSION = "0.8.
|
|
909
|
+
var VERSION = "0.8.7";
|
|
910
910
|
var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
|
|
911
911
|
function machineId() {
|
|
912
912
|
const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
|
package/dist/index.js
CHANGED
|
@@ -875,7 +875,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
|
|
|
875
875
|
}
|
|
876
876
|
|
|
877
877
|
// src/telemetry.js
|
|
878
|
-
var VERSION = "0.8.
|
|
878
|
+
var VERSION = "0.8.7";
|
|
879
879
|
var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
|
|
880
880
|
function machineId() {
|
|
881
881
|
const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pentatonic-ai/ai-agent-sdk",
|
|
3
|
-
"version": "0.8.
|
|
3
|
+
"version": "0.8.7",
|
|
4
4
|
"description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -1,5 +1,38 @@
|
|
|
1
1
|
# Memory System
|
|
2
2
|
|
|
3
|
+
> ## ⚠️ DEPRECATED — use the 7-layer memory engine instead
|
|
4
|
+
>
|
|
5
|
+
> This package is the **legacy** single-process MCP server backed by
|
|
6
|
+
> PostgreSQL + pgvector + Ollama. It's superseded by the **7-layer
|
|
7
|
+
> memory engine** at [`packages/memory-engine/`](../memory-engine/),
|
|
8
|
+
> which is what the top-level SDK README walks users into and what
|
|
9
|
+
> TES production runs.
|
|
10
|
+
>
|
|
11
|
+
> | | This package (legacy) | `packages/memory-engine/` (current) |
|
|
12
|
+
> |---|---|---|
|
|
13
|
+
> | Wire | MCP over stdio | HTTP (`/store`, `/search`, ...) |
|
|
14
|
+
> | Storage | One Postgres table, one embedding | 7 layers fused via RRF |
|
|
15
|
+
> | Features | HyDE expansion | KG entities (L3), cross-encoder reranker (L6), multi-collection (L5), arena scoping, /health/deep |
|
|
16
|
+
> | Bench accuracy | (not benched) | 84.6% / p50 110ms |
|
|
17
|
+
> | Deploys | Single node process | docker compose stack |
|
|
18
|
+
>
|
|
19
|
+
> **What still works:** this server keeps running until v1.0. Existing
|
|
20
|
+
> deployments continue to function; no breaking changes here. A startup
|
|
21
|
+
> warning prints to stderr on every cold-start (suppress with
|
|
22
|
+
> `PENTATONIC_DEPRECATION_QUIET=1`).
|
|
23
|
+
>
|
|
24
|
+
> **What to do:** new installs should follow the engine path — see the
|
|
25
|
+
> top-level [README → Memory → Local](../../README.md#local-self-hosted)
|
|
26
|
+
> section. Existing installs can keep running this server through the
|
|
27
|
+
> v0.9 line; migration guidance for v1.0 will land before then.
|
|
28
|
+
>
|
|
29
|
+
> **Why deprecate:** the engine subsumes every feature this server
|
|
30
|
+
> offers (and adds graph, reranker, multi-store fusion), so maintaining
|
|
31
|
+
> both paths fragments testing, security review, and operator-facing
|
|
32
|
+
> docs without a payoff. One product is clearer than two.
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
3
36
|
Self-hosted memory system for AI agents. Give Claude Code or OpenClaw persistent, searchable memory backed by PostgreSQL, pgvector, and Ollama. Fully local — no API keys, no cloud dependencies.
|
|
4
37
|
|
|
5
38
|
## What You Get
|
|
@@ -2,6 +2,31 @@
|
|
|
2
2
|
|
|
3
3
|
Persistent, searchable memory for OpenClaw. Local (Docker + Ollama) or hosted (Pentatonic TES).
|
|
4
4
|
|
|
5
|
+
> ## ⚠️ The local-mode config below targets a deprecated backend
|
|
6
|
+
>
|
|
7
|
+
> The `database_url` / `embedding_url` / `llm_url` config fields shown
|
|
8
|
+
> in this README configure the **legacy** `packages/memory/` Postgres+
|
|
9
|
+
> Ollama+pgvector MCP server, which is being retired in favour of the
|
|
10
|
+
> 7-layer memory engine at `packages/memory-engine/`. Both backends
|
|
11
|
+
> still work; the legacy one will be removed in v1.0.
|
|
12
|
+
>
|
|
13
|
+
> **For new installs, prefer:**
|
|
14
|
+
>
|
|
15
|
+
> ```json
|
|
16
|
+
> "pentatonic-memory": {
|
|
17
|
+
> "enabled": true,
|
|
18
|
+
> "config": {
|
|
19
|
+
> "mode": "local",
|
|
20
|
+
> "memory_url": "http://localhost:8099"
|
|
21
|
+
> }
|
|
22
|
+
> }
|
|
23
|
+
> ```
|
|
24
|
+
>
|
|
25
|
+
> …with the engine brought up via `docker compose up -d` from
|
|
26
|
+
> `packages/memory-engine/`. See the
|
|
27
|
+
> [top-level SDK README](../../../README.md#local-self-hosted) for the
|
|
28
|
+
> walkthrough.
|
|
29
|
+
|
|
5
30
|
## Install
|
|
6
31
|
|
|
7
32
|
```bash
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"id": "pentatonic-memory",
|
|
3
3
|
"name": "Pentatonic Memory",
|
|
4
4
|
"description": "Persistent, searchable memory with multi-signal retrieval and HyDE query expansion. Local (Docker + Ollama) or hosted (Pentatonic TES).",
|
|
5
|
-
"version": "0.8.
|
|
5
|
+
"version": "0.8.6",
|
|
6
6
|
"kind": "context-engine",
|
|
7
7
|
"configSchema": {
|
|
8
8
|
"type": "object",
|
|
@@ -16,28 +16,35 @@
|
|
|
16
16
|
},
|
|
17
17
|
"memory_url": {
|
|
18
18
|
"type": "string",
|
|
19
|
-
"default": "http://localhost:
|
|
20
|
-
"description": "Memory
|
|
19
|
+
"default": "http://localhost:8099",
|
|
20
|
+
"description": "Memory engine HTTP URL (local mode). Default 8099 = packages/memory-engine compat shim. (3333 was the legacy single-process MCP server port — deprecated.)"
|
|
21
21
|
},
|
|
22
22
|
"database_url": {
|
|
23
23
|
"type": "string",
|
|
24
|
-
"description": "PostgreSQL connection string
|
|
24
|
+
"description": "PostgreSQL connection string. DEPRECATED — targets the legacy packages/memory MCP server, retired in favor of memory_url + the 7-layer engine. Removal targeted v1.0.",
|
|
25
|
+
"deprecated": true
|
|
25
26
|
},
|
|
26
27
|
"embedding_url": {
|
|
27
28
|
"type": "string",
|
|
28
|
-
"description": "OpenAI-compatible embeddings endpoint
|
|
29
|
+
"description": "OpenAI-compatible embeddings endpoint. DEPRECATED — same scope as database_url; the engine manages its own embedding routing via L*_EMBED_PROVIDER server-side.",
|
|
30
|
+
"deprecated": true
|
|
29
31
|
},
|
|
30
32
|
"embedding_model": {
|
|
31
33
|
"type": "string",
|
|
32
|
-
"default": "nomic-embed-text"
|
|
34
|
+
"default": "nomic-embed-text",
|
|
35
|
+
"description": "Legacy-mode embedding model. DEPRECATED — paired with database_url.",
|
|
36
|
+
"deprecated": true
|
|
33
37
|
},
|
|
34
38
|
"llm_url": {
|
|
35
39
|
"type": "string",
|
|
36
|
-
"description": "OpenAI-compatible chat endpoint for HyDE
|
|
40
|
+
"description": "OpenAI-compatible chat endpoint for HyDE. DEPRECATED — HyDE is not used by the 7-layer engine; legacy-mode only.",
|
|
41
|
+
"deprecated": true
|
|
37
42
|
},
|
|
38
43
|
"llm_model": {
|
|
39
44
|
"type": "string",
|
|
40
|
-
"default": "llama3.2:3b"
|
|
45
|
+
"default": "llama3.2:3b",
|
|
46
|
+
"description": "Legacy-mode chat model for HyDE. DEPRECATED — paired with llm_url.",
|
|
47
|
+
"deprecated": true
|
|
41
48
|
},
|
|
42
49
|
"tes_endpoint": {
|
|
43
50
|
"type": "string",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pentatonic-ai/openclaw-memory-plugin",
|
|
3
|
-
"version": "0.8.
|
|
3
|
+
"version": "0.8.6",
|
|
4
4
|
"description": "Pentatonic Memory plugin for OpenClaw — persistent, searchable memory with multi-signal retrieval and HyDE query expansion",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|
|
@@ -40,6 +40,22 @@ process.on("unhandledRejection", (err) => {
|
|
|
40
40
|
process.stderr.write(`[memory-server] Unhandled rejection: ${err?.message || err}\n`);
|
|
41
41
|
});
|
|
42
42
|
|
|
43
|
+
// Deprecation notice — see packages/memory/README.md for context.
|
|
44
|
+
// This MCP server (Postgres+pgvector+Ollama, single-process) is being
|
|
45
|
+
// retired in favour of the 7-layer engine at packages/memory-engine/.
|
|
46
|
+
// Targeted for removal in v1.0; in the meantime everything keeps
|
|
47
|
+
// working. Print once on startup so operators see the signal in logs
|
|
48
|
+
// without flooding the conversation surface.
|
|
49
|
+
if (process.env.PENTATONIC_DEPRECATION_QUIET !== "1") {
|
|
50
|
+
process.stderr.write(
|
|
51
|
+
"[memory-server] DEPRECATED: this server (Postgres+pgvector+Ollama MCP) " +
|
|
52
|
+
"is superseded by the 7-layer memory engine at packages/memory-engine/. " +
|
|
53
|
+
"Existing deployments keep working; removal targeted for v1.0. " +
|
|
54
|
+
"See README → Memory → Local for the migration path. " +
|
|
55
|
+
"Suppress this warning with PENTATONIC_DEPRECATION_QUIET=1.\n"
|
|
56
|
+
);
|
|
57
|
+
}
|
|
58
|
+
|
|
43
59
|
const CLIENT_ID = process.env.CLIENT_ID || "default";
|
|
44
60
|
|
|
45
61
|
function createMemory() {
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
# Wire format & migration
|
|
2
|
+
|
|
3
|
+
`pentatonic-memory-engine` is a drop-in replacement for `pentatonic-memory` v0.5.x.
|
|
4
|
+
Same HTTP API, same request/response shapes — your existing SDK client code
|
|
5
|
+
keeps working. This doc covers:
|
|
6
|
+
|
|
7
|
+
1. **Wire format** — what every endpoint accepts and returns
|
|
8
|
+
2. **What changed vs v0.5.x** — new endpoints + small additive fields
|
|
9
|
+
3. **Operational notes** — per-layer health, deep healthchecks, env vars
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Wire format
|
|
14
|
+
|
|
15
|
+
All endpoints accept and return JSON. Base URL is the compat-shim host
|
|
16
|
+
(default `http://localhost:8099`).
|
|
17
|
+
|
|
18
|
+
### `POST /store`
|
|
19
|
+
|
|
20
|
+
Write a memory.
|
|
21
|
+
|
|
22
|
+
```json
|
|
23
|
+
{
|
|
24
|
+
"content": "User prefers dark mode",
|
|
25
|
+
"metadata": {
|
|
26
|
+
"arena": "my-app", // tenant-scoping key. Defaults to "default".
|
|
27
|
+
"kind": "note", // optional; opaque, surfaced on read
|
|
28
|
+
"source_file": "config.md", // optional; routed to L1/L6 paths
|
|
29
|
+
"contact_email": "...", // optional; triggers L3 Person extraction
|
|
30
|
+
"contact_name": "...", // optional; same
|
|
31
|
+
"channel": "email", // optional; tags ChannelStat denormalisation
|
|
32
|
+
"direction": "inbound" // optional; same
|
|
33
|
+
},
|
|
34
|
+
"client_id": "my-app" // optional; alternative to metadata.arena
|
|
35
|
+
}
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Response:
|
|
39
|
+
|
|
40
|
+
```json
|
|
41
|
+
{
|
|
42
|
+
"id": "cc830b145b1e36514f73fd508aac885a",
|
|
43
|
+
"content": "User prefers dark mode",
|
|
44
|
+
"layerId": "ml_my-app_episodic",
|
|
45
|
+
"engine": {
|
|
46
|
+
"l0": 1, "l4_qmd": 1, "l4": 1, "l5": 1, "l6": 1,
|
|
47
|
+
"l3_chunks": 0, "l3_entities": 0 // non-zero when metadata.contact_* present
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### `POST /store-batch`
|
|
53
|
+
|
|
54
|
+
Same body as `/store` but `content` is replaced with `records: [{content, metadata}, ...]`.
|
|
55
|
+
30–50× faster than calling `/store` per record because L2 issues one
|
|
56
|
+
batched `/v1/embeddings` call instead of one per record.
|
|
57
|
+
|
|
58
|
+
### `POST /search`
|
|
59
|
+
|
|
60
|
+
Hybrid search across all 7 layers, RRF-fused.
|
|
61
|
+
|
|
62
|
+
```json
|
|
63
|
+
{
|
|
64
|
+
"query": "dark mode preferences",
|
|
65
|
+
"limit": 10, // default 16
|
|
66
|
+
"min_score": 0.3, // default 0; filter low-similarity hits
|
|
67
|
+
"client_id": "my-app", // tenant scope
|
|
68
|
+
"user_id": "alice@example.com", // optional; spans clientId AND clientId:user_id arenas
|
|
69
|
+
"method": "hybrid" // hybrid (default) | vector | bm25
|
|
70
|
+
}
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Response:
|
|
74
|
+
|
|
75
|
+
```json
|
|
76
|
+
{
|
|
77
|
+
"results": [
|
|
78
|
+
{
|
|
79
|
+
"id": "cc830b145b1e36514f73fd508aac885a.md",
|
|
80
|
+
"content": "User prefers dark mode",
|
|
81
|
+
"metadata": { "arena": "my-app", "kind": "note", ... },
|
|
82
|
+
"similarity": 0.87,
|
|
83
|
+
"layer_id": "ml_my-app_episodic",
|
|
84
|
+
"source": "cc830b145b1e36514f73fd508aac885a.md",
|
|
85
|
+
"engine_layer": "" // which layer produced this hit (l0/l3/l4/l5/l6)
|
|
86
|
+
}
|
|
87
|
+
]
|
|
88
|
+
}
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### `POST /forget`
|
|
92
|
+
|
|
93
|
+
Delete memories matching a filter. Two modes:
|
|
94
|
+
|
|
95
|
+
- **Per-arena delete**: `{"arena": "my-app"}` removes everything tagged with
|
|
96
|
+
that arena across L0/L4/L5/L6 + L3 chunks + L3 Entity nodes.
|
|
97
|
+
- **Global wipe**: `{"confirm": "GLOBAL_WIPE"}` (literal string, no arena) wipes
|
|
98
|
+
every layer for every tenant. Intended for dev resets only.
|
|
99
|
+
|
|
100
|
+
Returns counts deleted per layer.
|
|
101
|
+
|
|
102
|
+
### `GET /health`
|
|
103
|
+
|
|
104
|
+
Shallow health. Returns 200 even when `status: "degraded"` — the body's
|
|
105
|
+
`status` is the verdict.
|
|
106
|
+
|
|
107
|
+
```json
|
|
108
|
+
{
|
|
109
|
+
"status": "ok",
|
|
110
|
+
"version": "0.1.0",
|
|
111
|
+
"engine": "pentatonic-memory-engine",
|
|
112
|
+
"layers": {
|
|
113
|
+
"l0": "ok", "l1": "ok", "l2": "ok", "l3": "ok",
|
|
114
|
+
"l4": "ok", "l5": "ok", "l6": "ok", "nv_embed": "ok"
|
|
115
|
+
},
|
|
116
|
+
"memories": {
|
|
117
|
+
"l0_bm25_chunks": 233142,
|
|
118
|
+
"l4_vectors": 64212,
|
|
119
|
+
"l5_chats_chunks": 109671,
|
|
120
|
+
"l6_vector_chunks": 68220,
|
|
121
|
+
"l6_fts_chunks": 39703
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Each layer value is one of `"ok"`, `"degraded"`, `"http <code>"`, or
|
|
127
|
+
`"unreachable: <reason>"`. The aggregate `status` is `degraded` if any
|
|
128
|
+
single layer is non-ok and `down` if ≥3 are non-ok.
|
|
129
|
+
|
|
130
|
+
The `memories` field is a per-layer chunk-count dict (since v0.8.4 — previously
|
|
131
|
+
a single int that only reported L6's count and misled operators about real
|
|
132
|
+
corpus size).
|
|
133
|
+
|
|
134
|
+
### `GET /health/deep` (since v0.8.4)
|
|
135
|
+
|
|
136
|
+
Synthetic round-trip per layer: embed a sentinel → write to layer → search
|
|
137
|
+
for it → assert hit. Slower (~1–2s); intended for ops/cron, not for compose
|
|
138
|
+
healthchecks.
|
|
139
|
+
|
|
140
|
+
```json
|
|
141
|
+
{
|
|
142
|
+
"status": "ok",
|
|
143
|
+
"ok": true,
|
|
144
|
+
"layers": {
|
|
145
|
+
"l4": {"status": "ok", "ok": true, "embed_ms": 12.6, "write_ms": 2.2,
|
|
146
|
+
"search_ms": 0.0, "hit": true, "total_ms": 17.1},
|
|
147
|
+
"l5": {"status": "ok", "ok": true,
|
|
148
|
+
"collections": {"chats": true, "emails": true, "contacts": true, "memory": true},
|
|
149
|
+
"embed_ms": 9.4, "write_ms": 7.6, "search_ms": 1.6, "hit": true},
|
|
150
|
+
"l6": {"status": "ok", "ok": true, "embed_ms": 11.3, "write_ms": 635.1,
|
|
151
|
+
"search_ms": 59.5, "hit": true, "reranker": "ok"}
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
Sentinel rows are stored under arena `__healthcheck__` with a fixed id, so
|
|
157
|
+
the probe pollutes the corpus by at most one row per layer (upserts, never
|
|
158
|
+
accumulates).
|
|
159
|
+
|
|
160
|
+
### `POST /aggregate` (since v0.8.x)
|
|
161
|
+
|
|
162
|
+
Typed-Person aggregation over the L3 graph. Counts `(:Person)-[:COMMUNICATED]->(:Chunk)`
|
|
163
|
+
edges by `group_by` keys. See `packages/memory-engine/compat/server.py` for
|
|
164
|
+
the full schema — used today by the relationships UI in the TES module.
|
|
165
|
+
|
|
166
|
+
---
|
|
167
|
+
|
|
168
|
+
## What changed vs `pentatonic-memory` v0.5.x
|
|
169
|
+
|
|
170
|
+
| | v0.5.x | This package |
|
|
171
|
+
|---|---|---|
|
|
172
|
+
| Storage | Single Postgres + pgvector + HNSW | 7-layer fusion (SQLite FTS, Neo4j, sqlite-vec, Milvus, Milvus+rerank) |
|
|
173
|
+
| Embedding | One model, one dim, ingest-time | Per-layer configurable; provider-aware (`L*_EMBED_PROVIDER`) |
|
|
174
|
+
| Endpoints added | — | `/store-batch`, `/forget`, `/health/deep`, `/aggregate` |
|
|
175
|
+
| `/health` body | `{status, layers}` only | + `version`, `engine`, `nv_embed` layer, per-layer `memories` dict |
|
|
176
|
+
| `/store` engine fields | absent | `engine: { l0, l3_chunks, l3_entities, l4_qmd, l4, l5, l6 }` per-layer write counts |
|
|
177
|
+
| Backwards-incompat changes | — | **None.** All v0.5 client code keeps working. |
|
|
178
|
+
|
|
179
|
+
The engine container's compat shim (`compat/server.py`) is the API surface;
|
|
180
|
+
the layer services behind it can be replaced or scaled independently
|
|
181
|
+
without changing client code.
|
|
182
|
+
|
|
183
|
+
---
|
|
184
|
+
|
|
185
|
+
## Operational notes
|
|
186
|
+
|
|
187
|
+
### Provider-aware embedding (since v0.8.0)
|
|
188
|
+
|
|
189
|
+
Each layer service picks an embedding provider via `L*_EMBED_PROVIDER`:
|
|
190
|
+
|
|
191
|
+
- `openai` (default) — Bearer auth, `/v1/embeddings` path, OpenAI-shaped body
|
|
192
|
+
- `pentatonic-gateway` — X-API-Key auth, `/v1/embed` path
|
|
193
|
+
- `cohere` — `{texts, input_type}` body shape
|
|
194
|
+
|
|
195
|
+
A 401 from the configured provider triggers auto-detection against the other built-ins
|
|
196
|
+
(opt out per layer with `L*_EMBED_AUTODETECT=false`). See
|
|
197
|
+
`engine/services/_shared/embed_provider.py` for the dispatch table.
|
|
198
|
+
|
|
199
|
+
### L2 concurrency (since v0.8.4)
|
|
200
|
+
|
|
201
|
+
L2 hybridrag-proxy is async throughout — `AsyncGraphDatabase` for Neo4j,
|
|
202
|
+
`httpx.AsyncClient` for L4/L5/L6 fan-out, `asyncio.to_thread` for sqlite +
|
|
203
|
+
PyTorch reranker. Fan-out across layers runs concurrently via
|
|
204
|
+
`asyncio.gather`. Under sustained ingest, `/health` and `/search` no
|
|
205
|
+
longer compete for a saturated threadpool.
|
|
206
|
+
|
|
207
|
+
### L5 collection bootstrap (since v0.8.4)
|
|
208
|
+
|
|
209
|
+
L5's serve() ensures all four collections (`chats`, `emails`, `contacts`,
|
|
210
|
+
`memory`) exist at startup. Previously only `chats` was bootstrapped and
|
|
211
|
+
writes to the others would 500.
|
|
212
|
+
|
|
213
|
+
### Health check semantics
|
|
214
|
+
|
|
215
|
+
- `compose` healthcheck and `engine-runner.sh` deploy gate use `/health` —
|
|
216
|
+
it's fast (<50ms) and returns HTTP 200 regardless of body status.
|
|
217
|
+
- Operators/cron should use `/health/deep` for real functional validation.
|
|
218
|
+
- A `compose` healthcheck on `/health/deep` would burn embedding budget
|
|
219
|
+
every 10s — avoid.
|
|
@@ -52,7 +52,7 @@ client (any) ───► POST /forget ──► (FastAPI) │──►│
|
|
|
52
52
|
├──────────────────┤
|
|
53
53
|
│ L4 sqlite-vec │
|
|
54
54
|
├──────────────────┤
|
|
55
|
-
│ L5
|
|
55
|
+
│ L5 Milvus comms │
|
|
56
56
|
├──────────────────┤
|
|
57
57
|
│ L6 Document │
|
|
58
58
|
│ Store + │
|
|
@@ -76,7 +76,7 @@ Each layer indexes the same content differently. Search runs all seven in parall
|
|
|
76
76
|
| L2 | HybridRAG orchestrator | Fan-out + RRF fusion across all layers | Python FastAPI |
|
|
77
77
|
| L3 | Knowledge Graph | Entity-aware retrieval, multi-hop relationships | Neo4j (OSS) |
|
|
78
78
|
| L4 | Vector index | High-recall semantic search | sqlite-vec |
|
|
79
|
-
| L5 | Comms / multi-collection vectors | Chat / email / contact / memory namespaces |
|
|
79
|
+
| L5 | Comms / multi-collection vectors | Chat / email / contact / memory namespaces | Milvus (Lite by default; standalone via compose) |
|
|
80
80
|
| L6 | Document store | Per-arena docs + cross-encoder reranker | sqlite + Milvus + MiniLM |
|
|
81
81
|
|
|
82
82
|
## Quick start
|
|
@@ -92,7 +92,19 @@ Wait ~30s for layers to come up. Verify:
|
|
|
92
92
|
|
|
93
93
|
```bash
|
|
94
94
|
curl http://localhost:8099/health
|
|
95
|
-
# → {
|
|
95
|
+
# → {
|
|
96
|
+
# "status": "ok",
|
|
97
|
+
# "version": "0.1.0",
|
|
98
|
+
# "engine": "pentatonic-memory-engine",
|
|
99
|
+
# "layers": {"l0":"ok","l1":"ok","l2":"ok","l3":"ok","l4":"ok","l5":"ok","l6":"ok","nv_embed":"ok"},
|
|
100
|
+
# "memories": {
|
|
101
|
+
# "l0_bm25_chunks": N, "l4_vectors": N,
|
|
102
|
+
# "l5_chats_chunks": N, "l6_vector_chunks": N, "l6_fts_chunks": N
|
|
103
|
+
# }
|
|
104
|
+
# }
|
|
105
|
+
|
|
106
|
+
# Or run real functional round-trips per layer (slower; ~1–2s):
|
|
107
|
+
curl http://localhost:8099/health/deep
|
|
96
108
|
```
|
|
97
109
|
|
|
98
110
|
Now point your existing `pentatonic-memory` SDK client at `http://localhost:8099` — no code change.
|
|
@@ -122,10 +134,12 @@ Both modes populate all 7 layers on `/store-batch` (since v0.2). The mode flag o
|
|
|
122
134
|
|---|---|---|---|
|
|
123
135
|
| `POST /store` | ✅ | ✅ | Same request/response shape |
|
|
124
136
|
| `POST /search` | ✅ | ✅ | Same request/response shape; ?mode=vector/text both supported |
|
|
125
|
-
| `GET /health` | ✅ | ✅ | Returns aggregate health across all 7 layers |
|
|
126
|
-
| `
|
|
137
|
+
| `GET /health` | ✅ | ✅ | Returns aggregate health across all 7 layers + nv-embed reachability + per-layer `memories` counts |
|
|
138
|
+
| `GET /health/deep` | ❌ | ✅ | NEW (v0.8.4): synthetic embed → write → search round-trip per layer. Slower (~1–2s); for ops/monitoring on demand. |
|
|
139
|
+
| `POST /store-batch` | ❌ | ✅ | Batch-ingest N records in one HTTP call (30–50× faster) |
|
|
127
140
|
| `POST /forget` | ❌ (regression) | ✅ | Restored from v0.4.x; supports `metadata_contains` filter |
|
|
141
|
+
| `POST /aggregate` | ❌ | ✅ | NEW (v0.8.x): typed-Person aggregation over the L3 graph — counts COMMUNICATED edges per channel via the ChannelStat denormalisation |
|
|
128
142
|
|
|
129
|
-
Migration: see `
|
|
143
|
+
Migration: see [`MIGRATION.md`](MIGRATION.md) for the wire-format walkthrough.
|
|
130
144
|
|
|
131
145
|
|