@pentatonic-ai/ai-agent-sdk 0.7.9 → 0.7.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/packages/memory/src/__tests__/engine.test.js +78 -3
- package/packages/memory/src/engine.js +79 -15
- package/packages/memory-engine/compat/server.py +83 -41
- package/packages/memory-engine/docker-compose.yml +0 -1
- package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +50 -30
- package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +25 -9
- package/packages/memory-engine/engine/services/l6/l6-document-store.py +61 -15
- package/packages/memory-engine/tests/e2e_arena.sh +60 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pentatonic-ai/ai-agent-sdk",
|
|
3
|
-
"version": "0.7.
|
|
3
|
+
"version": "0.7.11",
|
|
4
4
|
"description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -4,6 +4,8 @@ import {
|
|
|
4
4
|
engineStore,
|
|
5
5
|
engineSearch,
|
|
6
6
|
engineForget,
|
|
7
|
+
composeArena,
|
|
8
|
+
composeArenas,
|
|
7
9
|
DEFAULT_ENGINE_URL,
|
|
8
10
|
} from "../engine.js";
|
|
9
11
|
|
|
@@ -82,7 +84,7 @@ describe("engine HTTP client", () => {
|
|
|
82
84
|
});
|
|
83
85
|
|
|
84
86
|
describe("engineStore", () => {
|
|
85
|
-
it("
|
|
87
|
+
it("tenant-wide by default when no userId", async () => {
|
|
86
88
|
mockOk({ id: "abc", content: "hello", layerId: "ml_acme_episodic" });
|
|
87
89
|
await engineStore("https://e", {
|
|
88
90
|
clientId: "acme",
|
|
@@ -104,6 +106,39 @@ describe("engine HTTP client", () => {
|
|
|
104
106
|
});
|
|
105
107
|
});
|
|
106
108
|
|
|
109
|
+
it("user-scoped by default when userId provided", async () => {
|
|
110
|
+
mockOk({ id: "x", content: "x", layerId: "ml_acme_episodic" });
|
|
111
|
+
await engineStore("https://e", {
|
|
112
|
+
clientId: "acme",
|
|
113
|
+
userId: "user-42",
|
|
114
|
+
content: "x",
|
|
115
|
+
});
|
|
116
|
+
const body = JSON.parse(calls[0].init.body);
|
|
117
|
+
expect(body.metadata.arena).toBe("acme:user-42");
|
|
118
|
+
});
|
|
119
|
+
|
|
120
|
+
it("scope=tenant overrides user-scoped default", async () => {
|
|
121
|
+
mockOk({ id: "x", content: "x", layerId: "ml_acme_episodic" });
|
|
122
|
+
await engineStore("https://e", {
|
|
123
|
+
clientId: "acme",
|
|
124
|
+
userId: "user-42",
|
|
125
|
+
scope: "tenant",
|
|
126
|
+
content: "x",
|
|
127
|
+
});
|
|
128
|
+
const body = JSON.parse(calls[0].init.body);
|
|
129
|
+
expect(body.metadata.arena).toBe("acme");
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
it("scope=user without userId throws", async () => {
|
|
133
|
+
await expect(
|
|
134
|
+
engineStore("https://e", {
|
|
135
|
+
clientId: "acme",
|
|
136
|
+
scope: "user",
|
|
137
|
+
content: "x",
|
|
138
|
+
})
|
|
139
|
+
).rejects.toThrow(/scope=user requires userId/);
|
|
140
|
+
});
|
|
141
|
+
|
|
107
142
|
it("omits layer_type and actor_user_id when not provided", async () => {
|
|
108
143
|
mockOk({ id: "x", content: "x", layerId: "ml_acme_episodic" });
|
|
109
144
|
await engineStore("https://e", { clientId: "acme", content: "x" });
|
|
@@ -116,7 +151,6 @@ describe("engine HTTP client", () => {
|
|
|
116
151
|
await engineStore("https://e", {
|
|
117
152
|
clientId: "acme",
|
|
118
153
|
content: "x",
|
|
119
|
-
// attempted hostile arena spoof:
|
|
120
154
|
metadata: { arena: "tenant-b" },
|
|
121
155
|
});
|
|
122
156
|
const body = JSON.parse(calls[0].init.body);
|
|
@@ -135,7 +169,7 @@ describe("engine HTTP client", () => {
|
|
|
135
169
|
});
|
|
136
170
|
|
|
137
171
|
describe("engineSearch", () => {
|
|
138
|
-
it("
|
|
172
|
+
it("tenant-only arenas list when no userId", async () => {
|
|
139
173
|
mockOk({ results: [] });
|
|
140
174
|
await engineSearch("https://e", {
|
|
141
175
|
clientId: "acme",
|
|
@@ -147,12 +181,26 @@ describe("engine HTTP client", () => {
|
|
|
147
181
|
expect(calls[0].url).toBe("https://e/search");
|
|
148
182
|
expect(body).toEqual({
|
|
149
183
|
arena: "acme",
|
|
184
|
+
arenas: ["acme"],
|
|
150
185
|
query: "hello",
|
|
151
186
|
limit: 5,
|
|
152
187
|
min_score: 0.5,
|
|
153
188
|
});
|
|
154
189
|
});
|
|
155
190
|
|
|
191
|
+
it("tenant + user-scope arenas list when userId provided", async () => {
|
|
192
|
+
mockOk({ results: [] });
|
|
193
|
+
await engineSearch("https://e", {
|
|
194
|
+
clientId: "acme",
|
|
195
|
+
userId: "user-42",
|
|
196
|
+
query: "hi",
|
|
197
|
+
});
|
|
198
|
+
const body = JSON.parse(calls[0].init.body);
|
|
199
|
+
expect(body.arenas).toEqual(["acme", "acme:user-42"]);
|
|
200
|
+
// single-arena field kept for back-compat — points at tenant-wide
|
|
201
|
+
expect(body.arena).toBe("acme");
|
|
202
|
+
});
|
|
203
|
+
|
|
156
204
|
it("includes metadata_filter only when non-empty", async () => {
|
|
157
205
|
mockOk({ results: [] });
|
|
158
206
|
await engineSearch("https://e", {
|
|
@@ -183,6 +231,33 @@ describe("engine HTTP client", () => {
|
|
|
183
231
|
});
|
|
184
232
|
});
|
|
185
233
|
|
|
234
|
+
describe("composeArena", () => {
|
|
235
|
+
it("tenant scope by default when no userId", () => {
|
|
236
|
+
expect(composeArena("acme")).toBe("acme");
|
|
237
|
+
});
|
|
238
|
+
it("user scope by default when userId present", () => {
|
|
239
|
+
expect(composeArena("acme", "u-1")).toBe("acme:u-1");
|
|
240
|
+
});
|
|
241
|
+
it("explicit scope=tenant overrides", () => {
|
|
242
|
+
expect(composeArena("acme", "u-1", "tenant")).toBe("acme");
|
|
243
|
+
});
|
|
244
|
+
it("scope=user without userId throws", () => {
|
|
245
|
+
expect(() => composeArena("acme", null, "user")).toThrow(/userId/);
|
|
246
|
+
});
|
|
247
|
+
it("missing clientId throws", () => {
|
|
248
|
+
expect(() => composeArena("")).toThrow(/clientId/);
|
|
249
|
+
});
|
|
250
|
+
});
|
|
251
|
+
|
|
252
|
+
describe("composeArenas", () => {
|
|
253
|
+
it("tenant only when no userId", () => {
|
|
254
|
+
expect(composeArenas("acme")).toEqual(["acme"]);
|
|
255
|
+
});
|
|
256
|
+
it("tenant + user-scope when userId present", () => {
|
|
257
|
+
expect(composeArenas("acme", "u-1")).toEqual(["acme", "acme:u-1"]);
|
|
258
|
+
});
|
|
259
|
+
});
|
|
260
|
+
|
|
186
261
|
describe("engineForget", () => {
|
|
187
262
|
it("forwards id when provided", async () => {
|
|
188
263
|
mockOk({ deleted: 1 });
|
|
@@ -99,25 +99,77 @@ export async function fetchEngine(engineUrl, path, body) {
|
|
|
99
99
|
return res.json();
|
|
100
100
|
}
|
|
101
101
|
|
|
102
|
+
/**
|
|
103
|
+
* Compose the engine arena for a (clientId, userId, scope) triple.
|
|
104
|
+
*
|
|
105
|
+
* tenant scope: clientId (e.g. "acme")
|
|
106
|
+
* user scope: clientId + ":" + userId (e.g. "acme:user-42")
|
|
107
|
+
*
|
|
108
|
+
* Default scope: "user" when userId is supplied, "tenant" otherwise.
|
|
109
|
+
* Multi-tenant search composes arena lists from this same vocabulary.
|
|
110
|
+
*
|
|
111
|
+
* @param {string} clientId
|
|
112
|
+
* @param {string|null|undefined} userId
|
|
113
|
+
* @param {"tenant"|"user"} [scope]
|
|
114
|
+
* @returns {string} the arena value to stamp on /store metadata
|
|
115
|
+
*/
|
|
116
|
+
export function composeArena(clientId, userId, scope) {
|
|
117
|
+
if (!clientId) throw new Error("composeArena: clientId required");
|
|
118
|
+
const effectiveScope = scope || (userId ? "user" : "tenant");
|
|
119
|
+
if (effectiveScope === "user") {
|
|
120
|
+
if (!userId) throw new Error("composeArena: scope=user requires userId");
|
|
121
|
+
return `${clientId}:${userId}`;
|
|
122
|
+
}
|
|
123
|
+
return clientId;
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
/**
|
|
127
|
+
* Compose the arenas list a search should span for a given user.
|
|
128
|
+
*
|
|
129
|
+
* no userId: [clientId] (tenant-wide only)
|
|
130
|
+
* with userId: [clientId, clientId + ":" + userId] (tenant-wide + own user-scope)
|
|
131
|
+
*
|
|
132
|
+
* The engine treats the list as a set, but the tenant-wide arena always comes first (engineSearch reuses arenas[0] as the back-compat single-arena field). Callers passing
|
|
133
|
+
* `userId` get visibility into both their own user-scoped memories and
|
|
134
|
+
* the shared tenant-wide memories — never another user's user-scoped data.
|
|
135
|
+
*
|
|
136
|
+
* @param {string} clientId
|
|
137
|
+
* @param {string|null|undefined} userId
|
|
138
|
+
* @returns {string[]}
|
|
139
|
+
*/
|
|
140
|
+
export function composeArenas(clientId, userId) {
|
|
141
|
+
if (!clientId) throw new Error("composeArenas: clientId required");
|
|
142
|
+
return userId ? [clientId, `${clientId}:${userId}`] : [clientId];
|
|
143
|
+
}
|
|
144
|
+
|
|
102
145
|
/**
|
|
103
146
|
* Store a single memory in the engine.
|
|
104
147
|
*
|
|
105
|
-
* Builds the canonical /store body
|
|
106
|
-
*
|
|
107
|
-
*
|
|
148
|
+
* Builds the canonical /store body. By default the row is **user-scoped**
|
|
149
|
+
* (`arena = clientId:userId`) when `userId` is supplied, otherwise
|
|
150
|
+
* **tenant-wide** (`arena = clientId`). Pass `scope: "tenant"` explicitly
|
|
151
|
+
* to write a shared row from a user-context (e.g. a super-admin uploading
|
|
152
|
+
* a doc that should be visible to every user in the tenant).
|
|
153
|
+
*
|
|
154
|
+
* The arena value is written by the SDK after the caller's metadata is
|
|
155
|
+
* spread, so a resolver can't be tricked into spoofing arena via metadata.arena.
|
|
108
156
|
*
|
|
109
157
|
* @param {string} engineUrl
|
|
110
158
|
* @param {object} opts
|
|
111
|
-
* @param {string}
|
|
112
|
-
* @param {string}
|
|
113
|
-
* @param {
|
|
114
|
-
* @param {string}
|
|
115
|
-
* @param {
|
|
159
|
+
* @param {string} opts.clientId tenant id
|
|
160
|
+
* @param {string} [opts.userId] user id within the tenant; controls default scope
|
|
161
|
+
* @param {"tenant"|"user"} [opts.scope] override the default scope. "user" requires userId.
|
|
162
|
+
* @param {string} opts.content
|
|
163
|
+
* @param {object} [opts.metadata] extra metadata; merged into engine body
|
|
164
|
+
* @param {string} [opts.layerType] "episodic" | "semantic" | "procedural" | "working"
|
|
165
|
+
* @param {string} [opts.actorUserId] passes through as metadata.actor_user_id
|
|
116
166
|
* @returns {Promise<EngineStoreResult>}
|
|
117
167
|
*/
|
|
118
168
|
export async function engineStore(engineUrl, opts) {
|
|
119
169
|
const {
|
|
120
170
|
clientId,
|
|
171
|
+
userId,
|
|
172
|
+
scope,
|
|
121
173
|
content,
|
|
122
174
|
metadata = {},
|
|
123
175
|
layerType,
|
|
@@ -125,11 +177,12 @@ export async function engineStore(engineUrl, opts) {
|
|
|
125
177
|
} = opts || {};
|
|
126
178
|
if (!clientId) throw new Error("engineStore: clientId required");
|
|
127
179
|
if (typeof content !== "string") throw new Error("engineStore: content required");
|
|
180
|
+
const arena = composeArena(clientId, userId, scope);
|
|
128
181
|
const body = {
|
|
129
182
|
content,
|
|
130
183
|
metadata: {
|
|
131
184
|
...metadata,
|
|
132
|
-
arena
|
|
185
|
+
arena,
|
|
133
186
|
...(layerType ? { layer_type: layerType } : {}),
|
|
134
187
|
...(actorUserId !== undefined ? { actor_user_id: actorUserId } : {}),
|
|
135
188
|
},
|
|
@@ -140,18 +193,25 @@ export async function engineStore(engineUrl, opts) {
|
|
|
140
193
|
/**
|
|
141
194
|
* Search the engine, scoped to a tenant.
|
|
142
195
|
*
|
|
196
|
+
* When `userId` is supplied the search spans **both** the tenant-wide
|
|
197
|
+
* arena (`clientId`) and the user's own scope (`clientId:userId`) — so a
|
|
198
|
+
* caller sees their own memories plus shared tenant memories, never
|
|
199
|
+
* another user's. Without `userId` the search is tenant-wide only.
|
|
200
|
+
*
|
|
143
201
|
* @param {string} engineUrl
|
|
144
202
|
* @param {object} opts
|
|
145
|
-
* @param {string}
|
|
146
|
-
* @param {string}
|
|
147
|
-
* @param {
|
|
148
|
-
* @param {number}
|
|
149
|
-
* @param {
|
|
203
|
+
* @param {string} opts.clientId
|
|
204
|
+
* @param {string} [opts.userId]
|
|
205
|
+
* @param {string} opts.query
|
|
206
|
+
* @param {number} [opts.limit=10]
|
|
207
|
+
* @param {number} [opts.minScore=0.3]
|
|
208
|
+
* @param {object} [opts.metadataFilter] arbitrary equality filter on result metadata
|
|
150
209
|
* @returns {Promise<{results: EngineSearchHit[]}>}
|
|
151
210
|
*/
|
|
152
211
|
export async function engineSearch(engineUrl, opts) {
|
|
153
212
|
const {
|
|
154
213
|
clientId,
|
|
214
|
+
userId,
|
|
155
215
|
query,
|
|
156
216
|
limit = DEFAULT_LIMIT,
|
|
157
217
|
minScore = DEFAULT_MIN_SCORE,
|
|
@@ -159,8 +219,12 @@ export async function engineSearch(engineUrl, opts) {
|
|
|
159
219
|
} = opts || {};
|
|
160
220
|
if (!clientId) throw new Error("engineSearch: clientId required");
|
|
161
221
|
if (typeof query !== "string") throw new Error("engineSearch: query required");
|
|
222
|
+
const arenas = composeArenas(clientId, userId);
|
|
162
223
|
const body = {
|
|
163
|
-
|
|
224
|
+
arenas,
|
|
225
|
+
// Single-arena field kept for callers / engines that haven't been
|
|
226
|
+
// upgraded to the arenas-list shape. The list is authoritative.
|
|
227
|
+
arena: arenas[0],
|
|
164
228
|
query,
|
|
165
229
|
limit,
|
|
166
230
|
min_score: minScore,
|
|
@@ -30,7 +30,6 @@ Environment:
|
|
|
30
30
|
L6_DOC_URL default http://l6:8037
|
|
31
31
|
NV_EMBED_URL default http://nv-embed:8041/v1/embeddings
|
|
32
32
|
PORT default 8099 (matches pentatonic-memory v0.5)
|
|
33
|
-
CLIENT_ID default "default"
|
|
34
33
|
"""
|
|
35
34
|
|
|
36
35
|
import hashlib
|
|
@@ -63,7 +62,18 @@ NEO4J_AUTH = os.environ.get("NEO4J_AUTH", "neo4j/local-dev-pw")
|
|
|
63
62
|
NEO4J_DB = os.environ.get("NEO4J_DB", "neo4j")
|
|
64
63
|
|
|
65
64
|
PORT = int(os.environ.get("PORT", "8099"))
|
|
66
|
-
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# Layer types we surface as the SDK 4-layer projection. Engine stores
|
|
68
|
+
# everything as chunks tagged with arena + layer_type metadata; this
|
|
69
|
+
# helper renders the legacy `ml_<arena>_<type>` layer-id from the
|
|
70
|
+
# per-row arena, so the response reflects the actual data not a
|
|
71
|
+
# deployment-wide constant. Falls back to arena "general" and
|
|
72
|
+
# layer_type "episodic" when either is missing.
|
|
73
|
+
def _layer_id(arena: Optional[str], layer_type: Optional[str] = None) -> str:
|
|
74
|
+
a = arena or "general"
|
|
75
|
+
t = layer_type or "episodic"
|
|
76
|
+
return f"ml_{a}_{t}"
|
|
67
77
|
|
|
68
78
|
# Test/isolated mode: bypass the L2 HybridRAG orchestrator and query L6 directly.
|
|
69
79
|
# Useful for bench harnesses where you want to validate the ingest+search
|
|
@@ -92,12 +102,15 @@ class SearchRequest(BaseModel):
|
|
|
92
102
|
query: str
|
|
93
103
|
limit: Optional[int] = 10
|
|
94
104
|
min_score: Optional[float] = 0.001
|
|
95
|
-
# Tenant scope
|
|
96
|
-
#
|
|
97
|
-
# post-filter on the shim for layers that don't yet (L2, L4, L5).
|
|
98
|
-
# When unset, search is global — same behaviour as v0.7.x; safe for
|
|
99
|
-
# single-tenant deployments. Multi-tenant callers MUST set this.
|
|
105
|
+
# Tenant scope (single arena). Back-compat shape — single-arena callers
|
|
106
|
+
# can keep sending this. Treated as a one-element `arenas` list.
|
|
100
107
|
arena: Optional[str] = None
|
|
108
|
+
# Multi-arena scope. Used by callers that want to span both a tenant-
|
|
109
|
+
# wide arena ("acme") and a user-scoped arena ("acme:user-42") in one
|
|
110
|
+
# search — the SDK helper composes this list automatically when a
|
|
111
|
+
# `userId` is supplied. Authoritative when both `arena` and `arenas`
|
|
112
|
+
# are present; engine treats it as a set.
|
|
113
|
+
arenas: Optional[list[str]] = None
|
|
101
114
|
# Arbitrary metadata equality filters, applied as a post-filter on
|
|
102
115
|
# the shim. Useful for `kind`, `layer_type`, `source_repo`, etc.
|
|
103
116
|
# Keys not present on a result's metadata are treated as no-match.
|
|
@@ -384,7 +397,6 @@ async def health():
|
|
|
384
397
|
"""
|
|
385
398
|
out = {
|
|
386
399
|
"status": "ok",
|
|
387
|
-
"client": CLIENT_ID,
|
|
388
400
|
"version": VERSION,
|
|
389
401
|
"engine": "pentatonic-memory-engine",
|
|
390
402
|
"layers": {},
|
|
@@ -476,7 +488,7 @@ async def store(req: StoreRequest):
|
|
|
476
488
|
return {
|
|
477
489
|
"id": rid,
|
|
478
490
|
"content": req.content,
|
|
479
|
-
"layerId":
|
|
491
|
+
"layerId": _layer_id(arena, (req.metadata or {}).get("layer_type")),
|
|
480
492
|
"engine": {
|
|
481
493
|
"l0": l2_internal.get("l0", 0),
|
|
482
494
|
"l3_chunks": l2_internal.get("l3_chunks", 0),
|
|
@@ -536,29 +548,49 @@ async def store_batch(req: StoreBatchRequest):
|
|
|
536
548
|
}
|
|
537
549
|
|
|
538
550
|
|
|
551
|
+
def _arenas_for(req: SearchRequest) -> list[str]:
|
|
552
|
+
"""Normalize req's single-arena + multi-arena fields into one list.
|
|
553
|
+
|
|
554
|
+
`arenas` is authoritative when set; otherwise `arena` is treated as
|
|
555
|
+
a one-element list; otherwise empty (= search is unscoped, dev/test).
|
|
556
|
+
"""
|
|
557
|
+
if req.arenas:
|
|
558
|
+
return [a for a in req.arenas if a]
|
|
559
|
+
if req.arena:
|
|
560
|
+
return [req.arena]
|
|
561
|
+
return []
|
|
562
|
+
|
|
563
|
+
|
|
539
564
|
def _apply_metadata_filters(results: list[dict[str, Any]], req: SearchRequest) -> list[dict[str, Any]]:
|
|
540
|
-
"""Post-filter results by arena + arbitrary metadata equality.
|
|
565
|
+
"""Post-filter results by arena set + arbitrary metadata equality.
|
|
541
566
|
|
|
542
567
|
Many layer searches don't yet honour arena/metadata at the storage
|
|
543
568
|
level, so the shim enforces tenant isolation here as defence in
|
|
544
569
|
depth. Even if the underlying layer leaks across arenas, the shim
|
|
545
|
-
drops cross-
|
|
570
|
+
drops cross-arena rows before returning.
|
|
571
|
+
|
|
572
|
+
Multi-arena rule: a row passes if its arena tag is in the request's
|
|
573
|
+
arena set. So a user-scoped search (arenas=[acme, acme:u-42]) sees
|
|
574
|
+
both tenant-wide rows (arena=acme) and that user's own user-scoped
|
|
575
|
+
rows (arena=acme:u-42), but never another user's user-scoped rows
|
|
576
|
+
(arena=acme:u-99).
|
|
546
577
|
"""
|
|
547
|
-
|
|
578
|
+
arenas = _arenas_for(req)
|
|
548
579
|
extra = req.metadata_filter or {}
|
|
549
|
-
if not
|
|
580
|
+
if not arenas and not extra:
|
|
550
581
|
return results
|
|
582
|
+
arena_set = set(arenas)
|
|
551
583
|
out: list[dict[str, Any]] = []
|
|
552
584
|
for item in results:
|
|
553
585
|
meta = item.get("metadata") or {}
|
|
554
|
-
if
|
|
586
|
+
if arena_set:
|
|
555
587
|
row_arena = meta.get("arena") or item.get("arena")
|
|
556
|
-
if row_arena and row_arena
|
|
588
|
+
if row_arena and row_arena not in arena_set:
|
|
557
589
|
continue
|
|
558
590
|
# If the row has no arena tag at all, drop it for multi-tenant
|
|
559
591
|
# safety: a row without arena predates the multi-tenant
|
|
560
592
|
# plumbing and could belong to anyone.
|
|
561
|
-
if
|
|
593
|
+
if not row_arena:
|
|
562
594
|
continue
|
|
563
595
|
ok = True
|
|
564
596
|
for k, v in extra.items():
|
|
@@ -578,7 +610,7 @@ def _search_overfetch(req: SearchRequest) -> int:
|
|
|
578
610
|
between accuracy and latency.
|
|
579
611
|
"""
|
|
580
612
|
base = req.limit or 10
|
|
581
|
-
return base * 5 if (req
|
|
613
|
+
return base * 5 if (_arenas_for(req) or req.metadata_filter) else base * 3
|
|
582
614
|
|
|
583
615
|
|
|
584
616
|
@app.post("/search")
|
|
@@ -615,16 +647,17 @@ async def search(req: SearchRequest):
|
|
|
615
647
|
import asyncio
|
|
616
648
|
async def _q_l6(query: str):
|
|
617
649
|
try:
|
|
618
|
-
params:
|
|
619
|
-
"q"
|
|
620
|
-
"limit"
|
|
621
|
-
"method"
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
650
|
+
params: list = [
|
|
651
|
+
("q", query),
|
|
652
|
+
("limit", str(_search_overfetch(req))),
|
|
653
|
+
("method", "hybrid"),
|
|
654
|
+
]
|
|
655
|
+
# L6 supports arena natively (l6-document-store.py).
|
|
656
|
+
# Forward all arenas in the search scope; L6 expands the
|
|
657
|
+
# filter to `arena IN (...)`. Multiple `arenas` query
|
|
658
|
+
# params on the wire = list-shaped server side.
|
|
659
|
+
for a in _arenas_for(req):
|
|
660
|
+
params.append(("arenas", a))
|
|
628
661
|
r = await _client().get(
|
|
629
662
|
f"{L6_DOC_URL}/search",
|
|
630
663
|
params=params,
|
|
@@ -717,13 +750,14 @@ async def search(req: SearchRequest):
|
|
|
717
750
|
if item.get(k)
|
|
718
751
|
}
|
|
719
752
|
merged_meta = {**raw_top_level, **(attached_meta or item.get("metadata") or {})}
|
|
753
|
+
row_arena = merged_meta.get("arena")
|
|
754
|
+
row_layer_type = merged_meta.get("layer_type")
|
|
720
755
|
out_results.append({
|
|
721
756
|
"id": key,
|
|
722
757
|
"content": item.get("text") or item.get("content") or item.get("snippet") or "",
|
|
723
758
|
"metadata": merged_meta,
|
|
724
759
|
"similarity": float(rrf_scores[key]),
|
|
725
|
-
"layer_id":
|
|
726
|
-
"client_id": CLIENT_ID,
|
|
760
|
+
"layer_id": _layer_id(row_arena, row_layer_type),
|
|
727
761
|
"source": item.get("source_file") or item.get("path") or "",
|
|
728
762
|
"engine_layer": "+".join(sorted(set(layer_provenance.get(key, [])))),
|
|
729
763
|
})
|
|
@@ -731,10 +765,14 @@ async def search(req: SearchRequest):
|
|
|
731
765
|
# then trim to the requested limit.
|
|
732
766
|
out_results = _apply_metadata_filters(out_results, req)
|
|
733
767
|
return {"results": out_results[: req.limit or 10]}
|
|
768
|
+
arenas = _arenas_for(req)
|
|
734
769
|
try:
|
|
735
|
-
get_params:
|
|
736
|
-
|
|
737
|
-
|
|
770
|
+
get_params: list = [
|
|
771
|
+
("q", req.query),
|
|
772
|
+
("limit", str(_search_overfetch(req))),
|
|
773
|
+
]
|
|
774
|
+
for a in arenas:
|
|
775
|
+
get_params.append(("arenas", a))
|
|
738
776
|
r = await _client().get(
|
|
739
777
|
f"{L2_PROXY_URL}/search",
|
|
740
778
|
params=get_params,
|
|
@@ -750,8 +788,8 @@ async def search(req: SearchRequest):
|
|
|
750
788
|
"limit": _search_overfetch(req),
|
|
751
789
|
"min_score": req.min_score or 0.001,
|
|
752
790
|
}
|
|
753
|
-
if
|
|
754
|
-
post_body["
|
|
791
|
+
if arenas:
|
|
792
|
+
post_body["arenas"] = arenas
|
|
755
793
|
r = await _client().post(
|
|
756
794
|
f"{L2_PROXY_URL}/v1/search",
|
|
757
795
|
json=post_body,
|
|
@@ -762,11 +800,14 @@ async def search(req: SearchRequest):
|
|
|
762
800
|
except Exception as exc2:
|
|
763
801
|
last_err = exc2
|
|
764
802
|
try:
|
|
765
|
-
params:
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
803
|
+
params: list = [
|
|
804
|
+
("q", req.query),
|
|
805
|
+
("limit", str(_search_overfetch(req))),
|
|
806
|
+
]
|
|
807
|
+
# L6 supports arena natively; forward all in the search
|
|
808
|
+
# scope on the last-resort fallback path too.
|
|
809
|
+
for a in arenas:
|
|
810
|
+
params.append(("arenas", a))
|
|
770
811
|
r = await _client().get(
|
|
771
812
|
f"{L6_DOC_URL}/search",
|
|
772
813
|
params=params,
|
|
@@ -819,13 +860,14 @@ async def search(req: SearchRequest):
|
|
|
819
860
|
if item.get(k)
|
|
820
861
|
}
|
|
821
862
|
merged_meta = {**raw_top_level, **(attached_meta or item.get("metadata") or {})}
|
|
863
|
+
row_arena = merged_meta.get("arena")
|
|
864
|
+
row_layer_type = merged_meta.get("layer_type")
|
|
822
865
|
out_results.append({
|
|
823
866
|
"id": chosen_id,
|
|
824
867
|
"content": item.get("text") or item.get("content") or item.get("snippet") or "",
|
|
825
868
|
"metadata": merged_meta,
|
|
826
869
|
"similarity": float(item.get("score") or item.get("similarity") or 0.0),
|
|
827
|
-
"layer_id":
|
|
828
|
-
"client_id": CLIENT_ID,
|
|
870
|
+
"layer_id": _layer_id(row_arena, row_layer_type),
|
|
829
871
|
"source": item.get("source", item.get("source_file", "")),
|
|
830
872
|
"engine_layer": item.get("layer", item.get("source_layer", "")),
|
|
831
873
|
})
|
|
@@ -215,7 +215,6 @@ services:
|
|
|
215
215
|
L5_MILVUS_URL: http://l5:8034
|
|
216
216
|
L6_DOC_URL: http://l6:8037
|
|
217
217
|
NV_EMBED_URL: ${NV_EMBED_URL:-http://host.docker.internal:8041/v1/embeddings}
|
|
218
|
-
CLIENT_ID: ${CLIENT_ID:-default}
|
|
219
218
|
BYPASS_L2_PROXY: ${BYPASS_L2_PROXY:-0}
|
|
220
219
|
extra_hosts:
|
|
221
220
|
- "host.docker.internal:host-gateway"
|
|
@@ -719,17 +719,18 @@ L0_MEMORY_DB = Path(os.environ.get(
|
|
|
719
719
|
str(Path.home() / ".pentatonic" / "memory" / "main.sqlite"),
|
|
720
720
|
))
|
|
721
721
|
|
|
722
|
-
def search_l0_bm25(query: str, limit: int = 6, arena: str = None
|
|
722
|
+
def search_l0_bm25(query: str, limit: int = 6, arena: str = None,
|
|
723
|
+
arenas: List[str] = None) -> List[Dict]:
|
|
723
724
|
"""Search native BM25 index over workspace memory files.
|
|
724
725
|
|
|
725
726
|
Covers chunks from daily notes, memory files, people profiles,
|
|
726
727
|
infrastructure docs, project files — corpus that L3-L6 don't index.
|
|
727
728
|
Sub-millisecond local SQLite reads, zero network overhead.
|
|
728
729
|
|
|
729
|
-
arena
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
arena
|
|
730
|
+
arena / arenas: when set, filter to paths under bench/<arena>/.
|
|
731
|
+
Multi-arena queries (e.g. tenant-wide + user-scoped in one search)
|
|
732
|
+
use OR'd path-prefix LIKE clauses. `arenas` wins when both are
|
|
733
|
+
supplied; `arena` is treated as a one-element list for back-compat.
|
|
733
734
|
"""
|
|
734
735
|
if not L0_MEMORY_DB.exists():
|
|
735
736
|
return []
|
|
@@ -744,6 +745,9 @@ def search_l0_bm25(query: str, limit: int = 6, arena: str = None) -> List[Dict]:
|
|
|
744
745
|
return []
|
|
745
746
|
fts_query = " OR ".join(f'"{t}"' for t in meaningful)
|
|
746
747
|
|
|
748
|
+
# Normalize single+multi arena inputs into one list.
|
|
749
|
+
arena_list = list(arenas) if arenas else ([arena] if arena else [])
|
|
750
|
+
|
|
747
751
|
conn = sqlite3.connect(str(L0_MEMORY_DB), timeout=2)
|
|
748
752
|
conn.execute("PRAGMA journal_mode=WAL")
|
|
749
753
|
sql = """
|
|
@@ -755,9 +759,10 @@ def search_l0_bm25(query: str, limit: int = 6, arena: str = None) -> List[Dict]:
|
|
|
755
759
|
AND path NOT LIKE '%-backup-%'
|
|
756
760
|
"""
|
|
757
761
|
params: list = [fts_query]
|
|
758
|
-
if
|
|
759
|
-
|
|
760
|
-
|
|
762
|
+
if arena_list:
|
|
763
|
+
clauses = " OR ".join(["path LIKE ?"] * len(arena_list))
|
|
764
|
+
sql += f" AND ({clauses})"
|
|
765
|
+
params.extend([f"bench/{a}/%" for a in arena_list])
|
|
761
766
|
sql += " ORDER BY rank ASC LIMIT ?"
|
|
762
767
|
params.append(limit * 2)
|
|
763
768
|
rows = conn.execute(sql, params).fetchall()
|
|
@@ -800,17 +805,21 @@ def search_l0_bm25(query: str, limit: int = 6, arena: str = None) -> List[Dict]:
|
|
|
800
805
|
|
|
801
806
|
L5_API_URL = os.environ.get("PME_L5_URL", "http://127.0.0.1:8034")
|
|
802
807
|
|
|
803
|
-
def search_l5_communications(query: str, limit: int = 6, arena: str = None
|
|
808
|
+
def search_l5_communications(query: str, limit: int = 6, arena: str = None,
|
|
809
|
+
arenas: List[str] = None) -> List[Dict]:
|
|
804
810
|
"""Search L5 Communications Context via L5 API (emails, chats, calendar).
|
|
805
811
|
|
|
806
|
-
arena (optional): forwarded to L5; filters Milvus by the
|
|
807
|
-
dynamic field.
|
|
808
|
-
|
|
812
|
+
arena / arenas (optional): forwarded to L5; filters Milvus by the
|
|
813
|
+
arena dynamic field. Multi-arena calls become a Milvus
|
|
814
|
+
`arena IN ["X","Y"]` filter expression on the L5 side.
|
|
809
815
|
"""
|
|
810
816
|
try:
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
817
|
+
# Build a list of (key, value) tuples so multi-valued query
|
|
818
|
+
# params (?arenas=A&arenas=B) wire-shape correctly.
|
|
819
|
+
arena_list = list(arenas) if arenas else ([arena] if arena else [])
|
|
820
|
+
params: list = [("q", query), ("limit", str(limit))]
|
|
821
|
+
for a in arena_list:
|
|
822
|
+
params.append(("arenas", a))
|
|
814
823
|
resp = requests.get(
|
|
815
824
|
f"{L5_API_URL}/search",
|
|
816
825
|
params=params,
|
|
@@ -857,16 +866,23 @@ def search_l5_communications(query: str, limit: int = 6, arena: str = None) -> L
|
|
|
857
866
|
# L6: Document Store Search
|
|
858
867
|
L6_URL = os.environ.get("PME_L6_URL", "http://localhost:8037")
|
|
859
868
|
|
|
860
|
-
def search_l6_documents(query: str, limit: int = 6, arena: str = None
|
|
869
|
+
def search_l6_documents(query: str, limit: int = 6, arena: str = None,
|
|
870
|
+
arenas: List[str] = None) -> List[Dict]:
|
|
861
871
|
"""Search L6 Document Store (research, legal, financial, project docs).
|
|
862
872
|
|
|
863
|
-
arena (optional): forwarded to L6 — L6
|
|
873
|
+
arena / arenas (optional): forwarded to L6 — L6 supports multi-arena
|
|
864
874
|
natively (see l6-document-store.py search_vector / search_fts).
|
|
865
875
|
"""
|
|
866
876
|
try:
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
877
|
+
arena_list = list(arenas) if arenas else ([arena] if arena else [])
|
|
878
|
+
params: list = [
|
|
879
|
+
("q", query),
|
|
880
|
+
("method", "hybrid"),
|
|
881
|
+
("limit", str(limit)),
|
|
882
|
+
("rerank", "true"),
|
|
883
|
+
]
|
|
884
|
+
for a in arena_list:
|
|
885
|
+
params.append(("arenas", a))
|
|
870
886
|
resp = requests.get(
|
|
871
887
|
f"{L6_URL}/search",
|
|
872
888
|
params=params,
|
|
@@ -914,19 +930,22 @@ def search_l6_documents(query: str, limit: int = 6, arena: str = None) -> List[D
|
|
|
914
930
|
return []
|
|
915
931
|
|
|
916
932
|
|
|
917
|
-
def sequential_hybridrag_search(query: str, limit: int = 16,
|
|
933
|
+
def sequential_hybridrag_search(query: str, limit: int = 16,
|
|
934
|
+
arena: str = None,
|
|
935
|
+
arenas: List[str] = None) -> List[Dict]:
|
|
918
936
|
"""Main HybridRAG processing: L0 BM25 → L1 System Files → L2 HybridRAG (L3 Graph + L4 Vector + L5 Comms + L6 Docs).
|
|
919
937
|
|
|
920
|
-
arena (optional): tenant scope.
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
the compat shim post-filter
|
|
938
|
+
arena / arenas (optional): tenant + user scope. Multi-arena lets a
|
|
939
|
+
user's search span tenant-wide rows + their own user-scoped rows in
|
|
940
|
+
a single hybrid pass. Forwarded to L0, L5, L6 native filters; L4
|
|
941
|
+
and L3 still rely on the compat shim post-filter.
|
|
924
942
|
"""
|
|
943
|
+
arena_list = list(arenas) if arenas else ([arena] if arena else [])
|
|
925
944
|
start_time = time.time()
|
|
926
|
-
log.info(f"Starting sequential HybridRAG search for: '{query}'
|
|
945
|
+
log.info(f"Starting sequential HybridRAG search for: '{query}' arenas={arena_list!r}")
|
|
927
946
|
|
|
928
947
|
# L0: BM25 workspace memory (keyword search — complements semantic layers)
|
|
929
|
-
l0_results = search_l0_bm25(query, limit=6,
|
|
948
|
+
l0_results = search_l0_bm25(query, limit=6, arenas=arena_list)
|
|
930
949
|
log.info(f"L0 BM25 workspace: {len(l0_results)} results")
|
|
931
950
|
|
|
932
951
|
# L1: System Files (HIGHEST PRIORITY)
|
|
@@ -947,11 +966,11 @@ def sequential_hybridrag_search(query: str, limit: int = 16, arena: str = None)
|
|
|
947
966
|
log.info(f"L4 Vector search: {len(vector_results)} results (HyDE={'on' if hyde_query != query else 'off'})")
|
|
948
967
|
|
|
949
968
|
# L5: Communications Context (emails, chats, calendar) — also use HyDE
|
|
950
|
-
l5_results = search_l5_communications(hyde_query, limit=6,
|
|
969
|
+
l5_results = search_l5_communications(hyde_query, limit=6, arenas=arena_list)
|
|
951
970
|
log.info(f"L5 Communications: {len(l5_results)} results")
|
|
952
971
|
|
|
953
972
|
# L6: Document Store (research, legal, financial, project docs)
|
|
954
|
-
l6_results = search_l6_documents(hyde_query, limit=6,
|
|
973
|
+
l6_results = search_l6_documents(hyde_query, limit=6, arenas=arena_list)
|
|
955
974
|
log.info(f"L6 Documents: {len(l6_results)} results")
|
|
956
975
|
|
|
957
976
|
# L2: HybridRAG fusion (combines all layers with L1 priority)
|
|
@@ -1012,10 +1031,11 @@ async def search_endpoint(request: Request) -> dict:
|
|
|
1012
1031
|
query = body.get("query", "")
|
|
1013
1032
|
limit = body.get("limit", 16)
|
|
1014
1033
|
arena = body.get("arena") or None
|
|
1034
|
+
arenas = body.get("arenas") or None
|
|
1015
1035
|
if not query:
|
|
1016
1036
|
raise HTTPException(status_code=400, detail="query is required")
|
|
1017
1037
|
|
|
1018
|
-
results = sequential_hybridrag_search(query, limit=limit, arena=arena)
|
|
1038
|
+
results = sequential_hybridrag_search(query, limit=limit, arena=arena, arenas=arenas)
|
|
1019
1039
|
|
|
1020
1040
|
# Also return raw graph entities for context enrichment
|
|
1021
1041
|
entities = extract_query_entities(query)
|
|
@@ -449,12 +449,15 @@ def index_memory(client):
|
|
|
449
449
|
|
|
450
450
|
# --- Search ---
|
|
451
451
|
|
|
452
|
-
def search(query: str, collection: str = None, limit: int = 10,
|
|
452
|
+
def search(query: str, collection: str = None, limit: int = 10,
|
|
453
|
+
arena: str = None, arenas=None):
|
|
453
454
|
"""Search across collections.
|
|
454
455
|
|
|
455
|
-
arena (optional): when set, filter
|
|
456
|
-
field matches.
|
|
457
|
-
|
|
456
|
+
arena / arenas (optional): when set, filter rows whose `arena`
|
|
457
|
+
dynamic field matches. Multi-arena uses Milvus `in [...]` so a
|
|
458
|
+
single-pass user-scoped search (tenant + own user) returns rows
|
|
459
|
+
from both buckets. Records without an arena tag are dropped under
|
|
460
|
+
multi-tenant safety.
|
|
458
461
|
"""
|
|
459
462
|
client = get_client()
|
|
460
463
|
vectors = embed_texts([query])
|
|
@@ -465,11 +468,20 @@ def search(query: str, collection: str = None, limit: int = 10, arena: str = Non
|
|
|
465
468
|
collections = [collection] if collection else ["chats", "emails", "contacts", "memory"]
|
|
466
469
|
all_results = []
|
|
467
470
|
|
|
471
|
+
# Normalize arenas list and build the Milvus filter expression.
|
|
472
|
+
if arenas is None:
|
|
473
|
+
arena_list = [arena] if arena else []
|
|
474
|
+
else:
|
|
475
|
+
arena_list = [a for a in arenas if a]
|
|
468
476
|
filter_expr = ""
|
|
469
|
-
if
|
|
470
|
-
|
|
471
|
-
safe = str(arena).replace('"', '\\"')
|
|
477
|
+
if len(arena_list) == 1:
|
|
478
|
+
safe = str(arena_list[0]).replace('"', '\\"')
|
|
472
479
|
filter_expr = f'arena == "{safe}"'
|
|
480
|
+
elif len(arena_list) > 1:
|
|
481
|
+
quoted = ", ".join(
|
|
482
|
+
'"{}"'.format(str(a).replace('"', '\\"')) for a in arena_list
|
|
483
|
+
)
|
|
484
|
+
filter_expr = f'arena in [{quoted}]'
|
|
473
485
|
|
|
474
486
|
for coll in collections:
|
|
475
487
|
if not client.has_collection(coll):
|
|
@@ -562,8 +574,12 @@ def serve(port=8034):
|
|
|
562
574
|
|
|
563
575
|
@api.get("/search")
|
|
564
576
|
def api_search(q: str = Query(...), collection: str = None, limit: int = 10,
|
|
565
|
-
arena: str = None):
|
|
566
|
-
|
|
577
|
+
arena: str = None, arenas: list = Query(default=[])):
|
|
578
|
+
# `arenas` (repeated query param) wins when both are present.
|
|
579
|
+
results = search(
|
|
580
|
+
q, collection=collection, limit=limit,
|
|
581
|
+
arena=arena, arenas=arenas or None,
|
|
582
|
+
)
|
|
567
583
|
return {"query": q, "results": results, "count": len(results)}
|
|
568
584
|
|
|
569
585
|
@api.get("/stats")
|
|
@@ -303,9 +303,25 @@ def get_milvus() -> MilvusClient:
|
|
|
303
303
|
|
|
304
304
|
|
|
305
305
|
def search_vector(client: MilvusClient, query_vec: List[float], limit: int = 20,
|
|
306
|
-
arena: Optional[str] = None
|
|
307
|
-
|
|
308
|
-
|
|
306
|
+
arena: Optional[str] = None,
|
|
307
|
+
arenas: Optional[List[str]] = None) -> List[Dict]:
|
|
308
|
+
"""Vector similarity search.
|
|
309
|
+
|
|
310
|
+
Multi-arena: pass `arenas=[...]` to span more than one tenant scope
|
|
311
|
+
(e.g. tenant-wide + a single user-scope). Builds an `arena IN [...]`
|
|
312
|
+
Milvus filter. `arena` is used (as a single-element list) only when `arenas` is empty or None.
|
|
313
|
+
"""
|
|
314
|
+
arena_list = list(arenas) if arenas else ([arena] if arena else [])
|
|
315
|
+
if len(arena_list) == 1:
|
|
316
|
+
safe = str(arena_list[0]).replace('"', '\\"')
|
|
317
|
+
filter_expr = f'arena == "{safe}"'
|
|
318
|
+
elif len(arena_list) > 1:
|
|
319
|
+
quoted = ", ".join(
|
|
320
|
+
'"{}"'.format(str(a).replace('"', '\\"')) for a in arena_list
|
|
321
|
+
)
|
|
322
|
+
filter_expr = f'arena in [{quoted}]'
|
|
323
|
+
else:
|
|
324
|
+
filter_expr = ""
|
|
309
325
|
results = client.search(
|
|
310
326
|
collection_name=COLLECTION_NAME,
|
|
311
327
|
data=[query_vec],
|
|
@@ -386,15 +402,26 @@ def get_fts_db() -> sqlite3.Connection:
|
|
|
386
402
|
|
|
387
403
|
|
|
388
404
|
def search_fts(conn: sqlite3.Connection, query: str, limit: int = 20,
|
|
389
|
-
arena: Optional[str] = None
|
|
390
|
-
|
|
405
|
+
arena: Optional[str] = None,
|
|
406
|
+
arenas: Optional[List[str]] = None) -> List[Dict]:
|
|
407
|
+
"""BM25 keyword search via FTS5.
|
|
408
|
+
|
|
409
|
+
Multi-arena: pass `arenas=[...]` to build a parameterized `c.arena IN (?, ...)` clause,
|
|
410
|
+
so a single search can span tenant-wide + own user-scope.
|
|
411
|
+
"""
|
|
391
412
|
# Escape FTS5 special chars
|
|
392
413
|
safe_query = re.sub(r'[^\w\s]', ' ', query).strip()
|
|
393
414
|
if not safe_query:
|
|
394
415
|
return []
|
|
395
416
|
|
|
396
|
-
|
|
397
|
-
|
|
417
|
+
arena_list = list(arenas) if arenas else ([arena] if arena else [])
|
|
418
|
+
if arena_list:
|
|
419
|
+
placeholders = ", ".join(["?"] * len(arena_list))
|
|
420
|
+
arena_filter = f"AND c.arena IN ({placeholders})"
|
|
421
|
+
params = [safe_query, *arena_list, limit]
|
|
422
|
+
else:
|
|
423
|
+
arena_filter = ""
|
|
424
|
+
params = [safe_query, limit]
|
|
398
425
|
|
|
399
426
|
sql = f"""
|
|
400
427
|
SELECT c.*, bm25(chunks_fts) as rank
|
|
@@ -690,19 +717,28 @@ def _parse_entities_json(s: str) -> List[str]:
|
|
|
690
717
|
# ---------------------------------------------------------------------------
|
|
691
718
|
|
|
692
719
|
def search(query: str, method: str = "hybrid", limit: int = 10,
|
|
693
|
-
arena: Optional[str] = None,
|
|
694
|
-
|
|
720
|
+
arena: Optional[str] = None,
|
|
721
|
+
arenas: Optional[List[str]] = None,
|
|
722
|
+
enable_rerank: bool = True) -> List[Dict]:
|
|
723
|
+
"""Search documents with specified method.
|
|
724
|
+
|
|
725
|
+
arena / arenas: pass either; multi-arena lets a single query span
|
|
726
|
+
multiple tenant scopes (tenant-wide + user-scope). Forwarded
|
|
727
|
+
natively to both the vector path (Milvus `arena IN [...]`) and the
|
|
728
|
+
BM25 path (SQLite `c.arena IN (...)`).
|
|
729
|
+
"""
|
|
730
|
+
arena_list = list(arenas) if arenas else ([arena] if arena else [])
|
|
695
731
|
|
|
696
732
|
if method == "vector":
|
|
697
733
|
vec = embed_text(query)
|
|
698
|
-
results = search_vector(get_milvus(), vec, limit=limit,
|
|
734
|
+
results = search_vector(get_milvus(), vec, limit=limit, arenas=arena_list)
|
|
699
735
|
elif method == "bm25":
|
|
700
|
-
results = search_fts(get_fts_db(), query, limit=limit,
|
|
736
|
+
results = search_fts(get_fts_db(), query, limit=limit, arenas=arena_list)
|
|
701
737
|
else:
|
|
702
738
|
# Hybrid: RRF fusion
|
|
703
739
|
vec = embed_text(query)
|
|
704
|
-
vector_results = search_vector(get_milvus(), vec, limit=20,
|
|
705
|
-
bm25_results = search_fts(get_fts_db(), query, limit=20,
|
|
740
|
+
vector_results = search_vector(get_milvus(), vec, limit=20, arenas=arena_list)
|
|
741
|
+
bm25_results = search_fts(get_fts_db(), query, limit=20, arenas=arena_list)
|
|
706
742
|
results = rrf_fuse(vector_results, bm25_results)
|
|
707
743
|
|
|
708
744
|
# Rerank if enabled
|
|
@@ -812,9 +848,14 @@ def serve(port: int = DEFAULT_PORT):
|
|
|
812
848
|
method: str = Q("hybrid", description="hybrid|vector|bm25"),
|
|
813
849
|
limit: int = Q(10, ge=1, le=50),
|
|
814
850
|
arena: Optional[str] = Q(None),
|
|
851
|
+
arenas: List[str] = Q(default=[]),
|
|
815
852
|
rerank: bool = Q(True),
|
|
816
853
|
):
|
|
817
|
-
results = search(
|
|
854
|
+
results = search(
|
|
855
|
+
q, method=method, limit=limit,
|
|
856
|
+
arena=arena, arenas=arenas or None,
|
|
857
|
+
enable_rerank=rerank,
|
|
858
|
+
)
|
|
818
859
|
return {"query": q, "method": method, "results": results, "count": len(results)}
|
|
819
860
|
|
|
820
861
|
@api.post("/search")
|
|
@@ -823,10 +864,15 @@ def serve(port: int = DEFAULT_PORT):
|
|
|
823
864
|
method: str = "hybrid",
|
|
824
865
|
limit: int = 10,
|
|
825
866
|
arena: Optional[str] = None,
|
|
867
|
+
arenas: Optional[List[str]] = None,
|
|
826
868
|
rerank: bool = True,
|
|
827
869
|
):
|
|
828
870
|
"""POST version of search for compatibility."""
|
|
829
|
-
results = search(
|
|
871
|
+
results = search(
|
|
872
|
+
q, method=method, limit=limit,
|
|
873
|
+
arena=arena, arenas=arenas,
|
|
874
|
+
enable_rerank=rerank,
|
|
875
|
+
)
|
|
830
876
|
return {"query": q, "method": method, "results": results, "count": len(results)}
|
|
831
877
|
|
|
832
878
|
@api.post("/index")
|
|
@@ -125,6 +125,66 @@ print("yes" if ok and data else "no")')
|
|
|
125
125
|
[ "$all_match" = "yes" ] && ok "metadata_filter scopes to probe + arena" \
|
|
126
126
|
|| fail "metadata_filter let other rows through"
|
|
127
127
|
|
|
128
|
+
# ---------------------------------------------------------------------------
|
|
129
|
+
# User-scope vs tenant-wide arenas — proves the multi-arena search model.
|
|
130
|
+
#
|
|
131
|
+
# tenant-wide row arena=acme (visible to every user in acme)
|
|
132
|
+
# user-A's row arena=acme:user-a (only user-A retrieves it)
|
|
133
|
+
# user-B's row arena=acme:user-b (only user-B retrieves it)
|
|
134
|
+
#
|
|
135
|
+
# A user-scoped search sends arenas=[acme, acme:userX] so the user sees
|
|
136
|
+
# tenant-wide AND own user-scope, but never another user's user-scope.
|
|
137
|
+
# ---------------------------------------------------------------------------
|
|
138
|
+
|
|
139
|
+
echo ""
|
|
140
|
+
echo "=== user-scope vs tenant-wide ==="
|
|
141
|
+
post '{"content":"acme tenant-wide rules of engagement","metadata":{"arena":"acme","probe":"e2e-arena"}}' >/dev/null
|
|
142
|
+
post '{"content":"alice private note about Project Mercury","metadata":{"arena":"acme:alice","probe":"e2e-arena"}}' >/dev/null
|
|
143
|
+
post '{"content":"bob private note about Project Saturn","metadata":{"arena":"acme:bob","probe":"e2e-arena"}}' >/dev/null
|
|
144
|
+
sleep 3
|
|
145
|
+
|
|
146
|
+
# Search as alice: arenas=[acme, acme:alice] — should see tenant-wide + own
|
|
147
|
+
SAlice=$(curl -sf -X POST "$BASE/search" -H "Content-Type: application/json" \
|
|
148
|
+
-d '{"query":"Project rules note","limit":20,"arenas":["acme","acme:alice"]}')
|
|
149
|
+
|
|
150
|
+
alice_sees_tenant=$(echo "$SAlice" | python3 -c '
|
|
151
|
+
import json,sys
|
|
152
|
+
data=json.load(sys.stdin).get("results",[])
|
|
153
|
+
print("yes" if any("tenant-wide" in r.get("content","") for r in data) else "no")')
|
|
154
|
+
alice_sees_own=$(echo "$SAlice" | python3 -c '
|
|
155
|
+
import json,sys
|
|
156
|
+
data=json.load(sys.stdin).get("results",[])
|
|
157
|
+
print("yes" if any("Mercury" in r.get("content","") for r in data) else "no")')
|
|
158
|
+
alice_leak_bob=$(echo "$SAlice" | python3 -c '
|
|
159
|
+
import json,sys
|
|
160
|
+
data=json.load(sys.stdin).get("results",[])
|
|
161
|
+
print(sum(1 for r in data if "Saturn" in r.get("content","")))')
|
|
162
|
+
|
|
163
|
+
[ "$alice_sees_tenant" = "yes" ] && ok "alice: tenant-wide visible" \
|
|
164
|
+
|| fail "alice: missing tenant-wide row"
|
|
165
|
+
[ "$alice_sees_own" = "yes" ] && ok "alice: own user-scope visible" \
|
|
166
|
+
|| fail "alice: missing own user-scope row"
|
|
167
|
+
[ "$alice_leak_bob" = "0" ] && ok "alice: no leakage of bob's user-scope" \
|
|
168
|
+
|| fail "alice leaked $alice_leak_bob bob rows (cross-user!)"
|
|
169
|
+
|
|
170
|
+
# Search as bob: arenas=[acme, acme:bob] — should see tenant-wide + own
|
|
171
|
+
SBob=$(curl -sf -X POST "$BASE/search" -H "Content-Type: application/json" \
|
|
172
|
+
-d '{"query":"Project rules note","limit":20,"arenas":["acme","acme:bob"]}')
|
|
173
|
+
|
|
174
|
+
bob_sees_own=$(echo "$SBob" | python3 -c '
|
|
175
|
+
import json,sys
|
|
176
|
+
data=json.load(sys.stdin).get("results",[])
|
|
177
|
+
print("yes" if any("Saturn" in r.get("content","") for r in data) else "no")')
|
|
178
|
+
bob_leak_alice=$(echo "$SBob" | python3 -c '
|
|
179
|
+
import json,sys
|
|
180
|
+
data=json.load(sys.stdin).get("results",[])
|
|
181
|
+
print(sum(1 for r in data if "Mercury" in r.get("content","")))')
|
|
182
|
+
|
|
183
|
+
[ "$bob_sees_own" = "yes" ] && ok "bob: own user-scope visible" \
|
|
184
|
+
|| fail "bob: missing own user-scope row"
|
|
185
|
+
[ "$bob_leak_alice" = "0" ] && ok "bob: no leakage of alice's user-scope" \
|
|
186
|
+
|| fail "bob leaked $bob_leak_alice alice rows (cross-user!)"
|
|
187
|
+
|
|
128
188
|
# ---------------------------------------------------------------------------
|
|
129
189
|
# Same content across two arenas — proves the arena-aware id derivation.
|
|
130
190
|
# Pre-v0.7.8, identical content collapsed to one row in L4/L5/L6 because
|