@cerefox/memory 0.7.2 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/dist/bin/cerefox.js +1096 -344
  2. package/dist/frontend/assets/{index-BzAPcCXA.js → index-CAp2_lFX.js} +2 -2
  3. package/dist/frontend/assets/index-CAp2_lFX.js.map +1 -0
  4. package/dist/frontend/index.html +1 -1
  5. package/dist/server-assets/_shared/ef-meta/index.ts +97 -0
  6. package/dist/server-assets/_shared/embeddings/index.ts +175 -0
  7. package/dist/server-assets/_shared/mcp-tools/_chunker.ts +187 -0
  8. package/dist/server-assets/_shared/mcp-tools/_projects.ts +121 -0
  9. package/dist/server-assets/_shared/mcp-tools/_utils.ts +73 -0
  10. package/dist/server-assets/_shared/mcp-tools/audit-log.ts +95 -0
  11. package/dist/server-assets/_shared/mcp-tools/get-document.ts +73 -0
  12. package/dist/server-assets/_shared/mcp-tools/get-help-content.ts +26 -0
  13. package/dist/server-assets/_shared/mcp-tools/get-help.ts +90 -0
  14. package/dist/server-assets/_shared/mcp-tools/index.ts +67 -0
  15. package/dist/server-assets/_shared/mcp-tools/ingest.ts +315 -0
  16. package/dist/server-assets/_shared/mcp-tools/list-metadata-keys.ts +55 -0
  17. package/dist/server-assets/_shared/mcp-tools/list-projects.ts +59 -0
  18. package/dist/server-assets/_shared/mcp-tools/list-versions.ts +72 -0
  19. package/dist/server-assets/_shared/mcp-tools/metadata-search.ts +154 -0
  20. package/dist/server-assets/_shared/mcp-tools/search.ts +193 -0
  21. package/dist/server-assets/_shared/mcp-tools/set-document-projects.ts +163 -0
  22. package/dist/server-assets/_shared/mcp-tools/types.ts +92 -0
  23. package/dist/server-assets/db/migrations/0003_add_document_versions.sql +91 -0
  24. package/dist/server-assets/db/migrations/0004_add_audit_log_review_status_archived.sql +71 -0
  25. package/dist/server-assets/db/migrations/0005_metadata_search.sql +628 -0
  26. package/dist/server-assets/db/migrations/0006_usage_log.sql +255 -0
  27. package/dist/server-assets/db/migrations/0007_usage_log_requestor.sql +178 -0
  28. package/dist/server-assets/db/migrations/0008_soft_delete.sql +130 -0
  29. package/dist/server-assets/db/migrations/0009_audit_log_restore_operation.sql +20 -0
  30. package/dist/server-assets/db/migrations/0010_requestor_enforcement_config.sql +12 -0
  31. package/dist/server-assets/db/migrations/0011_title_boosting.sql +48 -0
  32. package/dist/server-assets/db/rpcs.sql +1723 -0
  33. package/dist/server-assets/db/schema.sql +380 -0
  34. package/dist/server-assets/supabase/functions/cerefox-get-audit-log/index.ts +117 -0
  35. package/dist/server-assets/supabase/functions/cerefox-get-document/index.ts +138 -0
  36. package/dist/server-assets/supabase/functions/cerefox-ingest/index.ts +819 -0
  37. package/dist/server-assets/supabase/functions/cerefox-list-projects/index.ts +96 -0
  38. package/dist/server-assets/supabase/functions/cerefox-list-versions/index.ts +113 -0
  39. package/dist/server-assets/supabase/functions/cerefox-mcp/index.ts +294 -0
  40. package/dist/server-assets/supabase/functions/cerefox-mcp/shared.ts +42 -0
  41. package/dist/server-assets/supabase/functions/cerefox-metadata/index.ts +99 -0
  42. package/dist/server-assets/supabase/functions/cerefox-metadata-search/index.ts +146 -0
  43. package/dist/server-assets/supabase/functions/cerefox-search/index.ts +382 -0
  44. package/docs/guides/connect-agents.md +58 -3
  45. package/docs/guides/migration-v0.5.md +50 -0
  46. package/package.json +3 -2
  47. package/dist/frontend/assets/index-BzAPcCXA.js.map +0 -1
@@ -0,0 +1,146 @@
1
+ import "jsr:@supabase/functions-js/edge-runtime.d.ts";
2
+ import { createClient } from "jsr:@supabase/supabase-js@2";
3
+ import { isVersionRequest, versionResponse } from "../../../_shared/ef-meta/index.ts";
4
+
5
+ /**
6
+ * cerefox-metadata-search -- Supabase Edge Function
7
+ *
8
+ * Query documents by metadata key-value criteria without a text search term.
9
+ * Calls the cerefox_metadata_search() RPC via the service-role key.
10
+ *
11
+ * Called by:
12
+ * - GPT Custom Actions (direct HTTP POST via OpenAPI schema)
13
+ * - Any authenticated HTTP client
14
+ *
15
+ * Note: cerefox-mcp calls the RPC directly (not this Edge Function).
16
+ *
17
+ * Request body (JSON):
18
+ * metadata_filter object required Key-value pairs (AND semantics)
19
+ * project_id string optional Project UUID filter
20
+ * updated_since string optional ISO-8601 lower bound for updated_at
21
+ * created_since string optional ISO-8601 lower bound for created_at
22
+ * limit number optional Max results (default: 10)
23
+ * include_content boolean optional Include full text (default: false)
24
+ * max_bytes number optional Byte budget when include_content=true
25
+ *
26
+ * Response (200): Array of matching documents
27
+ * Response (400): { error: "..." }
28
+ */
29
+
30
/** Default and hard ceiling for the `max_bytes` budget forwarded to the RPC. */
const MAX_BYTES = 200_000;

/** CORS headers attached to every response from this function. */
const CORS_HEADERS = {
  "Access-Control-Allow-Origin": "*",
  "Access-Control-Allow-Methods": "POST, OPTIONS",
  "Access-Control-Allow-Headers": "Content-Type, Authorization, apikey",
};

/** Headers for JSON replies: the CORS set plus the JSON content type. */
const JSON_HEADERS = { ...CORS_HEADERS, "Content-Type": "application/json" };

/** Serialises `payload` as a JSON response with the given HTTP status. */
function jsonResponse(payload: unknown, status: number): Response {
  return new Response(JSON.stringify(payload), { status, headers: JSON_HEADERS });
}

/**
 * Resolves the caller's `max_bytes` against the server ceiling.
 *
 * Omitted values default to MAX_BYTES and larger values are capped at it.
 * Values that do not convert to a finite number (e.g. `"abc"`) also fall back
 * to MAX_BYTES; without this guard `Math.min` would yield NaN, which is
 * serialised as `null` in the RPC call and loses the ceiling.
 */
function clampByteBudget(requested: unknown): number {
  const budget = Number(requested ?? MAX_BYTES);
  return Number.isFinite(budget) ? Math.min(budget, MAX_BYTES) : MAX_BYTES;
}

/**
 * HTTP entry point.
 *
 * OPTIONS  -> 200 with CORS headers (preflight)
 * version  -> delegated to versionResponse()
 * non-POST -> 405
 * POST     -> validates the body, optionally enforces the `requestor`
 *             identity, calls the cerefox_metadata_search RPC and returns its
 *             rows as a JSON array.
 *
 * Bad requests produce 400 `{ error }`; RPC or unexpected failures produce
 * 500 `{ error }`.
 */
Deno.serve(async (req: Request): Promise<Response> => {
  if (req.method === "OPTIONS") {
    return new Response(null, { status: 200, headers: CORS_HEADERS });
  }

  if (isVersionRequest(req)) {
    return versionResponse("cerefox-metadata-search", JSON_HEADERS);
  }

  if (req.method !== "POST") {
    return new Response("Method Not Allowed", { status: 405, headers: CORS_HEADERS });
  }

  // A malformed body is the caller's mistake: answer 400 instead of letting the
  // parse error reach the generic 500 handler below.
  let parsed: unknown;
  try {
    parsed = await req.json();
  } catch {
    return jsonResponse({ error: "Invalid JSON body" }, 400);
  }

  // Non-object payloads (null, numbers, strings) carry no fields. Treating them
  // as an empty object lets the metadata_filter check reject them with a 400
  // rather than throwing on property access.
  const body = (typeof parsed === "object" && parsed !== null ? parsed : {}) as Record<string, unknown>;

  try {
    const metadata_filter = body.metadata_filter;
    if (
      !metadata_filter ||
      typeof metadata_filter !== "object" ||
      Array.isArray(metadata_filter) ||
      Object.keys(metadata_filter).length === 0
    ) {
      return jsonResponse(
        { error: "metadata_filter is required and must be a non-empty JSON object" },
        400,
      );
    }

    const project_id = body.project_id ?? null;
    const updated_since = body.updated_since ?? null;
    const created_since = body.created_since ?? null;
    const limit = body.limit ?? 10;
    const include_content = body.include_content ?? false;

    // The byte budget only applies when document content is returned.
    const max_bytes = include_content ? clampByteBudget(body.max_bytes) : null;

    const supabaseUrl = Deno.env.get("SUPABASE_URL")!;
    const supabaseKey = Deno.env.get("SUPABASE_SERVICE_ROLE_KEY")!;
    const supabase = createClient(supabaseUrl, supabaseKey);

    // Configurable requestor enforcement: active only when the
    // `require_requestor_identity` config value is the string "true".
    const identityField = "requestor";
    const identityValue = body[identityField];
    const { data: reqConfig } = await supabase.rpc("cerefox_get_config", { p_key: "require_requestor_identity" });
    if (reqConfig === "true") {
      if (!identityValue || (typeof identityValue === "string" && identityValue.trim() === "")) {
        return jsonResponse(
          { error: `Missing required parameter "${identityField}". Server requires caller identity.` },
          400,
        );
      }
      // Optional format check: the config value is used as a regular expression.
      const { data: fmtConfig } = await supabase.rpc("cerefox_get_config", { p_key: "requestor_identity_format" });
      if (fmtConfig && typeof fmtConfig === "string" && fmtConfig.trim() !== "") {
        if (!new RegExp(fmtConfig).test(String(identityValue))) {
          return jsonResponse(
            { error: `Invalid "${identityField}" format. Does not match pattern: ${fmtConfig}` },
            400,
          );
        }
      }
    }

    const params: Record<string, unknown> = {
      p_metadata_filter: metadata_filter,
      p_project_id: project_id,
      p_updated_since: updated_since,
      p_created_since: created_since,
      p_limit: limit,
      p_include_content: include_content,
    };
    // Omit p_max_bytes entirely when content is not requested.
    if (max_bytes !== null) {
      params.p_max_bytes = max_bytes;
    }

    const { data, error } = await supabase.rpc("cerefox_metadata_search", params);
    if (error) {
      return jsonResponse({ error: error.message }, 500);
    }

    // Fire-and-forget usage logging: never awaited, failures are ignored so
    // logging cannot affect the response.
    Promise.resolve(supabase.rpc("cerefox_log_usage", {
      p_operation: "metadata_search",
      p_access_path: "edge-function",
      p_requestor: body.requestor ?? null,
      p_query_text: JSON.stringify(metadata_filter),
      p_result_count: (data ?? []).length,
      p_project_id: project_id,
    })).catch(() => {});

    return jsonResponse(data ?? [], 200);
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    return jsonResponse({ error: message }, 500);
  }
});
@@ -0,0 +1,382 @@
1
+ import "jsr:@supabase/functions-js/edge-runtime.d.ts";
2
+ import { createClient } from "jsr:@supabase/supabase-js@2";
3
+ import { isVersionRequest, versionResponse } from "../../../_shared/ef-meta/index.ts";
4
+
5
+ /**
6
+ * cerefox-search — Supabase Edge Function
7
+ *
8
+ * Accepts a plain-text query, embeds it server-side using the OpenAI API,
9
+ * then calls the appropriate Cerefox search RPC and returns the results.
10
+ *
11
+ * Called by the cerefox-mcp Edge Function (MCP Streamable HTTP), GPT Actions
12
+ * (direct HTTP POST), or any HTTP client. No SQL required, no local embedder.
13
+ *
14
+ * Request body (JSON):
15
+ * query string required Natural-language search query
16
+ * project_name string optional Project to filter by (looked up by name)
17
+ * match_count number optional Max results (default: 5)
18
+ * mode string optional "hybrid" | "fts" | "docs" (default: "docs")
19
+ * alpha number optional Semantic weight for hybrid search (default: 0.7)
20
+ * min_score number optional Min cosine similarity (default: 0.5)
21
+ * metadata_filter object optional JSONB containment filter. Only documents whose
22
+ * metadata contains ALL specified key-value pairs
23
+ * are returned. Example: {"type":"decision"}.
24
+ * Use cerefox-metadata to discover available keys.
25
+ * max_bytes number optional Response size budget in bytes (default: 200000,
26
+ * hard ceiling: 200000). Agents may pass a smaller
27
+ * value to fit their context window; values above
28
+ * the server ceiling are silently capped. Results
29
+ * are dropped whole (never truncated mid-doc) until
30
+ * the budget is satisfied. The response includes a
31
+ * `truncated` flag when results were dropped.
32
+ *
33
+ * Response: { results: [...], query, mode, match_count, project_name (or null),
34
+ * metadata_filter (or null), truncated: boolean, response_bytes: number }
35
+ *
36
+ * Example agent prompt:
37
+ * "Invoke the cerefox-search edge function with query='knowledge management'
38
+ * and project_name='Personal'. Summarize the results."
39
+ */
40
+
41
/** Endpoint of the OpenAI embeddings API. */
const OPENAI_EMBEDDING_URL = "https://api.openai.com/v1/embeddings";

/** Embedding model requested from OpenAI. */
const OPENAI_MODEL = "text-embedding-3-small";

/** Vector size requested through the `dimensions` field of the embedding call. */
const EMBEDDING_DIMENSIONS = 768;

/**
 * Server-enforced response size ceiling, in bytes.
 *
 * Serves two roles: it is the default budget when the caller omits
 * `max_bytes`, and the hard upper bound when the caller asks for more.
 * Callers may pass a smaller value to fit their own context budget.
 * Small-to-big retrieval limits large-document results to the matched chunks
 * plus neighbours, so the ceiling is rarely hit at the default match_count=5.
 */
const MAX_BYTES = 200_000;

/** JSON body accepted by the cerefox-search function. */
interface SearchRequest {
  /** Natural-language search query (required). */
  query: string;
  /** Project to filter by; resolved to an id by name. */
  project_name?: string;
  /** Maximum number of results (handler default: 5). */
  match_count?: number;
  /** Search strategy (handler default: "docs"). */
  mode?: "hybrid" | "fts" | "docs";
  /** Semantic weight for hybrid search (handler default: 0.7). */
  alpha?: number;
  /** Minimum cosine similarity (handler default: 0.5). */
  min_score?: number;
  /** JSONB containment filter; null or absent means no filter. */
  metadata_filter?: Record<string, string> | null;
  /** Response size budget in bytes; capped at MAX_BYTES. */
  max_bytes?: number;
  /** Identity of the caller, checked when the server requires it. */
  requestor?: string;
}

/** Maximum number of attempts made against the embedding API. */
const EMBEDDING_MAX_RETRIES = 3;

/** Delay before the first retry in ms; doubles on every further attempt (500ms, 1s, 2s). */
const EMBEDDING_INITIAL_BACKOFF_MS = 500;
68
+
69
/**
 * Embeds `text` with the OpenAI embeddings endpoint and returns the vector.
 *
 * Makes at most EMBEDDING_MAX_RETRIES attempts. Network failures, HTTP 5xx,
 * HTTP 429 (rate limit) and 2xx responses without an embedding are retried
 * with exponential backoff starting at EMBEDDING_INITIAL_BACKOFF_MS. Any other
 * non-2xx status fails immediately because a retry cannot fix it.
 *
 * @param text   Text to embed.
 * @param apiKey OpenAI API key, sent as a Bearer token.
 * @returns The embedding of the (single) input.
 * @throws Error "OpenAI embedding error <status>: <body>" for a non-retryable
 *         status, or the last recorded failure once all attempts are used up.
 */
async function getEmbedding(text: string, apiKey: string): Promise<number[]> {
  let lastError: Error | null = null;

  for (let attempt = 0; attempt < EMBEDDING_MAX_RETRIES; attempt++) {
    // Set only for failures that another attempt cannot fix.
    let fatal: Error | null = null;
    let warning = "";

    try {
      const response = await fetch(OPENAI_EMBEDDING_URL, {
        method: "POST",
        headers: {
          "Authorization": `Bearer ${apiKey}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          model: OPENAI_MODEL,
          input: text,
          dimensions: EMBEDDING_DIMENSIONS,
        }),
      });

      if (response.ok) {
        const payload = await response.json();
        // Validate the shape instead of trusting it, so a malformed payload
        // surfaces as an error rather than an `undefined` embedding.
        const embedding: unknown = payload?.data?.[0]?.embedding;
        if (Array.isArray(embedding)) {
          if (attempt > 0) {
            console.info(`Embedding API succeeded on retry ${attempt}`);
          }
          return embedding;
        }
        lastError = new Error("OpenAI embedding error: response contained no embedding vector");
        warning = "Embedding API returned a response without an embedding";
      } else {
        const detail = await response.text();
        lastError = new Error(`OpenAI embedding error ${response.status}: ${detail}`);
        warning = `Embedding API returned ${response.status}`;
        // 5xx and 429 are transient; every other status is a caller/config
        // problem that a retry would only repeat.
        const retryable = response.status >= 500 || response.status === 429;
        if (!retryable) {
          fatal = lastError;
        }
      }
    } catch (err) {
      // Network/timeout errors (and unreadable response bodies) are retryable.
      lastError = err instanceof Error ? err : new Error(String(err));
      warning = `Embedding API request failed: ${lastError.message}`;
    }

    if (fatal) {
      throw fatal;
    }

    // Do not sleep (or announce a retry) after the final attempt.
    if (attempt + 1 >= EMBEDDING_MAX_RETRIES) {
      break;
    }

    const backoff = EMBEDDING_INITIAL_BACKOFF_MS * Math.pow(2, attempt);
    console.warn(
      `${warning} (attempt ${attempt + 1}/${EMBEDDING_MAX_RETRIES}), retrying in ${backoff}ms`,
    );
    await new Promise((resolve) => setTimeout(resolve, backoff));
  }

  throw lastError ?? new Error(`Embedding API failed after ${EMBEDDING_MAX_RETRIES} attempts`);
}
125
+
126
/**
 * Resolves a project name to its id via a case-insensitive match on
 * `cerefox_projects.name`.
 *
 * The name is caller-supplied and `ilike` interprets its argument as a
 * pattern, so the LIKE wildcards `%` and `_` (and the escape character `\`)
 * are escaped first. Without this, "my_project" could resolve to a different
 * project such as "my-project", and "%" would resolve to an arbitrary one.
 *
 * NOTE(review): PostgREST documents `*` as an alias for `%` in like/ilike
 * patterns; a name containing `*` may therefore still match loosely — confirm
 * whether such names need to be supported.
 *
 * @param supabase    Supabase client used for the lookup.
 * @param projectName Project name as supplied by the caller.
 * @returns The id of the first matching project, or null when the query fails
 *          or nothing matches.
 */
async function lookupProjectId(
  supabase: ReturnType<typeof createClient>,
  projectName: string,
): Promise<string | null> {
  // Prefix each LIKE metacharacter with a backslash so it matches literally.
  const literalName = projectName.replace(/[\\%_]/g, "\\$&");

  const { data, error } = await supabase
    .from("cerefox_projects")
    .select("id")
    .ilike("name", literalName)
    .limit(1);

  if (error || !data?.length) return null;
  return data[0].id;
}
139
+
140
/**
 * Keeps the longest prefix of `rows` whose combined size fits in `maxBytes`.
 *
 * A row's size is the UTF-8 byte length of its JSON serialisation. Rows are
 * taken in order and the scan stops at the first row that would exceed the
 * budget; rows are kept or dropped whole, never cut short.
 *
 * @param rows     Result rows, in ranking order.
 * @param maxBytes Byte budget for the accepted rows.
 * @returns `accepted` (the kept prefix), `truncated` (true when at least one
 *          row was dropped) and `usedBytes` (total size of the kept rows).
 */
function applyByteBudget(
  rows: unknown[],
  maxBytes: number,
): { accepted: unknown[]; truncated: boolean; usedBytes: number } {
  const encoder = new TextEncoder();
  let usedBytes = 0;
  let kept = 0;

  while (kept < rows.length) {
    const size = encoder.encode(JSON.stringify(rows[kept])).length;
    if (usedBytes + size > maxBytes) break;
    usedBytes += size;
    kept += 1;
  }

  return {
    accepted: rows.slice(0, kept),
    truncated: kept < rows.length,
    usedBytes,
  };
}
168
+
169
/** Headers attached to every JSON response from this function. */
const headers = {
  "Content-Type": "application/json",
  "Access-Control-Allow-Origin": "*",
};

/**
 * HTTP entry point.
 *
 * OPTIONS  -> CORS preflight reply
 * version  -> delegated to versionResponse()
 * non-POST -> 405
 * POST     -> validates the body, optionally enforces the `requestor`
 *             identity, resolves the project name, embeds the query (all
 *             modes except "fts"), calls the matching search RPC and applies
 *             the byte budget to the rows before returning them.
 *
 * Error statuses: 400 bad request, 404 unknown project, 502 embedding
 * failure, 500 RPC / configuration / unexpected failure.
 */
Deno.serve(async (req: Request) => {
  // CORS preflight
  if (req.method === "OPTIONS") {
    return new Response(null, {
      headers: {
        "Access-Control-Allow-Origin": "*",
        "Access-Control-Allow-Headers": "authorization, x-client-info, apikey, content-type",
      },
    });
  }

  if (isVersionRequest(req)) {
    return versionResponse("cerefox-search", headers);
  }

  if (req.method !== "POST") {
    return new Response(JSON.stringify({ error: "POST required" }), {
      status: 405,
      headers,
    });
  }

  let parsed: unknown;
  try {
    parsed = await req.json();
  } catch {
    return new Response(JSON.stringify({ error: "Invalid JSON body" }), {
      status: 400,
      headers,
    });
  }

  // `null` is valid JSON but cannot be destructured. Treat any non-object
  // payload as an empty request so it is rejected below with the regular
  // "query is required" 400 instead of throwing.
  const body = (typeof parsed === "object" && parsed !== null ? parsed : {}) as SearchRequest;

  const {
    query,
    project_name,
    match_count = 5,
    mode = "docs",
    alpha = 0.7,
    min_score = 0.5,
    metadata_filter = null,
    max_bytes: requested_max_bytes,
  } = body;

  // Enforce ceiling: agents may request less but never more than MAX_BYTES.
  // A value that is not a finite number would turn the budget into NaN, and
  // every comparison against NaN is false, so nothing would ever be dropped.
  // Fall back to the ceiling in that case.
  const requestedBudget = Number(requested_max_bytes ?? MAX_BYTES);
  const max_bytes = Number.isFinite(requestedBudget)
    ? Math.min(requestedBudget, MAX_BYTES)
    : MAX_BYTES;

  // Validate metadata_filter: must be a plain object (or null/absent).
  // Reject arrays, strings, and other non-object types to prevent RPC errors.
  if (
    metadata_filter !== null &&
    metadata_filter !== undefined &&
    (typeof metadata_filter !== "object" || Array.isArray(metadata_filter))
  ) {
    return new Response(
      JSON.stringify({ error: "metadata_filter must be a JSON object or null" }),
      { status: 400, headers },
    );
  }

  if (!query || typeof query !== "string" || !query.trim()) {
    return new Response(JSON.stringify({ error: "query is required" }), {
      status: 400,
      headers,
    });
  }

  // The OpenAI key is only needed when the query is embedded. FTS mode is
  // keyword-only, so it must keep working on projects without the secret.
  let embeddingKey: string | null = null;
  if (mode !== "fts") {
    const openaiKey = Deno.env.get("OPENAI_API_KEY");
    if (!openaiKey) {
      return new Response(
        JSON.stringify({ error: "OPENAI_API_KEY secret not set on this project" }),
        { status: 500, headers },
      );
    }
    embeddingKey = openaiKey;
  }

  try {
    const supabaseUrl = Deno.env.get("SUPABASE_URL")!;
    const supabaseKey = Deno.env.get("SUPABASE_SERVICE_ROLE_KEY")!;
    const supabase = createClient(supabaseUrl, supabaseKey);

    // Configurable requestor enforcement: active only when the
    // `require_requestor_identity` config value is the string "true".
    const identityField = "requestor";
    const identityValue = body.requestor;
    const { data: reqConfig } = await supabase.rpc("cerefox_get_config", { p_key: "require_requestor_identity" });
    if (reqConfig === "true") {
      if (!identityValue || (typeof identityValue === "string" && identityValue.trim() === "")) {
        return new Response(
          JSON.stringify({ error: `Missing required parameter "${identityField}". Server requires caller identity.` }),
          { status: 400, headers },
        );
      }
      // Optional format check: the config value is used as a regular expression.
      const { data: fmtConfig } = await supabase.rpc("cerefox_get_config", { p_key: "requestor_identity_format" });
      if (fmtConfig && typeof fmtConfig === "string" && fmtConfig.trim() !== "") {
        if (!new RegExp(fmtConfig).test(identityValue)) {
          return new Response(
            JSON.stringify({ error: `Invalid "${identityField}" format. Does not match pattern: ${fmtConfig}` }),
            { status: 400, headers },
          );
        }
      }
    }

    // Resolve project name → UUID if provided
    let projectId: string | null = null;
    if (project_name) {
      projectId = await lookupProjectId(supabase, project_name);
      if (!projectId) {
        return new Response(
          JSON.stringify({ error: `Project not found: ${project_name}` }),
          { status: 404, headers },
        );
      }
    }

    // FTS mode doesn't need an embedding (embeddingKey stays null for it).
    let embedding: number[] | null = null;
    if (embeddingKey !== null) {
      try {
        embedding = await getEmbedding(query, embeddingKey);
      } catch (err) {
        return new Response(JSON.stringify({ error: String(err) }), {
          status: 502,
          headers,
        });
      }
    }

    // Build a metadata filter param only when a non-empty filter object is
    // provided; it is omitted otherwise to keep the RPC call params minimal.
    const metaFilterParam = metadata_filter && Object.keys(metadata_filter).length > 0
      ? { p_metadata_filter: metadata_filter }
      : {};

    // Pick the RPC for the requested mode.
    let rpcName: string;
    let rpcParams: Record<string, unknown>;

    if (mode === "fts") {
      rpcName = "cerefox_fts_search";
      rpcParams = {
        p_query_text: query,
        p_match_count: match_count,
        p_project_id: projectId,
        ...metaFilterParam,
      };
    } else if (mode === "hybrid") {
      rpcName = "cerefox_hybrid_search";
      rpcParams = {
        p_query_text: query,
        p_query_embedding: embedding,
        p_match_count: match_count,
        p_alpha: alpha,
        p_use_upgrade: false,
        p_project_id: projectId,
        p_min_score: min_score,
        ...metaFilterParam,
      };
    } else {
      // "docs" — document-level hybrid search (recommended default). Any
      // unrecognised mode value also lands here.
      // Small-to-big threshold and context window use the RPC defaults (20000 / 1).
      // Override them in Postgres (rpcs.sql) if you need a different server-wide value.
      rpcName = "cerefox_search_docs";
      rpcParams = {
        p_query_text: query,
        p_query_embedding: embedding,
        p_match_count: match_count,
        p_alpha: alpha,
        p_project_id: projectId,
        p_min_score: min_score,
        ...metaFilterParam,
      };
    }

    const { data, error } = await supabase.rpc(rpcName, rpcParams);

    if (error) {
      return new Response(JSON.stringify({ error: `RPC error: ${error.message}` }), {
        status: 500,
        headers,
      });
    }

    // Apply byte budget — drop whole results (never truncate mid-doc) to stay
    // under the limit.
    const { accepted, truncated, usedBytes } = applyByteBudget(data ?? [], max_bytes);

    // Fire-and-forget usage logging (never blocks the response)
    Promise.resolve(supabase.rpc("cerefox_log_usage", {
      p_operation: "search",
      p_access_path: "edge-function",
      p_requestor: body.requestor ?? null,
      p_query_text: query,
      p_result_count: accepted.length,
      p_project_id: projectId,
    })).catch(() => {});

    return new Response(
      JSON.stringify({
        results: accepted,
        query,
        mode,
        match_count,
        project_name: project_name ?? null,
        metadata_filter: metadata_filter ?? null,
        truncated,
        response_bytes: usedBytes,
      }),
      { headers },
    );
  } catch (err) {
    // Unexpected failures (e.g. an invalid configured regex) still answer with
    // the JSON error shape and CORS headers used by every other error path.
    const message = err instanceof Error ? err.message : String(err);
    return new Response(JSON.stringify({ error: message }), {
      status: 500,
      headers,
    });
  }
});
@@ -657,7 +657,7 @@ In the action editor, paste this schema (replace `<your-project-ref>`):
657
657
  openapi: 3.1.0
658
658
  info:
659
659
  title: Cerefox Knowledge Base
660
- version: 1.7.0
660
+ version: 1.8.0
661
661
  servers:
662
662
  - url: https://<your-project-ref>.supabase.co/functions/v1
663
663
  paths:
@@ -682,7 +682,11 @@ paths:
682
682
  type: string
683
683
  mode:
684
684
  type: string
685
+ enum: [docs, hybrid, fts]
685
686
  default: docs
687
+ description: >
688
+ docs = document-level hybrid (recommended); hybrid = chunk-level
689
+ semantic+FTS; fts = keyword-only (no embedding).
686
690
  metadata_filter:
687
691
  type: object
688
692
  additionalProperties:
@@ -693,6 +697,26 @@ paths:
693
697
  Example: {"type": "decision", "status": "active"}.
694
698
  Call listMetadataKeys to discover available keys and their values.
695
699
  Omit or set to null to search all documents.
700
+ alpha:
701
+ type: number
702
+ default: 0.7
703
+ description: >
704
+ Semantic weight for hybrid/docs modes (0 = pure FTS, 1 = pure
705
+ semantic). Advanced; leave unset for the default blend.
706
+ min_score:
707
+ type: number
708
+ default: 0.5
709
+ description: >
710
+ Minimum cosine similarity for a vector-only match to be included.
711
+ Advanced; leave unset for the default threshold.
712
+ max_bytes:
713
+ type: integer
714
+ default: 200000
715
+ description: >
716
+ Response size budget in bytes (server hard ceiling 200000).
717
+ Whole results are dropped (never truncated mid-document) until
718
+ the budget is met; the response sets `truncated: true` when this
719
+ happens. Advanced; leave unset for the default.
696
720
  requestor:
697
721
  type: string
698
722
  description: >
@@ -735,6 +759,18 @@ paths:
735
759
  the document, note the document_id, pass it here.
736
760
  project_name:
737
761
  type: string
762
+ description: >
763
+ Add the document to this project (non-destructive — keeps any
764
+ existing project memberships). Looked up by name.
765
+ project_names:
766
+ type: array
767
+ items:
768
+ type: string
769
+ description: >
770
+ Destructive full-set project assignment: the document's project
771
+ memberships are replaced with exactly this list. Use project_name
772
+ (singular) to add without removing. If both are given, project_names
773
+ wins.
738
774
  source:
739
775
  type: string
740
776
  default: agent
@@ -764,7 +800,14 @@ paths:
764
800
  the document to pending_review, user writes set it to approved.
765
801
  responses:
766
802
  '200':
767
- description: Ingest result
803
+ description: >
804
+ Ingest result. Fields vary by outcome:
805
+ { document_id, title, chunk_count, total_chars,
806
+ project_id?, project_name?, # set when a project was assigned on create
807
+ skipped?, # true when identical content was deduplicated
808
+ updated?, # true when an existing doc was updated
809
+ message?, # human note on dedup/skip/update
810
+ note? } # note when a flag (e.g. update_if_exists) was overridden
768
811
  /cerefox-metadata:
769
812
  post:
770
813
  operationId: listMetadataKeys
@@ -867,6 +910,9 @@ paths:
867
910
  since:
868
911
  type: string
869
912
  description: ISO timestamp lower bound for temporal queries (optional)
913
+ until:
914
+ type: string
915
+ description: ISO timestamp upper bound for temporal queries (optional)
870
916
  limit:
871
917
  type: integer
872
918
  default: 50
@@ -921,7 +967,10 @@ paths:
921
967
  Example: {"type": "decision", "status": "active"}.
922
968
  project_id:
923
969
  type: string
924
- description: Filter by project UUID (optional)
970
+ description: >
971
+ Filter by project UUID (optional). NOTE: this is the project
972
+ UUID, not its name — unlike searchKnowledgeBase / ingestNote
973
+ which take project_name. Get UUIDs from listProjects.
925
974
  updated_since:
926
975
  type: string
927
976
  description: ISO-8601 timestamp; only docs updated on/after (optional)
@@ -935,6 +984,12 @@ paths:
935
984
  type: boolean
936
985
  default: false
937
986
  description: Include full document text in results
987
+ max_bytes:
988
+ type: integer
989
+ default: 200000
990
+ description: >
991
+ Response size budget in bytes when include_content is true
992
+ (whole results dropped to fit). Advanced; leave unset for the default.
938
993
  requestor:
939
994
  type: string
940
995
  description: Name of the agent making this request. Optional.