@cerefox/memory 0.7.2 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/cerefox.js +1357 -361
- package/dist/frontend/assets/{index-BzAPcCXA.js → index-CAp2_lFX.js} +2 -2
- package/dist/frontend/assets/index-CAp2_lFX.js.map +1 -0
- package/dist/frontend/index.html +1 -1
- package/dist/server-assets/_shared/ef-meta/index.ts +97 -0
- package/dist/server-assets/_shared/embeddings/index.ts +175 -0
- package/dist/server-assets/_shared/mcp-tools/_chunker.ts +187 -0
- package/dist/server-assets/_shared/mcp-tools/_projects.ts +121 -0
- package/dist/server-assets/_shared/mcp-tools/_utils.ts +73 -0
- package/dist/server-assets/_shared/mcp-tools/audit-log.ts +95 -0
- package/dist/server-assets/_shared/mcp-tools/get-document.ts +73 -0
- package/dist/server-assets/_shared/mcp-tools/get-help-content.ts +26 -0
- package/dist/server-assets/_shared/mcp-tools/get-help.ts +90 -0
- package/dist/server-assets/_shared/mcp-tools/index.ts +67 -0
- package/dist/server-assets/_shared/mcp-tools/ingest.ts +315 -0
- package/dist/server-assets/_shared/mcp-tools/list-metadata-keys.ts +55 -0
- package/dist/server-assets/_shared/mcp-tools/list-projects.ts +59 -0
- package/dist/server-assets/_shared/mcp-tools/list-versions.ts +72 -0
- package/dist/server-assets/_shared/mcp-tools/metadata-search.ts +154 -0
- package/dist/server-assets/_shared/mcp-tools/search.ts +193 -0
- package/dist/server-assets/_shared/mcp-tools/set-document-projects.ts +163 -0
- package/dist/server-assets/_shared/mcp-tools/types.ts +92 -0
- package/dist/server-assets/db/migrations/0003_add_document_versions.sql +91 -0
- package/dist/server-assets/db/migrations/0004_add_audit_log_review_status_archived.sql +71 -0
- package/dist/server-assets/db/migrations/0005_metadata_search.sql +628 -0
- package/dist/server-assets/db/migrations/0006_usage_log.sql +255 -0
- package/dist/server-assets/db/migrations/0007_usage_log_requestor.sql +178 -0
- package/dist/server-assets/db/migrations/0008_soft_delete.sql +130 -0
- package/dist/server-assets/db/migrations/0009_audit_log_restore_operation.sql +20 -0
- package/dist/server-assets/db/migrations/0010_requestor_enforcement_config.sql +12 -0
- package/dist/server-assets/db/migrations/0011_title_boosting.sql +48 -0
- package/dist/server-assets/db/rpcs.sql +1723 -0
- package/dist/server-assets/db/schema.sql +380 -0
- package/dist/server-assets/supabase/functions/cerefox-get-audit-log/index.ts +117 -0
- package/dist/server-assets/supabase/functions/cerefox-get-document/index.ts +138 -0
- package/dist/server-assets/supabase/functions/cerefox-ingest/index.ts +819 -0
- package/dist/server-assets/supabase/functions/cerefox-list-projects/index.ts +96 -0
- package/dist/server-assets/supabase/functions/cerefox-list-versions/index.ts +113 -0
- package/dist/server-assets/supabase/functions/cerefox-mcp/index.ts +294 -0
- package/dist/server-assets/supabase/functions/cerefox-mcp/shared.ts +42 -0
- package/dist/server-assets/supabase/functions/cerefox-metadata/index.ts +99 -0
- package/dist/server-assets/supabase/functions/cerefox-metadata-search/index.ts +146 -0
- package/dist/server-assets/supabase/functions/cerefox-search/index.ts +382 -0
- package/docs/guides/connect-agents.md +78 -3
- package/docs/guides/migration-v0.5.md +50 -0
- package/docs/guides/quickstart.md +6 -2
- package/package.json +3 -2
- package/dist/frontend/assets/index-BzAPcCXA.js.map +0 -1
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import "jsr:@supabase/functions-js/edge-runtime.d.ts";
|
|
2
|
+
import { createClient } from "jsr:@supabase/supabase-js@2";
|
|
3
|
+
import { isVersionRequest, versionResponse } from "../../../_shared/ef-meta/index.ts";
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* cerefox-metadata-search -- Supabase Edge Function
|
|
7
|
+
*
|
|
8
|
+
* Query documents by metadata key-value criteria without a text search term.
|
|
9
|
+
* Calls the cerefox_metadata_search() RPC via the service-role key.
|
|
10
|
+
*
|
|
11
|
+
* Called by:
|
|
12
|
+
* - GPT Custom Actions (direct HTTP POST via OpenAPI schema)
|
|
13
|
+
* - Any authenticated HTTP client
|
|
14
|
+
*
|
|
15
|
+
* Note: cerefox-mcp calls the RPC directly (not this Edge Function).
|
|
16
|
+
*
|
|
17
|
+
* Request body (JSON):
|
|
18
|
+
* metadata_filter object required Key-value pairs (AND semantics)
|
|
19
|
+
* project_id string optional Project UUID filter
|
|
20
|
+
* updated_since string optional ISO-8601 lower bound for updated_at
|
|
21
|
+
* created_since string optional ISO-8601 lower bound for created_at
|
|
22
|
+
* limit number optional Max results (default: 10)
|
|
23
|
+
* include_content boolean optional Include full text (default: false)
|
|
24
|
+
* max_bytes number optional Byte budget when include_content=true
|
|
25
|
+
*
|
|
26
|
+
* Response (200): Array of matching documents
|
|
27
|
+
* Response (400): { error: "..." }
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
/**
 * Ceiling for the `max_bytes` content budget. It doubles as the default that
 * is used when the caller asks for content but does not supply `max_bytes`.
 */
const MAX_BYTES = 200_000;

/** CORS headers attached to every response produced by this function. */
const CORS_HEADERS = {
  "Access-Control-Allow-Origin": "*",
  "Access-Control-Allow-Methods": "POST, OPTIONS",
  "Access-Control-Allow-Headers": "Content-Type, Authorization, apikey",
};
|
|
37
|
+
|
|
38
|
+
/**
 * Normalise a caller-supplied `max_bytes` value.
 *
 * Finite numbers (and numeric strings, which the previous `Math.min` call
 * coerced implicitly) are capped at MAX_BYTES. Anything else -- missing,
 * non-numeric, NaN, Infinity -- falls back to MAX_BYTES so a malformed value
 * can never produce a NaN budget, which JSON-serialises to `null` in the RPC
 * parameters.
 */
function resolveMaxBytes(requested: unknown): number {
  const value = typeof requested === "string" ? Number(requested) : requested;
  return typeof value === "number" && Number.isFinite(value)
    ? Math.min(value, MAX_BYTES)
    : MAX_BYTES;
}

/** Build a JSON response that carries the shared CORS headers. */
function jsonResponse(payload: unknown, status: number): Response {
  return new Response(JSON.stringify(payload), {
    status,
    headers: { ...CORS_HEADERS, "Content-Type": "application/json" },
  });
}

/**
 * HTTP entry point for cerefox-metadata-search.
 *
 * Flow: CORS preflight -> version probe -> method check -> body parsing and
 * validation -> optional requestor-identity enforcement (driven by the
 * `require_requestor_identity` / `requestor_identity_format` config keys) ->
 * `cerefox_metadata_search` RPC -> best-effort usage logging -> JSON array.
 *
 * Client mistakes (malformed JSON, non-object body, missing filter, missing
 * or badly formatted requestor) are answered with 400; RPC failures and
 * unexpected exceptions with 500.
 */
Deno.serve(async (req: Request): Promise<Response> => {
  if (req.method === "OPTIONS") {
    return new Response(null, { status: 200, headers: CORS_HEADERS });
  }

  if (isVersionRequest(req)) {
    return versionResponse("cerefox-metadata-search", { ...CORS_HEADERS, "Content-Type": "application/json" });
  }

  if (req.method !== "POST") {
    return new Response("Method Not Allowed", { status: 405, headers: CORS_HEADERS });
  }

  // Parse separately from the main try block: an unparsable body is the
  // caller's fault and must be reported as 400, not as a 500 server error.
  let parsed: unknown;
  try {
    parsed = await req.json();
  } catch {
    return jsonResponse({ error: "Invalid JSON body" }, 400);
  }

  // `null`, arrays and primitives are valid JSON but not a valid request;
  // reading properties off `null` would otherwise throw.
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    return jsonResponse({ error: "Request body must be a JSON object" }, 400);
  }
  const body = parsed as Record<string, unknown>;

  try {
    const metadata_filter = body.metadata_filter;

    if (
      !metadata_filter ||
      typeof metadata_filter !== "object" ||
      Array.isArray(metadata_filter) ||
      Object.keys(metadata_filter).length === 0
    ) {
      return jsonResponse(
        { error: "metadata_filter is required and must be a non-empty JSON object" },
        400,
      );
    }

    const project_id = body.project_id ?? null;
    const updated_since = body.updated_since ?? null;
    const created_since = body.created_since ?? null;
    const limit = body.limit ?? 10;
    const include_content = body.include_content ?? false;

    // The byte budget only applies when document content is requested.
    const max_bytes = include_content ? resolveMaxBytes(body.max_bytes) : null;

    const supabaseUrl = Deno.env.get("SUPABASE_URL")!;
    const supabaseKey = Deno.env.get("SUPABASE_SERVICE_ROLE_KEY")!;
    const supabase = createClient(supabaseUrl, supabaseKey);

    // Configurable requestor enforcement
    const identityField = "requestor";
    const identityValue = body[identityField];
    const { data: reqConfig } = await supabase.rpc("cerefox_get_config", { p_key: "require_requestor_identity" });
    if (reqConfig === "true") {
      if (!identityValue || (typeof identityValue === "string" && identityValue.trim() === "")) {
        return jsonResponse(
          { error: `Missing required parameter "${identityField}". Server requires caller identity.` },
          400,
        );
      }
      const { data: fmtConfig } = await supabase.rpc("cerefox_get_config", { p_key: "requestor_identity_format" });
      if (fmtConfig && typeof fmtConfig === "string" && fmtConfig.trim() !== "") {
        // The pattern comes from server configuration; a typo there should
        // produce a clear diagnostic instead of a raw SyntaxError.
        let identityPattern: RegExp;
        try {
          identityPattern = new RegExp(fmtConfig);
        } catch {
          return jsonResponse(
            { error: 'Server misconfiguration: "requestor_identity_format" is not a valid regular expression.' },
            500,
          );
        }
        if (!identityPattern.test(String(identityValue))) {
          return jsonResponse(
            { error: `Invalid "${identityField}" format. Does not match pattern: ${fmtConfig}` },
            400,
          );
        }
      }
    }

    const params: Record<string, unknown> = {
      p_metadata_filter: metadata_filter,
      p_project_id: project_id,
      p_updated_since: updated_since,
      p_created_since: created_since,
      p_limit: limit,
      p_include_content: include_content,
    };
    // Omit p_max_bytes entirely when no content was requested.
    if (max_bytes !== null) {
      params.p_max_bytes = max_bytes;
    }

    const { data, error } = await supabase.rpc("cerefox_metadata_search", params);

    if (error) {
      return jsonResponse({ error: error.message }, 500);
    }

    // Fire-and-forget usage logging: not awaited, failures are ignored on purpose.
    Promise.resolve(supabase.rpc("cerefox_log_usage", {
      p_operation: "metadata_search",
      p_access_path: "edge-function",
      p_requestor: body.requestor ?? null,
      p_query_text: JSON.stringify(metadata_filter),
      p_result_count: (data ?? []).length,
      p_project_id: project_id,
    })).catch(() => {});

    return jsonResponse(data ?? [], 200);
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    return jsonResponse({ error: message }, 500);
  }
});
|
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
import "jsr:@supabase/functions-js/edge-runtime.d.ts";
|
|
2
|
+
import { createClient } from "jsr:@supabase/supabase-js@2";
|
|
3
|
+
import { isVersionRequest, versionResponse } from "../../../_shared/ef-meta/index.ts";
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* cerefox-search — Supabase Edge Function
|
|
7
|
+
*
|
|
8
|
+
* Accepts a plain-text query, embeds it server-side using the OpenAI API,
|
|
9
|
+
* then calls the appropriate Cerefox search RPC and returns the results.
|
|
10
|
+
*
|
|
11
|
+
* Called by the cerefox-mcp Edge Function (MCP Streamable HTTP), GPT Actions
|
|
12
|
+
* (direct HTTP POST), or any HTTP client. No SQL required, no local embedder.
|
|
13
|
+
*
|
|
14
|
+
* Request body (JSON):
|
|
15
|
+
* query string required Natural-language search query
|
|
16
|
+
* project_name string optional Project to filter by (looked up by name)
|
|
17
|
+
* match_count number optional Max results (default: 5)
|
|
18
|
+
* mode string optional "hybrid" | "fts" | "docs" (default: "docs")
|
|
19
|
+
* alpha number optional Semantic weight for hybrid search (default: 0.7)
|
|
20
|
+
* min_score number optional Min cosine similarity (default: 0.5)
|
|
21
|
+
* metadata_filter object optional JSONB containment filter. Only documents whose
|
|
22
|
+
* metadata contains ALL specified key-value pairs
|
|
23
|
+
* are returned. Example: {"type":"decision"}.
|
|
24
|
+
* Use cerefox-metadata to discover available keys.
|
|
25
|
+
* max_bytes number optional Response size budget in bytes (default: 200000,
|
|
26
|
+
* hard ceiling: 200000). Agents may pass a smaller
|
|
27
|
+
* value to fit their context window; values above
|
|
28
|
+
* the server ceiling are silently capped. Results
|
|
29
|
+
* are dropped whole (never truncated mid-doc) until
|
|
30
|
+
* the budget is satisfied. The response includes a
|
|
31
|
+
* `truncated` flag when results were dropped.
|
|
32
|
+
*
|
|
33
|
+
* Response: { results: [...], query, mode, match_count, project_name?,
|
|
34
|
+
* truncated: boolean, response_bytes: number }
|
|
35
|
+
*
|
|
36
|
+
* Example agent prompt:
|
|
37
|
+
* "Invoke the cerefox-search edge function with query='knowledge management'
|
|
38
|
+
* and project_name='Personal'. Summarize the results."
|
|
39
|
+
*/
|
|
40
|
+
|
|
41
|
+
/** OpenAI REST endpoint that turns the query text into an embedding. */
const OPENAI_EMBEDDING_URL = "https://api.openai.com/v1/embeddings";

/** Embedding model named in the request body. */
const OPENAI_MODEL = "text-embedding-3-small";

/** Value sent as the `dimensions` field of the embedding request. */
const EMBEDDING_DIMENSIONS = 768;

/**
 * Server-enforced response size ceiling, in bytes.
 *
 * Serves two roles: it is the default budget when the caller omits
 * `max_bytes`, and the hard cap when the caller asks for more. Callers may
 * pass a smaller value to fit their own context budget. Because small-to-big
 * retrieval limits large documents to the matched chunks plus neighbours,
 * the ceiling is rarely hit at the default match_count of 5.
 */
const MAX_BYTES = 200_000;
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
/** JSON body accepted by the cerefox-search endpoint; only `query` is mandatory. */
interface SearchRequest {
  /** Natural-language search query. */
  query: string;
  /** Name of the project to restrict the search to; resolved to an id by lookup. */
  project_name?: string;
  /** Maximum number of results (the handler defaults this to 5). */
  match_count?: number;
  /** Search strategy (the handler defaults this to "docs"). */
  mode?: "hybrid" | "fts" | "docs";
  /** Semantic weight for hybrid search (the handler defaults this to 0.7). */
  alpha?: number;
  /** Minimum cosine similarity (the handler defaults this to 0.5). */
  min_score?: number;
  /** Key-value pairs that a document's metadata must all contain; null means no filter. */
  metadata_filter?: Record<string, string> | null;
  /** Response size budget in bytes, capped by the server ceiling. */
  max_bytes?: number;
  /** Caller identity; checked when the server is configured to require it. */
  requestor?: string;
}
|
|
65
|
+
|
|
66
|
+
/** Total number of attempts made against the embedding API. */
const EMBEDDING_MAX_RETRIES = 3;

/** Base backoff delay in milliseconds; it is doubled for each successive attempt. */
const EMBEDDING_INITIAL_BACKOFF_MS = 500;
|
|
68
|
+
|
|
69
|
+
/**
 * Embed `text` through the OpenAI embeddings endpoint, retrying transient
 * failures.
 *
 * At most EMBEDDING_MAX_RETRIES attempts are made. HTTP 5xx responses and
 * thrown errors (network failures, unreadable or malformed bodies) are
 * retried after an exponential backoff of
 * EMBEDDING_INITIAL_BACKOFF_MS * 2^attempt. Any other non-OK status is
 * treated as non-retryable and fails immediately. No delay is spent after the
 * final attempt, so a persistent outage is reported as soon as the last
 * request fails.
 *
 * @param text   Text to embed.
 * @param apiKey OpenAI API key, sent as a Bearer token.
 * @returns The embedding vector of the first item in the response.
 * @throws Error `OpenAI embedding error <status>: <body>` for HTTP failures,
 *         or the last transient error once all attempts are exhausted.
 */
async function getEmbedding(text: string, apiKey: string): Promise<number[]> {
  let lastError: Error | null = null;

  for (let attempt = 0; attempt < EMBEDDING_MAX_RETRIES; attempt++) {
    // Populated only for failures that must not be retried. It is thrown
    // after the try/catch so the retry path never has to recognise its own
    // errors by inspecting their message text.
    let fatal: Error | null = null;
    let reason = "";

    try {
      const response = await fetch(OPENAI_EMBEDDING_URL, {
        method: "POST",
        headers: {
          "Authorization": `Bearer ${apiKey}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          model: OPENAI_MODEL,
          input: text,
          dimensions: EMBEDDING_DIMENSIONS,
        }),
      });

      if (response.ok) {
        const data = await response.json();
        const embedding: unknown = data?.data?.[0]?.embedding;
        // A 200 without a vector would otherwise surface as an opaque
        // "cannot read properties of undefined" TypeError.
        if (!Array.isArray(embedding)) {
          throw new Error("OpenAI embedding response did not contain an embedding vector");
        }
        if (attempt > 0) {
          console.info(`Embedding API succeeded on retry ${attempt}`);
        }
        return embedding as number[];
      }

      const detail = await response.text();
      const failure = new Error(`OpenAI embedding error ${response.status}: ${detail}`);
      if (response.status < 500) {
        // Don't retry client errors (4xx)
        fatal = failure;
      } else {
        // Server errors (5xx) are retryable
        lastError = failure;
        reason = `Embedding API returned ${response.status}`;
      }
    } catch (err) {
      // Network/timeout errors and unusable bodies are retryable
      lastError = err instanceof Error ? err : new Error(String(err));
      reason = `Embedding API request failed: ${lastError.message}`;
    }

    if (fatal) {
      throw fatal;
    }

    const attemptLabel = `attempt ${attempt + 1}/${EMBEDDING_MAX_RETRIES}`;
    if (attempt + 1 >= EMBEDDING_MAX_RETRIES) {
      // Nothing left to retry: skip the backoff instead of delaying the error.
      console.warn(`${reason} (${attemptLabel}), giving up`);
      break;
    }

    const backoff = EMBEDDING_INITIAL_BACKOFF_MS * Math.pow(2, attempt);
    console.warn(`${reason} (${attemptLabel}), retrying in ${backoff}ms`);
    await new Promise((r) => setTimeout(r, backoff));
  }

  throw lastError ?? new Error(`Embedding API failed after ${EMBEDDING_MAX_RETRIES} attempts`);
}
|
|
125
|
+
|
|
126
|
+
/**
 * Resolve a project name to its id.
 *
 * Runs an ILIKE match on `cerefox_projects.name` and takes the first row.
 * Returns null both when no row matches and when the query reports an error;
 * callers cannot tell the two cases apart.
 *
 * NOTE(review): `projectName` is handed to `.ilike()` unescaped, so pattern
 * characters such as `%` or `_` in the name presumably act as wildcards --
 * confirm whether an exact (case-insensitive) match is what is intended.
 */
async function lookupProjectId(
  supabase: ReturnType<typeof createClient>,
  projectName: string,
): Promise<string | null> {
  const { data: rows, error: lookupError } = await supabase
    .from("cerefox_projects")
    .select("id")
    .ilike("name", projectName)
    .limit(1);

  if (lookupError) {
    return null;
  }
  if (!rows?.length) {
    return null;
  }
  return rows[0].id;
}
|
|
139
|
+
|
|
140
|
+
/**
 * Keep the longest prefix of `rows` whose combined size fits in `maxBytes`.
 *
 * A row's size is the UTF-8 byte length of its JSON serialisation. Rows are
 * taken in order; the first row that would push the running total past the
 * budget ends the scan, and that row plus everything after it is dropped.
 * Rows are never cut in the middle -- each one is kept or dropped whole.
 *
 * @returns `accepted` (the kept rows), `truncated` (true when at least one
 *          row was dropped) and `usedBytes` (total size of the kept rows).
 */
function applyByteBudget(
  rows: unknown[],
  maxBytes: number,
): { accepted: unknown[]; truncated: boolean; usedBytes: number } {
  const utf8 = new TextEncoder();
  const accepted: unknown[] = [];
  let usedBytes = 0;

  for (let index = 0; index < rows.length; index++) {
    const candidate = rows[index];
    const candidateBytes = utf8.encode(JSON.stringify(candidate)).length;
    const overBudget = usedBytes + candidateBytes > maxBytes;
    if (overBudget) {
      return { accepted, truncated: true, usedBytes };
    }
    usedBytes += candidateBytes;
    accepted.push(candidate);
  }

  return { accepted, truncated: false, usedBytes };
}
|
|
168
|
+
|
|
169
|
+
/** Headers shared by the JSON responses of this function (content type plus permissive CORS origin). */
const headers = {
  "Content-Type": "application/json",
  "Access-Control-Allow-Origin": "*",
};
|
|
173
|
+
|
|
174
|
+
/**
 * Normalise a caller-supplied `max_bytes` value.
 *
 * Finite numbers (and numeric strings, which the previous `Math.min` call
 * coerced implicitly) are capped at MAX_BYTES. Anything else -- missing,
 * non-numeric, NaN, Infinity -- falls back to MAX_BYTES. This matters because
 * a NaN budget makes every `total > budget` comparison false, which would
 * disable truncation and bypass the server ceiling.
 */
function resolveMaxBytes(requested: unknown): number {
  const value = typeof requested === "string" ? Number(requested) : requested;
  return typeof value === "number" && Number.isFinite(value)
    ? Math.min(value, MAX_BYTES)
    : MAX_BYTES;
}

/** Build a JSON response using the shared response headers. */
function jsonResponse(payload: unknown, status = 200): Response {
  return new Response(JSON.stringify(payload), { status, headers });
}

/**
 * HTTP entry point for cerefox-search.
 *
 * Flow: CORS preflight -> version probe -> method check -> body parsing and
 * validation -> optional requestor-identity enforcement -> project name
 * resolution -> query embedding (skipped for "fts") -> search RPC selected by
 * `mode` -> byte-budget trimming -> best-effort usage logging -> JSON result.
 *
 * Status codes: 400 for invalid input, 404 for an unknown project, 405 for a
 * non-POST method, 502 when the embedding call fails, 500 for RPC errors and
 * server misconfiguration.
 */
Deno.serve(async (req: Request) => {
  // CORS preflight
  if (req.method === "OPTIONS") {
    return new Response(null, {
      headers: {
        "Access-Control-Allow-Origin": "*",
        "Access-Control-Allow-Headers": "authorization, x-client-info, apikey, content-type",
      },
    });
  }

  if (isVersionRequest(req)) {
    return versionResponse("cerefox-search", headers);
  }

  if (req.method !== "POST") {
    return jsonResponse({ error: "POST required" }, 405);
  }

  let parsed: unknown;
  try {
    parsed = await req.json();
  } catch {
    return jsonResponse({ error: "Invalid JSON body" }, 400);
  }

  // `null` is valid JSON but destructuring it throws; arrays and primitives
  // can never carry the request fields either.
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    return jsonResponse({ error: "Request body must be a JSON object" }, 400);
  }
  const body = parsed as SearchRequest;

  const {
    query,
    project_name,
    match_count = 5,
    mode = "docs",
    alpha = 0.7,
    min_score = 0.5,
    metadata_filter = null,
    max_bytes: requested_max_bytes,
  } = body;

  // Enforce ceiling: agents may request less but never more than MAX_BYTES.
  const max_bytes = resolveMaxBytes(requested_max_bytes);

  // Validate metadata_filter: must be a plain object (or null/absent).
  // Reject arrays, strings, and other non-object types to prevent RPC errors.
  if (
    metadata_filter !== null &&
    metadata_filter !== undefined &&
    (typeof metadata_filter !== "object" || Array.isArray(metadata_filter))
  ) {
    return jsonResponse({ error: "metadata_filter must be a JSON object or null" }, 400);
  }

  if (!query || typeof query !== "string" || !query.trim()) {
    return jsonResponse({ error: "query is required" }, 400);
  }

  // Only modes that embed the query need the OpenAI key; keyword-only ("fts")
  // searches must keep working on projects where the secret is not set.
  const needsEmbedding = mode !== "fts";
  const openaiKey = Deno.env.get("OPENAI_API_KEY");
  if (needsEmbedding && !openaiKey) {
    return jsonResponse({ error: "OPENAI_API_KEY secret not set on this project" }, 500);
  }

  const supabaseUrl = Deno.env.get("SUPABASE_URL")!;
  const supabaseKey = Deno.env.get("SUPABASE_SERVICE_ROLE_KEY")!;
  const supabase = createClient(supabaseUrl, supabaseKey);

  // Configurable requestor enforcement
  const identityField = "requestor";
  const identityValue: unknown = body[identityField];
  const { data: reqConfig } = await supabase.rpc("cerefox_get_config", { p_key: "require_requestor_identity" });
  if (reqConfig === "true") {
    if (!identityValue || (typeof identityValue === "string" && identityValue.trim() === "")) {
      return jsonResponse(
        { error: `Missing required parameter "${identityField}". Server requires caller identity.` },
        400,
      );
    }
    const { data: fmtConfig } = await supabase.rpc("cerefox_get_config", { p_key: "requestor_identity_format" });
    if (fmtConfig && typeof fmtConfig === "string" && fmtConfig.trim() !== "") {
      // The pattern comes from server configuration; compiling it can throw,
      // and nothing further up would catch that.
      let identityPattern: RegExp;
      try {
        identityPattern = new RegExp(fmtConfig);
      } catch {
        return jsonResponse(
          { error: 'Server misconfiguration: "requestor_identity_format" is not a valid regular expression.' },
          500,
        );
      }
      if (!identityPattern.test(String(identityValue))) {
        return jsonResponse(
          { error: `Invalid "${identityField}" format. Does not match pattern: ${fmtConfig}` },
          400,
        );
      }
    }
  }

  // Resolve project name → UUID if provided
  let projectId: string | null = null;
  if (project_name) {
    projectId = await lookupProjectId(supabase, project_name);
    if (!projectId) {
      return jsonResponse({ error: `Project not found: ${project_name}` }, 404);
    }
  }

  // FTS mode doesn't need an embedding
  let embedding: number[] | null = null;
  if (needsEmbedding && openaiKey) {
    try {
      embedding = await getEmbedding(query, openaiKey);
    } catch (err) {
      return jsonResponse({ error: String(err) }, 502);
    }
  }

  // Call the appropriate RPC
  let rpcName: string;
  let rpcParams: Record<string, unknown>;

  // Build a metadata filter param only when a non-empty filter object is provided.
  // Passing null explicitly or an empty object {} to the RPC is equivalent to no filter,
  // but we omit it entirely when absent to keep RPC call params minimal.
  const metaFilterParam = metadata_filter && Object.keys(metadata_filter).length > 0
    ? { p_metadata_filter: metadata_filter }
    : {};

  if (mode === "fts") {
    rpcName = "cerefox_fts_search";
    rpcParams = {
      p_query_text: query,
      p_match_count: match_count,
      p_project_id: projectId,
      ...metaFilterParam,
    };
  } else if (mode === "hybrid") {
    rpcName = "cerefox_hybrid_search";
    rpcParams = {
      p_query_text: query,
      p_query_embedding: embedding,
      p_match_count: match_count,
      p_alpha: alpha,
      p_use_upgrade: false,
      p_project_id: projectId,
      p_min_score: min_score,
      ...metaFilterParam,
    };
  } else {
    // "docs" — document-level hybrid search (recommended default).
    // Small-to-big threshold and context window use the RPC defaults (20000 / 1).
    // Override them in Postgres (rpcs.sql) if you need a different server-wide value.
    // Any mode value other than "fts" / "hybrid" also lands here.
    rpcName = "cerefox_search_docs";
    rpcParams = {
      p_query_text: query,
      p_query_embedding: embedding,
      p_match_count: match_count,
      p_alpha: alpha,
      p_project_id: projectId,
      p_min_score: min_score,
      ...metaFilterParam,
    };
  }

  const { data, error } = await supabase.rpc(rpcName, rpcParams);

  if (error) {
    return jsonResponse({ error: `RPC error: ${error.message}` }, 500);
  }

  // Apply byte budget — drop whole results (never truncate mid-doc) to stay
  // under the limit. This mirrors the local MCP server's truncation behaviour.
  const { accepted, truncated, usedBytes } = applyByteBudget(data ?? [], max_bytes);

  // Fire-and-forget usage logging (never blocks the response)
  Promise.resolve(supabase.rpc("cerefox_log_usage", {
    p_operation: "search",
    p_access_path: "edge-function",
    p_requestor: body.requestor ?? null,
    p_query_text: query,
    p_result_count: accepted.length,
    p_project_id: projectId,
  })).catch(() => {});

  return jsonResponse({
    results: accepted,
    query,
    mode,
    match_count,
    project_name: project_name ?? null,
    metadata_filter: metadata_filter ?? null,
    truncated,
    response_bytes: usedBytes,
  });
});
|
|
@@ -133,6 +133,26 @@ change.
|
|
|
133
133
|
> make authenticated POST calls to the Edge Functions. The built-in local server is
|
|
134
134
|
> the correct solution.
|
|
135
135
|
|
|
136
|
+
### Fastest setup: `cerefox configure-agent`
|
|
137
|
+
|
|
138
|
+
You don't have to hand-edit the per-client config files below. `cerefox configure-agent
|
|
139
|
+
--tool <client>` writes the correct local-stdio entry (`npx -y --package=@cerefox/memory
|
|
140
|
+
cerefox mcp`) into the right config file for you. Supported clients:
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
cerefox configure-agent --tool claude-code # ~/.claude.json (via `claude mcp add`)
|
|
144
|
+
cerefox configure-agent --tool claude-desktop # Claude Desktop config
|
|
145
|
+
cerefox configure-agent --tool cursor # ~/.cursor/mcp.json
|
|
146
|
+
cerefox configure-agent --tool codex # ~/.codex/config.toml
|
|
147
|
+
cerefox configure-agent --tool gemini # ~/.gemini/settings.json
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
Useful flags: `--dry-run` (print the planned write without touching any file), `--json`
|
|
151
|
+
(machine-readable result), `--config-path <path>` (override the target file), `--no-backup`
|
|
152
|
+
(skip the `.pre-cerefox.bak` backup). The command is idempotent and backs up any existing
|
|
153
|
+
config before writing. The per-client sections below document the same entries for anyone
|
|
154
|
+
who prefers to edit by hand or needs the remote (`Path A-Remote`) HTTP transport instead.
|
|
155
|
+
|
|
136
156
|
### Path A MCP tools
|
|
137
157
|
|
|
138
158
|
Once configured, every Path A client has these tools:
|
|
@@ -657,7 +677,7 @@ In the action editor, paste this schema (replace `<your-project-ref>`):
|
|
|
657
677
|
openapi: 3.1.0
|
|
658
678
|
info:
|
|
659
679
|
title: Cerefox Knowledge Base
|
|
660
|
-
version: 1.
|
|
680
|
+
version: 1.8.0
|
|
661
681
|
servers:
|
|
662
682
|
- url: https://<your-project-ref>.supabase.co/functions/v1
|
|
663
683
|
paths:
|
|
@@ -682,7 +702,11 @@ paths:
|
|
|
682
702
|
type: string
|
|
683
703
|
mode:
|
|
684
704
|
type: string
|
|
705
|
+
enum: [docs, hybrid, fts]
|
|
685
706
|
default: docs
|
|
707
|
+
description: >
|
|
708
|
+
docs = document-level hybrid (recommended); hybrid = chunk-level
|
|
709
|
+
semantic+FTS; fts = keyword-only (no embedding).
|
|
686
710
|
metadata_filter:
|
|
687
711
|
type: object
|
|
688
712
|
additionalProperties:
|
|
@@ -693,6 +717,26 @@ paths:
|
|
|
693
717
|
Example: {"type": "decision", "status": "active"}.
|
|
694
718
|
Call listMetadataKeys to discover available keys and their values.
|
|
695
719
|
Omit or set to null to search all documents.
|
|
720
|
+
alpha:
|
|
721
|
+
type: number
|
|
722
|
+
default: 0.7
|
|
723
|
+
description: >
|
|
724
|
+
Semantic weight for hybrid/docs modes (0 = pure FTS, 1 = pure
|
|
725
|
+
semantic). Advanced; leave unset for the default blend.
|
|
726
|
+
min_score:
|
|
727
|
+
type: number
|
|
728
|
+
default: 0.5
|
|
729
|
+
description: >
|
|
730
|
+
Minimum cosine similarity for a vector-only match to be included.
|
|
731
|
+
Advanced; leave unset for the default threshold.
|
|
732
|
+
max_bytes:
|
|
733
|
+
type: integer
|
|
734
|
+
default: 200000
|
|
735
|
+
description: >
|
|
736
|
+
Response size budget in bytes (server hard ceiling 200000).
|
|
737
|
+
Whole results are dropped (never truncated mid-document) until
|
|
738
|
+
the budget is met; the response sets `truncated: true` when this
|
|
739
|
+
happens. Advanced; leave unset for the default.
|
|
696
740
|
requestor:
|
|
697
741
|
type: string
|
|
698
742
|
description: >
|
|
@@ -735,6 +779,18 @@ paths:
|
|
|
735
779
|
the document, note the document_id, pass it here.
|
|
736
780
|
project_name:
|
|
737
781
|
type: string
|
|
782
|
+
description: >
|
|
783
|
+
Add the document to this project (non-destructive — keeps any
|
|
784
|
+
existing project memberships). Looked up by name.
|
|
785
|
+
project_names:
|
|
786
|
+
type: array
|
|
787
|
+
items:
|
|
788
|
+
type: string
|
|
789
|
+
description: >
|
|
790
|
+
Destructive full-set project assignment: the document's project
|
|
791
|
+
memberships are replaced with exactly this list. Use project_name
|
|
792
|
+
(singular) to add without removing. If both are given, project_names
|
|
793
|
+
wins.
|
|
738
794
|
source:
|
|
739
795
|
type: string
|
|
740
796
|
default: agent
|
|
@@ -764,7 +820,14 @@ paths:
|
|
|
764
820
|
the document to pending_review, user writes set it to approved.
|
|
765
821
|
responses:
|
|
766
822
|
'200':
|
|
767
|
-
description:
|
|
823
|
+
description: >
|
|
824
|
+
Ingest result. Fields vary by outcome:
|
|
825
|
+
{ document_id, title, chunk_count, total_chars,
|
|
826
|
+
project_id?, project_name?, # set when a project was assigned on create
|
|
827
|
+
skipped?, # true when identical content was deduplicated
|
|
828
|
+
updated?, # true when an existing doc was updated
|
|
829
|
+
message?, # human note on dedup/skip/update
|
|
830
|
+
note? } # note when a flag (e.g. update_if_exists) was overridden
|
|
768
831
|
/cerefox-metadata:
|
|
769
832
|
post:
|
|
770
833
|
operationId: listMetadataKeys
|
|
@@ -867,6 +930,9 @@ paths:
|
|
|
867
930
|
since:
|
|
868
931
|
type: string
|
|
869
932
|
description: ISO timestamp lower bound for temporal queries (optional)
|
|
933
|
+
until:
|
|
934
|
+
type: string
|
|
935
|
+
description: ISO timestamp upper bound for temporal queries (optional)
|
|
870
936
|
limit:
|
|
871
937
|
type: integer
|
|
872
938
|
default: 50
|
|
@@ -921,7 +987,10 @@ paths:
|
|
|
921
987
|
Example: {"type": "decision", "status": "active"}.
|
|
922
988
|
project_id:
|
|
923
989
|
type: string
|
|
924
|
-
description:
|
|
990
|
+
description: >
|
|
991
|
+
Filter by project UUID (optional). NOTE: this is the project
|
|
992
|
+
UUID, not its name — unlike searchKnowledgeBase / ingestNote
|
|
993
|
+
which take project_name. Get UUIDs from listProjects.
|
|
925
994
|
updated_since:
|
|
926
995
|
type: string
|
|
927
996
|
description: ISO-8601 timestamp; only docs updated on/after (optional)
|
|
@@ -935,6 +1004,12 @@ paths:
|
|
|
935
1004
|
type: boolean
|
|
936
1005
|
default: false
|
|
937
1006
|
description: Include full document text in results
|
|
1007
|
+
max_bytes:
|
|
1008
|
+
type: integer
|
|
1009
|
+
default: 200000
|
|
1010
|
+
description: >
|
|
1011
|
+
Response size budget in bytes when include_content is true
|
|
1012
|
+
(whole results dropped to fit). Advanced; leave unset for the default.
|
|
938
1013
|
requestor:
|
|
939
1014
|
type: string
|
|
940
1015
|
description: Name of the agent making this request. Optional.
|