@cerefox/memory 0.7.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +62 -25
- package/dist/bin/cerefox.js +1163 -344
- package/dist/frontend/assets/{index-HNlMcvli.js → index-CAp2_lFX.js} +2 -2
- package/dist/frontend/assets/index-CAp2_lFX.js.map +1 -0
- package/dist/frontend/index.html +1 -1
- package/dist/server-assets/_shared/ef-meta/index.ts +97 -0
- package/dist/server-assets/_shared/embeddings/index.ts +175 -0
- package/dist/server-assets/_shared/mcp-tools/_chunker.ts +187 -0
- package/dist/server-assets/_shared/mcp-tools/_projects.ts +121 -0
- package/dist/server-assets/_shared/mcp-tools/_utils.ts +73 -0
- package/dist/server-assets/_shared/mcp-tools/audit-log.ts +95 -0
- package/dist/server-assets/_shared/mcp-tools/get-document.ts +73 -0
- package/dist/server-assets/_shared/mcp-tools/get-help-content.ts +26 -0
- package/dist/server-assets/_shared/mcp-tools/get-help.ts +90 -0
- package/dist/server-assets/_shared/mcp-tools/index.ts +67 -0
- package/dist/server-assets/_shared/mcp-tools/ingest.ts +315 -0
- package/dist/server-assets/_shared/mcp-tools/list-metadata-keys.ts +55 -0
- package/dist/server-assets/_shared/mcp-tools/list-projects.ts +59 -0
- package/dist/server-assets/_shared/mcp-tools/list-versions.ts +72 -0
- package/dist/server-assets/_shared/mcp-tools/metadata-search.ts +154 -0
- package/dist/server-assets/_shared/mcp-tools/search.ts +193 -0
- package/dist/server-assets/_shared/mcp-tools/set-document-projects.ts +163 -0
- package/dist/server-assets/_shared/mcp-tools/types.ts +92 -0
- package/dist/server-assets/db/migrations/0003_add_document_versions.sql +91 -0
- package/dist/server-assets/db/migrations/0004_add_audit_log_review_status_archived.sql +71 -0
- package/dist/server-assets/db/migrations/0005_metadata_search.sql +628 -0
- package/dist/server-assets/db/migrations/0006_usage_log.sql +255 -0
- package/dist/server-assets/db/migrations/0007_usage_log_requestor.sql +178 -0
- package/dist/server-assets/db/migrations/0008_soft_delete.sql +130 -0
- package/dist/server-assets/db/migrations/0009_audit_log_restore_operation.sql +20 -0
- package/dist/server-assets/db/migrations/0010_requestor_enforcement_config.sql +12 -0
- package/dist/server-assets/db/migrations/0011_title_boosting.sql +48 -0
- package/dist/server-assets/db/rpcs.sql +1723 -0
- package/dist/server-assets/db/schema.sql +380 -0
- package/dist/server-assets/supabase/functions/cerefox-get-audit-log/index.ts +117 -0
- package/dist/server-assets/supabase/functions/cerefox-get-document/index.ts +138 -0
- package/dist/server-assets/supabase/functions/cerefox-ingest/index.ts +819 -0
- package/dist/server-assets/supabase/functions/cerefox-list-projects/index.ts +96 -0
- package/dist/server-assets/supabase/functions/cerefox-list-versions/index.ts +113 -0
- package/dist/server-assets/supabase/functions/cerefox-mcp/index.ts +294 -0
- package/dist/server-assets/supabase/functions/cerefox-mcp/shared.ts +42 -0
- package/dist/server-assets/supabase/functions/cerefox-metadata/index.ts +99 -0
- package/dist/server-assets/supabase/functions/cerefox-metadata-search/index.ts +146 -0
- package/dist/server-assets/supabase/functions/cerefox-search/index.ts +382 -0
- package/docs/guides/connect-agents.md +58 -3
- package/docs/guides/migration-v0.5.md +50 -0
- package/package.json +3 -2
- package/dist/frontend/assets/index-HNlMcvli.js.map +0 -1
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import "jsr:@supabase/functions-js/edge-runtime.d.ts";
|
|
2
|
+
import { createClient } from "jsr:@supabase/supabase-js@2";
|
|
3
|
+
import { isVersionRequest, versionResponse } from "../../../_shared/ef-meta/index.ts";
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* cerefox-metadata-search -- Supabase Edge Function
|
|
7
|
+
*
|
|
8
|
+
* Query documents by metadata key-value criteria without a text search term.
|
|
9
|
+
* Calls the cerefox_metadata_search() RPC via the service-role key.
|
|
10
|
+
*
|
|
11
|
+
* Called by:
|
|
12
|
+
* - GPT Custom Actions (direct HTTP POST via OpenAPI schema)
|
|
13
|
+
* - Any authenticated HTTP client
|
|
14
|
+
*
|
|
15
|
+
* Note: cerefox-mcp calls the RPC directly (not this Edge Function).
|
|
16
|
+
*
|
|
17
|
+
* Request body (JSON):
|
|
18
|
+
* metadata_filter object required Key-value pairs (AND semantics)
|
|
19
|
+
* project_id string optional Project UUID filter
|
|
20
|
+
* updated_since string optional ISO-8601 lower bound for updated_at
|
|
21
|
+
* created_since string optional ISO-8601 lower bound for created_at
|
|
22
|
+
* limit number optional Max results (default: 10)
|
|
23
|
+
* include_content boolean optional Include full text (default: false)
|
|
24
|
+
* max_bytes number optional Byte budget when include_content=true
|
|
25
|
+
*
|
|
26
|
+
* Response (200): Array of matching documents
|
|
27
|
+
* Response (400): { error: "..." }
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
/**
 * Hard ceiling (and default) for the `max_bytes` content budget, in bytes.
 * Only applied when the caller sets `include_content`.
 */
const MAX_BYTES = 200_000;

/** CORS headers attached to every response from this function. */
const CORS_HEADERS = {
  "Access-Control-Allow-Origin": "*",
  "Access-Control-Allow-Methods": "POST, OPTIONS",
  "Access-Control-Allow-Headers": "Content-Type, Authorization, apikey",
};

/** Headers for JSON responses: the CORS headers plus the JSON content type. */
const JSON_HEADERS = { ...CORS_HEADERS, "Content-Type": "application/json" };

/** Serialise `payload` as a JSON response with the given HTTP status. */
function jsonResponse(payload: unknown, status: number): Response {
  return new Response(JSON.stringify(payload), { status, headers: JSON_HEADERS });
}

/**
 * Resolve the effective byte budget from the caller-supplied `max_bytes`.
 *
 * Missing values default to MAX_BYTES and larger values are capped to it.
 * A value that does not coerce to a number (e.g. "abc" or an object) would
 * make `Math.min` return NaN, which serialises to JSON `null` and drops the
 * budget from the RPC call; such values fall back to MAX_BYTES instead.
 */
function resolveMaxBytes(requested: unknown): number {
  const capped = Math.min(Number(requested ?? MAX_BYTES), MAX_BYTES);
  return Number.isNaN(capped) ? MAX_BYTES : capped;
}

/**
 * HTTP entry point.
 *
 * - OPTIONS: CORS preflight (200, no body).
 * - Version requests (as detected by isVersionRequest): version metadata.
 * - POST: validates the body, optionally enforces the `requestor` identity,
 *   calls the cerefox_metadata_search RPC and returns its rows as a JSON array.
 * - Any other method: 405.
 *
 * Error statuses: 400 for a malformed or invalid body, 500 for RPC errors
 * and unexpected exceptions.
 */
Deno.serve(async (req: Request): Promise<Response> => {
  if (req.method === "OPTIONS") {
    return new Response(null, { status: 200, headers: CORS_HEADERS });
  }

  if (isVersionRequest(req)) {
    // Pass a fresh object, as the original did, in case the helper keeps or mutates it.
    return versionResponse("cerefox-metadata-search", { ...JSON_HEADERS });
  }

  if (req.method !== "POST") {
    return new Response("Method Not Allowed", { status: 405, headers: CORS_HEADERS });
  }

  // A body that is not valid JSON is a client error, not a server failure.
  let parsed: unknown;
  try {
    parsed = await req.json();
  } catch {
    return jsonResponse({ error: "Invalid JSON body" }, 400);
  }

  try {
    // JSON `null` parses successfully; treat it as an empty body so the
    // metadata_filter validation below answers 400 instead of throwing.
    const body = (parsed ?? {}) as Record<string, unknown>;
    const metadata_filter = body.metadata_filter;

    if (
      !metadata_filter ||
      typeof metadata_filter !== "object" ||
      Array.isArray(metadata_filter) ||
      Object.keys(metadata_filter).length === 0
    ) {
      return jsonResponse(
        { error: "metadata_filter is required and must be a non-empty JSON object" },
        400,
      );
    }

    const project_id = body.project_id ?? null;
    const updated_since = body.updated_since ?? null;
    const created_since = body.created_since ?? null;
    const limit = body.limit ?? 10;
    const include_content = body.include_content ?? false;

    // The byte budget is only sent to the RPC when content is requested.
    const max_bytes = include_content ? resolveMaxBytes(body.max_bytes) : null;

    const supabaseUrl = Deno.env.get("SUPABASE_URL")!;
    const supabaseKey = Deno.env.get("SUPABASE_SERVICE_ROLE_KEY")!;
    const supabase = createClient(supabaseUrl, supabaseKey);

    // Configurable requestor enforcement
    const identityField = "requestor";
    const identityValue = body[identityField];
    const { data: reqConfig } = await supabase.rpc("cerefox_get_config", { p_key: "require_requestor_identity" });
    if (reqConfig === "true") {
      if (!identityValue || (typeof identityValue === "string" && identityValue.trim() === "")) {
        return jsonResponse(
          { error: `Missing required parameter "${identityField}". Server requires caller identity.` },
          400,
        );
      }
      const { data: fmtConfig } = await supabase.rpc("cerefox_get_config", { p_key: "requestor_identity_format" });
      if (fmtConfig && typeof fmtConfig === "string" && fmtConfig.trim() !== "") {
        // An invalid configured pattern throws here and is reported as a 500 by the catch below.
        if (!new RegExp(fmtConfig).test(String(identityValue))) {
          return jsonResponse(
            { error: `Invalid "${identityField}" format. Does not match pattern: ${fmtConfig}` },
            400,
          );
        }
      }
    }

    const params: Record<string, unknown> = {
      p_metadata_filter: metadata_filter,
      p_project_id: project_id,
      p_updated_since: updated_since,
      p_created_since: created_since,
      p_limit: limit,
      p_include_content: include_content,
    };
    if (max_bytes !== null) {
      params.p_max_bytes = max_bytes;
    }

    const { data, error } = await supabase.rpc("cerefox_metadata_search", params);

    if (error) {
      return jsonResponse({ error: error.message }, 500);
    }

    // Fire-and-forget usage logging: not awaited, failures are ignored.
    Promise.resolve(supabase.rpc("cerefox_log_usage", {
      p_operation: "metadata_search",
      p_access_path: "edge-function",
      p_requestor: body.requestor ?? null,
      p_query_text: JSON.stringify(metadata_filter),
      p_result_count: (data ?? []).length,
      p_project_id: project_id,
    })).catch(() => {});

    return jsonResponse(data ?? [], 200);
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    return jsonResponse({ error: message }, 500);
  }
});
|
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
import "jsr:@supabase/functions-js/edge-runtime.d.ts";
|
|
2
|
+
import { createClient } from "jsr:@supabase/supabase-js@2";
|
|
3
|
+
import { isVersionRequest, versionResponse } from "../../../_shared/ef-meta/index.ts";
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* cerefox-search — Supabase Edge Function
|
|
7
|
+
*
|
|
8
|
+
* Accepts a plain-text query, embeds it server-side using the OpenAI API,
|
|
9
|
+
* then calls the appropriate Cerefox search RPC and returns the results.
|
|
10
|
+
*
|
|
11
|
+
* Called by the cerefox-mcp Edge Function (MCP Streamable HTTP), GPT Actions
|
|
12
|
+
* (direct HTTP POST), or any HTTP client. No SQL required, no local embedder.
|
|
13
|
+
*
|
|
14
|
+
* Request body (JSON):
|
|
15
|
+
* query string required Natural-language search query
|
|
16
|
+
* project_name string optional Project to filter by (looked up by name)
|
|
17
|
+
* match_count number optional Max results (default: 5)
|
|
18
|
+
* mode string optional "hybrid" | "fts" | "docs" (default: "docs")
|
|
19
|
+
* alpha number optional Semantic weight for hybrid search (default: 0.7)
|
|
20
|
+
* min_score number optional Min cosine similarity (default: 0.5)
|
|
21
|
+
* metadata_filter object optional JSONB containment filter. Only documents whose
|
|
22
|
+
* metadata contains ALL specified key-value pairs
|
|
23
|
+
* are returned. Example: {"type":"decision"}.
|
|
24
|
+
* Use cerefox-metadata to discover available keys.
|
|
25
|
+
* max_bytes number optional Response size budget in bytes (default: 200000,
|
|
26
|
+
* hard ceiling: 200000). Agents may pass a smaller
|
|
27
|
+
* value to fit their context window; values above
|
|
28
|
+
* the server ceiling are silently capped. Results
|
|
29
|
+
* are dropped whole (never truncated mid-doc) until
|
|
30
|
+
* the budget is satisfied. The response includes a
|
|
31
|
+
* `truncated` flag when results were dropped.
|
|
32
|
+
*
|
|
33
|
+
* Response: { results: [...], query, mode, match_count, project_name?,
|
|
34
|
+
* truncated: boolean, response_bytes: number }
|
|
35
|
+
*
|
|
36
|
+
* Example agent prompt:
|
|
37
|
+
* "Invoke the cerefox-search edge function with query='knowledge management'
|
|
38
|
+
* and project_name='Personal'. Summarize the results."
|
|
39
|
+
*/
|
|
40
|
+
|
|
41
|
+
/** OpenAI endpoint used to embed the search query. */
const OPENAI_EMBEDDING_URL = "https://api.openai.com/v1/embeddings";
/** Embedding model requested from OpenAI. */
const OPENAI_MODEL = "text-embedding-3-small";
/** Vector size requested through the `dimensions` request field. */
const EMBEDDING_DIMENSIONS = 768;

// Response size ceiling — server-enforced hard limit.
// Agents may pass a smaller max_bytes to fit their context budget, but cannot
// exceed this value. Acts as both the default (when max_bytes is omitted) and
// the hard ceiling (when the caller requests more). Small-to-big retrieval
// bounds individual large-doc results to matched chunks + neighbours, so this
// ceiling is rarely reached under normal usage at the default match_count=5.
const MAX_BYTES = 200_000;

/** JSON request body accepted by this function. Only `query` is required. */
interface SearchRequest {
  query: string;
  project_name?: string;
  match_count?: number;
  mode?: "hybrid" | "fts" | "docs";
  alpha?: number;
  min_score?: number;
  metadata_filter?: Record<string, string> | null;
  max_bytes?: number;
  requestor?: string;
}

/** Total number of calls made to the embedding API before giving up. */
const EMBEDDING_MAX_RETRIES = 3;
/** Delay before the first retry; doubles on each later retry (500ms, then 1s). */
const EMBEDDING_INITIAL_BACKOFF_MS = 500;

/**
 * Log a retryable failure and wait out the exponential backoff.
 *
 * On the final attempt there is nothing left to retry, so this logs that the
 * call is giving up and returns immediately instead of sleeping.
 *
 * @param reason  Human-readable description of the failure, used in the log line.
 * @param attempt Zero-based index of the attempt that just failed.
 */
async function pauseBeforeRetry(reason: string, attempt: number): Promise<void> {
  const attemptLabel = `attempt ${attempt + 1}/${EMBEDDING_MAX_RETRIES}`;
  if (attempt + 1 >= EMBEDDING_MAX_RETRIES) {
    console.warn(`${reason} (${attemptLabel}), giving up`);
    return;
  }
  const backoff = EMBEDDING_INITIAL_BACKOFF_MS * Math.pow(2, attempt);
  console.warn(`${reason} (${attemptLabel}), retrying in ${backoff}ms`);
  await new Promise((r) => setTimeout(r, backoff));
}

/**
 * Embed `text` with the OpenAI embeddings API.
 *
 * Responses with a status below 500 are treated as non-retryable and thrown
 * immediately. 5xx responses and any other failure (network error, unreadable
 * or malformed response body) are retried with exponential backoff, up to
 * EMBEDDING_MAX_RETRIES attempts in total.
 *
 * @param text   Text to embed.
 * @param apiKey OpenAI API key, sent as a Bearer token.
 * @returns The embedding vector of the first item in the API response.
 * @throws Error with the last failure once all attempts are exhausted, or
 *         immediately for a non-retryable response.
 */
async function getEmbedding(text: string, apiKey: string): Promise<number[]> {
  let lastError: Error | null = null;

  for (let attempt = 0; attempt < EMBEDDING_MAX_RETRIES; attempt++) {
    try {
      const response = await fetch(OPENAI_EMBEDDING_URL, {
        method: "POST",
        headers: {
          "Authorization": `Bearer ${apiKey}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          model: OPENAI_MODEL,
          input: text,
          dimensions: EMBEDDING_DIMENSIONS,
        }),
      });

      if (!response.ok) {
        const err = await response.text();
        // Don't retry client errors (4xx)
        if (response.status < 500) {
          throw new Error(`OpenAI embedding error ${response.status}: ${err}`);
        }
        // Server errors (5xx) are retryable
        lastError = new Error(`OpenAI embedding error ${response.status}: ${err}`);
        await pauseBeforeRetry(`Embedding API returned ${response.status}`, attempt);
        continue;
      }

      const data = await response.json();
      if (attempt > 0) {
        console.info(`Embedding API succeeded on retry ${attempt}`);
      }
      return data.data[0].embedding;
    } catch (err) {
      // The non-retryable error thrown above is recognised by its message prefix and re-thrown.
      if (err instanceof Error && err.message.startsWith("OpenAI embedding error")) {
        throw err;
      }
      // Anything else (network failure, bad response body) is retryable
      lastError = err instanceof Error ? err : new Error(String(err));
      await pauseBeforeRetry(`Embedding API request failed: ${lastError.message}`, attempt);
    }
  }

  throw lastError ?? new Error(`Embedding API failed after ${EMBEDDING_MAX_RETRIES} attempts`);
}
|
|
125
|
+
|
|
126
|
+
/**
 * Resolve a project name to its id via the `cerefox_projects` table.
 *
 * The name is matched with `ilike` (case-insensitive; pattern characters in
 * `projectName` are passed through as-is) and at most one row is fetched.
 *
 * @returns The `id` of the first matching row, or `null` when the query
 *          fails or matches nothing.
 */
async function lookupProjectId(
  supabase: ReturnType<typeof createClient>,
  projectName: string,
): Promise<string | null> {
  const lookup = await supabase
    .from("cerefox_projects")
    .select("id")
    .ilike("name", projectName)
    .limit(1);

  // A query error is reported the same way as "no such project".
  if (lookup.error) {
    return null;
  }

  const rows = lookup.data;
  if (!rows?.length) {
    return null;
  }

  return rows[0].id;
}
|
|
139
|
+
|
|
140
|
+
/**
 * Apply a byte budget to an array of result rows.
 *
 * Each row is serialised to JSON and measured in UTF-8 bytes. Rows are
 * accepted in order until the next row would push the running total over
 * `maxBytes`; that row and everything after it are dropped. Rows are always
 * kept or dropped whole — content is never truncated mid-document.
 *
 * A NaN budget is treated as zero. Every comparison against NaN is false, so
 * without this guard an invalid budget would silently disable the limit.
 *
 * @param rows     Result rows, in priority order.
 * @param maxBytes Maximum total of the rows' serialised sizes, in bytes.
 * @returns The accepted rows, whether any row was dropped, and the summed
 *          serialised size of the accepted rows.
 */
function applyByteBudget(
  rows: unknown[],
  maxBytes: number,
): { accepted: unknown[]; truncated: boolean; usedBytes: number } {
  const budget = Number.isNaN(maxBytes) ? 0 : maxBytes;
  // One encoder serves every row; it holds no per-call state.
  const encoder = new TextEncoder();

  const accepted: unknown[] = [];
  let usedBytes = 0;
  let truncated = false;

  for (const row of rows) {
    const rowBytes = encoder.encode(JSON.stringify(row)).length;
    if (usedBytes + rowBytes > budget) {
      truncated = true;
      break;
    }
    accepted.push(row);
    usedBytes += rowBytes;
  }

  return { accepted, truncated, usedBytes };
}
|
|
168
|
+
|
|
169
|
+
/** Headers sent with every JSON response (CORS open to any origin). */
const headers = {
  "Content-Type": "application/json",
  "Access-Control-Allow-Origin": "*",
};

/** Build a JSON `{ error }` response with the shared headers. */
function errorResponse(message: string, status: number): Response {
  return new Response(JSON.stringify({ error: message }), { status, headers });
}

/**
 * Resolve the effective response budget from the caller-supplied `max_bytes`.
 *
 * Missing values default to MAX_BYTES and larger values are capped to it.
 * A value that does not coerce to a number (e.g. "abc") would make `Math.min`
 * return NaN, and a NaN budget never trips the size comparison, bypassing the
 * ceiling; such values fall back to MAX_BYTES instead.
 */
function resolveMaxBytes(requested: unknown): number {
  const capped = Math.min(Number(requested ?? MAX_BYTES), MAX_BYTES);
  return Number.isNaN(capped) ? MAX_BYTES : capped;
}

/**
 * HTTP entry point.
 *
 * - OPTIONS: CORS preflight.
 * - Version requests (as detected by isVersionRequest): version metadata.
 * - POST: validates the body, optionally enforces the `requestor` identity,
 *   resolves `project_name`, embeds the query (skipped for mode "fts"), calls
 *   the search RPC for the mode, applies the byte budget and returns
 *   `{ results, query, mode, match_count, project_name, metadata_filter,
 *   truncated, response_bytes }`.
 * - Any other method: 405.
 *
 * Error statuses: 400 invalid body or missing identity, 404 unknown project,
 * 502 embedding failure, 500 missing secret, invalid server configuration or
 * RPC error.
 */
Deno.serve(async (req: Request) => {
  // CORS preflight
  if (req.method === "OPTIONS") {
    return new Response(null, {
      headers: {
        "Access-Control-Allow-Origin": "*",
        "Access-Control-Allow-Headers": "authorization, x-client-info, apikey, content-type",
      },
    });
  }

  if (isVersionRequest(req)) {
    return versionResponse("cerefox-search", headers);
  }

  if (req.method !== "POST") {
    return errorResponse("POST required", 405);
  }

  let body: SearchRequest;
  try {
    const parsed: unknown = await req.json();
    // JSON `null`, arrays and primitives parse successfully but are not a
    // request object; destructuring `null` below would throw.
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
      return errorResponse("Request body must be a JSON object", 400);
    }
    body = parsed as SearchRequest;
  } catch {
    return errorResponse("Invalid JSON body", 400);
  }

  const {
    query,
    project_name,
    match_count = 5,
    mode = "docs",
    alpha = 0.7,
    min_score = 0.5,
    metadata_filter = null,
    max_bytes: requested_max_bytes,
  } = body;

  // Enforce ceiling: agents may request less but never more than MAX_BYTES.
  const max_bytes = resolveMaxBytes(requested_max_bytes);

  // Validate metadata_filter: must be a plain object (or null/absent).
  // Reject arrays, strings, and other non-object types to prevent RPC errors.
  if (
    metadata_filter !== null &&
    metadata_filter !== undefined &&
    (typeof metadata_filter !== "object" || Array.isArray(metadata_filter))
  ) {
    return errorResponse("metadata_filter must be a JSON object or null", 400);
  }

  if (!query || typeof query !== "string" || !query.trim()) {
    return errorResponse("query is required", 400);
  }

  const openaiKey = Deno.env.get("OPENAI_API_KEY");
  if (!openaiKey) {
    return errorResponse("OPENAI_API_KEY secret not set on this project", 500);
  }

  const supabaseUrl = Deno.env.get("SUPABASE_URL")!;
  const supabaseKey = Deno.env.get("SUPABASE_SERVICE_ROLE_KEY")!;
  const supabase = createClient(supabaseUrl, supabaseKey);

  // Configurable requestor enforcement
  const identityField = "requestor";
  const identityValue = body[identityField as keyof SearchRequest] as string | undefined;
  const { data: reqConfig } = await supabase.rpc("cerefox_get_config", { p_key: "require_requestor_identity" });
  if (reqConfig === "true") {
    if (!identityValue || (typeof identityValue === "string" && identityValue.trim() === "")) {
      return errorResponse(
        `Missing required parameter "${identityField}". Server requires caller identity.`,
        400,
      );
    }
    const { data: fmtConfig } = await supabase.rpc("cerefox_get_config", { p_key: "requestor_identity_format" });
    if (fmtConfig && typeof fmtConfig === "string" && fmtConfig.trim() !== "") {
      // The pattern comes from server configuration; an invalid one must
      // produce a JSON error rather than an uncaught exception.
      let identityPattern: RegExp;
      try {
        identityPattern = new RegExp(fmtConfig);
      } catch {
        return errorResponse(
          `Server misconfiguration: "requestor_identity_format" is not a valid regular expression`,
          500,
        );
      }
      if (!identityPattern.test(identityValue)) {
        return errorResponse(
          `Invalid "${identityField}" format. Does not match pattern: ${fmtConfig}`,
          400,
        );
      }
    }
  }

  // Resolve project name → UUID if provided
  let projectId: string | null = null;
  if (project_name) {
    projectId = await lookupProjectId(supabase, project_name);
    if (!projectId) {
      return errorResponse(`Project not found: ${project_name}`, 404);
    }
  }

  // FTS mode doesn't need an embedding
  let embedding: number[] | null = null;
  if (mode !== "fts") {
    try {
      embedding = await getEmbedding(query, openaiKey);
    } catch (err) {
      return errorResponse(String(err), 502);
    }
  }

  // Call the appropriate RPC
  let rpcName: string;
  let rpcParams: Record<string, unknown>;

  // Build a metadata filter param only when a non-empty filter object is provided.
  // Passing null explicitly or an empty object {} to the RPC is equivalent to no filter,
  // but we omit it entirely when absent to keep RPC call params minimal.
  const metaFilterParam = metadata_filter && Object.keys(metadata_filter).length > 0
    ? { p_metadata_filter: metadata_filter }
    : {};

  if (mode === "fts") {
    rpcName = "cerefox_fts_search";
    rpcParams = {
      p_query_text: query,
      p_match_count: match_count,
      p_project_id: projectId,
      ...metaFilterParam,
    };
  } else if (mode === "hybrid") {
    rpcName = "cerefox_hybrid_search";
    rpcParams = {
      p_query_text: query,
      p_query_embedding: embedding,
      p_match_count: match_count,
      p_alpha: alpha,
      p_use_upgrade: false,
      p_project_id: projectId,
      p_min_score: min_score,
      ...metaFilterParam,
    };
  } else {
    // "docs" — document-level hybrid search (recommended default).
    // Any unrecognised mode value also lands here.
    // Small-to-big threshold and context window use the RPC defaults (20000 / 1).
    // Override them in Postgres (rpcs.sql) if you need a different server-wide value.
    rpcName = "cerefox_search_docs";
    rpcParams = {
      p_query_text: query,
      p_query_embedding: embedding,
      p_match_count: match_count,
      p_alpha: alpha,
      p_project_id: projectId,
      p_min_score: min_score,
      ...metaFilterParam,
    };
  }

  const { data, error } = await supabase.rpc(rpcName, rpcParams);

  if (error) {
    return errorResponse(`RPC error: ${error.message}`, 500);
  }

  // Apply byte budget — drop whole results (never truncate mid-doc) to stay
  // under the limit. This mirrors the local MCP server's truncation behaviour.
  const { accepted, truncated, usedBytes } = applyByteBudget(data ?? [], max_bytes);

  // Fire-and-forget usage logging (never blocks the response)
  Promise.resolve(supabase.rpc("cerefox_log_usage", {
    p_operation: "search",
    p_access_path: "edge-function",
    p_requestor: body.requestor ?? null,
    p_query_text: query,
    p_result_count: accepted.length,
    p_project_id: projectId,
  })).catch(() => {});

  return new Response(
    JSON.stringify({
      results: accepted,
      query,
      mode,
      match_count,
      project_name: project_name ?? null,
      metadata_filter: metadata_filter ?? null,
      truncated,
      response_bytes: usedBytes,
    }),
    { headers },
  );
});
|
|
@@ -657,7 +657,7 @@ In the action editor, paste this schema (replace `<your-project-ref>`):
|
|
|
657
657
|
openapi: 3.1.0
|
|
658
658
|
info:
|
|
659
659
|
title: Cerefox Knowledge Base
|
|
660
|
-
version: 1.
|
|
660
|
+
version: 1.8.0
|
|
661
661
|
servers:
|
|
662
662
|
- url: https://<your-project-ref>.supabase.co/functions/v1
|
|
663
663
|
paths:
|
|
@@ -682,7 +682,11 @@ paths:
|
|
|
682
682
|
type: string
|
|
683
683
|
mode:
|
|
684
684
|
type: string
|
|
685
|
+
enum: [docs, hybrid, fts]
|
|
685
686
|
default: docs
|
|
687
|
+
description: >
|
|
688
|
+
docs = document-level hybrid (recommended); hybrid = chunk-level
|
|
689
|
+
semantic+FTS; fts = keyword-only (no embedding).
|
|
686
690
|
metadata_filter:
|
|
687
691
|
type: object
|
|
688
692
|
additionalProperties:
|
|
@@ -693,6 +697,26 @@ paths:
|
|
|
693
697
|
Example: {"type": "decision", "status": "active"}.
|
|
694
698
|
Call listMetadataKeys to discover available keys and their values.
|
|
695
699
|
Omit or set to null to search all documents.
|
|
700
|
+
alpha:
|
|
701
|
+
type: number
|
|
702
|
+
default: 0.7
|
|
703
|
+
description: >
|
|
704
|
+
Semantic weight for hybrid/docs modes (0 = pure FTS, 1 = pure
|
|
705
|
+
semantic). Advanced; leave unset for the default blend.
|
|
706
|
+
min_score:
|
|
707
|
+
type: number
|
|
708
|
+
default: 0.5
|
|
709
|
+
description: >
|
|
710
|
+
Minimum cosine similarity for a vector-only match to be included.
|
|
711
|
+
Advanced; leave unset for the default threshold.
|
|
712
|
+
max_bytes:
|
|
713
|
+
type: integer
|
|
714
|
+
default: 200000
|
|
715
|
+
description: >
|
|
716
|
+
Response size budget in bytes (server hard ceiling 200000).
|
|
717
|
+
Whole results are dropped (never truncated mid-document) until
|
|
718
|
+
the budget is met; the response sets `truncated: true` when this
|
|
719
|
+
happens. Advanced; leave unset for the default.
|
|
696
720
|
requestor:
|
|
697
721
|
type: string
|
|
698
722
|
description: >
|
|
@@ -735,6 +759,18 @@ paths:
|
|
|
735
759
|
the document, note the document_id, pass it here.
|
|
736
760
|
project_name:
|
|
737
761
|
type: string
|
|
762
|
+
description: >
|
|
763
|
+
Add the document to this project (non-destructive — keeps any
|
|
764
|
+
existing project memberships). Looked up by name.
|
|
765
|
+
project_names:
|
|
766
|
+
type: array
|
|
767
|
+
items:
|
|
768
|
+
type: string
|
|
769
|
+
description: >
|
|
770
|
+
Destructive full-set project assignment: the document's project
|
|
771
|
+
memberships are replaced with exactly this list. Use project_name
|
|
772
|
+
(singular) to add without removing. If both are given, project_names
|
|
773
|
+
wins.
|
|
738
774
|
source:
|
|
739
775
|
type: string
|
|
740
776
|
default: agent
|
|
@@ -764,7 +800,14 @@ paths:
|
|
|
764
800
|
the document to pending_review, user writes set it to approved.
|
|
765
801
|
responses:
|
|
766
802
|
'200':
|
|
767
|
-
description:
|
|
803
|
+
description: >
|
|
804
|
+
Ingest result. Fields vary by outcome:
|
|
805
|
+
{ document_id, title, chunk_count, total_chars,
|
|
806
|
+
project_id?, project_name?, # set when a project was assigned on create
|
|
807
|
+
skipped?, # true when identical content was deduplicated
|
|
808
|
+
updated?, # true when an existing doc was updated
|
|
809
|
+
message?, # human note on dedup/skip/update
|
|
810
|
+
note? } # note when a flag (e.g. update_if_exists) was overridden
|
|
768
811
|
/cerefox-metadata:
|
|
769
812
|
post:
|
|
770
813
|
operationId: listMetadataKeys
|
|
@@ -867,6 +910,9 @@ paths:
|
|
|
867
910
|
since:
|
|
868
911
|
type: string
|
|
869
912
|
description: ISO timestamp lower bound for temporal queries (optional)
|
|
913
|
+
until:
|
|
914
|
+
type: string
|
|
915
|
+
description: ISO timestamp upper bound for temporal queries (optional)
|
|
870
916
|
limit:
|
|
871
917
|
type: integer
|
|
872
918
|
default: 50
|
|
@@ -921,7 +967,10 @@ paths:
|
|
|
921
967
|
Example: {"type": "decision", "status": "active"}.
|
|
922
968
|
project_id:
|
|
923
969
|
type: string
|
|
924
|
-
description:
|
|
970
|
+
description: >
|
|
971
|
+
Filter by project UUID (optional). NOTE: this is the project
|
|
972
|
+
UUID, not its name — unlike searchKnowledgeBase / ingestNote
|
|
973
|
+
which take project_name. Get UUIDs from listProjects.
|
|
925
974
|
updated_since:
|
|
926
975
|
type: string
|
|
927
976
|
description: ISO-8601 timestamp; only docs updated on/after (optional)
|
|
@@ -935,6 +984,12 @@ paths:
|
|
|
935
984
|
type: boolean
|
|
936
985
|
default: false
|
|
937
986
|
description: Include full document text in results
|
|
987
|
+
max_bytes:
|
|
988
|
+
type: integer
|
|
989
|
+
default: 200000
|
|
990
|
+
description: >
|
|
991
|
+
Response size budget in bytes when include_content is true
|
|
992
|
+
(whole results dropped to fit). Advanced; leave unset for the default.
|
|
938
993
|
requestor:
|
|
939
994
|
type: string
|
|
940
995
|
description: Name of the agent making this request. Optional.
|