@cerefox/memory 0.7.2 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/cerefox.js +1357 -361
- package/dist/frontend/assets/{index-BzAPcCXA.js → index-CAp2_lFX.js} +2 -2
- package/dist/frontend/assets/index-CAp2_lFX.js.map +1 -0
- package/dist/frontend/index.html +1 -1
- package/dist/server-assets/_shared/ef-meta/index.ts +97 -0
- package/dist/server-assets/_shared/embeddings/index.ts +175 -0
- package/dist/server-assets/_shared/mcp-tools/_chunker.ts +187 -0
- package/dist/server-assets/_shared/mcp-tools/_projects.ts +121 -0
- package/dist/server-assets/_shared/mcp-tools/_utils.ts +73 -0
- package/dist/server-assets/_shared/mcp-tools/audit-log.ts +95 -0
- package/dist/server-assets/_shared/mcp-tools/get-document.ts +73 -0
- package/dist/server-assets/_shared/mcp-tools/get-help-content.ts +26 -0
- package/dist/server-assets/_shared/mcp-tools/get-help.ts +90 -0
- package/dist/server-assets/_shared/mcp-tools/index.ts +67 -0
- package/dist/server-assets/_shared/mcp-tools/ingest.ts +315 -0
- package/dist/server-assets/_shared/mcp-tools/list-metadata-keys.ts +55 -0
- package/dist/server-assets/_shared/mcp-tools/list-projects.ts +59 -0
- package/dist/server-assets/_shared/mcp-tools/list-versions.ts +72 -0
- package/dist/server-assets/_shared/mcp-tools/metadata-search.ts +154 -0
- package/dist/server-assets/_shared/mcp-tools/search.ts +193 -0
- package/dist/server-assets/_shared/mcp-tools/set-document-projects.ts +163 -0
- package/dist/server-assets/_shared/mcp-tools/types.ts +92 -0
- package/dist/server-assets/db/migrations/0003_add_document_versions.sql +91 -0
- package/dist/server-assets/db/migrations/0004_add_audit_log_review_status_archived.sql +71 -0
- package/dist/server-assets/db/migrations/0005_metadata_search.sql +628 -0
- package/dist/server-assets/db/migrations/0006_usage_log.sql +255 -0
- package/dist/server-assets/db/migrations/0007_usage_log_requestor.sql +178 -0
- package/dist/server-assets/db/migrations/0008_soft_delete.sql +130 -0
- package/dist/server-assets/db/migrations/0009_audit_log_restore_operation.sql +20 -0
- package/dist/server-assets/db/migrations/0010_requestor_enforcement_config.sql +12 -0
- package/dist/server-assets/db/migrations/0011_title_boosting.sql +48 -0
- package/dist/server-assets/db/rpcs.sql +1723 -0
- package/dist/server-assets/db/schema.sql +380 -0
- package/dist/server-assets/supabase/functions/cerefox-get-audit-log/index.ts +117 -0
- package/dist/server-assets/supabase/functions/cerefox-get-document/index.ts +138 -0
- package/dist/server-assets/supabase/functions/cerefox-ingest/index.ts +819 -0
- package/dist/server-assets/supabase/functions/cerefox-list-projects/index.ts +96 -0
- package/dist/server-assets/supabase/functions/cerefox-list-versions/index.ts +113 -0
- package/dist/server-assets/supabase/functions/cerefox-mcp/index.ts +294 -0
- package/dist/server-assets/supabase/functions/cerefox-mcp/shared.ts +42 -0
- package/dist/server-assets/supabase/functions/cerefox-metadata/index.ts +99 -0
- package/dist/server-assets/supabase/functions/cerefox-metadata-search/index.ts +146 -0
- package/dist/server-assets/supabase/functions/cerefox-search/index.ts +382 -0
- package/docs/guides/connect-agents.md +78 -3
- package/docs/guides/migration-v0.5.md +50 -0
- package/docs/guides/quickstart.md +6 -2
- package/package.json +3 -2
- package/dist/frontend/assets/index-BzAPcCXA.js.map +0 -1
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `cerefox_search` — hybrid (FTS + semantic) search over the knowledge base.
|
|
3
|
+
*
|
|
4
|
+
* Three modes:
|
|
5
|
+
* - `docs` (default) — document-level hybrid via `cerefox_search_docs`.
|
|
6
|
+
* - `hybrid` — chunk-level hybrid via `cerefox_hybrid_search`.
|
|
7
|
+
* - `fts` — FTS-only via `cerefox_fts_search` (no embedding needed).
|
|
8
|
+
*
|
|
9
|
+
* Embedding is computed for `docs` and `hybrid` modes via the shared
|
|
10
|
+
* embedder. Results respect a per-call `max_bytes` budget capped at
|
|
11
|
+
* `MAX_RESPONSE_BYTES`; whole rows are dropped to fit the budget.
|
|
12
|
+
*
|
|
13
|
+
* Mirrors `supabase/functions/cerefox-mcp/tools/search.ts` byte-for-byte
|
|
14
|
+
* in response shape so v0.4.0 can keep agents on the same on-the-wire
|
|
15
|
+
* format whether they go through the remote MCP or the new local TS one.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import type { MCPSupabaseClient } from "./types.ts";
|
|
19
|
+
|
|
20
|
+
import { getEmbedding } from "../embeddings/index.ts";
|
|
21
|
+
import { applyByteBudget, logUsage, MAX_RESPONSE_BYTES } from "./_utils.ts";
|
|
22
|
+
import { lookupProjectId } from "./_projects.ts";
|
|
23
|
+
import { McpInvalidParams, type ToolContext, type ToolDefinition } from "./types.ts";
|
|
24
|
+
|
|
25
|
+
/**
 * Handler for `cerefox_search`.
 *
 * Validates the arguments, resolves the optional project name to an id,
 * embeds the query for every mode except `fts`, calls the matching search
 * RPC, trims the rows to the byte budget and renders them as markdown
 * sections separated by `---`.
 *
 * Mode selection: `fts` -> `cerefox_fts_search`, `hybrid` ->
 * `cerefox_hybrid_search`, anything else (including the default `docs`) ->
 * `cerefox_search_docs`.
 *
 * @param supabase - Client used for the search RPC, project lookup and usage log.
 * @param args - Raw tool arguments (`query`, `project_name`, `match_count`,
 *   `mode`, `alpha`, `min_score`, `metadata_filter`, `max_bytes`, `requestor`).
 * @param ctx - Per-call context; supplies the embedding API key and access path.
 * @returns The rendered result text, or `"No results found."` when no row is accepted.
 * @throws McpInvalidParams when `query` is missing, blank or not a string, or
 *   when `metadata_filter` is neither an object nor null.
 * @throws Error when an embedding is needed but no API key is configured, when
 *   the project name cannot be resolved, or when the RPC reports an error.
 */
async function handler(
  supabase: MCPSupabaseClient,
  args: Record<string, unknown>,
  ctx: ToolContext,
): Promise<string> {
  const rawQuery = args.query;
  const project_name = args.project_name as string | undefined;
  const match_count = (args.match_count as number | undefined) ?? 5;
  const mode = (args.mode as string | undefined) ?? "docs";
  const alpha = (args.alpha as number | undefined) ?? 0.7;
  const min_score = (args.min_score as number | undefined) ?? 0.5;
  const metadata_filter =
    (args.metadata_filter as Record<string, string> | null | undefined) ?? null;
  const requested_max_bytes = args.max_bytes as number | undefined;

  // Callers may lower the budget but never raise it above the server maximum.
  const max_bytes = Math.min(requested_max_bytes ?? MAX_RESPONSE_BYTES, MAX_RESPONSE_BYTES);

  // `metadata_filter` is already normalised to null when absent, so only the
  // shape needs checking here.
  if (
    metadata_filter !== null &&
    (typeof metadata_filter !== "object" || Array.isArray(metadata_filter))
  ) {
    throw new McpInvalidParams("metadata_filter must be a JSON object or null");
  }

  // Check the runtime type before calling `.trim()`: a non-string query (for
  // example a number) must be reported as invalid params, not as a TypeError.
  if (typeof rawQuery !== "string" || !rawQuery.trim()) {
    throw new McpInvalidParams("query is required");
  }
  const query = rawQuery;

  // Every mode except FTS needs an embedding, hence an API key. Capturing the
  // key here lets the compiler see it is defined when the embedder is called.
  let embeddingKey: string | null = null;
  if (mode !== "fts") {
    if (!ctx.openaiApiKey) {
      throw new Error(
        "OpenAI API key not configured. Set OPENAI_API_KEY (Edge Function) or CEREFOX_OPENAI_API_KEY (.env, local).",
      );
    }
    embeddingKey = ctx.openaiApiKey;
  }

  // Resolve project name to UUID if provided.
  let projectId: string | null = null;
  if (project_name) {
    projectId = await lookupProjectId(supabase, project_name);
    if (!projectId) throw new Error(`Project not found: ${project_name}`);
  }

  // Embed only after the project lookup succeeded so a bad project name does
  // not cost an embedding call. `embeddingKey` is null exactly in FTS mode.
  const embedding: number[] | null =
    embeddingKey !== null ? await getEmbedding(query, embeddingKey) : null;

  // An empty filter object is treated the same as no filter at all.
  const metaFilterParam =
    metadata_filter && Object.keys(metadata_filter).length > 0
      ? { p_metadata_filter: metadata_filter }
      : {};

  let rpcName: string;
  let rpcParams: Record<string, unknown>;

  if (mode === "fts") {
    rpcName = "cerefox_fts_search";
    rpcParams = {
      p_query_text: query,
      p_match_count: match_count,
      p_project_id: projectId,
      ...metaFilterParam,
    };
  } else if (mode === "hybrid") {
    rpcName = "cerefox_hybrid_search";
    rpcParams = {
      p_query_text: query,
      p_query_embedding: embedding,
      p_match_count: match_count,
      p_alpha: alpha,
      p_use_upgrade: false,
      p_project_id: projectId,
      p_min_score: min_score,
      ...metaFilterParam,
    };
  } else {
    // Default `docs` mode; unrecognised mode values also land here.
    rpcName = "cerefox_search_docs";
    rpcParams = {
      p_query_text: query,
      p_query_embedding: embedding,
      p_match_count: match_count,
      p_alpha: alpha,
      p_project_id: projectId,
      p_min_score: min_score,
      ...metaFilterParam,
    };
  }

  const { data, error } = await supabase.rpc(rpcName, rpcParams);

  if (error) throw new Error(`RPC error: ${error.message}`);

  const { accepted, truncated, usedBytes } = applyByteBudget(data ?? [], max_bytes);

  // Not awaited, exactly as before: the response does not wait for the usage log.
  logUsage(supabase, {
    operation: "search",
    accessPath: ctx.accessPath,
    requestor: args.requestor as string | undefined,
    query_text: query,
    project_id: projectId,
    result_count: accepted.length,
  });

  if (accepted.length === 0) return "No results found.";

  const rows = accepted as Array<{
    document_id?: string;
    doc_title?: string;
    full_content?: string;
    best_score?: number;
    is_partial?: boolean;
    chunk_count?: number;
    total_chars?: number;
  }>;

  // One markdown section per row: heading with title, id, score and partial
  // marker, followed by the row content.
  const parts: string[] = rows.map((row) => {
    const title = row.doc_title ?? "Untitled";
    const docId = row.document_id ? ` [id: ${row.document_id}]` : "";
    const score = row.best_score != null ? ` (score: ${row.best_score.toFixed(3)})` : "";
    const partial = row.is_partial
      ? ` -- partial (${row.chunk_count} of ${(row.total_chars ?? 0).toLocaleString()} chars)`
      : "";
    return `## ${title}${docId}${score}${partial}\n\n${row.full_content ?? ""}`;
  });

  let output = parts.join("\n\n---\n\n");
  if (truncated) {
    output +=
      `\n\n[Results truncated at ${usedBytes} bytes. Use a more specific query or a smaller match_count to see more.]`;
  }
  return output;
}
|
|
156
|
+
|
|
157
|
+
/** JSON Schema describing the arguments advertised for `cerefox_search`. */
const searchInputSchema = {
  type: "object",
  required: ["query"],
  properties: {
    query: { type: "string", description: "Natural-language search query" },
    match_count: {
      type: "integer",
      description: "Maximum number of documents to return (default: 5)",
    },
    project_name: {
      type: "string",
      description: "Filter results to a specific project by name (optional)",
    },
    metadata_filter: {
      type: "object",
      description:
        'Optional JSONB containment filter. Only documents whose metadata contains ALL specified key-value pairs are returned. Example: {"type": "decision", "status": "active"}. Call cerefox_list_metadata_keys first to discover available keys and values. Omit to search all documents.',
      additionalProperties: { type: "string" },
    },
    max_bytes: {
      type: "integer",
      description:
        "Optional response size budget in bytes. Results are dropped whole until the budget is satisfied; a truncated flag is set when results are dropped. Defaults to the server maximum (200000). Pass a smaller value if your context window is limited. Values above the server maximum are silently capped.",
    },
    requestor: {
      type: "string",
      description:
        'Name of the agent or user making this request (e.g., "Claude Code", "archiver"). Recorded in the usage log for attribution. Defaults to "mcp-agent" if not provided. May be enforced via server config.',
    },
  },
};

/** Tool definition that exposes the search handler as `cerefox_search`. */
export const searchTool: ToolDefinition = {
  name: "cerefox_search",
  description:
    "Search the Cerefox personal knowledge base. Returns complete documents ranked by hybrid (FTS + semantic) relevance.",
  inputSchema: searchInputSchema,
  handler,
};
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `cerefox_set_document_projects` — destructive replace of a document's
|
|
3
|
+
* project memberships. Empty list clears all memberships. Logged as
|
|
4
|
+
* `update-metadata` in the audit log; document content is untouched.
|
|
5
|
+
*
|
|
6
|
+
* Mirrors Python `CerefoxClient.set_document_projects` +
|
|
7
|
+
* `_handle_set_document_projects`. v0.1.20 introduced this tool to give
|
|
8
|
+
* agents an explicit full-set primitive without rewriting content (issue
|
|
9
|
+
* #38, Part 4).
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import type { MCPSupabaseClient } from "./types.ts";
|
|
13
|
+
|
|
14
|
+
import { logUsage } from "./_utils.ts";
|
|
15
|
+
import { McpInvalidParams, type ToolContext, type ToolDefinition } from "./types.ts";
|
|
16
|
+
|
|
17
|
+
/**
 * Handler for `cerefox_set_document_projects`.
 *
 * Replaces the document's project memberships with exactly the given list:
 * names are trimmed, blanks dropped and duplicates removed case-insensitively
 * (first spelling wins, order preserved); each name is resolved to a project
 * id, creating the project when none matches; the existing membership rows are
 * deleted and the new set inserted; finally an `update-metadata` audit entry
 * and a usage-log entry are written.
 *
 * Every Supabase response is checked. A failure aborts with an `Error` instead
 * of reporting success over a partially applied change. The audit entry stays
 * best-effort: its failure is logged with `console.warn` and does not fail the
 * call.
 *
 * @param supabase - Client used for all table access and the audit RPC.
 * @param args - Raw tool arguments (`document_id`, `project_names`, `author`).
 * @param ctx - Per-call context; supplies the access path for the usage log.
 * @returns A human-readable summary of the resulting membership set.
 * @throws McpInvalidParams when `document_id` is missing/blank or
 *   `project_names` is not an array of strings.
 * @throws Error when the document is missing or soft-deleted, or when any
 *   lookup, insert or delete reports an error.
 */
async function handler(
  supabase: MCPSupabaseClient,
  args: Record<string, unknown>,
  ctx: ToolContext,
): Promise<string> {
  const document_id = (args.document_id as string | undefined)?.trim();
  const project_names_raw = args.project_names;
  const author = (args.author as string | undefined) ?? "mcp-agent";

  if (!document_id) {
    throw new McpInvalidParams(
      "Missing required argument: document_id (UUID from a prior cerefox_search result).",
    );
  }
  if (
    project_names_raw === undefined ||
    project_names_raw === null ||
    !Array.isArray(project_names_raw)
  ) {
    throw new McpInvalidParams(
      "Missing or invalid argument: project_names must be a JSON array of strings " +
        "(empty array allowed to clear all memberships).",
    );
  }
  if (!project_names_raw.every((n) => typeof n === "string")) {
    throw new McpInvalidParams("project_names must contain only strings.");
  }

  // Strip empties; preserve order; dedup case-insensitively.
  const seenLower = new Set<string>();
  const cleanNames: string[] = [];
  for (const n of project_names_raw as string[]) {
    const stripped = n.trim();
    if (!stripped) continue;
    const key = stripped.toLowerCase();
    if (seenLower.has(key)) continue;
    seenLower.add(key);
    cleanNames.push(stripped);
  }

  // Verify the document exists and isn't soft-deleted.
  const { data: doc, error: docError } = await supabase
    .from("cerefox_documents")
    .select("id, title")
    .eq("id", document_id)
    .is("deleted_at", null)
    .limit(1);
  if (docError) {
    throw new Error(`Document lookup failed: ${docError.message}`);
  }
  if (!doc?.length) {
    throw new Error(`Document not found (or soft-deleted): ${document_id}`);
  }

  // Resolve each name → project_id (create if absent). Preserve order.
  // All names are resolved BEFORE anything is deleted, so a lookup or create
  // failure leaves the existing memberships untouched.
  const projectIds: string[] = [];
  for (const name of cleanNames) {
    // `ilike` treats `_` and `%` in the name as wildcards, so it can return
    // rows with a different name. Use it only to fetch candidates, then pick
    // the row whose name is equal ignoring case.
    const { data: candidates, error: lookupError } = await supabase
      .from("cerefox_projects")
      .select("id, name")
      .ilike("name", name);
    if (lookupError) {
      throw new Error(`Project lookup failed for "${name}": ${lookupError.message}`);
    }
    const wanted = name.toLowerCase();
    const existing = ((candidates ?? []) as Array<{ id: string; name?: unknown }>).find(
      (row) => typeof row.name === "string" && row.name.toLowerCase() === wanted,
    );
    if (existing) {
      projectIds.push(existing.id);
      continue;
    }

    const { data: newProj, error: createError } = await supabase
      .from("cerefox_projects")
      .insert({ name })
      .select("id");
    if (createError) {
      throw new Error(`Failed to create project "${name}": ${createError.message}`);
    }
    const newId = newProj?.[0]?.id;
    if (!newId) {
      // Without an id the project would silently vanish from the new set.
      throw new Error(`Failed to create project "${name}": no id returned`);
    }
    projectIds.push(newId);
  }

  // DELETE-then-INSERT replace (matches Python assign_document_projects).
  const { error: deleteError } = await supabase
    .from("cerefox_document_projects")
    .delete()
    .eq("document_id", document_id);
  if (deleteError) {
    throw new Error(`Failed to clear existing project memberships: ${deleteError.message}`);
  }
  if (projectIds.length > 0) {
    const rows = projectIds.map((pid) => ({ document_id, project_id: pid }));
    const { error: insertError } = await supabase.from("cerefox_document_projects").insert(rows);
    if (insertError) {
      // The delete above already ran, so say so: the caller must retry.
      throw new Error(
        `Failed to insert new project memberships (previous memberships were already removed): ${insertError.message}`,
      );
    }
  }

  // Audit entry — project membership is metadata, not content.
  // Best-effort: a failure is logged and does not fail the call. The returned
  // `error` is inspected as well as thrown exceptions, so a failure reported
  // in the response is logged too.
  try {
    const { error: auditError } = await supabase.rpc("cerefox_create_audit_entry", {
      p_document_id: document_id,
      p_version_id: null,
      p_operation: "update-metadata",
      p_author: author,
      p_author_type: "agent",
      p_size_before: null,
      p_size_after: null,
      p_description:
        cleanNames.length > 0
          ? `Set document projects to [${cleanNames.join(", ")}]`
          : "Cleared all project memberships",
    });
    if (auditError) {
      console.warn("set-document-projects: audit entry failed", auditError);
    }
  } catch (err) {
    console.warn("set-document-projects: audit entry failed", err);
  }

  // Not awaited, exactly as before: the response does not wait for the usage log.
  logUsage(supabase, {
    operation: "set-document-projects",
    accessPath: ctx.accessPath,
    requestor: author,
    document_id,
    result_count: projectIds.length,
  });

  if (cleanNames.length === 0) {
    return (
      `Cleared all project memberships for document ${document_id}. ` +
      "The document no longer belongs to any project."
    );
  }
  return (
    `Set project memberships for document ${document_id}:\n` +
    `  Projects (${cleanNames.length}): ${cleanNames.join(", ")}\n` +
    `  Project IDs: ${projectIds.join(", ")}\n` +
    "  Note: this REPLACED the previous membership set. Any projects not " +
    "listed above are no longer associated with this document."
  );
}
|
|
135
|
+
|
|
136
|
+
/** JSON Schema describing the arguments of `cerefox_set_document_projects`. */
const setDocumentProjectsInputSchema = {
  type: "object",
  required: ["document_id", "project_names"],
  properties: {
    document_id: {
      type: "string",
      description:
        "UUID of the document. Get this from a prior cerefox_search result (the [id: ...] tag after the title).",
    },
    project_names: {
      type: "array",
      items: { type: "string" },
      description:
        "Explicit list of project names. Each created if absent. Order is preserved. Empty list = remove from all projects.",
    },
    author: {
      type: "string",
      description:
        'Agent or tool name recorded in the audit log. Defaults to "mcp-agent". May be enforced via server config.',
    },
  },
};

/** Tool definition that exposes the membership-replace handler to MCP clients. */
export const setDocumentProjectsTool: ToolDefinition = {
  name: "cerefox_set_document_projects",
  description:
    "Set the document's project memberships to EXACTLY the given list. Destructive replace: any existing memberships not in this list are removed. Pass an empty list to clear all project memberships. Projects are looked up by name (case-insensitive); missing projects are created. Logged as update-metadata in the audit log — content is untouched. Use cerefox_ingest with project_names if you want to set memberships AND update content in one call. Use this tool when you only need to change project membership without re-writing the document body.",
  inputSchema: setDocumentProjectsInputSchema,
  handler,
};
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared MCP tool-handler contract.
|
|
3
|
+
*
|
|
4
|
+
* Each `cerefox_*` tool is a `ToolDefinition` exporting:
|
|
5
|
+
* - `name` — MCP tool name (e.g. `cerefox_search`).
|
|
6
|
+
* - `description` — single-paragraph description shown to agents.
|
|
7
|
+
* - `inputSchema` — JSON Schema for the tool's `arguments` object.
|
|
8
|
+
* - `handler(supabase, args, ctx)` — async function returning the MCP
|
|
9
|
+
* `TextContent` body as a string.
|
|
10
|
+
*
|
|
11
|
+
* The same `ToolDefinition`s are wired into both:
|
|
12
|
+
* - The remote `cerefox-mcp` Edge Function (Deno; HTTP-framed JSON-RPC).
|
|
13
|
+
* - The local `@cerefox/memory` stdio MCP server (Bun/Node; stdio-framed).
|
|
14
|
+
*
|
|
15
|
+
* Wiring code (request dispatch, framing, identity enforcement) lives in
|
|
16
|
+
* each consumer; the handlers themselves are runtime-agnostic.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
/** Structural type for the Supabase client surface the handlers actually
|
|
20
|
+
* use (`.rpc()` + `.from()`). We deliberately don't `import { SupabaseClient }
|
|
21
|
+
* from "@supabase/supabase-js"` here because Bun workspaces install a
|
|
22
|
+
* separate copy of supabase-js into each workspace member, and TypeScript
|
|
23
|
+
* then sees two distinct (but structurally identical) `SupabaseClient`
|
|
24
|
+
* classes. Decoupling the shared module with a minimal structural type
|
|
25
|
+
* side-steps the duplicate-class problem and keeps the shared modules
|
|
26
|
+
* truly runtime-neutral. */
|
|
27
|
+
// Query-builder results are deliberately left untyped: the handlers chain
// calls on them without importing supabase-js types.
// deno-lint-ignore no-explicit-any
type AnyChain = any;

/**
 * Minimal client surface the shared tool handlers rely on: `rpc()` for stored
 * procedures and `from()` for table access.
 */
export interface MCPSupabaseClient {
  /** Invokes the named RPC with optional named parameters. */
  rpc<T = unknown>(fn: string, params?: Record<string, unknown>): AnyChain;
  /** Starts a query against the named table. */
  from(table: string): AnyChain;
}
|
|
34
|
+
|
|
35
|
+
/** Re-export an alias name so callers can use the descriptive name. */
|
|
36
|
+
export type SupabaseClient = MCPSupabaseClient;
|
|
37
|
+
|
|
38
|
+
/** JSON Schema fragment for tool inputs. We use a permissive `unknown` value
|
|
39
|
+
* type rather than a strict JSON-Schema TS type to avoid forcing every tool
|
|
40
|
+
* to maintain a type-perfect schema literal. */
|
|
41
|
+
export type JsonSchema = Record<string, unknown>;
|
|
42
|
+
|
|
43
|
+
/**
|
|
44
|
+
* Logical channel through which a Cerefox operation reached the backend.
|
|
45
|
+
* Recorded in `cerefox_usage_log.access_path` so the analytics dashboard
|
|
46
|
+
* can attribute load to each surface.
|
|
47
|
+
*
|
|
48
|
+
* Values:
|
|
49
|
+
* - `remote-mcp` — `cerefox-mcp` Edge Function (HTTP MCP transport).
|
|
50
|
+
* - `local-mcp` — `@cerefox/memory`'s `cerefox-mcp` stdio bin.
|
|
51
|
+
* - `cli` — the `cerefox` CLI bin (v0.5+). Mirrors the
|
|
52
|
+
* Python CLI's `access_path = "cli"`.
|
|
53
|
+
*
|
|
54
|
+
* Adding a new channel here also requires updating
|
|
55
|
+
* `cerefox_usage_log.access_path`'s domain (Postgres CHECK constraint).
|
|
56
|
+
*/
|
|
57
|
+
export type AccessPath = "remote-mcp" | "local-mcp" | "cli";
|
|
58
|
+
|
|
59
|
+
/** Per-call context that the consumer hands to every tool handler. */
export interface ToolContext {
  /**
   * OpenAI/Fireworks API key for tools that need to embed (search, ingest).
   * The consumer resolves it (EF: `Deno.env.get("OPENAI_API_KEY")`;
   * local: `Settings.openaiApiKey`). Optional: it may be absent.
   */
  openaiApiKey?: string;
  /**
   * Wire path the call arrived on; recorded in
   * `cerefox_usage_log.access_path`.
   */
  accessPath: AccessPath;
}
|
|
68
|
+
|
|
69
|
+
/** Contract every `cerefox_*` tool exports: identity, schema and handler. */
export interface ToolDefinition {
  /** MCP tool name. */
  name: string;
  /** Description shown to agents. */
  description: string;
  /** JSON Schema for the tool's `arguments` object. */
  inputSchema: JsonSchema;
  /**
   * Produces the MCP `TextContent.text` body. A failing tool throws; the
   * consumer's request wrapper maps a thrown `McpInvalidParams` to JSON-RPC
   * `-32602` (invalid params) and any other thrown error to `-32603`
   * (internal error).
   */
  handler: (
    supabase: MCPSupabaseClient,
    args: Record<string, unknown>,
    ctx: ToolContext,
  ) => Promise<string>;
}
|
|
83
|
+
|
|
84
|
+
/** Typed `Error` subclass for input-validation failures. Consumers translate
|
|
85
|
+
* this into JSON-RPC `-32602` (invalid params). Plain `Error`s become
|
|
86
|
+
* `-32603` (internal). */
|
|
87
|
+
/**
 * Error raised for input-validation failures. Its `name` is set to
 * `"McpInvalidParams"` so it can be told apart from plain `Error`s.
 */
export class McpInvalidParams extends Error {
  /**
   * @param message - Human-readable explanation of what was wrong with the input.
   */
  constructor(message: string) {
    super(message);
    // Replace the inherited "Error" name with this class's own identifier.
    this.name = "McpInvalidParams";
  }
}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
-- Migration 0003: Add document versioning support
-- Applied by: scripts/db_migrate.py
-- Intended to be safe on a live database with existing documents and chunks:
-- no table rows are dropped or altered; only constraints and indexes are replaced.
--
-- Steps:
--   1. Create the cerefox_document_versions table
--   2. Add a nullable version_id FK to cerefox_chunks
--   3. Drop the plain UNIQUE constraint on (document_id, chunk_index)
--   4. Add a partial unique index on (document_id, chunk_index) WHERE version_id IS NULL
--   5. Drop the plain GIN and HNSW indexes (replaced by partial equivalents)
--   6. Create partial GIN, HNSW, and version-lookup indexes
--   7. Enable RLS on cerefox_document_versions
--   8. (No-op) note on why no updated_at trigger is created

-- ── 1. Document versions table ─────────────────────────────────────────────

CREATE TABLE IF NOT EXISTS cerefox_document_versions (
    id              UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    document_id     UUID        NOT NULL REFERENCES cerefox_documents(id) ON DELETE CASCADE,
    version_number  INT         NOT NULL,
    source          TEXT        NOT NULL DEFAULT 'manual',
    chunk_count     INT         NOT NULL DEFAULT 0,
    total_chars     INT         NOT NULL DEFAULT 0,
    created_at      TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    CONSTRAINT cerefox_document_versions_doc_num_unique UNIQUE (document_id, version_number)
);

-- ── 2. Add version_id to chunks ────────────────────────────────────────────
-- version_id IS NULL     → chunk belongs to the current version (searchable, indexed)
-- version_id IS NOT NULL → chunk is archived under that version (not searchable, lazily deleted)

ALTER TABLE cerefox_chunks
    ADD COLUMN IF NOT EXISTS version_id UUID
        REFERENCES cerefox_document_versions(id) ON DELETE CASCADE;

-- ── 3. Drop plain unique constraint ────────────────────────────────────────
-- The old constraint forbids duplicate (document_id, chunk_index) pairs across
-- ALL chunks. With versioning the same chunk_index may exist in both the
-- current and archived versions, so the partial unique index in step 4 takes
-- over.

ALTER TABLE cerefox_chunks
    DROP CONSTRAINT IF EXISTS cerefox_chunks_doc_idx_unique;

-- ── 4. Partial unique index for current chunks ──────────────────────────────
-- Keeps (document_id, chunk_index) unique among current chunks only
-- (version_id IS NULL). Archived chunks are excluded and may repeat
-- chunk_index values across versions.

CREATE UNIQUE INDEX IF NOT EXISTS idx_cerefox_chunks_current_unique
    ON cerefox_chunks(document_id, chunk_index)
    WHERE version_id IS NULL;

-- ── 5. Drop plain indexes (replaced by partial equivalents below) ───────────

DROP INDEX IF EXISTS idx_cerefox_chunks_fts;
DROP INDEX IF EXISTS idx_cerefox_chunks_emb_primary;
DROP INDEX IF EXISTS idx_cerefox_chunks_emb_upgrade;

-- ── 6. Partial FTS, HNSW, and version-lookup indexes ───────────────────────
-- The WHERE version_id IS NULL predicate restricts the search indexes to
-- current chunks; archived chunks are not part of them.

-- Full-text search (current chunks only)
CREATE INDEX IF NOT EXISTS idx_cerefox_chunks_fts
    ON cerefox_chunks USING GIN(fts)
    WHERE version_id IS NULL;

-- Primary vector index (current chunks only)
CREATE INDEX IF NOT EXISTS idx_cerefox_chunks_emb_primary
    ON cerefox_chunks USING hnsw (embedding_primary vector_cosine_ops)
    WITH (m = 16, ef_construction = 64)
    WHERE version_id IS NULL;

-- Upgrade vector index (current chunks only)
CREATE INDEX IF NOT EXISTS idx_cerefox_chunks_emb_upgrade
    ON cerefox_chunks USING hnsw (embedding_upgrade vector_cosine_ops)
    WITH (m = 16, ef_construction = 64)
    WHERE version_id IS NULL;

-- Archived chunk lookup (for version retrieval)
CREATE INDEX IF NOT EXISTS idx_cerefox_chunks_version
    ON cerefox_chunks(version_id, chunk_index)
    WHERE version_id IS NOT NULL;

-- ── 7. RLS on new table ────────────────────────────────────────────────────

ALTER TABLE cerefox_document_versions ENABLE ROW LEVEL SECURITY;

-- ── 8. updated_at trigger on versions table ────────────────────────────────
-- cerefox_document_versions has no updated_at column (rows are immutable after
-- creation), so no trigger is needed.
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
-- Migration 0004: Add audit log table, review_status column, archived flag
--
-- Adds:
--   1. cerefox_audit_log table (intended to be immutable, append-only)
--   2. review_status column on cerefox_documents (approved | pending_review)
--   3. archived boolean on cerefox_document_versions (protection from cleanup)
--   4. Indexes for audit log queries (temporal, author, document, FTS on description)
--   5. RLS on cerefox_audit_log

-- ── 1. Audit log table ──────────────────────────────────────────────────────
-- document_id / version_id are set to NULL (not cascaded) when the referenced
-- row is deleted, so audit entries outlive the rows they describe.

CREATE TABLE IF NOT EXISTS cerefox_audit_log (
    id           UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    document_id  UUID        REFERENCES cerefox_documents(id) ON DELETE SET NULL,
    version_id   UUID        REFERENCES cerefox_document_versions(id) ON DELETE SET NULL,
    operation    TEXT        NOT NULL,
    author       TEXT        NOT NULL DEFAULT 'unknown',
    author_type  TEXT        NOT NULL DEFAULT 'user',
    size_before  INT,
    size_after   INT,
    description  TEXT        NOT NULL DEFAULT '',
    created_at   TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    CONSTRAINT cerefox_audit_log_operation_check CHECK (
        operation IN ('create', 'update-content', 'update-metadata', 'delete',
                      'status-change', 'archive', 'unarchive')
    ),
    CONSTRAINT cerefox_audit_log_author_type_check CHECK (author_type IN ('user', 'agent'))
);

-- ── 2. Review status on documents ───────────────────────────────────────────

ALTER TABLE cerefox_documents
    ADD COLUMN IF NOT EXISTS review_status TEXT NOT NULL DEFAULT 'approved';

-- Add the check constraint only when no constraint with this name exists yet,
-- which keeps the migration re-runnable (the constraint is never dropped here).
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1 FROM information_schema.check_constraints
        WHERE constraint_name = 'cerefox_documents_review_status_check'
    ) THEN
        ALTER TABLE cerefox_documents
            ADD CONSTRAINT cerefox_documents_review_status_check
            CHECK (review_status IN ('approved', 'pending_review'));
    END IF;
END $$;

-- ── 3. Archived flag on versions ────────────────────────────────────────────

ALTER TABLE cerefox_document_versions
    ADD COLUMN IF NOT EXISTS archived BOOLEAN NOT NULL DEFAULT FALSE;

-- ── 4. Indexes for audit log ────────────────────────────────────────────────

-- Newest-first listing of all entries
CREATE INDEX IF NOT EXISTS idx_cerefox_audit_log_created
    ON cerefox_audit_log(created_at DESC);

-- Per-document history; rows without a document are left out of the index
CREATE INDEX IF NOT EXISTS idx_cerefox_audit_log_document
    ON cerefox_audit_log(document_id, created_at DESC)
    WHERE document_id IS NOT NULL;

-- Per-author history
CREATE INDEX IF NOT EXISTS idx_cerefox_audit_log_author
    ON cerefox_audit_log(author, created_at DESC);

-- Full-text search over the description column
CREATE INDEX IF NOT EXISTS idx_cerefox_audit_log_desc_fts
    ON cerefox_audit_log USING GIN(to_tsvector('english', description));

-- ── 5. RLS ──────────────────────────────────────────────────────────────────

ALTER TABLE cerefox_audit_log ENABLE ROW LEVEL SECURITY;
|