@cerefox/memory 0.7.2 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/dist/bin/cerefox.js +1357 -361
  2. package/dist/frontend/assets/{index-BzAPcCXA.js → index-CAp2_lFX.js} +2 -2
  3. package/dist/frontend/assets/index-CAp2_lFX.js.map +1 -0
  4. package/dist/frontend/index.html +1 -1
  5. package/dist/server-assets/_shared/ef-meta/index.ts +97 -0
  6. package/dist/server-assets/_shared/embeddings/index.ts +175 -0
  7. package/dist/server-assets/_shared/mcp-tools/_chunker.ts +187 -0
  8. package/dist/server-assets/_shared/mcp-tools/_projects.ts +121 -0
  9. package/dist/server-assets/_shared/mcp-tools/_utils.ts +73 -0
  10. package/dist/server-assets/_shared/mcp-tools/audit-log.ts +95 -0
  11. package/dist/server-assets/_shared/mcp-tools/get-document.ts +73 -0
  12. package/dist/server-assets/_shared/mcp-tools/get-help-content.ts +26 -0
  13. package/dist/server-assets/_shared/mcp-tools/get-help.ts +90 -0
  14. package/dist/server-assets/_shared/mcp-tools/index.ts +67 -0
  15. package/dist/server-assets/_shared/mcp-tools/ingest.ts +315 -0
  16. package/dist/server-assets/_shared/mcp-tools/list-metadata-keys.ts +55 -0
  17. package/dist/server-assets/_shared/mcp-tools/list-projects.ts +59 -0
  18. package/dist/server-assets/_shared/mcp-tools/list-versions.ts +72 -0
  19. package/dist/server-assets/_shared/mcp-tools/metadata-search.ts +154 -0
  20. package/dist/server-assets/_shared/mcp-tools/search.ts +193 -0
  21. package/dist/server-assets/_shared/mcp-tools/set-document-projects.ts +163 -0
  22. package/dist/server-assets/_shared/mcp-tools/types.ts +92 -0
  23. package/dist/server-assets/db/migrations/0003_add_document_versions.sql +91 -0
  24. package/dist/server-assets/db/migrations/0004_add_audit_log_review_status_archived.sql +71 -0
  25. package/dist/server-assets/db/migrations/0005_metadata_search.sql +628 -0
  26. package/dist/server-assets/db/migrations/0006_usage_log.sql +255 -0
  27. package/dist/server-assets/db/migrations/0007_usage_log_requestor.sql +178 -0
  28. package/dist/server-assets/db/migrations/0008_soft_delete.sql +130 -0
  29. package/dist/server-assets/db/migrations/0009_audit_log_restore_operation.sql +20 -0
  30. package/dist/server-assets/db/migrations/0010_requestor_enforcement_config.sql +12 -0
  31. package/dist/server-assets/db/migrations/0011_title_boosting.sql +48 -0
  32. package/dist/server-assets/db/rpcs.sql +1723 -0
  33. package/dist/server-assets/db/schema.sql +380 -0
  34. package/dist/server-assets/supabase/functions/cerefox-get-audit-log/index.ts +117 -0
  35. package/dist/server-assets/supabase/functions/cerefox-get-document/index.ts +138 -0
  36. package/dist/server-assets/supabase/functions/cerefox-ingest/index.ts +819 -0
  37. package/dist/server-assets/supabase/functions/cerefox-list-projects/index.ts +96 -0
  38. package/dist/server-assets/supabase/functions/cerefox-list-versions/index.ts +113 -0
  39. package/dist/server-assets/supabase/functions/cerefox-mcp/index.ts +294 -0
  40. package/dist/server-assets/supabase/functions/cerefox-mcp/shared.ts +42 -0
  41. package/dist/server-assets/supabase/functions/cerefox-metadata/index.ts +99 -0
  42. package/dist/server-assets/supabase/functions/cerefox-metadata-search/index.ts +146 -0
  43. package/dist/server-assets/supabase/functions/cerefox-search/index.ts +382 -0
  44. package/docs/guides/connect-agents.md +78 -3
  45. package/docs/guides/migration-v0.5.md +50 -0
  46. package/docs/guides/quickstart.md +6 -2
  47. package/package.json +3 -2
  48. package/dist/frontend/assets/index-BzAPcCXA.js.map +0 -1
@@ -0,0 +1,193 @@
1
+ /**
2
+ * `cerefox_search` — hybrid (FTS + semantic) search over the knowledge base.
3
+ *
4
+ * Three modes:
5
+ * - `docs` (default) — document-level hybrid via `cerefox_search_docs`.
6
+ * - `hybrid` — chunk-level hybrid via `cerefox_hybrid_search`.
7
+ * - `fts` — FTS-only via `cerefox_fts_search` (no embedding needed).
8
+ *
9
+ * Embedding is computed for `docs` and `hybrid` modes via the shared
10
+ * embedder. Results respect a per-call `max_bytes` budget capped at
11
+ * `MAX_RESPONSE_BYTES`; whole rows are dropped to fit the budget.
12
+ *
13
+ * Mirrors `supabase/functions/cerefox-mcp/tools/search.ts` byte-for-byte
14
+ * in response shape so v0.4.0 can keep agents on the same on-the-wire
15
+ * format whether they go through the remote MCP or the new local TS one.
16
+ */
17
+
18
+ import type { MCPSupabaseClient } from "./types.ts";
19
+
20
+ import { getEmbedding } from "../embeddings/index.ts";
21
+ import { applyByteBudget, logUsage, MAX_RESPONSE_BYTES } from "./_utils.ts";
22
+ import { lookupProjectId } from "./_projects.ts";
23
+ import { McpInvalidParams, type ToolContext, type ToolDefinition } from "./types.ts";
24
+
25
/**
 * Handler for `cerefox_search`.
 *
 * Validates the arguments, resolves `project_name` to a project id when one is
 * given, embeds the query for every mode except `fts`, calls the matching
 * search RPC, trims the rows to the byte budget and renders the survivors as
 * Markdown sections separated by `---`.
 *
 * @param supabase - Client used for the search RPC, the project lookup and
 *   usage logging.
 * @param args - Raw tool arguments. Reads `query` (required), `project_name`,
 *   `match_count` (default 5), `mode` (`fts`, `hybrid`, anything else is
 *   handled as `docs`), `alpha` (default 0.7), `min_score` (default 0.5),
 *   `metadata_filter`, `max_bytes` and `requestor`.
 * @param ctx - Per-call context; `openaiApiKey` must be set unless `mode` is
 *   `fts`.
 * @returns One `## title` section per accepted row, or `"No results found."`
 *   when nothing fits or matches.
 * @throws McpInvalidParams when `metadata_filter` is not a plain object, or
 *   when `query` is missing, blank or not a string.
 * @throws Error when the API key is missing for an embedding mode, the project
 *   name is unknown, or the RPC reports an error.
 */
async function handler(
  supabase: MCPSupabaseClient,
  args: Record<string, unknown>,
  ctx: ToolContext,
): Promise<string> {
  const rawQuery = args.query;
  const project_name = args.project_name as string | undefined;
  const match_count = (args.match_count as number | undefined) ?? 5;
  const mode = (args.mode as string | undefined) ?? "docs";
  const alpha = (args.alpha as number | undefined) ?? 0.7;
  const min_score = (args.min_score as number | undefined) ?? 0.5;
  const metadata_filter =
    (args.metadata_filter as Record<string, string> | null | undefined) ?? null;
  const requested_max_bytes = args.max_bytes as number | undefined;

  // Callers may ask for less than the server maximum, never more.
  const max_bytes = Math.min(requested_max_bytes ?? MAX_RESPONSE_BYTES, MAX_RESPONSE_BYTES);

  // `metadata_filter` is already normalised to `null` when absent, so only the
  // "present but not a plain object" case needs rejecting.
  if (
    metadata_filter !== null &&
    (typeof metadata_filter !== "object" || Array.isArray(metadata_filter))
  ) {
    throw new McpInvalidParams("metadata_filter must be a JSON object or null");
  }

  // Check the runtime type before calling `.trim()`: a non-string query (e.g. a
  // number) must surface as invalid params, not as a TypeError.
  if (typeof rawQuery !== "string" || !rawQuery.trim()) {
    throw new McpInvalidParams("query is required");
  }
  const query = rawQuery;

  // Only FTS can run without an embedding, hence without an API key.
  let embeddingKey: string | null = null;
  if (mode !== "fts") {
    if (!ctx.openaiApiKey) {
      throw new Error(
        "OpenAI API key not configured. Set OPENAI_API_KEY (Edge Function) or CEREFOX_OPENAI_API_KEY (.env, local).",
      );
    }
    embeddingKey = ctx.openaiApiKey;
  }

  // Resolve project name to UUID if provided
  let projectId: string | null = null;
  if (project_name) {
    projectId = await lookupProjectId(supabase, project_name);
    if (!projectId) throw new Error(`Project not found: ${project_name}`);
  }

  const embedding: number[] | null =
    embeddingKey !== null ? await getEmbedding(query, embeddingKey) : null;

  // The filter parameter is omitted entirely when there is nothing to filter on.
  const metaFilterParam =
    metadata_filter && Object.keys(metadata_filter).length > 0
      ? { p_metadata_filter: metadata_filter }
      : {};

  let rpcName: string;
  let rpcParams: Record<string, unknown>;

  if (mode === "fts") {
    rpcName = "cerefox_fts_search";
    rpcParams = {
      p_query_text: query,
      p_match_count: match_count,
      p_project_id: projectId,
      ...metaFilterParam,
    };
  } else if (mode === "hybrid") {
    rpcName = "cerefox_hybrid_search";
    rpcParams = {
      p_query_text: query,
      p_query_embedding: embedding,
      p_match_count: match_count,
      p_alpha: alpha,
      p_use_upgrade: false,
      p_project_id: projectId,
      p_min_score: min_score,
      ...metaFilterParam,
    };
  } else {
    // Any other mode value is served by the document-level search.
    rpcName = "cerefox_search_docs";
    rpcParams = {
      p_query_text: query,
      p_query_embedding: embedding,
      p_match_count: match_count,
      p_alpha: alpha,
      p_project_id: projectId,
      p_min_score: min_score,
      ...metaFilterParam,
    };
  }

  const { data, error } = await supabase.rpc(rpcName, rpcParams);

  if (error) throw new Error(`RPC error: ${error.message}`);

  const { accepted, truncated, usedBytes } = applyByteBudget(data ?? [], max_bytes);

  // Not awaited, as in the original call site.
  logUsage(supabase, {
    operation: "search",
    accessPath: ctx.accessPath,
    requestor: args.requestor as string | undefined,
    query_text: query,
    project_id: projectId,
    result_count: accepted.length,
  });

  if (accepted.length === 0) return "No results found.";

  const rows = accepted as Array<{
    document_id?: string;
    doc_title?: string;
    full_content?: string;
    best_score?: number;
    is_partial?: boolean;
    chunk_count?: number;
    total_chars?: number;
  }>;

  const parts: string[] = rows.map((row) => {
    const title = row.doc_title ?? "Untitled";
    const docId = row.document_id ? ` [id: ${row.document_id}]` : "";
    const score = row.best_score != null ? ` (score: ${row.best_score.toFixed(3)})` : "";
    const partial = row.is_partial
      ? ` -- partial (${row.chunk_count} of ${(row.total_chars ?? 0).toLocaleString()} chars)`
      : "";
    return `## ${title}${docId}${score}${partial}\n\n${row.full_content ?? ""}`;
  });

  let output = parts.join("\n\n---\n\n");
  if (truncated) {
    output +=
      `\n\n[Results truncated at ${usedBytes} bytes. Use a more specific query or a smaller match_count to see more.]`;
  }
  return output;
}
156
+
157
/**
 * JSON Schema advertised for the `cerefox_search` arguments. Only `query` is
 * required.
 */
const searchInputSchema: ToolDefinition["inputSchema"] = {
  type: "object",
  required: ["query"],
  properties: {
    query: { type: "string", description: "Natural-language search query" },
    match_count: {
      type: "integer",
      description: "Maximum number of documents to return (default: 5)",
    },
    project_name: {
      type: "string",
      description: "Filter results to a specific project by name (optional)",
    },
    metadata_filter: {
      type: "object",
      description:
        'Optional JSONB containment filter. Only documents whose metadata contains ALL specified key-value pairs are returned. Example: {"type": "decision", "status": "active"}. Call cerefox_list_metadata_keys first to discover available keys and values. Omit to search all documents.',
      additionalProperties: { type: "string" },
    },
    max_bytes: {
      type: "integer",
      description:
        "Optional response size budget in bytes. Results are dropped whole until the budget is satisfied; a truncated flag is set when results are dropped. Defaults to the server maximum (200000). Pass a smaller value if your context window is limited. Values above the server maximum are silently capped.",
    },
    requestor: {
      type: "string",
      description:
        'Name of the agent or user making this request (e.g., "Claude Code", "archiver"). Recorded in the usage log for attribution. Defaults to "mcp-agent" if not provided. May be enforced via server config.',
    },
  },
};

/** Tool definition for `cerefox_search`: name, description, input schema and handler. */
export const searchTool: ToolDefinition = {
  name: "cerefox_search",
  description:
    "Search the Cerefox personal knowledge base. Returns complete documents ranked by hybrid (FTS + semantic) relevance.",
  inputSchema: searchInputSchema,
  handler,
};
@@ -0,0 +1,163 @@
1
+ /**
2
+ * `cerefox_set_document_projects` — destructive replace of a document's
3
+ * project memberships. Empty list clears all memberships. Logged as
4
+ * `update-metadata` in the audit log; document content is untouched.
5
+ *
6
+ * Mirrors Python `CerefoxClient.set_document_projects` +
7
+ * `_handle_set_document_projects`. v0.1.20 introduced this tool to give
8
+ * agents an explicit full-set primitive without rewriting content (issue
9
+ * #38, Part 4).
10
+ */
11
+
12
+ import type { MCPSupabaseClient } from "./types.ts";
13
+
14
+ import { logUsage } from "./_utils.ts";
15
+ import { McpInvalidParams, type ToolContext, type ToolDefinition } from "./types.ts";
16
+
17
/**
 * Handler for `cerefox_set_document_projects`.
 *
 * Replaces the document's project memberships with exactly the given names:
 * validates the arguments, normalises the name list, confirms the document
 * exists and is not soft-deleted, resolves (or creates) each project, then
 * deletes the old membership rows and inserts the new ones. An audit entry and
 * a usage-log entry are written afterwards.
 *
 * Every Supabase result is checked for `error`: the client resolves with
 * `{ data, error }` instead of throwing, so an unchecked failure would
 * otherwise be reported to the caller as success.
 *
 * @param supabase - Client used for table access and the audit RPC.
 * @param args - Raw tool arguments: `document_id` (required), `project_names`
 *   (required array of strings, may be empty), `author` (default
 *   `"mcp-agent"`).
 * @param ctx - Per-call context; only `accessPath` is read, for usage logging.
 * @returns A human-readable summary of the new membership set.
 * @throws McpInvalidParams when `document_id` is missing, blank or not a
 *   string, or `project_names` is not an array of strings.
 * @throws Error when the document is not found or any database step fails.
 */
async function handler(
  supabase: MCPSupabaseClient,
  args: Record<string, unknown>,
  ctx: ToolContext,
): Promise<string> {
  // Guard the runtime type before trimming so a non-string id is reported as
  // invalid params rather than as a TypeError.
  const document_id = typeof args.document_id === "string" ? args.document_id.trim() : "";
  const project_names_raw = args.project_names;
  const author = (args.author as string | undefined) ?? "mcp-agent";

  if (!document_id) {
    throw new McpInvalidParams(
      "Missing required argument: document_id (UUID from a prior cerefox_search result).",
    );
  }
  // `Array.isArray` already rejects `undefined` and `null`.
  if (!Array.isArray(project_names_raw)) {
    throw new McpInvalidParams(
      "Missing or invalid argument: project_names must be a JSON array of strings " +
        "(empty array allowed to clear all memberships).",
    );
  }
  if (!project_names_raw.every((n) => typeof n === "string")) {
    throw new McpInvalidParams("project_names must contain only strings.");
  }

  // Strip empties; preserve order; dedup case-insensitively.
  const seenLower = new Set<string>();
  const cleanNames: string[] = [];
  for (const n of project_names_raw as string[]) {
    const stripped = n.trim();
    if (!stripped) continue;
    const key = stripped.toLowerCase();
    if (seenLower.has(key)) continue;
    seenLower.add(key);
    cleanNames.push(stripped);
  }

  // Verify the document exists and isn't soft-deleted.
  const { data: doc, error: docError } = await supabase
    .from("cerefox_documents")
    .select("id, title")
    .eq("id", document_id)
    .is("deleted_at", null)
    .limit(1);
  if (docError) {
    throw new Error(`Document lookup failed for ${document_id}: ${docError.message}`);
  }
  if (!doc?.length) {
    throw new Error(`Document not found (or soft-deleted): ${document_id}`);
  }

  // Resolve each name → project_id (create if absent). Preserve order.
  // All failures here happen before the destructive delete below, so an error
  // leaves the existing memberships untouched.
  // NOTE(review): `ilike` treats `%`, `_` and (via PostgREST) `*` in `name` as
  // wildcards, so such names may match a different project — confirm whether
  // project names can contain these characters.
  const projectIds: string[] = [];
  for (const name of cleanNames) {
    const { data: proj, error: lookupError } = await supabase
      .from("cerefox_projects")
      .select("id")
      .ilike("name", name)
      .limit(1);
    if (lookupError) {
      throw new Error(`Project lookup failed for "${name}": ${lookupError.message}`);
    }
    if (proj?.length) {
      projectIds.push(proj[0].id);
      continue;
    }

    const { data: newProj, error: createError } = await supabase
      .from("cerefox_projects")
      .insert({ name })
      .select("id");
    const newId = newProj?.[0]?.id;
    if (createError || !newId) {
      throw new Error(
        `Failed to create project "${name}": ${createError?.message ?? "no id returned"}`,
      );
    }
    projectIds.push(newId);
  }

  // DELETE-then-INSERT replace (matches Python assign_document_projects).
  // The two statements are separate requests, so a failed insert leaves the
  // document with no memberships; the error message says so explicitly.
  const { error: deleteError } = await supabase
    .from("cerefox_document_projects")
    .delete()
    .eq("document_id", document_id);
  if (deleteError) {
    throw new Error(
      `Failed to clear project memberships for ${document_id}: ${deleteError.message}`,
    );
  }
  if (projectIds.length > 0) {
    const rows = projectIds.map((pid) => ({ document_id, project_id: pid }));
    const { error: insertError } = await supabase.from("cerefox_document_projects").insert(rows);
    if (insertError) {
      throw new Error(
        `Failed to assign projects to ${document_id} after clearing the previous memberships: ${insertError.message}`,
      );
    }
  }

  // Audit entry — project membership is metadata, not content. Best-effort:
  // a failure is logged but does not fail the call. Both a thrown exception and
  // a returned `error` are reported.
  try {
    const { error: auditError } = await supabase.rpc("cerefox_create_audit_entry", {
      p_document_id: document_id,
      p_version_id: null,
      p_operation: "update-metadata",
      p_author: author,
      p_author_type: "agent",
      p_size_before: null,
      p_size_after: null,
      p_description:
        cleanNames.length > 0
          ? `Set document projects to [${cleanNames.join(", ")}]`
          : "Cleared all project memberships",
    });
    if (auditError) {
      console.warn("set-document-projects: audit entry failed", auditError);
    }
  } catch (err) {
    console.warn("set-document-projects: audit entry failed", err);
  }

  // Not awaited, as in the original call site.
  logUsage(supabase, {
    operation: "set-document-projects",
    accessPath: ctx.accessPath,
    requestor: author,
    document_id,
    result_count: projectIds.length,
  });

  if (cleanNames.length === 0) {
    return (
      `Cleared all project memberships for document ${document_id}. ` +
      "The document no longer belongs to any project."
    );
  }
  return (
    `Set project memberships for document ${document_id}:\n` +
    `  Projects (${cleanNames.length}): ${cleanNames.join(", ")}\n` +
    `  Project IDs: ${projectIds.join(", ")}\n` +
    "  Note: this REPLACED the previous membership set. Any projects not " +
    "listed above are no longer associated with this document."
  );
}
135
+
136
/**
 * JSON Schema advertised for the `cerefox_set_document_projects` arguments.
 * `document_id` and `project_names` are required; `author` is optional.
 */
const setDocumentProjectsInputSchema: ToolDefinition["inputSchema"] = {
  type: "object",
  required: ["document_id", "project_names"],
  properties: {
    document_id: {
      type: "string",
      description:
        "UUID of the document. Get this from a prior cerefox_search result (the [id: ...] tag after the title).",
    },
    project_names: {
      type: "array",
      items: { type: "string" },
      description:
        "Explicit list of project names. Each created if absent. Order is preserved. Empty list = remove from all projects.",
    },
    author: {
      type: "string",
      description:
        'Agent or tool name recorded in the audit log. Defaults to "mcp-agent". May be enforced via server config.',
    },
  },
};

/** Tool definition for `cerefox_set_document_projects`: name, description, input schema and handler. */
export const setDocumentProjectsTool: ToolDefinition = {
  name: "cerefox_set_document_projects",
  description:
    "Set the document's project memberships to EXACTLY the given list. Destructive replace: any existing memberships not in this list are removed. Pass an empty list to clear all project memberships. Projects are looked up by name (case-insensitive); missing projects are created. Logged as update-metadata in the audit log — content is untouched. Use cerefox_ingest with project_names if you want to set memberships AND update content in one call. Use this tool when you only need to change project membership without re-writing the document body.",
  inputSchema: setDocumentProjectsInputSchema,
  handler,
};
@@ -0,0 +1,92 @@
1
+ /**
2
+ * Shared MCP tool-handler contract.
3
+ *
4
+ * Each `cerefox_*` tool is a `ToolDefinition` exporting:
5
+ * - `name` — MCP tool name (e.g. `cerefox_search`).
6
+ * - `description` — single-paragraph description shown to agents.
7
+ * - `inputSchema` — JSON Schema for the tool's `arguments` object.
8
+ * - `handler(supabase, args, ctx)` — async function returning the MCP
9
+ * `TextContent` body as a string.
10
+ *
11
+ * The same `ToolDefinition`s are wired into both:
12
+ * - The remote `cerefox-mcp` Edge Function (Deno; HTTP-framed JSON-RPC).
13
+ * - The local `@cerefox/memory` stdio MCP server (Bun/Node; stdio-framed).
14
+ *
15
+ * Wiring code (request dispatch, framing, identity enforcement) lives in
16
+ * each consumer; the handlers themselves are runtime-agnostic.
17
+ */
18
+
19
/** Return type of the client methods below; left untyped on purpose. */
// deno-lint-ignore no-explicit-any
type AnyChain = any;

/**
 * Structural type for the part of the Supabase client the handlers actually
 * use (`.rpc()` + `.from()`).
 *
 * `SupabaseClient` is deliberately not imported from `@supabase/supabase-js`:
 * Bun workspaces install a separate copy of supabase-js into each workspace
 * member, so TypeScript sees two distinct (but structurally identical)
 * `SupabaseClient` classes. A minimal structural type side-steps that
 * duplicate-class problem and keeps the shared modules runtime-neutral.
 */
export interface MCPSupabaseClient {
  rpc<T = unknown>(fn: string, params?: Record<string, unknown>): AnyChain;
  from(table: string): AnyChain;
}

/** Alias of `MCPSupabaseClient` for callers that prefer the descriptive name. */
export type SupabaseClient = MCPSupabaseClient;

/**
 * JSON Schema fragment for tool inputs. Values are a permissive `unknown`
 * rather than a strict JSON-Schema TS type, so tools are not forced to
 * maintain a type-perfect schema literal.
 */
export type JsonSchema = Record<string, unknown>;
42
+
43
/**
 * Logical channel through which a Cerefox operation reached the backend.
 * Stored in `cerefox_usage_log.access_path` so the analytics dashboard can
 * attribute load to each surface.
 *
 * - `remote-mcp` — `cerefox-mcp` Edge Function (HTTP MCP transport).
 * - `local-mcp`  — `@cerefox/memory`'s `cerefox-mcp` stdio bin.
 * - `cli`        — the `cerefox` CLI bin (v0.5+); mirrors the Python CLI's
 *                  `access_path = "cli"`.
 *
 * A new channel added here also needs a matching update to the domain of
 * `cerefox_usage_log.access_path` (Postgres CHECK constraint).
 */
export type AccessPath = "remote-mcp" | "local-mcp" | "cli";

/** Per-call context that the consumer passes to every tool handler. */
export interface ToolContext {
  /**
   * OpenAI/Fireworks API key for tools that need to embed (search, ingest).
   * Resolved by the consumer (EF: `Deno.env.get("OPENAI_API_KEY")`;
   * local: `Settings.openaiApiKey`).
   */
  openaiApiKey?: string;
  /**
   * Wire path the call came in on; recorded in
   * `cerefox_usage_log.access_path`.
   */
  accessPath: AccessPath;
}
68
+
69
/** Contract every `cerefox_*` tool exports: identity, input schema and handler. */
export interface ToolDefinition {
  name: string;
  description: string;
  inputSchema: JsonSchema;
  /**
   * Produces the MCP `TextContent.text` body. A failing tool throws; the
   * consumer's request wrapper turns thrown errors into JSON-RPC `-32603`
   * (internal error) responses, or `-32602` (invalid params) when the thrown
   * error is `McpInvalidParams`.
   */
  handler: (
    supabase: MCPSupabaseClient,
    args: Record<string, unknown>,
    ctx: ToolContext,
  ) => Promise<string>;
}
83
+
84
/**
 * `Error` subclass that marks an input-validation failure. Consumers map it to
 * JSON-RPC `-32602` (invalid params); a plain `Error` maps to `-32603`
 * (internal).
 */
export class McpInvalidParams extends Error {
  // Field initialisers run right after `super()`, so this sets the same own
  // `name` property the constructor assignment used to.
  override name = "McpInvalidParams";

  constructor(message: string) {
    super(message);
  }
}
@@ -0,0 +1,91 @@
1
+ -- Migration 0003: Add document versioning support
2
+ -- Applied by: scripts/db_migrate.py
3
+ -- Safe to apply on a live database with existing documents and chunks.
4
+ -- All changes are additive — no data is dropped or altered.
5
+ --
6
+ -- What this migration does:
7
+ -- 1. Creates cerefox_document_versions table
8
+ -- 2. Adds nullable version_id FK to cerefox_chunks
9
+ -- 3. Drops the plain UNIQUE constraint on (document_id, chunk_index)
10
+ -- 4. Adds a partial unique index on (document_id, chunk_index) WHERE version_id IS NULL
11
+ -- 5. Drops the plain HNSW and GIN indexes (replaced by partial equivalents)
12
+ -- 6. Creates partial HNSW, GIN, and version-lookup indexes
13
+ -- 7. Enables RLS on cerefox_document_versions
14
+
15
+ -- ── 1. Document versions table ─────────────────────────────────────────────
16
+
17
-- Version records for a document. Rows are removed together with their parent
-- document, and version numbers are unique within a document.
CREATE TABLE IF NOT EXISTS cerefox_document_versions (
    id             UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    document_id    UUID        NOT NULL
                               REFERENCES cerefox_documents(id) ON DELETE CASCADE,
    version_number INTEGER     NOT NULL,
    source         TEXT        NOT NULL DEFAULT 'manual',
    chunk_count    INTEGER     NOT NULL DEFAULT 0,
    total_chars    INTEGER     NOT NULL DEFAULT 0,
    created_at     TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    CONSTRAINT cerefox_document_versions_doc_num_unique
        UNIQUE (document_id, version_number)
);
28
+
29
+ -- ── 2. Add version_id to chunks ────────────────────────────────────────────
30
+ -- NULL = current version (searchable, indexed)
31
+ -- non-NULL = archived under that version (not searchable, lazily deleted)
32
+
33
-- Link each chunk to the version it is archived under (NULL for current chunks).
ALTER TABLE cerefox_chunks
    ADD COLUMN IF NOT EXISTS version_id UUID
        REFERENCES cerefox_document_versions(id) ON DELETE CASCADE;

-- ── 3. Drop plain unique constraint ────────────────────────────────────────
-- The old constraint forbids a repeated (document_id, chunk_index) across ALL
-- chunks. Once versions exist, the same chunk_index may appear in the current
-- version and in archived ones, so the partial unique index below takes over.

ALTER TABLE cerefox_chunks
    DROP CONSTRAINT IF EXISTS cerefox_chunks_doc_idx_unique;

-- ── 4. Partial unique index for current chunks ──────────────────────────────
-- Keeps (document_id, chunk_index) unique among current chunks only
-- (version_id IS NULL). Archived chunks are outside the index and may reuse
-- chunk_index values across versions.

CREATE UNIQUE INDEX IF NOT EXISTS idx_cerefox_chunks_current_unique
    ON cerefox_chunks (document_id, chunk_index)
    WHERE version_id IS NULL;

-- ── 5. Drop plain indexes (replaced by partial equivalents below) ───────────
-- The replacements reuse these names, so the old definitions must be dropped
-- first; otherwise CREATE INDEX IF NOT EXISTS would skip them.

DROP INDEX IF EXISTS idx_cerefox_chunks_fts;
DROP INDEX IF EXISTS idx_cerefox_chunks_emb_primary;
DROP INDEX IF EXISTS idx_cerefox_chunks_emb_upgrade;

-- ── 6. Partial FTS, HNSW, and version-lookup indexes ───────────────────────
-- The WHERE version_id IS NULL predicate restricts the search indexes to
-- current chunks; archived chunks are never returned in search results.

-- Full-text search (current chunks only)
CREATE INDEX IF NOT EXISTS idx_cerefox_chunks_fts
    ON cerefox_chunks USING GIN (fts)
    WHERE version_id IS NULL;

-- Primary vector index (current chunks only)
CREATE INDEX IF NOT EXISTS idx_cerefox_chunks_emb_primary
    ON cerefox_chunks USING hnsw (embedding_primary vector_cosine_ops)
    WITH (m = 16, ef_construction = 64)
    WHERE version_id IS NULL;

-- Upgrade vector index (current chunks only)
CREATE INDEX IF NOT EXISTS idx_cerefox_chunks_emb_upgrade
    ON cerefox_chunks USING hnsw (embedding_upgrade vector_cosine_ops)
    WITH (m = 16, ef_construction = 64)
    WHERE version_id IS NULL;

-- Archived chunk lookup (for version retrieval)
CREATE INDEX IF NOT EXISTS idx_cerefox_chunks_version
    ON cerefox_chunks (version_id, chunk_index)
    WHERE version_id IS NOT NULL;

-- ── 7. RLS on new table ────────────────────────────────────────────────────

ALTER TABLE cerefox_document_versions
    ENABLE ROW LEVEL SECURITY;
88
+
89
+ -- ── 8. updated_at trigger on versions table ────────────────────────────────
90
+ -- cerefox_document_versions has no updated_at column (immutable after creation),
91
+ -- so no trigger is needed.
@@ -0,0 +1,71 @@
1
+ -- Migration 0004: Add audit log table, review_status column, archived flag
2
+ --
3
+ -- Adds:
4
+ -- 1. cerefox_audit_log table (immutable, append-only)
5
+ -- 2. review_status column on cerefox_documents (approved | pending_review)
6
+ -- 3. archived boolean on cerefox_document_versions (protection from cleanup)
7
+ -- 4. Indexes for audit log queries (temporal, author, document, FTS on description)
8
+ -- 5. RLS on cerefox_audit_log
9
+
10
+ -- ── 1. Audit log table ──────────────────────────────────────────────────────
11
+
12
-- Audit trail of document operations. The document and version references are
-- set to NULL (not cascaded) when their target row is deleted, so the audit
-- row itself is kept.
CREATE TABLE IF NOT EXISTS cerefox_audit_log (
    id          UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    document_id UUID        REFERENCES cerefox_documents(id) ON DELETE SET NULL,
    version_id  UUID        REFERENCES cerefox_document_versions(id) ON DELETE SET NULL,
    operation   TEXT        NOT NULL,
    author      TEXT        NOT NULL DEFAULT 'unknown',
    author_type TEXT        NOT NULL DEFAULT 'user',
    size_before INTEGER,
    size_after  INTEGER,
    description TEXT        NOT NULL DEFAULT '',
    created_at  TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    CONSTRAINT cerefox_audit_log_operation_check
        CHECK (operation IN (
            'create', 'update-content', 'update-metadata', 'delete',
            'status-change', 'archive', 'unarchive'
        )),
    CONSTRAINT cerefox_audit_log_author_type_check
        CHECK (author_type IN ('user', 'agent'))
);
30
+
31
+ -- ── 2. Review status on documents ───────────────────────────────────────────
32
+
33
-- Existing rows receive the default, 'approved'.
ALTER TABLE cerefox_documents
    ADD COLUMN IF NOT EXISTS review_status TEXT NOT NULL DEFAULT 'approved';

-- Add the check constraint only when it is not already present (idempotent).
-- The probe is scoped to cerefox_documents through pg_constraint.conrelid, so
-- a same-named constraint on another table or in another schema cannot cause
-- this step to be skipped.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname  = 'cerefox_documents_review_status_check'
          AND conrelid = 'cerefox_documents'::regclass
    ) THEN
        ALTER TABLE cerefox_documents
            ADD CONSTRAINT cerefox_documents_review_status_check
            CHECK (review_status IN ('approved', 'pending_review'));
    END IF;
END $$;
48
+
49
+ -- ── 3. Archived flag on versions ────────────────────────────────────────────
50
+
51
-- Existing versions receive the default, FALSE.
ALTER TABLE cerefox_document_versions
    ADD COLUMN IF NOT EXISTS archived BOOLEAN NOT NULL DEFAULT FALSE;

-- ── 4. Indexes for audit log ────────────────────────────────────────────────

-- Temporal queries, newest first
CREATE INDEX IF NOT EXISTS idx_cerefox_audit_log_created
    ON cerefox_audit_log (created_at DESC);

-- Per-document history; rows whose document reference is NULL are excluded
CREATE INDEX IF NOT EXISTS idx_cerefox_audit_log_document
    ON cerefox_audit_log (document_id, created_at DESC)
    WHERE document_id IS NOT NULL;

-- Per-author history
CREATE INDEX IF NOT EXISTS idx_cerefox_audit_log_author
    ON cerefox_audit_log (author, created_at DESC);

-- Full-text search over the description
CREATE INDEX IF NOT EXISTS idx_cerefox_audit_log_desc_fts
    ON cerefox_audit_log USING GIN (to_tsvector('english', description));

-- ── 5. RLS ──────────────────────────────────────────────────────────────────

ALTER TABLE cerefox_audit_log
    ENABLE ROW LEVEL SECURITY;