@cerefox/memory 0.7.2 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/dist/bin/cerefox.js +1096 -344
  2. package/dist/frontend/assets/{index-BzAPcCXA.js → index-CAp2_lFX.js} +2 -2
  3. package/dist/frontend/assets/index-CAp2_lFX.js.map +1 -0
  4. package/dist/frontend/index.html +1 -1
  5. package/dist/server-assets/_shared/ef-meta/index.ts +97 -0
  6. package/dist/server-assets/_shared/embeddings/index.ts +175 -0
  7. package/dist/server-assets/_shared/mcp-tools/_chunker.ts +187 -0
  8. package/dist/server-assets/_shared/mcp-tools/_projects.ts +121 -0
  9. package/dist/server-assets/_shared/mcp-tools/_utils.ts +73 -0
  10. package/dist/server-assets/_shared/mcp-tools/audit-log.ts +95 -0
  11. package/dist/server-assets/_shared/mcp-tools/get-document.ts +73 -0
  12. package/dist/server-assets/_shared/mcp-tools/get-help-content.ts +26 -0
  13. package/dist/server-assets/_shared/mcp-tools/get-help.ts +90 -0
  14. package/dist/server-assets/_shared/mcp-tools/index.ts +67 -0
  15. package/dist/server-assets/_shared/mcp-tools/ingest.ts +315 -0
  16. package/dist/server-assets/_shared/mcp-tools/list-metadata-keys.ts +55 -0
  17. package/dist/server-assets/_shared/mcp-tools/list-projects.ts +59 -0
  18. package/dist/server-assets/_shared/mcp-tools/list-versions.ts +72 -0
  19. package/dist/server-assets/_shared/mcp-tools/metadata-search.ts +154 -0
  20. package/dist/server-assets/_shared/mcp-tools/search.ts +193 -0
  21. package/dist/server-assets/_shared/mcp-tools/set-document-projects.ts +163 -0
  22. package/dist/server-assets/_shared/mcp-tools/types.ts +92 -0
  23. package/dist/server-assets/db/migrations/0003_add_document_versions.sql +91 -0
  24. package/dist/server-assets/db/migrations/0004_add_audit_log_review_status_archived.sql +71 -0
  25. package/dist/server-assets/db/migrations/0005_metadata_search.sql +628 -0
  26. package/dist/server-assets/db/migrations/0006_usage_log.sql +255 -0
  27. package/dist/server-assets/db/migrations/0007_usage_log_requestor.sql +178 -0
  28. package/dist/server-assets/db/migrations/0008_soft_delete.sql +130 -0
  29. package/dist/server-assets/db/migrations/0009_audit_log_restore_operation.sql +20 -0
  30. package/dist/server-assets/db/migrations/0010_requestor_enforcement_config.sql +12 -0
  31. package/dist/server-assets/db/migrations/0011_title_boosting.sql +48 -0
  32. package/dist/server-assets/db/rpcs.sql +1723 -0
  33. package/dist/server-assets/db/schema.sql +380 -0
  34. package/dist/server-assets/supabase/functions/cerefox-get-audit-log/index.ts +117 -0
  35. package/dist/server-assets/supabase/functions/cerefox-get-document/index.ts +138 -0
  36. package/dist/server-assets/supabase/functions/cerefox-ingest/index.ts +819 -0
  37. package/dist/server-assets/supabase/functions/cerefox-list-projects/index.ts +96 -0
  38. package/dist/server-assets/supabase/functions/cerefox-list-versions/index.ts +113 -0
  39. package/dist/server-assets/supabase/functions/cerefox-mcp/index.ts +294 -0
  40. package/dist/server-assets/supabase/functions/cerefox-mcp/shared.ts +42 -0
  41. package/dist/server-assets/supabase/functions/cerefox-metadata/index.ts +99 -0
  42. package/dist/server-assets/supabase/functions/cerefox-metadata-search/index.ts +146 -0
  43. package/dist/server-assets/supabase/functions/cerefox-search/index.ts +382 -0
  44. package/docs/guides/connect-agents.md +58 -3
  45. package/docs/guides/migration-v0.5.md +50 -0
  46. package/package.json +3 -2
  47. package/dist/frontend/assets/index-BzAPcCXA.js.map +0 -1
@@ -5,7 +5,7 @@
5
5
  <link rel="icon" type="image/png" href="/app/cerefox_icon.png" />
6
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
7
  <title>Cerefox</title>
8
- <script type="module" crossorigin src="/app/assets/index-BzAPcCXA.js"></script>
8
+ <script type="module" crossorigin src="/app/assets/index-CAp2_lFX.js"></script>
9
9
  <link rel="stylesheet" crossorigin href="/app/assets/index-DoDJGRih.css">
10
10
  </head>
11
11
  <body>
@@ -0,0 +1,97 @@
1
+ /**
2
+ * Shared Edge Function metadata (iter-26 Part 26B).
3
+ *
4
+ * Every Cerefox Edge Function answers `GET <ef>/version` with
5
+ * `{ name, version }` so clients can detect server↔client drift
6
+ * (see `_shared/compatibility/`). `cerefox-mcp` additionally exposes an
7
+ * aggregator at `GET cerefox-mcp/version?peers=true` that probes every
8
+ * peer EF + the Postgres schema version, so `cerefox doctor` learns the
9
+ * whole server-side version picture in one round-trip.
10
+ *
11
+ * This module is Deno-runtime safe (no `node:` imports) so it can be
12
+ * imported by the EFs as well as the Node/Bun local client. It is one of
13
+ * the `_shared` subtrees bundled into the npm package's
14
+ * `dist/server-assets/_shared/` (Part 26A) so EFs deploy with it intact.
15
+ *
16
+ * `EF_VERSION` is bumped by `scripts/cut_release.ts` when EF source
17
+ * actually changed since the previous tag (guarded — a release that
18
+ * doesn't touch `supabase/functions/` leaves it alone).
19
+ */
20
+
21
+ export const EF_VERSION = "0.8.0";
22
+
23
+ /**
24
+ * The 8 peer EFs the cerefox-mcp aggregator probes (excludes cerefox-mcp
25
+ * itself). Order is the probe order.
26
+ */
27
+ export const PEER_EF_NAMES = [
28
+ "cerefox-search",
29
+ "cerefox-ingest",
30
+ "cerefox-metadata",
31
+ "cerefox-get-document",
32
+ "cerefox-list-versions",
33
+ "cerefox-get-audit-log",
34
+ "cerefox-metadata-search",
35
+ "cerefox-list-projects",
36
+ ] as const;
37
+
38
+ export interface EfVersionPayload {
39
+ name: string;
40
+ version: string;
41
+ }
42
+
43
+ /** A peer probe result for the aggregator response. */
44
+ export interface PeerVersion {
45
+ name: string;
46
+ version: string;
47
+ }
48
+
49
+ export interface PeerError {
50
+ name: string;
51
+ error: string;
52
+ }
53
+
54
+ export interface AggregatedVersions {
55
+ /** This EF (cerefox-mcp). */
56
+ name: string;
57
+ version: string;
58
+ /** Deployed Postgres schema version, or null if the probe failed. */
59
+ schema: string | null;
60
+ /** Successfully-probed peer EFs. */
61
+ efs: PeerVersion[];
62
+ /** Peers that failed to respond (timeout, 404, network). */
63
+ errors: PeerError[];
64
+ }
65
+
66
/**
 * Reports whether `req` is a `GET` whose URL path ends in `/version`.
 * Only the path is inspected; the query string is ignored.
 */
export function isVersionRequest(req: Request): boolean {
  const isGet = req.method === "GET";
  return isGet && new URL(req.url).pathname.endsWith("/version");
}
72
+
73
/**
 * Reports whether the request asks for the aggregated response, i.e. its
 * query string carries a `peers` parameter whose value is exactly `"true"`.
 */
export function wantsPeers(req: Request): boolean {
  const { searchParams } = new URL(req.url);
  const peers = searchParams.get("peers");
  return peers === "true";
}
77
+
78
/**
 * Builds the HTTP 200 response for a single EF's `/version` endpoint.
 *
 * The body is the JSON serialization of `{ name, version }`, with `version`
 * taken from the module-level `EF_VERSION`. `headers` is handed to the
 * `Response` constructor as given.
 */
export function versionResponse(
  name: string,
  headers: Record<string, string>,
): Response {
  const body: string = JSON.stringify({ name, version: EF_VERSION });
  const init = { status: 200, headers };
  return new Response(body, init);
}
86
+
87
/**
 * Computes the `/version` URL of a sibling EF from the URL of the request
 * that reached cerefox-mcp.
 *
 * The path is cut at its `/cerefox-mcp` segment (when that segment either
 * ends the path or is followed by `/…`), and `/<peerName>/version` is
 * appended to what remains. The origin is kept; the query string and
 * fragment of `reqUrl` are not carried over. When the path has no such
 * segment it is used unchanged as the base.
 */
export function peerVersionUrl(reqUrl: string, peerName: string): string {
  const { origin, pathname } = new URL(reqUrl);
  const functionsBase = pathname.replace(/\/cerefox-mcp(\/.*)?$/, "");
  return [origin, functionsBase, "/", peerName, "/version"].join("");
}
@@ -0,0 +1,175 @@
1
+ /**
2
+ * Shared OpenAI-compatible embedding client.
3
+ *
4
+ * Used by `_shared/mcp-tools/search.ts` (query embedding) and
5
+ * `_shared/mcp-tools/ingest.ts` (chunk embeddings). Both the Edge Function
6
+ * and the local TS MCP server use this module via `_shared/mcp-tools/`.
7
+ *
8
+ * Runtime-neutral: uses only `fetch` and `setTimeout`. No Deno- or Bun-
9
+ * specific APIs. The OpenAI API key is always passed in by the caller —
10
+ * the module never reads env vars directly.
11
+ *
12
+ * Mirrors `supabase/functions/cerefox-mcp/embeddings.ts` exactly for v0.4.0
13
+ * (extraction commit; no behaviour change). Future tweaks live here.
14
+ */
15
+
16
export const OPENAI_EMBEDDING_URL = "https://api.openai.com/v1/embeddings";
export const OPENAI_MODEL = "text-embedding-3-small";
export const EMBEDDING_DIMENSIONS = 768;

/** Total number of attempts per API call (the first try plus retries). */
const EMBEDDING_MAX_RETRIES = 3;
/** Delay before the first retry; doubled before each further retry (500ms → 1s). */
const EMBEDDING_INITIAL_BACKOFF_MS = 500;

/**
 * Per-API-call batch limit. Mirrors Python's `CloudEmbedder.BATCH_SIZE`
 * (in `src/cerefox/embeddings/cloud.py`). OpenAI's `/v1/embeddings`
 * accepts up to 2048 inputs per request, but 96 is the Python contract
 * and matches what the existing corpus was embedded with.
 *
 * v0.7 (iter-25 / Part 25B) introduces this constant to TS — the v0.4
 * `embedBatch` had no batching and would blow the API limit on bulk
 * ingest of large documents.
 */
export const EMBEDDING_BATCH_SIZE = 96;

/**
 * Validate a parsed `/v1/embeddings` response body and return its vectors
 * ordered by each item's `index`.
 *
 * @throws Error when `data` is not an array of exactly `expectedCount`
 *   items, or when an item carries no `embedding` array. Without this check
 *   a short or malformed response would surface as an opaque `TypeError`, or
 *   silently misalign vectors with their inputs.
 */
function parseEmbeddingResponse(payload: unknown, expectedCount: number): number[][] {
  const items = (payload as { data?: unknown } | null | undefined)?.data;
  if (!Array.isArray(items) || items.length !== expectedCount) {
    const got = Array.isArray(items) ? `${items.length}` : typeof items;
    throw new Error(
      `Malformed embedding response: expected ${expectedCount} embedding(s), got ${got}`,
    );
  }
  // Sort a copy so the parsed payload itself is left untouched.
  const ordered = [...items].sort(
    (a: { index: number }, b: { index: number }) => a.index - b.index,
  );
  return ordered.map((item: { embedding?: unknown } | null, position: number) => {
    const vector = item?.embedding;
    if (!Array.isArray(vector)) {
      throw new Error(`Malformed embedding response: item ${position} has no embedding vector`);
    }
    return vector as number[];
  });
}

/**
 * One logical call to the embeddings endpoint, with retry.
 *
 * - HTTP 4xx: thrown immediately as `OpenAI embedding error <status>: <body>`.
 * - HTTP 5xx, network failures and malformed responses: retried with
 *   exponential backoff, up to `EMBEDDING_MAX_RETRIES` attempts in total.
 * - No backoff is slept after the final attempt; the last error is thrown
 *   straight away.
 *
 * @param input A single string or a list of strings. The caller keeps a
 *   list within the API's per-request limit.
 * @returns One vector per input, in input order.
 */
async function requestEmbeddings(
  input: string | string[],
  apiKey: string,
): Promise<number[][]> {
  const expectedCount = typeof input === "string" ? 1 : input.length;
  let lastError: Error | null = null;

  for (let attempt = 0; attempt < EMBEDDING_MAX_RETRIES; attempt++) {
    let problem = "";
    let fatal = false;

    try {
      const response = await fetch(OPENAI_EMBEDDING_URL, {
        method: "POST",
        headers: {
          "Authorization": `Bearer ${apiKey}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          model: OPENAI_MODEL,
          input,
          dimensions: EMBEDDING_DIMENSIONS,
        }),
      });

      if (response.ok) {
        const vectors = parseEmbeddingResponse(await response.json(), expectedCount);
        if (attempt > 0) console.info(`Embedding API succeeded on retry ${attempt}`);
        return vectors;
      }

      const detail = await response.text();
      lastError = new Error(`OpenAI embedding error ${response.status}: ${detail}`);
      // 4xx — the request itself is wrong; retrying cannot help.
      fatal = response.status < 500;
      problem = `returned ${response.status}`;
    } catch (err) {
      lastError = err instanceof Error ? err : new Error(String(err));
      problem = `request failed: ${lastError.message}`;
    }

    if (fatal) throw lastError;

    const attemptLabel = `attempt ${attempt + 1}/${EMBEDDING_MAX_RETRIES}`;
    if (attempt === EMBEDDING_MAX_RETRIES - 1) {
      // Out of attempts: fail now instead of sleeping for a retry that never happens.
      console.warn(`Embedding API ${problem} (${attemptLabel}), giving up`);
      break;
    }
    const backoff = EMBEDDING_INITIAL_BACKOFF_MS * Math.pow(2, attempt);
    console.warn(`Embedding API ${problem} (${attemptLabel}), retrying in ${backoff}ms`);
    await new Promise((r) => setTimeout(r, backoff));
  }

  throw lastError ?? new Error(`Embedding API failed after ${EMBEDDING_MAX_RETRIES} attempts`);
}

/** Embed a single string. Used for the query vector in `cerefox_search`. */
export async function getEmbedding(text: string, apiKey: string): Promise<number[]> {
  const [vector] = await requestEmbeddings(text, apiKey);
  if (!vector) throw new Error("Malformed embedding response: no embedding returned");
  return vector;
}

/**
 * Embed multiple strings, chunked into per-API-call batches of
 * `batchSize` (default 96). Used by the v0.7 ingestion pipeline + the
 * MCP-tools ingest handler.
 *
 * Results are returned in input order (each per-call response is sorted
 * by `index` and the results concatenated in input order). Batches are
 * sent one after another, not in parallel.
 *
 * @param batchSize Maximum inputs per API call. Fractional values are
 *   rounded down; `Infinity` sends everything in one call.
 * @throws RangeError when `texts` is non-empty and `batchSize` is `NaN` or
 *   below 1 — such a value could previously loop forever.
 */
export async function embedBatch(
  texts: string[],
  apiKey: string,
  batchSize: number = EMBEDDING_BATCH_SIZE,
): Promise<number[][]> {
  if (texts.length === 0) return [];
  if (!(batchSize >= 1)) {
    throw new RangeError(`embedBatch: batchSize must be >= 1, got ${batchSize}`);
  }
  const step = Math.floor(batchSize);

  const out: number[][] = [];
  for (let start = 0; start < texts.length; start += step) {
    const vectors = await requestEmbeddings(texts.slice(start, start + step), apiKey);
    for (const v of vectors) out.push(v);
  }
  return out;
}
@@ -0,0 +1,187 @@
1
+ /**
2
+ * Heading-aware markdown chunker.
3
+ *
4
+ * Mirrors:
5
+ * - `src/cerefox/chunking/markdown.py` (Python pipeline)
6
+ * - `supabase/functions/cerefox-ingest/index.ts` (standalone ingest EF)
7
+ *
8
+ * Greedy section accumulation: H1/H2/H3 sections are joined into a buffer
9
+ * until adding the next would exceed `MAX_CHUNK_CHARS`. Oversized sections
10
+ * are paragraph-split. Short documents collapse to a single chunk.
11
+ *
12
+ * The hash of the chunked output (via `sha256hex(normalizeContent(...))`, both defined at the end of this module)
13
+ * must match the Python pipeline byte-for-byte so dedup works across access
14
+ * paths. Don't change chunk boundaries without updating both.
15
+ */
16
+
17
/**
 * Size limit, in characters, that chunk assembly works against. A single
 * paragraph longer than this is emitted as-is rather than split further.
 */
export const MAX_CHUNK_CHARS = 4000;

/** One heading-delimited span of the source document. */
interface Section {
  /** 0 for text before the first heading, otherwise 1–3 for H1–H3. */
  level: number;
  /** Heading path from the outermost known ancestor down to this section. */
  headings: string[];
  /** Last entry of `headings`, or `""` when there is none. */
  heading: string;
  /** Re-rendered heading line plus body (just the body at level 0). */
  content: string;
  /** Trimmed text below the heading. */
  body: string;
}

export interface Chunk {
  heading_path: string[];
  heading_level: number;
  title: string;
  content: string;
  char_count: number;
}

/**
 * Splits `text` into sections at H1/H2/H3 lines. Deeper headings and all
 * other lines are body text of the current section. Sections whose rendered
 * content is blank are dropped.
 */
function parseSections(text: string): Section[] {
  // Index i holds the pattern for heading level i + 1. The patterns are
  // mutually exclusive, so at most one matches any line.
  const headingPatterns = [/^# (.+)/, /^## (.+)/, /^### (.+)/];

  const found: Section[] = [];
  let path: string[] = [];
  let depth = 0;
  let pendingBody: string[] = [];

  const closeSection = (): void => {
    const body = pendingBody.join("\n").trim();
    pendingBody = [];
    const heading = path[path.length - 1] ?? "";
    let content = body;
    if (depth > 0) {
      const headerLine = "#".repeat(depth) + " " + heading;
      content = body ? headerLine + "\n\n" + body : headerLine;
    }
    if (content.trim()) {
      found.push({ level: depth, headings: [...path], heading, content, body });
    }
  };

  for (const line of text.split("\n")) {
    let level = 0;
    let title = "";
    for (let i = 0; i < headingPatterns.length; i++) {
      const hit = line.match(headingPatterns[i]);
      if (hit) {
        level = i + 1;
        title = hit[1].trim();
        break;
      }
    }

    if (level === 0) {
      pendingBody.push(line);
      continue;
    }

    closeSection();
    // An H1 resets the path. H2/H3 keep up to `level - 1` ancestors and drop
    // empty entries, so a missing parent heading leaves no gap in the path.
    path = level === 1
      ? [title]
      : [...path.slice(0, level - 1), title].filter(Boolean);
    depth = level;
  }

  closeSection();
  return found;
}

/** Builds a `Chunk`; the title is the last heading in the path (or `""`). */
function makeChunk(headings: string[], level: number, content: string): Chunk {
  return {
    heading_path: [...headings],
    heading_level: level,
    title: headings[headings.length - 1] ?? "",
    content,
    char_count: content.length,
  };
}

/**
 * Splits a section that is larger than `MAX_CHUNK_CHARS` on blank lines,
 * packing consecutive paragraphs together while they fit. The section's
 * heading line is prepended to the first paragraph only.
 */
function splitOversizedSection(section: Section): Chunk[] {
  const { level, headings, heading, content, body } = section;
  const pieces: Chunk[] = [];
  const headerPrefix = level > 0 ? "#".repeat(level) + " " + heading + "\n\n" : "";
  const paragraphs = (body || content).split(/\n\n+/);

  let current = "";
  paragraphs.forEach((para, position) => {
    const prefix = position === 0 ? headerPrefix : "";
    const overflows =
      current.length > 0 &&
      current.length + prefix.length + para.length + 2 > MAX_CHUNK_CHARS;
    if (overflows) {
      pieces.push(makeChunk(headings, level, current.trim()));
      current = para;
    } else {
      current = current ? current + "\n\n" + para : prefix + para;
    }
  });

  if (current.trim()) pieces.push(makeChunk(headings, level, current.trim()));
  return pieces;
}

/**
 * Chunks markdown by heading.
 *
 * - Blank input yields no chunks.
 * - Input of at most `MAX_CHUNK_CHARS` (after trimming) becomes one chunk
 *   with an empty heading path and level 0.
 * - Otherwise sections are accumulated greedily, joined by a blank line,
 *   until the next one would push the buffer past the limit. A chunk takes
 *   the heading path and level of the first section placed in it. Sections
 *   that are themselves over the limit are split by paragraph.
 */
export function chunkMarkdown(text: string): Chunk[] {
  const trimmed = text.trim();
  if (trimmed.length === 0) return [];
  if (trimmed.length <= MAX_CHUNK_CHARS) return [makeChunk([], 0, trimmed)];

  const out: Chunk[] = [];
  let parts: string[] = [];
  let owner: { headings: string[]; level: number } = { headings: [], level: 0 };
  let size = 0;

  const flush = (): void => {
    if (parts.length > 0) {
      out.push(makeChunk(owner.headings, owner.level, parts.join("\n\n")));
    }
    parts = [];
    owner = { headings: [], level: 0 };
    size = 0;
  };

  for (const section of parseSections(trimmed)) {
    const { content } = section;

    if (content.length > MAX_CHUNK_CHARS) {
      flush();
      for (const piece of splitOversizedSection(section)) out.push(piece);
      continue;
    }

    // Joining onto an existing buffer costs two extra characters ("\n\n").
    const joinCost = parts.length > 0 ? 2 : 0;
    if (size + content.length + joinCost > MAX_CHUNK_CHARS) flush();

    if (parts.length === 0) {
      owner = section;
      size = content.length;
    } else {
      size += content.length + 2;
    }
    parts.push(content);
  }

  flush();
  return out;
}
174
+
175
/**
 * Content-hash normalization. Must match `pipeline.py::_normalize`
 * byte-for-byte so cross-runtime dedup works.
 *
 * Steps, in order: trim both ends, turn `\r\n` and then lone `\r` into
 * `\n`, and collapse every run of three or more newlines to exactly two.
 */
export function normalizeContent(text: string): string {
  const unixNewlines = text.trim().replace(/\r\n/g, "\n").replace(/\r/g, "\n");
  return unixNewlines.replace(/\n{3,}/g, "\n\n");
}
180
+
181
/**
 * SHA-256 of the UTF-8 encoding of `text`, returned as a 64-character
 * lowercase hexadecimal string.
 */
export async function sha256hex(text: string): Promise<string> {
  const digest = await crypto.subtle.digest("SHA-256", new TextEncoder().encode(text));
  let hex = "";
  for (const byte of new Uint8Array(digest)) {
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
@@ -0,0 +1,121 @@
1
+ /**
2
+ * Project-membership helpers shared by the `ingest` and
3
+ * `set-document-projects` tools.
4
+ *
5
+ * Two semantics are needed by callers:
6
+ *
7
+ * - **Non-destructive add** (`ensureDocumentInProject`): used when an ingest
8
+ * call supplies a singular `project_name`. Resolves (or creates) the
9
+ * project, then ensures the `(document, project)` row exists. Idempotent;
10
+ * does NOT remove any existing memberships. Per issue #38: the v0.1.20
11
+ * fix that stopped agent updates from silently wiping operator-curated
12
+ * memberships.
13
+ *
14
+ * - **Destructive replace** (`setDocumentProjectsByName`): used when a call
15
+ * supplies an explicit `project_names` list (or via the dedicated
16
+ * `cerefox_set_document_projects` tool). DELETE-then-INSERT replaces the
17
+ * document's memberships with exactly the given set.
18
+ *
19
+ * Both call sites need consistent name resolution (case-insensitive
20
+ * `ilike` match against `cerefox_projects.name`); centralising here
21
+ * prevents drift.
22
+ */
23
+
24
+ import type { MCPSupabaseClient } from "./types.ts";
25
+
26
/**
 * Ensure `(documentId, project)` exists. Resolves the project by name
 * (case-insensitive); creates the project if it is missing. Idempotent;
 * never removes existing memberships.
 *
 * @returns The resolved project_id, or `null` when the project lookup
 *   failed or the project could not be created. A failed membership insert
 *   is only logged; the project_id is still returned.
 */
export async function ensureDocumentInProject(
  supabase: MCPSupabaseClient,
  documentId: string,
  projectName: string,
): Promise<string | null> {
  // `ilike` takes a LIKE pattern: without escaping, `_` and `%` in a project
  // name act as wildcards and `team_a` would also match `teamXa`.
  // NOTE(review): PostgREST also treats `*` as `%`; names containing `*`
  // may still over-match — confirm against the deployed PostgREST version.
  const exactName = projectName.replace(/[\\%_]/g, "\\$&");

  const { data: found, error: lookupErr } = await supabase
    .from("cerefox_projects")
    .select("id")
    .ilike("name", exactName)
    .limit(1);
  if (lookupErr) {
    // Don't fall through to creation: a failed lookup says nothing about
    // whether the project exists, and creating could duplicate it.
    console.warn("ensureDocumentInProject: project lookup failed", lookupErr);
    return null;
  }

  let projectId: string | null = found?.[0]?.id ?? null;
  if (!projectId) {
    const { data: created, error: createErr } = await supabase
      .from("cerefox_projects")
      .insert({ name: projectName })
      .select("id");
    if (createErr) {
      console.warn("ensureDocumentInProject: project creation failed", createErr);
    }
    projectId = created?.[0]?.id ?? null;
  }
  if (!projectId) return null;

  const { data: existing } = await supabase
    .from("cerefox_document_projects")
    .select("document_id")
    .eq("document_id", documentId)
    .eq("project_id", projectId)
    .limit(1);
  if (existing?.length) return projectId;

  const { error: insertErr } = await supabase
    .from("cerefox_document_projects")
    .insert({ document_id: documentId, project_id: projectId });
  // A concurrent writer may have added the row between the check and the
  // insert; a duplicate-key error therefore still means the row exists.
  if (insertErr && !String(insertErr.message ?? "").includes("duplicate key")) {
    console.warn("ensureDocumentInProject: insert failed", insertErr);
  }
  return projectId;
}
67
+
68
/**
 * DELETE-then-INSERT replacement of a document's project memberships.
 *
 * Each name is resolved to a project_id (case-insensitive), creating the
 * project when absent. Empty names are skipped. Names that resolve to the
 * same project are collapsed to one membership, because a batch insert with
 * a repeated `(document_id, project_id)` pair is rejected as a whole and
 * would leave the document with no memberships after the DELETE.
 *
 * An empty `projectNames` clears all memberships. Database errors are
 * logged, not thrown; if the DELETE fails the INSERT is skipped so the
 * existing rows are left as they were.
 *
 * @returns The resolved, de-duplicated project_ids in first-seen input order.
 */
export async function setDocumentProjectsByName(
  supabase: MCPSupabaseClient,
  documentId: string,
  projectNames: string[],
): Promise<string[]> {
  const projectIds: string[] = [];
  const seen = new Set<string>();
  const remember = (id: string): void => {
    if (seen.has(id)) return;
    seen.add(id);
    projectIds.push(id);
  };

  for (const name of projectNames) {
    if (!name) continue;

    // Escape LIKE metacharacters so `_` / `%` in a name match literally.
    // NOTE(review): PostgREST also treats `*` as `%` — confirm whether names
    // containing `*` need separate handling.
    const exactName = name.replace(/[\\%_]/g, "\\$&");
    const { data: found, error: lookupErr } = await supabase
      .from("cerefox_projects")
      .select("id")
      .ilike("name", exactName)
      .limit(1);
    if (lookupErr) {
      console.warn("setDocumentProjectsByName: project lookup failed", name, lookupErr);
      continue;
    }
    if (found?.length) {
      remember(found[0].id);
      continue;
    }

    const { data: created, error: createErr } = await supabase
      .from("cerefox_projects")
      .insert({ name })
      .select("id");
    if (createErr) {
      console.warn("setDocumentProjectsByName: project creation failed", name, createErr);
    }
    if (created?.[0]?.id) remember(created[0].id);
  }

  const { error: deleteErr } = await supabase
    .from("cerefox_document_projects")
    .delete()
    .eq("document_id", documentId);
  if (deleteErr) {
    console.warn("setDocumentProjectsByName: delete failed", deleteErr);
    return projectIds;
  }

  if (projectIds.length > 0) {
    const rows = projectIds.map((pid) => ({ document_id: documentId, project_id: pid }));
    const { error: insertErr } = await supabase.from("cerefox_document_projects").insert(rows);
    if (insertErr) {
      console.warn("setDocumentProjectsByName: insert failed", insertErr);
    }
  }
  return projectIds;
}
106
+
107
/**
 * Resolve a project name → project_id (case-insensitive), or `null` if
 * not found or the query failed. Does NOT create. Used by search /
 * metadata-search to translate `project_name` parameters to UUIDs.
 */
export async function lookupProjectId(
  supabase: MCPSupabaseClient,
  projectName: string,
): Promise<string | null> {
  // `ilike` takes a LIKE pattern; escape `\`, `%` and `_` so the name is
  // matched literally (otherwise `team_a` would also resolve `teamXa`).
  // NOTE(review): PostgREST also treats `*` as `%` — confirm whether names
  // containing `*` need separate handling.
  const exactName = projectName.replace(/[\\%_]/g, "\\$&");
  const { data, error } = await supabase
    .from("cerefox_projects")
    .select("id")
    .ilike("name", exactName)
    .limit(1);
  if (error || !data?.length) return null;
  return data[0].id;
}
@@ -0,0 +1,73 @@
1
+ /**
2
+ * Internal helpers shared by `_shared/mcp-tools/` handlers.
3
+ *
4
+ * - `applyByteBudget`: drop whole rows until the cumulative serialized size
5
+ * fits within the budget. Used by `search` and `metadata-search`.
6
+ * - `logUsage`: fire-and-forget write to `cerefox_usage_log` via RPC.
7
+ * Never blocks the tool response.
8
+ *
9
+ * Both helpers are mirrored from `supabase/functions/cerefox-mcp/shared.ts`
10
+ * for the v0.4.0 extraction — once `_shared/mcp-tools/` is the source of
11
+ * truth (after 22D refactors the EF to import from here), the EF's `shared.ts`
12
+ * removes its copies.
13
+ */
14
+
15
+ import type { MCPSupabaseClient } from "./types.ts";
16
+
17
+ /** Server-enforced response-size ceiling for MCP results. Agents can request
18
+ * smaller budgets via `max_bytes`; values above this are capped. */
19
+ export const MAX_RESPONSE_BYTES = 200_000;
20
+
21
/**
 * Accept rows, in order, while their cumulative serialized size stays
 * within `maxBytes`; stop at the first row that would exceed it.
 *
 * Size is the UTF-8 byte length of `JSON.stringify(row)` for each row on
 * its own; separators and the enclosing array brackets are not counted.
 *
 * @param maxBytes Byte budget. `NaN` is treated as a zero budget — every
 *   comparison against `NaN` is false, so it would otherwise disable the
 *   limit and accept every row.
 * @returns `accepted` rows, whether anything was dropped (`truncated`),
 *   and the bytes consumed by the accepted rows (`usedBytes`).
 */
export function applyByteBudget(
  rows: unknown[],
  maxBytes: number,
): { accepted: unknown[]; truncated: boolean; usedBytes: number } {
  const budget = Number.isNaN(maxBytes) ? 0 : maxBytes;
  // One encoder for the whole pass instead of one per row.
  const encoder = new TextEncoder();

  const accepted: unknown[] = [];
  let usedBytes = 0;
  let truncated = false;

  for (const row of rows) {
    const rowBytes = encoder.encode(JSON.stringify(row)).length;
    if (usedBytes + rowBytes > budget) {
      truncated = true;
      break;
    }
    accepted.push(row);
    usedBytes += rowBytes;
  }

  return { accepted, truncated, usedBytes };
}
41
+
42
+ import type { AccessPath } from "./types.ts";
43
+
44
+ export interface LogUsageParams {
45
+ operation: string;
46
+ accessPath: AccessPath;
47
+ query_text?: string | null;
48
+ document_id?: string | null;
49
+ project_id?: string | null;
50
+ result_count?: number | null;
51
+ requestor?: string | null;
52
+ extra?: Record<string, unknown>;
53
+ }
54
+
55
/**
 * Fire-and-forget usage logging via the `cerefox_log_usage` RPC. Never
 * throws and never blocks the response: the call is not awaited, and both
 * a rejected RPC promise and a synchronous throw while issuing the RPC are
 * swallowed — usage logging is best-effort by design.
 *
 * `accessPath` is a required parameter so each caller states its own access
 * path (e.g. `"remote-mcp"` for the EF, `"local-mcp"` for the local TS MCP
 * server). `requestor` defaults to `"mcp-agent"`, `extra` to `{}`, and the
 * remaining optional fields to `null`.
 */
export function logUsage(supabase: MCPSupabaseClient, params: LogUsageParams): void {
  try {
    void Promise.resolve(
      supabase.rpc("cerefox_log_usage", {
        p_operation: params.operation,
        p_access_path: params.accessPath,
        p_requestor: params.requestor ?? "mcp-agent",
        p_document_id: params.document_id ?? null,
        p_project_id: params.project_id ?? null,
        p_query_text: params.query_text ?? null,
        p_result_count: params.result_count ?? null,
        p_extra: params.extra ?? {},
      }),
    ).catch(() => {});
  } catch {
    // `rpc()` threw before returning a promise; `.catch` above cannot see
    // that, so it is swallowed here to keep the "never throws" contract.
  }
}