@cerefox/memory 0.7.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/README.md +62 -25
  2. package/dist/bin/cerefox.js +1163 -344
  3. package/dist/frontend/assets/{index-HNlMcvli.js → index-CAp2_lFX.js} +2 -2
  4. package/dist/frontend/assets/index-CAp2_lFX.js.map +1 -0
  5. package/dist/frontend/index.html +1 -1
  6. package/dist/server-assets/_shared/ef-meta/index.ts +97 -0
  7. package/dist/server-assets/_shared/embeddings/index.ts +175 -0
  8. package/dist/server-assets/_shared/mcp-tools/_chunker.ts +187 -0
  9. package/dist/server-assets/_shared/mcp-tools/_projects.ts +121 -0
  10. package/dist/server-assets/_shared/mcp-tools/_utils.ts +73 -0
  11. package/dist/server-assets/_shared/mcp-tools/audit-log.ts +95 -0
  12. package/dist/server-assets/_shared/mcp-tools/get-document.ts +73 -0
  13. package/dist/server-assets/_shared/mcp-tools/get-help-content.ts +26 -0
  14. package/dist/server-assets/_shared/mcp-tools/get-help.ts +90 -0
  15. package/dist/server-assets/_shared/mcp-tools/index.ts +67 -0
  16. package/dist/server-assets/_shared/mcp-tools/ingest.ts +315 -0
  17. package/dist/server-assets/_shared/mcp-tools/list-metadata-keys.ts +55 -0
  18. package/dist/server-assets/_shared/mcp-tools/list-projects.ts +59 -0
  19. package/dist/server-assets/_shared/mcp-tools/list-versions.ts +72 -0
  20. package/dist/server-assets/_shared/mcp-tools/metadata-search.ts +154 -0
  21. package/dist/server-assets/_shared/mcp-tools/search.ts +193 -0
  22. package/dist/server-assets/_shared/mcp-tools/set-document-projects.ts +163 -0
  23. package/dist/server-assets/_shared/mcp-tools/types.ts +92 -0
  24. package/dist/server-assets/db/migrations/0003_add_document_versions.sql +91 -0
  25. package/dist/server-assets/db/migrations/0004_add_audit_log_review_status_archived.sql +71 -0
  26. package/dist/server-assets/db/migrations/0005_metadata_search.sql +628 -0
  27. package/dist/server-assets/db/migrations/0006_usage_log.sql +255 -0
  28. package/dist/server-assets/db/migrations/0007_usage_log_requestor.sql +178 -0
  29. package/dist/server-assets/db/migrations/0008_soft_delete.sql +130 -0
  30. package/dist/server-assets/db/migrations/0009_audit_log_restore_operation.sql +20 -0
  31. package/dist/server-assets/db/migrations/0010_requestor_enforcement_config.sql +12 -0
  32. package/dist/server-assets/db/migrations/0011_title_boosting.sql +48 -0
  33. package/dist/server-assets/db/rpcs.sql +1723 -0
  34. package/dist/server-assets/db/schema.sql +380 -0
  35. package/dist/server-assets/supabase/functions/cerefox-get-audit-log/index.ts +117 -0
  36. package/dist/server-assets/supabase/functions/cerefox-get-document/index.ts +138 -0
  37. package/dist/server-assets/supabase/functions/cerefox-ingest/index.ts +819 -0
  38. package/dist/server-assets/supabase/functions/cerefox-list-projects/index.ts +96 -0
  39. package/dist/server-assets/supabase/functions/cerefox-list-versions/index.ts +113 -0
  40. package/dist/server-assets/supabase/functions/cerefox-mcp/index.ts +294 -0
  41. package/dist/server-assets/supabase/functions/cerefox-mcp/shared.ts +42 -0
  42. package/dist/server-assets/supabase/functions/cerefox-metadata/index.ts +99 -0
  43. package/dist/server-assets/supabase/functions/cerefox-metadata-search/index.ts +146 -0
  44. package/dist/server-assets/supabase/functions/cerefox-search/index.ts +382 -0
  45. package/docs/guides/connect-agents.md +58 -3
  46. package/docs/guides/migration-v0.5.md +50 -0
  47. package/package.json +3 -2
  48. package/dist/frontend/assets/index-HNlMcvli.js.map +0 -1
@@ -0,0 +1,628 @@
1
+ -- Migration 0005: Metadata search + project name standardisation (Iteration 16B)
2
+ --
3
+ -- Changes:
4
+ -- 1. Add project_names TEXT[] to all chunk-level search RPCs (hybrid, fts, semantic)
5
+ -- 2. Add project_names TEXT[] to document-level RPCs (search_docs, reconstruct_doc, get_document)
6
+ -- 3. New cerefox_list_projects() RPC
7
+ -- 4. New cerefox_metadata_search() RPC
8
+ --
9
+ -- All DROP + CREATE pairs are required because RETURNS TABLE signature changes
10
+ -- cannot be applied via CREATE OR REPLACE.
11
+
12
+ -- ── 1. Drop existing signatures before recreating with project_names ─────────
13
+
14
-- Remove the previous definitions first: the functions recreated later in this
-- migration change their RETURNS TABLE column lists, which CREATE OR REPLACE
-- alone cannot do. IF EXISTS keeps the step safe on databases that never had
-- a given signature.

-- Chunk-level search RPCs.
DROP FUNCTION IF EXISTS
    cerefox_hybrid_search(TEXT, VECTOR(768), INT, FLOAT, BOOLEAN, UUID, FLOAT, JSONB),
    cerefox_fts_search(TEXT, INT, UUID, JSONB),
    cerefox_semantic_search(VECTOR(768), INT, BOOLEAN, UUID, FLOAT, JSONB);

-- Document-level RPCs.
DROP FUNCTION IF EXISTS
    cerefox_search_docs(TEXT, VECTOR(768), INT, FLOAT, UUID, FLOAT, INT, INT, JSONB),
    cerefox_reconstruct_doc(UUID),
    cerefox_get_document(UUID, UUID);
23
+
24
+ -- ── 2. Recreate chunk-level RPCs with project_names TEXT[] ───────────────────
25
+
26
-- cerefox_hybrid_search: chunk-level search blending full-text and vector scores.
--
-- Parameters:
--   p_query_text       Query text, parsed with websearch_to_tsquery('english', ...).
--   p_query_embedding  Query vector compared to chunk embeddings with <=>.
--   p_match_count      Maximum number of rows returned. Each branch (FTS and
--                      vector) collects up to 5x this many candidates.
--   p_alpha            Weight of the vector score; the FTS score is weighted
--                      by (1 - p_alpha).
--   p_use_upgrade      When TRUE, use embedding_upgrade for chunks that have
--                      one; otherwise embedding_primary is used.
--   p_project_id       Optional: restrict to documents linked to this project.
--   p_min_score        Vector-score floor; applied only to chunks that have
--                      no FTS match.
--   p_metadata_filter  Optional JSONB containment (@>) filter on d.metadata.
--
-- Returns one row per matching current chunk (version_id IS NULL) together
-- with its document's title, source, project ids/names, metadata and stored
-- version count, ordered by blended score descending.
--
-- Declared STABLE: the body only reads tables, so it does not need the
-- default VOLATILE label. cerefox_search_docs, which is itself STABLE, calls
-- this function.
CREATE OR REPLACE FUNCTION cerefox_hybrid_search(
    p_query_text       TEXT,
    p_query_embedding  VECTOR(768),
    p_match_count      INT     DEFAULT 10,
    p_alpha            FLOAT   DEFAULT 0.7,
    p_use_upgrade      BOOLEAN DEFAULT FALSE,
    p_project_id       UUID    DEFAULT NULL,
    p_min_score        FLOAT   DEFAULT 0.0,
    p_metadata_filter  JSONB   DEFAULT NULL
)
RETURNS TABLE (
    chunk_id          UUID,
    document_id       UUID,
    chunk_index       INT,
    title             TEXT,
    content           TEXT,
    heading_path      TEXT[],
    heading_level     INT,
    score             FLOAT,
    doc_title         TEXT,
    doc_source        TEXT,
    doc_project_ids   UUID[],
    doc_project_names TEXT[],
    doc_metadata      JSONB,
    version_count     INT
)
LANGUAGE plpgsql
SECURITY DEFINER
STABLE
SET search_path = public, pg_catalog
AS $$
DECLARE
    query_fts       tsquery := websearch_to_tsquery('english', p_query_text);
    -- Over-fetch per branch so the blended ranking has enough candidates.
    candidate_count INT     := p_match_count * 5;
BEGIN
    RETURN QUERY
    WITH
    -- Best full-text matches among current chunks.
    fts_results AS (
        SELECT
            c.id,
            ts_rank_cd(c.fts, query_fts)::FLOAT AS fts_score
        FROM cerefox_chunks c
        JOIN cerefox_documents d ON c.document_id = d.id
        WHERE c.version_id IS NULL
          AND c.fts @@ query_fts
          AND (p_project_id IS NULL OR EXISTS (
                SELECT 1 FROM cerefox_document_projects dp
                WHERE dp.document_id = d.id AND dp.project_id = p_project_id
              ))
          AND (p_metadata_filter IS NULL OR d.metadata @> p_metadata_filter)
        ORDER BY fts_score DESC
        LIMIT candidate_count
    ),
    -- Nearest chunks by vector distance; similarity is 1.0 minus the distance.
    vec_results AS (
        SELECT
            c.id,
            CASE
                WHEN p_use_upgrade AND c.embedding_upgrade IS NOT NULL
                    THEN (1.0 - (c.embedding_upgrade <=> p_query_embedding))::FLOAT
                ELSE
                    (1.0 - (c.embedding_primary <=> p_query_embedding))::FLOAT
            END AS vec_score
        FROM cerefox_chunks c
        JOIN cerefox_documents d ON c.document_id = d.id
        WHERE c.version_id IS NULL
          AND (p_project_id IS NULL OR EXISTS (
                SELECT 1 FROM cerefox_document_projects dp
                WHERE dp.document_id = d.id AND dp.project_id = p_project_id
              ))
          AND (p_metadata_filter IS NULL OR d.metadata @> p_metadata_filter)
        ORDER BY
            CASE
                WHEN p_use_upgrade AND c.embedding_upgrade IS NOT NULL
                    THEN c.embedding_upgrade <=> p_query_embedding
                ELSE c.embedding_primary <=> p_query_embedding
            END
        LIMIT candidate_count
    ),
    -- Union of both candidate sets; a branch that missed a chunk contributes 0.
    combined AS (
        SELECT
            COALESCE(f.id, v.id) AS id,
            ( p_alpha * COALESCE(v.vec_score, 0.0) +
              (1.0 - p_alpha) * COALESCE(f.fts_score, 0.0)
            ) AS score,
            COALESCE(v.vec_score, 0.0) AS vec_score,
            f.id IS NOT NULL AS has_fts_match
        FROM fts_results f
        FULL OUTER JOIN vec_results v ON f.id = v.id
    )
    SELECT
        c.id AS chunk_id,
        c.document_id,
        c.chunk_index,
        c.title,
        c.content,
        c.heading_path,
        c.heading_level,
        cm.score,
        d.title  AS doc_title,
        d.source AS doc_source,
        ARRAY(SELECT dp.project_id FROM cerefox_document_projects dp
              WHERE dp.document_id = d.id) AS doc_project_ids,
        ARRAY(SELECT p.name FROM cerefox_projects p
              JOIN cerefox_document_projects dp ON p.id = dp.project_id
              WHERE dp.document_id = d.id) AS doc_project_names,
        d.metadata AS doc_metadata,
        (SELECT COUNT(*)::INT FROM cerefox_document_versions dv
         WHERE dv.document_id = d.id) AS version_count
    FROM combined cm
    JOIN cerefox_chunks c ON c.id = cm.id
    JOIN cerefox_documents d ON c.document_id = d.id
    -- FTS hits always pass; vector-only hits must reach the similarity floor.
    WHERE cm.has_fts_match OR cm.vec_score >= p_min_score
    ORDER BY cm.score DESC
    LIMIT p_match_count;
END;
$$;
141
+
142
+
143
-- cerefox_fts_search: chunk-level full-text search.
--
-- Parameters:
--   p_query_text       Query text, parsed with websearch_to_tsquery('english', ...).
--   p_match_count      Maximum number of rows returned.
--   p_project_id       Optional: restrict to documents linked to this project.
--   p_metadata_filter  Optional JSONB containment (@>) filter on d.metadata.
--
-- Returns current chunks (version_id IS NULL) whose fts column matches the
-- query, scored with ts_rank_cd and ordered by that score descending, along
-- with document title, source, project ids/names, metadata and version count.
--
-- Declared STABLE: the body only reads tables, matching the volatility label
-- already used by the SQL-language RPCs in this migration.
CREATE OR REPLACE FUNCTION cerefox_fts_search(
    p_query_text       TEXT,
    p_match_count      INT   DEFAULT 10,
    p_project_id       UUID  DEFAULT NULL,
    p_metadata_filter  JSONB DEFAULT NULL
)
RETURNS TABLE (
    chunk_id          UUID,
    document_id       UUID,
    chunk_index       INT,
    title             TEXT,
    content           TEXT,
    heading_path      TEXT[],
    heading_level     INT,
    score             FLOAT,
    doc_title         TEXT,
    doc_source        TEXT,
    doc_project_ids   UUID[],
    doc_project_names TEXT[],
    doc_metadata      JSONB,
    version_count     INT
)
LANGUAGE plpgsql
SECURITY DEFINER
STABLE
SET search_path = public, pg_catalog
AS $$
DECLARE
    query_fts tsquery := websearch_to_tsquery('english', p_query_text);
BEGIN
    RETURN QUERY
    SELECT
        c.id AS chunk_id,
        c.document_id,
        c.chunk_index,
        c.title,
        c.content,
        c.heading_path,
        c.heading_level,
        ts_rank_cd(c.fts, query_fts)::FLOAT AS score,
        d.title  AS doc_title,
        d.source AS doc_source,
        ARRAY(SELECT dp.project_id FROM cerefox_document_projects dp
              WHERE dp.document_id = d.id) AS doc_project_ids,
        ARRAY(SELECT p.name FROM cerefox_projects p
              JOIN cerefox_document_projects dp ON p.id = dp.project_id
              WHERE dp.document_id = d.id) AS doc_project_names,
        d.metadata AS doc_metadata,
        (SELECT COUNT(*)::INT FROM cerefox_document_versions dv
         WHERE dv.document_id = d.id) AS version_count
    FROM cerefox_chunks c
    JOIN cerefox_documents d ON c.document_id = d.id
    WHERE c.version_id IS NULL
      AND c.fts @@ query_fts
      AND (p_project_id IS NULL OR EXISTS (
            SELECT 1 FROM cerefox_document_projects dp
            WHERE dp.document_id = d.id AND dp.project_id = p_project_id
          ))
      AND (p_metadata_filter IS NULL OR d.metadata @> p_metadata_filter)
    ORDER BY score DESC
    LIMIT p_match_count;
END;
$$;
205
+
206
+
207
-- cerefox_semantic_search: chunk-level vector-similarity search.
--
-- Parameters:
--   p_query_embedding  Query vector compared to chunk embeddings with <=>.
--   p_match_count      Maximum number of rows returned.
--   p_use_upgrade      When TRUE, score against embedding_upgrade and skip
--                      chunks that have no upgrade embedding; when FALSE,
--                      score against embedding_primary.
--   p_project_id       Optional: restrict to documents linked to this project.
--   p_min_score        Minimum similarity (1.0 minus the <=> distance).
--   p_metadata_filter  Optional JSONB containment (@>) filter on d.metadata.
--
-- Returns current chunks (version_id IS NULL) ordered by ascending distance,
-- along with document title, source, project ids/names, metadata and
-- version count.
--
-- Declared STABLE: the body only reads tables, matching the volatility label
-- already used by the SQL-language RPCs in this migration.
CREATE OR REPLACE FUNCTION cerefox_semantic_search(
    p_query_embedding  VECTOR(768),
    p_match_count      INT     DEFAULT 10,
    p_use_upgrade      BOOLEAN DEFAULT FALSE,
    p_project_id       UUID    DEFAULT NULL,
    p_min_score        FLOAT   DEFAULT 0.0,
    p_metadata_filter  JSONB   DEFAULT NULL
)
RETURNS TABLE (
    chunk_id          UUID,
    document_id       UUID,
    chunk_index       INT,
    title             TEXT,
    content           TEXT,
    heading_path      TEXT[],
    heading_level     INT,
    score             FLOAT,
    doc_title         TEXT,
    doc_source        TEXT,
    doc_project_ids   UUID[],
    doc_project_names TEXT[],
    doc_metadata      JSONB,
    version_count     INT
)
LANGUAGE plpgsql
SECURITY DEFINER
STABLE
SET search_path = public, pg_catalog
AS $$
BEGIN
    RETURN QUERY
    SELECT
        c.id AS chunk_id,
        c.document_id,
        c.chunk_index,
        c.title,
        c.content,
        c.heading_path,
        c.heading_level,
        CASE
            WHEN p_use_upgrade AND c.embedding_upgrade IS NOT NULL
                THEN (1.0 - (c.embedding_upgrade <=> p_query_embedding))::FLOAT
            ELSE
                (1.0 - (c.embedding_primary <=> p_query_embedding))::FLOAT
        END AS score,
        d.title  AS doc_title,
        d.source AS doc_source,
        ARRAY(SELECT dp.project_id FROM cerefox_document_projects dp
              WHERE dp.document_id = d.id) AS doc_project_ids,
        ARRAY(SELECT p.name FROM cerefox_projects p
              JOIN cerefox_document_projects dp ON p.id = dp.project_id
              WHERE dp.document_id = d.id) AS doc_project_names,
        d.metadata AS doc_metadata,
        (SELECT COUNT(*)::INT FROM cerefox_document_versions dv
         WHERE dv.document_id = d.id) AS version_count
    FROM cerefox_chunks c
    JOIN cerefox_documents d ON c.document_id = d.id
    WHERE c.version_id IS NULL
      AND (p_project_id IS NULL OR EXISTS (
            SELECT 1 FROM cerefox_document_projects dp
            WHERE dp.document_id = d.id AND dp.project_id = p_project_id
          ))
      AND (p_metadata_filter IS NULL OR d.metadata @> p_metadata_filter)
      -- In upgrade mode, chunks lacking an upgrade embedding are skipped
      -- rather than scored against the primary embedding.
      AND (p_use_upgrade = FALSE OR c.embedding_upgrade IS NOT NULL)
      -- Similarity floor; same expression as the score column above.
      AND CASE
            WHEN p_use_upgrade AND c.embedding_upgrade IS NOT NULL
                THEN (1.0 - (c.embedding_upgrade <=> p_query_embedding))::FLOAT
            ELSE (1.0 - (c.embedding_primary <=> p_query_embedding))::FLOAT
          END >= p_min_score
    ORDER BY
        CASE
            WHEN p_use_upgrade AND c.embedding_upgrade IS NOT NULL
                THEN c.embedding_upgrade <=> p_query_embedding
            ELSE c.embedding_primary <=> p_query_embedding
        END
    LIMIT p_match_count;
END;
$$;
284
+
285
+ -- ── 3. Recreate document-level RPCs with project_names TEXT[] ────────────────
286
+
287
-- cerefox_search_docs: document-level search built on cerefox_hybrid_search.
--
-- Parameters:
--   p_query_text, p_query_embedding, p_alpha, p_project_id, p_min_score,
--   p_metadata_filter   Passed through to cerefox_hybrid_search
--                       (with p_use_upgrade fixed to FALSE).
--   p_match_count       Maximum number of documents returned; the chunk
--                       search is asked for 10x this many rows.
--   p_small_to_big_threshold
--                       Documents whose current chunks total more characters
--                       than this return only the matched chunks expanded via
--                       cerefox_context_expand (is_partial = TRUE). A value
--                       <= 0 disables this and always returns full content.
--   p_context_window    Forwarded to cerefox_context_expand as its second
--                       argument.
--
-- Returns one row per document, ordered by the document's best chunk score
-- descending.
CREATE OR REPLACE FUNCTION cerefox_search_docs(
    p_query_text             TEXT,
    p_query_embedding        VECTOR(768),
    p_match_count            INT   DEFAULT 5,
    p_alpha                  FLOAT DEFAULT 0.7,
    p_project_id             UUID  DEFAULT NULL,
    p_min_score              FLOAT DEFAULT 0.0,
    p_small_to_big_threshold INT   DEFAULT 20000,
    p_context_window         INT   DEFAULT 1,
    p_metadata_filter        JSONB DEFAULT NULL
)
RETURNS TABLE (
    document_id             UUID,
    doc_title               TEXT,
    doc_source              TEXT,
    doc_metadata            JSONB,
    doc_project_ids         UUID[],
    doc_project_names       TEXT[],
    best_score              FLOAT,
    best_chunk_heading_path TEXT[],
    full_content            TEXT,
    chunk_count             INT,
    total_chars             INT,
    doc_updated_at          TIMESTAMPTZ,
    version_count           INT,
    is_partial              BOOL
)
LANGUAGE sql
SECURITY DEFINER
STABLE
SET search_path = public, pg_catalog
AS $$
    -- Chunk-level hits; over-fetched so several chunks per document survive.
    WITH chunk_results AS (
        SELECT * FROM cerefox_hybrid_search(
            p_query_text      := p_query_text,
            p_query_embedding := p_query_embedding,
            p_match_count     := p_match_count * 10,
            p_alpha           := p_alpha,
            p_use_upgrade     := FALSE,
            p_project_id      := p_project_id,
            p_min_score       := p_min_score,
            p_metadata_filter := p_metadata_filter
        )
    ),
    -- Highest-scoring chunk per document.
    best_per_doc AS (
        SELECT DISTINCT ON (cr.document_id)
            cr.document_id,
            cr.heading_path AS best_chunk_heading_path,
            cr.score        AS best_score,
            cr.doc_title,
            cr.doc_source,
            cr.doc_metadata,
            cr.doc_project_ids,
            cr.doc_project_names,
            cr.version_count,
            d.updated_at    AS doc_updated_at
        FROM chunk_results cr
        JOIN cerefox_documents d ON d.id = cr.document_id
        ORDER BY cr.document_id, cr.score DESC
    ),
    top_docs AS (
        SELECT *
        FROM best_per_doc
        ORDER BY best_score DESC
        LIMIT p_match_count
    ),
    -- Character totals over each top document's current chunks.
    doc_sizes AS (
        SELECT c.document_id, SUM(c.char_count)::INT AS total_chars
        FROM cerefox_chunks c
        WHERE c.document_id IN (SELECT td.document_id FROM top_docs td)
          AND c.version_id IS NULL
        GROUP BY c.document_id
    ),
    -- Matched chunks belonging to documents above the size threshold.
    large_doc_seeds AS (
        SELECT cr.chunk_id
        FROM chunk_results cr
        JOIN doc_sizes ds ON cr.document_id = ds.document_id
        WHERE p_small_to_big_threshold > 0
          AND ds.total_chars > p_small_to_big_threshold
          AND cr.document_id IN (SELECT td.document_id FROM top_docs td)
    ),
    expanded AS (
        SELECT ec.chunk_id, ec.document_id, ec.chunk_index, ec.content
        FROM cerefox_context_expand(
            COALESCE((SELECT ARRAY_AGG(lds.chunk_id) FROM large_doc_seeds lds),
                     ARRAY[]::UUID[]),
            p_context_window
        ) ec
    ),
    large_doc_content AS (
        SELECT
            e.document_id,
            STRING_AGG(e.content, E'\n\n' ORDER BY e.chunk_index) AS full_content,
            COUNT(*)::INT AS chunk_count,
            TRUE AS is_partial
        FROM expanded e
        GROUP BY e.document_id
    ),
    -- Every other top document gets its full current content.
    small_doc_content AS (
        SELECT
            c.document_id,
            STRING_AGG(c.content, E'\n\n' ORDER BY c.chunk_index) AS full_content,
            COUNT(*)::INT AS chunk_count,
            FALSE AS is_partial
        FROM cerefox_chunks c
        WHERE c.document_id IN (SELECT td.document_id FROM top_docs td)
          -- NOT EXISTS rather than NOT IN: a single NULL document_id coming
          -- back from the expansion would make NOT IN reject every row.
          AND NOT EXISTS (
                SELECT 1
                FROM large_doc_content ldc
                WHERE ldc.document_id = c.document_id
              )
          AND c.version_id IS NULL
        GROUP BY c.document_id
    ),
    all_content AS (
        SELECT ldc.document_id, ldc.full_content, ldc.chunk_count, ldc.is_partial
        FROM large_doc_content ldc
        UNION ALL
        SELECT sdc.document_id, sdc.full_content, sdc.chunk_count, sdc.is_partial
        FROM small_doc_content sdc
    )
    SELECT
        td.document_id,
        td.doc_title,
        td.doc_source,
        td.doc_metadata,
        td.doc_project_ids,
        td.doc_project_names,
        td.best_score,
        td.best_chunk_heading_path,
        ac.full_content,
        ac.chunk_count,
        ds.total_chars,
        td.doc_updated_at,
        td.version_count,
        ac.is_partial
    FROM top_docs td
    JOIN doc_sizes ds ON ds.document_id = td.document_id
    JOIN all_content ac ON ac.document_id = td.document_id
    ORDER BY td.best_score DESC;
$$;
421
+
422
+
423
-- cerefox_reconstruct_doc: rebuild a document's current text from its chunks.
--
-- Concatenates the document's current chunks (version_id IS NULL) in
-- chunk_index order, separated by blank lines, and returns the result with
-- the document's title, source, metadata, project ids/names, chunk and
-- character totals, and stored version count.
-- Yields no row when the document has no current chunks (inner join).
CREATE FUNCTION cerefox_reconstruct_doc(p_document_id UUID)
RETURNS TABLE (
    document_id       UUID,
    doc_title         TEXT,
    doc_source        TEXT,
    doc_metadata      JSONB,
    doc_project_ids   UUID[],
    doc_project_names TEXT[],
    full_content      TEXT,
    chunk_count       INT,
    total_chars       INT,
    version_count     INT
)
LANGUAGE sql
SECURITY DEFINER
STABLE
SET search_path = public, pg_catalog
AS $$
    SELECT
        doc.id       AS document_id,
        doc.title    AS doc_title,
        doc.source   AS doc_source,
        doc.metadata AS doc_metadata,
        ARRAY(
            SELECT link.project_id
            FROM cerefox_document_projects AS link
            WHERE link.document_id = doc.id
        ) AS doc_project_ids,
        ARRAY(
            SELECT proj.name
            FROM cerefox_projects AS proj
            INNER JOIN cerefox_document_projects AS link
                ON proj.id = link.project_id
            WHERE link.document_id = doc.id
        ) AS doc_project_names,
        STRING_AGG(chunk.content, E'\n\n' ORDER BY chunk.chunk_index) AS full_content,
        COUNT(*)::INT               AS chunk_count,
        SUM(chunk.char_count)::INT  AS total_chars,
        (
            SELECT COUNT(*)::INT
            FROM cerefox_document_versions AS ver
            WHERE ver.document_id = doc.id
        ) AS version_count
    FROM cerefox_documents AS doc
    INNER JOIN cerefox_chunks AS chunk
        ON chunk.document_id = doc.id
    WHERE doc.id = p_document_id
      AND chunk.version_id IS NULL
    GROUP BY
        doc.id,
        doc.title,
        doc.source,
        doc.metadata;
$$;
464
+
465
+
466
-- cerefox_get_document: fetch a document's text, current or at a given version.
--
-- With p_version_id NULL the current chunks (version_id IS NULL) are used;
-- otherwise only chunks tagged with that version id. Chunks are joined in
-- chunk_index order with blank lines between them. version_id in the result
-- echoes p_version_id, and created_at is the document row's created_at.
-- Yields no row when no chunk matches (inner join).
CREATE FUNCTION cerefox_get_document(
    p_document_id UUID,
    p_version_id  UUID DEFAULT NULL
)
RETURNS TABLE (
    document_id       UUID,
    doc_title         TEXT,
    doc_source        TEXT,
    doc_metadata      JSONB,
    doc_project_ids   UUID[],
    doc_project_names TEXT[],
    version_id        UUID,
    full_content      TEXT,
    chunk_count       INT,
    total_chars       INT,
    created_at        TIMESTAMPTZ
)
LANGUAGE sql
SECURITY DEFINER
STABLE
SET search_path = public, pg_catalog
AS $$
    SELECT
        doc.id       AS document_id,
        doc.title    AS doc_title,
        doc.source   AS doc_source,
        doc.metadata AS doc_metadata,
        ARRAY(
            SELECT link.project_id
            FROM cerefox_document_projects AS link
            WHERE link.document_id = doc.id
        ) AS doc_project_ids,
        ARRAY(
            SELECT proj.name
            FROM cerefox_projects AS proj
            INNER JOIN cerefox_document_projects AS link
                ON proj.id = link.project_id
            WHERE link.document_id = doc.id
        ) AS doc_project_names,
        p_version_id AS version_id,
        STRING_AGG(chunk.content, E'\n\n' ORDER BY chunk.chunk_index) AS full_content,
        COUNT(*)::INT              AS chunk_count,
        SUM(chunk.char_count)::INT AS total_chars,
        doc.created_at
    FROM cerefox_documents AS doc
    INNER JOIN cerefox_chunks AS chunk
        ON chunk.document_id = doc.id
    WHERE doc.id = p_document_id
      AND (
            -- Current content requested.
            (p_version_id IS NULL AND chunk.version_id IS NULL)
            -- Specific archived version requested.
         OR (p_version_id IS NOT NULL AND chunk.version_id = p_version_id)
          )
    GROUP BY
        doc.id,
        doc.title,
        doc.source,
        doc.metadata,
        doc.created_at;
$$;
512
+
513
+ -- ── 4. New RPC: cerefox_list_projects ────────────────────────────────────────
514
+
515
-- cerefox_list_projects: return every project's id, name and description,
-- sorted by name.
CREATE OR REPLACE FUNCTION cerefox_list_projects()
RETURNS TABLE (
    id          UUID,
    name        TEXT,
    description TEXT
)
LANGUAGE sql
SECURITY DEFINER
STABLE
SET search_path = public, pg_catalog
AS $$
    SELECT
        proj.id,
        proj.name,
        proj.description
    FROM cerefox_projects AS proj
    ORDER BY proj.name ASC;
$$;
530
+
531
+ -- ── 5. New RPC: cerefox_metadata_search ──────────────────────────────────────
532
-- cerefox_metadata_search: query documents by metadata key-value criteria
-- without a text search term.
-- Uses JSONB containment (@>); per the original note this is expected to
-- leverage the existing GIN index on cerefox_documents.metadata (the index
-- is defined outside this migration).
--
-- Parameters:
--   p_metadata_filter  JSONB that d.metadata must contain. A NULL filter
--                      matches nothing, because `@>` against NULL is NULL.
--   p_project_id       Optional: restrict to documents linked to this project.
--   p_updated_since    Optional lower bound (inclusive) on d.updated_at.
--   p_created_since    Optional lower bound (inclusive) on d.created_at.
--   p_limit            Maximum number of documents considered.
--   p_include_content  When TRUE, return the current chunks' text joined in
--                      chunk_index order; otherwise content is NULL.
--   p_max_bytes        Optional cumulative byte budget for returned content.
--                      Only enforced when content is included; output stops
--                      at the first row that would exceed the budget.
--
-- Rows are ordered by updated_at, most recent first.
--
-- Declared STABLE: the body only reads tables, matching the volatility label
-- already used by the SQL-language RPCs in this migration.
CREATE OR REPLACE FUNCTION cerefox_metadata_search(
    p_metadata_filter JSONB,
    p_project_id      UUID        DEFAULT NULL,
    p_updated_since   TIMESTAMPTZ DEFAULT NULL,
    p_created_since   TIMESTAMPTZ DEFAULT NULL,
    p_limit           INT         DEFAULT 10,
    p_include_content BOOLEAN     DEFAULT FALSE,
    p_max_bytes       INT         DEFAULT NULL
)
RETURNS TABLE (
    document_id   UUID,
    title         TEXT,
    doc_metadata  JSONB,
    review_status TEXT,
    source        TEXT,
    created_at    TIMESTAMPTZ,
    updated_at    TIMESTAMPTZ,
    total_chars   INT,
    chunk_count   INT,
    project_ids   UUID[],
    project_names TEXT[],
    version_count INT,
    content       TEXT
)
LANGUAGE plpgsql
SECURITY DEFINER
STABLE
SET search_path = public, pg_catalog
AS $$
DECLARE
    v_bytes_used INT := 0;   -- content bytes emitted so far
    v_row        RECORD;
    v_row_bytes  INT;
BEGIN
    FOR v_row IN
        SELECT
            d.id AS document_id,
            d.title,
            d.metadata AS doc_metadata,
            d.review_status,
            d.source,
            d.created_at,
            d.updated_at,
            d.total_chars,
            d.chunk_count,
            ARRAY(SELECT dp.project_id FROM cerefox_document_projects dp
                  WHERE dp.document_id = d.id) AS project_ids,
            ARRAY(SELECT p.name FROM cerefox_projects p
                  JOIN cerefox_document_projects dp ON p.id = dp.project_id
                  WHERE dp.document_id = d.id) AS project_names,
            (SELECT COUNT(*)::INT FROM cerefox_document_versions dv
             WHERE dv.document_id = d.id) AS version_count,
            CASE WHEN p_include_content THEN
                (SELECT STRING_AGG(c.content, E'\n\n' ORDER BY c.chunk_index)
                 FROM cerefox_chunks c
                 WHERE c.document_id = d.id AND c.version_id IS NULL)
            ELSE NULL END AS content
        FROM cerefox_documents d
        WHERE d.metadata @> p_metadata_filter
          AND (p_project_id IS NULL OR EXISTS (
                SELECT 1 FROM cerefox_document_projects dp
                WHERE dp.document_id = d.id AND dp.project_id = p_project_id
              ))
          AND (p_updated_since IS NULL OR d.updated_at >= p_updated_since)
          AND (p_created_since IS NULL OR d.created_at >= p_created_since)
        ORDER BY d.updated_at DESC
        LIMIT p_limit
    LOOP
        -- Byte budget enforcement (when p_max_bytes is set and content is included)
        IF p_max_bytes IS NOT NULL AND p_include_content AND v_row.content IS NOT NULL THEN
            v_row_bytes := octet_length(v_row.content);
            IF v_bytes_used + v_row_bytes > p_max_bytes THEN
                EXIT;  -- stop emitting rows
            END IF;
            v_bytes_used := v_bytes_used + v_row_bytes;
        END IF;

        -- Copy the record into the RETURNS TABLE output columns and emit it.
        document_id   := v_row.document_id;
        title         := v_row.title;
        doc_metadata  := v_row.doc_metadata;
        review_status := v_row.review_status;
        source        := v_row.source;
        created_at    := v_row.created_at;
        updated_at    := v_row.updated_at;
        total_chars   := v_row.total_chars;
        chunk_count   := v_row.chunk_count;
        project_ids   := v_row.project_ids;
        project_names := v_row.project_names;
        version_count := v_row.version_count;
        content       := v_row.content;
        RETURN NEXT;
    END LOOP;
END;
$$;