@cerefox/memory 0.7.2 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/cerefox.js +1357 -361
- package/dist/frontend/assets/{index-BzAPcCXA.js → index-CAp2_lFX.js} +2 -2
- package/dist/frontend/assets/index-CAp2_lFX.js.map +1 -0
- package/dist/frontend/index.html +1 -1
- package/dist/server-assets/_shared/ef-meta/index.ts +97 -0
- package/dist/server-assets/_shared/embeddings/index.ts +175 -0
- package/dist/server-assets/_shared/mcp-tools/_chunker.ts +187 -0
- package/dist/server-assets/_shared/mcp-tools/_projects.ts +121 -0
- package/dist/server-assets/_shared/mcp-tools/_utils.ts +73 -0
- package/dist/server-assets/_shared/mcp-tools/audit-log.ts +95 -0
- package/dist/server-assets/_shared/mcp-tools/get-document.ts +73 -0
- package/dist/server-assets/_shared/mcp-tools/get-help-content.ts +26 -0
- package/dist/server-assets/_shared/mcp-tools/get-help.ts +90 -0
- package/dist/server-assets/_shared/mcp-tools/index.ts +67 -0
- package/dist/server-assets/_shared/mcp-tools/ingest.ts +315 -0
- package/dist/server-assets/_shared/mcp-tools/list-metadata-keys.ts +55 -0
- package/dist/server-assets/_shared/mcp-tools/list-projects.ts +59 -0
- package/dist/server-assets/_shared/mcp-tools/list-versions.ts +72 -0
- package/dist/server-assets/_shared/mcp-tools/metadata-search.ts +154 -0
- package/dist/server-assets/_shared/mcp-tools/search.ts +193 -0
- package/dist/server-assets/_shared/mcp-tools/set-document-projects.ts +163 -0
- package/dist/server-assets/_shared/mcp-tools/types.ts +92 -0
- package/dist/server-assets/db/migrations/0003_add_document_versions.sql +91 -0
- package/dist/server-assets/db/migrations/0004_add_audit_log_review_status_archived.sql +71 -0
- package/dist/server-assets/db/migrations/0005_metadata_search.sql +628 -0
- package/dist/server-assets/db/migrations/0006_usage_log.sql +255 -0
- package/dist/server-assets/db/migrations/0007_usage_log_requestor.sql +178 -0
- package/dist/server-assets/db/migrations/0008_soft_delete.sql +130 -0
- package/dist/server-assets/db/migrations/0009_audit_log_restore_operation.sql +20 -0
- package/dist/server-assets/db/migrations/0010_requestor_enforcement_config.sql +12 -0
- package/dist/server-assets/db/migrations/0011_title_boosting.sql +48 -0
- package/dist/server-assets/db/rpcs.sql +1723 -0
- package/dist/server-assets/db/schema.sql +380 -0
- package/dist/server-assets/supabase/functions/cerefox-get-audit-log/index.ts +117 -0
- package/dist/server-assets/supabase/functions/cerefox-get-document/index.ts +138 -0
- package/dist/server-assets/supabase/functions/cerefox-ingest/index.ts +819 -0
- package/dist/server-assets/supabase/functions/cerefox-list-projects/index.ts +96 -0
- package/dist/server-assets/supabase/functions/cerefox-list-versions/index.ts +113 -0
- package/dist/server-assets/supabase/functions/cerefox-mcp/index.ts +294 -0
- package/dist/server-assets/supabase/functions/cerefox-mcp/shared.ts +42 -0
- package/dist/server-assets/supabase/functions/cerefox-metadata/index.ts +99 -0
- package/dist/server-assets/supabase/functions/cerefox-metadata-search/index.ts +146 -0
- package/dist/server-assets/supabase/functions/cerefox-search/index.ts +382 -0
- package/docs/guides/connect-agents.md +78 -3
- package/docs/guides/migration-v0.5.md +50 -0
- package/docs/guides/quickstart.md +6 -2
- package/package.json +3 -2
- package/dist/frontend/assets/index-BzAPcCXA.js.map +0 -1
|
@@ -0,0 +1,628 @@
|
|
|
1
|
+
-- Migration 0005: Metadata search + project name standardisation (Iteration 16B)
|
|
2
|
+
--
|
|
3
|
+
-- Changes:
|
|
4
|
+
-- 1. Add doc_project_names TEXT[] to all chunk-level search RPCs (hybrid, fts, semantic)
|
|
5
|
+
-- 2. Add doc_project_names TEXT[] to document-level RPCs (search_docs, reconstruct_doc, get_document)
|
|
6
|
+
-- 3. New cerefox_list_projects() RPC
|
|
7
|
+
-- 4. New cerefox_metadata_search() RPC
|
|
8
|
+
--
|
|
9
|
+
-- All DROP + CREATE pairs are required because RETURNS TABLE signature changes
|
|
10
|
+
-- cannot be applied via CREATE OR REPLACE.
|
|
11
|
+
|
|
12
|
+
-- ── 1. Drop existing signatures before recreating with project_names ─────────
|
|
13
|
+
|
|
14
|
+
-- Chunk-level RPCs
|
|
15
|
+
-- Remove the previous definitions so the functions can be recreated with a
-- different RETURNS TABLE shape. IF EXISTS keeps each statement a no-op when
-- the function is not present.

-- Chunk-level search RPCs.
DROP FUNCTION IF EXISTS cerefox_hybrid_search(
    TEXT,
    VECTOR(768),
    INT,
    FLOAT,
    BOOLEAN,
    UUID,
    FLOAT,
    JSONB
);

DROP FUNCTION IF EXISTS cerefox_fts_search(
    TEXT,
    INT,
    UUID,
    JSONB
);

DROP FUNCTION IF EXISTS cerefox_semantic_search(
    VECTOR(768),
    INT,
    BOOLEAN,
    UUID,
    FLOAT,
    JSONB
);

-- Document-level RPCs.
DROP FUNCTION IF EXISTS cerefox_search_docs(
    TEXT,
    VECTOR(768),
    INT,
    FLOAT,
    UUID,
    FLOAT,
    INT,
    INT,
    JSONB
);

DROP FUNCTION IF EXISTS cerefox_reconstruct_doc(
    UUID
);

DROP FUNCTION IF EXISTS cerefox_get_document(
    UUID,
    UUID
);
|
|
23
|
+
|
|
24
|
+
-- ── 2. Recreate chunk-level RPCs with project_names TEXT[] ───────────────────
|
|
25
|
+
|
|
26
|
+
-- cerefox_hybrid_search: chunk-level search that blends a full-text score
-- with a vector-similarity score.
--
-- Parameters:
--   p_query_text       text query, parsed with websearch_to_tsquery('english', ...)
--   p_query_embedding  query vector, compared to chunk embeddings with <=>
--   p_match_count      maximum number of rows returned (default 10); each of
--                      the two candidate lists is capped at 5x this value
--   p_alpha            weight of the vector score; the FTS score is weighted
--                      by (1 - p_alpha)
--   p_use_upgrade      when TRUE, use embedding_upgrade where it is non-NULL
--                      and fall back to embedding_primary otherwise
--   p_project_id       optional; restrict to documents linked to this project
--   p_min_score        minimum vector score for rows WITHOUT a full-text match
--   p_metadata_filter  optional; document metadata must contain this JSONB
--
-- Returns one row per matching current chunk (version_id IS NULL), highest
-- combined score first, together with the owning document's title, source,
-- metadata, project ids/names and version count.
--
-- Volatility: the body is a single read-only query, so the function is
-- labelled STABLE. cerefox_search_docs (STABLE) calls this function.
CREATE OR REPLACE FUNCTION cerefox_hybrid_search(
    p_query_text       TEXT,
    p_query_embedding  VECTOR(768),
    p_match_count      INT     DEFAULT 10,
    p_alpha            FLOAT   DEFAULT 0.7,
    p_use_upgrade      BOOLEAN DEFAULT FALSE,
    p_project_id       UUID    DEFAULT NULL,
    p_min_score        FLOAT   DEFAULT 0.0,
    p_metadata_filter  JSONB   DEFAULT NULL
)
RETURNS TABLE (
    chunk_id          UUID,
    document_id       UUID,
    chunk_index       INT,
    title             TEXT,
    content           TEXT,
    heading_path      TEXT[],
    heading_level     INT,
    score             FLOAT,
    doc_title         TEXT,
    doc_source        TEXT,
    doc_project_ids   UUID[],
    doc_project_names TEXT[],
    doc_metadata      JSONB,
    version_count     INT
)
LANGUAGE plpgsql
SECURITY DEFINER
STABLE
SET search_path = public, pg_catalog
AS $$
DECLARE
    query_fts       tsquery := websearch_to_tsquery('english', p_query_text);
    -- Over-fetch on each side so the blended ranking has enough candidates.
    candidate_count INT     := p_match_count * 5;
BEGIN
    RETURN QUERY
    WITH
    -- Full-text candidates: current chunks whose tsvector matches the query.
    fts_results AS (
        SELECT
            c.id,
            ts_rank_cd(c.fts, query_fts)::FLOAT AS fts_score
        FROM cerefox_chunks c
        JOIN cerefox_documents d ON c.document_id = d.id
        WHERE c.version_id IS NULL
          AND c.fts @@ query_fts
          AND (p_project_id IS NULL OR EXISTS (
                SELECT 1 FROM cerefox_document_projects dp
                WHERE dp.document_id = d.id AND dp.project_id = p_project_id
          ))
          AND (p_metadata_filter IS NULL OR d.metadata @> p_metadata_filter)
        ORDER BY fts_score DESC
        LIMIT candidate_count
    ),
    -- Vector candidates: nearest current chunks by <=> distance; the score
    -- is reported as 1 - distance.
    vec_results AS (
        SELECT
            c.id,
            CASE
                WHEN p_use_upgrade AND c.embedding_upgrade IS NOT NULL
                    THEN (1.0 - (c.embedding_upgrade <=> p_query_embedding))::FLOAT
                ELSE
                    (1.0 - (c.embedding_primary <=> p_query_embedding))::FLOAT
            END AS vec_score
        FROM cerefox_chunks c
        JOIN cerefox_documents d ON c.document_id = d.id
        WHERE c.version_id IS NULL
          AND (p_project_id IS NULL OR EXISTS (
                SELECT 1 FROM cerefox_document_projects dp
                WHERE dp.document_id = d.id AND dp.project_id = p_project_id
          ))
          AND (p_metadata_filter IS NULL OR d.metadata @> p_metadata_filter)
        ORDER BY
            CASE
                WHEN p_use_upgrade AND c.embedding_upgrade IS NOT NULL
                    THEN c.embedding_upgrade <=> p_query_embedding
                ELSE c.embedding_primary <=> p_query_embedding
            END
        LIMIT candidate_count
    ),
    -- Union of both candidate sets; a score missing on one side counts as 0.
    combined AS (
        SELECT
            COALESCE(f.id, v.id) AS id,
            ( p_alpha * COALESCE(v.vec_score, 0.0) +
              (1.0 - p_alpha) * COALESCE(f.fts_score, 0.0)
            ) AS score,
            COALESCE(v.vec_score, 0.0) AS vec_score,
            f.id IS NOT NULL AS has_fts_match
        FROM fts_results f
        FULL OUTER JOIN vec_results v ON f.id = v.id
    )
    SELECT
        c.id AS chunk_id,
        c.document_id,
        c.chunk_index,
        c.title,
        c.content,
        c.heading_path,
        c.heading_level,
        cm.score,
        d.title  AS doc_title,
        d.source AS doc_source,
        ARRAY(SELECT dp.project_id FROM cerefox_document_projects dp
              WHERE dp.document_id = d.id) AS doc_project_ids,
        ARRAY(SELECT p.name FROM cerefox_projects p
              JOIN cerefox_document_projects dp ON p.id = dp.project_id
              WHERE dp.document_id = d.id) AS doc_project_names,
        d.metadata AS doc_metadata,
        (SELECT COUNT(*)::INT FROM cerefox_document_versions dv
         WHERE dv.document_id = d.id) AS version_count
    FROM combined cm
    JOIN cerefox_chunks c ON c.id = cm.id
    JOIN cerefox_documents d ON c.document_id = d.id
    -- Full-text hits are always kept; vector-only hits must reach p_min_score.
    WHERE cm.has_fts_match OR cm.vec_score >= p_min_score
    ORDER BY cm.score DESC
    LIMIT p_match_count;
END;
$$;
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
-- cerefox_fts_search: chunk-level full-text search.
--
-- Parameters:
--   p_query_text       text query, parsed with websearch_to_tsquery('english', ...)
--   p_match_count      maximum number of rows returned (default 10)
--   p_project_id       optional; restrict to documents linked to this project
--   p_metadata_filter  optional; document metadata must contain this JSONB
--
-- Returns current chunks (version_id IS NULL) whose fts column matches the
-- query, ordered by ts_rank_cd score descending, together with the owning
-- document's title, source, metadata, project ids/names and version count.
--
-- Volatility: the body is a single read-only query, so the function is
-- labelled STABLE.
CREATE OR REPLACE FUNCTION cerefox_fts_search(
    p_query_text       TEXT,
    p_match_count      INT   DEFAULT 10,
    p_project_id       UUID  DEFAULT NULL,
    p_metadata_filter  JSONB DEFAULT NULL
)
RETURNS TABLE (
    chunk_id          UUID,
    document_id       UUID,
    chunk_index       INT,
    title             TEXT,
    content           TEXT,
    heading_path      TEXT[],
    heading_level     INT,
    score             FLOAT,
    doc_title         TEXT,
    doc_source        TEXT,
    doc_project_ids   UUID[],
    doc_project_names TEXT[],
    doc_metadata      JSONB,
    version_count     INT
)
LANGUAGE plpgsql
SECURITY DEFINER
STABLE
SET search_path = public, pg_catalog
AS $$
DECLARE
    query_fts tsquery := websearch_to_tsquery('english', p_query_text);
BEGIN
    RETURN QUERY
    SELECT
        c.id AS chunk_id,
        c.document_id,
        c.chunk_index,
        c.title,
        c.content,
        c.heading_path,
        c.heading_level,
        ts_rank_cd(c.fts, query_fts)::FLOAT AS score,
        d.title  AS doc_title,
        d.source AS doc_source,
        ARRAY(SELECT dp.project_id FROM cerefox_document_projects dp
              WHERE dp.document_id = d.id) AS doc_project_ids,
        ARRAY(SELECT p.name FROM cerefox_projects p
              JOIN cerefox_document_projects dp ON p.id = dp.project_id
              WHERE dp.document_id = d.id) AS doc_project_names,
        d.metadata AS doc_metadata,
        (SELECT COUNT(*)::INT FROM cerefox_document_versions dv
         WHERE dv.document_id = d.id) AS version_count
    FROM cerefox_chunks c
    JOIN cerefox_documents d ON c.document_id = d.id
    WHERE c.version_id IS NULL
      AND c.fts @@ query_fts
      AND (p_project_id IS NULL OR EXISTS (
            SELECT 1 FROM cerefox_document_projects dp
            WHERE dp.document_id = d.id AND dp.project_id = p_project_id
      ))
      AND (p_metadata_filter IS NULL OR d.metadata @> p_metadata_filter)
    ORDER BY score DESC
    LIMIT p_match_count;
END;
$$;
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
-- cerefox_semantic_search: chunk-level vector-similarity search.
--
-- Parameters:
--   p_query_embedding  query vector, compared to chunk embeddings with <=>
--   p_match_count      maximum number of rows returned (default 10)
--   p_use_upgrade      when TRUE, score against embedding_upgrade and skip
--                      chunks whose embedding_upgrade is NULL; when FALSE,
--                      score against embedding_primary
--   p_project_id       optional; restrict to documents linked to this project
--   p_min_score        rows scoring below this value are dropped
--   p_metadata_filter  optional; document metadata must contain this JSONB
--
-- Returns current chunks (version_id IS NULL) ordered by ascending <=>
-- distance; the reported score is 1 - distance. Each row also carries the
-- owning document's title, source, metadata, project ids/names and version
-- count.
--
-- Volatility: the body is a single read-only query, so the function is
-- labelled STABLE.
CREATE OR REPLACE FUNCTION cerefox_semantic_search(
    p_query_embedding  VECTOR(768),
    p_match_count      INT     DEFAULT 10,
    p_use_upgrade      BOOLEAN DEFAULT FALSE,
    p_project_id       UUID    DEFAULT NULL,
    p_min_score        FLOAT   DEFAULT 0.0,
    p_metadata_filter  JSONB   DEFAULT NULL
)
RETURNS TABLE (
    chunk_id          UUID,
    document_id       UUID,
    chunk_index       INT,
    title             TEXT,
    content           TEXT,
    heading_path      TEXT[],
    heading_level     INT,
    score             FLOAT,
    doc_title         TEXT,
    doc_source        TEXT,
    doc_project_ids   UUID[],
    doc_project_names TEXT[],
    doc_metadata      JSONB,
    version_count     INT
)
LANGUAGE plpgsql
SECURITY DEFINER
STABLE
SET search_path = public, pg_catalog
AS $$
BEGIN
    RETURN QUERY
    SELECT
        c.id AS chunk_id,
        c.document_id,
        c.chunk_index,
        c.title,
        c.content,
        c.heading_path,
        c.heading_level,
        CASE
            WHEN p_use_upgrade AND c.embedding_upgrade IS NOT NULL
                THEN (1.0 - (c.embedding_upgrade <=> p_query_embedding))::FLOAT
            ELSE
                (1.0 - (c.embedding_primary <=> p_query_embedding))::FLOAT
        END AS score,
        d.title  AS doc_title,
        d.source AS doc_source,
        ARRAY(SELECT dp.project_id FROM cerefox_document_projects dp
              WHERE dp.document_id = d.id) AS doc_project_ids,
        ARRAY(SELECT p.name FROM cerefox_projects p
              JOIN cerefox_document_projects dp ON p.id = dp.project_id
              WHERE dp.document_id = d.id) AS doc_project_names,
        d.metadata AS doc_metadata,
        (SELECT COUNT(*)::INT FROM cerefox_document_versions dv
         WHERE dv.document_id = d.id) AS version_count
    FROM cerefox_chunks c
    JOIN cerefox_documents d ON c.document_id = d.id
    WHERE c.version_id IS NULL
      AND (p_project_id IS NULL OR EXISTS (
            SELECT 1 FROM cerefox_document_projects dp
            WHERE dp.document_id = d.id AND dp.project_id = p_project_id
      ))
      AND (p_metadata_filter IS NULL OR d.metadata @> p_metadata_filter)
      -- In upgrade mode, chunks without an upgrade embedding are excluded.
      AND (p_use_upgrade = FALSE OR c.embedding_upgrade IS NOT NULL)
      -- Same score expression as in the SELECT list, used as the threshold.
      AND CASE
            WHEN p_use_upgrade AND c.embedding_upgrade IS NOT NULL
                THEN (1.0 - (c.embedding_upgrade <=> p_query_embedding))::FLOAT
            ELSE (1.0 - (c.embedding_primary <=> p_query_embedding))::FLOAT
          END >= p_min_score
    ORDER BY
        CASE
            WHEN p_use_upgrade AND c.embedding_upgrade IS NOT NULL
                THEN c.embedding_upgrade <=> p_query_embedding
            ELSE c.embedding_primary <=> p_query_embedding
        END
    LIMIT p_match_count;
END;
$$;
|
|
284
|
+
|
|
285
|
+
-- ── 3. Recreate document-level RPCs with project_names TEXT[] ────────────────
|
|
286
|
+
|
|
287
|
+
-- cerefox_search_docs: document-level search built on cerefox_hybrid_search.
--
-- Parameters:
--   p_query_text, p_query_embedding, p_alpha, p_project_id, p_min_score,
--   p_metadata_filter   passed through to cerefox_hybrid_search
--   p_match_count       maximum number of documents returned (default 5);
--                       the chunk search is asked for 10x this many chunks
--   p_small_to_big_threshold
--                       when > 0, documents whose current chunks total more
--                       characters than this return only the content produced
--                       by cerefox_context_expand for their matching chunks
--                       (is_partial = TRUE); other documents return all
--                       current chunks (is_partial = FALSE)
--   p_context_window    passed to cerefox_context_expand
--                       (NOTE(review): presumably the number of neighbouring
--                       chunks on each side — confirm against that function)
--
-- Returns one row per document, best chunk score first. Chunk contents are
-- concatenated in chunk_index order, separated by a blank line.
CREATE OR REPLACE FUNCTION cerefox_search_docs(
    p_query_text             TEXT,
    p_query_embedding        VECTOR(768),
    p_match_count            INT   DEFAULT 5,
    p_alpha                  FLOAT DEFAULT 0.7,
    p_project_id             UUID  DEFAULT NULL,
    p_min_score              FLOAT DEFAULT 0.0,
    p_small_to_big_threshold INT   DEFAULT 20000,
    p_context_window         INT   DEFAULT 1,
    p_metadata_filter        JSONB DEFAULT NULL
)
RETURNS TABLE (
    document_id             UUID,
    doc_title               TEXT,
    doc_source              TEXT,
    doc_metadata            JSONB,
    doc_project_ids         UUID[],
    doc_project_names       TEXT[],
    best_score              FLOAT,
    best_chunk_heading_path TEXT[],
    full_content            TEXT,
    chunk_count             INT,
    total_chars             INT,
    doc_updated_at          TIMESTAMPTZ,
    version_count           INT,
    is_partial              BOOL
)
LANGUAGE sql
SECURITY DEFINER
STABLE
SET search_path = public, pg_catalog
AS $$
    -- Chunk-level hits; only the columns used below are selected.
    WITH chunk_results AS (
        SELECT
            hs.chunk_id,
            hs.document_id,
            hs.heading_path,
            hs.score,
            hs.doc_title,
            hs.doc_source,
            hs.doc_metadata,
            hs.doc_project_ids,
            hs.doc_project_names,
            hs.version_count
        FROM cerefox_hybrid_search(
            p_query_text      := p_query_text,
            p_query_embedding := p_query_embedding,
            p_match_count     := p_match_count * 10,
            p_alpha           := p_alpha,
            p_use_upgrade     := FALSE,
            p_project_id      := p_project_id,
            p_min_score       := p_min_score,
            p_metadata_filter := p_metadata_filter
        ) AS hs
    ),
    -- Highest-scoring chunk of each document.
    best_per_doc AS (
        SELECT DISTINCT ON (cr.document_id)
            cr.document_id,
            cr.heading_path AS best_chunk_heading_path,
            cr.score        AS best_score,
            cr.doc_title,
            cr.doc_source,
            cr.doc_metadata,
            cr.doc_project_ids,
            cr.doc_project_names,
            cr.version_count,
            d.updated_at    AS doc_updated_at
        FROM chunk_results cr
        JOIN cerefox_documents d ON d.id = cr.document_id
        ORDER BY cr.document_id, cr.score DESC
    ),
    -- The p_match_count best documents.
    top_docs AS (
        SELECT
            bpd.document_id,
            bpd.best_chunk_heading_path,
            bpd.best_score,
            bpd.doc_title,
            bpd.doc_source,
            bpd.doc_metadata,
            bpd.doc_project_ids,
            bpd.doc_project_names,
            bpd.version_count,
            bpd.doc_updated_at
        FROM best_per_doc bpd
        ORDER BY bpd.best_score DESC
        LIMIT p_match_count
    ),
    -- Total character count of each top document's current chunks.
    doc_sizes AS (
        SELECT c.document_id, SUM(c.char_count)::INT AS total_chars
        FROM cerefox_chunks c
        WHERE c.document_id IN (SELECT td.document_id FROM top_docs td)
          AND c.version_id IS NULL
        GROUP BY c.document_id
    ),
    -- Matching chunks that belong to documents above the size threshold.
    large_doc_seeds AS (
        SELECT cr.chunk_id
        FROM chunk_results cr
        JOIN doc_sizes ds ON cr.document_id = ds.document_id
        WHERE p_small_to_big_threshold > 0
          AND ds.total_chars > p_small_to_big_threshold
          AND cr.document_id IN (SELECT td.document_id FROM top_docs td)
    ),
    expanded AS (
        SELECT ec.chunk_id, ec.document_id, ec.chunk_index, ec.content
        FROM cerefox_context_expand(
            -- ARRAY_AGG over zero rows is NULL; pass an empty array instead.
            COALESCE((SELECT ARRAY_AGG(lds.chunk_id) FROM large_doc_seeds lds), ARRAY[]::UUID[]),
            p_context_window
        ) ec
    ),
    large_doc_content AS (
        SELECT
            e.document_id,
            STRING_AGG(e.content, E'\n\n' ORDER BY e.chunk_index) AS full_content,
            COUNT(*)::INT AS chunk_count,
            TRUE AS is_partial
        FROM expanded e
        GROUP BY e.document_id
    ),
    small_doc_content AS (
        SELECT
            c.document_id,
            STRING_AGG(c.content, E'\n\n' ORDER BY c.chunk_index) AS full_content,
            COUNT(*)::INT AS chunk_count,
            FALSE AS is_partial
        FROM cerefox_chunks c
        WHERE c.document_id IN (SELECT td.document_id FROM top_docs td)
          -- NOT EXISTS rather than NOT IN: a NULL document_id coming back
          -- from the expansion would make NOT IN reject every row.
          AND NOT EXISTS (
                SELECT 1
                FROM large_doc_content ldc
                WHERE ldc.document_id = c.document_id
          )
          AND c.version_id IS NULL
        GROUP BY c.document_id
    ),
    -- The two sets are disjoint by construction, so UNION ALL is sufficient.
    all_content AS (
        SELECT ldc.document_id, ldc.full_content, ldc.chunk_count, ldc.is_partial
        FROM large_doc_content ldc
        UNION ALL
        SELECT sdc.document_id, sdc.full_content, sdc.chunk_count, sdc.is_partial
        FROM small_doc_content sdc
    )
    SELECT
        td.document_id,
        td.doc_title,
        td.doc_source,
        td.doc_metadata,
        td.doc_project_ids,
        td.doc_project_names,
        td.best_score,
        td.best_chunk_heading_path,
        ac.full_content,
        ac.chunk_count,
        ds.total_chars,
        td.doc_updated_at,
        td.version_count,
        ac.is_partial
    FROM top_docs td
    JOIN doc_sizes ds ON ds.document_id = td.document_id
    JOIN all_content ac ON ac.document_id = td.document_id
    ORDER BY td.best_score DESC;
$$;
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
-- cerefox_reconstruct_doc: rebuild the current text of one document.
--
-- Parameters:
--   p_document_id  id of the document to reconstruct
--
-- Returns at most one row: the document's title, source, metadata, project
-- ids/names, the contents of its current chunks (version_id IS NULL) joined
-- in chunk_index order with a blank line between them, the number of those
-- chunks, their summed char_count, and the number of rows the document has
-- in cerefox_document_versions. No row is returned when the document has no
-- current chunks.
--
-- Uses CREATE OR REPLACE like the other definitions in this migration.
CREATE OR REPLACE FUNCTION cerefox_reconstruct_doc(
    p_document_id UUID
)
RETURNS TABLE (
    document_id       UUID,
    doc_title         TEXT,
    doc_source        TEXT,
    doc_metadata      JSONB,
    doc_project_ids   UUID[],
    doc_project_names TEXT[],
    full_content      TEXT,
    chunk_count       INT,
    total_chars       INT,
    version_count     INT
)
LANGUAGE sql
SECURITY DEFINER
STABLE
SET search_path = public, pg_catalog
AS $$
    SELECT
        d.id       AS document_id,
        d.title    AS doc_title,
        d.source   AS doc_source,
        d.metadata AS doc_metadata,
        ARRAY(SELECT dp.project_id FROM cerefox_document_projects dp
              WHERE dp.document_id = d.id) AS doc_project_ids,
        ARRAY(SELECT p.name FROM cerefox_projects p
              JOIN cerefox_document_projects dp ON p.id = dp.project_id
              WHERE dp.document_id = d.id) AS doc_project_names,
        STRING_AGG(c.content, E'\n\n' ORDER BY c.chunk_index) AS full_content,
        COUNT(*)::INT          AS chunk_count,
        SUM(c.char_count)::INT AS total_chars,
        (SELECT COUNT(*)::INT FROM cerefox_document_versions dv
         WHERE dv.document_id = d.id) AS version_count
    FROM cerefox_documents d
    JOIN cerefox_chunks c ON c.document_id = d.id
    WHERE d.id = p_document_id
      AND c.version_id IS NULL
    GROUP BY d.id, d.title, d.source, d.metadata;
$$;
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
-- cerefox_get_document: fetch the text of one document, either its current
-- content or the content stored under a specific version.
--
-- Parameters:
--   p_document_id  id of the document
--   p_version_id   optional; when NULL the current chunks (version_id IS NULL)
--                  are used, otherwise the chunks whose version_id equals it
--
-- Returns at most one row: document title, source, metadata, project
-- ids/names, the requested version id (echoed from p_version_id), the
-- selected chunk contents joined in chunk_index order with a blank line
-- between them, the chunk count, the summed char_count, and the document
-- row's created_at. No row is returned when no chunk matches.
--
-- Uses CREATE OR REPLACE like the other definitions in this migration.
CREATE OR REPLACE FUNCTION cerefox_get_document(
    p_document_id UUID,
    p_version_id  UUID DEFAULT NULL
)
RETURNS TABLE (
    document_id       UUID,
    doc_title         TEXT,
    doc_source        TEXT,
    doc_metadata      JSONB,
    doc_project_ids   UUID[],
    doc_project_names TEXT[],
    version_id        UUID,
    full_content      TEXT,
    chunk_count       INT,
    total_chars       INT,
    created_at        TIMESTAMPTZ
)
LANGUAGE sql
SECURITY DEFINER
STABLE
SET search_path = public, pg_catalog
AS $$
    SELECT
        d.id       AS document_id,
        d.title    AS doc_title,
        d.source   AS doc_source,
        d.metadata AS doc_metadata,
        ARRAY(SELECT dp.project_id FROM cerefox_document_projects dp
              WHERE dp.document_id = d.id) AS doc_project_ids,
        ARRAY(SELECT p.name FROM cerefox_projects p
              JOIN cerefox_document_projects dp ON p.id = dp.project_id
              WHERE dp.document_id = d.id) AS doc_project_names,
        p_version_id AS version_id,
        STRING_AGG(c.content, E'\n\n' ORDER BY c.chunk_index) AS full_content,
        COUNT(*)::INT          AS chunk_count,
        SUM(c.char_count)::INT AS total_chars,
        d.created_at
    FROM cerefox_documents d
    JOIN cerefox_chunks c ON c.document_id = d.id
    WHERE d.id = p_document_id
      AND (
            (p_version_id IS NULL AND c.version_id IS NULL) OR
            (p_version_id IS NOT NULL AND c.version_id = p_version_id)
      )
    GROUP BY d.id, d.title, d.source, d.metadata, d.created_at;
$$;
|
|
512
|
+
|
|
513
|
+
-- ── 4. New RPC: cerefox_list_projects ────────────────────────────────────────
|
|
514
|
+
|
|
515
|
+
-- cerefox_list_projects: return the id, name and description of every row
-- in cerefox_projects, sorted by name.
CREATE OR REPLACE FUNCTION cerefox_list_projects()
RETURNS TABLE (
    id          UUID,
    name        TEXT,
    description TEXT
)
LANGUAGE sql
SECURITY DEFINER
STABLE
SET search_path = public, pg_catalog
AS $$
    SELECT
        proj.id,
        proj.name,
        proj.description
    FROM cerefox_projects AS proj
    ORDER BY proj.name ASC;
$$;
|
|
530
|
+
|
|
531
|
+
-- ── 5. New RPC: cerefox_metadata_search ──────────────────────────────────────
|
|
532
|
+
-- Query documents by metadata key-value criteria without a text search term.
|
|
533
|
+
-- Uses JSONB containment (@>) which leverages the existing GIN index on
|
|
534
|
+
-- cerefox_documents.metadata.
|
|
535
|
+
|
|
536
|
+
-- cerefox_metadata_search: list documents whose metadata contains a given
-- JSONB value, most recently updated first.
--
-- Parameters:
--   p_metadata_filter  required; document metadata must contain this JSONB (@>)
--   p_project_id       optional; restrict to documents linked to this project
--   p_updated_since    optional; keep documents with updated_at >= this value
--   p_created_since    optional; keep documents with created_at >= this value
--   p_limit            maximum number of documents considered (default 10)
--   p_include_content  when TRUE, also return the current chunks' contents
--                      joined in chunk_index order with a blank line between
--                      them; otherwise content is NULL
--   p_max_bytes        optional byte budget, applied only when content is
--                      included: rows are emitted in order until the next
--                      row's content would push the running octet_length
--                      total past the budget; that row and all later rows
--                      are not returned
--
-- Volatility: the body only reads, so the function is labelled STABLE.
CREATE OR REPLACE FUNCTION cerefox_metadata_search(
    p_metadata_filter  JSONB,
    p_project_id       UUID        DEFAULT NULL,
    p_updated_since    TIMESTAMPTZ DEFAULT NULL,
    p_created_since    TIMESTAMPTZ DEFAULT NULL,
    p_limit            INT         DEFAULT 10,
    p_include_content  BOOLEAN     DEFAULT FALSE,
    p_max_bytes        INT         DEFAULT NULL
)
RETURNS TABLE (
    document_id   UUID,
    title         TEXT,
    doc_metadata  JSONB,
    review_status TEXT,
    source        TEXT,
    created_at    TIMESTAMPTZ,
    updated_at    TIMESTAMPTZ,
    total_chars   INT,
    chunk_count   INT,
    project_ids   UUID[],
    project_names TEXT[],
    version_count INT,
    content       TEXT
)
LANGUAGE plpgsql
SECURITY DEFINER
STABLE
SET search_path = public, pg_catalog
AS $$
DECLARE
    v_bytes_used INT := 0;   -- content bytes emitted so far
    v_row        RECORD;
    v_row_bytes  INT;
BEGIN
    -- Every column reference in the query is table-qualified because the
    -- RETURNS TABLE column names are also PL/pgSQL variables in this scope.
    FOR v_row IN
        SELECT
            d.id       AS document_id,
            d.title,
            d.metadata AS doc_metadata,
            d.review_status,
            d.source,
            d.created_at,
            d.updated_at,
            d.total_chars,
            d.chunk_count,
            ARRAY(SELECT dp.project_id FROM cerefox_document_projects dp
                  WHERE dp.document_id = d.id) AS project_ids,
            ARRAY(SELECT p.name FROM cerefox_projects p
                  JOIN cerefox_document_projects dp ON p.id = dp.project_id
                  WHERE dp.document_id = d.id) AS project_names,
            (SELECT COUNT(*)::INT FROM cerefox_document_versions dv
             WHERE dv.document_id = d.id) AS version_count,
            CASE WHEN p_include_content THEN
                (SELECT STRING_AGG(c.content, E'\n\n' ORDER BY c.chunk_index)
                 FROM cerefox_chunks c
                 WHERE c.document_id = d.id AND c.version_id IS NULL)
            ELSE NULL END AS content
        FROM cerefox_documents d
        WHERE d.metadata @> p_metadata_filter
          AND (p_project_id IS NULL OR EXISTS (
                SELECT 1 FROM cerefox_document_projects dp
                WHERE dp.document_id = d.id AND dp.project_id = p_project_id
          ))
          AND (p_updated_since IS NULL OR d.updated_at >= p_updated_since)
          AND (p_created_since IS NULL OR d.created_at >= p_created_since)
        -- d.id breaks ties so the rows kept by LIMIT are deterministic when
        -- several documents share the same updated_at.
        ORDER BY d.updated_at DESC, d.id
        LIMIT p_limit
    LOOP
        -- Byte budget enforcement (when p_max_bytes is set and content is included)
        IF p_max_bytes IS NOT NULL AND p_include_content AND v_row.content IS NOT NULL THEN
            v_row_bytes := octet_length(v_row.content);
            IF v_bytes_used + v_row_bytes > p_max_bytes THEN
                EXIT;  -- stop emitting rows
            END IF;
            v_bytes_used := v_bytes_used + v_row_bytes;
        END IF;

        -- Copy the row into the output columns and emit it.
        document_id   := v_row.document_id;
        title         := v_row.title;
        doc_metadata  := v_row.doc_metadata;
        review_status := v_row.review_status;
        source        := v_row.source;
        created_at    := v_row.created_at;
        updated_at    := v_row.updated_at;
        total_chars   := v_row.total_chars;
        chunk_count   := v_row.chunk_count;
        project_ids   := v_row.project_ids;
        project_names := v_row.project_names;
        version_count := v_row.version_count;
        content       := v_row.content;
        RETURN NEXT;
    END LOOP;
END;
$$;
|