remdb 0.3.171__py3-none-any.whl → 0.3.230__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rem/agentic/README.md +36 -2
- rem/agentic/context.py +173 -0
- rem/agentic/context_builder.py +12 -2
- rem/agentic/mcp/tool_wrapper.py +39 -16
- rem/agentic/providers/pydantic_ai.py +78 -45
- rem/agentic/schema.py +6 -5
- rem/agentic/tools/rem_tools.py +11 -0
- rem/api/main.py +1 -1
- rem/api/mcp_router/resources.py +75 -14
- rem/api/mcp_router/server.py +31 -24
- rem/api/mcp_router/tools.py +621 -166
- rem/api/routers/admin.py +30 -4
- rem/api/routers/auth.py +114 -15
- rem/api/routers/chat/child_streaming.py +379 -0
- rem/api/routers/chat/completions.py +74 -37
- rem/api/routers/chat/sse_events.py +7 -3
- rem/api/routers/chat/streaming.py +352 -257
- rem/api/routers/chat/streaming_utils.py +327 -0
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +7 -1
- rem/api/routers/feedback.py +9 -1
- rem/api/routers/messages.py +176 -38
- rem/api/routers/models.py +9 -1
- rem/api/routers/query.py +12 -1
- rem/api/routers/shared_sessions.py +16 -0
- rem/auth/jwt.py +19 -4
- rem/auth/middleware.py +42 -28
- rem/cli/README.md +62 -0
- rem/cli/commands/ask.py +61 -81
- rem/cli/commands/db.py +148 -70
- rem/cli/commands/process.py +171 -43
- rem/models/entities/ontology.py +91 -101
- rem/schemas/agents/rem.yaml +1 -1
- rem/services/content/service.py +18 -5
- rem/services/email/service.py +11 -2
- rem/services/embeddings/worker.py +26 -12
- rem/services/postgres/__init__.py +28 -3
- rem/services/postgres/diff_service.py +57 -5
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +2 -2
- rem/services/postgres/register_type.py +12 -11
- rem/services/postgres/repository.py +39 -29
- rem/services/postgres/schema_generator.py +5 -5
- rem/services/postgres/sql_builder.py +6 -5
- rem/services/session/__init__.py +8 -1
- rem/services/session/compression.py +40 -2
- rem/services/session/pydantic_messages.py +292 -0
- rem/settings.py +34 -0
- rem/sql/background_indexes.sql +5 -0
- rem/sql/migrations/001_install.sql +157 -10
- rem/sql/migrations/002_install_models.sql +160 -132
- rem/sql/migrations/004_cache_system.sql +7 -275
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/model_helpers.py +101 -0
- rem/utils/schema_loader.py +79 -51
- {remdb-0.3.171.dist-info → remdb-0.3.230.dist-info}/METADATA +2 -2
- {remdb-0.3.171.dist-info → remdb-0.3.230.dist-info}/RECORD +59 -53
- {remdb-0.3.171.dist-info → remdb-0.3.230.dist-info}/WHEEL +0 -0
- {remdb-0.3.171.dist-info → remdb-0.3.230.dist-info}/entry_points.txt +0 -0
rem/sql/migrations/004_cache_system.sql
CHANGED

@@ -1,16 +1,15 @@
 -- REM Cache System
--- Description:
+-- Description: Cache management helpers for UNLOGGED tables (kv_store)
 -- Version: 1.0.0
 -- Date: 2025-11-29
 --
 -- This migration adds:
 -- 1. cache_system_state table for debouncing and API secret storage
 -- 2. maybe_trigger_kv_rebuild() function for async rebuild triggering
--- 3.
+-- 3. Helper functions for cache management
 --
---
---
--- Priority: pg_net (if available) → dblink (always available)
+-- NOTE: Core functions (rem_lookup, rem_fuzzy, rem_traverse) are defined in 001_install.sql
+-- This file only provides cache-specific infrastructure.
 
 -- ============================================================================
 -- REQUIRED EXTENSION
@@ -194,271 +193,6 @@ $$ LANGUAGE plpgsql;
 COMMENT ON FUNCTION maybe_trigger_kv_rebuild IS
 'Async trigger for kv_store rebuild. Uses pg_net (API) or dblink (SQL). Includes debouncing.';
 
--- ============================================================================
--- UPDATED: rem_lookup with self-healing
--- ============================================================================
-
-CREATE OR REPLACE FUNCTION rem_lookup(
-    p_entity_key VARCHAR(255),
-    p_tenant_id VARCHAR(100),
-    p_user_id VARCHAR(100)
-)
-RETURNS TABLE(
-    entity_type VARCHAR(100),
-    data JSONB
-) AS $$
-DECLARE
-    entity_table VARCHAR(100);
-    query_sql TEXT;
-    effective_user_id VARCHAR(100);
-    v_result_count INTEGER := 0;
-BEGIN
-    effective_user_id := COALESCE(p_user_id, p_tenant_id);
-
-    -- First lookup in KV store to get entity_type (table name)
-    SELECT kv.entity_type INTO entity_table
-    FROM kv_store kv
-    WHERE (kv.user_id = effective_user_id OR kv.user_id IS NULL)
-      AND kv.entity_key = p_entity_key
-    LIMIT 1;
-
-    -- If not found, check if cache is empty and maybe trigger rebuild
-    IF entity_table IS NULL THEN
-        -- SELF-HEALING: Check if this is because cache is empty
-        IF rem_kv_store_empty(effective_user_id) THEN
-            PERFORM maybe_trigger_kv_rebuild(effective_user_id, 'rem_lookup');
-        END IF;
-        RETURN;
-    END IF;
-
-    -- Fetch raw record from underlying table as JSONB
-    query_sql := format('
-        SELECT
-            %L::VARCHAR(100) AS entity_type,
-            row_to_json(t)::jsonb AS data
-        FROM %I t
-        WHERE (t.user_id = $1 OR t.user_id IS NULL)
-          AND t.name = $2
-          AND t.deleted_at IS NULL
-    ', entity_table, entity_table);
-
-    RETURN QUERY EXECUTE query_sql USING effective_user_id, p_entity_key;
-END;
-$$ LANGUAGE plpgsql STABLE;
-
--- ============================================================================
--- UPDATED: rem_fuzzy with self-healing
--- ============================================================================
-
-CREATE OR REPLACE FUNCTION rem_fuzzy(
-    p_query TEXT,
-    p_tenant_id VARCHAR(100),
-    p_threshold REAL DEFAULT 0.3,
-    p_limit INTEGER DEFAULT 10,
-    p_user_id VARCHAR(100) DEFAULT NULL
-)
-RETURNS TABLE(
-    entity_type VARCHAR(100),
-    similarity_score REAL,
-    data JSONB
-) AS $$
-DECLARE
-    kv_matches RECORD;
-    entities_by_table JSONB := '{}'::jsonb;
-    table_keys JSONB;
-    effective_user_id VARCHAR(100);
-    v_found_any BOOLEAN := FALSE;
-BEGIN
-    effective_user_id := COALESCE(p_user_id, p_tenant_id);
-
-    -- Find matching keys in KV store
-    FOR kv_matches IN
-        SELECT
-            kv.entity_key,
-            kv.entity_type,
-            similarity(kv.entity_key, p_query) AS sim_score
-        FROM kv_store kv
-        WHERE (kv.user_id = effective_user_id OR kv.user_id IS NULL)
-          AND kv.entity_key % p_query
-          AND similarity(kv.entity_key, p_query) >= p_threshold
-        ORDER BY sim_score DESC
-        LIMIT p_limit
-    LOOP
-        v_found_any := TRUE;
-        -- Build JSONB mapping {table: [keys]}
-        IF entities_by_table ? kv_matches.entity_type THEN
-            table_keys := entities_by_table->kv_matches.entity_type;
-            entities_by_table := jsonb_set(
-                entities_by_table,
-                ARRAY[kv_matches.entity_type],
-                table_keys || jsonb_build_array(kv_matches.entity_key)
-            );
-        ELSE
-            entities_by_table := jsonb_set(
-                entities_by_table,
-                ARRAY[kv_matches.entity_type],
-                jsonb_build_array(kv_matches.entity_key)
-            );
-        END IF;
-    END LOOP;
-
-    -- SELF-HEALING: If no matches and cache is empty, trigger rebuild
-    IF NOT v_found_any AND rem_kv_store_empty(effective_user_id) THEN
-        PERFORM maybe_trigger_kv_rebuild(effective_user_id, 'rem_fuzzy');
-    END IF;
-
-    -- Fetch full records
-    RETURN QUERY
-    SELECT
-        f.entity_type::VARCHAR(100),
-        similarity(f.entity_key, p_query) AS similarity_score,
-        f.entity_record AS data
-    FROM rem_fetch(entities_by_table, effective_user_id) f
-    ORDER BY similarity_score DESC;
-END;
-$$ LANGUAGE plpgsql STABLE;
-
--- ============================================================================
--- UPDATED: rem_traverse with self-healing
--- ============================================================================
-
-CREATE OR REPLACE FUNCTION rem_traverse(
-    p_entity_key VARCHAR(255),
-    p_tenant_id VARCHAR(100),
-    p_user_id VARCHAR(100),
-    p_max_depth INTEGER DEFAULT 1,
-    p_rel_type VARCHAR(100) DEFAULT NULL,
-    p_keys_only BOOLEAN DEFAULT FALSE
-)
-RETURNS TABLE(
-    depth INTEGER,
-    entity_key VARCHAR(255),
-    entity_type VARCHAR(100),
-    entity_id UUID,
-    rel_type VARCHAR(100),
-    rel_weight REAL,
-    path TEXT[],
-    entity_record JSONB
-) AS $$
-DECLARE
-    graph_keys RECORD;
-    entities_by_table JSONB := '{}'::jsonb;
-    table_keys JSONB;
-    effective_user_id VARCHAR(100);
-    v_found_start BOOLEAN := FALSE;
-BEGIN
-    effective_user_id := COALESCE(p_user_id, p_tenant_id);
-
-    -- Check if start entity exists in kv_store
-    SELECT TRUE INTO v_found_start
-    FROM kv_store kv
-    WHERE (kv.user_id = effective_user_id OR kv.user_id IS NULL)
-      AND kv.entity_key = p_entity_key
-    LIMIT 1;
-
-    -- SELF-HEALING: If start not found and cache is empty, trigger rebuild
-    IF NOT COALESCE(v_found_start, FALSE) THEN
-        IF rem_kv_store_empty(effective_user_id) THEN
-            PERFORM maybe_trigger_kv_rebuild(effective_user_id, 'rem_traverse');
-        END IF;
-        RETURN;
-    END IF;
-
-    -- Original traverse logic
-    FOR graph_keys IN
-        WITH RECURSIVE graph_traversal AS (
-            SELECT
-                0 AS depth,
-                kv.entity_key,
-                kv.entity_type,
-                kv.entity_id,
-                NULL::VARCHAR(100) AS rel_type,
-                NULL::REAL AS rel_weight,
-                ARRAY[kv.entity_key]::TEXT[] AS path
-            FROM kv_store kv
-            WHERE (kv.user_id = effective_user_id OR kv.user_id IS NULL)
-              AND kv.entity_key = p_entity_key
-
-            UNION ALL
-
-            SELECT
-                gt.depth + 1,
-                target_kv.entity_key,
-                target_kv.entity_type,
-                target_kv.entity_id,
-                (edge->>'rel_type')::VARCHAR(100) AS rel_type,
-                COALESCE((edge->>'weight')::REAL, 1.0) AS rel_weight,
-                gt.path || target_kv.entity_key AS path
-            FROM graph_traversal gt
-            JOIN kv_store source_kv ON source_kv.entity_key = gt.entity_key
-                AND (source_kv.user_id = effective_user_id OR source_kv.user_id IS NULL)
-            CROSS JOIN LATERAL jsonb_array_elements(COALESCE(source_kv.graph_edges, '[]'::jsonb)) AS edge
-            JOIN kv_store target_kv ON target_kv.entity_key = (edge->>'dst')::VARCHAR(255)
-                AND (target_kv.user_id = effective_user_id OR target_kv.user_id IS NULL)
-            WHERE gt.depth < p_max_depth
-              AND (p_rel_type IS NULL OR (edge->>'rel_type')::VARCHAR(100) = p_rel_type)
-              AND NOT (target_kv.entity_key = ANY(gt.path))
-        )
-        SELECT DISTINCT ON (gt.entity_key)
-            gt.depth,
-            gt.entity_key,
-            gt.entity_type,
-            gt.entity_id,
-            gt.rel_type,
-            gt.rel_weight,
-            gt.path
-        FROM graph_traversal gt
-        WHERE gt.depth > 0
-        ORDER BY gt.entity_key, gt.depth
-    LOOP
-        IF p_keys_only THEN
-            depth := graph_keys.depth;
-            entity_key := graph_keys.entity_key;
-            entity_type := graph_keys.entity_type;
-            entity_id := graph_keys.entity_id;
-            rel_type := graph_keys.rel_type;
-            rel_weight := graph_keys.rel_weight;
-            path := graph_keys.path;
-            entity_record := NULL;
-            RETURN NEXT;
-        ELSE
-            IF entities_by_table ? graph_keys.entity_type THEN
-                table_keys := entities_by_table->graph_keys.entity_type;
-                entities_by_table := jsonb_set(
-                    entities_by_table,
-                    ARRAY[graph_keys.entity_type],
-                    table_keys || jsonb_build_array(graph_keys.entity_key)
-                );
-            ELSE
-                entities_by_table := jsonb_set(
-                    entities_by_table,
-                    ARRAY[graph_keys.entity_type],
-                    jsonb_build_array(graph_keys.entity_key)
-                );
-            END IF;
-        END IF;
-    END LOOP;
-
-    IF NOT p_keys_only THEN
-        RETURN QUERY
-        SELECT
-            g.depth,
-            g.entity_key,
-            g.entity_type,
-            g.entity_id,
-            g.rel_type,
-            g.rel_weight,
-            g.path,
-            f.entity_record
-        FROM (
-            SELECT * FROM rem_traverse(p_entity_key, p_tenant_id, effective_user_id, p_max_depth, p_rel_type, TRUE)
-        ) g
-        LEFT JOIN rem_fetch(entities_by_table, effective_user_id) f
-            ON g.entity_key = f.entity_key;
-    END IF;
-END;
-$$ LANGUAGE plpgsql STABLE;
-
 -- ============================================================================
 -- HELPER: Get API secret for validation
 -- ============================================================================
@@ -527,9 +261,9 @@ BEGIN
     RAISE NOTICE '';
     RAISE NOTICE 'Functions:';
     RAISE NOTICE '  maybe_trigger_kv_rebuild() - Async rebuild trigger';
-    RAISE NOTICE '
-    RAISE NOTICE '
-    RAISE NOTICE '
+    RAISE NOTICE '  rem_kv_store_empty() - Check if cache is empty';
+    RAISE NOTICE '  rem_get_cache_api_secret() - Get API secret';
+    RAISE NOTICE '  rem_record_cache_rebuild() - Record rebuild completion';
     RAISE NOTICE '';
     RAISE NOTICE 'Async Methods Available:';
     IF v_has_pgnet THEN
@@ -542,7 +276,5 @@ BEGIN
     ELSE
         RAISE NOTICE '  [ ] dblink - Not installed';
     END IF;
-    RAISE NOTICE '';
-    RAISE NOTICE 'Self-Healing: Queries will auto-trigger rebuild on empty cache';
     RAISE NOTICE '============================================================';
 END $$;
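Per the NOTE in the hunk above, the removed functions now live in 001_install.sql with the call signatures shown in the deleted bodies. A minimal, hypothetical sketch of invoking rem_lookup from Python through the package's Postgres service; `get_postgres_service()`, `connect()`, `fetchrow()`, and `disconnect()` are taken from rem/utils/schema_loader.py later in this diff, while the wrapper function itself is an assumption:

```python
# Hypothetical wrapper: rem_lookup's signature comes from the removed SQL above;
# the service calls (connect/fetchrow/disconnect) appear in schema_loader.py below.
from rem.services.postgres import get_postgres_service


async def lookup_entity(entity_key: str, tenant_id: str, user_id: str | None = None):
    db = get_postgres_service()
    await db.connect()
    try:
        # rem_lookup(p_entity_key, p_tenant_id, p_user_id) -> (entity_type, data)
        return await db.fetchrow(
            "SELECT entity_type, data FROM rem_lookup($1, $2, $3)",
            entity_key,
            tenant_id,
            user_id,
        )
    finally:
        await db.disconnect()
```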
rem/sql/migrations/migrate_session_id_to_uuid.sql
ADDED

@@ -0,0 +1,45 @@
+-- Migration: Update messages.session_id from session name to session UUID
+-- This fixes the bug where messages were stored with session.name instead of session.id
+--
+-- Run this migration AFTER deploying the code fixes in remdb 0.3.204+
+-- The code now correctly stores session.id (UUID), but existing data needs migration.
+
+BEGIN;
+
+-- First, count how many messages need to be updated
+DO $$
+DECLARE
+    count_to_migrate INTEGER;
+BEGIN
+    SELECT COUNT(*) INTO count_to_migrate
+    FROM messages m
+    JOIN sessions s ON m.session_id = s.name
+    WHERE m.session_id != s.id::text;
+
+    RAISE NOTICE 'Messages needing migration: %', count_to_migrate;
+END $$;
+
+-- Update messages.session_id from session name to session UUID
+UPDATE messages m
+SET session_id = s.id::text
+FROM sessions s
+WHERE m.session_id = s.name
+  AND m.session_id != s.id::text;
+
+-- Report how many were updated
+DO $$
+DECLARE
+    updated_count INTEGER;
+BEGIN
+    GET DIAGNOSTICS updated_count = ROW_COUNT;
+    RAISE NOTICE 'Messages updated: %', updated_count;
+END $$;
+
+COMMIT;
+
+-- Verify the fix - all messages should now join by UUID
+SELECT
+    'Messages matching sessions by UUID' as status,
+    COUNT(*) as count
+FROM messages m
+JOIN sessions s ON m.session_id = s.id::text;
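Before applying the migration, it can help to preview the rows the UPDATE would touch. A sketch using the migration's own join condition; the Python wrapper is hypothetical, and a `fetch()` method returning multiple rows is an assumption (only `fetchrow()` appears in this diff):

```python
# Hypothetical dry-run helper; the JOIN/WHERE mirror the UPDATE in the migration above.
# Assumes db.fetch() returns a list of rows - an assumption about the service API.
from rem.services.postgres import get_postgres_service


async def preview_session_id_migration(limit: int = 10):
    db = get_postgres_service()
    await db.connect()
    try:
        return await db.fetch(
            """
            SELECT m.session_id AS old_value, s.id::text AS new_value
            FROM messages m
            JOIN sessions s ON m.session_id = s.name
            WHERE m.session_id != s.id::text
            LIMIT $1
            """,
            limit,
        )
    finally:
        await db.disconnect()
```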
rem/utils/model_helpers.py
CHANGED

@@ -19,6 +19,9 @@ Table Name Inference:
 
 Model Resolution:
 - model_from_arbitrary_casing: Resolve model class from flexible input casing
+
+Data Validation:
+- validate_data_for_model: Validate row data against a Pydantic model with clear error reporting
 """
 
 import re
@@ -389,3 +392,101 @@ def model_from_arbitrary_casing(
         f"Unknown model: '{name}' (normalized: '{normalized}'). "
         f"Available models: {', '.join(available)}"
     )
+
+
+class ValidationResult:
+    """Result of validating data against a Pydantic model."""
+
+    def __init__(
+        self,
+        valid: bool,
+        instance: BaseModel | None = None,
+        errors: list[str] | None = None,
+        missing_required: set[str] | None = None,
+        extra_fields: set[str] | None = None,
+        required_fields: set[str] | None = None,
+        optional_fields: set[str] | None = None,
+    ):
+        self.valid = valid
+        self.instance = instance
+        self.errors = errors or []
+        self.missing_required = missing_required or set()
+        self.extra_fields = extra_fields or set()
+        self.required_fields = required_fields or set()
+        self.optional_fields = optional_fields or set()
+
+    def log_errors(self, row_label: str = "Row") -> None:
+        """Log validation errors using loguru."""
+        if self.valid:
+            return
+
+        logger.error(f"{row_label}: Validation failed")
+        if self.missing_required:
+            logger.error(f"  Missing required: {self.missing_required}")
+        if self.extra_fields:
+            logger.warning(f"  Unknown fields (ignored): {self.extra_fields}")
+        for err in self.errors:
+            logger.error(f"  - {err}")
+        logger.info(f"  Required: {self.required_fields or '(none)'}")
+        logger.info(f"  Optional: {self.optional_fields}")
+
+
+def validate_data_for_model(
+    model: Type[BaseModel],
+    data: dict[str, Any],
+) -> ValidationResult:
+    """
+    Validate a data dict against a Pydantic model with detailed error reporting.
+
+    Args:
+        model: Pydantic model class to validate against
+        data: Dictionary of field values
+
+    Returns:
+        ValidationResult with validation status and detailed field info
+
+    Example:
+        >>> from rem.models.entities import Resource
+        >>> result = validate_data_for_model(Resource, {"name": "test", "content": "hello"})
+        >>> result.valid
+        True
+        >>> result = validate_data_for_model(Resource, {"unknown_field": "value"})
+        >>> result.valid
+        True  # Resource has no required fields
+        >>> result.extra_fields
+        {'unknown_field'}
+    """
+    from pydantic import ValidationError
+
+    model_fields = set(model.model_fields.keys())
+    required = {k for k, v in model.model_fields.items() if v.is_required()}
+    optional = model_fields - required
+    data_fields = set(data.keys())
+
+    missing_required = required - data_fields
+    extra_fields = data_fields - model_fields
+
+    try:
+        instance = model(**data)
+        return ValidationResult(
+            valid=True,
+            instance=instance,
+            required_fields=required,
+            optional_fields=optional,
+            extra_fields=extra_fields,
+        )
+    except ValidationError as e:
+        errors = []
+        for err in e.errors():
+            field = ".".join(str(p) for p in err["loc"])
+            if field not in missing_required:  # Don't double-report missing
+                errors.append(f"{field}: {err['msg']}")
+
+        return ValidationResult(
+            valid=False,
+            errors=errors,
+            missing_required=missing_required,
+            extra_fields=extra_fields,
+            required_fields=required,
+            optional_fields=optional,
+        )
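The new helper is self-contained, so a short usage sketch follows; the `Resource` import path comes from the docstring above, and the sample rows are made up for illustration:

```python
# Usage sketch for the helper added above; Resource's import path is as documented
# in the docstring, the row data is illustrative only.
from rem.models.entities import Resource
from rem.utils.model_helpers import validate_data_for_model

rows = [
    {"name": "doc-1", "content": "hello"},
    {"nam": "typo-field"},  # unknown key -> reported in extra_fields
]
for i, row in enumerate(rows):
    result = validate_data_for_model(Resource, row)
    if result.valid:
        print(result.instance, result.extra_fields)
    else:
        result.log_errors(row_label=f"Row {i}")  # loguru output, per log_errors above
```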
rem/utils/schema_loader.py
CHANGED

@@ -147,15 +147,25 @@ def _load_schema_from_database(schema_name: str, user_id: str) -> dict[str, Any]
     try:
         await db.connect()
 
-
-
-
-
-
-
-
-
-
+        # Query for public schemas (user_id IS NULL) and optionally user-specific
+        if user_id:
+            query = """
+                SELECT spec FROM schemas
+                WHERE LOWER(name) = LOWER($1)
+                AND (user_id = $2 OR user_id = 'system' OR user_id IS NULL)
+                LIMIT 1
+            """
+            row = await db.fetchrow(query, schema_name, user_id)
+        else:
+            # No user_id - only search public schemas
+            query = """
+                SELECT spec FROM schemas
+                WHERE LOWER(name) = LOWER($1)
+                AND (user_id = 'system' OR user_id IS NULL)
+                LIMIT 1
+            """
+            row = await db.fetchrow(query, schema_name)
+        logger.debug(f"Executing schema lookup: name={schema_name}, user_id={user_id or 'public'}")
 
         if row:
             spec = row.get("spec")
@@ -193,17 +203,25 @@ def _load_schema_from_database(schema_name: str, user_id: str) -> dict[str, Any]
     try:
         await db.connect()
 
-        # Query schemas
-
-
-
-
-
-
-
-
-
-
+        # Query for public schemas (user_id IS NULL) and optionally user-specific
+        if user_id:
+            query = """
+                SELECT spec FROM schemas
+                WHERE LOWER(name) = LOWER($1)
+                AND (user_id = $2 OR user_id = 'system' OR user_id IS NULL)
+                LIMIT 1
+            """
+            row = await db.fetchrow(query, schema_name, user_id)
+        else:
+            # No user_id - only search public schemas
+            query = """
+                SELECT spec FROM schemas
+                WHERE LOWER(name) = LOWER($1)
+                AND (user_id = 'system' OR user_id IS NULL)
+                LIMIT 1
+            """
+            row = await db.fetchrow(query, schema_name)
+        logger.debug(f"Executing schema lookup: name={schema_name}, user_id={user_id or 'public'}")
 
         if row:
             spec = row.get("spec")
@@ -291,14 +309,14 @@ def load_agent_schema(
 
     # Check cache first (only for package resources, not custom paths)
     path = Path(schema_name_or_path)
-    is_custom_path = path.exists() or '/' in str(schema_name_or_path) or '\\' in str(schema_name_or_path)
+    is_custom_path = (path.exists() and path.is_file()) or '/' in str(schema_name_or_path) or '\\' in str(schema_name_or_path)
 
     if use_cache and not is_custom_path and cache_key in _fs_schema_cache:
         logger.debug(f"Loading schema from cache: {cache_key}")
         return _fs_schema_cache[cache_key]
 
-    # 1. Try exact path first (absolute or relative to cwd)
-    if path.exists():
+    # 1. Try exact path first (absolute or relative to cwd) - must be a file, not directory
+    if path.exists() and path.is_file():
         logger.debug(f"Loading schema from exact path: {path}")
         with open(path, "r") as f:
             schema = yaml.safe_load(f)
@@ -365,13 +383,14 @@
                 logger.debug(f"Could not load from {search_path}: {e}")
                 continue
 
-    # 5. Try database LOOKUP fallback (if enabled
-
+    # 5. Try database LOOKUP fallback (if enabled)
+    # Always search for public schemas (user_id IS NULL), plus user-specific if user_id provided
+    if enable_db_fallback:
         try:
-            logger.debug(f"Attempting database LOOKUP for schema: {base_name} (user_id={user_id})")
+            logger.debug(f"Attempting database LOOKUP for schema: {base_name} (user_id={user_id or 'public'})")
             db_schema = _load_schema_from_database(base_name, user_id)
             if db_schema:
-                logger.info(f"✅ Loaded schema from database: {base_name}
+                logger.info(f"✅ Loaded schema from database: {base_name}")
                 return db_schema
         except Exception as e:
             logger.debug(f"Database schema lookup failed: {e}")
@@ -387,9 +406,9 @@
     db_search_note = ""
     if enable_db_fallback:
         if user_id:
-            db_search_note = f"\n  - Database: LOOKUP '{base_name}' FROM schemas WHERE user_id
+            db_search_note = f"\n  - Database: LOOKUP '{base_name}' FROM schemas WHERE user_id IN ('{user_id}', 'system', NULL) (no match)"
         else:
-            db_search_note = "\n  - Database:
+            db_search_note = f"\n  - Database: LOOKUP '{base_name}' FROM schemas WHERE user_id IN ('system', NULL) (no match)"
 
     raise FileNotFoundError(
         f"Schema not found: {schema_name_or_path}\n"
@@ -432,15 +451,15 @@ async def load_agent_schema_async(
     if cache_key.endswith('.yaml') or cache_key.endswith('.yml'):
         cache_key = cache_key.rsplit('.', 1)[0]
 
-    is_custom_path = path.exists() or '/' in str(schema_name_or_path) or '\\' in str(schema_name_or_path)
+    is_custom_path = (path.exists() and path.is_file()) or '/' in str(schema_name_or_path) or '\\' in str(schema_name_or_path)
 
     # Check cache
     if not is_custom_path and cache_key in _fs_schema_cache:
         logger.debug(f"Loading schema from cache: {cache_key}")
         return _fs_schema_cache[cache_key]
 
-    # Try exact path
-    if path.exists():
+    # Try exact path (must be a file, not directory)
+    if path.exists() and path.is_file():
         logger.debug(f"Loading schema from exact path: {path}")
         with open(path, "r") as f:
             schema = yaml.safe_load(f)
@@ -484,19 +503,19 @@
         except Exception:
             continue
 
-    # Try database lookup
-
-    from rem.services.postgres import get_postgres_service
-
-    should_disconnect = False
-    if db is None:
-        db = get_postgres_service()
-    if db:
-        await db.connect()
-        should_disconnect = True
+    # Try database lookup - always search public schemas, plus user-specific if user_id provided
+    from rem.services.postgres import get_postgres_service
 
+    should_disconnect = False
+    if db is None:
+        db = get_postgres_service()
     if db:
-
+        await db.connect()
+        should_disconnect = True
+
+    if db:
+        try:
+            if user_id:
                 query = """
                     SELECT spec FROM schemas
                     WHERE LOWER(name) = LOWER($1)
@@ -504,14 +523,23 @@
                     LIMIT 1
                 """
                 row = await db.fetchrow(query, base_name, user_id)
-
-
-
-
-
-
-
-
+            else:
+                # No user_id - only search public schemas
+                query = """
+                    SELECT spec FROM schemas
+                    WHERE LOWER(name) = LOWER($1)
+                    AND (user_id = 'system' OR user_id IS NULL)
+                    LIMIT 1
+                """
+                row = await db.fetchrow(query, base_name)
+            if row:
+                spec = row.get("spec")
+                if spec and isinstance(spec, dict):
+                    logger.info(f"✅ Loaded schema from database: {base_name}")
+                    return spec
+        finally:
+            if should_disconnect:
+                await db.disconnect()
 
     # Not found
     raise FileNotFoundError(f"Schema not found: {schema_name_or_path}")
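Taken together, the changed loaders resolve a schema in order: exact file path (now required to be a file, not a directory), package resources, then the database, where public rows (user_id = 'system' or NULL) are always searched and user-specific rows only when a user_id is given. A hedged sketch of the call; the keyword name mirrors identifiers visible in the hunks above, but the full signature is not shown in this diff:

```python
# Hedged sketch: load_agent_schema_async's exact signature is not shown in this
# diff; only the user_id handling and DB fallback behavior above are.
from rem.utils.schema_loader import load_agent_schema_async


async def load_for_user(name: str, user_id: str | None = None):
    # user_id=None searches only public schemas (user_id = 'system' OR NULL);
    # a concrete user_id additionally matches that user's own rows.
    return await load_agent_schema_async(name, user_id=user_id)
```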
{remdb-0.3.171.dist-info → remdb-0.3.230.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: remdb
-Version: 0.3.171
+Version: 0.3.230
 Summary: Resources Entities Moments - Bio-inspired memory system for agentic AI workloads
 Project-URL: Homepage, https://github.com/Percolation-Labs/reminiscent
 Project-URL: Documentation, https://github.com/Percolation-Labs/reminiscent/blob/main/README.md
@@ -39,7 +39,7 @@ Requires-Dist: opentelemetry-instrumentation-fastapi>=0.49b0
 Requires-Dist: opentelemetry-instrumentation>=0.49b0
 Requires-Dist: opentelemetry-sdk>=1.28.0
 Requires-Dist: psycopg[binary]>=3.2.0
-Requires-Dist: pydantic-ai>=0.0
+Requires-Dist: pydantic-ai>=1.0.0
 Requires-Dist: pydantic-settings>=2.6.0
 Requires-Dist: pydantic>=2.10.0
 Requires-Dist: pydub>=0.25.0