remdb 0.3.172__py3-none-any.whl → 0.3.223__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/agentic/README.md +262 -2
- rem/agentic/context.py +173 -0
- rem/agentic/context_builder.py +12 -2
- rem/agentic/mcp/tool_wrapper.py +39 -16
- rem/agentic/providers/pydantic_ai.py +46 -43
- rem/agentic/schema.py +2 -2
- rem/agentic/tools/rem_tools.py +11 -0
- rem/api/main.py +1 -1
- rem/api/mcp_router/resources.py +64 -8
- rem/api/mcp_router/server.py +31 -24
- rem/api/mcp_router/tools.py +621 -166
- rem/api/routers/admin.py +30 -4
- rem/api/routers/auth.py +114 -15
- rem/api/routers/chat/completions.py +66 -18
- rem/api/routers/chat/sse_events.py +7 -3
- rem/api/routers/chat/streaming.py +254 -22
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +7 -1
- rem/api/routers/feedback.py +9 -1
- rem/api/routers/messages.py +176 -38
- rem/api/routers/models.py +9 -1
- rem/api/routers/query.py +12 -1
- rem/api/routers/shared_sessions.py +16 -0
- rem/auth/jwt.py +19 -4
- rem/auth/middleware.py +42 -28
- rem/cli/README.md +62 -0
- rem/cli/commands/ask.py +1 -1
- rem/cli/commands/db.py +148 -70
- rem/cli/commands/process.py +171 -43
- rem/models/entities/ontology.py +91 -101
- rem/schemas/agents/rem.yaml +1 -1
- rem/services/content/service.py +18 -5
- rem/services/email/service.py +11 -2
- rem/services/embeddings/worker.py +26 -12
- rem/services/postgres/__init__.py +28 -3
- rem/services/postgres/diff_service.py +57 -5
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +2 -2
- rem/services/postgres/register_type.py +12 -11
- rem/services/postgres/repository.py +46 -25
- rem/services/postgres/schema_generator.py +5 -5
- rem/services/postgres/sql_builder.py +6 -5
- rem/services/session/__init__.py +8 -1
- rem/services/session/compression.py +40 -2
- rem/services/session/pydantic_messages.py +276 -0
- rem/settings.py +28 -0
- rem/sql/background_indexes.sql +5 -0
- rem/sql/migrations/001_install.sql +157 -10
- rem/sql/migrations/002_install_models.sql +160 -132
- rem/sql/migrations/004_cache_system.sql +7 -275
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/model_helpers.py +101 -0
- rem/utils/schema_loader.py +6 -6
- {remdb-0.3.172.dist-info → remdb-0.3.223.dist-info}/METADATA +1 -1
- {remdb-0.3.172.dist-info → remdb-0.3.223.dist-info}/RECORD +57 -53
- {remdb-0.3.172.dist-info → remdb-0.3.223.dist-info}/WHEEL +0 -0
- {remdb-0.3.172.dist-info → remdb-0.3.223.dist-info}/entry_points.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
-- REM Model Schema (install_models.sql)
|
|
2
2
|
-- Generated from Pydantic models
|
|
3
3
|
-- Source: model registry
|
|
4
|
-
-- Generated at: 2025-
|
|
4
|
+
-- Generated at: 2025-12-22T17:34:54.187339
|
|
5
5
|
--
|
|
6
6
|
-- DO NOT EDIT MANUALLY - Regenerate with: rem db schema generate
|
|
7
7
|
--
|
|
@@ -36,7 +36,7 @@ END $$;
|
|
|
36
36
|
|
|
37
37
|
CREATE TABLE IF NOT EXISTS feedbacks (
|
|
38
38
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
39
|
-
tenant_id VARCHAR(100)
|
|
39
|
+
tenant_id VARCHAR(100),
|
|
40
40
|
user_id VARCHAR(256),
|
|
41
41
|
session_id VARCHAR(256) NOT NULL,
|
|
42
42
|
message_id VARCHAR(256),
|
|
@@ -74,6 +74,7 @@ BEGIN
|
|
|
74
74
|
RETURN OLD;
|
|
75
75
|
ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
|
|
76
76
|
-- Upsert to KV_STORE (O(1) lookup by entity_key)
|
|
77
|
+
-- tenant_id can be NULL (meaning public/shared data)
|
|
77
78
|
INSERT INTO kv_store (
|
|
78
79
|
entity_key,
|
|
79
80
|
entity_type,
|
|
@@ -84,7 +85,7 @@ BEGIN
|
|
|
84
85
|
graph_edges,
|
|
85
86
|
updated_at
|
|
86
87
|
) VALUES (
|
|
87
|
-
NEW.id::VARCHAR,
|
|
88
|
+
normalize_key(NEW.id::VARCHAR),
|
|
88
89
|
'feedbacks',
|
|
89
90
|
NEW.id,
|
|
90
91
|
NEW.tenant_id,
|
|
@@ -93,7 +94,7 @@ BEGIN
|
|
|
93
94
|
COALESCE(NEW.graph_edges, '[]'::jsonb),
|
|
94
95
|
CURRENT_TIMESTAMP
|
|
95
96
|
)
|
|
96
|
-
ON CONFLICT (tenant_id, entity_key)
|
|
97
|
+
ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
|
|
97
98
|
DO UPDATE SET
|
|
98
99
|
entity_id = EXCLUDED.entity_id,
|
|
99
100
|
user_id = EXCLUDED.user_id,
|
|
@@ -118,7 +119,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_feedbacks_kv_store_upsert();
|
|
|
118
119
|
|
|
119
120
|
CREATE TABLE IF NOT EXISTS files (
|
|
120
121
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
121
|
-
tenant_id VARCHAR(100)
|
|
122
|
+
tenant_id VARCHAR(100),
|
|
122
123
|
user_id VARCHAR(256),
|
|
123
124
|
name VARCHAR(256) NOT NULL,
|
|
124
125
|
uri VARCHAR(256) NOT NULL,
|
|
@@ -164,7 +165,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_files_field_provider ON embeddings_fil
|
|
|
164
165
|
|
|
165
166
|
-- HNSW index for vector similarity search (created in background)
|
|
166
167
|
-- Note: This will be created by background thread after data load
|
|
167
|
-
-- CREATE INDEX
|
|
168
|
+
-- CREATE INDEX idx_embeddings_files_vector_hnsw ON embeddings_files
|
|
168
169
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
169
170
|
|
|
170
171
|
-- KV_STORE trigger for files
|
|
@@ -179,6 +180,7 @@ BEGIN
|
|
|
179
180
|
RETURN OLD;
|
|
180
181
|
ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
|
|
181
182
|
-- Upsert to KV_STORE (O(1) lookup by entity_key)
|
|
183
|
+
-- tenant_id can be NULL (meaning public/shared data)
|
|
182
184
|
INSERT INTO kv_store (
|
|
183
185
|
entity_key,
|
|
184
186
|
entity_type,
|
|
@@ -189,7 +191,7 @@ BEGIN
|
|
|
189
191
|
graph_edges,
|
|
190
192
|
updated_at
|
|
191
193
|
) VALUES (
|
|
192
|
-
NEW.
|
|
194
|
+
normalize_key(NEW.name::VARCHAR),
|
|
193
195
|
'files',
|
|
194
196
|
NEW.id,
|
|
195
197
|
NEW.tenant_id,
|
|
@@ -198,7 +200,7 @@ BEGIN
|
|
|
198
200
|
COALESCE(NEW.graph_edges, '[]'::jsonb),
|
|
199
201
|
CURRENT_TIMESTAMP
|
|
200
202
|
)
|
|
201
|
-
ON CONFLICT (tenant_id, entity_key)
|
|
203
|
+
ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
|
|
202
204
|
DO UPDATE SET
|
|
203
205
|
entity_id = EXCLUDED.entity_id,
|
|
204
206
|
user_id = EXCLUDED.user_id,
|
|
@@ -223,7 +225,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_files_kv_store_upsert();
|
|
|
223
225
|
|
|
224
226
|
CREATE TABLE IF NOT EXISTS image_resources (
|
|
225
227
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
226
|
-
tenant_id VARCHAR(100)
|
|
228
|
+
tenant_id VARCHAR(100),
|
|
227
229
|
user_id VARCHAR(256),
|
|
228
230
|
name VARCHAR(256),
|
|
229
231
|
uri VARCHAR(256),
|
|
@@ -277,7 +279,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_image_resources_field_provider ON embe
|
|
|
277
279
|
|
|
278
280
|
-- HNSW index for vector similarity search (created in background)
|
|
279
281
|
-- Note: This will be created by background thread after data load
|
|
280
|
-
-- CREATE INDEX
|
|
282
|
+
-- CREATE INDEX idx_embeddings_image_resources_vector_hnsw ON embeddings_image_resources
|
|
281
283
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
282
284
|
|
|
283
285
|
-- KV_STORE trigger for image_resources
|
|
@@ -292,6 +294,7 @@ BEGIN
|
|
|
292
294
|
RETURN OLD;
|
|
293
295
|
ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
|
|
294
296
|
-- Upsert to KV_STORE (O(1) lookup by entity_key)
|
|
297
|
+
-- tenant_id can be NULL (meaning public/shared data)
|
|
295
298
|
INSERT INTO kv_store (
|
|
296
299
|
entity_key,
|
|
297
300
|
entity_type,
|
|
@@ -302,7 +305,7 @@ BEGIN
|
|
|
302
305
|
graph_edges,
|
|
303
306
|
updated_at
|
|
304
307
|
) VALUES (
|
|
305
|
-
NEW.name::VARCHAR,
|
|
308
|
+
normalize_key(NEW.name::VARCHAR),
|
|
306
309
|
'image_resources',
|
|
307
310
|
NEW.id,
|
|
308
311
|
NEW.tenant_id,
|
|
@@ -311,7 +314,7 @@ BEGIN
|
|
|
311
314
|
COALESCE(NEW.graph_edges, '[]'::jsonb),
|
|
312
315
|
CURRENT_TIMESTAMP
|
|
313
316
|
)
|
|
314
|
-
ON CONFLICT (tenant_id, entity_key)
|
|
317
|
+
ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
|
|
315
318
|
DO UPDATE SET
|
|
316
319
|
entity_id = EXCLUDED.entity_id,
|
|
317
320
|
user_id = EXCLUDED.user_id,
|
|
@@ -336,7 +339,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_image_resources_kv_store_upsert();
|
|
|
336
339
|
|
|
337
340
|
CREATE TABLE IF NOT EXISTS messages (
|
|
338
341
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
339
|
-
tenant_id VARCHAR(100)
|
|
342
|
+
tenant_id VARCHAR(100),
|
|
340
343
|
user_id VARCHAR(256),
|
|
341
344
|
content TEXT NOT NULL,
|
|
342
345
|
message_type VARCHAR(256),
|
|
@@ -383,7 +386,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_messages_field_provider ON embeddings_
|
|
|
383
386
|
|
|
384
387
|
-- HNSW index for vector similarity search (created in background)
|
|
385
388
|
-- Note: This will be created by background thread after data load
|
|
386
|
-
-- CREATE INDEX
|
|
389
|
+
-- CREATE INDEX idx_embeddings_messages_vector_hnsw ON embeddings_messages
|
|
387
390
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
388
391
|
|
|
389
392
|
-- KV_STORE trigger for messages
|
|
@@ -398,6 +401,7 @@ BEGIN
|
|
|
398
401
|
RETURN OLD;
|
|
399
402
|
ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
|
|
400
403
|
-- Upsert to KV_STORE (O(1) lookup by entity_key)
|
|
404
|
+
-- tenant_id can be NULL (meaning public/shared data)
|
|
401
405
|
INSERT INTO kv_store (
|
|
402
406
|
entity_key,
|
|
403
407
|
entity_type,
|
|
@@ -408,7 +412,7 @@ BEGIN
|
|
|
408
412
|
graph_edges,
|
|
409
413
|
updated_at
|
|
410
414
|
) VALUES (
|
|
411
|
-
NEW.id::VARCHAR,
|
|
415
|
+
normalize_key(NEW.id::VARCHAR),
|
|
412
416
|
'messages',
|
|
413
417
|
NEW.id,
|
|
414
418
|
NEW.tenant_id,
|
|
@@ -417,7 +421,7 @@ BEGIN
|
|
|
417
421
|
COALESCE(NEW.graph_edges, '[]'::jsonb),
|
|
418
422
|
CURRENT_TIMESTAMP
|
|
419
423
|
)
|
|
420
|
-
ON CONFLICT (tenant_id, entity_key)
|
|
424
|
+
ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
|
|
421
425
|
DO UPDATE SET
|
|
422
426
|
entity_id = EXCLUDED.entity_id,
|
|
423
427
|
user_id = EXCLUDED.user_id,
|
|
@@ -442,7 +446,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_messages_kv_store_upsert();
|
|
|
442
446
|
|
|
443
447
|
CREATE TABLE IF NOT EXISTS moments (
|
|
444
448
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
445
|
-
tenant_id VARCHAR(100)
|
|
449
|
+
tenant_id VARCHAR(100),
|
|
446
450
|
user_id VARCHAR(256),
|
|
447
451
|
name VARCHAR(256),
|
|
448
452
|
moment_type VARCHAR(256),
|
|
@@ -491,7 +495,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_moments_field_provider ON embeddings_m
|
|
|
491
495
|
|
|
492
496
|
-- HNSW index for vector similarity search (created in background)
|
|
493
497
|
-- Note: This will be created by background thread after data load
|
|
494
|
-
-- CREATE INDEX
|
|
498
|
+
-- CREATE INDEX idx_embeddings_moments_vector_hnsw ON embeddings_moments
|
|
495
499
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
496
500
|
|
|
497
501
|
-- KV_STORE trigger for moments
|
|
@@ -506,6 +510,7 @@ BEGIN
|
|
|
506
510
|
RETURN OLD;
|
|
507
511
|
ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
|
|
508
512
|
-- Upsert to KV_STORE (O(1) lookup by entity_key)
|
|
513
|
+
-- tenant_id can be NULL (meaning public/shared data)
|
|
509
514
|
INSERT INTO kv_store (
|
|
510
515
|
entity_key,
|
|
511
516
|
entity_type,
|
|
@@ -516,7 +521,7 @@ BEGIN
|
|
|
516
521
|
graph_edges,
|
|
517
522
|
updated_at
|
|
518
523
|
) VALUES (
|
|
519
|
-
NEW.name::VARCHAR,
|
|
524
|
+
normalize_key(NEW.name::VARCHAR),
|
|
520
525
|
'moments',
|
|
521
526
|
NEW.id,
|
|
522
527
|
NEW.tenant_id,
|
|
@@ -525,7 +530,7 @@ BEGIN
|
|
|
525
530
|
COALESCE(NEW.graph_edges, '[]'::jsonb),
|
|
526
531
|
CURRENT_TIMESTAMP
|
|
527
532
|
)
|
|
528
|
-
ON CONFLICT (tenant_id, entity_key)
|
|
533
|
+
ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
|
|
529
534
|
DO UPDATE SET
|
|
530
535
|
entity_id = EXCLUDED.entity_id,
|
|
531
536
|
user_id = EXCLUDED.user_id,
|
|
@@ -550,17 +555,18 @@ FOR EACH ROW EXECUTE FUNCTION fn_moments_kv_store_upsert();
|
|
|
550
555
|
|
|
551
556
|
CREATE TABLE IF NOT EXISTS ontologies (
|
|
552
557
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
553
|
-
tenant_id VARCHAR(100)
|
|
558
|
+
tenant_id VARCHAR(100),
|
|
554
559
|
user_id VARCHAR(256),
|
|
555
560
|
name VARCHAR(256) NOT NULL,
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
+
uri VARCHAR(256),
|
|
562
|
+
file_id UUID,
|
|
563
|
+
agent_schema_id VARCHAR(256),
|
|
564
|
+
provider_name VARCHAR(256),
|
|
565
|
+
model_name VARCHAR(256),
|
|
566
|
+
extracted_data JSONB,
|
|
561
567
|
confidence_score DOUBLE PRECISION,
|
|
562
568
|
extraction_timestamp VARCHAR(256),
|
|
563
|
-
|
|
569
|
+
content TEXT,
|
|
564
570
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
565
571
|
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
566
572
|
deleted_at TIMESTAMP,
|
|
@@ -575,6 +581,32 @@ CREATE INDEX IF NOT EXISTS idx_ontologies_graph_edges ON ontologies USING GIN (g
|
|
|
575
581
|
CREATE INDEX IF NOT EXISTS idx_ontologies_metadata ON ontologies USING GIN (metadata);
|
|
576
582
|
CREATE INDEX IF NOT EXISTS idx_ontologies_tags ON ontologies USING GIN (tags);
|
|
577
583
|
|
|
584
|
+
-- Embeddings for ontologies
|
|
585
|
+
CREATE TABLE IF NOT EXISTS embeddings_ontologies (
|
|
586
|
+
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
587
|
+
entity_id UUID NOT NULL REFERENCES ontologies(id) ON DELETE CASCADE,
|
|
588
|
+
field_name VARCHAR(100) NOT NULL,
|
|
589
|
+
provider VARCHAR(50) NOT NULL DEFAULT 'openai',
|
|
590
|
+
model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
|
|
591
|
+
embedding vector(1536) NOT NULL,
|
|
592
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
593
|
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
594
|
+
|
|
595
|
+
-- Unique: one embedding per entity per field per provider
|
|
596
|
+
UNIQUE (entity_id, field_name, provider)
|
|
597
|
+
);
|
|
598
|
+
|
|
599
|
+
-- Index for entity lookup (get all embeddings for entity)
|
|
600
|
+
CREATE INDEX IF NOT EXISTS idx_embeddings_ontologies_entity ON embeddings_ontologies (entity_id);
|
|
601
|
+
|
|
602
|
+
-- Index for field + provider lookup
|
|
603
|
+
CREATE INDEX IF NOT EXISTS idx_embeddings_ontologies_field_provider ON embeddings_ontologies (field_name, provider);
|
|
604
|
+
|
|
605
|
+
-- HNSW index for vector similarity search (created in background)
|
|
606
|
+
-- Note: This will be created by background thread after data load
|
|
607
|
+
-- CREATE INDEX idx_embeddings_ontologies_vector_hnsw ON embeddings_ontologies
|
|
608
|
+
-- USING hnsw (embedding vector_cosine_ops);
|
|
609
|
+
|
|
578
610
|
-- KV_STORE trigger for ontologies
|
|
579
611
|
-- Trigger function to maintain KV_STORE for ontologies
|
|
580
612
|
CREATE OR REPLACE FUNCTION fn_ontologies_kv_store_upsert()
|
|
@@ -587,6 +619,7 @@ BEGIN
|
|
|
587
619
|
RETURN OLD;
|
|
588
620
|
ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
|
|
589
621
|
-- Upsert to KV_STORE (O(1) lookup by entity_key)
|
|
622
|
+
-- tenant_id can be NULL (meaning public/shared data)
|
|
590
623
|
INSERT INTO kv_store (
|
|
591
624
|
entity_key,
|
|
592
625
|
entity_type,
|
|
@@ -597,7 +630,7 @@ BEGIN
|
|
|
597
630
|
graph_edges,
|
|
598
631
|
updated_at
|
|
599
632
|
) VALUES (
|
|
600
|
-
NEW.
|
|
633
|
+
normalize_key(NEW.name::VARCHAR),
|
|
601
634
|
'ontologies',
|
|
602
635
|
NEW.id,
|
|
603
636
|
NEW.tenant_id,
|
|
@@ -606,7 +639,7 @@ BEGIN
|
|
|
606
639
|
COALESCE(NEW.graph_edges, '[]'::jsonb),
|
|
607
640
|
CURRENT_TIMESTAMP
|
|
608
641
|
)
|
|
609
|
-
ON CONFLICT (tenant_id, entity_key)
|
|
642
|
+
ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
|
|
610
643
|
DO UPDATE SET
|
|
611
644
|
entity_id = EXCLUDED.entity_id,
|
|
612
645
|
user_id = EXCLUDED.user_id,
|
|
@@ -631,7 +664,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_ontologies_kv_store_upsert();
|
|
|
631
664
|
|
|
632
665
|
CREATE TABLE IF NOT EXISTS ontology_configs (
|
|
633
666
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
634
|
-
tenant_id VARCHAR(100)
|
|
667
|
+
tenant_id VARCHAR(100),
|
|
635
668
|
user_id VARCHAR(256),
|
|
636
669
|
name VARCHAR(256) NOT NULL,
|
|
637
670
|
agent_schema_id VARCHAR(256) NOT NULL,
|
|
@@ -680,7 +713,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_ontology_configs_field_provider ON emb
|
|
|
680
713
|
|
|
681
714
|
-- HNSW index for vector similarity search (created in background)
|
|
682
715
|
-- Note: This will be created by background thread after data load
|
|
683
|
-
-- CREATE INDEX
|
|
716
|
+
-- CREATE INDEX idx_embeddings_ontology_configs_vector_hnsw ON embeddings_ontology_configs
|
|
684
717
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
685
718
|
|
|
686
719
|
-- KV_STORE trigger for ontology_configs
|
|
@@ -695,6 +728,7 @@ BEGIN
|
|
|
695
728
|
RETURN OLD;
|
|
696
729
|
ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
|
|
697
730
|
-- Upsert to KV_STORE (O(1) lookup by entity_key)
|
|
731
|
+
-- tenant_id can be NULL (meaning public/shared data)
|
|
698
732
|
INSERT INTO kv_store (
|
|
699
733
|
entity_key,
|
|
700
734
|
entity_type,
|
|
@@ -705,7 +739,7 @@ BEGIN
|
|
|
705
739
|
graph_edges,
|
|
706
740
|
updated_at
|
|
707
741
|
) VALUES (
|
|
708
|
-
NEW.
|
|
742
|
+
normalize_key(NEW.name::VARCHAR),
|
|
709
743
|
'ontology_configs',
|
|
710
744
|
NEW.id,
|
|
711
745
|
NEW.tenant_id,
|
|
@@ -714,7 +748,7 @@ BEGIN
|
|
|
714
748
|
COALESCE(NEW.graph_edges, '[]'::jsonb),
|
|
715
749
|
CURRENT_TIMESTAMP
|
|
716
750
|
)
|
|
717
|
-
ON CONFLICT (tenant_id, entity_key)
|
|
751
|
+
ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
|
|
718
752
|
DO UPDATE SET
|
|
719
753
|
entity_id = EXCLUDED.entity_id,
|
|
720
754
|
user_id = EXCLUDED.user_id,
|
|
@@ -739,7 +773,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_ontology_configs_kv_store_upsert();
|
|
|
739
773
|
|
|
740
774
|
CREATE TABLE IF NOT EXISTS resources (
|
|
741
775
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
742
|
-
tenant_id VARCHAR(100)
|
|
776
|
+
tenant_id VARCHAR(100),
|
|
743
777
|
user_id VARCHAR(256),
|
|
744
778
|
name VARCHAR(256),
|
|
745
779
|
uri VARCHAR(256),
|
|
@@ -785,7 +819,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_resources_field_provider ON embeddings
|
|
|
785
819
|
|
|
786
820
|
-- HNSW index for vector similarity search (created in background)
|
|
787
821
|
-- Note: This will be created by background thread after data load
|
|
788
|
-
-- CREATE INDEX
|
|
822
|
+
-- CREATE INDEX idx_embeddings_resources_vector_hnsw ON embeddings_resources
|
|
789
823
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
790
824
|
|
|
791
825
|
-- KV_STORE trigger for resources
|
|
@@ -800,6 +834,7 @@ BEGIN
|
|
|
800
834
|
RETURN OLD;
|
|
801
835
|
ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
|
|
802
836
|
-- Upsert to KV_STORE (O(1) lookup by entity_key)
|
|
837
|
+
-- tenant_id can be NULL (meaning public/shared data)
|
|
803
838
|
INSERT INTO kv_store (
|
|
804
839
|
entity_key,
|
|
805
840
|
entity_type,
|
|
@@ -810,7 +845,7 @@ BEGIN
|
|
|
810
845
|
graph_edges,
|
|
811
846
|
updated_at
|
|
812
847
|
) VALUES (
|
|
813
|
-
NEW.name::VARCHAR,
|
|
848
|
+
normalize_key(NEW.name::VARCHAR),
|
|
814
849
|
'resources',
|
|
815
850
|
NEW.id,
|
|
816
851
|
NEW.tenant_id,
|
|
@@ -819,7 +854,7 @@ BEGIN
|
|
|
819
854
|
COALESCE(NEW.graph_edges, '[]'::jsonb),
|
|
820
855
|
CURRENT_TIMESTAMP
|
|
821
856
|
)
|
|
822
|
-
ON CONFLICT (tenant_id, entity_key)
|
|
857
|
+
ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
|
|
823
858
|
DO UPDATE SET
|
|
824
859
|
entity_id = EXCLUDED.entity_id,
|
|
825
860
|
user_id = EXCLUDED.user_id,
|
|
@@ -844,7 +879,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_resources_kv_store_upsert();
|
|
|
844
879
|
|
|
845
880
|
CREATE TABLE IF NOT EXISTS schemas (
|
|
846
881
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
847
|
-
tenant_id VARCHAR(100)
|
|
882
|
+
tenant_id VARCHAR(100),
|
|
848
883
|
user_id VARCHAR(256),
|
|
849
884
|
name VARCHAR(256) NOT NULL,
|
|
850
885
|
content TEXT,
|
|
@@ -889,7 +924,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_schemas_field_provider ON embeddings_s
|
|
|
889
924
|
|
|
890
925
|
-- HNSW index for vector similarity search (created in background)
|
|
891
926
|
-- Note: This will be created by background thread after data load
|
|
892
|
-
-- CREATE INDEX
|
|
927
|
+
-- CREATE INDEX idx_embeddings_schemas_vector_hnsw ON embeddings_schemas
|
|
893
928
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
894
929
|
|
|
895
930
|
-- KV_STORE trigger for schemas
|
|
@@ -904,6 +939,7 @@ BEGIN
|
|
|
904
939
|
RETURN OLD;
|
|
905
940
|
ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
|
|
906
941
|
-- Upsert to KV_STORE (O(1) lookup by entity_key)
|
|
942
|
+
-- tenant_id can be NULL (meaning public/shared data)
|
|
907
943
|
INSERT INTO kv_store (
|
|
908
944
|
entity_key,
|
|
909
945
|
entity_type,
|
|
@@ -914,7 +950,7 @@ BEGIN
|
|
|
914
950
|
graph_edges,
|
|
915
951
|
updated_at
|
|
916
952
|
) VALUES (
|
|
917
|
-
NEW.
|
|
953
|
+
normalize_key(NEW.name::VARCHAR),
|
|
918
954
|
'schemas',
|
|
919
955
|
NEW.id,
|
|
920
956
|
NEW.tenant_id,
|
|
@@ -923,7 +959,7 @@ BEGIN
|
|
|
923
959
|
COALESCE(NEW.graph_edges, '[]'::jsonb),
|
|
924
960
|
CURRENT_TIMESTAMP
|
|
925
961
|
)
|
|
926
|
-
ON CONFLICT (tenant_id, entity_key)
|
|
962
|
+
ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
|
|
927
963
|
DO UPDATE SET
|
|
928
964
|
entity_id = EXCLUDED.entity_id,
|
|
929
965
|
user_id = EXCLUDED.user_id,
|
|
@@ -948,7 +984,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_schemas_kv_store_upsert();
|
|
|
948
984
|
|
|
949
985
|
CREATE TABLE IF NOT EXISTS sessions (
|
|
950
986
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
951
|
-
tenant_id VARCHAR(100)
|
|
987
|
+
tenant_id VARCHAR(100),
|
|
952
988
|
user_id VARCHAR(256),
|
|
953
989
|
name VARCHAR(256) NOT NULL,
|
|
954
990
|
mode TEXT,
|
|
@@ -996,7 +1032,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_sessions_field_provider ON embeddings_
|
|
|
996
1032
|
|
|
997
1033
|
-- HNSW index for vector similarity search (created in background)
|
|
998
1034
|
-- Note: This will be created by background thread after data load
|
|
999
|
-
-- CREATE INDEX
|
|
1035
|
+
-- CREATE INDEX idx_embeddings_sessions_vector_hnsw ON embeddings_sessions
|
|
1000
1036
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
1001
1037
|
|
|
1002
1038
|
-- KV_STORE trigger for sessions
|
|
@@ -1011,6 +1047,7 @@ BEGIN
|
|
|
1011
1047
|
RETURN OLD;
|
|
1012
1048
|
ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
|
|
1013
1049
|
-- Upsert to KV_STORE (O(1) lookup by entity_key)
|
|
1050
|
+
-- tenant_id can be NULL (meaning public/shared data)
|
|
1014
1051
|
INSERT INTO kv_store (
|
|
1015
1052
|
entity_key,
|
|
1016
1053
|
entity_type,
|
|
@@ -1021,7 +1058,7 @@ BEGIN
|
|
|
1021
1058
|
graph_edges,
|
|
1022
1059
|
updated_at
|
|
1023
1060
|
) VALUES (
|
|
1024
|
-
NEW.name::VARCHAR,
|
|
1061
|
+
normalize_key(NEW.name::VARCHAR),
|
|
1025
1062
|
'sessions',
|
|
1026
1063
|
NEW.id,
|
|
1027
1064
|
NEW.tenant_id,
|
|
@@ -1030,7 +1067,7 @@ BEGIN
|
|
|
1030
1067
|
COALESCE(NEW.graph_edges, '[]'::jsonb),
|
|
1031
1068
|
CURRENT_TIMESTAMP
|
|
1032
1069
|
)
|
|
1033
|
-
ON CONFLICT (tenant_id, entity_key)
|
|
1070
|
+
ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
|
|
1034
1071
|
DO UPDATE SET
|
|
1035
1072
|
entity_id = EXCLUDED.entity_id,
|
|
1036
1073
|
user_id = EXCLUDED.user_id,
|
|
@@ -1055,7 +1092,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_sessions_kv_store_upsert();
|
|
|
1055
1092
|
|
|
1056
1093
|
CREATE TABLE IF NOT EXISTS shared_sessions (
|
|
1057
1094
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
1058
|
-
tenant_id VARCHAR(100)
|
|
1095
|
+
tenant_id VARCHAR(100),
|
|
1059
1096
|
user_id VARCHAR(256),
|
|
1060
1097
|
session_id VARCHAR(256) NOT NULL,
|
|
1061
1098
|
owner_user_id VARCHAR(256) NOT NULL,
|
|
@@ -1086,6 +1123,7 @@ BEGIN
|
|
|
1086
1123
|
RETURN OLD;
|
|
1087
1124
|
ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
|
|
1088
1125
|
-- Upsert to KV_STORE (O(1) lookup by entity_key)
|
|
1126
|
+
-- tenant_id can be NULL (meaning public/shared data)
|
|
1089
1127
|
INSERT INTO kv_store (
|
|
1090
1128
|
entity_key,
|
|
1091
1129
|
entity_type,
|
|
@@ -1096,7 +1134,7 @@ BEGIN
|
|
|
1096
1134
|
graph_edges,
|
|
1097
1135
|
updated_at
|
|
1098
1136
|
) VALUES (
|
|
1099
|
-
NEW.id::VARCHAR,
|
|
1137
|
+
normalize_key(NEW.id::VARCHAR),
|
|
1100
1138
|
'shared_sessions',
|
|
1101
1139
|
NEW.id,
|
|
1102
1140
|
NEW.tenant_id,
|
|
@@ -1105,7 +1143,7 @@ BEGIN
|
|
|
1105
1143
|
COALESCE(NEW.graph_edges, '[]'::jsonb),
|
|
1106
1144
|
CURRENT_TIMESTAMP
|
|
1107
1145
|
)
|
|
1108
|
-
ON CONFLICT (tenant_id, entity_key)
|
|
1146
|
+
ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
|
|
1109
1147
|
DO UPDATE SET
|
|
1110
1148
|
entity_id = EXCLUDED.entity_id,
|
|
1111
1149
|
user_id = EXCLUDED.user_id,
|
|
@@ -1130,7 +1168,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_shared_sessions_kv_store_upsert();
|
|
|
1130
1168
|
|
|
1131
1169
|
CREATE TABLE IF NOT EXISTS users (
|
|
1132
1170
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
1133
|
-
tenant_id VARCHAR(100)
|
|
1171
|
+
tenant_id VARCHAR(100),
|
|
1134
1172
|
user_id VARCHAR(256),
|
|
1135
1173
|
name VARCHAR(256) NOT NULL,
|
|
1136
1174
|
email VARCHAR(256),
|
|
@@ -1180,7 +1218,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_users_field_provider ON embeddings_use
|
|
|
1180
1218
|
|
|
1181
1219
|
-- HNSW index for vector similarity search (created in background)
|
|
1182
1220
|
-- Note: This will be created by background thread after data load
|
|
1183
|
-
-- CREATE INDEX
|
|
1221
|
+
-- CREATE INDEX idx_embeddings_users_vector_hnsw ON embeddings_users
|
|
1184
1222
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
1185
1223
|
|
|
1186
1224
|
-- KV_STORE trigger for users
|
|
@@ -1195,6 +1233,7 @@ BEGIN
|
|
|
1195
1233
|
RETURN OLD;
|
|
1196
1234
|
ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
|
|
1197
1235
|
-- Upsert to KV_STORE (O(1) lookup by entity_key)
|
|
1236
|
+
-- tenant_id can be NULL (meaning public/shared data)
|
|
1198
1237
|
INSERT INTO kv_store (
|
|
1199
1238
|
entity_key,
|
|
1200
1239
|
entity_type,
|
|
@@ -1205,7 +1244,7 @@ BEGIN
|
|
|
1205
1244
|
graph_edges,
|
|
1206
1245
|
updated_at
|
|
1207
1246
|
) VALUES (
|
|
1208
|
-
NEW.name::VARCHAR,
|
|
1247
|
+
normalize_key(NEW.name::VARCHAR),
|
|
1209
1248
|
'users',
|
|
1210
1249
|
NEW.id,
|
|
1211
1250
|
NEW.tenant_id,
|
|
@@ -1214,7 +1253,7 @@ BEGIN
|
|
|
1214
1253
|
COALESCE(NEW.graph_edges, '[]'::jsonb),
|
|
1215
1254
|
CURRENT_TIMESTAMP
|
|
1216
1255
|
)
|
|
1217
|
-
ON CONFLICT (tenant_id, entity_key)
|
|
1256
|
+
ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
|
|
1218
1257
|
DO UPDATE SET
|
|
1219
1258
|
entity_id = EXCLUDED.entity_id,
|
|
1220
1259
|
user_id = EXCLUDED.user_id,
|
|
@@ -1411,12 +1450,12 @@ VALUES (
|
|
|
1411
1450
|
## Overview
|
|
1412
1451
|
|
|
1413
1452
|
The `File` entity is stored in the `files` table. Each record is uniquely
|
|
1414
|
-
identified by its `
|
|
1453
|
+
identified by its `name` field for lookups and graph traversal.
|
|
1415
1454
|
|
|
1416
1455
|
## Search Capabilities
|
|
1417
1456
|
|
|
1418
1457
|
This schema includes the `search_rem` tool which supports:
|
|
1419
|
-
- **LOOKUP**: O(1) exact match by
|
|
1458
|
+
- **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
|
|
1420
1459
|
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
1421
1460
|
- **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM files LIMIT 10`)
|
|
1422
1461
|
- **SQL**: Complex queries (e.g., `SELECT * FROM files WHERE ...`)
|
|
@@ -1426,7 +1465,7 @@ This schema includes the `search_rem` tool which supports:
|
|
|
1426
1465
|
| Property | Value |
|
|
1427
1466
|
|----------|-------|
|
|
1428
1467
|
| Table | `files` |
|
|
1429
|
-
| Entity Key | `
|
|
1468
|
+
| Entity Key | `name` |
|
|
1430
1469
|
| Embedding Fields | `content` |
|
|
1431
1470
|
| Tools | `search_rem` |
|
|
1432
1471
|
|
|
@@ -1513,9 +1552,9 @@ This schema includes the `search_rem` tool which supports:
|
|
|
1513
1552
|
- File processing status (pending, processing, completed, failed)
|
|
1514
1553
|
|
|
1515
1554
|
',
|
|
1516
|
-
'{"type": "object", "description": "\n File metadata and tracking.\n\n Represents files uploaded to or referenced by the REM system,\n tracking their metadata and processing status. Tenant isolation\n is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `files` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "File name", "title": "Name", "type": "string"}, "uri": {"description": "File storage URI (S3, local path, etc.)", "title": "Uri", "type": "string"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Extracted text content (if applicable)", "title": "Content"}, "timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File creation/modification timestamp", "title": "Timestamp"}, "size_bytes": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "File size in bytes", "title": "Size Bytes"}, "mime_type": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File MIME type", "title": "Mime Type"}, "processing_status": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": "pending", "description": "File processing status (pending, processing, completed, failed)", "title": "Processing Status"}}, "required": ["name", "uri"], "json_schema_extra": {"table_name": "files", "entity_key_field": "
|
|
1555
|
+
'{"type": "object", "description": "\n File metadata and tracking.\n\n Represents files uploaded to or referenced by the REM system,\n tracking their metadata and processing status. Tenant isolation\n is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `files` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "File name", "title": "Name", "type": "string"}, "uri": {"description": "File storage URI (S3, local path, etc.)", "title": "Uri", "type": "string"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Extracted text content (if applicable)", "title": "Content"}, "timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File creation/modification timestamp", "title": "Timestamp"}, "size_bytes": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "File size in bytes", "title": "Size Bytes"}, "mime_type": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File MIME type", "title": "Mime Type"}, "processing_status": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": "pending", "description": "File processing status (pending, processing, completed, failed)", "title": "Processing Status"}}, "required": ["name", "uri"], "json_schema_extra": {"table_name": "files", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.file.File", "tools": ["search_rem"], "default_search_table": "files", "has_embeddings": true}}'::jsonb,
|
|
1517
1556
|
'entity',
|
|
1518
|
-
'{"table_name": "files", "entity_key_field": "
|
|
1557
|
+
'{"table_name": "files", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.file.File"}'::jsonb
|
|
1519
1558
|
)
|
|
1520
1559
|
ON CONFLICT (id) DO UPDATE SET
|
|
1521
1560
|
name = EXCLUDED.name,
|
|
@@ -2008,18 +2047,19 @@ VALUES (
|
|
|
2008
2047
|
'Ontology',
|
|
2009
2048
|
'# Ontology
|
|
2010
2049
|
|
|
2011
|
-
Domain-specific knowledge
|
|
2050
|
+
Domain-specific knowledge - either agent-extracted or direct-loaded.
|
|
2012
2051
|
|
|
2013
2052
|
Attributes:
|
|
2014
2053
|
name: Human-readable label for this ontology instance
|
|
2015
|
-
|
|
2016
|
-
|
|
2017
|
-
|
|
2018
|
-
|
|
2019
|
-
|
|
2054
|
+
uri: External source reference (git://, s3://, https://) for direct-loaded ontologies
|
|
2055
|
+
file_id: Foreign key to File entity (optional - only for agent-extracted)
|
|
2056
|
+
agent_schema_id: Schema that performed extraction (optional - only for agent-extracted)
|
|
2057
|
+
provider_name: LLM provider used for extraction (optional)
|
|
2058
|
+
model_name: Specific model used (optional)
|
|
2059
|
+
extracted_data: Structured data - either extracted by agent or parsed from source
|
|
2020
2060
|
confidence_score: Optional confidence score from extraction (0.0-1.0)
|
|
2021
2061
|
extraction_timestamp: When extraction was performed
|
|
2022
|
-
|
|
2062
|
+
content: Text used for generating embedding
|
|
2023
2063
|
|
|
2024
2064
|
Inherited from CoreModel:
|
|
2025
2065
|
id: UUID or string identifier
|
|
@@ -2031,10 +2071,9 @@ Domain-specific knowledge extracted from files using custom agents.
|
|
|
2031
2071
|
graph_edges: Relationships to other entities
|
|
2032
2072
|
metadata: Flexible metadata storage
|
|
2033
2073
|
tags: Classification tags
|
|
2034
|
-
column: Database schema metadata
|
|
2035
2074
|
|
|
2036
2075
|
Example Usage:
|
|
2037
|
-
# CV
|
|
2076
|
+
# Agent-extracted: CV parsing
|
|
2038
2077
|
cv_ontology = Ontology(
|
|
2039
2078
|
name="john-doe-cv-2024",
|
|
2040
2079
|
file_id="file-uuid-123",
|
|
@@ -2043,63 +2082,48 @@ Domain-specific knowledge extracted from files using custom agents.
|
|
|
2043
2082
|
model_name="claude-sonnet-4-5-20250929",
|
|
2044
2083
|
extracted_data={
|
|
2045
2084
|
"candidate_name": "John Doe",
|
|
2046
|
-
"email": "john@example.com",
|
|
2047
2085
|
"skills": ["Python", "PostgreSQL", "Kubernetes"],
|
|
2048
|
-
"experience": [
|
|
2049
|
-
{
|
|
2050
|
-
"company": "TechCorp",
|
|
2051
|
-
"role": "Senior Engineer",
|
|
2052
|
-
"years": 3,
|
|
2053
|
-
"achievements": ["Led migration to k8s", "Reduced costs 40%"]
|
|
2054
|
-
}
|
|
2055
|
-
],
|
|
2056
|
-
"education": [
|
|
2057
|
-
{"degree": "BS Computer Science", "institution": "MIT", "year": 2018}
|
|
2058
|
-
]
|
|
2059
2086
|
},
|
|
2060
2087
|
confidence_score=0.95,
|
|
2061
|
-
tags=["cv", "engineering"
|
|
2088
|
+
tags=["cv", "engineering"]
|
|
2062
2089
|
)
|
|
2063
2090
|
|
|
2064
|
-
#
|
|
2065
|
-
|
|
2066
|
-
name="
|
|
2067
|
-
|
|
2068
|
-
|
|
2069
|
-
|
|
2070
|
-
|
|
2091
|
+
# Direct-loaded: Knowledge base from git
|
|
2092
|
+
api_docs = Ontology(
|
|
2093
|
+
name="rest-api-guide",
|
|
2094
|
+
uri="git://example-org/docs/api/rest-api-guide.md",
|
|
2095
|
+
content="# REST API Guide\n\nThis guide covers RESTful API design...",
|
|
2096
|
+
extracted_data={
|
|
2097
|
+
"type": "documentation",
|
|
2098
|
+
"category": "api",
|
|
2099
|
+
"version": "2.0",
|
|
2100
|
+
},
|
|
2101
|
+
tags=["api", "rest", "documentation"]
|
|
2102
|
+
)
|
|
2103
|
+
|
|
2104
|
+
# Direct-loaded: Technical spec from git
|
|
2105
|
+
config_spec = Ontology(
|
|
2106
|
+
name="config-schema",
|
|
2107
|
+
uri="git://example-org/docs/specs/config-schema.md",
|
|
2108
|
+
content="# Configuration Schema\n\nThis document defines...",
|
|
2071
2109
|
extracted_data={
|
|
2072
|
-
"
|
|
2073
|
-
"
|
|
2074
|
-
|
|
2075
|
-
{"name": "SupplyChain Inc", "role": "supplier"}
|
|
2076
|
-
],
|
|
2077
|
-
"effective_date": "2024-01-01",
|
|
2078
|
-
"termination_date": "2026-12-31",
|
|
2079
|
-
"payment_terms": {
|
|
2080
|
-
"amount": 500000,
|
|
2081
|
-
"currency": "USD",
|
|
2082
|
-
"frequency": "quarterly"
|
|
2083
|
-
},
|
|
2084
|
-
"key_obligations": [
|
|
2085
|
-
"Supplier must deliver within 30 days",
|
|
2086
|
-
"Buyer must pay within 60 days of invoice"
|
|
2087
|
-
]
|
|
2110
|
+
"type": "specification",
|
|
2111
|
+
"format": "yaml",
|
|
2112
|
+
"version": "1.0",
|
|
2088
2113
|
},
|
|
2089
|
-
|
|
2090
|
-
tags=["contract", "supplier", "procurement"]
|
|
2114
|
+
tags=["config", "schema", "specification"]
|
|
2091
2115
|
)
|
|
2092
2116
|
|
|
2093
2117
|
|
|
2094
2118
|
## Overview
|
|
2095
2119
|
|
|
2096
2120
|
The `Ontology` entity is stored in the `ontologies` table. Each record is uniquely
|
|
2097
|
-
identified by its `
|
|
2121
|
+
identified by its `name` field for lookups and graph traversal.
|
|
2098
2122
|
|
|
2099
2123
|
## Search Capabilities
|
|
2100
2124
|
|
|
2101
2125
|
This schema includes the `search_rem` tool which supports:
|
|
2102
|
-
- **LOOKUP**: O(1) exact match by
|
|
2126
|
+
- **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
|
|
2103
2127
|
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
2104
2128
|
- **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM ontologies LIMIT 10`)
|
|
2105
2129
|
- **SQL**: Complex queries (e.g., `SELECT * FROM ontologies WHERE ...`)
|
|
@@ -2109,8 +2133,8 @@ This schema includes the `search_rem` tool which supports:
|
|
|
2109
2133
|
| Property | Value |
|
|
2110
2134
|
|----------|-------|
|
|
2111
2135
|
| Table | `ontologies` |
|
|
2112
|
-
| Entity Key | `
|
|
2113
|
-
| Embedding Fields |
|
|
2136
|
+
| Entity Key | `name` |
|
|
2137
|
+
| Embedding Fields | `content` |
|
|
2114
2138
|
| Tools | `search_rem` |
|
|
2115
2139
|
|
|
2116
2140
|
## Fields
|
|
@@ -2164,25 +2188,29 @@ This schema includes the `search_rem` tool which supports:
|
|
|
2164
2188
|
- **Type**: `<class ''str''>`
|
|
2165
2189
|
- **Required**
|
|
2166
2190
|
|
|
2191
|
+
### `uri`
|
|
2192
|
+
- **Type**: `typing.Optional[str]`
|
|
2193
|
+
- **Optional**
|
|
2194
|
+
|
|
2167
2195
|
### `file_id`
|
|
2168
|
-
- **Type**: `uuid.UUID
|
|
2169
|
-
- **
|
|
2196
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
2197
|
+
- **Optional**
|
|
2170
2198
|
|
|
2171
2199
|
### `agent_schema_id`
|
|
2172
|
-
- **Type**:
|
|
2173
|
-
- **
|
|
2200
|
+
- **Type**: `typing.Optional[str]`
|
|
2201
|
+
- **Optional**
|
|
2174
2202
|
|
|
2175
2203
|
### `provider_name`
|
|
2176
|
-
- **Type**:
|
|
2177
|
-
- **
|
|
2204
|
+
- **Type**: `typing.Optional[str]`
|
|
2205
|
+
- **Optional**
|
|
2178
2206
|
|
|
2179
2207
|
### `model_name`
|
|
2180
|
-
- **Type**:
|
|
2181
|
-
- **
|
|
2208
|
+
- **Type**: `typing.Optional[str]`
|
|
2209
|
+
- **Optional**
|
|
2182
2210
|
|
|
2183
2211
|
### `extracted_data`
|
|
2184
|
-
- **Type**: `dict[str, typing.Any]`
|
|
2185
|
-
- **
|
|
2212
|
+
- **Type**: `typing.Optional[dict[str, typing.Any]]`
|
|
2213
|
+
- **Optional**
|
|
2186
2214
|
|
|
2187
2215
|
### `confidence_score`
|
|
2188
2216
|
- **Type**: `typing.Optional[float]`
|
|
@@ -2192,14 +2220,14 @@ This schema includes the `search_rem` tool which supports:
|
|
|
2192
2220
|
- **Type**: `typing.Optional[str]`
|
|
2193
2221
|
- **Optional**
|
|
2194
2222
|
|
|
2195
|
-
### `
|
|
2223
|
+
### `content`
|
|
2196
2224
|
- **Type**: `typing.Optional[str]`
|
|
2197
2225
|
- **Optional**
|
|
2198
2226
|
|
|
2199
2227
|
',
|
|
2200
|
-
'{"type": "object", "description": "Domain-specific knowledge
|
|
2228
|
+
'{"type": "object", "description": "Domain-specific knowledge - either agent-extracted or direct-loaded.\n\n Attributes:\n name: Human-readable label for this ontology instance\n uri: External source reference (git://, s3://, https://) for direct-loaded ontologies\n file_id: Foreign key to File entity (optional - only for agent-extracted)\n agent_schema_id: Schema that performed extraction (optional - only for agent-extracted)\n provider_name: LLM provider used for extraction (optional)\n model_name: Specific model used (optional)\n extracted_data: Structured data - either extracted by agent or parsed from source\n confidence_score: Optional confidence score from extraction (0.0-1.0)\n extraction_timestamp: When extraction was performed\n content: Text used for generating embedding\n\n Inherited from CoreModel:\n id: UUID or string identifier\n created_at: Entity creation timestamp\n updated_at: Last update timestamp\n deleted_at: Soft deletion timestamp\n tenant_id: Multi-tenancy isolation\n user_id: Ownership\n graph_edges: Relationships to other entities\n metadata: Flexible metadata storage\n tags: Classification tags\n\n Example Usage:\n # Agent-extracted: CV parsing\n cv_ontology = Ontology(\n name=\"john-doe-cv-2024\",\n file_id=\"file-uuid-123\",\n agent_schema_id=\"cv-parser-v1\",\n provider_name=\"anthropic\",\n model_name=\"claude-sonnet-4-5-20250929\",\n extracted_data={\n \"candidate_name\": \"John Doe\",\n \"skills\": [\"Python\", \"PostgreSQL\", \"Kubernetes\"],\n },\n confidence_score=0.95,\n tags=[\"cv\", \"engineering\"]\n )\n\n # Direct-loaded: Knowledge base from git\n api_docs = Ontology(\n name=\"rest-api-guide\",\n uri=\"git://example-org/docs/api/rest-api-guide.md\",\n content=\"# REST API Guide\\n\\nThis guide covers RESTful API design...\",\n extracted_data={\n \"type\": \"documentation\",\n \"category\": \"api\",\n \"version\": \"2.0\",\n },\n tags=[\"api\", \"rest\", \"documentation\"]\n )\n\n # Direct-loaded: Technical spec from git\n config_spec = Ontology(\n name=\"config-schema\",\n uri=\"git://example-org/docs/specs/config-schema.md\",\n content=\"# Configuration Schema\\n\\nThis document defines...\",\n extracted_data={\n \"type\": \"specification\",\n \"format\": \"yaml\",\n \"version\": \"1.0\",\n },\n tags=[\"config\", \"schema\", \"specification\"]\n )\n \n\nThis agent can search the `ontologies` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri"}, "file_id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "title": "File Id"}, "agent_schema_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Agent Schema Id"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}, "extracted_data": {"anyOf": [{"additionalProperties": true, "type": "object"}, {"type": "null"}], "default": null, "title": "Extracted Data"}, "confidence_score": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Confidence Score"}, "extraction_timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Extraction Timestamp"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Content"}}, "required": ["name"], "json_schema_extra": {"table_name": "ontologies", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.ontology.Ontology", "tools": ["search_rem"], "default_search_table": "ontologies", "has_embeddings": true}}'::jsonb,
|
|
2201
2229
|
'entity',
|
|
2202
|
-
'{"table_name": "ontologies", "entity_key_field": "
|
|
2230
|
+
'{"table_name": "ontologies", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.ontology.Ontology"}'::jsonb
|
|
2203
2231
|
)
|
|
2204
2232
|
ON CONFLICT (id) DO UPDATE SET
|
|
2205
2233
|
name = EXCLUDED.name,
|
|
@@ -2288,12 +2316,12 @@ User configuration for automatic ontology extraction.
|
|
|
2288
2316
|
## Overview
|
|
2289
2317
|
|
|
2290
2318
|
The `OntologyConfig` entity is stored in the `ontology_configs` table. Each record is uniquely
|
|
2291
|
-
identified by its `
|
|
2319
|
+
identified by its `name` field for lookups and graph traversal.
|
|
2292
2320
|
|
|
2293
2321
|
## Search Capabilities
|
|
2294
2322
|
|
|
2295
2323
|
This schema includes the `search_rem` tool which supports:
|
|
2296
|
-
- **LOOKUP**: O(1) exact match by
|
|
2324
|
+
- **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
|
|
2297
2325
|
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
2298
2326
|
- **SEARCH**: Semantic vector search on description (e.g., `SEARCH "concept" FROM ontology_configs LIMIT 10`)
|
|
2299
2327
|
- **SQL**: Complex queries (e.g., `SELECT * FROM ontology_configs WHERE ...`)
|
|
@@ -2303,7 +2331,7 @@ This schema includes the `search_rem` tool which supports:
|
|
|
2303
2331
|
| Property | Value |
|
|
2304
2332
|
|----------|-------|
|
|
2305
2333
|
| Table | `ontology_configs` |
|
|
2306
|
-
| Entity Key | `
|
|
2334
|
+
| Entity Key | `name` |
|
|
2307
2335
|
| Embedding Fields | `description` |
|
|
2308
2336
|
| Tools | `search_rem` |
|
|
2309
2337
|
|
|
@@ -2395,9 +2423,9 @@ This schema includes the `search_rem` tool which supports:
|
|
|
2395
2423
|
- **Optional**
|
|
2396
2424
|
|
|
2397
2425
|
',
|
|
2398
|
-
'{"type": "object", "description": "User configuration for automatic ontology extraction.\n\n Attributes:\n name: Human-readable config name\n agent_schema_id: Foreign key to Schema entity to use for extraction\n description: Purpose and scope of this config\n\n # File matching rules (ANY matching rule triggers extraction)\n mime_type_pattern: Regex pattern for file MIME types (e.g., \"application/pdf\")\n uri_pattern: Regex pattern for file URIs (e.g., \"s3://bucket/resumes/.*\")\n tag_filter: List of tags (file must have ALL tags to match)\n\n # Execution control\n priority: Execution order (higher = earlier, default 100)\n enabled: Whether this config is active (default True)\n\n # LLM provider configuration\n provider_name: Optional LLM provider override (defaults to settings)\n model_name: Optional model override (defaults to settings)\n\n Inherited from CoreModel:\n id, created_at, updated_at, deleted_at, tenant_id, user_id,\n graph_edges, metadata, tags, column\n\n Example Usage:\n # CV extraction for recruitment\n cv_config = OntologyConfig(\n name=\"recruitment-cv-parser\",\n agent_schema_id=\"cv-parser-v1\",\n description=\"Extract candidate information from resumes\",\n mime_type_pattern=\"application/pdf\",\n uri_pattern=\".*/resumes/.*\",\n tag_filter=[\"cv\", \"candidate\"],\n priority=100,\n enabled=True,\n tenant_id=\"acme-corp\",\n tags=[\"recruitment\", \"hr\"]\n )\n\n # Contract analysis for legal team\n contract_config = OntologyConfig(\n name=\"legal-contract-analyzer\",\n agent_schema_id=\"contract-parser-v2\",\n description=\"Extract key terms from supplier contracts\",\n mime_type_pattern=\"application/(pdf|msword|vnd.openxmlformats.*)\",\n tag_filter=[\"legal\", \"contract\"],\n priority=200, # Higher priority = runs first\n enabled=True,\n provider_name=\"openai\", # Override default provider\n model_name=\"gpt-4.1\",\n tenant_id=\"acme-corp\",\n tags=[\"legal\", \"procurement\"]\n )\n\n # Medical records for healthcare\n medical_config = OntologyConfig(\n name=\"medical-records-extractor\",\n agent_schema_id=\"medical-parser-v1\",\n description=\"Extract diagnoses and treatments from medical records\",\n mime_type_pattern=\"application/pdf\",\n tag_filter=[\"medical\", \"patient-record\"],\n priority=50,\n enabled=True,\n tenant_id=\"healthsystem\",\n tags=[\"medical\", \"hipaa-compliant\"]\n )\n \n\nThis agent can search the `ontology_configs` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "agent_schema_id": {"title": "Agent Schema Id", "type": "string"}, "description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Description"}, "mime_type_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Mime Type Pattern"}, "uri_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri Pattern"}, "tag_filter": {"default": [], "items": {"type": "string"}, "title": "Tag Filter", "type": "array"}, "priority": {"default": 100, "title": "Priority", "type": "integer"}, "enabled": {"default": true, "title": "Enabled", "type": "boolean"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}}, "required": ["name", "agent_schema_id"], "json_schema_extra": {"table_name": "ontology_configs", "entity_key_field": "
|
|
2426
|
+
'{"type": "object", "description": "User configuration for automatic ontology extraction.\n\n Attributes:\n name: Human-readable config name\n agent_schema_id: Foreign key to Schema entity to use for extraction\n description: Purpose and scope of this config\n\n # File matching rules (ANY matching rule triggers extraction)\n mime_type_pattern: Regex pattern for file MIME types (e.g., \"application/pdf\")\n uri_pattern: Regex pattern for file URIs (e.g., \"s3://bucket/resumes/.*\")\n tag_filter: List of tags (file must have ALL tags to match)\n\n # Execution control\n priority: Execution order (higher = earlier, default 100)\n enabled: Whether this config is active (default True)\n\n # LLM provider configuration\n provider_name: Optional LLM provider override (defaults to settings)\n model_name: Optional model override (defaults to settings)\n\n Inherited from CoreModel:\n id, created_at, updated_at, deleted_at, tenant_id, user_id,\n graph_edges, metadata, tags, column\n\n Example Usage:\n # CV extraction for recruitment\n cv_config = OntologyConfig(\n name=\"recruitment-cv-parser\",\n agent_schema_id=\"cv-parser-v1\",\n description=\"Extract candidate information from resumes\",\n mime_type_pattern=\"application/pdf\",\n uri_pattern=\".*/resumes/.*\",\n tag_filter=[\"cv\", \"candidate\"],\n priority=100,\n enabled=True,\n tenant_id=\"acme-corp\",\n tags=[\"recruitment\", \"hr\"]\n )\n\n # Contract analysis for legal team\n contract_config = OntologyConfig(\n name=\"legal-contract-analyzer\",\n agent_schema_id=\"contract-parser-v2\",\n description=\"Extract key terms from supplier contracts\",\n mime_type_pattern=\"application/(pdf|msword|vnd.openxmlformats.*)\",\n tag_filter=[\"legal\", \"contract\"],\n priority=200, # Higher priority = runs first\n enabled=True,\n provider_name=\"openai\", # Override default provider\n model_name=\"gpt-4.1\",\n tenant_id=\"acme-corp\",\n tags=[\"legal\", \"procurement\"]\n )\n\n # Medical records for healthcare\n medical_config = OntologyConfig(\n name=\"medical-records-extractor\",\n agent_schema_id=\"medical-parser-v1\",\n description=\"Extract diagnoses and treatments from medical records\",\n mime_type_pattern=\"application/pdf\",\n tag_filter=[\"medical\", \"patient-record\"],\n priority=50,\n enabled=True,\n tenant_id=\"healthsystem\",\n tags=[\"medical\", \"hipaa-compliant\"]\n )\n \n\nThis agent can search the `ontology_configs` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "agent_schema_id": {"title": "Agent Schema Id", "type": "string"}, "description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Description"}, "mime_type_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Mime Type Pattern"}, "uri_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri Pattern"}, "tag_filter": {"default": [], "items": {"type": "string"}, "title": "Tag Filter", "type": "array"}, "priority": {"default": 100, "title": "Priority", "type": "integer"}, "enabled": {"default": true, "title": "Enabled", "type": "boolean"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}}, "required": ["name", "agent_schema_id"], "json_schema_extra": {"table_name": "ontology_configs", "entity_key_field": "name", "embedding_fields": ["description"], "fully_qualified_name": "rem.models.entities.ontology_config.OntologyConfig", "tools": ["search_rem"], "default_search_table": "ontology_configs", "has_embeddings": true}}'::jsonb,
|
|
2399
2427
|
'entity',
|
|
2400
|
-
'{"table_name": "ontology_configs", "entity_key_field": "
|
|
2428
|
+
'{"table_name": "ontology_configs", "entity_key_field": "name", "embedding_fields": ["description"], "fqn": "rem.models.entities.ontology_config.OntologyConfig"}'::jsonb
|
|
2401
2429
|
)
|
|
2402
2430
|
ON CONFLICT (id) DO UPDATE SET
|
|
2403
2431
|
name = EXCLUDED.name,
|
|
@@ -2565,12 +2593,12 @@ VALUES (
|
|
|
2565
2593
|
## Overview
|
|
2566
2594
|
|
|
2567
2595
|
The `Schema` entity is stored in the `schemas` table. Each record is uniquely
|
|
2568
|
-
identified by its `
|
|
2596
|
+
identified by its `name` field for lookups and graph traversal.
|
|
2569
2597
|
|
|
2570
2598
|
## Search Capabilities
|
|
2571
2599
|
|
|
2572
2600
|
This schema includes the `search_rem` tool which supports:
|
|
2573
|
-
- **LOOKUP**: O(1) exact match by
|
|
2601
|
+
- **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
|
|
2574
2602
|
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
2575
2603
|
- **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM schemas LIMIT 10`)
|
|
2576
2604
|
- **SQL**: Complex queries (e.g., `SELECT * FROM schemas WHERE ...`)
|
|
@@ -2580,7 +2608,7 @@ This schema includes the `search_rem` tool which supports:
|
|
|
2580
2608
|
| Property | Value |
|
|
2581
2609
|
|----------|-------|
|
|
2582
2610
|
| Table | `schemas` |
|
|
2583
|
-
| Entity Key | `
|
|
2611
|
+
| Entity Key | `name` |
|
|
2584
2612
|
| Embedding Fields | `content` |
|
|
2585
2613
|
| Tools | `search_rem` |
|
|
2586
2614
|
|
|
@@ -2662,9 +2690,9 @@ This schema includes the `search_rem` tool which supports:
|
|
|
2662
2690
|
- JSON paths in extracted_data to embed for semantic search. Example: [''summary'', ''candidate_name'', ''skills''] for CV extraction. Values will be concatenated and embedded using configured embedding provider.
|
|
2663
2691
|
|
|
2664
2692
|
',
|
|
2665
|
-
'{"type": "object", "description": "\n Agent schema definition.\n\n Schemas define agents that can be dynamically loaded into Pydantic AI.\n They store JsonSchema specifications with embedded metadata for tools,\n resources, and system prompts.\n\n For ontology extraction agents:\n - `provider_configs` enables multi-provider support (test across Anthropic, OpenAI, etc.)\n - `embedding_fields` specifies which output fields should be embedded for semantic search\n\n Tenant isolation is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `schemas` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "Human-readable schema name (used as identifier)", "title": "Name", "type": "string"}, "content": {"default": "", "description": "Markdown documentation and instructions for the schema", "title": "Content", "type": "string"}, "spec": {"additionalProperties": true, "description": "JsonSchema specification defining the agent structure and capabilities", "title": "Spec", "type": "object"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Schema category distinguishing schema types. Values: ''agent'' (AI agents), ''evaluator'' (LLM-as-a-Judge evaluators). Maps directly from json_schema_extra.kind field during ingestion.", "title": "Category"}, "provider_configs": {"description": "Optional provider configurations for multi-provider testing. Each dict has ''provider_name'' and ''model_name''. Example: [{''provider_name'': ''anthropic'', ''model_name'': ''claude-sonnet-4-5''}]", "items": {"additionalProperties": true, "type": "object"}, "title": "Provider Configs", "type": "array"}, "embedding_fields": {"description": "JSON paths in extracted_data to embed for semantic search. Example: [''summary'', ''candidate_name'', ''skills''] for CV extraction. Values will be concatenated and embedded using configured embedding provider.", "items": {"type": "string"}, "title": "Embedding Fields", "type": "array"}}, "required": ["name", "spec"], "json_schema_extra": {"table_name": "schemas", "entity_key_field": "
|
|
2693
|
+
'{"type": "object", "description": "\n Agent schema definition.\n\n Schemas define agents that can be dynamically loaded into Pydantic AI.\n They store JsonSchema specifications with embedded metadata for tools,\n resources, and system prompts.\n\n For ontology extraction agents:\n - `provider_configs` enables multi-provider support (test across Anthropic, OpenAI, etc.)\n - `embedding_fields` specifies which output fields should be embedded for semantic search\n\n Tenant isolation is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `schemas` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "Human-readable schema name (used as identifier)", "title": "Name", "type": "string"}, "content": {"default": "", "description": "Markdown documentation and instructions for the schema", "title": "Content", "type": "string"}, "spec": {"additionalProperties": true, "description": "JsonSchema specification defining the agent structure and capabilities", "title": "Spec", "type": "object"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Schema category distinguishing schema types. Values: ''agent'' (AI agents), ''evaluator'' (LLM-as-a-Judge evaluators). Maps directly from json_schema_extra.kind field during ingestion.", "title": "Category"}, "provider_configs": {"description": "Optional provider configurations for multi-provider testing. Each dict has ''provider_name'' and ''model_name''. Example: [{''provider_name'': ''anthropic'', ''model_name'': ''claude-sonnet-4-5''}]", "items": {"additionalProperties": true, "type": "object"}, "title": "Provider Configs", "type": "array"}, "embedding_fields": {"description": "JSON paths in extracted_data to embed for semantic search. Example: [''summary'', ''candidate_name'', ''skills''] for CV extraction. Values will be concatenated and embedded using configured embedding provider.", "items": {"type": "string"}, "title": "Embedding Fields", "type": "array"}}, "required": ["name", "spec"], "json_schema_extra": {"table_name": "schemas", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.schema.Schema", "tools": ["search_rem"], "default_search_table": "schemas", "has_embeddings": true}}'::jsonb,
|
|
2666
2694
|
'entity',
|
|
2667
|
-
'{"table_name": "schemas", "entity_key_field": "
|
|
2695
|
+
'{"table_name": "schemas", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.schema.Schema"}'::jsonb
|
|
2668
2696
|
)
|
|
2669
2697
|
ON CONFLICT (id) DO UPDATE SET
|
|
2670
2698
|
name = EXCLUDED.name,
|
|
@@ -3115,7 +3143,7 @@ BEGIN
|
|
|
3115
3143
|
RAISE NOTICE ' ✓ image_resources (1 embeddable fields)';
|
|
3116
3144
|
RAISE NOTICE ' ✓ messages (1 embeddable fields)';
|
|
3117
3145
|
RAISE NOTICE ' ✓ moments (1 embeddable fields)';
|
|
3118
|
-
RAISE NOTICE ' ✓ ontologies';
|
|
3146
|
+
RAISE NOTICE ' ✓ ontologies (1 embeddable fields)';
|
|
3119
3147
|
RAISE NOTICE ' ✓ ontology_configs (1 embeddable fields)';
|
|
3120
3148
|
RAISE NOTICE ' ✓ resources (1 embeddable fields)';
|
|
3121
3149
|
RAISE NOTICE ' ✓ schemas (1 embeddable fields)';
|