remdb 0.3.146__py3-none-any.whl → 0.3.181__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/agentic/agents/__init__.py +16 -0
- rem/agentic/agents/agent_manager.py +311 -0
- rem/agentic/context.py +81 -3
- rem/agentic/context_builder.py +36 -9
- rem/agentic/mcp/tool_wrapper.py +43 -14
- rem/agentic/providers/pydantic_ai.py +76 -34
- rem/agentic/schema.py +4 -3
- rem/agentic/tools/rem_tools.py +11 -0
- rem/api/deps.py +3 -5
- rem/api/main.py +22 -3
- rem/api/mcp_router/resources.py +75 -14
- rem/api/mcp_router/server.py +28 -23
- rem/api/mcp_router/tools.py +177 -2
- rem/api/middleware/tracking.py +5 -5
- rem/api/routers/auth.py +352 -6
- rem/api/routers/chat/completions.py +5 -3
- rem/api/routers/chat/streaming.py +95 -22
- rem/api/routers/messages.py +24 -15
- rem/auth/__init__.py +13 -3
- rem/auth/jwt.py +352 -0
- rem/auth/middleware.py +70 -30
- rem/auth/providers/__init__.py +4 -1
- rem/auth/providers/email.py +215 -0
- rem/cli/commands/ask.py +1 -1
- rem/cli/commands/db.py +118 -54
- rem/models/entities/__init__.py +4 -0
- rem/models/entities/ontology.py +93 -101
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +1 -0
- rem/schemas/agents/core/agent-builder.yaml +235 -0
- rem/services/__init__.py +3 -1
- rem/services/content/service.py +4 -3
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +522 -0
- rem/services/email/templates.py +360 -0
- rem/services/embeddings/worker.py +26 -12
- rem/services/postgres/README.md +38 -0
- rem/services/postgres/diff_service.py +19 -3
- rem/services/postgres/pydantic_to_sqlalchemy.py +37 -2
- rem/services/postgres/register_type.py +1 -1
- rem/services/postgres/repository.py +37 -25
- rem/services/postgres/schema_generator.py +5 -5
- rem/services/postgres/sql_builder.py +6 -5
- rem/services/session/compression.py +113 -50
- rem/services/session/reload.py +14 -7
- rem/services/user_service.py +41 -9
- rem/settings.py +182 -1
- rem/sql/background_indexes.sql +5 -0
- rem/sql/migrations/001_install.sql +33 -4
- rem/sql/migrations/002_install_models.sql +204 -186
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/utils/model_helpers.py +101 -0
- rem/utils/schema_loader.py +45 -7
- {remdb-0.3.146.dist-info → remdb-0.3.181.dist-info}/METADATA +1 -1
- {remdb-0.3.146.dist-info → remdb-0.3.181.dist-info}/RECORD +57 -48
- {remdb-0.3.146.dist-info → remdb-0.3.181.dist-info}/WHEEL +0 -0
- {remdb-0.3.146.dist-info → remdb-0.3.181.dist-info}/entry_points.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
-- REM Model Schema (install_models.sql)
|
|
2
2
|
-- Generated from Pydantic models
|
|
3
3
|
-- Source: model registry
|
|
4
|
-
-- Generated at: 2025-
|
|
4
|
+
-- Generated at: 2025-12-11T08:40:31.986919
|
|
5
5
|
--
|
|
6
6
|
-- DO NOT EDIT MANUALLY - Regenerate with: rem db schema generate
|
|
7
7
|
--
|
|
@@ -56,11 +56,11 @@ CREATE TABLE IF NOT EXISTS feedbacks (
|
|
|
56
56
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
57
57
|
);
|
|
58
58
|
|
|
59
|
-
CREATE INDEX
|
|
60
|
-
CREATE INDEX
|
|
61
|
-
CREATE INDEX
|
|
62
|
-
CREATE INDEX
|
|
63
|
-
CREATE INDEX
|
|
59
|
+
CREATE INDEX idx_feedbacks_tenant ON feedbacks (tenant_id);
|
|
60
|
+
CREATE INDEX idx_feedbacks_user ON feedbacks (user_id);
|
|
61
|
+
CREATE INDEX idx_feedbacks_graph_edges ON feedbacks USING GIN (graph_edges);
|
|
62
|
+
CREATE INDEX idx_feedbacks_metadata ON feedbacks USING GIN (metadata);
|
|
63
|
+
CREATE INDEX idx_feedbacks_tags ON feedbacks USING GIN (tags);
|
|
64
64
|
|
|
65
65
|
-- KV_STORE trigger for feedbacks
|
|
66
66
|
-- Trigger function to maintain KV_STORE for feedbacks
|
|
@@ -84,7 +84,7 @@ BEGIN
|
|
|
84
84
|
graph_edges,
|
|
85
85
|
updated_at
|
|
86
86
|
) VALUES (
|
|
87
|
-
NEW.id::VARCHAR,
|
|
87
|
+
normalize_key(NEW.id::VARCHAR),
|
|
88
88
|
'feedbacks',
|
|
89
89
|
NEW.id,
|
|
90
90
|
NEW.tenant_id,
|
|
@@ -135,11 +135,11 @@ CREATE TABLE IF NOT EXISTS files (
|
|
|
135
135
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
136
136
|
);
|
|
137
137
|
|
|
138
|
-
CREATE INDEX
|
|
139
|
-
CREATE INDEX
|
|
140
|
-
CREATE INDEX
|
|
141
|
-
CREATE INDEX
|
|
142
|
-
CREATE INDEX
|
|
138
|
+
CREATE INDEX idx_files_tenant ON files (tenant_id);
|
|
139
|
+
CREATE INDEX idx_files_user ON files (user_id);
|
|
140
|
+
CREATE INDEX idx_files_graph_edges ON files USING GIN (graph_edges);
|
|
141
|
+
CREATE INDEX idx_files_metadata ON files USING GIN (metadata);
|
|
142
|
+
CREATE INDEX idx_files_tags ON files USING GIN (tags);
|
|
143
143
|
|
|
144
144
|
-- Embeddings for files
|
|
145
145
|
CREATE TABLE IF NOT EXISTS embeddings_files (
|
|
@@ -157,14 +157,14 @@ CREATE TABLE IF NOT EXISTS embeddings_files (
|
|
|
157
157
|
);
|
|
158
158
|
|
|
159
159
|
-- Index for entity lookup (get all embeddings for entity)
|
|
160
|
-
CREATE INDEX
|
|
160
|
+
CREATE INDEX idx_embeddings_files_entity ON embeddings_files (entity_id);
|
|
161
161
|
|
|
162
162
|
-- Index for field + provider lookup
|
|
163
|
-
CREATE INDEX
|
|
163
|
+
CREATE INDEX idx_embeddings_files_field_provider ON embeddings_files (field_name, provider);
|
|
164
164
|
|
|
165
165
|
-- HNSW index for vector similarity search (created in background)
|
|
166
166
|
-- Note: This will be created by background thread after data load
|
|
167
|
-
-- CREATE INDEX
|
|
167
|
+
-- CREATE INDEX idx_embeddings_files_vector_hnsw ON embeddings_files
|
|
168
168
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
169
169
|
|
|
170
170
|
-- KV_STORE trigger for files
|
|
@@ -189,7 +189,7 @@ BEGIN
|
|
|
189
189
|
graph_edges,
|
|
190
190
|
updated_at
|
|
191
191
|
) VALUES (
|
|
192
|
-
NEW.
|
|
192
|
+
normalize_key(NEW.name::VARCHAR),
|
|
193
193
|
'files',
|
|
194
194
|
NEW.id,
|
|
195
195
|
NEW.tenant_id,
|
|
@@ -248,11 +248,11 @@ CREATE TABLE IF NOT EXISTS image_resources (
|
|
|
248
248
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
249
249
|
);
|
|
250
250
|
|
|
251
|
-
CREATE INDEX
|
|
252
|
-
CREATE INDEX
|
|
253
|
-
CREATE INDEX
|
|
254
|
-
CREATE INDEX
|
|
255
|
-
CREATE INDEX
|
|
251
|
+
CREATE INDEX idx_image_resources_tenant ON image_resources (tenant_id);
|
|
252
|
+
CREATE INDEX idx_image_resources_user ON image_resources (user_id);
|
|
253
|
+
CREATE INDEX idx_image_resources_graph_edges ON image_resources USING GIN (graph_edges);
|
|
254
|
+
CREATE INDEX idx_image_resources_metadata ON image_resources USING GIN (metadata);
|
|
255
|
+
CREATE INDEX idx_image_resources_tags ON image_resources USING GIN (tags);
|
|
256
256
|
|
|
257
257
|
-- Embeddings for image_resources
|
|
258
258
|
CREATE TABLE IF NOT EXISTS embeddings_image_resources (
|
|
@@ -270,14 +270,14 @@ CREATE TABLE IF NOT EXISTS embeddings_image_resources (
|
|
|
270
270
|
);
|
|
271
271
|
|
|
272
272
|
-- Index for entity lookup (get all embeddings for entity)
|
|
273
|
-
CREATE INDEX
|
|
273
|
+
CREATE INDEX idx_embeddings_image_resources_entity ON embeddings_image_resources (entity_id);
|
|
274
274
|
|
|
275
275
|
-- Index for field + provider lookup
|
|
276
|
-
CREATE INDEX
|
|
276
|
+
CREATE INDEX idx_embeddings_image_resources_field_provider ON embeddings_image_resources (field_name, provider);
|
|
277
277
|
|
|
278
278
|
-- HNSW index for vector similarity search (created in background)
|
|
279
279
|
-- Note: This will be created by background thread after data load
|
|
280
|
-
-- CREATE INDEX
|
|
280
|
+
-- CREATE INDEX idx_embeddings_image_resources_vector_hnsw ON embeddings_image_resources
|
|
281
281
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
282
282
|
|
|
283
283
|
-- KV_STORE trigger for image_resources
|
|
@@ -302,7 +302,7 @@ BEGIN
|
|
|
302
302
|
graph_edges,
|
|
303
303
|
updated_at
|
|
304
304
|
) VALUES (
|
|
305
|
-
NEW.name::VARCHAR,
|
|
305
|
+
normalize_key(NEW.name::VARCHAR),
|
|
306
306
|
'image_resources',
|
|
307
307
|
NEW.id,
|
|
308
308
|
NEW.tenant_id,
|
|
@@ -354,11 +354,11 @@ CREATE TABLE IF NOT EXISTS messages (
|
|
|
354
354
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
355
355
|
);
|
|
356
356
|
|
|
357
|
-
CREATE INDEX
|
|
358
|
-
CREATE INDEX
|
|
359
|
-
CREATE INDEX
|
|
360
|
-
CREATE INDEX
|
|
361
|
-
CREATE INDEX
|
|
357
|
+
CREATE INDEX idx_messages_tenant ON messages (tenant_id);
|
|
358
|
+
CREATE INDEX idx_messages_user ON messages (user_id);
|
|
359
|
+
CREATE INDEX idx_messages_graph_edges ON messages USING GIN (graph_edges);
|
|
360
|
+
CREATE INDEX idx_messages_metadata ON messages USING GIN (metadata);
|
|
361
|
+
CREATE INDEX idx_messages_tags ON messages USING GIN (tags);
|
|
362
362
|
|
|
363
363
|
-- Embeddings for messages
|
|
364
364
|
CREATE TABLE IF NOT EXISTS embeddings_messages (
|
|
@@ -376,14 +376,14 @@ CREATE TABLE IF NOT EXISTS embeddings_messages (
|
|
|
376
376
|
);
|
|
377
377
|
|
|
378
378
|
-- Index for entity lookup (get all embeddings for entity)
|
|
379
|
-
CREATE INDEX
|
|
379
|
+
CREATE INDEX idx_embeddings_messages_entity ON embeddings_messages (entity_id);
|
|
380
380
|
|
|
381
381
|
-- Index for field + provider lookup
|
|
382
|
-
CREATE INDEX
|
|
382
|
+
CREATE INDEX idx_embeddings_messages_field_provider ON embeddings_messages (field_name, provider);
|
|
383
383
|
|
|
384
384
|
-- HNSW index for vector similarity search (created in background)
|
|
385
385
|
-- Note: This will be created by background thread after data load
|
|
386
|
-
-- CREATE INDEX
|
|
386
|
+
-- CREATE INDEX idx_embeddings_messages_vector_hnsw ON embeddings_messages
|
|
387
387
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
388
388
|
|
|
389
389
|
-- KV_STORE trigger for messages
|
|
@@ -408,7 +408,7 @@ BEGIN
|
|
|
408
408
|
graph_edges,
|
|
409
409
|
updated_at
|
|
410
410
|
) VALUES (
|
|
411
|
-
NEW.id::VARCHAR,
|
|
411
|
+
normalize_key(NEW.id::VARCHAR),
|
|
412
412
|
'messages',
|
|
413
413
|
NEW.id,
|
|
414
414
|
NEW.tenant_id,
|
|
@@ -462,11 +462,11 @@ CREATE TABLE IF NOT EXISTS moments (
|
|
|
462
462
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
463
463
|
);
|
|
464
464
|
|
|
465
|
-
CREATE INDEX
|
|
466
|
-
CREATE INDEX
|
|
467
|
-
CREATE INDEX
|
|
468
|
-
CREATE INDEX
|
|
469
|
-
CREATE INDEX
|
|
465
|
+
CREATE INDEX idx_moments_tenant ON moments (tenant_id);
|
|
466
|
+
CREATE INDEX idx_moments_user ON moments (user_id);
|
|
467
|
+
CREATE INDEX idx_moments_graph_edges ON moments USING GIN (graph_edges);
|
|
468
|
+
CREATE INDEX idx_moments_metadata ON moments USING GIN (metadata);
|
|
469
|
+
CREATE INDEX idx_moments_tags ON moments USING GIN (tags);
|
|
470
470
|
|
|
471
471
|
-- Embeddings for moments
|
|
472
472
|
CREATE TABLE IF NOT EXISTS embeddings_moments (
|
|
@@ -484,14 +484,14 @@ CREATE TABLE IF NOT EXISTS embeddings_moments (
|
|
|
484
484
|
);
|
|
485
485
|
|
|
486
486
|
-- Index for entity lookup (get all embeddings for entity)
|
|
487
|
-
CREATE INDEX
|
|
487
|
+
CREATE INDEX idx_embeddings_moments_entity ON embeddings_moments (entity_id);
|
|
488
488
|
|
|
489
489
|
-- Index for field + provider lookup
|
|
490
|
-
CREATE INDEX
|
|
490
|
+
CREATE INDEX idx_embeddings_moments_field_provider ON embeddings_moments (field_name, provider);
|
|
491
491
|
|
|
492
492
|
-- HNSW index for vector similarity search (created in background)
|
|
493
493
|
-- Note: This will be created by background thread after data load
|
|
494
|
-
-- CREATE INDEX
|
|
494
|
+
-- CREATE INDEX idx_embeddings_moments_vector_hnsw ON embeddings_moments
|
|
495
495
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
496
496
|
|
|
497
497
|
-- KV_STORE trigger for moments
|
|
@@ -516,7 +516,7 @@ BEGIN
|
|
|
516
516
|
graph_edges,
|
|
517
517
|
updated_at
|
|
518
518
|
) VALUES (
|
|
519
|
-
NEW.name::VARCHAR,
|
|
519
|
+
normalize_key(NEW.name::VARCHAR),
|
|
520
520
|
'moments',
|
|
521
521
|
NEW.id,
|
|
522
522
|
NEW.tenant_id,
|
|
@@ -553,14 +553,15 @@ CREATE TABLE IF NOT EXISTS ontologies (
|
|
|
553
553
|
tenant_id VARCHAR(100) NOT NULL,
|
|
554
554
|
user_id VARCHAR(256),
|
|
555
555
|
name VARCHAR(256) NOT NULL,
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
556
|
+
uri VARCHAR(256),
|
|
557
|
+
file_id UUID,
|
|
558
|
+
agent_schema_id VARCHAR(256),
|
|
559
|
+
provider_name VARCHAR(256),
|
|
560
|
+
model_name VARCHAR(256),
|
|
561
|
+
extracted_data JSONB,
|
|
561
562
|
confidence_score DOUBLE PRECISION,
|
|
562
563
|
extraction_timestamp VARCHAR(256),
|
|
563
|
-
|
|
564
|
+
content TEXT,
|
|
564
565
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
565
566
|
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
566
567
|
deleted_at TIMESTAMP,
|
|
@@ -569,11 +570,37 @@ CREATE TABLE IF NOT EXISTS ontologies (
|
|
|
569
570
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
570
571
|
);
|
|
571
572
|
|
|
572
|
-
CREATE INDEX
|
|
573
|
-
CREATE INDEX
|
|
574
|
-
CREATE INDEX
|
|
575
|
-
CREATE INDEX
|
|
576
|
-
CREATE INDEX
|
|
573
|
+
CREATE INDEX idx_ontologies_tenant ON ontologies (tenant_id);
|
|
574
|
+
CREATE INDEX idx_ontologies_user ON ontologies (user_id);
|
|
575
|
+
CREATE INDEX idx_ontologies_graph_edges ON ontologies USING GIN (graph_edges);
|
|
576
|
+
CREATE INDEX idx_ontologies_metadata ON ontologies USING GIN (metadata);
|
|
577
|
+
CREATE INDEX idx_ontologies_tags ON ontologies USING GIN (tags);
|
|
578
|
+
|
|
579
|
+
-- Embeddings for ontologies
|
|
580
|
+
CREATE TABLE IF NOT EXISTS embeddings_ontologies (
|
|
581
|
+
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
582
|
+
entity_id UUID NOT NULL REFERENCES ontologies(id) ON DELETE CASCADE,
|
|
583
|
+
field_name VARCHAR(100) NOT NULL,
|
|
584
|
+
provider VARCHAR(50) NOT NULL DEFAULT 'openai',
|
|
585
|
+
model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
|
|
586
|
+
embedding vector(1536) NOT NULL,
|
|
587
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
588
|
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
589
|
+
|
|
590
|
+
-- Unique: one embedding per entity per field per provider
|
|
591
|
+
UNIQUE (entity_id, field_name, provider)
|
|
592
|
+
);
|
|
593
|
+
|
|
594
|
+
-- Index for entity lookup (get all embeddings for entity)
|
|
595
|
+
CREATE INDEX idx_embeddings_ontologies_entity ON embeddings_ontologies (entity_id);
|
|
596
|
+
|
|
597
|
+
-- Index for field + provider lookup
|
|
598
|
+
CREATE INDEX idx_embeddings_ontologies_field_provider ON embeddings_ontologies (field_name, provider);
|
|
599
|
+
|
|
600
|
+
-- HNSW index for vector similarity search (created in background)
|
|
601
|
+
-- Note: This will be created by background thread after data load
|
|
602
|
+
-- CREATE INDEX idx_embeddings_ontologies_vector_hnsw ON embeddings_ontologies
|
|
603
|
+
-- USING hnsw (embedding vector_cosine_ops);
|
|
577
604
|
|
|
578
605
|
-- KV_STORE trigger for ontologies
|
|
579
606
|
-- Trigger function to maintain KV_STORE for ontologies
|
|
@@ -597,7 +624,7 @@ BEGIN
|
|
|
597
624
|
graph_edges,
|
|
598
625
|
updated_at
|
|
599
626
|
) VALUES (
|
|
600
|
-
NEW.
|
|
627
|
+
normalize_key(NEW.name::VARCHAR),
|
|
601
628
|
'ontologies',
|
|
602
629
|
NEW.id,
|
|
603
630
|
NEW.tenant_id,
|
|
@@ -651,11 +678,11 @@ CREATE TABLE IF NOT EXISTS ontology_configs (
|
|
|
651
678
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
652
679
|
);
|
|
653
680
|
|
|
654
|
-
CREATE INDEX
|
|
655
|
-
CREATE INDEX
|
|
656
|
-
CREATE INDEX
|
|
657
|
-
CREATE INDEX
|
|
658
|
-
CREATE INDEX
|
|
681
|
+
CREATE INDEX idx_ontology_configs_tenant ON ontology_configs (tenant_id);
|
|
682
|
+
CREATE INDEX idx_ontology_configs_user ON ontology_configs (user_id);
|
|
683
|
+
CREATE INDEX idx_ontology_configs_graph_edges ON ontology_configs USING GIN (graph_edges);
|
|
684
|
+
CREATE INDEX idx_ontology_configs_metadata ON ontology_configs USING GIN (metadata);
|
|
685
|
+
CREATE INDEX idx_ontology_configs_tags ON ontology_configs USING GIN (tags);
|
|
659
686
|
|
|
660
687
|
-- Embeddings for ontology_configs
|
|
661
688
|
CREATE TABLE IF NOT EXISTS embeddings_ontology_configs (
|
|
@@ -673,14 +700,14 @@ CREATE TABLE IF NOT EXISTS embeddings_ontology_configs (
|
|
|
673
700
|
);
|
|
674
701
|
|
|
675
702
|
-- Index for entity lookup (get all embeddings for entity)
|
|
676
|
-
CREATE INDEX
|
|
703
|
+
CREATE INDEX idx_embeddings_ontology_configs_entity ON embeddings_ontology_configs (entity_id);
|
|
677
704
|
|
|
678
705
|
-- Index for field + provider lookup
|
|
679
|
-
CREATE INDEX
|
|
706
|
+
CREATE INDEX idx_embeddings_ontology_configs_field_provider ON embeddings_ontology_configs (field_name, provider);
|
|
680
707
|
|
|
681
708
|
-- HNSW index for vector similarity search (created in background)
|
|
682
709
|
-- Note: This will be created by background thread after data load
|
|
683
|
-
-- CREATE INDEX
|
|
710
|
+
-- CREATE INDEX idx_embeddings_ontology_configs_vector_hnsw ON embeddings_ontology_configs
|
|
684
711
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
685
712
|
|
|
686
713
|
-- KV_STORE trigger for ontology_configs
|
|
@@ -705,7 +732,7 @@ BEGIN
|
|
|
705
732
|
graph_edges,
|
|
706
733
|
updated_at
|
|
707
734
|
) VALUES (
|
|
708
|
-
NEW.
|
|
735
|
+
normalize_key(NEW.name::VARCHAR),
|
|
709
736
|
'ontology_configs',
|
|
710
737
|
NEW.id,
|
|
711
738
|
NEW.tenant_id,
|
|
@@ -756,11 +783,11 @@ CREATE TABLE IF NOT EXISTS resources (
|
|
|
756
783
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
757
784
|
);
|
|
758
785
|
|
|
759
|
-
CREATE INDEX
|
|
760
|
-
CREATE INDEX
|
|
761
|
-
CREATE INDEX
|
|
762
|
-
CREATE INDEX
|
|
763
|
-
CREATE INDEX
|
|
786
|
+
CREATE INDEX idx_resources_tenant ON resources (tenant_id);
|
|
787
|
+
CREATE INDEX idx_resources_user ON resources (user_id);
|
|
788
|
+
CREATE INDEX idx_resources_graph_edges ON resources USING GIN (graph_edges);
|
|
789
|
+
CREATE INDEX idx_resources_metadata ON resources USING GIN (metadata);
|
|
790
|
+
CREATE INDEX idx_resources_tags ON resources USING GIN (tags);
|
|
764
791
|
|
|
765
792
|
-- Embeddings for resources
|
|
766
793
|
CREATE TABLE IF NOT EXISTS embeddings_resources (
|
|
@@ -778,14 +805,14 @@ CREATE TABLE IF NOT EXISTS embeddings_resources (
|
|
|
778
805
|
);
|
|
779
806
|
|
|
780
807
|
-- Index for entity lookup (get all embeddings for entity)
|
|
781
|
-
CREATE INDEX
|
|
808
|
+
CREATE INDEX idx_embeddings_resources_entity ON embeddings_resources (entity_id);
|
|
782
809
|
|
|
783
810
|
-- Index for field + provider lookup
|
|
784
|
-
CREATE INDEX
|
|
811
|
+
CREATE INDEX idx_embeddings_resources_field_provider ON embeddings_resources (field_name, provider);
|
|
785
812
|
|
|
786
813
|
-- HNSW index for vector similarity search (created in background)
|
|
787
814
|
-- Note: This will be created by background thread after data load
|
|
788
|
-
-- CREATE INDEX
|
|
815
|
+
-- CREATE INDEX idx_embeddings_resources_vector_hnsw ON embeddings_resources
|
|
789
816
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
790
817
|
|
|
791
818
|
-- KV_STORE trigger for resources
|
|
@@ -810,7 +837,7 @@ BEGIN
|
|
|
810
837
|
graph_edges,
|
|
811
838
|
updated_at
|
|
812
839
|
) VALUES (
|
|
813
|
-
NEW.name::VARCHAR,
|
|
840
|
+
normalize_key(NEW.name::VARCHAR),
|
|
814
841
|
'resources',
|
|
815
842
|
NEW.id,
|
|
816
843
|
NEW.tenant_id,
|
|
@@ -860,11 +887,11 @@ CREATE TABLE IF NOT EXISTS schemas (
|
|
|
860
887
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
861
888
|
);
|
|
862
889
|
|
|
863
|
-
CREATE INDEX
|
|
864
|
-
CREATE INDEX
|
|
865
|
-
CREATE INDEX
|
|
866
|
-
CREATE INDEX
|
|
867
|
-
CREATE INDEX
|
|
890
|
+
CREATE INDEX idx_schemas_tenant ON schemas (tenant_id);
|
|
891
|
+
CREATE INDEX idx_schemas_user ON schemas (user_id);
|
|
892
|
+
CREATE INDEX idx_schemas_graph_edges ON schemas USING GIN (graph_edges);
|
|
893
|
+
CREATE INDEX idx_schemas_metadata ON schemas USING GIN (metadata);
|
|
894
|
+
CREATE INDEX idx_schemas_tags ON schemas USING GIN (tags);
|
|
868
895
|
|
|
869
896
|
-- Embeddings for schemas
|
|
870
897
|
CREATE TABLE IF NOT EXISTS embeddings_schemas (
|
|
@@ -882,14 +909,14 @@ CREATE TABLE IF NOT EXISTS embeddings_schemas (
|
|
|
882
909
|
);
|
|
883
910
|
|
|
884
911
|
-- Index for entity lookup (get all embeddings for entity)
|
|
885
|
-
CREATE INDEX
|
|
912
|
+
CREATE INDEX idx_embeddings_schemas_entity ON embeddings_schemas (entity_id);
|
|
886
913
|
|
|
887
914
|
-- Index for field + provider lookup
|
|
888
|
-
CREATE INDEX
|
|
915
|
+
CREATE INDEX idx_embeddings_schemas_field_provider ON embeddings_schemas (field_name, provider);
|
|
889
916
|
|
|
890
917
|
-- HNSW index for vector similarity search (created in background)
|
|
891
918
|
-- Note: This will be created by background thread after data load
|
|
892
|
-
-- CREATE INDEX
|
|
919
|
+
-- CREATE INDEX idx_embeddings_schemas_vector_hnsw ON embeddings_schemas
|
|
893
920
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
894
921
|
|
|
895
922
|
-- KV_STORE trigger for schemas
|
|
@@ -914,7 +941,7 @@ BEGIN
|
|
|
914
941
|
graph_edges,
|
|
915
942
|
updated_at
|
|
916
943
|
) VALUES (
|
|
917
|
-
NEW.
|
|
944
|
+
normalize_key(NEW.name::VARCHAR),
|
|
918
945
|
'schemas',
|
|
919
946
|
NEW.id,
|
|
920
947
|
NEW.tenant_id,
|
|
@@ -967,11 +994,11 @@ CREATE TABLE IF NOT EXISTS sessions (
|
|
|
967
994
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
968
995
|
);
|
|
969
996
|
|
|
970
|
-
CREATE INDEX
|
|
971
|
-
CREATE INDEX
|
|
972
|
-
CREATE INDEX
|
|
973
|
-
CREATE INDEX
|
|
974
|
-
CREATE INDEX
|
|
997
|
+
CREATE INDEX idx_sessions_tenant ON sessions (tenant_id);
|
|
998
|
+
CREATE INDEX idx_sessions_user ON sessions (user_id);
|
|
999
|
+
CREATE INDEX idx_sessions_graph_edges ON sessions USING GIN (graph_edges);
|
|
1000
|
+
CREATE INDEX idx_sessions_metadata ON sessions USING GIN (metadata);
|
|
1001
|
+
CREATE INDEX idx_sessions_tags ON sessions USING GIN (tags);
|
|
975
1002
|
|
|
976
1003
|
-- Embeddings for sessions
|
|
977
1004
|
CREATE TABLE IF NOT EXISTS embeddings_sessions (
|
|
@@ -989,14 +1016,14 @@ CREATE TABLE IF NOT EXISTS embeddings_sessions (
|
|
|
989
1016
|
);
|
|
990
1017
|
|
|
991
1018
|
-- Index for entity lookup (get all embeddings for entity)
|
|
992
|
-
CREATE INDEX
|
|
1019
|
+
CREATE INDEX idx_embeddings_sessions_entity ON embeddings_sessions (entity_id);
|
|
993
1020
|
|
|
994
1021
|
-- Index for field + provider lookup
|
|
995
|
-
CREATE INDEX
|
|
1022
|
+
CREATE INDEX idx_embeddings_sessions_field_provider ON embeddings_sessions (field_name, provider);
|
|
996
1023
|
|
|
997
1024
|
-- HNSW index for vector similarity search (created in background)
|
|
998
1025
|
-- Note: This will be created by background thread after data load
|
|
999
|
-
-- CREATE INDEX
|
|
1026
|
+
-- CREATE INDEX idx_embeddings_sessions_vector_hnsw ON embeddings_sessions
|
|
1000
1027
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
1001
1028
|
|
|
1002
1029
|
-- KV_STORE trigger for sessions
|
|
@@ -1021,7 +1048,7 @@ BEGIN
|
|
|
1021
1048
|
graph_edges,
|
|
1022
1049
|
updated_at
|
|
1023
1050
|
) VALUES (
|
|
1024
|
-
NEW.name::VARCHAR,
|
|
1051
|
+
normalize_key(NEW.name::VARCHAR),
|
|
1025
1052
|
'sessions',
|
|
1026
1053
|
NEW.id,
|
|
1027
1054
|
NEW.tenant_id,
|
|
@@ -1068,11 +1095,11 @@ CREATE TABLE IF NOT EXISTS shared_sessions (
|
|
|
1068
1095
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
1069
1096
|
);
|
|
1070
1097
|
|
|
1071
|
-
CREATE INDEX
|
|
1072
|
-
CREATE INDEX
|
|
1073
|
-
CREATE INDEX
|
|
1074
|
-
CREATE INDEX
|
|
1075
|
-
CREATE INDEX
|
|
1098
|
+
CREATE INDEX idx_shared_sessions_tenant ON shared_sessions (tenant_id);
|
|
1099
|
+
CREATE INDEX idx_shared_sessions_user ON shared_sessions (user_id);
|
|
1100
|
+
CREATE INDEX idx_shared_sessions_graph_edges ON shared_sessions USING GIN (graph_edges);
|
|
1101
|
+
CREATE INDEX idx_shared_sessions_metadata ON shared_sessions USING GIN (metadata);
|
|
1102
|
+
CREATE INDEX idx_shared_sessions_tags ON shared_sessions USING GIN (tags);
|
|
1076
1103
|
|
|
1077
1104
|
-- KV_STORE trigger for shared_sessions
|
|
1078
1105
|
-- Trigger function to maintain KV_STORE for shared_sessions
|
|
@@ -1096,7 +1123,7 @@ BEGIN
|
|
|
1096
1123
|
graph_edges,
|
|
1097
1124
|
updated_at
|
|
1098
1125
|
) VALUES (
|
|
1099
|
-
NEW.id::VARCHAR,
|
|
1126
|
+
normalize_key(NEW.id::VARCHAR),
|
|
1100
1127
|
'shared_sessions',
|
|
1101
1128
|
NEW.id,
|
|
1102
1129
|
NEW.tenant_id,
|
|
@@ -1151,11 +1178,11 @@ CREATE TABLE IF NOT EXISTS users (
|
|
|
1151
1178
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
1152
1179
|
);
|
|
1153
1180
|
|
|
1154
|
-
CREATE INDEX
|
|
1155
|
-
CREATE INDEX
|
|
1156
|
-
CREATE INDEX
|
|
1157
|
-
CREATE INDEX
|
|
1158
|
-
CREATE INDEX
|
|
1181
|
+
CREATE INDEX idx_users_tenant ON users (tenant_id);
|
|
1182
|
+
CREATE INDEX idx_users_user ON users (user_id);
|
|
1183
|
+
CREATE INDEX idx_users_graph_edges ON users USING GIN (graph_edges);
|
|
1184
|
+
CREATE INDEX idx_users_metadata ON users USING GIN (metadata);
|
|
1185
|
+
CREATE INDEX idx_users_tags ON users USING GIN (tags);
|
|
1159
1186
|
|
|
1160
1187
|
-- Embeddings for users
|
|
1161
1188
|
CREATE TABLE IF NOT EXISTS embeddings_users (
|
|
@@ -1173,14 +1200,14 @@ CREATE TABLE IF NOT EXISTS embeddings_users (
|
|
|
1173
1200
|
);
|
|
1174
1201
|
|
|
1175
1202
|
-- Index for entity lookup (get all embeddings for entity)
|
|
1176
|
-
CREATE INDEX
|
|
1203
|
+
CREATE INDEX idx_embeddings_users_entity ON embeddings_users (entity_id);
|
|
1177
1204
|
|
|
1178
1205
|
-- Index for field + provider lookup
|
|
1179
|
-
CREATE INDEX
|
|
1206
|
+
CREATE INDEX idx_embeddings_users_field_provider ON embeddings_users (field_name, provider);
|
|
1180
1207
|
|
|
1181
1208
|
-- HNSW index for vector similarity search (created in background)
|
|
1182
1209
|
-- Note: This will be created by background thread after data load
|
|
1183
|
-
-- CREATE INDEX
|
|
1210
|
+
-- CREATE INDEX idx_embeddings_users_vector_hnsw ON embeddings_users
|
|
1184
1211
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
1185
1212
|
|
|
1186
1213
|
-- KV_STORE trigger for users
|
|
@@ -1205,7 +1232,7 @@ BEGIN
|
|
|
1205
1232
|
graph_edges,
|
|
1206
1233
|
updated_at
|
|
1207
1234
|
) VALUES (
|
|
1208
|
-
NEW.name::VARCHAR,
|
|
1235
|
+
normalize_key(NEW.name::VARCHAR),
|
|
1209
1236
|
'users',
|
|
1210
1237
|
NEW.id,
|
|
1211
1238
|
NEW.tenant_id,
|
|
@@ -1411,12 +1438,12 @@ VALUES (
|
|
|
1411
1438
|
## Overview
|
|
1412
1439
|
|
|
1413
1440
|
The `File` entity is stored in the `files` table. Each record is uniquely
|
|
1414
|
-
identified by its `
|
|
1441
|
+
identified by its `name` field for lookups and graph traversal.
|
|
1415
1442
|
|
|
1416
1443
|
## Search Capabilities
|
|
1417
1444
|
|
|
1418
1445
|
This schema includes the `search_rem` tool which supports:
|
|
1419
|
-
- **LOOKUP**: O(1) exact match by
|
|
1446
|
+
- **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
|
|
1420
1447
|
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
1421
1448
|
- **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM files LIMIT 10`)
|
|
1422
1449
|
- **SQL**: Complex queries (e.g., `SELECT * FROM files WHERE ...`)
|
|
@@ -1426,7 +1453,7 @@ This schema includes the `search_rem` tool which supports:
|
|
|
1426
1453
|
| Property | Value |
|
|
1427
1454
|
|----------|-------|
|
|
1428
1455
|
| Table | `files` |
|
|
1429
|
-
| Entity Key | `
|
|
1456
|
+
| Entity Key | `name` |
|
|
1430
1457
|
| Embedding Fields | `content` |
|
|
1431
1458
|
| Tools | `search_rem` |
|
|
1432
1459
|
|
|
@@ -1513,9 +1540,9 @@ This schema includes the `search_rem` tool which supports:
|
|
|
1513
1540
|
- File processing status (pending, processing, completed, failed)
|
|
1514
1541
|
|
|
1515
1542
|
',
|
|
1516
|
-
'{"type": "object", "description": "\n File metadata and tracking.\n\n Represents files uploaded to or referenced by the REM system,\n tracking their metadata and processing status. Tenant isolation\n is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `files` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "File name", "title": "Name", "type": "string"}, "uri": {"description": "File storage URI (S3, local path, etc.)", "title": "Uri", "type": "string"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Extracted text content (if applicable)", "title": "Content"}, "timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File creation/modification timestamp", "title": "Timestamp"}, "size_bytes": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "File size in bytes", "title": "Size Bytes"}, "mime_type": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File MIME type", "title": "Mime Type"}, "processing_status": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": "pending", "description": "File processing status (pending, processing, completed, failed)", "title": "Processing Status"}}, "required": ["name", "uri"], "json_schema_extra": {"table_name": "files", "entity_key_field": "
|
|
1543
|
+
'{"type": "object", "description": "\n File metadata and tracking.\n\n Represents files uploaded to or referenced by the REM system,\n tracking their metadata and processing status. Tenant isolation\n is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `files` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "File name", "title": "Name", "type": "string"}, "uri": {"description": "File storage URI (S3, local path, etc.)", "title": "Uri", "type": "string"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Extracted text content (if applicable)", "title": "Content"}, "timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File creation/modification timestamp", "title": "Timestamp"}, "size_bytes": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "File size in bytes", "title": "Size Bytes"}, "mime_type": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File MIME type", "title": "Mime Type"}, "processing_status": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": "pending", "description": "File processing status (pending, processing, completed, failed)", "title": "Processing Status"}}, "required": ["name", "uri"], "json_schema_extra": {"table_name": "files", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.file.File", "tools": ["search_rem"], "default_search_table": "files", "has_embeddings": true}}'::jsonb,
|
|
1517
1544
|
'entity',
|
|
1518
|
-
'{"table_name": "files", "entity_key_field": "
|
|
1545
|
+
'{"table_name": "files", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.file.File"}'::jsonb
|
|
1519
1546
|
)
|
|
1520
1547
|
ON CONFLICT (id) DO UPDATE SET
|
|
1521
1548
|
name = EXCLUDED.name,
|
|
@@ -2008,18 +2035,19 @@ VALUES (
|
|
|
2008
2035
|
'Ontology',
|
|
2009
2036
|
'# Ontology
|
|
2010
2037
|
|
|
2011
|
-
Domain-specific knowledge
|
|
2038
|
+
Domain-specific knowledge - either agent-extracted or direct-loaded.
|
|
2012
2039
|
|
|
2013
2040
|
Attributes:
|
|
2014
2041
|
name: Human-readable label for this ontology instance
|
|
2015
|
-
|
|
2016
|
-
|
|
2017
|
-
|
|
2018
|
-
|
|
2019
|
-
|
|
2042
|
+
uri: External source reference (git://, s3://, https://) for direct-loaded ontologies
|
|
2043
|
+
file_id: Foreign key to File entity (optional - only for agent-extracted)
|
|
2044
|
+
agent_schema_id: Schema that performed extraction (optional - only for agent-extracted)
|
|
2045
|
+
provider_name: LLM provider used for extraction (optional)
|
|
2046
|
+
model_name: Specific model used (optional)
|
|
2047
|
+
extracted_data: Structured data - either extracted by agent or parsed from source
|
|
2020
2048
|
confidence_score: Optional confidence score from extraction (0.0-1.0)
|
|
2021
2049
|
extraction_timestamp: When extraction was performed
|
|
2022
|
-
|
|
2050
|
+
content: Text used for generating embedding
|
|
2023
2051
|
|
|
2024
2052
|
Inherited from CoreModel:
|
|
2025
2053
|
id: UUID or string identifier
|
|
@@ -2031,10 +2059,9 @@ Domain-specific knowledge extracted from files using custom agents.
|
|
|
2031
2059
|
graph_edges: Relationships to other entities
|
|
2032
2060
|
metadata: Flexible metadata storage
|
|
2033
2061
|
tags: Classification tags
|
|
2034
|
-
column: Database schema metadata
|
|
2035
2062
|
|
|
2036
2063
|
Example Usage:
|
|
2037
|
-
# CV
|
|
2064
|
+
# Agent-extracted: CV parsing
|
|
2038
2065
|
cv_ontology = Ontology(
|
|
2039
2066
|
name="john-doe-cv-2024",
|
|
2040
2067
|
file_id="file-uuid-123",
|
|
@@ -2043,63 +2070,50 @@ Domain-specific knowledge extracted from files using custom agents.
|
|
|
2043
2070
|
model_name="claude-sonnet-4-5-20250929",
|
|
2044
2071
|
extracted_data={
|
|
2045
2072
|
"candidate_name": "John Doe",
|
|
2046
|
-
"email": "john@example.com",
|
|
2047
2073
|
"skills": ["Python", "PostgreSQL", "Kubernetes"],
|
|
2048
|
-
"experience": [
|
|
2049
|
-
{
|
|
2050
|
-
"company": "TechCorp",
|
|
2051
|
-
"role": "Senior Engineer",
|
|
2052
|
-
"years": 3,
|
|
2053
|
-
"achievements": ["Led migration to k8s", "Reduced costs 40%"]
|
|
2054
|
-
}
|
|
2055
|
-
],
|
|
2056
|
-
"education": [
|
|
2057
|
-
{"degree": "BS Computer Science", "institution": "MIT", "year": 2018}
|
|
2058
|
-
]
|
|
2059
2074
|
},
|
|
2060
2075
|
confidence_score=0.95,
|
|
2061
|
-
tags=["cv", "engineering"
|
|
2076
|
+
tags=["cv", "engineering"]
|
|
2062
2077
|
)
|
|
2063
2078
|
|
|
2064
|
-
#
|
|
2065
|
-
|
|
2066
|
-
name="
|
|
2067
|
-
|
|
2068
|
-
|
|
2069
|
-
|
|
2070
|
-
|
|
2079
|
+
# Direct-loaded: Medical knowledge base from git
|
|
2080
|
+
disorder_ontology = Ontology(
|
|
2081
|
+
name="panic-disorder",
|
|
2082
|
+
uri="git://bwolfson-siggie/Siggy-MVP/ontology/disorders/anxiety/panic-disorder.md",
|
|
2083
|
+
content="# Panic Disorder\n\nPanic disorder is characterized by...",
|
|
2084
|
+
extracted_data={
|
|
2085
|
+
"type": "disorder",
|
|
2086
|
+
"category": "anxiety",
|
|
2087
|
+
"icd10": "F41.0",
|
|
2088
|
+
"dsm5_criteria": ["A", "B", "C", "D"],
|
|
2089
|
+
},
|
|
2090
|
+
tags=["disorder", "anxiety", "dsm5"]
|
|
2091
|
+
)
|
|
2092
|
+
|
|
2093
|
+
# Direct-loaded: Clinical procedure from git
|
|
2094
|
+
scid_node = Ontology(
|
|
2095
|
+
name="scid-5-f1",
|
|
2096
|
+
uri="git://bwolfson-siggie/Siggy-MVP/ontology/procedures/scid-5/module-f/scid-5-f1.md",
|
|
2097
|
+
content="# scid-5-f1: Panic Attack Screening\n\n...",
|
|
2071
2098
|
extracted_data={
|
|
2072
|
-
"
|
|
2073
|
-
"
|
|
2074
|
-
|
|
2075
|
-
|
|
2076
|
-
],
|
|
2077
|
-
"effective_date": "2024-01-01",
|
|
2078
|
-
"termination_date": "2026-12-31",
|
|
2079
|
-
"payment_terms": {
|
|
2080
|
-
"amount": 500000,
|
|
2081
|
-
"currency": "USD",
|
|
2082
|
-
"frequency": "quarterly"
|
|
2083
|
-
},
|
|
2084
|
-
"key_obligations": [
|
|
2085
|
-
"Supplier must deliver within 30 days",
|
|
2086
|
-
"Buyer must pay within 60 days of invoice"
|
|
2087
|
-
]
|
|
2099
|
+
"type": "procedure",
|
|
2100
|
+
"module": "F",
|
|
2101
|
+
"section": "Panic Disorder",
|
|
2102
|
+
"dsm5_criterion": "Panic Attack Specifier",
|
|
2088
2103
|
},
|
|
2089
|
-
|
|
2090
|
-
tags=["contract", "supplier", "procurement"]
|
|
2104
|
+
tags=["scid-5", "procedure", "anxiety"]
|
|
2091
2105
|
)
|
|
2092
2106
|
|
|
2093
2107
|
|
|
2094
2108
|
## Overview
|
|
2095
2109
|
|
|
2096
2110
|
The `Ontology` entity is stored in the `ontologies` table. Each record is uniquely
|
|
2097
|
-
identified by its `
|
|
2111
|
+
identified by its `name` field for lookups and graph traversal.
|
|
2098
2112
|
|
|
2099
2113
|
## Search Capabilities
|
|
2100
2114
|
|
|
2101
2115
|
This schema includes the `search_rem` tool which supports:
|
|
2102
|
-
- **LOOKUP**: O(1) exact match by
|
|
2116
|
+
- **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
|
|
2103
2117
|
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
2104
2118
|
- **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM ontologies LIMIT 10`)
|
|
2105
2119
|
- **SQL**: Complex queries (e.g., `SELECT * FROM ontologies WHERE ...`)
|
|
@@ -2109,8 +2123,8 @@ This schema includes the `search_rem` tool which supports:
|
|
|
2109
2123
|
| Property | Value |
|
|
2110
2124
|
|----------|-------|
|
|
2111
2125
|
| Table | `ontologies` |
|
|
2112
|
-
| Entity Key | `
|
|
2113
|
-
| Embedding Fields |
|
|
2126
|
+
| Entity Key | `name` |
|
|
2127
|
+
| Embedding Fields | `content` |
|
|
2114
2128
|
| Tools | `search_rem` |
|
|
2115
2129
|
|
|
2116
2130
|
## Fields
|
|
@@ -2164,25 +2178,29 @@ This schema includes the `search_rem` tool which supports:
|
|
|
2164
2178
|
- **Type**: `<class ''str''>`
|
|
2165
2179
|
- **Required**
|
|
2166
2180
|
|
|
2181
|
+
### `uri`
|
|
2182
|
+
- **Type**: `typing.Optional[str]`
|
|
2183
|
+
- **Optional**
|
|
2184
|
+
|
|
2167
2185
|
### `file_id`
|
|
2168
|
-
- **Type**: `uuid.UUID
|
|
2169
|
-
- **
|
|
2186
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
2187
|
+
- **Optional**
|
|
2170
2188
|
|
|
2171
2189
|
### `agent_schema_id`
|
|
2172
|
-
- **Type**:
|
|
2173
|
-
- **
|
|
2190
|
+
- **Type**: `typing.Optional[str]`
|
|
2191
|
+
- **Optional**
|
|
2174
2192
|
|
|
2175
2193
|
### `provider_name`
|
|
2176
|
-
- **Type**:
|
|
2177
|
-
- **
|
|
2194
|
+
- **Type**: `typing.Optional[str]`
|
|
2195
|
+
- **Optional**
|
|
2178
2196
|
|
|
2179
2197
|
### `model_name`
|
|
2180
|
-
- **Type**:
|
|
2181
|
-
- **
|
|
2198
|
+
- **Type**: `typing.Optional[str]`
|
|
2199
|
+
- **Optional**
|
|
2182
2200
|
|
|
2183
2201
|
### `extracted_data`
|
|
2184
|
-
- **Type**: `dict[str, typing.Any]`
|
|
2185
|
-
- **
|
|
2202
|
+
- **Type**: `typing.Optional[dict[str, typing.Any]]`
|
|
2203
|
+
- **Optional**
|
|
2186
2204
|
|
|
2187
2205
|
### `confidence_score`
|
|
2188
2206
|
- **Type**: `typing.Optional[float]`
|
|
@@ -2192,14 +2210,14 @@ This schema includes the `search_rem` tool which supports:
|
|
|
2192
2210
|
- **Type**: `typing.Optional[str]`
|
|
2193
2211
|
- **Optional**
|
|
2194
2212
|
|
|
2195
|
-
### `
|
|
2213
|
+
### `content`
|
|
2196
2214
|
- **Type**: `typing.Optional[str]`
|
|
2197
2215
|
- **Optional**
|
|
2198
2216
|
|
|
2199
2217
|
',
|
|
2200
|
-
'{"type": "object", "description": "Domain-specific knowledge
|
|
2218
|
+
'{"type": "object", "description": "Domain-specific knowledge - either agent-extracted or direct-loaded.\n\n Attributes:\n name: Human-readable label for this ontology instance\n uri: External source reference (git://, s3://, https://) for direct-loaded ontologies\n file_id: Foreign key to File entity (optional - only for agent-extracted)\n agent_schema_id: Schema that performed extraction (optional - only for agent-extracted)\n provider_name: LLM provider used for extraction (optional)\n model_name: Specific model used (optional)\n extracted_data: Structured data - either extracted by agent or parsed from source\n confidence_score: Optional confidence score from extraction (0.0-1.0)\n extraction_timestamp: When extraction was performed\n content: Text used for generating embedding\n\n Inherited from CoreModel:\n id: UUID or string identifier\n created_at: Entity creation timestamp\n updated_at: Last update timestamp\n deleted_at: Soft deletion timestamp\n tenant_id: Multi-tenancy isolation\n user_id: Ownership\n graph_edges: Relationships to other entities\n metadata: Flexible metadata storage\n tags: Classification tags\n\n Example Usage:\n # Agent-extracted: CV parsing\n cv_ontology = Ontology(\n name=\"john-doe-cv-2024\",\n file_id=\"file-uuid-123\",\n agent_schema_id=\"cv-parser-v1\",\n provider_name=\"anthropic\",\n model_name=\"claude-sonnet-4-5-20250929\",\n extracted_data={\n \"candidate_name\": \"John Doe\",\n \"skills\": [\"Python\", \"PostgreSQL\", \"Kubernetes\"],\n },\n confidence_score=0.95,\n tags=[\"cv\", \"engineering\"]\n )\n\n # Direct-loaded: Medical knowledge base from git\n disorder_ontology = Ontology(\n name=\"panic-disorder\",\n uri=\"git://bwolfson-siggie/Siggy-MVP/ontology/disorders/anxiety/panic-disorder.md\",\n content=\"# Panic Disorder\\n\\nPanic disorder is characterized by...\",\n extracted_data={\n \"type\": \"disorder\",\n \"category\": \"anxiety\",\n \"icd10\": \"F41.0\",\n \"dsm5_criteria\": [\"A\", \"B\", \"C\", \"D\"],\n },\n tags=[\"disorder\", \"anxiety\", \"dsm5\"]\n )\n\n # Direct-loaded: Clinical procedure from git\n scid_node = Ontology(\n name=\"scid-5-f1\",\n uri=\"git://bwolfson-siggie/Siggy-MVP/ontology/procedures/scid-5/module-f/scid-5-f1.md\",\n content=\"# scid-5-f1: Panic Attack Screening\\n\\n...\",\n extracted_data={\n \"type\": \"procedure\",\n \"module\": \"F\",\n \"section\": \"Panic Disorder\",\n \"dsm5_criterion\": \"Panic Attack Specifier\",\n },\n tags=[\"scid-5\", \"procedure\", \"anxiety\"]\n )\n \n\nThis agent can search the `ontologies` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri"}, "file_id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "title": "File Id"}, "agent_schema_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Agent Schema Id"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}, "extracted_data": {"anyOf": [{"additionalProperties": true, "type": "object"}, {"type": "null"}], "default": null, "title": "Extracted Data"}, "confidence_score": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Confidence Score"}, "extraction_timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Extraction Timestamp"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Content"}}, "required": ["name"], "json_schema_extra": {"table_name": "ontologies", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.ontology.Ontology", "tools": ["search_rem"], "default_search_table": "ontologies", "has_embeddings": true}}'::jsonb,
|
|
2201
2219
|
'entity',
|
|
2202
|
-
'{"table_name": "ontologies", "entity_key_field": "
|
|
2220
|
+
'{"table_name": "ontologies", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.ontology.Ontology"}'::jsonb
|
|
2203
2221
|
)
|
|
2204
2222
|
ON CONFLICT (id) DO UPDATE SET
|
|
2205
2223
|
name = EXCLUDED.name,
|
|
@@ -2288,12 +2306,12 @@ User configuration for automatic ontology extraction.
|
|
|
2288
2306
|
## Overview
|
|
2289
2307
|
|
|
2290
2308
|
The `OntologyConfig` entity is stored in the `ontology_configs` table. Each record is uniquely
|
|
2291
|
-
identified by its `
|
|
2309
|
+
identified by its `name` field for lookups and graph traversal.
|
|
2292
2310
|
|
|
2293
2311
|
## Search Capabilities
|
|
2294
2312
|
|
|
2295
2313
|
This schema includes the `search_rem` tool which supports:
|
|
2296
|
-
- **LOOKUP**: O(1) exact match by
|
|
2314
|
+
- **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
|
|
2297
2315
|
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
2298
2316
|
- **SEARCH**: Semantic vector search on description (e.g., `SEARCH "concept" FROM ontology_configs LIMIT 10`)
|
|
2299
2317
|
- **SQL**: Complex queries (e.g., `SELECT * FROM ontology_configs WHERE ...`)
|
|
@@ -2303,7 +2321,7 @@ This schema includes the `search_rem` tool which supports:
|
|
|
2303
2321
|
| Property | Value |
|
|
2304
2322
|
|----------|-------|
|
|
2305
2323
|
| Table | `ontology_configs` |
|
|
2306
|
-
| Entity Key | `
|
|
2324
|
+
| Entity Key | `name` |
|
|
2307
2325
|
| Embedding Fields | `description` |
|
|
2308
2326
|
| Tools | `search_rem` |
|
|
2309
2327
|
|
|
@@ -2395,9 +2413,9 @@ This schema includes the `search_rem` tool which supports:
|
|
|
2395
2413
|
- **Optional**
|
|
2396
2414
|
|
|
2397
2415
|
',
|
|
2398
|
-
'{"type": "object", "description": "User configuration for automatic ontology extraction.\n\n Attributes:\n name: Human-readable config name\n agent_schema_id: Foreign key to Schema entity to use for extraction\n description: Purpose and scope of this config\n\n # File matching rules (ANY matching rule triggers extraction)\n mime_type_pattern: Regex pattern for file MIME types (e.g., \"application/pdf\")\n uri_pattern: Regex pattern for file URIs (e.g., \"s3://bucket/resumes/.*\")\n tag_filter: List of tags (file must have ALL tags to match)\n\n # Execution control\n priority: Execution order (higher = earlier, default 100)\n enabled: Whether this config is active (default True)\n\n # LLM provider configuration\n provider_name: Optional LLM provider override (defaults to settings)\n model_name: Optional model override (defaults to settings)\n\n Inherited from CoreModel:\n id, created_at, updated_at, deleted_at, tenant_id, user_id,\n graph_edges, metadata, tags, column\n\n Example Usage:\n # CV extraction for recruitment\n cv_config = OntologyConfig(\n name=\"recruitment-cv-parser\",\n agent_schema_id=\"cv-parser-v1\",\n description=\"Extract candidate information from resumes\",\n mime_type_pattern=\"application/pdf\",\n uri_pattern=\".*/resumes/.*\",\n tag_filter=[\"cv\", \"candidate\"],\n priority=100,\n enabled=True,\n tenant_id=\"acme-corp\",\n tags=[\"recruitment\", \"hr\"]\n )\n\n # Contract analysis for legal team\n contract_config = OntologyConfig(\n name=\"legal-contract-analyzer\",\n agent_schema_id=\"contract-parser-v2\",\n description=\"Extract key terms from supplier contracts\",\n mime_type_pattern=\"application/(pdf|msword|vnd.openxmlformats.*)\",\n tag_filter=[\"legal\", \"contract\"],\n priority=200, # Higher priority = runs first\n enabled=True,\n provider_name=\"openai\", # Override default provider\n model_name=\"gpt-4.1\",\n tenant_id=\"acme-corp\",\n tags=[\"legal\", \"procurement\"]\n )\n\n # Medical records for healthcare\n medical_config = OntologyConfig(\n name=\"medical-records-extractor\",\n agent_schema_id=\"medical-parser-v1\",\n description=\"Extract diagnoses and treatments from medical records\",\n mime_type_pattern=\"application/pdf\",\n tag_filter=[\"medical\", \"patient-record\"],\n priority=50,\n enabled=True,\n tenant_id=\"healthsystem\",\n tags=[\"medical\", \"hipaa-compliant\"]\n )\n \n\nThis agent can search the `ontology_configs` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "agent_schema_id": {"title": "Agent Schema Id", "type": "string"}, "description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Description"}, "mime_type_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Mime Type Pattern"}, "uri_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri Pattern"}, "tag_filter": {"default": [], "items": {"type": "string"}, "title": "Tag Filter", "type": "array"}, "priority": {"default": 100, "title": "Priority", "type": "integer"}, "enabled": {"default": true, "title": "Enabled", "type": "boolean"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}}, "required": ["name", "agent_schema_id"], "json_schema_extra": {"table_name": "ontology_configs", "entity_key_field": "
|
|
2416
|
+
'{"type": "object", "description": "User configuration for automatic ontology extraction.\n\n Attributes:\n name: Human-readable config name\n agent_schema_id: Foreign key to Schema entity to use for extraction\n description: Purpose and scope of this config\n\n # File matching rules (ANY matching rule triggers extraction)\n mime_type_pattern: Regex pattern for file MIME types (e.g., \"application/pdf\")\n uri_pattern: Regex pattern for file URIs (e.g., \"s3://bucket/resumes/.*\")\n tag_filter: List of tags (file must have ALL tags to match)\n\n # Execution control\n priority: Execution order (higher = earlier, default 100)\n enabled: Whether this config is active (default True)\n\n # LLM provider configuration\n provider_name: Optional LLM provider override (defaults to settings)\n model_name: Optional model override (defaults to settings)\n\n Inherited from CoreModel:\n id, created_at, updated_at, deleted_at, tenant_id, user_id,\n graph_edges, metadata, tags, column\n\n Example Usage:\n # CV extraction for recruitment\n cv_config = OntologyConfig(\n name=\"recruitment-cv-parser\",\n agent_schema_id=\"cv-parser-v1\",\n description=\"Extract candidate information from resumes\",\n mime_type_pattern=\"application/pdf\",\n uri_pattern=\".*/resumes/.*\",\n tag_filter=[\"cv\", \"candidate\"],\n priority=100,\n enabled=True,\n tenant_id=\"acme-corp\",\n tags=[\"recruitment\", \"hr\"]\n )\n\n # Contract analysis for legal team\n contract_config = OntologyConfig(\n name=\"legal-contract-analyzer\",\n agent_schema_id=\"contract-parser-v2\",\n description=\"Extract key terms from supplier contracts\",\n mime_type_pattern=\"application/(pdf|msword|vnd.openxmlformats.*)\",\n tag_filter=[\"legal\", \"contract\"],\n priority=200, # Higher priority = runs first\n enabled=True,\n provider_name=\"openai\", # Override default provider\n model_name=\"gpt-4.1\",\n tenant_id=\"acme-corp\",\n tags=[\"legal\", \"procurement\"]\n )\n\n # Medical records for healthcare\n medical_config = OntologyConfig(\n name=\"medical-records-extractor\",\n agent_schema_id=\"medical-parser-v1\",\n description=\"Extract diagnoses and treatments from medical records\",\n mime_type_pattern=\"application/pdf\",\n tag_filter=[\"medical\", \"patient-record\"],\n priority=50,\n enabled=True,\n tenant_id=\"healthsystem\",\n tags=[\"medical\", \"hipaa-compliant\"]\n )\n \n\nThis agent can search the `ontology_configs` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "agent_schema_id": {"title": "Agent Schema Id", "type": "string"}, "description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Description"}, "mime_type_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Mime Type Pattern"}, "uri_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri Pattern"}, "tag_filter": {"default": [], "items": {"type": "string"}, "title": "Tag Filter", "type": "array"}, "priority": {"default": 100, "title": "Priority", "type": "integer"}, "enabled": {"default": true, "title": "Enabled", "type": "boolean"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}}, "required": ["name", "agent_schema_id"], "json_schema_extra": {"table_name": "ontology_configs", "entity_key_field": "name", "embedding_fields": ["description"], "fully_qualified_name": "rem.models.entities.ontology_config.OntologyConfig", "tools": ["search_rem"], "default_search_table": "ontology_configs", "has_embeddings": true}}'::jsonb,
|
|
2399
2417
|
'entity',
|
|
2400
|
-
'{"table_name": "ontology_configs", "entity_key_field": "
|
|
2418
|
+
'{"table_name": "ontology_configs", "entity_key_field": "name", "embedding_fields": ["description"], "fqn": "rem.models.entities.ontology_config.OntologyConfig"}'::jsonb
|
|
2401
2419
|
)
|
|
2402
2420
|
ON CONFLICT (id) DO UPDATE SET
|
|
2403
2421
|
name = EXCLUDED.name,
|
|
@@ -2565,12 +2583,12 @@ VALUES (
|
|
|
2565
2583
|
## Overview
|
|
2566
2584
|
|
|
2567
2585
|
The `Schema` entity is stored in the `schemas` table. Each record is uniquely
|
|
2568
|
-
identified by its `
|
|
2586
|
+
identified by its `name` field for lookups and graph traversal.
|
|
2569
2587
|
|
|
2570
2588
|
## Search Capabilities
|
|
2571
2589
|
|
|
2572
2590
|
This schema includes the `search_rem` tool which supports:
|
|
2573
|
-
- **LOOKUP**: O(1) exact match by
|
|
2591
|
+
- **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
|
|
2574
2592
|
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
2575
2593
|
- **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM schemas LIMIT 10`)
|
|
2576
2594
|
- **SQL**: Complex queries (e.g., `SELECT * FROM schemas WHERE ...`)
|
|
@@ -2580,7 +2598,7 @@ This schema includes the `search_rem` tool which supports:
|
|
|
2580
2598
|
| Property | Value |
|
|
2581
2599
|
|----------|-------|
|
|
2582
2600
|
| Table | `schemas` |
|
|
2583
|
-
| Entity Key | `
|
|
2601
|
+
| Entity Key | `name` |
|
|
2584
2602
|
| Embedding Fields | `content` |
|
|
2585
2603
|
| Tools | `search_rem` |
|
|
2586
2604
|
|
|
@@ -2662,9 +2680,9 @@ This schema includes the `search_rem` tool which supports:
|
|
|
2662
2680
|
- JSON paths in extracted_data to embed for semantic search. Example: [''summary'', ''candidate_name'', ''skills''] for CV extraction. Values will be concatenated and embedded using configured embedding provider.
|
|
2663
2681
|
|
|
2664
2682
|
',
|
|
2665
|
-
'{"type": "object", "description": "\n Agent schema definition.\n\n Schemas define agents that can be dynamically loaded into Pydantic AI.\n They store JsonSchema specifications with embedded metadata for tools,\n resources, and system prompts.\n\n For ontology extraction agents:\n - `provider_configs` enables multi-provider support (test across Anthropic, OpenAI, etc.)\n - `embedding_fields` specifies which output fields should be embedded for semantic search\n\n Tenant isolation is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `schemas` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "Human-readable schema name (used as identifier)", "title": "Name", "type": "string"}, "content": {"default": "", "description": "Markdown documentation and instructions for the schema", "title": "Content", "type": "string"}, "spec": {"additionalProperties": true, "description": "JsonSchema specification defining the agent structure and capabilities", "title": "Spec", "type": "object"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Schema category distinguishing schema types. Values: ''agent'' (AI agents), ''evaluator'' (LLM-as-a-Judge evaluators). Maps directly from json_schema_extra.kind field during ingestion.", "title": "Category"}, "provider_configs": {"description": "Optional provider configurations for multi-provider testing. Each dict has ''provider_name'' and ''model_name''. Example: [{''provider_name'': ''anthropic'', ''model_name'': ''claude-sonnet-4-5''}]", "items": {"additionalProperties": true, "type": "object"}, "title": "Provider Configs", "type": "array"}, "embedding_fields": {"description": "JSON paths in extracted_data to embed for semantic search. Example: [''summary'', ''candidate_name'', ''skills''] for CV extraction. Values will be concatenated and embedded using configured embedding provider.", "items": {"type": "string"}, "title": "Embedding Fields", "type": "array"}}, "required": ["name", "spec"], "json_schema_extra": {"table_name": "schemas", "entity_key_field": "
|
|
2683
|
+
'{"type": "object", "description": "\n Agent schema definition.\n\n Schemas define agents that can be dynamically loaded into Pydantic AI.\n They store JsonSchema specifications with embedded metadata for tools,\n resources, and system prompts.\n\n For ontology extraction agents:\n - `provider_configs` enables multi-provider support (test across Anthropic, OpenAI, etc.)\n - `embedding_fields` specifies which output fields should be embedded for semantic search\n\n Tenant isolation is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `schemas` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "Human-readable schema name (used as identifier)", "title": "Name", "type": "string"}, "content": {"default": "", "description": "Markdown documentation and instructions for the schema", "title": "Content", "type": "string"}, "spec": {"additionalProperties": true, "description": "JsonSchema specification defining the agent structure and capabilities", "title": "Spec", "type": "object"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Schema category distinguishing schema types. Values: ''agent'' (AI agents), ''evaluator'' (LLM-as-a-Judge evaluators). Maps directly from json_schema_extra.kind field during ingestion.", "title": "Category"}, "provider_configs": {"description": "Optional provider configurations for multi-provider testing. Each dict has ''provider_name'' and ''model_name''. Example: [{''provider_name'': ''anthropic'', ''model_name'': ''claude-sonnet-4-5''}]", "items": {"additionalProperties": true, "type": "object"}, "title": "Provider Configs", "type": "array"}, "embedding_fields": {"description": "JSON paths in extracted_data to embed for semantic search. Example: [''summary'', ''candidate_name'', ''skills''] for CV extraction. Values will be concatenated and embedded using configured embedding provider.", "items": {"type": "string"}, "title": "Embedding Fields", "type": "array"}}, "required": ["name", "spec"], "json_schema_extra": {"table_name": "schemas", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.schema.Schema", "tools": ["search_rem"], "default_search_table": "schemas", "has_embeddings": true}}'::jsonb,
|
|
2666
2684
|
'entity',
|
|
2667
|
-
'{"table_name": "schemas", "entity_key_field": "
|
|
2685
|
+
'{"table_name": "schemas", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.schema.Schema"}'::jsonb
|
|
2668
2686
|
)
|
|
2669
2687
|
ON CONFLICT (id) DO UPDATE SET
|
|
2670
2688
|
name = EXCLUDED.name,
|
|
@@ -3115,7 +3133,7 @@ BEGIN
|
|
|
3115
3133
|
RAISE NOTICE ' ✓ image_resources (1 embeddable fields)';
|
|
3116
3134
|
RAISE NOTICE ' ✓ messages (1 embeddable fields)';
|
|
3117
3135
|
RAISE NOTICE ' ✓ moments (1 embeddable fields)';
|
|
3118
|
-
RAISE NOTICE ' ✓ ontologies';
|
|
3136
|
+
RAISE NOTICE ' ✓ ontologies (1 embeddable fields)';
|
|
3119
3137
|
RAISE NOTICE ' ✓ ontology_configs (1 embeddable fields)';
|
|
3120
3138
|
RAISE NOTICE ' ✓ resources (1 embeddable fields)';
|
|
3121
3139
|
RAISE NOTICE ' ✓ schemas (1 embeddable fields)';
|