remdb 0.3.171__py3-none-any.whl → 0.3.180__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  -- REM Model Schema (install_models.sql)
2
2
  -- Generated from Pydantic models
3
3
  -- Source: model registry
4
- -- Generated at: 2025-11-29T18:45:11.372432
4
+ -- Generated at: 2025-12-11T07:54:56.914558
5
5
  --
6
6
  -- DO NOT EDIT MANUALLY - Regenerate with: rem db schema generate
7
7
  --
@@ -56,11 +56,11 @@ CREATE TABLE IF NOT EXISTS feedbacks (
56
56
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
57
57
  );
58
58
 
59
- CREATE INDEX IF NOT EXISTS idx_feedbacks_tenant ON feedbacks (tenant_id);
60
- CREATE INDEX IF NOT EXISTS idx_feedbacks_user ON feedbacks (user_id);
61
- CREATE INDEX IF NOT EXISTS idx_feedbacks_graph_edges ON feedbacks USING GIN (graph_edges);
62
- CREATE INDEX IF NOT EXISTS idx_feedbacks_metadata ON feedbacks USING GIN (metadata);
63
- CREATE INDEX IF NOT EXISTS idx_feedbacks_tags ON feedbacks USING GIN (tags);
59
+ CREATE INDEX idx_feedbacks_tenant ON feedbacks (tenant_id);
60
+ CREATE INDEX idx_feedbacks_user ON feedbacks (user_id);
61
+ CREATE INDEX idx_feedbacks_graph_edges ON feedbacks USING GIN (graph_edges);
62
+ CREATE INDEX idx_feedbacks_metadata ON feedbacks USING GIN (metadata);
63
+ CREATE INDEX idx_feedbacks_tags ON feedbacks USING GIN (tags);
64
64
 
65
65
  -- KV_STORE trigger for feedbacks
66
66
  -- Trigger function to maintain KV_STORE for feedbacks
@@ -84,7 +84,7 @@ BEGIN
84
84
  graph_edges,
85
85
  updated_at
86
86
  ) VALUES (
87
- NEW.id::VARCHAR,
87
+ normalize_key(NEW.id::VARCHAR),
88
88
  'feedbacks',
89
89
  NEW.id,
90
90
  NEW.tenant_id,
@@ -135,11 +135,11 @@ CREATE TABLE IF NOT EXISTS files (
135
135
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
136
136
  );
137
137
 
138
- CREATE INDEX IF NOT EXISTS idx_files_tenant ON files (tenant_id);
139
- CREATE INDEX IF NOT EXISTS idx_files_user ON files (user_id);
140
- CREATE INDEX IF NOT EXISTS idx_files_graph_edges ON files USING GIN (graph_edges);
141
- CREATE INDEX IF NOT EXISTS idx_files_metadata ON files USING GIN (metadata);
142
- CREATE INDEX IF NOT EXISTS idx_files_tags ON files USING GIN (tags);
138
+ CREATE INDEX idx_files_tenant ON files (tenant_id);
139
+ CREATE INDEX idx_files_user ON files (user_id);
140
+ CREATE INDEX idx_files_graph_edges ON files USING GIN (graph_edges);
141
+ CREATE INDEX idx_files_metadata ON files USING GIN (metadata);
142
+ CREATE INDEX idx_files_tags ON files USING GIN (tags);
143
143
 
144
144
  -- Embeddings for files
145
145
  CREATE TABLE IF NOT EXISTS embeddings_files (
@@ -157,14 +157,14 @@ CREATE TABLE IF NOT EXISTS embeddings_files (
157
157
  );
158
158
 
159
159
  -- Index for entity lookup (get all embeddings for entity)
160
- CREATE INDEX IF NOT EXISTS idx_embeddings_files_entity ON embeddings_files (entity_id);
160
+ CREATE INDEX idx_embeddings_files_entity ON embeddings_files (entity_id);
161
161
 
162
162
  -- Index for field + provider lookup
163
- CREATE INDEX IF NOT EXISTS idx_embeddings_files_field_provider ON embeddings_files (field_name, provider);
163
+ CREATE INDEX idx_embeddings_files_field_provider ON embeddings_files (field_name, provider);
164
164
 
165
165
  -- HNSW index for vector similarity search (created in background)
166
166
  -- Note: This will be created by background thread after data load
167
- -- CREATE INDEX IF NOT EXISTS idx_embeddings_files_vector_hnsw ON embeddings_files
167
+ -- CREATE INDEX idx_embeddings_files_vector_hnsw ON embeddings_files
168
168
  -- USING hnsw (embedding vector_cosine_ops);
169
169
 
170
170
  -- KV_STORE trigger for files
@@ -189,7 +189,7 @@ BEGIN
189
189
  graph_edges,
190
190
  updated_at
191
191
  ) VALUES (
192
- NEW.id::VARCHAR,
192
+ normalize_key(NEW.id::VARCHAR),
193
193
  'files',
194
194
  NEW.id,
195
195
  NEW.tenant_id,
@@ -248,11 +248,11 @@ CREATE TABLE IF NOT EXISTS image_resources (
248
248
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
249
249
  );
250
250
 
251
- CREATE INDEX IF NOT EXISTS idx_image_resources_tenant ON image_resources (tenant_id);
252
- CREATE INDEX IF NOT EXISTS idx_image_resources_user ON image_resources (user_id);
253
- CREATE INDEX IF NOT EXISTS idx_image_resources_graph_edges ON image_resources USING GIN (graph_edges);
254
- CREATE INDEX IF NOT EXISTS idx_image_resources_metadata ON image_resources USING GIN (metadata);
255
- CREATE INDEX IF NOT EXISTS idx_image_resources_tags ON image_resources USING GIN (tags);
251
+ CREATE INDEX idx_image_resources_tenant ON image_resources (tenant_id);
252
+ CREATE INDEX idx_image_resources_user ON image_resources (user_id);
253
+ CREATE INDEX idx_image_resources_graph_edges ON image_resources USING GIN (graph_edges);
254
+ CREATE INDEX idx_image_resources_metadata ON image_resources USING GIN (metadata);
255
+ CREATE INDEX idx_image_resources_tags ON image_resources USING GIN (tags);
256
256
 
257
257
  -- Embeddings for image_resources
258
258
  CREATE TABLE IF NOT EXISTS embeddings_image_resources (
@@ -270,14 +270,14 @@ CREATE TABLE IF NOT EXISTS embeddings_image_resources (
270
270
  );
271
271
 
272
272
  -- Index for entity lookup (get all embeddings for entity)
273
- CREATE INDEX IF NOT EXISTS idx_embeddings_image_resources_entity ON embeddings_image_resources (entity_id);
273
+ CREATE INDEX idx_embeddings_image_resources_entity ON embeddings_image_resources (entity_id);
274
274
 
275
275
  -- Index for field + provider lookup
276
- CREATE INDEX IF NOT EXISTS idx_embeddings_image_resources_field_provider ON embeddings_image_resources (field_name, provider);
276
+ CREATE INDEX idx_embeddings_image_resources_field_provider ON embeddings_image_resources (field_name, provider);
277
277
 
278
278
  -- HNSW index for vector similarity search (created in background)
279
279
  -- Note: This will be created by background thread after data load
280
- -- CREATE INDEX IF NOT EXISTS idx_embeddings_image_resources_vector_hnsw ON embeddings_image_resources
280
+ -- CREATE INDEX idx_embeddings_image_resources_vector_hnsw ON embeddings_image_resources
281
281
  -- USING hnsw (embedding vector_cosine_ops);
282
282
 
283
283
  -- KV_STORE trigger for image_resources
@@ -302,7 +302,7 @@ BEGIN
302
302
  graph_edges,
303
303
  updated_at
304
304
  ) VALUES (
305
- NEW.name::VARCHAR,
305
+ normalize_key(NEW.name::VARCHAR),
306
306
  'image_resources',
307
307
  NEW.id,
308
308
  NEW.tenant_id,
@@ -354,11 +354,11 @@ CREATE TABLE IF NOT EXISTS messages (
354
354
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
355
355
  );
356
356
 
357
- CREATE INDEX IF NOT EXISTS idx_messages_tenant ON messages (tenant_id);
358
- CREATE INDEX IF NOT EXISTS idx_messages_user ON messages (user_id);
359
- CREATE INDEX IF NOT EXISTS idx_messages_graph_edges ON messages USING GIN (graph_edges);
360
- CREATE INDEX IF NOT EXISTS idx_messages_metadata ON messages USING GIN (metadata);
361
- CREATE INDEX IF NOT EXISTS idx_messages_tags ON messages USING GIN (tags);
357
+ CREATE INDEX idx_messages_tenant ON messages (tenant_id);
358
+ CREATE INDEX idx_messages_user ON messages (user_id);
359
+ CREATE INDEX idx_messages_graph_edges ON messages USING GIN (graph_edges);
360
+ CREATE INDEX idx_messages_metadata ON messages USING GIN (metadata);
361
+ CREATE INDEX idx_messages_tags ON messages USING GIN (tags);
362
362
 
363
363
  -- Embeddings for messages
364
364
  CREATE TABLE IF NOT EXISTS embeddings_messages (
@@ -376,14 +376,14 @@ CREATE TABLE IF NOT EXISTS embeddings_messages (
376
376
  );
377
377
 
378
378
  -- Index for entity lookup (get all embeddings for entity)
379
- CREATE INDEX IF NOT EXISTS idx_embeddings_messages_entity ON embeddings_messages (entity_id);
379
+ CREATE INDEX idx_embeddings_messages_entity ON embeddings_messages (entity_id);
380
380
 
381
381
  -- Index for field + provider lookup
382
- CREATE INDEX IF NOT EXISTS idx_embeddings_messages_field_provider ON embeddings_messages (field_name, provider);
382
+ CREATE INDEX idx_embeddings_messages_field_provider ON embeddings_messages (field_name, provider);
383
383
 
384
384
  -- HNSW index for vector similarity search (created in background)
385
385
  -- Note: This will be created by background thread after data load
386
- -- CREATE INDEX IF NOT EXISTS idx_embeddings_messages_vector_hnsw ON embeddings_messages
386
+ -- CREATE INDEX idx_embeddings_messages_vector_hnsw ON embeddings_messages
387
387
  -- USING hnsw (embedding vector_cosine_ops);
388
388
 
389
389
  -- KV_STORE trigger for messages
@@ -408,7 +408,7 @@ BEGIN
408
408
  graph_edges,
409
409
  updated_at
410
410
  ) VALUES (
411
- NEW.id::VARCHAR,
411
+ normalize_key(NEW.id::VARCHAR),
412
412
  'messages',
413
413
  NEW.id,
414
414
  NEW.tenant_id,
@@ -462,11 +462,11 @@ CREATE TABLE IF NOT EXISTS moments (
462
462
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
463
463
  );
464
464
 
465
- CREATE INDEX IF NOT EXISTS idx_moments_tenant ON moments (tenant_id);
466
- CREATE INDEX IF NOT EXISTS idx_moments_user ON moments (user_id);
467
- CREATE INDEX IF NOT EXISTS idx_moments_graph_edges ON moments USING GIN (graph_edges);
468
- CREATE INDEX IF NOT EXISTS idx_moments_metadata ON moments USING GIN (metadata);
469
- CREATE INDEX IF NOT EXISTS idx_moments_tags ON moments USING GIN (tags);
465
+ CREATE INDEX idx_moments_tenant ON moments (tenant_id);
466
+ CREATE INDEX idx_moments_user ON moments (user_id);
467
+ CREATE INDEX idx_moments_graph_edges ON moments USING GIN (graph_edges);
468
+ CREATE INDEX idx_moments_metadata ON moments USING GIN (metadata);
469
+ CREATE INDEX idx_moments_tags ON moments USING GIN (tags);
470
470
 
471
471
  -- Embeddings for moments
472
472
  CREATE TABLE IF NOT EXISTS embeddings_moments (
@@ -484,14 +484,14 @@ CREATE TABLE IF NOT EXISTS embeddings_moments (
484
484
  );
485
485
 
486
486
  -- Index for entity lookup (get all embeddings for entity)
487
- CREATE INDEX IF NOT EXISTS idx_embeddings_moments_entity ON embeddings_moments (entity_id);
487
+ CREATE INDEX idx_embeddings_moments_entity ON embeddings_moments (entity_id);
488
488
 
489
489
  -- Index for field + provider lookup
490
- CREATE INDEX IF NOT EXISTS idx_embeddings_moments_field_provider ON embeddings_moments (field_name, provider);
490
+ CREATE INDEX idx_embeddings_moments_field_provider ON embeddings_moments (field_name, provider);
491
491
 
492
492
  -- HNSW index for vector similarity search (created in background)
493
493
  -- Note: This will be created by background thread after data load
494
- -- CREATE INDEX IF NOT EXISTS idx_embeddings_moments_vector_hnsw ON embeddings_moments
494
+ -- CREATE INDEX idx_embeddings_moments_vector_hnsw ON embeddings_moments
495
495
  -- USING hnsw (embedding vector_cosine_ops);
496
496
 
497
497
  -- KV_STORE trigger for moments
@@ -516,7 +516,7 @@ BEGIN
516
516
  graph_edges,
517
517
  updated_at
518
518
  ) VALUES (
519
- NEW.name::VARCHAR,
519
+ normalize_key(NEW.name::VARCHAR),
520
520
  'moments',
521
521
  NEW.id,
522
522
  NEW.tenant_id,
@@ -553,14 +553,15 @@ CREATE TABLE IF NOT EXISTS ontologies (
553
553
  tenant_id VARCHAR(100) NOT NULL,
554
554
  user_id VARCHAR(256),
555
555
  name VARCHAR(256) NOT NULL,
556
- file_id UUID NOT NULL,
557
- agent_schema_id VARCHAR(256) NOT NULL,
558
- provider_name VARCHAR(256) NOT NULL,
559
- model_name VARCHAR(256) NOT NULL,
560
- extracted_data JSONB NOT NULL,
556
+ uri VARCHAR(256),
557
+ file_id UUID,
558
+ agent_schema_id VARCHAR(256),
559
+ provider_name VARCHAR(256),
560
+ model_name VARCHAR(256),
561
+ extracted_data JSONB,
561
562
  confidence_score DOUBLE PRECISION,
562
563
  extraction_timestamp VARCHAR(256),
563
- embedding_text TEXT,
564
+ content TEXT,
564
565
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
565
566
  updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
566
567
  deleted_at TIMESTAMP,
@@ -569,11 +570,37 @@ CREATE TABLE IF NOT EXISTS ontologies (
569
570
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
570
571
  );
571
572
 
572
- CREATE INDEX IF NOT EXISTS idx_ontologies_tenant ON ontologies (tenant_id);
573
- CREATE INDEX IF NOT EXISTS idx_ontologies_user ON ontologies (user_id);
574
- CREATE INDEX IF NOT EXISTS idx_ontologies_graph_edges ON ontologies USING GIN (graph_edges);
575
- CREATE INDEX IF NOT EXISTS idx_ontologies_metadata ON ontologies USING GIN (metadata);
576
- CREATE INDEX IF NOT EXISTS idx_ontologies_tags ON ontologies USING GIN (tags);
573
+ CREATE INDEX idx_ontologies_tenant ON ontologies (tenant_id);
574
+ CREATE INDEX idx_ontologies_user ON ontologies (user_id);
575
+ CREATE INDEX idx_ontologies_graph_edges ON ontologies USING GIN (graph_edges);
576
+ CREATE INDEX idx_ontologies_metadata ON ontologies USING GIN (metadata);
577
+ CREATE INDEX idx_ontologies_tags ON ontologies USING GIN (tags);
578
+
579
+ -- Embeddings for ontologies
580
+ CREATE TABLE IF NOT EXISTS embeddings_ontologies (
581
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
582
+ entity_id UUID NOT NULL REFERENCES ontologies(id) ON DELETE CASCADE,
583
+ field_name VARCHAR(100) NOT NULL,
584
+ provider VARCHAR(50) NOT NULL DEFAULT 'openai',
585
+ model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
586
+ embedding vector(1536) NOT NULL,
587
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
588
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
589
+
590
+ -- Unique: one embedding per entity per field per provider
591
+ UNIQUE (entity_id, field_name, provider)
592
+ );
593
+
594
+ -- Index for entity lookup (get all embeddings for entity)
595
+ CREATE INDEX idx_embeddings_ontologies_entity ON embeddings_ontologies (entity_id);
596
+
597
+ -- Index for field + provider lookup
598
+ CREATE INDEX idx_embeddings_ontologies_field_provider ON embeddings_ontologies (field_name, provider);
599
+
600
+ -- HNSW index for vector similarity search (created in background)
601
+ -- Note: This will be created by background thread after data load
602
+ -- CREATE INDEX idx_embeddings_ontologies_vector_hnsw ON embeddings_ontologies
603
+ -- USING hnsw (embedding vector_cosine_ops);
577
604
 
578
605
  -- KV_STORE trigger for ontologies
579
606
  -- Trigger function to maintain KV_STORE for ontologies
@@ -597,7 +624,7 @@ BEGIN
597
624
  graph_edges,
598
625
  updated_at
599
626
  ) VALUES (
600
- NEW.id::VARCHAR,
627
+ normalize_key(NEW.id::VARCHAR),
601
628
  'ontologies',
602
629
  NEW.id,
603
630
  NEW.tenant_id,
@@ -651,11 +678,11 @@ CREATE TABLE IF NOT EXISTS ontology_configs (
651
678
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
652
679
  );
653
680
 
654
- CREATE INDEX IF NOT EXISTS idx_ontology_configs_tenant ON ontology_configs (tenant_id);
655
- CREATE INDEX IF NOT EXISTS idx_ontology_configs_user ON ontology_configs (user_id);
656
- CREATE INDEX IF NOT EXISTS idx_ontology_configs_graph_edges ON ontology_configs USING GIN (graph_edges);
657
- CREATE INDEX IF NOT EXISTS idx_ontology_configs_metadata ON ontology_configs USING GIN (metadata);
658
- CREATE INDEX IF NOT EXISTS idx_ontology_configs_tags ON ontology_configs USING GIN (tags);
681
+ CREATE INDEX idx_ontology_configs_tenant ON ontology_configs (tenant_id);
682
+ CREATE INDEX idx_ontology_configs_user ON ontology_configs (user_id);
683
+ CREATE INDEX idx_ontology_configs_graph_edges ON ontology_configs USING GIN (graph_edges);
684
+ CREATE INDEX idx_ontology_configs_metadata ON ontology_configs USING GIN (metadata);
685
+ CREATE INDEX idx_ontology_configs_tags ON ontology_configs USING GIN (tags);
659
686
 
660
687
  -- Embeddings for ontology_configs
661
688
  CREATE TABLE IF NOT EXISTS embeddings_ontology_configs (
@@ -673,14 +700,14 @@ CREATE TABLE IF NOT EXISTS embeddings_ontology_configs (
673
700
  );
674
701
 
675
702
  -- Index for entity lookup (get all embeddings for entity)
676
- CREATE INDEX IF NOT EXISTS idx_embeddings_ontology_configs_entity ON embeddings_ontology_configs (entity_id);
703
+ CREATE INDEX idx_embeddings_ontology_configs_entity ON embeddings_ontology_configs (entity_id);
677
704
 
678
705
  -- Index for field + provider lookup
679
- CREATE INDEX IF NOT EXISTS idx_embeddings_ontology_configs_field_provider ON embeddings_ontology_configs (field_name, provider);
706
+ CREATE INDEX idx_embeddings_ontology_configs_field_provider ON embeddings_ontology_configs (field_name, provider);
680
707
 
681
708
  -- HNSW index for vector similarity search (created in background)
682
709
  -- Note: This will be created by background thread after data load
683
- -- CREATE INDEX IF NOT EXISTS idx_embeddings_ontology_configs_vector_hnsw ON embeddings_ontology_configs
710
+ -- CREATE INDEX idx_embeddings_ontology_configs_vector_hnsw ON embeddings_ontology_configs
684
711
  -- USING hnsw (embedding vector_cosine_ops);
685
712
 
686
713
  -- KV_STORE trigger for ontology_configs
@@ -705,7 +732,7 @@ BEGIN
705
732
  graph_edges,
706
733
  updated_at
707
734
  ) VALUES (
708
- NEW.id::VARCHAR,
735
+ normalize_key(NEW.id::VARCHAR),
709
736
  'ontology_configs',
710
737
  NEW.id,
711
738
  NEW.tenant_id,
@@ -756,11 +783,11 @@ CREATE TABLE IF NOT EXISTS resources (
756
783
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
757
784
  );
758
785
 
759
- CREATE INDEX IF NOT EXISTS idx_resources_tenant ON resources (tenant_id);
760
- CREATE INDEX IF NOT EXISTS idx_resources_user ON resources (user_id);
761
- CREATE INDEX IF NOT EXISTS idx_resources_graph_edges ON resources USING GIN (graph_edges);
762
- CREATE INDEX IF NOT EXISTS idx_resources_metadata ON resources USING GIN (metadata);
763
- CREATE INDEX IF NOT EXISTS idx_resources_tags ON resources USING GIN (tags);
786
+ CREATE INDEX idx_resources_tenant ON resources (tenant_id);
787
+ CREATE INDEX idx_resources_user ON resources (user_id);
788
+ CREATE INDEX idx_resources_graph_edges ON resources USING GIN (graph_edges);
789
+ CREATE INDEX idx_resources_metadata ON resources USING GIN (metadata);
790
+ CREATE INDEX idx_resources_tags ON resources USING GIN (tags);
764
791
 
765
792
  -- Embeddings for resources
766
793
  CREATE TABLE IF NOT EXISTS embeddings_resources (
@@ -778,14 +805,14 @@ CREATE TABLE IF NOT EXISTS embeddings_resources (
778
805
  );
779
806
 
780
807
  -- Index for entity lookup (get all embeddings for entity)
781
- CREATE INDEX IF NOT EXISTS idx_embeddings_resources_entity ON embeddings_resources (entity_id);
808
+ CREATE INDEX idx_embeddings_resources_entity ON embeddings_resources (entity_id);
782
809
 
783
810
  -- Index for field + provider lookup
784
- CREATE INDEX IF NOT EXISTS idx_embeddings_resources_field_provider ON embeddings_resources (field_name, provider);
811
+ CREATE INDEX idx_embeddings_resources_field_provider ON embeddings_resources (field_name, provider);
785
812
 
786
813
  -- HNSW index for vector similarity search (created in background)
787
814
  -- Note: This will be created by background thread after data load
788
- -- CREATE INDEX IF NOT EXISTS idx_embeddings_resources_vector_hnsw ON embeddings_resources
815
+ -- CREATE INDEX idx_embeddings_resources_vector_hnsw ON embeddings_resources
789
816
  -- USING hnsw (embedding vector_cosine_ops);
790
817
 
791
818
  -- KV_STORE trigger for resources
@@ -810,7 +837,7 @@ BEGIN
810
837
  graph_edges,
811
838
  updated_at
812
839
  ) VALUES (
813
- NEW.name::VARCHAR,
840
+ normalize_key(NEW.name::VARCHAR),
814
841
  'resources',
815
842
  NEW.id,
816
843
  NEW.tenant_id,
@@ -860,11 +887,11 @@ CREATE TABLE IF NOT EXISTS schemas (
860
887
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
861
888
  );
862
889
 
863
- CREATE INDEX IF NOT EXISTS idx_schemas_tenant ON schemas (tenant_id);
864
- CREATE INDEX IF NOT EXISTS idx_schemas_user ON schemas (user_id);
865
- CREATE INDEX IF NOT EXISTS idx_schemas_graph_edges ON schemas USING GIN (graph_edges);
866
- CREATE INDEX IF NOT EXISTS idx_schemas_metadata ON schemas USING GIN (metadata);
867
- CREATE INDEX IF NOT EXISTS idx_schemas_tags ON schemas USING GIN (tags);
890
+ CREATE INDEX idx_schemas_tenant ON schemas (tenant_id);
891
+ CREATE INDEX idx_schemas_user ON schemas (user_id);
892
+ CREATE INDEX idx_schemas_graph_edges ON schemas USING GIN (graph_edges);
893
+ CREATE INDEX idx_schemas_metadata ON schemas USING GIN (metadata);
894
+ CREATE INDEX idx_schemas_tags ON schemas USING GIN (tags);
868
895
 
869
896
  -- Embeddings for schemas
870
897
  CREATE TABLE IF NOT EXISTS embeddings_schemas (
@@ -882,14 +909,14 @@ CREATE TABLE IF NOT EXISTS embeddings_schemas (
882
909
  );
883
910
 
884
911
  -- Index for entity lookup (get all embeddings for entity)
885
- CREATE INDEX IF NOT EXISTS idx_embeddings_schemas_entity ON embeddings_schemas (entity_id);
912
+ CREATE INDEX idx_embeddings_schemas_entity ON embeddings_schemas (entity_id);
886
913
 
887
914
  -- Index for field + provider lookup
888
- CREATE INDEX IF NOT EXISTS idx_embeddings_schemas_field_provider ON embeddings_schemas (field_name, provider);
915
+ CREATE INDEX idx_embeddings_schemas_field_provider ON embeddings_schemas (field_name, provider);
889
916
 
890
917
  -- HNSW index for vector similarity search (created in background)
891
918
  -- Note: This will be created by background thread after data load
892
- -- CREATE INDEX IF NOT EXISTS idx_embeddings_schemas_vector_hnsw ON embeddings_schemas
919
+ -- CREATE INDEX idx_embeddings_schemas_vector_hnsw ON embeddings_schemas
893
920
  -- USING hnsw (embedding vector_cosine_ops);
894
921
 
895
922
  -- KV_STORE trigger for schemas
@@ -914,7 +941,7 @@ BEGIN
914
941
  graph_edges,
915
942
  updated_at
916
943
  ) VALUES (
917
- NEW.id::VARCHAR,
944
+ normalize_key(NEW.id::VARCHAR),
918
945
  'schemas',
919
946
  NEW.id,
920
947
  NEW.tenant_id,
@@ -967,11 +994,11 @@ CREATE TABLE IF NOT EXISTS sessions (
967
994
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
968
995
  );
969
996
 
970
- CREATE INDEX IF NOT EXISTS idx_sessions_tenant ON sessions (tenant_id);
971
- CREATE INDEX IF NOT EXISTS idx_sessions_user ON sessions (user_id);
972
- CREATE INDEX IF NOT EXISTS idx_sessions_graph_edges ON sessions USING GIN (graph_edges);
973
- CREATE INDEX IF NOT EXISTS idx_sessions_metadata ON sessions USING GIN (metadata);
974
- CREATE INDEX IF NOT EXISTS idx_sessions_tags ON sessions USING GIN (tags);
997
+ CREATE INDEX idx_sessions_tenant ON sessions (tenant_id);
998
+ CREATE INDEX idx_sessions_user ON sessions (user_id);
999
+ CREATE INDEX idx_sessions_graph_edges ON sessions USING GIN (graph_edges);
1000
+ CREATE INDEX idx_sessions_metadata ON sessions USING GIN (metadata);
1001
+ CREATE INDEX idx_sessions_tags ON sessions USING GIN (tags);
975
1002
 
976
1003
  -- Embeddings for sessions
977
1004
  CREATE TABLE IF NOT EXISTS embeddings_sessions (
@@ -989,14 +1016,14 @@ CREATE TABLE IF NOT EXISTS embeddings_sessions (
989
1016
  );
990
1017
 
991
1018
  -- Index for entity lookup (get all embeddings for entity)
992
- CREATE INDEX IF NOT EXISTS idx_embeddings_sessions_entity ON embeddings_sessions (entity_id);
1019
+ CREATE INDEX idx_embeddings_sessions_entity ON embeddings_sessions (entity_id);
993
1020
 
994
1021
  -- Index for field + provider lookup
995
- CREATE INDEX IF NOT EXISTS idx_embeddings_sessions_field_provider ON embeddings_sessions (field_name, provider);
1022
+ CREATE INDEX idx_embeddings_sessions_field_provider ON embeddings_sessions (field_name, provider);
996
1023
 
997
1024
  -- HNSW index for vector similarity search (created in background)
998
1025
  -- Note: This will be created by background thread after data load
999
- -- CREATE INDEX IF NOT EXISTS idx_embeddings_sessions_vector_hnsw ON embeddings_sessions
1026
+ -- CREATE INDEX idx_embeddings_sessions_vector_hnsw ON embeddings_sessions
1000
1027
  -- USING hnsw (embedding vector_cosine_ops);
1001
1028
 
1002
1029
  -- KV_STORE trigger for sessions
@@ -1021,7 +1048,7 @@ BEGIN
1021
1048
  graph_edges,
1022
1049
  updated_at
1023
1050
  ) VALUES (
1024
- NEW.name::VARCHAR,
1051
+ normalize_key(NEW.name::VARCHAR),
1025
1052
  'sessions',
1026
1053
  NEW.id,
1027
1054
  NEW.tenant_id,
@@ -1068,11 +1095,11 @@ CREATE TABLE IF NOT EXISTS shared_sessions (
1068
1095
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
1069
1096
  );
1070
1097
 
1071
- CREATE INDEX IF NOT EXISTS idx_shared_sessions_tenant ON shared_sessions (tenant_id);
1072
- CREATE INDEX IF NOT EXISTS idx_shared_sessions_user ON shared_sessions (user_id);
1073
- CREATE INDEX IF NOT EXISTS idx_shared_sessions_graph_edges ON shared_sessions USING GIN (graph_edges);
1074
- CREATE INDEX IF NOT EXISTS idx_shared_sessions_metadata ON shared_sessions USING GIN (metadata);
1075
- CREATE INDEX IF NOT EXISTS idx_shared_sessions_tags ON shared_sessions USING GIN (tags);
1098
+ CREATE INDEX idx_shared_sessions_tenant ON shared_sessions (tenant_id);
1099
+ CREATE INDEX idx_shared_sessions_user ON shared_sessions (user_id);
1100
+ CREATE INDEX idx_shared_sessions_graph_edges ON shared_sessions USING GIN (graph_edges);
1101
+ CREATE INDEX idx_shared_sessions_metadata ON shared_sessions USING GIN (metadata);
1102
+ CREATE INDEX idx_shared_sessions_tags ON shared_sessions USING GIN (tags);
1076
1103
 
1077
1104
  -- KV_STORE trigger for shared_sessions
1078
1105
  -- Trigger function to maintain KV_STORE for shared_sessions
@@ -1096,7 +1123,7 @@ BEGIN
1096
1123
  graph_edges,
1097
1124
  updated_at
1098
1125
  ) VALUES (
1099
- NEW.id::VARCHAR,
1126
+ normalize_key(NEW.id::VARCHAR),
1100
1127
  'shared_sessions',
1101
1128
  NEW.id,
1102
1129
  NEW.tenant_id,
@@ -1151,11 +1178,11 @@ CREATE TABLE IF NOT EXISTS users (
1151
1178
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
1152
1179
  );
1153
1180
 
1154
- CREATE INDEX IF NOT EXISTS idx_users_tenant ON users (tenant_id);
1155
- CREATE INDEX IF NOT EXISTS idx_users_user ON users (user_id);
1156
- CREATE INDEX IF NOT EXISTS idx_users_graph_edges ON users USING GIN (graph_edges);
1157
- CREATE INDEX IF NOT EXISTS idx_users_metadata ON users USING GIN (metadata);
1158
- CREATE INDEX IF NOT EXISTS idx_users_tags ON users USING GIN (tags);
1181
+ CREATE INDEX idx_users_tenant ON users (tenant_id);
1182
+ CREATE INDEX idx_users_user ON users (user_id);
1183
+ CREATE INDEX idx_users_graph_edges ON users USING GIN (graph_edges);
1184
+ CREATE INDEX idx_users_metadata ON users USING GIN (metadata);
1185
+ CREATE INDEX idx_users_tags ON users USING GIN (tags);
1159
1186
 
1160
1187
  -- Embeddings for users
1161
1188
  CREATE TABLE IF NOT EXISTS embeddings_users (
@@ -1173,14 +1200,14 @@ CREATE TABLE IF NOT EXISTS embeddings_users (
1173
1200
  );
1174
1201
 
1175
1202
  -- Index for entity lookup (get all embeddings for entity)
1176
- CREATE INDEX IF NOT EXISTS idx_embeddings_users_entity ON embeddings_users (entity_id);
1203
+ CREATE INDEX idx_embeddings_users_entity ON embeddings_users (entity_id);
1177
1204
 
1178
1205
  -- Index for field + provider lookup
1179
- CREATE INDEX IF NOT EXISTS idx_embeddings_users_field_provider ON embeddings_users (field_name, provider);
1206
+ CREATE INDEX idx_embeddings_users_field_provider ON embeddings_users (field_name, provider);
1180
1207
 
1181
1208
  -- HNSW index for vector similarity search (created in background)
1182
1209
  -- Note: This will be created by background thread after data load
1183
- -- CREATE INDEX IF NOT EXISTS idx_embeddings_users_vector_hnsw ON embeddings_users
1210
+ -- CREATE INDEX idx_embeddings_users_vector_hnsw ON embeddings_users
1184
1211
  -- USING hnsw (embedding vector_cosine_ops);
1185
1212
 
1186
1213
  -- KV_STORE trigger for users
@@ -1205,7 +1232,7 @@ BEGIN
1205
1232
  graph_edges,
1206
1233
  updated_at
1207
1234
  ) VALUES (
1208
- NEW.name::VARCHAR,
1235
+ normalize_key(NEW.name::VARCHAR),
1209
1236
  'users',
1210
1237
  NEW.id,
1211
1238
  NEW.tenant_id,
@@ -2008,18 +2035,19 @@ VALUES (
2008
2035
  'Ontology',
2009
2036
  '# Ontology
2010
2037
 
2011
- Domain-specific knowledge extracted from files using custom agents.
2038
+ Domain-specific knowledge - either agent-extracted or direct-loaded.
2012
2039
 
2013
2040
  Attributes:
2014
2041
  name: Human-readable label for this ontology instance
2015
- file_id: Foreign key to File entity that was processed
2016
- agent_schema_id: Foreign key to Schema entity that performed extraction
2017
- provider_name: LLM provider used for extraction (e.g., "anthropic", "openai")
2018
- model_name: Specific model used (e.g., "claude-sonnet-4-5")
2019
- extracted_data: Structured data extracted by agent (arbitrary JSON)
2042
+ uri: External source reference (git://, s3://, https://) for direct-loaded ontologies
2043
+ file_id: Foreign key to File entity (optional - only for agent-extracted)
2044
+ agent_schema_id: Schema that performed extraction (optional - only for agent-extracted)
2045
+ provider_name: LLM provider used for extraction (optional)
2046
+ model_name: Specific model used (optional)
2047
+ extracted_data: Structured data - either extracted by agent or parsed from source
2020
2048
  confidence_score: Optional confidence score from extraction (0.0-1.0)
2021
2049
  extraction_timestamp: When extraction was performed
2022
- embedding_text: Text used for generating embedding (derived from extracted_data)
2050
+ content: Text used for generating embedding
2023
2051
 
2024
2052
  Inherited from CoreModel:
2025
2053
  id: UUID or string identifier
@@ -2031,10 +2059,9 @@ Domain-specific knowledge extracted from files using custom agents.
2031
2059
  graph_edges: Relationships to other entities
2032
2060
  metadata: Flexible metadata storage
2033
2061
  tags: Classification tags
2034
- column: Database schema metadata
2035
2062
 
2036
2063
  Example Usage:
2037
- # CV extraction
2064
+ # Agent-extracted: CV parsing
2038
2065
  cv_ontology = Ontology(
2039
2066
  name="john-doe-cv-2024",
2040
2067
  file_id="file-uuid-123",
@@ -2043,51 +2070,38 @@ Domain-specific knowledge extracted from files using custom agents.
2043
2070
  model_name="claude-sonnet-4-5-20250929",
2044
2071
  extracted_data={
2045
2072
  "candidate_name": "John Doe",
2046
- "email": "john@example.com",
2047
2073
  "skills": ["Python", "PostgreSQL", "Kubernetes"],
2048
- "experience": [
2049
- {
2050
- "company": "TechCorp",
2051
- "role": "Senior Engineer",
2052
- "years": 3,
2053
- "achievements": ["Led migration to k8s", "Reduced costs 40%"]
2054
- }
2055
- ],
2056
- "education": [
2057
- {"degree": "BS Computer Science", "institution": "MIT", "year": 2018}
2058
- ]
2059
2074
  },
2060
2075
  confidence_score=0.95,
2061
- tags=["cv", "engineering", "senior-level"]
2076
+ tags=["cv", "engineering"]
2062
2077
  )
2063
2078
 
2064
- # Contract extraction
2065
- contract_ontology = Ontology(
2066
- name="acme-supplier-agreement-2024",
2067
- file_id="file-uuid-456",
2068
- agent_schema_id="contract-parser-v2",
2069
- provider_name="openai",
2070
- model_name="gpt-4.1",
2079
+ # Direct-loaded: Medical knowledge base from git
2080
+ disorder_ontology = Ontology(
2081
+ name="panic-disorder",
2082
+ uri="git://bwolfson-siggie/Siggy-MVP/ontology/disorders/anxiety/panic-disorder.md",
2083
+ content="# Panic Disorder\n\nPanic disorder is characterized by...",
2071
2084
  extracted_data={
2072
- "contract_type": "supplier_agreement",
2073
- "parties": [
2074
- {"name": "ACME Corp", "role": "buyer"},
2075
- {"name": "SupplyChain Inc", "role": "supplier"}
2076
- ],
2077
- "effective_date": "2024-01-01",
2078
- "termination_date": "2026-12-31",
2079
- "payment_terms": {
2080
- "amount": 500000,
2081
- "currency": "USD",
2082
- "frequency": "quarterly"
2083
- },
2084
- "key_obligations": [
2085
- "Supplier must deliver within 30 days",
2086
- "Buyer must pay within 60 days of invoice"
2087
- ]
2085
+ "type": "disorder",
2086
+ "category": "anxiety",
2087
+ "icd10": "F41.0",
2088
+ "dsm5_criteria": ["A", "B", "C", "D"],
2088
2089
  },
2089
- confidence_score=0.92,
2090
- tags=["contract", "supplier", "procurement"]
2090
+ tags=["disorder", "anxiety", "dsm5"]
2091
+ )
2092
+
2093
+ # Direct-loaded: Clinical procedure from git
2094
+ scid_node = Ontology(
2095
+ name="scid-5-f1",
2096
+ uri="git://bwolfson-siggie/Siggy-MVP/ontology/procedures/scid-5/module-f/scid-5-f1.md",
2097
+ content="# scid-5-f1: Panic Attack Screening\n\n...",
2098
+ extracted_data={
2099
+ "type": "procedure",
2100
+ "module": "F",
2101
+ "section": "Panic Disorder",
2102
+ "dsm5_criterion": "Panic Attack Specifier",
2103
+ },
2104
+ tags=["scid-5", "procedure", "anxiety"]
2091
2105
  )
2092
2106
 
2093
2107
 
@@ -2110,7 +2124,7 @@ This schema includes the `search_rem` tool which supports:
2110
2124
  |----------|-------|
2111
2125
  | Table | `ontologies` |
2112
2126
  | Entity Key | `id` |
2113
- | Embedding Fields | None |
2127
+ | Embedding Fields | `content` |
2114
2128
  | Tools | `search_rem` |
2115
2129
 
2116
2130
  ## Fields
@@ -2164,25 +2178,29 @@ This schema includes the `search_rem` tool which supports:
2164
2178
  - **Type**: `<class ''str''>`
2165
2179
  - **Required**
2166
2180
 
2181
+ ### `uri`
2182
+ - **Type**: `typing.Optional[str]`
2183
+ - **Optional**
2184
+
2167
2185
  ### `file_id`
2168
- - **Type**: `uuid.UUID | str`
2169
- - **Required**
2186
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
2187
+ - **Optional**
2170
2188
 
2171
2189
  ### `agent_schema_id`
2172
- - **Type**: `<class ''str''>`
2173
- - **Required**
2190
+ - **Type**: `typing.Optional[str]`
2191
+ - **Optional**
2174
2192
 
2175
2193
  ### `provider_name`
2176
- - **Type**: `<class ''str''>`
2177
- - **Required**
2194
+ - **Type**: `typing.Optional[str]`
2195
+ - **Optional**
2178
2196
 
2179
2197
  ### `model_name`
2180
- - **Type**: `<class ''str''>`
2181
- - **Required**
2198
+ - **Type**: `typing.Optional[str]`
2199
+ - **Optional**
2182
2200
 
2183
2201
  ### `extracted_data`
2184
- - **Type**: `dict[str, typing.Any]`
2185
- - **Required**
2202
+ - **Type**: `typing.Optional[dict[str, typing.Any]]`
2203
+ - **Optional**
2186
2204
 
2187
2205
  ### `confidence_score`
2188
2206
  - **Type**: `typing.Optional[float]`
@@ -2192,14 +2210,14 @@ This schema includes the `search_rem` tool which supports:
2192
2210
  - **Type**: `typing.Optional[str]`
2193
2211
  - **Optional**
2194
2212
 
2195
- ### `embedding_text`
2213
+ ### `content`
2196
2214
  - **Type**: `typing.Optional[str]`
2197
2215
  - **Optional**
2198
2216
 
2199
2217
  ',
2200
- '{"type": "object", "description": "Domain-specific knowledge extracted from files using custom agents.\n\n Attributes:\n name: Human-readable label for this ontology instance\n file_id: Foreign key to File entity that was processed\n agent_schema_id: Foreign key to Schema entity that performed extraction\n provider_name: LLM provider used for extraction (e.g., \"anthropic\", \"openai\")\n model_name: Specific model used (e.g., \"claude-sonnet-4-5\")\n extracted_data: Structured data extracted by agent (arbitrary JSON)\n confidence_score: Optional confidence score from extraction (0.0-1.0)\n extraction_timestamp: When extraction was performed\n embedding_text: Text used for generating embedding (derived from extracted_data)\n\n Inherited from CoreModel:\n id: UUID or string identifier\n created_at: Entity creation timestamp\n updated_at: Last update timestamp\n deleted_at: Soft deletion timestamp\n tenant_id: Multi-tenancy isolation\n user_id: Ownership\n graph_edges: Relationships to other entities\n metadata: Flexible metadata storage\n tags: Classification tags\n column: Database schema metadata\n\n Example Usage:\n # CV extraction\n cv_ontology = Ontology(\n name=\"john-doe-cv-2024\",\n file_id=\"file-uuid-123\",\n agent_schema_id=\"cv-parser-v1\",\n provider_name=\"anthropic\",\n model_name=\"claude-sonnet-4-5-20250929\",\n extracted_data={\n \"candidate_name\": \"John Doe\",\n \"email\": \"john@example.com\",\n \"skills\": [\"Python\", \"PostgreSQL\", \"Kubernetes\"],\n \"experience\": [\n {\n \"company\": \"TechCorp\",\n \"role\": \"Senior Engineer\",\n \"years\": 3,\n \"achievements\": [\"Led migration to k8s\", \"Reduced costs 40%\"]\n }\n ],\n \"education\": [\n {\"degree\": \"BS Computer Science\", \"institution\": \"MIT\", \"year\": 2018}\n ]\n },\n confidence_score=0.95,\n tags=[\"cv\", \"engineering\", \"senior-level\"]\n )\n\n # Contract extraction\n contract_ontology = Ontology(\n name=\"acme-supplier-agreement-2024\",\n file_id=\"file-uuid-456\",\n agent_schema_id=\"contract-parser-v2\",\n provider_name=\"openai\",\n model_name=\"gpt-4.1\",\n extracted_data={\n \"contract_type\": \"supplier_agreement\",\n \"parties\": [\n {\"name\": \"ACME Corp\", \"role\": \"buyer\"},\n {\"name\": \"SupplyChain Inc\", \"role\": \"supplier\"}\n ],\n \"effective_date\": \"2024-01-01\",\n \"termination_date\": \"2026-12-31\",\n \"payment_terms\": {\n \"amount\": 500000,\n \"currency\": \"USD\",\n \"frequency\": \"quarterly\"\n },\n \"key_obligations\": [\n \"Supplier must deliver within 30 days\",\n \"Buyer must pay within 60 days of invoice\"\n ]\n },\n confidence_score=0.92,\n tags=[\"contract\", \"supplier\", \"procurement\"]\n )\n \n\nThis agent can search the `ontologies` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "file_id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}], "title": "File Id"}, "agent_schema_id": {"title": "Agent Schema Id", "type": "string"}, "provider_name": {"title": "Provider Name", "type": "string"}, "model_name": {"title": "Model Name", "type": "string"}, "extracted_data": {"additionalProperties": true, "title": "Extracted Data", "type": "object"}, "confidence_score": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Confidence Score"}, "extraction_timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Extraction Timestamp"}, "embedding_text": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Embedding Text"}}, "required": ["name", "file_id", "agent_schema_id", "provider_name", "model_name", "extracted_data"], "json_schema_extra": {"table_name": "ontologies", "entity_key_field": "id", "embedding_fields": [], "fully_qualified_name": "rem.models.entities.ontology.Ontology", "tools": ["search_rem"], "default_search_table": "ontologies", "has_embeddings": false}}'::jsonb,
2218
+ '{"type": "object", "description": "Domain-specific knowledge - either agent-extracted or direct-loaded.\n\n Attributes:\n name: Human-readable label for this ontology instance\n uri: External source reference (git://, s3://, https://) for direct-loaded ontologies\n file_id: Foreign key to File entity (optional - only for agent-extracted)\n agent_schema_id: Schema that performed extraction (optional - only for agent-extracted)\n provider_name: LLM provider used for extraction (optional)\n model_name: Specific model used (optional)\n extracted_data: Structured data - either extracted by agent or parsed from source\n confidence_score: Optional confidence score from extraction (0.0-1.0)\n extraction_timestamp: When extraction was performed\n content: Text used for generating embedding\n\n Inherited from CoreModel:\n id: UUID or string identifier\n created_at: Entity creation timestamp\n updated_at: Last update timestamp\n deleted_at: Soft deletion timestamp\n tenant_id: Multi-tenancy isolation\n user_id: Ownership\n graph_edges: Relationships to other entities\n metadata: Flexible metadata storage\n tags: Classification tags\n\n Example Usage:\n # Agent-extracted: CV parsing\n cv_ontology = Ontology(\n name=\"john-doe-cv-2024\",\n file_id=\"file-uuid-123\",\n agent_schema_id=\"cv-parser-v1\",\n provider_name=\"anthropic\",\n model_name=\"claude-sonnet-4-5-20250929\",\n extracted_data={\n \"candidate_name\": \"John Doe\",\n \"skills\": [\"Python\", \"PostgreSQL\", \"Kubernetes\"],\n },\n confidence_score=0.95,\n tags=[\"cv\", \"engineering\"]\n )\n\n # Direct-loaded: Medical knowledge base from git\n disorder_ontology = Ontology(\n name=\"panic-disorder\",\n uri=\"git://bwolfson-siggie/Siggy-MVP/ontology/disorders/anxiety/panic-disorder.md\",\n content=\"# Panic Disorder\\n\\nPanic disorder is characterized by...\",\n extracted_data={\n \"type\": \"disorder\",\n \"category\": \"anxiety\",\n \"icd10\": \"F41.0\",\n \"dsm5_criteria\": [\"A\", \"B\", \"C\", \"D\"],\n },\n tags=[\"disorder\", \"anxiety\", \"dsm5\"]\n )\n\n # Direct-loaded: Clinical procedure from git\n scid_node = Ontology(\n name=\"scid-5-f1\",\n uri=\"git://bwolfson-siggie/Siggy-MVP/ontology/procedures/scid-5/module-f/scid-5-f1.md\",\n content=\"# scid-5-f1: Panic Attack Screening\\n\\n...\",\n extracted_data={\n \"type\": \"procedure\",\n \"module\": \"F\",\n \"section\": \"Panic Disorder\",\n \"dsm5_criterion\": \"Panic Attack Specifier\",\n },\n tags=[\"scid-5\", \"procedure\", \"anxiety\"]\n )\n \n\nThis agent can search the `ontologies` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri"}, "file_id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "title": "File Id"}, "agent_schema_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Agent Schema Id"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}, "extracted_data": {"anyOf": [{"additionalProperties": true, "type": "object"}, {"type": "null"}], "default": null, "title": "Extracted Data"}, "confidence_score": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Confidence Score"}, "extraction_timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Extraction Timestamp"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Content"}}, "required": ["name"], "json_schema_extra": {"table_name": "ontologies", "entity_key_field": "id", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.ontology.Ontology", "tools": ["search_rem"], "default_search_table": "ontologies", "has_embeddings": true}}'::jsonb,
2201
2219
  'entity',
2202
- '{"table_name": "ontologies", "entity_key_field": "id", "embedding_fields": [], "fqn": "rem.models.entities.ontology.Ontology"}'::jsonb
2220
+ '{"table_name": "ontologies", "entity_key_field": "id", "embedding_fields": ["content"], "fqn": "rem.models.entities.ontology.Ontology"}'::jsonb
2203
2221
  )
2204
2222
  ON CONFLICT (id) DO UPDATE SET
2205
2223
  name = EXCLUDED.name,
@@ -3115,7 +3133,7 @@ BEGIN
3115
3133
  RAISE NOTICE ' ✓ image_resources (1 embeddable fields)';
3116
3134
  RAISE NOTICE ' ✓ messages (1 embeddable fields)';
3117
3135
  RAISE NOTICE ' ✓ moments (1 embeddable fields)';
3118
- RAISE NOTICE ' ✓ ontologies';
3136
+ RAISE NOTICE ' ✓ ontologies (1 embeddable fields)';
3119
3137
  RAISE NOTICE ' ✓ ontology_configs (1 embeddable fields)';
3120
3138
  RAISE NOTICE ' ✓ resources (1 embeddable fields)';
3121
3139
  RAISE NOTICE ' ✓ schemas (1 embeddable fields)';