remdb 0.3.172__py3-none-any.whl → 0.3.223__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (57):
  1. rem/agentic/README.md +262 -2
  2. rem/agentic/context.py +173 -0
  3. rem/agentic/context_builder.py +12 -2
  4. rem/agentic/mcp/tool_wrapper.py +39 -16
  5. rem/agentic/providers/pydantic_ai.py +46 -43
  6. rem/agentic/schema.py +2 -2
  7. rem/agentic/tools/rem_tools.py +11 -0
  8. rem/api/main.py +1 -1
  9. rem/api/mcp_router/resources.py +64 -8
  10. rem/api/mcp_router/server.py +31 -24
  11. rem/api/mcp_router/tools.py +621 -166
  12. rem/api/routers/admin.py +30 -4
  13. rem/api/routers/auth.py +114 -15
  14. rem/api/routers/chat/completions.py +66 -18
  15. rem/api/routers/chat/sse_events.py +7 -3
  16. rem/api/routers/chat/streaming.py +254 -22
  17. rem/api/routers/common.py +18 -0
  18. rem/api/routers/dev.py +7 -1
  19. rem/api/routers/feedback.py +9 -1
  20. rem/api/routers/messages.py +176 -38
  21. rem/api/routers/models.py +9 -1
  22. rem/api/routers/query.py +12 -1
  23. rem/api/routers/shared_sessions.py +16 -0
  24. rem/auth/jwt.py +19 -4
  25. rem/auth/middleware.py +42 -28
  26. rem/cli/README.md +62 -0
  27. rem/cli/commands/ask.py +1 -1
  28. rem/cli/commands/db.py +148 -70
  29. rem/cli/commands/process.py +171 -43
  30. rem/models/entities/ontology.py +91 -101
  31. rem/schemas/agents/rem.yaml +1 -1
  32. rem/services/content/service.py +18 -5
  33. rem/services/email/service.py +11 -2
  34. rem/services/embeddings/worker.py +26 -12
  35. rem/services/postgres/__init__.py +28 -3
  36. rem/services/postgres/diff_service.py +57 -5
  37. rem/services/postgres/programmable_diff_service.py +635 -0
  38. rem/services/postgres/pydantic_to_sqlalchemy.py +2 -2
  39. rem/services/postgres/register_type.py +12 -11
  40. rem/services/postgres/repository.py +46 -25
  41. rem/services/postgres/schema_generator.py +5 -5
  42. rem/services/postgres/sql_builder.py +6 -5
  43. rem/services/session/__init__.py +8 -1
  44. rem/services/session/compression.py +40 -2
  45. rem/services/session/pydantic_messages.py +276 -0
  46. rem/settings.py +28 -0
  47. rem/sql/background_indexes.sql +5 -0
  48. rem/sql/migrations/001_install.sql +157 -10
  49. rem/sql/migrations/002_install_models.sql +160 -132
  50. rem/sql/migrations/004_cache_system.sql +7 -275
  51. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  52. rem/utils/model_helpers.py +101 -0
  53. rem/utils/schema_loader.py +6 -6
  54. {remdb-0.3.172.dist-info → remdb-0.3.223.dist-info}/METADATA +1 -1
  55. {remdb-0.3.172.dist-info → remdb-0.3.223.dist-info}/RECORD +57 -53
  56. {remdb-0.3.172.dist-info → remdb-0.3.223.dist-info}/WHEEL +0 -0
  57. {remdb-0.3.172.dist-info → remdb-0.3.223.dist-info}/entry_points.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  -- REM Model Schema (install_models.sql)
2
2
  -- Generated from Pydantic models
3
3
  -- Source: model registry
4
- -- Generated at: 2025-11-29T18:45:11.372432
4
+ -- Generated at: 2025-12-22T17:34:54.187339
5
5
  --
6
6
  -- DO NOT EDIT MANUALLY - Regenerate with: rem db schema generate
7
7
  --
@@ -36,7 +36,7 @@ END $$;
36
36
 
37
37
  CREATE TABLE IF NOT EXISTS feedbacks (
38
38
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
39
- tenant_id VARCHAR(100) NOT NULL,
39
+ tenant_id VARCHAR(100),
40
40
  user_id VARCHAR(256),
41
41
  session_id VARCHAR(256) NOT NULL,
42
42
  message_id VARCHAR(256),
@@ -74,6 +74,7 @@ BEGIN
74
74
  RETURN OLD;
75
75
  ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
76
76
  -- Upsert to KV_STORE (O(1) lookup by entity_key)
77
+ -- tenant_id can be NULL (meaning public/shared data)
77
78
  INSERT INTO kv_store (
78
79
  entity_key,
79
80
  entity_type,
@@ -84,7 +85,7 @@ BEGIN
84
85
  graph_edges,
85
86
  updated_at
86
87
  ) VALUES (
87
- NEW.id::VARCHAR,
88
+ normalize_key(NEW.id::VARCHAR),
88
89
  'feedbacks',
89
90
  NEW.id,
90
91
  NEW.tenant_id,
@@ -93,7 +94,7 @@ BEGIN
93
94
  COALESCE(NEW.graph_edges, '[]'::jsonb),
94
95
  CURRENT_TIMESTAMP
95
96
  )
96
- ON CONFLICT (tenant_id, entity_key)
97
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
97
98
  DO UPDATE SET
98
99
  entity_id = EXCLUDED.entity_id,
99
100
  user_id = EXCLUDED.user_id,
@@ -118,7 +119,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_feedbacks_kv_store_upsert();
118
119
 
119
120
  CREATE TABLE IF NOT EXISTS files (
120
121
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
121
- tenant_id VARCHAR(100) NOT NULL,
122
+ tenant_id VARCHAR(100),
122
123
  user_id VARCHAR(256),
123
124
  name VARCHAR(256) NOT NULL,
124
125
  uri VARCHAR(256) NOT NULL,
@@ -164,7 +165,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_files_field_provider ON embeddings_fil
164
165
 
165
166
  -- HNSW index for vector similarity search (created in background)
166
167
  -- Note: This will be created by background thread after data load
167
- -- CREATE INDEX IF NOT EXISTS idx_embeddings_files_vector_hnsw ON embeddings_files
168
+ -- CREATE INDEX idx_embeddings_files_vector_hnsw ON embeddings_files
168
169
  -- USING hnsw (embedding vector_cosine_ops);
169
170
 
170
171
  -- KV_STORE trigger for files
@@ -179,6 +180,7 @@ BEGIN
179
180
  RETURN OLD;
180
181
  ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
181
182
  -- Upsert to KV_STORE (O(1) lookup by entity_key)
183
+ -- tenant_id can be NULL (meaning public/shared data)
182
184
  INSERT INTO kv_store (
183
185
  entity_key,
184
186
  entity_type,
@@ -189,7 +191,7 @@ BEGIN
189
191
  graph_edges,
190
192
  updated_at
191
193
  ) VALUES (
192
- NEW.id::VARCHAR,
194
+ normalize_key(NEW.name::VARCHAR),
193
195
  'files',
194
196
  NEW.id,
195
197
  NEW.tenant_id,
@@ -198,7 +200,7 @@ BEGIN
198
200
  COALESCE(NEW.graph_edges, '[]'::jsonb),
199
201
  CURRENT_TIMESTAMP
200
202
  )
201
- ON CONFLICT (tenant_id, entity_key)
203
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
202
204
  DO UPDATE SET
203
205
  entity_id = EXCLUDED.entity_id,
204
206
  user_id = EXCLUDED.user_id,
@@ -223,7 +225,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_files_kv_store_upsert();
223
225
 
224
226
  CREATE TABLE IF NOT EXISTS image_resources (
225
227
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
226
- tenant_id VARCHAR(100) NOT NULL,
228
+ tenant_id VARCHAR(100),
227
229
  user_id VARCHAR(256),
228
230
  name VARCHAR(256),
229
231
  uri VARCHAR(256),
@@ -277,7 +279,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_image_resources_field_provider ON embe
277
279
 
278
280
  -- HNSW index for vector similarity search (created in background)
279
281
  -- Note: This will be created by background thread after data load
280
- -- CREATE INDEX IF NOT EXISTS idx_embeddings_image_resources_vector_hnsw ON embeddings_image_resources
282
+ -- CREATE INDEX idx_embeddings_image_resources_vector_hnsw ON embeddings_image_resources
281
283
  -- USING hnsw (embedding vector_cosine_ops);
282
284
 
283
285
  -- KV_STORE trigger for image_resources
@@ -292,6 +294,7 @@ BEGIN
292
294
  RETURN OLD;
293
295
  ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
294
296
  -- Upsert to KV_STORE (O(1) lookup by entity_key)
297
+ -- tenant_id can be NULL (meaning public/shared data)
295
298
  INSERT INTO kv_store (
296
299
  entity_key,
297
300
  entity_type,
@@ -302,7 +305,7 @@ BEGIN
302
305
  graph_edges,
303
306
  updated_at
304
307
  ) VALUES (
305
- NEW.name::VARCHAR,
308
+ normalize_key(NEW.name::VARCHAR),
306
309
  'image_resources',
307
310
  NEW.id,
308
311
  NEW.tenant_id,
@@ -311,7 +314,7 @@ BEGIN
311
314
  COALESCE(NEW.graph_edges, '[]'::jsonb),
312
315
  CURRENT_TIMESTAMP
313
316
  )
314
- ON CONFLICT (tenant_id, entity_key)
317
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
315
318
  DO UPDATE SET
316
319
  entity_id = EXCLUDED.entity_id,
317
320
  user_id = EXCLUDED.user_id,
@@ -336,7 +339,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_image_resources_kv_store_upsert();
336
339
 
337
340
  CREATE TABLE IF NOT EXISTS messages (
338
341
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
339
- tenant_id VARCHAR(100) NOT NULL,
342
+ tenant_id VARCHAR(100),
340
343
  user_id VARCHAR(256),
341
344
  content TEXT NOT NULL,
342
345
  message_type VARCHAR(256),
@@ -383,7 +386,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_messages_field_provider ON embeddings_
383
386
 
384
387
  -- HNSW index for vector similarity search (created in background)
385
388
  -- Note: This will be created by background thread after data load
386
- -- CREATE INDEX IF NOT EXISTS idx_embeddings_messages_vector_hnsw ON embeddings_messages
389
+ -- CREATE INDEX idx_embeddings_messages_vector_hnsw ON embeddings_messages
387
390
  -- USING hnsw (embedding vector_cosine_ops);
388
391
 
389
392
  -- KV_STORE trigger for messages
@@ -398,6 +401,7 @@ BEGIN
398
401
  RETURN OLD;
399
402
  ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
400
403
  -- Upsert to KV_STORE (O(1) lookup by entity_key)
404
+ -- tenant_id can be NULL (meaning public/shared data)
401
405
  INSERT INTO kv_store (
402
406
  entity_key,
403
407
  entity_type,
@@ -408,7 +412,7 @@ BEGIN
408
412
  graph_edges,
409
413
  updated_at
410
414
  ) VALUES (
411
- NEW.id::VARCHAR,
415
+ normalize_key(NEW.id::VARCHAR),
412
416
  'messages',
413
417
  NEW.id,
414
418
  NEW.tenant_id,
@@ -417,7 +421,7 @@ BEGIN
417
421
  COALESCE(NEW.graph_edges, '[]'::jsonb),
418
422
  CURRENT_TIMESTAMP
419
423
  )
420
- ON CONFLICT (tenant_id, entity_key)
424
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
421
425
  DO UPDATE SET
422
426
  entity_id = EXCLUDED.entity_id,
423
427
  user_id = EXCLUDED.user_id,
@@ -442,7 +446,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_messages_kv_store_upsert();
442
446
 
443
447
  CREATE TABLE IF NOT EXISTS moments (
444
448
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
445
- tenant_id VARCHAR(100) NOT NULL,
449
+ tenant_id VARCHAR(100),
446
450
  user_id VARCHAR(256),
447
451
  name VARCHAR(256),
448
452
  moment_type VARCHAR(256),
@@ -491,7 +495,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_moments_field_provider ON embeddings_m
491
495
 
492
496
  -- HNSW index for vector similarity search (created in background)
493
497
  -- Note: This will be created by background thread after data load
494
- -- CREATE INDEX IF NOT EXISTS idx_embeddings_moments_vector_hnsw ON embeddings_moments
498
+ -- CREATE INDEX idx_embeddings_moments_vector_hnsw ON embeddings_moments
495
499
  -- USING hnsw (embedding vector_cosine_ops);
496
500
 
497
501
  -- KV_STORE trigger for moments
@@ -506,6 +510,7 @@ BEGIN
506
510
  RETURN OLD;
507
511
  ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
508
512
  -- Upsert to KV_STORE (O(1) lookup by entity_key)
513
+ -- tenant_id can be NULL (meaning public/shared data)
509
514
  INSERT INTO kv_store (
510
515
  entity_key,
511
516
  entity_type,
@@ -516,7 +521,7 @@ BEGIN
516
521
  graph_edges,
517
522
  updated_at
518
523
  ) VALUES (
519
- NEW.name::VARCHAR,
524
+ normalize_key(NEW.name::VARCHAR),
520
525
  'moments',
521
526
  NEW.id,
522
527
  NEW.tenant_id,
@@ -525,7 +530,7 @@ BEGIN
525
530
  COALESCE(NEW.graph_edges, '[]'::jsonb),
526
531
  CURRENT_TIMESTAMP
527
532
  )
528
- ON CONFLICT (tenant_id, entity_key)
533
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
529
534
  DO UPDATE SET
530
535
  entity_id = EXCLUDED.entity_id,
531
536
  user_id = EXCLUDED.user_id,
@@ -550,17 +555,18 @@ FOR EACH ROW EXECUTE FUNCTION fn_moments_kv_store_upsert();
550
555
 
551
556
  CREATE TABLE IF NOT EXISTS ontologies (
552
557
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
553
- tenant_id VARCHAR(100) NOT NULL,
558
+ tenant_id VARCHAR(100),
554
559
  user_id VARCHAR(256),
555
560
  name VARCHAR(256) NOT NULL,
556
- file_id UUID NOT NULL,
557
- agent_schema_id VARCHAR(256) NOT NULL,
558
- provider_name VARCHAR(256) NOT NULL,
559
- model_name VARCHAR(256) NOT NULL,
560
- extracted_data JSONB NOT NULL,
561
+ uri VARCHAR(256),
562
+ file_id UUID,
563
+ agent_schema_id VARCHAR(256),
564
+ provider_name VARCHAR(256),
565
+ model_name VARCHAR(256),
566
+ extracted_data JSONB,
561
567
  confidence_score DOUBLE PRECISION,
562
568
  extraction_timestamp VARCHAR(256),
563
- embedding_text TEXT,
569
+ content TEXT,
564
570
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
565
571
  updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
566
572
  deleted_at TIMESTAMP,
@@ -575,6 +581,32 @@ CREATE INDEX IF NOT EXISTS idx_ontologies_graph_edges ON ontologies USING GIN (g
575
581
  CREATE INDEX IF NOT EXISTS idx_ontologies_metadata ON ontologies USING GIN (metadata);
576
582
  CREATE INDEX IF NOT EXISTS idx_ontologies_tags ON ontologies USING GIN (tags);
577
583
 
584
+ -- Embeddings for ontologies
585
+ CREATE TABLE IF NOT EXISTS embeddings_ontologies (
586
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
587
+ entity_id UUID NOT NULL REFERENCES ontologies(id) ON DELETE CASCADE,
588
+ field_name VARCHAR(100) NOT NULL,
589
+ provider VARCHAR(50) NOT NULL DEFAULT 'openai',
590
+ model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
591
+ embedding vector(1536) NOT NULL,
592
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
593
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
594
+
595
+ -- Unique: one embedding per entity per field per provider
596
+ UNIQUE (entity_id, field_name, provider)
597
+ );
598
+
599
+ -- Index for entity lookup (get all embeddings for entity)
600
+ CREATE INDEX IF NOT EXISTS idx_embeddings_ontologies_entity ON embeddings_ontologies (entity_id);
601
+
602
+ -- Index for field + provider lookup
603
+ CREATE INDEX IF NOT EXISTS idx_embeddings_ontologies_field_provider ON embeddings_ontologies (field_name, provider);
604
+
605
+ -- HNSW index for vector similarity search (created in background)
606
+ -- Note: This will be created by background thread after data load
607
+ -- CREATE INDEX idx_embeddings_ontologies_vector_hnsw ON embeddings_ontologies
608
+ -- USING hnsw (embedding vector_cosine_ops);
609
+
578
610
  -- KV_STORE trigger for ontologies
579
611
  -- Trigger function to maintain KV_STORE for ontologies
580
612
  CREATE OR REPLACE FUNCTION fn_ontologies_kv_store_upsert()
@@ -587,6 +619,7 @@ BEGIN
587
619
  RETURN OLD;
588
620
  ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
589
621
  -- Upsert to KV_STORE (O(1) lookup by entity_key)
622
+ -- tenant_id can be NULL (meaning public/shared data)
590
623
  INSERT INTO kv_store (
591
624
  entity_key,
592
625
  entity_type,
@@ -597,7 +630,7 @@ BEGIN
597
630
  graph_edges,
598
631
  updated_at
599
632
  ) VALUES (
600
- NEW.id::VARCHAR,
633
+ normalize_key(NEW.name::VARCHAR),
601
634
  'ontologies',
602
635
  NEW.id,
603
636
  NEW.tenant_id,
@@ -606,7 +639,7 @@ BEGIN
606
639
  COALESCE(NEW.graph_edges, '[]'::jsonb),
607
640
  CURRENT_TIMESTAMP
608
641
  )
609
- ON CONFLICT (tenant_id, entity_key)
642
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
610
643
  DO UPDATE SET
611
644
  entity_id = EXCLUDED.entity_id,
612
645
  user_id = EXCLUDED.user_id,
@@ -631,7 +664,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_ontologies_kv_store_upsert();
631
664
 
632
665
  CREATE TABLE IF NOT EXISTS ontology_configs (
633
666
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
634
- tenant_id VARCHAR(100) NOT NULL,
667
+ tenant_id VARCHAR(100),
635
668
  user_id VARCHAR(256),
636
669
  name VARCHAR(256) NOT NULL,
637
670
  agent_schema_id VARCHAR(256) NOT NULL,
@@ -680,7 +713,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_ontology_configs_field_provider ON emb
680
713
 
681
714
  -- HNSW index for vector similarity search (created in background)
682
715
  -- Note: This will be created by background thread after data load
683
- -- CREATE INDEX IF NOT EXISTS idx_embeddings_ontology_configs_vector_hnsw ON embeddings_ontology_configs
716
+ -- CREATE INDEX idx_embeddings_ontology_configs_vector_hnsw ON embeddings_ontology_configs
684
717
  -- USING hnsw (embedding vector_cosine_ops);
685
718
 
686
719
  -- KV_STORE trigger for ontology_configs
@@ -695,6 +728,7 @@ BEGIN
695
728
  RETURN OLD;
696
729
  ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
697
730
  -- Upsert to KV_STORE (O(1) lookup by entity_key)
731
+ -- tenant_id can be NULL (meaning public/shared data)
698
732
  INSERT INTO kv_store (
699
733
  entity_key,
700
734
  entity_type,
@@ -705,7 +739,7 @@ BEGIN
705
739
  graph_edges,
706
740
  updated_at
707
741
  ) VALUES (
708
- NEW.id::VARCHAR,
742
+ normalize_key(NEW.name::VARCHAR),
709
743
  'ontology_configs',
710
744
  NEW.id,
711
745
  NEW.tenant_id,
@@ -714,7 +748,7 @@ BEGIN
714
748
  COALESCE(NEW.graph_edges, '[]'::jsonb),
715
749
  CURRENT_TIMESTAMP
716
750
  )
717
- ON CONFLICT (tenant_id, entity_key)
751
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
718
752
  DO UPDATE SET
719
753
  entity_id = EXCLUDED.entity_id,
720
754
  user_id = EXCLUDED.user_id,
@@ -739,7 +773,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_ontology_configs_kv_store_upsert();
739
773
 
740
774
  CREATE TABLE IF NOT EXISTS resources (
741
775
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
742
- tenant_id VARCHAR(100) NOT NULL,
776
+ tenant_id VARCHAR(100),
743
777
  user_id VARCHAR(256),
744
778
  name VARCHAR(256),
745
779
  uri VARCHAR(256),
@@ -785,7 +819,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_resources_field_provider ON embeddings
785
819
 
786
820
  -- HNSW index for vector similarity search (created in background)
787
821
  -- Note: This will be created by background thread after data load
788
- -- CREATE INDEX IF NOT EXISTS idx_embeddings_resources_vector_hnsw ON embeddings_resources
822
+ -- CREATE INDEX idx_embeddings_resources_vector_hnsw ON embeddings_resources
789
823
  -- USING hnsw (embedding vector_cosine_ops);
790
824
 
791
825
  -- KV_STORE trigger for resources
@@ -800,6 +834,7 @@ BEGIN
800
834
  RETURN OLD;
801
835
  ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
802
836
  -- Upsert to KV_STORE (O(1) lookup by entity_key)
837
+ -- tenant_id can be NULL (meaning public/shared data)
803
838
  INSERT INTO kv_store (
804
839
  entity_key,
805
840
  entity_type,
@@ -810,7 +845,7 @@ BEGIN
810
845
  graph_edges,
811
846
  updated_at
812
847
  ) VALUES (
813
- NEW.name::VARCHAR,
848
+ normalize_key(NEW.name::VARCHAR),
814
849
  'resources',
815
850
  NEW.id,
816
851
  NEW.tenant_id,
@@ -819,7 +854,7 @@ BEGIN
819
854
  COALESCE(NEW.graph_edges, '[]'::jsonb),
820
855
  CURRENT_TIMESTAMP
821
856
  )
822
- ON CONFLICT (tenant_id, entity_key)
857
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
823
858
  DO UPDATE SET
824
859
  entity_id = EXCLUDED.entity_id,
825
860
  user_id = EXCLUDED.user_id,
@@ -844,7 +879,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_resources_kv_store_upsert();
844
879
 
845
880
  CREATE TABLE IF NOT EXISTS schemas (
846
881
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
847
- tenant_id VARCHAR(100) NOT NULL,
882
+ tenant_id VARCHAR(100),
848
883
  user_id VARCHAR(256),
849
884
  name VARCHAR(256) NOT NULL,
850
885
  content TEXT,
@@ -889,7 +924,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_schemas_field_provider ON embeddings_s
889
924
 
890
925
  -- HNSW index for vector similarity search (created in background)
891
926
  -- Note: This will be created by background thread after data load
892
- -- CREATE INDEX IF NOT EXISTS idx_embeddings_schemas_vector_hnsw ON embeddings_schemas
927
+ -- CREATE INDEX idx_embeddings_schemas_vector_hnsw ON embeddings_schemas
893
928
  -- USING hnsw (embedding vector_cosine_ops);
894
929
 
895
930
  -- KV_STORE trigger for schemas
@@ -904,6 +939,7 @@ BEGIN
904
939
  RETURN OLD;
905
940
  ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
906
941
  -- Upsert to KV_STORE (O(1) lookup by entity_key)
942
+ -- tenant_id can be NULL (meaning public/shared data)
907
943
  INSERT INTO kv_store (
908
944
  entity_key,
909
945
  entity_type,
@@ -914,7 +950,7 @@ BEGIN
914
950
  graph_edges,
915
951
  updated_at
916
952
  ) VALUES (
917
- NEW.id::VARCHAR,
953
+ normalize_key(NEW.name::VARCHAR),
918
954
  'schemas',
919
955
  NEW.id,
920
956
  NEW.tenant_id,
@@ -923,7 +959,7 @@ BEGIN
923
959
  COALESCE(NEW.graph_edges, '[]'::jsonb),
924
960
  CURRENT_TIMESTAMP
925
961
  )
926
- ON CONFLICT (tenant_id, entity_key)
962
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
927
963
  DO UPDATE SET
928
964
  entity_id = EXCLUDED.entity_id,
929
965
  user_id = EXCLUDED.user_id,
@@ -948,7 +984,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_schemas_kv_store_upsert();
948
984
 
949
985
  CREATE TABLE IF NOT EXISTS sessions (
950
986
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
951
- tenant_id VARCHAR(100) NOT NULL,
987
+ tenant_id VARCHAR(100),
952
988
  user_id VARCHAR(256),
953
989
  name VARCHAR(256) NOT NULL,
954
990
  mode TEXT,
@@ -996,7 +1032,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_sessions_field_provider ON embeddings_
996
1032
 
997
1033
  -- HNSW index for vector similarity search (created in background)
998
1034
  -- Note: This will be created by background thread after data load
999
- -- CREATE INDEX IF NOT EXISTS idx_embeddings_sessions_vector_hnsw ON embeddings_sessions
1035
+ -- CREATE INDEX idx_embeddings_sessions_vector_hnsw ON embeddings_sessions
1000
1036
  -- USING hnsw (embedding vector_cosine_ops);
1001
1037
 
1002
1038
  -- KV_STORE trigger for sessions
@@ -1011,6 +1047,7 @@ BEGIN
1011
1047
  RETURN OLD;
1012
1048
  ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
1013
1049
  -- Upsert to KV_STORE (O(1) lookup by entity_key)
1050
+ -- tenant_id can be NULL (meaning public/shared data)
1014
1051
  INSERT INTO kv_store (
1015
1052
  entity_key,
1016
1053
  entity_type,
@@ -1021,7 +1058,7 @@ BEGIN
1021
1058
  graph_edges,
1022
1059
  updated_at
1023
1060
  ) VALUES (
1024
- NEW.name::VARCHAR,
1061
+ normalize_key(NEW.name::VARCHAR),
1025
1062
  'sessions',
1026
1063
  NEW.id,
1027
1064
  NEW.tenant_id,
@@ -1030,7 +1067,7 @@ BEGIN
1030
1067
  COALESCE(NEW.graph_edges, '[]'::jsonb),
1031
1068
  CURRENT_TIMESTAMP
1032
1069
  )
1033
- ON CONFLICT (tenant_id, entity_key)
1070
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
1034
1071
  DO UPDATE SET
1035
1072
  entity_id = EXCLUDED.entity_id,
1036
1073
  user_id = EXCLUDED.user_id,
@@ -1055,7 +1092,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_sessions_kv_store_upsert();
1055
1092
 
1056
1093
  CREATE TABLE IF NOT EXISTS shared_sessions (
1057
1094
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
1058
- tenant_id VARCHAR(100) NOT NULL,
1095
+ tenant_id VARCHAR(100),
1059
1096
  user_id VARCHAR(256),
1060
1097
  session_id VARCHAR(256) NOT NULL,
1061
1098
  owner_user_id VARCHAR(256) NOT NULL,
@@ -1086,6 +1123,7 @@ BEGIN
1086
1123
  RETURN OLD;
1087
1124
  ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
1088
1125
  -- Upsert to KV_STORE (O(1) lookup by entity_key)
1126
+ -- tenant_id can be NULL (meaning public/shared data)
1089
1127
  INSERT INTO kv_store (
1090
1128
  entity_key,
1091
1129
  entity_type,
@@ -1096,7 +1134,7 @@ BEGIN
1096
1134
  graph_edges,
1097
1135
  updated_at
1098
1136
  ) VALUES (
1099
- NEW.id::VARCHAR,
1137
+ normalize_key(NEW.id::VARCHAR),
1100
1138
  'shared_sessions',
1101
1139
  NEW.id,
1102
1140
  NEW.tenant_id,
@@ -1105,7 +1143,7 @@ BEGIN
1105
1143
  COALESCE(NEW.graph_edges, '[]'::jsonb),
1106
1144
  CURRENT_TIMESTAMP
1107
1145
  )
1108
- ON CONFLICT (tenant_id, entity_key)
1146
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
1109
1147
  DO UPDATE SET
1110
1148
  entity_id = EXCLUDED.entity_id,
1111
1149
  user_id = EXCLUDED.user_id,
@@ -1130,7 +1168,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_shared_sessions_kv_store_upsert();
1130
1168
 
1131
1169
  CREATE TABLE IF NOT EXISTS users (
1132
1170
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
1133
- tenant_id VARCHAR(100) NOT NULL,
1171
+ tenant_id VARCHAR(100),
1134
1172
  user_id VARCHAR(256),
1135
1173
  name VARCHAR(256) NOT NULL,
1136
1174
  email VARCHAR(256),
@@ -1180,7 +1218,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_users_field_provider ON embeddings_use
1180
1218
 
1181
1219
  -- HNSW index for vector similarity search (created in background)
1182
1220
  -- Note: This will be created by background thread after data load
1183
- -- CREATE INDEX IF NOT EXISTS idx_embeddings_users_vector_hnsw ON embeddings_users
1221
+ -- CREATE INDEX idx_embeddings_users_vector_hnsw ON embeddings_users
1184
1222
  -- USING hnsw (embedding vector_cosine_ops);
1185
1223
 
1186
1224
  -- KV_STORE trigger for users
@@ -1195,6 +1233,7 @@ BEGIN
1195
1233
  RETURN OLD;
1196
1234
  ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
1197
1235
  -- Upsert to KV_STORE (O(1) lookup by entity_key)
1236
+ -- tenant_id can be NULL (meaning public/shared data)
1198
1237
  INSERT INTO kv_store (
1199
1238
  entity_key,
1200
1239
  entity_type,
@@ -1205,7 +1244,7 @@ BEGIN
1205
1244
  graph_edges,
1206
1245
  updated_at
1207
1246
  ) VALUES (
1208
- NEW.name::VARCHAR,
1247
+ normalize_key(NEW.name::VARCHAR),
1209
1248
  'users',
1210
1249
  NEW.id,
1211
1250
  NEW.tenant_id,
@@ -1214,7 +1253,7 @@ BEGIN
1214
1253
  COALESCE(NEW.graph_edges, '[]'::jsonb),
1215
1254
  CURRENT_TIMESTAMP
1216
1255
  )
1217
- ON CONFLICT (tenant_id, entity_key)
1256
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
1218
1257
  DO UPDATE SET
1219
1258
  entity_id = EXCLUDED.entity_id,
1220
1259
  user_id = EXCLUDED.user_id,
@@ -1411,12 +1450,12 @@ VALUES (
1411
1450
  ## Overview
1412
1451
 
1413
1452
  The `File` entity is stored in the `files` table. Each record is uniquely
1414
- identified by its `id` field for lookups and graph traversal.
1453
+ identified by its `name` field for lookups and graph traversal.
1415
1454
 
1416
1455
  ## Search Capabilities
1417
1456
 
1418
1457
  This schema includes the `search_rem` tool which supports:
1419
- - **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
1458
+ - **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
1420
1459
  - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
1421
1460
  - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM files LIMIT 10`)
1422
1461
  - **SQL**: Complex queries (e.g., `SELECT * FROM files WHERE ...`)
@@ -1426,7 +1465,7 @@ This schema includes the `search_rem` tool which supports:
1426
1465
  | Property | Value |
1427
1466
  |----------|-------|
1428
1467
  | Table | `files` |
1429
- | Entity Key | `id` |
1468
+ | Entity Key | `name` |
1430
1469
  | Embedding Fields | `content` |
1431
1470
  | Tools | `search_rem` |
1432
1471
 
@@ -1513,9 +1552,9 @@ This schema includes the `search_rem` tool which supports:
1513
1552
  - File processing status (pending, processing, completed, failed)
1514
1553
 
1515
1554
  ',
1516
- '{"type": "object", "description": "\n File metadata and tracking.\n\n Represents files uploaded to or referenced by the REM system,\n tracking their metadata and processing status. Tenant isolation\n is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `files` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "File name", "title": "Name", "type": "string"}, "uri": {"description": "File storage URI (S3, local path, etc.)", "title": "Uri", "type": "string"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Extracted text content (if applicable)", "title": "Content"}, "timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File creation/modification timestamp", "title": "Timestamp"}, "size_bytes": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "File size in bytes", "title": "Size Bytes"}, "mime_type": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File MIME type", "title": "Mime Type"}, "processing_status": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": "pending", "description": "File processing status (pending, processing, completed, failed)", "title": "Processing Status"}}, "required": ["name", "uri"], "json_schema_extra": {"table_name": "files", "entity_key_field": "id", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.file.File", "tools": ["search_rem"], "default_search_table": "files", "has_embeddings": true}}'::jsonb,
1555
+ '{"type": "object", "description": "\n File metadata and tracking.\n\n Represents files uploaded to or referenced by the REM system,\n tracking their metadata and processing status. Tenant isolation\n is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `files` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "File name", "title": "Name", "type": "string"}, "uri": {"description": "File storage URI (S3, local path, etc.)", "title": "Uri", "type": "string"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Extracted text content (if applicable)", "title": "Content"}, "timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File creation/modification timestamp", "title": "Timestamp"}, "size_bytes": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "File size in bytes", "title": "Size Bytes"}, "mime_type": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File MIME type", "title": "Mime Type"}, "processing_status": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": "pending", "description": "File processing status (pending, processing, completed, failed)", "title": "Processing Status"}}, "required": ["name", "uri"], "json_schema_extra": {"table_name": "files", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.file.File", "tools": ["search_rem"], "default_search_table": "files", "has_embeddings": true}}'::jsonb,
1517
1556
  'entity',
1518
- '{"table_name": "files", "entity_key_field": "id", "embedding_fields": ["content"], "fqn": "rem.models.entities.file.File"}'::jsonb
1557
+ '{"table_name": "files", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.file.File"}'::jsonb
1519
1558
  )
1520
1559
  ON CONFLICT (id) DO UPDATE SET
1521
1560
  name = EXCLUDED.name,
@@ -2008,18 +2047,19 @@ VALUES (
2008
2047
  'Ontology',
2009
2048
  '# Ontology
2010
2049
 
2011
- Domain-specific knowledge extracted from files using custom agents.
2050
+ Domain-specific knowledge - either agent-extracted or direct-loaded.
2012
2051
 
2013
2052
  Attributes:
2014
2053
  name: Human-readable label for this ontology instance
2015
- file_id: Foreign key to File entity that was processed
2016
- agent_schema_id: Foreign key to Schema entity that performed extraction
2017
- provider_name: LLM provider used for extraction (e.g., "anthropic", "openai")
2018
- model_name: Specific model used (e.g., "claude-sonnet-4-5")
2019
- extracted_data: Structured data extracted by agent (arbitrary JSON)
2054
+ uri: External source reference (git://, s3://, https://) for direct-loaded ontologies
2055
+ file_id: Foreign key to File entity (optional - only for agent-extracted)
2056
+ agent_schema_id: Schema that performed extraction (optional - only for agent-extracted)
2057
+ provider_name: LLM provider used for extraction (optional)
2058
+ model_name: Specific model used (optional)
2059
+ extracted_data: Structured data - either extracted by agent or parsed from source
2020
2060
  confidence_score: Optional confidence score from extraction (0.0-1.0)
2021
2061
  extraction_timestamp: When extraction was performed
2022
- embedding_text: Text used for generating embedding (derived from extracted_data)
2062
+ content: Text used for generating embedding
2023
2063
 
2024
2064
  Inherited from CoreModel:
2025
2065
  id: UUID or string identifier
@@ -2031,10 +2071,9 @@ Domain-specific knowledge extracted from files using custom agents.
2031
2071
  graph_edges: Relationships to other entities
2032
2072
  metadata: Flexible metadata storage
2033
2073
  tags: Classification tags
2034
- column: Database schema metadata
2035
2074
 
2036
2075
  Example Usage:
2037
- # CV extraction
2076
+ # Agent-extracted: CV parsing
2038
2077
  cv_ontology = Ontology(
2039
2078
  name="john-doe-cv-2024",
2040
2079
  file_id="file-uuid-123",
@@ -2043,63 +2082,48 @@ Domain-specific knowledge extracted from files using custom agents.
2043
2082
  model_name="claude-sonnet-4-5-20250929",
2044
2083
  extracted_data={
2045
2084
  "candidate_name": "John Doe",
2046
- "email": "john@example.com",
2047
2085
  "skills": ["Python", "PostgreSQL", "Kubernetes"],
2048
- "experience": [
2049
- {
2050
- "company": "TechCorp",
2051
- "role": "Senior Engineer",
2052
- "years": 3,
2053
- "achievements": ["Led migration to k8s", "Reduced costs 40%"]
2054
- }
2055
- ],
2056
- "education": [
2057
- {"degree": "BS Computer Science", "institution": "MIT", "year": 2018}
2058
- ]
2059
2086
  },
2060
2087
  confidence_score=0.95,
2061
- tags=["cv", "engineering", "senior-level"]
2088
+ tags=["cv", "engineering"]
2062
2089
  )
2063
2090
 
2064
- # Contract extraction
2065
- contract_ontology = Ontology(
2066
- name="acme-supplier-agreement-2024",
2067
- file_id="file-uuid-456",
2068
- agent_schema_id="contract-parser-v2",
2069
- provider_name="openai",
2070
- model_name="gpt-4.1",
2091
+ # Direct-loaded: Knowledge base from git
2092
+ api_docs = Ontology(
2093
+ name="rest-api-guide",
2094
+ uri="git://example-org/docs/api/rest-api-guide.md",
2095
+ content="# REST API Guide\n\nThis guide covers RESTful API design...",
2096
+ extracted_data={
2097
+ "type": "documentation",
2098
+ "category": "api",
2099
+ "version": "2.0",
2100
+ },
2101
+ tags=["api", "rest", "documentation"]
2102
+ )
2103
+
2104
+ # Direct-loaded: Technical spec from git
2105
+ config_spec = Ontology(
2106
+ name="config-schema",
2107
+ uri="git://example-org/docs/specs/config-schema.md",
2108
+ content="# Configuration Schema\n\nThis document defines...",
2071
2109
  extracted_data={
2072
- "contract_type": "supplier_agreement",
2073
- "parties": [
2074
- {"name": "ACME Corp", "role": "buyer"},
2075
- {"name": "SupplyChain Inc", "role": "supplier"}
2076
- ],
2077
- "effective_date": "2024-01-01",
2078
- "termination_date": "2026-12-31",
2079
- "payment_terms": {
2080
- "amount": 500000,
2081
- "currency": "USD",
2082
- "frequency": "quarterly"
2083
- },
2084
- "key_obligations": [
2085
- "Supplier must deliver within 30 days",
2086
- "Buyer must pay within 60 days of invoice"
2087
- ]
2110
+ "type": "specification",
2111
+ "format": "yaml",
2112
+ "version": "1.0",
2088
2113
  },
2089
- confidence_score=0.92,
2090
- tags=["contract", "supplier", "procurement"]
2114
+ tags=["config", "schema", "specification"]
2091
2115
  )
2092
2116
 
2093
2117
 
2094
2118
  ## Overview
2095
2119
 
2096
2120
  The `Ontology` entity is stored in the `ontologies` table. Each record is uniquely
2097
- identified by its `id` field for lookups and graph traversal.
2121
+ identified by its `name` field for lookups and graph traversal.
2098
2122
 
2099
2123
  ## Search Capabilities
2100
2124
 
2101
2125
  This schema includes the `search_rem` tool which supports:
2102
- - **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
2126
+ - **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
2103
2127
  - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
2104
2128
  - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM ontologies LIMIT 10`)
2105
2129
  - **SQL**: Complex queries (e.g., `SELECT * FROM ontologies WHERE ...`)
@@ -2109,8 +2133,8 @@ This schema includes the `search_rem` tool which supports:
2109
2133
  | Property | Value |
2110
2134
  |----------|-------|
2111
2135
  | Table | `ontologies` |
2112
- | Entity Key | `id` |
2113
- | Embedding Fields | None |
2136
+ | Entity Key | `name` |
2137
+ | Embedding Fields | `content` |
2114
2138
  | Tools | `search_rem` |
2115
2139
 
2116
2140
  ## Fields
@@ -2164,25 +2188,29 @@ This schema includes the `search_rem` tool which supports:
2164
2188
  - **Type**: `<class ''str''>`
2165
2189
  - **Required**
2166
2190
 
2191
+ ### `uri`
2192
+ - **Type**: `typing.Optional[str]`
2193
+ - **Optional**
2194
+
2167
2195
  ### `file_id`
2168
- - **Type**: `uuid.UUID | str`
2169
- - **Required**
2196
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
2197
+ - **Optional**
2170
2198
 
2171
2199
  ### `agent_schema_id`
2172
- - **Type**: `<class ''str''>`
2173
- - **Required**
2200
+ - **Type**: `typing.Optional[str]`
2201
+ - **Optional**
2174
2202
 
2175
2203
  ### `provider_name`
2176
- - **Type**: `<class ''str''>`
2177
- - **Required**
2204
+ - **Type**: `typing.Optional[str]`
2205
+ - **Optional**
2178
2206
 
2179
2207
  ### `model_name`
2180
- - **Type**: `<class ''str''>`
2181
- - **Required**
2208
+ - **Type**: `typing.Optional[str]`
2209
+ - **Optional**
2182
2210
 
2183
2211
  ### `extracted_data`
2184
- - **Type**: `dict[str, typing.Any]`
2185
- - **Required**
2212
+ - **Type**: `typing.Optional[dict[str, typing.Any]]`
2213
+ - **Optional**
2186
2214
 
2187
2215
  ### `confidence_score`
2188
2216
  - **Type**: `typing.Optional[float]`
@@ -2192,14 +2220,14 @@ This schema includes the `search_rem` tool which supports:
2192
2220
  - **Type**: `typing.Optional[str]`
2193
2221
  - **Optional**
2194
2222
 
2195
- ### `embedding_text`
2223
+ ### `content`
2196
2224
  - **Type**: `typing.Optional[str]`
2197
2225
  - **Optional**
2198
2226
 
2199
2227
  ',
2200
- '{"type": "object", "description": "Domain-specific knowledge extracted from files using custom agents.\n\n Attributes:\n name: Human-readable label for this ontology instance\n file_id: Foreign key to File entity that was processed\n agent_schema_id: Foreign key to Schema entity that performed extraction\n provider_name: LLM provider used for extraction (e.g., \"anthropic\", \"openai\")\n model_name: Specific model used (e.g., \"claude-sonnet-4-5\")\n extracted_data: Structured data extracted by agent (arbitrary JSON)\n confidence_score: Optional confidence score from extraction (0.0-1.0)\n extraction_timestamp: When extraction was performed\n embedding_text: Text used for generating embedding (derived from extracted_data)\n\n Inherited from CoreModel:\n id: UUID or string identifier\n created_at: Entity creation timestamp\n updated_at: Last update timestamp\n deleted_at: Soft deletion timestamp\n tenant_id: Multi-tenancy isolation\n user_id: Ownership\n graph_edges: Relationships to other entities\n metadata: Flexible metadata storage\n tags: Classification tags\n column: Database schema metadata\n\n Example Usage:\n # CV extraction\n cv_ontology = Ontology(\n name=\"john-doe-cv-2024\",\n file_id=\"file-uuid-123\",\n agent_schema_id=\"cv-parser-v1\",\n provider_name=\"anthropic\",\n model_name=\"claude-sonnet-4-5-20250929\",\n extracted_data={\n \"candidate_name\": \"John Doe\",\n \"email\": \"john@example.com\",\n \"skills\": [\"Python\", \"PostgreSQL\", \"Kubernetes\"],\n \"experience\": [\n {\n \"company\": \"TechCorp\",\n \"role\": \"Senior Engineer\",\n \"years\": 3,\n \"achievements\": [\"Led migration to k8s\", \"Reduced costs 40%\"]\n }\n ],\n \"education\": [\n {\"degree\": \"BS Computer Science\", \"institution\": \"MIT\", \"year\": 2018}\n ]\n },\n confidence_score=0.95,\n tags=[\"cv\", \"engineering\", \"senior-level\"]\n )\n\n # Contract extraction\n contract_ontology = Ontology(\n name=\"acme-supplier-agreement-2024\",\n 
file_id=\"file-uuid-456\",\n agent_schema_id=\"contract-parser-v2\",\n provider_name=\"openai\",\n model_name=\"gpt-4.1\",\n extracted_data={\n \"contract_type\": \"supplier_agreement\",\n \"parties\": [\n {\"name\": \"ACME Corp\", \"role\": \"buyer\"},\n {\"name\": \"SupplyChain Inc\", \"role\": \"supplier\"}\n ],\n \"effective_date\": \"2024-01-01\",\n \"termination_date\": \"2026-12-31\",\n \"payment_terms\": {\n \"amount\": 500000,\n \"currency\": \"USD\",\n \"frequency\": \"quarterly\"\n },\n \"key_obligations\": [\n \"Supplier must deliver within 30 days\",\n \"Buyer must pay within 60 days of invoice\"\n ]\n },\n confidence_score=0.92,\n tags=[\"contract\", \"supplier\", \"procurement\"]\n )\n \n\nThis agent can search the `ontologies` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. 
Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "file_id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}], "title": "File Id"}, "agent_schema_id": {"title": "Agent Schema Id", "type": "string"}, "provider_name": {"title": "Provider Name", "type": "string"}, "model_name": {"title": "Model Name", "type": "string"}, "extracted_data": {"additionalProperties": true, "title": "Extracted Data", "type": "object"}, "confidence_score": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Confidence Score"}, "extraction_timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Extraction Timestamp"}, "embedding_text": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Embedding Text"}}, "required": ["name", "file_id", "agent_schema_id", "provider_name", "model_name", "extracted_data"], "json_schema_extra": {"table_name": "ontologies", "entity_key_field": "id", "embedding_fields": [], "fully_qualified_name": "rem.models.entities.ontology.Ontology", "tools": ["search_rem"], "default_search_table": "ontologies", "has_embeddings": false}}'::jsonb,
2228
+ '{"type": "object", "description": "Domain-specific knowledge - either agent-extracted or direct-loaded.\n\n Attributes:\n name: Human-readable label for this ontology instance\n uri: External source reference (git://, s3://, https://) for direct-loaded ontologies\n file_id: Foreign key to File entity (optional - only for agent-extracted)\n agent_schema_id: Schema that performed extraction (optional - only for agent-extracted)\n provider_name: LLM provider used for extraction (optional)\n model_name: Specific model used (optional)\n extracted_data: Structured data - either extracted by agent or parsed from source\n confidence_score: Optional confidence score from extraction (0.0-1.0)\n extraction_timestamp: When extraction was performed\n content: Text used for generating embedding\n\n Inherited from CoreModel:\n id: UUID or string identifier\n created_at: Entity creation timestamp\n updated_at: Last update timestamp\n deleted_at: Soft deletion timestamp\n tenant_id: Multi-tenancy isolation\n user_id: Ownership\n graph_edges: Relationships to other entities\n metadata: Flexible metadata storage\n tags: Classification tags\n\n Example Usage:\n # Agent-extracted: CV parsing\n cv_ontology = Ontology(\n name=\"john-doe-cv-2024\",\n file_id=\"file-uuid-123\",\n agent_schema_id=\"cv-parser-v1\",\n provider_name=\"anthropic\",\n model_name=\"claude-sonnet-4-5-20250929\",\n extracted_data={\n \"candidate_name\": \"John Doe\",\n \"skills\": [\"Python\", \"PostgreSQL\", \"Kubernetes\"],\n },\n confidence_score=0.95,\n tags=[\"cv\", \"engineering\"]\n )\n\n # Direct-loaded: Knowledge base from git\n api_docs = Ontology(\n name=\"rest-api-guide\",\n uri=\"git://example-org/docs/api/rest-api-guide.md\",\n content=\"# REST API Guide\\n\\nThis guide covers RESTful API design...\",\n extracted_data={\n \"type\": \"documentation\",\n \"category\": \"api\",\n \"version\": \"2.0\",\n },\n tags=[\"api\", \"rest\", \"documentation\"]\n )\n\n # Direct-loaded: Technical spec from git\n 
config_spec = Ontology(\n name=\"config-schema\",\n uri=\"git://example-org/docs/specs/config-schema.md\",\n content=\"# Configuration Schema\\n\\nThis document defines...\",\n extracted_data={\n \"type\": \"specification\",\n \"format\": \"yaml\",\n \"version\": \"1.0\",\n },\n tags=[\"config\", \"schema\", \"specification\"]\n )\n \n\nThis agent can search the `ontologies` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri"}, "file_id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "title": "File Id"}, "agent_schema_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Agent Schema Id"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}, "extracted_data": {"anyOf": [{"additionalProperties": true, "type": "object"}, {"type": "null"}], "default": null, "title": "Extracted Data"}, "confidence_score": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Confidence Score"}, "extraction_timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Extraction Timestamp"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Content"}}, "required": ["name"], "json_schema_extra": {"table_name": "ontologies", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.ontology.Ontology", "tools": ["search_rem"], "default_search_table": "ontologies", "has_embeddings": true}}'::jsonb,
2201
2229
  'entity',
2202
- '{"table_name": "ontologies", "entity_key_field": "id", "embedding_fields": [], "fqn": "rem.models.entities.ontology.Ontology"}'::jsonb
2230
+ '{"table_name": "ontologies", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.ontology.Ontology"}'::jsonb
2203
2231
  )
2204
2232
  ON CONFLICT (id) DO UPDATE SET
2205
2233
  name = EXCLUDED.name,
@@ -2288,12 +2316,12 @@ User configuration for automatic ontology extraction.
2288
2316
  ## Overview
2289
2317
 
2290
2318
  The `OntologyConfig` entity is stored in the `ontology_configs` table. Each record is uniquely
2291
- identified by its `id` field for lookups and graph traversal.
2319
+ identified by its `name` field for lookups and graph traversal.
2292
2320
 
2293
2321
  ## Search Capabilities
2294
2322
 
2295
2323
  This schema includes the `search_rem` tool which supports:
2296
- - **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
2324
+ - **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
2297
2325
  - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
2298
2326
  - **SEARCH**: Semantic vector search on description (e.g., `SEARCH "concept" FROM ontology_configs LIMIT 10`)
2299
2327
  - **SQL**: Complex queries (e.g., `SELECT * FROM ontology_configs WHERE ...`)
@@ -2303,7 +2331,7 @@ This schema includes the `search_rem` tool which supports:
2303
2331
  | Property | Value |
2304
2332
  |----------|-------|
2305
2333
  | Table | `ontology_configs` |
2306
- | Entity Key | `id` |
2334
+ | Entity Key | `name` |
2307
2335
  | Embedding Fields | `description` |
2308
2336
  | Tools | `search_rem` |
2309
2337
 
@@ -2395,9 +2423,9 @@ This schema includes the `search_rem` tool which supports:
2395
2423
  - **Optional**
2396
2424
 
2397
2425
  ',
2398
- '{"type": "object", "description": "User configuration for automatic ontology extraction.\n\n Attributes:\n name: Human-readable config name\n agent_schema_id: Foreign key to Schema entity to use for extraction\n description: Purpose and scope of this config\n\n # File matching rules (ANY matching rule triggers extraction)\n mime_type_pattern: Regex pattern for file MIME types (e.g., \"application/pdf\")\n uri_pattern: Regex pattern for file URIs (e.g., \"s3://bucket/resumes/.*\")\n tag_filter: List of tags (file must have ALL tags to match)\n\n # Execution control\n priority: Execution order (higher = earlier, default 100)\n enabled: Whether this config is active (default True)\n\n # LLM provider configuration\n provider_name: Optional LLM provider override (defaults to settings)\n model_name: Optional model override (defaults to settings)\n\n Inherited from CoreModel:\n id, created_at, updated_at, deleted_at, tenant_id, user_id,\n graph_edges, metadata, tags, column\n\n Example Usage:\n # CV extraction for recruitment\n cv_config = OntologyConfig(\n name=\"recruitment-cv-parser\",\n agent_schema_id=\"cv-parser-v1\",\n description=\"Extract candidate information from resumes\",\n mime_type_pattern=\"application/pdf\",\n uri_pattern=\".*/resumes/.*\",\n tag_filter=[\"cv\", \"candidate\"],\n priority=100,\n enabled=True,\n tenant_id=\"acme-corp\",\n tags=[\"recruitment\", \"hr\"]\n )\n\n # Contract analysis for legal team\n contract_config = OntologyConfig(\n name=\"legal-contract-analyzer\",\n agent_schema_id=\"contract-parser-v2\",\n description=\"Extract key terms from supplier contracts\",\n mime_type_pattern=\"application/(pdf|msword|vnd.openxmlformats.*)\",\n tag_filter=[\"legal\", \"contract\"],\n priority=200, # Higher priority = runs first\n enabled=True,\n provider_name=\"openai\", # Override default provider\n model_name=\"gpt-4.1\",\n tenant_id=\"acme-corp\",\n tags=[\"legal\", \"procurement\"]\n )\n\n # Medical records for healthcare\n medical_config 
= OntologyConfig(\n name=\"medical-records-extractor\",\n agent_schema_id=\"medical-parser-v1\",\n description=\"Extract diagnoses and treatments from medical records\",\n mime_type_pattern=\"application/pdf\",\n tag_filter=[\"medical\", \"patient-record\"],\n priority=50,\n enabled=True,\n tenant_id=\"healthsystem\",\n tags=[\"medical\", \"hipaa-compliant\"]\n )\n \n\nThis agent can search the `ontology_configs` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "agent_schema_id": {"title": "Agent Schema Id", "type": "string"}, "description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Description"}, "mime_type_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Mime Type Pattern"}, "uri_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri Pattern"}, "tag_filter": {"default": [], "items": {"type": "string"}, "title": "Tag Filter", "type": "array"}, "priority": {"default": 100, "title": "Priority", "type": "integer"}, "enabled": {"default": true, "title": "Enabled", "type": "boolean"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}}, "required": ["name", "agent_schema_id"], "json_schema_extra": {"table_name": "ontology_configs", "entity_key_field": "id", "embedding_fields": ["description"], "fully_qualified_name": "rem.models.entities.ontology_config.OntologyConfig", "tools": ["search_rem"], "default_search_table": "ontology_configs", "has_embeddings": true}}'::jsonb,
2426
+ '{"type": "object", "description": "User configuration for automatic ontology extraction.\n\n Attributes:\n name: Human-readable config name\n agent_schema_id: Foreign key to Schema entity to use for extraction\n description: Purpose and scope of this config\n\n # File matching rules (ANY matching rule triggers extraction)\n mime_type_pattern: Regex pattern for file MIME types (e.g., \"application/pdf\")\n uri_pattern: Regex pattern for file URIs (e.g., \"s3://bucket/resumes/.*\")\n tag_filter: List of tags (file must have ALL tags to match)\n\n # Execution control\n priority: Execution order (higher = earlier, default 100)\n enabled: Whether this config is active (default True)\n\n # LLM provider configuration\n provider_name: Optional LLM provider override (defaults to settings)\n model_name: Optional model override (defaults to settings)\n\n Inherited from CoreModel:\n id, created_at, updated_at, deleted_at, tenant_id, user_id,\n graph_edges, metadata, tags, column\n\n Example Usage:\n # CV extraction for recruitment\n cv_config = OntologyConfig(\n name=\"recruitment-cv-parser\",\n agent_schema_id=\"cv-parser-v1\",\n description=\"Extract candidate information from resumes\",\n mime_type_pattern=\"application/pdf\",\n uri_pattern=\".*/resumes/.*\",\n tag_filter=[\"cv\", \"candidate\"],\n priority=100,\n enabled=True,\n tenant_id=\"acme-corp\",\n tags=[\"recruitment\", \"hr\"]\n )\n\n # Contract analysis for legal team\n contract_config = OntologyConfig(\n name=\"legal-contract-analyzer\",\n agent_schema_id=\"contract-parser-v2\",\n description=\"Extract key terms from supplier contracts\",\n mime_type_pattern=\"application/(pdf|msword|vnd.openxmlformats.*)\",\n tag_filter=[\"legal\", \"contract\"],\n priority=200, # Higher priority = runs first\n enabled=True,\n provider_name=\"openai\", # Override default provider\n model_name=\"gpt-4.1\",\n tenant_id=\"acme-corp\",\n tags=[\"legal\", \"procurement\"]\n )\n\n # Medical records for healthcare\n medical_config 
= OntologyConfig(\n name=\"medical-records-extractor\",\n agent_schema_id=\"medical-parser-v1\",\n description=\"Extract diagnoses and treatments from medical records\",\n mime_type_pattern=\"application/pdf\",\n tag_filter=[\"medical\", \"patient-record\"],\n priority=50,\n enabled=True,\n tenant_id=\"healthsystem\",\n tags=[\"medical\", \"hipaa-compliant\"]\n )\n \n\nThis agent can search the `ontology_configs` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "agent_schema_id": {"title": "Agent Schema Id", "type": "string"}, "description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Description"}, "mime_type_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Mime Type Pattern"}, "uri_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri Pattern"}, "tag_filter": {"default": [], "items": {"type": "string"}, "title": "Tag Filter", "type": "array"}, "priority": {"default": 100, "title": "Priority", "type": "integer"}, "enabled": {"default": true, "title": "Enabled", "type": "boolean"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}}, "required": ["name", "agent_schema_id"], "json_schema_extra": {"table_name": "ontology_configs", "entity_key_field": "name", "embedding_fields": ["description"], "fully_qualified_name": "rem.models.entities.ontology_config.OntologyConfig", "tools": ["search_rem"], "default_search_table": "ontology_configs", "has_embeddings": true}}'::jsonb,
2399
2427
  'entity',
2400
- '{"table_name": "ontology_configs", "entity_key_field": "id", "embedding_fields": ["description"], "fqn": "rem.models.entities.ontology_config.OntologyConfig"}'::jsonb
2428
+ '{"table_name": "ontology_configs", "entity_key_field": "name", "embedding_fields": ["description"], "fqn": "rem.models.entities.ontology_config.OntologyConfig"}'::jsonb
2401
2429
  )
2402
2430
  ON CONFLICT (id) DO UPDATE SET
2403
2431
  name = EXCLUDED.name,
@@ -2565,12 +2593,12 @@ VALUES (
2565
2593
  ## Overview
2566
2594
 
2567
2595
  The `Schema` entity is stored in the `schemas` table. Each record is uniquely
2568
- identified by its `id` field for lookups and graph traversal.
2596
+ identified by its `name` field for lookups and graph traversal.
2569
2597
 
2570
2598
  ## Search Capabilities
2571
2599
 
2572
2600
  This schema includes the `search_rem` tool which supports:
2573
- - **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
2601
+ - **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
2574
2602
  - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
2575
2603
  - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM schemas LIMIT 10`)
2576
2604
  - **SQL**: Complex queries (e.g., `SELECT * FROM schemas WHERE ...`)
@@ -2580,7 +2608,7 @@ This schema includes the `search_rem` tool which supports:
2580
2608
  | Property | Value |
2581
2609
  |----------|-------|
2582
2610
  | Table | `schemas` |
2583
- | Entity Key | `id` |
2611
+ | Entity Key | `name` |
2584
2612
  | Embedding Fields | `content` |
2585
2613
  | Tools | `search_rem` |
2586
2614
 
@@ -2662,9 +2690,9 @@ This schema includes the `search_rem` tool which supports:
2662
2690
  - JSON paths in extracted_data to embed for semantic search. Example: [''summary'', ''candidate_name'', ''skills''] for CV extraction. Values will be concatenated and embedded using configured embedding provider.
2663
2691
 
2664
2692
  ',
2665
- '{"type": "object", "description": "\n Agent schema definition.\n\n Schemas define agents that can be dynamically loaded into Pydantic AI.\n They store JsonSchema specifications with embedded metadata for tools,\n resources, and system prompts.\n\n For ontology extraction agents:\n - `provider_configs` enables multi-provider support (test across Anthropic, OpenAI, etc.)\n - `embedding_fields` specifies which output fields should be embedded for semantic search\n\n Tenant isolation is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `schemas` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "Human-readable schema name (used as identifier)", "title": "Name", "type": "string"}, "content": {"default": "", "description": "Markdown documentation and instructions for the schema", "title": "Content", "type": "string"}, "spec": {"additionalProperties": true, "description": "JsonSchema specification defining the agent structure and capabilities", "title": "Spec", "type": "object"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Schema category distinguishing schema types. Values: ''agent'' (AI agents), ''evaluator'' (LLM-as-a-Judge evaluators). Maps directly from json_schema_extra.kind field during ingestion.", "title": "Category"}, "provider_configs": {"description": "Optional provider configurations for multi-provider testing. Each dict has ''provider_name'' and ''model_name''. Example: [{''provider_name'': ''anthropic'', ''model_name'': ''claude-sonnet-4-5''}]", "items": {"additionalProperties": true, "type": "object"}, "title": "Provider Configs", "type": "array"}, "embedding_fields": {"description": "JSON paths in extracted_data to embed for semantic search. Example: [''summary'', ''candidate_name'', ''skills''] for CV extraction. 
Values will be concatenated and embedded using configured embedding provider.", "items": {"type": "string"}, "title": "Embedding Fields", "type": "array"}}, "required": ["name", "spec"], "json_schema_extra": {"table_name": "schemas", "entity_key_field": "id", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.schema.Schema", "tools": ["search_rem"], "default_search_table": "schemas", "has_embeddings": true}}'::jsonb,
2693
+ '{"type": "object", "description": "\n Agent schema definition.\n\n Schemas define agents that can be dynamically loaded into Pydantic AI.\n They store JsonSchema specifications with embedded metadata for tools,\n resources, and system prompts.\n\n For ontology extraction agents:\n - `provider_configs` enables multi-provider support (test across Anthropic, OpenAI, etc.)\n - `embedding_fields` specifies which output fields should be embedded for semantic search\n\n Tenant isolation is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `schemas` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "Human-readable schema name (used as identifier)", "title": "Name", "type": "string"}, "content": {"default": "", "description": "Markdown documentation and instructions for the schema", "title": "Content", "type": "string"}, "spec": {"additionalProperties": true, "description": "JsonSchema specification defining the agent structure and capabilities", "title": "Spec", "type": "object"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Schema category distinguishing schema types. Values: ''agent'' (AI agents), ''evaluator'' (LLM-as-a-Judge evaluators). Maps directly from json_schema_extra.kind field during ingestion.", "title": "Category"}, "provider_configs": {"description": "Optional provider configurations for multi-provider testing. Each dict has ''provider_name'' and ''model_name''. Example: [{''provider_name'': ''anthropic'', ''model_name'': ''claude-sonnet-4-5''}]", "items": {"additionalProperties": true, "type": "object"}, "title": "Provider Configs", "type": "array"}, "embedding_fields": {"description": "JSON paths in extracted_data to embed for semantic search. Example: [''summary'', ''candidate_name'', ''skills''] for CV extraction. 
Values will be concatenated and embedded using configured embedding provider.", "items": {"type": "string"}, "title": "Embedding Fields", "type": "array"}}, "required": ["name", "spec"], "json_schema_extra": {"table_name": "schemas", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.schema.Schema", "tools": ["search_rem"], "default_search_table": "schemas", "has_embeddings": true}}'::jsonb,
2666
2694
  'entity',
2667
- '{"table_name": "schemas", "entity_key_field": "id", "embedding_fields": ["content"], "fqn": "rem.models.entities.schema.Schema"}'::jsonb
2695
+ '{"table_name": "schemas", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.schema.Schema"}'::jsonb
2668
2696
  )
2669
2697
  ON CONFLICT (id) DO UPDATE SET
2670
2698
  name = EXCLUDED.name,
@@ -3115,7 +3143,7 @@ BEGIN
3115
3143
  RAISE NOTICE ' ✓ image_resources (1 embeddable fields)';
3116
3144
  RAISE NOTICE ' ✓ messages (1 embeddable fields)';
3117
3145
  RAISE NOTICE ' ✓ moments (1 embeddable fields)';
3118
- RAISE NOTICE ' ✓ ontologies';
3146
+ RAISE NOTICE ' ✓ ontologies (1 embeddable fields)';
3119
3147
  RAISE NOTICE ' ✓ ontology_configs (1 embeddable fields)';
3120
3148
  RAISE NOTICE ' ✓ resources (1 embeddable fields)';
3121
3149
  RAISE NOTICE ' ✓ schemas (1 embeddable fields)';