remdb 0.3.181__py3-none-any.whl → 0.3.223__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (48) hide show
  1. rem/agentic/README.md +262 -2
  2. rem/agentic/context.py +173 -0
  3. rem/agentic/context_builder.py +12 -2
  4. rem/agentic/mcp/tool_wrapper.py +2 -2
  5. rem/agentic/providers/pydantic_ai.py +1 -1
  6. rem/agentic/schema.py +2 -2
  7. rem/api/main.py +1 -1
  8. rem/api/mcp_router/server.py +4 -0
  9. rem/api/mcp_router/tools.py +542 -170
  10. rem/api/routers/admin.py +30 -4
  11. rem/api/routers/auth.py +106 -10
  12. rem/api/routers/chat/completions.py +66 -18
  13. rem/api/routers/chat/sse_events.py +7 -3
  14. rem/api/routers/chat/streaming.py +254 -22
  15. rem/api/routers/common.py +18 -0
  16. rem/api/routers/dev.py +7 -1
  17. rem/api/routers/feedback.py +9 -1
  18. rem/api/routers/messages.py +176 -38
  19. rem/api/routers/models.py +9 -1
  20. rem/api/routers/query.py +12 -1
  21. rem/api/routers/shared_sessions.py +16 -0
  22. rem/auth/jwt.py +19 -4
  23. rem/auth/middleware.py +42 -28
  24. rem/cli/README.md +62 -0
  25. rem/cli/commands/db.py +33 -19
  26. rem/cli/commands/process.py +171 -43
  27. rem/models/entities/ontology.py +18 -20
  28. rem/schemas/agents/rem.yaml +1 -1
  29. rem/services/content/service.py +18 -5
  30. rem/services/postgres/__init__.py +28 -3
  31. rem/services/postgres/diff_service.py +57 -5
  32. rem/services/postgres/programmable_diff_service.py +635 -0
  33. rem/services/postgres/pydantic_to_sqlalchemy.py +2 -2
  34. rem/services/postgres/register_type.py +11 -10
  35. rem/services/postgres/repository.py +14 -4
  36. rem/services/session/__init__.py +8 -1
  37. rem/services/session/compression.py +40 -2
  38. rem/services/session/pydantic_messages.py +276 -0
  39. rem/settings.py +28 -0
  40. rem/sql/migrations/001_install.sql +125 -7
  41. rem/sql/migrations/002_install_models.sql +136 -126
  42. rem/sql/migrations/004_cache_system.sql +7 -275
  43. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  44. rem/utils/schema_loader.py +6 -6
  45. {remdb-0.3.181.dist-info → remdb-0.3.223.dist-info}/METADATA +1 -1
  46. {remdb-0.3.181.dist-info → remdb-0.3.223.dist-info}/RECORD +48 -44
  47. {remdb-0.3.181.dist-info → remdb-0.3.223.dist-info}/WHEEL +0 -0
  48. {remdb-0.3.181.dist-info → remdb-0.3.223.dist-info}/entry_points.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  -- REM Model Schema (install_models.sql)
2
2
  -- Generated from Pydantic models
3
3
  -- Source: model registry
4
- -- Generated at: 2025-12-11T08:40:31.986919
4
+ -- Generated at: 2025-12-22T17:34:54.187339
5
5
  --
6
6
  -- DO NOT EDIT MANUALLY - Regenerate with: rem db schema generate
7
7
  --
@@ -36,7 +36,7 @@ END $$;
36
36
 
37
37
  CREATE TABLE IF NOT EXISTS feedbacks (
38
38
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
39
- tenant_id VARCHAR(100) NOT NULL,
39
+ tenant_id VARCHAR(100),
40
40
  user_id VARCHAR(256),
41
41
  session_id VARCHAR(256) NOT NULL,
42
42
  message_id VARCHAR(256),
@@ -56,11 +56,11 @@ CREATE TABLE IF NOT EXISTS feedbacks (
56
56
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
57
57
  );
58
58
 
59
- CREATE INDEX idx_feedbacks_tenant ON feedbacks (tenant_id);
60
- CREATE INDEX idx_feedbacks_user ON feedbacks (user_id);
61
- CREATE INDEX idx_feedbacks_graph_edges ON feedbacks USING GIN (graph_edges);
62
- CREATE INDEX idx_feedbacks_metadata ON feedbacks USING GIN (metadata);
63
- CREATE INDEX idx_feedbacks_tags ON feedbacks USING GIN (tags);
59
+ CREATE INDEX IF NOT EXISTS idx_feedbacks_tenant ON feedbacks (tenant_id);
60
+ CREATE INDEX IF NOT EXISTS idx_feedbacks_user ON feedbacks (user_id);
61
+ CREATE INDEX IF NOT EXISTS idx_feedbacks_graph_edges ON feedbacks USING GIN (graph_edges);
62
+ CREATE INDEX IF NOT EXISTS idx_feedbacks_metadata ON feedbacks USING GIN (metadata);
63
+ CREATE INDEX IF NOT EXISTS idx_feedbacks_tags ON feedbacks USING GIN (tags);
64
64
 
65
65
  -- KV_STORE trigger for feedbacks
66
66
  -- Trigger function to maintain KV_STORE for feedbacks
@@ -74,6 +74,7 @@ BEGIN
74
74
  RETURN OLD;
75
75
  ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
76
76
  -- Upsert to KV_STORE (O(1) lookup by entity_key)
77
+ -- tenant_id can be NULL (meaning public/shared data)
77
78
  INSERT INTO kv_store (
78
79
  entity_key,
79
80
  entity_type,
@@ -93,7 +94,7 @@ BEGIN
93
94
  COALESCE(NEW.graph_edges, '[]'::jsonb),
94
95
  CURRENT_TIMESTAMP
95
96
  )
96
- ON CONFLICT (tenant_id, entity_key)
97
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
97
98
  DO UPDATE SET
98
99
  entity_id = EXCLUDED.entity_id,
99
100
  user_id = EXCLUDED.user_id,
@@ -118,7 +119,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_feedbacks_kv_store_upsert();
118
119
 
119
120
  CREATE TABLE IF NOT EXISTS files (
120
121
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
121
- tenant_id VARCHAR(100) NOT NULL,
122
+ tenant_id VARCHAR(100),
122
123
  user_id VARCHAR(256),
123
124
  name VARCHAR(256) NOT NULL,
124
125
  uri VARCHAR(256) NOT NULL,
@@ -135,11 +136,11 @@ CREATE TABLE IF NOT EXISTS files (
135
136
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
136
137
  );
137
138
 
138
- CREATE INDEX idx_files_tenant ON files (tenant_id);
139
- CREATE INDEX idx_files_user ON files (user_id);
140
- CREATE INDEX idx_files_graph_edges ON files USING GIN (graph_edges);
141
- CREATE INDEX idx_files_metadata ON files USING GIN (metadata);
142
- CREATE INDEX idx_files_tags ON files USING GIN (tags);
139
+ CREATE INDEX IF NOT EXISTS idx_files_tenant ON files (tenant_id);
140
+ CREATE INDEX IF NOT EXISTS idx_files_user ON files (user_id);
141
+ CREATE INDEX IF NOT EXISTS idx_files_graph_edges ON files USING GIN (graph_edges);
142
+ CREATE INDEX IF NOT EXISTS idx_files_metadata ON files USING GIN (metadata);
143
+ CREATE INDEX IF NOT EXISTS idx_files_tags ON files USING GIN (tags);
143
144
 
144
145
  -- Embeddings for files
145
146
  CREATE TABLE IF NOT EXISTS embeddings_files (
@@ -157,10 +158,10 @@ CREATE TABLE IF NOT EXISTS embeddings_files (
157
158
  );
158
159
 
159
160
  -- Index for entity lookup (get all embeddings for entity)
160
- CREATE INDEX idx_embeddings_files_entity ON embeddings_files (entity_id);
161
+ CREATE INDEX IF NOT EXISTS idx_embeddings_files_entity ON embeddings_files (entity_id);
161
162
 
162
163
  -- Index for field + provider lookup
163
- CREATE INDEX idx_embeddings_files_field_provider ON embeddings_files (field_name, provider);
164
+ CREATE INDEX IF NOT EXISTS idx_embeddings_files_field_provider ON embeddings_files (field_name, provider);
164
165
 
165
166
  -- HNSW index for vector similarity search (created in background)
166
167
  -- Note: This will be created by background thread after data load
@@ -179,6 +180,7 @@ BEGIN
179
180
  RETURN OLD;
180
181
  ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
181
182
  -- Upsert to KV_STORE (O(1) lookup by entity_key)
183
+ -- tenant_id can be NULL (meaning public/shared data)
182
184
  INSERT INTO kv_store (
183
185
  entity_key,
184
186
  entity_type,
@@ -198,7 +200,7 @@ BEGIN
198
200
  COALESCE(NEW.graph_edges, '[]'::jsonb),
199
201
  CURRENT_TIMESTAMP
200
202
  )
201
- ON CONFLICT (tenant_id, entity_key)
203
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
202
204
  DO UPDATE SET
203
205
  entity_id = EXCLUDED.entity_id,
204
206
  user_id = EXCLUDED.user_id,
@@ -223,7 +225,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_files_kv_store_upsert();
223
225
 
224
226
  CREATE TABLE IF NOT EXISTS image_resources (
225
227
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
226
- tenant_id VARCHAR(100) NOT NULL,
228
+ tenant_id VARCHAR(100),
227
229
  user_id VARCHAR(256),
228
230
  name VARCHAR(256),
229
231
  uri VARCHAR(256),
@@ -248,11 +250,11 @@ CREATE TABLE IF NOT EXISTS image_resources (
248
250
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
249
251
  );
250
252
 
251
- CREATE INDEX idx_image_resources_tenant ON image_resources (tenant_id);
252
- CREATE INDEX idx_image_resources_user ON image_resources (user_id);
253
- CREATE INDEX idx_image_resources_graph_edges ON image_resources USING GIN (graph_edges);
254
- CREATE INDEX idx_image_resources_metadata ON image_resources USING GIN (metadata);
255
- CREATE INDEX idx_image_resources_tags ON image_resources USING GIN (tags);
253
+ CREATE INDEX IF NOT EXISTS idx_image_resources_tenant ON image_resources (tenant_id);
254
+ CREATE INDEX IF NOT EXISTS idx_image_resources_user ON image_resources (user_id);
255
+ CREATE INDEX IF NOT EXISTS idx_image_resources_graph_edges ON image_resources USING GIN (graph_edges);
256
+ CREATE INDEX IF NOT EXISTS idx_image_resources_metadata ON image_resources USING GIN (metadata);
257
+ CREATE INDEX IF NOT EXISTS idx_image_resources_tags ON image_resources USING GIN (tags);
256
258
 
257
259
  -- Embeddings for image_resources
258
260
  CREATE TABLE IF NOT EXISTS embeddings_image_resources (
@@ -270,10 +272,10 @@ CREATE TABLE IF NOT EXISTS embeddings_image_resources (
270
272
  );
271
273
 
272
274
  -- Index for entity lookup (get all embeddings for entity)
273
- CREATE INDEX idx_embeddings_image_resources_entity ON embeddings_image_resources (entity_id);
275
+ CREATE INDEX IF NOT EXISTS idx_embeddings_image_resources_entity ON embeddings_image_resources (entity_id);
274
276
 
275
277
  -- Index for field + provider lookup
276
- CREATE INDEX idx_embeddings_image_resources_field_provider ON embeddings_image_resources (field_name, provider);
278
+ CREATE INDEX IF NOT EXISTS idx_embeddings_image_resources_field_provider ON embeddings_image_resources (field_name, provider);
277
279
 
278
280
  -- HNSW index for vector similarity search (created in background)
279
281
  -- Note: This will be created by background thread after data load
@@ -292,6 +294,7 @@ BEGIN
292
294
  RETURN OLD;
293
295
  ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
294
296
  -- Upsert to KV_STORE (O(1) lookup by entity_key)
297
+ -- tenant_id can be NULL (meaning public/shared data)
295
298
  INSERT INTO kv_store (
296
299
  entity_key,
297
300
  entity_type,
@@ -311,7 +314,7 @@ BEGIN
311
314
  COALESCE(NEW.graph_edges, '[]'::jsonb),
312
315
  CURRENT_TIMESTAMP
313
316
  )
314
- ON CONFLICT (tenant_id, entity_key)
317
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
315
318
  DO UPDATE SET
316
319
  entity_id = EXCLUDED.entity_id,
317
320
  user_id = EXCLUDED.user_id,
@@ -336,7 +339,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_image_resources_kv_store_upsert();
336
339
 
337
340
  CREATE TABLE IF NOT EXISTS messages (
338
341
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
339
- tenant_id VARCHAR(100) NOT NULL,
342
+ tenant_id VARCHAR(100),
340
343
  user_id VARCHAR(256),
341
344
  content TEXT NOT NULL,
342
345
  message_type VARCHAR(256),
@@ -354,11 +357,11 @@ CREATE TABLE IF NOT EXISTS messages (
354
357
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
355
358
  );
356
359
 
357
- CREATE INDEX idx_messages_tenant ON messages (tenant_id);
358
- CREATE INDEX idx_messages_user ON messages (user_id);
359
- CREATE INDEX idx_messages_graph_edges ON messages USING GIN (graph_edges);
360
- CREATE INDEX idx_messages_metadata ON messages USING GIN (metadata);
361
- CREATE INDEX idx_messages_tags ON messages USING GIN (tags);
360
+ CREATE INDEX IF NOT EXISTS idx_messages_tenant ON messages (tenant_id);
361
+ CREATE INDEX IF NOT EXISTS idx_messages_user ON messages (user_id);
362
+ CREATE INDEX IF NOT EXISTS idx_messages_graph_edges ON messages USING GIN (graph_edges);
363
+ CREATE INDEX IF NOT EXISTS idx_messages_metadata ON messages USING GIN (metadata);
364
+ CREATE INDEX IF NOT EXISTS idx_messages_tags ON messages USING GIN (tags);
362
365
 
363
366
  -- Embeddings for messages
364
367
  CREATE TABLE IF NOT EXISTS embeddings_messages (
@@ -376,10 +379,10 @@ CREATE TABLE IF NOT EXISTS embeddings_messages (
376
379
  );
377
380
 
378
381
  -- Index for entity lookup (get all embeddings for entity)
379
- CREATE INDEX idx_embeddings_messages_entity ON embeddings_messages (entity_id);
382
+ CREATE INDEX IF NOT EXISTS idx_embeddings_messages_entity ON embeddings_messages (entity_id);
380
383
 
381
384
  -- Index for field + provider lookup
382
- CREATE INDEX idx_embeddings_messages_field_provider ON embeddings_messages (field_name, provider);
385
+ CREATE INDEX IF NOT EXISTS idx_embeddings_messages_field_provider ON embeddings_messages (field_name, provider);
383
386
 
384
387
  -- HNSW index for vector similarity search (created in background)
385
388
  -- Note: This will be created by background thread after data load
@@ -398,6 +401,7 @@ BEGIN
398
401
  RETURN OLD;
399
402
  ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
400
403
  -- Upsert to KV_STORE (O(1) lookup by entity_key)
404
+ -- tenant_id can be NULL (meaning public/shared data)
401
405
  INSERT INTO kv_store (
402
406
  entity_key,
403
407
  entity_type,
@@ -417,7 +421,7 @@ BEGIN
417
421
  COALESCE(NEW.graph_edges, '[]'::jsonb),
418
422
  CURRENT_TIMESTAMP
419
423
  )
420
- ON CONFLICT (tenant_id, entity_key)
424
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
421
425
  DO UPDATE SET
422
426
  entity_id = EXCLUDED.entity_id,
423
427
  user_id = EXCLUDED.user_id,
@@ -442,7 +446,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_messages_kv_store_upsert();
442
446
 
443
447
  CREATE TABLE IF NOT EXISTS moments (
444
448
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
445
- tenant_id VARCHAR(100) NOT NULL,
449
+ tenant_id VARCHAR(100),
446
450
  user_id VARCHAR(256),
447
451
  name VARCHAR(256),
448
452
  moment_type VARCHAR(256),
@@ -462,11 +466,11 @@ CREATE TABLE IF NOT EXISTS moments (
462
466
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
463
467
  );
464
468
 
465
- CREATE INDEX idx_moments_tenant ON moments (tenant_id);
466
- CREATE INDEX idx_moments_user ON moments (user_id);
467
- CREATE INDEX idx_moments_graph_edges ON moments USING GIN (graph_edges);
468
- CREATE INDEX idx_moments_metadata ON moments USING GIN (metadata);
469
- CREATE INDEX idx_moments_tags ON moments USING GIN (tags);
469
+ CREATE INDEX IF NOT EXISTS idx_moments_tenant ON moments (tenant_id);
470
+ CREATE INDEX IF NOT EXISTS idx_moments_user ON moments (user_id);
471
+ CREATE INDEX IF NOT EXISTS idx_moments_graph_edges ON moments USING GIN (graph_edges);
472
+ CREATE INDEX IF NOT EXISTS idx_moments_metadata ON moments USING GIN (metadata);
473
+ CREATE INDEX IF NOT EXISTS idx_moments_tags ON moments USING GIN (tags);
470
474
 
471
475
  -- Embeddings for moments
472
476
  CREATE TABLE IF NOT EXISTS embeddings_moments (
@@ -484,10 +488,10 @@ CREATE TABLE IF NOT EXISTS embeddings_moments (
484
488
  );
485
489
 
486
490
  -- Index for entity lookup (get all embeddings for entity)
487
- CREATE INDEX idx_embeddings_moments_entity ON embeddings_moments (entity_id);
491
+ CREATE INDEX IF NOT EXISTS idx_embeddings_moments_entity ON embeddings_moments (entity_id);
488
492
 
489
493
  -- Index for field + provider lookup
490
- CREATE INDEX idx_embeddings_moments_field_provider ON embeddings_moments (field_name, provider);
494
+ CREATE INDEX IF NOT EXISTS idx_embeddings_moments_field_provider ON embeddings_moments (field_name, provider);
491
495
 
492
496
  -- HNSW index for vector similarity search (created in background)
493
497
  -- Note: This will be created by background thread after data load
@@ -506,6 +510,7 @@ BEGIN
506
510
  RETURN OLD;
507
511
  ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
508
512
  -- Upsert to KV_STORE (O(1) lookup by entity_key)
513
+ -- tenant_id can be NULL (meaning public/shared data)
509
514
  INSERT INTO kv_store (
510
515
  entity_key,
511
516
  entity_type,
@@ -525,7 +530,7 @@ BEGIN
525
530
  COALESCE(NEW.graph_edges, '[]'::jsonb),
526
531
  CURRENT_TIMESTAMP
527
532
  )
528
- ON CONFLICT (tenant_id, entity_key)
533
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
529
534
  DO UPDATE SET
530
535
  entity_id = EXCLUDED.entity_id,
531
536
  user_id = EXCLUDED.user_id,
@@ -550,7 +555,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_moments_kv_store_upsert();
550
555
 
551
556
  CREATE TABLE IF NOT EXISTS ontologies (
552
557
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
553
- tenant_id VARCHAR(100) NOT NULL,
558
+ tenant_id VARCHAR(100),
554
559
  user_id VARCHAR(256),
555
560
  name VARCHAR(256) NOT NULL,
556
561
  uri VARCHAR(256),
@@ -570,11 +575,11 @@ CREATE TABLE IF NOT EXISTS ontologies (
570
575
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
571
576
  );
572
577
 
573
- CREATE INDEX idx_ontologies_tenant ON ontologies (tenant_id);
574
- CREATE INDEX idx_ontologies_user ON ontologies (user_id);
575
- CREATE INDEX idx_ontologies_graph_edges ON ontologies USING GIN (graph_edges);
576
- CREATE INDEX idx_ontologies_metadata ON ontologies USING GIN (metadata);
577
- CREATE INDEX idx_ontologies_tags ON ontologies USING GIN (tags);
578
+ CREATE INDEX IF NOT EXISTS idx_ontologies_tenant ON ontologies (tenant_id);
579
+ CREATE INDEX IF NOT EXISTS idx_ontologies_user ON ontologies (user_id);
580
+ CREATE INDEX IF NOT EXISTS idx_ontologies_graph_edges ON ontologies USING GIN (graph_edges);
581
+ CREATE INDEX IF NOT EXISTS idx_ontologies_metadata ON ontologies USING GIN (metadata);
582
+ CREATE INDEX IF NOT EXISTS idx_ontologies_tags ON ontologies USING GIN (tags);
578
583
 
579
584
  -- Embeddings for ontologies
580
585
  CREATE TABLE IF NOT EXISTS embeddings_ontologies (
@@ -592,10 +597,10 @@ CREATE TABLE IF NOT EXISTS embeddings_ontologies (
592
597
  );
593
598
 
594
599
  -- Index for entity lookup (get all embeddings for entity)
595
- CREATE INDEX idx_embeddings_ontologies_entity ON embeddings_ontologies (entity_id);
600
+ CREATE INDEX IF NOT EXISTS idx_embeddings_ontologies_entity ON embeddings_ontologies (entity_id);
596
601
 
597
602
  -- Index for field + provider lookup
598
- CREATE INDEX idx_embeddings_ontologies_field_provider ON embeddings_ontologies (field_name, provider);
603
+ CREATE INDEX IF NOT EXISTS idx_embeddings_ontologies_field_provider ON embeddings_ontologies (field_name, provider);
599
604
 
600
605
  -- HNSW index for vector similarity search (created in background)
601
606
  -- Note: This will be created by background thread after data load
@@ -614,6 +619,7 @@ BEGIN
614
619
  RETURN OLD;
615
620
  ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
616
621
  -- Upsert to KV_STORE (O(1) lookup by entity_key)
622
+ -- tenant_id can be NULL (meaning public/shared data)
617
623
  INSERT INTO kv_store (
618
624
  entity_key,
619
625
  entity_type,
@@ -633,7 +639,7 @@ BEGIN
633
639
  COALESCE(NEW.graph_edges, '[]'::jsonb),
634
640
  CURRENT_TIMESTAMP
635
641
  )
636
- ON CONFLICT (tenant_id, entity_key)
642
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
637
643
  DO UPDATE SET
638
644
  entity_id = EXCLUDED.entity_id,
639
645
  user_id = EXCLUDED.user_id,
@@ -658,7 +664,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_ontologies_kv_store_upsert();
658
664
 
659
665
  CREATE TABLE IF NOT EXISTS ontology_configs (
660
666
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
661
- tenant_id VARCHAR(100) NOT NULL,
667
+ tenant_id VARCHAR(100),
662
668
  user_id VARCHAR(256),
663
669
  name VARCHAR(256) NOT NULL,
664
670
  agent_schema_id VARCHAR(256) NOT NULL,
@@ -678,11 +684,11 @@ CREATE TABLE IF NOT EXISTS ontology_configs (
678
684
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
679
685
  );
680
686
 
681
- CREATE INDEX idx_ontology_configs_tenant ON ontology_configs (tenant_id);
682
- CREATE INDEX idx_ontology_configs_user ON ontology_configs (user_id);
683
- CREATE INDEX idx_ontology_configs_graph_edges ON ontology_configs USING GIN (graph_edges);
684
- CREATE INDEX idx_ontology_configs_metadata ON ontology_configs USING GIN (metadata);
685
- CREATE INDEX idx_ontology_configs_tags ON ontology_configs USING GIN (tags);
687
+ CREATE INDEX IF NOT EXISTS idx_ontology_configs_tenant ON ontology_configs (tenant_id);
688
+ CREATE INDEX IF NOT EXISTS idx_ontology_configs_user ON ontology_configs (user_id);
689
+ CREATE INDEX IF NOT EXISTS idx_ontology_configs_graph_edges ON ontology_configs USING GIN (graph_edges);
690
+ CREATE INDEX IF NOT EXISTS idx_ontology_configs_metadata ON ontology_configs USING GIN (metadata);
691
+ CREATE INDEX IF NOT EXISTS idx_ontology_configs_tags ON ontology_configs USING GIN (tags);
686
692
 
687
693
  -- Embeddings for ontology_configs
688
694
  CREATE TABLE IF NOT EXISTS embeddings_ontology_configs (
@@ -700,10 +706,10 @@ CREATE TABLE IF NOT EXISTS embeddings_ontology_configs (
700
706
  );
701
707
 
702
708
  -- Index for entity lookup (get all embeddings for entity)
703
- CREATE INDEX idx_embeddings_ontology_configs_entity ON embeddings_ontology_configs (entity_id);
709
+ CREATE INDEX IF NOT EXISTS idx_embeddings_ontology_configs_entity ON embeddings_ontology_configs (entity_id);
704
710
 
705
711
  -- Index for field + provider lookup
706
- CREATE INDEX idx_embeddings_ontology_configs_field_provider ON embeddings_ontology_configs (field_name, provider);
712
+ CREATE INDEX IF NOT EXISTS idx_embeddings_ontology_configs_field_provider ON embeddings_ontology_configs (field_name, provider);
707
713
 
708
714
  -- HNSW index for vector similarity search (created in background)
709
715
  -- Note: This will be created by background thread after data load
@@ -722,6 +728,7 @@ BEGIN
722
728
  RETURN OLD;
723
729
  ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
724
730
  -- Upsert to KV_STORE (O(1) lookup by entity_key)
731
+ -- tenant_id can be NULL (meaning public/shared data)
725
732
  INSERT INTO kv_store (
726
733
  entity_key,
727
734
  entity_type,
@@ -741,7 +748,7 @@ BEGIN
741
748
  COALESCE(NEW.graph_edges, '[]'::jsonb),
742
749
  CURRENT_TIMESTAMP
743
750
  )
744
- ON CONFLICT (tenant_id, entity_key)
751
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
745
752
  DO UPDATE SET
746
753
  entity_id = EXCLUDED.entity_id,
747
754
  user_id = EXCLUDED.user_id,
@@ -766,7 +773,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_ontology_configs_kv_store_upsert();
766
773
 
767
774
  CREATE TABLE IF NOT EXISTS resources (
768
775
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
769
- tenant_id VARCHAR(100) NOT NULL,
776
+ tenant_id VARCHAR(100),
770
777
  user_id VARCHAR(256),
771
778
  name VARCHAR(256),
772
779
  uri VARCHAR(256),
@@ -783,11 +790,11 @@ CREATE TABLE IF NOT EXISTS resources (
783
790
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
784
791
  );
785
792
 
786
- CREATE INDEX idx_resources_tenant ON resources (tenant_id);
787
- CREATE INDEX idx_resources_user ON resources (user_id);
788
- CREATE INDEX idx_resources_graph_edges ON resources USING GIN (graph_edges);
789
- CREATE INDEX idx_resources_metadata ON resources USING GIN (metadata);
790
- CREATE INDEX idx_resources_tags ON resources USING GIN (tags);
793
+ CREATE INDEX IF NOT EXISTS idx_resources_tenant ON resources (tenant_id);
794
+ CREATE INDEX IF NOT EXISTS idx_resources_user ON resources (user_id);
795
+ CREATE INDEX IF NOT EXISTS idx_resources_graph_edges ON resources USING GIN (graph_edges);
796
+ CREATE INDEX IF NOT EXISTS idx_resources_metadata ON resources USING GIN (metadata);
797
+ CREATE INDEX IF NOT EXISTS idx_resources_tags ON resources USING GIN (tags);
791
798
 
792
799
  -- Embeddings for resources
793
800
  CREATE TABLE IF NOT EXISTS embeddings_resources (
@@ -805,10 +812,10 @@ CREATE TABLE IF NOT EXISTS embeddings_resources (
805
812
  );
806
813
 
807
814
  -- Index for entity lookup (get all embeddings for entity)
808
- CREATE INDEX idx_embeddings_resources_entity ON embeddings_resources (entity_id);
815
+ CREATE INDEX IF NOT EXISTS idx_embeddings_resources_entity ON embeddings_resources (entity_id);
809
816
 
810
817
  -- Index for field + provider lookup
811
- CREATE INDEX idx_embeddings_resources_field_provider ON embeddings_resources (field_name, provider);
818
+ CREATE INDEX IF NOT EXISTS idx_embeddings_resources_field_provider ON embeddings_resources (field_name, provider);
812
819
 
813
820
  -- HNSW index for vector similarity search (created in background)
814
821
  -- Note: This will be created by background thread after data load
@@ -827,6 +834,7 @@ BEGIN
827
834
  RETURN OLD;
828
835
  ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
829
836
  -- Upsert to KV_STORE (O(1) lookup by entity_key)
837
+ -- tenant_id can be NULL (meaning public/shared data)
830
838
  INSERT INTO kv_store (
831
839
  entity_key,
832
840
  entity_type,
@@ -846,7 +854,7 @@ BEGIN
846
854
  COALESCE(NEW.graph_edges, '[]'::jsonb),
847
855
  CURRENT_TIMESTAMP
848
856
  )
849
- ON CONFLICT (tenant_id, entity_key)
857
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
850
858
  DO UPDATE SET
851
859
  entity_id = EXCLUDED.entity_id,
852
860
  user_id = EXCLUDED.user_id,
@@ -871,7 +879,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_resources_kv_store_upsert();
871
879
 
872
880
  CREATE TABLE IF NOT EXISTS schemas (
873
881
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
874
- tenant_id VARCHAR(100) NOT NULL,
882
+ tenant_id VARCHAR(100),
875
883
  user_id VARCHAR(256),
876
884
  name VARCHAR(256) NOT NULL,
877
885
  content TEXT,
@@ -887,11 +895,11 @@ CREATE TABLE IF NOT EXISTS schemas (
887
895
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
888
896
  );
889
897
 
890
- CREATE INDEX idx_schemas_tenant ON schemas (tenant_id);
891
- CREATE INDEX idx_schemas_user ON schemas (user_id);
892
- CREATE INDEX idx_schemas_graph_edges ON schemas USING GIN (graph_edges);
893
- CREATE INDEX idx_schemas_metadata ON schemas USING GIN (metadata);
894
- CREATE INDEX idx_schemas_tags ON schemas USING GIN (tags);
898
+ CREATE INDEX IF NOT EXISTS idx_schemas_tenant ON schemas (tenant_id);
899
+ CREATE INDEX IF NOT EXISTS idx_schemas_user ON schemas (user_id);
900
+ CREATE INDEX IF NOT EXISTS idx_schemas_graph_edges ON schemas USING GIN (graph_edges);
901
+ CREATE INDEX IF NOT EXISTS idx_schemas_metadata ON schemas USING GIN (metadata);
902
+ CREATE INDEX IF NOT EXISTS idx_schemas_tags ON schemas USING GIN (tags);
895
903
 
896
904
  -- Embeddings for schemas
897
905
  CREATE TABLE IF NOT EXISTS embeddings_schemas (
@@ -909,10 +917,10 @@ CREATE TABLE IF NOT EXISTS embeddings_schemas (
909
917
  );
910
918
 
911
919
  -- Index for entity lookup (get all embeddings for entity)
912
- CREATE INDEX idx_embeddings_schemas_entity ON embeddings_schemas (entity_id);
920
+ CREATE INDEX IF NOT EXISTS idx_embeddings_schemas_entity ON embeddings_schemas (entity_id);
913
921
 
914
922
  -- Index for field + provider lookup
915
- CREATE INDEX idx_embeddings_schemas_field_provider ON embeddings_schemas (field_name, provider);
923
+ CREATE INDEX IF NOT EXISTS idx_embeddings_schemas_field_provider ON embeddings_schemas (field_name, provider);
916
924
 
917
925
  -- HNSW index for vector similarity search (created in background)
918
926
  -- Note: This will be created by background thread after data load
@@ -931,6 +939,7 @@ BEGIN
931
939
  RETURN OLD;
932
940
  ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
933
941
  -- Upsert to KV_STORE (O(1) lookup by entity_key)
942
+ -- tenant_id can be NULL (meaning public/shared data)
934
943
  INSERT INTO kv_store (
935
944
  entity_key,
936
945
  entity_type,
@@ -950,7 +959,7 @@ BEGIN
950
959
  COALESCE(NEW.graph_edges, '[]'::jsonb),
951
960
  CURRENT_TIMESTAMP
952
961
  )
953
- ON CONFLICT (tenant_id, entity_key)
962
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
954
963
  DO UPDATE SET
955
964
  entity_id = EXCLUDED.entity_id,
956
965
  user_id = EXCLUDED.user_id,
@@ -975,7 +984,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_schemas_kv_store_upsert();
975
984
 
976
985
  CREATE TABLE IF NOT EXISTS sessions (
977
986
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
978
- tenant_id VARCHAR(100) NOT NULL,
987
+ tenant_id VARCHAR(100),
979
988
  user_id VARCHAR(256),
980
989
  name VARCHAR(256) NOT NULL,
981
990
  mode TEXT,
@@ -994,11 +1003,11 @@ CREATE TABLE IF NOT EXISTS sessions (
994
1003
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
995
1004
  );
996
1005
 
997
- CREATE INDEX idx_sessions_tenant ON sessions (tenant_id);
998
- CREATE INDEX idx_sessions_user ON sessions (user_id);
999
- CREATE INDEX idx_sessions_graph_edges ON sessions USING GIN (graph_edges);
1000
- CREATE INDEX idx_sessions_metadata ON sessions USING GIN (metadata);
1001
- CREATE INDEX idx_sessions_tags ON sessions USING GIN (tags);
1006
+ CREATE INDEX IF NOT EXISTS idx_sessions_tenant ON sessions (tenant_id);
1007
+ CREATE INDEX IF NOT EXISTS idx_sessions_user ON sessions (user_id);
1008
+ CREATE INDEX IF NOT EXISTS idx_sessions_graph_edges ON sessions USING GIN (graph_edges);
1009
+ CREATE INDEX IF NOT EXISTS idx_sessions_metadata ON sessions USING GIN (metadata);
1010
+ CREATE INDEX IF NOT EXISTS idx_sessions_tags ON sessions USING GIN (tags);
1002
1011
 
1003
1012
  -- Embeddings for sessions
1004
1013
  CREATE TABLE IF NOT EXISTS embeddings_sessions (
@@ -1016,10 +1025,10 @@ CREATE TABLE IF NOT EXISTS embeddings_sessions (
1016
1025
  );
1017
1026
 
1018
1027
  -- Index for entity lookup (get all embeddings for entity)
1019
- CREATE INDEX idx_embeddings_sessions_entity ON embeddings_sessions (entity_id);
1028
+ CREATE INDEX IF NOT EXISTS idx_embeddings_sessions_entity ON embeddings_sessions (entity_id);
1020
1029
 
1021
1030
  -- Index for field + provider lookup
1022
- CREATE INDEX idx_embeddings_sessions_field_provider ON embeddings_sessions (field_name, provider);
1031
+ CREATE INDEX IF NOT EXISTS idx_embeddings_sessions_field_provider ON embeddings_sessions (field_name, provider);
1023
1032
 
1024
1033
  -- HNSW index for vector similarity search (created in background)
1025
1034
  -- Note: This will be created by background thread after data load
@@ -1038,6 +1047,7 @@ BEGIN
1038
1047
  RETURN OLD;
1039
1048
  ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
1040
1049
  -- Upsert to KV_STORE (O(1) lookup by entity_key)
1050
+ -- tenant_id can be NULL (meaning public/shared data)
1041
1051
  INSERT INTO kv_store (
1042
1052
  entity_key,
1043
1053
  entity_type,
@@ -1057,7 +1067,7 @@ BEGIN
1057
1067
  COALESCE(NEW.graph_edges, '[]'::jsonb),
1058
1068
  CURRENT_TIMESTAMP
1059
1069
  )
1060
- ON CONFLICT (tenant_id, entity_key)
1070
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
1061
1071
  DO UPDATE SET
1062
1072
  entity_id = EXCLUDED.entity_id,
1063
1073
  user_id = EXCLUDED.user_id,
@@ -1082,7 +1092,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_sessions_kv_store_upsert();
1082
1092
 
1083
1093
  CREATE TABLE IF NOT EXISTS shared_sessions (
1084
1094
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
1085
- tenant_id VARCHAR(100) NOT NULL,
1095
+ tenant_id VARCHAR(100),
1086
1096
  user_id VARCHAR(256),
1087
1097
  session_id VARCHAR(256) NOT NULL,
1088
1098
  owner_user_id VARCHAR(256) NOT NULL,
@@ -1095,11 +1105,11 @@ CREATE TABLE IF NOT EXISTS shared_sessions (
1095
1105
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
1096
1106
  );
1097
1107
 
1098
- CREATE INDEX idx_shared_sessions_tenant ON shared_sessions (tenant_id);
1099
- CREATE INDEX idx_shared_sessions_user ON shared_sessions (user_id);
1100
- CREATE INDEX idx_shared_sessions_graph_edges ON shared_sessions USING GIN (graph_edges);
1101
- CREATE INDEX idx_shared_sessions_metadata ON shared_sessions USING GIN (metadata);
1102
- CREATE INDEX idx_shared_sessions_tags ON shared_sessions USING GIN (tags);
1108
+ CREATE INDEX IF NOT EXISTS idx_shared_sessions_tenant ON shared_sessions (tenant_id);
1109
+ CREATE INDEX IF NOT EXISTS idx_shared_sessions_user ON shared_sessions (user_id);
1110
+ CREATE INDEX IF NOT EXISTS idx_shared_sessions_graph_edges ON shared_sessions USING GIN (graph_edges);
1111
+ CREATE INDEX IF NOT EXISTS idx_shared_sessions_metadata ON shared_sessions USING GIN (metadata);
1112
+ CREATE INDEX IF NOT EXISTS idx_shared_sessions_tags ON shared_sessions USING GIN (tags);
1103
1113
 
1104
1114
  -- KV_STORE trigger for shared_sessions
1105
1115
  -- Trigger function to maintain KV_STORE for shared_sessions
@@ -1113,6 +1123,7 @@ BEGIN
1113
1123
  RETURN OLD;
1114
1124
  ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
1115
1125
  -- Upsert to KV_STORE (O(1) lookup by entity_key)
1126
+ -- tenant_id can be NULL (meaning public/shared data)
1116
1127
  INSERT INTO kv_store (
1117
1128
  entity_key,
1118
1129
  entity_type,
@@ -1132,7 +1143,7 @@ BEGIN
1132
1143
  COALESCE(NEW.graph_edges, '[]'::jsonb),
1133
1144
  CURRENT_TIMESTAMP
1134
1145
  )
1135
- ON CONFLICT (tenant_id, entity_key)
1146
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
1136
1147
  DO UPDATE SET
1137
1148
  entity_id = EXCLUDED.entity_id,
1138
1149
  user_id = EXCLUDED.user_id,
@@ -1157,7 +1168,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_shared_sessions_kv_store_upsert();
1157
1168
 
1158
1169
  CREATE TABLE IF NOT EXISTS users (
1159
1170
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
1160
- tenant_id VARCHAR(100) NOT NULL,
1171
+ tenant_id VARCHAR(100),
1161
1172
  user_id VARCHAR(256),
1162
1173
  name VARCHAR(256) NOT NULL,
1163
1174
  email VARCHAR(256),
@@ -1178,11 +1189,11 @@ CREATE TABLE IF NOT EXISTS users (
1178
1189
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
1179
1190
  );
1180
1191
 
1181
- CREATE INDEX idx_users_tenant ON users (tenant_id);
1182
- CREATE INDEX idx_users_user ON users (user_id);
1183
- CREATE INDEX idx_users_graph_edges ON users USING GIN (graph_edges);
1184
- CREATE INDEX idx_users_metadata ON users USING GIN (metadata);
1185
- CREATE INDEX idx_users_tags ON users USING GIN (tags);
1192
+ CREATE INDEX IF NOT EXISTS idx_users_tenant ON users (tenant_id);
1193
+ CREATE INDEX IF NOT EXISTS idx_users_user ON users (user_id);
1194
+ CREATE INDEX IF NOT EXISTS idx_users_graph_edges ON users USING GIN (graph_edges);
1195
+ CREATE INDEX IF NOT EXISTS idx_users_metadata ON users USING GIN (metadata);
1196
+ CREATE INDEX IF NOT EXISTS idx_users_tags ON users USING GIN (tags);
1186
1197
 
1187
1198
  -- Embeddings for users
1188
1199
  CREATE TABLE IF NOT EXISTS embeddings_users (
@@ -1200,10 +1211,10 @@ CREATE TABLE IF NOT EXISTS embeddings_users (
1200
1211
  );
1201
1212
 
1202
1213
  -- Index for entity lookup (get all embeddings for entity)
1203
- CREATE INDEX idx_embeddings_users_entity ON embeddings_users (entity_id);
1214
+ CREATE INDEX IF NOT EXISTS idx_embeddings_users_entity ON embeddings_users (entity_id);
1204
1215
 
1205
1216
  -- Index for field + provider lookup
1206
- CREATE INDEX idx_embeddings_users_field_provider ON embeddings_users (field_name, provider);
1217
+ CREATE INDEX IF NOT EXISTS idx_embeddings_users_field_provider ON embeddings_users (field_name, provider);
1207
1218
 
1208
1219
  -- HNSW index for vector similarity search (created in background)
1209
1220
  -- Note: This will be created by background thread after data load
@@ -1222,6 +1233,7 @@ BEGIN
1222
1233
  RETURN OLD;
1223
1234
  ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
1224
1235
  -- Upsert to KV_STORE (O(1) lookup by entity_key)
1236
+ -- tenant_id can be NULL (meaning public/shared data)
1225
1237
  INSERT INTO kv_store (
1226
1238
  entity_key,
1227
1239
  entity_type,
@@ -1241,7 +1253,7 @@ BEGIN
1241
1253
  COALESCE(NEW.graph_edges, '[]'::jsonb),
1242
1254
  CURRENT_TIMESTAMP
1243
1255
  )
1244
- ON CONFLICT (tenant_id, entity_key)
1256
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
1245
1257
  DO UPDATE SET
1246
1258
  entity_id = EXCLUDED.entity_id,
1247
1259
  user_id = EXCLUDED.user_id,
@@ -2076,32 +2088,30 @@ Domain-specific knowledge - either agent-extracted or direct-loaded.
2076
2088
  tags=["cv", "engineering"]
2077
2089
  )
2078
2090
 
2079
- # Direct-loaded: Medical knowledge base from git
2080
- disorder_ontology = Ontology(
2081
- name="panic-disorder",
2082
- uri="git://bwolfson-siggie/Siggy-MVP/ontology/disorders/anxiety/panic-disorder.md",
2083
- content="# Panic Disorder\n\nPanic disorder is characterized by...",
2091
+ # Direct-loaded: Knowledge base from git
2092
+ api_docs = Ontology(
2093
+ name="rest-api-guide",
2094
+ uri="git://example-org/docs/api/rest-api-guide.md",
2095
+ content="# REST API Guide\n\nThis guide covers RESTful API design...",
2084
2096
  extracted_data={
2085
- "type": "disorder",
2086
- "category": "anxiety",
2087
- "icd10": "F41.0",
2088
- "dsm5_criteria": ["A", "B", "C", "D"],
2097
+ "type": "documentation",
2098
+ "category": "api",
2099
+ "version": "2.0",
2089
2100
  },
2090
- tags=["disorder", "anxiety", "dsm5"]
2101
+ tags=["api", "rest", "documentation"]
2091
2102
  )
2092
2103
 
2093
- # Direct-loaded: Clinical procedure from git
2094
- scid_node = Ontology(
2095
- name="scid-5-f1",
2096
- uri="git://bwolfson-siggie/Siggy-MVP/ontology/procedures/scid-5/module-f/scid-5-f1.md",
2097
- content="# scid-5-f1: Panic Attack Screening\n\n...",
2104
+ # Direct-loaded: Technical spec from git
2105
+ config_spec = Ontology(
2106
+ name="config-schema",
2107
+ uri="git://example-org/docs/specs/config-schema.md",
2108
+ content="# Configuration Schema\n\nThis document defines...",
2098
2109
  extracted_data={
2099
- "type": "procedure",
2100
- "module": "F",
2101
- "section": "Panic Disorder",
2102
- "dsm5_criterion": "Panic Attack Specifier",
2110
+ "type": "specification",
2111
+ "format": "yaml",
2112
+ "version": "1.0",
2103
2113
  },
2104
- tags=["scid-5", "procedure", "anxiety"]
2114
+ tags=["config", "schema", "specification"]
2105
2115
  )
2106
2116
 
2107
2117
 
@@ -2215,7 +2225,7 @@ This schema includes the `search_rem` tool which supports:
2215
2225
  - **Optional**
2216
2226
 
2217
2227
  ',
2218
- '{"type": "object", "description": "Domain-specific knowledge - either agent-extracted or direct-loaded.\n\n Attributes:\n name: Human-readable label for this ontology instance\n uri: External source reference (git://, s3://, https://) for direct-loaded ontologies\n file_id: Foreign key to File entity (optional - only for agent-extracted)\n agent_schema_id: Schema that performed extraction (optional - only for agent-extracted)\n provider_name: LLM provider used for extraction (optional)\n model_name: Specific model used (optional)\n extracted_data: Structured data - either extracted by agent or parsed from source\n confidence_score: Optional confidence score from extraction (0.0-1.0)\n extraction_timestamp: When extraction was performed\n content: Text used for generating embedding\n\n Inherited from CoreModel:\n id: UUID or string identifier\n created_at: Entity creation timestamp\n updated_at: Last update timestamp\n deleted_at: Soft deletion timestamp\n tenant_id: Multi-tenancy isolation\n user_id: Ownership\n graph_edges: Relationships to other entities\n metadata: Flexible metadata storage\n tags: Classification tags\n\n Example Usage:\n # Agent-extracted: CV parsing\n cv_ontology = Ontology(\n name=\"john-doe-cv-2024\",\n file_id=\"file-uuid-123\",\n agent_schema_id=\"cv-parser-v1\",\n provider_name=\"anthropic\",\n model_name=\"claude-sonnet-4-5-20250929\",\n extracted_data={\n \"candidate_name\": \"John Doe\",\n \"skills\": [\"Python\", \"PostgreSQL\", \"Kubernetes\"],\n },\n confidence_score=0.95,\n tags=[\"cv\", \"engineering\"]\n )\n\n # Direct-loaded: Medical knowledge base from git\n disorder_ontology = Ontology(\n name=\"panic-disorder\",\n uri=\"git://bwolfson-siggie/Siggy-MVP/ontology/disorders/anxiety/panic-disorder.md\",\n content=\"# Panic Disorder\\n\\nPanic disorder is characterized by...\",\n extracted_data={\n \"type\": \"disorder\",\n \"category\": \"anxiety\",\n \"icd10\": \"F41.0\",\n \"dsm5_criteria\": [\"A\", \"B\", \"C\", \"D\"],\n },\n tags=[\"disorder\", \"anxiety\", \"dsm5\"]\n )\n\n # Direct-loaded: Clinical procedure from git\n scid_node = Ontology(\n name=\"scid-5-f1\",\n uri=\"git://bwolfson-siggie/Siggy-MVP/ontology/procedures/scid-5/module-f/scid-5-f1.md\",\n content=\"# scid-5-f1: Panic Attack Screening\\n\\n...\",\n extracted_data={\n \"type\": \"procedure\",\n \"module\": \"F\",\n \"section\": \"Panic Disorder\",\n \"dsm5_criterion\": \"Panic Attack Specifier\",\n },\n tags=[\"scid-5\", \"procedure\", \"anxiety\"]\n )\n \n\nThis agent can search the `ontologies` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri"}, "file_id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "title": "File Id"}, "agent_schema_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Agent Schema Id"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}, "extracted_data": {"anyOf": [{"additionalProperties": true, "type": "object"}, {"type": "null"}], "default": null, "title": "Extracted Data"}, "confidence_score": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Confidence Score"}, "extraction_timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Extraction Timestamp"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Content"}}, "required": ["name"], "json_schema_extra": {"table_name": "ontologies", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.ontology.Ontology", "tools": ["search_rem"], "default_search_table": "ontologies", "has_embeddings": true}}'::jsonb,
2228
+ '{"type": "object", "description": "Domain-specific knowledge - either agent-extracted or direct-loaded.\n\n Attributes:\n name: Human-readable label for this ontology instance\n uri: External source reference (git://, s3://, https://) for direct-loaded ontologies\n file_id: Foreign key to File entity (optional - only for agent-extracted)\n agent_schema_id: Schema that performed extraction (optional - only for agent-extracted)\n provider_name: LLM provider used for extraction (optional)\n model_name: Specific model used (optional)\n extracted_data: Structured data - either extracted by agent or parsed from source\n confidence_score: Optional confidence score from extraction (0.0-1.0)\n extraction_timestamp: When extraction was performed\n content: Text used for generating embedding\n\n Inherited from CoreModel:\n id: UUID or string identifier\n created_at: Entity creation timestamp\n updated_at: Last update timestamp\n deleted_at: Soft deletion timestamp\n tenant_id: Multi-tenancy isolation\n user_id: Ownership\n graph_edges: Relationships to other entities\n metadata: Flexible metadata storage\n tags: Classification tags\n\n Example Usage:\n # Agent-extracted: CV parsing\n cv_ontology = Ontology(\n name=\"john-doe-cv-2024\",\n file_id=\"file-uuid-123\",\n agent_schema_id=\"cv-parser-v1\",\n provider_name=\"anthropic\",\n model_name=\"claude-sonnet-4-5-20250929\",\n extracted_data={\n \"candidate_name\": \"John Doe\",\n \"skills\": [\"Python\", \"PostgreSQL\", \"Kubernetes\"],\n },\n confidence_score=0.95,\n tags=[\"cv\", \"engineering\"]\n )\n\n # Direct-loaded: Knowledge base from git\n api_docs = Ontology(\n name=\"rest-api-guide\",\n uri=\"git://example-org/docs/api/rest-api-guide.md\",\n content=\"# REST API Guide\\n\\nThis guide covers RESTful API design...\",\n extracted_data={\n \"type\": \"documentation\",\n \"category\": \"api\",\n \"version\": \"2.0\",\n },\n tags=[\"api\", \"rest\", \"documentation\"]\n )\n\n # Direct-loaded: Technical spec from git\n config_spec = Ontology(\n name=\"config-schema\",\n uri=\"git://example-org/docs/specs/config-schema.md\",\n content=\"# Configuration Schema\\n\\nThis document defines...\",\n extracted_data={\n \"type\": \"specification\",\n \"format\": \"yaml\",\n \"version\": \"1.0\",\n },\n tags=[\"config\", \"schema\", \"specification\"]\n )\n \n\nThis agent can search the `ontologies` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri"}, "file_id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "title": "File Id"}, "agent_schema_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Agent Schema Id"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}, "extracted_data": {"anyOf": [{"additionalProperties": true, "type": "object"}, {"type": "null"}], "default": null, "title": "Extracted Data"}, "confidence_score": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Confidence Score"}, "extraction_timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Extraction Timestamp"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Content"}}, "required": ["name"], "json_schema_extra": {"table_name": "ontologies", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.ontology.Ontology", "tools": ["search_rem"], "default_search_table": "ontologies", "has_embeddings": true}}'::jsonb,
2219
2229
  'entity',
2220
2230
  '{"table_name": "ontologies", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.ontology.Ontology"}'::jsonb
2221
2231
  )