remdb 0.3.118__py3-none-any.whl → 0.3.146__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (44)
  1. rem/agentic/agents/sse_simulator.py +2 -0
  2. rem/agentic/context.py +23 -3
  3. rem/agentic/mcp/tool_wrapper.py +126 -15
  4. rem/agentic/otel/setup.py +1 -0
  5. rem/agentic/providers/phoenix.py +371 -108
  6. rem/agentic/providers/pydantic_ai.py +122 -43
  7. rem/agentic/schema.py +4 -1
  8. rem/api/mcp_router/tools.py +13 -2
  9. rem/api/routers/chat/completions.py +250 -4
  10. rem/api/routers/chat/models.py +81 -7
  11. rem/api/routers/chat/otel_utils.py +33 -0
  12. rem/api/routers/chat/sse_events.py +17 -1
  13. rem/api/routers/chat/streaming.py +35 -1
  14. rem/api/routers/feedback.py +134 -14
  15. rem/auth/middleware.py +66 -1
  16. rem/cli/commands/cluster.py +590 -82
  17. rem/cli/commands/configure.py +3 -4
  18. rem/cli/commands/experiments.py +468 -76
  19. rem/cli/commands/session.py +336 -0
  20. rem/cli/dreaming.py +2 -2
  21. rem/cli/main.py +2 -0
  22. rem/config.py +8 -1
  23. rem/models/core/experiment.py +58 -14
  24. rem/models/entities/ontology.py +1 -1
  25. rem/models/entities/ontology_config.py +1 -1
  26. rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
  27. rem/schemas/agents/examples/contract-extractor.yaml +1 -1
  28. rem/schemas/agents/examples/cv-parser.yaml +1 -1
  29. rem/services/phoenix/client.py +59 -18
  30. rem/services/postgres/pydantic_to_sqlalchemy.py +9 -12
  31. rem/services/session/compression.py +7 -0
  32. rem/settings.py +260 -17
  33. rem/sql/migrations/002_install_models.sql +91 -91
  34. rem/sql/migrations/004_cache_system.sql +1 -1
  35. rem/utils/README.md +45 -0
  36. rem/utils/files.py +157 -1
  37. rem/utils/schema_loader.py +94 -3
  38. rem/utils/vision.py +1 -1
  39. rem/workers/__init__.py +2 -1
  40. rem/workers/db_listener.py +579 -0
  41. {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/METADATA +161 -147
  42. {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/RECORD +44 -41
  43. {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/WHEEL +0 -0
  44. {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/entry_points.txt +0 -0
@@ -56,11 +56,11 @@ CREATE TABLE IF NOT EXISTS feedbacks (
56
56
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
57
57
  );
58
58
 
59
- CREATE INDEX idx_feedbacks_tenant ON feedbacks (tenant_id);
60
- CREATE INDEX idx_feedbacks_user ON feedbacks (user_id);
61
- CREATE INDEX idx_feedbacks_graph_edges ON feedbacks USING GIN (graph_edges);
62
- CREATE INDEX idx_feedbacks_metadata ON feedbacks USING GIN (metadata);
63
- CREATE INDEX idx_feedbacks_tags ON feedbacks USING GIN (tags);
59
+ CREATE INDEX IF NOT EXISTS idx_feedbacks_tenant ON feedbacks (tenant_id);
60
+ CREATE INDEX IF NOT EXISTS idx_feedbacks_user ON feedbacks (user_id);
61
+ CREATE INDEX IF NOT EXISTS idx_feedbacks_graph_edges ON feedbacks USING GIN (graph_edges);
62
+ CREATE INDEX IF NOT EXISTS idx_feedbacks_metadata ON feedbacks USING GIN (metadata);
63
+ CREATE INDEX IF NOT EXISTS idx_feedbacks_tags ON feedbacks USING GIN (tags);
64
64
 
65
65
  -- KV_STORE trigger for feedbacks
66
66
  -- Trigger function to maintain KV_STORE for feedbacks
@@ -135,11 +135,11 @@ CREATE TABLE IF NOT EXISTS files (
135
135
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
136
136
  );
137
137
 
138
- CREATE INDEX idx_files_tenant ON files (tenant_id);
139
- CREATE INDEX idx_files_user ON files (user_id);
140
- CREATE INDEX idx_files_graph_edges ON files USING GIN (graph_edges);
141
- CREATE INDEX idx_files_metadata ON files USING GIN (metadata);
142
- CREATE INDEX idx_files_tags ON files USING GIN (tags);
138
+ CREATE INDEX IF NOT EXISTS idx_files_tenant ON files (tenant_id);
139
+ CREATE INDEX IF NOT EXISTS idx_files_user ON files (user_id);
140
+ CREATE INDEX IF NOT EXISTS idx_files_graph_edges ON files USING GIN (graph_edges);
141
+ CREATE INDEX IF NOT EXISTS idx_files_metadata ON files USING GIN (metadata);
142
+ CREATE INDEX IF NOT EXISTS idx_files_tags ON files USING GIN (tags);
143
143
 
144
144
  -- Embeddings for files
145
145
  CREATE TABLE IF NOT EXISTS embeddings_files (
@@ -157,14 +157,14 @@ CREATE TABLE IF NOT EXISTS embeddings_files (
157
157
  );
158
158
 
159
159
  -- Index for entity lookup (get all embeddings for entity)
160
- CREATE INDEX idx_embeddings_files_entity ON embeddings_files (entity_id);
160
+ CREATE INDEX IF NOT EXISTS idx_embeddings_files_entity ON embeddings_files (entity_id);
161
161
 
162
162
  -- Index for field + provider lookup
163
- CREATE INDEX idx_embeddings_files_field_provider ON embeddings_files (field_name, provider);
163
+ CREATE INDEX IF NOT EXISTS idx_embeddings_files_field_provider ON embeddings_files (field_name, provider);
164
164
 
165
165
  -- HNSW index for vector similarity search (created in background)
166
166
  -- Note: This will be created by background thread after data load
167
- -- CREATE INDEX idx_embeddings_files_vector_hnsw ON embeddings_files
167
+ -- CREATE INDEX IF NOT EXISTS idx_embeddings_files_vector_hnsw ON embeddings_files
168
168
  -- USING hnsw (embedding vector_cosine_ops);
169
169
 
170
170
  -- KV_STORE trigger for files
@@ -248,11 +248,11 @@ CREATE TABLE IF NOT EXISTS image_resources (
248
248
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
249
249
  );
250
250
 
251
- CREATE INDEX idx_image_resources_tenant ON image_resources (tenant_id);
252
- CREATE INDEX idx_image_resources_user ON image_resources (user_id);
253
- CREATE INDEX idx_image_resources_graph_edges ON image_resources USING GIN (graph_edges);
254
- CREATE INDEX idx_image_resources_metadata ON image_resources USING GIN (metadata);
255
- CREATE INDEX idx_image_resources_tags ON image_resources USING GIN (tags);
251
+ CREATE INDEX IF NOT EXISTS idx_image_resources_tenant ON image_resources (tenant_id);
252
+ CREATE INDEX IF NOT EXISTS idx_image_resources_user ON image_resources (user_id);
253
+ CREATE INDEX IF NOT EXISTS idx_image_resources_graph_edges ON image_resources USING GIN (graph_edges);
254
+ CREATE INDEX IF NOT EXISTS idx_image_resources_metadata ON image_resources USING GIN (metadata);
255
+ CREATE INDEX IF NOT EXISTS idx_image_resources_tags ON image_resources USING GIN (tags);
256
256
 
257
257
  -- Embeddings for image_resources
258
258
  CREATE TABLE IF NOT EXISTS embeddings_image_resources (
@@ -270,14 +270,14 @@ CREATE TABLE IF NOT EXISTS embeddings_image_resources (
270
270
  );
271
271
 
272
272
  -- Index for entity lookup (get all embeddings for entity)
273
- CREATE INDEX idx_embeddings_image_resources_entity ON embeddings_image_resources (entity_id);
273
+ CREATE INDEX IF NOT EXISTS idx_embeddings_image_resources_entity ON embeddings_image_resources (entity_id);
274
274
 
275
275
  -- Index for field + provider lookup
276
- CREATE INDEX idx_embeddings_image_resources_field_provider ON embeddings_image_resources (field_name, provider);
276
+ CREATE INDEX IF NOT EXISTS idx_embeddings_image_resources_field_provider ON embeddings_image_resources (field_name, provider);
277
277
 
278
278
  -- HNSW index for vector similarity search (created in background)
279
279
  -- Note: This will be created by background thread after data load
280
- -- CREATE INDEX idx_embeddings_image_resources_vector_hnsw ON embeddings_image_resources
280
+ -- CREATE INDEX IF NOT EXISTS idx_embeddings_image_resources_vector_hnsw ON embeddings_image_resources
281
281
  -- USING hnsw (embedding vector_cosine_ops);
282
282
 
283
283
  -- KV_STORE trigger for image_resources
@@ -354,11 +354,11 @@ CREATE TABLE IF NOT EXISTS messages (
354
354
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
355
355
  );
356
356
 
357
- CREATE INDEX idx_messages_tenant ON messages (tenant_id);
358
- CREATE INDEX idx_messages_user ON messages (user_id);
359
- CREATE INDEX idx_messages_graph_edges ON messages USING GIN (graph_edges);
360
- CREATE INDEX idx_messages_metadata ON messages USING GIN (metadata);
361
- CREATE INDEX idx_messages_tags ON messages USING GIN (tags);
357
+ CREATE INDEX IF NOT EXISTS idx_messages_tenant ON messages (tenant_id);
358
+ CREATE INDEX IF NOT EXISTS idx_messages_user ON messages (user_id);
359
+ CREATE INDEX IF NOT EXISTS idx_messages_graph_edges ON messages USING GIN (graph_edges);
360
+ CREATE INDEX IF NOT EXISTS idx_messages_metadata ON messages USING GIN (metadata);
361
+ CREATE INDEX IF NOT EXISTS idx_messages_tags ON messages USING GIN (tags);
362
362
 
363
363
  -- Embeddings for messages
364
364
  CREATE TABLE IF NOT EXISTS embeddings_messages (
@@ -376,14 +376,14 @@ CREATE TABLE IF NOT EXISTS embeddings_messages (
376
376
  );
377
377
 
378
378
  -- Index for entity lookup (get all embeddings for entity)
379
- CREATE INDEX idx_embeddings_messages_entity ON embeddings_messages (entity_id);
379
+ CREATE INDEX IF NOT EXISTS idx_embeddings_messages_entity ON embeddings_messages (entity_id);
380
380
 
381
381
  -- Index for field + provider lookup
382
- CREATE INDEX idx_embeddings_messages_field_provider ON embeddings_messages (field_name, provider);
382
+ CREATE INDEX IF NOT EXISTS idx_embeddings_messages_field_provider ON embeddings_messages (field_name, provider);
383
383
 
384
384
  -- HNSW index for vector similarity search (created in background)
385
385
  -- Note: This will be created by background thread after data load
386
- -- CREATE INDEX idx_embeddings_messages_vector_hnsw ON embeddings_messages
386
+ -- CREATE INDEX IF NOT EXISTS idx_embeddings_messages_vector_hnsw ON embeddings_messages
387
387
  -- USING hnsw (embedding vector_cosine_ops);
388
388
 
389
389
  -- KV_STORE trigger for messages
@@ -462,11 +462,11 @@ CREATE TABLE IF NOT EXISTS moments (
462
462
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
463
463
  );
464
464
 
465
- CREATE INDEX idx_moments_tenant ON moments (tenant_id);
466
- CREATE INDEX idx_moments_user ON moments (user_id);
467
- CREATE INDEX idx_moments_graph_edges ON moments USING GIN (graph_edges);
468
- CREATE INDEX idx_moments_metadata ON moments USING GIN (metadata);
469
- CREATE INDEX idx_moments_tags ON moments USING GIN (tags);
465
+ CREATE INDEX IF NOT EXISTS idx_moments_tenant ON moments (tenant_id);
466
+ CREATE INDEX IF NOT EXISTS idx_moments_user ON moments (user_id);
467
+ CREATE INDEX IF NOT EXISTS idx_moments_graph_edges ON moments USING GIN (graph_edges);
468
+ CREATE INDEX IF NOT EXISTS idx_moments_metadata ON moments USING GIN (metadata);
469
+ CREATE INDEX IF NOT EXISTS idx_moments_tags ON moments USING GIN (tags);
470
470
 
471
471
  -- Embeddings for moments
472
472
  CREATE TABLE IF NOT EXISTS embeddings_moments (
@@ -484,14 +484,14 @@ CREATE TABLE IF NOT EXISTS embeddings_moments (
484
484
  );
485
485
 
486
486
  -- Index for entity lookup (get all embeddings for entity)
487
- CREATE INDEX idx_embeddings_moments_entity ON embeddings_moments (entity_id);
487
+ CREATE INDEX IF NOT EXISTS idx_embeddings_moments_entity ON embeddings_moments (entity_id);
488
488
 
489
489
  -- Index for field + provider lookup
490
- CREATE INDEX idx_embeddings_moments_field_provider ON embeddings_moments (field_name, provider);
490
+ CREATE INDEX IF NOT EXISTS idx_embeddings_moments_field_provider ON embeddings_moments (field_name, provider);
491
491
 
492
492
  -- HNSW index for vector similarity search (created in background)
493
493
  -- Note: This will be created by background thread after data load
494
- -- CREATE INDEX idx_embeddings_moments_vector_hnsw ON embeddings_moments
494
+ -- CREATE INDEX IF NOT EXISTS idx_embeddings_moments_vector_hnsw ON embeddings_moments
495
495
  -- USING hnsw (embedding vector_cosine_ops);
496
496
 
497
497
  -- KV_STORE trigger for moments
@@ -569,11 +569,11 @@ CREATE TABLE IF NOT EXISTS ontologies (
569
569
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
570
570
  );
571
571
 
572
- CREATE INDEX idx_ontologies_tenant ON ontologies (tenant_id);
573
- CREATE INDEX idx_ontologies_user ON ontologies (user_id);
574
- CREATE INDEX idx_ontologies_graph_edges ON ontologies USING GIN (graph_edges);
575
- CREATE INDEX idx_ontologies_metadata ON ontologies USING GIN (metadata);
576
- CREATE INDEX idx_ontologies_tags ON ontologies USING GIN (tags);
572
+ CREATE INDEX IF NOT EXISTS idx_ontologies_tenant ON ontologies (tenant_id);
573
+ CREATE INDEX IF NOT EXISTS idx_ontologies_user ON ontologies (user_id);
574
+ CREATE INDEX IF NOT EXISTS idx_ontologies_graph_edges ON ontologies USING GIN (graph_edges);
575
+ CREATE INDEX IF NOT EXISTS idx_ontologies_metadata ON ontologies USING GIN (metadata);
576
+ CREATE INDEX IF NOT EXISTS idx_ontologies_tags ON ontologies USING GIN (tags);
577
577
 
578
578
  -- KV_STORE trigger for ontologies
579
579
  -- Trigger function to maintain KV_STORE for ontologies
@@ -651,11 +651,11 @@ CREATE TABLE IF NOT EXISTS ontology_configs (
651
651
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
652
652
  );
653
653
 
654
- CREATE INDEX idx_ontology_configs_tenant ON ontology_configs (tenant_id);
655
- CREATE INDEX idx_ontology_configs_user ON ontology_configs (user_id);
656
- CREATE INDEX idx_ontology_configs_graph_edges ON ontology_configs USING GIN (graph_edges);
657
- CREATE INDEX idx_ontology_configs_metadata ON ontology_configs USING GIN (metadata);
658
- CREATE INDEX idx_ontology_configs_tags ON ontology_configs USING GIN (tags);
654
+ CREATE INDEX IF NOT EXISTS idx_ontology_configs_tenant ON ontology_configs (tenant_id);
655
+ CREATE INDEX IF NOT EXISTS idx_ontology_configs_user ON ontology_configs (user_id);
656
+ CREATE INDEX IF NOT EXISTS idx_ontology_configs_graph_edges ON ontology_configs USING GIN (graph_edges);
657
+ CREATE INDEX IF NOT EXISTS idx_ontology_configs_metadata ON ontology_configs USING GIN (metadata);
658
+ CREATE INDEX IF NOT EXISTS idx_ontology_configs_tags ON ontology_configs USING GIN (tags);
659
659
 
660
660
  -- Embeddings for ontology_configs
661
661
  CREATE TABLE IF NOT EXISTS embeddings_ontology_configs (
@@ -673,14 +673,14 @@ CREATE TABLE IF NOT EXISTS embeddings_ontology_configs (
673
673
  );
674
674
 
675
675
  -- Index for entity lookup (get all embeddings for entity)
676
- CREATE INDEX idx_embeddings_ontology_configs_entity ON embeddings_ontology_configs (entity_id);
676
+ CREATE INDEX IF NOT EXISTS idx_embeddings_ontology_configs_entity ON embeddings_ontology_configs (entity_id);
677
677
 
678
678
  -- Index for field + provider lookup
679
- CREATE INDEX idx_embeddings_ontology_configs_field_provider ON embeddings_ontology_configs (field_name, provider);
679
+ CREATE INDEX IF NOT EXISTS idx_embeddings_ontology_configs_field_provider ON embeddings_ontology_configs (field_name, provider);
680
680
 
681
681
  -- HNSW index for vector similarity search (created in background)
682
682
  -- Note: This will be created by background thread after data load
683
- -- CREATE INDEX idx_embeddings_ontology_configs_vector_hnsw ON embeddings_ontology_configs
683
+ -- CREATE INDEX IF NOT EXISTS idx_embeddings_ontology_configs_vector_hnsw ON embeddings_ontology_configs
684
684
  -- USING hnsw (embedding vector_cosine_ops);
685
685
 
686
686
  -- KV_STORE trigger for ontology_configs
@@ -756,11 +756,11 @@ CREATE TABLE IF NOT EXISTS resources (
756
756
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
757
757
  );
758
758
 
759
- CREATE INDEX idx_resources_tenant ON resources (tenant_id);
760
- CREATE INDEX idx_resources_user ON resources (user_id);
761
- CREATE INDEX idx_resources_graph_edges ON resources USING GIN (graph_edges);
762
- CREATE INDEX idx_resources_metadata ON resources USING GIN (metadata);
763
- CREATE INDEX idx_resources_tags ON resources USING GIN (tags);
759
+ CREATE INDEX IF NOT EXISTS idx_resources_tenant ON resources (tenant_id);
760
+ CREATE INDEX IF NOT EXISTS idx_resources_user ON resources (user_id);
761
+ CREATE INDEX IF NOT EXISTS idx_resources_graph_edges ON resources USING GIN (graph_edges);
762
+ CREATE INDEX IF NOT EXISTS idx_resources_metadata ON resources USING GIN (metadata);
763
+ CREATE INDEX IF NOT EXISTS idx_resources_tags ON resources USING GIN (tags);
764
764
 
765
765
  -- Embeddings for resources
766
766
  CREATE TABLE IF NOT EXISTS embeddings_resources (
@@ -778,14 +778,14 @@ CREATE TABLE IF NOT EXISTS embeddings_resources (
778
778
  );
779
779
 
780
780
  -- Index for entity lookup (get all embeddings for entity)
781
- CREATE INDEX idx_embeddings_resources_entity ON embeddings_resources (entity_id);
781
+ CREATE INDEX IF NOT EXISTS idx_embeddings_resources_entity ON embeddings_resources (entity_id);
782
782
 
783
783
  -- Index for field + provider lookup
784
- CREATE INDEX idx_embeddings_resources_field_provider ON embeddings_resources (field_name, provider);
784
+ CREATE INDEX IF NOT EXISTS idx_embeddings_resources_field_provider ON embeddings_resources (field_name, provider);
785
785
 
786
786
  -- HNSW index for vector similarity search (created in background)
787
787
  -- Note: This will be created by background thread after data load
788
- -- CREATE INDEX idx_embeddings_resources_vector_hnsw ON embeddings_resources
788
+ -- CREATE INDEX IF NOT EXISTS idx_embeddings_resources_vector_hnsw ON embeddings_resources
789
789
  -- USING hnsw (embedding vector_cosine_ops);
790
790
 
791
791
  -- KV_STORE trigger for resources
@@ -860,11 +860,11 @@ CREATE TABLE IF NOT EXISTS schemas (
860
860
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
861
861
  );
862
862
 
863
- CREATE INDEX idx_schemas_tenant ON schemas (tenant_id);
864
- CREATE INDEX idx_schemas_user ON schemas (user_id);
865
- CREATE INDEX idx_schemas_graph_edges ON schemas USING GIN (graph_edges);
866
- CREATE INDEX idx_schemas_metadata ON schemas USING GIN (metadata);
867
- CREATE INDEX idx_schemas_tags ON schemas USING GIN (tags);
863
+ CREATE INDEX IF NOT EXISTS idx_schemas_tenant ON schemas (tenant_id);
864
+ CREATE INDEX IF NOT EXISTS idx_schemas_user ON schemas (user_id);
865
+ CREATE INDEX IF NOT EXISTS idx_schemas_graph_edges ON schemas USING GIN (graph_edges);
866
+ CREATE INDEX IF NOT EXISTS idx_schemas_metadata ON schemas USING GIN (metadata);
867
+ CREATE INDEX IF NOT EXISTS idx_schemas_tags ON schemas USING GIN (tags);
868
868
 
869
869
  -- Embeddings for schemas
870
870
  CREATE TABLE IF NOT EXISTS embeddings_schemas (
@@ -882,14 +882,14 @@ CREATE TABLE IF NOT EXISTS embeddings_schemas (
882
882
  );
883
883
 
884
884
  -- Index for entity lookup (get all embeddings for entity)
885
- CREATE INDEX idx_embeddings_schemas_entity ON embeddings_schemas (entity_id);
885
+ CREATE INDEX IF NOT EXISTS idx_embeddings_schemas_entity ON embeddings_schemas (entity_id);
886
886
 
887
887
  -- Index for field + provider lookup
888
- CREATE INDEX idx_embeddings_schemas_field_provider ON embeddings_schemas (field_name, provider);
888
+ CREATE INDEX IF NOT EXISTS idx_embeddings_schemas_field_provider ON embeddings_schemas (field_name, provider);
889
889
 
890
890
  -- HNSW index for vector similarity search (created in background)
891
891
  -- Note: This will be created by background thread after data load
892
- -- CREATE INDEX idx_embeddings_schemas_vector_hnsw ON embeddings_schemas
892
+ -- CREATE INDEX IF NOT EXISTS idx_embeddings_schemas_vector_hnsw ON embeddings_schemas
893
893
  -- USING hnsw (embedding vector_cosine_ops);
894
894
 
895
895
  -- KV_STORE trigger for schemas
@@ -967,11 +967,11 @@ CREATE TABLE IF NOT EXISTS sessions (
967
967
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
968
968
  );
969
969
 
970
- CREATE INDEX idx_sessions_tenant ON sessions (tenant_id);
971
- CREATE INDEX idx_sessions_user ON sessions (user_id);
972
- CREATE INDEX idx_sessions_graph_edges ON sessions USING GIN (graph_edges);
973
- CREATE INDEX idx_sessions_metadata ON sessions USING GIN (metadata);
974
- CREATE INDEX idx_sessions_tags ON sessions USING GIN (tags);
970
+ CREATE INDEX IF NOT EXISTS idx_sessions_tenant ON sessions (tenant_id);
971
+ CREATE INDEX IF NOT EXISTS idx_sessions_user ON sessions (user_id);
972
+ CREATE INDEX IF NOT EXISTS idx_sessions_graph_edges ON sessions USING GIN (graph_edges);
973
+ CREATE INDEX IF NOT EXISTS idx_sessions_metadata ON sessions USING GIN (metadata);
974
+ CREATE INDEX IF NOT EXISTS idx_sessions_tags ON sessions USING GIN (tags);
975
975
 
976
976
  -- Embeddings for sessions
977
977
  CREATE TABLE IF NOT EXISTS embeddings_sessions (
@@ -989,14 +989,14 @@ CREATE TABLE IF NOT EXISTS embeddings_sessions (
989
989
  );
990
990
 
991
991
  -- Index for entity lookup (get all embeddings for entity)
992
- CREATE INDEX idx_embeddings_sessions_entity ON embeddings_sessions (entity_id);
992
+ CREATE INDEX IF NOT EXISTS idx_embeddings_sessions_entity ON embeddings_sessions (entity_id);
993
993
 
994
994
  -- Index for field + provider lookup
995
- CREATE INDEX idx_embeddings_sessions_field_provider ON embeddings_sessions (field_name, provider);
995
+ CREATE INDEX IF NOT EXISTS idx_embeddings_sessions_field_provider ON embeddings_sessions (field_name, provider);
996
996
 
997
997
  -- HNSW index for vector similarity search (created in background)
998
998
  -- Note: This will be created by background thread after data load
999
- -- CREATE INDEX idx_embeddings_sessions_vector_hnsw ON embeddings_sessions
999
+ -- CREATE INDEX IF NOT EXISTS idx_embeddings_sessions_vector_hnsw ON embeddings_sessions
1000
1000
  -- USING hnsw (embedding vector_cosine_ops);
1001
1001
 
1002
1002
  -- KV_STORE trigger for sessions
@@ -1068,11 +1068,11 @@ CREATE TABLE IF NOT EXISTS shared_sessions (
1068
1068
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
1069
1069
  );
1070
1070
 
1071
- CREATE INDEX idx_shared_sessions_tenant ON shared_sessions (tenant_id);
1072
- CREATE INDEX idx_shared_sessions_user ON shared_sessions (user_id);
1073
- CREATE INDEX idx_shared_sessions_graph_edges ON shared_sessions USING GIN (graph_edges);
1074
- CREATE INDEX idx_shared_sessions_metadata ON shared_sessions USING GIN (metadata);
1075
- CREATE INDEX idx_shared_sessions_tags ON shared_sessions USING GIN (tags);
1071
+ CREATE INDEX IF NOT EXISTS idx_shared_sessions_tenant ON shared_sessions (tenant_id);
1072
+ CREATE INDEX IF NOT EXISTS idx_shared_sessions_user ON shared_sessions (user_id);
1073
+ CREATE INDEX IF NOT EXISTS idx_shared_sessions_graph_edges ON shared_sessions USING GIN (graph_edges);
1074
+ CREATE INDEX IF NOT EXISTS idx_shared_sessions_metadata ON shared_sessions USING GIN (metadata);
1075
+ CREATE INDEX IF NOT EXISTS idx_shared_sessions_tags ON shared_sessions USING GIN (tags);
1076
1076
 
1077
1077
  -- KV_STORE trigger for shared_sessions
1078
1078
  -- Trigger function to maintain KV_STORE for shared_sessions
@@ -1151,11 +1151,11 @@ CREATE TABLE IF NOT EXISTS users (
1151
1151
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
1152
1152
  );
1153
1153
 
1154
- CREATE INDEX idx_users_tenant ON users (tenant_id);
1155
- CREATE INDEX idx_users_user ON users (user_id);
1156
- CREATE INDEX idx_users_graph_edges ON users USING GIN (graph_edges);
1157
- CREATE INDEX idx_users_metadata ON users USING GIN (metadata);
1158
- CREATE INDEX idx_users_tags ON users USING GIN (tags);
1154
+ CREATE INDEX IF NOT EXISTS idx_users_tenant ON users (tenant_id);
1155
+ CREATE INDEX IF NOT EXISTS idx_users_user ON users (user_id);
1156
+ CREATE INDEX IF NOT EXISTS idx_users_graph_edges ON users USING GIN (graph_edges);
1157
+ CREATE INDEX IF NOT EXISTS idx_users_metadata ON users USING GIN (metadata);
1158
+ CREATE INDEX IF NOT EXISTS idx_users_tags ON users USING GIN (tags);
1159
1159
 
1160
1160
  -- Embeddings for users
1161
1161
  CREATE TABLE IF NOT EXISTS embeddings_users (
@@ -1173,14 +1173,14 @@ CREATE TABLE IF NOT EXISTS embeddings_users (
1173
1173
  );
1174
1174
 
1175
1175
  -- Index for entity lookup (get all embeddings for entity)
1176
- CREATE INDEX idx_embeddings_users_entity ON embeddings_users (entity_id);
1176
+ CREATE INDEX IF NOT EXISTS idx_embeddings_users_entity ON embeddings_users (entity_id);
1177
1177
 
1178
1178
  -- Index for field + provider lookup
1179
- CREATE INDEX idx_embeddings_users_field_provider ON embeddings_users (field_name, provider);
1179
+ CREATE INDEX IF NOT EXISTS idx_embeddings_users_field_provider ON embeddings_users (field_name, provider);
1180
1180
 
1181
1181
  -- HNSW index for vector similarity search (created in background)
1182
1182
  -- Note: This will be created by background thread after data load
1183
- -- CREATE INDEX idx_embeddings_users_vector_hnsw ON embeddings_users
1183
+ -- CREATE INDEX IF NOT EXISTS idx_embeddings_users_vector_hnsw ON embeddings_users
1184
1184
  -- USING hnsw (embedding vector_cosine_ops);
1185
1185
 
1186
1186
  -- KV_STORE trigger for users
@@ -2067,7 +2067,7 @@ Domain-specific knowledge extracted from files using custom agents.
2067
2067
  file_id="file-uuid-456",
2068
2068
  agent_schema_id="contract-parser-v2",
2069
2069
  provider_name="openai",
2070
- model_name="gpt-4o",
2070
+ model_name="gpt-4.1",
2071
2071
  extracted_data={
2072
2072
  "contract_type": "supplier_agreement",
2073
2073
  "parties": [
@@ -2197,7 +2197,7 @@ This schema includes the `search_rem` tool which supports:
2197
2197
  - **Optional**
2198
2198
 
2199
2199
  ',
2200
- '{"type": "object", "description": "Domain-specific knowledge extracted from files using custom agents.\n\n Attributes:\n name: Human-readable label for this ontology instance\n file_id: Foreign key to File entity that was processed\n agent_schema_id: Foreign key to Schema entity that performed extraction\n provider_name: LLM provider used for extraction (e.g., \"anthropic\", \"openai\")\n model_name: Specific model used (e.g., \"claude-sonnet-4-5\")\n extracted_data: Structured data extracted by agent (arbitrary JSON)\n confidence_score: Optional confidence score from extraction (0.0-1.0)\n extraction_timestamp: When extraction was performed\n embedding_text: Text used for generating embedding (derived from extracted_data)\n\n Inherited from CoreModel:\n id: UUID or string identifier\n created_at: Entity creation timestamp\n updated_at: Last update timestamp\n deleted_at: Soft deletion timestamp\n tenant_id: Multi-tenancy isolation\n user_id: Ownership\n graph_edges: Relationships to other entities\n metadata: Flexible metadata storage\n tags: Classification tags\n column: Database schema metadata\n\n Example Usage:\n # CV extraction\n cv_ontology = Ontology(\n name=\"john-doe-cv-2024\",\n file_id=\"file-uuid-123\",\n agent_schema_id=\"cv-parser-v1\",\n provider_name=\"anthropic\",\n model_name=\"claude-sonnet-4-5-20250929\",\n extracted_data={\n \"candidate_name\": \"John Doe\",\n \"email\": \"john@example.com\",\n \"skills\": [\"Python\", \"PostgreSQL\", \"Kubernetes\"],\n \"experience\": [\n {\n \"company\": \"TechCorp\",\n \"role\": \"Senior Engineer\",\n \"years\": 3,\n \"achievements\": [\"Led migration to k8s\", \"Reduced costs 40%\"]\n }\n ],\n \"education\": [\n {\"degree\": \"BS Computer Science\", \"institution\": \"MIT\", \"year\": 2018}\n ]\n },\n confidence_score=0.95,\n tags=[\"cv\", \"engineering\", \"senior-level\"]\n )\n\n # Contract extraction\n contract_ontology = Ontology(\n name=\"acme-supplier-agreement-2024\",\n 
file_id=\"file-uuid-456\",\n agent_schema_id=\"contract-parser-v2\",\n provider_name=\"openai\",\n model_name=\"gpt-4o\",\n extracted_data={\n \"contract_type\": \"supplier_agreement\",\n \"parties\": [\n {\"name\": \"ACME Corp\", \"role\": \"buyer\"},\n {\"name\": \"SupplyChain Inc\", \"role\": \"supplier\"}\n ],\n \"effective_date\": \"2024-01-01\",\n \"termination_date\": \"2026-12-31\",\n \"payment_terms\": {\n \"amount\": 500000,\n \"currency\": \"USD\",\n \"frequency\": \"quarterly\"\n },\n \"key_obligations\": [\n \"Supplier must deliver within 30 days\",\n \"Buyer must pay within 60 days of invoice\"\n ]\n },\n confidence_score=0.92,\n tags=[\"contract\", \"supplier\", \"procurement\"]\n )\n \n\nThis agent can search the `ontologies` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. 
Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "file_id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}], "title": "File Id"}, "agent_schema_id": {"title": "Agent Schema Id", "type": "string"}, "provider_name": {"title": "Provider Name", "type": "string"}, "model_name": {"title": "Model Name", "type": "string"}, "extracted_data": {"additionalProperties": true, "title": "Extracted Data", "type": "object"}, "confidence_score": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Confidence Score"}, "extraction_timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Extraction Timestamp"}, "embedding_text": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Embedding Text"}}, "required": ["name", "file_id", "agent_schema_id", "provider_name", "model_name", "extracted_data"], "json_schema_extra": {"table_name": "ontologies", "entity_key_field": "id", "embedding_fields": [], "fully_qualified_name": "rem.models.entities.ontology.Ontology", "tools": ["search_rem"], "default_search_table": "ontologies", "has_embeddings": false}}'::jsonb,
2200
+ '{"type": "object", "description": "Domain-specific knowledge extracted from files using custom agents.\n\n Attributes:\n name: Human-readable label for this ontology instance\n file_id: Foreign key to File entity that was processed\n agent_schema_id: Foreign key to Schema entity that performed extraction\n provider_name: LLM provider used for extraction (e.g., \"anthropic\", \"openai\")\n model_name: Specific model used (e.g., \"claude-sonnet-4-5\")\n extracted_data: Structured data extracted by agent (arbitrary JSON)\n confidence_score: Optional confidence score from extraction (0.0-1.0)\n extraction_timestamp: When extraction was performed\n embedding_text: Text used for generating embedding (derived from extracted_data)\n\n Inherited from CoreModel:\n id: UUID or string identifier\n created_at: Entity creation timestamp\n updated_at: Last update timestamp\n deleted_at: Soft deletion timestamp\n tenant_id: Multi-tenancy isolation\n user_id: Ownership\n graph_edges: Relationships to other entities\n metadata: Flexible metadata storage\n tags: Classification tags\n column: Database schema metadata\n\n Example Usage:\n # CV extraction\n cv_ontology = Ontology(\n name=\"john-doe-cv-2024\",\n file_id=\"file-uuid-123\",\n agent_schema_id=\"cv-parser-v1\",\n provider_name=\"anthropic\",\n model_name=\"claude-sonnet-4-5-20250929\",\n extracted_data={\n \"candidate_name\": \"John Doe\",\n \"email\": \"john@example.com\",\n \"skills\": [\"Python\", \"PostgreSQL\", \"Kubernetes\"],\n \"experience\": [\n {\n \"company\": \"TechCorp\",\n \"role\": \"Senior Engineer\",\n \"years\": 3,\n \"achievements\": [\"Led migration to k8s\", \"Reduced costs 40%\"]\n }\n ],\n \"education\": [\n {\"degree\": \"BS Computer Science\", \"institution\": \"MIT\", \"year\": 2018}\n ]\n },\n confidence_score=0.95,\n tags=[\"cv\", \"engineering\", \"senior-level\"]\n )\n\n # Contract extraction\n contract_ontology = Ontology(\n name=\"acme-supplier-agreement-2024\",\n 
file_id=\"file-uuid-456\",\n agent_schema_id=\"contract-parser-v2\",\n provider_name=\"openai\",\n model_name=\"gpt-4.1\",\n extracted_data={\n \"contract_type\": \"supplier_agreement\",\n \"parties\": [\n {\"name\": \"ACME Corp\", \"role\": \"buyer\"},\n {\"name\": \"SupplyChain Inc\", \"role\": \"supplier\"}\n ],\n \"effective_date\": \"2024-01-01\",\n \"termination_date\": \"2026-12-31\",\n \"payment_terms\": {\n \"amount\": 500000,\n \"currency\": \"USD\",\n \"frequency\": \"quarterly\"\n },\n \"key_obligations\": [\n \"Supplier must deliver within 30 days\",\n \"Buyer must pay within 60 days of invoice\"\n ]\n },\n confidence_score=0.92,\n tags=[\"contract\", \"supplier\", \"procurement\"]\n )\n \n\nThis agent can search the `ontologies` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. 
Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "file_id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}], "title": "File Id"}, "agent_schema_id": {"title": "Agent Schema Id", "type": "string"}, "provider_name": {"title": "Provider Name", "type": "string"}, "model_name": {"title": "Model Name", "type": "string"}, "extracted_data": {"additionalProperties": true, "title": "Extracted Data", "type": "object"}, "confidence_score": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Confidence Score"}, "extraction_timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Extraction Timestamp"}, "embedding_text": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Embedding Text"}}, "required": ["name", "file_id", "agent_schema_id", "provider_name", "model_name", "extracted_data"], "json_schema_extra": {"table_name": "ontologies", "entity_key_field": "id", "embedding_fields": [], "fully_qualified_name": "rem.models.entities.ontology.Ontology", "tools": ["search_rem"], "default_search_table": "ontologies", "has_embeddings": false}}'::jsonb,
2201
2201
  'entity',
2202
2202
  '{"table_name": "ontologies", "entity_key_field": "id", "embedding_fields": [], "fqn": "rem.models.entities.ontology.Ontology"}'::jsonb
2203
2203
  )
@@ -2266,7 +2266,7 @@ User configuration for automatic ontology extraction.
2266
2266
  priority=200, # Higher priority = runs first
2267
2267
  enabled=True,
2268
2268
  provider_name="openai", # Override default provider
2269
- model_name="gpt-4o",
2269
+ model_name="gpt-4.1",
2270
2270
  tenant_id="acme-corp",
2271
2271
  tags=["legal", "procurement"]
2272
2272
  )
@@ -2395,7 +2395,7 @@ This schema includes the `search_rem` tool which supports:
2395
2395
  - **Optional**
2396
2396
 
2397
2397
  ',
2398
- '{"type": "object", "description": "User configuration for automatic ontology extraction.\n\n Attributes:\n name: Human-readable config name\n agent_schema_id: Foreign key to Schema entity to use for extraction\n description: Purpose and scope of this config\n\n # File matching rules (ANY matching rule triggers extraction)\n mime_type_pattern: Regex pattern for file MIME types (e.g., \"application/pdf\")\n uri_pattern: Regex pattern for file URIs (e.g., \"s3://bucket/resumes/.*\")\n tag_filter: List of tags (file must have ALL tags to match)\n\n # Execution control\n priority: Execution order (higher = earlier, default 100)\n enabled: Whether this config is active (default True)\n\n # LLM provider configuration\n provider_name: Optional LLM provider override (defaults to settings)\n model_name: Optional model override (defaults to settings)\n\n Inherited from CoreModel:\n id, created_at, updated_at, deleted_at, tenant_id, user_id,\n graph_edges, metadata, tags, column\n\n Example Usage:\n # CV extraction for recruitment\n cv_config = OntologyConfig(\n name=\"recruitment-cv-parser\",\n agent_schema_id=\"cv-parser-v1\",\n description=\"Extract candidate information from resumes\",\n mime_type_pattern=\"application/pdf\",\n uri_pattern=\".*/resumes/.*\",\n tag_filter=[\"cv\", \"candidate\"],\n priority=100,\n enabled=True,\n tenant_id=\"acme-corp\",\n tags=[\"recruitment\", \"hr\"]\n )\n\n # Contract analysis for legal team\n contract_config = OntologyConfig(\n name=\"legal-contract-analyzer\",\n agent_schema_id=\"contract-parser-v2\",\n description=\"Extract key terms from supplier contracts\",\n mime_type_pattern=\"application/(pdf|msword|vnd.openxmlformats.*)\",\n tag_filter=[\"legal\", \"contract\"],\n priority=200, # Higher priority = runs first\n enabled=True,\n provider_name=\"openai\", # Override default provider\n model_name=\"gpt-4o\",\n tenant_id=\"acme-corp\",\n tags=[\"legal\", \"procurement\"]\n )\n\n # Medical records for healthcare\n medical_config = 
OntologyConfig(\n name=\"medical-records-extractor\",\n agent_schema_id=\"medical-parser-v1\",\n description=\"Extract diagnoses and treatments from medical records\",\n mime_type_pattern=\"application/pdf\",\n tag_filter=[\"medical\", \"patient-record\"],\n priority=50,\n enabled=True,\n tenant_id=\"healthsystem\",\n tags=[\"medical\", \"hipaa-compliant\"]\n )\n \n\nThis agent can search the `ontology_configs` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "agent_schema_id": {"title": "Agent Schema Id", "type": "string"}, "description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Description"}, "mime_type_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Mime Type Pattern"}, "uri_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri Pattern"}, "tag_filter": {"default": [], "items": {"type": "string"}, "title": "Tag Filter", "type": "array"}, "priority": {"default": 100, "title": "Priority", "type": "integer"}, "enabled": {"default": true, "title": "Enabled", "type": "boolean"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}}, "required": ["name", "agent_schema_id"], "json_schema_extra": {"table_name": "ontology_configs", "entity_key_field": "id", "embedding_fields": ["description"], "fully_qualified_name": "rem.models.entities.ontology_config.OntologyConfig", "tools": ["search_rem"], "default_search_table": "ontology_configs", "has_embeddings": true}}'::jsonb,
2398
+ '{"type": "object", "description": "User configuration for automatic ontology extraction.\n\n Attributes:\n name: Human-readable config name\n agent_schema_id: Foreign key to Schema entity to use for extraction\n description: Purpose and scope of this config\n\n # File matching rules (ANY matching rule triggers extraction)\n mime_type_pattern: Regex pattern for file MIME types (e.g., \"application/pdf\")\n uri_pattern: Regex pattern for file URIs (e.g., \"s3://bucket/resumes/.*\")\n tag_filter: List of tags (file must have ALL tags to match)\n\n # Execution control\n priority: Execution order (higher = earlier, default 100)\n enabled: Whether this config is active (default True)\n\n # LLM provider configuration\n provider_name: Optional LLM provider override (defaults to settings)\n model_name: Optional model override (defaults to settings)\n\n Inherited from CoreModel:\n id, created_at, updated_at, deleted_at, tenant_id, user_id,\n graph_edges, metadata, tags, column\n\n Example Usage:\n # CV extraction for recruitment\n cv_config = OntologyConfig(\n name=\"recruitment-cv-parser\",\n agent_schema_id=\"cv-parser-v1\",\n description=\"Extract candidate information from resumes\",\n mime_type_pattern=\"application/pdf\",\n uri_pattern=\".*/resumes/.*\",\n tag_filter=[\"cv\", \"candidate\"],\n priority=100,\n enabled=True,\n tenant_id=\"acme-corp\",\n tags=[\"recruitment\", \"hr\"]\n )\n\n # Contract analysis for legal team\n contract_config = OntologyConfig(\n name=\"legal-contract-analyzer\",\n agent_schema_id=\"contract-parser-v2\",\n description=\"Extract key terms from supplier contracts\",\n mime_type_pattern=\"application/(pdf|msword|vnd.openxmlformats.*)\",\n tag_filter=[\"legal\", \"contract\"],\n priority=200, # Higher priority = runs first\n enabled=True,\n provider_name=\"openai\", # Override default provider\n model_name=\"gpt-4.1\",\n tenant_id=\"acme-corp\",\n tags=[\"legal\", \"procurement\"]\n )\n\n # Medical records for healthcare\n medical_config 
= OntologyConfig(\n name=\"medical-records-extractor\",\n agent_schema_id=\"medical-parser-v1\",\n description=\"Extract diagnoses and treatments from medical records\",\n mime_type_pattern=\"application/pdf\",\n tag_filter=[\"medical\", \"patient-record\"],\n priority=50,\n enabled=True,\n tenant_id=\"healthsystem\",\n tags=[\"medical\", \"hipaa-compliant\"]\n )\n \n\nThis agent can search the `ontology_configs` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "agent_schema_id": {"title": "Agent Schema Id", "type": "string"}, "description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Description"}, "mime_type_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Mime Type Pattern"}, "uri_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri Pattern"}, "tag_filter": {"default": [], "items": {"type": "string"}, "title": "Tag Filter", "type": "array"}, "priority": {"default": 100, "title": "Priority", "type": "integer"}, "enabled": {"default": true, "title": "Enabled", "type": "boolean"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}}, "required": ["name", "agent_schema_id"], "json_schema_extra": {"table_name": "ontology_configs", "entity_key_field": "id", "embedding_fields": ["description"], "fully_qualified_name": "rem.models.entities.ontology_config.OntologyConfig", "tools": ["search_rem"], "default_search_table": "ontology_configs", "has_embeddings": true}}'::jsonb,
2399
2399
  'entity',
2400
2400
  '{"table_name": "ontology_configs", "entity_key_field": "id", "embedding_fields": ["description"], "fqn": "rem.models.entities.ontology_config.OntologyConfig"}'::jsonb
2401
2401
  )
@@ -91,7 +91,7 @@ DECLARE
91
91
  v_last_trigger TIMESTAMPTZ;
92
92
  v_api_secret TEXT;
93
93
  v_debounce_seconds CONSTANT INTEGER := 30;
94
- v_api_url TEXT := 'http://rem-api.siggy.svc.cluster.local:8000/api/admin/internal/rebuild-kv';
94
+ v_api_url TEXT := 'http://rem-api.rem.svc.cluster.local:8000/api/admin/internal/rebuild-kv';
95
95
  v_request_id BIGINT;
96
96
  BEGIN
97
97
  -- Quick check: is kv_store actually empty for this user?
rem/utils/README.md CHANGED
@@ -4,6 +4,7 @@
4
4
 
5
5
  1. [SQL Types](#sql-types-sql_typespy) - Pydantic to PostgreSQL type mapping
6
6
  2. [Embeddings](#embeddings-embeddingspy) - Vector embeddings generation
7
+ 3. [Files](#files-filespy) - File utilities and DataFrame I/O
7
8
 
8
9
  ## SQL Types (`sql_types.py`)
9
10
 
@@ -581,3 +582,47 @@ This will demonstrate:
581
582
  - `sql_types.py` - Use `embedding_provider` in json_schema_extra for TEXT fields
582
583
  - OpenAI Embeddings API: https://platform.openai.com/docs/api-reference/embeddings
583
584
  - pgvector Documentation: https://github.com/pgvector/pgvector
585
+
586
+ ---
587
+
588
+ ## Files (`files.py`)
589
+
590
+ File utilities including temporary file handling and DataFrame I/O with automatic format detection.
591
+
592
+ ### DataFrame I/O
593
+
594
+ Read and write DataFrames, with the format auto-detected from the file extension:
595
+
596
+ ```python
597
+ from rem.utils.files import read_dataframe, write_dataframe
598
+
599
+ # Read - format inferred from extension
600
+ df = read_dataframe("data.csv")
601
+ df = read_dataframe("data.parquet")
602
+ df = read_dataframe("data.xlsx")
603
+
604
+ # Read from bytes (e.g., from S3)
605
+ df = read_dataframe(content_bytes, filename="data.csv")
606
+
607
+ # Write - format inferred from extension
608
+ write_dataframe(df, "output.parquet")
609
+ ```
610
+
611
+ **Supported formats**: `.csv`, `.tsv`, `.parquet`, `.json`, `.jsonl`, `.avro`, `.xlsx`, `.xls`, `.ods`, `.ipc`, `.arrow`, `.feather`
612
+
613
+ Note: Some formats require optional dependencies (e.g., `fastexcel` for Excel).
614
+
615
+ ### Temporary File Utilities
616
+
617
+ ```python
618
+ from rem.utils.files import temp_file_from_bytes, temp_directory
619
+
620
+ # Create temp file from bytes, auto-cleanup
621
+ with temp_file_from_bytes(pdf_bytes, suffix=".pdf") as tmp_path:
622
+     result = process_pdf(tmp_path)
623
+
624
+ # Create temp directory, auto-cleanup
625
+ with temp_directory() as tmp_dir:
626
+     # Work with files in tmp_dir
626
+     pass
628
+ ```