remdb 0.3.114__py3-none-any.whl → 0.3.172__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/agentic/agents/__init__.py +16 -0
- rem/agentic/agents/agent_manager.py +311 -0
- rem/agentic/agents/sse_simulator.py +2 -0
- rem/agentic/context.py +103 -5
- rem/agentic/context_builder.py +36 -9
- rem/agentic/mcp/tool_wrapper.py +161 -18
- rem/agentic/otel/setup.py +1 -0
- rem/agentic/providers/phoenix.py +371 -108
- rem/agentic/providers/pydantic_ai.py +172 -30
- rem/agentic/schema.py +8 -4
- rem/api/deps.py +3 -5
- rem/api/main.py +26 -4
- rem/api/mcp_router/resources.py +15 -10
- rem/api/mcp_router/server.py +11 -3
- rem/api/mcp_router/tools.py +418 -4
- rem/api/middleware/tracking.py +5 -5
- rem/api/routers/admin.py +218 -1
- rem/api/routers/auth.py +349 -6
- rem/api/routers/chat/completions.py +255 -7
- rem/api/routers/chat/models.py +81 -7
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +17 -1
- rem/api/routers/chat/streaming.py +126 -19
- rem/api/routers/feedback.py +134 -14
- rem/api/routers/messages.py +24 -15
- rem/api/routers/query.py +6 -3
- rem/auth/__init__.py +13 -3
- rem/auth/jwt.py +352 -0
- rem/auth/middleware.py +115 -10
- rem/auth/providers/__init__.py +4 -1
- rem/auth/providers/email.py +215 -0
- rem/cli/commands/README.md +42 -0
- rem/cli/commands/cluster.py +617 -168
- rem/cli/commands/configure.py +4 -7
- rem/cli/commands/db.py +66 -22
- rem/cli/commands/experiments.py +468 -76
- rem/cli/commands/schema.py +6 -5
- rem/cli/commands/session.py +336 -0
- rem/cli/dreaming.py +2 -2
- rem/cli/main.py +2 -0
- rem/config.py +8 -1
- rem/models/core/experiment.py +58 -14
- rem/models/entities/__init__.py +4 -0
- rem/models/entities/ontology.py +1 -1
- rem/models/entities/ontology_config.py +1 -1
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +1 -0
- rem/schemas/agents/core/agent-builder.yaml +235 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
- rem/schemas/agents/examples/contract-extractor.yaml +1 -1
- rem/schemas/agents/examples/cv-parser.yaml +1 -1
- rem/services/__init__.py +3 -1
- rem/services/content/service.py +4 -3
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +513 -0
- rem/services/email/templates.py +360 -0
- rem/services/phoenix/client.py +59 -18
- rem/services/postgres/README.md +38 -0
- rem/services/postgres/diff_service.py +127 -6
- rem/services/postgres/pydantic_to_sqlalchemy.py +45 -13
- rem/services/postgres/repository.py +5 -4
- rem/services/postgres/schema_generator.py +205 -4
- rem/services/session/compression.py +120 -50
- rem/services/session/reload.py +14 -7
- rem/services/user_service.py +41 -9
- rem/settings.py +442 -23
- rem/sql/migrations/001_install.sql +156 -0
- rem/sql/migrations/002_install_models.sql +1951 -88
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/utils/README.md +45 -0
- rem/utils/__init__.py +18 -0
- rem/utils/files.py +157 -1
- rem/utils/schema_loader.py +139 -10
- rem/utils/sql_paths.py +146 -0
- rem/utils/vision.py +1 -1
- rem/workers/__init__.py +3 -1
- rem/workers/db_listener.py +579 -0
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.3.114.dist-info → remdb-0.3.172.dist-info}/METADATA +218 -180
- {remdb-0.3.114.dist-info → remdb-0.3.172.dist-info}/RECORD +83 -68
- {remdb-0.3.114.dist-info → remdb-0.3.172.dist-info}/WHEEL +0 -0
- {remdb-0.3.114.dist-info → remdb-0.3.172.dist-info}/entry_points.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
-- REM Model Schema (install_models.sql)
|
|
2
2
|
-- Generated from Pydantic models
|
|
3
3
|
-- Source: model registry
|
|
4
|
-
-- Generated at: 2025-11-
|
|
4
|
+
-- Generated at: 2025-11-29T18:45:11.372432
|
|
5
5
|
--
|
|
6
6
|
-- DO NOT EDIT MANUALLY - Regenerate with: rem db schema generate
|
|
7
7
|
--
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
-- 2. Embeddings tables (embeddings_<table>)
|
|
11
11
|
-- 3. KV_STORE triggers for cache maintenance
|
|
12
12
|
-- 4. Indexes (foreground only, background indexes separate)
|
|
13
|
+
-- 5. Schema table entries (for agent-like table access)
|
|
13
14
|
|
|
14
15
|
-- ============================================================================
|
|
15
16
|
-- PREREQUISITES CHECK
|
|
@@ -55,11 +56,11 @@ CREATE TABLE IF NOT EXISTS feedbacks (
|
|
|
55
56
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
56
57
|
);
|
|
57
58
|
|
|
58
|
-
CREATE INDEX idx_feedbacks_tenant ON feedbacks (tenant_id);
|
|
59
|
-
CREATE INDEX idx_feedbacks_user ON feedbacks (user_id);
|
|
60
|
-
CREATE INDEX idx_feedbacks_graph_edges ON feedbacks USING GIN (graph_edges);
|
|
61
|
-
CREATE INDEX idx_feedbacks_metadata ON feedbacks USING GIN (metadata);
|
|
62
|
-
CREATE INDEX idx_feedbacks_tags ON feedbacks USING GIN (tags);
|
|
59
|
+
CREATE INDEX IF NOT EXISTS idx_feedbacks_tenant ON feedbacks (tenant_id);
|
|
60
|
+
CREATE INDEX IF NOT EXISTS idx_feedbacks_user ON feedbacks (user_id);
|
|
61
|
+
CREATE INDEX IF NOT EXISTS idx_feedbacks_graph_edges ON feedbacks USING GIN (graph_edges);
|
|
62
|
+
CREATE INDEX IF NOT EXISTS idx_feedbacks_metadata ON feedbacks USING GIN (metadata);
|
|
63
|
+
CREATE INDEX IF NOT EXISTS idx_feedbacks_tags ON feedbacks USING GIN (tags);
|
|
63
64
|
|
|
64
65
|
-- KV_STORE trigger for feedbacks
|
|
65
66
|
-- Trigger function to maintain KV_STORE for feedbacks
|
|
@@ -134,11 +135,11 @@ CREATE TABLE IF NOT EXISTS files (
|
|
|
134
135
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
135
136
|
);
|
|
136
137
|
|
|
137
|
-
CREATE INDEX idx_files_tenant ON files (tenant_id);
|
|
138
|
-
CREATE INDEX idx_files_user ON files (user_id);
|
|
139
|
-
CREATE INDEX idx_files_graph_edges ON files USING GIN (graph_edges);
|
|
140
|
-
CREATE INDEX idx_files_metadata ON files USING GIN (metadata);
|
|
141
|
-
CREATE INDEX idx_files_tags ON files USING GIN (tags);
|
|
138
|
+
CREATE INDEX IF NOT EXISTS idx_files_tenant ON files (tenant_id);
|
|
139
|
+
CREATE INDEX IF NOT EXISTS idx_files_user ON files (user_id);
|
|
140
|
+
CREATE INDEX IF NOT EXISTS idx_files_graph_edges ON files USING GIN (graph_edges);
|
|
141
|
+
CREATE INDEX IF NOT EXISTS idx_files_metadata ON files USING GIN (metadata);
|
|
142
|
+
CREATE INDEX IF NOT EXISTS idx_files_tags ON files USING GIN (tags);
|
|
142
143
|
|
|
143
144
|
-- Embeddings for files
|
|
144
145
|
CREATE TABLE IF NOT EXISTS embeddings_files (
|
|
@@ -156,14 +157,14 @@ CREATE TABLE IF NOT EXISTS embeddings_files (
|
|
|
156
157
|
);
|
|
157
158
|
|
|
158
159
|
-- Index for entity lookup (get all embeddings for entity)
|
|
159
|
-
CREATE INDEX idx_embeddings_files_entity ON embeddings_files (entity_id);
|
|
160
|
+
CREATE INDEX IF NOT EXISTS idx_embeddings_files_entity ON embeddings_files (entity_id);
|
|
160
161
|
|
|
161
162
|
-- Index for field + provider lookup
|
|
162
|
-
CREATE INDEX idx_embeddings_files_field_provider ON embeddings_files (field_name, provider);
|
|
163
|
+
CREATE INDEX IF NOT EXISTS idx_embeddings_files_field_provider ON embeddings_files (field_name, provider);
|
|
163
164
|
|
|
164
165
|
-- HNSW index for vector similarity search (created in background)
|
|
165
166
|
-- Note: This will be created by background thread after data load
|
|
166
|
-
-- CREATE INDEX idx_embeddings_files_vector_hnsw ON embeddings_files
|
|
167
|
+
-- CREATE INDEX IF NOT EXISTS idx_embeddings_files_vector_hnsw ON embeddings_files
|
|
167
168
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
168
169
|
|
|
169
170
|
-- KV_STORE trigger for files
|
|
@@ -247,11 +248,11 @@ CREATE TABLE IF NOT EXISTS image_resources (
|
|
|
247
248
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
248
249
|
);
|
|
249
250
|
|
|
250
|
-
CREATE INDEX idx_image_resources_tenant ON image_resources (tenant_id);
|
|
251
|
-
CREATE INDEX idx_image_resources_user ON image_resources (user_id);
|
|
252
|
-
CREATE INDEX idx_image_resources_graph_edges ON image_resources USING GIN (graph_edges);
|
|
253
|
-
CREATE INDEX idx_image_resources_metadata ON image_resources USING GIN (metadata);
|
|
254
|
-
CREATE INDEX idx_image_resources_tags ON image_resources USING GIN (tags);
|
|
251
|
+
CREATE INDEX IF NOT EXISTS idx_image_resources_tenant ON image_resources (tenant_id);
|
|
252
|
+
CREATE INDEX IF NOT EXISTS idx_image_resources_user ON image_resources (user_id);
|
|
253
|
+
CREATE INDEX IF NOT EXISTS idx_image_resources_graph_edges ON image_resources USING GIN (graph_edges);
|
|
254
|
+
CREATE INDEX IF NOT EXISTS idx_image_resources_metadata ON image_resources USING GIN (metadata);
|
|
255
|
+
CREATE INDEX IF NOT EXISTS idx_image_resources_tags ON image_resources USING GIN (tags);
|
|
255
256
|
|
|
256
257
|
-- Embeddings for image_resources
|
|
257
258
|
CREATE TABLE IF NOT EXISTS embeddings_image_resources (
|
|
@@ -269,14 +270,14 @@ CREATE TABLE IF NOT EXISTS embeddings_image_resources (
|
|
|
269
270
|
);
|
|
270
271
|
|
|
271
272
|
-- Index for entity lookup (get all embeddings for entity)
|
|
272
|
-
CREATE INDEX idx_embeddings_image_resources_entity ON embeddings_image_resources (entity_id);
|
|
273
|
+
CREATE INDEX IF NOT EXISTS idx_embeddings_image_resources_entity ON embeddings_image_resources (entity_id);
|
|
273
274
|
|
|
274
275
|
-- Index for field + provider lookup
|
|
275
|
-
CREATE INDEX idx_embeddings_image_resources_field_provider ON embeddings_image_resources (field_name, provider);
|
|
276
|
+
CREATE INDEX IF NOT EXISTS idx_embeddings_image_resources_field_provider ON embeddings_image_resources (field_name, provider);
|
|
276
277
|
|
|
277
278
|
-- HNSW index for vector similarity search (created in background)
|
|
278
279
|
-- Note: This will be created by background thread after data load
|
|
279
|
-
-- CREATE INDEX idx_embeddings_image_resources_vector_hnsw ON embeddings_image_resources
|
|
280
|
+
-- CREATE INDEX IF NOT EXISTS idx_embeddings_image_resources_vector_hnsw ON embeddings_image_resources
|
|
280
281
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
281
282
|
|
|
282
283
|
-- KV_STORE trigger for image_resources
|
|
@@ -353,11 +354,11 @@ CREATE TABLE IF NOT EXISTS messages (
|
|
|
353
354
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
354
355
|
);
|
|
355
356
|
|
|
356
|
-
CREATE INDEX idx_messages_tenant ON messages (tenant_id);
|
|
357
|
-
CREATE INDEX idx_messages_user ON messages (user_id);
|
|
358
|
-
CREATE INDEX idx_messages_graph_edges ON messages USING GIN (graph_edges);
|
|
359
|
-
CREATE INDEX idx_messages_metadata ON messages USING GIN (metadata);
|
|
360
|
-
CREATE INDEX idx_messages_tags ON messages USING GIN (tags);
|
|
357
|
+
CREATE INDEX IF NOT EXISTS idx_messages_tenant ON messages (tenant_id);
|
|
358
|
+
CREATE INDEX IF NOT EXISTS idx_messages_user ON messages (user_id);
|
|
359
|
+
CREATE INDEX IF NOT EXISTS idx_messages_graph_edges ON messages USING GIN (graph_edges);
|
|
360
|
+
CREATE INDEX IF NOT EXISTS idx_messages_metadata ON messages USING GIN (metadata);
|
|
361
|
+
CREATE INDEX IF NOT EXISTS idx_messages_tags ON messages USING GIN (tags);
|
|
361
362
|
|
|
362
363
|
-- Embeddings for messages
|
|
363
364
|
CREATE TABLE IF NOT EXISTS embeddings_messages (
|
|
@@ -375,14 +376,14 @@ CREATE TABLE IF NOT EXISTS embeddings_messages (
|
|
|
375
376
|
);
|
|
376
377
|
|
|
377
378
|
-- Index for entity lookup (get all embeddings for entity)
|
|
378
|
-
CREATE INDEX idx_embeddings_messages_entity ON embeddings_messages (entity_id);
|
|
379
|
+
CREATE INDEX IF NOT EXISTS idx_embeddings_messages_entity ON embeddings_messages (entity_id);
|
|
379
380
|
|
|
380
381
|
-- Index for field + provider lookup
|
|
381
|
-
CREATE INDEX idx_embeddings_messages_field_provider ON embeddings_messages (field_name, provider);
|
|
382
|
+
CREATE INDEX IF NOT EXISTS idx_embeddings_messages_field_provider ON embeddings_messages (field_name, provider);
|
|
382
383
|
|
|
383
384
|
-- HNSW index for vector similarity search (created in background)
|
|
384
385
|
-- Note: This will be created by background thread after data load
|
|
385
|
-
-- CREATE INDEX idx_embeddings_messages_vector_hnsw ON embeddings_messages
|
|
386
|
+
-- CREATE INDEX IF NOT EXISTS idx_embeddings_messages_vector_hnsw ON embeddings_messages
|
|
386
387
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
387
388
|
|
|
388
389
|
-- KV_STORE trigger for messages
|
|
@@ -461,11 +462,11 @@ CREATE TABLE IF NOT EXISTS moments (
|
|
|
461
462
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
462
463
|
);
|
|
463
464
|
|
|
464
|
-
CREATE INDEX idx_moments_tenant ON moments (tenant_id);
|
|
465
|
-
CREATE INDEX idx_moments_user ON moments (user_id);
|
|
466
|
-
CREATE INDEX idx_moments_graph_edges ON moments USING GIN (graph_edges);
|
|
467
|
-
CREATE INDEX idx_moments_metadata ON moments USING GIN (metadata);
|
|
468
|
-
CREATE INDEX idx_moments_tags ON moments USING GIN (tags);
|
|
465
|
+
CREATE INDEX IF NOT EXISTS idx_moments_tenant ON moments (tenant_id);
|
|
466
|
+
CREATE INDEX IF NOT EXISTS idx_moments_user ON moments (user_id);
|
|
467
|
+
CREATE INDEX IF NOT EXISTS idx_moments_graph_edges ON moments USING GIN (graph_edges);
|
|
468
|
+
CREATE INDEX IF NOT EXISTS idx_moments_metadata ON moments USING GIN (metadata);
|
|
469
|
+
CREATE INDEX IF NOT EXISTS idx_moments_tags ON moments USING GIN (tags);
|
|
469
470
|
|
|
470
471
|
-- Embeddings for moments
|
|
471
472
|
CREATE TABLE IF NOT EXISTS embeddings_moments (
|
|
@@ -483,14 +484,14 @@ CREATE TABLE IF NOT EXISTS embeddings_moments (
|
|
|
483
484
|
);
|
|
484
485
|
|
|
485
486
|
-- Index for entity lookup (get all embeddings for entity)
|
|
486
|
-
CREATE INDEX idx_embeddings_moments_entity ON embeddings_moments (entity_id);
|
|
487
|
+
CREATE INDEX IF NOT EXISTS idx_embeddings_moments_entity ON embeddings_moments (entity_id);
|
|
487
488
|
|
|
488
489
|
-- Index for field + provider lookup
|
|
489
|
-
CREATE INDEX idx_embeddings_moments_field_provider ON embeddings_moments (field_name, provider);
|
|
490
|
+
CREATE INDEX IF NOT EXISTS idx_embeddings_moments_field_provider ON embeddings_moments (field_name, provider);
|
|
490
491
|
|
|
491
492
|
-- HNSW index for vector similarity search (created in background)
|
|
492
493
|
-- Note: This will be created by background thread after data load
|
|
493
|
-
-- CREATE INDEX idx_embeddings_moments_vector_hnsw ON embeddings_moments
|
|
494
|
+
-- CREATE INDEX IF NOT EXISTS idx_embeddings_moments_vector_hnsw ON embeddings_moments
|
|
494
495
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
495
496
|
|
|
496
497
|
-- KV_STORE trigger for moments
|
|
@@ -568,11 +569,11 @@ CREATE TABLE IF NOT EXISTS ontologies (
|
|
|
568
569
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
569
570
|
);
|
|
570
571
|
|
|
571
|
-
CREATE INDEX idx_ontologies_tenant ON ontologies (tenant_id);
|
|
572
|
-
CREATE INDEX idx_ontologies_user ON ontologies (user_id);
|
|
573
|
-
CREATE INDEX idx_ontologies_graph_edges ON ontologies USING GIN (graph_edges);
|
|
574
|
-
CREATE INDEX idx_ontologies_metadata ON ontologies USING GIN (metadata);
|
|
575
|
-
CREATE INDEX idx_ontologies_tags ON ontologies USING GIN (tags);
|
|
572
|
+
CREATE INDEX IF NOT EXISTS idx_ontologies_tenant ON ontologies (tenant_id);
|
|
573
|
+
CREATE INDEX IF NOT EXISTS idx_ontologies_user ON ontologies (user_id);
|
|
574
|
+
CREATE INDEX IF NOT EXISTS idx_ontologies_graph_edges ON ontologies USING GIN (graph_edges);
|
|
575
|
+
CREATE INDEX IF NOT EXISTS idx_ontologies_metadata ON ontologies USING GIN (metadata);
|
|
576
|
+
CREATE INDEX IF NOT EXISTS idx_ontologies_tags ON ontologies USING GIN (tags);
|
|
576
577
|
|
|
577
578
|
-- KV_STORE trigger for ontologies
|
|
578
579
|
-- Trigger function to maintain KV_STORE for ontologies
|
|
@@ -650,11 +651,11 @@ CREATE TABLE IF NOT EXISTS ontology_configs (
|
|
|
650
651
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
651
652
|
);
|
|
652
653
|
|
|
653
|
-
CREATE INDEX idx_ontology_configs_tenant ON ontology_configs (tenant_id);
|
|
654
|
-
CREATE INDEX idx_ontology_configs_user ON ontology_configs (user_id);
|
|
655
|
-
CREATE INDEX idx_ontology_configs_graph_edges ON ontology_configs USING GIN (graph_edges);
|
|
656
|
-
CREATE INDEX idx_ontology_configs_metadata ON ontology_configs USING GIN (metadata);
|
|
657
|
-
CREATE INDEX idx_ontology_configs_tags ON ontology_configs USING GIN (tags);
|
|
654
|
+
CREATE INDEX IF NOT EXISTS idx_ontology_configs_tenant ON ontology_configs (tenant_id);
|
|
655
|
+
CREATE INDEX IF NOT EXISTS idx_ontology_configs_user ON ontology_configs (user_id);
|
|
656
|
+
CREATE INDEX IF NOT EXISTS idx_ontology_configs_graph_edges ON ontology_configs USING GIN (graph_edges);
|
|
657
|
+
CREATE INDEX IF NOT EXISTS idx_ontology_configs_metadata ON ontology_configs USING GIN (metadata);
|
|
658
|
+
CREATE INDEX IF NOT EXISTS idx_ontology_configs_tags ON ontology_configs USING GIN (tags);
|
|
658
659
|
|
|
659
660
|
-- Embeddings for ontology_configs
|
|
660
661
|
CREATE TABLE IF NOT EXISTS embeddings_ontology_configs (
|
|
@@ -672,14 +673,14 @@ CREATE TABLE IF NOT EXISTS embeddings_ontology_configs (
|
|
|
672
673
|
);
|
|
673
674
|
|
|
674
675
|
-- Index for entity lookup (get all embeddings for entity)
|
|
675
|
-
CREATE INDEX idx_embeddings_ontology_configs_entity ON embeddings_ontology_configs (entity_id);
|
|
676
|
+
CREATE INDEX IF NOT EXISTS idx_embeddings_ontology_configs_entity ON embeddings_ontology_configs (entity_id);
|
|
676
677
|
|
|
677
678
|
-- Index for field + provider lookup
|
|
678
|
-
CREATE INDEX idx_embeddings_ontology_configs_field_provider ON embeddings_ontology_configs (field_name, provider);
|
|
679
|
+
CREATE INDEX IF NOT EXISTS idx_embeddings_ontology_configs_field_provider ON embeddings_ontology_configs (field_name, provider);
|
|
679
680
|
|
|
680
681
|
-- HNSW index for vector similarity search (created in background)
|
|
681
682
|
-- Note: This will be created by background thread after data load
|
|
682
|
-
-- CREATE INDEX idx_embeddings_ontology_configs_vector_hnsw ON embeddings_ontology_configs
|
|
683
|
+
-- CREATE INDEX IF NOT EXISTS idx_embeddings_ontology_configs_vector_hnsw ON embeddings_ontology_configs
|
|
683
684
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
684
685
|
|
|
685
686
|
-- KV_STORE trigger for ontology_configs
|
|
@@ -755,11 +756,11 @@ CREATE TABLE IF NOT EXISTS resources (
|
|
|
755
756
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
756
757
|
);
|
|
757
758
|
|
|
758
|
-
CREATE INDEX idx_resources_tenant ON resources (tenant_id);
|
|
759
|
-
CREATE INDEX idx_resources_user ON resources (user_id);
|
|
760
|
-
CREATE INDEX idx_resources_graph_edges ON resources USING GIN (graph_edges);
|
|
761
|
-
CREATE INDEX idx_resources_metadata ON resources USING GIN (metadata);
|
|
762
|
-
CREATE INDEX idx_resources_tags ON resources USING GIN (tags);
|
|
759
|
+
CREATE INDEX IF NOT EXISTS idx_resources_tenant ON resources (tenant_id);
|
|
760
|
+
CREATE INDEX IF NOT EXISTS idx_resources_user ON resources (user_id);
|
|
761
|
+
CREATE INDEX IF NOT EXISTS idx_resources_graph_edges ON resources USING GIN (graph_edges);
|
|
762
|
+
CREATE INDEX IF NOT EXISTS idx_resources_metadata ON resources USING GIN (metadata);
|
|
763
|
+
CREATE INDEX IF NOT EXISTS idx_resources_tags ON resources USING GIN (tags);
|
|
763
764
|
|
|
764
765
|
-- Embeddings for resources
|
|
765
766
|
CREATE TABLE IF NOT EXISTS embeddings_resources (
|
|
@@ -777,14 +778,14 @@ CREATE TABLE IF NOT EXISTS embeddings_resources (
|
|
|
777
778
|
);
|
|
778
779
|
|
|
779
780
|
-- Index for entity lookup (get all embeddings for entity)
|
|
780
|
-
CREATE INDEX idx_embeddings_resources_entity ON embeddings_resources (entity_id);
|
|
781
|
+
CREATE INDEX IF NOT EXISTS idx_embeddings_resources_entity ON embeddings_resources (entity_id);
|
|
781
782
|
|
|
782
783
|
-- Index for field + provider lookup
|
|
783
|
-
CREATE INDEX idx_embeddings_resources_field_provider ON embeddings_resources (field_name, provider);
|
|
784
|
+
CREATE INDEX IF NOT EXISTS idx_embeddings_resources_field_provider ON embeddings_resources (field_name, provider);
|
|
784
785
|
|
|
785
786
|
-- HNSW index for vector similarity search (created in background)
|
|
786
787
|
-- Note: This will be created by background thread after data load
|
|
787
|
-
-- CREATE INDEX idx_embeddings_resources_vector_hnsw ON embeddings_resources
|
|
788
|
+
-- CREATE INDEX IF NOT EXISTS idx_embeddings_resources_vector_hnsw ON embeddings_resources
|
|
788
789
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
789
790
|
|
|
790
791
|
-- KV_STORE trigger for resources
|
|
@@ -859,11 +860,11 @@ CREATE TABLE IF NOT EXISTS schemas (
|
|
|
859
860
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
860
861
|
);
|
|
861
862
|
|
|
862
|
-
CREATE INDEX idx_schemas_tenant ON schemas (tenant_id);
|
|
863
|
-
CREATE INDEX idx_schemas_user ON schemas (user_id);
|
|
864
|
-
CREATE INDEX idx_schemas_graph_edges ON schemas USING GIN (graph_edges);
|
|
865
|
-
CREATE INDEX idx_schemas_metadata ON schemas USING GIN (metadata);
|
|
866
|
-
CREATE INDEX idx_schemas_tags ON schemas USING GIN (tags);
|
|
863
|
+
CREATE INDEX IF NOT EXISTS idx_schemas_tenant ON schemas (tenant_id);
|
|
864
|
+
CREATE INDEX IF NOT EXISTS idx_schemas_user ON schemas (user_id);
|
|
865
|
+
CREATE INDEX IF NOT EXISTS idx_schemas_graph_edges ON schemas USING GIN (graph_edges);
|
|
866
|
+
CREATE INDEX IF NOT EXISTS idx_schemas_metadata ON schemas USING GIN (metadata);
|
|
867
|
+
CREATE INDEX IF NOT EXISTS idx_schemas_tags ON schemas USING GIN (tags);
|
|
867
868
|
|
|
868
869
|
-- Embeddings for schemas
|
|
869
870
|
CREATE TABLE IF NOT EXISTS embeddings_schemas (
|
|
@@ -881,14 +882,14 @@ CREATE TABLE IF NOT EXISTS embeddings_schemas (
|
|
|
881
882
|
);
|
|
882
883
|
|
|
883
884
|
-- Index for entity lookup (get all embeddings for entity)
|
|
884
|
-
CREATE INDEX idx_embeddings_schemas_entity ON embeddings_schemas (entity_id);
|
|
885
|
+
CREATE INDEX IF NOT EXISTS idx_embeddings_schemas_entity ON embeddings_schemas (entity_id);
|
|
885
886
|
|
|
886
887
|
-- Index for field + provider lookup
|
|
887
|
-
CREATE INDEX idx_embeddings_schemas_field_provider ON embeddings_schemas (field_name, provider);
|
|
888
|
+
CREATE INDEX IF NOT EXISTS idx_embeddings_schemas_field_provider ON embeddings_schemas (field_name, provider);
|
|
888
889
|
|
|
889
890
|
-- HNSW index for vector similarity search (created in background)
|
|
890
891
|
-- Note: This will be created by background thread after data load
|
|
891
|
-
-- CREATE INDEX idx_embeddings_schemas_vector_hnsw ON embeddings_schemas
|
|
892
|
+
-- CREATE INDEX IF NOT EXISTS idx_embeddings_schemas_vector_hnsw ON embeddings_schemas
|
|
892
893
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
893
894
|
|
|
894
895
|
-- KV_STORE trigger for schemas
|
|
@@ -966,11 +967,11 @@ CREATE TABLE IF NOT EXISTS sessions (
|
|
|
966
967
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
967
968
|
);
|
|
968
969
|
|
|
969
|
-
CREATE INDEX idx_sessions_tenant ON sessions (tenant_id);
|
|
970
|
-
CREATE INDEX idx_sessions_user ON sessions (user_id);
|
|
971
|
-
CREATE INDEX idx_sessions_graph_edges ON sessions USING GIN (graph_edges);
|
|
972
|
-
CREATE INDEX idx_sessions_metadata ON sessions USING GIN (metadata);
|
|
973
|
-
CREATE INDEX idx_sessions_tags ON sessions USING GIN (tags);
|
|
970
|
+
CREATE INDEX IF NOT EXISTS idx_sessions_tenant ON sessions (tenant_id);
|
|
971
|
+
CREATE INDEX IF NOT EXISTS idx_sessions_user ON sessions (user_id);
|
|
972
|
+
CREATE INDEX IF NOT EXISTS idx_sessions_graph_edges ON sessions USING GIN (graph_edges);
|
|
973
|
+
CREATE INDEX IF NOT EXISTS idx_sessions_metadata ON sessions USING GIN (metadata);
|
|
974
|
+
CREATE INDEX IF NOT EXISTS idx_sessions_tags ON sessions USING GIN (tags);
|
|
974
975
|
|
|
975
976
|
-- Embeddings for sessions
|
|
976
977
|
CREATE TABLE IF NOT EXISTS embeddings_sessions (
|
|
@@ -988,14 +989,14 @@ CREATE TABLE IF NOT EXISTS embeddings_sessions (
|
|
|
988
989
|
);
|
|
989
990
|
|
|
990
991
|
-- Index for entity lookup (get all embeddings for entity)
|
|
991
|
-
CREATE INDEX idx_embeddings_sessions_entity ON embeddings_sessions (entity_id);
|
|
992
|
+
CREATE INDEX IF NOT EXISTS idx_embeddings_sessions_entity ON embeddings_sessions (entity_id);
|
|
992
993
|
|
|
993
994
|
-- Index for field + provider lookup
|
|
994
|
-
CREATE INDEX idx_embeddings_sessions_field_provider ON embeddings_sessions (field_name, provider);
|
|
995
|
+
CREATE INDEX IF NOT EXISTS idx_embeddings_sessions_field_provider ON embeddings_sessions (field_name, provider);
|
|
995
996
|
|
|
996
997
|
-- HNSW index for vector similarity search (created in background)
|
|
997
998
|
-- Note: This will be created by background thread after data load
|
|
998
|
-
-- CREATE INDEX idx_embeddings_sessions_vector_hnsw ON embeddings_sessions
|
|
999
|
+
-- CREATE INDEX IF NOT EXISTS idx_embeddings_sessions_vector_hnsw ON embeddings_sessions
|
|
999
1000
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
1000
1001
|
|
|
1001
1002
|
-- KV_STORE trigger for sessions
|
|
@@ -1067,11 +1068,11 @@ CREATE TABLE IF NOT EXISTS shared_sessions (
|
|
|
1067
1068
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
1068
1069
|
);
|
|
1069
1070
|
|
|
1070
|
-
CREATE INDEX idx_shared_sessions_tenant ON shared_sessions (tenant_id);
|
|
1071
|
-
CREATE INDEX idx_shared_sessions_user ON shared_sessions (user_id);
|
|
1072
|
-
CREATE INDEX idx_shared_sessions_graph_edges ON shared_sessions USING GIN (graph_edges);
|
|
1073
|
-
CREATE INDEX idx_shared_sessions_metadata ON shared_sessions USING GIN (metadata);
|
|
1074
|
-
CREATE INDEX idx_shared_sessions_tags ON shared_sessions USING GIN (tags);
|
|
1071
|
+
CREATE INDEX IF NOT EXISTS idx_shared_sessions_tenant ON shared_sessions (tenant_id);
|
|
1072
|
+
CREATE INDEX IF NOT EXISTS idx_shared_sessions_user ON shared_sessions (user_id);
|
|
1073
|
+
CREATE INDEX IF NOT EXISTS idx_shared_sessions_graph_edges ON shared_sessions USING GIN (graph_edges);
|
|
1074
|
+
CREATE INDEX IF NOT EXISTS idx_shared_sessions_metadata ON shared_sessions USING GIN (metadata);
|
|
1075
|
+
CREATE INDEX IF NOT EXISTS idx_shared_sessions_tags ON shared_sessions USING GIN (tags);
|
|
1075
1076
|
|
|
1076
1077
|
-- KV_STORE trigger for shared_sessions
|
|
1077
1078
|
-- Trigger function to maintain KV_STORE for shared_sessions
|
|
@@ -1150,11 +1151,11 @@ CREATE TABLE IF NOT EXISTS users (
|
|
|
1150
1151
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
1151
1152
|
);
|
|
1152
1153
|
|
|
1153
|
-
CREATE INDEX idx_users_tenant ON users (tenant_id);
|
|
1154
|
-
CREATE INDEX idx_users_user ON users (user_id);
|
|
1155
|
-
CREATE INDEX idx_users_graph_edges ON users USING GIN (graph_edges);
|
|
1156
|
-
CREATE INDEX idx_users_metadata ON users USING GIN (metadata);
|
|
1157
|
-
CREATE INDEX idx_users_tags ON users USING GIN (tags);
|
|
1154
|
+
CREATE INDEX IF NOT EXISTS idx_users_tenant ON users (tenant_id);
|
|
1155
|
+
CREATE INDEX IF NOT EXISTS idx_users_user ON users (user_id);
|
|
1156
|
+
CREATE INDEX IF NOT EXISTS idx_users_graph_edges ON users USING GIN (graph_edges);
|
|
1157
|
+
CREATE INDEX IF NOT EXISTS idx_users_metadata ON users USING GIN (metadata);
|
|
1158
|
+
CREATE INDEX IF NOT EXISTS idx_users_tags ON users USING GIN (tags);
|
|
1158
1159
|
|
|
1159
1160
|
-- Embeddings for users
|
|
1160
1161
|
CREATE TABLE IF NOT EXISTS embeddings_users (
|
|
@@ -1172,14 +1173,14 @@ CREATE TABLE IF NOT EXISTS embeddings_users (
|
|
|
1172
1173
|
);
|
|
1173
1174
|
|
|
1174
1175
|
-- Index for entity lookup (get all embeddings for entity)
|
|
1175
|
-
CREATE INDEX idx_embeddings_users_entity ON embeddings_users (entity_id);
|
|
1176
|
+
CREATE INDEX IF NOT EXISTS idx_embeddings_users_entity ON embeddings_users (entity_id);
|
|
1176
1177
|
|
|
1177
1178
|
-- Index for field + provider lookup
|
|
1178
|
-
CREATE INDEX idx_embeddings_users_field_provider ON embeddings_users (field_name, provider);
|
|
1179
|
+
CREATE INDEX IF NOT EXISTS idx_embeddings_users_field_provider ON embeddings_users (field_name, provider);
|
|
1179
1180
|
|
|
1180
1181
|
-- HNSW index for vector similarity search (created in background)
|
|
1181
1182
|
-- Note: This will be created by background thread after data load
|
|
1182
|
-
-- CREATE INDEX idx_embeddings_users_vector_hnsw ON embeddings_users
|
|
1183
|
+
-- CREATE INDEX IF NOT EXISTS idx_embeddings_users_vector_hnsw ON embeddings_users
|
|
1183
1184
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
1184
1185
|
|
|
1185
1186
|
-- KV_STORE trigger for users
|
|
@@ -1232,6 +1233,1868 @@ CREATE TRIGGER trg_users_kv_store
|
|
|
1232
1233
|
AFTER INSERT OR UPDATE OR DELETE ON users
|
|
1233
1234
|
FOR EACH ROW EXECUTE FUNCTION fn_users_kv_store_upsert();
|
|
1234
1235
|
|
|
1236
|
+
-- ============================================================================
|
|
1237
|
+
-- SCHEMA TABLE ENTRIES
|
|
1238
|
+
-- Every entity table gets a schemas entry for agent-like access
|
|
1239
|
+
-- ============================================================================
|
|
1240
|
+
|
|
1241
|
+
-- Schema entry for Feedback (feedbacks)
|
|
1242
|
+
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
|
|
1243
|
+
VALUES (
|
|
1244
|
+
'ae554853-e743-5d73-a2db-1ce20e7089fe'::uuid,
|
|
1245
|
+
'system',
|
|
1246
|
+
'Feedback',
|
|
1247
|
+
'# Feedback
|
|
1248
|
+
|
|
1249
|
+
|
|
1250
|
+
User feedback on a message or session.
|
|
1251
|
+
|
|
1252
|
+
Captures structured feedback including:
|
|
1253
|
+
- Rating (1-5 scale or thumbs up/down)
|
|
1254
|
+
- Categories (predefined or custom)
|
|
1255
|
+
- Free-text comment
|
|
1256
|
+
- Trace reference for OTEL/Phoenix integration
|
|
1257
|
+
|
|
1258
|
+
The feedback can be attached to:
|
|
1259
|
+
- A specific message (message_id set)
|
|
1260
|
+
- An entire session (session_id set, message_id null)
|
|
1261
|
+
|
|
1262
|
+
|
|
1263
|
+
## Overview
|
|
1264
|
+
|
|
1265
|
+
The `Feedback` entity is stored in the `feedbacks` table. Each record is uniquely
|
|
1266
|
+
identified by its `id` field for lookups and graph traversal.
|
|
1267
|
+
|
|
1268
|
+
## Search Capabilities
|
|
1269
|
+
|
|
1270
|
+
This schema includes the `search_rem` tool which supports:
|
|
1271
|
+
- **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
|
|
1272
|
+
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
1273
|
+
- **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM feedbacks LIMIT 10`)
|
|
1274
|
+
- **SQL**: Complex queries (e.g., `SELECT * FROM feedbacks WHERE ...`)
|
|
1275
|
+
|
|
1276
|
+
## Table Info
|
|
1277
|
+
|
|
1278
|
+
| Property | Value |
|
|
1279
|
+
|----------|-------|
|
|
1280
|
+
| Table | `feedbacks` |
|
|
1281
|
+
| Entity Key | `id` |
|
|
1282
|
+
| Embedding Fields | None |
|
|
1283
|
+
| Tools | `search_rem` |
|
|
1284
|
+
|
|
1285
|
+
## Fields
|
|
1286
|
+
|
|
1287
|
+
### `id`
|
|
1288
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
1289
|
+
- **Optional**
|
|
1290
|
+
- Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
|
|
1291
|
+
|
|
1292
|
+
### `created_at`
|
|
1293
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
1294
|
+
- **Optional**
|
|
1295
|
+
- Entity creation timestamp
|
|
1296
|
+
|
|
1297
|
+
### `updated_at`
|
|
1298
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
1299
|
+
- **Optional**
|
|
1300
|
+
- Last update timestamp
|
|
1301
|
+
|
|
1302
|
+
### `deleted_at`
|
|
1303
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
1304
|
+
- **Optional**
|
|
1305
|
+
- Soft deletion timestamp
|
|
1306
|
+
|
|
1307
|
+
### `tenant_id`
|
|
1308
|
+
- **Type**: `typing.Optional[str]`
|
|
1309
|
+
- **Optional**
|
|
1310
|
+
- Tenant identifier for multi-tenancy isolation
|
|
1311
|
+
|
|
1312
|
+
### `user_id`
|
|
1313
|
+
- **Type**: `typing.Optional[str]`
|
|
1314
|
+
- **Optional**
|
|
1315
|
+
- Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
|
|
1316
|
+
|
|
1317
|
+
### `graph_edges`
|
|
1318
|
+
- **Type**: `list[dict]`
|
|
1319
|
+
- **Optional**
|
|
1320
|
+
- Knowledge graph edges stored as InlineEdge dicts
|
|
1321
|
+
|
|
1322
|
+
### `metadata`
|
|
1323
|
+
- **Type**: `<class ''dict''>`
|
|
1324
|
+
- **Optional**
|
|
1325
|
+
- Flexible metadata storage
|
|
1326
|
+
|
|
1327
|
+
### `tags`
|
|
1328
|
+
- **Type**: `list[str]`
|
|
1329
|
+
- **Optional**
|
|
1330
|
+
- Entity tags
|
|
1331
|
+
|
|
1332
|
+
### `session_id`
|
|
1333
|
+
- **Type**: `<class ''str''>`
|
|
1334
|
+
- **Required**
|
|
1335
|
+
- Session ID this feedback relates to
|
|
1336
|
+
|
|
1337
|
+
### `message_id`
|
|
1338
|
+
- **Type**: `str | None`
|
|
1339
|
+
- **Optional**
|
|
1340
|
+
- Specific message ID (null for session-level feedback)
|
|
1341
|
+
|
|
1342
|
+
### `rating`
|
|
1343
|
+
- **Type**: `int | None`
|
|
1344
|
+
- **Optional**
|
|
1345
|
+
- Rating: -1 (thumbs down), 1 (thumbs up), or 1-5 scale
|
|
1346
|
+
|
|
1347
|
+
### `categories`
|
|
1348
|
+
- **Type**: `list[str]`
|
|
1349
|
+
- **Optional**
|
|
1350
|
+
- Selected feedback categories (from FeedbackCategory or custom)
|
|
1351
|
+
|
|
1352
|
+
### `comment`
|
|
1353
|
+
- **Type**: `str | None`
|
|
1354
|
+
- **Optional**
|
|
1355
|
+
- Optional free-text feedback comment
|
|
1356
|
+
|
|
1357
|
+
### `trace_id`
|
|
1358
|
+
- **Type**: `str | None`
|
|
1359
|
+
- **Optional**
|
|
1360
|
+
- OTEL trace ID for linking to observability
|
|
1361
|
+
|
|
1362
|
+
### `span_id`
|
|
1363
|
+
- **Type**: `str | None`
|
|
1364
|
+
- **Optional**
|
|
1365
|
+
- OTEL span ID for specific span feedback
|
|
1366
|
+
|
|
1367
|
+
### `phoenix_synced`
|
|
1368
|
+
- **Type**: `<class ''bool''>`
|
|
1369
|
+
- **Optional**
|
|
1370
|
+
- Whether feedback has been synced to Phoenix as annotation
|
|
1371
|
+
|
|
1372
|
+
### `phoenix_annotation_id`
|
|
1373
|
+
- **Type**: `str | None`
|
|
1374
|
+
- **Optional**
|
|
1375
|
+
- Phoenix annotation ID after sync
|
|
1376
|
+
|
|
1377
|
+
### `annotator_kind`
|
|
1378
|
+
- **Type**: `<class ''str''>`
|
|
1379
|
+
- **Optional**
|
|
1380
|
+
- Annotator type: HUMAN, LLM, CODE
|
|
1381
|
+
|
|
1382
|
+
',
|
|
1383
|
+
'{"type": "object", "description": "\n User feedback on a message or session.\n\n Captures structured feedback including:\n - Rating (1-5 scale or thumbs up/down)\n - Categories (predefined or custom)\n - Free-text comment\n - Trace reference for OTEL/Phoenix integration\n\n The feedback can be attached to:\n - A specific message (message_id set)\n - An entire session (session_id set, message_id null)\n \n\nThis agent can search the `feedbacks` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "session_id": {"description": "Session ID this feedback relates to", "title": "Session Id", "type": "string"}, "message_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Specific message ID (null for session-level feedback)", "title": "Message Id"}, "rating": {"anyOf": [{"maximum": 5, "minimum": -1, "type": "integer"}, {"type": "null"}], "default": null, "description": "Rating: -1 (thumbs down), 1 (thumbs up), or 1-5 scale", "title": "Rating"}, "categories": {"description": "Selected feedback categories (from FeedbackCategory or custom)", "items": {"type": "string"}, "title": "Categories", "type": "array"}, "comment": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Optional free-text feedback comment", "title": "Comment"}, "trace_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "OTEL trace ID for linking to observability", "title": "Trace Id"}, "span_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "OTEL span ID for specific span feedback", "title": "Span Id"}, "phoenix_synced": {"default": false, "description": "Whether feedback has been synced to Phoenix as annotation", "title": "Phoenix Synced", "type": "boolean"}, "phoenix_annotation_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Phoenix annotation ID after sync", "title": "Phoenix Annotation Id"}, "annotator_kind": {"default": "HUMAN", "description": "Annotator type: HUMAN, LLM, CODE", "title": "Annotator Kind", "type": "string"}}, "required": ["session_id"], "json_schema_extra": {"table_name": "feedbacks", "entity_key_field": "id", "embedding_fields": [], "fully_qualified_name": "rem.models.entities.feedback.Feedback", "tools": ["search_rem"], "default_search_table": "feedbacks", "has_embeddings": false}}'::jsonb,
|
|
1384
|
+
'entity',
|
|
1385
|
+
'{"table_name": "feedbacks", "entity_key_field": "id", "embedding_fields": [], "fqn": "rem.models.entities.feedback.Feedback"}'::jsonb
|
|
1386
|
+
)
|
|
1387
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
1388
|
+
name = EXCLUDED.name,
|
|
1389
|
+
content = EXCLUDED.content,
|
|
1390
|
+
spec = EXCLUDED.spec,
|
|
1391
|
+
category = EXCLUDED.category,
|
|
1392
|
+
metadata = EXCLUDED.metadata,
|
|
1393
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
1394
|
+
|
|
1395
|
+
-- Schema entry for File (files)
|
|
1396
|
+
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
|
|
1397
|
+
VALUES (
|
|
1398
|
+
'c3b3ef33-59d4-57a1-81a3-cc6adc45b194'::uuid,
|
|
1399
|
+
'system',
|
|
1400
|
+
'File',
|
|
1401
|
+
'# File
|
|
1402
|
+
|
|
1403
|
+
|
|
1404
|
+
File metadata and tracking.
|
|
1405
|
+
|
|
1406
|
+
Represents files uploaded to or referenced by the REM system,
|
|
1407
|
+
tracking their metadata and processing status. Tenant isolation
|
|
1408
|
+
is provided via CoreModel.tenant_id field.
|
|
1409
|
+
|
|
1410
|
+
|
|
1411
|
+
## Overview
|
|
1412
|
+
|
|
1413
|
+
The `File` entity is stored in the `files` table. Each record is uniquely
|
|
1414
|
+
identified by its `id` field for lookups and graph traversal.
|
|
1415
|
+
|
|
1416
|
+
## Search Capabilities
|
|
1417
|
+
|
|
1418
|
+
This schema includes the `search_rem` tool which supports:
|
|
1419
|
+
- **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
|
|
1420
|
+
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
1421
|
+
- **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM files LIMIT 10`)
|
|
1422
|
+
- **SQL**: Complex queries (e.g., `SELECT * FROM files WHERE ...`)
|
|
1423
|
+
|
|
1424
|
+
## Table Info
|
|
1425
|
+
|
|
1426
|
+
| Property | Value |
|
|
1427
|
+
|----------|-------|
|
|
1428
|
+
| Table | `files` |
|
|
1429
|
+
| Entity Key | `id` |
|
|
1430
|
+
| Embedding Fields | `content` |
|
|
1431
|
+
| Tools | `search_rem` |
|
|
1432
|
+
|
|
1433
|
+
## Fields
|
|
1434
|
+
|
|
1435
|
+
### `id`
|
|
1436
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
1437
|
+
- **Optional**
|
|
1438
|
+
- Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
|
|
1439
|
+
|
|
1440
|
+
### `created_at`
|
|
1441
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
1442
|
+
- **Optional**
|
|
1443
|
+
- Entity creation timestamp
|
|
1444
|
+
|
|
1445
|
+
### `updated_at`
|
|
1446
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
1447
|
+
- **Optional**
|
|
1448
|
+
- Last update timestamp
|
|
1449
|
+
|
|
1450
|
+
### `deleted_at`
|
|
1451
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
1452
|
+
- **Optional**
|
|
1453
|
+
- Soft deletion timestamp
|
|
1454
|
+
|
|
1455
|
+
### `tenant_id`
|
|
1456
|
+
- **Type**: `typing.Optional[str]`
|
|
1457
|
+
- **Optional**
|
|
1458
|
+
- Tenant identifier for multi-tenancy isolation
|
|
1459
|
+
|
|
1460
|
+
### `user_id`
|
|
1461
|
+
- **Type**: `typing.Optional[str]`
|
|
1462
|
+
- **Optional**
|
|
1463
|
+
- Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
|
|
1464
|
+
|
|
1465
|
+
### `graph_edges`
|
|
1466
|
+
- **Type**: `list[dict]`
|
|
1467
|
+
- **Optional**
|
|
1468
|
+
- Knowledge graph edges stored as InlineEdge dicts
|
|
1469
|
+
|
|
1470
|
+
### `metadata`
|
|
1471
|
+
- **Type**: `<class ''dict''>`
|
|
1472
|
+
- **Optional**
|
|
1473
|
+
- Flexible metadata storage
|
|
1474
|
+
|
|
1475
|
+
### `tags`
|
|
1476
|
+
- **Type**: `list[str]`
|
|
1477
|
+
- **Optional**
|
|
1478
|
+
- Entity tags
|
|
1479
|
+
|
|
1480
|
+
### `name`
|
|
1481
|
+
- **Type**: `<class ''str''>`
|
|
1482
|
+
- **Required**
|
|
1483
|
+
- File name
|
|
1484
|
+
|
|
1485
|
+
### `uri`
|
|
1486
|
+
- **Type**: `<class ''str''>`
|
|
1487
|
+
- **Required**
|
|
1488
|
+
- File storage URI (S3, local path, etc.)
|
|
1489
|
+
|
|
1490
|
+
### `content`
|
|
1491
|
+
- **Type**: `typing.Optional[str]`
|
|
1492
|
+
- **Optional**
|
|
1493
|
+
- Extracted text content (if applicable)
|
|
1494
|
+
|
|
1495
|
+
### `timestamp`
|
|
1496
|
+
- **Type**: `typing.Optional[str]`
|
|
1497
|
+
- **Optional**
|
|
1498
|
+
- File creation/modification timestamp
|
|
1499
|
+
|
|
1500
|
+
### `size_bytes`
|
|
1501
|
+
- **Type**: `typing.Optional[int]`
|
|
1502
|
+
- **Optional**
|
|
1503
|
+
- File size in bytes
|
|
1504
|
+
|
|
1505
|
+
### `mime_type`
|
|
1506
|
+
- **Type**: `typing.Optional[str]`
|
|
1507
|
+
- **Optional**
|
|
1508
|
+
- File MIME type
|
|
1509
|
+
|
|
1510
|
+
### `processing_status`
|
|
1511
|
+
- **Type**: `typing.Optional[str]`
|
|
1512
|
+
- **Optional**
|
|
1513
|
+
- File processing status (pending, processing, completed, failed)
|
|
1514
|
+
|
|
1515
|
+
',
|
|
1516
|
+
'{"type": "object", "description": "\n File metadata and tracking.\n\n Represents files uploaded to or referenced by the REM system,\n tracking their metadata and processing status. Tenant isolation\n is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `files` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "File name", "title": "Name", "type": "string"}, "uri": {"description": "File storage URI (S3, local path, etc.)", "title": "Uri", "type": "string"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Extracted text content (if applicable)", "title": "Content"}, "timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File creation/modification timestamp", "title": "Timestamp"}, "size_bytes": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "File size in bytes", "title": "Size Bytes"}, "mime_type": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File MIME type", "title": "Mime Type"}, "processing_status": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": "pending", "description": "File processing status (pending, processing, completed, failed)", "title": "Processing Status"}}, "required": ["name", "uri"], "json_schema_extra": {"table_name": "files", "entity_key_field": "id", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.file.File", "tools": ["search_rem"], "default_search_table": "files", "has_embeddings": true}}'::jsonb,
|
|
1517
|
+
'entity',
|
|
1518
|
+
'{"table_name": "files", "entity_key_field": "id", "embedding_fields": ["content"], "fqn": "rem.models.entities.file.File"}'::jsonb
|
|
1519
|
+
)
|
|
1520
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
1521
|
+
name = EXCLUDED.name,
|
|
1522
|
+
content = EXCLUDED.content,
|
|
1523
|
+
spec = EXCLUDED.spec,
|
|
1524
|
+
category = EXCLUDED.category,
|
|
1525
|
+
metadata = EXCLUDED.metadata,
|
|
1526
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
1527
|
+
|
|
1528
|
+
-- Schema entry for ImageResource (image_resources)
|
|
1529
|
+
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
|
|
1530
|
+
VALUES (
|
|
1531
|
+
'ab4bc90c-2cda-55b2-bd4b-e78e19f7d4a7'::uuid,
|
|
1532
|
+
'system',
|
|
1533
|
+
'ImageResource',
|
|
1534
|
+
'# ImageResource
|
|
1535
|
+
|
|
1536
|
+
|
|
1537
|
+
Image-specific resource with CLIP embeddings.
|
|
1538
|
+
|
|
1539
|
+
Stored in separate `image_resources` table with CLIP embeddings
|
|
1540
|
+
instead of text embeddings. This enables:
|
|
1541
|
+
- Multimodal search (text-to-image, image-to-image)
|
|
1542
|
+
- Proper dimensionality (512/768 for CLIP vs 1536 for text)
|
|
1543
|
+
- Cost tracking (CLIP tokens separate from text tokens)
|
|
1544
|
+
|
|
1545
|
+
Embedding Strategy:
|
|
1546
|
+
- Default (when JINA_API_KEY set): Jina CLIP API (jina-clip-v2)
|
|
1547
|
+
- Future: Self-hosted OpenCLIP models via KEDA-scaled pods
|
|
1548
|
+
- Fallback: No embeddings (images searchable by metadata only)
|
|
1549
|
+
|
|
1550
|
+
Vision LLM Strategy (tier/sampling gated):
|
|
1551
|
+
- Gold tier: Always get vision descriptions
|
|
1552
|
+
- Silver/Free: Probabilistic sampling (IMAGE_VLLM_SAMPLE_RATE)
|
|
1553
|
+
- Fallback: Basic metadata only
|
|
1554
|
+
|
|
1555
|
+
Tenant isolation provided via CoreModel.tenant_id field.
|
|
1556
|
+
|
|
1557
|
+
|
|
1558
|
+
## Overview
|
|
1559
|
+
|
|
1560
|
+
The `ImageResource` entity is stored in the `image_resources` table. Each record is uniquely
|
|
1561
|
+
identified by its `name` field for lookups and graph traversal.
|
|
1562
|
+
|
|
1563
|
+
## Search Capabilities
|
|
1564
|
+
|
|
1565
|
+
This schema includes the `search_rem` tool which supports:
|
|
1566
|
+
- **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
|
|
1567
|
+
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
1568
|
+
- **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM image_resources LIMIT 10`)
|
|
1569
|
+
- **SQL**: Complex queries (e.g., `SELECT * FROM image_resources WHERE ...`)
|
|
1570
|
+
|
|
1571
|
+
## Table Info
|
|
1572
|
+
|
|
1573
|
+
| Property | Value |
|
|
1574
|
+
|----------|-------|
|
|
1575
|
+
| Table | `image_resources` |
|
|
1576
|
+
| Entity Key | `name` |
|
|
1577
|
+
| Embedding Fields | `content` |
|
|
1578
|
+
| Tools | `search_rem` |
|
|
1579
|
+
|
|
1580
|
+
## Fields
|
|
1581
|
+
|
|
1582
|
+
### `id`
|
|
1583
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
1584
|
+
- **Optional**
|
|
1585
|
+
- Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
|
|
1586
|
+
|
|
1587
|
+
### `created_at`
|
|
1588
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
1589
|
+
- **Optional**
|
|
1590
|
+
- Entity creation timestamp
|
|
1591
|
+
|
|
1592
|
+
### `updated_at`
|
|
1593
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
1594
|
+
- **Optional**
|
|
1595
|
+
- Last update timestamp
|
|
1596
|
+
|
|
1597
|
+
### `deleted_at`
|
|
1598
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
1599
|
+
- **Optional**
|
|
1600
|
+
- Soft deletion timestamp
|
|
1601
|
+
|
|
1602
|
+
### `tenant_id`
|
|
1603
|
+
- **Type**: `typing.Optional[str]`
|
|
1604
|
+
- **Optional**
|
|
1605
|
+
- Tenant identifier for multi-tenancy isolation
|
|
1606
|
+
|
|
1607
|
+
### `user_id`
|
|
1608
|
+
- **Type**: `typing.Optional[str]`
|
|
1609
|
+
- **Optional**
|
|
1610
|
+
- Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
|
|
1611
|
+
|
|
1612
|
+
### `graph_edges`
|
|
1613
|
+
- **Type**: `list[dict]`
|
|
1614
|
+
- **Optional**
|
|
1615
|
+
- Knowledge graph edges stored as InlineEdge dicts
|
|
1616
|
+
|
|
1617
|
+
### `metadata`
|
|
1618
|
+
- **Type**: `<class ''dict''>`
|
|
1619
|
+
- **Optional**
|
|
1620
|
+
- Flexible metadata storage
|
|
1621
|
+
|
|
1622
|
+
### `tags`
|
|
1623
|
+
- **Type**: `list[str]`
|
|
1624
|
+
- **Optional**
|
|
1625
|
+
- Entity tags
|
|
1626
|
+
|
|
1627
|
+
### `name`
|
|
1628
|
+
- **Type**: `typing.Optional[str]`
|
|
1629
|
+
- **Optional**
|
|
1630
|
+
- Human-readable resource name (used as graph label). Auto-generated from uri+ordinal if not provided.
|
|
1631
|
+
|
|
1632
|
+
### `uri`
|
|
1633
|
+
- **Type**: `typing.Optional[str]`
|
|
1634
|
+
- **Optional**
|
|
1635
|
+
- Content URI or identifier (file path, URL, etc.)
|
|
1636
|
+
|
|
1637
|
+
### `ordinal`
|
|
1638
|
+
- **Type**: `<class ''int''>`
|
|
1639
|
+
- **Optional**
|
|
1640
|
+
- Chunk ordinal for splitting large documents (0 for single-chunk resources)
|
|
1641
|
+
|
|
1642
|
+
### `content`
|
|
1643
|
+
- **Type**: `<class ''str''>`
|
|
1644
|
+
- **Optional**
|
|
1645
|
+
- Resource content text
|
|
1646
|
+
|
|
1647
|
+
### `timestamp`
|
|
1648
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
1649
|
+
- **Optional**
|
|
1650
|
+
- Resource timestamp (content creation/publication time)
|
|
1651
|
+
|
|
1652
|
+
### `category`
|
|
1653
|
+
- **Type**: `typing.Optional[str]`
|
|
1654
|
+
- **Optional**
|
|
1655
|
+
- Resource category (document, conversation, artifact, etc.)
|
|
1656
|
+
|
|
1657
|
+
### `related_entities`
|
|
1658
|
+
- **Type**: `list[dict]`
|
|
1659
|
+
- **Optional**
|
|
1660
|
+
- Extracted entities (people, projects, concepts) with metadata
|
|
1661
|
+
|
|
1662
|
+
### `image_width`
|
|
1663
|
+
- **Type**: `typing.Optional[int]`
|
|
1664
|
+
- **Optional**
|
|
1665
|
+
- Image width in pixels
|
|
1666
|
+
|
|
1667
|
+
### `image_height`
|
|
1668
|
+
- **Type**: `typing.Optional[int]`
|
|
1669
|
+
- **Optional**
|
|
1670
|
+
- Image height in pixels
|
|
1671
|
+
|
|
1672
|
+
### `image_format`
|
|
1673
|
+
- **Type**: `typing.Optional[str]`
|
|
1674
|
+
- **Optional**
|
|
1675
|
+
- Image format (PNG, JPEG, GIF, WebP)
|
|
1676
|
+
|
|
1677
|
+
### `vision_description`
|
|
1678
|
+
- **Type**: `typing.Optional[str]`
|
|
1679
|
+
- **Optional**
|
|
1680
|
+
- Vision LLM generated description (markdown, only for gold tier or sampled images)
|
|
1681
|
+
|
|
1682
|
+
### `vision_provider`
|
|
1683
|
+
- **Type**: `typing.Optional[str]`
|
|
1684
|
+
- **Optional**
|
|
1685
|
+
- Vision provider used (anthropic, gemini, openai)
|
|
1686
|
+
|
|
1687
|
+
### `vision_model`
|
|
1688
|
+
- **Type**: `typing.Optional[str]`
|
|
1689
|
+
- **Optional**
|
|
1690
|
+
- Vision model used for description
|
|
1691
|
+
|
|
1692
|
+
### `clip_embedding`
|
|
1693
|
+
- **Type**: `typing.Optional[list[float]]`
|
|
1694
|
+
- **Optional**
|
|
1695
|
+
- CLIP embedding vector (512 or 768 dimensions, from Jina AI or self-hosted)
|
|
1696
|
+
|
|
1697
|
+
### `clip_dimensions`
|
|
1698
|
+
- **Type**: `typing.Optional[int]`
|
|
1699
|
+
- **Optional**
|
|
1700
|
+
- CLIP embedding dimensionality (512 for jina-clip-v2, 768 for jina-clip-v1)
|
|
1701
|
+
|
|
1702
|
+
',
|
|
1703
|
+
'{"type": "object", "description": "\n Image-specific resource with CLIP embeddings.\n\n Stored in separate `image_resources` table with CLIP embeddings\n instead of text embeddings. This enables:\n - Multimodal search (text-to-image, image-to-image)\n - Proper dimensionality (512/768 for CLIP vs 1536 for text)\n - Cost tracking (CLIP tokens separate from text tokens)\n\n Embedding Strategy:\n - Default (when JINA_API_KEY set): Jina CLIP API (jina-clip-v2)\n - Future: Self-hosted OpenCLIP models via KEDA-scaled pods\n - Fallback: No embeddings (images searchable by metadata only)\n\n Vision LLM Strategy (tier/sampling gated):\n - Gold tier: Always get vision descriptions\n - Silver/Free: Probabilistic sampling (IMAGE_VLLM_SAMPLE_RATE)\n - Fallback: Basic metadata only\n\n Tenant isolation provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `image_resources` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Human-readable resource name (used as graph label). Auto-generated from uri+ordinal if not provided.", "entity_key": true, "title": "Name"}, "uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Content URI or identifier (file path, URL, etc.)", "title": "Uri"}, "ordinal": {"composite_key": true, "default": 0, "description": "Chunk ordinal for splitting large documents (0 for single-chunk resources)", "title": "Ordinal", "type": "integer"}, "content": {"default": "", "description": "Resource content text", "title": "Content", "type": "string"}, "timestamp": {"description": "Resource timestamp (content creation/publication time)", "format": "date-time", "title": "Timestamp", "type": "string"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Resource category (document, conversation, artifact, etc.)", "title": "Category"}, "related_entities": {"description": "Extracted entities (people, projects, concepts) with metadata", "items": {"additionalProperties": true, "type": "object"}, "title": "Related Entities", "type": "array"}, "image_width": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "Image width in pixels", "title": "Image Width"}, "image_height": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "Image height in pixels", "title": "Image Height"}, "image_format": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Image format (PNG, JPEG, GIF, WebP)", "title": "Image Format"}, "vision_description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Vision LLM generated description (markdown, only for gold tier or sampled images)", "title": "Vision Description"}, "vision_provider": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Vision provider used (anthropic, gemini, openai)", "title": "Vision Provider"}, "vision_model": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Vision model used for description", "title": "Vision Model"}, "clip_embedding": {"anyOf": [{"items": {"type": "number"}, "type": "array"}, {"type": "null"}], "default": null, "description": "CLIP embedding vector (512 or 768 dimensions, from Jina AI or self-hosted)", "title": "Clip Embedding"}, "clip_dimensions": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "CLIP embedding dimensionality (512 for jina-clip-v2, 768 for jina-clip-v1)", "title": "Clip Dimensions"}}, "required": [], "json_schema_extra": {"table_name": "image_resources", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.image_resource.ImageResource", "tools": ["search_rem"], "default_search_table": "image_resources", "has_embeddings": true}}'::jsonb,
|
|
1704
|
+
'entity',
|
|
1705
|
+
'{"table_name": "image_resources", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.image_resource.ImageResource"}'::jsonb
|
|
1706
|
+
)
|
|
1707
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
1708
|
+
name = EXCLUDED.name,
|
|
1709
|
+
content = EXCLUDED.content,
|
|
1710
|
+
spec = EXCLUDED.spec,
|
|
1711
|
+
category = EXCLUDED.category,
|
|
1712
|
+
metadata = EXCLUDED.metadata,
|
|
1713
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
1714
|
+
|
|
1715
|
+
-- Schema entry for Message (messages)
|
|
1716
|
+
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
|
|
1717
|
+
VALUES (
|
|
1718
|
+
'be36f9da-6df4-51ba-bb41-bf51246ecec1'::uuid,
|
|
1719
|
+
'system',
|
|
1720
|
+
'Message',
|
|
1721
|
+
'# Message
|
|
1722
|
+
|
|
1723
|
+
|
|
1724
|
+
Communication content unit.
|
|
1725
|
+
|
|
1726
|
+
Represents individual messages in conversations, chats, or other
|
|
1727
|
+
communication contexts. Tenant isolation is provided via CoreModel.tenant_id field.
|
|
1728
|
+
|
|
1729
|
+
Trace fields (trace_id, span_id) enable integration with OTEL/Phoenix
|
|
1730
|
+
for observability and feedback annotation.
|
|
1731
|
+
|
|
1732
|
+
|
|
1733
|
+
## Overview
|
|
1734
|
+
|
|
1735
|
+
The `Message` entity is stored in the `messages` table. Each record is uniquely
|
|
1736
|
+
identified by its `id` field for lookups and graph traversal.
|
|
1737
|
+
|
|
1738
|
+
## Search Capabilities
|
|
1739
|
+
|
|
1740
|
+
This schema includes the `search_rem` tool which supports:
|
|
1741
|
+
- **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
|
|
1742
|
+
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
1743
|
+
- **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM messages LIMIT 10`)
|
|
1744
|
+
- **SQL**: Complex queries (e.g., `SELECT * FROM messages WHERE ...`)
|
|
1745
|
+
|
|
1746
|
+
## Table Info
|
|
1747
|
+
|
|
1748
|
+
| Property | Value |
|
|
1749
|
+
|----------|-------|
|
|
1750
|
+
| Table | `messages` |
|
|
1751
|
+
| Entity Key | `id` |
|
|
1752
|
+
| Embedding Fields | `content` |
|
|
1753
|
+
| Tools | `search_rem` |
|
|
1754
|
+
|
|
1755
|
+
## Fields
|
|
1756
|
+
|
|
1757
|
+
### `id`
|
|
1758
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
1759
|
+
- **Optional**
|
|
1760
|
+
- Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
|
|
1761
|
+
|
|
1762
|
+
### `created_at`
|
|
1763
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
1764
|
+
- **Optional**
|
|
1765
|
+
- Entity creation timestamp
|
|
1766
|
+
|
|
1767
|
+
### `updated_at`
|
|
1768
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
1769
|
+
- **Optional**
|
|
1770
|
+
- Last update timestamp
|
|
1771
|
+
|
|
1772
|
+
### `deleted_at`
|
|
1773
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
1774
|
+
- **Optional**
|
|
1775
|
+
- Soft deletion timestamp
|
|
1776
|
+
|
|
1777
|
+
### `tenant_id`
|
|
1778
|
+
- **Type**: `typing.Optional[str]`
|
|
1779
|
+
- **Optional**
|
|
1780
|
+
- Tenant identifier for multi-tenancy isolation
|
|
1781
|
+
|
|
1782
|
+
### `user_id`
|
|
1783
|
+
- **Type**: `typing.Optional[str]`
|
|
1784
|
+
- **Optional**
|
|
1785
|
+
- Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
|
|
1786
|
+
|
|
1787
|
+
### `graph_edges`
|
|
1788
|
+
- **Type**: `list[dict]`
|
|
1789
|
+
- **Optional**
|
|
1790
|
+
- Knowledge graph edges stored as InlineEdge dicts
|
|
1791
|
+
|
|
1792
|
+
### `metadata`
|
|
1793
|
+
- **Type**: `<class ''dict''>`
|
|
1794
|
+
- **Optional**
|
|
1795
|
+
- Flexible metadata storage
|
|
1796
|
+
|
|
1797
|
+
### `tags`
|
|
1798
|
+
- **Type**: `list[str]`
|
|
1799
|
+
- **Optional**
|
|
1800
|
+
- Entity tags
|
|
1801
|
+
|
|
1802
|
+
### `content`
|
|
1803
|
+
- **Type**: `<class ''str''>`
|
|
1804
|
+
- **Required**
|
|
1805
|
+
- Message content text
|
|
1806
|
+
|
|
1807
|
+
### `message_type`
|
|
1808
|
+
- **Type**: `str | None`
|
|
1809
|
+
- **Optional**
|
|
1810
|
+
- Message type e.g. role: ''user'', ''assistant'', ''system'', ''tool''
|
|
1811
|
+
|
|
1812
|
+
### `session_id`
|
|
1813
|
+
- **Type**: `str | None`
|
|
1814
|
+
- **Optional**
|
|
1815
|
+
- Session identifier for tracking message context
|
|
1816
|
+
|
|
1817
|
+
### `prompt`
|
|
1818
|
+
- **Type**: `str | None`
|
|
1819
|
+
- **Optional**
|
|
1820
|
+
- Custom prompt used for this message (if overridden from default)
|
|
1821
|
+
|
|
1822
|
+
### `model`
|
|
1823
|
+
- **Type**: `str | None`
|
|
1824
|
+
- **Optional**
|
|
1825
|
+
- Model used for generating this message (provider:model format)
|
|
1826
|
+
|
|
1827
|
+
### `token_count`
|
|
1828
|
+
- **Type**: `int | None`
|
|
1829
|
+
- **Optional**
|
|
1830
|
+
- Token count for this message
|
|
1831
|
+
|
|
1832
|
+
### `trace_id`
|
|
1833
|
+
- **Type**: `str | None`
|
|
1834
|
+
- **Optional**
|
|
1835
|
+
- OTEL trace ID for observability integration
|
|
1836
|
+
|
|
1837
|
+
### `span_id`
|
|
1838
|
+
- **Type**: `str | None`
|
|
1839
|
+
- **Optional**
|
|
1840
|
+
- OTEL span ID for specific span reference
|
|
1841
|
+
|
|
1842
|
+
',
|
|
1843
|
+
'{"type": "object", "description": "\n Communication content unit.\n\n Represents individual messages in conversations, chats, or other\n communication contexts. Tenant isolation is provided via CoreModel.tenant_id field.\n\n Trace fields (trace_id, span_id) enable integration with OTEL/Phoenix\n for observability and feedback annotation.\n \n\nThis agent can search the `messages` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "content": {"description": "Message content text", "title": "Content", "type": "string"}, "message_type": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Message type e.g. role: ''user'', ''assistant'', ''system'', ''tool''", "title": "Message Type"}, "session_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Session identifier for tracking message context", "title": "Session Id"}, "prompt": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Custom prompt used for this message (if overridden from default)", "title": "Prompt"}, "model": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Model used for generating this message (provider:model format)", "title": "Model"}, "token_count": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "Token count for this message", "title": "Token Count"}, "trace_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "OTEL trace ID for observability integration", "title": "Trace Id"}, "span_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "OTEL span ID for specific span reference", "title": "Span Id"}}, "required": ["content"], "json_schema_extra": {"table_name": "messages", "entity_key_field": "id", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.message.Message", "tools": ["search_rem"], "default_search_table": "messages", "has_embeddings": true}}'::jsonb,
|
|
1844
|
+
'entity',
|
|
1845
|
+
'{"table_name": "messages", "entity_key_field": "id", "embedding_fields": ["content"], "fqn": "rem.models.entities.message.Message"}'::jsonb
|
|
1846
|
+
)
|
|
1847
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
1848
|
+
name = EXCLUDED.name,
|
|
1849
|
+
content = EXCLUDED.content,
|
|
1850
|
+
spec = EXCLUDED.spec,
|
|
1851
|
+
category = EXCLUDED.category,
|
|
1852
|
+
metadata = EXCLUDED.metadata,
|
|
1853
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
1854
|
+
|
|
1855
|
+
-- Schema entry for Moment (moments)
|
|
1856
|
+
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
|
|
1857
|
+
VALUES (
|
|
1858
|
+
'a08f0a8c-5bab-5bf5-9760-0e67bc69bd74'::uuid,
|
|
1859
|
+
'system',
|
|
1860
|
+
'Moment',
|
|
1861
|
+
'# Moment
|
|
1862
|
+
|
|
1863
|
+
|
|
1864
|
+
Temporal narrative extracted from resources.
|
|
1865
|
+
|
|
1866
|
+
Moments provide temporal structure and context for the REM graph,
|
|
1867
|
+
enabling time-based queries and understanding of when events occurred.
|
|
1868
|
+
Tenant isolation is provided via CoreModel.tenant_id field.
|
|
1869
|
+
|
|
1870
|
+
|
|
1871
|
+
## Overview
|
|
1872
|
+
|
|
1873
|
+
The `Moment` entity is stored in the `moments` table. Each record is uniquely
|
|
1874
|
+
identified by its `name` field for lookups and graph traversal.
|
|
1875
|
+
|
|
1876
|
+
## Search Capabilities
|
|
1877
|
+
|
|
1878
|
+
This schema includes the `search_rem` tool which supports:
|
|
1879
|
+
- **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
|
|
1880
|
+
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
1881
|
+
- **SEARCH**: Semantic vector search on summary (e.g., `SEARCH "concept" FROM moments LIMIT 10`)
|
|
1882
|
+
- **SQL**: Complex queries (e.g., `SELECT * FROM moments WHERE ...`)
|
|
1883
|
+
|
|
1884
|
+
## Table Info
|
|
1885
|
+
|
|
1886
|
+
| Property | Value |
|
|
1887
|
+
|----------|-------|
|
|
1888
|
+
| Table | `moments` |
|
|
1889
|
+
| Entity Key | `name` |
|
|
1890
|
+
| Embedding Fields | `summary` |
|
|
1891
|
+
| Tools | `search_rem` |
|
|
1892
|
+
|
|
1893
|
+
## Fields
|
|
1894
|
+
|
|
1895
|
+
### `id`
|
|
1896
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
1897
|
+
- **Optional**
|
|
1898
|
+
- Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
|
|
1899
|
+
|
|
1900
|
+
### `created_at`
|
|
1901
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
1902
|
+
- **Optional**
|
|
1903
|
+
- Entity creation timestamp
|
|
1904
|
+
|
|
1905
|
+
### `updated_at`
|
|
1906
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
1907
|
+
- **Optional**
|
|
1908
|
+
- Last update timestamp
|
|
1909
|
+
|
|
1910
|
+
### `deleted_at`
|
|
1911
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
1912
|
+
- **Optional**
|
|
1913
|
+
- Soft deletion timestamp
|
|
1914
|
+
|
|
1915
|
+
### `tenant_id`
|
|
1916
|
+
- **Type**: `typing.Optional[str]`
|
|
1917
|
+
- **Optional**
|
|
1918
|
+
- Tenant identifier for multi-tenancy isolation
|
|
1919
|
+
|
|
1920
|
+
### `user_id`
|
|
1921
|
+
- **Type**: `typing.Optional[str]`
|
|
1922
|
+
- **Optional**
|
|
1923
|
+
- Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
|
|
1924
|
+
|
|
1925
|
+
### `graph_edges`
|
|
1926
|
+
- **Type**: `list[dict]`
|
|
1927
|
+
- **Optional**
|
|
1928
|
+
- Knowledge graph edges stored as InlineEdge dicts
|
|
1929
|
+
|
|
1930
|
+
### `metadata`
|
|
1931
|
+
- **Type**: `<class ''dict''>`
|
|
1932
|
+
- **Optional**
|
|
1933
|
+
- Flexible metadata storage
|
|
1934
|
+
|
|
1935
|
+
### `tags`
|
|
1936
|
+
- **Type**: `list[str]`
|
|
1937
|
+
- **Optional**
|
|
1938
|
+
- Entity tags
|
|
1939
|
+
|
|
1940
|
+
### `name`
|
|
1941
|
+
- **Type**: `typing.Optional[str]`
|
|
1942
|
+
- **Optional**
|
|
1943
|
+
- Human-readable moment name (used as graph label). Auto-generated from starts_timestamp+moment_type if not provided.
|
|
1944
|
+
|
|
1945
|
+
### `moment_type`
|
|
1946
|
+
- **Type**: `typing.Optional[str]`
|
|
1947
|
+
- **Optional**
|
|
1948
|
+
- Moment classification (meeting, coding-session, conversation, etc.)
|
|
1949
|
+
|
|
1950
|
+
### `category`
|
|
1951
|
+
- **Type**: `typing.Optional[str]`
|
|
1952
|
+
- **Optional**
|
|
1953
|
+
- Moment category for grouping and filtering
|
|
1954
|
+
|
|
1955
|
+
### `starts_timestamp`
|
|
1956
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
1957
|
+
- **Required**
|
|
1958
|
+
- Moment start time
|
|
1959
|
+
|
|
1960
|
+
### `ends_timestamp`
|
|
1961
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
1962
|
+
- **Optional**
|
|
1963
|
+
- Moment end time
|
|
1964
|
+
|
|
1965
|
+
### `present_persons`
|
|
1966
|
+
- **Type**: `list[rem.models.entities.moment.Person]`
|
|
1967
|
+
- **Optional**
|
|
1968
|
+
- People present in the moment
|
|
1969
|
+
|
|
1970
|
+
### `emotion_tags`
|
|
1971
|
+
- **Type**: `list[str]`
|
|
1972
|
+
- **Optional**
|
|
1973
|
+
- Emotion/sentiment tags (happy, frustrated, focused, etc.)
|
|
1974
|
+
|
|
1975
|
+
### `topic_tags`
|
|
1976
|
+
- **Type**: `list[str]`
|
|
1977
|
+
- **Optional**
|
|
1978
|
+
- Topic/concept tags (project names, technologies, etc.)
|
|
1979
|
+
|
|
1980
|
+
### `summary`
|
|
1981
|
+
- **Type**: `typing.Optional[str]`
|
|
1982
|
+
- **Optional**
|
|
1983
|
+
- Natural language summary of the moment
|
|
1984
|
+
|
|
1985
|
+
### `source_resource_ids`
|
|
1986
|
+
- **Type**: `list[str]`
|
|
1987
|
+
- **Optional**
|
|
1988
|
+
- Resource IDs used to construct this moment
|
|
1989
|
+
|
|
1990
|
+
',
|
|
1991
|
+
'{"type": "object", "description": "\n Temporal narrative extracted from resources.\n\n Moments provide temporal structure and context for the REM graph,\n enabling time-based queries and understanding of when events occurred.\n Tenant isolation is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `moments` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Human-readable moment name (used as graph label). Auto-generated from starts_timestamp+moment_type if not provided.", "entity_key": true, "title": "Name"}, "moment_type": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Moment classification (meeting, coding-session, conversation, etc.)", "title": "Moment Type"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Moment category for grouping and filtering", "title": "Category"}, "starts_timestamp": {"description": "Moment start time", "format": "date-time", "title": "Starts Timestamp", "type": "string"}, "ends_timestamp": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Moment end time", "title": "Ends Timestamp"}, "present_persons": {"description": "People present in the moment", "items": {"$ref": "#/$defs/Person"}, "title": "Present Persons", "type": "array"}, "emotion_tags": {"description": "Emotion/sentiment tags (happy, frustrated, focused, etc.)", "items": {"type": "string"}, "title": "Emotion Tags", "type": "array"}, "topic_tags": {"description": "Topic/concept tags (project names, technologies, etc.)", "items": {"type": "string"}, "title": "Topic Tags", "type": "array"}, "summary": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Natural language summary of the moment", "title": "Summary"}, "source_resource_ids": {"description": "Resource IDs used to construct this moment", "items": {"type": "string"}, "title": "Source Resource Ids", "type": "array"}}, "required": ["starts_timestamp"], "json_schema_extra": {"table_name": "moments", "entity_key_field": "name", "embedding_fields": ["summary"], "fully_qualified_name": "rem.models.entities.moment.Moment", "tools": ["search_rem"], "default_search_table": "moments", "has_embeddings": true}}'::jsonb,
|
|
1992
|
+
'entity',
|
|
1993
|
+
'{"table_name": "moments", "entity_key_field": "name", "embedding_fields": ["summary"], "fqn": "rem.models.entities.moment.Moment"}'::jsonb
|
|
1994
|
+
)
|
|
1995
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
1996
|
+
name = EXCLUDED.name,
|
|
1997
|
+
content = EXCLUDED.content,
|
|
1998
|
+
spec = EXCLUDED.spec,
|
|
1999
|
+
category = EXCLUDED.category,
|
|
2000
|
+
metadata = EXCLUDED.metadata,
|
|
2001
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
2002
|
+
|
|
2003
|
+
-- Schema entry for Ontology (ontologies)
|
|
2004
|
+
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
|
|
2005
|
+
VALUES (
|
|
2006
|
+
'a702ed74-8988-534a-9917-2977349777c1'::uuid,
|
|
2007
|
+
'system',
|
|
2008
|
+
'Ontology',
|
|
2009
|
+
'# Ontology
|
|
2010
|
+
|
|
2011
|
+
Domain-specific knowledge extracted from files using custom agents.
|
|
2012
|
+
|
|
2013
|
+
Attributes:
|
|
2014
|
+
name: Human-readable label for this ontology instance
|
|
2015
|
+
file_id: Foreign key to File entity that was processed
|
|
2016
|
+
agent_schema_id: Foreign key to Schema entity that performed extraction
|
|
2017
|
+
provider_name: LLM provider used for extraction (e.g., "anthropic", "openai")
|
|
2018
|
+
model_name: Specific model used (e.g., "claude-sonnet-4-5")
|
|
2019
|
+
extracted_data: Structured data extracted by agent (arbitrary JSON)
|
|
2020
|
+
confidence_score: Optional confidence score from extraction (0.0-1.0)
|
|
2021
|
+
extraction_timestamp: When extraction was performed
|
|
2022
|
+
embedding_text: Text used for generating embedding (derived from extracted_data)
|
|
2023
|
+
|
|
2024
|
+
Inherited from CoreModel:
|
|
2025
|
+
id: UUID or string identifier
|
|
2026
|
+
created_at: Entity creation timestamp
|
|
2027
|
+
updated_at: Last update timestamp
|
|
2028
|
+
deleted_at: Soft deletion timestamp
|
|
2029
|
+
tenant_id: Multi-tenancy isolation
|
|
2030
|
+
user_id: Ownership
|
|
2031
|
+
graph_edges: Relationships to other entities
|
|
2032
|
+
metadata: Flexible metadata storage
|
|
2033
|
+
tags: Classification tags
|
|
2034
|
+
column: Database schema metadata
|
|
2035
|
+
|
|
2036
|
+
Example Usage:
|
|
2037
|
+
# CV extraction
|
|
2038
|
+
cv_ontology = Ontology(
|
|
2039
|
+
name="john-doe-cv-2024",
|
|
2040
|
+
file_id="file-uuid-123",
|
|
2041
|
+
agent_schema_id="cv-parser-v1",
|
|
2042
|
+
provider_name="anthropic",
|
|
2043
|
+
model_name="claude-sonnet-4-5-20250929",
|
|
2044
|
+
extracted_data={
|
|
2045
|
+
"candidate_name": "John Doe",
|
|
2046
|
+
"email": "john@example.com",
|
|
2047
|
+
"skills": ["Python", "PostgreSQL", "Kubernetes"],
|
|
2048
|
+
"experience": [
|
|
2049
|
+
{
|
|
2050
|
+
"company": "TechCorp",
|
|
2051
|
+
"role": "Senior Engineer",
|
|
2052
|
+
"years": 3,
|
|
2053
|
+
"achievements": ["Led migration to k8s", "Reduced costs 40%"]
|
|
2054
|
+
}
|
|
2055
|
+
],
|
|
2056
|
+
"education": [
|
|
2057
|
+
{"degree": "BS Computer Science", "institution": "MIT", "year": 2018}
|
|
2058
|
+
]
|
|
2059
|
+
},
|
|
2060
|
+
confidence_score=0.95,
|
|
2061
|
+
tags=["cv", "engineering", "senior-level"]
|
|
2062
|
+
)
|
|
2063
|
+
|
|
2064
|
+
# Contract extraction
|
|
2065
|
+
contract_ontology = Ontology(
|
|
2066
|
+
name="acme-supplier-agreement-2024",
|
|
2067
|
+
file_id="file-uuid-456",
|
|
2068
|
+
agent_schema_id="contract-parser-v2",
|
|
2069
|
+
provider_name="openai",
|
|
2070
|
+
model_name="gpt-4.1",
|
|
2071
|
+
extracted_data={
|
|
2072
|
+
"contract_type": "supplier_agreement",
|
|
2073
|
+
"parties": [
|
|
2074
|
+
{"name": "ACME Corp", "role": "buyer"},
|
|
2075
|
+
{"name": "SupplyChain Inc", "role": "supplier"}
|
|
2076
|
+
],
|
|
2077
|
+
"effective_date": "2024-01-01",
|
|
2078
|
+
"termination_date": "2026-12-31",
|
|
2079
|
+
"payment_terms": {
|
|
2080
|
+
"amount": 500000,
|
|
2081
|
+
"currency": "USD",
|
|
2082
|
+
"frequency": "quarterly"
|
|
2083
|
+
},
|
|
2084
|
+
"key_obligations": [
|
|
2085
|
+
"Supplier must deliver within 30 days",
|
|
2086
|
+
"Buyer must pay within 60 days of invoice"
|
|
2087
|
+
]
|
|
2088
|
+
},
|
|
2089
|
+
confidence_score=0.92,
|
|
2090
|
+
tags=["contract", "supplier", "procurement"]
|
|
2091
|
+
)
|
|
2092
|
+
|
|
2093
|
+
|
|
2094
|
+
## Overview
|
|
2095
|
+
|
|
2096
|
+
The `Ontology` entity is stored in the `ontologies` table. Each record is uniquely
|
|
2097
|
+
identified by its `id` field for lookups and graph traversal.
|
|
2098
|
+
|
|
2099
|
+
## Search Capabilities
|
|
2100
|
+
|
|
2101
|
+
This schema includes the `search_rem` tool which supports:
|
|
2102
|
+
- **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
|
|
2103
|
+
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
2104
|
+
- **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM ontologies LIMIT 10`)
|
|
2105
|
+
- **SQL**: Complex queries (e.g., `SELECT * FROM ontologies WHERE ...`)
|
|
2106
|
+
|
|
2107
|
+
## Table Info
|
|
2108
|
+
|
|
2109
|
+
| Property | Value |
|
|
2110
|
+
|----------|-------|
|
|
2111
|
+
| Table | `ontologies` |
|
|
2112
|
+
| Entity Key | `id` |
|
|
2113
|
+
| Embedding Fields | None |
|
|
2114
|
+
| Tools | `search_rem` |
|
|
2115
|
+
|
|
2116
|
+
## Fields
|
|
2117
|
+
|
|
2118
|
+
### `id`
|
|
2119
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
2120
|
+
- **Optional**
|
|
2121
|
+
- Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
|
|
2122
|
+
|
|
2123
|
+
### `created_at`
|
|
2124
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2125
|
+
- **Optional**
|
|
2126
|
+
- Entity creation timestamp
|
|
2127
|
+
|
|
2128
|
+
### `updated_at`
|
|
2129
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2130
|
+
- **Optional**
|
|
2131
|
+
- Last update timestamp
|
|
2132
|
+
|
|
2133
|
+
### `deleted_at`
|
|
2134
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
2135
|
+
- **Optional**
|
|
2136
|
+
- Soft deletion timestamp
|
|
2137
|
+
|
|
2138
|
+
### `tenant_id`
|
|
2139
|
+
- **Type**: `typing.Optional[str]`
|
|
2140
|
+
- **Optional**
|
|
2141
|
+
- Tenant identifier for multi-tenancy isolation
|
|
2142
|
+
|
|
2143
|
+
### `user_id`
|
|
2144
|
+
- **Type**: `typing.Optional[str]`
|
|
2145
|
+
- **Optional**
|
|
2146
|
+
- Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
|
|
2147
|
+
|
|
2148
|
+
### `graph_edges`
|
|
2149
|
+
- **Type**: `list[dict]`
|
|
2150
|
+
- **Optional**
|
|
2151
|
+
- Knowledge graph edges stored as InlineEdge dicts
|
|
2152
|
+
|
|
2153
|
+
### `metadata`
|
|
2154
|
+
- **Type**: `<class ''dict''>`
|
|
2155
|
+
- **Optional**
|
|
2156
|
+
- Flexible metadata storage
|
|
2157
|
+
|
|
2158
|
+
### `tags`
|
|
2159
|
+
- **Type**: `list[str]`
|
|
2160
|
+
- **Optional**
|
|
2161
|
+
- Entity tags
|
|
2162
|
+
|
|
2163
|
+
### `name`
|
|
2164
|
+
- **Type**: `<class ''str''>`
|
|
2165
|
+
- **Required**
|
|
2166
|
+
|
|
2167
|
+
### `file_id`
|
|
2168
|
+
- **Type**: `uuid.UUID | str`
|
|
2169
|
+
- **Required**
|
|
2170
|
+
|
|
2171
|
+
### `agent_schema_id`
|
|
2172
|
+
- **Type**: `<class ''str''>`
|
|
2173
|
+
- **Required**
|
|
2174
|
+
|
|
2175
|
+
### `provider_name`
|
|
2176
|
+
- **Type**: `<class ''str''>`
|
|
2177
|
+
- **Required**
|
|
2178
|
+
|
|
2179
|
+
### `model_name`
|
|
2180
|
+
- **Type**: `<class ''str''>`
|
|
2181
|
+
- **Required**
|
|
2182
|
+
|
|
2183
|
+
### `extracted_data`
|
|
2184
|
+
- **Type**: `dict[str, typing.Any]`
|
|
2185
|
+
- **Required**
|
|
2186
|
+
|
|
2187
|
+
### `confidence_score`
|
|
2188
|
+
- **Type**: `typing.Optional[float]`
|
|
2189
|
+
- **Optional**
|
|
2190
|
+
|
|
2191
|
+
### `extraction_timestamp`
|
|
2192
|
+
- **Type**: `typing.Optional[str]`
|
|
2193
|
+
- **Optional**
|
|
2194
|
+
|
|
2195
|
+
### `embedding_text`
|
|
2196
|
+
- **Type**: `typing.Optional[str]`
|
|
2197
|
+
- **Optional**
|
|
2198
|
+
|
|
2199
|
+
',
|
|
2200
|
+
'{"type": "object", "description": "Domain-specific knowledge extracted from files using custom agents.\n\n Attributes:\n name: Human-readable label for this ontology instance\n file_id: Foreign key to File entity that was processed\n agent_schema_id: Foreign key to Schema entity that performed extraction\n provider_name: LLM provider used for extraction (e.g., \"anthropic\", \"openai\")\n model_name: Specific model used (e.g., \"claude-sonnet-4-5\")\n extracted_data: Structured data extracted by agent (arbitrary JSON)\n confidence_score: Optional confidence score from extraction (0.0-1.0)\n extraction_timestamp: When extraction was performed\n embedding_text: Text used for generating embedding (derived from extracted_data)\n\n Inherited from CoreModel:\n id: UUID or string identifier\n created_at: Entity creation timestamp\n updated_at: Last update timestamp\n deleted_at: Soft deletion timestamp\n tenant_id: Multi-tenancy isolation\n user_id: Ownership\n graph_edges: Relationships to other entities\n metadata: Flexible metadata storage\n tags: Classification tags\n column: Database schema metadata\n\n Example Usage:\n # CV extraction\n cv_ontology = Ontology(\n name=\"john-doe-cv-2024\",\n file_id=\"file-uuid-123\",\n agent_schema_id=\"cv-parser-v1\",\n provider_name=\"anthropic\",\n model_name=\"claude-sonnet-4-5-20250929\",\n extracted_data={\n \"candidate_name\": \"John Doe\",\n \"email\": \"john@example.com\",\n \"skills\": [\"Python\", \"PostgreSQL\", \"Kubernetes\"],\n \"experience\": [\n {\n \"company\": \"TechCorp\",\n \"role\": \"Senior Engineer\",\n \"years\": 3,\n \"achievements\": [\"Led migration to k8s\", \"Reduced costs 40%\"]\n }\n ],\n \"education\": [\n {\"degree\": \"BS Computer Science\", \"institution\": \"MIT\", \"year\": 2018}\n ]\n },\n confidence_score=0.95,\n tags=[\"cv\", \"engineering\", \"senior-level\"]\n )\n\n # Contract extraction\n contract_ontology = Ontology(\n name=\"acme-supplier-agreement-2024\",\n file_id=\"file-uuid-456\",\n agent_schema_id=\"contract-parser-v2\",\n provider_name=\"openai\",\n model_name=\"gpt-4.1\",\n extracted_data={\n \"contract_type\": \"supplier_agreement\",\n \"parties\": [\n {\"name\": \"ACME Corp\", \"role\": \"buyer\"},\n {\"name\": \"SupplyChain Inc\", \"role\": \"supplier\"}\n ],\n \"effective_date\": \"2024-01-01\",\n \"termination_date\": \"2026-12-31\",\n \"payment_terms\": {\n \"amount\": 500000,\n \"currency\": \"USD\",\n \"frequency\": \"quarterly\"\n },\n \"key_obligations\": [\n \"Supplier must deliver within 30 days\",\n \"Buyer must pay within 60 days of invoice\"\n ]\n },\n confidence_score=0.92,\n tags=[\"contract\", \"supplier\", \"procurement\"]\n )\n \n\nThis agent can search the `ontologies` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "file_id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}], "title": "File Id"}, "agent_schema_id": {"title": "Agent Schema Id", "type": "string"}, "provider_name": {"title": "Provider Name", "type": "string"}, "model_name": {"title": "Model Name", "type": "string"}, "extracted_data": {"additionalProperties": true, "title": "Extracted Data", "type": "object"}, "confidence_score": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Confidence Score"}, "extraction_timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Extraction Timestamp"}, "embedding_text": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Embedding Text"}}, "required": ["name", "file_id", "agent_schema_id", "provider_name", "model_name", "extracted_data"], "json_schema_extra": {"table_name": "ontologies", "entity_key_field": "id", "embedding_fields": [], "fully_qualified_name": "rem.models.entities.ontology.Ontology", "tools": ["search_rem"], "default_search_table": "ontologies", "has_embeddings": false}}'::jsonb,
|
|
2201
|
+
'entity',
|
|
2202
|
+
'{"table_name": "ontologies", "entity_key_field": "id", "embedding_fields": [], "fqn": "rem.models.entities.ontology.Ontology"}'::jsonb
|
|
2203
|
+
)
|
|
2204
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
2205
|
+
name = EXCLUDED.name,
|
|
2206
|
+
content = EXCLUDED.content,
|
|
2207
|
+
spec = EXCLUDED.spec,
|
|
2208
|
+
category = EXCLUDED.category,
|
|
2209
|
+
metadata = EXCLUDED.metadata,
|
|
2210
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
2211
|
+
|
|
2212
|
+
-- Schema entry for OntologyConfig (ontology_configs)
|
|
2213
|
+
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
|
|
2214
|
+
VALUES (
|
|
2215
|
+
'9a7e50d0-ef3a-5641-9ff4-b2be5a77053b'::uuid,
|
|
2216
|
+
'system',
|
|
2217
|
+
'OntologyConfig',
|
|
2218
|
+
'# OntologyConfig
|
|
2219
|
+
|
|
2220
|
+
User configuration for automatic ontology extraction.
|
|
2221
|
+
|
|
2222
|
+
Attributes:
|
|
2223
|
+
name: Human-readable config name
|
|
2224
|
+
agent_schema_id: Foreign key to Schema entity to use for extraction
|
|
2225
|
+
description: Purpose and scope of this config
|
|
2226
|
+
|
|
2227
|
+
# File matching rules (ANY matching rule triggers extraction)
|
|
2228
|
+
mime_type_pattern: Regex pattern for file MIME types (e.g., "application/pdf")
|
|
2229
|
+
uri_pattern: Regex pattern for file URIs (e.g., "s3://bucket/resumes/.*")
|
|
2230
|
+
tag_filter: List of tags (file must have ALL tags to match)
|
|
2231
|
+
|
|
2232
|
+
# Execution control
|
|
2233
|
+
priority: Execution order (higher = earlier, default 100)
|
|
2234
|
+
enabled: Whether this config is active (default True)
|
|
2235
|
+
|
|
2236
|
+
# LLM provider configuration
|
|
2237
|
+
provider_name: Optional LLM provider override (defaults to settings)
|
|
2238
|
+
model_name: Optional model override (defaults to settings)
|
|
2239
|
+
|
|
2240
|
+
Inherited from CoreModel:
|
|
2241
|
+
id, created_at, updated_at, deleted_at, tenant_id, user_id,
|
|
2242
|
+
graph_edges, metadata, tags, column
|
|
2243
|
+
|
|
2244
|
+
Example Usage:
|
|
2245
|
+
# CV extraction for recruitment
|
|
2246
|
+
cv_config = OntologyConfig(
|
|
2247
|
+
name="recruitment-cv-parser",
|
|
2248
|
+
agent_schema_id="cv-parser-v1",
|
|
2249
|
+
description="Extract candidate information from resumes",
|
|
2250
|
+
mime_type_pattern="application/pdf",
|
|
2251
|
+
uri_pattern=".*/resumes/.*",
|
|
2252
|
+
tag_filter=["cv", "candidate"],
|
|
2253
|
+
priority=100,
|
|
2254
|
+
enabled=True,
|
|
2255
|
+
tenant_id="acme-corp",
|
|
2256
|
+
tags=["recruitment", "hr"]
|
|
2257
|
+
)
|
|
2258
|
+
|
|
2259
|
+
# Contract analysis for legal team
|
|
2260
|
+
contract_config = OntologyConfig(
|
|
2261
|
+
name="legal-contract-analyzer",
|
|
2262
|
+
agent_schema_id="contract-parser-v2",
|
|
2263
|
+
description="Extract key terms from supplier contracts",
|
|
2264
|
+
mime_type_pattern="application/(pdf|msword|vnd.openxmlformats.*)",
|
|
2265
|
+
tag_filter=["legal", "contract"],
|
|
2266
|
+
priority=200, # Higher priority = runs first
|
|
2267
|
+
enabled=True,
|
|
2268
|
+
provider_name="openai", # Override default provider
|
|
2269
|
+
model_name="gpt-4.1",
|
|
2270
|
+
tenant_id="acme-corp",
|
|
2271
|
+
tags=["legal", "procurement"]
|
|
2272
|
+
)
|
|
2273
|
+
|
|
2274
|
+
# Medical records for healthcare
|
|
2275
|
+
medical_config = OntologyConfig(
|
|
2276
|
+
name="medical-records-extractor",
|
|
2277
|
+
agent_schema_id="medical-parser-v1",
|
|
2278
|
+
description="Extract diagnoses and treatments from medical records",
|
|
2279
|
+
mime_type_pattern="application/pdf",
|
|
2280
|
+
tag_filter=["medical", "patient-record"],
|
|
2281
|
+
priority=50,
|
|
2282
|
+
enabled=True,
|
|
2283
|
+
tenant_id="healthsystem",
|
|
2284
|
+
tags=["medical", "hipaa-compliant"]
|
|
2285
|
+
)
|
|
2286
|
+
|
|
2287
|
+
|
|
2288
|
+
## Overview
|
|
2289
|
+
|
|
2290
|
+
The `OntologyConfig` entity is stored in the `ontology_configs` table. Each record is uniquely
|
|
2291
|
+
identified by its `id` field for lookups and graph traversal.
|
|
2292
|
+
|
|
2293
|
+
## Search Capabilities
|
|
2294
|
+
|
|
2295
|
+
This schema includes the `search_rem` tool which supports:
|
|
2296
|
+
- **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
|
|
2297
|
+
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
2298
|
+
- **SEARCH**: Semantic vector search on description (e.g., `SEARCH "concept" FROM ontology_configs LIMIT 10`)
|
|
2299
|
+
- **SQL**: Complex queries (e.g., `SELECT * FROM ontology_configs WHERE ...`)
|
|
2300
|
+
|
|
2301
|
+
## Table Info
|
|
2302
|
+
|
|
2303
|
+
| Property | Value |
|
|
2304
|
+
|----------|-------|
|
|
2305
|
+
| Table | `ontology_configs` |
|
|
2306
|
+
| Entity Key | `id` |
|
|
2307
|
+
| Embedding Fields | `description` |
|
|
2308
|
+
| Tools | `search_rem` |
|
|
2309
|
+
|
|
2310
|
+
## Fields
|
|
2311
|
+
|
|
2312
|
+
### `id`
|
|
2313
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
2314
|
+
- **Optional**
|
|
2315
|
+
- Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
|
|
2316
|
+
|
|
2317
|
+
### `created_at`
|
|
2318
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2319
|
+
- **Optional**
|
|
2320
|
+
- Entity creation timestamp
|
|
2321
|
+
|
|
2322
|
+
### `updated_at`
|
|
2323
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2324
|
+
- **Optional**
|
|
2325
|
+
- Last update timestamp
|
|
2326
|
+
|
|
2327
|
+
### `deleted_at`
|
|
2328
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
2329
|
+
- **Optional**
|
|
2330
|
+
- Soft deletion timestamp
|
|
2331
|
+
|
|
2332
|
+
### `tenant_id`
|
|
2333
|
+
- **Type**: `typing.Optional[str]`
|
|
2334
|
+
- **Optional**
|
|
2335
|
+
- Tenant identifier for multi-tenancy isolation
|
|
2336
|
+
|
|
2337
|
+
### `user_id`
|
|
2338
|
+
- **Type**: `typing.Optional[str]`
|
|
2339
|
+
- **Optional**
|
|
2340
|
+
- Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
|
|
2341
|
+
|
|
2342
|
+
### `graph_edges`
|
|
2343
|
+
- **Type**: `list[dict]`
|
|
2344
|
+
- **Optional**
|
|
2345
|
+
- Knowledge graph edges stored as InlineEdge dicts
|
|
2346
|
+
|
|
2347
|
+
### `metadata`
|
|
2348
|
+
- **Type**: `<class ''dict''>`
|
|
2349
|
+
- **Optional**
|
|
2350
|
+
- Flexible metadata storage
|
|
2351
|
+
|
|
2352
|
+
### `tags`
|
|
2353
|
+
- **Type**: `list[str]`
|
|
2354
|
+
- **Optional**
|
|
2355
|
+
- Entity tags
|
|
2356
|
+
|
|
2357
|
+
### `name`
|
|
2358
|
+
- **Type**: `<class ''str''>`
|
|
2359
|
+
- **Required**
|
|
2360
|
+
|
|
2361
|
+
### `agent_schema_id`
|
|
2362
|
+
- **Type**: `<class ''str''>`
|
|
2363
|
+
- **Required**
|
|
2364
|
+
|
|
2365
|
+
### `description`
|
|
2366
|
+
- **Type**: `typing.Optional[str]`
|
|
2367
|
+
- **Optional**
|
|
2368
|
+
|
|
2369
|
+
### `mime_type_pattern`
|
|
2370
|
+
- **Type**: `typing.Optional[str]`
|
|
2371
|
+
- **Optional**
|
|
2372
|
+
|
|
2373
|
+
### `uri_pattern`
|
|
2374
|
+
- **Type**: `typing.Optional[str]`
|
|
2375
|
+
- **Optional**
|
|
2376
|
+
|
|
2377
|
+
### `tag_filter`
|
|
2378
|
+
- **Type**: `list[str]`
|
|
2379
|
+
- **Optional**
|
|
2380
|
+
|
|
2381
|
+
### `priority`
|
|
2382
|
+
- **Type**: `<class ''int''>`
|
|
2383
|
+
- **Optional**
|
|
2384
|
+
|
|
2385
|
+
### `enabled`
|
|
2386
|
+
- **Type**: `<class ''bool''>`
|
|
2387
|
+
- **Optional**
|
|
2388
|
+
|
|
2389
|
+
### `provider_name`
|
|
2390
|
+
- **Type**: `typing.Optional[str]`
|
|
2391
|
+
- **Optional**
|
|
2392
|
+
|
|
2393
|
+
### `model_name`
|
|
2394
|
+
- **Type**: `typing.Optional[str]`
|
|
2395
|
+
- **Optional**
|
|
2396
|
+
|
|
2397
|
+
',
|
|
2398
|
+
'{"type": "object", "description": "User configuration for automatic ontology extraction.\n\n Attributes:\n name: Human-readable config name\n agent_schema_id: Foreign key to Schema entity to use for extraction\n description: Purpose and scope of this config\n\n # File matching rules (ANY matching rule triggers extraction)\n mime_type_pattern: Regex pattern for file MIME types (e.g., \"application/pdf\")\n uri_pattern: Regex pattern for file URIs (e.g., \"s3://bucket/resumes/.*\")\n tag_filter: List of tags (file must have ALL tags to match)\n\n # Execution control\n priority: Execution order (higher = earlier, default 100)\n enabled: Whether this config is active (default True)\n\n # LLM provider configuration\n provider_name: Optional LLM provider override (defaults to settings)\n model_name: Optional model override (defaults to settings)\n\n Inherited from CoreModel:\n id, created_at, updated_at, deleted_at, tenant_id, user_id,\n graph_edges, metadata, tags, column\n\n Example Usage:\n # CV extraction for recruitment\n cv_config = OntologyConfig(\n name=\"recruitment-cv-parser\",\n agent_schema_id=\"cv-parser-v1\",\n description=\"Extract candidate information from resumes\",\n mime_type_pattern=\"application/pdf\",\n uri_pattern=\".*/resumes/.*\",\n tag_filter=[\"cv\", \"candidate\"],\n priority=100,\n enabled=True,\n tenant_id=\"acme-corp\",\n tags=[\"recruitment\", \"hr\"]\n )\n\n # Contract analysis for legal team\n contract_config = OntologyConfig(\n name=\"legal-contract-analyzer\",\n agent_schema_id=\"contract-parser-v2\",\n description=\"Extract key terms from supplier contracts\",\n mime_type_pattern=\"application/(pdf|msword|vnd.openxmlformats.*)\",\n tag_filter=[\"legal\", \"contract\"],\n priority=200, # Higher priority = runs first\n enabled=True,\n provider_name=\"openai\", # Override default provider\n model_name=\"gpt-4.1\",\n tenant_id=\"acme-corp\",\n tags=[\"legal\", \"procurement\"]\n )\n\n # Medical records for healthcare\n medical_config = OntologyConfig(\n name=\"medical-records-extractor\",\n agent_schema_id=\"medical-parser-v1\",\n description=\"Extract diagnoses and treatments from medical records\",\n mime_type_pattern=\"application/pdf\",\n tag_filter=[\"medical\", \"patient-record\"],\n priority=50,\n enabled=True,\n tenant_id=\"healthsystem\",\n tags=[\"medical\", \"hipaa-compliant\"]\n )\n \n\nThis agent can search the `ontology_configs` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "agent_schema_id": {"title": "Agent Schema Id", "type": "string"}, "description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Description"}, "mime_type_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Mime Type Pattern"}, "uri_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri Pattern"}, "tag_filter": {"default": [], "items": {"type": "string"}, "title": "Tag Filter", "type": "array"}, "priority": {"default": 100, "title": "Priority", "type": "integer"}, "enabled": {"default": true, "title": "Enabled", "type": "boolean"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}}, "required": ["name", "agent_schema_id"], "json_schema_extra": {"table_name": "ontology_configs", "entity_key_field": "id", "embedding_fields": ["description"], "fully_qualified_name": "rem.models.entities.ontology_config.OntologyConfig", "tools": ["search_rem"], "default_search_table": "ontology_configs", "has_embeddings": true}}'::jsonb,
|
|
2399
|
+
'entity',
|
|
2400
|
+
'{"table_name": "ontology_configs", "entity_key_field": "id", "embedding_fields": ["description"], "fqn": "rem.models.entities.ontology_config.OntologyConfig"}'::jsonb
|
|
2401
|
+
)
|
|
2402
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
2403
|
+
name = EXCLUDED.name,
|
|
2404
|
+
content = EXCLUDED.content,
|
|
2405
|
+
spec = EXCLUDED.spec,
|
|
2406
|
+
category = EXCLUDED.category,
|
|
2407
|
+
metadata = EXCLUDED.metadata,
|
|
2408
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
2409
|
+
|
|
2410
|
+
-- Schema entry for Resource (resources)
|
|
2411
|
+
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
|
|
2412
|
+
VALUES (
|
|
2413
|
+
'a579f379-4f1c-5414-8ff4-1382d0f783b7'::uuid,
|
|
2414
|
+
'system',
|
|
2415
|
+
'Resource',
|
|
2416
|
+
'# Resource
|
|
2417
|
+
|
|
2418
|
+
|
|
2419
|
+
Base content unit in REM.
|
|
2420
|
+
|
|
2421
|
+
Resources are content units that feed into dreaming workflows for moment
|
|
2422
|
+
extraction and affinity graph construction. Tenant isolation is provided
|
|
2423
|
+
via CoreModel.tenant_id field.
|
|
2424
|
+
|
|
2425
|
+
|
|
2426
|
+
## Overview
|
|
2427
|
+
|
|
2428
|
+
The `Resource` entity is stored in the `resources` table. Each record is uniquely
|
|
2429
|
+
identified by its `name` field for lookups and graph traversal.
|
|
2430
|
+
|
|
2431
|
+
## Search Capabilities
|
|
2432
|
+
|
|
2433
|
+
This schema includes the `search_rem` tool which supports:
|
|
2434
|
+
- **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
|
|
2435
|
+
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
2436
|
+
- **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM resources LIMIT 10`)
|
|
2437
|
+
- **SQL**: Complex queries (e.g., `SELECT * FROM resources WHERE ...`)
|
|
2438
|
+
|
|
2439
|
+
## Table Info
|
|
2440
|
+
|
|
2441
|
+
| Property | Value |
|
|
2442
|
+
|----------|-------|
|
|
2443
|
+
| Table | `resources` |
|
|
2444
|
+
| Entity Key | `name` |
|
|
2445
|
+
| Embedding Fields | `content` |
|
|
2446
|
+
| Tools | `search_rem` |
|
|
2447
|
+
|
|
2448
|
+
## Fields
|
|
2449
|
+
|
|
2450
|
+
### `id`
|
|
2451
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
2452
|
+
- **Optional**
|
|
2453
|
+
- Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
|
|
2454
|
+
|
|
2455
|
+
### `created_at`
|
|
2456
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2457
|
+
- **Optional**
|
|
2458
|
+
- Entity creation timestamp
|
|
2459
|
+
|
|
2460
|
+
### `updated_at`
|
|
2461
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2462
|
+
- **Optional**
|
|
2463
|
+
- Last update timestamp
|
|
2464
|
+
|
|
2465
|
+
### `deleted_at`
|
|
2466
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
2467
|
+
- **Optional**
|
|
2468
|
+
- Soft deletion timestamp
|
|
2469
|
+
|
|
2470
|
+
### `tenant_id`
|
|
2471
|
+
- **Type**: `typing.Optional[str]`
|
|
2472
|
+
- **Optional**
|
|
2473
|
+
- Tenant identifier for multi-tenancy isolation
|
|
2474
|
+
|
|
2475
|
+
### `user_id`
|
|
2476
|
+
- **Type**: `typing.Optional[str]`
|
|
2477
|
+
- **Optional**
|
|
2478
|
+
- Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
|
|
2479
|
+
|
|
2480
|
+
### `graph_edges`
|
|
2481
|
+
- **Type**: `list[dict]`
|
|
2482
|
+
- **Optional**
|
|
2483
|
+
- Knowledge graph edges stored as InlineEdge dicts
|
|
2484
|
+
|
|
2485
|
+
### `metadata`
|
|
2486
|
+
- **Type**: `<class ''dict''>`
|
|
2487
|
+
- **Optional**
|
|
2488
|
+
- Flexible metadata storage
|
|
2489
|
+
|
|
2490
|
+
### `tags`
|
|
2491
|
+
- **Type**: `list[str]`
|
|
2492
|
+
- **Optional**
|
|
2493
|
+
- Entity tags
|
|
2494
|
+
|
|
2495
|
+
### `name`
|
|
2496
|
+
- **Type**: `typing.Optional[str]`
|
|
2497
|
+
- **Optional**
|
|
2498
|
+
- Human-readable resource name (used as graph label). Auto-generated from uri+ordinal if not provided.
|
|
2499
|
+
|
|
2500
|
+
### `uri`
|
|
2501
|
+
- **Type**: `typing.Optional[str]`
|
|
2502
|
+
- **Optional**
|
|
2503
|
+
- Content URI or identifier (file path, URL, etc.)
|
|
2504
|
+
|
|
2505
|
+
### `ordinal`
|
|
2506
|
+
- **Type**: `<class ''int''>`
|
|
2507
|
+
- **Optional**
|
|
2508
|
+
- Chunk ordinal for splitting large documents (0 for single-chunk resources)
|
|
2509
|
+
|
|
2510
|
+
### `content`
|
|
2511
|
+
- **Type**: `<class ''str''>`
|
|
2512
|
+
- **Optional**
|
|
2513
|
+
- Resource content text
|
|
2514
|
+
|
|
2515
|
+
### `timestamp`
|
|
2516
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2517
|
+
- **Optional**
|
|
2518
|
+
- Resource timestamp (content creation/publication time)
|
|
2519
|
+
|
|
2520
|
+
### `category`
|
|
2521
|
+
- **Type**: `typing.Optional[str]`
|
|
2522
|
+
- **Optional**
|
|
2523
|
+
- Resource category (document, conversation, artifact, etc.)
|
|
2524
|
+
|
|
2525
|
+
### `related_entities`
|
|
2526
|
+
- **Type**: `list[dict]`
|
|
2527
|
+
- **Optional**
|
|
2528
|
+
- Extracted entities (people, projects, concepts) with metadata
|
|
2529
|
+
|
|
2530
|
+
',
|
|
2531
|
+
'{"type": "object", "description": "\n Base content unit in REM.\n\n Resources are content units that feed into dreaming workflows for moment\n extraction and affinity graph construction. Tenant isolation is provided\n via CoreModel.tenant_id field.\n \n\nThis agent can search the `resources` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Human-readable resource name (used as graph label). Auto-generated from uri+ordinal if not provided.", "entity_key": true, "title": "Name"}, "uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Content URI or identifier (file path, URL, etc.)", "title": "Uri"}, "ordinal": {"composite_key": true, "default": 0, "description": "Chunk ordinal for splitting large documents (0 for single-chunk resources)", "title": "Ordinal", "type": "integer"}, "content": {"default": "", "description": "Resource content text", "title": "Content", "type": "string"}, "timestamp": {"description": "Resource timestamp (content creation/publication time)", "format": "date-time", "title": "Timestamp", "type": "string"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Resource category (document, conversation, artifact, etc.)", "title": "Category"}, "related_entities": {"description": "Extracted entities (people, projects, concepts) with metadata", "items": {"additionalProperties": true, "type": "object"}, "title": "Related Entities", "type": "array"}}, "required": [], "json_schema_extra": {"table_name": "resources", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.resource.Resource", "tools": ["search_rem"], "default_search_table": "resources", "has_embeddings": true}}'::jsonb,
|
|
2532
|
+
'entity',
|
|
2533
|
+
'{"table_name": "resources", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.resource.Resource"}'::jsonb
|
|
2534
|
+
)
|
|
2535
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
2536
|
+
name = EXCLUDED.name,
|
|
2537
|
+
content = EXCLUDED.content,
|
|
2538
|
+
spec = EXCLUDED.spec,
|
|
2539
|
+
category = EXCLUDED.category,
|
|
2540
|
+
metadata = EXCLUDED.metadata,
|
|
2541
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
2542
|
+
|
|
2543
|
+
-- Schema entry for Schema (schemas)
|
|
2544
|
+
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
|
|
2545
|
+
VALUES (
|
|
2546
|
+
'2372e956-add6-58b8-a638-758a91a2b6c4'::uuid,
|
|
2547
|
+
'system',
|
|
2548
|
+
'Schema',
|
|
2549
|
+
'# Schema
|
|
2550
|
+
|
|
2551
|
+
|
|
2552
|
+
Agent schema definition.
|
|
2553
|
+
|
|
2554
|
+
Schemas define agents that can be dynamically loaded into Pydantic AI.
|
|
2555
|
+
They store JsonSchema specifications with embedded metadata for tools,
|
|
2556
|
+
resources, and system prompts.
|
|
2557
|
+
|
|
2558
|
+
For ontology extraction agents:
|
|
2559
|
+
- `provider_configs` enables multi-provider support (test across Anthropic, OpenAI, etc.)
|
|
2560
|
+
- `embedding_fields` specifies which output fields should be embedded for semantic search
|
|
2561
|
+
|
|
2562
|
+
Tenant isolation is provided via CoreModel.tenant_id field.
|
|
2563
|
+
|
|
2564
|
+
|
|
2565
|
+
## Overview
|
|
2566
|
+
|
|
2567
|
+
The `Schema` entity is stored in the `schemas` table. Each record is uniquely
|
|
2568
|
+
identified by its `id` field for lookups and graph traversal.
|
|
2569
|
+
|
|
2570
|
+
## Search Capabilities
|
|
2571
|
+
|
|
2572
|
+
This schema includes the `search_rem` tool which supports:
|
|
2573
|
+
- **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
|
|
2574
|
+
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
2575
|
+
- **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM schemas LIMIT 10`)
|
|
2576
|
+
- **SQL**: Complex queries (e.g., `SELECT * FROM schemas WHERE ...`)
|
|
2577
|
+
|
|
2578
|
+
## Table Info
|
|
2579
|
+
|
|
2580
|
+
| Property | Value |
|
|
2581
|
+
|----------|-------|
|
|
2582
|
+
| Table | `schemas` |
|
|
2583
|
+
| Entity Key | `id` |
|
|
2584
|
+
| Embedding Fields | `content` |
|
|
2585
|
+
| Tools | `search_rem` |
|
|
2586
|
+
|
|
2587
|
+
## Fields
|
|
2588
|
+
|
|
2589
|
+
### `id`
|
|
2590
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
2591
|
+
- **Optional**
|
|
2592
|
+
- Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
|
|
2593
|
+
|
|
2594
|
+
### `created_at`
|
|
2595
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2596
|
+
- **Optional**
|
|
2597
|
+
- Entity creation timestamp
|
|
2598
|
+
|
|
2599
|
+
### `updated_at`
|
|
2600
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2601
|
+
- **Optional**
|
|
2602
|
+
- Last update timestamp
|
|
2603
|
+
|
|
2604
|
+
### `deleted_at`
|
|
2605
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
2606
|
+
- **Optional**
|
|
2607
|
+
- Soft deletion timestamp
|
|
2608
|
+
|
|
2609
|
+
### `tenant_id`
|
|
2610
|
+
- **Type**: `typing.Optional[str]`
|
|
2611
|
+
- **Optional**
|
|
2612
|
+
- Tenant identifier for multi-tenancy isolation
|
|
2613
|
+
|
|
2614
|
+
### `user_id`
|
|
2615
|
+
- **Type**: `typing.Optional[str]`
|
|
2616
|
+
- **Optional**
|
|
2617
|
+
- Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
|
|
2618
|
+
|
|
2619
|
+
### `graph_edges`
|
|
2620
|
+
- **Type**: `list[dict]`
|
|
2621
|
+
- **Optional**
|
|
2622
|
+
- Knowledge graph edges stored as InlineEdge dicts
|
|
2623
|
+
|
|
2624
|
+
### `metadata`
|
|
2625
|
+
- **Type**: `<class ''dict''>`
|
|
2626
|
+
- **Optional**
|
|
2627
|
+
- Flexible metadata storage
|
|
2628
|
+
|
|
2629
|
+
### `tags`
|
|
2630
|
+
- **Type**: `list[str]`
|
|
2631
|
+
- **Optional**
|
|
2632
|
+
- Entity tags
|
|
2633
|
+
|
|
2634
|
+
### `name`
|
|
2635
|
+
- **Type**: `<class ''str''>`
|
|
2636
|
+
- **Required**
|
|
2637
|
+
- Human-readable schema name (used as identifier)
|
|
2638
|
+
|
|
2639
|
+
### `content`
|
|
2640
|
+
- **Type**: `<class ''str''>`
|
|
2641
|
+
- **Optional**
|
|
2642
|
+
- Markdown documentation and instructions for the schema
|
|
2643
|
+
|
|
2644
|
+
### `spec`
|
|
2645
|
+
- **Type**: `<class ''dict''>`
|
|
2646
|
+
- **Required**
|
|
2647
|
+
- JsonSchema specification defining the agent structure and capabilities
|
|
2648
|
+
|
|
2649
|
+
### `category`
|
|
2650
|
+
- **Type**: `typing.Optional[str]`
|
|
2651
|
+
- **Optional**
|
|
2652
|
+
- Schema category distinguishing schema types. Values: ''agent'' (AI agents), ''evaluator'' (LLM-as-a-Judge evaluators). Maps directly from json_schema_extra.kind field during ingestion.
|
|
2653
|
+
|
|
2654
|
+
### `provider_configs`
|
|
2655
|
+
- **Type**: `list[dict]`
|
|
2656
|
+
- **Optional**
|
|
2657
|
+
- Optional provider configurations for multi-provider testing. Each dict has ''provider_name'' and ''model_name''. Example: [{''provider_name'': ''anthropic'', ''model_name'': ''claude-sonnet-4-5''}]
|
|
2658
|
+
|
|
2659
|
+
### `embedding_fields`
|
|
2660
|
+
- **Type**: `list[str]`
|
|
2661
|
+
- **Optional**
|
|
2662
|
+
- JSON paths in extracted_data to embed for semantic search. Example: [''summary'', ''candidate_name'', ''skills''] for CV extraction. Values will be concatenated and embedded using configured embedding provider.
|
|
2663
|
+
|
|
2664
|
+
',
|
|
2665
|
+
'{"type": "object", "description": "\n Agent schema definition.\n\n Schemas define agents that can be dynamically loaded into Pydantic AI.\n They store JsonSchema specifications with embedded metadata for tools,\n resources, and system prompts.\n\n For ontology extraction agents:\n - `provider_configs` enables multi-provider support (test across Anthropic, OpenAI, etc.)\n - `embedding_fields` specifies which output fields should be embedded for semantic search\n\n Tenant isolation is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `schemas` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "Human-readable schema name (used as identifier)", "title": "Name", "type": "string"}, "content": {"default": "", "description": "Markdown documentation and instructions for the schema", "title": "Content", "type": "string"}, "spec": {"additionalProperties": true, "description": "JsonSchema specification defining the agent structure and capabilities", "title": "Spec", "type": "object"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Schema category distinguishing schema types. Values: ''agent'' (AI agents), ''evaluator'' (LLM-as-a-Judge evaluators). Maps directly from json_schema_extra.kind field during ingestion.", "title": "Category"}, "provider_configs": {"description": "Optional provider configurations for multi-provider testing. Each dict has ''provider_name'' and ''model_name''. Example: [{''provider_name'': ''anthropic'', ''model_name'': ''claude-sonnet-4-5''}]", "items": {"additionalProperties": true, "type": "object"}, "title": "Provider Configs", "type": "array"}, "embedding_fields": {"description": "JSON paths in extracted_data to embed for semantic search. Example: [''summary'', ''candidate_name'', ''skills''] for CV extraction. Values will be concatenated and embedded using configured embedding provider.", "items": {"type": "string"}, "title": "Embedding Fields", "type": "array"}}, "required": ["name", "spec"], "json_schema_extra": {"table_name": "schemas", "entity_key_field": "id", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.schema.Schema", "tools": ["search_rem"], "default_search_table": "schemas", "has_embeddings": true}}'::jsonb,
|
|
2666
|
+
'entity',
|
|
2667
|
+
'{"table_name": "schemas", "entity_key_field": "id", "embedding_fields": ["content"], "fqn": "rem.models.entities.schema.Schema"}'::jsonb
|
|
2668
|
+
)
|
|
2669
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
2670
|
+
name = EXCLUDED.name,
|
|
2671
|
+
content = EXCLUDED.content,
|
|
2672
|
+
spec = EXCLUDED.spec,
|
|
2673
|
+
category = EXCLUDED.category,
|
|
2674
|
+
metadata = EXCLUDED.metadata,
|
|
2675
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
2676
|
+
|
|
2677
|
+
-- Schema entry for Session (sessions)
|
|
2678
|
+
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
|
|
2679
|
+
VALUES (
|
|
2680
|
+
'5893fbca-2d8e-5402-ac41-7bac2c0c472a'::uuid,
|
|
2681
|
+
'system',
|
|
2682
|
+
'Session',
|
|
2683
|
+
'# Session
|
|
2684
|
+
|
|
2685
|
+
|
|
2686
|
+
Conversation session container.
|
|
2687
|
+
|
|
2688
|
+
Groups messages together and supports different modes for normal conversations
|
|
2689
|
+
and evaluation/experimentation scenarios.
|
|
2690
|
+
|
|
2691
|
+
For evaluation sessions, stores:
|
|
2692
|
+
- original_trace_id: Reference to the original session being evaluated
|
|
2693
|
+
- settings_overrides: Model, temperature, prompt overrides
|
|
2694
|
+
- prompt: Custom prompt being tested
|
|
2695
|
+
|
|
2696
|
+
Default sessions are lightweight - just a session_id on messages.
|
|
2697
|
+
Special sessions store additional metadata for experiments.
|
|
2698
|
+
|
|
2699
|
+
|
|
2700
|
+
## Overview
|
|
2701
|
+
|
|
2702
|
+
The `Session` entity is stored in the `sessions` table. Each record is uniquely
|
|
2703
|
+
identified by its `name` field for lookups and graph traversal.
|
|
2704
|
+
|
|
2705
|
+
## Search Capabilities
|
|
2706
|
+
|
|
2707
|
+
This schema includes the `search_rem` tool which supports:
|
|
2708
|
+
- **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
|
|
2709
|
+
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
2710
|
+
- **SEARCH**: Semantic vector search on description (e.g., `SEARCH "concept" FROM sessions LIMIT 10`)
|
|
2711
|
+
- **SQL**: Complex queries (e.g., `SELECT * FROM sessions WHERE ...`)
|
|
2712
|
+
|
|
2713
|
+
## Table Info
|
|
2714
|
+
|
|
2715
|
+
| Property | Value |
|
|
2716
|
+
|----------|-------|
|
|
2717
|
+
| Table | `sessions` |
|
|
2718
|
+
| Entity Key | `name` |
|
|
2719
|
+
| Embedding Fields | `description` |
|
|
2720
|
+
| Tools | `search_rem` |
|
|
2721
|
+
|
|
2722
|
+
## Fields
|
|
2723
|
+
|
|
2724
|
+
### `id`
|
|
2725
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
2726
|
+
- **Optional**
|
|
2727
|
+
- Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
|
|
2728
|
+
|
|
2729
|
+
### `created_at`
|
|
2730
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2731
|
+
- **Optional**
|
|
2732
|
+
- Entity creation timestamp
|
|
2733
|
+
|
|
2734
|
+
### `updated_at`
|
|
2735
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2736
|
+
- **Optional**
|
|
2737
|
+
- Last update timestamp
|
|
2738
|
+
|
|
2739
|
+
### `deleted_at`
|
|
2740
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
2741
|
+
- **Optional**
|
|
2742
|
+
- Soft deletion timestamp
|
|
2743
|
+
|
|
2744
|
+
### `tenant_id`
|
|
2745
|
+
- **Type**: `typing.Optional[str]`
|
|
2746
|
+
- **Optional**
|
|
2747
|
+
- Tenant identifier for multi-tenancy isolation
|
|
2748
|
+
|
|
2749
|
+
### `user_id`
|
|
2750
|
+
- **Type**: `typing.Optional[str]`
|
|
2751
|
+
- **Optional**
|
|
2752
|
+
- Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
|
|
2753
|
+
|
|
2754
|
+
### `graph_edges`
|
|
2755
|
+
- **Type**: `list[dict]`
|
|
2756
|
+
- **Optional**
|
|
2757
|
+
- Knowledge graph edges stored as InlineEdge dicts
|
|
2758
|
+
|
|
2759
|
+
### `metadata`
|
|
2760
|
+
- **Type**: `<class ''dict''>`
|
|
2761
|
+
- **Optional**
|
|
2762
|
+
- Flexible metadata storage
|
|
2763
|
+
|
|
2764
|
+
### `tags`
|
|
2765
|
+
- **Type**: `list[str]`
|
|
2766
|
+
- **Optional**
|
|
2767
|
+
- Entity tags
|
|
2768
|
+
|
|
2769
|
+
### `name`
|
|
2770
|
+
- **Type**: `<class ''str''>`
|
|
2771
|
+
- **Required**
|
|
2772
|
+
- Session name/identifier
|
|
2773
|
+
|
|
2774
|
+
### `mode`
|
|
2775
|
+
- **Type**: `<enum ''SessionMode''>`
|
|
2776
|
+
- **Optional**
|
|
2777
|
+
- Session mode: ''normal'' or ''evaluation''
|
|
2778
|
+
|
|
2779
|
+
### `description`
|
|
2780
|
+
- **Type**: `str | None`
|
|
2781
|
+
- **Optional**
|
|
2782
|
+
- Optional session description
|
|
2783
|
+
|
|
2784
|
+
### `original_trace_id`
|
|
2785
|
+
- **Type**: `str | None`
|
|
2786
|
+
- **Optional**
|
|
2787
|
+
- For evaluation mode: ID of the original session/trace being evaluated
|
|
2788
|
+
|
|
2789
|
+
### `settings_overrides`
|
|
2790
|
+
- **Type**: `dict | None`
|
|
2791
|
+
- **Optional**
|
|
2792
|
+
- Settings overrides (model, temperature, max_tokens, system_prompt)
|
|
2793
|
+
|
|
2794
|
+
### `prompt`
|
|
2795
|
+
- **Type**: `str | None`
|
|
2796
|
+
- **Optional**
|
|
2797
|
+
- Custom prompt for this session (can override agent prompt)
|
|
2798
|
+
|
|
2799
|
+
### `agent_schema_uri`
|
|
2800
|
+
- **Type**: `str | None`
|
|
2801
|
+
- **Optional**
|
|
2802
|
+
- Agent schema used for this session
|
|
2803
|
+
|
|
2804
|
+
### `message_count`
|
|
2805
|
+
- **Type**: `<class ''int''>`
|
|
2806
|
+
- **Optional**
|
|
2807
|
+
- Number of messages in this session
|
|
2808
|
+
|
|
2809
|
+
### `total_tokens`
|
|
2810
|
+
- **Type**: `int | None`
|
|
2811
|
+
- **Optional**
|
|
2812
|
+
- Total tokens used in this session
|
|
2813
|
+
|
|
2814
|
+
',
|
|
2815
|
+
'{"type": "object", "description": "\n Conversation session container.\n\n Groups messages together and supports different modes for normal conversations\n and evaluation/experimentation scenarios.\n\n For evaluation sessions, stores:\n - original_trace_id: Reference to the original session being evaluated\n - settings_overrides: Model, temperature, prompt overrides\n - prompt: Custom prompt being tested\n\n Default sessions are lightweight - just a session_id on messages.\n Special sessions store additional metadata for experiments.\n \n\nThis agent can search the `sessions` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "Session name/identifier", "entity_key": true, "title": "Name", "type": "string"}, "mode": {"$ref": "#/$defs/SessionMode", "default": "normal", "description": "Session mode: ''normal'' or ''evaluation''"}, "description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Optional session description", "title": "Description"}, "original_trace_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "For evaluation mode: ID of the original session/trace being evaluated", "title": "Original Trace Id"}, "settings_overrides": {"anyOf": [{"additionalProperties": true, "type": "object"}, {"type": "null"}], "default": null, "description": "Settings overrides (model, temperature, max_tokens, system_prompt)", "title": "Settings Overrides"}, "prompt": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Custom prompt for this session (can override agent prompt)", "title": "Prompt"}, "agent_schema_uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Agent schema used for this session", "title": "Agent Schema Uri"}, "message_count": {"default": 0, "description": "Number of messages in this session", "title": "Message Count", "type": "integer"}, "total_tokens": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "Total tokens used in this session", "title": "Total Tokens"}}, "required": ["name"], "json_schema_extra": {"table_name": "sessions", "entity_key_field": "name", "embedding_fields": ["description"], "fully_qualified_name": "rem.models.entities.session.Session", "tools": ["search_rem"], "default_search_table": "sessions", "has_embeddings": true}}'::jsonb,
|
|
2816
|
+
'entity',
|
|
2817
|
+
'{"table_name": "sessions", "entity_key_field": "name", "embedding_fields": ["description"], "fqn": "rem.models.entities.session.Session"}'::jsonb
|
|
2818
|
+
)
|
|
2819
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
2820
|
+
name = EXCLUDED.name,
|
|
2821
|
+
content = EXCLUDED.content,
|
|
2822
|
+
spec = EXCLUDED.spec,
|
|
2823
|
+
category = EXCLUDED.category,
|
|
2824
|
+
metadata = EXCLUDED.metadata,
|
|
2825
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
2826
|
+
|
|
2827
|
+
-- Schema entry for SharedSession (shared_sessions)
|
|
2828
|
+
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
|
|
2829
|
+
VALUES (
|
|
2830
|
+
'be5c5711-6c45-5fc4-9cd1-e076599261c7'::uuid,
|
|
2831
|
+
'system',
|
|
2832
|
+
'SharedSession',
|
|
2833
|
+
'# SharedSession
|
|
2834
|
+
|
|
2835
|
+
|
|
2836
|
+
Session sharing record between users.
|
|
2837
|
+
|
|
2838
|
+
Links a session (identified by session_id from Message records) to a
|
|
2839
|
+
recipient user, enabling collaborative access to conversation history.
|
|
2840
|
+
|
|
2841
|
+
|
|
2842
|
+
## Overview
|
|
2843
|
+
|
|
2844
|
+
The `SharedSession` entity is stored in the `shared_sessions` table. Each record is uniquely
|
|
2845
|
+
identified by its `id` field for lookups and graph traversal.
|
|
2846
|
+
|
|
2847
|
+
## Search Capabilities
|
|
2848
|
+
|
|
2849
|
+
This schema includes the `search_rem` tool which supports:
|
|
2850
|
+
- **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
|
|
2851
|
+
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
2852
|
+
- **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM shared_sessions LIMIT 10`)
|
|
2853
|
+
- **SQL**: Complex queries (e.g., `SELECT * FROM shared_sessions WHERE ...`)
|
|
2854
|
+
|
|
2855
|
+
## Table Info
|
|
2856
|
+
|
|
2857
|
+
| Property | Value |
|
|
2858
|
+
|----------|-------|
|
|
2859
|
+
| Table | `shared_sessions` |
|
|
2860
|
+
| Entity Key | `id` |
|
|
2861
|
+
| Embedding Fields | None |
|
|
2862
|
+
| Tools | `search_rem` |
|
|
2863
|
+
|
|
2864
|
+
## Fields
|
|
2865
|
+
|
|
2866
|
+
### `id`
|
|
2867
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
2868
|
+
- **Optional**
|
|
2869
|
+
- Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
|
|
2870
|
+
|
|
2871
|
+
### `created_at`
|
|
2872
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2873
|
+
- **Optional**
|
|
2874
|
+
- Entity creation timestamp
|
|
2875
|
+
|
|
2876
|
+
### `updated_at`
|
|
2877
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2878
|
+
- **Optional**
|
|
2879
|
+
- Last update timestamp
|
|
2880
|
+
|
|
2881
|
+
### `deleted_at`
|
|
2882
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
2883
|
+
- **Optional**
|
|
2884
|
+
- Soft deletion timestamp
|
|
2885
|
+
|
|
2886
|
+
### `tenant_id`
|
|
2887
|
+
- **Type**: `typing.Optional[str]`
|
|
2888
|
+
- **Optional**
|
|
2889
|
+
- Tenant identifier for multi-tenancy isolation
|
|
2890
|
+
|
|
2891
|
+
### `user_id`
|
|
2892
|
+
- **Type**: `typing.Optional[str]`
|
|
2893
|
+
- **Optional**
|
|
2894
|
+
- Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
|
|
2895
|
+
|
|
2896
|
+
### `graph_edges`
|
|
2897
|
+
- **Type**: `list[dict]`
|
|
2898
|
+
- **Optional**
|
|
2899
|
+
- Knowledge graph edges stored as InlineEdge dicts
|
|
2900
|
+
|
|
2901
|
+
### `metadata`
|
|
2902
|
+
- **Type**: `<class ''dict''>`
|
|
2903
|
+
- **Optional**
|
|
2904
|
+
- Flexible metadata storage
|
|
2905
|
+
|
|
2906
|
+
### `tags`
|
|
2907
|
+
- **Type**: `list[str]`
|
|
2908
|
+
- **Optional**
|
|
2909
|
+
- Entity tags
|
|
2910
|
+
|
|
2911
|
+
### `session_id`
|
|
2912
|
+
- **Type**: `<class ''str''>`
|
|
2913
|
+
- **Required**
|
|
2914
|
+
- The session being shared (matches Message.session_id)
|
|
2915
|
+
|
|
2916
|
+
### `owner_user_id`
|
|
2917
|
+
- **Type**: `<class ''str''>`
|
|
2918
|
+
- **Required**
|
|
2919
|
+
- User ID of the session owner (the sharer)
|
|
2920
|
+
|
|
2921
|
+
### `shared_with_user_id`
|
|
2922
|
+
- **Type**: `<class ''str''>`
|
|
2923
|
+
- **Required**
|
|
2924
|
+
- User ID of the recipient (who can now view the session)
|
|
2925
|
+
|
|
2926
|
+
',
|
|
2927
|
+
'{"type": "object", "description": "\n Session sharing record between users.\n\n Links a session (identified by session_id from Message records) to a\n recipient user, enabling collaborative access to conversation history.\n \n\nThis agent can search the `shared_sessions` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "session_id": {"description": "The session being shared (matches Message.session_id)", "title": "Session Id", "type": "string"}, "owner_user_id": {"description": "User ID of the session owner (the sharer)", "title": "Owner User Id", "type": "string"}, "shared_with_user_id": {"description": "User ID of the recipient (who can now view the session)", "title": "Shared With User Id", "type": "string"}}, "required": ["session_id", "owner_user_id", "shared_with_user_id"], "json_schema_extra": {"table_name": "shared_sessions", "entity_key_field": "id", "embedding_fields": [], "fully_qualified_name": "rem.models.entities.shared_session.SharedSession", "tools": ["search_rem"], "default_search_table": "shared_sessions", "has_embeddings": false}}'::jsonb,
|
|
2928
|
+
'entity',
|
|
2929
|
+
'{"table_name": "shared_sessions", "entity_key_field": "id", "embedding_fields": [], "fqn": "rem.models.entities.shared_session.SharedSession"}'::jsonb
|
|
2930
|
+
)
|
|
2931
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
2932
|
+
name = EXCLUDED.name,
|
|
2933
|
+
content = EXCLUDED.content,
|
|
2934
|
+
spec = EXCLUDED.spec,
|
|
2935
|
+
category = EXCLUDED.category,
|
|
2936
|
+
metadata = EXCLUDED.metadata,
|
|
2937
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
2938
|
+
|
|
2939
|
+
-- Schema entry for User (users)
|
|
2940
|
+
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
|
|
2941
|
+
VALUES (
|
|
2942
|
+
'1ad3d95e-32e9-54d6-ad7d-e39b9ed5018b'::uuid,
|
|
2943
|
+
'system',
|
|
2944
|
+
'User',
|
|
2945
|
+
'# User
|
|
2946
|
+
|
|
2947
|
+
|
|
2948
|
+
User entity.
|
|
2949
|
+
|
|
2950
|
+
Represents people in the REM system, either as active users
|
|
2951
|
+
or entities extracted from content. Tenant isolation is provided
|
|
2952
|
+
via CoreModel.tenant_id field.
|
|
2953
|
+
|
|
2954
|
+
Enhanced by dreaming worker:
|
|
2955
|
+
- summary: Generated from activity analysis
|
|
2956
|
+
- interests: Extracted from resources and sessions
|
|
2957
|
+
- activity_level: Computed from recent engagement
|
|
2958
|
+
- preferred_topics: Extracted from moment/resource topics
|
|
2959
|
+
|
|
2960
|
+
|
|
2961
|
+
## Overview
|
|
2962
|
+
|
|
2963
|
+
The `User` entity is stored in the `users` table. Each record is uniquely
|
|
2964
|
+
identified by its `name` field for lookups and graph traversal.
|
|
2965
|
+
|
|
2966
|
+
## Search Capabilities
|
|
2967
|
+
|
|
2968
|
+
This schema includes the `search_rem` tool which supports:
|
|
2969
|
+
- **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
|
|
2970
|
+
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
2971
|
+
- **SEARCH**: Semantic vector search on summary (e.g., `SEARCH "concept" FROM users LIMIT 10`)
|
|
2972
|
+
- **SQL**: Complex queries (e.g., `SELECT * FROM users WHERE ...`)
|
|
2973
|
+
|
|
2974
|
+
## Table Info
|
|
2975
|
+
|
|
2976
|
+
| Property | Value |
|
|
2977
|
+
|----------|-------|
|
|
2978
|
+
| Table | `users` |
|
|
2979
|
+
| Entity Key | `name` |
|
|
2980
|
+
| Embedding Fields | `summary` |
|
|
2981
|
+
| Tools | `search_rem` |
|
|
2982
|
+
|
|
2983
|
+
## Fields
|
|
2984
|
+
|
|
2985
|
+
### `id`
|
|
2986
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
2987
|
+
- **Optional**
|
|
2988
|
+
- Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
|
|
2989
|
+
|
|
2990
|
+
### `created_at`
|
|
2991
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2992
|
+
- **Optional**
|
|
2993
|
+
- Entity creation timestamp
|
|
2994
|
+
|
|
2995
|
+
### `updated_at`
|
|
2996
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2997
|
+
- **Optional**
|
|
2998
|
+
- Last update timestamp
|
|
2999
|
+
|
|
3000
|
+
### `deleted_at`
|
|
3001
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
3002
|
+
- **Optional**
|
|
3003
|
+
- Soft deletion timestamp
|
|
3004
|
+
|
|
3005
|
+
### `tenant_id`
|
|
3006
|
+
- **Type**: `typing.Optional[str]`
|
|
3007
|
+
- **Optional**
|
|
3008
|
+
- Tenant identifier for multi-tenancy isolation
|
|
3009
|
+
|
|
3010
|
+
### `user_id`
|
|
3011
|
+
- **Type**: `typing.Optional[str]`
|
|
3012
|
+
- **Optional**
|
|
3013
|
+
- Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
|
|
3014
|
+
|
|
3015
|
+
### `graph_edges`
|
|
3016
|
+
- **Type**: `list[dict]`
|
|
3017
|
+
- **Optional**
|
|
3018
|
+
- Knowledge graph edges stored as InlineEdge dicts
|
|
3019
|
+
|
|
3020
|
+
### `metadata`
|
|
3021
|
+
- **Type**: `<class ''dict''>`
|
|
3022
|
+
- **Optional**
|
|
3023
|
+
- Flexible metadata storage
|
|
3024
|
+
|
|
3025
|
+
### `tags`
|
|
3026
|
+
- **Type**: `list[str]`
|
|
3027
|
+
- **Optional**
|
|
3028
|
+
- Entity tags
|
|
3029
|
+
|
|
3030
|
+
### `name`
|
|
3031
|
+
- **Type**: `<class ''str''>`
|
|
3032
|
+
- **Required**
|
|
3033
|
+
- User name (human-readable, used as graph label)
|
|
3034
|
+
|
|
3035
|
+
### `email`
|
|
3036
|
+
- **Type**: `typing.Optional[str]`
|
|
3037
|
+
- **Optional**
|
|
3038
|
+
- User email address
|
|
3039
|
+
|
|
3040
|
+
### `role`
|
|
3041
|
+
- **Type**: `typing.Optional[str]`
|
|
3042
|
+
- **Optional**
|
|
3043
|
+
- User role (employee, contractor, external, etc.)
|
|
3044
|
+
|
|
3045
|
+
### `tier`
|
|
3046
|
+
- **Type**: `<enum ''UserTier''>`
|
|
3047
|
+
- **Optional**
|
|
3048
|
+
- User subscription tier (free, basic, pro) for feature gating
|
|
3049
|
+
|
|
3050
|
+
### `anonymous_ids`
|
|
3051
|
+
- **Type**: `list[str]`
|
|
3052
|
+
- **Optional**
|
|
3053
|
+
- Linked anonymous session IDs used for merging history
|
|
3054
|
+
|
|
3055
|
+
### `sec_policy`
|
|
3056
|
+
- **Type**: `<class ''dict''>`
|
|
3057
|
+
- **Optional**
|
|
3058
|
+
- Security policy configuration (JSON, extensible for custom policies)
|
|
3059
|
+
|
|
3060
|
+
### `summary`
|
|
3061
|
+
- **Type**: `typing.Optional[str]`
|
|
3062
|
+
- **Optional**
|
|
3063
|
+
- LLM-generated user profile summary (updated by dreaming worker)
|
|
3064
|
+
|
|
3065
|
+
### `interests`
|
|
3066
|
+
- **Type**: `list[str]`
|
|
3067
|
+
- **Optional**
|
|
3068
|
+
- User interests extracted from activity
|
|
3069
|
+
|
|
3070
|
+
### `preferred_topics`
|
|
3071
|
+
- **Type**: `list[str]`
|
|
3072
|
+
- **Optional**
|
|
3073
|
+
- Frequently discussed topics in kebab-case
|
|
3074
|
+
|
|
3075
|
+
### `activity_level`
|
|
3076
|
+
- **Type**: `typing.Optional[str]`
|
|
3077
|
+
- **Optional**
|
|
3078
|
+
- Activity level: active, moderate, inactive
|
|
3079
|
+
|
|
3080
|
+
### `last_active_at`
|
|
3081
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
3082
|
+
- **Optional**
|
|
3083
|
+
- Last activity timestamp
|
|
3084
|
+
|
|
3085
|
+
',
|
|
3086
|
+
'{"type": "object", "description": "\n User entity.\n\n Represents people in the REM system, either as active users\n or entities extracted from content. Tenant isolation is provided\n via CoreModel.tenant_id field.\n\n Enhanced by dreaming worker:\n - summary: Generated from activity analysis\n - interests: Extracted from resources and sessions\n - activity_level: Computed from recent engagement\n - preferred_topics: Extracted from moment/resource topics\n \n\nThis agent can search the `users` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "User name (human-readable, used as graph label)", "entity_key": true, "title": "Name", "type": "string"}, "email": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "User email address", "title": "Email"}, "role": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "User role (employee, contractor, external, etc.)", "title": "Role"}, "tier": {"$ref": "#/$defs/UserTier", "default": "free", "description": "User subscription tier (free, basic, pro) for feature gating"}, "anonymous_ids": {"description": "Linked anonymous session IDs used for merging history", "items": {"type": "string"}, "title": "Anonymous Ids", "type": "array"}, "sec_policy": {"additionalProperties": true, "description": "Security policy configuration (JSON, extensible for custom policies)", "title": "Sec Policy", "type": "object"}, "summary": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "LLM-generated user profile summary (updated by dreaming worker)", "title": "Summary"}, "interests": {"description": "User interests extracted from activity", "items": {"type": "string"}, "title": "Interests", "type": "array"}, "preferred_topics": {"description": "Frequently discussed topics in kebab-case", "items": {"type": "string"}, "title": "Preferred Topics", "type": "array"}, "activity_level": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Activity level: active, moderate, inactive", "title": "Activity Level"}, "last_active_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Last activity timestamp", "title": "Last Active At"}}, "required": ["name"], "json_schema_extra": {"table_name": "users", "entity_key_field": "name", "embedding_fields": ["summary"], "fully_qualified_name": "rem.models.entities.user.User", "tools": ["search_rem"], "default_search_table": "users", "has_embeddings": true}}'::jsonb,
|
|
3087
|
+
'entity',
|
|
3088
|
+
'{"table_name": "users", "entity_key_field": "name", "embedding_fields": ["summary"], "fqn": "rem.models.entities.user.User"}'::jsonb
|
|
3089
|
+
)
|
|
3090
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
3091
|
+
name = EXCLUDED.name,
|
|
3092
|
+
content = EXCLUDED.content,
|
|
3093
|
+
spec = EXCLUDED.spec,
|
|
3094
|
+
category = EXCLUDED.category,
|
|
3095
|
+
metadata = EXCLUDED.metadata,
|
|
3096
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
3097
|
+
|
|
1235
3098
|
-- ============================================================================
|
|
1236
3099
|
-- RECORD MIGRATION
|
|
1237
3100
|
-- ============================================================================
|