remdb 0.3.103__py3-none-any.whl → 0.3.141__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic (the original page links to more details).

Files changed (74)
  1. rem/agentic/agents/sse_simulator.py +2 -0
  2. rem/agentic/context.py +51 -27
  3. rem/agentic/mcp/tool_wrapper.py +155 -18
  4. rem/agentic/otel/setup.py +93 -4
  5. rem/agentic/providers/phoenix.py +371 -108
  6. rem/agentic/providers/pydantic_ai.py +195 -46
  7. rem/agentic/schema.py +361 -21
  8. rem/agentic/tools/rem_tools.py +3 -3
  9. rem/api/main.py +85 -16
  10. rem/api/mcp_router/resources.py +1 -1
  11. rem/api/mcp_router/server.py +18 -4
  12. rem/api/mcp_router/tools.py +394 -16
  13. rem/api/routers/admin.py +218 -1
  14. rem/api/routers/chat/completions.py +280 -7
  15. rem/api/routers/chat/models.py +81 -7
  16. rem/api/routers/chat/otel_utils.py +33 -0
  17. rem/api/routers/chat/sse_events.py +17 -1
  18. rem/api/routers/chat/streaming.py +177 -3
  19. rem/api/routers/feedback.py +142 -329
  20. rem/api/routers/query.py +360 -0
  21. rem/api/routers/shared_sessions.py +13 -13
  22. rem/cli/commands/README.md +237 -64
  23. rem/cli/commands/cluster.py +1808 -0
  24. rem/cli/commands/configure.py +4 -7
  25. rem/cli/commands/db.py +354 -143
  26. rem/cli/commands/experiments.py +436 -30
  27. rem/cli/commands/process.py +14 -8
  28. rem/cli/commands/schema.py +92 -45
  29. rem/cli/commands/session.py +336 -0
  30. rem/cli/dreaming.py +2 -2
  31. rem/cli/main.py +29 -6
  32. rem/config.py +8 -1
  33. rem/models/core/experiment.py +54 -0
  34. rem/models/core/rem_query.py +5 -2
  35. rem/models/entities/ontology.py +1 -1
  36. rem/models/entities/ontology_config.py +1 -1
  37. rem/models/entities/shared_session.py +2 -28
  38. rem/registry.py +10 -4
  39. rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
  40. rem/schemas/agents/examples/contract-extractor.yaml +1 -1
  41. rem/schemas/agents/examples/cv-parser.yaml +1 -1
  42. rem/services/content/service.py +30 -8
  43. rem/services/embeddings/api.py +4 -4
  44. rem/services/embeddings/worker.py +16 -16
  45. rem/services/phoenix/client.py +59 -18
  46. rem/services/postgres/README.md +151 -26
  47. rem/services/postgres/__init__.py +2 -1
  48. rem/services/postgres/diff_service.py +531 -0
  49. rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
  50. rem/services/postgres/schema_generator.py +205 -4
  51. rem/services/postgres/service.py +6 -6
  52. rem/services/rem/parser.py +44 -9
  53. rem/services/rem/service.py +36 -2
  54. rem/services/session/compression.py +7 -0
  55. rem/services/session/reload.py +1 -1
  56. rem/settings.py +288 -16
  57. rem/sql/background_indexes.sql +19 -24
  58. rem/sql/migrations/001_install.sql +252 -69
  59. rem/sql/migrations/002_install_models.sql +2197 -619
  60. rem/sql/migrations/003_optional_extensions.sql +326 -0
  61. rem/sql/migrations/004_cache_system.sql +548 -0
  62. rem/utils/__init__.py +18 -0
  63. rem/utils/date_utils.py +2 -2
  64. rem/utils/schema_loader.py +110 -15
  65. rem/utils/sql_paths.py +146 -0
  66. rem/utils/vision.py +1 -1
  67. rem/workers/__init__.py +3 -1
  68. rem/workers/db_listener.py +579 -0
  69. rem/workers/unlogged_maintainer.py +463 -0
  70. {remdb-0.3.103.dist-info → remdb-0.3.141.dist-info}/METADATA +300 -215
  71. {remdb-0.3.103.dist-info → remdb-0.3.141.dist-info}/RECORD +73 -64
  72. rem/sql/migrations/003_seed_default_user.sql +0 -48
  73. {remdb-0.3.103.dist-info → remdb-0.3.141.dist-info}/WHEEL +0 -0
  74. {remdb-0.3.103.dist-info → remdb-0.3.141.dist-info}/entry_points.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  -- REM Model Schema (install_models.sql)
2
2
  -- Generated from Pydantic models
3
- -- Source: directory: src/rem/models/entities
4
- -- Generated at: 2025-11-28T08:13:28.661915
3
+ -- Source: model registry
4
+ -- Generated at: 2025-11-29T18:45:11.372432
5
5
  --
6
6
  -- DO NOT EDIT MANUALLY - Regenerate with: rem db schema generate
7
7
  --
@@ -10,6 +10,7 @@
10
10
  -- 2. Embeddings tables (embeddings_<table>)
11
11
  -- 3. KV_STORE triggers for cache maintenance
12
12
  -- 4. Indexes (foreground only, background indexes separate)
13
+ -- 5. Schema table entries (for agent-like table access)
13
14
 
14
15
  -- ============================================================================
15
16
  -- PREREQUISITES CHECK
@@ -30,24 +31,102 @@ BEGIN
30
31
  END $$;
31
32
 
32
33
  -- ======================================================================
33
- -- USERS (Model: User)
34
+ -- FEEDBACKS (Model: Feedback)
34
35
  -- ======================================================================
35
36
 
36
- CREATE TABLE IF NOT EXISTS users (
37
+ CREATE TABLE IF NOT EXISTS feedbacks (
38
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
39
+ tenant_id VARCHAR(100) NOT NULL,
40
+ user_id VARCHAR(256),
41
+ session_id VARCHAR(256) NOT NULL,
42
+ message_id VARCHAR(256),
43
+ rating INTEGER,
44
+ categories TEXT[] DEFAULT ARRAY[]::TEXT[],
45
+ comment TEXT,
46
+ trace_id VARCHAR(256),
47
+ span_id VARCHAR(256),
48
+ phoenix_synced BOOLEAN,
49
+ phoenix_annotation_id VARCHAR(256),
50
+ annotator_kind VARCHAR(256),
51
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
52
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
53
+ deleted_at TIMESTAMP,
54
+ graph_edges JSONB DEFAULT '[]'::jsonb,
55
+ metadata JSONB DEFAULT '{}'::jsonb,
56
+ tags TEXT[] DEFAULT ARRAY[]::TEXT[]
57
+ );
58
+
59
+ CREATE INDEX IF NOT EXISTS idx_feedbacks_tenant ON feedbacks (tenant_id);
60
+ CREATE INDEX IF NOT EXISTS idx_feedbacks_user ON feedbacks (user_id);
61
+ CREATE INDEX IF NOT EXISTS idx_feedbacks_graph_edges ON feedbacks USING GIN (graph_edges);
62
+ CREATE INDEX IF NOT EXISTS idx_feedbacks_metadata ON feedbacks USING GIN (metadata);
63
+ CREATE INDEX IF NOT EXISTS idx_feedbacks_tags ON feedbacks USING GIN (tags);
64
+
65
+ -- KV_STORE trigger for feedbacks
66
+ -- Trigger function to maintain KV_STORE for feedbacks
67
+ CREATE OR REPLACE FUNCTION fn_feedbacks_kv_store_upsert()
68
+ RETURNS TRIGGER AS $$
69
+ BEGIN
70
+ IF (TG_OP = 'DELETE') THEN
71
+ -- Remove from KV_STORE on delete
72
+ DELETE FROM kv_store
73
+ WHERE entity_id = OLD.id;
74
+ RETURN OLD;
75
+ ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
76
+ -- Upsert to KV_STORE (O(1) lookup by entity_key)
77
+ INSERT INTO kv_store (
78
+ entity_key,
79
+ entity_type,
80
+ entity_id,
81
+ tenant_id,
82
+ user_id,
83
+ metadata,
84
+ graph_edges,
85
+ updated_at
86
+ ) VALUES (
87
+ NEW.id::VARCHAR,
88
+ 'feedbacks',
89
+ NEW.id,
90
+ NEW.tenant_id,
91
+ NEW.user_id,
92
+ NEW.metadata,
93
+ COALESCE(NEW.graph_edges, '[]'::jsonb),
94
+ CURRENT_TIMESTAMP
95
+ )
96
+ ON CONFLICT (tenant_id, entity_key)
97
+ DO UPDATE SET
98
+ entity_id = EXCLUDED.entity_id,
99
+ user_id = EXCLUDED.user_id,
100
+ metadata = EXCLUDED.metadata,
101
+ graph_edges = EXCLUDED.graph_edges,
102
+ updated_at = CURRENT_TIMESTAMP;
103
+
104
+ RETURN NEW;
105
+ END IF;
106
+ END;
107
+ $$ LANGUAGE plpgsql;
108
+
109
+ -- Create trigger
110
+ DROP TRIGGER IF EXISTS trg_feedbacks_kv_store ON feedbacks;
111
+ CREATE TRIGGER trg_feedbacks_kv_store
112
+ AFTER INSERT OR UPDATE OR DELETE ON feedbacks
113
+ FOR EACH ROW EXECUTE FUNCTION fn_feedbacks_kv_store_upsert();
114
+
115
+ -- ======================================================================
116
+ -- FILES (Model: File)
117
+ -- ======================================================================
118
+
119
+ CREATE TABLE IF NOT EXISTS files (
37
120
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
38
121
  tenant_id VARCHAR(100) NOT NULL,
39
122
  user_id VARCHAR(256),
40
123
  name VARCHAR(256) NOT NULL,
41
- email VARCHAR(256),
42
- role VARCHAR(256),
43
- tier TEXT,
44
- anonymous_ids TEXT[] DEFAULT ARRAY[]::TEXT[],
45
- sec_policy JSONB DEFAULT '{}'::jsonb,
46
- summary TEXT,
47
- interests TEXT[] DEFAULT ARRAY[]::TEXT[],
48
- preferred_topics TEXT[] DEFAULT ARRAY[]::TEXT[],
49
- activity_level VARCHAR(256),
50
- last_active_at TIMESTAMP,
124
+ uri VARCHAR(256) NOT NULL,
125
+ content TEXT,
126
+ timestamp VARCHAR(256),
127
+ size_bytes INTEGER,
128
+ mime_type VARCHAR(256),
129
+ processing_status VARCHAR(256),
51
130
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
52
131
  updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
53
132
  deleted_at TIMESTAMP,
@@ -56,16 +135,16 @@ CREATE TABLE IF NOT EXISTS users (
56
135
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
57
136
  );
58
137
 
59
- CREATE INDEX idx_users_tenant ON users (tenant_id);
60
- CREATE INDEX idx_users_user ON users (user_id);
61
- CREATE INDEX idx_users_graph_edges ON users USING GIN (graph_edges);
62
- CREATE INDEX idx_users_metadata ON users USING GIN (metadata);
63
- CREATE INDEX idx_users_tags ON users USING GIN (tags);
138
+ CREATE INDEX IF NOT EXISTS idx_files_tenant ON files (tenant_id);
139
+ CREATE INDEX IF NOT EXISTS idx_files_user ON files (user_id);
140
+ CREATE INDEX IF NOT EXISTS idx_files_graph_edges ON files USING GIN (graph_edges);
141
+ CREATE INDEX IF NOT EXISTS idx_files_metadata ON files USING GIN (metadata);
142
+ CREATE INDEX IF NOT EXISTS idx_files_tags ON files USING GIN (tags);
64
143
 
65
- -- Embeddings for users
66
- CREATE TABLE IF NOT EXISTS embeddings_users (
144
+ -- Embeddings for files
145
+ CREATE TABLE IF NOT EXISTS embeddings_files (
67
146
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
68
- entity_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
147
+ entity_id UUID NOT NULL REFERENCES files(id) ON DELETE CASCADE,
69
148
  field_name VARCHAR(100) NOT NULL,
70
149
  provider VARCHAR(50) NOT NULL DEFAULT 'openai',
71
150
  model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
@@ -78,19 +157,19 @@ CREATE TABLE IF NOT EXISTS embeddings_users (
78
157
  );
79
158
 
80
159
  -- Index for entity lookup (get all embeddings for entity)
81
- CREATE INDEX idx_embeddings_users_entity ON embeddings_users (entity_id);
160
+ CREATE INDEX IF NOT EXISTS idx_embeddings_files_entity ON embeddings_files (entity_id);
82
161
 
83
162
  -- Index for field + provider lookup
84
- CREATE INDEX idx_embeddings_users_field_provider ON embeddings_users (field_name, provider);
163
+ CREATE INDEX IF NOT EXISTS idx_embeddings_files_field_provider ON embeddings_files (field_name, provider);
85
164
 
86
165
  -- HNSW index for vector similarity search (created in background)
87
166
  -- Note: This will be created by background thread after data load
88
- -- CREATE INDEX idx_embeddings_users_vector_hnsw ON embeddings_users
167
+ -- CREATE INDEX IF NOT EXISTS idx_embeddings_files_vector_hnsw ON embeddings_files
89
168
  -- USING hnsw (embedding vector_cosine_ops);
90
169
 
91
- -- KV_STORE trigger for users
92
- -- Trigger function to maintain KV_STORE for users
93
- CREATE OR REPLACE FUNCTION fn_users_kv_store_upsert()
170
+ -- KV_STORE trigger for files
171
+ -- Trigger function to maintain KV_STORE for files
172
+ CREATE OR REPLACE FUNCTION fn_files_kv_store_upsert()
94
173
  RETURNS TRIGGER AS $$
95
174
  BEGIN
96
175
  IF (TG_OP = 'DELETE') THEN
@@ -110,8 +189,8 @@ BEGIN
110
189
  graph_edges,
111
190
  updated_at
112
191
  ) VALUES (
113
- NEW.name::VARCHAR,
114
- 'users',
192
+ NEW.id::VARCHAR,
193
+ 'files',
115
194
  NEW.id,
116
195
  NEW.tenant_id,
117
196
  NEW.user_id,
@@ -133,10 +212,10 @@ END;
133
212
  $$ LANGUAGE plpgsql;
134
213
 
135
214
  -- Create trigger
136
- DROP TRIGGER IF EXISTS trg_users_kv_store ON users;
137
- CREATE TRIGGER trg_users_kv_store
138
- AFTER INSERT OR UPDATE OR DELETE ON users
139
- FOR EACH ROW EXECUTE FUNCTION fn_users_kv_store_upsert();
215
+ DROP TRIGGER IF EXISTS trg_files_kv_store ON files;
216
+ CREATE TRIGGER trg_files_kv_store
217
+ AFTER INSERT OR UPDATE OR DELETE ON files
218
+ FOR EACH ROW EXECUTE FUNCTION fn_files_kv_store_upsert();
140
219
 
141
220
  -- ======================================================================
142
221
  -- IMAGE_RESOURCES (Model: ImageResource)
@@ -169,11 +248,11 @@ CREATE TABLE IF NOT EXISTS image_resources (
169
248
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
170
249
  );
171
250
 
172
- CREATE INDEX idx_image_resources_tenant ON image_resources (tenant_id);
173
- CREATE INDEX idx_image_resources_user ON image_resources (user_id);
174
- CREATE INDEX idx_image_resources_graph_edges ON image_resources USING GIN (graph_edges);
175
- CREATE INDEX idx_image_resources_metadata ON image_resources USING GIN (metadata);
176
- CREATE INDEX idx_image_resources_tags ON image_resources USING GIN (tags);
251
+ CREATE INDEX IF NOT EXISTS idx_image_resources_tenant ON image_resources (tenant_id);
252
+ CREATE INDEX IF NOT EXISTS idx_image_resources_user ON image_resources (user_id);
253
+ CREATE INDEX IF NOT EXISTS idx_image_resources_graph_edges ON image_resources USING GIN (graph_edges);
254
+ CREATE INDEX IF NOT EXISTS idx_image_resources_metadata ON image_resources USING GIN (metadata);
255
+ CREATE INDEX IF NOT EXISTS idx_image_resources_tags ON image_resources USING GIN (tags);
177
256
 
178
257
  -- Embeddings for image_resources
179
258
  CREATE TABLE IF NOT EXISTS embeddings_image_resources (
@@ -191,14 +270,14 @@ CREATE TABLE IF NOT EXISTS embeddings_image_resources (
191
270
  );
192
271
 
193
272
  -- Index for entity lookup (get all embeddings for entity)
194
- CREATE INDEX idx_embeddings_image_resources_entity ON embeddings_image_resources (entity_id);
273
+ CREATE INDEX IF NOT EXISTS idx_embeddings_image_resources_entity ON embeddings_image_resources (entity_id);
195
274
 
196
275
  -- Index for field + provider lookup
197
- CREATE INDEX idx_embeddings_image_resources_field_provider ON embeddings_image_resources (field_name, provider);
276
+ CREATE INDEX IF NOT EXISTS idx_embeddings_image_resources_field_provider ON embeddings_image_resources (field_name, provider);
198
277
 
199
278
  -- HNSW index for vector similarity search (created in background)
200
279
  -- Note: This will be created by background thread after data load
201
- -- CREATE INDEX idx_embeddings_image_resources_vector_hnsw ON embeddings_image_resources
280
+ -- CREATE INDEX IF NOT EXISTS idx_embeddings_image_resources_vector_hnsw ON embeddings_image_resources
202
281
  -- USING hnsw (embedding vector_cosine_ops);
203
282
 
204
283
  -- KV_STORE trigger for image_resources
@@ -252,23 +331,21 @@ AFTER INSERT OR UPDATE OR DELETE ON image_resources
252
331
  FOR EACH ROW EXECUTE FUNCTION fn_image_resources_kv_store_upsert();
253
332
 
254
333
  -- ======================================================================
255
- -- FEEDBACKS (Model: Feedback)
334
+ -- MESSAGES (Model: Message)
256
335
  -- ======================================================================
257
336
 
258
- CREATE TABLE IF NOT EXISTS feedbacks (
337
+ CREATE TABLE IF NOT EXISTS messages (
259
338
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
260
339
  tenant_id VARCHAR(100) NOT NULL,
261
340
  user_id VARCHAR(256),
262
- session_id VARCHAR(256) NOT NULL,
263
- message_id VARCHAR(256),
264
- rating INTEGER,
265
- categories TEXT[] DEFAULT ARRAY[]::TEXT[],
266
- comment TEXT,
341
+ content TEXT NOT NULL,
342
+ message_type VARCHAR(256),
343
+ session_id VARCHAR(256),
344
+ prompt TEXT,
345
+ model VARCHAR(256),
346
+ token_count INTEGER,
267
347
  trace_id VARCHAR(256),
268
348
  span_id VARCHAR(256),
269
- phoenix_synced BOOLEAN,
270
- phoenix_annotation_id VARCHAR(256),
271
- annotator_kind VARCHAR(256),
272
349
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
273
350
  updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
274
351
  deleted_at TIMESTAMP,
@@ -277,15 +354,41 @@ CREATE TABLE IF NOT EXISTS feedbacks (
277
354
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
278
355
  );
279
356
 
280
- CREATE INDEX idx_feedbacks_tenant ON feedbacks (tenant_id);
281
- CREATE INDEX idx_feedbacks_user ON feedbacks (user_id);
282
- CREATE INDEX idx_feedbacks_graph_edges ON feedbacks USING GIN (graph_edges);
283
- CREATE INDEX idx_feedbacks_metadata ON feedbacks USING GIN (metadata);
284
- CREATE INDEX idx_feedbacks_tags ON feedbacks USING GIN (tags);
357
+ CREATE INDEX IF NOT EXISTS idx_messages_tenant ON messages (tenant_id);
358
+ CREATE INDEX IF NOT EXISTS idx_messages_user ON messages (user_id);
359
+ CREATE INDEX IF NOT EXISTS idx_messages_graph_edges ON messages USING GIN (graph_edges);
360
+ CREATE INDEX IF NOT EXISTS idx_messages_metadata ON messages USING GIN (metadata);
361
+ CREATE INDEX IF NOT EXISTS idx_messages_tags ON messages USING GIN (tags);
285
362
 
286
- -- KV_STORE trigger for feedbacks
287
- -- Trigger function to maintain KV_STORE for feedbacks
288
- CREATE OR REPLACE FUNCTION fn_feedbacks_kv_store_upsert()
363
+ -- Embeddings for messages
364
+ CREATE TABLE IF NOT EXISTS embeddings_messages (
365
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
366
+ entity_id UUID NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
367
+ field_name VARCHAR(100) NOT NULL,
368
+ provider VARCHAR(50) NOT NULL DEFAULT 'openai',
369
+ model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
370
+ embedding vector(1536) NOT NULL,
371
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
372
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
373
+
374
+ -- Unique: one embedding per entity per field per provider
375
+ UNIQUE (entity_id, field_name, provider)
376
+ );
377
+
378
+ -- Index for entity lookup (get all embeddings for entity)
379
+ CREATE INDEX IF NOT EXISTS idx_embeddings_messages_entity ON embeddings_messages (entity_id);
380
+
381
+ -- Index for field + provider lookup
382
+ CREATE INDEX IF NOT EXISTS idx_embeddings_messages_field_provider ON embeddings_messages (field_name, provider);
383
+
384
+ -- HNSW index for vector similarity search (created in background)
385
+ -- Note: This will be created by background thread after data load
386
+ -- CREATE INDEX IF NOT EXISTS idx_embeddings_messages_vector_hnsw ON embeddings_messages
387
+ -- USING hnsw (embedding vector_cosine_ops);
388
+
389
+ -- KV_STORE trigger for messages
390
+ -- Trigger function to maintain KV_STORE for messages
391
+ CREATE OR REPLACE FUNCTION fn_messages_kv_store_upsert()
289
392
  RETURNS TRIGGER AS $$
290
393
  BEGIN
291
394
  IF (TG_OP = 'DELETE') THEN
@@ -306,7 +409,7 @@ BEGIN
306
409
  updated_at
307
410
  ) VALUES (
308
411
  NEW.id::VARCHAR,
309
- 'feedbacks',
412
+ 'messages',
310
413
  NEW.id,
311
414
  NEW.tenant_id,
312
415
  NEW.user_id,
@@ -328,10 +431,10 @@ END;
328
431
  $$ LANGUAGE plpgsql;
329
432
 
330
433
  -- Create trigger
331
- DROP TRIGGER IF EXISTS trg_feedbacks_kv_store ON feedbacks;
332
- CREATE TRIGGER trg_feedbacks_kv_store
333
- AFTER INSERT OR UPDATE OR DELETE ON feedbacks
334
- FOR EACH ROW EXECUTE FUNCTION fn_feedbacks_kv_store_upsert();
434
+ DROP TRIGGER IF EXISTS trg_messages_kv_store ON messages;
435
+ CREATE TRIGGER trg_messages_kv_store
436
+ AFTER INSERT OR UPDATE OR DELETE ON messages
437
+ FOR EACH ROW EXECUTE FUNCTION fn_messages_kv_store_upsert();
335
438
 
336
439
  -- ======================================================================
337
440
  -- MOMENTS (Model: Moment)
@@ -359,11 +462,11 @@ CREATE TABLE IF NOT EXISTS moments (
359
462
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
360
463
  );
361
464
 
362
- CREATE INDEX idx_moments_tenant ON moments (tenant_id);
363
- CREATE INDEX idx_moments_user ON moments (user_id);
364
- CREATE INDEX idx_moments_graph_edges ON moments USING GIN (graph_edges);
365
- CREATE INDEX idx_moments_metadata ON moments USING GIN (metadata);
366
- CREATE INDEX idx_moments_tags ON moments USING GIN (tags);
465
+ CREATE INDEX IF NOT EXISTS idx_moments_tenant ON moments (tenant_id);
466
+ CREATE INDEX IF NOT EXISTS idx_moments_user ON moments (user_id);
467
+ CREATE INDEX IF NOT EXISTS idx_moments_graph_edges ON moments USING GIN (graph_edges);
468
+ CREATE INDEX IF NOT EXISTS idx_moments_metadata ON moments USING GIN (metadata);
469
+ CREATE INDEX IF NOT EXISTS idx_moments_tags ON moments USING GIN (tags);
367
470
 
368
471
  -- Embeddings for moments
369
472
  CREATE TABLE IF NOT EXISTS embeddings_moments (
@@ -381,14 +484,14 @@ CREATE TABLE IF NOT EXISTS embeddings_moments (
381
484
  );
382
485
 
383
486
  -- Index for entity lookup (get all embeddings for entity)
384
- CREATE INDEX idx_embeddings_moments_entity ON embeddings_moments (entity_id);
487
+ CREATE INDEX IF NOT EXISTS idx_embeddings_moments_entity ON embeddings_moments (entity_id);
385
488
 
386
489
  -- Index for field + provider lookup
387
- CREATE INDEX idx_embeddings_moments_field_provider ON embeddings_moments (field_name, provider);
490
+ CREATE INDEX IF NOT EXISTS idx_embeddings_moments_field_provider ON embeddings_moments (field_name, provider);
388
491
 
389
492
  -- HNSW index for vector similarity search (created in background)
390
493
  -- Note: This will be created by background thread after data load
391
- -- CREATE INDEX idx_embeddings_moments_vector_hnsw ON embeddings_moments
494
+ -- CREATE INDEX IF NOT EXISTS idx_embeddings_moments_vector_hnsw ON embeddings_moments
392
495
  -- USING hnsw (embedding vector_cosine_ops);
393
496
 
394
497
  -- KV_STORE trigger for moments
@@ -442,15 +545,22 @@ AFTER INSERT OR UPDATE OR DELETE ON moments
442
545
  FOR EACH ROW EXECUTE FUNCTION fn_moments_kv_store_upsert();
443
546
 
444
547
  -- ======================================================================
445
- -- PERSONS (Model: Person)
548
+ -- ONTOLOGIES (Model: Ontology)
446
549
  -- ======================================================================
447
550
 
448
- CREATE TABLE IF NOT EXISTS persons (
551
+ CREATE TABLE IF NOT EXISTS ontologies (
449
552
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
450
553
  tenant_id VARCHAR(100) NOT NULL,
451
554
  user_id VARCHAR(256),
452
555
  name VARCHAR(256) NOT NULL,
453
- role VARCHAR(256),
556
+ file_id UUID NOT NULL,
557
+ agent_schema_id VARCHAR(256) NOT NULL,
558
+ provider_name VARCHAR(256) NOT NULL,
559
+ model_name VARCHAR(256) NOT NULL,
560
+ extracted_data JSONB NOT NULL,
561
+ confidence_score DOUBLE PRECISION,
562
+ extraction_timestamp VARCHAR(256),
563
+ embedding_text TEXT,
454
564
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
455
565
  updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
456
566
  deleted_at TIMESTAMP,
@@ -459,15 +569,15 @@ CREATE TABLE IF NOT EXISTS persons (
459
569
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
460
570
  );
461
571
 
462
- CREATE INDEX idx_persons_tenant ON persons (tenant_id);
463
- CREATE INDEX idx_persons_user ON persons (user_id);
464
- CREATE INDEX idx_persons_graph_edges ON persons USING GIN (graph_edges);
465
- CREATE INDEX idx_persons_metadata ON persons USING GIN (metadata);
466
- CREATE INDEX idx_persons_tags ON persons USING GIN (tags);
572
+ CREATE INDEX IF NOT EXISTS idx_ontologies_tenant ON ontologies (tenant_id);
573
+ CREATE INDEX IF NOT EXISTS idx_ontologies_user ON ontologies (user_id);
574
+ CREATE INDEX IF NOT EXISTS idx_ontologies_graph_edges ON ontologies USING GIN (graph_edges);
575
+ CREATE INDEX IF NOT EXISTS idx_ontologies_metadata ON ontologies USING GIN (metadata);
576
+ CREATE INDEX IF NOT EXISTS idx_ontologies_tags ON ontologies USING GIN (tags);
467
577
 
468
- -- KV_STORE trigger for persons
469
- -- Trigger function to maintain KV_STORE for persons
470
- CREATE OR REPLACE FUNCTION fn_persons_kv_store_upsert()
578
+ -- KV_STORE trigger for ontologies
579
+ -- Trigger function to maintain KV_STORE for ontologies
580
+ CREATE OR REPLACE FUNCTION fn_ontologies_kv_store_upsert()
471
581
  RETURNS TRIGGER AS $$
472
582
  BEGIN
473
583
  IF (TG_OP = 'DELETE') THEN
@@ -488,7 +598,7 @@ BEGIN
488
598
  updated_at
489
599
  ) VALUES (
490
600
  NEW.id::VARCHAR,
491
- 'persons',
601
+ 'ontologies',
492
602
  NEW.id,
493
603
  NEW.tenant_id,
494
604
  NEW.user_id,
@@ -510,28 +620,29 @@ END;
510
620
  $$ LANGUAGE plpgsql;
511
621
 
512
622
  -- Create trigger
513
- DROP TRIGGER IF EXISTS trg_persons_kv_store ON persons;
514
- CREATE TRIGGER trg_persons_kv_store
515
- AFTER INSERT OR UPDATE OR DELETE ON persons
516
- FOR EACH ROW EXECUTE FUNCTION fn_persons_kv_store_upsert();
623
+ DROP TRIGGER IF EXISTS trg_ontologies_kv_store ON ontologies;
624
+ CREATE TRIGGER trg_ontologies_kv_store
625
+ AFTER INSERT OR UPDATE OR DELETE ON ontologies
626
+ FOR EACH ROW EXECUTE FUNCTION fn_ontologies_kv_store_upsert();
517
627
 
518
628
  -- ======================================================================
519
- -- SESSIONS (Model: Session)
629
+ -- ONTOLOGY_CONFIGS (Model: OntologyConfig)
520
630
  -- ======================================================================
521
631
 
522
- CREATE TABLE IF NOT EXISTS sessions (
632
+ CREATE TABLE IF NOT EXISTS ontology_configs (
523
633
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
524
634
  tenant_id VARCHAR(100) NOT NULL,
525
635
  user_id VARCHAR(256),
526
636
  name VARCHAR(256) NOT NULL,
527
- mode TEXT,
637
+ agent_schema_id VARCHAR(256) NOT NULL,
528
638
  description TEXT,
529
- original_trace_id VARCHAR(256),
530
- settings_overrides JSONB,
531
- prompt TEXT,
532
- agent_schema_uri VARCHAR(256),
533
- message_count INTEGER,
534
- total_tokens INTEGER,
639
+ mime_type_pattern VARCHAR(256),
640
+ uri_pattern VARCHAR(256),
641
+ tag_filter TEXT[],
642
+ priority INTEGER,
643
+ enabled BOOLEAN,
644
+ provider_name VARCHAR(256),
645
+ model_name VARCHAR(256),
535
646
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
536
647
  updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
537
648
  deleted_at TIMESTAMP,
@@ -540,16 +651,16 @@ CREATE TABLE IF NOT EXISTS sessions (
540
651
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
541
652
  );
542
653
 
543
- CREATE INDEX idx_sessions_tenant ON sessions (tenant_id);
544
- CREATE INDEX idx_sessions_user ON sessions (user_id);
545
- CREATE INDEX idx_sessions_graph_edges ON sessions USING GIN (graph_edges);
546
- CREATE INDEX idx_sessions_metadata ON sessions USING GIN (metadata);
547
- CREATE INDEX idx_sessions_tags ON sessions USING GIN (tags);
654
+ CREATE INDEX IF NOT EXISTS idx_ontology_configs_tenant ON ontology_configs (tenant_id);
655
+ CREATE INDEX IF NOT EXISTS idx_ontology_configs_user ON ontology_configs (user_id);
656
+ CREATE INDEX IF NOT EXISTS idx_ontology_configs_graph_edges ON ontology_configs USING GIN (graph_edges);
657
+ CREATE INDEX IF NOT EXISTS idx_ontology_configs_metadata ON ontology_configs USING GIN (metadata);
658
+ CREATE INDEX IF NOT EXISTS idx_ontology_configs_tags ON ontology_configs USING GIN (tags);
548
659
 
549
- -- Embeddings for sessions
550
- CREATE TABLE IF NOT EXISTS embeddings_sessions (
660
+ -- Embeddings for ontology_configs
661
+ CREATE TABLE IF NOT EXISTS embeddings_ontology_configs (
551
662
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
552
- entity_id UUID NOT NULL REFERENCES sessions(id) ON DELETE CASCADE,
663
+ entity_id UUID NOT NULL REFERENCES ontology_configs(id) ON DELETE CASCADE,
553
664
  field_name VARCHAR(100) NOT NULL,
554
665
  provider VARCHAR(50) NOT NULL DEFAULT 'openai',
555
666
  model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
@@ -562,19 +673,19 @@ CREATE TABLE IF NOT EXISTS embeddings_sessions (
562
673
  );
563
674
 
564
675
  -- Index for entity lookup (get all embeddings for entity)
565
- CREATE INDEX idx_embeddings_sessions_entity ON embeddings_sessions (entity_id);
676
+ CREATE INDEX IF NOT EXISTS idx_embeddings_ontology_configs_entity ON embeddings_ontology_configs (entity_id);
566
677
 
567
678
  -- Index for field + provider lookup
568
- CREATE INDEX idx_embeddings_sessions_field_provider ON embeddings_sessions (field_name, provider);
679
+ CREATE INDEX IF NOT EXISTS idx_embeddings_ontology_configs_field_provider ON embeddings_ontology_configs (field_name, provider);
569
680
 
570
681
  -- HNSW index for vector similarity search (created in background)
571
682
  -- Note: This will be created by background thread after data load
572
- -- CREATE INDEX idx_embeddings_sessions_vector_hnsw ON embeddings_sessions
683
+ -- CREATE INDEX IF NOT EXISTS idx_embeddings_ontology_configs_vector_hnsw ON embeddings_ontology_configs
573
684
  -- USING hnsw (embedding vector_cosine_ops);
574
685
 
575
- -- KV_STORE trigger for sessions
576
- -- Trigger function to maintain KV_STORE for sessions
577
- CREATE OR REPLACE FUNCTION fn_sessions_kv_store_upsert()
686
+ -- KV_STORE trigger for ontology_configs
687
+ -- Trigger function to maintain KV_STORE for ontology_configs
688
+ CREATE OR REPLACE FUNCTION fn_ontology_configs_kv_store_upsert()
578
689
  RETURNS TRIGGER AS $$
579
690
  BEGIN
580
691
  IF (TG_OP = 'DELETE') THEN
@@ -594,8 +705,8 @@ BEGIN
594
705
  graph_edges,
595
706
  updated_at
596
707
  ) VALUES (
597
- NEW.name::VARCHAR,
598
- 'sessions',
708
+ NEW.id::VARCHAR,
709
+ 'ontology_configs',
599
710
  NEW.id,
600
711
  NEW.tenant_id,
601
712
  NEW.user_id,
@@ -617,10 +728,10 @@ END;
617
728
  $$ LANGUAGE plpgsql;
618
729
 
619
730
  -- Create trigger
620
- DROP TRIGGER IF EXISTS trg_sessions_kv_store ON sessions;
621
- CREATE TRIGGER trg_sessions_kv_store
622
- AFTER INSERT OR UPDATE OR DELETE ON sessions
623
- FOR EACH ROW EXECUTE FUNCTION fn_sessions_kv_store_upsert();
731
+ DROP TRIGGER IF EXISTS trg_ontology_configs_kv_store ON ontology_configs;
732
+ CREATE TRIGGER trg_ontology_configs_kv_store
733
+ AFTER INSERT OR UPDATE OR DELETE ON ontology_configs
734
+ FOR EACH ROW EXECUTE FUNCTION fn_ontology_configs_kv_store_upsert();
624
735
 
625
736
  -- ======================================================================
626
737
  -- RESOURCES (Model: Resource)
@@ -645,11 +756,11 @@ CREATE TABLE IF NOT EXISTS resources (
645
756
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
646
757
  );
647
758
 
648
- CREATE INDEX idx_resources_tenant ON resources (tenant_id);
649
- CREATE INDEX idx_resources_user ON resources (user_id);
650
- CREATE INDEX idx_resources_graph_edges ON resources USING GIN (graph_edges);
651
- CREATE INDEX idx_resources_metadata ON resources USING GIN (metadata);
652
- CREATE INDEX idx_resources_tags ON resources USING GIN (tags);
759
+ CREATE INDEX IF NOT EXISTS idx_resources_tenant ON resources (tenant_id);
760
+ CREATE INDEX IF NOT EXISTS idx_resources_user ON resources (user_id);
761
+ CREATE INDEX IF NOT EXISTS idx_resources_graph_edges ON resources USING GIN (graph_edges);
762
+ CREATE INDEX IF NOT EXISTS idx_resources_metadata ON resources USING GIN (metadata);
763
+ CREATE INDEX IF NOT EXISTS idx_resources_tags ON resources USING GIN (tags);
653
764
 
654
765
  -- Embeddings for resources
655
766
  CREATE TABLE IF NOT EXISTS embeddings_resources (
@@ -667,14 +778,14 @@ CREATE TABLE IF NOT EXISTS embeddings_resources (
667
778
  );
668
779
 
669
780
  -- Index for entity lookup (get all embeddings for entity)
670
- CREATE INDEX idx_embeddings_resources_entity ON embeddings_resources (entity_id);
781
+ CREATE INDEX IF NOT EXISTS idx_embeddings_resources_entity ON embeddings_resources (entity_id);
671
782
 
672
783
  -- Index for field + provider lookup
673
- CREATE INDEX idx_embeddings_resources_field_provider ON embeddings_resources (field_name, provider);
784
+ CREATE INDEX IF NOT EXISTS idx_embeddings_resources_field_provider ON embeddings_resources (field_name, provider);
674
785
 
675
786
  -- HNSW index for vector similarity search (created in background)
676
787
  -- Note: This will be created by background thread after data load
677
- -- CREATE INDEX idx_embeddings_resources_vector_hnsw ON embeddings_resources
788
+ -- CREATE INDEX IF NOT EXISTS idx_embeddings_resources_vector_hnsw ON embeddings_resources
678
789
  -- USING hnsw (embedding vector_cosine_ops);
679
790
 
680
791
  -- KV_STORE trigger for resources
@@ -728,126 +839,19 @@ AFTER INSERT OR UPDATE OR DELETE ON resources
728
839
  FOR EACH ROW EXECUTE FUNCTION fn_resources_kv_store_upsert();
729
840
 
730
841
  -- ======================================================================
731
- -- MESSAGES (Model: Message)
842
+ -- SCHEMAS (Model: Schema)
732
843
  -- ======================================================================
733
844
 
734
- CREATE TABLE IF NOT EXISTS messages (
735
- id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
736
- tenant_id VARCHAR(100) NOT NULL,
737
- user_id VARCHAR(256),
738
- content TEXT NOT NULL,
739
- message_type VARCHAR(256),
740
- session_id VARCHAR(256),
741
- prompt TEXT,
742
- model VARCHAR(256),
743
- token_count INTEGER,
744
- trace_id VARCHAR(256),
745
- span_id VARCHAR(256),
746
- created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
747
- updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
748
- deleted_at TIMESTAMP,
749
- graph_edges JSONB DEFAULT '[]'::jsonb,
750
- metadata JSONB DEFAULT '{}'::jsonb,
751
- tags TEXT[] DEFAULT ARRAY[]::TEXT[]
752
- );
753
-
754
- CREATE INDEX idx_messages_tenant ON messages (tenant_id);
755
- CREATE INDEX idx_messages_user ON messages (user_id);
756
- CREATE INDEX idx_messages_graph_edges ON messages USING GIN (graph_edges);
757
- CREATE INDEX idx_messages_metadata ON messages USING GIN (metadata);
758
- CREATE INDEX idx_messages_tags ON messages USING GIN (tags);
759
-
760
- -- Embeddings for messages
761
- CREATE TABLE IF NOT EXISTS embeddings_messages (
762
- id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
763
- entity_id UUID NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
764
- field_name VARCHAR(100) NOT NULL,
765
- provider VARCHAR(50) NOT NULL DEFAULT 'openai',
766
- model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
767
- embedding vector(1536) NOT NULL,
768
- created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
769
- updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
770
-
771
- -- Unique: one embedding per entity per field per provider
772
- UNIQUE (entity_id, field_name, provider)
773
- );
774
-
775
- -- Index for entity lookup (get all embeddings for entity)
776
- CREATE INDEX idx_embeddings_messages_entity ON embeddings_messages (entity_id);
777
-
778
- -- Index for field + provider lookup
779
- CREATE INDEX idx_embeddings_messages_field_provider ON embeddings_messages (field_name, provider);
780
-
781
- -- HNSW index for vector similarity search (created in background)
782
- -- Note: This will be created by background thread after data load
783
- -- CREATE INDEX idx_embeddings_messages_vector_hnsw ON embeddings_messages
784
- -- USING hnsw (embedding vector_cosine_ops);
785
-
786
- -- KV_STORE trigger for messages
787
- -- Trigger function to maintain KV_STORE for messages
788
- CREATE OR REPLACE FUNCTION fn_messages_kv_store_upsert()
789
- RETURNS TRIGGER AS $$
790
- BEGIN
791
- IF (TG_OP = 'DELETE') THEN
792
- -- Remove from KV_STORE on delete
793
- DELETE FROM kv_store
794
- WHERE entity_id = OLD.id;
795
- RETURN OLD;
796
- ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
797
- -- Upsert to KV_STORE (O(1) lookup by entity_key)
798
- INSERT INTO kv_store (
799
- entity_key,
800
- entity_type,
801
- entity_id,
802
- tenant_id,
803
- user_id,
804
- metadata,
805
- graph_edges,
806
- updated_at
807
- ) VALUES (
808
- NEW.id::VARCHAR,
809
- 'messages',
810
- NEW.id,
811
- NEW.tenant_id,
812
- NEW.user_id,
813
- NEW.metadata,
814
- COALESCE(NEW.graph_edges, '[]'::jsonb),
815
- CURRENT_TIMESTAMP
816
- )
817
- ON CONFLICT (tenant_id, entity_key)
818
- DO UPDATE SET
819
- entity_id = EXCLUDED.entity_id,
820
- user_id = EXCLUDED.user_id,
821
- metadata = EXCLUDED.metadata,
822
- graph_edges = EXCLUDED.graph_edges,
823
- updated_at = CURRENT_TIMESTAMP;
824
-
825
- RETURN NEW;
826
- END IF;
827
- END;
828
- $$ LANGUAGE plpgsql;
829
-
830
- -- Create trigger
831
- DROP TRIGGER IF EXISTS trg_messages_kv_store ON messages;
832
- CREATE TRIGGER trg_messages_kv_store
833
- AFTER INSERT OR UPDATE OR DELETE ON messages
834
- FOR EACH ROW EXECUTE FUNCTION fn_messages_kv_store_upsert();
835
-
836
- -- ======================================================================
837
- -- FILES (Model: File)
838
- -- ======================================================================
839
-
840
- CREATE TABLE IF NOT EXISTS files (
845
+ CREATE TABLE IF NOT EXISTS schemas (
841
846
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
842
847
  tenant_id VARCHAR(100) NOT NULL,
843
848
  user_id VARCHAR(256),
844
849
  name VARCHAR(256) NOT NULL,
845
- uri VARCHAR(256) NOT NULL,
846
850
  content TEXT,
847
- timestamp VARCHAR(256),
848
- size_bytes INTEGER,
849
- mime_type VARCHAR(256),
850
- processing_status VARCHAR(256),
851
+ spec JSONB NOT NULL,
852
+ category VARCHAR(256),
853
+ provider_configs JSONB DEFAULT '{}'::jsonb,
854
+ embedding_fields TEXT[] DEFAULT ARRAY[]::TEXT[],
851
855
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
852
856
  updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
853
857
  deleted_at TIMESTAMP,
@@ -856,16 +860,16 @@ CREATE TABLE IF NOT EXISTS files (
856
860
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
857
861
  );
858
862
 
859
- CREATE INDEX idx_files_tenant ON files (tenant_id);
860
- CREATE INDEX idx_files_user ON files (user_id);
861
- CREATE INDEX idx_files_graph_edges ON files USING GIN (graph_edges);
862
- CREATE INDEX idx_files_metadata ON files USING GIN (metadata);
863
- CREATE INDEX idx_files_tags ON files USING GIN (tags);
863
+ CREATE INDEX IF NOT EXISTS idx_schemas_tenant ON schemas (tenant_id);
864
+ CREATE INDEX IF NOT EXISTS idx_schemas_user ON schemas (user_id);
865
+ CREATE INDEX IF NOT EXISTS idx_schemas_graph_edges ON schemas USING GIN (graph_edges);
866
+ CREATE INDEX IF NOT EXISTS idx_schemas_metadata ON schemas USING GIN (metadata);
867
+ CREATE INDEX IF NOT EXISTS idx_schemas_tags ON schemas USING GIN (tags);
864
868
 
865
- -- Embeddings for files
866
- CREATE TABLE IF NOT EXISTS embeddings_files (
869
+ -- Embeddings for schemas
870
+ CREATE TABLE IF NOT EXISTS embeddings_schemas (
867
871
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
868
- entity_id UUID NOT NULL REFERENCES files(id) ON DELETE CASCADE,
872
+ entity_id UUID NOT NULL REFERENCES schemas(id) ON DELETE CASCADE,
869
873
  field_name VARCHAR(100) NOT NULL,
870
874
  provider VARCHAR(50) NOT NULL DEFAULT 'openai',
871
875
  model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
@@ -878,100 +882,19 @@ CREATE TABLE IF NOT EXISTS embeddings_files (
878
882
  );
879
883
 
880
884
  -- Index for entity lookup (get all embeddings for entity)
881
- CREATE INDEX idx_embeddings_files_entity ON embeddings_files (entity_id);
885
+ CREATE INDEX IF NOT EXISTS idx_embeddings_schemas_entity ON embeddings_schemas (entity_id);
882
886
 
883
887
  -- Index for field + provider lookup
884
- CREATE INDEX idx_embeddings_files_field_provider ON embeddings_files (field_name, provider);
888
+ CREATE INDEX IF NOT EXISTS idx_embeddings_schemas_field_provider ON embeddings_schemas (field_name, provider);
885
889
 
886
890
  -- HNSW index for vector similarity search (created in background)
887
891
  -- Note: This will be created by background thread after data load
888
- -- CREATE INDEX idx_embeddings_files_vector_hnsw ON embeddings_files
892
+ -- CREATE INDEX IF NOT EXISTS idx_embeddings_schemas_vector_hnsw ON embeddings_schemas
889
893
  -- USING hnsw (embedding vector_cosine_ops);
890
894
 
891
- -- KV_STORE trigger for files
892
- -- Trigger function to maintain KV_STORE for files
893
- CREATE OR REPLACE FUNCTION fn_files_kv_store_upsert()
894
- RETURNS TRIGGER AS $$
895
- BEGIN
896
- IF (TG_OP = 'DELETE') THEN
897
- -- Remove from KV_STORE on delete
898
- DELETE FROM kv_store
899
- WHERE entity_id = OLD.id;
900
- RETURN OLD;
901
- ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
902
- -- Upsert to KV_STORE (O(1) lookup by entity_key)
903
- INSERT INTO kv_store (
904
- entity_key,
905
- entity_type,
906
- entity_id,
907
- tenant_id,
908
- user_id,
909
- metadata,
910
- graph_edges,
911
- updated_at
912
- ) VALUES (
913
- NEW.id::VARCHAR,
914
- 'files',
915
- NEW.id,
916
- NEW.tenant_id,
917
- NEW.user_id,
918
- NEW.metadata,
919
- COALESCE(NEW.graph_edges, '[]'::jsonb),
920
- CURRENT_TIMESTAMP
921
- )
922
- ON CONFLICT (tenant_id, entity_key)
923
- DO UPDATE SET
924
- entity_id = EXCLUDED.entity_id,
925
- user_id = EXCLUDED.user_id,
926
- metadata = EXCLUDED.metadata,
927
- graph_edges = EXCLUDED.graph_edges,
928
- updated_at = CURRENT_TIMESTAMP;
929
-
930
- RETURN NEW;
931
- END IF;
932
- END;
933
- $$ LANGUAGE plpgsql;
934
-
935
- -- Create trigger
936
- DROP TRIGGER IF EXISTS trg_files_kv_store ON files;
937
- CREATE TRIGGER trg_files_kv_store
938
- AFTER INSERT OR UPDATE OR DELETE ON files
939
- FOR EACH ROW EXECUTE FUNCTION fn_files_kv_store_upsert();
940
-
941
- -- ======================================================================
942
- -- ONTOLOGIES (Model: Ontology)
943
- -- ======================================================================
944
-
945
- CREATE TABLE IF NOT EXISTS ontologies (
946
- id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
947
- tenant_id VARCHAR(100) NOT NULL,
948
- user_id VARCHAR(256),
949
- name VARCHAR(256) NOT NULL,
950
- file_id UUID NOT NULL,
951
- agent_schema_id VARCHAR(256) NOT NULL,
952
- provider_name VARCHAR(256) NOT NULL,
953
- model_name VARCHAR(256) NOT NULL,
954
- extracted_data JSONB NOT NULL,
955
- confidence_score DOUBLE PRECISION,
956
- extraction_timestamp VARCHAR(256),
957
- embedding_text TEXT,
958
- created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
959
- updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
960
- deleted_at TIMESTAMP,
961
- graph_edges JSONB DEFAULT '[]'::jsonb,
962
- metadata JSONB DEFAULT '{}'::jsonb,
963
- tags TEXT[] DEFAULT ARRAY[]::TEXT[]
964
- );
965
-
966
- CREATE INDEX idx_ontologies_tenant ON ontologies (tenant_id);
967
- CREATE INDEX idx_ontologies_user ON ontologies (user_id);
968
- CREATE INDEX idx_ontologies_graph_edges ON ontologies USING GIN (graph_edges);
969
- CREATE INDEX idx_ontologies_metadata ON ontologies USING GIN (metadata);
970
- CREATE INDEX idx_ontologies_tags ON ontologies USING GIN (tags);
971
-
972
- -- KV_STORE trigger for ontologies
973
- -- Trigger function to maintain KV_STORE for ontologies
974
- CREATE OR REPLACE FUNCTION fn_ontologies_kv_store_upsert()
895
+ -- KV_STORE trigger for schemas
896
+ -- Trigger function to maintain KV_STORE for schemas
897
+ CREATE OR REPLACE FUNCTION fn_schemas_kv_store_upsert()
975
898
  RETURNS TRIGGER AS $$
976
899
  BEGIN
977
900
  IF (TG_OP = 'DELETE') THEN
@@ -992,7 +915,7 @@ BEGIN
992
915
  updated_at
993
916
  ) VALUES (
994
917
  NEW.id::VARCHAR,
995
- 'ontologies',
918
+ 'schemas',
996
919
  NEW.id,
997
920
  NEW.tenant_id,
998
921
  NEW.user_id,
@@ -1014,29 +937,28 @@ END;
1014
937
  $$ LANGUAGE plpgsql;
1015
938
 
1016
939
  -- Create trigger
1017
- DROP TRIGGER IF EXISTS trg_ontologies_kv_store ON ontologies;
1018
- CREATE TRIGGER trg_ontologies_kv_store
1019
- AFTER INSERT OR UPDATE OR DELETE ON ontologies
1020
- FOR EACH ROW EXECUTE FUNCTION fn_ontologies_kv_store_upsert();
940
+ DROP TRIGGER IF EXISTS trg_schemas_kv_store ON schemas;
941
+ CREATE TRIGGER trg_schemas_kv_store
942
+ AFTER INSERT OR UPDATE OR DELETE ON schemas
943
+ FOR EACH ROW EXECUTE FUNCTION fn_schemas_kv_store_upsert();
1021
944
 
1022
945
  -- ======================================================================
1023
- -- ONTOLOGY_CONFIGS (Model: OntologyConfig)
946
+ -- SESSIONS (Model: Session)
1024
947
  -- ======================================================================
1025
948
 
1026
- CREATE TABLE IF NOT EXISTS ontology_configs (
949
+ CREATE TABLE IF NOT EXISTS sessions (
1027
950
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
1028
951
  tenant_id VARCHAR(100) NOT NULL,
1029
952
  user_id VARCHAR(256),
1030
953
  name VARCHAR(256) NOT NULL,
1031
- agent_schema_id VARCHAR(256) NOT NULL,
954
+ mode TEXT,
1032
955
  description TEXT,
1033
- mime_type_pattern VARCHAR(256),
1034
- uri_pattern VARCHAR(256),
1035
- tag_filter TEXT[],
1036
- priority INTEGER,
1037
- enabled BOOLEAN,
1038
- provider_name VARCHAR(256),
1039
- model_name VARCHAR(256),
956
+ original_trace_id VARCHAR(256),
957
+ settings_overrides JSONB,
958
+ prompt TEXT,
959
+ agent_schema_uri VARCHAR(256),
960
+ message_count INTEGER,
961
+ total_tokens INTEGER,
1040
962
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1041
963
  updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1042
964
  deleted_at TIMESTAMP,
@@ -1045,16 +967,16 @@ CREATE TABLE IF NOT EXISTS ontology_configs (
1045
967
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
1046
968
  );
1047
969
 
1048
- CREATE INDEX idx_ontology_configs_tenant ON ontology_configs (tenant_id);
1049
- CREATE INDEX idx_ontology_configs_user ON ontology_configs (user_id);
1050
- CREATE INDEX idx_ontology_configs_graph_edges ON ontology_configs USING GIN (graph_edges);
1051
- CREATE INDEX idx_ontology_configs_metadata ON ontology_configs USING GIN (metadata);
1052
- CREATE INDEX idx_ontology_configs_tags ON ontology_configs USING GIN (tags);
970
+ CREATE INDEX IF NOT EXISTS idx_sessions_tenant ON sessions (tenant_id);
971
+ CREATE INDEX IF NOT EXISTS idx_sessions_user ON sessions (user_id);
972
+ CREATE INDEX IF NOT EXISTS idx_sessions_graph_edges ON sessions USING GIN (graph_edges);
973
+ CREATE INDEX IF NOT EXISTS idx_sessions_metadata ON sessions USING GIN (metadata);
974
+ CREATE INDEX IF NOT EXISTS idx_sessions_tags ON sessions USING GIN (tags);
1053
975
 
1054
- -- Embeddings for ontology_configs
1055
- CREATE TABLE IF NOT EXISTS embeddings_ontology_configs (
976
+ -- Embeddings for sessions
977
+ CREATE TABLE IF NOT EXISTS embeddings_sessions (
1056
978
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
1057
- entity_id UUID NOT NULL REFERENCES ontology_configs(id) ON DELETE CASCADE,
979
+ entity_id UUID NOT NULL REFERENCES sessions(id) ON DELETE CASCADE,
1058
980
  field_name VARCHAR(100) NOT NULL,
1059
981
  provider VARCHAR(50) NOT NULL DEFAULT 'openai',
1060
982
  model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
@@ -1067,19 +989,19 @@ CREATE TABLE IF NOT EXISTS embeddings_ontology_configs (
1067
989
  );
1068
990
 
1069
991
  -- Index for entity lookup (get all embeddings for entity)
1070
- CREATE INDEX idx_embeddings_ontology_configs_entity ON embeddings_ontology_configs (entity_id);
992
+ CREATE INDEX IF NOT EXISTS idx_embeddings_sessions_entity ON embeddings_sessions (entity_id);
1071
993
 
1072
994
  -- Index for field + provider lookup
1073
- CREATE INDEX idx_embeddings_ontology_configs_field_provider ON embeddings_ontology_configs (field_name, provider);
995
+ CREATE INDEX IF NOT EXISTS idx_embeddings_sessions_field_provider ON embeddings_sessions (field_name, provider);
1074
996
 
1075
997
  -- HNSW index for vector similarity search (created in background)
1076
998
  -- Note: This will be created by background thread after data load
1077
- -- CREATE INDEX idx_embeddings_ontology_configs_vector_hnsw ON embeddings_ontology_configs
999
+ -- CREATE INDEX IF NOT EXISTS idx_embeddings_sessions_vector_hnsw ON embeddings_sessions
1078
1000
  -- USING hnsw (embedding vector_cosine_ops);
1079
1001
 
1080
- -- KV_STORE trigger for ontology_configs
1081
- -- Trigger function to maintain KV_STORE for ontology_configs
1082
- CREATE OR REPLACE FUNCTION fn_ontology_configs_kv_store_upsert()
1002
+ -- KV_STORE trigger for sessions
1003
+ -- Trigger function to maintain KV_STORE for sessions
1004
+ CREATE OR REPLACE FUNCTION fn_sessions_kv_store_upsert()
1083
1005
  RETURNS TRIGGER AS $$
1084
1006
  BEGIN
1085
1007
  IF (TG_OP = 'DELETE') THEN
@@ -1099,8 +1021,8 @@ BEGIN
1099
1021
  graph_edges,
1100
1022
  updated_at
1101
1023
  ) VALUES (
1102
- NEW.id::VARCHAR,
1103
- 'ontology_configs',
1024
+ NEW.name::VARCHAR,
1025
+ 'sessions',
1104
1026
  NEW.id,
1105
1027
  NEW.tenant_id,
1106
1028
  NEW.user_id,
@@ -1122,26 +1044,22 @@ END;
1122
1044
  $$ LANGUAGE plpgsql;
1123
1045
 
1124
1046
  -- Create trigger
1125
- DROP TRIGGER IF EXISTS trg_ontology_configs_kv_store ON ontology_configs;
1126
- CREATE TRIGGER trg_ontology_configs_kv_store
1127
- AFTER INSERT OR UPDATE OR DELETE ON ontology_configs
1128
- FOR EACH ROW EXECUTE FUNCTION fn_ontology_configs_kv_store_upsert();
1047
+ DROP TRIGGER IF EXISTS trg_sessions_kv_store ON sessions;
1048
+ CREATE TRIGGER trg_sessions_kv_store
1049
+ AFTER INSERT OR UPDATE OR DELETE ON sessions
1050
+ FOR EACH ROW EXECUTE FUNCTION fn_sessions_kv_store_upsert();
1129
1051
 
1130
1052
  -- ======================================================================
1131
- -- DOMAIN_RESOURCES (Model: DomainResource)
1053
+ -- SHARED_SESSIONS (Model: SharedSession)
1132
1054
  -- ======================================================================
1133
1055
 
1134
- CREATE TABLE IF NOT EXISTS domain_resources (
1056
+ CREATE TABLE IF NOT EXISTS shared_sessions (
1135
1057
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
1136
1058
  tenant_id VARCHAR(100) NOT NULL,
1137
1059
  user_id VARCHAR(256),
1138
- name VARCHAR(256),
1139
- uri VARCHAR(256),
1140
- ordinal INTEGER,
1141
- content TEXT,
1142
- timestamp TIMESTAMP,
1143
- category VARCHAR(256),
1144
- related_entities JSONB DEFAULT '{}'::jsonb,
1060
+ session_id VARCHAR(256) NOT NULL,
1061
+ owner_user_id VARCHAR(256) NOT NULL,
1062
+ shared_with_user_id VARCHAR(256) NOT NULL,
1145
1063
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1146
1064
  updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1147
1065
  deleted_at TIMESTAMP,
@@ -1150,41 +1068,15 @@ CREATE TABLE IF NOT EXISTS domain_resources (
1150
1068
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
1151
1069
  );
1152
1070
 
1153
- CREATE INDEX idx_domain_resources_tenant ON domain_resources (tenant_id);
1154
- CREATE INDEX idx_domain_resources_user ON domain_resources (user_id);
1155
- CREATE INDEX idx_domain_resources_graph_edges ON domain_resources USING GIN (graph_edges);
1156
- CREATE INDEX idx_domain_resources_metadata ON domain_resources USING GIN (metadata);
1157
- CREATE INDEX idx_domain_resources_tags ON domain_resources USING GIN (tags);
1071
+ CREATE INDEX IF NOT EXISTS idx_shared_sessions_tenant ON shared_sessions (tenant_id);
1072
+ CREATE INDEX IF NOT EXISTS idx_shared_sessions_user ON shared_sessions (user_id);
1073
+ CREATE INDEX IF NOT EXISTS idx_shared_sessions_graph_edges ON shared_sessions USING GIN (graph_edges);
1074
+ CREATE INDEX IF NOT EXISTS idx_shared_sessions_metadata ON shared_sessions USING GIN (metadata);
1075
+ CREATE INDEX IF NOT EXISTS idx_shared_sessions_tags ON shared_sessions USING GIN (tags);
1158
1076
 
1159
- -- Embeddings for domain_resources
1160
- CREATE TABLE IF NOT EXISTS embeddings_domain_resources (
1161
- id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
1162
- entity_id UUID NOT NULL REFERENCES domain_resources(id) ON DELETE CASCADE,
1163
- field_name VARCHAR(100) NOT NULL,
1164
- provider VARCHAR(50) NOT NULL DEFAULT 'openai',
1165
- model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
1166
- embedding vector(1536) NOT NULL,
1167
- created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1168
- updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1169
-
1170
- -- Unique: one embedding per entity per field per provider
1171
- UNIQUE (entity_id, field_name, provider)
1172
- );
1173
-
1174
- -- Index for entity lookup (get all embeddings for entity)
1175
- CREATE INDEX idx_embeddings_domain_resources_entity ON embeddings_domain_resources (entity_id);
1176
-
1177
- -- Index for field + provider lookup
1178
- CREATE INDEX idx_embeddings_domain_resources_field_provider ON embeddings_domain_resources (field_name, provider);
1179
-
1180
- -- HNSW index for vector similarity search (created in background)
1181
- -- Note: This will be created by background thread after data load
1182
- -- CREATE INDEX idx_embeddings_domain_resources_vector_hnsw ON embeddings_domain_resources
1183
- -- USING hnsw (embedding vector_cosine_ops);
1184
-
1185
- -- KV_STORE trigger for domain_resources
1186
- -- Trigger function to maintain KV_STORE for domain_resources
1187
- CREATE OR REPLACE FUNCTION fn_domain_resources_kv_store_upsert()
1077
+ -- KV_STORE trigger for shared_sessions
1078
+ -- Trigger function to maintain KV_STORE for shared_sessions
1079
+ CREATE OR REPLACE FUNCTION fn_shared_sessions_kv_store_upsert()
1188
1080
  RETURNS TRIGGER AS $$
1189
1081
  BEGIN
1190
1082
  IF (TG_OP = 'DELETE') THEN
@@ -1204,8 +1096,8 @@ BEGIN
1204
1096
  graph_edges,
1205
1097
  updated_at
1206
1098
  ) VALUES (
1207
- NEW.name::VARCHAR,
1208
- 'domain_resources',
1099
+ NEW.id::VARCHAR,
1100
+ 'shared_sessions',
1209
1101
  NEW.id,
1210
1102
  NEW.tenant_id,
1211
1103
  NEW.user_id,
@@ -1227,25 +1119,30 @@ END;
1227
1119
  $$ LANGUAGE plpgsql;
1228
1120
 
1229
1121
  -- Create trigger
1230
- DROP TRIGGER IF EXISTS trg_domain_resources_kv_store ON domain_resources;
1231
- CREATE TRIGGER trg_domain_resources_kv_store
1232
- AFTER INSERT OR UPDATE OR DELETE ON domain_resources
1233
- FOR EACH ROW EXECUTE FUNCTION fn_domain_resources_kv_store_upsert();
1122
+ DROP TRIGGER IF EXISTS trg_shared_sessions_kv_store ON shared_sessions;
1123
+ CREATE TRIGGER trg_shared_sessions_kv_store
1124
+ AFTER INSERT OR UPDATE OR DELETE ON shared_sessions
1125
+ FOR EACH ROW EXECUTE FUNCTION fn_shared_sessions_kv_store_upsert();
1234
1126
 
1235
1127
  -- ======================================================================
1236
- -- SCHEMAS (Model: Schema)
1128
+ -- USERS (Model: User)
1237
1129
  -- ======================================================================
1238
1130
 
1239
- CREATE TABLE IF NOT EXISTS schemas (
1131
+ CREATE TABLE IF NOT EXISTS users (
1240
1132
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
1241
1133
  tenant_id VARCHAR(100) NOT NULL,
1242
1134
  user_id VARCHAR(256),
1243
1135
  name VARCHAR(256) NOT NULL,
1244
- content TEXT,
1245
- spec JSONB NOT NULL,
1246
- category VARCHAR(256),
1247
- provider_configs JSONB DEFAULT '{}'::jsonb,
1248
- embedding_fields TEXT[] DEFAULT ARRAY[]::TEXT[],
1136
+ email VARCHAR(256),
1137
+ role VARCHAR(256),
1138
+ tier TEXT,
1139
+ anonymous_ids TEXT[] DEFAULT ARRAY[]::TEXT[],
1140
+ sec_policy JSONB DEFAULT '{}'::jsonb,
1141
+ summary TEXT,
1142
+ interests TEXT[] DEFAULT ARRAY[]::TEXT[],
1143
+ preferred_topics TEXT[] DEFAULT ARRAY[]::TEXT[],
1144
+ activity_level VARCHAR(256),
1145
+ last_active_at TIMESTAMP,
1249
1146
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1250
1147
  updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1251
1148
  deleted_at TIMESTAMP,
@@ -1254,16 +1151,16 @@ CREATE TABLE IF NOT EXISTS schemas (
1254
1151
  tags TEXT[] DEFAULT ARRAY[]::TEXT[]
1255
1152
  );
1256
1153
 
1257
- CREATE INDEX idx_schemas_tenant ON schemas (tenant_id);
1258
- CREATE INDEX idx_schemas_user ON schemas (user_id);
1259
- CREATE INDEX idx_schemas_graph_edges ON schemas USING GIN (graph_edges);
1260
- CREATE INDEX idx_schemas_metadata ON schemas USING GIN (metadata);
1261
- CREATE INDEX idx_schemas_tags ON schemas USING GIN (tags);
1154
+ CREATE INDEX IF NOT EXISTS idx_users_tenant ON users (tenant_id);
1155
+ CREATE INDEX IF NOT EXISTS idx_users_user ON users (user_id);
1156
+ CREATE INDEX IF NOT EXISTS idx_users_graph_edges ON users USING GIN (graph_edges);
1157
+ CREATE INDEX IF NOT EXISTS idx_users_metadata ON users USING GIN (metadata);
1158
+ CREATE INDEX IF NOT EXISTS idx_users_tags ON users USING GIN (tags);
1262
1159
 
1263
- -- Embeddings for schemas
1264
- CREATE TABLE IF NOT EXISTS embeddings_schemas (
1160
+ -- Embeddings for users
1161
+ CREATE TABLE IF NOT EXISTS embeddings_users (
1265
1162
  id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
1266
- entity_id UUID NOT NULL REFERENCES schemas(id) ON DELETE CASCADE,
1163
+ entity_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
1267
1164
  field_name VARCHAR(100) NOT NULL,
1268
1165
  provider VARCHAR(50) NOT NULL DEFAULT 'openai',
1269
1166
  model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
@@ -1276,19 +1173,19 @@ CREATE TABLE IF NOT EXISTS embeddings_schemas (
1276
1173
  );
1277
1174
 
1278
1175
  -- Index for entity lookup (get all embeddings for entity)
1279
- CREATE INDEX idx_embeddings_schemas_entity ON embeddings_schemas (entity_id);
1176
+ CREATE INDEX IF NOT EXISTS idx_embeddings_users_entity ON embeddings_users (entity_id);
1280
1177
 
1281
1178
  -- Index for field + provider lookup
1282
- CREATE INDEX idx_embeddings_schemas_field_provider ON embeddings_schemas (field_name, provider);
1179
+ CREATE INDEX IF NOT EXISTS idx_embeddings_users_field_provider ON embeddings_users (field_name, provider);
1283
1180
 
1284
1181
  -- HNSW index for vector similarity search (created in background)
1285
1182
  -- Note: This will be created by background thread after data load
1286
- -- CREATE INDEX idx_embeddings_schemas_vector_hnsw ON embeddings_schemas
1183
+ -- CREATE INDEX IF NOT EXISTS idx_embeddings_users_vector_hnsw ON embeddings_users
1287
1184
  -- USING hnsw (embedding vector_cosine_ops);
1288
1185
 
1289
- -- KV_STORE trigger for schemas
1290
- -- Trigger function to maintain KV_STORE for schemas
1291
- CREATE OR REPLACE FUNCTION fn_schemas_kv_store_upsert()
1186
+ -- KV_STORE trigger for users
1187
+ -- Trigger function to maintain KV_STORE for users
1188
+ CREATE OR REPLACE FUNCTION fn_users_kv_store_upsert()
1292
1189
  RETURNS TRIGGER AS $$
1293
1190
  BEGIN
1294
1191
  IF (TG_OP = 'DELETE') THEN
@@ -1308,8 +1205,8 @@ BEGIN
1308
1205
  graph_edges,
1309
1206
  updated_at
1310
1207
  ) VALUES (
1311
- NEW.id::VARCHAR,
1312
- 'schemas',
1208
+ NEW.name::VARCHAR,
1209
+ 'users',
1313
1210
  NEW.id,
1314
1211
  NEW.tenant_id,
1315
1212
  NEW.user_id,
@@ -1331,189 +1228,1872 @@ END;
1331
1228
  $$ LANGUAGE plpgsql;
1332
1229
 
1333
1230
  -- Create trigger
1334
- DROP TRIGGER IF EXISTS trg_schemas_kv_store ON schemas;
1335
- CREATE TRIGGER trg_schemas_kv_store
1336
- AFTER INSERT OR UPDATE OR DELETE ON schemas
1337
- FOR EACH ROW EXECUTE FUNCTION fn_schemas_kv_store_upsert();
1231
+ DROP TRIGGER IF EXISTS trg_users_kv_store ON users;
1232
+ CREATE TRIGGER trg_users_kv_store
1233
+ AFTER INSERT OR UPDATE OR DELETE ON users
1234
+ FOR EACH ROW EXECUTE FUNCTION fn_users_kv_store_upsert();
1338
1235
 
1339
- -- ======================================================================
1340
- -- SHARED_SESSIONS (Session sharing between users)
1341
- -- ======================================================================
1342
- -- Lightweight linking table for session sharing. NOT a CoreModel - no
1343
- -- graph edges, metadata, or embeddings. Just tracks who shared what with whom.
1344
- --
1345
- -- See: src/rem/models/entities/shared_session.py for full documentation
1236
+ -- ============================================================================
1237
+ -- SCHEMA TABLE ENTRIES
1238
+ -- Every entity table gets a schemas entry for agent-like access
1239
+ -- ============================================================================
1346
1240
 
1347
- CREATE TABLE IF NOT EXISTS shared_sessions (
1348
- id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
1349
- session_id VARCHAR(256) NOT NULL,
1350
- owner_user_id VARCHAR(256) NOT NULL,
1351
- shared_with_user_id VARCHAR(256) NOT NULL,
1352
- tenant_id VARCHAR(100) NOT NULL DEFAULT 'default',
1353
- created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1354
- updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1355
- deleted_at TIMESTAMP,
1241
+ -- Schema entry for Feedback (feedbacks)
1242
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
1243
+ VALUES (
1244
+ 'ae554853-e743-5d73-a2db-1ce20e7089fe'::uuid,
1245
+ 'system',
1246
+ 'Feedback',
1247
+ '# Feedback
1248
+
1249
+
1250
+ User feedback on a message or session.
1251
+
1252
+ Captures structured feedback including:
1253
+ - Rating (1-5 scale or thumbs up/down)
1254
+ - Categories (predefined or custom)
1255
+ - Free-text comment
1256
+ - Trace reference for OTEL/Phoenix integration
1257
+
1258
+ The feedback can be attached to:
1259
+ - A specific message (message_id set)
1260
+ - An entire session (session_id set, message_id null)
1261
+
1262
+
1263
+ ## Overview
1264
+
1265
+ The `Feedback` entity is stored in the `feedbacks` table. Each record is uniquely
1266
+ identified by its `id` field for lookups and graph traversal.
1267
+
1268
+ ## Search Capabilities
1269
+
1270
+ This schema includes the `search_rem` tool which supports:
1271
+ - **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
1272
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
1273
+ - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM feedbacks LIMIT 10`)
1274
+ - **SQL**: Complex queries (e.g., `SELECT * FROM feedbacks WHERE ...`)
1275
+
1276
+ ## Table Info
1277
+
1278
+ | Property | Value |
1279
+ |----------|-------|
1280
+ | Table | `feedbacks` |
1281
+ | Entity Key | `id` |
1282
+ | Embedding Fields | None |
1283
+ | Tools | `search_rem` |
1284
+
1285
+ ## Fields
1286
+
1287
+ ### `id`
1288
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
1289
+ - **Optional**
1290
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
1291
+
1292
+ ### `created_at`
1293
+ - **Type**: `<class ''datetime.datetime''>`
1294
+ - **Optional**
1295
+ - Entity creation timestamp
1296
+
1297
+ ### `updated_at`
1298
+ - **Type**: `<class ''datetime.datetime''>`
1299
+ - **Optional**
1300
+ - Last update timestamp
1301
+
1302
+ ### `deleted_at`
1303
+ - **Type**: `typing.Optional[datetime.datetime]`
1304
+ - **Optional**
1305
+ - Soft deletion timestamp
1306
+
1307
+ ### `tenant_id`
1308
+ - **Type**: `typing.Optional[str]`
1309
+ - **Optional**
1310
+ - Tenant identifier for multi-tenancy isolation
1311
+
1312
+ ### `user_id`
1313
+ - **Type**: `typing.Optional[str]`
1314
+ - **Optional**
1315
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
1316
+
1317
+ ### `graph_edges`
1318
+ - **Type**: `list[dict]`
1319
+ - **Optional**
1320
+ - Knowledge graph edges stored as InlineEdge dicts
1321
+
1322
+ ### `metadata`
1323
+ - **Type**: `<class ''dict''>`
1324
+ - **Optional**
1325
+ - Flexible metadata storage
1326
+
1327
+ ### `tags`
1328
+ - **Type**: `list[str]`
1329
+ - **Optional**
1330
+ - Entity tags
1331
+
1332
+ ### `session_id`
1333
+ - **Type**: `<class ''str''>`
1334
+ - **Required**
1335
+ - Session ID this feedback relates to
1336
+
1337
+ ### `message_id`
1338
+ - **Type**: `str | None`
1339
+ - **Optional**
1340
+ - Specific message ID (null for session-level feedback)
1341
+
1342
+ ### `rating`
1343
+ - **Type**: `int | None`
1344
+ - **Optional**
1345
+ - Rating: -1 (thumbs down), 1 (thumbs up), or 1-5 scale
1346
+
1347
+ ### `categories`
1348
+ - **Type**: `list[str]`
1349
+ - **Optional**
1350
+ - Selected feedback categories (from FeedbackCategory or custom)
1351
+
1352
+ ### `comment`
1353
+ - **Type**: `str | None`
1354
+ - **Optional**
1355
+ - Optional free-text feedback comment
1356
+
1357
+ ### `trace_id`
1358
+ - **Type**: `str | None`
1359
+ - **Optional**
1360
+ - OTEL trace ID for linking to observability
1361
+
1362
+ ### `span_id`
1363
+ - **Type**: `str | None`
1364
+ - **Optional**
1365
+ - OTEL span ID for specific span feedback
1366
+
1367
+ ### `phoenix_synced`
1368
+ - **Type**: `<class ''bool''>`
1369
+ - **Optional**
1370
+ - Whether feedback has been synced to Phoenix as annotation
1371
+
1372
+ ### `phoenix_annotation_id`
1373
+ - **Type**: `str | None`
1374
+ - **Optional**
1375
+ - Phoenix annotation ID after sync
1376
+
1377
+ ### `annotator_kind`
1378
+ - **Type**: `<class ''str''>`
1379
+ - **Optional**
1380
+ - Annotator type: HUMAN, LLM, CODE
1381
+
1382
+ ',
1383
+ '{"type": "object", "description": "\n User feedback on a message or session.\n\n Captures structured feedback including:\n - Rating (1-5 scale or thumbs up/down)\n - Categories (predefined or custom)\n - Free-text comment\n - Trace reference for OTEL/Phoenix integration\n\n The feedback can be attached to:\n - A specific message (message_id set)\n - An entire session (session_id set, message_id null)\n \n\nThis agent can search the `feedbacks` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "session_id": {"description": "Session ID this feedback relates to", "title": "Session Id", "type": "string"}, "message_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Specific message ID (null for session-level feedback)", "title": "Message Id"}, "rating": {"anyOf": [{"maximum": 5, "minimum": -1, "type": "integer"}, {"type": "null"}], "default": null, "description": "Rating: -1 (thumbs down), 1 (thumbs up), or 1-5 scale", "title": "Rating"}, "categories": {"description": "Selected feedback categories (from FeedbackCategory or custom)", "items": {"type": "string"}, "title": "Categories", "type": "array"}, "comment": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Optional free-text feedback comment", "title": "Comment"}, "trace_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "OTEL trace ID for linking to observability", "title": "Trace Id"}, "span_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "OTEL span ID for specific span feedback", "title": "Span Id"}, "phoenix_synced": {"default": false, "description": "Whether feedback has been synced to Phoenix as annotation", "title": "Phoenix Synced", "type": "boolean"}, "phoenix_annotation_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Phoenix annotation ID after sync", "title": "Phoenix 
Annotation Id"}, "annotator_kind": {"default": "HUMAN", "description": "Annotator type: HUMAN, LLM, CODE", "title": "Annotator Kind", "type": "string"}}, "required": ["session_id"], "json_schema_extra": {"table_name": "feedbacks", "entity_key_field": "id", "embedding_fields": [], "fully_qualified_name": "rem.models.entities.feedback.Feedback", "tools": ["search_rem"], "default_search_table": "feedbacks", "has_embeddings": false}}'::jsonb,
1384
+ 'entity',
1385
+ '{"table_name": "feedbacks", "entity_key_field": "id", "embedding_fields": [], "fqn": "rem.models.entities.feedback.Feedback"}'::jsonb
1386
+ )
1387
+ ON CONFLICT (id) DO UPDATE SET
1388
+ name = EXCLUDED.name,
1389
+ content = EXCLUDED.content,
1390
+ spec = EXCLUDED.spec,
1391
+ category = EXCLUDED.category,
1392
+ metadata = EXCLUDED.metadata,
1393
+ updated_at = CURRENT_TIMESTAMP;
1394
+
1395
+ -- Schema entry for File (files)
1396
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
1397
+ VALUES (
1398
+ 'c3b3ef33-59d4-57a1-81a3-cc6adc45b194'::uuid,
1399
+ 'system',
1400
+ 'File',
1401
+ '# File
1402
+
1403
+
1404
+ File metadata and tracking.
1405
+
1406
+ Represents files uploaded to or referenced by the REM system,
1407
+ tracking their metadata and processing status. Tenant isolation
1408
+ is provided via CoreModel.tenant_id field.
1409
+
1410
+
1411
+ ## Overview
1412
+
1413
+ The `File` entity is stored in the `files` table. Each record is uniquely
1414
+ identified by its `id` field for lookups and graph traversal.
1415
+
1416
+ ## Search Capabilities
1417
+
1418
+ This schema includes the `search_rem` tool which supports:
1419
+ - **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
1420
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
1421
+ - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM files LIMIT 10`)
1422
+ - **SQL**: Complex queries (e.g., `SELECT * FROM files WHERE ...`)
1423
+
1424
+ ## Table Info
1425
+
1426
+ | Property | Value |
1427
+ |----------|-------|
1428
+ | Table | `files` |
1429
+ | Entity Key | `id` |
1430
+ | Embedding Fields | `content` |
1431
+ | Tools | `search_rem` |
1432
+
1433
+ ## Fields
1434
+
1435
+ ### `id`
1436
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
1437
+ - **Optional**
1438
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
1439
+
1440
+ ### `created_at`
1441
+ - **Type**: `<class ''datetime.datetime''>`
1442
+ - **Optional**
1443
+ - Entity creation timestamp
1444
+
1445
+ ### `updated_at`
1446
+ - **Type**: `<class ''datetime.datetime''>`
1447
+ - **Optional**
1448
+ - Last update timestamp
1449
+
1450
+ ### `deleted_at`
1451
+ - **Type**: `typing.Optional[datetime.datetime]`
1452
+ - **Optional**
1453
+ - Soft deletion timestamp
1454
+
1455
+ ### `tenant_id`
1456
+ - **Type**: `typing.Optional[str]`
1457
+ - **Optional**
1458
+ - Tenant identifier for multi-tenancy isolation
1459
+
1460
+ ### `user_id`
1461
+ - **Type**: `typing.Optional[str]`
1462
+ - **Optional**
1463
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
1464
+
1465
+ ### `graph_edges`
1466
+ - **Type**: `list[dict]`
1467
+ - **Optional**
1468
+ - Knowledge graph edges stored as InlineEdge dicts
1469
+
1470
+ ### `metadata`
1471
+ - **Type**: `<class ''dict''>`
1472
+ - **Optional**
1473
+ - Flexible metadata storage
1474
+
1475
+ ### `tags`
1476
+ - **Type**: `list[str]`
1477
+ - **Optional**
1478
+ - Entity tags
1479
+
1480
+ ### `name`
1481
+ - **Type**: `<class ''str''>`
1482
+ - **Required**
1483
+ - File name
1484
+
1485
+ ### `uri`
1486
+ - **Type**: `<class ''str''>`
1487
+ - **Required**
1488
+ - File storage URI (S3, local path, etc.)
1489
+
1490
+ ### `content`
1491
+ - **Type**: `typing.Optional[str]`
1492
+ - **Optional**
1493
+ - Extracted text content (if applicable)
1494
+
1495
+ ### `timestamp`
1496
+ - **Type**: `typing.Optional[str]`
1497
+ - **Optional**
1498
+ - File creation/modification timestamp
1499
+
1500
+ ### `size_bytes`
1501
+ - **Type**: `typing.Optional[int]`
1502
+ - **Optional**
1503
+ - File size in bytes
1504
+
1505
+ ### `mime_type`
1506
+ - **Type**: `typing.Optional[str]`
1507
+ - **Optional**
1508
+ - File MIME type
1509
+
1510
+ ### `processing_status`
1511
+ - **Type**: `typing.Optional[str]`
1512
+ - **Optional**
1513
+ - File processing status (pending, processing, completed, failed)
1514
+
1515
+ ',
1516
+ '{"type": "object", "description": "\n File metadata and tracking.\n\n Represents files uploaded to or referenced by the REM system,\n tracking their metadata and processing status. Tenant isolation\n is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `files` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "File name", "title": "Name", "type": "string"}, "uri": {"description": "File storage URI (S3, local path, etc.)", "title": "Uri", "type": "string"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Extracted text content (if applicable)", "title": "Content"}, "timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File creation/modification timestamp", "title": "Timestamp"}, "size_bytes": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "File size in bytes", "title": "Size Bytes"}, "mime_type": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File MIME type", "title": "Mime Type"}, "processing_status": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": "pending", "description": "File processing status (pending, processing, completed, failed)", "title": "Processing Status"}}, "required": ["name", "uri"], "json_schema_extra": {"table_name": "files", "entity_key_field": "id", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.file.File", "tools": ["search_rem"], "default_search_table": "files", "has_embeddings": true}}'::jsonb,
1517
+ 'entity',
1518
+ '{"table_name": "files", "entity_key_field": "id", "embedding_fields": ["content"], "fqn": "rem.models.entities.file.File"}'::jsonb
1519
+ )
1520
+ ON CONFLICT (id) DO UPDATE SET
1521
+ name = EXCLUDED.name,
1522
+ content = EXCLUDED.content,
1523
+ spec = EXCLUDED.spec,
1524
+ category = EXCLUDED.category,
1525
+ metadata = EXCLUDED.metadata,
1526
+ updated_at = CURRENT_TIMESTAMP;
1527
+
1528
+ -- Schema entry for ImageResource (image_resources)
1529
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
1530
+ VALUES (
1531
+ 'ab4bc90c-2cda-55b2-bd4b-e78e19f7d4a7'::uuid,
1532
+ 'system',
1533
+ 'ImageResource',
1534
+ '# ImageResource
1535
+
1536
+
1537
+ Image-specific resource with CLIP embeddings.
1538
+
1539
+ Stored in separate `image_resources` table with CLIP embeddings
1540
+ instead of text embeddings. This enables:
1541
+ - Multimodal search (text-to-image, image-to-image)
1542
+ - Proper dimensionality (512/768 for CLIP vs 1536 for text)
1543
+ - Cost tracking (CLIP tokens separate from text tokens)
1544
+
1545
+ Embedding Strategy:
1546
+ - Default (when JINA_API_KEY set): Jina CLIP API (jina-clip-v2)
1547
+ - Future: Self-hosted OpenCLIP models via KEDA-scaled pods
1548
+ - Fallback: No embeddings (images searchable by metadata only)
1549
+
1550
+ Vision LLM Strategy (tier/sampling gated):
1551
+ - Gold tier: Always get vision descriptions
1552
+ - Silver/Free: Probabilistic sampling (IMAGE_VLLM_SAMPLE_RATE)
1553
+ - Fallback: Basic metadata only
1554
+
1555
+ Tenant isolation provided via CoreModel.tenant_id field.
1556
+
1557
+
1558
+ ## Overview
1559
+
1560
+ The `ImageResource` entity is stored in the `image_resources` table. Each record is uniquely
1561
+ identified by its `name` field for lookups and graph traversal.
1562
+
1563
+ ## Search Capabilities
1564
+
1565
+ This schema includes the `search_rem` tool which supports:
1566
+ - **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
1567
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
1568
+ - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM image_resources LIMIT 10`)
1569
+ - **SQL**: Complex queries (e.g., `SELECT * FROM image_resources WHERE ...`)
1570
+
1571
+ ## Table Info
1572
+
1573
+ | Property | Value |
1574
+ |----------|-------|
1575
+ | Table | `image_resources` |
1576
+ | Entity Key | `name` |
1577
+ | Embedding Fields | `content` |
1578
+ | Tools | `search_rem` |
1579
+
1580
+ ## Fields
1581
+
1582
+ ### `id`
1583
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
1584
+ - **Optional**
1585
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
1586
+
1587
+ ### `created_at`
1588
+ - **Type**: `<class ''datetime.datetime''>`
1589
+ - **Optional**
1590
+ - Entity creation timestamp
1591
+
1592
+ ### `updated_at`
1593
+ - **Type**: `<class ''datetime.datetime''>`
1594
+ - **Optional**
1595
+ - Last update timestamp
1596
+
1597
+ ### `deleted_at`
1598
+ - **Type**: `typing.Optional[datetime.datetime]`
1599
+ - **Optional**
1600
+ - Soft deletion timestamp
1601
+
1602
+ ### `tenant_id`
1603
+ - **Type**: `typing.Optional[str]`
1604
+ - **Optional**
1605
+ - Tenant identifier for multi-tenancy isolation
1606
+
1607
+ ### `user_id`
1608
+ - **Type**: `typing.Optional[str]`
1609
+ - **Optional**
1610
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
1611
+
1612
+ ### `graph_edges`
1613
+ - **Type**: `list[dict]`
1614
+ - **Optional**
1615
+ - Knowledge graph edges stored as InlineEdge dicts
1616
+
1617
+ ### `metadata`
1618
+ - **Type**: `<class ''dict''>`
1619
+ - **Optional**
1620
+ - Flexible metadata storage
1621
+
1622
+ ### `tags`
1623
+ - **Type**: `list[str]`
1624
+ - **Optional**
1625
+ - Entity tags
1626
+
1627
+ ### `name`
1628
+ - **Type**: `typing.Optional[str]`
1629
+ - **Optional**
1630
+ - Human-readable resource name (used as graph label). Auto-generated from uri+ordinal if not provided.
1631
+
1632
+ ### `uri`
1633
+ - **Type**: `typing.Optional[str]`
1634
+ - **Optional**
1635
+ - Content URI or identifier (file path, URL, etc.)
1636
+
1637
+ ### `ordinal`
1638
+ - **Type**: `<class ''int''>`
1639
+ - **Optional**
1640
+ - Chunk ordinal for splitting large documents (0 for single-chunk resources)
1641
+
1642
+ ### `content`
1643
+ - **Type**: `<class ''str''>`
1644
+ - **Optional**
1645
+ - Resource content text
1646
+
1647
+ ### `timestamp`
1648
+ - **Type**: `<class ''datetime.datetime''>`
1649
+ - **Optional**
1650
+ - Resource timestamp (content creation/publication time)
1651
+
1652
+ ### `category`
1653
+ - **Type**: `typing.Optional[str]`
1654
+ - **Optional**
1655
+ - Resource category (document, conversation, artifact, etc.)
1656
+
1657
+ ### `related_entities`
1658
+ - **Type**: `list[dict]`
1659
+ - **Optional**
1660
+ - Extracted entities (people, projects, concepts) with metadata
1661
+
1662
+ ### `image_width`
1663
+ - **Type**: `typing.Optional[int]`
1664
+ - **Optional**
1665
+ - Image width in pixels
1666
+
1667
+ ### `image_height`
1668
+ - **Type**: `typing.Optional[int]`
1669
+ - **Optional**
1670
+ - Image height in pixels
1671
+
1672
+ ### `image_format`
1673
+ - **Type**: `typing.Optional[str]`
1674
+ - **Optional**
1675
+ - Image format (PNG, JPEG, GIF, WebP)
1676
+
1677
+ ### `vision_description`
1678
+ - **Type**: `typing.Optional[str]`
1679
+ - **Optional**
1680
+ - Vision LLM generated description (markdown, only for gold tier or sampled images)
1681
+
1682
+ ### `vision_provider`
1683
+ - **Type**: `typing.Optional[str]`
1684
+ - **Optional**
1685
+ - Vision provider used (anthropic, gemini, openai)
1686
+
1687
+ ### `vision_model`
1688
+ - **Type**: `typing.Optional[str]`
1689
+ - **Optional**
1690
+ - Vision model used for description
1691
+
1692
+ ### `clip_embedding`
1693
+ - **Type**: `typing.Optional[list[float]]`
1694
+ - **Optional**
1695
+ - CLIP embedding vector (512 or 768 dimensions, from Jina AI or self-hosted)
1696
+
1697
+ ### `clip_dimensions`
1698
+ - **Type**: `typing.Optional[int]`
1699
+ - **Optional**
1700
+ - CLIP embedding dimensionality (512 for jina-clip-v2, 768 for jina-clip-v1)
1701
+
1702
+ ',
1703
+ '{"type": "object", "description": "\n Image-specific resource with CLIP embeddings.\n\n Stored in separate `image_resources` table with CLIP embeddings\n instead of text embeddings. This enables:\n - Multimodal search (text-to-image, image-to-image)\n - Proper dimensionality (512/768 for CLIP vs 1536 for text)\n - Cost tracking (CLIP tokens separate from text tokens)\n\n Embedding Strategy:\n - Default (when JINA_API_KEY set): Jina CLIP API (jina-clip-v2)\n - Future: Self-hosted OpenCLIP models via KEDA-scaled pods\n - Fallback: No embeddings (images searchable by metadata only)\n\n Vision LLM Strategy (tier/sampling gated):\n - Gold tier: Always get vision descriptions\n - Silver/Free: Probabilistic sampling (IMAGE_VLLM_SAMPLE_RATE)\n - Fallback: Basic metadata only\n\n Tenant isolation provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `image_resources` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). 
Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Human-readable resource name (used as graph label). 
Auto-generated from uri+ordinal if not provided.", "entity_key": true, "title": "Name"}, "uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Content URI or identifier (file path, URL, etc.)", "title": "Uri"}, "ordinal": {"composite_key": true, "default": 0, "description": "Chunk ordinal for splitting large documents (0 for single-chunk resources)", "title": "Ordinal", "type": "integer"}, "content": {"default": "", "description": "Resource content text", "title": "Content", "type": "string"}, "timestamp": {"description": "Resource timestamp (content creation/publication time)", "format": "date-time", "title": "Timestamp", "type": "string"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Resource category (document, conversation, artifact, etc.)", "title": "Category"}, "related_entities": {"description": "Extracted entities (people, projects, concepts) with metadata", "items": {"additionalProperties": true, "type": "object"}, "title": "Related Entities", "type": "array"}, "image_width": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "Image width in pixels", "title": "Image Width"}, "image_height": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "Image height in pixels", "title": "Image Height"}, "image_format": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Image format (PNG, JPEG, GIF, WebP)", "title": "Image Format"}, "vision_description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Vision LLM generated description (markdown, only for gold tier or sampled images)", "title": "Vision Description"}, "vision_provider": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Vision provider used (anthropic, gemini, openai)", "title": "Vision Provider"}, "vision_model": {"anyOf": [{"type": "string"}, {"type": 
"null"}], "default": null, "description": "Vision model used for description", "title": "Vision Model"}, "clip_embedding": {"anyOf": [{"items": {"type": "number"}, "type": "array"}, {"type": "null"}], "default": null, "description": "CLIP embedding vector (512 or 768 dimensions, from Jina AI or self-hosted)", "title": "Clip Embedding"}, "clip_dimensions": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "CLIP embedding dimensionality (512 for jina-clip-v2, 768 for jina-clip-v1)", "title": "Clip Dimensions"}}, "required": [], "json_schema_extra": {"table_name": "image_resources", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.image_resource.ImageResource", "tools": ["search_rem"], "default_search_table": "image_resources", "has_embeddings": true}}'::jsonb,
1704
+ 'entity',
1705
+ '{"table_name": "image_resources", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.image_resource.ImageResource"}'::jsonb
1706
+ )
1707
+ ON CONFLICT (id) DO UPDATE SET
1708
+ name = EXCLUDED.name,
1709
+ content = EXCLUDED.content,
1710
+ spec = EXCLUDED.spec,
1711
+ category = EXCLUDED.category,
1712
+ metadata = EXCLUDED.metadata,
1713
+ updated_at = CURRENT_TIMESTAMP;
1714
+
1715
+ -- Schema entry for Message (messages)
1716
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
1717
+ VALUES (
1718
+ 'be36f9da-6df4-51ba-bb41-bf51246ecec1'::uuid,
1719
+ 'system',
1720
+ 'Message',
1721
+ '# Message
1722
+
1723
+
1724
+ Communication content unit.
1725
+
1726
+ Represents individual messages in conversations, chats, or other
1727
+ communication contexts. Tenant isolation is provided via CoreModel.tenant_id field.
1728
+
1729
+ Trace fields (trace_id, span_id) enable integration with OTEL/Phoenix
1730
+ for observability and feedback annotation.
1731
+
1732
+
1733
+ ## Overview
1734
+
1735
+ The `Message` entity is stored in the `messages` table. Each record is uniquely
1736
+ identified by its `id` field for lookups and graph traversal.
1737
+
1738
+ ## Search Capabilities
1739
+
1740
+ This schema includes the `search_rem` tool which supports:
1741
+ - **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
1742
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
1743
+ - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM messages LIMIT 10`)
1744
+ - **SQL**: Complex queries (e.g., `SELECT * FROM messages WHERE ...`)
1745
+
1746
+ ## Table Info
1747
+
1748
+ | Property | Value |
1749
+ |----------|-------|
1750
+ | Table | `messages` |
1751
+ | Entity Key | `id` |
1752
+ | Embedding Fields | `content` |
1753
+ | Tools | `search_rem` |
1754
+
1755
+ ## Fields
1756
+
1757
+ ### `id`
1758
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
1759
+ - **Optional**
1760
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
1761
+
1762
+ ### `created_at`
1763
+ - **Type**: `<class ''datetime.datetime''>`
1764
+ - **Optional**
1765
+ - Entity creation timestamp
1766
+
1767
+ ### `updated_at`
1768
+ - **Type**: `<class ''datetime.datetime''>`
1769
+ - **Optional**
1770
+ - Last update timestamp
1771
+
1772
+ ### `deleted_at`
1773
+ - **Type**: `typing.Optional[datetime.datetime]`
1774
+ - **Optional**
1775
+ - Soft deletion timestamp
1776
+
1777
+ ### `tenant_id`
1778
+ - **Type**: `typing.Optional[str]`
1779
+ - **Optional**
1780
+ - Tenant identifier for multi-tenancy isolation
1781
+
1782
+ ### `user_id`
1783
+ - **Type**: `typing.Optional[str]`
1784
+ - **Optional**
1785
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
1786
+
1787
+ ### `graph_edges`
1788
+ - **Type**: `list[dict]`
1789
+ - **Optional**
1790
+ - Knowledge graph edges stored as InlineEdge dicts
1791
+
1792
+ ### `metadata`
1793
+ - **Type**: `<class ''dict''>`
1794
+ - **Optional**
1795
+ - Flexible metadata storage
1796
+
1797
+ ### `tags`
1798
+ - **Type**: `list[str]`
1799
+ - **Optional**
1800
+ - Entity tags
1801
+
1802
+ ### `content`
1803
+ - **Type**: `<class ''str''>`
1804
+ - **Required**
1805
+ - Message content text
1806
+
1807
+ ### `message_type`
1808
+ - **Type**: `str | None`
1809
+ - **Optional**
1810
+ - Message type e.g. role: ''user'', ''assistant'', ''system'', ''tool''
1811
+
1812
+ ### `session_id`
1813
+ - **Type**: `str | None`
1814
+ - **Optional**
1815
+ - Session identifier for tracking message context
1816
+
1817
+ ### `prompt`
1818
+ - **Type**: `str | None`
1819
+ - **Optional**
1820
+ - Custom prompt used for this message (if overridden from default)
1821
+
1822
+ ### `model`
1823
+ - **Type**: `str | None`
1824
+ - **Optional**
1825
+ - Model used for generating this message (provider:model format)
1826
+
1827
+ ### `token_count`
1828
+ - **Type**: `int | None`
1829
+ - **Optional**
1830
+ - Token count for this message
1831
+
1832
+ ### `trace_id`
1833
+ - **Type**: `str | None`
1834
+ - **Optional**
1835
+ - OTEL trace ID for observability integration
1836
+
1837
+ ### `span_id`
1838
+ - **Type**: `str | None`
1839
+ - **Optional**
1840
+ - OTEL span ID for specific span reference
1841
+
1842
+ ',
1843
+ '{"type": "object", "description": "\n Communication content unit.\n\n Represents individual messages in conversations, chats, or other\n communication contexts. Tenant isolation is provided via CoreModel.tenant_id field.\n\n Trace fields (trace_id, span_id) enable integration with OTEL/Phoenix\n for observability and feedback annotation.\n \n\nThis agent can search the `messages` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "content": {"description": "Message content text", "title": "Content", "type": "string"}, "message_type": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Message type e.g. role: ''user'', ''assistant'', ''system'', ''tool''", "title": "Message Type"}, "session_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Session identifier for tracking message context", "title": "Session Id"}, "prompt": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Custom prompt used for this message (if overridden from default)", "title": "Prompt"}, "model": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Model used for generating this message (provider:model format)", "title": "Model"}, "token_count": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "Token count for this message", "title": "Token Count"}, "trace_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "OTEL trace ID for observability integration", "title": "Trace Id"}, "span_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "OTEL span ID for specific span reference", "title": "Span Id"}}, "required": ["content"], "json_schema_extra": {"table_name": "messages", "entity_key_field": "id", "embedding_fields": ["content"], "fully_qualified_name": 
"rem.models.entities.message.Message", "tools": ["search_rem"], "default_search_table": "messages", "has_embeddings": true}}'::jsonb,
1844
+ 'entity',
1845
+ '{"table_name": "messages", "entity_key_field": "id", "embedding_fields": ["content"], "fqn": "rem.models.entities.message.Message"}'::jsonb
1846
+ )
1847
+ ON CONFLICT (id) DO UPDATE SET
1848
+ name = EXCLUDED.name,
1849
+ content = EXCLUDED.content,
1850
+ spec = EXCLUDED.spec,
1851
+ category = EXCLUDED.category,
1852
+ metadata = EXCLUDED.metadata,
1853
+ updated_at = CURRENT_TIMESTAMP;
1854
+
1855
+ -- Schema entry for Moment (moments)
1856
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
1857
+ VALUES (
1858
+ 'a08f0a8c-5bab-5bf5-9760-0e67bc69bd74'::uuid,
1859
+ 'system',
1860
+ 'Moment',
1861
+ '# Moment
1862
+
1863
+
1864
+ Temporal narrative extracted from resources.
1865
+
1866
+ Moments provide temporal structure and context for the REM graph,
1867
+ enabling time-based queries and understanding of when events occurred.
1868
+ Tenant isolation is provided via CoreModel.tenant_id field.
1869
+
1870
+
1871
+ ## Overview
1872
+
1873
+ The `Moment` entity is stored in the `moments` table. Each record is uniquely
1874
+ identified by its `name` field for lookups and graph traversal.
1875
+
1876
+ ## Search Capabilities
1877
+
1878
+ This schema includes the `search_rem` tool which supports:
1879
+ - **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
1880
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
1881
+ - **SEARCH**: Semantic vector search on summary (e.g., `SEARCH "concept" FROM moments LIMIT 10`)
1882
+ - **SQL**: Complex queries (e.g., `SELECT * FROM moments WHERE ...`)
1883
+
1884
+ ## Table Info
1885
+
1886
+ | Property | Value |
1887
+ |----------|-------|
1888
+ | Table | `moments` |
1889
+ | Entity Key | `name` |
1890
+ | Embedding Fields | `summary` |
1891
+ | Tools | `search_rem` |
1892
+
1893
+ ## Fields
1894
+
1895
+ ### `id`
1896
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
1897
+ - **Optional**
1898
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
1899
+
1900
+ ### `created_at`
1901
+ - **Type**: `<class ''datetime.datetime''>`
1902
+ - **Optional**
1903
+ - Entity creation timestamp
1904
+
1905
+ ### `updated_at`
1906
+ - **Type**: `<class ''datetime.datetime''>`
1907
+ - **Optional**
1908
+ - Last update timestamp
1909
+
1910
+ ### `deleted_at`
1911
+ - **Type**: `typing.Optional[datetime.datetime]`
1912
+ - **Optional**
1913
+ - Soft deletion timestamp
1914
+
1915
+ ### `tenant_id`
1916
+ - **Type**: `typing.Optional[str]`
1917
+ - **Optional**
1918
+ - Tenant identifier for multi-tenancy isolation
1919
+
1920
+ ### `user_id`
1921
+ - **Type**: `typing.Optional[str]`
1922
+ - **Optional**
1923
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
1924
+
1925
+ ### `graph_edges`
1926
+ - **Type**: `list[dict]`
1927
+ - **Optional**
1928
+ - Knowledge graph edges stored as InlineEdge dicts
1929
+
1930
+ ### `metadata`
1931
+ - **Type**: `<class ''dict''>`
1932
+ - **Optional**
1933
+ - Flexible metadata storage
1934
+
1935
+ ### `tags`
1936
+ - **Type**: `list[str]`
1937
+ - **Optional**
1938
+ - Entity tags
1939
+
1940
+ ### `name`
1941
+ - **Type**: `typing.Optional[str]`
1942
+ - **Optional**
1943
+ - Human-readable moment name (used as graph label). Auto-generated from starts_timestamp+moment_type if not provided.
1944
+
1945
+ ### `moment_type`
1946
+ - **Type**: `typing.Optional[str]`
1947
+ - **Optional**
1948
+ - Moment classification (meeting, coding-session, conversation, etc.)
1949
+
1950
+ ### `category`
1951
+ - **Type**: `typing.Optional[str]`
1952
+ - **Optional**
1953
+ - Moment category for grouping and filtering
1954
+
1955
+ ### `starts_timestamp`
1956
+ - **Type**: `<class ''datetime.datetime''>`
1957
+ - **Required**
1958
+ - Moment start time
1959
+
1960
+ ### `ends_timestamp`
1961
+ - **Type**: `typing.Optional[datetime.datetime]`
1962
+ - **Optional**
1963
+ - Moment end time
1964
+
1965
+ ### `present_persons`
1966
+ - **Type**: `list[rem.models.entities.moment.Person]`
1967
+ - **Optional**
1968
+ - People present in the moment
1969
+
1970
+ ### `emotion_tags`
1971
+ - **Type**: `list[str]`
1972
+ - **Optional**
1973
+ - Emotion/sentiment tags (happy, frustrated, focused, etc.)
1974
+
1975
+ ### `topic_tags`
1976
+ - **Type**: `list[str]`
1977
+ - **Optional**
1978
+ - Topic/concept tags (project names, technologies, etc.)
1979
+
1980
+ ### `summary`
1981
+ - **Type**: `typing.Optional[str]`
1982
+ - **Optional**
1983
+ - Natural language summary of the moment
1984
+
1985
+ ### `source_resource_ids`
1986
+ - **Type**: `list[str]`
1987
+ - **Optional**
1988
+ - Resource IDs used to construct this moment
1989
+
1990
+ ',
1991
+ '{"type": "object", "description": "\n Temporal narrative extracted from resources.\n\n Moments provide temporal structure and context for the REM graph,\n enabling time-based queries and understanding of when events occurred.\n Tenant isolation is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `moments` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Human-readable moment name (used as graph label). Auto-generated from starts_timestamp+moment_type if not provided.", "entity_key": true, "title": "Name"}, "moment_type": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Moment classification (meeting, coding-session, conversation, etc.)", "title": "Moment Type"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Moment category for grouping and filtering", "title": "Category"}, "starts_timestamp": {"description": "Moment start time", "format": "date-time", "title": "Starts Timestamp", "type": "string"}, "ends_timestamp": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Moment end time", "title": "Ends Timestamp"}, "present_persons": {"description": "People present in the moment", "items": {"$ref": "#/$defs/Person"}, "title": "Present Persons", "type": "array"}, "emotion_tags": {"description": "Emotion/sentiment tags (happy, frustrated, focused, etc.)", "items": {"type": "string"}, "title": "Emotion Tags", "type": "array"}, "topic_tags": {"description": "Topic/concept tags (project names, technologies, etc.)", "items": {"type": "string"}, "title": "Topic Tags", "type": "array"}, "summary": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, 
"description": "Natural language summary of the moment", "title": "Summary"}, "source_resource_ids": {"description": "Resource IDs used to construct this moment", "items": {"type": "string"}, "title": "Source Resource Ids", "type": "array"}}, "required": ["starts_timestamp"], "json_schema_extra": {"table_name": "moments", "entity_key_field": "name", "embedding_fields": ["summary"], "fully_qualified_name": "rem.models.entities.moment.Moment", "tools": ["search_rem"], "default_search_table": "moments", "has_embeddings": true}}'::jsonb,
1992
+ 'entity',
1993
+ '{"table_name": "moments", "entity_key_field": "name", "embedding_fields": ["summary"], "fqn": "rem.models.entities.moment.Moment"}'::jsonb
1994
+ )
1995
+ ON CONFLICT (id) DO UPDATE SET
1996
+ name = EXCLUDED.name,
1997
+ content = EXCLUDED.content,
1998
+ spec = EXCLUDED.spec,
1999
+ category = EXCLUDED.category,
2000
+ metadata = EXCLUDED.metadata,
2001
+ updated_at = CURRENT_TIMESTAMP;
2002
+
2003
+ -- Schema entry for Ontology (ontologies)
2004
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
2005
+ VALUES (
2006
+ 'a702ed74-8988-534a-9917-2977349777c1'::uuid,
2007
+ 'system',
2008
+ 'Ontology',
2009
+ '# Ontology
2010
+
2011
+ Domain-specific knowledge extracted from files using custom agents.
2012
+
2013
+ Attributes:
2014
+ name: Human-readable label for this ontology instance
2015
+ file_id: Foreign key to File entity that was processed
2016
+ agent_schema_id: Foreign key to Schema entity that performed extraction
2017
+ provider_name: LLM provider used for extraction (e.g., "anthropic", "openai")
2018
+ model_name: Specific model used (e.g., "claude-sonnet-4-5")
2019
+ extracted_data: Structured data extracted by agent (arbitrary JSON)
2020
+ confidence_score: Optional confidence score from extraction (0.0-1.0)
2021
+ extraction_timestamp: When extraction was performed
2022
+ embedding_text: Text used for generating embedding (derived from extracted_data)
2023
+
2024
+ Inherited from CoreModel:
2025
+ id: UUID or string identifier
2026
+ created_at: Entity creation timestamp
2027
+ updated_at: Last update timestamp
2028
+ deleted_at: Soft deletion timestamp
2029
+ tenant_id: Multi-tenancy isolation
2030
+ user_id: Ownership
2031
+ graph_edges: Relationships to other entities
2032
+ metadata: Flexible metadata storage
2033
+ tags: Classification tags
2034
+ column: Database schema metadata
2035
+
2036
+ Example Usage:
2037
+ # CV extraction
2038
+ cv_ontology = Ontology(
2039
+ name="john-doe-cv-2024",
2040
+ file_id="file-uuid-123",
2041
+ agent_schema_id="cv-parser-v1",
2042
+ provider_name="anthropic",
2043
+ model_name="claude-sonnet-4-5-20250929",
2044
+ extracted_data={
2045
+ "candidate_name": "John Doe",
2046
+ "email": "john@example.com",
2047
+ "skills": ["Python", "PostgreSQL", "Kubernetes"],
2048
+ "experience": [
2049
+ {
2050
+ "company": "TechCorp",
2051
+ "role": "Senior Engineer",
2052
+ "years": 3,
2053
+ "achievements": ["Led migration to k8s", "Reduced costs 40%"]
2054
+ }
2055
+ ],
2056
+ "education": [
2057
+ {"degree": "BS Computer Science", "institution": "MIT", "year": 2018}
2058
+ ]
2059
+ },
2060
+ confidence_score=0.95,
2061
+ tags=["cv", "engineering", "senior-level"]
2062
+ )
1356
2063
 
1357
- -- Prevent duplicate shares (same session, same recipient, active only)
1358
- CONSTRAINT uq_active_share UNIQUE NULLS NOT DISTINCT (
1359
- tenant_id, session_id, owner_user_id, shared_with_user_id, deleted_at
1360
- )
1361
- );
2064
+ # Contract extraction
2065
+ contract_ontology = Ontology(
2066
+ name="acme-supplier-agreement-2024",
2067
+ file_id="file-uuid-456",
2068
+ agent_schema_id="contract-parser-v2",
2069
+ provider_name="openai",
2070
+ model_name="gpt-4.1",
2071
+ extracted_data={
2072
+ "contract_type": "supplier_agreement",
2073
+ "parties": [
2074
+ {"name": "ACME Corp", "role": "buyer"},
2075
+ {"name": "SupplyChain Inc", "role": "supplier"}
2076
+ ],
2077
+ "effective_date": "2024-01-01",
2078
+ "termination_date": "2026-12-31",
2079
+ "payment_terms": {
2080
+ "amount": 500000,
2081
+ "currency": "USD",
2082
+ "frequency": "quarterly"
2083
+ },
2084
+ "key_obligations": [
2085
+ "Supplier must deliver within 30 days",
2086
+ "Buyer must pay within 60 days of invoice"
2087
+ ]
2088
+ },
2089
+ confidence_score=0.92,
2090
+ tags=["contract", "supplier", "procurement"]
2091
+ )
2092
+
2093
+
2094
+ ## Overview
2095
+
2096
+ The `Ontology` entity is stored in the `ontologies` table. Each record is uniquely
2097
+ identified by its `id` field for lookups and graph traversal.
1362
2098
 
1363
- -- Index for finding shares by recipient (who is sharing WITH me)
1364
- CREATE INDEX IF NOT EXISTS idx_shared_sessions_recipient
1365
- ON shared_sessions (tenant_id, shared_with_user_id)
1366
- WHERE deleted_at IS NULL;
1367
-
1368
- -- Index for finding shares by owner (what have I shared)
1369
- CREATE INDEX IF NOT EXISTS idx_shared_sessions_owner
1370
- ON shared_sessions (tenant_id, owner_user_id)
1371
- WHERE deleted_at IS NULL;
1372
-
1373
- -- Index for finding shares by session
1374
- CREATE INDEX IF NOT EXISTS idx_shared_sessions_session
1375
- ON shared_sessions (tenant_id, session_id)
1376
- WHERE deleted_at IS NULL;
1377
-
1378
- -- Aggregation function: Get users sharing with me
1379
- CREATE OR REPLACE FUNCTION fn_get_shared_with_me(
1380
- p_tenant_id VARCHAR(100),
1381
- p_user_id VARCHAR(256),
1382
- p_limit INTEGER DEFAULT 50,
1383
- p_offset INTEGER DEFAULT 0
2099
+ ## Search Capabilities
2100
+
2101
+ This schema includes the `search_rem` tool which supports:
2102
+ - **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-id"`)
2103
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
2104
+ - **SEARCH**: Semantic vector search (e.g., `SEARCH "concept" FROM ontologies LIMIT 10`). Note: no embedding fields are configured for this table (see Table Info).
2105
+ - **SQL**: Complex queries (e.g., `SELECT * FROM ontologies WHERE ...`)
2106
+
2107
+ ## Table Info
2108
+
2109
+ | Property | Value |
2110
+ |----------|-------|
2111
+ | Table | `ontologies` |
2112
+ | Entity Key | `id` |
2113
+ | Embedding Fields | None |
2114
+ | Tools | `search_rem` |
2115
+
2116
+ ## Fields
2117
+
2118
+ ### `id`
2119
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
2120
+ - **Optional**
2121
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
2122
+
2123
+ ### `created_at`
2124
+ - **Type**: `<class ''datetime.datetime''>`
2125
+ - **Optional**
2126
+ - Entity creation timestamp
2127
+
2128
+ ### `updated_at`
2129
+ - **Type**: `<class ''datetime.datetime''>`
2130
+ - **Optional**
2131
+ - Last update timestamp
2132
+
2133
+ ### `deleted_at`
2134
+ - **Type**: `typing.Optional[datetime.datetime]`
2135
+ - **Optional**
2136
+ - Soft deletion timestamp
2137
+
2138
+ ### `tenant_id`
2139
+ - **Type**: `typing.Optional[str]`
2140
+ - **Optional**
2141
+ - Tenant identifier for multi-tenancy isolation
2142
+
2143
+ ### `user_id`
2144
+ - **Type**: `typing.Optional[str]`
2145
+ - **Optional**
2146
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
2147
+
2148
+ ### `graph_edges`
2149
+ - **Type**: `list[dict]`
2150
+ - **Optional**
2151
+ - Knowledge graph edges stored as InlineEdge dicts
2152
+
2153
+ ### `metadata`
2154
+ - **Type**: `<class ''dict''>`
2155
+ - **Optional**
2156
+ - Flexible metadata storage
2157
+
2158
+ ### `tags`
2159
+ - **Type**: `list[str]`
2160
+ - **Optional**
2161
+ - Entity tags
2162
+
2163
+ ### `name`
2164
+ - **Type**: `<class ''str''>`
2165
+ - **Required**
2166
+
2167
+ ### `file_id`
2168
+ - **Type**: `uuid.UUID | str`
2169
+ - **Required**
2170
+
2171
+ ### `agent_schema_id`
2172
+ - **Type**: `<class ''str''>`
2173
+ - **Required**
2174
+
2175
+ ### `provider_name`
2176
+ - **Type**: `<class ''str''>`
2177
+ - **Required**
2178
+
2179
+ ### `model_name`
2180
+ - **Type**: `<class ''str''>`
2181
+ - **Required**
2182
+
2183
+ ### `extracted_data`
2184
+ - **Type**: `dict[str, typing.Any]`
2185
+ - **Required**
2186
+
2187
+ ### `confidence_score`
2188
+ - **Type**: `typing.Optional[float]`
2189
+ - **Optional**
2190
+
2191
+ ### `extraction_timestamp`
2192
+ - **Type**: `typing.Optional[str]`
2193
+ - **Optional**
2194
+
2195
+ ### `embedding_text`
2196
+ - **Type**: `typing.Optional[str]`
2197
+ - **Optional**
2198
+
2199
+ ',
2200
+ '{"type": "object", "description": "Domain-specific knowledge extracted from files using custom agents.\n\n Attributes:\n name: Human-readable label for this ontology instance\n file_id: Foreign key to File entity that was processed\n agent_schema_id: Foreign key to Schema entity that performed extraction\n provider_name: LLM provider used for extraction (e.g., \"anthropic\", \"openai\")\n model_name: Specific model used (e.g., \"claude-sonnet-4-5\")\n extracted_data: Structured data extracted by agent (arbitrary JSON)\n confidence_score: Optional confidence score from extraction (0.0-1.0)\n extraction_timestamp: When extraction was performed\n embedding_text: Text used for generating embedding (derived from extracted_data)\n\n Inherited from CoreModel:\n id: UUID or string identifier\n created_at: Entity creation timestamp\n updated_at: Last update timestamp\n deleted_at: Soft deletion timestamp\n tenant_id: Multi-tenancy isolation\n user_id: Ownership\n graph_edges: Relationships to other entities\n metadata: Flexible metadata storage\n tags: Classification tags\n column: Database schema metadata\n\n Example Usage:\n # CV extraction\n cv_ontology = Ontology(\n name=\"john-doe-cv-2024\",\n file_id=\"file-uuid-123\",\n agent_schema_id=\"cv-parser-v1\",\n provider_name=\"anthropic\",\n model_name=\"claude-sonnet-4-5-20250929\",\n extracted_data={\n \"candidate_name\": \"John Doe\",\n \"email\": \"john@example.com\",\n \"skills\": [\"Python\", \"PostgreSQL\", \"Kubernetes\"],\n \"experience\": [\n {\n \"company\": \"TechCorp\",\n \"role\": \"Senior Engineer\",\n \"years\": 3,\n \"achievements\": [\"Led migration to k8s\", \"Reduced costs 40%\"]\n }\n ],\n \"education\": [\n {\"degree\": \"BS Computer Science\", \"institution\": \"MIT\", \"year\": 2018}\n ]\n },\n confidence_score=0.95,\n tags=[\"cv\", \"engineering\", \"senior-level\"]\n )\n\n # Contract extraction\n contract_ontology = Ontology(\n name=\"acme-supplier-agreement-2024\",\n 
file_id=\"file-uuid-456\",\n agent_schema_id=\"contract-parser-v2\",\n provider_name=\"openai\",\n model_name=\"gpt-4.1\",\n extracted_data={\n \"contract_type\": \"supplier_agreement\",\n \"parties\": [\n {\"name\": \"ACME Corp\", \"role\": \"buyer\"},\n {\"name\": \"SupplyChain Inc\", \"role\": \"supplier\"}\n ],\n \"effective_date\": \"2024-01-01\",\n \"termination_date\": \"2026-12-31\",\n \"payment_terms\": {\n \"amount\": 500000,\n \"currency\": \"USD\",\n \"frequency\": \"quarterly\"\n },\n \"key_obligations\": [\n \"Supplier must deliver within 30 days\",\n \"Buyer must pay within 60 days of invoice\"\n ]\n },\n confidence_score=0.92,\n tags=[\"contract\", \"supplier\", \"procurement\"]\n )\n \n\nThis agent can search the `ontologies` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. 
Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "file_id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}], "title": "File Id"}, "agent_schema_id": {"title": "Agent Schema Id", "type": "string"}, "provider_name": {"title": "Provider Name", "type": "string"}, "model_name": {"title": "Model Name", "type": "string"}, "extracted_data": {"additionalProperties": true, "title": "Extracted Data", "type": "object"}, "confidence_score": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Confidence Score"}, "extraction_timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Extraction Timestamp"}, "embedding_text": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Embedding Text"}}, "required": ["name", "file_id", "agent_schema_id", "provider_name", "model_name", "extracted_data"], "json_schema_extra": {"table_name": "ontologies", "entity_key_field": "id", "embedding_fields": [], "fully_qualified_name": "rem.models.entities.ontology.Ontology", "tools": ["search_rem"], "default_search_table": "ontologies", "has_embeddings": false}}'::jsonb,
2201
+ 'entity',
2202
+ '{"table_name": "ontologies", "entity_key_field": "id", "embedding_fields": [], "fqn": "rem.models.entities.ontology.Ontology"}'::jsonb
1384
2203
  )
1385
- RETURNS TABLE (
1386
- user_id VARCHAR(256),
1387
- name VARCHAR(256),
1388
- email VARCHAR(256),
1389
- message_count BIGINT,
1390
- session_count BIGINT,
1391
- first_message_at TIMESTAMP,
1392
- last_message_at TIMESTAMP
1393
- ) AS $$
1394
- BEGIN
1395
- RETURN QUERY
1396
- WITH shared_with_me AS (
1397
- SELECT DISTINCT
1398
- ss.session_id,
1399
- ss.owner_user_id
1400
- FROM shared_sessions ss
1401
- WHERE ss.tenant_id = p_tenant_id
1402
- AND ss.shared_with_user_id = p_user_id
1403
- AND ss.deleted_at IS NULL
1404
- ),
1405
- message_stats AS (
1406
- SELECT
1407
- swm.owner_user_id,
1408
- COUNT(DISTINCT m.id) AS msg_count,
1409
- COUNT(DISTINCT m.session_id) AS sess_count,
1410
- MIN(m.created_at) AS first_msg,
1411
- MAX(m.created_at) AS last_msg
1412
- FROM shared_with_me swm
1413
- LEFT JOIN messages m ON m.session_id = swm.session_id
1414
- AND m.tenant_id = p_tenant_id
1415
- AND m.deleted_at IS NULL
1416
- GROUP BY swm.owner_user_id
1417
- )
1418
- SELECT
1419
- ms.owner_user_id AS user_id,
1420
- u.name,
1421
- u.email,
1422
- COALESCE(ms.msg_count, 0) AS message_count,
1423
- COALESCE(ms.sess_count, 0) AS session_count,
1424
- ms.first_msg AS first_message_at,
1425
- ms.last_msg AS last_message_at
1426
- FROM message_stats ms
1427
- LEFT JOIN users u ON u.user_id = ms.owner_user_id
1428
- AND u.tenant_id = p_tenant_id
1429
- AND u.deleted_at IS NULL
1430
- ORDER BY ms.last_msg DESC NULLS LAST, ms.msg_count DESC
1431
- LIMIT p_limit
1432
- OFFSET p_offset;
1433
- END;
1434
- $$ LANGUAGE plpgsql;
2204
+ ON CONFLICT (id) DO UPDATE SET
2205
+ name = EXCLUDED.name,
2206
+ content = EXCLUDED.content,
2207
+ spec = EXCLUDED.spec,
2208
+ category = EXCLUDED.category,
2209
+ metadata = EXCLUDED.metadata,
2210
+ updated_at = CURRENT_TIMESTAMP;
2211
+
2212
+ -- Schema entry for OntologyConfig (ontology_configs)
2213
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
2214
+ VALUES (
2215
+ '9a7e50d0-ef3a-5641-9ff4-b2be5a77053b'::uuid,
2216
+ 'system',
2217
+ 'OntologyConfig',
2218
+ '# OntologyConfig
2219
+
2220
+ User configuration for automatic ontology extraction.
2221
+
2222
+ Attributes:
2223
+ name: Human-readable config name
2224
+ agent_schema_id: Foreign key to Schema entity to use for extraction
2225
+ description: Purpose and scope of this config
2226
+
2227
+ # File matching rules (ANY matching rule triggers extraction)
2228
+ mime_type_pattern: Regex pattern for file MIME types (e.g., "application/pdf")
2229
+ uri_pattern: Regex pattern for file URIs (e.g., "s3://bucket/resumes/.*")
2230
+ tag_filter: List of tags (file must have ALL tags to match)
2231
+
2232
+ # Execution control
2233
+ priority: Execution order (higher = earlier, default 100)
2234
+ enabled: Whether this config is active (default True)
2235
+
2236
+ # LLM provider configuration
2237
+ provider_name: Optional LLM provider override (defaults to settings)
2238
+ model_name: Optional model override (defaults to settings)
2239
+
2240
+ Inherited from CoreModel:
2241
+ id, created_at, updated_at, deleted_at, tenant_id, user_id,
2242
+ graph_edges, metadata, tags, column
2243
+
2244
+ Example Usage:
2245
+ # CV extraction for recruitment
2246
+ cv_config = OntologyConfig(
2247
+ name="recruitment-cv-parser",
2248
+ agent_schema_id="cv-parser-v1",
2249
+ description="Extract candidate information from resumes",
2250
+ mime_type_pattern="application/pdf",
2251
+ uri_pattern=".*/resumes/.*",
2252
+ tag_filter=["cv", "candidate"],
2253
+ priority=100,
2254
+ enabled=True,
2255
+ tenant_id="acme-corp",
2256
+ tags=["recruitment", "hr"]
2257
+ )
1435
2258
 
1436
- -- Count function for pagination
1437
- CREATE OR REPLACE FUNCTION fn_count_shared_with_me(
1438
- p_tenant_id VARCHAR(100),
1439
- p_user_id VARCHAR(256)
1440
- )
1441
- RETURNS BIGINT AS $$
1442
- BEGIN
1443
- RETURN (
1444
- SELECT COUNT(DISTINCT owner_user_id)
1445
- FROM shared_sessions
1446
- WHERE tenant_id = p_tenant_id
1447
- AND shared_with_user_id = p_user_id
1448
- AND deleted_at IS NULL
1449
- );
1450
- END;
1451
- $$ LANGUAGE plpgsql;
2259
+ # Contract analysis for legal team
2260
+ contract_config = OntologyConfig(
2261
+ name="legal-contract-analyzer",
2262
+ agent_schema_id="contract-parser-v2",
2263
+ description="Extract key terms from supplier contracts",
2264
+ mime_type_pattern="application/(pdf|msword|vnd.openxmlformats.*)",
2265
+ tag_filter=["legal", "contract"],
2266
+ priority=200, # Higher priority = runs first
2267
+ enabled=True,
2268
+ provider_name="openai", # Override default provider
2269
+ model_name="gpt-4.1",
2270
+ tenant_id="acme-corp",
2271
+ tags=["legal", "procurement"]
2272
+ )
1452
2273
 
1453
- -- Get messages from sessions shared by a specific owner
1454
- CREATE OR REPLACE FUNCTION fn_get_shared_messages(
1455
- p_tenant_id VARCHAR(100),
1456
- p_recipient_user_id VARCHAR(256),
1457
- p_owner_user_id VARCHAR(256),
1458
- p_limit INTEGER DEFAULT 50,
1459
- p_offset INTEGER DEFAULT 0
1460
- )
1461
- RETURNS TABLE (
1462
- id UUID,
1463
- content TEXT,
1464
- message_type VARCHAR(256),
1465
- session_id VARCHAR(256),
1466
- model VARCHAR(256),
1467
- token_count INTEGER,
1468
- created_at TIMESTAMP,
1469
- metadata JSONB
1470
- ) AS $$
1471
- BEGIN
1472
- RETURN QUERY
1473
- SELECT
1474
- m.id,
1475
- m.content,
1476
- m.message_type,
1477
- m.session_id,
1478
- m.model,
1479
- m.token_count,
1480
- m.created_at,
1481
- m.metadata
1482
- FROM messages m
1483
- INNER JOIN shared_sessions ss ON ss.session_id = m.session_id
1484
- AND ss.tenant_id = m.tenant_id
1485
- AND ss.deleted_at IS NULL
1486
- WHERE m.tenant_id = p_tenant_id
1487
- AND ss.shared_with_user_id = p_recipient_user_id
1488
- AND ss.owner_user_id = p_owner_user_id
1489
- AND m.deleted_at IS NULL
1490
- ORDER BY m.created_at DESC
1491
- LIMIT p_limit
1492
- OFFSET p_offset;
1493
- END;
1494
- $$ LANGUAGE plpgsql;
2274
+ # Medical records for healthcare
2275
+ medical_config = OntologyConfig(
2276
+ name="medical-records-extractor",
2277
+ agent_schema_id="medical-parser-v1",
2278
+ description="Extract diagnoses and treatments from medical records",
2279
+ mime_type_pattern="application/pdf",
2280
+ tag_filter=["medical", "patient-record"],
2281
+ priority=50,
2282
+ enabled=True,
2283
+ tenant_id="healthsystem",
2284
+ tags=["medical", "hipaa-compliant"]
2285
+ )
2286
+
2287
+
2288
+ ## Overview
2289
+
2290
+ The `OntologyConfig` entity is stored in the `ontology_configs` table. Each record is uniquely
2291
+ identified by its `id` field for lookups and graph traversal.
1495
2292
 
1496
- -- Count shared messages for pagination
1497
- CREATE OR REPLACE FUNCTION fn_count_shared_messages(
1498
- p_tenant_id VARCHAR(100),
1499
- p_recipient_user_id VARCHAR(256),
1500
- p_owner_user_id VARCHAR(256)
2293
+ ## Search Capabilities
2294
+
2295
+ This schema includes the `search_rem` tool which supports:
2296
+ - **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-id"`)
2297
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
2298
+ - **SEARCH**: Semantic vector search on description (e.g., `SEARCH "concept" FROM ontology_configs LIMIT 10`)
2299
+ - **SQL**: Complex queries (e.g., `SELECT * FROM ontology_configs WHERE ...`)
2300
+
2301
+ ## Table Info
2302
+
2303
+ | Property | Value |
2304
+ |----------|-------|
2305
+ | Table | `ontology_configs` |
2306
+ | Entity Key | `id` |
2307
+ | Embedding Fields | `description` |
2308
+ | Tools | `search_rem` |
2309
+
2310
+ ## Fields
2311
+
2312
+ ### `id`
2313
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
2314
+ - **Optional**
2315
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
2316
+
2317
+ ### `created_at`
2318
+ - **Type**: `<class ''datetime.datetime''>`
2319
+ - **Optional**
2320
+ - Entity creation timestamp
2321
+
2322
+ ### `updated_at`
2323
+ - **Type**: `<class ''datetime.datetime''>`
2324
+ - **Optional**
2325
+ - Last update timestamp
2326
+
2327
+ ### `deleted_at`
2328
+ - **Type**: `typing.Optional[datetime.datetime]`
2329
+ - **Optional**
2330
+ - Soft deletion timestamp
2331
+
2332
+ ### `tenant_id`
2333
+ - **Type**: `typing.Optional[str]`
2334
+ - **Optional**
2335
+ - Tenant identifier for multi-tenancy isolation
2336
+
2337
+ ### `user_id`
2338
+ - **Type**: `typing.Optional[str]`
2339
+ - **Optional**
2340
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
2341
+
2342
+ ### `graph_edges`
2343
+ - **Type**: `list[dict]`
2344
+ - **Optional**
2345
+ - Knowledge graph edges stored as InlineEdge dicts
2346
+
2347
+ ### `metadata`
2348
+ - **Type**: `<class ''dict''>`
2349
+ - **Optional**
2350
+ - Flexible metadata storage
2351
+
2352
+ ### `tags`
2353
+ - **Type**: `list[str]`
2354
+ - **Optional**
2355
+ - Entity tags
2356
+
2357
+ ### `name`
2358
+ - **Type**: `<class ''str''>`
2359
+ - **Required**
2360
+
2361
+ ### `agent_schema_id`
2362
+ - **Type**: `<class ''str''>`
2363
+ - **Required**
2364
+
2365
+ ### `description`
2366
+ - **Type**: `typing.Optional[str]`
2367
+ - **Optional**
2368
+
2369
+ ### `mime_type_pattern`
2370
+ - **Type**: `typing.Optional[str]`
2371
+ - **Optional**
2372
+
2373
+ ### `uri_pattern`
2374
+ - **Type**: `typing.Optional[str]`
2375
+ - **Optional**
2376
+
2377
+ ### `tag_filter`
2378
+ - **Type**: `list[str]`
2379
+ - **Optional**
2380
+
2381
+ ### `priority`
2382
+ - **Type**: `<class ''int''>`
2383
+ - **Optional**
2384
+
2385
+ ### `enabled`
2386
+ - **Type**: `<class ''bool''>`
2387
+ - **Optional**
2388
+
2389
+ ### `provider_name`
2390
+ - **Type**: `typing.Optional[str]`
2391
+ - **Optional**
2392
+
2393
+ ### `model_name`
2394
+ - **Type**: `typing.Optional[str]`
2395
+ - **Optional**
2396
+
2397
+ ',
2398
+ '{"type": "object", "description": "User configuration for automatic ontology extraction.\n\n Attributes:\n name: Human-readable config name\n agent_schema_id: Foreign key to Schema entity to use for extraction\n description: Purpose and scope of this config\n\n # File matching rules (ANY matching rule triggers extraction)\n mime_type_pattern: Regex pattern for file MIME types (e.g., \"application/pdf\")\n uri_pattern: Regex pattern for file URIs (e.g., \"s3://bucket/resumes/.*\")\n tag_filter: List of tags (file must have ALL tags to match)\n\n # Execution control\n priority: Execution order (higher = earlier, default 100)\n enabled: Whether this config is active (default True)\n\n # LLM provider configuration\n provider_name: Optional LLM provider override (defaults to settings)\n model_name: Optional model override (defaults to settings)\n\n Inherited from CoreModel:\n id, created_at, updated_at, deleted_at, tenant_id, user_id,\n graph_edges, metadata, tags, column\n\n Example Usage:\n # CV extraction for recruitment\n cv_config = OntologyConfig(\n name=\"recruitment-cv-parser\",\n agent_schema_id=\"cv-parser-v1\",\n description=\"Extract candidate information from resumes\",\n mime_type_pattern=\"application/pdf\",\n uri_pattern=\".*/resumes/.*\",\n tag_filter=[\"cv\", \"candidate\"],\n priority=100,\n enabled=True,\n tenant_id=\"acme-corp\",\n tags=[\"recruitment\", \"hr\"]\n )\n\n # Contract analysis for legal team\n contract_config = OntologyConfig(\n name=\"legal-contract-analyzer\",\n agent_schema_id=\"contract-parser-v2\",\n description=\"Extract key terms from supplier contracts\",\n mime_type_pattern=\"application/(pdf|msword|vnd.openxmlformats.*)\",\n tag_filter=[\"legal\", \"contract\"],\n priority=200, # Higher priority = runs first\n enabled=True,\n provider_name=\"openai\", # Override default provider\n model_name=\"gpt-4.1\",\n tenant_id=\"acme-corp\",\n tags=[\"legal\", \"procurement\"]\n )\n\n # Medical records for healthcare\n medical_config 
= OntologyConfig(\n name=\"medical-records-extractor\",\n agent_schema_id=\"medical-parser-v1\",\n description=\"Extract diagnoses and treatments from medical records\",\n mime_type_pattern=\"application/pdf\",\n tag_filter=[\"medical\", \"patient-record\"],\n priority=50,\n enabled=True,\n tenant_id=\"healthsystem\",\n tags=[\"medical\", \"hipaa-compliant\"]\n )\n \n\nThis agent can search the `ontology_configs` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "agent_schema_id": {"title": "Agent Schema Id", "type": "string"}, "description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Description"}, "mime_type_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Mime Type Pattern"}, "uri_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri Pattern"}, "tag_filter": {"default": [], "items": {"type": "string"}, "title": "Tag Filter", "type": "array"}, "priority": {"default": 100, "title": "Priority", "type": "integer"}, "enabled": {"default": true, "title": "Enabled", "type": "boolean"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}}, "required": ["name", "agent_schema_id"], "json_schema_extra": {"table_name": "ontology_configs", "entity_key_field": "id", "embedding_fields": ["description"], "fully_qualified_name": "rem.models.entities.ontology_config.OntologyConfig", "tools": ["search_rem"], "default_search_table": "ontology_configs", "has_embeddings": true}}'::jsonb,
2399
+ 'entity',
2400
+ '{"table_name": "ontology_configs", "entity_key_field": "id", "embedding_fields": ["description"], "fqn": "rem.models.entities.ontology_config.OntologyConfig"}'::jsonb
1501
2401
  )
1502
- RETURNS BIGINT AS $$
1503
- BEGIN
1504
- RETURN (
1505
- SELECT COUNT(m.id)
1506
- FROM messages m
1507
- INNER JOIN shared_sessions ss ON ss.session_id = m.session_id
1508
- AND ss.tenant_id = m.tenant_id
1509
- AND ss.deleted_at IS NULL
1510
- WHERE m.tenant_id = p_tenant_id
1511
- AND ss.shared_with_user_id = p_recipient_user_id
1512
- AND ss.owner_user_id = p_owner_user_id
1513
- AND m.deleted_at IS NULL
1514
- );
1515
- END;
1516
- $$ LANGUAGE plpgsql;
2402
+ ON CONFLICT (id) DO UPDATE SET
2403
+ name = EXCLUDED.name,
2404
+ content = EXCLUDED.content,
2405
+ spec = EXCLUDED.spec,
2406
+ category = EXCLUDED.category,
2407
+ metadata = EXCLUDED.metadata,
2408
+ updated_at = CURRENT_TIMESTAMP;
2409
+
2410
+ -- Schema entry for Resource (resources)
2411
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
2412
+ VALUES (
2413
+ 'a579f379-4f1c-5414-8ff4-1382d0f783b7'::uuid,
2414
+ 'system',
2415
+ 'Resource',
2416
+ '# Resource
2417
+
2418
+
2419
+ Base content unit in REM.
2420
+
2421
+ Resources are content units that feed into dreaming workflows for moment
2422
+ extraction and affinity graph construction. Tenant isolation is provided
2423
+ via CoreModel.tenant_id field.
2424
+
2425
+
2426
+ ## Overview
2427
+
2428
+ The `Resource` entity is stored in the `resources` table. Each record is uniquely
2429
+ identified by its `name` field for lookups and graph traversal.
2430
+
2431
+ ## Search Capabilities
2432
+
2433
+ This schema includes the `search_rem` tool which supports:
2434
+ - **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
2435
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
2436
+ - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM resources LIMIT 10`)
2437
+ - **SQL**: Complex queries (e.g., `SELECT * FROM resources WHERE ...`)
2438
+
2439
+ ## Table Info
2440
+
2441
+ | Property | Value |
2442
+ |----------|-------|
2443
+ | Table | `resources` |
2444
+ | Entity Key | `name` |
2445
+ | Embedding Fields | `content` |
2446
+ | Tools | `search_rem` |
2447
+
2448
+ ## Fields
2449
+
2450
+ ### `id`
2451
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
2452
+ - **Optional**
2453
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
2454
+
2455
+ ### `created_at`
2456
+ - **Type**: `<class ''datetime.datetime''>`
2457
+ - **Optional**
2458
+ - Entity creation timestamp
2459
+
2460
+ ### `updated_at`
2461
+ - **Type**: `<class ''datetime.datetime''>`
2462
+ - **Optional**
2463
+ - Last update timestamp
2464
+
2465
+ ### `deleted_at`
2466
+ - **Type**: `typing.Optional[datetime.datetime]`
2467
+ - **Optional**
2468
+ - Soft deletion timestamp
2469
+
2470
+ ### `tenant_id`
2471
+ - **Type**: `typing.Optional[str]`
2472
+ - **Optional**
2473
+ - Tenant identifier for multi-tenancy isolation
2474
+
2475
+ ### `user_id`
2476
+ - **Type**: `typing.Optional[str]`
2477
+ - **Optional**
2478
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
2479
+
2480
+ ### `graph_edges`
2481
+ - **Type**: `list[dict]`
2482
+ - **Optional**
2483
+ - Knowledge graph edges stored as InlineEdge dicts
2484
+
2485
+ ### `metadata`
2486
+ - **Type**: `<class ''dict''>`
2487
+ - **Optional**
2488
+ - Flexible metadata storage
2489
+
2490
+ ### `tags`
2491
+ - **Type**: `list[str]`
2492
+ - **Optional**
2493
+ - Entity tags
2494
+
2495
+ ### `name`
2496
+ - **Type**: `typing.Optional[str]`
2497
+ - **Optional**
2498
+ - Human-readable resource name (used as graph label). Auto-generated from uri+ordinal if not provided.
2499
+
2500
+ ### `uri`
2501
+ - **Type**: `typing.Optional[str]`
2502
+ - **Optional**
2503
+ - Content URI or identifier (file path, URL, etc.)
2504
+
2505
+ ### `ordinal`
2506
+ - **Type**: `<class ''int''>`
2507
+ - **Optional**
2508
+ - Chunk ordinal for splitting large documents (0 for single-chunk resources)
2509
+
2510
+ ### `content`
2511
+ - **Type**: `<class ''str''>`
2512
+ - **Optional**
2513
+ - Resource content text
2514
+
2515
+ ### `timestamp`
2516
+ - **Type**: `<class ''datetime.datetime''>`
2517
+ - **Optional**
2518
+ - Resource timestamp (content creation/publication time)
2519
+
2520
+ ### `category`
2521
+ - **Type**: `typing.Optional[str]`
2522
+ - **Optional**
2523
+ - Resource category (document, conversation, artifact, etc.)
2524
+
2525
+ ### `related_entities`
2526
+ - **Type**: `list[dict]`
2527
+ - **Optional**
2528
+ - Extracted entities (people, projects, concepts) with metadata
2529
+
2530
+ ',
2531
+ '{"type": "object", "description": "\n Base content unit in REM.\n\n Resources are content units that feed into dreaming workflows for moment\n extraction and affinity graph construction. Tenant isolation is provided\n via CoreModel.tenant_id field.\n \n\nThis agent can search the `resources` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Human-readable resource name (used as graph label). Auto-generated from uri+ordinal if not provided.", "entity_key": true, "title": "Name"}, "uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Content URI or identifier (file path, URL, etc.)", "title": "Uri"}, "ordinal": {"composite_key": true, "default": 0, "description": "Chunk ordinal for splitting large documents (0 for single-chunk resources)", "title": "Ordinal", "type": "integer"}, "content": {"default": "", "description": "Resource content text", "title": "Content", "type": "string"}, "timestamp": {"description": "Resource timestamp (content creation/publication time)", "format": "date-time", "title": "Timestamp", "type": "string"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Resource category (document, conversation, artifact, etc.)", "title": "Category"}, "related_entities": {"description": "Extracted entities (people, projects, concepts) with metadata", "items": {"additionalProperties": true, "type": "object"}, "title": "Related Entities", "type": "array"}}, "required": [], "json_schema_extra": {"table_name": "resources", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.resource.Resource", "tools": ["search_rem"], "default_search_table": 
"resources", "has_embeddings": true}}'::jsonb,
2532
+ 'entity',
2533
+ '{"table_name": "resources", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.resource.Resource"}'::jsonb
2534
+ )
2535
+ ON CONFLICT (id) DO UPDATE SET
2536
+ name = EXCLUDED.name,
2537
+ content = EXCLUDED.content,
2538
+ spec = EXCLUDED.spec,
2539
+ category = EXCLUDED.category,
2540
+ metadata = EXCLUDED.metadata,
2541
+ updated_at = CURRENT_TIMESTAMP;
2542
+
2543
+ -- Schema entry for Schema (schemas)
2544
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
2545
+ VALUES (
2546
+ '2372e956-add6-58b8-a638-758a91a2b6c4'::uuid,
2547
+ 'system',
2548
+ 'Schema',
2549
+ '# Schema
2550
+
2551
+
2552
+ Agent schema definition.
2553
+
2554
+ Schemas define agents that can be dynamically loaded into Pydantic AI.
2555
+ They store JsonSchema specifications with embedded metadata for tools,
2556
+ resources, and system prompts.
2557
+
2558
+ For ontology extraction agents:
2559
+ - `provider_configs` enables multi-provider support (test across Anthropic, OpenAI, etc.)
2560
+ - `embedding_fields` specifies which output fields should be embedded for semantic search
2561
+
2562
+ Tenant isolation is provided via CoreModel.tenant_id field.
2563
+
2564
+
2565
+ ## Overview
2566
+
2567
+ The `Schema` entity is stored in the `schemas` table. Each record is uniquely
2568
+ identified by its `id` field for lookups and graph traversal.
2569
+
2570
+ ## Search Capabilities
2571
+
2572
+ This schema includes the `search_rem` tool which supports:
2573
+ - **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
2574
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
2575
+ - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM schemas LIMIT 10`)
2576
+ - **SQL**: Complex queries (e.g., `SELECT * FROM schemas WHERE ...`)
2577
+
2578
+ ## Table Info
2579
+
2580
+ | Property | Value |
2581
+ |----------|-------|
2582
+ | Table | `schemas` |
2583
+ | Entity Key | `id` |
2584
+ | Embedding Fields | `content` |
2585
+ | Tools | `search_rem` |
2586
+
2587
+ ## Fields
2588
+
2589
+ ### `id`
2590
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
2591
+ - **Optional**
2592
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
2593
+
2594
+ ### `created_at`
2595
+ - **Type**: `<class ''datetime.datetime''>`
2596
+ - **Optional**
2597
+ - Entity creation timestamp
2598
+
2599
+ ### `updated_at`
2600
+ - **Type**: `<class ''datetime.datetime''>`
2601
+ - **Optional**
2602
+ - Last update timestamp
2603
+
2604
+ ### `deleted_at`
2605
+ - **Type**: `typing.Optional[datetime.datetime]`
2606
+ - **Optional**
2607
+ - Soft deletion timestamp
2608
+
2609
+ ### `tenant_id`
2610
+ - **Type**: `typing.Optional[str]`
2611
+ - **Optional**
2612
+ - Tenant identifier for multi-tenancy isolation
2613
+
2614
+ ### `user_id`
2615
+ - **Type**: `typing.Optional[str]`
2616
+ - **Optional**
2617
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
2618
+
2619
+ ### `graph_edges`
2620
+ - **Type**: `list[dict]`
2621
+ - **Optional**
2622
+ - Knowledge graph edges stored as InlineEdge dicts
2623
+
2624
+ ### `metadata`
2625
+ - **Type**: `<class ''dict''>`
2626
+ - **Optional**
2627
+ - Flexible metadata storage
2628
+
2629
+ ### `tags`
2630
+ - **Type**: `list[str]`
2631
+ - **Optional**
2632
+ - Entity tags
2633
+
2634
+ ### `name`
2635
+ - **Type**: `<class ''str''>`
2636
+ - **Required**
2637
+ - Human-readable schema name (used as identifier)
2638
+
2639
+ ### `content`
2640
+ - **Type**: `<class ''str''>`
2641
+ - **Optional**
2642
+ - Markdown documentation and instructions for the schema
2643
+
2644
+ ### `spec`
2645
+ - **Type**: `<class ''dict''>`
2646
+ - **Required**
2647
+ - JsonSchema specification defining the agent structure and capabilities
2648
+
2649
+ ### `category`
2650
+ - **Type**: `typing.Optional[str]`
2651
+ - **Optional**
2652
+ - Schema category distinguishing schema types. Values: ''agent'' (AI agents), ''evaluator'' (LLM-as-a-Judge evaluators). Maps directly from json_schema_extra.kind field during ingestion.
2653
+
2654
+ ### `provider_configs`
2655
+ - **Type**: `list[dict]`
2656
+ - **Optional**
2657
+ - Optional provider configurations for multi-provider testing. Each dict has ''provider_name'' and ''model_name''. Example: [{''provider_name'': ''anthropic'', ''model_name'': ''claude-sonnet-4-5''}]
2658
+
2659
+ ### `embedding_fields`
2660
+ - **Type**: `list[str]`
2661
+ - **Optional**
2662
+ - JSON paths in extracted_data to embed for semantic search. Example: [''summary'', ''candidate_name'', ''skills''] for CV extraction. Values will be concatenated and embedded using configured embedding provider.
2663
+
2664
+ ',
2665
+ '{"type": "object", "description": "\n Agent schema definition.\n\n Schemas define agents that can be dynamically loaded into Pydantic AI.\n They store JsonSchema specifications with embedded metadata for tools,\n resources, and system prompts.\n\n For ontology extraction agents:\n - `provider_configs` enables multi-provider support (test across Anthropic, OpenAI, etc.)\n - `embedding_fields` specifies which output fields should be embedded for semantic search\n\n Tenant isolation is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `schemas` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "Human-readable schema name (used as identifier)", "title": "Name", "type": "string"}, "content": {"default": "", "description": "Markdown documentation and instructions for the schema", "title": "Content", "type": "string"}, "spec": {"additionalProperties": true, "description": "JsonSchema specification defining the agent structure and capabilities", "title": "Spec", "type": "object"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Schema category distinguishing schema types. Values: ''agent'' (AI agents), ''evaluator'' (LLM-as-a-Judge evaluators). Maps directly from json_schema_extra.kind field during ingestion.", "title": "Category"}, "provider_configs": {"description": "Optional provider configurations for multi-provider testing. Each dict has ''provider_name'' and ''model_name''. Example: [{''provider_name'': ''anthropic'', ''model_name'': ''claude-sonnet-4-5''}]", "items": {"additionalProperties": true, "type": "object"}, "title": "Provider Configs", "type": "array"}, "embedding_fields": {"description": "JSON paths in extracted_data to embed for semantic search. Example: [''summary'', ''candidate_name'', ''skills''] for CV extraction. 
Values will be concatenated and embedded using configured embedding provider.", "items": {"type": "string"}, "title": "Embedding Fields", "type": "array"}}, "required": ["name", "spec"], "json_schema_extra": {"table_name": "schemas", "entity_key_field": "id", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.schema.Schema", "tools": ["search_rem"], "default_search_table": "schemas", "has_embeddings": true}}'::jsonb,
2666
+ 'entity',
2667
+ '{"table_name": "schemas", "entity_key_field": "id", "embedding_fields": ["content"], "fqn": "rem.models.entities.schema.Schema"}'::jsonb
2668
+ )
2669
+ ON CONFLICT (id) DO UPDATE SET
2670
+ name = EXCLUDED.name,
2671
+ content = EXCLUDED.content,
2672
+ spec = EXCLUDED.spec,
2673
+ category = EXCLUDED.category,
2674
+ metadata = EXCLUDED.metadata,
2675
+ updated_at = CURRENT_TIMESTAMP;
2676
+
2677
+ -- Schema entry for Session (sessions)
2678
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
2679
+ VALUES (
2680
+ '5893fbca-2d8e-5402-ac41-7bac2c0c472a'::uuid,
2681
+ 'system',
2682
+ 'Session',
2683
+ '# Session
2684
+
2685
+
2686
+ Conversation session container.
2687
+
2688
+ Groups messages together and supports different modes for normal conversations
2689
+ and evaluation/experimentation scenarios.
2690
+
2691
+ For evaluation sessions, stores:
2692
+ - original_trace_id: Reference to the original session being evaluated
2693
+ - settings_overrides: Model, temperature, prompt overrides
2694
+ - prompt: Custom prompt being tested
2695
+
2696
+ Default sessions are lightweight - just a session_id on messages.
2697
+ Special sessions store additional metadata for experiments.
2698
+
2699
+
2700
+ ## Overview
2701
+
2702
+ The `Session` entity is stored in the `sessions` table. Each record is uniquely
2703
+ identified by its `name` field for lookups and graph traversal.
2704
+
2705
+ ## Search Capabilities
2706
+
2707
+ This schema includes the `search_rem` tool which supports:
2708
+ - **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
2709
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
2710
+ - **SEARCH**: Semantic vector search on description (e.g., `SEARCH "concept" FROM sessions LIMIT 10`)
2711
+ - **SQL**: Complex queries (e.g., `SELECT * FROM sessions WHERE ...`)
2712
+
2713
+ ## Table Info
2714
+
2715
+ | Property | Value |
2716
+ |----------|-------|
2717
+ | Table | `sessions` |
2718
+ | Entity Key | `name` |
2719
+ | Embedding Fields | `description` |
2720
+ | Tools | `search_rem` |
2721
+
2722
+ ## Fields
2723
+
2724
+ ### `id`
2725
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
2726
+ - **Optional**
2727
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
2728
+
2729
+ ### `created_at`
2730
+ - **Type**: `<class ''datetime.datetime''>`
2731
+ - **Optional**
2732
+ - Entity creation timestamp
2733
+
2734
+ ### `updated_at`
2735
+ - **Type**: `<class ''datetime.datetime''>`
2736
+ - **Optional**
2737
+ - Last update timestamp
2738
+
2739
+ ### `deleted_at`
2740
+ - **Type**: `typing.Optional[datetime.datetime]`
2741
+ - **Optional**
2742
+ - Soft deletion timestamp
2743
+
2744
+ ### `tenant_id`
2745
+ - **Type**: `typing.Optional[str]`
2746
+ - **Optional**
2747
+ - Tenant identifier for multi-tenancy isolation
2748
+
2749
+ ### `user_id`
2750
+ - **Type**: `typing.Optional[str]`
2751
+ - **Optional**
2752
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
2753
+
2754
+ ### `graph_edges`
2755
+ - **Type**: `list[dict]`
2756
+ - **Optional**
2757
+ - Knowledge graph edges stored as InlineEdge dicts
2758
+
2759
+ ### `metadata`
2760
+ - **Type**: `<class ''dict''>`
2761
+ - **Optional**
2762
+ - Flexible metadata storage
2763
+
2764
+ ### `tags`
2765
+ - **Type**: `list[str]`
2766
+ - **Optional**
2767
+ - Entity tags
2768
+
2769
+ ### `name`
2770
+ - **Type**: `<class ''str''>`
2771
+ - **Required**
2772
+ - Session name/identifier
2773
+
2774
+ ### `mode`
2775
+ - **Type**: `<enum ''SessionMode''>`
2776
+ - **Optional**
2777
+ - Session mode: ''normal'' or ''evaluation''
2778
+
2779
+ ### `description`
2780
+ - **Type**: `str | None`
2781
+ - **Optional**
2782
+ - Optional session description
2783
+
2784
+ ### `original_trace_id`
2785
+ - **Type**: `str | None`
2786
+ - **Optional**
2787
+ - For evaluation mode: ID of the original session/trace being evaluated
2788
+
2789
+ ### `settings_overrides`
2790
+ - **Type**: `dict | None`
2791
+ - **Optional**
2792
+ - Settings overrides (model, temperature, max_tokens, system_prompt)
2793
+
2794
+ ### `prompt`
2795
+ - **Type**: `str | None`
2796
+ - **Optional**
2797
+ - Custom prompt for this session (can override agent prompt)
2798
+
2799
+ ### `agent_schema_uri`
2800
+ - **Type**: `str | None`
2801
+ - **Optional**
2802
+ - Agent schema used for this session
2803
+
2804
+ ### `message_count`
2805
+ - **Type**: `<class ''int''>`
2806
+ - **Optional**
2807
+ - Number of messages in this session
2808
+
2809
+ ### `total_tokens`
2810
+ - **Type**: `int | None`
2811
+ - **Optional**
2812
+ - Total tokens used in this session
2813
+
2814
+ ',
2815
+ '{"type": "object", "description": "\n Conversation session container.\n\n Groups messages together and supports different modes for normal conversations\n and evaluation/experimentation scenarios.\n\n For evaluation sessions, stores:\n - original_trace_id: Reference to the original session being evaluated\n - settings_overrides: Model, temperature, prompt overrides\n - prompt: Custom prompt being tested\n\n Default sessions are lightweight - just a session_id on messages.\n Special sessions store additional metadata for experiments.\n \n\nThis agent can search the `sessions` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "Session name/identifier", "entity_key": true, "title": "Name", "type": "string"}, "mode": {"$ref": "#/$defs/SessionMode", "default": "normal", "description": "Session mode: ''normal'' or ''evaluation''"}, "description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Optional session description", "title": "Description"}, "original_trace_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "For evaluation mode: ID of the original session/trace being evaluated", "title": "Original Trace Id"}, "settings_overrides": {"anyOf": [{"additionalProperties": true, "type": "object"}, {"type": "null"}], "default": null, "description": "Settings overrides (model, temperature, max_tokens, system_prompt)", "title": "Settings Overrides"}, "prompt": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Custom prompt for this session (can override agent prompt)", "title": "Prompt"}, "agent_schema_uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Agent schema used for this session", "title": "Agent Schema Uri"}, "message_count": {"default": 0, "description": "Number of messages in this session", "title": "Message Count", "type": "integer"}, "total_tokens": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "Total tokens used in this session", "title": "Total 
Tokens"}}, "required": ["name"], "json_schema_extra": {"table_name": "sessions", "entity_key_field": "name", "embedding_fields": ["description"], "fully_qualified_name": "rem.models.entities.session.Session", "tools": ["search_rem"], "default_search_table": "sessions", "has_embeddings": true}}'::jsonb,
2816
+ 'entity',
2817
+ '{"table_name": "sessions", "entity_key_field": "name", "embedding_fields": ["description"], "fqn": "rem.models.entities.session.Session"}'::jsonb
2818
+ )
2819
+ ON CONFLICT (id) DO UPDATE SET
2820
+ name = EXCLUDED.name,
2821
+ content = EXCLUDED.content,
2822
+ spec = EXCLUDED.spec,
2823
+ category = EXCLUDED.category,
2824
+ metadata = EXCLUDED.metadata,
2825
+ updated_at = CURRENT_TIMESTAMP;
2826
+
2827
+ -- Schema entry for SharedSession (shared_sessions)
2828
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
2829
+ VALUES (
2830
+ 'be5c5711-6c45-5fc4-9cd1-e076599261c7'::uuid,
2831
+ 'system',
2832
+ 'SharedSession',
2833
+ '# SharedSession
2834
+
2835
+
2836
+ Session sharing record between users.
2837
+
2838
+ Links a session (identified by session_id from Message records) to a
2839
+ recipient user, enabling collaborative access to conversation history.
2840
+
2841
+
2842
+ ## Overview
2843
+
2844
+ The `SharedSession` entity is stored in the `shared_sessions` table. Each record is uniquely
2845
+ identified by its `id` field for lookups and graph traversal.
2846
+
2847
+ ## Search Capabilities
2848
+
2849
+ This schema includes the `search_rem` tool which supports:
2850
+ - **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
2851
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
2852
+ - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM shared_sessions LIMIT 10`)
2853
+ - **SQL**: Complex queries (e.g., `SELECT * FROM shared_sessions WHERE ...`)
2854
+
2855
+ ## Table Info
2856
+
2857
+ | Property | Value |
2858
+ |----------|-------|
2859
+ | Table | `shared_sessions` |
2860
+ | Entity Key | `id` |
2861
+ | Embedding Fields | None |
2862
+ | Tools | `search_rem` |
2863
+
2864
+ ## Fields
2865
+
2866
+ ### `id`
2867
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
2868
+ - **Optional**
2869
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
2870
+
2871
+ ### `created_at`
2872
+ - **Type**: `<class ''datetime.datetime''>`
2873
+ - **Optional**
2874
+ - Entity creation timestamp
2875
+
2876
+ ### `updated_at`
2877
+ - **Type**: `<class ''datetime.datetime''>`
2878
+ - **Optional**
2879
+ - Last update timestamp
2880
+
2881
+ ### `deleted_at`
2882
+ - **Type**: `typing.Optional[datetime.datetime]`
2883
+ - **Optional**
2884
+ - Soft deletion timestamp
2885
+
2886
+ ### `tenant_id`
2887
+ - **Type**: `typing.Optional[str]`
2888
+ - **Optional**
2889
+ - Tenant identifier for multi-tenancy isolation
2890
+
2891
+ ### `user_id`
2892
+ - **Type**: `typing.Optional[str]`
2893
+ - **Optional**
2894
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
2895
+
2896
+ ### `graph_edges`
2897
+ - **Type**: `list[dict]`
2898
+ - **Optional**
2899
+ - Knowledge graph edges stored as InlineEdge dicts
2900
+
2901
+ ### `metadata`
2902
+ - **Type**: `<class ''dict''>`
2903
+ - **Optional**
2904
+ - Flexible metadata storage
2905
+
2906
+ ### `tags`
2907
+ - **Type**: `list[str]`
2908
+ - **Optional**
2909
+ - Entity tags
2910
+
2911
+ ### `session_id`
2912
+ - **Type**: `<class ''str''>`
2913
+ - **Required**
2914
+ - The session being shared (matches Message.session_id)
2915
+
2916
+ ### `owner_user_id`
2917
+ - **Type**: `<class ''str''>`
2918
+ - **Required**
2919
+ - User ID of the session owner (the sharer)
2920
+
2921
+ ### `shared_with_user_id`
2922
+ - **Type**: `<class ''str''>`
2923
+ - **Required**
2924
+ - User ID of the recipient (who can now view the session)
2925
+
2926
+ ',
2927
+ '{"type": "object", "description": "\n Session sharing record between users.\n\n Links a session (identified by session_id from Message records) to a\n recipient user, enabling collaborative access to conversation history.\n \n\nThis agent can search the `shared_sessions` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "session_id": {"description": "The session being shared (matches Message.session_id)", "title": "Session Id", "type": "string"}, "owner_user_id": {"description": "User ID of the session owner (the sharer)", "title": "Owner User Id", "type": "string"}, "shared_with_user_id": {"description": "User ID of the recipient (who can now view the session)", "title": "Shared With User Id", "type": "string"}}, "required": ["session_id", "owner_user_id", "shared_with_user_id"], "json_schema_extra": {"table_name": "shared_sessions", "entity_key_field": "id", "embedding_fields": [], "fully_qualified_name": "rem.models.entities.shared_session.SharedSession", "tools": ["search_rem"], "default_search_table": "shared_sessions", "has_embeddings": false}}'::jsonb,
2928
+ 'entity',
2929
+ '{"table_name": "shared_sessions", "entity_key_field": "id", "embedding_fields": [], "fqn": "rem.models.entities.shared_session.SharedSession"}'::jsonb
2930
+ )
2931
+ ON CONFLICT (id) DO UPDATE SET
2932
+ name = EXCLUDED.name,
2933
+ content = EXCLUDED.content,
2934
+ spec = EXCLUDED.spec,
2935
+ category = EXCLUDED.category,
2936
+ metadata = EXCLUDED.metadata,
2937
+ updated_at = CURRENT_TIMESTAMP;
2938
+
2939
+ -- Schema entry for User (users)
2940
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
2941
+ VALUES (
2942
+ '1ad3d95e-32e9-54d6-ad7d-e39b9ed5018b'::uuid,
2943
+ 'system',
2944
+ 'User',
2945
+ '# User
2946
+
2947
+
2948
+ User entity.
2949
+
2950
+ Represents people in the REM system, either as active users
2951
+ or entities extracted from content. Tenant isolation is provided
2952
+ via CoreModel.tenant_id field.
2953
+
2954
+ Enhanced by dreaming worker:
2955
+ - summary: Generated from activity analysis
2956
+ - interests: Extracted from resources and sessions
2957
+ - activity_level: Computed from recent engagement
2958
+ - preferred_topics: Extracted from moment/resource topics
2959
+
2960
+
2961
+ ## Overview
2962
+
2963
+ The `User` entity is stored in the `users` table. Each record is uniquely
2964
+ identified by its `name` field for lookups and graph traversal.
2965
+
2966
+ ## Search Capabilities
2967
+
2968
+ This schema includes the `search_rem` tool which supports:
2969
+ - **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
2970
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
2971
+ - **SEARCH**: Semantic vector search on summary (e.g., `SEARCH "concept" FROM users LIMIT 10`)
2972
+ - **SQL**: Complex queries (e.g., `SELECT * FROM users WHERE ...`)
2973
+
2974
+ ## Table Info
2975
+
2976
+ | Property | Value |
2977
+ |----------|-------|
2978
+ | Table | `users` |
2979
+ | Entity Key | `name` |
2980
+ | Embedding Fields | `summary` |
2981
+ | Tools | `search_rem` |
2982
+
2983
+ ## Fields
2984
+
2985
+ ### `id`
2986
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
2987
+ - **Optional**
2988
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
2989
+
2990
+ ### `created_at`
2991
+ - **Type**: `<class ''datetime.datetime''>`
2992
+ - **Optional**
2993
+ - Entity creation timestamp
2994
+
2995
+ ### `updated_at`
2996
+ - **Type**: `<class ''datetime.datetime''>`
2997
+ - **Optional**
2998
+ - Last update timestamp
2999
+
3000
+ ### `deleted_at`
3001
+ - **Type**: `typing.Optional[datetime.datetime]`
3002
+ - **Optional**
3003
+ - Soft deletion timestamp
3004
+
3005
+ ### `tenant_id`
3006
+ - **Type**: `typing.Optional[str]`
3007
+ - **Optional**
3008
+ - Tenant identifier for multi-tenancy isolation
3009
+
3010
+ ### `user_id`
3011
+ - **Type**: `typing.Optional[str]`
3012
+ - **Optional**
3013
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
3014
+
3015
+ ### `graph_edges`
3016
+ - **Type**: `list[dict]`
3017
+ - **Optional**
3018
+ - Knowledge graph edges stored as InlineEdge dicts
3019
+
3020
+ ### `metadata`
3021
+ - **Type**: `<class ''dict''>`
3022
+ - **Optional**
3023
+ - Flexible metadata storage
3024
+
3025
+ ### `tags`
3026
+ - **Type**: `list[str]`
3027
+ - **Optional**
3028
+ - Entity tags
3029
+
3030
+ ### `name`
3031
+ - **Type**: `<class ''str''>`
3032
+ - **Required**
3033
+ - User name (human-readable, used as graph label)
3034
+
3035
+ ### `email`
3036
+ - **Type**: `typing.Optional[str]`
3037
+ - **Optional**
3038
+ - User email address
3039
+
3040
+ ### `role`
3041
+ - **Type**: `typing.Optional[str]`
3042
+ - **Optional**
3043
+ - User role (employee, contractor, external, etc.)
3044
+
3045
+ ### `tier`
3046
+ - **Type**: `<enum ''UserTier''>`
3047
+ - **Optional**
3048
+ - User subscription tier (free, basic, pro) for feature gating
3049
+
3050
+ ### `anonymous_ids`
3051
+ - **Type**: `list[str]`
3052
+ - **Optional**
3053
+ - Linked anonymous session IDs used for merging history
3054
+
3055
+ ### `sec_policy`
3056
+ - **Type**: `<class ''dict''>`
3057
+ - **Optional**
3058
+ - Security policy configuration (JSON, extensible for custom policies)
3059
+
3060
+ ### `summary`
3061
+ - **Type**: `typing.Optional[str]`
3062
+ - **Optional**
3063
+ - LLM-generated user profile summary (updated by dreaming worker)
3064
+
3065
+ ### `interests`
3066
+ - **Type**: `list[str]`
3067
+ - **Optional**
3068
+ - User interests extracted from activity
3069
+
3070
+ ### `preferred_topics`
3071
+ - **Type**: `list[str]`
3072
+ - **Optional**
3073
+ - Frequently discussed topics in kebab-case
3074
+
3075
+ ### `activity_level`
3076
+ - **Type**: `typing.Optional[str]`
3077
+ - **Optional**
3078
+ - Activity level: active, moderate, inactive
3079
+
3080
+ ### `last_active_at`
3081
+ - **Type**: `typing.Optional[datetime.datetime]`
3082
+ - **Optional**
3083
+ - Last activity timestamp
3084
+
3085
+ ',
3086
+ '{"type": "object", "description": "\n User entity.\n\n Represents people in the REM system, either as active users\n or entities extracted from content. Tenant isolation is provided\n via CoreModel.tenant_id field.\n\n Enhanced by dreaming worker:\n - summary: Generated from activity analysis\n - interests: Extracted from resources and sessions\n - activity_level: Computed from recent engagement\n - preferred_topics: Extracted from moment/resource topics\n \n\nThis agent can search the `users` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "User name (human-readable, used as graph label)", "entity_key": true, "title": "Name", "type": "string"}, "email": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "User email address", "title": "Email"}, "role": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "User role (employee, contractor, external, etc.)", "title": "Role"}, "tier": {"$ref": "#/$defs/UserTier", "default": "free", "description": "User subscription tier (free, basic, pro) for feature gating"}, "anonymous_ids": {"description": "Linked anonymous session IDs used for merging history", "items": {"type": "string"}, "title": "Anonymous Ids", "type": "array"}, "sec_policy": {"additionalProperties": true, "description": "Security policy configuration (JSON, extensible for custom policies)", "title": "Sec Policy", "type": "object"}, "summary": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "LLM-generated user profile summary (updated by dreaming worker)", "title": "Summary"}, "interests": {"description": "User interests extracted from activity", "items": {"type": "string"}, "title": "Interests", "type": "array"}, "preferred_topics": {"description": "Frequently discussed topics in kebab-case", "items": {"type": "string"}, "title": "Preferred Topics", "type": "array"}, "activity_level": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": 
null, "description": "Activity level: active, moderate, inactive", "title": "Activity Level"}, "last_active_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Last activity timestamp", "title": "Last Active At"}}, "required": ["name"], "json_schema_extra": {"table_name": "users", "entity_key_field": "name", "embedding_fields": ["summary"], "fully_qualified_name": "rem.models.entities.user.User", "tools": ["search_rem"], "default_search_table": "users", "has_embeddings": true}}'::jsonb,
3087
+ 'entity',
3088
+ '{"table_name": "users", "entity_key_field": "name", "embedding_fields": ["summary"], "fqn": "rem.models.entities.user.User"}'::jsonb
3089
+ )
3090
+ ON CONFLICT (id) DO UPDATE SET
3091
+ name = EXCLUDED.name,
3092
+ content = EXCLUDED.content,
3093
+ spec = EXCLUDED.spec,
3094
+ category = EXCLUDED.category,
3095
+ metadata = EXCLUDED.metadata,
3096
+ updated_at = CURRENT_TIMESTAMP;
1517
3097
 
1518
3098
  -- ============================================================================
1519
3099
  -- RECORD MIGRATION
@@ -1528,9 +3108,8 @@ SET applied_at = CURRENT_TIMESTAMP,
1528
3108
  DO $$
1529
3109
  BEGIN
1530
3110
  RAISE NOTICE '============================================================';
1531
- RAISE NOTICE 'REM Model Schema Applied: 14 tables';
3111
+ RAISE NOTICE 'REM Model Schema Applied: 12 tables';
1532
3112
  RAISE NOTICE '============================================================';
1533
- RAISE NOTICE ' ✓ domain_resources (1 embeddable fields)';
1534
3113
  RAISE NOTICE ' ✓ feedbacks';
1535
3114
  RAISE NOTICE ' ✓ files (1 embeddable fields)';
1536
3115
  RAISE NOTICE ' ✓ image_resources (1 embeddable fields)';
@@ -1538,11 +3117,10 @@ BEGIN
1538
3117
  RAISE NOTICE ' ✓ moments (1 embeddable fields)';
1539
3118
  RAISE NOTICE ' ✓ ontologies';
1540
3119
  RAISE NOTICE ' ✓ ontology_configs (1 embeddable fields)';
1541
- RAISE NOTICE ' ✓ persons';
1542
3120
  RAISE NOTICE ' ✓ resources (1 embeddable fields)';
1543
3121
  RAISE NOTICE ' ✓ schemas (1 embeddable fields)';
1544
3122
  RAISE NOTICE ' ✓ sessions (1 embeddable fields)';
1545
- RAISE NOTICE ' ✓ shared_sessions (session sharing)';
3123
+ RAISE NOTICE ' ✓ shared_sessions';
1546
3124
  RAISE NOTICE ' ✓ users (1 embeddable fields)';
1547
3125
  RAISE NOTICE '';
1548
3126
  RAISE NOTICE 'Next: Run background indexes if needed';