remdb 0.2.6__py3-none-any.whl → 0.3.118__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +129 -2
- rem/agentic/README.md +76 -0
- rem/agentic/__init__.py +15 -0
- rem/agentic/agents/__init__.py +16 -2
- rem/agentic/agents/sse_simulator.py +500 -0
- rem/agentic/context.py +28 -22
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/tool_wrapper.py +29 -3
- rem/agentic/otel/setup.py +92 -4
- rem/agentic/providers/phoenix.py +32 -43
- rem/agentic/providers/pydantic_ai.py +168 -24
- rem/agentic/schema.py +358 -21
- rem/agentic/tools/rem_tools.py +3 -3
- rem/api/README.md +238 -1
- rem/api/deps.py +255 -0
- rem/api/main.py +154 -37
- rem/api/mcp_router/resources.py +1 -1
- rem/api/mcp_router/server.py +26 -5
- rem/api/mcp_router/tools.py +454 -7
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +494 -0
- rem/api/routers/auth.py +124 -0
- rem/api/routers/chat/completions.py +152 -16
- rem/api/routers/chat/models.py +7 -3
- rem/api/routers/chat/sse_events.py +526 -0
- rem/api/routers/chat/streaming.py +608 -45
- rem/api/routers/dev.py +81 -0
- rem/api/routers/feedback.py +148 -0
- rem/api/routers/messages.py +473 -0
- rem/api/routers/models.py +78 -0
- rem/api/routers/query.py +360 -0
- rem/api/routers/shared_sessions.py +406 -0
- rem/auth/middleware.py +126 -27
- rem/cli/commands/README.md +237 -64
- rem/cli/commands/ask.py +15 -11
- rem/cli/commands/cluster.py +1300 -0
- rem/cli/commands/configure.py +170 -97
- rem/cli/commands/db.py +396 -139
- rem/cli/commands/experiments.py +278 -96
- rem/cli/commands/process.py +22 -15
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +97 -50
- rem/cli/main.py +37 -6
- rem/config.py +2 -2
- rem/models/core/core_model.py +7 -1
- rem/models/core/rem_query.py +5 -2
- rem/models/entities/__init__.py +21 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/message.py +30 -1
- rem/models/entities/session.py +83 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/user.py +10 -3
- rem/registry.py +373 -0
- rem/schemas/agents/rem.yaml +7 -3
- rem/services/content/providers.py +94 -140
- rem/services/content/service.py +115 -24
- rem/services/dreaming/affinity_service.py +2 -16
- rem/services/dreaming/moment_service.py +2 -15
- rem/services/embeddings/api.py +24 -17
- rem/services/embeddings/worker.py +16 -16
- rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
- rem/services/phoenix/client.py +252 -19
- rem/services/postgres/README.md +159 -15
- rem/services/postgres/__init__.py +2 -1
- rem/services/postgres/diff_service.py +531 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
- rem/services/postgres/repository.py +132 -0
- rem/services/postgres/schema_generator.py +291 -9
- rem/services/postgres/service.py +6 -6
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +14 -0
- rem/services/rem/parser.py +44 -9
- rem/services/rem/service.py +36 -2
- rem/services/session/compression.py +17 -1
- rem/services/session/reload.py +1 -1
- rem/services/user_service.py +98 -0
- rem/settings.py +169 -22
- rem/sql/background_indexes.sql +21 -16
- rem/sql/migrations/001_install.sql +387 -54
- rem/sql/migrations/002_install_models.sql +2320 -393
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/utils/__init__.py +18 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/embeddings.py +17 -4
- rem/utils/files.py +167 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +156 -1
- rem/utils/schema_loader.py +284 -21
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +3 -1
- rem/utils/vision.py +9 -14
- rem/workers/README.md +14 -14
- rem/workers/__init__.py +2 -1
- rem/workers/db_maintainer.py +74 -0
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/METADATA +598 -171
- {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/RECORD +102 -73
- {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/WHEEL +1 -1
- rem/sql/002_install_models.sql +0 -1068
- rem/sql/install_models.sql +0 -1038
- {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/entry_points.txt +0 -0
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
-- REM Model Schema (install_models.sql)
|
|
2
2
|
-- Generated from Pydantic models
|
|
3
|
-
-- Source
|
|
4
|
-
-- Generated at: 2025-11-
|
|
3
|
+
-- Source: model registry
|
|
4
|
+
-- Generated at: 2025-11-29T18:45:11.372432
|
|
5
5
|
--
|
|
6
|
-
-- DO NOT EDIT MANUALLY - Regenerate with: rem schema generate
|
|
6
|
+
-- DO NOT EDIT MANUALLY - Regenerate with: rem db schema generate
|
|
7
7
|
--
|
|
8
8
|
-- This script creates:
|
|
9
9
|
-- 1. Primary entity tables
|
|
10
10
|
-- 2. Embeddings tables (embeddings_<table>)
|
|
11
11
|
-- 3. KV_STORE triggers for cache maintenance
|
|
12
12
|
-- 4. Indexes (foreground only, background indexes separate)
|
|
13
|
+
-- 5. Schema table entries (for agent-like table access)
|
|
13
14
|
|
|
14
15
|
-- ============================================================================
|
|
15
16
|
-- PREREQUISITES CHECK
|
|
@@ -19,34 +20,113 @@ DO $$
|
|
|
19
20
|
BEGIN
|
|
20
21
|
-- Check that install.sql has been run
|
|
21
22
|
IF NOT EXISTS (SELECT 1 FROM pg_tables WHERE tablename = 'kv_store') THEN
|
|
22
|
-
RAISE EXCEPTION 'KV_STORE table not found. Run
|
|
23
|
+
RAISE EXCEPTION 'KV_STORE table not found. Run migrations/001_install.sql first.';
|
|
23
24
|
END IF;
|
|
24
25
|
|
|
25
26
|
IF NOT EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'vector') THEN
|
|
26
|
-
RAISE EXCEPTION 'pgvector extension not found. Run
|
|
27
|
+
RAISE EXCEPTION 'pgvector extension not found. Run migrations/001_install.sql first.';
|
|
27
28
|
END IF;
|
|
28
29
|
|
|
29
30
|
RAISE NOTICE 'Prerequisites check passed';
|
|
30
31
|
END $$;
|
|
31
32
|
|
|
32
33
|
-- ======================================================================
|
|
33
|
-
--
|
|
34
|
+
-- FEEDBACKS (Model: Feedback)
|
|
34
35
|
-- ======================================================================
|
|
35
36
|
|
|
36
|
-
CREATE TABLE IF NOT EXISTS
|
|
37
|
+
CREATE TABLE IF NOT EXISTS feedbacks (
|
|
38
|
+
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
39
|
+
tenant_id VARCHAR(100) NOT NULL,
|
|
40
|
+
user_id VARCHAR(256),
|
|
41
|
+
session_id VARCHAR(256) NOT NULL,
|
|
42
|
+
message_id VARCHAR(256),
|
|
43
|
+
rating INTEGER,
|
|
44
|
+
categories TEXT[] DEFAULT ARRAY[]::TEXT[],
|
|
45
|
+
comment TEXT,
|
|
46
|
+
trace_id VARCHAR(256),
|
|
47
|
+
span_id VARCHAR(256),
|
|
48
|
+
phoenix_synced BOOLEAN,
|
|
49
|
+
phoenix_annotation_id VARCHAR(256),
|
|
50
|
+
annotator_kind VARCHAR(256),
|
|
51
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
52
|
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
53
|
+
deleted_at TIMESTAMP,
|
|
54
|
+
graph_edges JSONB DEFAULT '[]'::jsonb,
|
|
55
|
+
metadata JSONB DEFAULT '{}'::jsonb,
|
|
56
|
+
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
57
|
+
);
|
|
58
|
+
|
|
59
|
+
CREATE INDEX idx_feedbacks_tenant ON feedbacks (tenant_id);
|
|
60
|
+
CREATE INDEX idx_feedbacks_user ON feedbacks (user_id);
|
|
61
|
+
CREATE INDEX idx_feedbacks_graph_edges ON feedbacks USING GIN (graph_edges);
|
|
62
|
+
CREATE INDEX idx_feedbacks_metadata ON feedbacks USING GIN (metadata);
|
|
63
|
+
CREATE INDEX idx_feedbacks_tags ON feedbacks USING GIN (tags);
|
|
64
|
+
|
|
65
|
+
-- KV_STORE trigger for feedbacks
|
|
66
|
+
-- Trigger function to maintain KV_STORE for feedbacks
|
|
67
|
+
CREATE OR REPLACE FUNCTION fn_feedbacks_kv_store_upsert()
|
|
68
|
+
RETURNS TRIGGER AS $$
|
|
69
|
+
BEGIN
|
|
70
|
+
IF (TG_OP = 'DELETE') THEN
|
|
71
|
+
-- Remove from KV_STORE on delete
|
|
72
|
+
DELETE FROM kv_store
|
|
73
|
+
WHERE entity_id = OLD.id;
|
|
74
|
+
RETURN OLD;
|
|
75
|
+
ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
|
|
76
|
+
-- Upsert to KV_STORE (O(1) lookup by entity_key)
|
|
77
|
+
INSERT INTO kv_store (
|
|
78
|
+
entity_key,
|
|
79
|
+
entity_type,
|
|
80
|
+
entity_id,
|
|
81
|
+
tenant_id,
|
|
82
|
+
user_id,
|
|
83
|
+
metadata,
|
|
84
|
+
graph_edges,
|
|
85
|
+
updated_at
|
|
86
|
+
) VALUES (
|
|
87
|
+
NEW.id::VARCHAR,
|
|
88
|
+
'feedbacks',
|
|
89
|
+
NEW.id,
|
|
90
|
+
NEW.tenant_id,
|
|
91
|
+
NEW.user_id,
|
|
92
|
+
NEW.metadata,
|
|
93
|
+
COALESCE(NEW.graph_edges, '[]'::jsonb),
|
|
94
|
+
CURRENT_TIMESTAMP
|
|
95
|
+
)
|
|
96
|
+
ON CONFLICT (tenant_id, entity_key)
|
|
97
|
+
DO UPDATE SET
|
|
98
|
+
entity_id = EXCLUDED.entity_id,
|
|
99
|
+
user_id = EXCLUDED.user_id,
|
|
100
|
+
metadata = EXCLUDED.metadata,
|
|
101
|
+
graph_edges = EXCLUDED.graph_edges,
|
|
102
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
103
|
+
|
|
104
|
+
RETURN NEW;
|
|
105
|
+
END IF;
|
|
106
|
+
END;
|
|
107
|
+
$$ LANGUAGE plpgsql;
|
|
108
|
+
|
|
109
|
+
-- Create trigger
|
|
110
|
+
DROP TRIGGER IF EXISTS trg_feedbacks_kv_store ON feedbacks;
|
|
111
|
+
CREATE TRIGGER trg_feedbacks_kv_store
|
|
112
|
+
AFTER INSERT OR UPDATE OR DELETE ON feedbacks
|
|
113
|
+
FOR EACH ROW EXECUTE FUNCTION fn_feedbacks_kv_store_upsert();
|
|
114
|
+
|
|
115
|
+
-- ======================================================================
|
|
116
|
+
-- FILES (Model: File)
|
|
117
|
+
-- ======================================================================
|
|
118
|
+
|
|
119
|
+
CREATE TABLE IF NOT EXISTS files (
|
|
37
120
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
38
121
|
tenant_id VARCHAR(100) NOT NULL,
|
|
39
122
|
user_id VARCHAR(256),
|
|
40
123
|
name VARCHAR(256) NOT NULL,
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
preferred_topics TEXT[] DEFAULT ARRAY[]::TEXT[],
|
|
48
|
-
activity_level VARCHAR(256),
|
|
49
|
-
last_active_at TIMESTAMP,
|
|
124
|
+
uri VARCHAR(256) NOT NULL,
|
|
125
|
+
content TEXT,
|
|
126
|
+
timestamp VARCHAR(256),
|
|
127
|
+
size_bytes INTEGER,
|
|
128
|
+
mime_type VARCHAR(256),
|
|
129
|
+
processing_status VARCHAR(256),
|
|
50
130
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
51
131
|
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
52
132
|
deleted_at TIMESTAMP,
|
|
@@ -55,16 +135,16 @@ CREATE TABLE IF NOT EXISTS users (
|
|
|
55
135
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
56
136
|
);
|
|
57
137
|
|
|
58
|
-
CREATE INDEX
|
|
59
|
-
CREATE INDEX
|
|
60
|
-
CREATE INDEX
|
|
61
|
-
CREATE INDEX
|
|
62
|
-
CREATE INDEX
|
|
138
|
+
CREATE INDEX idx_files_tenant ON files (tenant_id);
|
|
139
|
+
CREATE INDEX idx_files_user ON files (user_id);
|
|
140
|
+
CREATE INDEX idx_files_graph_edges ON files USING GIN (graph_edges);
|
|
141
|
+
CREATE INDEX idx_files_metadata ON files USING GIN (metadata);
|
|
142
|
+
CREATE INDEX idx_files_tags ON files USING GIN (tags);
|
|
63
143
|
|
|
64
|
-
-- Embeddings for
|
|
65
|
-
CREATE TABLE IF NOT EXISTS
|
|
144
|
+
-- Embeddings for files
|
|
145
|
+
CREATE TABLE IF NOT EXISTS embeddings_files (
|
|
66
146
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
67
|
-
entity_id UUID NOT NULL REFERENCES
|
|
147
|
+
entity_id UUID NOT NULL REFERENCES files(id) ON DELETE CASCADE,
|
|
68
148
|
field_name VARCHAR(100) NOT NULL,
|
|
69
149
|
provider VARCHAR(50) NOT NULL DEFAULT 'openai',
|
|
70
150
|
model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
|
|
@@ -77,19 +157,19 @@ CREATE TABLE IF NOT EXISTS embeddings_users (
|
|
|
77
157
|
);
|
|
78
158
|
|
|
79
159
|
-- Index for entity lookup (get all embeddings for entity)
|
|
80
|
-
CREATE INDEX
|
|
160
|
+
CREATE INDEX idx_embeddings_files_entity ON embeddings_files (entity_id);
|
|
81
161
|
|
|
82
162
|
-- Index for field + provider lookup
|
|
83
|
-
CREATE INDEX
|
|
163
|
+
CREATE INDEX idx_embeddings_files_field_provider ON embeddings_files (field_name, provider);
|
|
84
164
|
|
|
85
165
|
-- HNSW index for vector similarity search (created in background)
|
|
86
166
|
-- Note: This will be created by background thread after data load
|
|
87
|
-
-- CREATE INDEX
|
|
167
|
+
-- CREATE INDEX idx_embeddings_files_vector_hnsw ON embeddings_files
|
|
88
168
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
89
169
|
|
|
90
|
-
-- KV_STORE trigger for
|
|
91
|
-
-- Trigger function to maintain KV_STORE for
|
|
92
|
-
CREATE OR REPLACE FUNCTION
|
|
170
|
+
-- KV_STORE trigger for files
|
|
171
|
+
-- Trigger function to maintain KV_STORE for files
|
|
172
|
+
CREATE OR REPLACE FUNCTION fn_files_kv_store_upsert()
|
|
93
173
|
RETURNS TRIGGER AS $$
|
|
94
174
|
BEGIN
|
|
95
175
|
IF (TG_OP = 'DELETE') THEN
|
|
@@ -109,8 +189,8 @@ BEGIN
|
|
|
109
189
|
graph_edges,
|
|
110
190
|
updated_at
|
|
111
191
|
) VALUES (
|
|
112
|
-
NEW.
|
|
113
|
-
'
|
|
192
|
+
NEW.id::VARCHAR,
|
|
193
|
+
'files',
|
|
114
194
|
NEW.id,
|
|
115
195
|
NEW.tenant_id,
|
|
116
196
|
NEW.user_id,
|
|
@@ -132,10 +212,10 @@ END;
|
|
|
132
212
|
$$ LANGUAGE plpgsql;
|
|
133
213
|
|
|
134
214
|
-- Create trigger
|
|
135
|
-
DROP TRIGGER IF EXISTS
|
|
136
|
-
CREATE TRIGGER
|
|
137
|
-
AFTER INSERT OR UPDATE OR DELETE ON
|
|
138
|
-
FOR EACH ROW EXECUTE FUNCTION
|
|
215
|
+
DROP TRIGGER IF EXISTS trg_files_kv_store ON files;
|
|
216
|
+
CREATE TRIGGER trg_files_kv_store
|
|
217
|
+
AFTER INSERT OR UPDATE OR DELETE ON files
|
|
218
|
+
FOR EACH ROW EXECUTE FUNCTION fn_files_kv_store_upsert();
|
|
139
219
|
|
|
140
220
|
-- ======================================================================
|
|
141
221
|
-- IMAGE_RESOURCES (Model: ImageResource)
|
|
@@ -145,7 +225,7 @@ CREATE TABLE IF NOT EXISTS image_resources (
|
|
|
145
225
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
146
226
|
tenant_id VARCHAR(100) NOT NULL,
|
|
147
227
|
user_id VARCHAR(256),
|
|
148
|
-
name VARCHAR(256)
|
|
228
|
+
name VARCHAR(256),
|
|
149
229
|
uri VARCHAR(256),
|
|
150
230
|
ordinal INTEGER,
|
|
151
231
|
content TEXT,
|
|
@@ -168,11 +248,11 @@ CREATE TABLE IF NOT EXISTS image_resources (
|
|
|
168
248
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
169
249
|
);
|
|
170
250
|
|
|
171
|
-
CREATE INDEX
|
|
172
|
-
CREATE INDEX
|
|
173
|
-
CREATE INDEX
|
|
174
|
-
CREATE INDEX
|
|
175
|
-
CREATE INDEX
|
|
251
|
+
CREATE INDEX idx_image_resources_tenant ON image_resources (tenant_id);
|
|
252
|
+
CREATE INDEX idx_image_resources_user ON image_resources (user_id);
|
|
253
|
+
CREATE INDEX idx_image_resources_graph_edges ON image_resources USING GIN (graph_edges);
|
|
254
|
+
CREATE INDEX idx_image_resources_metadata ON image_resources USING GIN (metadata);
|
|
255
|
+
CREATE INDEX idx_image_resources_tags ON image_resources USING GIN (tags);
|
|
176
256
|
|
|
177
257
|
-- Embeddings for image_resources
|
|
178
258
|
CREATE TABLE IF NOT EXISTS embeddings_image_resources (
|
|
@@ -190,14 +270,14 @@ CREATE TABLE IF NOT EXISTS embeddings_image_resources (
|
|
|
190
270
|
);
|
|
191
271
|
|
|
192
272
|
-- Index for entity lookup (get all embeddings for entity)
|
|
193
|
-
CREATE INDEX
|
|
273
|
+
CREATE INDEX idx_embeddings_image_resources_entity ON embeddings_image_resources (entity_id);
|
|
194
274
|
|
|
195
275
|
-- Index for field + provider lookup
|
|
196
|
-
CREATE INDEX
|
|
276
|
+
CREATE INDEX idx_embeddings_image_resources_field_provider ON embeddings_image_resources (field_name, provider);
|
|
197
277
|
|
|
198
278
|
-- HNSW index for vector similarity search (created in background)
|
|
199
279
|
-- Note: This will be created by background thread after data load
|
|
200
|
-
-- CREATE INDEX
|
|
280
|
+
-- CREATE INDEX idx_embeddings_image_resources_vector_hnsw ON embeddings_image_resources
|
|
201
281
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
202
282
|
|
|
203
283
|
-- KV_STORE trigger for image_resources
|
|
@@ -222,7 +302,7 @@ BEGIN
|
|
|
222
302
|
graph_edges,
|
|
223
303
|
updated_at
|
|
224
304
|
) VALUES (
|
|
225
|
-
NEW.name,
|
|
305
|
+
NEW.name::VARCHAR,
|
|
226
306
|
'image_resources',
|
|
227
307
|
NEW.id,
|
|
228
308
|
NEW.tenant_id,
|
|
@@ -250,6 +330,112 @@ CREATE TRIGGER trg_image_resources_kv_store
|
|
|
250
330
|
AFTER INSERT OR UPDATE OR DELETE ON image_resources
|
|
251
331
|
FOR EACH ROW EXECUTE FUNCTION fn_image_resources_kv_store_upsert();
|
|
252
332
|
|
|
333
|
+
-- ======================================================================
|
|
334
|
+
-- MESSAGES (Model: Message)
|
|
335
|
+
-- ======================================================================
|
|
336
|
+
|
|
337
|
+
CREATE TABLE IF NOT EXISTS messages (
|
|
338
|
+
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
339
|
+
tenant_id VARCHAR(100) NOT NULL,
|
|
340
|
+
user_id VARCHAR(256),
|
|
341
|
+
content TEXT NOT NULL,
|
|
342
|
+
message_type VARCHAR(256),
|
|
343
|
+
session_id VARCHAR(256),
|
|
344
|
+
prompt TEXT,
|
|
345
|
+
model VARCHAR(256),
|
|
346
|
+
token_count INTEGER,
|
|
347
|
+
trace_id VARCHAR(256),
|
|
348
|
+
span_id VARCHAR(256),
|
|
349
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
350
|
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
351
|
+
deleted_at TIMESTAMP,
|
|
352
|
+
graph_edges JSONB DEFAULT '[]'::jsonb,
|
|
353
|
+
metadata JSONB DEFAULT '{}'::jsonb,
|
|
354
|
+
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
355
|
+
);
|
|
356
|
+
|
|
357
|
+
CREATE INDEX idx_messages_tenant ON messages (tenant_id);
|
|
358
|
+
CREATE INDEX idx_messages_user ON messages (user_id);
|
|
359
|
+
CREATE INDEX idx_messages_graph_edges ON messages USING GIN (graph_edges);
|
|
360
|
+
CREATE INDEX idx_messages_metadata ON messages USING GIN (metadata);
|
|
361
|
+
CREATE INDEX idx_messages_tags ON messages USING GIN (tags);
|
|
362
|
+
|
|
363
|
+
-- Embeddings for messages
|
|
364
|
+
CREATE TABLE IF NOT EXISTS embeddings_messages (
|
|
365
|
+
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
366
|
+
entity_id UUID NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
|
|
367
|
+
field_name VARCHAR(100) NOT NULL,
|
|
368
|
+
provider VARCHAR(50) NOT NULL DEFAULT 'openai',
|
|
369
|
+
model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
|
|
370
|
+
embedding vector(1536) NOT NULL,
|
|
371
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
372
|
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
373
|
+
|
|
374
|
+
-- Unique: one embedding per entity per field per provider
|
|
375
|
+
UNIQUE (entity_id, field_name, provider)
|
|
376
|
+
);
|
|
377
|
+
|
|
378
|
+
-- Index for entity lookup (get all embeddings for entity)
|
|
379
|
+
CREATE INDEX idx_embeddings_messages_entity ON embeddings_messages (entity_id);
|
|
380
|
+
|
|
381
|
+
-- Index for field + provider lookup
|
|
382
|
+
CREATE INDEX idx_embeddings_messages_field_provider ON embeddings_messages (field_name, provider);
|
|
383
|
+
|
|
384
|
+
-- HNSW index for vector similarity search (created in background)
|
|
385
|
+
-- Note: This will be created by background thread after data load
|
|
386
|
+
-- CREATE INDEX idx_embeddings_messages_vector_hnsw ON embeddings_messages
|
|
387
|
+
-- USING hnsw (embedding vector_cosine_ops);
|
|
388
|
+
|
|
389
|
+
-- KV_STORE trigger for messages
|
|
390
|
+
-- Trigger function to maintain KV_STORE for messages
|
|
391
|
+
CREATE OR REPLACE FUNCTION fn_messages_kv_store_upsert()
|
|
392
|
+
RETURNS TRIGGER AS $$
|
|
393
|
+
BEGIN
|
|
394
|
+
IF (TG_OP = 'DELETE') THEN
|
|
395
|
+
-- Remove from KV_STORE on delete
|
|
396
|
+
DELETE FROM kv_store
|
|
397
|
+
WHERE entity_id = OLD.id;
|
|
398
|
+
RETURN OLD;
|
|
399
|
+
ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
|
|
400
|
+
-- Upsert to KV_STORE (O(1) lookup by entity_key)
|
|
401
|
+
INSERT INTO kv_store (
|
|
402
|
+
entity_key,
|
|
403
|
+
entity_type,
|
|
404
|
+
entity_id,
|
|
405
|
+
tenant_id,
|
|
406
|
+
user_id,
|
|
407
|
+
metadata,
|
|
408
|
+
graph_edges,
|
|
409
|
+
updated_at
|
|
410
|
+
) VALUES (
|
|
411
|
+
NEW.id::VARCHAR,
|
|
412
|
+
'messages',
|
|
413
|
+
NEW.id,
|
|
414
|
+
NEW.tenant_id,
|
|
415
|
+
NEW.user_id,
|
|
416
|
+
NEW.metadata,
|
|
417
|
+
COALESCE(NEW.graph_edges, '[]'::jsonb),
|
|
418
|
+
CURRENT_TIMESTAMP
|
|
419
|
+
)
|
|
420
|
+
ON CONFLICT (tenant_id, entity_key)
|
|
421
|
+
DO UPDATE SET
|
|
422
|
+
entity_id = EXCLUDED.entity_id,
|
|
423
|
+
user_id = EXCLUDED.user_id,
|
|
424
|
+
metadata = EXCLUDED.metadata,
|
|
425
|
+
graph_edges = EXCLUDED.graph_edges,
|
|
426
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
427
|
+
|
|
428
|
+
RETURN NEW;
|
|
429
|
+
END IF;
|
|
430
|
+
END;
|
|
431
|
+
$$ LANGUAGE plpgsql;
|
|
432
|
+
|
|
433
|
+
-- Create trigger
|
|
434
|
+
DROP TRIGGER IF EXISTS trg_messages_kv_store ON messages;
|
|
435
|
+
CREATE TRIGGER trg_messages_kv_store
|
|
436
|
+
AFTER INSERT OR UPDATE OR DELETE ON messages
|
|
437
|
+
FOR EACH ROW EXECUTE FUNCTION fn_messages_kv_store_upsert();
|
|
438
|
+
|
|
253
439
|
-- ======================================================================
|
|
254
440
|
-- MOMENTS (Model: Moment)
|
|
255
441
|
-- ======================================================================
|
|
@@ -258,7 +444,7 @@ CREATE TABLE IF NOT EXISTS moments (
|
|
|
258
444
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
259
445
|
tenant_id VARCHAR(100) NOT NULL,
|
|
260
446
|
user_id VARCHAR(256),
|
|
261
|
-
name VARCHAR(256)
|
|
447
|
+
name VARCHAR(256),
|
|
262
448
|
moment_type VARCHAR(256),
|
|
263
449
|
category VARCHAR(256),
|
|
264
450
|
starts_timestamp TIMESTAMP NOT NULL,
|
|
@@ -276,11 +462,11 @@ CREATE TABLE IF NOT EXISTS moments (
|
|
|
276
462
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
277
463
|
);
|
|
278
464
|
|
|
279
|
-
CREATE INDEX
|
|
280
|
-
CREATE INDEX
|
|
281
|
-
CREATE INDEX
|
|
282
|
-
CREATE INDEX
|
|
283
|
-
CREATE INDEX
|
|
465
|
+
CREATE INDEX idx_moments_tenant ON moments (tenant_id);
|
|
466
|
+
CREATE INDEX idx_moments_user ON moments (user_id);
|
|
467
|
+
CREATE INDEX idx_moments_graph_edges ON moments USING GIN (graph_edges);
|
|
468
|
+
CREATE INDEX idx_moments_metadata ON moments USING GIN (metadata);
|
|
469
|
+
CREATE INDEX idx_moments_tags ON moments USING GIN (tags);
|
|
284
470
|
|
|
285
471
|
-- Embeddings for moments
|
|
286
472
|
CREATE TABLE IF NOT EXISTS embeddings_moments (
|
|
@@ -298,14 +484,14 @@ CREATE TABLE IF NOT EXISTS embeddings_moments (
|
|
|
298
484
|
);
|
|
299
485
|
|
|
300
486
|
-- Index for entity lookup (get all embeddings for entity)
|
|
301
|
-
CREATE INDEX
|
|
487
|
+
CREATE INDEX idx_embeddings_moments_entity ON embeddings_moments (entity_id);
|
|
302
488
|
|
|
303
489
|
-- Index for field + provider lookup
|
|
304
|
-
CREATE INDEX
|
|
490
|
+
CREATE INDEX idx_embeddings_moments_field_provider ON embeddings_moments (field_name, provider);
|
|
305
491
|
|
|
306
492
|
-- HNSW index for vector similarity search (created in background)
|
|
307
493
|
-- Note: This will be created by background thread after data load
|
|
308
|
-
-- CREATE INDEX
|
|
494
|
+
-- CREATE INDEX idx_embeddings_moments_vector_hnsw ON embeddings_moments
|
|
309
495
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
310
496
|
|
|
311
497
|
-- KV_STORE trigger for moments
|
|
@@ -330,7 +516,7 @@ BEGIN
|
|
|
330
516
|
graph_edges,
|
|
331
517
|
updated_at
|
|
332
518
|
) VALUES (
|
|
333
|
-
NEW.name,
|
|
519
|
+
NEW.name::VARCHAR,
|
|
334
520
|
'moments',
|
|
335
521
|
NEW.id,
|
|
336
522
|
NEW.tenant_id,
|
|
@@ -359,15 +545,22 @@ AFTER INSERT OR UPDATE OR DELETE ON moments
|
|
|
359
545
|
FOR EACH ROW EXECUTE FUNCTION fn_moments_kv_store_upsert();
|
|
360
546
|
|
|
361
547
|
-- ======================================================================
|
|
362
|
-
--
|
|
548
|
+
-- ONTOLOGIES (Model: Ontology)
|
|
363
549
|
-- ======================================================================
|
|
364
550
|
|
|
365
|
-
CREATE TABLE IF NOT EXISTS
|
|
551
|
+
CREATE TABLE IF NOT EXISTS ontologies (
|
|
366
552
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
367
553
|
tenant_id VARCHAR(100) NOT NULL,
|
|
368
554
|
user_id VARCHAR(256),
|
|
369
555
|
name VARCHAR(256) NOT NULL,
|
|
370
|
-
|
|
556
|
+
file_id UUID NOT NULL,
|
|
557
|
+
agent_schema_id VARCHAR(256) NOT NULL,
|
|
558
|
+
provider_name VARCHAR(256) NOT NULL,
|
|
559
|
+
model_name VARCHAR(256) NOT NULL,
|
|
560
|
+
extracted_data JSONB NOT NULL,
|
|
561
|
+
confidence_score DOUBLE PRECISION,
|
|
562
|
+
extraction_timestamp VARCHAR(256),
|
|
563
|
+
embedding_text TEXT,
|
|
371
564
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
372
565
|
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
373
566
|
deleted_at TIMESTAMP,
|
|
@@ -376,15 +569,15 @@ CREATE TABLE IF NOT EXISTS persons (
|
|
|
376
569
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
377
570
|
);
|
|
378
571
|
|
|
379
|
-
CREATE INDEX
|
|
380
|
-
CREATE INDEX
|
|
381
|
-
CREATE INDEX
|
|
382
|
-
CREATE INDEX
|
|
383
|
-
CREATE INDEX
|
|
572
|
+
CREATE INDEX idx_ontologies_tenant ON ontologies (tenant_id);
|
|
573
|
+
CREATE INDEX idx_ontologies_user ON ontologies (user_id);
|
|
574
|
+
CREATE INDEX idx_ontologies_graph_edges ON ontologies USING GIN (graph_edges);
|
|
575
|
+
CREATE INDEX idx_ontologies_metadata ON ontologies USING GIN (metadata);
|
|
576
|
+
CREATE INDEX idx_ontologies_tags ON ontologies USING GIN (tags);
|
|
384
577
|
|
|
385
|
-
-- KV_STORE trigger for
|
|
386
|
-
-- Trigger function to maintain KV_STORE for
|
|
387
|
-
CREATE OR REPLACE FUNCTION
|
|
578
|
+
-- KV_STORE trigger for ontologies
|
|
579
|
+
-- Trigger function to maintain KV_STORE for ontologies
|
|
580
|
+
CREATE OR REPLACE FUNCTION fn_ontologies_kv_store_upsert()
|
|
388
581
|
RETURNS TRIGGER AS $$
|
|
389
582
|
BEGIN
|
|
390
583
|
IF (TG_OP = 'DELETE') THEN
|
|
@@ -404,8 +597,8 @@ BEGIN
|
|
|
404
597
|
graph_edges,
|
|
405
598
|
updated_at
|
|
406
599
|
) VALUES (
|
|
407
|
-
NEW.
|
|
408
|
-
'
|
|
600
|
+
NEW.id::VARCHAR,
|
|
601
|
+
'ontologies',
|
|
409
602
|
NEW.id,
|
|
410
603
|
NEW.tenant_id,
|
|
411
604
|
NEW.user_id,
|
|
@@ -427,26 +620,29 @@ END;
|
|
|
427
620
|
$$ LANGUAGE plpgsql;
|
|
428
621
|
|
|
429
622
|
-- Create trigger
|
|
430
|
-
DROP TRIGGER IF EXISTS
|
|
431
|
-
CREATE TRIGGER
|
|
432
|
-
AFTER INSERT OR UPDATE OR DELETE ON
|
|
433
|
-
FOR EACH ROW EXECUTE FUNCTION
|
|
623
|
+
DROP TRIGGER IF EXISTS trg_ontologies_kv_store ON ontologies;
|
|
624
|
+
CREATE TRIGGER trg_ontologies_kv_store
|
|
625
|
+
AFTER INSERT OR UPDATE OR DELETE ON ontologies
|
|
626
|
+
FOR EACH ROW EXECUTE FUNCTION fn_ontologies_kv_store_upsert();
|
|
434
627
|
|
|
435
628
|
-- ======================================================================
|
|
436
|
-
--
|
|
629
|
+
-- ONTOLOGY_CONFIGS (Model: OntologyConfig)
|
|
437
630
|
-- ======================================================================
|
|
438
631
|
|
|
439
|
-
CREATE TABLE IF NOT EXISTS
|
|
632
|
+
CREATE TABLE IF NOT EXISTS ontology_configs (
|
|
440
633
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
441
634
|
tenant_id VARCHAR(100) NOT NULL,
|
|
442
635
|
user_id VARCHAR(256),
|
|
443
636
|
name VARCHAR(256) NOT NULL,
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
637
|
+
agent_schema_id VARCHAR(256) NOT NULL,
|
|
638
|
+
description TEXT,
|
|
639
|
+
mime_type_pattern VARCHAR(256),
|
|
640
|
+
uri_pattern VARCHAR(256),
|
|
641
|
+
tag_filter TEXT[],
|
|
642
|
+
priority INTEGER,
|
|
643
|
+
enabled BOOLEAN,
|
|
644
|
+
provider_name VARCHAR(256),
|
|
645
|
+
model_name VARCHAR(256),
|
|
450
646
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
451
647
|
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
452
648
|
deleted_at TIMESTAMP,
|
|
@@ -455,16 +651,16 @@ CREATE TABLE IF NOT EXISTS resources (
|
|
|
455
651
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
456
652
|
);
|
|
457
653
|
|
|
458
|
-
CREATE INDEX
|
|
459
|
-
CREATE INDEX
|
|
460
|
-
CREATE INDEX
|
|
461
|
-
CREATE INDEX
|
|
462
|
-
CREATE INDEX
|
|
654
|
+
CREATE INDEX idx_ontology_configs_tenant ON ontology_configs (tenant_id);
|
|
655
|
+
CREATE INDEX idx_ontology_configs_user ON ontology_configs (user_id);
|
|
656
|
+
CREATE INDEX idx_ontology_configs_graph_edges ON ontology_configs USING GIN (graph_edges);
|
|
657
|
+
CREATE INDEX idx_ontology_configs_metadata ON ontology_configs USING GIN (metadata);
|
|
658
|
+
CREATE INDEX idx_ontology_configs_tags ON ontology_configs USING GIN (tags);
|
|
463
659
|
|
|
464
|
-
-- Embeddings for
|
|
465
|
-
CREATE TABLE IF NOT EXISTS
|
|
660
|
+
-- Embeddings for ontology_configs
|
|
661
|
+
CREATE TABLE IF NOT EXISTS embeddings_ontology_configs (
|
|
466
662
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
467
|
-
entity_id UUID NOT NULL REFERENCES
|
|
663
|
+
entity_id UUID NOT NULL REFERENCES ontology_configs(id) ON DELETE CASCADE,
|
|
468
664
|
field_name VARCHAR(100) NOT NULL,
|
|
469
665
|
provider VARCHAR(50) NOT NULL DEFAULT 'openai',
|
|
470
666
|
model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
|
|
@@ -477,19 +673,19 @@ CREATE TABLE IF NOT EXISTS embeddings_resources (
|
|
|
477
673
|
);
|
|
478
674
|
|
|
479
675
|
-- Index for entity lookup (get all embeddings for entity)
|
|
480
|
-
CREATE INDEX
|
|
676
|
+
CREATE INDEX idx_embeddings_ontology_configs_entity ON embeddings_ontology_configs (entity_id);
|
|
481
677
|
|
|
482
678
|
-- Index for field + provider lookup
|
|
483
|
-
CREATE INDEX
|
|
679
|
+
CREATE INDEX idx_embeddings_ontology_configs_field_provider ON embeddings_ontology_configs (field_name, provider);
|
|
484
680
|
|
|
485
681
|
-- HNSW index for vector similarity search (created in background)
|
|
486
682
|
-- Note: This will be created by background thread after data load
|
|
487
|
-
-- CREATE INDEX
|
|
683
|
+
-- CREATE INDEX idx_embeddings_ontology_configs_vector_hnsw ON embeddings_ontology_configs
|
|
488
684
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
489
685
|
|
|
490
|
-
-- KV_STORE trigger for
|
|
491
|
-
-- Trigger function to maintain KV_STORE for
|
|
492
|
-
CREATE OR REPLACE FUNCTION
|
|
686
|
+
-- KV_STORE trigger for ontology_configs
|
|
687
|
+
-- Trigger function to maintain KV_STORE for ontology_configs
|
|
688
|
+
CREATE OR REPLACE FUNCTION fn_ontology_configs_kv_store_upsert()
|
|
493
689
|
RETURNS TRIGGER AS $$
|
|
494
690
|
BEGIN
|
|
495
691
|
IF (TG_OP = 'DELETE') THEN
|
|
@@ -509,8 +705,8 @@ BEGIN
|
|
|
509
705
|
graph_edges,
|
|
510
706
|
updated_at
|
|
511
707
|
) VALUES (
|
|
512
|
-
NEW.
|
|
513
|
-
'
|
|
708
|
+
NEW.id::VARCHAR,
|
|
709
|
+
'ontology_configs',
|
|
514
710
|
NEW.id,
|
|
515
711
|
NEW.tenant_id,
|
|
516
712
|
NEW.user_id,
|
|
@@ -532,22 +728,26 @@ END;
|
|
|
532
728
|
$$ LANGUAGE plpgsql;
|
|
533
729
|
|
|
534
730
|
-- Create trigger
|
|
535
|
-
DROP TRIGGER IF EXISTS
|
|
536
|
-
CREATE TRIGGER
|
|
537
|
-
AFTER INSERT OR UPDATE OR DELETE ON
|
|
538
|
-
FOR EACH ROW EXECUTE FUNCTION
|
|
731
|
+
DROP TRIGGER IF EXISTS trg_ontology_configs_kv_store ON ontology_configs;
|
|
732
|
+
CREATE TRIGGER trg_ontology_configs_kv_store
|
|
733
|
+
AFTER INSERT OR UPDATE OR DELETE ON ontology_configs
|
|
734
|
+
FOR EACH ROW EXECUTE FUNCTION fn_ontology_configs_kv_store_upsert();
|
|
539
735
|
|
|
540
736
|
-- ======================================================================
|
|
541
|
-
--
|
|
737
|
+
-- RESOURCES (Model: Resource)
|
|
542
738
|
-- ======================================================================
|
|
543
739
|
|
|
544
|
-
CREATE TABLE IF NOT EXISTS
|
|
740
|
+
CREATE TABLE IF NOT EXISTS resources (
|
|
545
741
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
546
742
|
tenant_id VARCHAR(100) NOT NULL,
|
|
547
743
|
user_id VARCHAR(256),
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
744
|
+
name VARCHAR(256),
|
|
745
|
+
uri VARCHAR(256),
|
|
746
|
+
ordinal INTEGER,
|
|
747
|
+
content TEXT,
|
|
748
|
+
timestamp TIMESTAMP,
|
|
749
|
+
category VARCHAR(256),
|
|
750
|
+
related_entities JSONB DEFAULT '{}'::jsonb,
|
|
551
751
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
552
752
|
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
553
753
|
deleted_at TIMESTAMP,
|
|
@@ -556,16 +756,16 @@ CREATE TABLE IF NOT EXISTS messages (
|
|
|
556
756
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
557
757
|
);
|
|
558
758
|
|
|
559
|
-
CREATE INDEX
|
|
560
|
-
CREATE INDEX
|
|
561
|
-
CREATE INDEX
|
|
562
|
-
CREATE INDEX
|
|
563
|
-
CREATE INDEX
|
|
759
|
+
CREATE INDEX idx_resources_tenant ON resources (tenant_id);
|
|
760
|
+
CREATE INDEX idx_resources_user ON resources (user_id);
|
|
761
|
+
CREATE INDEX idx_resources_graph_edges ON resources USING GIN (graph_edges);
|
|
762
|
+
CREATE INDEX idx_resources_metadata ON resources USING GIN (metadata);
|
|
763
|
+
CREATE INDEX idx_resources_tags ON resources USING GIN (tags);
|
|
564
764
|
|
|
565
|
-
-- Embeddings for
|
|
566
|
-
CREATE TABLE IF NOT EXISTS
|
|
765
|
+
-- Embeddings for resources
|
|
766
|
+
CREATE TABLE IF NOT EXISTS embeddings_resources (
|
|
567
767
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
568
|
-
entity_id UUID NOT NULL REFERENCES
|
|
768
|
+
entity_id UUID NOT NULL REFERENCES resources(id) ON DELETE CASCADE,
|
|
569
769
|
field_name VARCHAR(100) NOT NULL,
|
|
570
770
|
provider VARCHAR(50) NOT NULL DEFAULT 'openai',
|
|
571
771
|
model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
|
|
@@ -578,19 +778,19 @@ CREATE TABLE IF NOT EXISTS embeddings_messages (
|
|
|
578
778
|
);
|
|
579
779
|
|
|
580
780
|
-- Index for entity lookup (get all embeddings for entity)
|
|
581
|
-
CREATE INDEX
|
|
781
|
+
CREATE INDEX idx_embeddings_resources_entity ON embeddings_resources (entity_id);
|
|
582
782
|
|
|
583
783
|
-- Index for field + provider lookup
|
|
584
|
-
CREATE INDEX
|
|
784
|
+
CREATE INDEX idx_embeddings_resources_field_provider ON embeddings_resources (field_name, provider);
|
|
585
785
|
|
|
586
786
|
-- HNSW index for vector similarity search (created in background)
|
|
587
787
|
-- Note: This will be created by background thread after data load
|
|
588
|
-
-- CREATE INDEX
|
|
788
|
+
-- CREATE INDEX idx_embeddings_resources_vector_hnsw ON embeddings_resources
|
|
589
789
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
590
790
|
|
|
591
|
-
-- KV_STORE trigger for
|
|
592
|
-
-- Trigger function to maintain KV_STORE for
|
|
593
|
-
CREATE OR REPLACE FUNCTION
|
|
791
|
+
-- KV_STORE trigger for resources
|
|
792
|
+
-- Trigger function to maintain KV_STORE for resources
|
|
793
|
+
CREATE OR REPLACE FUNCTION fn_resources_kv_store_upsert()
|
|
594
794
|
RETURNS TRIGGER AS $$
|
|
595
795
|
BEGIN
|
|
596
796
|
IF (TG_OP = 'DELETE') THEN
|
|
@@ -610,8 +810,8 @@ BEGIN
|
|
|
610
810
|
graph_edges,
|
|
611
811
|
updated_at
|
|
612
812
|
) VALUES (
|
|
613
|
-
NEW.
|
|
614
|
-
'
|
|
813
|
+
NEW.name::VARCHAR,
|
|
814
|
+
'resources',
|
|
615
815
|
NEW.id,
|
|
616
816
|
NEW.tenant_id,
|
|
617
817
|
NEW.user_id,
|
|
@@ -633,26 +833,25 @@ END;
|
|
|
633
833
|
$$ LANGUAGE plpgsql;
|
|
634
834
|
|
|
635
835
|
-- Create trigger
|
|
636
|
-
DROP TRIGGER IF EXISTS
|
|
637
|
-
CREATE TRIGGER
|
|
638
|
-
AFTER INSERT OR UPDATE OR DELETE ON
|
|
639
|
-
FOR EACH ROW EXECUTE FUNCTION
|
|
836
|
+
DROP TRIGGER IF EXISTS trg_resources_kv_store ON resources;
|
|
837
|
+
CREATE TRIGGER trg_resources_kv_store
|
|
838
|
+
AFTER INSERT OR UPDATE OR DELETE ON resources
|
|
839
|
+
FOR EACH ROW EXECUTE FUNCTION fn_resources_kv_store_upsert();
|
|
640
840
|
|
|
641
841
|
-- ======================================================================
|
|
642
|
-
--
|
|
842
|
+
-- SCHEMAS (Model: Schema)
|
|
643
843
|
-- ======================================================================
|
|
644
844
|
|
|
645
|
-
CREATE TABLE IF NOT EXISTS
|
|
845
|
+
CREATE TABLE IF NOT EXISTS schemas (
|
|
646
846
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
647
847
|
tenant_id VARCHAR(100) NOT NULL,
|
|
648
848
|
user_id VARCHAR(256),
|
|
649
849
|
name VARCHAR(256) NOT NULL,
|
|
650
|
-
uri VARCHAR(256) NOT NULL,
|
|
651
850
|
content TEXT,
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
851
|
+
spec JSONB NOT NULL,
|
|
852
|
+
category VARCHAR(256),
|
|
853
|
+
provider_configs JSONB DEFAULT '{}'::jsonb,
|
|
854
|
+
embedding_fields TEXT[] DEFAULT ARRAY[]::TEXT[],
|
|
656
855
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
657
856
|
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
658
857
|
deleted_at TIMESTAMP,
|
|
@@ -661,16 +860,16 @@ CREATE TABLE IF NOT EXISTS files (
|
|
|
661
860
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
662
861
|
);
|
|
663
862
|
|
|
664
|
-
CREATE INDEX
|
|
665
|
-
CREATE INDEX
|
|
666
|
-
CREATE INDEX
|
|
667
|
-
CREATE INDEX
|
|
668
|
-
CREATE INDEX
|
|
863
|
+
CREATE INDEX idx_schemas_tenant ON schemas (tenant_id);
|
|
864
|
+
CREATE INDEX idx_schemas_user ON schemas (user_id);
|
|
865
|
+
CREATE INDEX idx_schemas_graph_edges ON schemas USING GIN (graph_edges);
|
|
866
|
+
CREATE INDEX idx_schemas_metadata ON schemas USING GIN (metadata);
|
|
867
|
+
CREATE INDEX idx_schemas_tags ON schemas USING GIN (tags);
|
|
669
868
|
|
|
670
|
-
-- Embeddings for
|
|
671
|
-
CREATE TABLE IF NOT EXISTS
|
|
869
|
+
-- Embeddings for schemas
|
|
870
|
+
CREATE TABLE IF NOT EXISTS embeddings_schemas (
|
|
672
871
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
673
|
-
entity_id UUID NOT NULL REFERENCES
|
|
872
|
+
entity_id UUID NOT NULL REFERENCES schemas(id) ON DELETE CASCADE,
|
|
674
873
|
field_name VARCHAR(100) NOT NULL,
|
|
675
874
|
provider VARCHAR(50) NOT NULL DEFAULT 'openai',
|
|
676
875
|
model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
|
|
@@ -683,19 +882,19 @@ CREATE TABLE IF NOT EXISTS embeddings_files (
|
|
|
683
882
|
);
|
|
684
883
|
|
|
685
884
|
-- Index for entity lookup (get all embeddings for entity)
|
|
686
|
-
CREATE INDEX
|
|
885
|
+
CREATE INDEX idx_embeddings_schemas_entity ON embeddings_schemas (entity_id);
|
|
687
886
|
|
|
688
887
|
-- Index for field + provider lookup
|
|
689
|
-
CREATE INDEX
|
|
888
|
+
CREATE INDEX idx_embeddings_schemas_field_provider ON embeddings_schemas (field_name, provider);
|
|
690
889
|
|
|
691
890
|
-- HNSW index for vector similarity search (created in background)
|
|
692
891
|
-- Note: This will be created by background thread after data load
|
|
693
|
-
-- CREATE INDEX
|
|
892
|
+
-- CREATE INDEX idx_embeddings_schemas_vector_hnsw ON embeddings_schemas
|
|
694
893
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
695
894
|
|
|
696
|
-
-- KV_STORE trigger for
|
|
697
|
-
-- Trigger function to maintain KV_STORE for
|
|
698
|
-
CREATE OR REPLACE FUNCTION
|
|
895
|
+
-- KV_STORE trigger for schemas
|
|
896
|
+
-- Trigger function to maintain KV_STORE for schemas
|
|
897
|
+
CREATE OR REPLACE FUNCTION fn_schemas_kv_store_upsert()
|
|
699
898
|
RETURNS TRIGGER AS $$
|
|
700
899
|
BEGIN
|
|
701
900
|
IF (TG_OP = 'DELETE') THEN
|
|
@@ -715,8 +914,8 @@ BEGIN
|
|
|
715
914
|
graph_edges,
|
|
716
915
|
updated_at
|
|
717
916
|
) VALUES (
|
|
718
|
-
NEW.
|
|
719
|
-
'
|
|
917
|
+
NEW.id::VARCHAR,
|
|
918
|
+
'schemas',
|
|
720
919
|
NEW.id,
|
|
721
920
|
NEW.tenant_id,
|
|
722
921
|
NEW.user_id,
|
|
@@ -738,28 +937,28 @@ END;
|
|
|
738
937
|
$$ LANGUAGE plpgsql;
|
|
739
938
|
|
|
740
939
|
-- Create trigger
|
|
741
|
-
DROP TRIGGER IF EXISTS
|
|
742
|
-
CREATE TRIGGER
|
|
743
|
-
AFTER INSERT OR UPDATE OR DELETE ON
|
|
744
|
-
FOR EACH ROW EXECUTE FUNCTION
|
|
940
|
+
DROP TRIGGER IF EXISTS trg_schemas_kv_store ON schemas;
|
|
941
|
+
CREATE TRIGGER trg_schemas_kv_store
|
|
942
|
+
AFTER INSERT OR UPDATE OR DELETE ON schemas
|
|
943
|
+
FOR EACH ROW EXECUTE FUNCTION fn_schemas_kv_store_upsert();
|
|
745
944
|
|
|
746
945
|
-- ======================================================================
|
|
747
|
-
--
|
|
946
|
+
-- SESSIONS (Model: Session)
|
|
748
947
|
-- ======================================================================
|
|
749
948
|
|
|
750
|
-
CREATE TABLE IF NOT EXISTS
|
|
949
|
+
CREATE TABLE IF NOT EXISTS sessions (
|
|
751
950
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
752
951
|
tenant_id VARCHAR(100) NOT NULL,
|
|
753
952
|
user_id VARCHAR(256),
|
|
754
953
|
name VARCHAR(256) NOT NULL,
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
954
|
+
mode TEXT,
|
|
955
|
+
description TEXT,
|
|
956
|
+
original_trace_id VARCHAR(256),
|
|
957
|
+
settings_overrides JSONB,
|
|
958
|
+
prompt TEXT,
|
|
959
|
+
agent_schema_uri VARCHAR(256),
|
|
960
|
+
message_count INTEGER,
|
|
961
|
+
total_tokens INTEGER,
|
|
763
962
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
764
963
|
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
765
964
|
deleted_at TIMESTAMP,
|
|
@@ -768,15 +967,41 @@ CREATE TABLE IF NOT EXISTS ontologies (
|
|
|
768
967
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
769
968
|
);
|
|
770
969
|
|
|
771
|
-
CREATE INDEX
|
|
772
|
-
CREATE INDEX
|
|
773
|
-
CREATE INDEX
|
|
774
|
-
CREATE INDEX
|
|
775
|
-
CREATE INDEX
|
|
970
|
+
CREATE INDEX idx_sessions_tenant ON sessions (tenant_id);
|
|
971
|
+
CREATE INDEX idx_sessions_user ON sessions (user_id);
|
|
972
|
+
CREATE INDEX idx_sessions_graph_edges ON sessions USING GIN (graph_edges);
|
|
973
|
+
CREATE INDEX idx_sessions_metadata ON sessions USING GIN (metadata);
|
|
974
|
+
CREATE INDEX idx_sessions_tags ON sessions USING GIN (tags);
|
|
776
975
|
|
|
777
|
-
--
|
|
778
|
-
|
|
779
|
-
|
|
976
|
+
-- Embeddings for sessions
|
|
977
|
+
CREATE TABLE IF NOT EXISTS embeddings_sessions (
|
|
978
|
+
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
979
|
+
entity_id UUID NOT NULL REFERENCES sessions(id) ON DELETE CASCADE,
|
|
980
|
+
field_name VARCHAR(100) NOT NULL,
|
|
981
|
+
provider VARCHAR(50) NOT NULL DEFAULT 'openai',
|
|
982
|
+
model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
|
|
983
|
+
embedding vector(1536) NOT NULL,
|
|
984
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
985
|
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
986
|
+
|
|
987
|
+
-- Unique: one embedding per entity per field per provider
|
|
988
|
+
UNIQUE (entity_id, field_name, provider)
|
|
989
|
+
);
|
|
990
|
+
|
|
991
|
+
-- Index for entity lookup (get all embeddings for entity)
|
|
992
|
+
CREATE INDEX idx_embeddings_sessions_entity ON embeddings_sessions (entity_id);
|
|
993
|
+
|
|
994
|
+
-- Index for field + provider lookup
|
|
995
|
+
CREATE INDEX idx_embeddings_sessions_field_provider ON embeddings_sessions (field_name, provider);
|
|
996
|
+
|
|
997
|
+
-- HNSW index for vector similarity search (created in background)
|
|
998
|
+
-- Note: This will be created by background thread after data load
|
|
999
|
+
-- CREATE INDEX idx_embeddings_sessions_vector_hnsw ON embeddings_sessions
|
|
1000
|
+
-- USING hnsw (embedding vector_cosine_ops);
|
|
1001
|
+
|
|
1002
|
+
-- KV_STORE trigger for sessions
|
|
1003
|
+
-- Trigger function to maintain KV_STORE for sessions
|
|
1004
|
+
CREATE OR REPLACE FUNCTION fn_sessions_kv_store_upsert()
|
|
780
1005
|
RETURNS TRIGGER AS $$
|
|
781
1006
|
BEGIN
|
|
782
1007
|
IF (TG_OP = 'DELETE') THEN
|
|
@@ -796,8 +1021,8 @@ BEGIN
|
|
|
796
1021
|
graph_edges,
|
|
797
1022
|
updated_at
|
|
798
1023
|
) VALUES (
|
|
799
|
-
NEW.name,
|
|
800
|
-
'
|
|
1024
|
+
NEW.name::VARCHAR,
|
|
1025
|
+
'sessions',
|
|
801
1026
|
NEW.id,
|
|
802
1027
|
NEW.tenant_id,
|
|
803
1028
|
NEW.user_id,
|
|
@@ -819,29 +1044,22 @@ END;
|
|
|
819
1044
|
$$ LANGUAGE plpgsql;
|
|
820
1045
|
|
|
821
1046
|
-- Create trigger
|
|
822
|
-
DROP TRIGGER IF EXISTS
|
|
823
|
-
CREATE TRIGGER
|
|
824
|
-
AFTER INSERT OR UPDATE OR DELETE ON
|
|
825
|
-
FOR EACH ROW EXECUTE FUNCTION
|
|
1047
|
+
DROP TRIGGER IF EXISTS trg_sessions_kv_store ON sessions;
|
|
1048
|
+
CREATE TRIGGER trg_sessions_kv_store
|
|
1049
|
+
AFTER INSERT OR UPDATE OR DELETE ON sessions
|
|
1050
|
+
FOR EACH ROW EXECUTE FUNCTION fn_sessions_kv_store_upsert();
|
|
826
1051
|
|
|
827
1052
|
-- ======================================================================
|
|
828
|
-
--
|
|
1053
|
+
-- SHARED_SESSIONS (Model: SharedSession)
|
|
829
1054
|
-- ======================================================================
|
|
830
1055
|
|
|
831
|
-
CREATE TABLE IF NOT EXISTS
|
|
1056
|
+
CREATE TABLE IF NOT EXISTS shared_sessions (
|
|
832
1057
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
833
1058
|
tenant_id VARCHAR(100) NOT NULL,
|
|
834
1059
|
user_id VARCHAR(256),
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
mime_type_pattern VARCHAR(256),
|
|
839
|
-
uri_pattern VARCHAR(256),
|
|
840
|
-
tag_filter TEXT[],
|
|
841
|
-
priority INTEGER,
|
|
842
|
-
enabled BOOLEAN,
|
|
843
|
-
provider_name VARCHAR(256),
|
|
844
|
-
model_name VARCHAR(256),
|
|
1060
|
+
session_id VARCHAR(256) NOT NULL,
|
|
1061
|
+
owner_user_id VARCHAR(256) NOT NULL,
|
|
1062
|
+
shared_with_user_id VARCHAR(256) NOT NULL,
|
|
845
1063
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
846
1064
|
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
847
1065
|
deleted_at TIMESTAMP,
|
|
@@ -850,41 +1068,15 @@ CREATE TABLE IF NOT EXISTS ontology_configs (
|
|
|
850
1068
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
851
1069
|
);
|
|
852
1070
|
|
|
853
|
-
CREATE INDEX
|
|
854
|
-
CREATE INDEX
|
|
855
|
-
CREATE INDEX
|
|
856
|
-
CREATE INDEX
|
|
857
|
-
CREATE INDEX
|
|
858
|
-
|
|
859
|
-
-- Embeddings for ontology_configs
|
|
860
|
-
CREATE TABLE IF NOT EXISTS embeddings_ontology_configs (
|
|
861
|
-
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
862
|
-
entity_id UUID NOT NULL REFERENCES ontology_configs(id) ON DELETE CASCADE,
|
|
863
|
-
field_name VARCHAR(100) NOT NULL,
|
|
864
|
-
provider VARCHAR(50) NOT NULL DEFAULT 'openai',
|
|
865
|
-
model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
|
|
866
|
-
embedding vector(1536) NOT NULL,
|
|
867
|
-
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
868
|
-
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
869
|
-
|
|
870
|
-
-- Unique: one embedding per entity per field per provider
|
|
871
|
-
UNIQUE (entity_id, field_name, provider)
|
|
872
|
-
);
|
|
873
|
-
|
|
874
|
-
-- Index for entity lookup (get all embeddings for entity)
|
|
875
|
-
CREATE INDEX IF NOT EXISTS idx_embeddings_ontology_configs_entity ON embeddings_ontology_configs (entity_id);
|
|
876
|
-
|
|
877
|
-
-- Index for field + provider lookup
|
|
878
|
-
CREATE INDEX IF NOT EXISTS idx_embeddings_ontology_configs_field_provider ON embeddings_ontology_configs (field_name, provider);
|
|
1071
|
+
CREATE INDEX idx_shared_sessions_tenant ON shared_sessions (tenant_id);
|
|
1072
|
+
CREATE INDEX idx_shared_sessions_user ON shared_sessions (user_id);
|
|
1073
|
+
CREATE INDEX idx_shared_sessions_graph_edges ON shared_sessions USING GIN (graph_edges);
|
|
1074
|
+
CREATE INDEX idx_shared_sessions_metadata ON shared_sessions USING GIN (metadata);
|
|
1075
|
+
CREATE INDEX idx_shared_sessions_tags ON shared_sessions USING GIN (tags);
|
|
879
1076
|
|
|
880
|
-
--
|
|
881
|
-
--
|
|
882
|
-
|
|
883
|
-
-- USING hnsw (embedding vector_cosine_ops);
|
|
884
|
-
|
|
885
|
-
-- KV_STORE trigger for ontology_configs
|
|
886
|
-
-- Trigger function to maintain KV_STORE for ontology_configs
|
|
887
|
-
CREATE OR REPLACE FUNCTION fn_ontology_configs_kv_store_upsert()
|
|
1077
|
+
-- KV_STORE trigger for shared_sessions
|
|
1078
|
+
-- Trigger function to maintain KV_STORE for shared_sessions
|
|
1079
|
+
CREATE OR REPLACE FUNCTION fn_shared_sessions_kv_store_upsert()
|
|
888
1080
|
RETURNS TRIGGER AS $$
|
|
889
1081
|
BEGIN
|
|
890
1082
|
IF (TG_OP = 'DELETE') THEN
|
|
@@ -904,8 +1096,8 @@ BEGIN
|
|
|
904
1096
|
graph_edges,
|
|
905
1097
|
updated_at
|
|
906
1098
|
) VALUES (
|
|
907
|
-
NEW.
|
|
908
|
-
'
|
|
1099
|
+
NEW.id::VARCHAR,
|
|
1100
|
+
'shared_sessions',
|
|
909
1101
|
NEW.id,
|
|
910
1102
|
NEW.tenant_id,
|
|
911
1103
|
NEW.user_id,
|
|
@@ -927,25 +1119,30 @@ END;
|
|
|
927
1119
|
$$ LANGUAGE plpgsql;
|
|
928
1120
|
|
|
929
1121
|
-- Create trigger
|
|
930
|
-
DROP TRIGGER IF EXISTS
|
|
931
|
-
CREATE TRIGGER
|
|
932
|
-
AFTER INSERT OR UPDATE OR DELETE ON
|
|
933
|
-
FOR EACH ROW EXECUTE FUNCTION
|
|
1122
|
+
DROP TRIGGER IF EXISTS trg_shared_sessions_kv_store ON shared_sessions;
|
|
1123
|
+
CREATE TRIGGER trg_shared_sessions_kv_store
|
|
1124
|
+
AFTER INSERT OR UPDATE OR DELETE ON shared_sessions
|
|
1125
|
+
FOR EACH ROW EXECUTE FUNCTION fn_shared_sessions_kv_store_upsert();
|
|
934
1126
|
|
|
935
1127
|
-- ======================================================================
|
|
936
|
-
--
|
|
1128
|
+
-- USERS (Model: User)
|
|
937
1129
|
-- ======================================================================
|
|
938
1130
|
|
|
939
|
-
CREATE TABLE IF NOT EXISTS
|
|
1131
|
+
CREATE TABLE IF NOT EXISTS users (
|
|
940
1132
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
941
1133
|
tenant_id VARCHAR(100) NOT NULL,
|
|
942
1134
|
user_id VARCHAR(256),
|
|
943
1135
|
name VARCHAR(256) NOT NULL,
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
1136
|
+
email VARCHAR(256),
|
|
1137
|
+
role VARCHAR(256),
|
|
1138
|
+
tier TEXT,
|
|
1139
|
+
anonymous_ids TEXT[] DEFAULT ARRAY[]::TEXT[],
|
|
1140
|
+
sec_policy JSONB DEFAULT '{}'::jsonb,
|
|
1141
|
+
summary TEXT,
|
|
1142
|
+
interests TEXT[] DEFAULT ARRAY[]::TEXT[],
|
|
1143
|
+
preferred_topics TEXT[] DEFAULT ARRAY[]::TEXT[],
|
|
1144
|
+
activity_level VARCHAR(256),
|
|
1145
|
+
last_active_at TIMESTAMP,
|
|
949
1146
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
950
1147
|
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
951
1148
|
deleted_at TIMESTAMP,
|
|
@@ -954,16 +1151,16 @@ CREATE TABLE IF NOT EXISTS schemas (
|
|
|
954
1151
|
tags TEXT[] DEFAULT ARRAY[]::TEXT[]
|
|
955
1152
|
);
|
|
956
1153
|
|
|
957
|
-
CREATE INDEX
|
|
958
|
-
CREATE INDEX
|
|
959
|
-
CREATE INDEX
|
|
960
|
-
CREATE INDEX
|
|
961
|
-
CREATE INDEX
|
|
1154
|
+
CREATE INDEX idx_users_tenant ON users (tenant_id);
|
|
1155
|
+
CREATE INDEX idx_users_user ON users (user_id);
|
|
1156
|
+
CREATE INDEX idx_users_graph_edges ON users USING GIN (graph_edges);
|
|
1157
|
+
CREATE INDEX idx_users_metadata ON users USING GIN (metadata);
|
|
1158
|
+
CREATE INDEX idx_users_tags ON users USING GIN (tags);
|
|
962
1159
|
|
|
963
|
-
-- Embeddings for
|
|
964
|
-
CREATE TABLE IF NOT EXISTS
|
|
1160
|
+
-- Embeddings for users
|
|
1161
|
+
CREATE TABLE IF NOT EXISTS embeddings_users (
|
|
965
1162
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
966
|
-
entity_id UUID NOT NULL REFERENCES
|
|
1163
|
+
entity_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
|
|
967
1164
|
field_name VARCHAR(100) NOT NULL,
|
|
968
1165
|
provider VARCHAR(50) NOT NULL DEFAULT 'openai',
|
|
969
1166
|
model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
|
|
@@ -976,19 +1173,19 @@ CREATE TABLE IF NOT EXISTS embeddings_schemas (
|
|
|
976
1173
|
);
|
|
977
1174
|
|
|
978
1175
|
-- Index for entity lookup (get all embeddings for entity)
|
|
979
|
-
CREATE INDEX
|
|
1176
|
+
CREATE INDEX idx_embeddings_users_entity ON embeddings_users (entity_id);
|
|
980
1177
|
|
|
981
1178
|
-- Index for field + provider lookup
|
|
982
|
-
CREATE INDEX
|
|
1179
|
+
CREATE INDEX idx_embeddings_users_field_provider ON embeddings_users (field_name, provider);
|
|
983
1180
|
|
|
984
1181
|
-- HNSW index for vector similarity search (created in background)
|
|
985
1182
|
-- Note: This will be created by background thread after data load
|
|
986
|
-
-- CREATE INDEX
|
|
1183
|
+
-- CREATE INDEX idx_embeddings_users_vector_hnsw ON embeddings_users
|
|
987
1184
|
-- USING hnsw (embedding vector_cosine_ops);
|
|
988
1185
|
|
|
989
|
-
-- KV_STORE trigger for
|
|
990
|
-
-- Trigger function to maintain KV_STORE for
|
|
991
|
-
CREATE OR REPLACE FUNCTION
|
|
1186
|
+
-- KV_STORE trigger for users
|
|
1187
|
+
-- Trigger function to maintain KV_STORE for users
|
|
1188
|
+
CREATE OR REPLACE FUNCTION fn_users_kv_store_upsert()
|
|
992
1189
|
RETURNS TRIGGER AS $$
|
|
993
1190
|
BEGIN
|
|
994
1191
|
IF (TG_OP = 'DELETE') THEN
|
|
@@ -1008,8 +1205,8 @@ BEGIN
|
|
|
1008
1205
|
graph_edges,
|
|
1009
1206
|
updated_at
|
|
1010
1207
|
) VALUES (
|
|
1011
|
-
NEW.name,
|
|
1012
|
-
'
|
|
1208
|
+
NEW.name::VARCHAR,
|
|
1209
|
+
'users',
|
|
1013
1210
|
NEW.id,
|
|
1014
1211
|
NEW.tenant_id,
|
|
1015
1212
|
NEW.user_id,
|
|
@@ -1031,10 +1228,1872 @@ END;
|
|
|
1031
1228
|
$$ LANGUAGE plpgsql;
|
|
1032
1229
|
|
|
1033
1230
|
-- Create trigger
|
|
1034
|
-
DROP TRIGGER IF EXISTS
|
|
1035
|
-
CREATE TRIGGER
|
|
1036
|
-
AFTER INSERT OR UPDATE OR DELETE ON
|
|
1037
|
-
FOR EACH ROW EXECUTE FUNCTION
|
|
1231
|
+
DROP TRIGGER IF EXISTS trg_users_kv_store ON users;
|
|
1232
|
+
CREATE TRIGGER trg_users_kv_store
|
|
1233
|
+
AFTER INSERT OR UPDATE OR DELETE ON users
|
|
1234
|
+
FOR EACH ROW EXECUTE FUNCTION fn_users_kv_store_upsert();
|
|
1235
|
+
|
|
1236
|
+
-- ============================================================================
|
|
1237
|
+
-- SCHEMA TABLE ENTRIES
|
|
1238
|
+
-- Every entity table gets a schemas entry for agent-like access
|
|
1239
|
+
-- ============================================================================
|
|
1240
|
+
|
|
1241
|
+
-- Schema entry for Feedback (feedbacks)
|
|
1242
|
+
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
|
|
1243
|
+
VALUES (
|
|
1244
|
+
'ae554853-e743-5d73-a2db-1ce20e7089fe'::uuid,
|
|
1245
|
+
'system',
|
|
1246
|
+
'Feedback',
|
|
1247
|
+
'# Feedback
|
|
1248
|
+
|
|
1249
|
+
|
|
1250
|
+
User feedback on a message or session.
|
|
1251
|
+
|
|
1252
|
+
Captures structured feedback including:
|
|
1253
|
+
- Rating (1-5 scale or thumbs up/down)
|
|
1254
|
+
- Categories (predefined or custom)
|
|
1255
|
+
- Free-text comment
|
|
1256
|
+
- Trace reference for OTEL/Phoenix integration
|
|
1257
|
+
|
|
1258
|
+
The feedback can be attached to:
|
|
1259
|
+
- A specific message (message_id set)
|
|
1260
|
+
- An entire session (session_id set, message_id null)
|
|
1261
|
+
|
|
1262
|
+
|
|
1263
|
+
## Overview
|
|
1264
|
+
|
|
1265
|
+
The `Feedback` entity is stored in the `feedbacks` table. Each record is uniquely
|
|
1266
|
+
identified by its `id` field for lookups and graph traversal.
|
|
1267
|
+
|
|
1268
|
+
## Search Capabilities
|
|
1269
|
+
|
|
1270
|
+
This schema includes the `search_rem` tool which supports:
|
|
1271
|
+
- **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
|
|
1272
|
+
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
1273
|
+
- **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM feedbacks LIMIT 10`)
|
|
1274
|
+
- **SQL**: Complex queries (e.g., `SELECT * FROM feedbacks WHERE ...`)
|
|
1275
|
+
|
|
1276
|
+
## Table Info
|
|
1277
|
+
|
|
1278
|
+
| Property | Value |
|
|
1279
|
+
|----------|-------|
|
|
1280
|
+
| Table | `feedbacks` |
|
|
1281
|
+
| Entity Key | `id` |
|
|
1282
|
+
| Embedding Fields | None |
|
|
1283
|
+
| Tools | `search_rem` |
|
|
1284
|
+
|
|
1285
|
+
## Fields
|
|
1286
|
+
|
|
1287
|
+
### `id`
|
|
1288
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
1289
|
+
- **Optional**
|
|
1290
|
+
- Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
|
|
1291
|
+
|
|
1292
|
+
### `created_at`
|
|
1293
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
1294
|
+
- **Optional**
|
|
1295
|
+
- Entity creation timestamp
|
|
1296
|
+
|
|
1297
|
+
### `updated_at`
|
|
1298
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
1299
|
+
- **Optional**
|
|
1300
|
+
- Last update timestamp
|
|
1301
|
+
|
|
1302
|
+
### `deleted_at`
|
|
1303
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
1304
|
+
- **Optional**
|
|
1305
|
+
- Soft deletion timestamp
|
|
1306
|
+
|
|
1307
|
+
### `tenant_id`
|
|
1308
|
+
- **Type**: `typing.Optional[str]`
|
|
1309
|
+
- **Optional**
|
|
1310
|
+
- Tenant identifier for multi-tenancy isolation
|
|
1311
|
+
|
|
1312
|
+
### `user_id`
|
|
1313
|
+
- **Type**: `typing.Optional[str]`
|
|
1314
|
+
- **Optional**
|
|
1315
|
+
- Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
|
|
1316
|
+
|
|
1317
|
+
### `graph_edges`
|
|
1318
|
+
- **Type**: `list[dict]`
|
|
1319
|
+
- **Optional**
|
|
1320
|
+
- Knowledge graph edges stored as InlineEdge dicts
|
|
1321
|
+
|
|
1322
|
+
### `metadata`
|
|
1323
|
+
- **Type**: `<class ''dict''>`
|
|
1324
|
+
- **Optional**
|
|
1325
|
+
- Flexible metadata storage
|
|
1326
|
+
|
|
1327
|
+
### `tags`
|
|
1328
|
+
- **Type**: `list[str]`
|
|
1329
|
+
- **Optional**
|
|
1330
|
+
- Entity tags
|
|
1331
|
+
|
|
1332
|
+
### `session_id`
|
|
1333
|
+
- **Type**: `<class ''str''>`
|
|
1334
|
+
- **Required**
|
|
1335
|
+
- Session ID this feedback relates to
|
|
1336
|
+
|
|
1337
|
+
### `message_id`
|
|
1338
|
+
- **Type**: `str | None`
|
|
1339
|
+
- **Optional**
|
|
1340
|
+
- Specific message ID (null for session-level feedback)
|
|
1341
|
+
|
|
1342
|
+
### `rating`
|
|
1343
|
+
- **Type**: `int | None`
|
|
1344
|
+
- **Optional**
|
|
1345
|
+
- Rating: -1 (thumbs down), 1 (thumbs up), or 1-5 scale
|
|
1346
|
+
|
|
1347
|
+
### `categories`
|
|
1348
|
+
- **Type**: `list[str]`
|
|
1349
|
+
- **Optional**
|
|
1350
|
+
- Selected feedback categories (from FeedbackCategory or custom)
|
|
1351
|
+
|
|
1352
|
+
### `comment`
|
|
1353
|
+
- **Type**: `str | None`
|
|
1354
|
+
- **Optional**
|
|
1355
|
+
- Optional free-text feedback comment
|
|
1356
|
+
|
|
1357
|
+
### `trace_id`
|
|
1358
|
+
- **Type**: `str | None`
|
|
1359
|
+
- **Optional**
|
|
1360
|
+
- OTEL trace ID for linking to observability
|
|
1361
|
+
|
|
1362
|
+
### `span_id`
|
|
1363
|
+
- **Type**: `str | None`
|
|
1364
|
+
- **Optional**
|
|
1365
|
+
- OTEL span ID for specific span feedback
|
|
1366
|
+
|
|
1367
|
+
### `phoenix_synced`
|
|
1368
|
+
- **Type**: `<class ''bool''>`
|
|
1369
|
+
- **Optional**
|
|
1370
|
+
- Whether feedback has been synced to Phoenix as annotation
|
|
1371
|
+
|
|
1372
|
+
### `phoenix_annotation_id`
|
|
1373
|
+
- **Type**: `str | None`
|
|
1374
|
+
- **Optional**
|
|
1375
|
+
- Phoenix annotation ID after sync
|
|
1376
|
+
|
|
1377
|
+
### `annotator_kind`
|
|
1378
|
+
- **Type**: `<class ''str''>`
|
|
1379
|
+
- **Optional**
|
|
1380
|
+
- Annotator type: HUMAN, LLM, CODE
|
|
1381
|
+
|
|
1382
|
+
',
|
|
1383
|
+
'{"type": "object", "description": "\n User feedback on a message or session.\n\n Captures structured feedback including:\n - Rating (1-5 scale or thumbs up/down)\n - Categories (predefined or custom)\n - Free-text comment\n - Trace reference for OTEL/Phoenix integration\n\n The feedback can be attached to:\n - A specific message (message_id set)\n - An entire session (session_id set, message_id null)\n \n\nThis agent can search the `feedbacks` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "session_id": {"description": "Session ID this feedback relates to", "title": "Session Id", "type": "string"}, "message_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Specific message ID (null for session-level feedback)", "title": "Message Id"}, "rating": {"anyOf": [{"maximum": 5, "minimum": -1, "type": "integer"}, {"type": "null"}], "default": null, "description": "Rating: -1 (thumbs down), 1 (thumbs up), or 1-5 scale", "title": "Rating"}, "categories": {"description": "Selected feedback categories (from FeedbackCategory or custom)", "items": {"type": "string"}, "title": "Categories", "type": "array"}, "comment": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Optional free-text feedback comment", "title": "Comment"}, "trace_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "OTEL trace ID for linking to observability", "title": "Trace Id"}, "span_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "OTEL span ID for specific span feedback", "title": "Span Id"}, "phoenix_synced": {"default": false, "description": "Whether feedback has been synced to Phoenix as annotation", "title": "Phoenix Synced", "type": "boolean"}, "phoenix_annotation_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Phoenix annotation ID after sync", "title": "Phoenix Annotation Id"}, "annotator_kind": {"default": "HUMAN", "description": "Annotator type: HUMAN, LLM, CODE", "title": "Annotator Kind", "type": "string"}}, "required": ["session_id"], "json_schema_extra": {"table_name": "feedbacks", "entity_key_field": "id", "embedding_fields": [], "fully_qualified_name": "rem.models.entities.feedback.Feedback", "tools": ["search_rem"], "default_search_table": "feedbacks", "has_embeddings": false}}'::jsonb,
|
|
1384
|
+
'entity',
|
|
1385
|
+
'{"table_name": "feedbacks", "entity_key_field": "id", "embedding_fields": [], "fqn": "rem.models.entities.feedback.Feedback"}'::jsonb
|
|
1386
|
+
)
|
|
1387
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
1388
|
+
name = EXCLUDED.name,
|
|
1389
|
+
content = EXCLUDED.content,
|
|
1390
|
+
spec = EXCLUDED.spec,
|
|
1391
|
+
category = EXCLUDED.category,
|
|
1392
|
+
metadata = EXCLUDED.metadata,
|
|
1393
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
1394
|
+
|
|
1395
|
+
-- Schema entry for File (files)
|
|
1396
|
+
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
|
|
1397
|
+
VALUES (
|
|
1398
|
+
'c3b3ef33-59d4-57a1-81a3-cc6adc45b194'::uuid,
|
|
1399
|
+
'system',
|
|
1400
|
+
'File',
|
|
1401
|
+
'# File
|
|
1402
|
+
|
|
1403
|
+
|
|
1404
|
+
File metadata and tracking.
|
|
1405
|
+
|
|
1406
|
+
Represents files uploaded to or referenced by the REM system,
|
|
1407
|
+
tracking their metadata and processing status. Tenant isolation
|
|
1408
|
+
is provided via CoreModel.tenant_id field.
|
|
1409
|
+
|
|
1410
|
+
|
|
1411
|
+
## Overview
|
|
1412
|
+
|
|
1413
|
+
The `File` entity is stored in the `files` table. Each record is uniquely
|
|
1414
|
+
identified by its `id` field for lookups and graph traversal.
|
|
1415
|
+
|
|
1416
|
+
## Search Capabilities
|
|
1417
|
+
|
|
1418
|
+
This schema includes the `search_rem` tool which supports:
|
|
1419
|
+
- **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
|
|
1420
|
+
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
1421
|
+
- **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM files LIMIT 10`)
|
|
1422
|
+
- **SQL**: Complex queries (e.g., `SELECT * FROM files WHERE ...`)
|
|
1423
|
+
|
|
1424
|
+
## Table Info
|
|
1425
|
+
|
|
1426
|
+
| Property | Value |
|
|
1427
|
+
|----------|-------|
|
|
1428
|
+
| Table | `files` |
|
|
1429
|
+
| Entity Key | `id` |
|
|
1430
|
+
| Embedding Fields | `content` |
|
|
1431
|
+
| Tools | `search_rem` |
|
|
1432
|
+
|
|
1433
|
+
## Fields
|
|
1434
|
+
|
|
1435
|
+
### `id`
|
|
1436
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
1437
|
+
- **Optional**
|
|
1438
|
+
- Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
|
|
1439
|
+
|
|
1440
|
+
### `created_at`
|
|
1441
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
1442
|
+
- **Optional**
|
|
1443
|
+
- Entity creation timestamp
|
|
1444
|
+
|
|
1445
|
+
### `updated_at`
|
|
1446
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
1447
|
+
- **Optional**
|
|
1448
|
+
- Last update timestamp
|
|
1449
|
+
|
|
1450
|
+
### `deleted_at`
|
|
1451
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
1452
|
+
- **Optional**
|
|
1453
|
+
- Soft deletion timestamp
|
|
1454
|
+
|
|
1455
|
+
### `tenant_id`
|
|
1456
|
+
- **Type**: `typing.Optional[str]`
|
|
1457
|
+
- **Optional**
|
|
1458
|
+
- Tenant identifier for multi-tenancy isolation
|
|
1459
|
+
|
|
1460
|
+
### `user_id`
|
|
1461
|
+
- **Type**: `typing.Optional[str]`
|
|
1462
|
+
- **Optional**
|
|
1463
|
+
- Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
|
|
1464
|
+
|
|
1465
|
+
### `graph_edges`
|
|
1466
|
+
- **Type**: `list[dict]`
|
|
1467
|
+
- **Optional**
|
|
1468
|
+
- Knowledge graph edges stored as InlineEdge dicts
|
|
1469
|
+
|
|
1470
|
+
### `metadata`
|
|
1471
|
+
- **Type**: `<class ''dict''>`
|
|
1472
|
+
- **Optional**
|
|
1473
|
+
- Flexible metadata storage
|
|
1474
|
+
|
|
1475
|
+
### `tags`
|
|
1476
|
+
- **Type**: `list[str]`
|
|
1477
|
+
- **Optional**
|
|
1478
|
+
- Entity tags
|
|
1479
|
+
|
|
1480
|
+
### `name`
|
|
1481
|
+
- **Type**: `<class ''str''>`
|
|
1482
|
+
- **Required**
|
|
1483
|
+
- File name
|
|
1484
|
+
|
|
1485
|
+
### `uri`
|
|
1486
|
+
- **Type**: `<class ''str''>`
|
|
1487
|
+
- **Required**
|
|
1488
|
+
- File storage URI (S3, local path, etc.)
|
|
1489
|
+
|
|
1490
|
+
### `content`
|
|
1491
|
+
- **Type**: `typing.Optional[str]`
|
|
1492
|
+
- **Optional**
|
|
1493
|
+
- Extracted text content (if applicable)
|
|
1494
|
+
|
|
1495
|
+
### `timestamp`
|
|
1496
|
+
- **Type**: `typing.Optional[str]`
|
|
1497
|
+
- **Optional**
|
|
1498
|
+
- File creation/modification timestamp
|
|
1499
|
+
|
|
1500
|
+
### `size_bytes`
|
|
1501
|
+
- **Type**: `typing.Optional[int]`
|
|
1502
|
+
- **Optional**
|
|
1503
|
+
- File size in bytes
|
|
1504
|
+
|
|
1505
|
+
### `mime_type`
|
|
1506
|
+
- **Type**: `typing.Optional[str]`
|
|
1507
|
+
- **Optional**
|
|
1508
|
+
- File MIME type
|
|
1509
|
+
|
|
1510
|
+
### `processing_status`
|
|
1511
|
+
- **Type**: `typing.Optional[str]`
|
|
1512
|
+
- **Optional**
|
|
1513
|
+
- File processing status (pending, processing, completed, failed)
|
|
1514
|
+
|
|
1515
|
+
',
|
|
1516
|
+
'{"type": "object", "description": "\n File metadata and tracking.\n\n Represents files uploaded to or referenced by the REM system,\n tracking their metadata and processing status. Tenant isolation\n is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `files` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "File name", "title": "Name", "type": "string"}, "uri": {"description": "File storage URI (S3, local path, etc.)", "title": "Uri", "type": "string"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Extracted text content (if applicable)", "title": "Content"}, "timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File creation/modification timestamp", "title": "Timestamp"}, "size_bytes": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "File size in bytes", "title": "Size Bytes"}, "mime_type": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File MIME type", "title": "Mime Type"}, "processing_status": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": "pending", "description": "File processing status (pending, processing, completed, failed)", "title": "Processing Status"}}, "required": ["name", "uri"], "json_schema_extra": {"table_name": "files", "entity_key_field": "id", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.file.File", "tools": ["search_rem"], "default_search_table": "files", "has_embeddings": true}}'::jsonb,
|
|
1517
|
+
'entity',
|
|
1518
|
+
'{"table_name": "files", "entity_key_field": "id", "embedding_fields": ["content"], "fqn": "rem.models.entities.file.File"}'::jsonb
|
|
1519
|
+
)
|
|
1520
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
1521
|
+
name = EXCLUDED.name,
|
|
1522
|
+
content = EXCLUDED.content,
|
|
1523
|
+
spec = EXCLUDED.spec,
|
|
1524
|
+
category = EXCLUDED.category,
|
|
1525
|
+
metadata = EXCLUDED.metadata,
|
|
1526
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
1527
|
+
|
|
1528
|
+
-- Schema entry for ImageResource (image_resources)
|
|
1529
|
+
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
|
|
1530
|
+
VALUES (
|
|
1531
|
+
'ab4bc90c-2cda-55b2-bd4b-e78e19f7d4a7'::uuid,
|
|
1532
|
+
'system',
|
|
1533
|
+
'ImageResource',
|
|
1534
|
+
'# ImageResource
|
|
1535
|
+
|
|
1536
|
+
|
|
1537
|
+
Image-specific resource with CLIP embeddings.
|
|
1538
|
+
|
|
1539
|
+
Stored in separate `image_resources` table with CLIP embeddings
|
|
1540
|
+
instead of text embeddings. This enables:
|
|
1541
|
+
- Multimodal search (text-to-image, image-to-image)
|
|
1542
|
+
- Proper dimensionality (512/768 for CLIP vs 1536 for text)
|
|
1543
|
+
- Cost tracking (CLIP tokens separate from text tokens)
|
|
1544
|
+
|
|
1545
|
+
Embedding Strategy:
|
|
1546
|
+
- Default (when JINA_API_KEY set): Jina CLIP API (jina-clip-v2)
|
|
1547
|
+
- Future: Self-hosted OpenCLIP models via KEDA-scaled pods
|
|
1548
|
+
- Fallback: No embeddings (images searchable by metadata only)
|
|
1549
|
+
|
|
1550
|
+
Vision LLM Strategy (tier/sampling gated):
|
|
1551
|
+
- Gold tier: Always get vision descriptions
|
|
1552
|
+
- Silver/Free: Probabilistic sampling (IMAGE_VLLM_SAMPLE_RATE)
|
|
1553
|
+
- Fallback: Basic metadata only
|
|
1554
|
+
|
|
1555
|
+
Tenant isolation provided via CoreModel.tenant_id field.
|
|
1556
|
+
|
|
1557
|
+
|
|
1558
|
+
## Overview
|
|
1559
|
+
|
|
1560
|
+
The `ImageResource` entity is stored in the `image_resources` table. Each record is uniquely
|
|
1561
|
+
identified by its `name` field for lookups and graph traversal.
|
|
1562
|
+
|
|
1563
|
+
## Search Capabilities
|
|
1564
|
+
|
|
1565
|
+
This schema includes the `search_rem` tool which supports:
|
|
1566
|
+
- **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
|
|
1567
|
+
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
1568
|
+
- **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM image_resources LIMIT 10`)
|
|
1569
|
+
- **SQL**: Complex queries (e.g., `SELECT * FROM image_resources WHERE ...`)
|
|
1570
|
+
|
|
1571
|
+
## Table Info
|
|
1572
|
+
|
|
1573
|
+
| Property | Value |
|
|
1574
|
+
|----------|-------|
|
|
1575
|
+
| Table | `image_resources` |
|
|
1576
|
+
| Entity Key | `name` |
|
|
1577
|
+
| Embedding Fields | `content` |
|
|
1578
|
+
| Tools | `search_rem` |
|
|
1579
|
+
|
|
1580
|
+
## Fields
|
|
1581
|
+
|
|
1582
|
+
### `id`
|
|
1583
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
1584
|
+
- **Optional**
|
|
1585
|
+
- Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
|
|
1586
|
+
|
|
1587
|
+
### `created_at`
|
|
1588
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
1589
|
+
- **Optional**
|
|
1590
|
+
- Entity creation timestamp
|
|
1591
|
+
|
|
1592
|
+
### `updated_at`
|
|
1593
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
1594
|
+
- **Optional**
|
|
1595
|
+
- Last update timestamp
|
|
1596
|
+
|
|
1597
|
+
### `deleted_at`
|
|
1598
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
1599
|
+
- **Optional**
|
|
1600
|
+
- Soft deletion timestamp
|
|
1601
|
+
|
|
1602
|
+
### `tenant_id`
|
|
1603
|
+
- **Type**: `typing.Optional[str]`
|
|
1604
|
+
- **Optional**
|
|
1605
|
+
- Tenant identifier for multi-tenancy isolation
|
|
1606
|
+
|
|
1607
|
+
### `user_id`
|
|
1608
|
+
- **Type**: `typing.Optional[str]`
|
|
1609
|
+
- **Optional**
|
|
1610
|
+
- Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
|
|
1611
|
+
|
|
1612
|
+
### `graph_edges`
|
|
1613
|
+
- **Type**: `list[dict]`
|
|
1614
|
+
- **Optional**
|
|
1615
|
+
- Knowledge graph edges stored as InlineEdge dicts
|
|
1616
|
+
|
|
1617
|
+
### `metadata`
|
|
1618
|
+
- **Type**: `<class ''dict''>`
|
|
1619
|
+
- **Optional**
|
|
1620
|
+
- Flexible metadata storage
|
|
1621
|
+
|
|
1622
|
+
### `tags`
|
|
1623
|
+
- **Type**: `list[str]`
|
|
1624
|
+
- **Optional**
|
|
1625
|
+
- Entity tags
|
|
1626
|
+
|
|
1627
|
+
### `name`
|
|
1628
|
+
- **Type**: `typing.Optional[str]`
|
|
1629
|
+
- **Optional**
|
|
1630
|
+
- Human-readable resource name (used as graph label). Auto-generated from uri+ordinal if not provided.
|
|
1631
|
+
|
|
1632
|
+
### `uri`
|
|
1633
|
+
- **Type**: `typing.Optional[str]`
|
|
1634
|
+
- **Optional**
|
|
1635
|
+
- Content URI or identifier (file path, URL, etc.)
|
|
1636
|
+
|
|
1637
|
+
### `ordinal`
|
|
1638
|
+
- **Type**: `<class ''int''>`
|
|
1639
|
+
- **Optional**
|
|
1640
|
+
- Chunk ordinal for splitting large documents (0 for single-chunk resources)
|
|
1641
|
+
|
|
1642
|
+
### `content`
|
|
1643
|
+
- **Type**: `<class ''str''>`
|
|
1644
|
+
- **Optional**
|
|
1645
|
+
- Resource content text
|
|
1646
|
+
|
|
1647
|
+
### `timestamp`
|
|
1648
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
1649
|
+
- **Optional**
|
|
1650
|
+
- Resource timestamp (content creation/publication time)
|
|
1651
|
+
|
|
1652
|
+
### `category`
|
|
1653
|
+
- **Type**: `typing.Optional[str]`
|
|
1654
|
+
- **Optional**
|
|
1655
|
+
- Resource category (document, conversation, artifact, etc.)
|
|
1656
|
+
|
|
1657
|
+
### `related_entities`
|
|
1658
|
+
- **Type**: `list[dict]`
|
|
1659
|
+
- **Optional**
|
|
1660
|
+
- Extracted entities (people, projects, concepts) with metadata
|
|
1661
|
+
|
|
1662
|
+
### `image_width`
|
|
1663
|
+
- **Type**: `typing.Optional[int]`
|
|
1664
|
+
- **Optional**
|
|
1665
|
+
- Image width in pixels
|
|
1666
|
+
|
|
1667
|
+
### `image_height`
|
|
1668
|
+
- **Type**: `typing.Optional[int]`
|
|
1669
|
+
- **Optional**
|
|
1670
|
+
- Image height in pixels
|
|
1671
|
+
|
|
1672
|
+
### `image_format`
|
|
1673
|
+
- **Type**: `typing.Optional[str]`
|
|
1674
|
+
- **Optional**
|
|
1675
|
+
- Image format (PNG, JPEG, GIF, WebP)
|
|
1676
|
+
|
|
1677
|
+
### `vision_description`
|
|
1678
|
+
- **Type**: `typing.Optional[str]`
|
|
1679
|
+
- **Optional**
|
|
1680
|
+
- Vision LLM generated description (markdown, only for gold tier or sampled images)
|
|
1681
|
+
|
|
1682
|
+
### `vision_provider`
|
|
1683
|
+
- **Type**: `typing.Optional[str]`
|
|
1684
|
+
- **Optional**
|
|
1685
|
+
- Vision provider used (anthropic, gemini, openai)
|
|
1686
|
+
|
|
1687
|
+
### `vision_model`
|
|
1688
|
+
- **Type**: `typing.Optional[str]`
|
|
1689
|
+
- **Optional**
|
|
1690
|
+
- Vision model used for description
|
|
1691
|
+
|
|
1692
|
+
### `clip_embedding`
|
|
1693
|
+
- **Type**: `typing.Optional[list[float]]`
|
|
1694
|
+
- **Optional**
|
|
1695
|
+
- CLIP embedding vector (512 or 768 dimensions, from Jina AI or self-hosted)
|
|
1696
|
+
|
|
1697
|
+
### `clip_dimensions`
|
|
1698
|
+
- **Type**: `typing.Optional[int]`
|
|
1699
|
+
- **Optional**
|
|
1700
|
+
- CLIP embedding dimensionality (512 for jina-clip-v2, 768 for jina-clip-v1)
|
|
1701
|
+
|
|
1702
|
+
',
|
|
1703
|
+
'{"type": "object", "description": "\n Image-specific resource with CLIP embeddings.\n\n Stored in separate `image_resources` table with CLIP embeddings\n instead of text embeddings. This enables:\n - Multimodal search (text-to-image, image-to-image)\n - Proper dimensionality (512/768 for CLIP vs 1536 for text)\n - Cost tracking (CLIP tokens separate from text tokens)\n\n Embedding Strategy:\n - Default (when JINA_API_KEY set): Jina CLIP API (jina-clip-v2)\n - Future: Self-hosted OpenCLIP models via KEDA-scaled pods\n - Fallback: No embeddings (images searchable by metadata only)\n\n Vision LLM Strategy (tier/sampling gated):\n - Gold tier: Always get vision descriptions\n - Silver/Free: Probabilistic sampling (IMAGE_VLLM_SAMPLE_RATE)\n - Fallback: Basic metadata only\n\n Tenant isolation provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `image_resources` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Human-readable resource name (used as graph label). Auto-generated from uri+ordinal if not provided.", "entity_key": true, "title": "Name"}, "uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Content URI or identifier (file path, URL, etc.)", "title": "Uri"}, "ordinal": {"composite_key": true, "default": 0, "description": "Chunk ordinal for splitting large documents (0 for single-chunk resources)", "title": "Ordinal", "type": "integer"}, "content": {"default": "", "description": "Resource content text", "title": "Content", "type": "string"}, "timestamp": {"description": "Resource timestamp (content creation/publication time)", "format": "date-time", "title": "Timestamp", "type": "string"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Resource category (document, conversation, artifact, etc.)", "title": "Category"}, "related_entities": {"description": "Extracted entities (people, projects, concepts) with metadata", "items": {"additionalProperties": true, "type": "object"}, "title": "Related Entities", "type": "array"}, "image_width": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "Image width in pixels", "title": "Image Width"}, "image_height": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "Image height in pixels", "title": "Image Height"}, "image_format": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Image format (PNG, JPEG, GIF, WebP)", "title": "Image Format"}, "vision_description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Vision LLM generated description (markdown, only for gold tier or sampled images)", "title": "Vision Description"}, "vision_provider": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Vision provider used (anthropic, gemini, openai)", "title": "Vision Provider"}, "vision_model": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Vision model used for description", "title": "Vision Model"}, "clip_embedding": {"anyOf": [{"items": {"type": "number"}, "type": "array"}, {"type": "null"}], "default": null, "description": "CLIP embedding vector (512 or 768 dimensions, from Jina AI or self-hosted)", "title": "Clip Embedding"}, "clip_dimensions": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "CLIP embedding dimensionality (512 for jina-clip-v2, 768 for jina-clip-v1)", "title": "Clip Dimensions"}}, "required": [], "json_schema_extra": {"table_name": "image_resources", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.image_resource.ImageResource", "tools": ["search_rem"], "default_search_table": "image_resources", "has_embeddings": true}}'::jsonb,
|
|
1704
|
+
'entity',
|
|
1705
|
+
'{"table_name": "image_resources", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.image_resource.ImageResource"}'::jsonb
|
|
1706
|
+
)
|
|
1707
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
1708
|
+
name = EXCLUDED.name,
|
|
1709
|
+
content = EXCLUDED.content,
|
|
1710
|
+
spec = EXCLUDED.spec,
|
|
1711
|
+
category = EXCLUDED.category,
|
|
1712
|
+
metadata = EXCLUDED.metadata,
|
|
1713
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
1714
|
+
|
|
1715
|
+
-- Schema entry for Message (messages)
|
|
1716
|
+
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
|
|
1717
|
+
VALUES (
|
|
1718
|
+
'be36f9da-6df4-51ba-bb41-bf51246ecec1'::uuid,
|
|
1719
|
+
'system',
|
|
1720
|
+
'Message',
|
|
1721
|
+
'# Message
|
|
1722
|
+
|
|
1723
|
+
|
|
1724
|
+
Communication content unit.
|
|
1725
|
+
|
|
1726
|
+
Represents individual messages in conversations, chats, or other
|
|
1727
|
+
communication contexts. Tenant isolation is provided via CoreModel.tenant_id field.
|
|
1728
|
+
|
|
1729
|
+
Trace fields (trace_id, span_id) enable integration with OTEL/Phoenix
|
|
1730
|
+
for observability and feedback annotation.
|
|
1731
|
+
|
|
1732
|
+
|
|
1733
|
+
## Overview
|
|
1734
|
+
|
|
1735
|
+
The `Message` entity is stored in the `messages` table. Each record is uniquely
|
|
1736
|
+
identified by its `id` field for lookups and graph traversal.
|
|
1737
|
+
|
|
1738
|
+
## Search Capabilities
|
|
1739
|
+
|
|
1740
|
+
This schema includes the `search_rem` tool which supports:
|
|
1741
|
+
- **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
|
|
1742
|
+
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
1743
|
+
- **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM messages LIMIT 10`)
|
|
1744
|
+
- **SQL**: Complex queries (e.g., `SELECT * FROM messages WHERE ...`)
|
|
1745
|
+
|
|
1746
|
+
## Table Info
|
|
1747
|
+
|
|
1748
|
+
| Property | Value |
|
|
1749
|
+
|----------|-------|
|
|
1750
|
+
| Table | `messages` |
|
|
1751
|
+
| Entity Key | `id` |
|
|
1752
|
+
| Embedding Fields | `content` |
|
|
1753
|
+
| Tools | `search_rem` |
|
|
1754
|
+
|
|
1755
|
+
## Fields
|
|
1756
|
+
|
|
1757
|
+
### `id`
|
|
1758
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
1759
|
+
- **Optional**
|
|
1760
|
+
- Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
|
|
1761
|
+
|
|
1762
|
+
### `created_at`
|
|
1763
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
1764
|
+
- **Optional**
|
|
1765
|
+
- Entity creation timestamp
|
|
1766
|
+
|
|
1767
|
+
### `updated_at`
|
|
1768
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
1769
|
+
- **Optional**
|
|
1770
|
+
- Last update timestamp
|
|
1771
|
+
|
|
1772
|
+
### `deleted_at`
|
|
1773
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
1774
|
+
- **Optional**
|
|
1775
|
+
- Soft deletion timestamp
|
|
1776
|
+
|
|
1777
|
+
### `tenant_id`
|
|
1778
|
+
- **Type**: `typing.Optional[str]`
|
|
1779
|
+
- **Optional**
|
|
1780
|
+
- Tenant identifier for multi-tenancy isolation
|
|
1781
|
+
|
|
1782
|
+
### `user_id`
|
|
1783
|
+
- **Type**: `typing.Optional[str]`
|
|
1784
|
+
- **Optional**
|
|
1785
|
+
- Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
|
|
1786
|
+
|
|
1787
|
+
### `graph_edges`
|
|
1788
|
+
- **Type**: `list[dict]`
|
|
1789
|
+
- **Optional**
|
|
1790
|
+
- Knowledge graph edges stored as InlineEdge dicts
|
|
1791
|
+
|
|
1792
|
+
### `metadata`
|
|
1793
|
+
- **Type**: `<class ''dict''>`
|
|
1794
|
+
- **Optional**
|
|
1795
|
+
- Flexible metadata storage
|
|
1796
|
+
|
|
1797
|
+
### `tags`
|
|
1798
|
+
- **Type**: `list[str]`
|
|
1799
|
+
- **Optional**
|
|
1800
|
+
- Entity tags
|
|
1801
|
+
|
|
1802
|
+
### `content`
|
|
1803
|
+
- **Type**: `<class ''str''>`
|
|
1804
|
+
- **Required**
|
|
1805
|
+
- Message content text
|
|
1806
|
+
|
|
1807
|
+
### `message_type`
|
|
1808
|
+
- **Type**: `str | None`
|
|
1809
|
+
- **Optional**
|
|
1810
|
+
- Message type e.g. role: ''user'', ''assistant'', ''system'', ''tool''
|
|
1811
|
+
|
|
1812
|
+
### `session_id`
|
|
1813
|
+
- **Type**: `str | None`
|
|
1814
|
+
- **Optional**
|
|
1815
|
+
- Session identifier for tracking message context
|
|
1816
|
+
|
|
1817
|
+
### `prompt`
|
|
1818
|
+
- **Type**: `str | None`
|
|
1819
|
+
- **Optional**
|
|
1820
|
+
- Custom prompt used for this message (if overridden from default)
|
|
1821
|
+
|
|
1822
|
+
### `model`
|
|
1823
|
+
- **Type**: `str | None`
|
|
1824
|
+
- **Optional**
|
|
1825
|
+
- Model used for generating this message (provider:model format)
|
|
1826
|
+
|
|
1827
|
+
### `token_count`
|
|
1828
|
+
- **Type**: `int | None`
|
|
1829
|
+
- **Optional**
|
|
1830
|
+
- Token count for this message
|
|
1831
|
+
|
|
1832
|
+
### `trace_id`
|
|
1833
|
+
- **Type**: `str | None`
|
|
1834
|
+
- **Optional**
|
|
1835
|
+
- OTEL trace ID for observability integration
|
|
1836
|
+
|
|
1837
|
+
### `span_id`
|
|
1838
|
+
- **Type**: `str | None`
|
|
1839
|
+
- **Optional**
|
|
1840
|
+
- OTEL span ID for specific span reference
|
|
1841
|
+
|
|
1842
|
+
',
|
|
1843
|
+
'{"type": "object", "description": "\n Communication content unit.\n\n Represents individual messages in conversations, chats, or other\n communication contexts. Tenant isolation is provided via CoreModel.tenant_id field.\n\n Trace fields (trace_id, span_id) enable integration with OTEL/Phoenix\n for observability and feedback annotation.\n \n\nThis agent can search the `messages` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "content": {"description": "Message content text", "title": "Content", "type": "string"}, "message_type": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Message type e.g. role: ''user'', ''assistant'', ''system'', ''tool''", "title": "Message Type"}, "session_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Session identifier for tracking message context", "title": "Session Id"}, "prompt": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Custom prompt used for this message (if overridden from default)", "title": "Prompt"}, "model": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Model used for generating this message (provider:model format)", "title": "Model"}, "token_count": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "Token count for this message", "title": "Token Count"}, "trace_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "OTEL trace ID for observability integration", "title": "Trace Id"}, "span_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "OTEL span ID for specific span reference", "title": "Span Id"}}, "required": ["content"], "json_schema_extra": {"table_name": "messages", "entity_key_field": "id", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.message.Message", "tools": ["search_rem"], "default_search_table": "messages", "has_embeddings": true}}'::jsonb,
|
|
1844
|
+
'entity',
|
|
1845
|
+
'{"table_name": "messages", "entity_key_field": "id", "embedding_fields": ["content"], "fqn": "rem.models.entities.message.Message"}'::jsonb
|
|
1846
|
+
)
|
|
1847
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
1848
|
+
name = EXCLUDED.name,
|
|
1849
|
+
content = EXCLUDED.content,
|
|
1850
|
+
spec = EXCLUDED.spec,
|
|
1851
|
+
category = EXCLUDED.category,
|
|
1852
|
+
metadata = EXCLUDED.metadata,
|
|
1853
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
1854
|
+
|
|
1855
|
+
-- Schema entry for Moment (moments)
|
|
1856
|
+
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
|
|
1857
|
+
VALUES (
|
|
1858
|
+
'a08f0a8c-5bab-5bf5-9760-0e67bc69bd74'::uuid,
|
|
1859
|
+
'system',
|
|
1860
|
+
'Moment',
|
|
1861
|
+
'# Moment
|
|
1862
|
+
|
|
1863
|
+
|
|
1864
|
+
Temporal narrative extracted from resources.
|
|
1865
|
+
|
|
1866
|
+
Moments provide temporal structure and context for the REM graph,
|
|
1867
|
+
enabling time-based queries and understanding of when events occurred.
|
|
1868
|
+
Tenant isolation is provided via CoreModel.tenant_id field.
|
|
1869
|
+
|
|
1870
|
+
|
|
1871
|
+
## Overview
|
|
1872
|
+
|
|
1873
|
+
The `Moment` entity is stored in the `moments` table. Each record is uniquely
|
|
1874
|
+
identified by its `name` field for lookups and graph traversal.
|
|
1875
|
+
|
|
1876
|
+
## Search Capabilities
|
|
1877
|
+
|
|
1878
|
+
This schema includes the `search_rem` tool which supports:
|
|
1879
|
+
- **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
|
|
1880
|
+
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
1881
|
+
- **SEARCH**: Semantic vector search on summary (e.g., `SEARCH "concept" FROM moments LIMIT 10`)
|
|
1882
|
+
- **SQL**: Complex queries (e.g., `SELECT * FROM moments WHERE ...`)
|
|
1883
|
+
|
|
1884
|
+
## Table Info
|
|
1885
|
+
|
|
1886
|
+
| Property | Value |
|
|
1887
|
+
|----------|-------|
|
|
1888
|
+
| Table | `moments` |
|
|
1889
|
+
| Entity Key | `name` |
|
|
1890
|
+
| Embedding Fields | `summary` |
|
|
1891
|
+
| Tools | `search_rem` |
|
|
1892
|
+
|
|
1893
|
+
## Fields
|
|
1894
|
+
|
|
1895
|
+
### `id`
|
|
1896
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
1897
|
+
- **Optional**
|
|
1898
|
+
- Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
|
|
1899
|
+
|
|
1900
|
+
### `created_at`
|
|
1901
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
1902
|
+
- **Optional**
|
|
1903
|
+
- Entity creation timestamp
|
|
1904
|
+
|
|
1905
|
+
### `updated_at`
|
|
1906
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
1907
|
+
- **Optional**
|
|
1908
|
+
- Last update timestamp
|
|
1909
|
+
|
|
1910
|
+
### `deleted_at`
|
|
1911
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
1912
|
+
- **Optional**
|
|
1913
|
+
- Soft deletion timestamp
|
|
1914
|
+
|
|
1915
|
+
### `tenant_id`
|
|
1916
|
+
- **Type**: `typing.Optional[str]`
|
|
1917
|
+
- **Optional**
|
|
1918
|
+
- Tenant identifier for multi-tenancy isolation
|
|
1919
|
+
|
|
1920
|
+
### `user_id`
|
|
1921
|
+
- **Type**: `typing.Optional[str]`
|
|
1922
|
+
- **Optional**
|
|
1923
|
+
- Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
|
|
1924
|
+
|
|
1925
|
+
### `graph_edges`
|
|
1926
|
+
- **Type**: `list[dict]`
|
|
1927
|
+
- **Optional**
|
|
1928
|
+
- Knowledge graph edges stored as InlineEdge dicts
|
|
1929
|
+
|
|
1930
|
+
### `metadata`
|
|
1931
|
+
- **Type**: `<class ''dict''>`
|
|
1932
|
+
- **Optional**
|
|
1933
|
+
- Flexible metadata storage
|
|
1934
|
+
|
|
1935
|
+
### `tags`
|
|
1936
|
+
- **Type**: `list[str]`
|
|
1937
|
+
- **Optional**
|
|
1938
|
+
- Entity tags
|
|
1939
|
+
|
|
1940
|
+
### `name`
|
|
1941
|
+
- **Type**: `typing.Optional[str]`
|
|
1942
|
+
- **Optional**
|
|
1943
|
+
- Human-readable moment name (used as graph label). Auto-generated from starts_timestamp+moment_type if not provided.
|
|
1944
|
+
|
|
1945
|
+
### `moment_type`
|
|
1946
|
+
- **Type**: `typing.Optional[str]`
|
|
1947
|
+
- **Optional**
|
|
1948
|
+
- Moment classification (meeting, coding-session, conversation, etc.)
|
|
1949
|
+
|
|
1950
|
+
### `category`
|
|
1951
|
+
- **Type**: `typing.Optional[str]`
|
|
1952
|
+
- **Optional**
|
|
1953
|
+
- Moment category for grouping and filtering
|
|
1954
|
+
|
|
1955
|
+
### `starts_timestamp`
|
|
1956
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
1957
|
+
- **Required**
|
|
1958
|
+
- Moment start time
|
|
1959
|
+
|
|
1960
|
+
### `ends_timestamp`
|
|
1961
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
1962
|
+
- **Optional**
|
|
1963
|
+
- Moment end time
|
|
1964
|
+
|
|
1965
|
+
### `present_persons`
|
|
1966
|
+
- **Type**: `list[rem.models.entities.moment.Person]`
|
|
1967
|
+
- **Optional**
|
|
1968
|
+
- People present in the moment
|
|
1969
|
+
|
|
1970
|
+
### `emotion_tags`
|
|
1971
|
+
- **Type**: `list[str]`
|
|
1972
|
+
- **Optional**
|
|
1973
|
+
- Emotion/sentiment tags (happy, frustrated, focused, etc.)
|
|
1974
|
+
|
|
1975
|
+
### `topic_tags`
|
|
1976
|
+
- **Type**: `list[str]`
|
|
1977
|
+
- **Optional**
|
|
1978
|
+
- Topic/concept tags (project names, technologies, etc.)
|
|
1979
|
+
|
|
1980
|
+
### `summary`
|
|
1981
|
+
- **Type**: `typing.Optional[str]`
|
|
1982
|
+
- **Optional**
|
|
1983
|
+
- Natural language summary of the moment
|
|
1984
|
+
|
|
1985
|
+
### `source_resource_ids`
|
|
1986
|
+
- **Type**: `list[str]`
|
|
1987
|
+
- **Optional**
|
|
1988
|
+
- Resource IDs used to construct this moment
|
|
1989
|
+
|
|
1990
|
+
',
|
|
1991
|
+
'{"type": "object", "description": "\n Temporal narrative extracted from resources.\n\n Moments provide temporal structure and context for the REM graph,\n enabling time-based queries and understanding of when events occurred.\n Tenant isolation is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `moments` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Human-readable moment name (used as graph label). Auto-generated from starts_timestamp+moment_type if not provided.", "entity_key": true, "title": "Name"}, "moment_type": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Moment classification (meeting, coding-session, conversation, etc.)", "title": "Moment Type"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Moment category for grouping and filtering", "title": "Category"}, "starts_timestamp": {"description": "Moment start time", "format": "date-time", "title": "Starts Timestamp", "type": "string"}, "ends_timestamp": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Moment end time", "title": "Ends Timestamp"}, "present_persons": {"description": "People present in the moment", "items": {"$ref": "#/$defs/Person"}, "title": "Present Persons", "type": "array"}, "emotion_tags": {"description": "Emotion/sentiment tags (happy, frustrated, focused, etc.)", "items": {"type": "string"}, "title": "Emotion Tags", "type": "array"}, "topic_tags": {"description": "Topic/concept tags (project names, technologies, etc.)", "items": {"type": "string"}, "title": "Topic Tags", "type": "array"}, "summary": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Natural language summary of the moment", "title": "Summary"}, "source_resource_ids": {"description": "Resource IDs used to construct this moment", "items": {"type": "string"}, "title": "Source Resource Ids", "type": "array"}}, "required": ["starts_timestamp"], "json_schema_extra": {"table_name": "moments", "entity_key_field": "name", "embedding_fields": ["summary"], "fully_qualified_name": "rem.models.entities.moment.Moment", "tools": ["search_rem"], "default_search_table": "moments", "has_embeddings": true}}'::jsonb,
|
|
1992
|
+
'entity',
|
|
1993
|
+
'{"table_name": "moments", "entity_key_field": "name", "embedding_fields": ["summary"], "fqn": "rem.models.entities.moment.Moment"}'::jsonb
|
|
1994
|
+
)
|
|
1995
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
1996
|
+
name = EXCLUDED.name,
|
|
1997
|
+
content = EXCLUDED.content,
|
|
1998
|
+
spec = EXCLUDED.spec,
|
|
1999
|
+
category = EXCLUDED.category,
|
|
2000
|
+
metadata = EXCLUDED.metadata,
|
|
2001
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
2002
|
+
|
|
2003
|
+
-- Schema entry for Ontology (ontologies)
|
|
2004
|
+
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
|
|
2005
|
+
VALUES (
|
|
2006
|
+
'a702ed74-8988-534a-9917-2977349777c1'::uuid,
|
|
2007
|
+
'system',
|
|
2008
|
+
'Ontology',
|
|
2009
|
+
'# Ontology
|
|
2010
|
+
|
|
2011
|
+
Domain-specific knowledge extracted from files using custom agents.
|
|
2012
|
+
|
|
2013
|
+
Attributes:
|
|
2014
|
+
name: Human-readable label for this ontology instance
|
|
2015
|
+
file_id: Foreign key to File entity that was processed
|
|
2016
|
+
agent_schema_id: Foreign key to Schema entity that performed extraction
|
|
2017
|
+
provider_name: LLM provider used for extraction (e.g., "anthropic", "openai")
|
|
2018
|
+
model_name: Specific model used (e.g., "claude-sonnet-4-5")
|
|
2019
|
+
extracted_data: Structured data extracted by agent (arbitrary JSON)
|
|
2020
|
+
confidence_score: Optional confidence score from extraction (0.0-1.0)
|
|
2021
|
+
extraction_timestamp: When extraction was performed
|
|
2022
|
+
embedding_text: Text used for generating embedding (derived from extracted_data)
|
|
2023
|
+
|
|
2024
|
+
Inherited from CoreModel:
|
|
2025
|
+
id: UUID or string identifier
|
|
2026
|
+
created_at: Entity creation timestamp
|
|
2027
|
+
updated_at: Last update timestamp
|
|
2028
|
+
deleted_at: Soft deletion timestamp
|
|
2029
|
+
tenant_id: Multi-tenancy isolation
|
|
2030
|
+
user_id: Ownership
|
|
2031
|
+
graph_edges: Relationships to other entities
|
|
2032
|
+
metadata: Flexible metadata storage
|
|
2033
|
+
tags: Classification tags
|
|
2034
|
+
column: Database schema metadata
|
|
2035
|
+
|
|
2036
|
+
Example Usage:
|
|
2037
|
+
# CV extraction
|
|
2038
|
+
cv_ontology = Ontology(
|
|
2039
|
+
name="john-doe-cv-2024",
|
|
2040
|
+
file_id="file-uuid-123",
|
|
2041
|
+
agent_schema_id="cv-parser-v1",
|
|
2042
|
+
provider_name="anthropic",
|
|
2043
|
+
model_name="claude-sonnet-4-5-20250929",
|
|
2044
|
+
extracted_data={
|
|
2045
|
+
"candidate_name": "John Doe",
|
|
2046
|
+
"email": "john@example.com",
|
|
2047
|
+
"skills": ["Python", "PostgreSQL", "Kubernetes"],
|
|
2048
|
+
"experience": [
|
|
2049
|
+
{
|
|
2050
|
+
"company": "TechCorp",
|
|
2051
|
+
"role": "Senior Engineer",
|
|
2052
|
+
"years": 3,
|
|
2053
|
+
"achievements": ["Led migration to k8s", "Reduced costs 40%"]
|
|
2054
|
+
}
|
|
2055
|
+
],
|
|
2056
|
+
"education": [
|
|
2057
|
+
{"degree": "BS Computer Science", "institution": "MIT", "year": 2018}
|
|
2058
|
+
]
|
|
2059
|
+
},
|
|
2060
|
+
confidence_score=0.95,
|
|
2061
|
+
tags=["cv", "engineering", "senior-level"]
|
|
2062
|
+
)
|
|
2063
|
+
|
|
2064
|
+
# Contract extraction
|
|
2065
|
+
contract_ontology = Ontology(
|
|
2066
|
+
name="acme-supplier-agreement-2024",
|
|
2067
|
+
file_id="file-uuid-456",
|
|
2068
|
+
agent_schema_id="contract-parser-v2",
|
|
2069
|
+
provider_name="openai",
|
|
2070
|
+
model_name="gpt-4o",
|
|
2071
|
+
extracted_data={
|
|
2072
|
+
"contract_type": "supplier_agreement",
|
|
2073
|
+
"parties": [
|
|
2074
|
+
{"name": "ACME Corp", "role": "buyer"},
|
|
2075
|
+
{"name": "SupplyChain Inc", "role": "supplier"}
|
|
2076
|
+
],
|
|
2077
|
+
"effective_date": "2024-01-01",
|
|
2078
|
+
"termination_date": "2026-12-31",
|
|
2079
|
+
"payment_terms": {
|
|
2080
|
+
"amount": 500000,
|
|
2081
|
+
"currency": "USD",
|
|
2082
|
+
"frequency": "quarterly"
|
|
2083
|
+
},
|
|
2084
|
+
"key_obligations": [
|
|
2085
|
+
"Supplier must deliver within 30 days",
|
|
2086
|
+
"Buyer must pay within 60 days of invoice"
|
|
2087
|
+
]
|
|
2088
|
+
},
|
|
2089
|
+
confidence_score=0.92,
|
|
2090
|
+
tags=["contract", "supplier", "procurement"]
|
|
2091
|
+
)
|
|
2092
|
+
|
|
2093
|
+
|
|
2094
|
+
## Overview
|
|
2095
|
+
|
|
2096
|
+
The `Ontology` entity is stored in the `ontologies` table. Each record is uniquely
|
|
2097
|
+
identified by its `id` field for lookups and graph traversal.
|
|
2098
|
+
|
|
2099
|
+
## Search Capabilities
|
|
2100
|
+
|
|
2101
|
+
This schema includes the `search_rem` tool which supports:
|
|
2102
|
+
- **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
|
|
2103
|
+
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
2104
|
+
- **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM ontologies LIMIT 10`)
|
|
2105
|
+
- **SQL**: Complex queries (e.g., `SELECT * FROM ontologies WHERE ...`)
|
|
2106
|
+
|
|
2107
|
+
## Table Info
|
|
2108
|
+
|
|
2109
|
+
| Property | Value |
|
|
2110
|
+
|----------|-------|
|
|
2111
|
+
| Table | `ontologies` |
|
|
2112
|
+
| Entity Key | `id` |
|
|
2113
|
+
| Embedding Fields | None |
|
|
2114
|
+
| Tools | `search_rem` |
|
|
2115
|
+
|
|
2116
|
+
## Fields
|
|
2117
|
+
|
|
2118
|
+
### `id`
|
|
2119
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
2120
|
+
- **Optional**
|
|
2121
|
+
- Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
|
|
2122
|
+
|
|
2123
|
+
### `created_at`
|
|
2124
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2125
|
+
- **Optional**
|
|
2126
|
+
- Entity creation timestamp
|
|
2127
|
+
|
|
2128
|
+
### `updated_at`
|
|
2129
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2130
|
+
- **Optional**
|
|
2131
|
+
- Last update timestamp
|
|
2132
|
+
|
|
2133
|
+
### `deleted_at`
|
|
2134
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
2135
|
+
- **Optional**
|
|
2136
|
+
- Soft deletion timestamp
|
|
2137
|
+
|
|
2138
|
+
### `tenant_id`
|
|
2139
|
+
- **Type**: `typing.Optional[str]`
|
|
2140
|
+
- **Optional**
|
|
2141
|
+
- Tenant identifier for multi-tenancy isolation
|
|
2142
|
+
|
|
2143
|
+
### `user_id`
|
|
2144
|
+
- **Type**: `typing.Optional[str]`
|
|
2145
|
+
- **Optional**
|
|
2146
|
+
- Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
|
|
2147
|
+
|
|
2148
|
+
### `graph_edges`
|
|
2149
|
+
- **Type**: `list[dict]`
|
|
2150
|
+
- **Optional**
|
|
2151
|
+
- Knowledge graph edges stored as InlineEdge dicts
|
|
2152
|
+
|
|
2153
|
+
### `metadata`
|
|
2154
|
+
- **Type**: `<class ''dict''>`
|
|
2155
|
+
- **Optional**
|
|
2156
|
+
- Flexible metadata storage
|
|
2157
|
+
|
|
2158
|
+
### `tags`
|
|
2159
|
+
- **Type**: `list[str]`
|
|
2160
|
+
- **Optional**
|
|
2161
|
+
- Entity tags
|
|
2162
|
+
|
|
2163
|
+
### `name`
|
|
2164
|
+
- **Type**: `<class ''str''>`
|
|
2165
|
+
- **Required**
|
|
2166
|
+
|
|
2167
|
+
### `file_id`
|
|
2168
|
+
- **Type**: `uuid.UUID | str`
|
|
2169
|
+
- **Required**
|
|
2170
|
+
|
|
2171
|
+
### `agent_schema_id`
|
|
2172
|
+
- **Type**: `<class ''str''>`
|
|
2173
|
+
- **Required**
|
|
2174
|
+
|
|
2175
|
+
### `provider_name`
|
|
2176
|
+
- **Type**: `<class ''str''>`
|
|
2177
|
+
- **Required**
|
|
2178
|
+
|
|
2179
|
+
### `model_name`
|
|
2180
|
+
- **Type**: `<class ''str''>`
|
|
2181
|
+
- **Required**
|
|
2182
|
+
|
|
2183
|
+
### `extracted_data`
|
|
2184
|
+
- **Type**: `dict[str, typing.Any]`
|
|
2185
|
+
- **Required**
|
|
2186
|
+
|
|
2187
|
+
### `confidence_score`
|
|
2188
|
+
- **Type**: `typing.Optional[float]`
|
|
2189
|
+
- **Optional**
|
|
2190
|
+
|
|
2191
|
+
### `extraction_timestamp`
|
|
2192
|
+
- **Type**: `typing.Optional[str]`
|
|
2193
|
+
- **Optional**
|
|
2194
|
+
|
|
2195
|
+
### `embedding_text`
|
|
2196
|
+
- **Type**: `typing.Optional[str]`
|
|
2197
|
+
- **Optional**
|
|
2198
|
+
|
|
2199
|
+
',
|
|
2200
|
+
'{"type": "object", "description": "Domain-specific knowledge extracted from files using custom agents.\n\n Attributes:\n name: Human-readable label for this ontology instance\n file_id: Foreign key to File entity that was processed\n agent_schema_id: Foreign key to Schema entity that performed extraction\n provider_name: LLM provider used for extraction (e.g., \"anthropic\", \"openai\")\n model_name: Specific model used (e.g., \"claude-sonnet-4-5\")\n extracted_data: Structured data extracted by agent (arbitrary JSON)\n confidence_score: Optional confidence score from extraction (0.0-1.0)\n extraction_timestamp: When extraction was performed\n embedding_text: Text used for generating embedding (derived from extracted_data)\n\n Inherited from CoreModel:\n id: UUID or string identifier\n created_at: Entity creation timestamp\n updated_at: Last update timestamp\n deleted_at: Soft deletion timestamp\n tenant_id: Multi-tenancy isolation\n user_id: Ownership\n graph_edges: Relationships to other entities\n metadata: Flexible metadata storage\n tags: Classification tags\n column: Database schema metadata\n\n Example Usage:\n # CV extraction\n cv_ontology = Ontology(\n name=\"john-doe-cv-2024\",\n file_id=\"file-uuid-123\",\n agent_schema_id=\"cv-parser-v1\",\n provider_name=\"anthropic\",\n model_name=\"claude-sonnet-4-5-20250929\",\n extracted_data={\n \"candidate_name\": \"John Doe\",\n \"email\": \"john@example.com\",\n \"skills\": [\"Python\", \"PostgreSQL\", \"Kubernetes\"],\n \"experience\": [\n {\n \"company\": \"TechCorp\",\n \"role\": \"Senior Engineer\",\n \"years\": 3,\n \"achievements\": [\"Led migration to k8s\", \"Reduced costs 40%\"]\n }\n ],\n \"education\": [\n {\"degree\": \"BS Computer Science\", \"institution\": \"MIT\", \"year\": 2018}\n ]\n },\n confidence_score=0.95,\n tags=[\"cv\", \"engineering\", \"senior-level\"]\n )\n\n # Contract extraction\n contract_ontology = Ontology(\n name=\"acme-supplier-agreement-2024\",\n file_id=\"file-uuid-456\",\n agent_schema_id=\"contract-parser-v2\",\n provider_name=\"openai\",\n model_name=\"gpt-4o\",\n extracted_data={\n \"contract_type\": \"supplier_agreement\",\n \"parties\": [\n {\"name\": \"ACME Corp\", \"role\": \"buyer\"},\n {\"name\": \"SupplyChain Inc\", \"role\": \"supplier\"}\n ],\n \"effective_date\": \"2024-01-01\",\n \"termination_date\": \"2026-12-31\",\n \"payment_terms\": {\n \"amount\": 500000,\n \"currency\": \"USD\",\n \"frequency\": \"quarterly\"\n },\n \"key_obligations\": [\n \"Supplier must deliver within 30 days\",\n \"Buyer must pay within 60 days of invoice\"\n ]\n },\n confidence_score=0.92,\n tags=[\"contract\", \"supplier\", \"procurement\"]\n )\n \n\nThis agent can search the `ontologies` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "file_id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}], "title": "File Id"}, "agent_schema_id": {"title": "Agent Schema Id", "type": "string"}, "provider_name": {"title": "Provider Name", "type": "string"}, "model_name": {"title": "Model Name", "type": "string"}, "extracted_data": {"additionalProperties": true, "title": "Extracted Data", "type": "object"}, "confidence_score": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Confidence Score"}, "extraction_timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Extraction Timestamp"}, "embedding_text": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Embedding Text"}}, "required": ["name", "file_id", "agent_schema_id", "provider_name", "model_name", "extracted_data"], "json_schema_extra": {"table_name": "ontologies", "entity_key_field": "id", "embedding_fields": [], "fully_qualified_name": "rem.models.entities.ontology.Ontology", "tools": ["search_rem"], "default_search_table": "ontologies", "has_embeddings": false}}'::jsonb,
|
|
2201
|
+
'entity',
|
|
2202
|
+
'{"table_name": "ontologies", "entity_key_field": "id", "embedding_fields": [], "fqn": "rem.models.entities.ontology.Ontology"}'::jsonb
|
|
2203
|
+
)
|
|
2204
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
2205
|
+
name = EXCLUDED.name,
|
|
2206
|
+
content = EXCLUDED.content,
|
|
2207
|
+
spec = EXCLUDED.spec,
|
|
2208
|
+
category = EXCLUDED.category,
|
|
2209
|
+
metadata = EXCLUDED.metadata,
|
|
2210
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
2211
|
+
|
|
2212
|
+
-- Schema entry for OntologyConfig (ontology_configs)
|
|
2213
|
+
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
|
|
2214
|
+
VALUES (
|
|
2215
|
+
'9a7e50d0-ef3a-5641-9ff4-b2be5a77053b'::uuid,
|
|
2216
|
+
'system',
|
|
2217
|
+
'OntologyConfig',
|
|
2218
|
+
'# OntologyConfig
|
|
2219
|
+
|
|
2220
|
+
User configuration for automatic ontology extraction.
|
|
2221
|
+
|
|
2222
|
+
Attributes:
|
|
2223
|
+
name: Human-readable config name
|
|
2224
|
+
agent_schema_id: Foreign key to Schema entity to use for extraction
|
|
2225
|
+
description: Purpose and scope of this config
|
|
2226
|
+
|
|
2227
|
+
# File matching rules (ANY matching rule triggers extraction)
|
|
2228
|
+
mime_type_pattern: Regex pattern for file MIME types (e.g., "application/pdf")
|
|
2229
|
+
uri_pattern: Regex pattern for file URIs (e.g., "s3://bucket/resumes/.*")
|
|
2230
|
+
tag_filter: List of tags (file must have ALL tags to match)
|
|
2231
|
+
|
|
2232
|
+
# Execution control
|
|
2233
|
+
priority: Execution order (higher = earlier, default 100)
|
|
2234
|
+
enabled: Whether this config is active (default True)
|
|
2235
|
+
|
|
2236
|
+
# LLM provider configuration
|
|
2237
|
+
provider_name: Optional LLM provider override (defaults to settings)
|
|
2238
|
+
model_name: Optional model override (defaults to settings)
|
|
2239
|
+
|
|
2240
|
+
Inherited from CoreModel:
|
|
2241
|
+
id, created_at, updated_at, deleted_at, tenant_id, user_id,
|
|
2242
|
+
graph_edges, metadata, tags, column
|
|
2243
|
+
|
|
2244
|
+
Example Usage:
|
|
2245
|
+
# CV extraction for recruitment
|
|
2246
|
+
cv_config = OntologyConfig(
|
|
2247
|
+
name="recruitment-cv-parser",
|
|
2248
|
+
agent_schema_id="cv-parser-v1",
|
|
2249
|
+
description="Extract candidate information from resumes",
|
|
2250
|
+
mime_type_pattern="application/pdf",
|
|
2251
|
+
uri_pattern=".*/resumes/.*",
|
|
2252
|
+
tag_filter=["cv", "candidate"],
|
|
2253
|
+
priority=100,
|
|
2254
|
+
enabled=True,
|
|
2255
|
+
tenant_id="acme-corp",
|
|
2256
|
+
tags=["recruitment", "hr"]
|
|
2257
|
+
)
|
|
2258
|
+
|
|
2259
|
+
# Contract analysis for legal team
|
|
2260
|
+
contract_config = OntologyConfig(
|
|
2261
|
+
name="legal-contract-analyzer",
|
|
2262
|
+
agent_schema_id="contract-parser-v2",
|
|
2263
|
+
description="Extract key terms from supplier contracts",
|
|
2264
|
+
mime_type_pattern="application/(pdf|msword|vnd.openxmlformats.*)",
|
|
2265
|
+
tag_filter=["legal", "contract"],
|
|
2266
|
+
priority=200, # Higher priority = runs first
|
|
2267
|
+
enabled=True,
|
|
2268
|
+
provider_name="openai", # Override default provider
|
|
2269
|
+
model_name="gpt-4o",
|
|
2270
|
+
tenant_id="acme-corp",
|
|
2271
|
+
tags=["legal", "procurement"]
|
|
2272
|
+
)
|
|
2273
|
+
|
|
2274
|
+
# Medical records for healthcare
|
|
2275
|
+
medical_config = OntologyConfig(
|
|
2276
|
+
name="medical-records-extractor",
|
|
2277
|
+
agent_schema_id="medical-parser-v1",
|
|
2278
|
+
description="Extract diagnoses and treatments from medical records",
|
|
2279
|
+
mime_type_pattern="application/pdf",
|
|
2280
|
+
tag_filter=["medical", "patient-record"],
|
|
2281
|
+
priority=50,
|
|
2282
|
+
enabled=True,
|
|
2283
|
+
tenant_id="healthsystem",
|
|
2284
|
+
tags=["medical", "hipaa-compliant"]
|
|
2285
|
+
)
|
|
2286
|
+
|
|
2287
|
+
|
|
2288
|
+
## Overview
|
|
2289
|
+
|
|
2290
|
+
The `OntologyConfig` entity is stored in the `ontology_configs` table. Each record is uniquely
|
|
2291
|
+
identified by its `id` field for lookups and graph traversal.
|
|
2292
|
+
|
|
2293
|
+
## Search Capabilities
|
|
2294
|
+
|
|
2295
|
+
This schema includes the `search_rem` tool which supports:
|
|
2296
|
+
- **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
|
|
2297
|
+
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
2298
|
+
- **SEARCH**: Semantic vector search on description (e.g., `SEARCH "concept" FROM ontology_configs LIMIT 10`)
|
|
2299
|
+
- **SQL**: Complex queries (e.g., `SELECT * FROM ontology_configs WHERE ...`)
|
|
2300
|
+
|
|
2301
|
+
## Table Info
|
|
2302
|
+
|
|
2303
|
+
| Property | Value |
|
|
2304
|
+
|----------|-------|
|
|
2305
|
+
| Table | `ontology_configs` |
|
|
2306
|
+
| Entity Key | `id` |
|
|
2307
|
+
| Embedding Fields | `description` |
|
|
2308
|
+
| Tools | `search_rem` |
|
|
2309
|
+
|
|
2310
|
+
## Fields
|
|
2311
|
+
|
|
2312
|
+
### `id`
|
|
2313
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
2314
|
+
- **Optional**
|
|
2315
|
+
- Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
|
|
2316
|
+
|
|
2317
|
+
### `created_at`
|
|
2318
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2319
|
+
- **Optional**
|
|
2320
|
+
- Entity creation timestamp
|
|
2321
|
+
|
|
2322
|
+
### `updated_at`
|
|
2323
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2324
|
+
- **Optional**
|
|
2325
|
+
- Last update timestamp
|
|
2326
|
+
|
|
2327
|
+
### `deleted_at`
|
|
2328
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
2329
|
+
- **Optional**
|
|
2330
|
+
- Soft deletion timestamp
|
|
2331
|
+
|
|
2332
|
+
### `tenant_id`
|
|
2333
|
+
- **Type**: `typing.Optional[str]`
|
|
2334
|
+
- **Optional**
|
|
2335
|
+
- Tenant identifier for multi-tenancy isolation
|
|
2336
|
+
|
|
2337
|
+
### `user_id`
|
|
2338
|
+
- **Type**: `typing.Optional[str]`
|
|
2339
|
+
- **Optional**
|
|
2340
|
+
- Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
|
|
2341
|
+
|
|
2342
|
+
### `graph_edges`
|
|
2343
|
+
- **Type**: `list[dict]`
|
|
2344
|
+
- **Optional**
|
|
2345
|
+
- Knowledge graph edges stored as InlineEdge dicts
|
|
2346
|
+
|
|
2347
|
+
### `metadata`
|
|
2348
|
+
- **Type**: `<class ''dict''>`
|
|
2349
|
+
- **Optional**
|
|
2350
|
+
- Flexible metadata storage
|
|
2351
|
+
|
|
2352
|
+
### `tags`
|
|
2353
|
+
- **Type**: `list[str]`
|
|
2354
|
+
- **Optional**
|
|
2355
|
+
- Entity tags
|
|
2356
|
+
|
|
2357
|
+
### `name`
|
|
2358
|
+
- **Type**: `<class ''str''>`
|
|
2359
|
+
- **Required**
|
|
2360
|
+
|
|
2361
|
+
### `agent_schema_id`
|
|
2362
|
+
- **Type**: `<class ''str''>`
|
|
2363
|
+
- **Required**
|
|
2364
|
+
|
|
2365
|
+
### `description`
|
|
2366
|
+
- **Type**: `typing.Optional[str]`
|
|
2367
|
+
- **Optional**
|
|
2368
|
+
|
|
2369
|
+
### `mime_type_pattern`
|
|
2370
|
+
- **Type**: `typing.Optional[str]`
|
|
2371
|
+
- **Optional**
|
|
2372
|
+
|
|
2373
|
+
### `uri_pattern`
|
|
2374
|
+
- **Type**: `typing.Optional[str]`
|
|
2375
|
+
- **Optional**
|
|
2376
|
+
|
|
2377
|
+
### `tag_filter`
|
|
2378
|
+
- **Type**: `list[str]`
|
|
2379
|
+
- **Optional**
|
|
2380
|
+
|
|
2381
|
+
### `priority`
|
|
2382
|
+
- **Type**: `<class ''int''>`
|
|
2383
|
+
- **Optional**
|
|
2384
|
+
|
|
2385
|
+
### `enabled`
|
|
2386
|
+
- **Type**: `<class ''bool''>`
|
|
2387
|
+
- **Optional**
|
|
2388
|
+
|
|
2389
|
+
### `provider_name`
|
|
2390
|
+
- **Type**: `typing.Optional[str]`
|
|
2391
|
+
- **Optional**
|
|
2392
|
+
|
|
2393
|
+
### `model_name`
|
|
2394
|
+
- **Type**: `typing.Optional[str]`
|
|
2395
|
+
- **Optional**
|
|
2396
|
+
|
|
2397
|
+
',
|
|
2398
|
+
'{"type": "object", "description": "User configuration for automatic ontology extraction.\n\n Attributes:\n name: Human-readable config name\n agent_schema_id: Foreign key to Schema entity to use for extraction\n description: Purpose and scope of this config\n\n # File matching rules (ANY matching rule triggers extraction)\n mime_type_pattern: Regex pattern for file MIME types (e.g., \"application/pdf\")\n uri_pattern: Regex pattern for file URIs (e.g., \"s3://bucket/resumes/.*\")\n tag_filter: List of tags (file must have ALL tags to match)\n\n # Execution control\n priority: Execution order (higher = earlier, default 100)\n enabled: Whether this config is active (default True)\n\n # LLM provider configuration\n provider_name: Optional LLM provider override (defaults to settings)\n model_name: Optional model override (defaults to settings)\n\n Inherited from CoreModel:\n id, created_at, updated_at, deleted_at, tenant_id, user_id,\n graph_edges, metadata, tags, column\n\n Example Usage:\n # CV extraction for recruitment\n cv_config = OntologyConfig(\n name=\"recruitment-cv-parser\",\n agent_schema_id=\"cv-parser-v1\",\n description=\"Extract candidate information from resumes\",\n mime_type_pattern=\"application/pdf\",\n uri_pattern=\".*/resumes/.*\",\n tag_filter=[\"cv\", \"candidate\"],\n priority=100,\n enabled=True,\n tenant_id=\"acme-corp\",\n tags=[\"recruitment\", \"hr\"]\n )\n\n # Contract analysis for legal team\n contract_config = OntologyConfig(\n name=\"legal-contract-analyzer\",\n agent_schema_id=\"contract-parser-v2\",\n description=\"Extract key terms from supplier contracts\",\n mime_type_pattern=\"application/(pdf|msword|vnd.openxmlformats.*)\",\n tag_filter=[\"legal\", \"contract\"],\n priority=200, # Higher priority = runs first\n enabled=True,\n provider_name=\"openai\", # Override default provider\n model_name=\"gpt-4o\",\n tenant_id=\"acme-corp\",\n tags=[\"legal\", \"procurement\"]\n )\n\n # Medical records for healthcare\n medical_config = OntologyConfig(\n name=\"medical-records-extractor\",\n agent_schema_id=\"medical-parser-v1\",\n description=\"Extract diagnoses and treatments from medical records\",\n mime_type_pattern=\"application/pdf\",\n tag_filter=[\"medical\", \"patient-record\"],\n priority=50,\n enabled=True,\n tenant_id=\"healthsystem\",\n tags=[\"medical\", \"hipaa-compliant\"]\n )\n \n\nThis agent can search the `ontology_configs` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "agent_schema_id": {"title": "Agent Schema Id", "type": "string"}, "description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Description"}, "mime_type_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Mime Type Pattern"}, "uri_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri Pattern"}, "tag_filter": {"default": [], "items": {"type": "string"}, "title": "Tag Filter", "type": "array"}, "priority": {"default": 100, "title": "Priority", "type": "integer"}, "enabled": {"default": true, "title": "Enabled", "type": "boolean"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}}, "required": ["name", "agent_schema_id"], "json_schema_extra": {"table_name": "ontology_configs", "entity_key_field": "id", "embedding_fields": ["description"], "fully_qualified_name": "rem.models.entities.ontology_config.OntologyConfig", "tools": ["search_rem"], "default_search_table": "ontology_configs", "has_embeddings": true}}'::jsonb,
|
|
2399
|
+
'entity',
|
|
2400
|
+
'{"table_name": "ontology_configs", "entity_key_field": "id", "embedding_fields": ["description"], "fqn": "rem.models.entities.ontology_config.OntologyConfig"}'::jsonb
|
|
2401
|
+
)
|
|
2402
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
2403
|
+
name = EXCLUDED.name,
|
|
2404
|
+
content = EXCLUDED.content,
|
|
2405
|
+
spec = EXCLUDED.spec,
|
|
2406
|
+
category = EXCLUDED.category,
|
|
2407
|
+
metadata = EXCLUDED.metadata,
|
|
2408
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
2409
|
+
|
|
2410
|
+
-- Schema entry for Resource (resources)
|
|
2411
|
+
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
|
|
2412
|
+
VALUES (
|
|
2413
|
+
'a579f379-4f1c-5414-8ff4-1382d0f783b7'::uuid,
|
|
2414
|
+
'system',
|
|
2415
|
+
'Resource',
|
|
2416
|
+
'# Resource
|
|
2417
|
+
|
|
2418
|
+
|
|
2419
|
+
Base content unit in REM.
|
|
2420
|
+
|
|
2421
|
+
Resources are content units that feed into dreaming workflows for moment
|
|
2422
|
+
extraction and affinity graph construction. Tenant isolation is provided
|
|
2423
|
+
via CoreModel.tenant_id field.
|
|
2424
|
+
|
|
2425
|
+
|
|
2426
|
+
## Overview
|
|
2427
|
+
|
|
2428
|
+
The `Resource` entity is stored in the `resources` table. Each record is uniquely
|
|
2429
|
+
identified by its `name` field for lookups and graph traversal.
|
|
2430
|
+
|
|
2431
|
+
## Search Capabilities
|
|
2432
|
+
|
|
2433
|
+
This schema includes the `search_rem` tool which supports:
|
|
2434
|
+
- **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
|
|
2435
|
+
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
2436
|
+
- **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM resources LIMIT 10`)
|
|
2437
|
+
- **SQL**: Complex queries (e.g., `SELECT * FROM resources WHERE ...`)
|
|
2438
|
+
|
|
2439
|
+
## Table Info
|
|
2440
|
+
|
|
2441
|
+
| Property | Value |
|
|
2442
|
+
|----------|-------|
|
|
2443
|
+
| Table | `resources` |
|
|
2444
|
+
| Entity Key | `name` |
|
|
2445
|
+
| Embedding Fields | `content` |
|
|
2446
|
+
| Tools | `search_rem` |
|
|
2447
|
+
|
|
2448
|
+
## Fields
|
|
2449
|
+
|
|
2450
|
+
### `id`
|
|
2451
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
2452
|
+
- **Optional**
|
|
2453
|
+
- Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
|
|
2454
|
+
|
|
2455
|
+
### `created_at`
|
|
2456
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2457
|
+
- **Optional**
|
|
2458
|
+
- Entity creation timestamp
|
|
2459
|
+
|
|
2460
|
+
### `updated_at`
|
|
2461
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2462
|
+
- **Optional**
|
|
2463
|
+
- Last update timestamp
|
|
2464
|
+
|
|
2465
|
+
### `deleted_at`
|
|
2466
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
2467
|
+
- **Optional**
|
|
2468
|
+
- Soft deletion timestamp
|
|
2469
|
+
|
|
2470
|
+
### `tenant_id`
|
|
2471
|
+
- **Type**: `typing.Optional[str]`
|
|
2472
|
+
- **Optional**
|
|
2473
|
+
- Tenant identifier for multi-tenancy isolation
|
|
2474
|
+
|
|
2475
|
+
### `user_id`
|
|
2476
|
+
- **Type**: `typing.Optional[str]`
|
|
2477
|
+
- **Optional**
|
|
2478
|
+
- Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
|
|
2479
|
+
|
|
2480
|
+
### `graph_edges`
|
|
2481
|
+
- **Type**: `list[dict]`
|
|
2482
|
+
- **Optional**
|
|
2483
|
+
- Knowledge graph edges stored as InlineEdge dicts
|
|
2484
|
+
|
|
2485
|
+
### `metadata`
|
|
2486
|
+
- **Type**: `<class ''dict''>`
|
|
2487
|
+
- **Optional**
|
|
2488
|
+
- Flexible metadata storage
|
|
2489
|
+
|
|
2490
|
+
### `tags`
|
|
2491
|
+
- **Type**: `list[str]`
|
|
2492
|
+
- **Optional**
|
|
2493
|
+
- Entity tags
|
|
2494
|
+
|
|
2495
|
+
### `name`
|
|
2496
|
+
- **Type**: `typing.Optional[str]`
|
|
2497
|
+
- **Optional**
|
|
2498
|
+
- Human-readable resource name (used as graph label). Auto-generated from uri+ordinal if not provided.
|
|
2499
|
+
|
|
2500
|
+
### `uri`
|
|
2501
|
+
- **Type**: `typing.Optional[str]`
|
|
2502
|
+
- **Optional**
|
|
2503
|
+
- Content URI or identifier (file path, URL, etc.)
|
|
2504
|
+
|
|
2505
|
+
### `ordinal`
|
|
2506
|
+
- **Type**: `<class ''int''>`
|
|
2507
|
+
- **Optional**
|
|
2508
|
+
- Chunk ordinal for splitting large documents (0 for single-chunk resources)
|
|
2509
|
+
|
|
2510
|
+
### `content`
|
|
2511
|
+
- **Type**: `<class ''str''>`
|
|
2512
|
+
- **Optional**
|
|
2513
|
+
- Resource content text
|
|
2514
|
+
|
|
2515
|
+
### `timestamp`
|
|
2516
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2517
|
+
- **Optional**
|
|
2518
|
+
- Resource timestamp (content creation/publication time)
|
|
2519
|
+
|
|
2520
|
+
### `category`
|
|
2521
|
+
- **Type**: `typing.Optional[str]`
|
|
2522
|
+
- **Optional**
|
|
2523
|
+
- Resource category (document, conversation, artifact, etc.)
|
|
2524
|
+
|
|
2525
|
+
### `related_entities`
|
|
2526
|
+
- **Type**: `list[dict]`
|
|
2527
|
+
- **Optional**
|
|
2528
|
+
- Extracted entities (people, projects, concepts) with metadata
|
|
2529
|
+
|
|
2530
|
+
',
|
|
2531
|
+
'{"type": "object", "description": "\n Base content unit in REM.\n\n Resources are content units that feed into dreaming workflows for moment\n extraction and affinity graph construction. Tenant isolation is provided\n via CoreModel.tenant_id field.\n \n\nThis agent can search the `resources` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Human-readable resource name (used as graph label). Auto-generated from uri+ordinal if not provided.", "entity_key": true, "title": "Name"}, "uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Content URI or identifier (file path, URL, etc.)", "title": "Uri"}, "ordinal": {"composite_key": true, "default": 0, "description": "Chunk ordinal for splitting large documents (0 for single-chunk resources)", "title": "Ordinal", "type": "integer"}, "content": {"default": "", "description": "Resource content text", "title": "Content", "type": "string"}, "timestamp": {"description": "Resource timestamp (content creation/publication time)", "format": "date-time", "title": "Timestamp", "type": "string"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Resource category (document, conversation, artifact, etc.)", "title": "Category"}, "related_entities": {"description": "Extracted entities (people, projects, concepts) with metadata", "items": {"additionalProperties": true, "type": "object"}, "title": "Related Entities", "type": "array"}}, "required": [], "json_schema_extra": {"table_name": "resources", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.resource.Resource", "tools": ["search_rem"], "default_search_table": "resources", "has_embeddings": true}}'::jsonb,
|
|
2532
|
+
'entity',
|
|
2533
|
+
'{"table_name": "resources", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.resource.Resource"}'::jsonb
|
|
2534
|
+
)
|
|
2535
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
2536
|
+
name = EXCLUDED.name,
|
|
2537
|
+
content = EXCLUDED.content,
|
|
2538
|
+
spec = EXCLUDED.spec,
|
|
2539
|
+
category = EXCLUDED.category,
|
|
2540
|
+
metadata = EXCLUDED.metadata,
|
|
2541
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
2542
|
+
|
|
2543
|
+
-- Schema entry for Schema (schemas)
|
|
2544
|
+
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
|
|
2545
|
+
VALUES (
|
|
2546
|
+
'2372e956-add6-58b8-a638-758a91a2b6c4'::uuid,
|
|
2547
|
+
'system',
|
|
2548
|
+
'Schema',
|
|
2549
|
+
'# Schema
|
|
2550
|
+
|
|
2551
|
+
|
|
2552
|
+
Agent schema definition.
|
|
2553
|
+
|
|
2554
|
+
Schemas define agents that can be dynamically loaded into Pydantic AI.
|
|
2555
|
+
They store JsonSchema specifications with embedded metadata for tools,
|
|
2556
|
+
resources, and system prompts.
|
|
2557
|
+
|
|
2558
|
+
For ontology extraction agents:
|
|
2559
|
+
- `provider_configs` enables multi-provider support (test across Anthropic, OpenAI, etc.)
|
|
2560
|
+
- `embedding_fields` specifies which output fields should be embedded for semantic search
|
|
2561
|
+
|
|
2562
|
+
Tenant isolation is provided via CoreModel.tenant_id field.
|
|
2563
|
+
|
|
2564
|
+
|
|
2565
|
+
## Overview
|
|
2566
|
+
|
|
2567
|
+
The `Schema` entity is stored in the `schemas` table. Each record is uniquely
|
|
2568
|
+
identified by its `id` field for lookups and graph traversal.
|
|
2569
|
+
|
|
2570
|
+
## Search Capabilities
|
|
2571
|
+
|
|
2572
|
+
This schema includes the `search_rem` tool which supports:
|
|
2573
|
+
- **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
|
|
2574
|
+
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
2575
|
+
- **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM schemas LIMIT 10`)
|
|
2576
|
+
- **SQL**: Complex queries (e.g., `SELECT * FROM schemas WHERE ...`)
|
|
2577
|
+
|
|
2578
|
+
## Table Info
|
|
2579
|
+
|
|
2580
|
+
| Property | Value |
|
|
2581
|
+
|----------|-------|
|
|
2582
|
+
| Table | `schemas` |
|
|
2583
|
+
| Entity Key | `id` |
|
|
2584
|
+
| Embedding Fields | `content` |
|
|
2585
|
+
| Tools | `search_rem` |
|
|
2586
|
+
|
|
2587
|
+
## Fields
|
|
2588
|
+
|
|
2589
|
+
### `id`
|
|
2590
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
2591
|
+
- **Optional**
|
|
2592
|
+
- Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
|
|
2593
|
+
|
|
2594
|
+
### `created_at`
|
|
2595
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2596
|
+
- **Optional**
|
|
2597
|
+
- Entity creation timestamp
|
|
2598
|
+
|
|
2599
|
+
### `updated_at`
|
|
2600
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2601
|
+
- **Optional**
|
|
2602
|
+
- Last update timestamp
|
|
2603
|
+
|
|
2604
|
+
### `deleted_at`
|
|
2605
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
2606
|
+
- **Optional**
|
|
2607
|
+
- Soft deletion timestamp
|
|
2608
|
+
|
|
2609
|
+
### `tenant_id`
|
|
2610
|
+
- **Type**: `typing.Optional[str]`
|
|
2611
|
+
- **Optional**
|
|
2612
|
+
- Tenant identifier for multi-tenancy isolation
|
|
2613
|
+
|
|
2614
|
+
### `user_id`
|
|
2615
|
+
- **Type**: `typing.Optional[str]`
|
|
2616
|
+
- **Optional**
|
|
2617
|
+
- Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
|
|
2618
|
+
|
|
2619
|
+
### `graph_edges`
|
|
2620
|
+
- **Type**: `list[dict]`
|
|
2621
|
+
- **Optional**
|
|
2622
|
+
- Knowledge graph edges stored as InlineEdge dicts
|
|
2623
|
+
|
|
2624
|
+
### `metadata`
|
|
2625
|
+
- **Type**: `<class ''dict''>`
|
|
2626
|
+
- **Optional**
|
|
2627
|
+
- Flexible metadata storage
|
|
2628
|
+
|
|
2629
|
+
### `tags`
|
|
2630
|
+
- **Type**: `list[str]`
|
|
2631
|
+
- **Optional**
|
|
2632
|
+
- Entity tags
|
|
2633
|
+
|
|
2634
|
+
### `name`
|
|
2635
|
+
- **Type**: `<class ''str''>`
|
|
2636
|
+
- **Required**
|
|
2637
|
+
- Human-readable schema name (used as identifier)
|
|
2638
|
+
|
|
2639
|
+
### `content`
|
|
2640
|
+
- **Type**: `<class ''str''>`
|
|
2641
|
+
- **Optional**
|
|
2642
|
+
- Markdown documentation and instructions for the schema
|
|
2643
|
+
|
|
2644
|
+
### `spec`
|
|
2645
|
+
- **Type**: `<class ''dict''>`
|
|
2646
|
+
- **Required**
|
|
2647
|
+
- JsonSchema specification defining the agent structure and capabilities
|
|
2648
|
+
|
|
2649
|
+
### `category`
|
|
2650
|
+
- **Type**: `typing.Optional[str]`
|
|
2651
|
+
- **Optional**
|
|
2652
|
+
- Schema category distinguishing schema types. Values: ''agent'' (AI agents), ''evaluator'' (LLM-as-a-Judge evaluators). Maps directly from json_schema_extra.kind field during ingestion.
|
|
2653
|
+
|
|
2654
|
+
### `provider_configs`
|
|
2655
|
+
- **Type**: `list[dict]`
|
|
2656
|
+
- **Optional**
|
|
2657
|
+
- Optional provider configurations for multi-provider testing. Each dict has ''provider_name'' and ''model_name''. Example: [{''provider_name'': ''anthropic'', ''model_name'': ''claude-sonnet-4-5''}]
|
|
2658
|
+
|
|
2659
|
+
### `embedding_fields`
|
|
2660
|
+
- **Type**: `list[str]`
|
|
2661
|
+
- **Optional**
|
|
2662
|
+
- JSON paths in extracted_data to embed for semantic search. Example: [''summary'', ''candidate_name'', ''skills''] for CV extraction. Values will be concatenated and embedded using configured embedding provider.
|
|
2663
|
+
|
|
2664
|
+
',
|
|
2665
|
+
'{"type": "object", "description": "\n Agent schema definition.\n\n Schemas define agents that can be dynamically loaded into Pydantic AI.\n They store JsonSchema specifications with embedded metadata for tools,\n resources, and system prompts.\n\n For ontology extraction agents:\n - `provider_configs` enables multi-provider support (test across Anthropic, OpenAI, etc.)\n - `embedding_fields` specifies which output fields should be embedded for semantic search\n\n Tenant isolation is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `schemas` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "Human-readable schema name (used as identifier)", "title": "Name", "type": "string"}, "content": {"default": "", "description": "Markdown documentation and instructions for the schema", "title": "Content", "type": "string"}, "spec": {"additionalProperties": true, "description": "JsonSchema specification defining the agent structure and capabilities", "title": "Spec", "type": "object"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Schema category distinguishing schema types. Values: ''agent'' (AI agents), ''evaluator'' (LLM-as-a-Judge evaluators). Maps directly from json_schema_extra.kind field during ingestion.", "title": "Category"}, "provider_configs": {"description": "Optional provider configurations for multi-provider testing. Each dict has ''provider_name'' and ''model_name''. Example: [{''provider_name'': ''anthropic'', ''model_name'': ''claude-sonnet-4-5''}]", "items": {"additionalProperties": true, "type": "object"}, "title": "Provider Configs", "type": "array"}, "embedding_fields": {"description": "JSON paths in extracted_data to embed for semantic search. Example: [''summary'', ''candidate_name'', ''skills''] for CV extraction. Values will be concatenated and embedded using configured embedding provider.", "items": {"type": "string"}, "title": "Embedding Fields", "type": "array"}}, "required": ["name", "spec"], "json_schema_extra": {"table_name": "schemas", "entity_key_field": "id", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.schema.Schema", "tools": ["search_rem"], "default_search_table": "schemas", "has_embeddings": true}}'::jsonb,
|
|
2666
|
+
'entity',
|
|
2667
|
+
'{"table_name": "schemas", "entity_key_field": "id", "embedding_fields": ["content"], "fqn": "rem.models.entities.schema.Schema"}'::jsonb
|
|
2668
|
+
)
|
|
2669
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
2670
|
+
name = EXCLUDED.name,
|
|
2671
|
+
content = EXCLUDED.content,
|
|
2672
|
+
spec = EXCLUDED.spec,
|
|
2673
|
+
category = EXCLUDED.category,
|
|
2674
|
+
metadata = EXCLUDED.metadata,
|
|
2675
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
2676
|
+
|
|
2677
|
+
-- Schema entry for Session (sessions)
|
|
2678
|
+
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
|
|
2679
|
+
VALUES (
|
|
2680
|
+
'5893fbca-2d8e-5402-ac41-7bac2c0c472a'::uuid,
|
|
2681
|
+
'system',
|
|
2682
|
+
'Session',
|
|
2683
|
+
'# Session
|
|
2684
|
+
|
|
2685
|
+
|
|
2686
|
+
Conversation session container.
|
|
2687
|
+
|
|
2688
|
+
Groups messages together and supports different modes for normal conversations
|
|
2689
|
+
and evaluation/experimentation scenarios.
|
|
2690
|
+
|
|
2691
|
+
For evaluation sessions, stores:
|
|
2692
|
+
- original_trace_id: Reference to the original session being evaluated
|
|
2693
|
+
- settings_overrides: Model, temperature, prompt overrides
|
|
2694
|
+
- prompt: Custom prompt being tested
|
|
2695
|
+
|
|
2696
|
+
Default sessions are lightweight - just a session_id on messages.
|
|
2697
|
+
Special sessions store additional metadata for experiments.
|
|
2698
|
+
|
|
2699
|
+
|
|
2700
|
+
## Overview
|
|
2701
|
+
|
|
2702
|
+
The `Session` entity is stored in the `sessions` table. Each record is uniquely
|
|
2703
|
+
identified by its `name` field for lookups and graph traversal.
|
|
2704
|
+
|
|
2705
|
+
## Search Capabilities
|
|
2706
|
+
|
|
2707
|
+
This schema includes the `search_rem` tool which supports:
|
|
2708
|
+
- **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
|
|
2709
|
+
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
2710
|
+
- **SEARCH**: Semantic vector search on description (e.g., `SEARCH "concept" FROM sessions LIMIT 10`)
|
|
2711
|
+
- **SQL**: Complex queries (e.g., `SELECT * FROM sessions WHERE ...`)
|
|
2712
|
+
|
|
2713
|
+
## Table Info
|
|
2714
|
+
|
|
2715
|
+
| Property | Value |
|
|
2716
|
+
|----------|-------|
|
|
2717
|
+
| Table | `sessions` |
|
|
2718
|
+
| Entity Key | `name` |
|
|
2719
|
+
| Embedding Fields | `description` |
|
|
2720
|
+
| Tools | `search_rem` |
|
|
2721
|
+
|
|
2722
|
+
## Fields
|
|
2723
|
+
|
|
2724
|
+
### `id`
|
|
2725
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
2726
|
+
- **Optional**
|
|
2727
|
+
- Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
|
|
2728
|
+
|
|
2729
|
+
### `created_at`
|
|
2730
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2731
|
+
- **Optional**
|
|
2732
|
+
- Entity creation timestamp
|
|
2733
|
+
|
|
2734
|
+
### `updated_at`
|
|
2735
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2736
|
+
- **Optional**
|
|
2737
|
+
- Last update timestamp
|
|
2738
|
+
|
|
2739
|
+
### `deleted_at`
|
|
2740
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
2741
|
+
- **Optional**
|
|
2742
|
+
- Soft deletion timestamp
|
|
2743
|
+
|
|
2744
|
+
### `tenant_id`
|
|
2745
|
+
- **Type**: `typing.Optional[str]`
|
|
2746
|
+
- **Optional**
|
|
2747
|
+
- Tenant identifier for multi-tenancy isolation
|
|
2748
|
+
|
|
2749
|
+
### `user_id`
|
|
2750
|
+
- **Type**: `typing.Optional[str]`
|
|
2751
|
+
- **Optional**
|
|
2752
|
+
- Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
|
|
2753
|
+
|
|
2754
|
+
### `graph_edges`
|
|
2755
|
+
- **Type**: `list[dict]`
|
|
2756
|
+
- **Optional**
|
|
2757
|
+
- Knowledge graph edges stored as InlineEdge dicts
|
|
2758
|
+
|
|
2759
|
+
### `metadata`
|
|
2760
|
+
- **Type**: `<class ''dict''>`
|
|
2761
|
+
- **Optional**
|
|
2762
|
+
- Flexible metadata storage
|
|
2763
|
+
|
|
2764
|
+
### `tags`
|
|
2765
|
+
- **Type**: `list[str]`
|
|
2766
|
+
- **Optional**
|
|
2767
|
+
- Entity tags
|
|
2768
|
+
|
|
2769
|
+
### `name`
|
|
2770
|
+
- **Type**: `<class ''str''>`
|
|
2771
|
+
- **Required**
|
|
2772
|
+
- Session name/identifier
|
|
2773
|
+
|
|
2774
|
+
### `mode`
|
|
2775
|
+
- **Type**: `<enum ''SessionMode''>`
|
|
2776
|
+
- **Optional**
|
|
2777
|
+
- Session mode: ''normal'' or ''evaluation''
|
|
2778
|
+
|
|
2779
|
+
### `description`
|
|
2780
|
+
- **Type**: `str | None`
|
|
2781
|
+
- **Optional**
|
|
2782
|
+
- Optional session description
|
|
2783
|
+
|
|
2784
|
+
### `original_trace_id`
|
|
2785
|
+
- **Type**: `str | None`
|
|
2786
|
+
- **Optional**
|
|
2787
|
+
- For evaluation mode: ID of the original session/trace being evaluated
|
|
2788
|
+
|
|
2789
|
+
### `settings_overrides`
|
|
2790
|
+
- **Type**: `dict | None`
|
|
2791
|
+
- **Optional**
|
|
2792
|
+
- Settings overrides (model, temperature, max_tokens, system_prompt)
|
|
2793
|
+
|
|
2794
|
+
### `prompt`
|
|
2795
|
+
- **Type**: `str | None`
|
|
2796
|
+
- **Optional**
|
|
2797
|
+
- Custom prompt for this session (can override agent prompt)
|
|
2798
|
+
|
|
2799
|
+
### `agent_schema_uri`
|
|
2800
|
+
- **Type**: `str | None`
|
|
2801
|
+
- **Optional**
|
|
2802
|
+
- Agent schema used for this session
|
|
2803
|
+
|
|
2804
|
+
### `message_count`
|
|
2805
|
+
- **Type**: `<class ''int''>`
|
|
2806
|
+
- **Optional**
|
|
2807
|
+
- Number of messages in this session
|
|
2808
|
+
|
|
2809
|
+
### `total_tokens`
|
|
2810
|
+
- **Type**: `int | None`
|
|
2811
|
+
- **Optional**
|
|
2812
|
+
- Total tokens used in this session
|
|
2813
|
+
|
|
2814
|
+
',
|
|
2815
|
+
'{"type": "object", "description": "\n Conversation session container.\n\n Groups messages together and supports different modes for normal conversations\n and evaluation/experimentation scenarios.\n\n For evaluation sessions, stores:\n - original_trace_id: Reference to the original session being evaluated\n - settings_overrides: Model, temperature, prompt overrides\n - prompt: Custom prompt being tested\n\n Default sessions are lightweight - just a session_id on messages.\n Special sessions store additional metadata for experiments.\n \n\nThis agent can search the `sessions` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "Session name/identifier", "entity_key": true, "title": "Name", "type": "string"}, "mode": {"$ref": "#/$defs/SessionMode", "default": "normal", "description": "Session mode: ''normal'' or ''evaluation''"}, "description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Optional session description", "title": "Description"}, "original_trace_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "For evaluation mode: ID of the original session/trace being evaluated", "title": "Original Trace Id"}, "settings_overrides": {"anyOf": [{"additionalProperties": true, "type": "object"}, {"type": "null"}], "default": null, "description": "Settings overrides (model, temperature, max_tokens, system_prompt)", "title": "Settings Overrides"}, "prompt": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Custom prompt for this session (can override agent prompt)", "title": "Prompt"}, "agent_schema_uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Agent schema used for this session", "title": "Agent Schema Uri"}, "message_count": {"default": 0, "description": "Number of messages in this session", "title": "Message Count", "type": "integer"}, "total_tokens": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "Total tokens used in this session", "title": "Total Tokens"}}, "required": ["name"], "json_schema_extra": {"table_name": "sessions", "entity_key_field": "name", "embedding_fields": ["description"], "fully_qualified_name": "rem.models.entities.session.Session", "tools": ["search_rem"], "default_search_table": "sessions", "has_embeddings": true}}'::jsonb,
|
|
2816
|
+
'entity',
|
|
2817
|
+
'{"table_name": "sessions", "entity_key_field": "name", "embedding_fields": ["description"], "fqn": "rem.models.entities.session.Session"}'::jsonb
|
|
2818
|
+
)
|
|
2819
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
2820
|
+
name = EXCLUDED.name,
|
|
2821
|
+
content = EXCLUDED.content,
|
|
2822
|
+
spec = EXCLUDED.spec,
|
|
2823
|
+
category = EXCLUDED.category,
|
|
2824
|
+
metadata = EXCLUDED.metadata,
|
|
2825
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
2826
|
+
|
|
2827
|
+
-- Schema entry for SharedSession (shared_sessions)
|
|
2828
|
+
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
|
|
2829
|
+
VALUES (
|
|
2830
|
+
'be5c5711-6c45-5fc4-9cd1-e076599261c7'::uuid,
|
|
2831
|
+
'system',
|
|
2832
|
+
'SharedSession',
|
|
2833
|
+
'# SharedSession
|
|
2834
|
+
|
|
2835
|
+
|
|
2836
|
+
Session sharing record between users.
|
|
2837
|
+
|
|
2838
|
+
Links a session (identified by session_id from Message records) to a
|
|
2839
|
+
recipient user, enabling collaborative access to conversation history.
|
|
2840
|
+
|
|
2841
|
+
|
|
2842
|
+
## Overview
|
|
2843
|
+
|
|
2844
|
+
The `SharedSession` entity is stored in the `shared_sessions` table. Each record is uniquely
|
|
2845
|
+
identified by its `id` field for lookups and graph traversal.
|
|
2846
|
+
|
|
2847
|
+
## Search Capabilities
|
|
2848
|
+
|
|
2849
|
+
This schema includes the `search_rem` tool which supports:
|
|
2850
|
+
- **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
|
|
2851
|
+
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
2852
|
+
- **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM shared_sessions LIMIT 10`)
|
|
2853
|
+
- **SQL**: Complex queries (e.g., `SELECT * FROM shared_sessions WHERE ...`)
|
|
2854
|
+
|
|
2855
|
+
## Table Info
|
|
2856
|
+
|
|
2857
|
+
| Property | Value |
|
|
2858
|
+
|----------|-------|
|
|
2859
|
+
| Table | `shared_sessions` |
|
|
2860
|
+
| Entity Key | `id` |
|
|
2861
|
+
| Embedding Fields | None |
|
|
2862
|
+
| Tools | `search_rem` |
|
|
2863
|
+
|
|
2864
|
+
## Fields
|
|
2865
|
+
|
|
2866
|
+
### `id`
|
|
2867
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
2868
|
+
- **Optional**
|
|
2869
|
+
- Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
|
|
2870
|
+
|
|
2871
|
+
### `created_at`
|
|
2872
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2873
|
+
- **Optional**
|
|
2874
|
+
- Entity creation timestamp
|
|
2875
|
+
|
|
2876
|
+
### `updated_at`
|
|
2877
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2878
|
+
- **Optional**
|
|
2879
|
+
- Last update timestamp
|
|
2880
|
+
|
|
2881
|
+
### `deleted_at`
|
|
2882
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
2883
|
+
- **Optional**
|
|
2884
|
+
- Soft deletion timestamp
|
|
2885
|
+
|
|
2886
|
+
### `tenant_id`
|
|
2887
|
+
- **Type**: `typing.Optional[str]`
|
|
2888
|
+
- **Optional**
|
|
2889
|
+
- Tenant identifier for multi-tenancy isolation
|
|
2890
|
+
|
|
2891
|
+
### `user_id`
|
|
2892
|
+
- **Type**: `typing.Optional[str]`
|
|
2893
|
+
- **Optional**
|
|
2894
|
+
- Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
|
|
2895
|
+
|
|
2896
|
+
### `graph_edges`
|
|
2897
|
+
- **Type**: `list[dict]`
|
|
2898
|
+
- **Optional**
|
|
2899
|
+
- Knowledge graph edges stored as InlineEdge dicts
|
|
2900
|
+
|
|
2901
|
+
### `metadata`
|
|
2902
|
+
- **Type**: `<class ''dict''>`
|
|
2903
|
+
- **Optional**
|
|
2904
|
+
- Flexible metadata storage
|
|
2905
|
+
|
|
2906
|
+
### `tags`
|
|
2907
|
+
- **Type**: `list[str]`
|
|
2908
|
+
- **Optional**
|
|
2909
|
+
- Entity tags
|
|
2910
|
+
|
|
2911
|
+
### `session_id`
|
|
2912
|
+
- **Type**: `<class ''str''>`
|
|
2913
|
+
- **Required**
|
|
2914
|
+
- The session being shared (matches Message.session_id)
|
|
2915
|
+
|
|
2916
|
+
### `owner_user_id`
|
|
2917
|
+
- **Type**: `<class ''str''>`
|
|
2918
|
+
- **Required**
|
|
2919
|
+
- User ID of the session owner (the sharer)
|
|
2920
|
+
|
|
2921
|
+
### `shared_with_user_id`
|
|
2922
|
+
- **Type**: `<class ''str''>`
|
|
2923
|
+
- **Required**
|
|
2924
|
+
- User ID of the recipient (who can now view the session)
|
|
2925
|
+
|
|
2926
|
+
',
|
|
2927
|
+
'{"type": "object", "description": "\n Session sharing record between users.\n\n Links a session (identified by session_id from Message records) to a\n recipient user, enabling collaborative access to conversation history.\n \n\nThis agent can search the `shared_sessions` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "session_id": {"description": "The session being shared (matches Message.session_id)", "title": "Session Id", "type": "string"}, "owner_user_id": {"description": "User ID of the session owner (the sharer)", "title": "Owner User Id", "type": "string"}, "shared_with_user_id": {"description": "User ID of the recipient (who can now view the session)", "title": "Shared With User Id", "type": "string"}}, "required": ["session_id", "owner_user_id", "shared_with_user_id"], "json_schema_extra": {"table_name": "shared_sessions", "entity_key_field": "id", "embedding_fields": [], "fully_qualified_name": "rem.models.entities.shared_session.SharedSession", "tools": ["search_rem"], "default_search_table": "shared_sessions", "has_embeddings": false}}'::jsonb,
|
|
2928
|
+
'entity',
|
|
2929
|
+
'{"table_name": "shared_sessions", "entity_key_field": "id", "embedding_fields": [], "fqn": "rem.models.entities.shared_session.SharedSession"}'::jsonb
|
|
2930
|
+
)
|
|
2931
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
2932
|
+
name = EXCLUDED.name,
|
|
2933
|
+
content = EXCLUDED.content,
|
|
2934
|
+
spec = EXCLUDED.spec,
|
|
2935
|
+
category = EXCLUDED.category,
|
|
2936
|
+
metadata = EXCLUDED.metadata,
|
|
2937
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
2938
|
+
|
|
2939
|
+
-- Schema entry for User (users)
|
|
2940
|
+
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
|
|
2941
|
+
VALUES (
|
|
2942
|
+
'1ad3d95e-32e9-54d6-ad7d-e39b9ed5018b'::uuid,
|
|
2943
|
+
'system',
|
|
2944
|
+
'User',
|
|
2945
|
+
'# User
|
|
2946
|
+
|
|
2947
|
+
|
|
2948
|
+
User entity.
|
|
2949
|
+
|
|
2950
|
+
Represents people in the REM system, either as active users
|
|
2951
|
+
or entities extracted from content. Tenant isolation is provided
|
|
2952
|
+
via CoreModel.tenant_id field.
|
|
2953
|
+
|
|
2954
|
+
Enhanced by dreaming worker:
|
|
2955
|
+
- summary: Generated from activity analysis
|
|
2956
|
+
- interests: Extracted from resources and sessions
|
|
2957
|
+
- activity_level: Computed from recent engagement
|
|
2958
|
+
- preferred_topics: Extracted from moment/resource topics
|
|
2959
|
+
|
|
2960
|
+
|
|
2961
|
+
## Overview
|
|
2962
|
+
|
|
2963
|
+
The `User` entity is stored in the `users` table. Each record is uniquely
|
|
2964
|
+
identified by its `name` field for lookups and graph traversal.
|
|
2965
|
+
|
|
2966
|
+
## Search Capabilities
|
|
2967
|
+
|
|
2968
|
+
This schema includes the `search_rem` tool which supports:
|
|
2969
|
+
- **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
|
|
2970
|
+
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
2971
|
+
- **SEARCH**: Semantic vector search on summary (e.g., `SEARCH "concept" FROM users LIMIT 10`)
|
|
2972
|
+
- **SQL**: Complex queries (e.g., `SELECT * FROM users WHERE ...`)
|
|
2973
|
+
|
|
2974
|
+
## Table Info
|
|
2975
|
+
|
|
2976
|
+
| Property | Value |
|
|
2977
|
+
|----------|-------|
|
|
2978
|
+
| Table | `users` |
|
|
2979
|
+
| Entity Key | `name` |
|
|
2980
|
+
| Embedding Fields | `summary` |
|
|
2981
|
+
| Tools | `search_rem` |
|
|
2982
|
+
|
|
2983
|
+
## Fields
|
|
2984
|
+
|
|
2985
|
+
### `id`
|
|
2986
|
+
- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
|
|
2987
|
+
- **Optional**
|
|
2988
|
+
- Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
|
|
2989
|
+
|
|
2990
|
+
### `created_at`
|
|
2991
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2992
|
+
- **Optional**
|
|
2993
|
+
- Entity creation timestamp
|
|
2994
|
+
|
|
2995
|
+
### `updated_at`
|
|
2996
|
+
- **Type**: `<class ''datetime.datetime''>`
|
|
2997
|
+
- **Optional**
|
|
2998
|
+
- Last update timestamp
|
|
2999
|
+
|
|
3000
|
+
### `deleted_at`
|
|
3001
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
3002
|
+
- **Optional**
|
|
3003
|
+
- Soft deletion timestamp
|
|
3004
|
+
|
|
3005
|
+
### `tenant_id`
|
|
3006
|
+
- **Type**: `typing.Optional[str]`
|
|
3007
|
+
- **Optional**
|
|
3008
|
+
- Tenant identifier for multi-tenancy isolation
|
|
3009
|
+
|
|
3010
|
+
### `user_id`
|
|
3011
|
+
- **Type**: `typing.Optional[str]`
|
|
3012
|
+
- **Optional**
|
|
3013
|
+
- Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
|
|
3014
|
+
|
|
3015
|
+
### `graph_edges`
|
|
3016
|
+
- **Type**: `list[dict]`
|
|
3017
|
+
- **Optional**
|
|
3018
|
+
- Knowledge graph edges stored as InlineEdge dicts
|
|
3019
|
+
|
|
3020
|
+
### `metadata`
|
|
3021
|
+
- **Type**: `<class ''dict''>`
|
|
3022
|
+
- **Optional**
|
|
3023
|
+
- Flexible metadata storage
|
|
3024
|
+
|
|
3025
|
+
### `tags`
|
|
3026
|
+
- **Type**: `list[str]`
|
|
3027
|
+
- **Optional**
|
|
3028
|
+
- Entity tags
|
|
3029
|
+
|
|
3030
|
+
### `name`
|
|
3031
|
+
- **Type**: `<class ''str''>`
|
|
3032
|
+
- **Required**
|
|
3033
|
+
- User name (human-readable, used as graph label)
|
|
3034
|
+
|
|
3035
|
+
### `email`
|
|
3036
|
+
- **Type**: `typing.Optional[str]`
|
|
3037
|
+
- **Optional**
|
|
3038
|
+
- User email address
|
|
3039
|
+
|
|
3040
|
+
### `role`
|
|
3041
|
+
- **Type**: `typing.Optional[str]`
|
|
3042
|
+
- **Optional**
|
|
3043
|
+
- User role (employee, contractor, external, etc.)
|
|
3044
|
+
|
|
3045
|
+
### `tier`
|
|
3046
|
+
- **Type**: `<enum ''UserTier''>`
|
|
3047
|
+
- **Optional**
|
|
3048
|
+
- User subscription tier (free, basic, pro) for feature gating
|
|
3049
|
+
|
|
3050
|
+
### `anonymous_ids`
|
|
3051
|
+
- **Type**: `list[str]`
|
|
3052
|
+
- **Optional**
|
|
3053
|
+
- Linked anonymous session IDs used for merging history
|
|
3054
|
+
|
|
3055
|
+
### `sec_policy`
|
|
3056
|
+
- **Type**: `<class ''dict''>`
|
|
3057
|
+
- **Optional**
|
|
3058
|
+
- Security policy configuration (JSON, extensible for custom policies)
|
|
3059
|
+
|
|
3060
|
+
### `summary`
|
|
3061
|
+
- **Type**: `typing.Optional[str]`
|
|
3062
|
+
- **Optional**
|
|
3063
|
+
- LLM-generated user profile summary (updated by dreaming worker)
|
|
3064
|
+
|
|
3065
|
+
### `interests`
|
|
3066
|
+
- **Type**: `list[str]`
|
|
3067
|
+
- **Optional**
|
|
3068
|
+
- User interests extracted from activity
|
|
3069
|
+
|
|
3070
|
+
### `preferred_topics`
|
|
3071
|
+
- **Type**: `list[str]`
|
|
3072
|
+
- **Optional**
|
|
3073
|
+
- Frequently discussed topics in kebab-case
|
|
3074
|
+
|
|
3075
|
+
### `activity_level`
|
|
3076
|
+
- **Type**: `typing.Optional[str]`
|
|
3077
|
+
- **Optional**
|
|
3078
|
+
- Activity level: active, moderate, inactive
|
|
3079
|
+
|
|
3080
|
+
### `last_active_at`
|
|
3081
|
+
- **Type**: `typing.Optional[datetime.datetime]`
|
|
3082
|
+
- **Optional**
|
|
3083
|
+
- Last activity timestamp
|
|
3084
|
+
|
|
3085
|
+
',
|
|
3086
|
+
'{"type": "object", "description": "\n User entity.\n\n Represents people in the REM system, either as active users\n or entities extracted from content. Tenant isolation is provided\n via CoreModel.tenant_id field.\n\n Enhanced by dreaming worker:\n - summary: Generated from activity analysis\n - interests: Extracted from resources and sessions\n - activity_level: Computed from recent engagement\n - preferred_topics: Extracted from moment/resource topics\n \n\nThis agent can search the `users` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "User name (human-readable, used as graph label)", "entity_key": true, "title": "Name", "type": "string"}, "email": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "User email address", "title": "Email"}, "role": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "User role (employee, contractor, external, etc.)", "title": "Role"}, "tier": {"$ref": "#/$defs/UserTier", "default": "free", "description": "User subscription tier (free, basic, pro) for feature gating"}, "anonymous_ids": {"description": "Linked anonymous session IDs used for merging history", "items": {"type": "string"}, "title": "Anonymous Ids", "type": "array"}, "sec_policy": {"additionalProperties": true, "description": "Security policy configuration (JSON, extensible for custom policies)", "title": "Sec Policy", "type": "object"}, "summary": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "LLM-generated user profile summary (updated by dreaming worker)", "title": "Summary"}, "interests": {"description": "User interests extracted from activity", "items": {"type": "string"}, "title": "Interests", "type": "array"}, "preferred_topics": {"description": "Frequently discussed topics in kebab-case", "items": {"type": "string"}, "title": "Preferred Topics", "type": "array"}, "activity_level": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Activity level: active, moderate, inactive", "title": "Activity Level"}, "last_active_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Last activity timestamp", "title": "Last Active At"}}, "required": ["name"], "json_schema_extra": {"table_name": "users", "entity_key_field": "name", "embedding_fields": ["summary"], "fully_qualified_name": "rem.models.entities.user.User", "tools": ["search_rem"], "default_search_table": "users", "has_embeddings": true}}'::jsonb,
|
|
3087
|
+
'entity',
|
|
3088
|
+
'{"table_name": "users", "entity_key_field": "name", "embedding_fields": ["summary"], "fqn": "rem.models.entities.user.User"}'::jsonb
|
|
3089
|
+
)
|
|
3090
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
3091
|
+
name = EXCLUDED.name,
|
|
3092
|
+
content = EXCLUDED.content,
|
|
3093
|
+
spec = EXCLUDED.spec,
|
|
3094
|
+
category = EXCLUDED.category,
|
|
3095
|
+
metadata = EXCLUDED.metadata,
|
|
3096
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
1038
3097
|
|
|
1039
3098
|
-- ============================================================================
|
|
1040
3099
|
-- RECORD MIGRATION
|
|
@@ -1049,154 +3108,22 @@ SET applied_at = CURRENT_TIMESTAMP,
|
|
|
1049
3108
|
DO $$
|
|
1050
3109
|
BEGIN
|
|
1051
3110
|
RAISE NOTICE '============================================================';
|
|
1052
|
-
RAISE NOTICE 'REM Model Schema Applied:
|
|
3111
|
+
RAISE NOTICE 'REM Model Schema Applied: 12 tables';
|
|
1053
3112
|
RAISE NOTICE '============================================================';
|
|
3113
|
+
RAISE NOTICE ' ✓ feedbacks';
|
|
1054
3114
|
RAISE NOTICE ' ✓ files (1 embeddable fields)';
|
|
1055
3115
|
RAISE NOTICE ' ✓ image_resources (1 embeddable fields)';
|
|
1056
3116
|
RAISE NOTICE ' ✓ messages (1 embeddable fields)';
|
|
1057
3117
|
RAISE NOTICE ' ✓ moments (1 embeddable fields)';
|
|
1058
3118
|
RAISE NOTICE ' ✓ ontologies';
|
|
1059
3119
|
RAISE NOTICE ' ✓ ontology_configs (1 embeddable fields)';
|
|
1060
|
-
RAISE NOTICE ' ✓ persons';
|
|
1061
3120
|
RAISE NOTICE ' ✓ resources (1 embeddable fields)';
|
|
1062
3121
|
RAISE NOTICE ' ✓ schemas (1 embeddable fields)';
|
|
3122
|
+
RAISE NOTICE ' ✓ sessions (1 embeddable fields)';
|
|
3123
|
+
RAISE NOTICE ' ✓ shared_sessions';
|
|
1063
3124
|
RAISE NOTICE ' ✓ users (1 embeddable fields)';
|
|
1064
3125
|
RAISE NOTICE '';
|
|
1065
3126
|
RAISE NOTICE 'Next: Run background indexes if needed';
|
|
1066
3127
|
RAISE NOTICE ' rem db migrate --background-indexes';
|
|
1067
3128
|
RAISE NOTICE '============================================================';
|
|
1068
|
-
END $$;
|
|
1069
|
-
-- ============================================================================
|
|
1070
|
-
-- REM TRAVERSE (Graph Traversal)
|
|
1071
|
-
-- ============================================================================
|
|
1072
|
-
|
|
1073
|
-
-- REM TRAVERSE: Recursive graph traversal following edges
|
|
1074
|
-
-- Explores graph_edges starting from entity_key up to max_depth
|
|
1075
|
-
-- Uses cached kv_store.graph_edges for fast traversal (no polymorphic view!)
|
|
1076
|
-
-- When keys_only=false, automatically fetches full entity records
|
|
1077
|
-
CREATE OR REPLACE FUNCTION rem_traverse(
|
|
1078
|
-
p_entity_key VARCHAR(255),
|
|
1079
|
-
p_tenant_id VARCHAR(100), -- Backward compat parameter (not used for filtering)
|
|
1080
|
-
p_user_id VARCHAR(100),
|
|
1081
|
-
p_max_depth INTEGER DEFAULT 1,
|
|
1082
|
-
p_rel_type VARCHAR(100) DEFAULT NULL,
|
|
1083
|
-
p_keys_only BOOLEAN DEFAULT FALSE
|
|
1084
|
-
)
|
|
1085
|
-
RETURNS TABLE(
|
|
1086
|
-
depth INTEGER,
|
|
1087
|
-
entity_key VARCHAR(255),
|
|
1088
|
-
entity_type VARCHAR(100),
|
|
1089
|
-
entity_id UUID,
|
|
1090
|
-
rel_type VARCHAR(100),
|
|
1091
|
-
rel_weight REAL,
|
|
1092
|
-
path TEXT[],
|
|
1093
|
-
entity_record JSONB
|
|
1094
|
-
) AS $$
|
|
1095
|
-
DECLARE
|
|
1096
|
-
graph_keys RECORD;
|
|
1097
|
-
entities_by_table JSONB := '{}'::jsonb;
|
|
1098
|
-
table_keys JSONB;
|
|
1099
|
-
BEGIN
|
|
1100
|
-
-- First, build graph structure from KV store
|
|
1101
|
-
FOR graph_keys IN
|
|
1102
|
-
WITH RECURSIVE graph_traversal AS (
|
|
1103
|
-
-- Base case: Find starting entity
|
|
1104
|
-
SELECT
|
|
1105
|
-
0 AS depth,
|
|
1106
|
-
kv.entity_key,
|
|
1107
|
-
kv.entity_type,
|
|
1108
|
-
kv.entity_id,
|
|
1109
|
-
NULL::VARCHAR(100) AS rel_type,
|
|
1110
|
-
NULL::REAL AS rel_weight,
|
|
1111
|
-
ARRAY[kv.entity_key]::TEXT[] AS path
|
|
1112
|
-
FROM kv_store kv
|
|
1113
|
-
WHERE kv.user_id = p_user_id
|
|
1114
|
-
AND kv.entity_key = p_entity_key
|
|
1115
|
-
|
|
1116
|
-
UNION ALL
|
|
1117
|
-
|
|
1118
|
-
-- Recursive case: Follow outbound edges from discovered entities
|
|
1119
|
-
SELECT
|
|
1120
|
-
gt.depth + 1,
|
|
1121
|
-
target_kv.entity_key,
|
|
1122
|
-
target_kv.entity_type,
|
|
1123
|
-
target_kv.entity_id,
|
|
1124
|
-
(edge->>'rel_type')::VARCHAR(100) AS rel_type,
|
|
1125
|
-
COALESCE((edge->>'weight')::REAL, 1.0) AS rel_weight,
|
|
1126
|
-
gt.path || target_kv.entity_key AS path
|
|
1127
|
-
FROM graph_traversal gt
|
|
1128
|
-
-- Join to KV store to get source entity (with cached graph_edges!)
|
|
1129
|
-
JOIN kv_store source_kv ON source_kv.entity_key = gt.entity_key
|
|
1130
|
-
AND source_kv.user_id = p_user_id
|
|
1131
|
-
-- Extract edges directly from cached kv_store.graph_edges (NO polymorphic view!)
|
|
1132
|
-
CROSS JOIN LATERAL jsonb_array_elements(COALESCE(source_kv.graph_edges, '[]'::jsonb)) AS edge
|
|
1133
|
-
-- Lookup target entity in KV store
|
|
1134
|
-
JOIN kv_store target_kv ON target_kv.entity_key = (edge->>'dst')::VARCHAR(255)
|
|
1135
|
-
AND target_kv.user_id = p_user_id
|
|
1136
|
-
WHERE gt.depth < p_max_depth
|
|
1137
|
-
-- Filter by relationship type if specified
|
|
1138
|
-
AND (p_rel_type IS NULL OR (edge->>'rel_type')::VARCHAR(100) = p_rel_type)
|
|
1139
|
-
-- Prevent cycles by checking path
|
|
1140
|
-
AND NOT (target_kv.entity_key = ANY(gt.path))
|
|
1141
|
-
)
|
|
1142
|
-
SELECT DISTINCT ON (entity_key)
|
|
1143
|
-
gt.depth,
|
|
1144
|
-
gt.entity_key,
|
|
1145
|
-
gt.entity_type,
|
|
1146
|
-
gt.entity_id,
|
|
1147
|
-
gt.rel_type,
|
|
1148
|
-
gt.rel_weight,
|
|
1149
|
-
gt.path
|
|
1150
|
-
FROM graph_traversal gt
|
|
1151
|
-
WHERE gt.depth > 0 -- Exclude starting entity
|
|
1152
|
-
ORDER BY gt.entity_key, gt.depth
|
|
1153
|
-
LOOP
|
|
1154
|
-
IF p_keys_only THEN
|
|
1155
|
-
-- Return just graph structure (no entity_record)
|
|
1156
|
-
depth := graph_keys.depth;
|
|
1157
|
-
entity_key := graph_keys.entity_key;
|
|
1158
|
-
entity_type := graph_keys.entity_type;
|
|
1159
|
-
entity_id := graph_keys.entity_id;
|
|
1160
|
-
rel_type := graph_keys.rel_type;
|
|
1161
|
-
rel_weight := graph_keys.rel_weight;
|
|
1162
|
-
path := graph_keys.path;
|
|
1163
|
-
entity_record := NULL;
|
|
1164
|
-
RETURN NEXT;
|
|
1165
|
-
ELSE
|
|
1166
|
-
-- Build JSONB mapping {table: [keys]} for batch fetch
|
|
1167
|
-
IF entities_by_table ? graph_keys.entity_type THEN
|
|
1168
|
-
table_keys := entities_by_table->graph_keys.entity_type;
|
|
1169
|
-
entities_by_table := jsonb_set(
|
|
1170
|
-
entities_by_table,
|
|
1171
|
-
ARRAY[graph_keys.entity_type],
|
|
1172
|
-
table_keys || jsonb_build_array(graph_keys.entity_key)
|
|
1173
|
-
);
|
|
1174
|
-
ELSE
|
|
1175
|
-
entities_by_table := jsonb_set(
|
|
1176
|
-
entities_by_table,
|
|
1177
|
-
ARRAY[graph_keys.entity_type],
|
|
1178
|
-
jsonb_build_array(graph_keys.entity_key)
|
|
1179
|
-
);
|
|
1180
|
-
END IF;
|
|
1181
|
-
END IF;
|
|
1182
|
-
END LOOP;
|
|
1183
|
-
|
|
1184
|
-
-- If keys_only=false, fetch full records using rem_fetch
|
|
1185
|
-
IF NOT p_keys_only AND entities_by_table != '{}'::jsonb THEN
|
|
1186
|
-
RETURN QUERY
|
|
1187
|
-
SELECT
|
|
1188
|
-
NULL::INTEGER AS depth,
|
|
1189
|
-
f.entity_key::VARCHAR(255),
|
|
1190
|
-
f.entity_type::VARCHAR(100),
|
|
1191
|
-
NULL::UUID AS entity_id,
|
|
1192
|
-
NULL::VARCHAR(100) AS rel_type,
|
|
1193
|
-
NULL::REAL AS rel_weight,
|
|
1194
|
-
NULL::TEXT[] AS path,
|
|
1195
|
-
f.entity_record
|
|
1196
|
-
FROM rem_fetch(entities_by_table, p_user_id) f;
|
|
1197
|
-
END IF;
|
|
1198
|
-
END;
|
|
1199
|
-
$$ LANGUAGE plpgsql STABLE;
|
|
1200
|
-
|
|
1201
|
-
COMMENT ON FUNCTION rem_traverse IS
|
|
1202
|
-
'REM TRAVERSE query: Recursive graph traversal using cached kv_store.graph_edges. When keys_only=false (default), automatically fetches full entity records via rem_fetch.';
|
|
3129
|
+
END $$;
|