remdb 0.3.242__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (235)
  1. rem/__init__.py +129 -0
  2. rem/agentic/README.md +760 -0
  3. rem/agentic/__init__.py +54 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +38 -0
  6. rem/agentic/agents/agent_manager.py +311 -0
  7. rem/agentic/agents/sse_simulator.py +502 -0
  8. rem/agentic/context.py +425 -0
  9. rem/agentic/context_builder.py +360 -0
  10. rem/agentic/llm_provider_models.py +301 -0
  11. rem/agentic/mcp/__init__.py +0 -0
  12. rem/agentic/mcp/tool_wrapper.py +273 -0
  13. rem/agentic/otel/__init__.py +5 -0
  14. rem/agentic/otel/setup.py +240 -0
  15. rem/agentic/providers/phoenix.py +926 -0
  16. rem/agentic/providers/pydantic_ai.py +854 -0
  17. rem/agentic/query.py +117 -0
  18. rem/agentic/query_helper.py +89 -0
  19. rem/agentic/schema.py +737 -0
  20. rem/agentic/serialization.py +245 -0
  21. rem/agentic/tools/__init__.py +5 -0
  22. rem/agentic/tools/rem_tools.py +242 -0
  23. rem/api/README.md +657 -0
  24. rem/api/deps.py +253 -0
  25. rem/api/main.py +460 -0
  26. rem/api/mcp_router/prompts.py +182 -0
  27. rem/api/mcp_router/resources.py +820 -0
  28. rem/api/mcp_router/server.py +243 -0
  29. rem/api/mcp_router/tools.py +1605 -0
  30. rem/api/middleware/tracking.py +172 -0
  31. rem/api/routers/admin.py +520 -0
  32. rem/api/routers/auth.py +898 -0
  33. rem/api/routers/chat/__init__.py +5 -0
  34. rem/api/routers/chat/child_streaming.py +394 -0
  35. rem/api/routers/chat/completions.py +702 -0
  36. rem/api/routers/chat/json_utils.py +76 -0
  37. rem/api/routers/chat/models.py +202 -0
  38. rem/api/routers/chat/otel_utils.py +33 -0
  39. rem/api/routers/chat/sse_events.py +546 -0
  40. rem/api/routers/chat/streaming.py +950 -0
  41. rem/api/routers/chat/streaming_utils.py +327 -0
  42. rem/api/routers/common.py +18 -0
  43. rem/api/routers/dev.py +87 -0
  44. rem/api/routers/feedback.py +276 -0
  45. rem/api/routers/messages.py +620 -0
  46. rem/api/routers/models.py +86 -0
  47. rem/api/routers/query.py +362 -0
  48. rem/api/routers/shared_sessions.py +422 -0
  49. rem/auth/README.md +258 -0
  50. rem/auth/__init__.py +36 -0
  51. rem/auth/jwt.py +367 -0
  52. rem/auth/middleware.py +318 -0
  53. rem/auth/providers/__init__.py +16 -0
  54. rem/auth/providers/base.py +376 -0
  55. rem/auth/providers/email.py +215 -0
  56. rem/auth/providers/google.py +163 -0
  57. rem/auth/providers/microsoft.py +237 -0
  58. rem/cli/README.md +517 -0
  59. rem/cli/__init__.py +8 -0
  60. rem/cli/commands/README.md +299 -0
  61. rem/cli/commands/__init__.py +3 -0
  62. rem/cli/commands/ask.py +549 -0
  63. rem/cli/commands/cluster.py +1808 -0
  64. rem/cli/commands/configure.py +495 -0
  65. rem/cli/commands/db.py +828 -0
  66. rem/cli/commands/dreaming.py +324 -0
  67. rem/cli/commands/experiments.py +1698 -0
  68. rem/cli/commands/mcp.py +66 -0
  69. rem/cli/commands/process.py +388 -0
  70. rem/cli/commands/query.py +109 -0
  71. rem/cli/commands/scaffold.py +47 -0
  72. rem/cli/commands/schema.py +230 -0
  73. rem/cli/commands/serve.py +106 -0
  74. rem/cli/commands/session.py +453 -0
  75. rem/cli/dreaming.py +363 -0
  76. rem/cli/main.py +123 -0
  77. rem/config.py +244 -0
  78. rem/mcp_server.py +41 -0
  79. rem/models/core/__init__.py +49 -0
  80. rem/models/core/core_model.py +70 -0
  81. rem/models/core/engram.py +333 -0
  82. rem/models/core/experiment.py +672 -0
  83. rem/models/core/inline_edge.py +132 -0
  84. rem/models/core/rem_query.py +246 -0
  85. rem/models/entities/__init__.py +68 -0
  86. rem/models/entities/domain_resource.py +38 -0
  87. rem/models/entities/feedback.py +123 -0
  88. rem/models/entities/file.py +57 -0
  89. rem/models/entities/image_resource.py +88 -0
  90. rem/models/entities/message.py +64 -0
  91. rem/models/entities/moment.py +123 -0
  92. rem/models/entities/ontology.py +181 -0
  93. rem/models/entities/ontology_config.py +131 -0
  94. rem/models/entities/resource.py +95 -0
  95. rem/models/entities/schema.py +87 -0
  96. rem/models/entities/session.py +84 -0
  97. rem/models/entities/shared_session.py +180 -0
  98. rem/models/entities/subscriber.py +175 -0
  99. rem/models/entities/user.py +93 -0
  100. rem/py.typed +0 -0
  101. rem/registry.py +373 -0
  102. rem/schemas/README.md +507 -0
  103. rem/schemas/__init__.py +6 -0
  104. rem/schemas/agents/README.md +92 -0
  105. rem/schemas/agents/core/agent-builder.yaml +235 -0
  106. rem/schemas/agents/core/moment-builder.yaml +178 -0
  107. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  108. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  109. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  110. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  111. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  112. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  113. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  114. rem/schemas/agents/examples/hello-world.yaml +37 -0
  115. rem/schemas/agents/examples/query.yaml +54 -0
  116. rem/schemas/agents/examples/simple.yaml +21 -0
  117. rem/schemas/agents/examples/test.yaml +29 -0
  118. rem/schemas/agents/rem.yaml +132 -0
  119. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  120. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  121. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  122. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  123. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  124. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  125. rem/services/__init__.py +18 -0
  126. rem/services/audio/INTEGRATION.md +308 -0
  127. rem/services/audio/README.md +376 -0
  128. rem/services/audio/__init__.py +15 -0
  129. rem/services/audio/chunker.py +354 -0
  130. rem/services/audio/transcriber.py +259 -0
  131. rem/services/content/README.md +1269 -0
  132. rem/services/content/__init__.py +5 -0
  133. rem/services/content/providers.py +760 -0
  134. rem/services/content/service.py +762 -0
  135. rem/services/dreaming/README.md +230 -0
  136. rem/services/dreaming/__init__.py +53 -0
  137. rem/services/dreaming/affinity_service.py +322 -0
  138. rem/services/dreaming/moment_service.py +251 -0
  139. rem/services/dreaming/ontology_service.py +54 -0
  140. rem/services/dreaming/user_model_service.py +297 -0
  141. rem/services/dreaming/utils.py +39 -0
  142. rem/services/email/__init__.py +10 -0
  143. rem/services/email/service.py +522 -0
  144. rem/services/email/templates.py +360 -0
  145. rem/services/embeddings/__init__.py +11 -0
  146. rem/services/embeddings/api.py +127 -0
  147. rem/services/embeddings/worker.py +435 -0
  148. rem/services/fs/README.md +662 -0
  149. rem/services/fs/__init__.py +62 -0
  150. rem/services/fs/examples.py +206 -0
  151. rem/services/fs/examples_paths.py +204 -0
  152. rem/services/fs/git_provider.py +935 -0
  153. rem/services/fs/local_provider.py +760 -0
  154. rem/services/fs/parsing-hooks-examples.md +172 -0
  155. rem/services/fs/paths.py +276 -0
  156. rem/services/fs/provider.py +460 -0
  157. rem/services/fs/s3_provider.py +1042 -0
  158. rem/services/fs/service.py +186 -0
  159. rem/services/git/README.md +1075 -0
  160. rem/services/git/__init__.py +17 -0
  161. rem/services/git/service.py +469 -0
  162. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  163. rem/services/phoenix/README.md +453 -0
  164. rem/services/phoenix/__init__.py +46 -0
  165. rem/services/phoenix/client.py +960 -0
  166. rem/services/phoenix/config.py +88 -0
  167. rem/services/phoenix/prompt_labels.py +477 -0
  168. rem/services/postgres/README.md +757 -0
  169. rem/services/postgres/__init__.py +49 -0
  170. rem/services/postgres/diff_service.py +599 -0
  171. rem/services/postgres/migration_service.py +427 -0
  172. rem/services/postgres/programmable_diff_service.py +635 -0
  173. rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
  174. rem/services/postgres/register_type.py +353 -0
  175. rem/services/postgres/repository.py +481 -0
  176. rem/services/postgres/schema_generator.py +661 -0
  177. rem/services/postgres/service.py +802 -0
  178. rem/services/postgres/sql_builder.py +355 -0
  179. rem/services/rate_limit.py +113 -0
  180. rem/services/rem/README.md +318 -0
  181. rem/services/rem/__init__.py +23 -0
  182. rem/services/rem/exceptions.py +71 -0
  183. rem/services/rem/executor.py +293 -0
  184. rem/services/rem/parser.py +180 -0
  185. rem/services/rem/queries.py +196 -0
  186. rem/services/rem/query.py +371 -0
  187. rem/services/rem/service.py +608 -0
  188. rem/services/session/README.md +374 -0
  189. rem/services/session/__init__.py +13 -0
  190. rem/services/session/compression.py +488 -0
  191. rem/services/session/pydantic_messages.py +310 -0
  192. rem/services/session/reload.py +85 -0
  193. rem/services/user_service.py +130 -0
  194. rem/settings.py +1877 -0
  195. rem/sql/background_indexes.sql +52 -0
  196. rem/sql/migrations/001_install.sql +983 -0
  197. rem/sql/migrations/002_install_models.sql +3157 -0
  198. rem/sql/migrations/003_optional_extensions.sql +326 -0
  199. rem/sql/migrations/004_cache_system.sql +282 -0
  200. rem/sql/migrations/005_schema_update.sql +145 -0
  201. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  202. rem/utils/AGENTIC_CHUNKING.md +597 -0
  203. rem/utils/README.md +628 -0
  204. rem/utils/__init__.py +61 -0
  205. rem/utils/agentic_chunking.py +622 -0
  206. rem/utils/batch_ops.py +343 -0
  207. rem/utils/chunking.py +108 -0
  208. rem/utils/clip_embeddings.py +276 -0
  209. rem/utils/constants.py +97 -0
  210. rem/utils/date_utils.py +228 -0
  211. rem/utils/dict_utils.py +98 -0
  212. rem/utils/embeddings.py +436 -0
  213. rem/utils/examples/embeddings_example.py +305 -0
  214. rem/utils/examples/sql_types_example.py +202 -0
  215. rem/utils/files.py +323 -0
  216. rem/utils/markdown.py +16 -0
  217. rem/utils/mime_types.py +158 -0
  218. rem/utils/model_helpers.py +492 -0
  219. rem/utils/schema_loader.py +649 -0
  220. rem/utils/sql_paths.py +146 -0
  221. rem/utils/sql_types.py +350 -0
  222. rem/utils/user_id.py +81 -0
  223. rem/utils/vision.py +325 -0
  224. rem/workers/README.md +506 -0
  225. rem/workers/__init__.py +7 -0
  226. rem/workers/db_listener.py +579 -0
  227. rem/workers/db_maintainer.py +74 -0
  228. rem/workers/dreaming.py +502 -0
  229. rem/workers/engram_processor.py +312 -0
  230. rem/workers/sqs_file_processor.py +193 -0
  231. rem/workers/unlogged_maintainer.py +463 -0
  232. remdb-0.3.242.dist-info/METADATA +1632 -0
  233. remdb-0.3.242.dist-info/RECORD +235 -0
  234. remdb-0.3.242.dist-info/WHEEL +4 -0
  235. remdb-0.3.242.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,3157 @@
1
+ -- REM Model Schema (install_models.sql)
2
+ -- Generated from Pydantic models
3
+ -- Source: model registry
4
+ -- Generated at: 2025-12-22T17:34:54.187339
5
+ --
6
+ -- DO NOT EDIT MANUALLY - Regenerate with: rem db schema generate
7
+ --
8
+ -- This script creates:
9
+ -- 1. Primary entity tables
10
+ -- 2. Embeddings tables (embeddings_<table>)
11
+ -- 3. KV_STORE triggers for cache maintenance
12
+ -- 4. Indexes (foreground only, background indexes separate)
13
+ -- 5. Schema table entries (for agent-like table access)
14
+
15
+ -- ============================================================================
16
+ -- PREREQUISITES CHECK: abort unless the kv_store table and the vector extension already exist. NOTE(review): the pg_tables lookup has no schemaname filter, so a kv_store table in any schema passes it
17
+ -- ============================================================================
18
+
19
+ DO $$
20
+ BEGIN
21
+ -- Check that install.sql has been run
22
+ IF NOT EXISTS (SELECT 1 FROM pg_tables WHERE tablename = 'kv_store') THEN
23
+ RAISE EXCEPTION 'KV_STORE table not found. Run migrations/001_install.sql first.';
24
+ END IF;
25
+
26
+ IF NOT EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'vector') THEN
27
+ RAISE EXCEPTION 'pgvector extension not found. Run migrations/001_install.sql first.';
28
+ END IF;
29
+
30
+ RAISE NOTICE 'Prerequisites check passed';
31
+ END $$;
32
+
33
+ -- ======================================================================
34
+ -- FEEDBACKS (Model: Feedback): base table plus B-tree indexes on tenant_id and user_id and GIN indexes on graph_edges, metadata and tags. Only session_id is NOT NULL besides the id primary key
35
+ -- ======================================================================
36
+
37
+ CREATE TABLE IF NOT EXISTS feedbacks (
38
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
39
+ tenant_id VARCHAR(100),
40
+ user_id VARCHAR(256),
41
+ session_id VARCHAR(256) NOT NULL,
42
+ message_id VARCHAR(256),
43
+ rating INTEGER,
44
+ categories TEXT[] DEFAULT ARRAY[]::TEXT[],
45
+ comment TEXT,
46
+ trace_id VARCHAR(256),
47
+ span_id VARCHAR(256),
48
+ phoenix_synced BOOLEAN,
49
+ phoenix_annotation_id VARCHAR(256),
50
+ annotator_kind VARCHAR(256),
51
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
52
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
53
+ deleted_at TIMESTAMP,
54
+ graph_edges JSONB DEFAULT '[]'::jsonb,
55
+ metadata JSONB DEFAULT '{}'::jsonb,
56
+ tags TEXT[] DEFAULT ARRAY[]::TEXT[]
57
+ );
58
+
59
+ CREATE INDEX IF NOT EXISTS idx_feedbacks_tenant ON feedbacks (tenant_id);
60
+ CREATE INDEX IF NOT EXISTS idx_feedbacks_user ON feedbacks (user_id);
61
+ CREATE INDEX IF NOT EXISTS idx_feedbacks_graph_edges ON feedbacks USING GIN (graph_edges);
62
+ CREATE INDEX IF NOT EXISTS idx_feedbacks_metadata ON feedbacks USING GIN (metadata);
63
+ CREATE INDEX IF NOT EXISTS idx_feedbacks_tags ON feedbacks USING GIN (tags);
64
+
65
+ -- KV_STORE trigger for feedbacks: mirrors each row into kv_store as entity_type feedbacks under the key normalize_key(id)
66
+ -- Trigger function to maintain KV_STORE for feedbacks: DELETE removes kv_store rows by entity_id, INSERT/UPDATE upserts on the (tenant_id, entity_key) conflict target and refreshes entity_id, user_id, metadata, graph_edges and updated_at. NOTE(review): entity_type is not refreshed on conflict - confirm keys cannot collide across tables
67
+ CREATE OR REPLACE FUNCTION fn_feedbacks_kv_store_upsert()
68
+ RETURNS TRIGGER AS $$
69
+ BEGIN
70
+ IF (TG_OP = 'DELETE') THEN
71
+ -- Remove from KV_STORE on delete
72
+ DELETE FROM kv_store
73
+ WHERE entity_id = OLD.id;
74
+ RETURN OLD;
75
+ ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
76
+ -- Upsert to KV_STORE (O(1) lookup by entity_key)
77
+ -- tenant_id can be NULL (meaning public/shared data)
78
+ INSERT INTO kv_store (
79
+ entity_key,
80
+ entity_type,
81
+ entity_id,
82
+ tenant_id,
83
+ user_id,
84
+ metadata,
85
+ graph_edges,
86
+ updated_at
87
+ ) VALUES (
88
+ normalize_key(NEW.id::VARCHAR),
89
+ 'feedbacks',
90
+ NEW.id,
91
+ NEW.tenant_id,
92
+ NEW.user_id,
93
+ NEW.metadata,
94
+ COALESCE(NEW.graph_edges, '[]'::jsonb),
95
+ CURRENT_TIMESTAMP
96
+ )
97
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
98
+ DO UPDATE SET
99
+ entity_id = EXCLUDED.entity_id,
100
+ user_id = EXCLUDED.user_id,
101
+ metadata = EXCLUDED.metadata,
102
+ graph_edges = EXCLUDED.graph_edges,
103
+ updated_at = CURRENT_TIMESTAMP;
104
+
105
+ RETURN NEW;
106
+ END IF;
107
+ END;
108
+ $$ LANGUAGE plpgsql;
109
+
110
+ -- Create trigger: dropped first so re-running the script replaces it, fires after every row-level INSERT, UPDATE or DELETE
111
+ DROP TRIGGER IF EXISTS trg_feedbacks_kv_store ON feedbacks;
112
+ CREATE TRIGGER trg_feedbacks_kv_store
113
+ AFTER INSERT OR UPDATE OR DELETE ON feedbacks
114
+ FOR EACH ROW EXECUTE FUNCTION fn_feedbacks_kv_store_upsert();
115
+
116
+ -- ======================================================================
117
+ -- FILES (Model: File): base table plus B-tree indexes on tenant_id and user_id and GIN indexes on graph_edges, metadata and tags. NOTE(review): timestamp is VARCHAR(256) here but TIMESTAMP in image_resources, and size_bytes is a 4-byte INTEGER - confirm both are intended
118
+ -- ======================================================================
119
+
120
+ CREATE TABLE IF NOT EXISTS files (
121
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
122
+ tenant_id VARCHAR(100),
123
+ user_id VARCHAR(256),
124
+ name VARCHAR(256) NOT NULL,
125
+ uri VARCHAR(256) NOT NULL,
126
+ content TEXT,
127
+ timestamp VARCHAR(256),
128
+ size_bytes INTEGER,
129
+ mime_type VARCHAR(256),
130
+ processing_status VARCHAR(256),
131
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
132
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
133
+ deleted_at TIMESTAMP,
134
+ graph_edges JSONB DEFAULT '[]'::jsonb,
135
+ metadata JSONB DEFAULT '{}'::jsonb,
136
+ tags TEXT[] DEFAULT ARRAY[]::TEXT[]
137
+ );
138
+
139
+ CREATE INDEX IF NOT EXISTS idx_files_tenant ON files (tenant_id);
140
+ CREATE INDEX IF NOT EXISTS idx_files_user ON files (user_id);
141
+ CREATE INDEX IF NOT EXISTS idx_files_graph_edges ON files USING GIN (graph_edges);
142
+ CREATE INDEX IF NOT EXISTS idx_files_metadata ON files USING GIN (metadata);
143
+ CREATE INDEX IF NOT EXISTS idx_files_tags ON files USING GIN (tags);
144
+
145
+ -- Embeddings for files: fixed 1536-dimension vectors keyed by (entity_id, field_name, provider), rows are cascade-deleted with the parent files row
146
+ CREATE TABLE IF NOT EXISTS embeddings_files (
147
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
148
+ entity_id UUID NOT NULL REFERENCES files(id) ON DELETE CASCADE,
149
+ field_name VARCHAR(100) NOT NULL,
150
+ provider VARCHAR(50) NOT NULL DEFAULT 'openai',
151
+ model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
152
+ embedding vector(1536) NOT NULL,
153
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
154
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
155
+
156
+ -- Unique: one embedding per entity per field per provider
157
+ UNIQUE (entity_id, field_name, provider)
158
+ );
159
+
160
+ -- Index for entity lookup (get all embeddings for entity). NOTE(review): the UNIQUE constraint above already yields an index led by entity_id - confirm this separate index is still needed
161
+ CREATE INDEX IF NOT EXISTS idx_embeddings_files_entity ON embeddings_files (entity_id);
162
+
163
+ -- Index for field + provider lookup
164
+ CREATE INDEX IF NOT EXISTS idx_embeddings_files_field_provider ON embeddings_files (field_name, provider);
165
+
166
+ -- HNSW index for vector similarity search (created in background)
167
+ -- Note: This will be created by background thread after data load
168
+ -- CREATE INDEX idx_embeddings_files_vector_hnsw ON embeddings_files
169
+ -- USING hnsw (embedding vector_cosine_ops);
170
+
171
+ -- KV_STORE trigger for files: mirrors each row into kv_store as entity_type files under the key normalize_key(name)
172
+ -- Trigger function to maintain KV_STORE for files: DELETE removes kv_store rows by entity_id, INSERT/UPDATE upserts on the (tenant_id, entity_key) conflict target and refreshes entity_id, user_id, metadata, graph_edges and updated_at. NOTE(review): an UPDATE that changes name does not remove the row stored under the previous key, and entity_type is not refreshed on conflict - confirm both are handled elsewhere
173
+ CREATE OR REPLACE FUNCTION fn_files_kv_store_upsert()
174
+ RETURNS TRIGGER AS $$
175
+ BEGIN
176
+ IF (TG_OP = 'DELETE') THEN
177
+ -- Remove from KV_STORE on delete
178
+ DELETE FROM kv_store
179
+ WHERE entity_id = OLD.id;
180
+ RETURN OLD;
181
+ ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
182
+ -- Upsert to KV_STORE (O(1) lookup by entity_key)
183
+ -- tenant_id can be NULL (meaning public/shared data)
184
+ INSERT INTO kv_store (
185
+ entity_key,
186
+ entity_type,
187
+ entity_id,
188
+ tenant_id,
189
+ user_id,
190
+ metadata,
191
+ graph_edges,
192
+ updated_at
193
+ ) VALUES (
194
+ normalize_key(NEW.name::VARCHAR),
195
+ 'files',
196
+ NEW.id,
197
+ NEW.tenant_id,
198
+ NEW.user_id,
199
+ NEW.metadata,
200
+ COALESCE(NEW.graph_edges, '[]'::jsonb),
201
+ CURRENT_TIMESTAMP
202
+ )
203
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
204
+ DO UPDATE SET
205
+ entity_id = EXCLUDED.entity_id,
206
+ user_id = EXCLUDED.user_id,
207
+ metadata = EXCLUDED.metadata,
208
+ graph_edges = EXCLUDED.graph_edges,
209
+ updated_at = CURRENT_TIMESTAMP;
210
+
211
+ RETURN NEW;
212
+ END IF;
213
+ END;
214
+ $$ LANGUAGE plpgsql;
215
+
216
+ -- Create trigger: dropped first so re-running the script replaces it, fires after every row-level INSERT, UPDATE or DELETE
217
+ DROP TRIGGER IF EXISTS trg_files_kv_store ON files;
218
+ CREATE TRIGGER trg_files_kv_store
219
+ AFTER INSERT OR UPDATE OR DELETE ON files
220
+ FOR EACH ROW EXECUTE FUNCTION fn_files_kv_store_upsert();
221
+
222
+ -- ======================================================================
223
+ -- IMAGE_RESOURCES (Model: ImageResource): base table plus B-tree indexes on tenant_id and user_id and GIN indexes on graph_edges, metadata and tags. Every column except id is nullable, and clip_embedding is stored as JSONB rather than as a vector column
224
+ -- ======================================================================
225
+
226
+ CREATE TABLE IF NOT EXISTS image_resources (
227
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
228
+ tenant_id VARCHAR(100),
229
+ user_id VARCHAR(256),
230
+ name VARCHAR(256),
231
+ uri VARCHAR(256),
232
+ ordinal INTEGER,
233
+ content TEXT,
234
+ timestamp TIMESTAMP,
235
+ category VARCHAR(256),
236
+ related_entities JSONB DEFAULT '{}'::jsonb,
237
+ image_width INTEGER,
238
+ image_height INTEGER,
239
+ image_format VARCHAR(256),
240
+ vision_description TEXT,
241
+ vision_provider VARCHAR(256),
242
+ vision_model VARCHAR(256),
243
+ clip_embedding JSONB,
244
+ clip_dimensions INTEGER,
245
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
246
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
247
+ deleted_at TIMESTAMP,
248
+ graph_edges JSONB DEFAULT '[]'::jsonb,
249
+ metadata JSONB DEFAULT '{}'::jsonb,
250
+ tags TEXT[] DEFAULT ARRAY[]::TEXT[]
251
+ );
252
+
253
+ CREATE INDEX IF NOT EXISTS idx_image_resources_tenant ON image_resources (tenant_id);
254
+ CREATE INDEX IF NOT EXISTS idx_image_resources_user ON image_resources (user_id);
255
+ CREATE INDEX IF NOT EXISTS idx_image_resources_graph_edges ON image_resources USING GIN (graph_edges);
256
+ CREATE INDEX IF NOT EXISTS idx_image_resources_metadata ON image_resources USING GIN (metadata);
257
+ CREATE INDEX IF NOT EXISTS idx_image_resources_tags ON image_resources USING GIN (tags);
258
+
259
+ -- Embeddings for image_resources: fixed 1536-dimension vectors keyed by (entity_id, field_name, provider), rows are cascade-deleted with the parent image_resources row
260
+ CREATE TABLE IF NOT EXISTS embeddings_image_resources (
261
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
262
+ entity_id UUID NOT NULL REFERENCES image_resources(id) ON DELETE CASCADE,
263
+ field_name VARCHAR(100) NOT NULL,
264
+ provider VARCHAR(50) NOT NULL DEFAULT 'openai',
265
+ model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
266
+ embedding vector(1536) NOT NULL,
267
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
268
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
269
+
270
+ -- Unique: one embedding per entity per field per provider
271
+ UNIQUE (entity_id, field_name, provider)
272
+ );
273
+
274
+ -- Index for entity lookup (get all embeddings for entity). NOTE(review): the UNIQUE constraint above already yields an index led by entity_id - confirm this separate index is still needed
275
+ CREATE INDEX IF NOT EXISTS idx_embeddings_image_resources_entity ON embeddings_image_resources (entity_id);
276
+
277
+ -- Index for field + provider lookup
278
+ CREATE INDEX IF NOT EXISTS idx_embeddings_image_resources_field_provider ON embeddings_image_resources (field_name, provider);
279
+
280
+ -- HNSW index for vector similarity search (created in background)
281
+ -- Note: This will be created by background thread after data load
282
+ -- CREATE INDEX idx_embeddings_image_resources_vector_hnsw ON embeddings_image_resources
283
+ -- USING hnsw (embedding vector_cosine_ops);
284
+
285
+ -- KV_STORE trigger for image_resources: mirrors each row into kv_store as entity_type image_resources under the key normalize_key(name). NOTE(review): name is nullable on image_resources - confirm normalize_key and kv_store accept a NULL key
286
+ -- Trigger function to maintain KV_STORE for image_resources: DELETE removes kv_store rows by entity_id, INSERT/UPDATE upserts on the (tenant_id, entity_key) conflict target and refreshes entity_id, user_id, metadata, graph_edges and updated_at. NOTE(review): an UPDATE that changes name does not remove the row stored under the previous key, and entity_type is not refreshed on conflict - confirm both are handled elsewhere
287
+ CREATE OR REPLACE FUNCTION fn_image_resources_kv_store_upsert()
288
+ RETURNS TRIGGER AS $$
289
+ BEGIN
290
+ IF (TG_OP = 'DELETE') THEN
291
+ -- Remove from KV_STORE on delete
292
+ DELETE FROM kv_store
293
+ WHERE entity_id = OLD.id;
294
+ RETURN OLD;
295
+ ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
296
+ -- Upsert to KV_STORE (O(1) lookup by entity_key)
297
+ -- tenant_id can be NULL (meaning public/shared data)
298
+ INSERT INTO kv_store (
299
+ entity_key,
300
+ entity_type,
301
+ entity_id,
302
+ tenant_id,
303
+ user_id,
304
+ metadata,
305
+ graph_edges,
306
+ updated_at
307
+ ) VALUES (
308
+ normalize_key(NEW.name::VARCHAR),
309
+ 'image_resources',
310
+ NEW.id,
311
+ NEW.tenant_id,
312
+ NEW.user_id,
313
+ NEW.metadata,
314
+ COALESCE(NEW.graph_edges, '[]'::jsonb),
315
+ CURRENT_TIMESTAMP
316
+ )
317
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
318
+ DO UPDATE SET
319
+ entity_id = EXCLUDED.entity_id,
320
+ user_id = EXCLUDED.user_id,
321
+ metadata = EXCLUDED.metadata,
322
+ graph_edges = EXCLUDED.graph_edges,
323
+ updated_at = CURRENT_TIMESTAMP;
324
+
325
+ RETURN NEW;
326
+ END IF;
327
+ END;
328
+ $$ LANGUAGE plpgsql;
329
+
330
+ -- Create trigger: dropped first so re-running the script replaces it, fires after every row-level INSERT, UPDATE or DELETE
331
+ DROP TRIGGER IF EXISTS trg_image_resources_kv_store ON image_resources;
332
+ CREATE TRIGGER trg_image_resources_kv_store
333
+ AFTER INSERT OR UPDATE OR DELETE ON image_resources
334
+ FOR EACH ROW EXECUTE FUNCTION fn_image_resources_kv_store_upsert();
335
+
336
+ -- ======================================================================
337
+ -- MESSAGES (Model: Message): base table plus B-tree indexes on tenant_id and user_id and GIN indexes on graph_edges, metadata and tags. Only content is NOT NULL besides the id primary key. NOTE(review): session_id gets no index in this section - confirm one is created elsewhere if lookups by session are expected
338
+ -- ======================================================================
339
+
340
+ CREATE TABLE IF NOT EXISTS messages (
341
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
342
+ tenant_id VARCHAR(100),
343
+ user_id VARCHAR(256),
344
+ content TEXT NOT NULL,
345
+ message_type VARCHAR(256),
346
+ session_id VARCHAR(256),
347
+ prompt TEXT,
348
+ model VARCHAR(256),
349
+ token_count INTEGER,
350
+ trace_id VARCHAR(256),
351
+ span_id VARCHAR(256),
352
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
353
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
354
+ deleted_at TIMESTAMP,
355
+ graph_edges JSONB DEFAULT '[]'::jsonb,
356
+ metadata JSONB DEFAULT '{}'::jsonb,
357
+ tags TEXT[] DEFAULT ARRAY[]::TEXT[]
358
+ );
359
+
360
+ CREATE INDEX IF NOT EXISTS idx_messages_tenant ON messages (tenant_id);
361
+ CREATE INDEX IF NOT EXISTS idx_messages_user ON messages (user_id);
362
+ CREATE INDEX IF NOT EXISTS idx_messages_graph_edges ON messages USING GIN (graph_edges);
363
+ CREATE INDEX IF NOT EXISTS idx_messages_metadata ON messages USING GIN (metadata);
364
+ CREATE INDEX IF NOT EXISTS idx_messages_tags ON messages USING GIN (tags);
365
+
366
+ -- Embeddings for messages: fixed 1536-dimension vectors keyed by (entity_id, field_name, provider), rows are cascade-deleted with the parent messages row
367
+ CREATE TABLE IF NOT EXISTS embeddings_messages (
368
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
369
+ entity_id UUID NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
370
+ field_name VARCHAR(100) NOT NULL,
371
+ provider VARCHAR(50) NOT NULL DEFAULT 'openai',
372
+ model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
373
+ embedding vector(1536) NOT NULL,
374
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
375
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
376
+
377
+ -- Unique: one embedding per entity per field per provider
378
+ UNIQUE (entity_id, field_name, provider)
379
+ );
380
+
381
+ -- Index for entity lookup (get all embeddings for entity). NOTE(review): the UNIQUE constraint above already yields an index led by entity_id - confirm this separate index is still needed
382
+ CREATE INDEX IF NOT EXISTS idx_embeddings_messages_entity ON embeddings_messages (entity_id);
383
+
384
+ -- Index for field + provider lookup
385
+ CREATE INDEX IF NOT EXISTS idx_embeddings_messages_field_provider ON embeddings_messages (field_name, provider);
386
+
387
+ -- HNSW index for vector similarity search (created in background)
388
+ -- Note: This will be created by background thread after data load
389
+ -- CREATE INDEX idx_embeddings_messages_vector_hnsw ON embeddings_messages
390
+ -- USING hnsw (embedding vector_cosine_ops);
391
+
392
+ -- KV_STORE trigger for messages: mirrors each row into kv_store as entity_type messages under the key normalize_key(id)
393
+ -- Trigger function to maintain KV_STORE for messages: DELETE removes kv_store rows by entity_id, INSERT/UPDATE upserts on the (tenant_id, entity_key) conflict target and refreshes entity_id, user_id, metadata, graph_edges and updated_at. NOTE(review): entity_type is not refreshed on conflict - confirm keys cannot collide across tables
394
+ CREATE OR REPLACE FUNCTION fn_messages_kv_store_upsert()
395
+ RETURNS TRIGGER AS $$
396
+ BEGIN
397
+ IF (TG_OP = 'DELETE') THEN
398
+ -- Remove from KV_STORE on delete
399
+ DELETE FROM kv_store
400
+ WHERE entity_id = OLD.id;
401
+ RETURN OLD;
402
+ ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
403
+ -- Upsert to KV_STORE (O(1) lookup by entity_key)
404
+ -- tenant_id can be NULL (meaning public/shared data)
405
+ INSERT INTO kv_store (
406
+ entity_key,
407
+ entity_type,
408
+ entity_id,
409
+ tenant_id,
410
+ user_id,
411
+ metadata,
412
+ graph_edges,
413
+ updated_at
414
+ ) VALUES (
415
+ normalize_key(NEW.id::VARCHAR),
416
+ 'messages',
417
+ NEW.id,
418
+ NEW.tenant_id,
419
+ NEW.user_id,
420
+ NEW.metadata,
421
+ COALESCE(NEW.graph_edges, '[]'::jsonb),
422
+ CURRENT_TIMESTAMP
423
+ )
424
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
425
+ DO UPDATE SET
426
+ entity_id = EXCLUDED.entity_id,
427
+ user_id = EXCLUDED.user_id,
428
+ metadata = EXCLUDED.metadata,
429
+ graph_edges = EXCLUDED.graph_edges,
430
+ updated_at = CURRENT_TIMESTAMP;
431
+
432
+ RETURN NEW;
433
+ END IF;
434
+ END;
435
+ $$ LANGUAGE plpgsql;
436
+
437
+ -- Create trigger: dropped first so re-running the script replaces it, fires after every row-level INSERT, UPDATE or DELETE
438
+ DROP TRIGGER IF EXISTS trg_messages_kv_store ON messages;
439
+ CREATE TRIGGER trg_messages_kv_store
440
+ AFTER INSERT OR UPDATE OR DELETE ON messages
441
+ FOR EACH ROW EXECUTE FUNCTION fn_messages_kv_store_upsert();
442
+
443
+ -- ======================================================================
444
+ -- MOMENTS (Model: Moment): base table plus B-tree indexes on tenant_id and user_id and GIN indexes on graph_edges, metadata and tags. starts_timestamp is required while ends_timestamp is optional, and source_resource_ids is a plain TEXT[] with no foreign key
445
+ -- ======================================================================
446
+
447
+ CREATE TABLE IF NOT EXISTS moments (
448
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
449
+ tenant_id VARCHAR(100),
450
+ user_id VARCHAR(256),
451
+ name VARCHAR(256),
452
+ moment_type VARCHAR(256),
453
+ category VARCHAR(256),
454
+ starts_timestamp TIMESTAMP NOT NULL,
455
+ ends_timestamp TIMESTAMP,
456
+ present_persons JSONB DEFAULT '{}'::jsonb,
457
+ emotion_tags TEXT[] DEFAULT ARRAY[]::TEXT[],
458
+ topic_tags TEXT[] DEFAULT ARRAY[]::TEXT[],
459
+ summary TEXT,
460
+ source_resource_ids TEXT[] DEFAULT ARRAY[]::TEXT[],
461
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
462
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
463
+ deleted_at TIMESTAMP,
464
+ graph_edges JSONB DEFAULT '[]'::jsonb,
465
+ metadata JSONB DEFAULT '{}'::jsonb,
466
+ tags TEXT[] DEFAULT ARRAY[]::TEXT[]
467
+ );
468
+
469
+ CREATE INDEX IF NOT EXISTS idx_moments_tenant ON moments (tenant_id);
470
+ CREATE INDEX IF NOT EXISTS idx_moments_user ON moments (user_id);
471
+ CREATE INDEX IF NOT EXISTS idx_moments_graph_edges ON moments USING GIN (graph_edges);
472
+ CREATE INDEX IF NOT EXISTS idx_moments_metadata ON moments USING GIN (metadata);
473
+ CREATE INDEX IF NOT EXISTS idx_moments_tags ON moments USING GIN (tags);
474
+
475
+ -- Embeddings for moments: fixed 1536-dimension vectors keyed by (entity_id, field_name, provider), rows are cascade-deleted with the parent moments row
476
+ CREATE TABLE IF NOT EXISTS embeddings_moments (
477
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
478
+ entity_id UUID NOT NULL REFERENCES moments(id) ON DELETE CASCADE,
479
+ field_name VARCHAR(100) NOT NULL,
480
+ provider VARCHAR(50) NOT NULL DEFAULT 'openai',
481
+ model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
482
+ embedding vector(1536) NOT NULL,
483
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
484
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
485
+
486
+ -- Unique: one embedding per entity per field per provider
487
+ UNIQUE (entity_id, field_name, provider)
488
+ );
489
+
490
+ -- Index for entity lookup (get all embeddings for entity). NOTE(review): the UNIQUE constraint above already yields an index led by entity_id - confirm this separate index is still needed
491
+ CREATE INDEX IF NOT EXISTS idx_embeddings_moments_entity ON embeddings_moments (entity_id);
492
+
493
+ -- Index for field + provider lookup
494
+ CREATE INDEX IF NOT EXISTS idx_embeddings_moments_field_provider ON embeddings_moments (field_name, provider);
495
+
496
+ -- HNSW index for vector similarity search (created in background)
497
+ -- Note: This will be created by background thread after data load
498
+ -- CREATE INDEX idx_embeddings_moments_vector_hnsw ON embeddings_moments
499
+ -- USING hnsw (embedding vector_cosine_ops);
500
+
501
+ -- KV_STORE trigger for moments: mirrors each row into kv_store as entity_type moments under the key normalize_key(name). NOTE(review): name is nullable on moments - confirm normalize_key and kv_store accept a NULL key
502
+ -- Trigger function to maintain KV_STORE for moments: DELETE removes kv_store rows by entity_id, INSERT/UPDATE upserts on the (tenant_id, entity_key) conflict target and refreshes entity_id, user_id, metadata, graph_edges and updated_at. NOTE(review): an UPDATE that changes name does not remove the row stored under the previous key, and entity_type is not refreshed on conflict - confirm both are handled elsewhere
503
+ CREATE OR REPLACE FUNCTION fn_moments_kv_store_upsert()
504
+ RETURNS TRIGGER AS $$
505
+ BEGIN
506
+ IF (TG_OP = 'DELETE') THEN
507
+ -- Remove from KV_STORE on delete
508
+ DELETE FROM kv_store
509
+ WHERE entity_id = OLD.id;
510
+ RETURN OLD;
511
+ ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
512
+ -- Upsert to KV_STORE (O(1) lookup by entity_key)
513
+ -- tenant_id can be NULL (meaning public/shared data)
514
+ INSERT INTO kv_store (
515
+ entity_key,
516
+ entity_type,
517
+ entity_id,
518
+ tenant_id,
519
+ user_id,
520
+ metadata,
521
+ graph_edges,
522
+ updated_at
523
+ ) VALUES (
524
+ normalize_key(NEW.name::VARCHAR),
525
+ 'moments',
526
+ NEW.id,
527
+ NEW.tenant_id,
528
+ NEW.user_id,
529
+ NEW.metadata,
530
+ COALESCE(NEW.graph_edges, '[]'::jsonb),
531
+ CURRENT_TIMESTAMP
532
+ )
533
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
534
+ DO UPDATE SET
535
+ entity_id = EXCLUDED.entity_id,
536
+ user_id = EXCLUDED.user_id,
537
+ metadata = EXCLUDED.metadata,
538
+ graph_edges = EXCLUDED.graph_edges,
539
+ updated_at = CURRENT_TIMESTAMP;
540
+
541
+ RETURN NEW;
542
+ END IF;
543
+ END;
544
+ $$ LANGUAGE plpgsql;
545
+
546
+ -- Create trigger: dropped first so re-running the script replaces it, fires after every row-level INSERT, UPDATE or DELETE
547
+ DROP TRIGGER IF EXISTS trg_moments_kv_store ON moments;
548
+ CREATE TRIGGER trg_moments_kv_store
549
+ AFTER INSERT OR UPDATE OR DELETE ON moments
550
+ FOR EACH ROW EXECUTE FUNCTION fn_moments_kv_store_upsert();
551
+
552
+ -- ======================================================================
553
+ -- ONTOLOGIES (Model: Ontology)
554
+ -- ======================================================================
555
+
556
+ CREATE TABLE IF NOT EXISTS ontologies (
557
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
558
+ tenant_id VARCHAR(100),
559
+ user_id VARCHAR(256),
560
+ name VARCHAR(256) NOT NULL,
561
+ uri VARCHAR(256),
562
+ file_id UUID,
563
+ agent_schema_id VARCHAR(256),
564
+ provider_name VARCHAR(256),
565
+ model_name VARCHAR(256),
566
+ extracted_data JSONB,
567
+ confidence_score DOUBLE PRECISION,
568
+ extraction_timestamp VARCHAR(256),
569
+ content TEXT,
570
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
571
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
572
+ deleted_at TIMESTAMP,
573
+ graph_edges JSONB DEFAULT '[]'::jsonb,
574
+ metadata JSONB DEFAULT '{}'::jsonb,
575
+ tags TEXT[] DEFAULT ARRAY[]::TEXT[]
576
+ );
577
+
578
+ CREATE INDEX IF NOT EXISTS idx_ontologies_tenant ON ontologies (tenant_id);
579
+ CREATE INDEX IF NOT EXISTS idx_ontologies_user ON ontologies (user_id);
580
+ CREATE INDEX IF NOT EXISTS idx_ontologies_graph_edges ON ontologies USING GIN (graph_edges);
581
+ CREATE INDEX IF NOT EXISTS idx_ontologies_metadata ON ontologies USING GIN (metadata);
582
+ CREATE INDEX IF NOT EXISTS idx_ontologies_tags ON ontologies USING GIN (tags);
583
+
584
+ -- Embeddings for ontologies: one 1536-dimension vector per (entity_id, field_name, provider); rows are deleted with their parent ontologies row
585
+ CREATE TABLE IF NOT EXISTS embeddings_ontologies (
586
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
587
+ entity_id UUID NOT NULL REFERENCES ontologies(id) ON DELETE CASCADE,
588
+ field_name VARCHAR(100) NOT NULL,
589
+ provider VARCHAR(50) NOT NULL DEFAULT 'openai',
590
+ model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
591
+ embedding vector(1536) NOT NULL,
592
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
593
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
594
+
595
+ -- Unique: one embedding per entity per field per provider
596
+ UNIQUE (entity_id, field_name, provider)
597
+ );
598
+
599
+ -- Index for entity lookup (get all embeddings for entity). NOTE(review): the UNIQUE constraint above already leads with entity_id; confirm this separate index is needed
600
+ CREATE INDEX IF NOT EXISTS idx_embeddings_ontologies_entity ON embeddings_ontologies (entity_id);
601
+
602
+ -- Index for lookups that filter on field_name and provider together
603
+ CREATE INDEX IF NOT EXISTS idx_embeddings_ontologies_field_provider ON embeddings_ontologies (field_name, provider);
604
+
605
+ -- HNSW index for vector similarity search (created in background)
606
+ -- Note: deliberately left commented out here; it will be created by a background thread after data load
607
+ -- CREATE INDEX idx_embeddings_ontologies_vector_hnsw ON embeddings_ontologies
608
+ -- USING hnsw (embedding vector_cosine_ops);
609
+
610
+ -- KV_STORE trigger for ontologies: mirrors row-level changes on ontologies into kv_store
611
+ -- Trigger function: DELETE removes the row's kv_store entries; INSERT/UPDATE upserts the entry keyed by normalize_key(name)
612
+ CREATE OR REPLACE FUNCTION fn_ontologies_kv_store_upsert()
613
+ RETURNS TRIGGER AS $$
614
+ BEGIN
615
+ IF (TG_OP = 'DELETE') THEN
616
+ -- Remove from KV_STORE on delete
617
+ DELETE FROM kv_store
618
+ WHERE entity_id = OLD.id;
619
+ RETURN OLD;
620
+ ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
621
+ -- Upsert to KV_STORE (O(1) lookup by entity_key)
622
+ -- tenant_id can be NULL (meaning public/shared data)
623
+ -- NOTE(review): an UPDATE that changes name upserts the new key but the old key's row is left in place; confirm stale keys are cleaned up elsewhere
+ INSERT INTO kv_store (
624
+ entity_key,
625
+ entity_type,
626
+ entity_id,
627
+ tenant_id,
628
+ user_id,
629
+ metadata,
630
+ graph_edges,
631
+ updated_at
632
+ ) VALUES (
633
+ normalize_key(NEW.name::VARCHAR),
634
+ 'ontologies',
635
+ NEW.id,
636
+ NEW.tenant_id,
637
+ NEW.user_id,
638
+ NEW.metadata,
639
+ COALESCE(NEW.graph_edges, '[]'::jsonb),
640
+ CURRENT_TIMESTAMP
641
+ )
642
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
643
+ DO UPDATE SET
644
+ -- entity_type must follow entity_id: the conflicting row may have been written by another table's trigger
+ entity_type = EXCLUDED.entity_type,
+ entity_id = EXCLUDED.entity_id,
645
+ user_id = EXCLUDED.user_id,
646
+ metadata = EXCLUDED.metadata,
647
+ graph_edges = EXCLUDED.graph_edges,
648
+ updated_at = CURRENT_TIMESTAMP;
649
+
650
+ RETURN NEW;
651
+ END IF;
652
+ END;
653
+ $$ LANGUAGE plpgsql;
654
+
655
+ -- Create trigger (dropped first so re-running this script does not fail on an existing trigger)
656
+ DROP TRIGGER IF EXISTS trg_ontologies_kv_store ON ontologies;
657
+ CREATE TRIGGER trg_ontologies_kv_store
658
+ AFTER INSERT OR UPDATE OR DELETE ON ontologies
659
+ FOR EACH ROW EXECUTE FUNCTION fn_ontologies_kv_store_upsert();
660
+
661
+ -- ======================================================================
662
+ -- ONTOLOGY_CONFIGS (Model: OntologyConfig)
663
+ -- ======================================================================
664
+
665
+ CREATE TABLE IF NOT EXISTS ontology_configs (
666
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
667
+ tenant_id VARCHAR(100),
668
+ user_id VARCHAR(256),
669
+ name VARCHAR(256) NOT NULL,
670
+ agent_schema_id VARCHAR(256) NOT NULL,
671
+ description TEXT,
672
+ mime_type_pattern VARCHAR(256),
673
+ uri_pattern VARCHAR(256),
674
+ tag_filter TEXT[],
675
+ priority INTEGER,
676
+ enabled BOOLEAN,
677
+ provider_name VARCHAR(256),
678
+ model_name VARCHAR(256),
679
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
680
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
681
+ deleted_at TIMESTAMP,
682
+ graph_edges JSONB DEFAULT '[]'::jsonb,
683
+ metadata JSONB DEFAULT '{}'::jsonb,
684
+ tags TEXT[] DEFAULT ARRAY[]::TEXT[]
685
+ );
686
+
687
+ CREATE INDEX IF NOT EXISTS idx_ontology_configs_tenant ON ontology_configs (tenant_id);
688
+ CREATE INDEX IF NOT EXISTS idx_ontology_configs_user ON ontology_configs (user_id);
689
+ CREATE INDEX IF NOT EXISTS idx_ontology_configs_graph_edges ON ontology_configs USING GIN (graph_edges);
690
+ CREATE INDEX IF NOT EXISTS idx_ontology_configs_metadata ON ontology_configs USING GIN (metadata);
691
+ CREATE INDEX IF NOT EXISTS idx_ontology_configs_tags ON ontology_configs USING GIN (tags);
692
+
693
+ -- Embeddings for ontology_configs: one 1536-dimension vector per (entity_id, field_name, provider); rows are deleted with their parent ontology_configs row
694
+ CREATE TABLE IF NOT EXISTS embeddings_ontology_configs (
695
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
696
+ entity_id UUID NOT NULL REFERENCES ontology_configs(id) ON DELETE CASCADE,
697
+ field_name VARCHAR(100) NOT NULL,
698
+ provider VARCHAR(50) NOT NULL DEFAULT 'openai',
699
+ model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
700
+ embedding vector(1536) NOT NULL,
701
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
702
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
703
+
704
+ -- Unique: one embedding per entity per field per provider
705
+ UNIQUE (entity_id, field_name, provider)
706
+ );
707
+
708
+ -- Index for entity lookup (get all embeddings for entity). NOTE(review): the UNIQUE constraint above already leads with entity_id; confirm this separate index is needed
709
+ CREATE INDEX IF NOT EXISTS idx_embeddings_ontology_configs_entity ON embeddings_ontology_configs (entity_id);
710
+
711
+ -- Index for lookups that filter on field_name and provider together
712
+ CREATE INDEX IF NOT EXISTS idx_embeddings_ontology_configs_field_provider ON embeddings_ontology_configs (field_name, provider);
713
+
714
+ -- HNSW index for vector similarity search (created in background)
715
+ -- Note: deliberately left commented out here; it will be created by a background thread after data load
716
+ -- CREATE INDEX idx_embeddings_ontology_configs_vector_hnsw ON embeddings_ontology_configs
717
+ -- USING hnsw (embedding vector_cosine_ops);
718
+
719
+ -- KV_STORE trigger for ontology_configs: mirrors row-level changes on ontology_configs into kv_store
720
+ -- Trigger function: DELETE removes the row's kv_store entries; INSERT/UPDATE upserts the entry keyed by normalize_key(name)
721
+ CREATE OR REPLACE FUNCTION fn_ontology_configs_kv_store_upsert()
722
+ RETURNS TRIGGER AS $$
723
+ BEGIN
724
+ IF (TG_OP = 'DELETE') THEN
725
+ -- Remove from KV_STORE on delete
726
+ DELETE FROM kv_store
727
+ WHERE entity_id = OLD.id;
728
+ RETURN OLD;
729
+ ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
730
+ -- Upsert to KV_STORE (O(1) lookup by entity_key)
731
+ -- tenant_id can be NULL (meaning public/shared data)
732
+ -- NOTE(review): an UPDATE that changes name upserts the new key but the old key's row is left in place; confirm stale keys are cleaned up elsewhere
+ INSERT INTO kv_store (
733
+ entity_key,
734
+ entity_type,
735
+ entity_id,
736
+ tenant_id,
737
+ user_id,
738
+ metadata,
739
+ graph_edges,
740
+ updated_at
741
+ ) VALUES (
742
+ normalize_key(NEW.name::VARCHAR),
743
+ 'ontology_configs',
744
+ NEW.id,
745
+ NEW.tenant_id,
746
+ NEW.user_id,
747
+ NEW.metadata,
748
+ COALESCE(NEW.graph_edges, '[]'::jsonb),
749
+ CURRENT_TIMESTAMP
750
+ )
751
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
752
+ DO UPDATE SET
753
+ -- entity_type must follow entity_id: the conflicting row may have been written by another table's trigger
+ entity_type = EXCLUDED.entity_type,
+ entity_id = EXCLUDED.entity_id,
754
+ user_id = EXCLUDED.user_id,
755
+ metadata = EXCLUDED.metadata,
756
+ graph_edges = EXCLUDED.graph_edges,
757
+ updated_at = CURRENT_TIMESTAMP;
758
+
759
+ RETURN NEW;
760
+ END IF;
761
+ END;
762
+ $$ LANGUAGE plpgsql;
763
+
764
+ -- Create trigger (dropped first so re-running this script does not fail on an existing trigger)
765
+ DROP TRIGGER IF EXISTS trg_ontology_configs_kv_store ON ontology_configs;
766
+ CREATE TRIGGER trg_ontology_configs_kv_store
767
+ AFTER INSERT OR UPDATE OR DELETE ON ontology_configs
768
+ FOR EACH ROW EXECUTE FUNCTION fn_ontology_configs_kv_store_upsert();
769
+
770
+ -- ======================================================================
771
+ -- RESOURCES (Model: Resource) -- unlike most entity tables in this script, name is nullable here
772
+ -- ======================================================================
773
+
774
+ CREATE TABLE IF NOT EXISTS resources (
775
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
776
+ tenant_id VARCHAR(100),
777
+ user_id VARCHAR(256),
778
+ name VARCHAR(256),
779
+ uri VARCHAR(256),
780
+ ordinal INTEGER,
781
+ content TEXT,
782
+ timestamp TIMESTAMP,
783
+ category VARCHAR(256),
784
+ related_entities JSONB DEFAULT '{}'::jsonb,
785
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
786
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
787
+ deleted_at TIMESTAMP,
788
+ graph_edges JSONB DEFAULT '[]'::jsonb,
789
+ metadata JSONB DEFAULT '{}'::jsonb,
790
+ tags TEXT[] DEFAULT ARRAY[]::TEXT[]
791
+ );
792
+
793
+ CREATE INDEX IF NOT EXISTS idx_resources_tenant ON resources (tenant_id);
794
+ CREATE INDEX IF NOT EXISTS idx_resources_user ON resources (user_id);
795
+ CREATE INDEX IF NOT EXISTS idx_resources_graph_edges ON resources USING GIN (graph_edges);
796
+ CREATE INDEX IF NOT EXISTS idx_resources_metadata ON resources USING GIN (metadata);
797
+ CREATE INDEX IF NOT EXISTS idx_resources_tags ON resources USING GIN (tags);
798
+
799
+ -- Embeddings for resources: one 1536-dimension vector per (entity_id, field_name, provider); rows are deleted with their parent resources row
800
+ CREATE TABLE IF NOT EXISTS embeddings_resources (
801
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
802
+ entity_id UUID NOT NULL REFERENCES resources(id) ON DELETE CASCADE,
803
+ field_name VARCHAR(100) NOT NULL,
804
+ provider VARCHAR(50) NOT NULL DEFAULT 'openai',
805
+ model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
806
+ embedding vector(1536) NOT NULL,
807
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
808
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
809
+
810
+ -- Unique: one embedding per entity per field per provider
811
+ UNIQUE (entity_id, field_name, provider)
812
+ );
813
+
814
+ -- Index for entity lookup (get all embeddings for entity). NOTE(review): the UNIQUE constraint above already leads with entity_id; confirm this separate index is needed
815
+ CREATE INDEX IF NOT EXISTS idx_embeddings_resources_entity ON embeddings_resources (entity_id);
816
+
817
+ -- Index for lookups that filter on field_name and provider together
818
+ CREATE INDEX IF NOT EXISTS idx_embeddings_resources_field_provider ON embeddings_resources (field_name, provider);
819
+
820
+ -- HNSW index for vector similarity search (created in background)
821
+ -- Note: deliberately left commented out here; it will be created by a background thread after data load
822
+ -- CREATE INDEX idx_embeddings_resources_vector_hnsw ON embeddings_resources
823
+ -- USING hnsw (embedding vector_cosine_ops);
824
+
825
+ -- KV_STORE trigger for resources: mirrors row-level changes on resources into kv_store
826
+ -- Trigger function: DELETE removes the row's kv_store entries; INSERT/UPDATE upserts the entry keyed by normalize_key(name)
827
+ CREATE OR REPLACE FUNCTION fn_resources_kv_store_upsert()
828
+ RETURNS TRIGGER AS $$
829
+ BEGIN
830
+ IF (TG_OP = 'DELETE') THEN
831
+ -- Remove from KV_STORE on delete
832
+ DELETE FROM kv_store
833
+ WHERE entity_id = OLD.id;
834
+ RETURN OLD;
835
+ ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
836
+ -- Upsert to KV_STORE (O(1) lookup by entity_key)
837
+ -- tenant_id can be NULL (meaning public/shared data)
838
+ -- NOTE(review): an UPDATE that changes name upserts the new key but the old key's row is left in place; confirm stale keys are cleaned up elsewhere
+ -- NOTE(review): resources.name is nullable, so the key expression below may see NULL; confirm normalize_key and kv_store.entity_key accept that
+ INSERT INTO kv_store (
839
+ entity_key,
840
+ entity_type,
841
+ entity_id,
842
+ tenant_id,
843
+ user_id,
844
+ metadata,
845
+ graph_edges,
846
+ updated_at
847
+ ) VALUES (
848
+ normalize_key(NEW.name::VARCHAR),
849
+ 'resources',
850
+ NEW.id,
851
+ NEW.tenant_id,
852
+ NEW.user_id,
853
+ NEW.metadata,
854
+ COALESCE(NEW.graph_edges, '[]'::jsonb),
855
+ CURRENT_TIMESTAMP
856
+ )
857
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
858
+ DO UPDATE SET
859
+ -- entity_type must follow entity_id: the conflicting row may have been written by another table's trigger
+ entity_type = EXCLUDED.entity_type,
+ entity_id = EXCLUDED.entity_id,
860
+ user_id = EXCLUDED.user_id,
861
+ metadata = EXCLUDED.metadata,
862
+ graph_edges = EXCLUDED.graph_edges,
863
+ updated_at = CURRENT_TIMESTAMP;
864
+
865
+ RETURN NEW;
866
+ END IF;
867
+ END;
868
+ $$ LANGUAGE plpgsql;
869
+
870
+ -- Create trigger (dropped first so re-running this script does not fail on an existing trigger)
871
+ DROP TRIGGER IF EXISTS trg_resources_kv_store ON resources;
872
+ CREATE TRIGGER trg_resources_kv_store
873
+ AFTER INSERT OR UPDATE OR DELETE ON resources
874
+ FOR EACH ROW EXECUTE FUNCTION fn_resources_kv_store_upsert();
875
+
876
+ -- ======================================================================
877
+ -- SCHEMAS (Model: Schema) -- name and spec are the only required payload columns
878
+ -- ======================================================================
879
+
880
+ CREATE TABLE IF NOT EXISTS schemas (
881
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
882
+ tenant_id VARCHAR(100),
883
+ user_id VARCHAR(256),
884
+ name VARCHAR(256) NOT NULL,
885
+ content TEXT,
886
+ spec JSONB NOT NULL,
887
+ category VARCHAR(256),
888
+ provider_configs JSONB DEFAULT '{}'::jsonb,
889
+ embedding_fields TEXT[] DEFAULT ARRAY[]::TEXT[],
890
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
891
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
892
+ deleted_at TIMESTAMP,
893
+ graph_edges JSONB DEFAULT '[]'::jsonb,
894
+ metadata JSONB DEFAULT '{}'::jsonb,
895
+ tags TEXT[] DEFAULT ARRAY[]::TEXT[]
896
+ );
897
+
898
+ CREATE INDEX IF NOT EXISTS idx_schemas_tenant ON schemas (tenant_id);
899
+ CREATE INDEX IF NOT EXISTS idx_schemas_user ON schemas (user_id);
900
+ CREATE INDEX IF NOT EXISTS idx_schemas_graph_edges ON schemas USING GIN (graph_edges);
901
+ CREATE INDEX IF NOT EXISTS idx_schemas_metadata ON schemas USING GIN (metadata);
902
+ CREATE INDEX IF NOT EXISTS idx_schemas_tags ON schemas USING GIN (tags);
903
+
904
+ -- Embeddings for schemas: one 1536-dimension vector per (entity_id, field_name, provider); rows are deleted with their parent schemas row
905
+ CREATE TABLE IF NOT EXISTS embeddings_schemas (
906
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
907
+ entity_id UUID NOT NULL REFERENCES schemas(id) ON DELETE CASCADE,
908
+ field_name VARCHAR(100) NOT NULL,
909
+ provider VARCHAR(50) NOT NULL DEFAULT 'openai',
910
+ model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
911
+ embedding vector(1536) NOT NULL,
912
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
913
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
914
+
915
+ -- Unique: one embedding per entity per field per provider
916
+ UNIQUE (entity_id, field_name, provider)
917
+ );
918
+
919
+ -- Index for entity lookup (get all embeddings for entity). NOTE(review): the UNIQUE constraint above already leads with entity_id; confirm this separate index is needed
920
+ CREATE INDEX IF NOT EXISTS idx_embeddings_schemas_entity ON embeddings_schemas (entity_id);
921
+
922
+ -- Index for lookups that filter on field_name and provider together
923
+ CREATE INDEX IF NOT EXISTS idx_embeddings_schemas_field_provider ON embeddings_schemas (field_name, provider);
924
+
925
+ -- HNSW index for vector similarity search (created in background)
926
+ -- Note: deliberately left commented out here; it will be created by a background thread after data load
927
+ -- CREATE INDEX idx_embeddings_schemas_vector_hnsw ON embeddings_schemas
928
+ -- USING hnsw (embedding vector_cosine_ops);
929
+
930
+ -- KV_STORE trigger for schemas: mirrors row-level changes on schemas into kv_store
931
+ -- Trigger function: DELETE removes the row's kv_store entries; INSERT/UPDATE upserts the entry keyed by normalize_key(name)
932
+ CREATE OR REPLACE FUNCTION fn_schemas_kv_store_upsert()
933
+ RETURNS TRIGGER AS $$
934
+ BEGIN
935
+ IF (TG_OP = 'DELETE') THEN
936
+ -- Remove from KV_STORE on delete
937
+ DELETE FROM kv_store
938
+ WHERE entity_id = OLD.id;
939
+ RETURN OLD;
940
+ ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
941
+ -- Upsert to KV_STORE (O(1) lookup by entity_key)
942
+ -- tenant_id can be NULL (meaning public/shared data)
943
+ -- NOTE(review): an UPDATE that changes name upserts the new key but the old key's row is left in place; confirm stale keys are cleaned up elsewhere
+ INSERT INTO kv_store (
944
+ entity_key,
945
+ entity_type,
946
+ entity_id,
947
+ tenant_id,
948
+ user_id,
949
+ metadata,
950
+ graph_edges,
951
+ updated_at
952
+ ) VALUES (
953
+ normalize_key(NEW.name::VARCHAR),
954
+ 'schemas',
955
+ NEW.id,
956
+ NEW.tenant_id,
957
+ NEW.user_id,
958
+ NEW.metadata,
959
+ COALESCE(NEW.graph_edges, '[]'::jsonb),
960
+ CURRENT_TIMESTAMP
961
+ )
962
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
963
+ DO UPDATE SET
964
+ -- entity_type must follow entity_id: the conflicting row may have been written by another table's trigger
+ entity_type = EXCLUDED.entity_type,
+ entity_id = EXCLUDED.entity_id,
965
+ user_id = EXCLUDED.user_id,
966
+ metadata = EXCLUDED.metadata,
967
+ graph_edges = EXCLUDED.graph_edges,
968
+ updated_at = CURRENT_TIMESTAMP;
969
+
970
+ RETURN NEW;
971
+ END IF;
972
+ END;
973
+ $$ LANGUAGE plpgsql;
974
+
975
+ -- Create trigger (dropped first so re-running this script does not fail on an existing trigger)
976
+ DROP TRIGGER IF EXISTS trg_schemas_kv_store ON schemas;
977
+ CREATE TRIGGER trg_schemas_kv_store
978
+ AFTER INSERT OR UPDATE OR DELETE ON schemas
979
+ FOR EACH ROW EXECUTE FUNCTION fn_schemas_kv_store_upsert();
980
+
981
+ -- ======================================================================
982
+ -- SESSIONS (Model: Session) -- name is the only required payload column
983
+ -- ======================================================================
984
+
985
+ CREATE TABLE IF NOT EXISTS sessions (
986
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
987
+ tenant_id VARCHAR(100),
988
+ user_id VARCHAR(256),
989
+ name VARCHAR(256) NOT NULL,
990
+ mode TEXT,
991
+ description TEXT,
992
+ original_trace_id VARCHAR(256),
993
+ settings_overrides JSONB,
994
+ prompt TEXT,
995
+ agent_schema_uri VARCHAR(256),
996
+ message_count INTEGER,
997
+ total_tokens INTEGER,
998
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
999
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1000
+ deleted_at TIMESTAMP,
1001
+ graph_edges JSONB DEFAULT '[]'::jsonb,
1002
+ metadata JSONB DEFAULT '{}'::jsonb,
1003
+ tags TEXT[] DEFAULT ARRAY[]::TEXT[]
1004
+ );
1005
+
1006
+ CREATE INDEX IF NOT EXISTS idx_sessions_tenant ON sessions (tenant_id);
1007
+ CREATE INDEX IF NOT EXISTS idx_sessions_user ON sessions (user_id);
1008
+ CREATE INDEX IF NOT EXISTS idx_sessions_graph_edges ON sessions USING GIN (graph_edges);
1009
+ CREATE INDEX IF NOT EXISTS idx_sessions_metadata ON sessions USING GIN (metadata);
1010
+ CREATE INDEX IF NOT EXISTS idx_sessions_tags ON sessions USING GIN (tags);
1011
+
1012
+ -- Embeddings for sessions: one 1536-dimension vector per (entity_id, field_name, provider); rows are deleted with their parent sessions row
1013
+ CREATE TABLE IF NOT EXISTS embeddings_sessions (
1014
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
1015
+ entity_id UUID NOT NULL REFERENCES sessions(id) ON DELETE CASCADE,
1016
+ field_name VARCHAR(100) NOT NULL,
1017
+ provider VARCHAR(50) NOT NULL DEFAULT 'openai',
1018
+ model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
1019
+ embedding vector(1536) NOT NULL,
1020
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1021
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1022
+
1023
+ -- Unique: one embedding per entity per field per provider
1024
+ UNIQUE (entity_id, field_name, provider)
1025
+ );
1026
+
1027
+ -- Index for entity lookup (get all embeddings for entity). NOTE(review): the UNIQUE constraint above already leads with entity_id; confirm this separate index is needed
1028
+ CREATE INDEX IF NOT EXISTS idx_embeddings_sessions_entity ON embeddings_sessions (entity_id);
1029
+
1030
+ -- Index for lookups that filter on field_name and provider together
1031
+ CREATE INDEX IF NOT EXISTS idx_embeddings_sessions_field_provider ON embeddings_sessions (field_name, provider);
1032
+
1033
+ -- HNSW index for vector similarity search (created in background)
1034
+ -- Note: deliberately left commented out here; it will be created by a background thread after data load
1035
+ -- CREATE INDEX idx_embeddings_sessions_vector_hnsw ON embeddings_sessions
1036
+ -- USING hnsw (embedding vector_cosine_ops);
1037
+
1038
+ -- KV_STORE trigger for sessions: mirrors row-level changes on sessions into kv_store
1039
+ -- Trigger function: DELETE removes the row's kv_store entries; INSERT/UPDATE upserts the entry keyed by normalize_key(name)
1040
+ CREATE OR REPLACE FUNCTION fn_sessions_kv_store_upsert()
1041
+ RETURNS TRIGGER AS $$
1042
+ BEGIN
1043
+ IF (TG_OP = 'DELETE') THEN
1044
+ -- Remove from KV_STORE on delete
1045
+ DELETE FROM kv_store
1046
+ WHERE entity_id = OLD.id;
1047
+ RETURN OLD;
1048
+ ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
1049
+ -- Upsert to KV_STORE (O(1) lookup by entity_key)
1050
+ -- tenant_id can be NULL (meaning public/shared data)
1051
+ -- NOTE(review): an UPDATE that changes name upserts the new key but the old key's row is left in place; confirm stale keys are cleaned up elsewhere
+ INSERT INTO kv_store (
1052
+ entity_key,
1053
+ entity_type,
1054
+ entity_id,
1055
+ tenant_id,
1056
+ user_id,
1057
+ metadata,
1058
+ graph_edges,
1059
+ updated_at
1060
+ ) VALUES (
1061
+ normalize_key(NEW.name::VARCHAR),
1062
+ 'sessions',
1063
+ NEW.id,
1064
+ NEW.tenant_id,
1065
+ NEW.user_id,
1066
+ NEW.metadata,
1067
+ COALESCE(NEW.graph_edges, '[]'::jsonb),
1068
+ CURRENT_TIMESTAMP
1069
+ )
1070
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
1071
+ DO UPDATE SET
1072
+ -- entity_type must follow entity_id: the conflicting row may have been written by another table's trigger
+ entity_type = EXCLUDED.entity_type,
+ entity_id = EXCLUDED.entity_id,
1073
+ user_id = EXCLUDED.user_id,
1074
+ metadata = EXCLUDED.metadata,
1075
+ graph_edges = EXCLUDED.graph_edges,
1076
+ updated_at = CURRENT_TIMESTAMP;
1077
+
1078
+ RETURN NEW;
1079
+ END IF;
1080
+ END;
1081
+ $$ LANGUAGE plpgsql;
1082
+
1083
+ -- Create trigger (dropped first so re-running this script does not fail on an existing trigger)
1084
+ DROP TRIGGER IF EXISTS trg_sessions_kv_store ON sessions;
1085
+ CREATE TRIGGER trg_sessions_kv_store
1086
+ AFTER INSERT OR UPDATE OR DELETE ON sessions
1087
+ FOR EACH ROW EXECUTE FUNCTION fn_sessions_kv_store_upsert();
1088
+
1089
+ -- ======================================================================
1090
+ -- SHARED_SESSIONS (Model: SharedSession) -- session_id, owner_user_id and shared_with_user_id are all NOT NULL; there is no name column
1091
+ -- ======================================================================
1092
+
1093
+ CREATE TABLE IF NOT EXISTS shared_sessions (
1094
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
1095
+ tenant_id VARCHAR(100),
1096
+ user_id VARCHAR(256),
1097
+ session_id VARCHAR(256) NOT NULL,
1098
+ owner_user_id VARCHAR(256) NOT NULL,
1099
+ shared_with_user_id VARCHAR(256) NOT NULL,
1100
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1101
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1102
+ deleted_at TIMESTAMP,
1103
+ graph_edges JSONB DEFAULT '[]'::jsonb,
1104
+ metadata JSONB DEFAULT '{}'::jsonb,
1105
+ tags TEXT[] DEFAULT ARRAY[]::TEXT[]
1106
+ );
1107
+
1108
+ CREATE INDEX IF NOT EXISTS idx_shared_sessions_tenant ON shared_sessions (tenant_id);
1109
+ CREATE INDEX IF NOT EXISTS idx_shared_sessions_user ON shared_sessions (user_id);
1110
+ CREATE INDEX IF NOT EXISTS idx_shared_sessions_graph_edges ON shared_sessions USING GIN (graph_edges);
1111
+ CREATE INDEX IF NOT EXISTS idx_shared_sessions_metadata ON shared_sessions USING GIN (metadata);
1112
+ CREATE INDEX IF NOT EXISTS idx_shared_sessions_tags ON shared_sessions USING GIN (tags);
1113
+
1114
+ -- KV_STORE trigger for shared_sessions: mirrors row-level changes on shared_sessions into kv_store
1115
+ -- Trigger function: DELETE removes the row's kv_store entries; INSERT/UPDATE upserts the entry keyed by normalize_key(id)
1116
+ CREATE OR REPLACE FUNCTION fn_shared_sessions_kv_store_upsert()
1117
+ RETURNS TRIGGER AS $$
1118
+ BEGIN
1119
+ IF (TG_OP = 'DELETE') THEN
1120
+ -- Remove from KV_STORE on delete
1121
+ DELETE FROM kv_store
1122
+ WHERE entity_id = OLD.id;
1123
+ RETURN OLD;
1124
+ ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
1125
+ -- Upsert to KV_STORE (O(1) lookup by entity_key)
1126
+ -- tenant_id can be NULL (meaning public/shared data)
1127
+ INSERT INTO kv_store (
1128
+ entity_key,
1129
+ entity_type,
1130
+ entity_id,
1131
+ tenant_id,
1132
+ user_id,
1133
+ metadata,
1134
+ graph_edges,
1135
+ updated_at
1136
+ ) VALUES (
1137
+ normalize_key(NEW.id::VARCHAR),
1138
+ 'shared_sessions',
1139
+ NEW.id,
1140
+ NEW.tenant_id,
1141
+ NEW.user_id,
1142
+ NEW.metadata,
1143
+ COALESCE(NEW.graph_edges, '[]'::jsonb),
1144
+ CURRENT_TIMESTAMP
1145
+ )
1146
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
1147
+ DO UPDATE SET
1148
+ -- entity_type must follow entity_id: the conflicting row may have been written by another table's trigger
+ entity_type = EXCLUDED.entity_type,
+ entity_id = EXCLUDED.entity_id,
1149
+ user_id = EXCLUDED.user_id,
1150
+ metadata = EXCLUDED.metadata,
1151
+ graph_edges = EXCLUDED.graph_edges,
1152
+ updated_at = CURRENT_TIMESTAMP;
1153
+
1154
+ RETURN NEW;
1155
+ END IF;
1156
+ END;
1157
+ $$ LANGUAGE plpgsql;
1158
+
1159
+ -- Create trigger (dropped first so re-running this script does not fail on an existing trigger)
1160
+ DROP TRIGGER IF EXISTS trg_shared_sessions_kv_store ON shared_sessions;
1161
+ CREATE TRIGGER trg_shared_sessions_kv_store
1162
+ AFTER INSERT OR UPDATE OR DELETE ON shared_sessions
1163
+ FOR EACH ROW EXECUTE FUNCTION fn_shared_sessions_kv_store_upsert();
1164
+
1165
+ -- ======================================================================
1166
+ -- USERS (Model: User) -- name is the only required payload column
1167
+ -- ======================================================================
1168
+
1169
+ CREATE TABLE IF NOT EXISTS users (
1170
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
1171
+ tenant_id VARCHAR(100),
1172
+ user_id VARCHAR(256),
1173
+ name VARCHAR(256) NOT NULL,
1174
+ email VARCHAR(256),
1175
+ role VARCHAR(256),
1176
+ tier TEXT,
1177
+ anonymous_ids TEXT[] DEFAULT ARRAY[]::TEXT[],
1178
+ sec_policy JSONB DEFAULT '{}'::jsonb,
1179
+ summary TEXT,
1180
+ interests TEXT[] DEFAULT ARRAY[]::TEXT[],
1181
+ preferred_topics TEXT[] DEFAULT ARRAY[]::TEXT[],
1182
+ activity_level VARCHAR(256),
1183
+ last_active_at TIMESTAMP,
1184
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1185
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1186
+ deleted_at TIMESTAMP,
1187
+ graph_edges JSONB DEFAULT '[]'::jsonb,
1188
+ metadata JSONB DEFAULT '{}'::jsonb,
1189
+ tags TEXT[] DEFAULT ARRAY[]::TEXT[]
1190
+ );
1191
+
1192
+ CREATE INDEX IF NOT EXISTS idx_users_tenant ON users (tenant_id);
1193
+ CREATE INDEX IF NOT EXISTS idx_users_user ON users (user_id);
1194
+ CREATE INDEX IF NOT EXISTS idx_users_graph_edges ON users USING GIN (graph_edges);
1195
+ CREATE INDEX IF NOT EXISTS idx_users_metadata ON users USING GIN (metadata);
1196
+ CREATE INDEX IF NOT EXISTS idx_users_tags ON users USING GIN (tags);
1197
+
1198
+ -- Embeddings for users: one 1536-dimension vector per (entity_id, field_name, provider); rows are deleted with their parent users row
1199
+ CREATE TABLE IF NOT EXISTS embeddings_users (
1200
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
1201
+ entity_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
1202
+ field_name VARCHAR(100) NOT NULL,
1203
+ provider VARCHAR(50) NOT NULL DEFAULT 'openai',
1204
+ model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
1205
+ embedding vector(1536) NOT NULL,
1206
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1207
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1208
+
1209
+ -- Unique: one embedding per entity per field per provider
1210
+ UNIQUE (entity_id, field_name, provider)
1211
+ );
1212
+
1213
+ -- Index for entity lookup (get all embeddings for entity). NOTE(review): the UNIQUE constraint above already leads with entity_id; confirm this separate index is needed
1214
+ CREATE INDEX IF NOT EXISTS idx_embeddings_users_entity ON embeddings_users (entity_id);
1215
+
1216
+ -- Index for lookups that filter on field_name and provider together
1217
+ CREATE INDEX IF NOT EXISTS idx_embeddings_users_field_provider ON embeddings_users (field_name, provider);
1218
+
1219
+ -- HNSW index for vector similarity search (created in background)
1220
+ -- Note: deliberately left commented out here; it will be created by a background thread after data load
1221
+ -- CREATE INDEX idx_embeddings_users_vector_hnsw ON embeddings_users
1222
+ -- USING hnsw (embedding vector_cosine_ops);
1223
+
1224
+ -- KV_STORE trigger for users: mirrors row-level changes on users into kv_store
1225
+ -- Trigger function: DELETE removes the row's kv_store entries; INSERT/UPDATE upserts the entry keyed by normalize_key(name)
1226
+ CREATE OR REPLACE FUNCTION fn_users_kv_store_upsert()
1227
+ RETURNS TRIGGER AS $$
1228
+ BEGIN
1229
+ IF (TG_OP = 'DELETE') THEN
1230
+ -- Remove from KV_STORE on delete
1231
+ DELETE FROM kv_store
1232
+ WHERE entity_id = OLD.id;
1233
+ RETURN OLD;
1234
+ ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
1235
+ -- Upsert to KV_STORE (O(1) lookup by entity_key)
1236
+ -- tenant_id can be NULL (meaning public/shared data)
1237
+ -- NOTE(review): an UPDATE that changes name upserts the new key but the old key's row is left in place; confirm stale keys are cleaned up elsewhere
+ INSERT INTO kv_store (
1238
+ entity_key,
1239
+ entity_type,
1240
+ entity_id,
1241
+ tenant_id,
1242
+ user_id,
1243
+ metadata,
1244
+ graph_edges,
1245
+ updated_at
1246
+ ) VALUES (
1247
+ normalize_key(NEW.name::VARCHAR),
1248
+ 'users',
1249
+ NEW.id,
1250
+ NEW.tenant_id,
1251
+ NEW.user_id,
1252
+ NEW.metadata,
1253
+ COALESCE(NEW.graph_edges, '[]'::jsonb),
1254
+ CURRENT_TIMESTAMP
1255
+ )
1256
+ ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
1257
+ DO UPDATE SET
1258
+ -- entity_type must follow entity_id: the conflicting row may have been written by another table's trigger
+ entity_type = EXCLUDED.entity_type,
+ entity_id = EXCLUDED.entity_id,
1259
+ user_id = EXCLUDED.user_id,
1260
+ metadata = EXCLUDED.metadata,
1261
+ graph_edges = EXCLUDED.graph_edges,
1262
+ updated_at = CURRENT_TIMESTAMP;
1263
+
1264
+ RETURN NEW;
1265
+ END IF;
1266
+ END;
1267
+ $$ LANGUAGE plpgsql;
1268
+
1269
+ -- Create trigger (dropped first so re-running this script does not fail on an existing trigger)
1270
+ DROP TRIGGER IF EXISTS trg_users_kv_store ON users;
1271
+ CREATE TRIGGER trg_users_kv_store
1272
+ AFTER INSERT OR UPDATE OR DELETE ON users
1273
+ FOR EACH ROW EXECUTE FUNCTION fn_users_kv_store_upsert();
1274
+
1275
+ -- ============================================================================
1276
+ -- SCHEMA TABLE ENTRIES
1277
+ -- Every entity table gets a schemas entry for agent-like access
1278
+ -- ============================================================================
1279
+
1280
+ -- Schema entry for Feedback (feedbacks)
1281
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
1282
+ VALUES (
1283
+ 'ae554853-e743-5d73-a2db-1ce20e7089fe'::uuid,
1284
+ 'system',
1285
+ 'Feedback',
1286
+ '# Feedback
1287
+
1288
+
1289
+ User feedback on a message or session.
1290
+
1291
+ Captures structured feedback including:
1292
+ - Rating (1-5 scale or thumbs up/down)
1293
+ - Categories (predefined or custom)
1294
+ - Free-text comment
1295
+ - Trace reference for OTEL/Phoenix integration
1296
+
1297
+ The feedback can be attached to:
1298
+ - A specific message (message_id set)
1299
+ - An entire session (session_id set, message_id null)
1300
+
1301
+
1302
+ ## Overview
1303
+
1304
+ The `Feedback` entity is stored in the `feedbacks` table. Each record is uniquely
1305
+ identified by its `id` field for lookups and graph traversal.
1306
+
1307
+ ## Search Capabilities
1308
+
1309
+ This schema includes the `search_rem` tool which supports:
1310
+ - **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
1311
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
1312
+ - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM feedbacks LIMIT 10`)
1313
+ - **SQL**: Complex queries (e.g., `SELECT * FROM feedbacks WHERE ...`)
1314
+
1315
+ ## Table Info
1316
+
1317
+ | Property | Value |
1318
+ |----------|-------|
1319
+ | Table | `feedbacks` |
1320
+ | Entity Key | `id` |
1321
+ | Embedding Fields | None |
1322
+ | Tools | `search_rem` |
1323
+
1324
+ ## Fields
1325
+
1326
+ ### `id`
1327
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
1328
+ - **Optional**
1329
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
1330
+
1331
+ ### `created_at`
1332
+ - **Type**: `<class ''datetime.datetime''>`
1333
+ - **Optional**
1334
+ - Entity creation timestamp
1335
+
1336
+ ### `updated_at`
1337
+ - **Type**: `<class ''datetime.datetime''>`
1338
+ - **Optional**
1339
+ - Last update timestamp
1340
+
1341
+ ### `deleted_at`
1342
+ - **Type**: `typing.Optional[datetime.datetime]`
1343
+ - **Optional**
1344
+ - Soft deletion timestamp
1345
+
1346
+ ### `tenant_id`
1347
+ - **Type**: `typing.Optional[str]`
1348
+ - **Optional**
1349
+ - Tenant identifier for multi-tenancy isolation
1350
+
1351
+ ### `user_id`
1352
+ - **Type**: `typing.Optional[str]`
1353
+ - **Optional**
1354
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
1355
+
1356
+ ### `graph_edges`
1357
+ - **Type**: `list[dict]`
1358
+ - **Optional**
1359
+ - Knowledge graph edges stored as InlineEdge dicts
1360
+
1361
+ ### `metadata`
1362
+ - **Type**: `<class ''dict''>`
1363
+ - **Optional**
1364
+ - Flexible metadata storage
1365
+
1366
+ ### `tags`
1367
+ - **Type**: `list[str]`
1368
+ - **Optional**
1369
+ - Entity tags
1370
+
1371
+ ### `session_id`
1372
+ - **Type**: `<class ''str''>`
1373
+ - **Required**
1374
+ - Session ID this feedback relates to
1375
+
1376
+ ### `message_id`
1377
+ - **Type**: `str | None`
1378
+ - **Optional**
1379
+ - Specific message ID (null for session-level feedback)
1380
+
1381
+ ### `rating`
1382
+ - **Type**: `int | None`
1383
+ - **Optional**
1384
+ - Rating: -1 (thumbs down), 1 (thumbs up), or 1-5 scale
1385
+
1386
+ ### `categories`
1387
+ - **Type**: `list[str]`
1388
+ - **Optional**
1389
+ - Selected feedback categories (from FeedbackCategory or custom)
1390
+
1391
+ ### `comment`
1392
+ - **Type**: `str | None`
1393
+ - **Optional**
1394
+ - Optional free-text feedback comment
1395
+
1396
+ ### `trace_id`
1397
+ - **Type**: `str | None`
1398
+ - **Optional**
1399
+ - OTEL trace ID for linking to observability
1400
+
1401
+ ### `span_id`
1402
+ - **Type**: `str | None`
1403
+ - **Optional**
1404
+ - OTEL span ID for specific span feedback
1405
+
1406
+ ### `phoenix_synced`
1407
+ - **Type**: `<class ''bool''>`
1408
+ - **Optional**
1409
+ - Whether feedback has been synced to Phoenix as annotation
1410
+
1411
+ ### `phoenix_annotation_id`
1412
+ - **Type**: `str | None`
1413
+ - **Optional**
1414
+ - Phoenix annotation ID after sync
1415
+
1416
+ ### `annotator_kind`
1417
+ - **Type**: `<class ''str''>`
1418
+ - **Optional**
1419
+ - Annotator type: HUMAN, LLM, CODE
1420
+
1421
+ ',
1422
+ '{"type": "object", "description": "\n User feedback on a message or session.\n\n Captures structured feedback including:\n - Rating (1-5 scale or thumbs up/down)\n - Categories (predefined or custom)\n - Free-text comment\n - Trace reference for OTEL/Phoenix integration\n\n The feedback can be attached to:\n - A specific message (message_id set)\n - An entire session (session_id set, message_id null)\n \n\nThis agent can search the `feedbacks` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "session_id": {"description": "Session ID this feedback relates to", "title": "Session Id", "type": "string"}, "message_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Specific message ID (null for session-level feedback)", "title": "Message Id"}, "rating": {"anyOf": [{"maximum": 5, "minimum": -1, "type": "integer"}, {"type": "null"}], "default": null, "description": "Rating: -1 (thumbs down), 1 (thumbs up), or 1-5 scale", "title": "Rating"}, "categories": {"description": "Selected feedback categories (from FeedbackCategory or custom)", "items": {"type": "string"}, "title": "Categories", "type": "array"}, "comment": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Optional free-text feedback comment", "title": "Comment"}, "trace_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "OTEL trace ID for linking to observability", "title": "Trace Id"}, "span_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "OTEL span ID for specific span feedback", "title": "Span Id"}, "phoenix_synced": {"default": false, "description": "Whether feedback has been synced to Phoenix as annotation", "title": "Phoenix Synced", "type": "boolean"}, "phoenix_annotation_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Phoenix annotation ID after sync", "title": "Phoenix 
Annotation Id"}, "annotator_kind": {"default": "HUMAN", "description": "Annotator type: HUMAN, LLM, CODE", "title": "Annotator Kind", "type": "string"}}, "required": ["session_id"], "json_schema_extra": {"table_name": "feedbacks", "entity_key_field": "id", "embedding_fields": [], "fully_qualified_name": "rem.models.entities.feedback.Feedback", "tools": ["search_rem"], "default_search_table": "feedbacks", "has_embeddings": false}}'::jsonb,
1423
+ 'entity',
1424
+ '{"table_name": "feedbacks", "entity_key_field": "id", "embedding_fields": [], "fqn": "rem.models.entities.feedback.Feedback"}'::jsonb
1425
+ )
1426
+ ON CONFLICT (id) DO UPDATE SET
1427
+ name = EXCLUDED.name,
1428
+ content = EXCLUDED.content,
1429
+ spec = EXCLUDED.spec,
1430
+ category = EXCLUDED.category,
1431
+ metadata = EXCLUDED.metadata,
1432
+ updated_at = CURRENT_TIMESTAMP;
1433
+
1434
+ -- Schema entry for File (files)
1435
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
1436
+ VALUES (
1437
+ 'c3b3ef33-59d4-57a1-81a3-cc6adc45b194'::uuid,
1438
+ 'system',
1439
+ 'File',
1440
+ '# File
1441
+
1442
+
1443
+ File metadata and tracking.
1444
+
1445
+ Represents files uploaded to or referenced by the REM system,
1446
+ tracking their metadata and processing status. Tenant isolation
1447
+ is provided via CoreModel.tenant_id field.
1448
+
1449
+
1450
+ ## Overview
1451
+
1452
+ The `File` entity is stored in the `files` table. Each record is uniquely
1453
+ identified by its `name` field for lookups and graph traversal.
1454
+
1455
+ ## Search Capabilities
1456
+
1457
+ This schema includes the `search_rem` tool which supports:
1458
+ - **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
1459
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
1460
+ - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM files LIMIT 10`)
1461
+ - **SQL**: Complex queries (e.g., `SELECT * FROM files WHERE ...`)
1462
+
1463
+ ## Table Info
1464
+
1465
+ | Property | Value |
1466
+ |----------|-------|
1467
+ | Table | `files` |
1468
+ | Entity Key | `name` |
1469
+ | Embedding Fields | `content` |
1470
+ | Tools | `search_rem` |
1471
+
1472
+ ## Fields
1473
+
1474
+ ### `id`
1475
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
1476
+ - **Optional**
1477
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
1478
+
1479
+ ### `created_at`
1480
+ - **Type**: `<class ''datetime.datetime''>`
1481
+ - **Optional**
1482
+ - Entity creation timestamp
1483
+
1484
+ ### `updated_at`
1485
+ - **Type**: `<class ''datetime.datetime''>`
1486
+ - **Optional**
1487
+ - Last update timestamp
1488
+
1489
+ ### `deleted_at`
1490
+ - **Type**: `typing.Optional[datetime.datetime]`
1491
+ - **Optional**
1492
+ - Soft deletion timestamp
1493
+
1494
+ ### `tenant_id`
1495
+ - **Type**: `typing.Optional[str]`
1496
+ - **Optional**
1497
+ - Tenant identifier for multi-tenancy isolation
1498
+
1499
+ ### `user_id`
1500
+ - **Type**: `typing.Optional[str]`
1501
+ - **Optional**
1502
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
1503
+
1504
+ ### `graph_edges`
1505
+ - **Type**: `list[dict]`
1506
+ - **Optional**
1507
+ - Knowledge graph edges stored as InlineEdge dicts
1508
+
1509
+ ### `metadata`
1510
+ - **Type**: `<class ''dict''>`
1511
+ - **Optional**
1512
+ - Flexible metadata storage
1513
+
1514
+ ### `tags`
1515
+ - **Type**: `list[str]`
1516
+ - **Optional**
1517
+ - Entity tags
1518
+
1519
+ ### `name`
1520
+ - **Type**: `<class ''str''>`
1521
+ - **Required**
1522
+ - File name
1523
+
1524
+ ### `uri`
1525
+ - **Type**: `<class ''str''>`
1526
+ - **Required**
1527
+ - File storage URI (S3, local path, etc.)
1528
+
1529
+ ### `content`
1530
+ - **Type**: `typing.Optional[str]`
1531
+ - **Optional**
1532
+ - Extracted text content (if applicable)
1533
+
1534
+ ### `timestamp`
1535
+ - **Type**: `typing.Optional[str]`
1536
+ - **Optional**
1537
+ - File creation/modification timestamp
1538
+
1539
+ ### `size_bytes`
1540
+ - **Type**: `typing.Optional[int]`
1541
+ - **Optional**
1542
+ - File size in bytes
1543
+
1544
+ ### `mime_type`
1545
+ - **Type**: `typing.Optional[str]`
1546
+ - **Optional**
1547
+ - File MIME type
1548
+
1549
+ ### `processing_status`
1550
+ - **Type**: `typing.Optional[str]`
1551
+ - **Optional**
1552
+ - File processing status (pending, processing, completed, failed)
1553
+
1554
+ ',
1555
+ '{"type": "object", "description": "\n File metadata and tracking.\n\n Represents files uploaded to or referenced by the REM system,\n tracking their metadata and processing status. Tenant isolation\n is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `files` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "File name", "title": "Name", "type": "string"}, "uri": {"description": "File storage URI (S3, local path, etc.)", "title": "Uri", "type": "string"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Extracted text content (if applicable)", "title": "Content"}, "timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File creation/modification timestamp", "title": "Timestamp"}, "size_bytes": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "File size in bytes", "title": "Size Bytes"}, "mime_type": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File MIME type", "title": "Mime Type"}, "processing_status": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": "pending", "description": "File processing status (pending, processing, completed, failed)", "title": "Processing Status"}}, "required": ["name", "uri"], "json_schema_extra": {"table_name": "files", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.file.File", "tools": ["search_rem"], "default_search_table": "files", "has_embeddings": true}}'::jsonb,
1556
+ 'entity',
1557
+ '{"table_name": "files", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.file.File"}'::jsonb
1558
+ )
1559
+ ON CONFLICT (id) DO UPDATE SET
1560
+ name = EXCLUDED.name,
1561
+ content = EXCLUDED.content,
1562
+ spec = EXCLUDED.spec,
1563
+ category = EXCLUDED.category,
1564
+ metadata = EXCLUDED.metadata,
1565
+ updated_at = CURRENT_TIMESTAMP;
1566
+
1567
+ -- Schema entry for ImageResource (image_resources)
1568
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
1569
+ VALUES (
1570
+ 'ab4bc90c-2cda-55b2-bd4b-e78e19f7d4a7'::uuid,
1571
+ 'system',
1572
+ 'ImageResource',
1573
+ '# ImageResource
1574
+
1575
+
1576
+ Image-specific resource with CLIP embeddings.
1577
+
1578
+ Stored in separate `image_resources` table with CLIP embeddings
1579
+ instead of text embeddings. This enables:
1580
+ - Multimodal search (text-to-image, image-to-image)
1581
+ - Proper dimensionality (512/768 for CLIP vs 1536 for text)
1582
+ - Cost tracking (CLIP tokens separate from text tokens)
1583
+
1584
+ Embedding Strategy:
1585
+ - Default (when JINA_API_KEY set): Jina CLIP API (jina-clip-v2)
1586
+ - Future: Self-hosted OpenCLIP models via KEDA-scaled pods
1587
+ - Fallback: No embeddings (images searchable by metadata only)
1588
+
1589
+ Vision LLM Strategy (tier/sampling gated):
1590
+ - Gold tier: Always get vision descriptions
1591
+ - Silver/Free: Probabilistic sampling (IMAGE_VLLM_SAMPLE_RATE)
1592
+ - Fallback: Basic metadata only
1593
+
1594
+ Tenant isolation provided via CoreModel.tenant_id field.
1595
+
1596
+
1597
+ ## Overview
1598
+
1599
+ The `ImageResource` entity is stored in the `image_resources` table. Each record is uniquely
1600
+ identified by its `name` field for lookups and graph traversal.
1601
+
1602
+ ## Search Capabilities
1603
+
1604
+ This schema includes the `search_rem` tool which supports:
1605
+ - **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
1606
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
1607
+ - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM image_resources LIMIT 10`)
1608
+ - **SQL**: Complex queries (e.g., `SELECT * FROM image_resources WHERE ...`)
1609
+
1610
+ ## Table Info
1611
+
1612
+ | Property | Value |
1613
+ |----------|-------|
1614
+ | Table | `image_resources` |
1615
+ | Entity Key | `name` |
1616
+ | Embedding Fields | `content` |
1617
+ | Tools | `search_rem` |
1618
+
1619
+ ## Fields
1620
+
1621
+ ### `id`
1622
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
1623
+ - **Optional**
1624
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
1625
+
1626
+ ### `created_at`
1627
+ - **Type**: `<class ''datetime.datetime''>`
1628
+ - **Optional**
1629
+ - Entity creation timestamp
1630
+
1631
+ ### `updated_at`
1632
+ - **Type**: `<class ''datetime.datetime''>`
1633
+ - **Optional**
1634
+ - Last update timestamp
1635
+
1636
+ ### `deleted_at`
1637
+ - **Type**: `typing.Optional[datetime.datetime]`
1638
+ - **Optional**
1639
+ - Soft deletion timestamp
1640
+
1641
+ ### `tenant_id`
1642
+ - **Type**: `typing.Optional[str]`
1643
+ - **Optional**
1644
+ - Tenant identifier for multi-tenancy isolation
1645
+
1646
+ ### `user_id`
1647
+ - **Type**: `typing.Optional[str]`
1648
+ - **Optional**
1649
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
1650
+
1651
+ ### `graph_edges`
1652
+ - **Type**: `list[dict]`
1653
+ - **Optional**
1654
+ - Knowledge graph edges stored as InlineEdge dicts
1655
+
1656
+ ### `metadata`
1657
+ - **Type**: `<class ''dict''>`
1658
+ - **Optional**
1659
+ - Flexible metadata storage
1660
+
1661
+ ### `tags`
1662
+ - **Type**: `list[str]`
1663
+ - **Optional**
1664
+ - Entity tags
1665
+
1666
+ ### `name`
1667
+ - **Type**: `typing.Optional[str]`
1668
+ - **Optional**
1669
+ - Human-readable resource name (used as graph label). Auto-generated from uri+ordinal if not provided.
1670
+
1671
+ ### `uri`
1672
+ - **Type**: `typing.Optional[str]`
1673
+ - **Optional**
1674
+ - Content URI or identifier (file path, URL, etc.)
1675
+
1676
+ ### `ordinal`
1677
+ - **Type**: `<class ''int''>`
1678
+ - **Optional**
1679
+ - Chunk ordinal for splitting large documents (0 for single-chunk resources)
1680
+
1681
+ ### `content`
1682
+ - **Type**: `<class ''str''>`
1683
+ - **Optional**
1684
+ - Resource content text
1685
+
1686
+ ### `timestamp`
1687
+ - **Type**: `<class ''datetime.datetime''>`
1688
+ - **Optional**
1689
+ - Resource timestamp (content creation/publication time)
1690
+
1691
+ ### `category`
1692
+ - **Type**: `typing.Optional[str]`
1693
+ - **Optional**
1694
+ - Resource category (document, conversation, artifact, etc.)
1695
+
1696
+ ### `related_entities`
1697
+ - **Type**: `list[dict]`
1698
+ - **Optional**
1699
+ - Extracted entities (people, projects, concepts) with metadata
1700
+
1701
+ ### `image_width`
1702
+ - **Type**: `typing.Optional[int]`
1703
+ - **Optional**
1704
+ - Image width in pixels
1705
+
1706
+ ### `image_height`
1707
+ - **Type**: `typing.Optional[int]`
1708
+ - **Optional**
1709
+ - Image height in pixels
1710
+
1711
+ ### `image_format`
1712
+ - **Type**: `typing.Optional[str]`
1713
+ - **Optional**
1714
+ - Image format (PNG, JPEG, GIF, WebP)
1715
+
1716
+ ### `vision_description`
1717
+ - **Type**: `typing.Optional[str]`
1718
+ - **Optional**
1719
+ - Vision LLM generated description (markdown, only for gold tier or sampled images)
1720
+
1721
+ ### `vision_provider`
1722
+ - **Type**: `typing.Optional[str]`
1723
+ - **Optional**
1724
+ - Vision provider used (anthropic, gemini, openai)
1725
+
1726
+ ### `vision_model`
1727
+ - **Type**: `typing.Optional[str]`
1728
+ - **Optional**
1729
+ - Vision model used for description
1730
+
1731
+ ### `clip_embedding`
1732
+ - **Type**: `typing.Optional[list[float]]`
1733
+ - **Optional**
1734
+ - CLIP embedding vector (512 or 768 dimensions, from Jina AI or self-hosted)
1735
+
1736
+ ### `clip_dimensions`
1737
+ - **Type**: `typing.Optional[int]`
1738
+ - **Optional**
1739
+ - CLIP embedding dimensionality (512 for jina-clip-v2, 768 for jina-clip-v1)
1740
+
1741
+ ',
1742
+ '{"type": "object", "description": "\n Image-specific resource with CLIP embeddings.\n\n Stored in separate `image_resources` table with CLIP embeddings\n instead of text embeddings. This enables:\n - Multimodal search (text-to-image, image-to-image)\n - Proper dimensionality (512/768 for CLIP vs 1536 for text)\n - Cost tracking (CLIP tokens separate from text tokens)\n\n Embedding Strategy:\n - Default (when JINA_API_KEY set): Jina CLIP API (jina-clip-v2)\n - Future: Self-hosted OpenCLIP models via KEDA-scaled pods\n - Fallback: No embeddings (images searchable by metadata only)\n\n Vision LLM Strategy (tier/sampling gated):\n - Gold tier: Always get vision descriptions\n - Silver/Free: Probabilistic sampling (IMAGE_VLLM_SAMPLE_RATE)\n - Fallback: Basic metadata only\n\n Tenant isolation provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `image_resources` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). 
Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Human-readable resource name (used as graph label). 
Auto-generated from uri+ordinal if not provided.", "entity_key": true, "title": "Name"}, "uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Content URI or identifier (file path, URL, etc.)", "title": "Uri"}, "ordinal": {"composite_key": true, "default": 0, "description": "Chunk ordinal for splitting large documents (0 for single-chunk resources)", "title": "Ordinal", "type": "integer"}, "content": {"default": "", "description": "Resource content text", "title": "Content", "type": "string"}, "timestamp": {"description": "Resource timestamp (content creation/publication time)", "format": "date-time", "title": "Timestamp", "type": "string"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Resource category (document, conversation, artifact, etc.)", "title": "Category"}, "related_entities": {"description": "Extracted entities (people, projects, concepts) with metadata", "items": {"additionalProperties": true, "type": "object"}, "title": "Related Entities", "type": "array"}, "image_width": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "Image width in pixels", "title": "Image Width"}, "image_height": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "Image height in pixels", "title": "Image Height"}, "image_format": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Image format (PNG, JPEG, GIF, WebP)", "title": "Image Format"}, "vision_description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Vision LLM generated description (markdown, only for gold tier or sampled images)", "title": "Vision Description"}, "vision_provider": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Vision provider used (anthropic, gemini, openai)", "title": "Vision Provider"}, "vision_model": {"anyOf": [{"type": "string"}, {"type": 
"null"}], "default": null, "description": "Vision model used for description", "title": "Vision Model"}, "clip_embedding": {"anyOf": [{"items": {"type": "number"}, "type": "array"}, {"type": "null"}], "default": null, "description": "CLIP embedding vector (512 or 768 dimensions, from Jina AI or self-hosted)", "title": "Clip Embedding"}, "clip_dimensions": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "CLIP embedding dimensionality (512 for jina-clip-v2, 768 for jina-clip-v1)", "title": "Clip Dimensions"}}, "required": [], "json_schema_extra": {"table_name": "image_resources", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.image_resource.ImageResource", "tools": ["search_rem"], "default_search_table": "image_resources", "has_embeddings": true}}'::jsonb,
1743
+ 'entity',
1744
+ '{"table_name": "image_resources", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.image_resource.ImageResource"}'::jsonb
1745
+ )
1746
+ ON CONFLICT (id) DO UPDATE SET
1747
+ name = EXCLUDED.name,
1748
+ content = EXCLUDED.content,
1749
+ spec = EXCLUDED.spec,
1750
+ category = EXCLUDED.category,
1751
+ metadata = EXCLUDED.metadata,
1752
+ updated_at = CURRENT_TIMESTAMP;
1753
+
1754
+ -- Schema entry for Message (messages)
1755
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
1756
+ VALUES (
1757
+ 'be36f9da-6df4-51ba-bb41-bf51246ecec1'::uuid,
1758
+ 'system',
1759
+ 'Message',
1760
+ '# Message
1761
+
1762
+
1763
+ Communication content unit.
1764
+
1765
+ Represents individual messages in conversations, chats, or other
1766
+ communication contexts. Tenant isolation is provided via CoreModel.tenant_id field.
1767
+
1768
+ Trace fields (trace_id, span_id) enable integration with OTEL/Phoenix
1769
+ for observability and feedback annotation.
1770
+
1771
+
1772
+ ## Overview
1773
+
1774
+ The `Message` entity is stored in the `messages` table. Each record is uniquely
1775
+ identified by its `id` field for lookups and graph traversal.
1776
+
1777
+ ## Search Capabilities
1778
+
1779
+ This schema includes the `search_rem` tool which supports:
1780
+ - **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
1781
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
1782
+ - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM messages LIMIT 10`)
1783
+ - **SQL**: Complex queries (e.g., `SELECT * FROM messages WHERE ...`)
1784
+
1785
+ ## Table Info
1786
+
1787
+ | Property | Value |
1788
+ |----------|-------|
1789
+ | Table | `messages` |
1790
+ | Entity Key | `id` |
1791
+ | Embedding Fields | `content` |
1792
+ | Tools | `search_rem` |
1793
+
1794
+ ## Fields
1795
+
1796
+ ### `id`
1797
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
1798
+ - **Optional**
1799
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
1800
+
1801
+ ### `created_at`
1802
+ - **Type**: `<class ''datetime.datetime''>`
1803
+ - **Optional**
1804
+ - Entity creation timestamp
1805
+
1806
+ ### `updated_at`
1807
+ - **Type**: `<class ''datetime.datetime''>`
1808
+ - **Optional**
1809
+ - Last update timestamp
1810
+
1811
+ ### `deleted_at`
1812
+ - **Type**: `typing.Optional[datetime.datetime]`
1813
+ - **Optional**
1814
+ - Soft deletion timestamp
1815
+
1816
+ ### `tenant_id`
1817
+ - **Type**: `typing.Optional[str]`
1818
+ - **Optional**
1819
+ - Tenant identifier for multi-tenancy isolation
1820
+
1821
+ ### `user_id`
1822
+ - **Type**: `typing.Optional[str]`
1823
+ - **Optional**
1824
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
1825
+
1826
+ ### `graph_edges`
1827
+ - **Type**: `list[dict]`
1828
+ - **Optional**
1829
+ - Knowledge graph edges stored as InlineEdge dicts
1830
+
1831
+ ### `metadata`
1832
+ - **Type**: `<class ''dict''>`
1833
+ - **Optional**
1834
+ - Flexible metadata storage
1835
+
1836
+ ### `tags`
1837
+ - **Type**: `list[str]`
1838
+ - **Optional**
1839
+ - Entity tags
1840
+
1841
+ ### `content`
1842
+ - **Type**: `<class ''str''>`
1843
+ - **Required**
1844
+ - Message content text
1845
+
1846
+ ### `message_type`
1847
+ - **Type**: `str | None`
1848
+ - **Optional**
1849
+ - Message type e.g. role: ''user'', ''assistant'', ''system'', ''tool''
1850
+
1851
+ ### `session_id`
1852
+ - **Type**: `str | None`
1853
+ - **Optional**
1854
+ - Session identifier for tracking message context
1855
+
1856
+ ### `prompt`
1857
+ - **Type**: `str | None`
1858
+ - **Optional**
1859
+ - Custom prompt used for this message (if overridden from default)
1860
+
1861
+ ### `model`
1862
+ - **Type**: `str | None`
1863
+ - **Optional**
1864
+ - Model used for generating this message (provider:model format)
1865
+
1866
+ ### `token_count`
1867
+ - **Type**: `int | None`
1868
+ - **Optional**
1869
+ - Token count for this message
1870
+
1871
+ ### `trace_id`
1872
+ - **Type**: `str | None`
1873
+ - **Optional**
1874
+ - OTEL trace ID for observability integration
1875
+
1876
+ ### `span_id`
1877
+ - **Type**: `str | None`
1878
+ - **Optional**
1879
+ - OTEL span ID for specific span reference
1880
+
1881
+ ',
1882
+ '{"type": "object", "description": "\n Communication content unit.\n\n Represents individual messages in conversations, chats, or other\n communication contexts. Tenant isolation is provided via CoreModel.tenant_id field.\n\n Trace fields (trace_id, span_id) enable integration with OTEL/Phoenix\n for observability and feedback annotation.\n \n\nThis agent can search the `messages` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "content": {"description": "Message content text", "title": "Content", "type": "string"}, "message_type": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Message type e.g. role: ''user'', ''assistant'', ''system'', ''tool''", "title": "Message Type"}, "session_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Session identifier for tracking message context", "title": "Session Id"}, "prompt": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Custom prompt used for this message (if overridden from default)", "title": "Prompt"}, "model": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Model used for generating this message (provider:model format)", "title": "Model"}, "token_count": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "Token count for this message", "title": "Token Count"}, "trace_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "OTEL trace ID for observability integration", "title": "Trace Id"}, "span_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "OTEL span ID for specific span reference", "title": "Span Id"}}, "required": ["content"], "json_schema_extra": {"table_name": "messages", "entity_key_field": "id", "embedding_fields": ["content"], "fully_qualified_name": 
"rem.models.entities.message.Message", "tools": ["search_rem"], "default_search_table": "messages", "has_embeddings": true}}'::jsonb,
1883
+ 'entity',
1884
+ '{"table_name": "messages", "entity_key_field": "id", "embedding_fields": ["content"], "fqn": "rem.models.entities.message.Message"}'::jsonb
1885
+ )
1886
+ ON CONFLICT (id) DO UPDATE SET
1887
+ name = EXCLUDED.name,
1888
+ content = EXCLUDED.content,
1889
+ spec = EXCLUDED.spec,
1890
+ category = EXCLUDED.category,
1891
+ metadata = EXCLUDED.metadata,
1892
+ updated_at = CURRENT_TIMESTAMP;
1893
+
1894
+ -- Schema entry for Moment (moments)
1895
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
1896
+ VALUES (
1897
+ 'a08f0a8c-5bab-5bf5-9760-0e67bc69bd74'::uuid,
1898
+ 'system',
1899
+ 'Moment',
1900
+ '# Moment
1901
+
1902
+
1903
+ Temporal narrative extracted from resources.
1904
+
1905
+ Moments provide temporal structure and context for the REM graph,
1906
+ enabling time-based queries and understanding of when events occurred.
1907
+ Tenant isolation is provided via CoreModel.tenant_id field.
1908
+
1909
+
1910
+ ## Overview
1911
+
1912
+ The `Moment` entity is stored in the `moments` table. Each record is uniquely
1913
+ identified by its `name` field for lookups and graph traversal.
1914
+
1915
+ ## Search Capabilities
1916
+
1917
+ This schema includes the `search_rem` tool which supports:
1918
+ - **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
1919
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
1920
+ - **SEARCH**: Semantic vector search on summary (e.g., `SEARCH "concept" FROM moments LIMIT 10`)
1921
+ - **SQL**: Complex queries (e.g., `SELECT * FROM moments WHERE ...`)
1922
+
1923
+ ## Table Info
1924
+
1925
+ | Property | Value |
1926
+ |----------|-------|
1927
+ | Table | `moments` |
1928
+ | Entity Key | `name` |
1929
+ | Embedding Fields | `summary` |
1930
+ | Tools | `search_rem` |
1931
+
1932
+ ## Fields
1933
+
1934
+ ### `id`
1935
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
1936
+ - **Optional**
1937
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
1938
+
1939
+ ### `created_at`
1940
+ - **Type**: `<class ''datetime.datetime''>`
1941
+ - **Optional**
1942
+ - Entity creation timestamp
1943
+
1944
+ ### `updated_at`
1945
+ - **Type**: `<class ''datetime.datetime''>`
1946
+ - **Optional**
1947
+ - Last update timestamp
1948
+
1949
+ ### `deleted_at`
1950
+ - **Type**: `typing.Optional[datetime.datetime]`
1951
+ - **Optional**
1952
+ - Soft deletion timestamp
1953
+
1954
+ ### `tenant_id`
1955
+ - **Type**: `typing.Optional[str]`
1956
+ - **Optional**
1957
+ - Tenant identifier for multi-tenancy isolation
1958
+
1959
+ ### `user_id`
1960
+ - **Type**: `typing.Optional[str]`
1961
+ - **Optional**
1962
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
1963
+
1964
+ ### `graph_edges`
1965
+ - **Type**: `list[dict]`
1966
+ - **Optional**
1967
+ - Knowledge graph edges stored as InlineEdge dicts
1968
+
1969
+ ### `metadata`
1970
+ - **Type**: `<class ''dict''>`
1971
+ - **Optional**
1972
+ - Flexible metadata storage
1973
+
1974
+ ### `tags`
1975
+ - **Type**: `list[str]`
1976
+ - **Optional**
1977
+ - Entity tags
1978
+
1979
+ ### `name`
1980
+ - **Type**: `typing.Optional[str]`
1981
+ - **Optional**
1982
+ - Human-readable moment name (used as graph label). Auto-generated from starts_timestamp+moment_type if not provided.
1983
+
1984
+ ### `moment_type`
1985
+ - **Type**: `typing.Optional[str]`
1986
+ - **Optional**
1987
+ - Moment classification (meeting, coding-session, conversation, etc.)
1988
+
1989
+ ### `category`
1990
+ - **Type**: `typing.Optional[str]`
1991
+ - **Optional**
1992
+ - Moment category for grouping and filtering
1993
+
1994
+ ### `starts_timestamp`
1995
+ - **Type**: `<class ''datetime.datetime''>`
1996
+ - **Required**
1997
+ - Moment start time
1998
+
1999
+ ### `ends_timestamp`
2000
+ - **Type**: `typing.Optional[datetime.datetime]`
2001
+ - **Optional**
2002
+ - Moment end time
2003
+
2004
+ ### `present_persons`
2005
+ - **Type**: `list[rem.models.entities.moment.Person]`
2006
+ - **Optional**
2007
+ - People present in the moment
2008
+
2009
+ ### `emotion_tags`
2010
+ - **Type**: `list[str]`
2011
+ - **Optional**
2012
+ - Emotion/sentiment tags (happy, frustrated, focused, etc.)
2013
+
2014
+ ### `topic_tags`
2015
+ - **Type**: `list[str]`
2016
+ - **Optional**
2017
+ - Topic/concept tags (project names, technologies, etc.)
2018
+
2019
+ ### `summary`
2020
+ - **Type**: `typing.Optional[str]`
2021
+ - **Optional**
2022
+ - Natural language summary of the moment
2023
+
2024
+ ### `source_resource_ids`
2025
+ - **Type**: `list[str]`
2026
+ - **Optional**
2027
+ - Resource IDs used to construct this moment
2028
+
2029
+ ',
2030
+ '{"type": "object", "description": "\n Temporal narrative extracted from resources.\n\n Moments provide temporal structure and context for the REM graph,\n enabling time-based queries and understanding of when events occurred.\n Tenant isolation is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `moments` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Human-readable moment name (used as graph label). Auto-generated from starts_timestamp+moment_type if not provided.", "entity_key": true, "title": "Name"}, "moment_type": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Moment classification (meeting, coding-session, conversation, etc.)", "title": "Moment Type"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Moment category for grouping and filtering", "title": "Category"}, "starts_timestamp": {"description": "Moment start time", "format": "date-time", "title": "Starts Timestamp", "type": "string"}, "ends_timestamp": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Moment end time", "title": "Ends Timestamp"}, "present_persons": {"description": "People present in the moment", "items": {"$ref": "#/$defs/Person"}, "title": "Present Persons", "type": "array"}, "emotion_tags": {"description": "Emotion/sentiment tags (happy, frustrated, focused, etc.)", "items": {"type": "string"}, "title": "Emotion Tags", "type": "array"}, "topic_tags": {"description": "Topic/concept tags (project names, technologies, etc.)", "items": {"type": "string"}, "title": "Topic Tags", "type": "array"}, "summary": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, 
"description": "Natural language summary of the moment", "title": "Summary"}, "source_resource_ids": {"description": "Resource IDs used to construct this moment", "items": {"type": "string"}, "title": "Source Resource Ids", "type": "array"}}, "required": ["starts_timestamp"], "json_schema_extra": {"table_name": "moments", "entity_key_field": "name", "embedding_fields": ["summary"], "fully_qualified_name": "rem.models.entities.moment.Moment", "tools": ["search_rem"], "default_search_table": "moments", "has_embeddings": true}}'::jsonb,
2031
+ 'entity',
2032
+ '{"table_name": "moments", "entity_key_field": "name", "embedding_fields": ["summary"], "fqn": "rem.models.entities.moment.Moment"}'::jsonb
2033
+ )
2034
+ ON CONFLICT (id) DO UPDATE SET
2035
+ name = EXCLUDED.name,
2036
+ content = EXCLUDED.content,
2037
+ spec = EXCLUDED.spec,
2038
+ category = EXCLUDED.category,
2039
+ metadata = EXCLUDED.metadata,
2040
+ updated_at = CURRENT_TIMESTAMP;
2041
+
2042
+ -- Schema entry for Ontology (ontologies)
2043
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
2044
+ VALUES (
2045
+ 'a702ed74-8988-534a-9917-2977349777c1'::uuid,
2046
+ 'system',
2047
+ 'Ontology',
2048
+ '# Ontology
2049
+
2050
+ Domain-specific knowledge - either agent-extracted or direct-loaded.
2051
+
2052
+ Attributes:
2053
+ name: Human-readable label for this ontology instance
2054
+ uri: External source reference (git://, s3://, https://) for direct-loaded ontologies
2055
+ file_id: Foreign key to File entity (optional - only for agent-extracted)
2056
+ agent_schema_id: Schema that performed extraction (optional - only for agent-extracted)
2057
+ provider_name: LLM provider used for extraction (optional)
2058
+ model_name: Specific model used (optional)
2059
+ extracted_data: Structured data - either extracted by agent or parsed from source
2060
+ confidence_score: Optional confidence score from extraction (0.0-1.0)
2061
+ extraction_timestamp: When extraction was performed
2062
+ content: Text used for generating embedding
2063
+
2064
+ Inherited from CoreModel:
2065
+ id: UUID or string identifier
2066
+ created_at: Entity creation timestamp
2067
+ updated_at: Last update timestamp
2068
+ deleted_at: Soft deletion timestamp
2069
+ tenant_id: Multi-tenancy isolation
2070
+ user_id: Ownership
2071
+ graph_edges: Relationships to other entities
2072
+ metadata: Flexible metadata storage
2073
+ tags: Classification tags
2074
+
2075
+ Example Usage:
2076
+ # Agent-extracted: CV parsing
2077
+ cv_ontology = Ontology(
2078
+ name="john-doe-cv-2024",
2079
+ file_id="file-uuid-123",
2080
+ agent_schema_id="cv-parser-v1",
2081
+ provider_name="anthropic",
2082
+ model_name="claude-sonnet-4-5-20250929",
2083
+ extracted_data={
2084
+ "candidate_name": "John Doe",
2085
+ "skills": ["Python", "PostgreSQL", "Kubernetes"],
2086
+ },
2087
+ confidence_score=0.95,
2088
+ tags=["cv", "engineering"]
2089
+ )
2090
+
2091
+ # Direct-loaded: Knowledge base from git
2092
+ api_docs = Ontology(
2093
+ name="rest-api-guide",
2094
+ uri="git://example-org/docs/api/rest-api-guide.md",
2095
+ content="# REST API Guide\n\nThis guide covers RESTful API design...",
2096
+ extracted_data={
2097
+ "type": "documentation",
2098
+ "category": "api",
2099
+ "version": "2.0",
2100
+ },
2101
+ tags=["api", "rest", "documentation"]
2102
+ )
2103
+
2104
+ # Direct-loaded: Technical spec from git
2105
+ config_spec = Ontology(
2106
+ name="config-schema",
2107
+ uri="git://example-org/docs/specs/config-schema.md",
2108
+ content="# Configuration Schema\n\nThis document defines...",
2109
+ extracted_data={
2110
+ "type": "specification",
2111
+ "format": "yaml",
2112
+ "version": "1.0",
2113
+ },
2114
+ tags=["config", "schema", "specification"]
2115
+ )
2116
+
2117
+
2118
+ ## Overview
2119
+
2120
+ The `Ontology` entity is stored in the `ontologies` table. Each record is uniquely
2121
+ identified by its `name` field for lookups and graph traversal.
2122
+
2123
+ ## Search Capabilities
2124
+
2125
+ This schema includes the `search_rem` tool which supports:
2126
+ - **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
2127
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
2128
+ - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM ontologies LIMIT 10`)
2129
+ - **SQL**: Complex queries (e.g., `SELECT * FROM ontologies WHERE ...`)
2130
+
2131
+ ## Table Info
2132
+
2133
+ | Property | Value |
2134
+ |----------|-------|
2135
+ | Table | `ontologies` |
2136
+ | Entity Key | `name` |
2137
+ | Embedding Fields | `content` |
2138
+ | Tools | `search_rem` |
2139
+
2140
+ ## Fields
2141
+
2142
+ ### `id`
2143
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
2144
+ - **Optional**
2145
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
2146
+
2147
+ ### `created_at`
2148
+ - **Type**: `<class ''datetime.datetime''>`
2149
+ - **Optional**
2150
+ - Entity creation timestamp
2151
+
2152
+ ### `updated_at`
2153
+ - **Type**: `<class ''datetime.datetime''>`
2154
+ - **Optional**
2155
+ - Last update timestamp
2156
+
2157
+ ### `deleted_at`
2158
+ - **Type**: `typing.Optional[datetime.datetime]`
2159
+ - **Optional**
2160
+ - Soft deletion timestamp
2161
+
2162
+ ### `tenant_id`
2163
+ - **Type**: `typing.Optional[str]`
2164
+ - **Optional**
2165
+ - Tenant identifier for multi-tenancy isolation
2166
+
2167
+ ### `user_id`
2168
+ - **Type**: `typing.Optional[str]`
2169
+ - **Optional**
2170
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
2171
+
2172
+ ### `graph_edges`
2173
+ - **Type**: `list[dict]`
2174
+ - **Optional**
2175
+ - Knowledge graph edges stored as InlineEdge dicts
2176
+
2177
+ ### `metadata`
2178
+ - **Type**: `<class ''dict''>`
2179
+ - **Optional**
2180
+ - Flexible metadata storage
2181
+
2182
+ ### `tags`
2183
+ - **Type**: `list[str]`
2184
+ - **Optional**
2185
+ - Entity tags
2186
+
2187
+ ### `name`
2188
+ - **Type**: `<class ''str''>`
2189
+ - **Required**
2190
+
2191
+ ### `uri`
2192
+ - **Type**: `typing.Optional[str]`
2193
+ - **Optional**
2194
+
2195
+ ### `file_id`
2196
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
2197
+ - **Optional**
2198
+
2199
+ ### `agent_schema_id`
2200
+ - **Type**: `typing.Optional[str]`
2201
+ - **Optional**
2202
+
2203
+ ### `provider_name`
2204
+ - **Type**: `typing.Optional[str]`
2205
+ - **Optional**
2206
+
2207
+ ### `model_name`
2208
+ - **Type**: `typing.Optional[str]`
2209
+ - **Optional**
2210
+
2211
+ ### `extracted_data`
2212
+ - **Type**: `typing.Optional[dict[str, typing.Any]]`
2213
+ - **Optional**
2214
+
2215
+ ### `confidence_score`
2216
+ - **Type**: `typing.Optional[float]`
2217
+ - **Optional**
2218
+
2219
+ ### `extraction_timestamp`
2220
+ - **Type**: `typing.Optional[str]`
2221
+ - **Optional**
2222
+
2223
+ ### `content`
2224
+ - **Type**: `typing.Optional[str]`
2225
+ - **Optional**
2226
+
2227
+ ',
2228
+ '{"type": "object", "description": "Domain-specific knowledge - either agent-extracted or direct-loaded.\n\n Attributes:\n name: Human-readable label for this ontology instance\n uri: External source reference (git://, s3://, https://) for direct-loaded ontologies\n file_id: Foreign key to File entity (optional - only for agent-extracted)\n agent_schema_id: Schema that performed extraction (optional - only for agent-extracted)\n provider_name: LLM provider used for extraction (optional)\n model_name: Specific model used (optional)\n extracted_data: Structured data - either extracted by agent or parsed from source\n confidence_score: Optional confidence score from extraction (0.0-1.0)\n extraction_timestamp: When extraction was performed\n content: Text used for generating embedding\n\n Inherited from CoreModel:\n id: UUID or string identifier\n created_at: Entity creation timestamp\n updated_at: Last update timestamp\n deleted_at: Soft deletion timestamp\n tenant_id: Multi-tenancy isolation\n user_id: Ownership\n graph_edges: Relationships to other entities\n metadata: Flexible metadata storage\n tags: Classification tags\n\n Example Usage:\n # Agent-extracted: CV parsing\n cv_ontology = Ontology(\n name=\"john-doe-cv-2024\",\n file_id=\"file-uuid-123\",\n agent_schema_id=\"cv-parser-v1\",\n provider_name=\"anthropic\",\n model_name=\"claude-sonnet-4-5-20250929\",\n extracted_data={\n \"candidate_name\": \"John Doe\",\n \"skills\": [\"Python\", \"PostgreSQL\", \"Kubernetes\"],\n },\n confidence_score=0.95,\n tags=[\"cv\", \"engineering\"]\n )\n\n # Direct-loaded: Knowledge base from git\n api_docs = Ontology(\n name=\"rest-api-guide\",\n uri=\"git://example-org/docs/api/rest-api-guide.md\",\n content=\"# REST API Guide\\n\\nThis guide covers RESTful API design...\",\n extracted_data={\n \"type\": \"documentation\",\n \"category\": \"api\",\n \"version\": \"2.0\",\n },\n tags=[\"api\", \"rest\", \"documentation\"]\n )\n\n # Direct-loaded: Technical spec from git\n 
config_spec = Ontology(\n name=\"config-schema\",\n uri=\"git://example-org/docs/specs/config-schema.md\",\n content=\"# Configuration Schema\\n\\nThis document defines...\",\n extracted_data={\n \"type\": \"specification\",\n \"format\": \"yaml\",\n \"version\": \"1.0\",\n },\n tags=[\"config\", \"schema\", \"specification\"]\n )\n \n\nThis agent can search the `ontologies` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri"}, "file_id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "title": "File Id"}, "agent_schema_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Agent Schema Id"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}, "extracted_data": {"anyOf": [{"additionalProperties": true, "type": "object"}, {"type": "null"}], "default": null, "title": "Extracted Data"}, "confidence_score": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Confidence Score"}, "extraction_timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Extraction Timestamp"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Content"}}, "required": ["name"], "json_schema_extra": {"table_name": "ontologies", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.ontology.Ontology", "tools": ["search_rem"], "default_search_table": "ontologies", "has_embeddings": true}}'::jsonb,
2229
+ 'entity',
2230
+ '{"table_name": "ontologies", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.ontology.Ontology"}'::jsonb
2231
+ )
2232
+ ON CONFLICT (id) DO UPDATE SET
2233
+ name = EXCLUDED.name,
2234
+ content = EXCLUDED.content,
2235
+ spec = EXCLUDED.spec,
2236
+ category = EXCLUDED.category,
2237
+ metadata = EXCLUDED.metadata,
2238
+ updated_at = CURRENT_TIMESTAMP;
2239
+
2240
+ -- Schema entry for OntologyConfig (ontology_configs)
2241
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
2242
+ VALUES (
2243
+ '9a7e50d0-ef3a-5641-9ff4-b2be5a77053b'::uuid,
2244
+ 'system',
2245
+ 'OntologyConfig',
2246
+ '# OntologyConfig
2247
+
2248
+ User configuration for automatic ontology extraction.
2249
+
2250
+ Attributes:
2251
+ name: Human-readable config name
2252
+ agent_schema_id: Foreign key to Schema entity to use for extraction
2253
+ description: Purpose and scope of this config
2254
+
2255
+ # File matching rules (ANY matching rule triggers extraction)
2256
+ mime_type_pattern: Regex pattern for file MIME types (e.g., "application/pdf")
2257
+ uri_pattern: Regex pattern for file URIs (e.g., "s3://bucket/resumes/.*")
2258
+ tag_filter: List of tags (file must have ALL tags to match)
2259
+
2260
+ # Execution control
2261
+ priority: Execution order (higher = earlier, default 100)
2262
+ enabled: Whether this config is active (default True)
2263
+
2264
+ # LLM provider configuration
2265
+ provider_name: Optional LLM provider override (defaults to settings)
2266
+ model_name: Optional model override (defaults to settings)
2267
+
2268
+ Inherited from CoreModel:
2269
+ id, created_at, updated_at, deleted_at, tenant_id, user_id,
2270
+ graph_edges, metadata, tags, column
2271
+
2272
+ Example Usage:
2273
+ # CV extraction for recruitment
2274
+ cv_config = OntologyConfig(
2275
+ name="recruitment-cv-parser",
2276
+ agent_schema_id="cv-parser-v1",
2277
+ description="Extract candidate information from resumes",
2278
+ mime_type_pattern="application/pdf",
2279
+ uri_pattern=".*/resumes/.*",
2280
+ tag_filter=["cv", "candidate"],
2281
+ priority=100,
2282
+ enabled=True,
2283
+ tenant_id="acme-corp",
2284
+ tags=["recruitment", "hr"]
2285
+ )
2286
+
2287
+ # Contract analysis for legal team
2288
+ contract_config = OntologyConfig(
2289
+ name="legal-contract-analyzer",
2290
+ agent_schema_id="contract-parser-v2",
2291
+ description="Extract key terms from supplier contracts",
2292
+ mime_type_pattern="application/(pdf|msword|vnd.openxmlformats.*)",
2293
+ tag_filter=["legal", "contract"],
2294
+ priority=200, # Higher priority = runs first
2295
+ enabled=True,
2296
+ provider_name="openai", # Override default provider
2297
+ model_name="gpt-4.1",
2298
+ tenant_id="acme-corp",
2299
+ tags=["legal", "procurement"]
2300
+ )
2301
+
2302
+ # Medical records for healthcare
2303
+ medical_config = OntologyConfig(
2304
+ name="medical-records-extractor",
2305
+ agent_schema_id="medical-parser-v1",
2306
+ description="Extract diagnoses and treatments from medical records",
2307
+ mime_type_pattern="application/pdf",
2308
+ tag_filter=["medical", "patient-record"],
2309
+ priority=50,
2310
+ enabled=True,
2311
+ tenant_id="healthsystem",
2312
+ tags=["medical", "hipaa-compliant"]
2313
+ )
2314
+
2315
+
2316
+ ## Overview
2317
+
2318
+ The `OntologyConfig` entity is stored in the `ontology_configs` table. Each record is uniquely
2319
+ identified by its `name` field for lookups and graph traversal.
2320
+
2321
+ ## Search Capabilities
2322
+
2323
+ This schema includes the `search_rem` tool which supports:
2324
+ - **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
2325
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
2326
+ - **SEARCH**: Semantic vector search on description (e.g., `SEARCH "concept" FROM ontology_configs LIMIT 10`)
2327
+ - **SQL**: Complex queries (e.g., `SELECT * FROM ontology_configs WHERE ...`)
2328
+
2329
+ ## Table Info
2330
+
2331
+ | Property | Value |
2332
+ |----------|-------|
2333
+ | Table | `ontology_configs` |
2334
+ | Entity Key | `name` |
2335
+ | Embedding Fields | `description` |
2336
+ | Tools | `search_rem` |
2337
+
2338
+ ## Fields
2339
+
2340
+ ### `id`
2341
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
2342
+ - **Optional**
2343
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
2344
+
2345
+ ### `created_at`
2346
+ - **Type**: `<class ''datetime.datetime''>`
2347
+ - **Optional**
2348
+ - Entity creation timestamp
2349
+
2350
+ ### `updated_at`
2351
+ - **Type**: `<class ''datetime.datetime''>`
2352
+ - **Optional**
2353
+ - Last update timestamp
2354
+
2355
+ ### `deleted_at`
2356
+ - **Type**: `typing.Optional[datetime.datetime]`
2357
+ - **Optional**
2358
+ - Soft deletion timestamp
2359
+
2360
+ ### `tenant_id`
2361
+ - **Type**: `typing.Optional[str]`
2362
+ - **Optional**
2363
+ - Tenant identifier for multi-tenancy isolation
2364
+
2365
+ ### `user_id`
2366
+ - **Type**: `typing.Optional[str]`
2367
+ - **Optional**
2368
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
2369
+
2370
+ ### `graph_edges`
2371
+ - **Type**: `list[dict]`
2372
+ - **Optional**
2373
+ - Knowledge graph edges stored as InlineEdge dicts
2374
+
2375
+ ### `metadata`
2376
+ - **Type**: `<class ''dict''>`
2377
+ - **Optional**
2378
+ - Flexible metadata storage
2379
+
2380
+ ### `tags`
2381
+ - **Type**: `list[str]`
2382
+ - **Optional**
2383
+ - Entity tags
2384
+
2385
+ ### `name`
2386
+ - **Type**: `<class ''str''>`
2387
+ - **Required**
2388
+
2389
+ ### `agent_schema_id`
2390
+ - **Type**: `<class ''str''>`
2391
+ - **Required**
2392
+
2393
+ ### `description`
2394
+ - **Type**: `typing.Optional[str]`
2395
+ - **Optional**
2396
+
2397
+ ### `mime_type_pattern`
2398
+ - **Type**: `typing.Optional[str]`
2399
+ - **Optional**
2400
+
2401
+ ### `uri_pattern`
2402
+ - **Type**: `typing.Optional[str]`
2403
+ - **Optional**
2404
+
2405
+ ### `tag_filter`
2406
+ - **Type**: `list[str]`
2407
+ - **Optional**
2408
+
2409
+ ### `priority`
2410
+ - **Type**: `<class ''int''>`
2411
+ - **Optional**
2412
+
2413
+ ### `enabled`
2414
+ - **Type**: `<class ''bool''>`
2415
+ - **Optional**
2416
+
2417
+ ### `provider_name`
2418
+ - **Type**: `typing.Optional[str]`
2419
+ - **Optional**
2420
+
2421
+ ### `model_name`
2422
+ - **Type**: `typing.Optional[str]`
2423
+ - **Optional**
2424
+
2425
+ ',
2426
+ '{"type": "object", "description": "User configuration for automatic ontology extraction.\n\n Attributes:\n name: Human-readable config name\n agent_schema_id: Foreign key to Schema entity to use for extraction\n description: Purpose and scope of this config\n\n # File matching rules (ANY matching rule triggers extraction)\n mime_type_pattern: Regex pattern for file MIME types (e.g., \"application/pdf\")\n uri_pattern: Regex pattern for file URIs (e.g., \"s3://bucket/resumes/.*\")\n tag_filter: List of tags (file must have ALL tags to match)\n\n # Execution control\n priority: Execution order (higher = earlier, default 100)\n enabled: Whether this config is active (default True)\n\n # LLM provider configuration\n provider_name: Optional LLM provider override (defaults to settings)\n model_name: Optional model override (defaults to settings)\n\n Inherited from CoreModel:\n id, created_at, updated_at, deleted_at, tenant_id, user_id,\n graph_edges, metadata, tags, column\n\n Example Usage:\n # CV extraction for recruitment\n cv_config = OntologyConfig(\n name=\"recruitment-cv-parser\",\n agent_schema_id=\"cv-parser-v1\",\n description=\"Extract candidate information from resumes\",\n mime_type_pattern=\"application/pdf\",\n uri_pattern=\".*/resumes/.*\",\n tag_filter=[\"cv\", \"candidate\"],\n priority=100,\n enabled=True,\n tenant_id=\"acme-corp\",\n tags=[\"recruitment\", \"hr\"]\n )\n\n # Contract analysis for legal team\n contract_config = OntologyConfig(\n name=\"legal-contract-analyzer\",\n agent_schema_id=\"contract-parser-v2\",\n description=\"Extract key terms from supplier contracts\",\n mime_type_pattern=\"application/(pdf|msword|vnd.openxmlformats.*)\",\n tag_filter=[\"legal\", \"contract\"],\n priority=200, # Higher priority = runs first\n enabled=True,\n provider_name=\"openai\", # Override default provider\n model_name=\"gpt-4.1\",\n tenant_id=\"acme-corp\",\n tags=[\"legal\", \"procurement\"]\n )\n\n # Medical records for healthcare\n medical_config 
= OntologyConfig(\n name=\"medical-records-extractor\",\n agent_schema_id=\"medical-parser-v1\",\n description=\"Extract diagnoses and treatments from medical records\",\n mime_type_pattern=\"application/pdf\",\n tag_filter=[\"medical\", \"patient-record\"],\n priority=50,\n enabled=True,\n tenant_id=\"healthsystem\",\n tags=[\"medical\", \"hipaa-compliant\"]\n )\n \n\nThis agent can search the `ontology_configs` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "agent_schema_id": {"title": "Agent Schema Id", "type": "string"}, "description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Description"}, "mime_type_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Mime Type Pattern"}, "uri_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri Pattern"}, "tag_filter": {"default": [], "items": {"type": "string"}, "title": "Tag Filter", "type": "array"}, "priority": {"default": 100, "title": "Priority", "type": "integer"}, "enabled": {"default": true, "title": "Enabled", "type": "boolean"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}}, "required": ["name", "agent_schema_id"], "json_schema_extra": {"table_name": "ontology_configs", "entity_key_field": "name", "embedding_fields": ["description"], "fully_qualified_name": "rem.models.entities.ontology_config.OntologyConfig", "tools": ["search_rem"], "default_search_table": "ontology_configs", "has_embeddings": true}}'::jsonb,
2427
+ 'entity',
2428
+ '{"table_name": "ontology_configs", "entity_key_field": "name", "embedding_fields": ["description"], "fqn": "rem.models.entities.ontology_config.OntologyConfig"}'::jsonb
2429
+ )
2430
+ ON CONFLICT (id) DO UPDATE SET
2431
+ name = EXCLUDED.name,
2432
+ content = EXCLUDED.content,
2433
+ spec = EXCLUDED.spec,
2434
+ category = EXCLUDED.category,
2435
+ metadata = EXCLUDED.metadata,
2436
+ updated_at = CURRENT_TIMESTAMP;
2437
+
2438
+ -- Schema entry for Resource (resources)
2439
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
2440
+ VALUES (
2441
+ 'a579f379-4f1c-5414-8ff4-1382d0f783b7'::uuid,
2442
+ 'system',
2443
+ 'Resource',
2444
+ '# Resource
2445
+
2446
+
2447
+ Base content unit in REM.
2448
+
2449
+ Resources are content units that feed into dreaming workflows for moment
2450
+ extraction and affinity graph construction. Tenant isolation is provided
2451
+ via CoreModel.tenant_id field.
2452
+
2453
+
2454
+ ## Overview
2455
+
2456
+ The `Resource` entity is stored in the `resources` table. Each record is uniquely
2457
+ identified by its `name` field for lookups and graph traversal.
2458
+
2459
+ ## Search Capabilities
2460
+
2461
+ This schema includes the `search_rem` tool which supports:
2462
+ - **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
2463
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
2464
+ - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM resources LIMIT 10`)
2465
+ - **SQL**: Complex queries (e.g., `SELECT * FROM resources WHERE ...`)
2466
+
2467
+ ## Table Info
2468
+
2469
+ | Property | Value |
2470
+ |----------|-------|
2471
+ | Table | `resources` |
2472
+ | Entity Key | `name` |
2473
+ | Embedding Fields | `content` |
2474
+ | Tools | `search_rem` |
2475
+
2476
+ ## Fields
2477
+
2478
+ ### `id`
2479
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
2480
+ - **Optional**
2481
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
2482
+
2483
+ ### `created_at`
2484
+ - **Type**: `<class ''datetime.datetime''>`
2485
+ - **Optional**
2486
+ - Entity creation timestamp
2487
+
2488
+ ### `updated_at`
2489
+ - **Type**: `<class ''datetime.datetime''>`
2490
+ - **Optional**
2491
+ - Last update timestamp
2492
+
2493
+ ### `deleted_at`
2494
+ - **Type**: `typing.Optional[datetime.datetime]`
2495
+ - **Optional**
2496
+ - Soft deletion timestamp
2497
+
2498
+ ### `tenant_id`
2499
+ - **Type**: `typing.Optional[str]`
2500
+ - **Optional**
2501
+ - Tenant identifier for multi-tenancy isolation
2502
+
2503
+ ### `user_id`
2504
+ - **Type**: `typing.Optional[str]`
2505
+ - **Optional**
2506
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
2507
+
2508
+ ### `graph_edges`
2509
+ - **Type**: `list[dict]`
2510
+ - **Optional**
2511
+ - Knowledge graph edges stored as InlineEdge dicts
2512
+
2513
+ ### `metadata`
2514
+ - **Type**: `<class ''dict''>`
2515
+ - **Optional**
2516
+ - Flexible metadata storage
2517
+
2518
+ ### `tags`
2519
+ - **Type**: `list[str]`
2520
+ - **Optional**
2521
+ - Entity tags
2522
+
2523
+ ### `name`
2524
+ - **Type**: `typing.Optional[str]`
2525
+ - **Optional**
2526
+ - Human-readable resource name (used as graph label). Auto-generated from uri+ordinal if not provided.
2527
+
2528
+ ### `uri`
2529
+ - **Type**: `typing.Optional[str]`
2530
+ - **Optional**
2531
+ - Content URI or identifier (file path, URL, etc.)
2532
+
2533
+ ### `ordinal`
2534
+ - **Type**: `<class ''int''>`
2535
+ - **Optional**
2536
+ - Chunk ordinal for splitting large documents (0 for single-chunk resources)
2537
+
2538
+ ### `content`
2539
+ - **Type**: `<class ''str''>`
2540
+ - **Optional**
2541
+ - Resource content text
2542
+
2543
+ ### `timestamp`
2544
+ - **Type**: `<class ''datetime.datetime''>`
2545
+ - **Optional**
2546
+ - Resource timestamp (content creation/publication time)
2547
+
2548
+ ### `category`
2549
+ - **Type**: `typing.Optional[str]`
2550
+ - **Optional**
2551
+ - Resource category (document, conversation, artifact, etc.)
2552
+
2553
+ ### `related_entities`
2554
+ - **Type**: `list[dict]`
2555
+ - **Optional**
2556
+ - Extracted entities (people, projects, concepts) with metadata
2557
+
2558
+ ',
2559
+ '{"type": "object", "description": "\n Base content unit in REM.\n\n Resources are content units that feed into dreaming workflows for moment\n extraction and affinity graph construction. Tenant isolation is provided\n via CoreModel.tenant_id field.\n \n\nThis agent can search the `resources` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Human-readable resource name (used as graph label). Auto-generated from uri+ordinal if not provided.", "entity_key": true, "title": "Name"}, "uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Content URI or identifier (file path, URL, etc.)", "title": "Uri"}, "ordinal": {"composite_key": true, "default": 0, "description": "Chunk ordinal for splitting large documents (0 for single-chunk resources)", "title": "Ordinal", "type": "integer"}, "content": {"default": "", "description": "Resource content text", "title": "Content", "type": "string"}, "timestamp": {"description": "Resource timestamp (content creation/publication time)", "format": "date-time", "title": "Timestamp", "type": "string"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Resource category (document, conversation, artifact, etc.)", "title": "Category"}, "related_entities": {"description": "Extracted entities (people, projects, concepts) with metadata", "items": {"additionalProperties": true, "type": "object"}, "title": "Related Entities", "type": "array"}}, "required": [], "json_schema_extra": {"table_name": "resources", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.resource.Resource", "tools": ["search_rem"], "default_search_table": 
"resources", "has_embeddings": true}}'::jsonb,
2560
+ 'entity',
2561
+ '{"table_name": "resources", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.resource.Resource"}'::jsonb
2562
+ )
2563
+ ON CONFLICT (id) DO UPDATE SET
2564
+ name = EXCLUDED.name,
2565
+ content = EXCLUDED.content,
2566
+ spec = EXCLUDED.spec,
2567
+ category = EXCLUDED.category,
2568
+ metadata = EXCLUDED.metadata,
2569
+ updated_at = CURRENT_TIMESTAMP;
2570
+
2571
+ -- Schema entry for Schema (schemas)
2572
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
2573
+ VALUES (
2574
+ '2372e956-add6-58b8-a638-758a91a2b6c4'::uuid,
2575
+ 'system',
2576
+ 'Schema',
2577
+ '# Schema
2578
+
2579
+
2580
+ Agent schema definition.
2581
+
2582
+ Schemas define agents that can be dynamically loaded into Pydantic AI.
2583
+ They store JsonSchema specifications with embedded metadata for tools,
2584
+ resources, and system prompts.
2585
+
2586
+ For ontology extraction agents:
2587
+ - `provider_configs` enables multi-provider support (test across Anthropic, OpenAI, etc.)
2588
+ - `embedding_fields` specifies which output fields should be embedded for semantic search
2589
+
2590
+ Tenant isolation is provided via CoreModel.tenant_id field.
2591
+
2592
+
2593
+ ## Overview
2594
+
2595
+ The `Schema` entity is stored in the `schemas` table. Each record is uniquely
2596
+ identified by its `name` field for lookups and graph traversal.
2597
+
2598
+ ## Search Capabilities
2599
+
2600
+ This schema includes the `search_rem` tool which supports:
2601
+ - **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
2602
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
2603
+ - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM schemas LIMIT 10`)
2604
+ - **SQL**: Complex queries (e.g., `SELECT * FROM schemas WHERE ...`)
2605
+
2606
+ ## Table Info
2607
+
2608
+ | Property | Value |
2609
+ |----------|-------|
2610
+ | Table | `schemas` |
2611
+ | Entity Key | `name` |
2612
+ | Embedding Fields | `content` |
2613
+ | Tools | `search_rem` |
2614
+
2615
+ ## Fields
2616
+
2617
+ ### `id`
2618
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
2619
+ - **Optional**
2620
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
2621
+
2622
+ ### `created_at`
2623
+ - **Type**: `<class ''datetime.datetime''>`
2624
+ - **Optional**
2625
+ - Entity creation timestamp
2626
+
2627
+ ### `updated_at`
2628
+ - **Type**: `<class ''datetime.datetime''>`
2629
+ - **Optional**
2630
+ - Last update timestamp
2631
+
2632
+ ### `deleted_at`
2633
+ - **Type**: `typing.Optional[datetime.datetime]`
2634
+ - **Optional**
2635
+ - Soft deletion timestamp
2636
+
2637
+ ### `tenant_id`
2638
+ - **Type**: `typing.Optional[str]`
2639
+ - **Optional**
2640
+ - Tenant identifier for multi-tenancy isolation
2641
+
2642
+ ### `user_id`
2643
+ - **Type**: `typing.Optional[str]`
2644
+ - **Optional**
2645
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
2646
+
2647
+ ### `graph_edges`
2648
+ - **Type**: `list[dict]`
2649
+ - **Optional**
2650
+ - Knowledge graph edges stored as InlineEdge dicts
2651
+
2652
+ ### `metadata`
2653
+ - **Type**: `<class ''dict''>`
2654
+ - **Optional**
2655
+ - Flexible metadata storage
2656
+
2657
+ ### `tags`
2658
+ - **Type**: `list[str]`
2659
+ - **Optional**
2660
+ - Entity tags
2661
+
2662
+ ### `name`
2663
+ - **Type**: `<class ''str''>`
2664
+ - **Required**
2665
+ - Human-readable schema name (used as identifier)
2666
+
2667
+ ### `content`
2668
+ - **Type**: `<class ''str''>`
2669
+ - **Optional**
2670
+ - Markdown documentation and instructions for the schema
2671
+
2672
+ ### `spec`
2673
+ - **Type**: `<class ''dict''>`
2674
+ - **Required**
2675
+ - JsonSchema specification defining the agent structure and capabilities
2676
+
2677
+ ### `category`
2678
+ - **Type**: `typing.Optional[str]`
2679
+ - **Optional**
2680
+ - Schema category distinguishing schema types. Values: ''agent'' (AI agents), ''evaluator'' (LLM-as-a-Judge evaluators). Maps directly from json_schema_extra.kind field during ingestion.
2681
+
2682
+ ### `provider_configs`
2683
+ - **Type**: `list[dict]`
2684
+ - **Optional**
2685
+ - Optional provider configurations for multi-provider testing. Each dict has ''provider_name'' and ''model_name''. Example: [{''provider_name'': ''anthropic'', ''model_name'': ''claude-sonnet-4-5''}]
2686
+
2687
+ ### `embedding_fields`
2688
+ - **Type**: `list[str]`
2689
+ - **Optional**
2690
+ - JSON paths in extracted_data to embed for semantic search. Example: [''summary'', ''candidate_name'', ''skills''] for CV extraction. Values will be concatenated and embedded using configured embedding provider.
2691
+
2692
+ ',
2693
+ '{"type": "object", "description": "\n Agent schema definition.\n\n Schemas define agents that can be dynamically loaded into Pydantic AI.\n They store JsonSchema specifications with embedded metadata for tools,\n resources, and system prompts.\n\n For ontology extraction agents:\n - `provider_configs` enables multi-provider support (test across Anthropic, OpenAI, etc.)\n - `embedding_fields` specifies which output fields should be embedded for semantic search\n\n Tenant isolation is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `schemas` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "Human-readable schema name (used as identifier)", "title": "Name", "type": "string"}, "content": {"default": "", "description": "Markdown documentation and instructions for the schema", "title": "Content", "type": "string"}, "spec": {"additionalProperties": true, "description": "JsonSchema specification defining the agent structure and capabilities", "title": "Spec", "type": "object"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Schema category distinguishing schema types. Values: ''agent'' (AI agents), ''evaluator'' (LLM-as-a-Judge evaluators). Maps directly from json_schema_extra.kind field during ingestion.", "title": "Category"}, "provider_configs": {"description": "Optional provider configurations for multi-provider testing. Each dict has ''provider_name'' and ''model_name''. Example: [{''provider_name'': ''anthropic'', ''model_name'': ''claude-sonnet-4-5''}]", "items": {"additionalProperties": true, "type": "object"}, "title": "Provider Configs", "type": "array"}, "embedding_fields": {"description": "JSON paths in extracted_data to embed for semantic search. Example: [''summary'', ''candidate_name'', ''skills''] for CV extraction. 
Values will be concatenated and embedded using configured embedding provider.", "items": {"type": "string"}, "title": "Embedding Fields", "type": "array"}}, "required": ["name", "spec"], "json_schema_extra": {"table_name": "schemas", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.schema.Schema", "tools": ["search_rem"], "default_search_table": "schemas", "has_embeddings": true}}'::jsonb,
2694
+ 'entity',
2695
+ '{"table_name": "schemas", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.schema.Schema"}'::jsonb
2696
+ )
2697
+ ON CONFLICT (id) DO UPDATE SET
2698
+ name = EXCLUDED.name,
2699
+ content = EXCLUDED.content,
2700
+ spec = EXCLUDED.spec,
2701
+ category = EXCLUDED.category,
2702
+ metadata = EXCLUDED.metadata,
2703
+ updated_at = CURRENT_TIMESTAMP;
2704
+
2705
+ -- Schema entry for Session (sessions)
2706
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
2707
+ VALUES (
2708
+ '5893fbca-2d8e-5402-ac41-7bac2c0c472a'::uuid,
2709
+ 'system',
2710
+ 'Session',
2711
+ '# Session
2712
+
2713
+
2714
+ Conversation session container.
2715
+
2716
+ Groups messages together and supports different modes for normal conversations
2717
+ and evaluation/experimentation scenarios.
2718
+
2719
+ For evaluation sessions, stores:
2720
+ - original_trace_id: Reference to the original session being evaluated
2721
+ - settings_overrides: Model, temperature, prompt overrides
2722
+ - prompt: Custom prompt being tested
2723
+
2724
+ Default sessions are lightweight - just a session_id on messages.
2725
+ Special sessions store additional metadata for experiments.
2726
+
2727
+
2728
+ ## Overview
2729
+
2730
+ The `Session` entity is stored in the `sessions` table. Each record is uniquely
2731
+ identified by its `name` field for lookups and graph traversal.
2732
+
2733
+ ## Search Capabilities
2734
+
2735
+ This schema includes the `search_rem` tool which supports:
2736
+ - **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
2737
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
2738
+ - **SEARCH**: Semantic vector search on description (e.g., `SEARCH "concept" FROM sessions LIMIT 10`)
2739
+ - **SQL**: Complex queries (e.g., `SELECT * FROM sessions WHERE ...`)
2740
+
2741
+ ## Table Info
2742
+
2743
+ | Property | Value |
2744
+ |----------|-------|
2745
+ | Table | `sessions` |
2746
+ | Entity Key | `name` |
2747
+ | Embedding Fields | `description` |
2748
+ | Tools | `search_rem` |
2749
+
2750
+ ## Fields
2751
+
2752
+ ### `id`
2753
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
2754
+ - **Optional**
2755
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
2756
+
2757
+ ### `created_at`
2758
+ - **Type**: `<class ''datetime.datetime''>`
2759
+ - **Optional**
2760
+ - Entity creation timestamp
2761
+
2762
+ ### `updated_at`
2763
+ - **Type**: `<class ''datetime.datetime''>`
2764
+ - **Optional**
2765
+ - Last update timestamp
2766
+
2767
+ ### `deleted_at`
2768
+ - **Type**: `typing.Optional[datetime.datetime]`
2769
+ - **Optional**
2770
+ - Soft deletion timestamp
2771
+
2772
+ ### `tenant_id`
2773
+ - **Type**: `typing.Optional[str]`
2774
+ - **Optional**
2775
+ - Tenant identifier for multi-tenancy isolation
2776
+
2777
+ ### `user_id`
2778
+ - **Type**: `typing.Optional[str]`
2779
+ - **Optional**
2780
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
2781
+
2782
+ ### `graph_edges`
2783
+ - **Type**: `list[dict]`
2784
+ - **Optional**
2785
+ - Knowledge graph edges stored as InlineEdge dicts
2786
+
2787
+ ### `metadata`
2788
+ - **Type**: `<class ''dict''>`
2789
+ - **Optional**
2790
+ - Flexible metadata storage
2791
+
2792
+ ### `tags`
2793
+ - **Type**: `list[str]`
2794
+ - **Optional**
2795
+ - Entity tags
2796
+
2797
+ ### `name`
2798
+ - **Type**: `<class ''str''>`
2799
+ - **Required**
2800
+ - Session name/identifier
2801
+
2802
+ ### `mode`
2803
+ - **Type**: `<enum ''SessionMode''>`
2804
+ - **Optional**
2805
+ - Session mode: ''normal'' or ''evaluation''
2806
+
2807
+ ### `description`
2808
+ - **Type**: `str | None`
2809
+ - **Optional**
2810
+ - Optional session description
2811
+
2812
+ ### `original_trace_id`
2813
+ - **Type**: `str | None`
2814
+ - **Optional**
2815
+ - For evaluation mode: ID of the original session/trace being evaluated
2816
+
2817
+ ### `settings_overrides`
2818
+ - **Type**: `dict | None`
2819
+ - **Optional**
2820
+ - Settings overrides (model, temperature, max_tokens, system_prompt)
2821
+
2822
+ ### `prompt`
2823
+ - **Type**: `str | None`
2824
+ - **Optional**
2825
+ - Custom prompt for this session (can override agent prompt)
2826
+
2827
+ ### `agent_schema_uri`
2828
+ - **Type**: `str | None`
2829
+ - **Optional**
2830
+ - Agent schema used for this session
2831
+
2832
+ ### `message_count`
2833
+ - **Type**: `<class ''int''>`
2834
+ - **Optional**
2835
+ - Number of messages in this session
2836
+
2837
+ ### `total_tokens`
2838
+ - **Type**: `int | None`
2839
+ - **Optional**
2840
+ - Total tokens used in this session
2841
+
2842
+ ',
2843
+ '{"type": "object", "description": "\n Conversation session container.\n\n Groups messages together and supports different modes for normal conversations\n and evaluation/experimentation scenarios.\n\n For evaluation sessions, stores:\n - original_trace_id: Reference to the original session being evaluated\n - settings_overrides: Model, temperature, prompt overrides\n - prompt: Custom prompt being tested\n\n Default sessions are lightweight - just a session_id on messages.\n Special sessions store additional metadata for experiments.\n \n\nThis agent can search the `sessions` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "Session name/identifier", "entity_key": true, "title": "Name", "type": "string"}, "mode": {"$ref": "#/$defs/SessionMode", "default": "normal", "description": "Session mode: ''normal'' or ''evaluation''"}, "description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Optional session description", "title": "Description"}, "original_trace_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "For evaluation mode: ID of the original session/trace being evaluated", "title": "Original Trace Id"}, "settings_overrides": {"anyOf": [{"additionalProperties": true, "type": "object"}, {"type": "null"}], "default": null, "description": "Settings overrides (model, temperature, max_tokens, system_prompt)", "title": "Settings Overrides"}, "prompt": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Custom prompt for this session (can override agent prompt)", "title": "Prompt"}, "agent_schema_uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Agent schema used for this session", "title": "Agent Schema Uri"}, "message_count": {"default": 0, "description": "Number of messages in this session", "title": "Message Count", "type": "integer"}, "total_tokens": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "Total tokens used in this session", "title": "Total 
Tokens"}}, "required": ["name"], "json_schema_extra": {"table_name": "sessions", "entity_key_field": "name", "embedding_fields": ["description"], "fully_qualified_name": "rem.models.entities.session.Session", "tools": ["search_rem"], "default_search_table": "sessions", "has_embeddings": true}}'::jsonb,
2844
+ 'entity',
2845
+ '{"table_name": "sessions", "entity_key_field": "name", "embedding_fields": ["description"], "fqn": "rem.models.entities.session.Session"}'::jsonb
2846
+ )
2847
+ ON CONFLICT (id) DO UPDATE SET
2848
+ name = EXCLUDED.name,
2849
+ content = EXCLUDED.content,
2850
+ spec = EXCLUDED.spec,
2851
+ category = EXCLUDED.category,
2852
+ metadata = EXCLUDED.metadata,
2853
+ updated_at = CURRENT_TIMESTAMP;
2854
+
2855
+ -- Schema entry for SharedSession (shared_sessions)
2856
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
2857
+ VALUES (
2858
+ 'be5c5711-6c45-5fc4-9cd1-e076599261c7'::uuid,
2859
+ 'system',
2860
+ 'SharedSession',
2861
+ '# SharedSession
2862
+
2863
+
2864
+ Session sharing record between users.
2865
+
2866
+ Links a session (identified by session_id from Message records) to a
2867
+ recipient user, enabling collaborative access to conversation history.
2868
+
2869
+
2870
+ ## Overview
2871
+
2872
+ The `SharedSession` entity is stored in the `shared_sessions` table. Each record is uniquely
2873
+ identified by its `id` field for lookups and graph traversal.
2874
+
2875
+ ## Search Capabilities
2876
+
2877
+ This schema includes the `search_rem` tool which supports:
2878
+ - **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
2879
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
2880
+ - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM shared_sessions LIMIT 10`)
2881
+ - **SQL**: Complex queries (e.g., `SELECT * FROM shared_sessions WHERE ...`)
2882
+
2883
+ ## Table Info
2884
+
2885
+ | Property | Value |
2886
+ |----------|-------|
2887
+ | Table | `shared_sessions` |
2888
+ | Entity Key | `id` |
2889
+ | Embedding Fields | None |
2890
+ | Tools | `search_rem` |
2891
+
2892
+ ## Fields
2893
+
2894
+ ### `id`
2895
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
2896
+ - **Optional**
2897
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
2898
+
2899
+ ### `created_at`
2900
+ - **Type**: `<class ''datetime.datetime''>`
2901
+ - **Optional**
2902
+ - Entity creation timestamp
2903
+
2904
+ ### `updated_at`
2905
+ - **Type**: `<class ''datetime.datetime''>`
2906
+ - **Optional**
2907
+ - Last update timestamp
2908
+
2909
+ ### `deleted_at`
2910
+ - **Type**: `typing.Optional[datetime.datetime]`
2911
+ - **Optional**
2912
+ - Soft deletion timestamp
2913
+
2914
+ ### `tenant_id`
2915
+ - **Type**: `typing.Optional[str]`
2916
+ - **Optional**
2917
+ - Tenant identifier for multi-tenancy isolation
2918
+
2919
+ ### `user_id`
2920
+ - **Type**: `typing.Optional[str]`
2921
+ - **Optional**
2922
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
2923
+
2924
+ ### `graph_edges`
2925
+ - **Type**: `list[dict]`
2926
+ - **Optional**
2927
+ - Knowledge graph edges stored as InlineEdge dicts
2928
+
2929
+ ### `metadata`
2930
+ - **Type**: `<class ''dict''>`
2931
+ - **Optional**
2932
+ - Flexible metadata storage
2933
+
2934
+ ### `tags`
2935
+ - **Type**: `list[str]`
2936
+ - **Optional**
2937
+ - Entity tags
2938
+
2939
+ ### `session_id`
2940
+ - **Type**: `<class ''str''>`
2941
+ - **Required**
2942
+ - The session being shared (matches Message.session_id)
2943
+
2944
+ ### `owner_user_id`
2945
+ - **Type**: `<class ''str''>`
2946
+ - **Required**
2947
+ - User ID of the session owner (the sharer)
2948
+
2949
+ ### `shared_with_user_id`
2950
+ - **Type**: `<class ''str''>`
2951
+ - **Required**
2952
+ - User ID of the recipient (who can now view the session)
2953
+
2954
+ ',
2955
+ '{"type": "object", "description": "\n Session sharing record between users.\n\n Links a session (identified by session_id from Message records) to a\n recipient user, enabling collaborative access to conversation history.\n \n\nThis agent can search the `shared_sessions` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "session_id": {"description": "The session being shared (matches Message.session_id)", "title": "Session Id", "type": "string"}, "owner_user_id": {"description": "User ID of the session owner (the sharer)", "title": "Owner User Id", "type": "string"}, "shared_with_user_id": {"description": "User ID of the recipient (who can now view the session)", "title": "Shared With User Id", "type": "string"}}, "required": ["session_id", "owner_user_id", "shared_with_user_id"], "json_schema_extra": {"table_name": "shared_sessions", "entity_key_field": "id", "embedding_fields": [], "fully_qualified_name": "rem.models.entities.shared_session.SharedSession", "tools": ["search_rem"], "default_search_table": "shared_sessions", "has_embeddings": false}}'::jsonb,
2956
+ 'entity',
2957
+ '{"table_name": "shared_sessions", "entity_key_field": "id", "embedding_fields": [], "fqn": "rem.models.entities.shared_session.SharedSession"}'::jsonb
2958
+ )
2959
+ ON CONFLICT (id) DO UPDATE SET
2960
+ name = EXCLUDED.name,
2961
+ content = EXCLUDED.content,
2962
+ spec = EXCLUDED.spec,
2963
+ category = EXCLUDED.category,
2964
+ metadata = EXCLUDED.metadata,
2965
+ updated_at = CURRENT_TIMESTAMP;
2966
+
2967
+ -- Schema entry for User (users): upserts one row into schemas keyed by a fixed id; on an id conflict the name, content, spec, category and metadata are overwritten and updated_at is refreshed.
2968
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
2969
+ VALUES (
2970
+ '1ad3d95e-32e9-54d6-ad7d-e39b9ed5018b'::uuid, -- id: literal UUID, so every run targets the same row
2971
+ 'system', -- tenant_id
2972
+ 'User', -- name
2973
+ '# User
2974
+
2975
+
2976
+ User entity.
2977
+
2978
+ Represents people in the REM system, either as active users
2979
+ or entities extracted from content. Tenant isolation is provided
2980
+ via CoreModel.tenant_id field.
2981
+
2982
+ Enhanced by dreaming worker:
2983
+ - summary: Generated from activity analysis
2984
+ - interests: Extracted from resources and sessions
2985
+ - activity_level: Computed from recent engagement
2986
+ - preferred_topics: Extracted from moment/resource topics
2987
+
2988
+
2989
+ ## Overview
2990
+
2991
+ The `User` entity is stored in the `users` table. Each record is uniquely
2992
+ identified by its `name` field for lookups and graph traversal.
2993
+
2994
+ ## Search Capabilities
2995
+
2996
+ This schema includes the `search_rem` tool which supports:
2997
+ - **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
2998
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
2999
+ - **SEARCH**: Semantic vector search on summary (e.g., `SEARCH "concept" FROM users LIMIT 10`)
3000
+ - **SQL**: Complex queries (e.g., `SELECT * FROM users WHERE ...`)
3001
+
3002
+ ## Table Info
3003
+
3004
+ | Property | Value |
3005
+ |----------|-------|
3006
+ | Table | `users` |
3007
+ | Entity Key | `name` |
3008
+ | Embedding Fields | `summary` |
3009
+ | Tools | `search_rem` |
3010
+
3011
+ ## Fields
3012
+
3013
+ ### `id`
3014
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
3015
+ - **Optional**
3016
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
3017
+
3018
+ ### `created_at`
3019
+ - **Type**: `<class ''datetime.datetime''>`
3020
+ - **Optional**
3021
+ - Entity creation timestamp
3022
+
3023
+ ### `updated_at`
3024
+ - **Type**: `<class ''datetime.datetime''>`
3025
+ - **Optional**
3026
+ - Last update timestamp
3027
+
3028
+ ### `deleted_at`
3029
+ - **Type**: `typing.Optional[datetime.datetime]`
3030
+ - **Optional**
3031
+ - Soft deletion timestamp
3032
+
3033
+ ### `tenant_id`
3034
+ - **Type**: `typing.Optional[str]`
3035
+ - **Optional**
3036
+ - Tenant identifier for multi-tenancy isolation
3037
+
3038
+ ### `user_id`
3039
+ - **Type**: `typing.Optional[str]`
3040
+ - **Optional**
3041
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
3042
+
3043
+ ### `graph_edges`
3044
+ - **Type**: `list[dict]`
3045
+ - **Optional**
3046
+ - Knowledge graph edges stored as InlineEdge dicts
3047
+
3048
+ ### `metadata`
3049
+ - **Type**: `<class ''dict''>`
3050
+ - **Optional**
3051
+ - Flexible metadata storage
3052
+
3053
+ ### `tags`
3054
+ - **Type**: `list[str]`
3055
+ - **Optional**
3056
+ - Entity tags
3057
+
3058
+ ### `name`
3059
+ - **Type**: `<class ''str''>`
3060
+ - **Required**
3061
+ - User name (human-readable, used as graph label)
3062
+
3063
+ ### `email`
3064
+ - **Type**: `typing.Optional[str]`
3065
+ - **Optional**
3066
+ - User email address
3067
+
3068
+ ### `role`
3069
+ - **Type**: `typing.Optional[str]`
3070
+ - **Optional**
3071
+ - User role (employee, contractor, external, etc.)
3072
+
3073
+ ### `tier`
3074
+ - **Type**: `<enum ''UserTier''>`
3075
+ - **Optional**
3076
+ - User subscription tier (free, basic, pro) for feature gating
3077
+
3078
+ ### `anonymous_ids`
3079
+ - **Type**: `list[str]`
3080
+ - **Optional**
3081
+ - Linked anonymous session IDs used for merging history
3082
+
3083
+ ### `sec_policy`
3084
+ - **Type**: `<class ''dict''>`
3085
+ - **Optional**
3086
+ - Security policy configuration (JSON, extensible for custom policies)
3087
+
3088
+ ### `summary`
3089
+ - **Type**: `typing.Optional[str]`
3090
+ - **Optional**
3091
+ - LLM-generated user profile summary (updated by dreaming worker)
3092
+
3093
+ ### `interests`
3094
+ - **Type**: `list[str]`
3095
+ - **Optional**
3096
+ - User interests extracted from activity
3097
+
3098
+ ### `preferred_topics`
3099
+ - **Type**: `list[str]`
3100
+ - **Optional**
3101
+ - Frequently discussed topics in kebab-case
3102
+
3103
+ ### `activity_level`
3104
+ - **Type**: `typing.Optional[str]`
3105
+ - **Optional**
3106
+ - Activity level: active, moderate, inactive
3107
+
3108
+ ### `last_active_at`
3109
+ - **Type**: `typing.Optional[datetime.datetime]`
3110
+ - **Optional**
3111
+ - Last activity timestamp
3112
+
3113
+ ', -- end of content (Markdown text); '' inside the literals is an escaped single quote
3114
+ '{"type": "object", "description": "\n User entity.\n\n Represents people in the REM system, either as active users\n or entities extracted from content. Tenant isolation is provided\n via CoreModel.tenant_id field.\n\n Enhanced by dreaming worker:\n - summary: Generated from activity analysis\n - interests: Extracted from resources and sessions\n - activity_level: Computed from recent engagement\n - preferred_topics: Extracted from moment/resource topics\n \n\nThis agent can search the `users` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "User name (human-readable, used as graph label)", "entity_key": true, "title": "Name", "type": "string"}, "email": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "User email address", "title": "Email"}, "role": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "User role (employee, contractor, external, etc.)", "title": "Role"}, "tier": {"$ref": "#/$defs/UserTier", "default": "free", "description": "User subscription tier (free, basic, pro) for feature gating"}, "anonymous_ids": {"description": "Linked anonymous session IDs used for merging history", "items": {"type": "string"}, "title": "Anonymous Ids", "type": "array"}, "sec_policy": {"additionalProperties": true, "description": "Security policy configuration (JSON, extensible for custom policies)", "title": "Sec Policy", "type": "object"}, "summary": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "LLM-generated user profile summary (updated by dreaming worker)", "title": "Summary"}, "interests": {"description": "User interests extracted from activity", "items": {"type": "string"}, "title": "Interests", "type": "array"}, "preferred_topics": {"description": "Frequently discussed topics in kebab-case", "items": {"type": "string"}, "title": "Preferred Topics", "type": "array"}, "activity_level": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": 
null, "description": "Activity level: active, moderate, inactive", "title": "Activity Level"}, "last_active_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Last activity timestamp", "title": "Last Active At"}}, "required": ["name"], "json_schema_extra": {"table_name": "users", "entity_key_field": "name", "embedding_fields": ["summary"], "fully_qualified_name": "rem.models.entities.user.User", "tools": ["search_rem"], "default_search_table": "users", "has_embeddings": true}}'::jsonb, -- spec: JSON Schema document cast to jsonb
3115
+ 'entity', -- category
3116
+ '{"table_name": "users", "entity_key_field": "name", "embedding_fields": ["summary"], "fqn": "rem.models.entities.user.User"}'::jsonb -- metadata
3117
+ )
3118
+ ON CONFLICT (id) DO UPDATE SET -- row already present: overwrite it in place; tenant_id is not in the SET list, so it keeps its stored value
3119
+ name = EXCLUDED.name,
3120
+ content = EXCLUDED.content,
3121
+ spec = EXCLUDED.spec,
3122
+ category = EXCLUDED.category,
3123
+ metadata = EXCLUDED.metadata,
3124
+ updated_at = CURRENT_TIMESTAMP;
3125
+
3126
+ -- ============================================================================
3127
+ -- RECORD MIGRATION
3128
+ -- ============================================================================
3129
+
3130
+ INSERT INTO rem_migrations (name, type, version) -- record that install_models.sql has been applied
3131
+ VALUES ('install_models.sql', 'models', '1.0.0')
3132
+ ON CONFLICT (name) DO UPDATE -- script was applied before: refresh the existing record instead of failing
3133
+ SET type = EXCLUDED.type, version = EXCLUDED.version, -- keep type/version in step with the script actually being applied
3134
+ applied_at = CURRENT_TIMESTAMP, applied_by = CURRENT_USER;
3135
+
3136
+ DO $$
3137
+ DECLARE
3138
+ notice_line text; -- one line of the post-install summary
3139
+ BEGIN
3140
+ -- Print the summary as one NOTICE per array element, in order; '%' emits each line verbatim.
3141
+ FOREACH notice_line IN ARRAY ARRAY[
3142
+ '============================================================',
3143
+ 'REM Model Schema Applied: 12 tables',
3144
+ '============================================================',
3145
+ ' ✓ feedbacks',
3146
+ ' ✓ files (1 embeddable fields)',
3147
+ ' ✓ image_resources (1 embeddable fields)',
3148
+ ' ✓ messages (1 embeddable fields)',
3149
+ ' ✓ moments (1 embeddable fields)',
3150
+ ' ✓ ontologies (1 embeddable fields)',
3151
+ ' ✓ ontology_configs (1 embeddable fields)',
3152
+ ' ✓ resources (1 embeddable fields)',
3153
+ ' ✓ schemas (1 embeddable fields)',
3154
+ ' ✓ sessions (1 embeddable fields)',
3155
+ ' ✓ shared_sessions',
3156
+ ' ✓ users (1 embeddable fields)',
3157
+ '',
3158
+ 'Next: Run background indexes if needed',
3159
+ ' rem db migrate --background-indexes',
3160
+ '============================================================'
3161
+ ] LOOP
3162
+ RAISE NOTICE '%', notice_line;
3163
+ END LOOP;
3164
+ END $$;