remdb 0.3.114__py3-none-any.whl → 0.3.127__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (41) hide show
  1. rem/agentic/agents/sse_simulator.py +2 -0
  2. rem/agentic/context.py +23 -3
  3. rem/agentic/mcp/tool_wrapper.py +29 -3
  4. rem/agentic/otel/setup.py +1 -0
  5. rem/agentic/providers/pydantic_ai.py +26 -2
  6. rem/api/main.py +4 -1
  7. rem/api/mcp_router/server.py +9 -3
  8. rem/api/mcp_router/tools.py +324 -2
  9. rem/api/routers/admin.py +218 -1
  10. rem/api/routers/chat/completions.py +250 -4
  11. rem/api/routers/chat/models.py +81 -7
  12. rem/api/routers/chat/otel_utils.py +33 -0
  13. rem/api/routers/chat/sse_events.py +17 -1
  14. rem/api/routers/chat/streaming.py +35 -1
  15. rem/api/routers/feedback.py +134 -14
  16. rem/api/routers/query.py +6 -3
  17. rem/cli/commands/README.md +42 -0
  18. rem/cli/commands/cluster.py +617 -168
  19. rem/cli/commands/configure.py +1 -3
  20. rem/cli/commands/db.py +66 -22
  21. rem/cli/commands/experiments.py +242 -26
  22. rem/cli/commands/schema.py +6 -5
  23. rem/config.py +8 -1
  24. rem/services/phoenix/client.py +59 -18
  25. rem/services/postgres/diff_service.py +108 -3
  26. rem/services/postgres/schema_generator.py +205 -4
  27. rem/services/session/compression.py +7 -0
  28. rem/settings.py +150 -18
  29. rem/sql/migrations/001_install.sql +156 -0
  30. rem/sql/migrations/002_install_models.sql +1864 -1
  31. rem/sql/migrations/004_cache_system.sql +548 -0
  32. rem/utils/__init__.py +18 -0
  33. rem/utils/schema_loader.py +94 -3
  34. rem/utils/sql_paths.py +146 -0
  35. rem/workers/__init__.py +3 -1
  36. rem/workers/db_listener.py +579 -0
  37. rem/workers/unlogged_maintainer.py +463 -0
  38. {remdb-0.3.114.dist-info → remdb-0.3.127.dist-info}/METADATA +213 -177
  39. {remdb-0.3.114.dist-info → remdb-0.3.127.dist-info}/RECORD +41 -36
  40. {remdb-0.3.114.dist-info → remdb-0.3.127.dist-info}/WHEEL +0 -0
  41. {remdb-0.3.114.dist-info → remdb-0.3.127.dist-info}/entry_points.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  -- REM Model Schema (install_models.sql)
2
2
  -- Generated from Pydantic models
3
3
  -- Source: model registry
4
- -- Generated at: 2025-11-29T11:08:16.713884
4
+ -- Generated at: 2025-11-29T18:45:11.372432
5
5
  --
6
6
  -- DO NOT EDIT MANUALLY - Regenerate with: rem db schema generate
7
7
  --
@@ -10,6 +10,7 @@
10
10
  -- 2. Embeddings tables (embeddings_<table>)
11
11
  -- 3. KV_STORE triggers for cache maintenance
12
12
  -- 4. Indexes (foreground only, background indexes separate)
13
+ -- 5. Schema table entries (for agent-like table access)
13
14
 
14
15
  -- ============================================================================
15
16
  -- PREREQUISITES CHECK
@@ -1232,6 +1233,1868 @@ CREATE TRIGGER trg_users_kv_store
1232
1233
  AFTER INSERT OR UPDATE OR DELETE ON users
1233
1234
  FOR EACH ROW EXECUTE FUNCTION fn_users_kv_store_upsert();
1234
1235
 
1236
+ -- ============================================================================
1237
+ -- SCHEMA TABLE ENTRIES
1238
+ -- Every entity table gets a schemas entry for agent-like access
1239
+ -- ============================================================================
1240
+
1241
+ -- Schema entry for Feedback (feedbacks)
1242
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
1243
+ VALUES (
1244
+ 'ae554853-e743-5d73-a2db-1ce20e7089fe'::uuid,
1245
+ 'system',
1246
+ 'Feedback',
1247
+ '# Feedback
1248
+
1249
+
1250
+ User feedback on a message or session.
1251
+
1252
+ Captures structured feedback including:
1253
+ - Rating (1-5 scale or thumbs up/down)
1254
+ - Categories (predefined or custom)
1255
+ - Free-text comment
1256
+ - Trace reference for OTEL/Phoenix integration
1257
+
1258
+ The feedback can be attached to:
1259
+ - A specific message (message_id set)
1260
+ - An entire session (session_id set, message_id null)
1261
+
1262
+
1263
+ ## Overview
1264
+
1265
+ The `Feedback` entity is stored in the `feedbacks` table. Each record is uniquely
1266
+ identified by its `id` field for lookups and graph traversal.
1267
+
1268
+ ## Search Capabilities
1269
+
1270
+ This schema includes the `search_rem` tool which supports:
1271
+ - **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
1272
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
1273
+ - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM feedbacks LIMIT 10`)
1274
+ - **SQL**: Complex queries (e.g., `SELECT * FROM feedbacks WHERE ...`)
1275
+
1276
+ ## Table Info
1277
+
1278
+ | Property | Value |
1279
+ |----------|-------|
1280
+ | Table | `feedbacks` |
1281
+ | Entity Key | `id` |
1282
+ | Embedding Fields | None |
1283
+ | Tools | `search_rem` |
1284
+
1285
+ ## Fields
1286
+
1287
+ ### `id`
1288
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
1289
+ - **Optional**
1290
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
1291
+
1292
+ ### `created_at`
1293
+ - **Type**: `<class ''datetime.datetime''>`
1294
+ - **Optional**
1295
+ - Entity creation timestamp
1296
+
1297
+ ### `updated_at`
1298
+ - **Type**: `<class ''datetime.datetime''>`
1299
+ - **Optional**
1300
+ - Last update timestamp
1301
+
1302
+ ### `deleted_at`
1303
+ - **Type**: `typing.Optional[datetime.datetime]`
1304
+ - **Optional**
1305
+ - Soft deletion timestamp
1306
+
1307
+ ### `tenant_id`
1308
+ - **Type**: `typing.Optional[str]`
1309
+ - **Optional**
1310
+ - Tenant identifier for multi-tenancy isolation
1311
+
1312
+ ### `user_id`
1313
+ - **Type**: `typing.Optional[str]`
1314
+ - **Optional**
1315
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
1316
+
1317
+ ### `graph_edges`
1318
+ - **Type**: `list[dict]`
1319
+ - **Optional**
1320
+ - Knowledge graph edges stored as InlineEdge dicts
1321
+
1322
+ ### `metadata`
1323
+ - **Type**: `<class ''dict''>`
1324
+ - **Optional**
1325
+ - Flexible metadata storage
1326
+
1327
+ ### `tags`
1328
+ - **Type**: `list[str]`
1329
+ - **Optional**
1330
+ - Entity tags
1331
+
1332
+ ### `session_id`
1333
+ - **Type**: `<class ''str''>`
1334
+ - **Required**
1335
+ - Session ID this feedback relates to
1336
+
1337
+ ### `message_id`
1338
+ - **Type**: `str | None`
1339
+ - **Optional**
1340
+ - Specific message ID (null for session-level feedback)
1341
+
1342
+ ### `rating`
1343
+ - **Type**: `int | None`
1344
+ - **Optional**
1345
+ - Rating: -1 (thumbs down), 1 (thumbs up), or 1-5 scale
1346
+
1347
+ ### `categories`
1348
+ - **Type**: `list[str]`
1349
+ - **Optional**
1350
+ - Selected feedback categories (from FeedbackCategory or custom)
1351
+
1352
+ ### `comment`
1353
+ - **Type**: `str | None`
1354
+ - **Optional**
1355
+ - Optional free-text feedback comment
1356
+
1357
+ ### `trace_id`
1358
+ - **Type**: `str | None`
1359
+ - **Optional**
1360
+ - OTEL trace ID for linking to observability
1361
+
1362
+ ### `span_id`
1363
+ - **Type**: `str | None`
1364
+ - **Optional**
1365
+ - OTEL span ID for specific span feedback
1366
+
1367
+ ### `phoenix_synced`
1368
+ - **Type**: `<class ''bool''>`
1369
+ - **Optional**
1370
+ - Whether feedback has been synced to Phoenix as annotation
1371
+
1372
+ ### `phoenix_annotation_id`
1373
+ - **Type**: `str | None`
1374
+ - **Optional**
1375
+ - Phoenix annotation ID after sync
1376
+
1377
+ ### `annotator_kind`
1378
+ - **Type**: `<class ''str''>`
1379
+ - **Optional**
1380
+ - Annotator type: HUMAN, LLM, CODE
1381
+
1382
+ ',
1383
+ '{"type": "object", "description": "\n User feedback on a message or session.\n\n Captures structured feedback including:\n - Rating (1-5 scale or thumbs up/down)\n - Categories (predefined or custom)\n - Free-text comment\n - Trace reference for OTEL/Phoenix integration\n\n The feedback can be attached to:\n - A specific message (message_id set)\n - An entire session (session_id set, message_id null)\n \n\nThis agent can search the `feedbacks` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "session_id": {"description": "Session ID this feedback relates to", "title": "Session Id", "type": "string"}, "message_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Specific message ID (null for session-level feedback)", "title": "Message Id"}, "rating": {"anyOf": [{"maximum": 5, "minimum": -1, "type": "integer"}, {"type": "null"}], "default": null, "description": "Rating: -1 (thumbs down), 1 (thumbs up), or 1-5 scale", "title": "Rating"}, "categories": {"description": "Selected feedback categories (from FeedbackCategory or custom)", "items": {"type": "string"}, "title": "Categories", "type": "array"}, "comment": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Optional free-text feedback comment", "title": "Comment"}, "trace_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "OTEL trace ID for linking to observability", "title": "Trace Id"}, "span_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "OTEL span ID for specific span feedback", "title": "Span Id"}, "phoenix_synced": {"default": false, "description": "Whether feedback has been synced to Phoenix as annotation", "title": "Phoenix Synced", "type": "boolean"}, "phoenix_annotation_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Phoenix annotation ID after sync", "title": "Phoenix 
Annotation Id"}, "annotator_kind": {"default": "HUMAN", "description": "Annotator type: HUMAN, LLM, CODE", "title": "Annotator Kind", "type": "string"}}, "required": ["session_id"], "json_schema_extra": {"table_name": "feedbacks", "entity_key_field": "id", "embedding_fields": [], "fully_qualified_name": "rem.models.entities.feedback.Feedback", "tools": ["search_rem"], "default_search_table": "feedbacks", "has_embeddings": false}}'::jsonb,
1384
+ 'entity',
1385
+ '{"table_name": "feedbacks", "entity_key_field": "id", "embedding_fields": [], "fqn": "rem.models.entities.feedback.Feedback"}'::jsonb
1386
+ )
1387
+ ON CONFLICT (id) DO UPDATE SET
1388
+ name = EXCLUDED.name,
1389
+ content = EXCLUDED.content,
1390
+ spec = EXCLUDED.spec,
1391
+ category = EXCLUDED.category,
1392
+ metadata = EXCLUDED.metadata,
1393
+ updated_at = CURRENT_TIMESTAMP;
1394
+
1395
+ -- Schema entry for File (files)
1396
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
1397
+ VALUES (
1398
+ 'c3b3ef33-59d4-57a1-81a3-cc6adc45b194'::uuid,
1399
+ 'system',
1400
+ 'File',
1401
+ '# File
1402
+
1403
+
1404
+ File metadata and tracking.
1405
+
1406
+ Represents files uploaded to or referenced by the REM system,
1407
+ tracking their metadata and processing status. Tenant isolation
1408
+ is provided via CoreModel.tenant_id field.
1409
+
1410
+
1411
+ ## Overview
1412
+
1413
+ The `File` entity is stored in the `files` table. Each record is uniquely
1414
+ identified by its `id` field for lookups and graph traversal.
1415
+
1416
+ ## Search Capabilities
1417
+
1418
+ This schema includes the `search_rem` tool which supports:
1419
+ - **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
1420
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
1421
+ - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM files LIMIT 10`)
1422
+ - **SQL**: Complex queries (e.g., `SELECT * FROM files WHERE ...`)
1423
+
1424
+ ## Table Info
1425
+
1426
+ | Property | Value |
1427
+ |----------|-------|
1428
+ | Table | `files` |
1429
+ | Entity Key | `id` |
1430
+ | Embedding Fields | `content` |
1431
+ | Tools | `search_rem` |
1432
+
1433
+ ## Fields
1434
+
1435
+ ### `id`
1436
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
1437
+ - **Optional**
1438
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
1439
+
1440
+ ### `created_at`
1441
+ - **Type**: `<class ''datetime.datetime''>`
1442
+ - **Optional**
1443
+ - Entity creation timestamp
1444
+
1445
+ ### `updated_at`
1446
+ - **Type**: `<class ''datetime.datetime''>`
1447
+ - **Optional**
1448
+ - Last update timestamp
1449
+
1450
+ ### `deleted_at`
1451
+ - **Type**: `typing.Optional[datetime.datetime]`
1452
+ - **Optional**
1453
+ - Soft deletion timestamp
1454
+
1455
+ ### `tenant_id`
1456
+ - **Type**: `typing.Optional[str]`
1457
+ - **Optional**
1458
+ - Tenant identifier for multi-tenancy isolation
1459
+
1460
+ ### `user_id`
1461
+ - **Type**: `typing.Optional[str]`
1462
+ - **Optional**
1463
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
1464
+
1465
+ ### `graph_edges`
1466
+ - **Type**: `list[dict]`
1467
+ - **Optional**
1468
+ - Knowledge graph edges stored as InlineEdge dicts
1469
+
1470
+ ### `metadata`
1471
+ - **Type**: `<class ''dict''>`
1472
+ - **Optional**
1473
+ - Flexible metadata storage
1474
+
1475
+ ### `tags`
1476
+ - **Type**: `list[str]`
1477
+ - **Optional**
1478
+ - Entity tags
1479
+
1480
+ ### `name`
1481
+ - **Type**: `<class ''str''>`
1482
+ - **Required**
1483
+ - File name
1484
+
1485
+ ### `uri`
1486
+ - **Type**: `<class ''str''>`
1487
+ - **Required**
1488
+ - File storage URI (S3, local path, etc.)
1489
+
1490
+ ### `content`
1491
+ - **Type**: `typing.Optional[str]`
1492
+ - **Optional**
1493
+ - Extracted text content (if applicable)
1494
+
1495
+ ### `timestamp`
1496
+ - **Type**: `typing.Optional[str]`
1497
+ - **Optional**
1498
+ - File creation/modification timestamp
1499
+
1500
+ ### `size_bytes`
1501
+ - **Type**: `typing.Optional[int]`
1502
+ - **Optional**
1503
+ - File size in bytes
1504
+
1505
+ ### `mime_type`
1506
+ - **Type**: `typing.Optional[str]`
1507
+ - **Optional**
1508
+ - File MIME type
1509
+
1510
+ ### `processing_status`
1511
+ - **Type**: `typing.Optional[str]`
1512
+ - **Optional**
1513
+ - File processing status (pending, processing, completed, failed)
1514
+
1515
+ ',
1516
+ '{"type": "object", "description": "\n File metadata and tracking.\n\n Represents files uploaded to or referenced by the REM system,\n tracking their metadata and processing status. Tenant isolation\n is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `files` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "File name", "title": "Name", "type": "string"}, "uri": {"description": "File storage URI (S3, local path, etc.)", "title": "Uri", "type": "string"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Extracted text content (if applicable)", "title": "Content"}, "timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File creation/modification timestamp", "title": "Timestamp"}, "size_bytes": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "File size in bytes", "title": "Size Bytes"}, "mime_type": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File MIME type", "title": "Mime Type"}, "processing_status": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": "pending", "description": "File processing status (pending, processing, completed, failed)", "title": "Processing Status"}}, "required": ["name", "uri"], "json_schema_extra": {"table_name": "files", "entity_key_field": "id", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.file.File", "tools": ["search_rem"], "default_search_table": "files", "has_embeddings": true}}'::jsonb,
1517
+ 'entity',
1518
+ '{"table_name": "files", "entity_key_field": "id", "embedding_fields": ["content"], "fqn": "rem.models.entities.file.File"}'::jsonb
1519
+ )
1520
+ ON CONFLICT (id) DO UPDATE SET
1521
+ name = EXCLUDED.name,
1522
+ content = EXCLUDED.content,
1523
+ spec = EXCLUDED.spec,
1524
+ category = EXCLUDED.category,
1525
+ metadata = EXCLUDED.metadata,
1526
+ updated_at = CURRENT_TIMESTAMP;
1527
+
1528
+ -- Schema entry for ImageResource (image_resources)
1529
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
1530
+ VALUES (
1531
+ 'ab4bc90c-2cda-55b2-bd4b-e78e19f7d4a7'::uuid,
1532
+ 'system',
1533
+ 'ImageResource',
1534
+ '# ImageResource
1535
+
1536
+
1537
+ Image-specific resource with CLIP embeddings.
1538
+
1539
+ Stored in separate `image_resources` table with CLIP embeddings
1540
+ instead of text embeddings. This enables:
1541
+ - Multimodal search (text-to-image, image-to-image)
1542
+ - Proper dimensionality (512/768 for CLIP vs 1536 for text)
1543
+ - Cost tracking (CLIP tokens separate from text tokens)
1544
+
1545
+ Embedding Strategy:
1546
+ - Default (when JINA_API_KEY set): Jina CLIP API (jina-clip-v2)
1547
+ - Future: Self-hosted OpenCLIP models via KEDA-scaled pods
1548
+ - Fallback: No embeddings (images searchable by metadata only)
1549
+
1550
+ Vision LLM Strategy (tier/sampling gated):
1551
+ - Gold tier: Always get vision descriptions
1552
+ - Silver/Free: Probabilistic sampling (IMAGE_VLLM_SAMPLE_RATE)
1553
+ - Fallback: Basic metadata only
1554
+
1555
+ Tenant isolation provided via CoreModel.tenant_id field.
1556
+
1557
+
1558
+ ## Overview
1559
+
1560
+ The `ImageResource` entity is stored in the `image_resources` table. Each record is uniquely
1561
+ identified by its `name` field for lookups and graph traversal.
1562
+
1563
+ ## Search Capabilities
1564
+
1565
+ This schema includes the `search_rem` tool which supports:
1566
+ - **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
1567
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
1568
+ - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM image_resources LIMIT 10`)
1569
+ - **SQL**: Complex queries (e.g., `SELECT * FROM image_resources WHERE ...`)
1570
+
1571
+ ## Table Info
1572
+
1573
+ | Property | Value |
1574
+ |----------|-------|
1575
+ | Table | `image_resources` |
1576
+ | Entity Key | `name` |
1577
+ | Embedding Fields | `content` |
1578
+ | Tools | `search_rem` |
1579
+
1580
+ ## Fields
1581
+
1582
+ ### `id`
1583
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
1584
+ - **Optional**
1585
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
1586
+
1587
+ ### `created_at`
1588
+ - **Type**: `<class ''datetime.datetime''>`
1589
+ - **Optional**
1590
+ - Entity creation timestamp
1591
+
1592
+ ### `updated_at`
1593
+ - **Type**: `<class ''datetime.datetime''>`
1594
+ - **Optional**
1595
+ - Last update timestamp
1596
+
1597
+ ### `deleted_at`
1598
+ - **Type**: `typing.Optional[datetime.datetime]`
1599
+ - **Optional**
1600
+ - Soft deletion timestamp
1601
+
1602
+ ### `tenant_id`
1603
+ - **Type**: `typing.Optional[str]`
1604
+ - **Optional**
1605
+ - Tenant identifier for multi-tenancy isolation
1606
+
1607
+ ### `user_id`
1608
+ - **Type**: `typing.Optional[str]`
1609
+ - **Optional**
1610
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
1611
+
1612
+ ### `graph_edges`
1613
+ - **Type**: `list[dict]`
1614
+ - **Optional**
1615
+ - Knowledge graph edges stored as InlineEdge dicts
1616
+
1617
+ ### `metadata`
1618
+ - **Type**: `<class ''dict''>`
1619
+ - **Optional**
1620
+ - Flexible metadata storage
1621
+
1622
+ ### `tags`
1623
+ - **Type**: `list[str]`
1624
+ - **Optional**
1625
+ - Entity tags
1626
+
1627
+ ### `name`
1628
+ - **Type**: `typing.Optional[str]`
1629
+ - **Optional**
1630
+ - Human-readable resource name (used as graph label). Auto-generated from uri+ordinal if not provided.
1631
+
1632
+ ### `uri`
1633
+ - **Type**: `typing.Optional[str]`
1634
+ - **Optional**
1635
+ - Content URI or identifier (file path, URL, etc.)
1636
+
1637
+ ### `ordinal`
1638
+ - **Type**: `<class ''int''>`
1639
+ - **Optional**
1640
+ - Chunk ordinal for splitting large documents (0 for single-chunk resources)
1641
+
1642
+ ### `content`
1643
+ - **Type**: `<class ''str''>`
1644
+ - **Optional**
1645
+ - Resource content text
1646
+
1647
+ ### `timestamp`
1648
+ - **Type**: `<class ''datetime.datetime''>`
1649
+ - **Optional**
1650
+ - Resource timestamp (content creation/publication time)
1651
+
1652
+ ### `category`
1653
+ - **Type**: `typing.Optional[str]`
1654
+ - **Optional**
1655
+ - Resource category (document, conversation, artifact, etc.)
1656
+
1657
+ ### `related_entities`
1658
+ - **Type**: `list[dict]`
1659
+ - **Optional**
1660
+ - Extracted entities (people, projects, concepts) with metadata
1661
+
1662
+ ### `image_width`
1663
+ - **Type**: `typing.Optional[int]`
1664
+ - **Optional**
1665
+ - Image width in pixels
1666
+
1667
+ ### `image_height`
1668
+ - **Type**: `typing.Optional[int]`
1669
+ - **Optional**
1670
+ - Image height in pixels
1671
+
1672
+ ### `image_format`
1673
+ - **Type**: `typing.Optional[str]`
1674
+ - **Optional**
1675
+ - Image format (PNG, JPEG, GIF, WebP)
1676
+
1677
+ ### `vision_description`
1678
+ - **Type**: `typing.Optional[str]`
1679
+ - **Optional**
1680
+ - Vision LLM generated description (markdown, only for gold tier or sampled images)
1681
+
1682
+ ### `vision_provider`
1683
+ - **Type**: `typing.Optional[str]`
1684
+ - **Optional**
1685
+ - Vision provider used (anthropic, gemini, openai)
1686
+
1687
+ ### `vision_model`
1688
+ - **Type**: `typing.Optional[str]`
1689
+ - **Optional**
1690
+ - Vision model used for description
1691
+
1692
+ ### `clip_embedding`
1693
+ - **Type**: `typing.Optional[list[float]]`
1694
+ - **Optional**
1695
+ - CLIP embedding vector (512 or 768 dimensions, from Jina AI or self-hosted)
1696
+
1697
+ ### `clip_dimensions`
1698
+ - **Type**: `typing.Optional[int]`
1699
+ - **Optional**
1700
+ - CLIP embedding dimensionality (512 for jina-clip-v2, 768 for jina-clip-v1)
1701
+
1702
+ ',
1703
+ '{"type": "object", "description": "\n Image-specific resource with CLIP embeddings.\n\n Stored in separate `image_resources` table with CLIP embeddings\n instead of text embeddings. This enables:\n - Multimodal search (text-to-image, image-to-image)\n - Proper dimensionality (512/768 for CLIP vs 1536 for text)\n - Cost tracking (CLIP tokens separate from text tokens)\n\n Embedding Strategy:\n - Default (when JINA_API_KEY set): Jina CLIP API (jina-clip-v2)\n - Future: Self-hosted OpenCLIP models via KEDA-scaled pods\n - Fallback: No embeddings (images searchable by metadata only)\n\n Vision LLM Strategy (tier/sampling gated):\n - Gold tier: Always get vision descriptions\n - Silver/Free: Probabilistic sampling (IMAGE_VLLM_SAMPLE_RATE)\n - Fallback: Basic metadata only\n\n Tenant isolation provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `image_resources` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). 
Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Human-readable resource name (used as graph label). 
Auto-generated from uri+ordinal if not provided.", "entity_key": true, "title": "Name"}, "uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Content URI or identifier (file path, URL, etc.)", "title": "Uri"}, "ordinal": {"composite_key": true, "default": 0, "description": "Chunk ordinal for splitting large documents (0 for single-chunk resources)", "title": "Ordinal", "type": "integer"}, "content": {"default": "", "description": "Resource content text", "title": "Content", "type": "string"}, "timestamp": {"description": "Resource timestamp (content creation/publication time)", "format": "date-time", "title": "Timestamp", "type": "string"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Resource category (document, conversation, artifact, etc.)", "title": "Category"}, "related_entities": {"description": "Extracted entities (people, projects, concepts) with metadata", "items": {"additionalProperties": true, "type": "object"}, "title": "Related Entities", "type": "array"}, "image_width": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "Image width in pixels", "title": "Image Width"}, "image_height": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "Image height in pixels", "title": "Image Height"}, "image_format": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Image format (PNG, JPEG, GIF, WebP)", "title": "Image Format"}, "vision_description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Vision LLM generated description (markdown, only for gold tier or sampled images)", "title": "Vision Description"}, "vision_provider": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Vision provider used (anthropic, gemini, openai)", "title": "Vision Provider"}, "vision_model": {"anyOf": [{"type": "string"}, {"type": 
"null"}], "default": null, "description": "Vision model used for description", "title": "Vision Model"}, "clip_embedding": {"anyOf": [{"items": {"type": "number"}, "type": "array"}, {"type": "null"}], "default": null, "description": "CLIP embedding vector (512 or 768 dimensions, from Jina AI or self-hosted)", "title": "Clip Embedding"}, "clip_dimensions": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "CLIP embedding dimensionality (512 for jina-clip-v2, 768 for jina-clip-v1)", "title": "Clip Dimensions"}}, "required": [], "json_schema_extra": {"table_name": "image_resources", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.image_resource.ImageResource", "tools": ["search_rem"], "default_search_table": "image_resources", "has_embeddings": true}}'::jsonb,
1704
+ 'entity',
1705
+ '{"table_name": "image_resources", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.image_resource.ImageResource"}'::jsonb
1706
+ )
1707
+ ON CONFLICT (id) DO UPDATE SET
1708
+ name = EXCLUDED.name,
1709
+ content = EXCLUDED.content,
1710
+ spec = EXCLUDED.spec,
1711
+ category = EXCLUDED.category,
1712
+ metadata = EXCLUDED.metadata,
1713
+ updated_at = CURRENT_TIMESTAMP;
1714
+
1715
+ -- Schema entry for Message (messages)
1716
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
1717
+ VALUES (
1718
+ 'be36f9da-6df4-51ba-bb41-bf51246ecec1'::uuid,
1719
+ 'system',
1720
+ 'Message',
1721
+ '# Message
1722
+
1723
+
1724
+ Communication content unit.
1725
+
1726
+ Represents individual messages in conversations, chats, or other
1727
+ communication contexts. Tenant isolation is provided via CoreModel.tenant_id field.
1728
+
1729
+ Trace fields (trace_id, span_id) enable integration with OTEL/Phoenix
1730
+ for observability and feedback annotation.
1731
+
1732
+
1733
+ ## Overview
1734
+
1735
+ The `Message` entity is stored in the `messages` table. Each record is uniquely
1736
+ identified by its `id` field for lookups and graph traversal.
1737
+
1738
+ ## Search Capabilities
1739
+
1740
+ This schema includes the `search_rem` tool which supports:
1741
+ - **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
1742
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
1743
+ - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM messages LIMIT 10`)
1744
+ - **SQL**: Complex queries (e.g., `SELECT * FROM messages WHERE ...`)
1745
+
1746
+ ## Table Info
1747
+
1748
+ | Property | Value |
1749
+ |----------|-------|
1750
+ | Table | `messages` |
1751
+ | Entity Key | `id` |
1752
+ | Embedding Fields | `content` |
1753
+ | Tools | `search_rem` |
1754
+
1755
+ ## Fields
1756
+
1757
+ ### `id`
1758
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
1759
+ - **Optional**
1760
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
1761
+
1762
+ ### `created_at`
1763
+ - **Type**: `<class ''datetime.datetime''>`
1764
+ - **Optional**
1765
+ - Entity creation timestamp
1766
+
1767
+ ### `updated_at`
1768
+ - **Type**: `<class ''datetime.datetime''>`
1769
+ - **Optional**
1770
+ - Last update timestamp
1771
+
1772
+ ### `deleted_at`
1773
+ - **Type**: `typing.Optional[datetime.datetime]`
1774
+ - **Optional**
1775
+ - Soft deletion timestamp
1776
+
1777
+ ### `tenant_id`
1778
+ - **Type**: `typing.Optional[str]`
1779
+ - **Optional**
1780
+ - Tenant identifier for multi-tenancy isolation
1781
+
1782
+ ### `user_id`
1783
+ - **Type**: `typing.Optional[str]`
1784
+ - **Optional**
1785
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
1786
+
1787
+ ### `graph_edges`
1788
+ - **Type**: `list[dict]`
1789
+ - **Optional**
1790
+ - Knowledge graph edges stored as InlineEdge dicts
1791
+
1792
+ ### `metadata`
1793
+ - **Type**: `<class ''dict''>`
1794
+ - **Optional**
1795
+ - Flexible metadata storage
1796
+
1797
+ ### `tags`
1798
+ - **Type**: `list[str]`
1799
+ - **Optional**
1800
+ - Entity tags
1801
+
1802
+ ### `content`
1803
+ - **Type**: `<class ''str''>`
1804
+ - **Required**
1805
+ - Message content text
1806
+
1807
+ ### `message_type`
1808
+ - **Type**: `str | None`
1809
+ - **Optional**
1810
+ - Message type e.g. role: ''user'', ''assistant'', ''system'', ''tool''
1811
+
1812
+ ### `session_id`
1813
+ - **Type**: `str | None`
1814
+ - **Optional**
1815
+ - Session identifier for tracking message context
1816
+
1817
+ ### `prompt`
1818
+ - **Type**: `str | None`
1819
+ - **Optional**
1820
+ - Custom prompt used for this message (if overridden from default)
1821
+
1822
+ ### `model`
1823
+ - **Type**: `str | None`
1824
+ - **Optional**
1825
+ - Model used for generating this message (provider:model format)
1826
+
1827
+ ### `token_count`
1828
+ - **Type**: `int | None`
1829
+ - **Optional**
1830
+ - Token count for this message
1831
+
1832
+ ### `trace_id`
1833
+ - **Type**: `str | None`
1834
+ - **Optional**
1835
+ - OTEL trace ID for observability integration
1836
+
1837
+ ### `span_id`
1838
+ - **Type**: `str | None`
1839
+ - **Optional**
1840
+ - OTEL span ID for specific span reference
1841
+
1842
+ ',
1843
+ '{"type": "object", "description": "\n Communication content unit.\n\n Represents individual messages in conversations, chats, or other\n communication contexts. Tenant isolation is provided via CoreModel.tenant_id field.\n\n Trace fields (trace_id, span_id) enable integration with OTEL/Phoenix\n for observability and feedback annotation.\n \n\nThis agent can search the `messages` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "content": {"description": "Message content text", "title": "Content", "type": "string"}, "message_type": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Message type e.g. role: ''user'', ''assistant'', ''system'', ''tool''", "title": "Message Type"}, "session_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Session identifier for tracking message context", "title": "Session Id"}, "prompt": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Custom prompt used for this message (if overridden from default)", "title": "Prompt"}, "model": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Model used for generating this message (provider:model format)", "title": "Model"}, "token_count": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "Token count for this message", "title": "Token Count"}, "trace_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "OTEL trace ID for observability integration", "title": "Trace Id"}, "span_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "OTEL span ID for specific span reference", "title": "Span Id"}}, "required": ["content"], "json_schema_extra": {"table_name": "messages", "entity_key_field": "id", "embedding_fields": ["content"], "fully_qualified_name": 
"rem.models.entities.message.Message", "tools": ["search_rem"], "default_search_table": "messages", "has_embeddings": true}}'::jsonb,
1844
+ 'entity',
1845
+ '{"table_name": "messages", "entity_key_field": "id", "embedding_fields": ["content"], "fqn": "rem.models.entities.message.Message"}'::jsonb
1846
+ )
1847
+ ON CONFLICT (id) DO UPDATE SET
1848
+ name = EXCLUDED.name,
1849
+ content = EXCLUDED.content,
1850
+ spec = EXCLUDED.spec,
1851
+ category = EXCLUDED.category,
1852
+ metadata = EXCLUDED.metadata,
1853
+ updated_at = CURRENT_TIMESTAMP;
1854
+
1855
+ -- Schema entry for Moment (moments)
1856
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
1857
+ VALUES (
1858
+ 'a08f0a8c-5bab-5bf5-9760-0e67bc69bd74'::uuid,
1859
+ 'system',
1860
+ 'Moment',
1861
+ '# Moment
1862
+
1863
+
1864
+ Temporal narrative extracted from resources.
1865
+
1866
+ Moments provide temporal structure and context for the REM graph,
1867
+ enabling time-based queries and understanding of when events occurred.
1868
+ Tenant isolation is provided via CoreModel.tenant_id field.
1869
+
1870
+
1871
+ ## Overview
1872
+
1873
+ The `Moment` entity is stored in the `moments` table. Each record is uniquely
1874
+ identified by its `name` field for lookups and graph traversal.
1875
+
1876
+ ## Search Capabilities
1877
+
1878
+ This schema includes the `search_rem` tool which supports:
1879
+ - **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
1880
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
1881
+ - **SEARCH**: Semantic vector search on summary (e.g., `SEARCH "concept" FROM moments LIMIT 10`)
1882
+ - **SQL**: Complex queries (e.g., `SELECT * FROM moments WHERE ...`)
1883
+
1884
+ ## Table Info
1885
+
1886
+ | Property | Value |
1887
+ |----------|-------|
1888
+ | Table | `moments` |
1889
+ | Entity Key | `name` |
1890
+ | Embedding Fields | `summary` |
1891
+ | Tools | `search_rem` |
1892
+
1893
+ ## Fields
1894
+
1895
+ ### `id`
1896
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
1897
+ - **Optional**
1898
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
1899
+
1900
+ ### `created_at`
1901
+ - **Type**: `<class ''datetime.datetime''>`
1902
+ - **Optional**
1903
+ - Entity creation timestamp
1904
+
1905
+ ### `updated_at`
1906
+ - **Type**: `<class ''datetime.datetime''>`
1907
+ - **Optional**
1908
+ - Last update timestamp
1909
+
1910
+ ### `deleted_at`
1911
+ - **Type**: `typing.Optional[datetime.datetime]`
1912
+ - **Optional**
1913
+ - Soft deletion timestamp
1914
+
1915
+ ### `tenant_id`
1916
+ - **Type**: `typing.Optional[str]`
1917
+ - **Optional**
1918
+ - Tenant identifier for multi-tenancy isolation
1919
+
1920
+ ### `user_id`
1921
+ - **Type**: `typing.Optional[str]`
1922
+ - **Optional**
1923
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
1924
+
1925
+ ### `graph_edges`
1926
+ - **Type**: `list[dict]`
1927
+ - **Optional**
1928
+ - Knowledge graph edges stored as InlineEdge dicts
1929
+
1930
+ ### `metadata`
1931
+ - **Type**: `<class ''dict''>`
1932
+ - **Optional**
1933
+ - Flexible metadata storage
1934
+
1935
+ ### `tags`
1936
+ - **Type**: `list[str]`
1937
+ - **Optional**
1938
+ - Entity tags
1939
+
1940
+ ### `name`
1941
+ - **Type**: `typing.Optional[str]`
1942
+ - **Optional**
1943
+ - Human-readable moment name (used as graph label). Auto-generated from starts_timestamp+moment_type if not provided.
1944
+
1945
+ ### `moment_type`
1946
+ - **Type**: `typing.Optional[str]`
1947
+ - **Optional**
1948
+ - Moment classification (meeting, coding-session, conversation, etc.)
1949
+
1950
+ ### `category`
1951
+ - **Type**: `typing.Optional[str]`
1952
+ - **Optional**
1953
+ - Moment category for grouping and filtering
1954
+
1955
+ ### `starts_timestamp`
1956
+ - **Type**: `<class ''datetime.datetime''>`
1957
+ - **Required**
1958
+ - Moment start time
1959
+
1960
+ ### `ends_timestamp`
1961
+ - **Type**: `typing.Optional[datetime.datetime]`
1962
+ - **Optional**
1963
+ - Moment end time
1964
+
1965
+ ### `present_persons`
1966
+ - **Type**: `list[rem.models.entities.moment.Person]`
1967
+ - **Optional**
1968
+ - People present in the moment
1969
+
1970
+ ### `emotion_tags`
1971
+ - **Type**: `list[str]`
1972
+ - **Optional**
1973
+ - Emotion/sentiment tags (happy, frustrated, focused, etc.)
1974
+
1975
+ ### `topic_tags`
1976
+ - **Type**: `list[str]`
1977
+ - **Optional**
1978
+ - Topic/concept tags (project names, technologies, etc.)
1979
+
1980
+ ### `summary`
1981
+ - **Type**: `typing.Optional[str]`
1982
+ - **Optional**
1983
+ - Natural language summary of the moment
1984
+
1985
+ ### `source_resource_ids`
1986
+ - **Type**: `list[str]`
1987
+ - **Optional**
1988
+ - Resource IDs used to construct this moment
1989
+
1990
+ ',
1991
+ '{"type": "object", "description": "\n Temporal narrative extracted from resources.\n\n Moments provide temporal structure and context for the REM graph,\n enabling time-based queries and understanding of when events occurred.\n Tenant isolation is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `moments` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Human-readable moment name (used as graph label). Auto-generated from starts_timestamp+moment_type if not provided.", "entity_key": true, "title": "Name"}, "moment_type": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Moment classification (meeting, coding-session, conversation, etc.)", "title": "Moment Type"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Moment category for grouping and filtering", "title": "Category"}, "starts_timestamp": {"description": "Moment start time", "format": "date-time", "title": "Starts Timestamp", "type": "string"}, "ends_timestamp": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Moment end time", "title": "Ends Timestamp"}, "present_persons": {"description": "People present in the moment", "items": {"$ref": "#/$defs/Person"}, "title": "Present Persons", "type": "array"}, "emotion_tags": {"description": "Emotion/sentiment tags (happy, frustrated, focused, etc.)", "items": {"type": "string"}, "title": "Emotion Tags", "type": "array"}, "topic_tags": {"description": "Topic/concept tags (project names, technologies, etc.)", "items": {"type": "string"}, "title": "Topic Tags", "type": "array"}, "summary": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, 
"description": "Natural language summary of the moment", "title": "Summary"}, "source_resource_ids": {"description": "Resource IDs used to construct this moment", "items": {"type": "string"}, "title": "Source Resource Ids", "type": "array"}}, "required": ["starts_timestamp"], "json_schema_extra": {"table_name": "moments", "entity_key_field": "name", "embedding_fields": ["summary"], "fully_qualified_name": "rem.models.entities.moment.Moment", "tools": ["search_rem"], "default_search_table": "moments", "has_embeddings": true}}'::jsonb,
1992
+ 'entity',
1993
+ '{"table_name": "moments", "entity_key_field": "name", "embedding_fields": ["summary"], "fqn": "rem.models.entities.moment.Moment"}'::jsonb
1994
+ )
1995
+ ON CONFLICT (id) DO UPDATE SET
1996
+ name = EXCLUDED.name,
1997
+ content = EXCLUDED.content,
1998
+ spec = EXCLUDED.spec,
1999
+ category = EXCLUDED.category,
2000
+ metadata = EXCLUDED.metadata,
2001
+ updated_at = CURRENT_TIMESTAMP;
2002
+
2003
+ -- Schema entry for Ontology (ontologies)
2004
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
2005
+ VALUES (
2006
+ 'a702ed74-8988-534a-9917-2977349777c1'::uuid,
2007
+ 'system',
2008
+ 'Ontology',
2009
+ '# Ontology
2010
+
2011
+ Domain-specific knowledge extracted from files using custom agents.
2012
+
2013
+ Attributes:
2014
+ name: Human-readable label for this ontology instance
2015
+ file_id: Foreign key to File entity that was processed
2016
+ agent_schema_id: Foreign key to Schema entity that performed extraction
2017
+ provider_name: LLM provider used for extraction (e.g., "anthropic", "openai")
2018
+ model_name: Specific model used (e.g., "claude-sonnet-4-5")
2019
+ extracted_data: Structured data extracted by agent (arbitrary JSON)
2020
+ confidence_score: Optional confidence score from extraction (0.0-1.0)
2021
+ extraction_timestamp: When extraction was performed
2022
+ embedding_text: Text used for generating embedding (derived from extracted_data)
2023
+
2024
+ Inherited from CoreModel:
2025
+ id: UUID or string identifier
2026
+ created_at: Entity creation timestamp
2027
+ updated_at: Last update timestamp
2028
+ deleted_at: Soft deletion timestamp
2029
+ tenant_id: Multi-tenancy isolation
2030
+ user_id: Ownership
2031
+ graph_edges: Relationships to other entities
2032
+ metadata: Flexible metadata storage
2033
+ tags: Classification tags
2034
+ column: Database schema metadata
2035
+
2036
+ Example Usage:
2037
+ # CV extraction
2038
+ cv_ontology = Ontology(
2039
+ name="john-doe-cv-2024",
2040
+ file_id="file-uuid-123",
2041
+ agent_schema_id="cv-parser-v1",
2042
+ provider_name="anthropic",
2043
+ model_name="claude-sonnet-4-5-20250929",
2044
+ extracted_data={
2045
+ "candidate_name": "John Doe",
2046
+ "email": "john@example.com",
2047
+ "skills": ["Python", "PostgreSQL", "Kubernetes"],
2048
+ "experience": [
2049
+ {
2050
+ "company": "TechCorp",
2051
+ "role": "Senior Engineer",
2052
+ "years": 3,
2053
+ "achievements": ["Led migration to k8s", "Reduced costs 40%"]
2054
+ }
2055
+ ],
2056
+ "education": [
2057
+ {"degree": "BS Computer Science", "institution": "MIT", "year": 2018}
2058
+ ]
2059
+ },
2060
+ confidence_score=0.95,
2061
+ tags=["cv", "engineering", "senior-level"]
2062
+ )
2063
+
2064
+ # Contract extraction
2065
+ contract_ontology = Ontology(
2066
+ name="acme-supplier-agreement-2024",
2067
+ file_id="file-uuid-456",
2068
+ agent_schema_id="contract-parser-v2",
2069
+ provider_name="openai",
2070
+ model_name="gpt-4o",
2071
+ extracted_data={
2072
+ "contract_type": "supplier_agreement",
2073
+ "parties": [
2074
+ {"name": "ACME Corp", "role": "buyer"},
2075
+ {"name": "SupplyChain Inc", "role": "supplier"}
2076
+ ],
2077
+ "effective_date": "2024-01-01",
2078
+ "termination_date": "2026-12-31",
2079
+ "payment_terms": {
2080
+ "amount": 500000,
2081
+ "currency": "USD",
2082
+ "frequency": "quarterly"
2083
+ },
2084
+ "key_obligations": [
2085
+ "Supplier must deliver within 30 days",
2086
+ "Buyer must pay within 60 days of invoice"
2087
+ ]
2088
+ },
2089
+ confidence_score=0.92,
2090
+ tags=["contract", "supplier", "procurement"]
2091
+ )
2092
+
2093
+
2094
+ ## Overview
2095
+
2096
+ The `Ontology` entity is stored in the `ontologies` table. Each record is uniquely
2097
+ identified by its `id` field for lookups and graph traversal.
2098
+
2099
+ ## Search Capabilities
2100
+
2101
+ This schema includes the `search_rem` tool which supports:
2102
+ - **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
2103
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
2104
+ - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM ontologies LIMIT 10`)
2105
+ - **SQL**: Complex queries (e.g., `SELECT * FROM ontologies WHERE ...`)
2106
+
2107
+ ## Table Info
2108
+
2109
+ | Property | Value |
2110
+ |----------|-------|
2111
+ | Table | `ontologies` |
2112
+ | Entity Key | `id` |
2113
+ | Embedding Fields | None |
2114
+ | Tools | `search_rem` |
2115
+
2116
+ ## Fields
2117
+
2118
+ ### `id`
2119
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
2120
+ - **Optional**
2121
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
2122
+
2123
+ ### `created_at`
2124
+ - **Type**: `<class ''datetime.datetime''>`
2125
+ - **Optional**
2126
+ - Entity creation timestamp
2127
+
2128
+ ### `updated_at`
2129
+ - **Type**: `<class ''datetime.datetime''>`
2130
+ - **Optional**
2131
+ - Last update timestamp
2132
+
2133
+ ### `deleted_at`
2134
+ - **Type**: `typing.Optional[datetime.datetime]`
2135
+ - **Optional**
2136
+ - Soft deletion timestamp
2137
+
2138
+ ### `tenant_id`
2139
+ - **Type**: `typing.Optional[str]`
2140
+ - **Optional**
2141
+ - Tenant identifier for multi-tenancy isolation
2142
+
2143
+ ### `user_id`
2144
+ - **Type**: `typing.Optional[str]`
2145
+ - **Optional**
2146
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
2147
+
2148
+ ### `graph_edges`
2149
+ - **Type**: `list[dict]`
2150
+ - **Optional**
2151
+ - Knowledge graph edges stored as InlineEdge dicts
2152
+
2153
+ ### `metadata`
2154
+ - **Type**: `<class ''dict''>`
2155
+ - **Optional**
2156
+ - Flexible metadata storage
2157
+
2158
+ ### `tags`
2159
+ - **Type**: `list[str]`
2160
+ - **Optional**
2161
+ - Entity tags
2162
+
2163
+ ### `name`
2164
+ - **Type**: `<class ''str''>`
2165
+ - **Required**
2166
+
2167
+ ### `file_id`
2168
+ - **Type**: `uuid.UUID | str`
2169
+ - **Required**
2170
+
2171
+ ### `agent_schema_id`
2172
+ - **Type**: `<class ''str''>`
2173
+ - **Required**
2174
+
2175
+ ### `provider_name`
2176
+ - **Type**: `<class ''str''>`
2177
+ - **Required**
2178
+
2179
+ ### `model_name`
2180
+ - **Type**: `<class ''str''>`
2181
+ - **Required**
2182
+
2183
+ ### `extracted_data`
2184
+ - **Type**: `dict[str, typing.Any]`
2185
+ - **Required**
2186
+
2187
+ ### `confidence_score`
2188
+ - **Type**: `typing.Optional[float]`
2189
+ - **Optional**
2190
+
2191
+ ### `extraction_timestamp`
2192
+ - **Type**: `typing.Optional[str]`
2193
+ - **Optional**
2194
+
2195
+ ### `embedding_text`
2196
+ - **Type**: `typing.Optional[str]`
2197
+ - **Optional**
2198
+
2199
+ ',
2200
+ '{"type": "object", "description": "Domain-specific knowledge extracted from files using custom agents.\n\n Attributes:\n name: Human-readable label for this ontology instance\n file_id: Foreign key to File entity that was processed\n agent_schema_id: Foreign key to Schema entity that performed extraction\n provider_name: LLM provider used for extraction (e.g., \"anthropic\", \"openai\")\n model_name: Specific model used (e.g., \"claude-sonnet-4-5\")\n extracted_data: Structured data extracted by agent (arbitrary JSON)\n confidence_score: Optional confidence score from extraction (0.0-1.0)\n extraction_timestamp: When extraction was performed\n embedding_text: Text used for generating embedding (derived from extracted_data)\n\n Inherited from CoreModel:\n id: UUID or string identifier\n created_at: Entity creation timestamp\n updated_at: Last update timestamp\n deleted_at: Soft deletion timestamp\n tenant_id: Multi-tenancy isolation\n user_id: Ownership\n graph_edges: Relationships to other entities\n metadata: Flexible metadata storage\n tags: Classification tags\n column: Database schema metadata\n\n Example Usage:\n # CV extraction\n cv_ontology = Ontology(\n name=\"john-doe-cv-2024\",\n file_id=\"file-uuid-123\",\n agent_schema_id=\"cv-parser-v1\",\n provider_name=\"anthropic\",\n model_name=\"claude-sonnet-4-5-20250929\",\n extracted_data={\n \"candidate_name\": \"John Doe\",\n \"email\": \"john@example.com\",\n \"skills\": [\"Python\", \"PostgreSQL\", \"Kubernetes\"],\n \"experience\": [\n {\n \"company\": \"TechCorp\",\n \"role\": \"Senior Engineer\",\n \"years\": 3,\n \"achievements\": [\"Led migration to k8s\", \"Reduced costs 40%\"]\n }\n ],\n \"education\": [\n {\"degree\": \"BS Computer Science\", \"institution\": \"MIT\", \"year\": 2018}\n ]\n },\n confidence_score=0.95,\n tags=[\"cv\", \"engineering\", \"senior-level\"]\n )\n\n # Contract extraction\n contract_ontology = Ontology(\n name=\"acme-supplier-agreement-2024\",\n 
file_id=\"file-uuid-456\",\n agent_schema_id=\"contract-parser-v2\",\n provider_name=\"openai\",\n model_name=\"gpt-4o\",\n extracted_data={\n \"contract_type\": \"supplier_agreement\",\n \"parties\": [\n {\"name\": \"ACME Corp\", \"role\": \"buyer\"},\n {\"name\": \"SupplyChain Inc\", \"role\": \"supplier\"}\n ],\n \"effective_date\": \"2024-01-01\",\n \"termination_date\": \"2026-12-31\",\n \"payment_terms\": {\n \"amount\": 500000,\n \"currency\": \"USD\",\n \"frequency\": \"quarterly\"\n },\n \"key_obligations\": [\n \"Supplier must deliver within 30 days\",\n \"Buyer must pay within 60 days of invoice\"\n ]\n },\n confidence_score=0.92,\n tags=[\"contract\", \"supplier\", \"procurement\"]\n )\n \n\nThis agent can search the `ontologies` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. 
Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "file_id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}], "title": "File Id"}, "agent_schema_id": {"title": "Agent Schema Id", "type": "string"}, "provider_name": {"title": "Provider Name", "type": "string"}, "model_name": {"title": "Model Name", "type": "string"}, "extracted_data": {"additionalProperties": true, "title": "Extracted Data", "type": "object"}, "confidence_score": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Confidence Score"}, "extraction_timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Extraction Timestamp"}, "embedding_text": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Embedding Text"}}, "required": ["name", "file_id", "agent_schema_id", "provider_name", "model_name", "extracted_data"], "json_schema_extra": {"table_name": "ontologies", "entity_key_field": "id", "embedding_fields": [], "fully_qualified_name": "rem.models.entities.ontology.Ontology", "tools": ["search_rem"], "default_search_table": "ontologies", "has_embeddings": false}}'::jsonb,
2201
+ 'entity',
2202
+ '{"table_name": "ontologies", "entity_key_field": "id", "embedding_fields": [], "fqn": "rem.models.entities.ontology.Ontology"}'::jsonb
2203
+ )
2204
+ ON CONFLICT (id) DO UPDATE SET
2205
+ name = EXCLUDED.name,
2206
+ content = EXCLUDED.content,
2207
+ spec = EXCLUDED.spec,
2208
+ category = EXCLUDED.category,
2209
+ metadata = EXCLUDED.metadata,
2210
+ updated_at = CURRENT_TIMESTAMP;
2211
+
2212
+ -- Schema entry for OntologyConfig (ontology_configs)
2213
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
2214
+ VALUES (
2215
+ '9a7e50d0-ef3a-5641-9ff4-b2be5a77053b'::uuid,
2216
+ 'system',
2217
+ 'OntologyConfig',
2218
+ '# OntologyConfig
2219
+
2220
+ User configuration for automatic ontology extraction.
2221
+
2222
+ Attributes:
2223
+ name: Human-readable config name
2224
+ agent_schema_id: Foreign key to Schema entity to use for extraction
2225
+ description: Purpose and scope of this config
2226
+
2227
+ # File matching rules (ANY matching rule triggers extraction)
2228
+ mime_type_pattern: Regex pattern for file MIME types (e.g., "application/pdf")
2229
+ uri_pattern: Regex pattern for file URIs (e.g., "s3://bucket/resumes/.*")
2230
+ tag_filter: List of tags (file must have ALL tags to match)
2231
+
2232
+ # Execution control
2233
+ priority: Execution order (higher = earlier, default 100)
2234
+ enabled: Whether this config is active (default True)
2235
+
2236
+ # LLM provider configuration
2237
+ provider_name: Optional LLM provider override (defaults to settings)
2238
+ model_name: Optional model override (defaults to settings)
2239
+
2240
+ Inherited from CoreModel:
2241
+ id, created_at, updated_at, deleted_at, tenant_id, user_id,
2242
+ graph_edges, metadata, tags, column
2243
+
2244
+ Example Usage:
2245
+ # CV extraction for recruitment
2246
+ cv_config = OntologyConfig(
2247
+ name="recruitment-cv-parser",
2248
+ agent_schema_id="cv-parser-v1",
2249
+ description="Extract candidate information from resumes",
2250
+ mime_type_pattern="application/pdf",
2251
+ uri_pattern=".*/resumes/.*",
2252
+ tag_filter=["cv", "candidate"],
2253
+ priority=100,
2254
+ enabled=True,
2255
+ tenant_id="acme-corp",
2256
+ tags=["recruitment", "hr"]
2257
+ )
2258
+
2259
+ # Contract analysis for legal team
2260
+ contract_config = OntologyConfig(
2261
+ name="legal-contract-analyzer",
2262
+ agent_schema_id="contract-parser-v2",
2263
+ description="Extract key terms from supplier contracts",
2264
+ mime_type_pattern="application/(pdf|msword|vnd.openxmlformats.*)",
2265
+ tag_filter=["legal", "contract"],
2266
+ priority=200, # Higher priority = runs first
2267
+ enabled=True,
2268
+ provider_name="openai", # Override default provider
2269
+ model_name="gpt-4o",
2270
+ tenant_id="acme-corp",
2271
+ tags=["legal", "procurement"]
2272
+ )
2273
+
2274
+ # Medical records for healthcare
2275
+ medical_config = OntologyConfig(
2276
+ name="medical-records-extractor",
2277
+ agent_schema_id="medical-parser-v1",
2278
+ description="Extract diagnoses and treatments from medical records",
2279
+ mime_type_pattern="application/pdf",
2280
+ tag_filter=["medical", "patient-record"],
2281
+ priority=50,
2282
+ enabled=True,
2283
+ tenant_id="healthsystem",
2284
+ tags=["medical", "hipaa-compliant"]
2285
+ )
2286
+
2287
+
2288
+ ## Overview
2289
+
2290
+ The `OntologyConfig` entity is stored in the `ontology_configs` table. Each record is uniquely
2291
+ identified by its `id` field for lookups and graph traversal.
2292
+
2293
+ ## Search Capabilities
2294
+
2295
+ This schema includes the `search_rem` tool which supports:
2296
+ - **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
2297
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
2298
+ - **SEARCH**: Semantic vector search on description (e.g., `SEARCH "concept" FROM ontology_configs LIMIT 10`)
2299
+ - **SQL**: Complex queries (e.g., `SELECT * FROM ontology_configs WHERE ...`)
2300
+
2301
+ ## Table Info
2302
+
2303
+ | Property | Value |
2304
+ |----------|-------|
2305
+ | Table | `ontology_configs` |
2306
+ | Entity Key | `id` |
2307
+ | Embedding Fields | `description` |
2308
+ | Tools | `search_rem` |
2309
+
2310
+ ## Fields
2311
+
2312
+ ### `id`
2313
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
2314
+ - **Optional**
2315
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
2316
+
2317
+ ### `created_at`
2318
+ - **Type**: `<class ''datetime.datetime''>`
2319
+ - **Optional**
2320
+ - Entity creation timestamp
2321
+
2322
+ ### `updated_at`
2323
+ - **Type**: `<class ''datetime.datetime''>`
2324
+ - **Optional**
2325
+ - Last update timestamp
2326
+
2327
+ ### `deleted_at`
2328
+ - **Type**: `typing.Optional[datetime.datetime]`
2329
+ - **Optional**
2330
+ - Soft deletion timestamp
2331
+
2332
+ ### `tenant_id`
2333
+ - **Type**: `typing.Optional[str]`
2334
+ - **Optional**
2335
+ - Tenant identifier for multi-tenancy isolation
2336
+
2337
+ ### `user_id`
2338
+ - **Type**: `typing.Optional[str]`
2339
+ - **Optional**
2340
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
2341
+
2342
+ ### `graph_edges`
2343
+ - **Type**: `list[dict]`
2344
+ - **Optional**
2345
+ - Knowledge graph edges stored as InlineEdge dicts
2346
+
2347
+ ### `metadata`
2348
+ - **Type**: `<class ''dict''>`
2349
+ - **Optional**
2350
+ - Flexible metadata storage
2351
+
2352
+ ### `tags`
2353
+ - **Type**: `list[str]`
2354
+ - **Optional**
2355
+ - Entity tags
2356
+
2357
+ ### `name`
2358
+ - **Type**: `<class ''str''>`
2359
+ - **Required**
2360
+
2361
+ ### `agent_schema_id`
2362
+ - **Type**: `<class ''str''>`
2363
+ - **Required**
2364
+
2365
+ ### `description`
2366
+ - **Type**: `typing.Optional[str]`
2367
+ - **Optional**
2368
+
2369
+ ### `mime_type_pattern`
2370
+ - **Type**: `typing.Optional[str]`
2371
+ - **Optional**
2372
+
2373
+ ### `uri_pattern`
2374
+ - **Type**: `typing.Optional[str]`
2375
+ - **Optional**
2376
+
2377
+ ### `tag_filter`
2378
+ - **Type**: `list[str]`
2379
+ - **Optional**
2380
+
2381
+ ### `priority`
2382
+ - **Type**: `<class ''int''>`
2383
+ - **Optional**
2384
+
2385
+ ### `enabled`
2386
+ - **Type**: `<class ''bool''>`
2387
+ - **Optional**
2388
+
2389
+ ### `provider_name`
2390
+ - **Type**: `typing.Optional[str]`
2391
+ - **Optional**
2392
+
2393
+ ### `model_name`
2394
+ - **Type**: `typing.Optional[str]`
2395
+ - **Optional**
2396
+
2397
+ ',
2398
+ '{"type": "object", "description": "User configuration for automatic ontology extraction.\n\n Attributes:\n name: Human-readable config name\n agent_schema_id: Foreign key to Schema entity to use for extraction\n description: Purpose and scope of this config\n\n # File matching rules (ANY matching rule triggers extraction)\n mime_type_pattern: Regex pattern for file MIME types (e.g., \"application/pdf\")\n uri_pattern: Regex pattern for file URIs (e.g., \"s3://bucket/resumes/.*\")\n tag_filter: List of tags (file must have ALL tags to match)\n\n # Execution control\n priority: Execution order (higher = earlier, default 100)\n enabled: Whether this config is active (default True)\n\n # LLM provider configuration\n provider_name: Optional LLM provider override (defaults to settings)\n model_name: Optional model override (defaults to settings)\n\n Inherited from CoreModel:\n id, created_at, updated_at, deleted_at, tenant_id, user_id,\n graph_edges, metadata, tags, column\n\n Example Usage:\n # CV extraction for recruitment\n cv_config = OntologyConfig(\n name=\"recruitment-cv-parser\",\n agent_schema_id=\"cv-parser-v1\",\n description=\"Extract candidate information from resumes\",\n mime_type_pattern=\"application/pdf\",\n uri_pattern=\".*/resumes/.*\",\n tag_filter=[\"cv\", \"candidate\"],\n priority=100,\n enabled=True,\n tenant_id=\"acme-corp\",\n tags=[\"recruitment\", \"hr\"]\n )\n\n # Contract analysis for legal team\n contract_config = OntologyConfig(\n name=\"legal-contract-analyzer\",\n agent_schema_id=\"contract-parser-v2\",\n description=\"Extract key terms from supplier contracts\",\n mime_type_pattern=\"application/(pdf|msword|vnd.openxmlformats.*)\",\n tag_filter=[\"legal\", \"contract\"],\n priority=200, # Higher priority = runs first\n enabled=True,\n provider_name=\"openai\", # Override default provider\n model_name=\"gpt-4o\",\n tenant_id=\"acme-corp\",\n tags=[\"legal\", \"procurement\"]\n )\n\n # Medical records for healthcare\n medical_config = 
OntologyConfig(\n name=\"medical-records-extractor\",\n agent_schema_id=\"medical-parser-v1\",\n description=\"Extract diagnoses and treatments from medical records\",\n mime_type_pattern=\"application/pdf\",\n tag_filter=[\"medical\", \"patient-record\"],\n priority=50,\n enabled=True,\n tenant_id=\"healthsystem\",\n tags=[\"medical\", \"hipaa-compliant\"]\n )\n \n\nThis agent can search the `ontology_configs` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "agent_schema_id": {"title": "Agent Schema Id", "type": "string"}, "description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Description"}, "mime_type_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Mime Type Pattern"}, "uri_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri Pattern"}, "tag_filter": {"default": [], "items": {"type": "string"}, "title": "Tag Filter", "type": "array"}, "priority": {"default": 100, "title": "Priority", "type": "integer"}, "enabled": {"default": true, "title": "Enabled", "type": "boolean"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}}, "required": ["name", "agent_schema_id"], "json_schema_extra": {"table_name": "ontology_configs", "entity_key_field": "id", "embedding_fields": ["description"], "fully_qualified_name": "rem.models.entities.ontology_config.OntologyConfig", "tools": ["search_rem"], "default_search_table": "ontology_configs", "has_embeddings": true}}'::jsonb,
2399
+ 'entity',
2400
+ '{"table_name": "ontology_configs", "entity_key_field": "id", "embedding_fields": ["description"], "fqn": "rem.models.entities.ontology_config.OntologyConfig"}'::jsonb
2401
+ )
2402
+ ON CONFLICT (id) DO UPDATE SET
2403
+ name = EXCLUDED.name,
2404
+ content = EXCLUDED.content,
2405
+ spec = EXCLUDED.spec,
2406
+ category = EXCLUDED.category,
2407
+ metadata = EXCLUDED.metadata,
2408
+ updated_at = CURRENT_TIMESTAMP;
2409
+
2410
+ -- Schema entry for Resource (resources)
2411
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
2412
+ VALUES (
2413
+ 'a579f379-4f1c-5414-8ff4-1382d0f783b7'::uuid,
2414
+ 'system',
2415
+ 'Resource',
2416
+ '# Resource
2417
+
2418
+
2419
+ Base content unit in REM.
2420
+
2421
+ Resources are content units that feed into dreaming workflows for moment
2422
+ extraction and affinity graph construction. Tenant isolation is provided
2423
+ via CoreModel.tenant_id field.
2424
+
2425
+
2426
+ ## Overview
2427
+
2428
+ The `Resource` entity is stored in the `resources` table. Each record is uniquely
2429
+ identified by its `name` field for lookups and graph traversal.
2430
+
2431
+ ## Search Capabilities
2432
+
2433
+ This schema includes the `search_rem` tool which supports:
2434
+ - **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
2435
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
2436
+ - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM resources LIMIT 10`)
2437
+ - **SQL**: Complex queries (e.g., `SELECT * FROM resources WHERE ...`)
2438
+
2439
+ ## Table Info
2440
+
2441
+ | Property | Value |
2442
+ |----------|-------|
2443
+ | Table | `resources` |
2444
+ | Entity Key | `name` |
2445
+ | Embedding Fields | `content` |
2446
+ | Tools | `search_rem` |
2447
+
2448
+ ## Fields
2449
+
2450
+ ### `id`
2451
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
2452
+ - **Optional**
2453
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
2454
+
2455
+ ### `created_at`
2456
+ - **Type**: `<class ''datetime.datetime''>`
2457
+ - **Optional**
2458
+ - Entity creation timestamp
2459
+
2460
+ ### `updated_at`
2461
+ - **Type**: `<class ''datetime.datetime''>`
2462
+ - **Optional**
2463
+ - Last update timestamp
2464
+
2465
+ ### `deleted_at`
2466
+ - **Type**: `typing.Optional[datetime.datetime]`
2467
+ - **Optional**
2468
+ - Soft deletion timestamp
2469
+
2470
+ ### `tenant_id`
2471
+ - **Type**: `typing.Optional[str]`
2472
+ - **Optional**
2473
+ - Tenant identifier for multi-tenancy isolation
2474
+
2475
+ ### `user_id`
2476
+ - **Type**: `typing.Optional[str]`
2477
+ - **Optional**
2478
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
2479
+
2480
+ ### `graph_edges`
2481
+ - **Type**: `list[dict]`
2482
+ - **Optional**
2483
+ - Knowledge graph edges stored as InlineEdge dicts
2484
+
2485
+ ### `metadata`
2486
+ - **Type**: `<class ''dict''>`
2487
+ - **Optional**
2488
+ - Flexible metadata storage
2489
+
2490
+ ### `tags`
2491
+ - **Type**: `list[str]`
2492
+ - **Optional**
2493
+ - Entity tags
2494
+
2495
+ ### `name`
2496
+ - **Type**: `typing.Optional[str]`
2497
+ - **Optional**
2498
+ - Human-readable resource name (used as graph label). Auto-generated from uri+ordinal if not provided.
2499
+
2500
+ ### `uri`
2501
+ - **Type**: `typing.Optional[str]`
2502
+ - **Optional**
2503
+ - Content URI or identifier (file path, URL, etc.)
2504
+
2505
+ ### `ordinal`
2506
+ - **Type**: `<class ''int''>`
2507
+ - **Optional**
2508
+ - Chunk ordinal for splitting large documents (0 for single-chunk resources)
2509
+
2510
+ ### `content`
2511
+ - **Type**: `<class ''str''>`
2512
+ - **Optional**
2513
+ - Resource content text
2514
+
2515
+ ### `timestamp`
2516
+ - **Type**: `<class ''datetime.datetime''>`
2517
+ - **Optional**
2518
+ - Resource timestamp (content creation/publication time)
2519
+
2520
+ ### `category`
2521
+ - **Type**: `typing.Optional[str]`
2522
+ - **Optional**
2523
+ - Resource category (document, conversation, artifact, etc.)
2524
+
2525
+ ### `related_entities`
2526
+ - **Type**: `list[dict]`
2527
+ - **Optional**
2528
+ - Extracted entities (people, projects, concepts) with metadata
2529
+
2530
+ ',
2531
+ '{"type": "object", "description": "\n Base content unit in REM.\n\n Resources are content units that feed into dreaming workflows for moment\n extraction and affinity graph construction. Tenant isolation is provided\n via CoreModel.tenant_id field.\n \n\nThis agent can search the `resources` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Human-readable resource name (used as graph label). Auto-generated from uri+ordinal if not provided.", "entity_key": true, "title": "Name"}, "uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Content URI or identifier (file path, URL, etc.)", "title": "Uri"}, "ordinal": {"composite_key": true, "default": 0, "description": "Chunk ordinal for splitting large documents (0 for single-chunk resources)", "title": "Ordinal", "type": "integer"}, "content": {"default": "", "description": "Resource content text", "title": "Content", "type": "string"}, "timestamp": {"description": "Resource timestamp (content creation/publication time)", "format": "date-time", "title": "Timestamp", "type": "string"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Resource category (document, conversation, artifact, etc.)", "title": "Category"}, "related_entities": {"description": "Extracted entities (people, projects, concepts) with metadata", "items": {"additionalProperties": true, "type": "object"}, "title": "Related Entities", "type": "array"}}, "required": [], "json_schema_extra": {"table_name": "resources", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.resource.Resource", "tools": ["search_rem"], "default_search_table": 
"resources", "has_embeddings": true}}'::jsonb,
2532
+ 'entity',
2533
+ '{"table_name": "resources", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.resource.Resource"}'::jsonb
2534
+ )
2535
+ ON CONFLICT (id) DO UPDATE SET
2536
+ name = EXCLUDED.name,
2537
+ content = EXCLUDED.content,
2538
+ spec = EXCLUDED.spec,
2539
+ category = EXCLUDED.category,
2540
+ metadata = EXCLUDED.metadata,
2541
+ updated_at = CURRENT_TIMESTAMP;
2542
+
2543
+ -- Schema entry for Schema (schemas)
2544
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
2545
+ VALUES (
2546
+ '2372e956-add6-58b8-a638-758a91a2b6c4'::uuid,
2547
+ 'system',
2548
+ 'Schema',
2549
+ '# Schema
2550
+
2551
+
2552
+ Agent schema definition.
2553
+
2554
+ Schemas define agents that can be dynamically loaded into Pydantic AI.
2555
+ They store JsonSchema specifications with embedded metadata for tools,
2556
+ resources, and system prompts.
2557
+
2558
+ For ontology extraction agents:
2559
+ - `provider_configs` enables multi-provider support (test across Anthropic, OpenAI, etc.)
2560
+ - `embedding_fields` specifies which output fields should be embedded for semantic search
2561
+
2562
+ Tenant isolation is provided via CoreModel.tenant_id field.
2563
+
2564
+
2565
+ ## Overview
2566
+
2567
+ The `Schema` entity is stored in the `schemas` table. Each record is uniquely
2568
+ identified by its `id` field for lookups and graph traversal.
2569
+
2570
+ ## Search Capabilities
2571
+
2572
+ This schema includes the `search_rem` tool which supports:
2573
+ - **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
2574
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
2575
+ - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM schemas LIMIT 10`)
2576
+ - **SQL**: Complex queries (e.g., `SELECT * FROM schemas WHERE ...`)
2577
+
2578
+ ## Table Info
2579
+
2580
+ | Property | Value |
2581
+ |----------|-------|
2582
+ | Table | `schemas` |
2583
+ | Entity Key | `id` |
2584
+ | Embedding Fields | `content` |
2585
+ | Tools | `search_rem` |
2586
+
2587
+ ## Fields
2588
+
2589
+ ### `id`
2590
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
2591
+ - **Optional**
2592
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
2593
+
2594
+ ### `created_at`
2595
+ - **Type**: `<class ''datetime.datetime''>`
2596
+ - **Optional**
2597
+ - Entity creation timestamp
2598
+
2599
+ ### `updated_at`
2600
+ - **Type**: `<class ''datetime.datetime''>`
2601
+ - **Optional**
2602
+ - Last update timestamp
2603
+
2604
+ ### `deleted_at`
2605
+ - **Type**: `typing.Optional[datetime.datetime]`
2606
+ - **Optional**
2607
+ - Soft deletion timestamp
2608
+
2609
+ ### `tenant_id`
2610
+ - **Type**: `typing.Optional[str]`
2611
+ - **Optional**
2612
+ - Tenant identifier for multi-tenancy isolation
2613
+
2614
+ ### `user_id`
2615
+ - **Type**: `typing.Optional[str]`
2616
+ - **Optional**
2617
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
2618
+
2619
+ ### `graph_edges`
2620
+ - **Type**: `list[dict]`
2621
+ - **Optional**
2622
+ - Knowledge graph edges stored as InlineEdge dicts
2623
+
2624
+ ### `metadata`
2625
+ - **Type**: `<class ''dict''>`
2626
+ - **Optional**
2627
+ - Flexible metadata storage
2628
+
2629
+ ### `tags`
2630
+ - **Type**: `list[str]`
2631
+ - **Optional**
2632
+ - Entity tags
2633
+
2634
+ ### `name`
2635
+ - **Type**: `<class ''str''>`
2636
+ - **Required**
2637
+ - Human-readable schema name (used as identifier)
2638
+
2639
+ ### `content`
2640
+ - **Type**: `<class ''str''>`
2641
+ - **Optional**
2642
+ - Markdown documentation and instructions for the schema
2643
+
2644
+ ### `spec`
2645
+ - **Type**: `<class ''dict''>`
2646
+ - **Required**
2647
+ - JsonSchema specification defining the agent structure and capabilities
2648
+
2649
+ ### `category`
2650
+ - **Type**: `typing.Optional[str]`
2651
+ - **Optional**
2652
+ - Schema category distinguishing schema types. Values: ''agent'' (AI agents), ''evaluator'' (LLM-as-a-Judge evaluators). Maps directly from json_schema_extra.kind field during ingestion.
2653
+
2654
+ ### `provider_configs`
2655
+ - **Type**: `list[dict]`
2656
+ - **Optional**
2657
+ - Optional provider configurations for multi-provider testing. Each dict has ''provider_name'' and ''model_name''. Example: [{''provider_name'': ''anthropic'', ''model_name'': ''claude-sonnet-4-5''}]
2658
+
2659
+ ### `embedding_fields`
2660
+ - **Type**: `list[str]`
2661
+ - **Optional**
2662
+ - JSON paths in extracted_data to embed for semantic search. Example: [''summary'', ''candidate_name'', ''skills''] for CV extraction. Values will be concatenated and embedded using configured embedding provider.
2663
+
2664
+ ',
2665
+ '{"type": "object", "description": "\n Agent schema definition.\n\n Schemas define agents that can be dynamically loaded into Pydantic AI.\n They store JsonSchema specifications with embedded metadata for tools,\n resources, and system prompts.\n\n For ontology extraction agents:\n - `provider_configs` enables multi-provider support (test across Anthropic, OpenAI, etc.)\n - `embedding_fields` specifies which output fields should be embedded for semantic search\n\n Tenant isolation is provided via CoreModel.tenant_id field.\n \n\nThis agent can search the `schemas` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "Human-readable schema name (used as identifier)", "title": "Name", "type": "string"}, "content": {"default": "", "description": "Markdown documentation and instructions for the schema", "title": "Content", "type": "string"}, "spec": {"additionalProperties": true, "description": "JsonSchema specification defining the agent structure and capabilities", "title": "Spec", "type": "object"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Schema category distinguishing schema types. Values: ''agent'' (AI agents), ''evaluator'' (LLM-as-a-Judge evaluators). Maps directly from json_schema_extra.kind field during ingestion.", "title": "Category"}, "provider_configs": {"description": "Optional provider configurations for multi-provider testing. Each dict has ''provider_name'' and ''model_name''. Example: [{''provider_name'': ''anthropic'', ''model_name'': ''claude-sonnet-4-5''}]", "items": {"additionalProperties": true, "type": "object"}, "title": "Provider Configs", "type": "array"}, "embedding_fields": {"description": "JSON paths in extracted_data to embed for semantic search. Example: [''summary'', ''candidate_name'', ''skills''] for CV extraction. 
Values will be concatenated and embedded using configured embedding provider.", "items": {"type": "string"}, "title": "Embedding Fields", "type": "array"}}, "required": ["name", "spec"], "json_schema_extra": {"table_name": "schemas", "entity_key_field": "id", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.schema.Schema", "tools": ["search_rem"], "default_search_table": "schemas", "has_embeddings": true}}'::jsonb,
2666
+ 'entity',
2667
+ '{"table_name": "schemas", "entity_key_field": "id", "embedding_fields": ["content"], "fqn": "rem.models.entities.schema.Schema"}'::jsonb
2668
+ )
2669
+ ON CONFLICT (id) DO UPDATE SET
2670
+ name = EXCLUDED.name,
2671
+ content = EXCLUDED.content,
2672
+ spec = EXCLUDED.spec,
2673
+ category = EXCLUDED.category,
2674
+ metadata = EXCLUDED.metadata,
2675
+ updated_at = CURRENT_TIMESTAMP;
2676
+
2677
+ -- Schema entry for Session (sessions)
2678
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
2679
+ VALUES (
2680
+ '5893fbca-2d8e-5402-ac41-7bac2c0c472a'::uuid,
2681
+ 'system',
2682
+ 'Session',
2683
+ '# Session
2684
+
2685
+
2686
+ Conversation session container.
2687
+
2688
+ Groups messages together and supports different modes for normal conversations
2689
+ and evaluation/experimentation scenarios.
2690
+
2691
+ For evaluation sessions, stores:
2692
+ - original_trace_id: Reference to the original session being evaluated
2693
+ - settings_overrides: Model, temperature, prompt overrides
2694
+ - prompt: Custom prompt being tested
2695
+
2696
+ Default sessions are lightweight - just a session_id on messages.
2697
+ Special sessions store additional metadata for experiments.
2698
+
2699
+
2700
+ ## Overview
2701
+
2702
+ The `Session` entity is stored in the `sessions` table. Each record is uniquely
2703
+ identified by its `name` field for lookups and graph traversal.
2704
+
2705
+ ## Search Capabilities
2706
+
2707
+ This schema includes the `search_rem` tool which supports:
2708
+ - **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
2709
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
2710
+ - **SEARCH**: Semantic vector search on description (e.g., `SEARCH "concept" FROM sessions LIMIT 10`)
2711
+ - **SQL**: Complex queries (e.g., `SELECT * FROM sessions WHERE ...`)
2712
+
2713
+ ## Table Info
2714
+
2715
+ | Property | Value |
2716
+ |----------|-------|
2717
+ | Table | `sessions` |
2718
+ | Entity Key | `name` |
2719
+ | Embedding Fields | `description` |
2720
+ | Tools | `search_rem` |
2721
+
2722
+ ## Fields
2723
+
2724
+ ### `id`
2725
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
2726
+ - **Optional**
2727
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
2728
+
2729
+ ### `created_at`
2730
+ - **Type**: `<class ''datetime.datetime''>`
2731
+ - **Optional**
2732
+ - Entity creation timestamp
2733
+
2734
+ ### `updated_at`
2735
+ - **Type**: `<class ''datetime.datetime''>`
2736
+ - **Optional**
2737
+ - Last update timestamp
2738
+
2739
+ ### `deleted_at`
2740
+ - **Type**: `typing.Optional[datetime.datetime]`
2741
+ - **Optional**
2742
+ - Soft deletion timestamp
2743
+
2744
+ ### `tenant_id`
2745
+ - **Type**: `typing.Optional[str]`
2746
+ - **Optional**
2747
+ - Tenant identifier for multi-tenancy isolation
2748
+
2749
+ ### `user_id`
2750
+ - **Type**: `typing.Optional[str]`
2751
+ - **Optional**
2752
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
2753
+
2754
+ ### `graph_edges`
2755
+ - **Type**: `list[dict]`
2756
+ - **Optional**
2757
+ - Knowledge graph edges stored as InlineEdge dicts
2758
+
2759
+ ### `metadata`
2760
+ - **Type**: `<class ''dict''>`
2761
+ - **Optional**
2762
+ - Flexible metadata storage
2763
+
2764
+ ### `tags`
2765
+ - **Type**: `list[str]`
2766
+ - **Optional**
2767
+ - Entity tags
2768
+
2769
+ ### `name`
2770
+ - **Type**: `<class ''str''>`
2771
+ - **Required**
2772
+ - Session name/identifier
2773
+
2774
+ ### `mode`
2775
+ - **Type**: `<enum ''SessionMode''>`
2776
+ - **Optional**
2777
+ - Session mode: ''normal'' or ''evaluation''
2778
+
2779
+ ### `description`
2780
+ - **Type**: `str | None`
2781
+ - **Optional**
2782
+ - Optional session description
2783
+
2784
+ ### `original_trace_id`
2785
+ - **Type**: `str | None`
2786
+ - **Optional**
2787
+ - For evaluation mode: ID of the original session/trace being evaluated
2788
+
2789
+ ### `settings_overrides`
2790
+ - **Type**: `dict | None`
2791
+ - **Optional**
2792
+ - Settings overrides (model, temperature, max_tokens, system_prompt)
2793
+
2794
+ ### `prompt`
2795
+ - **Type**: `str | None`
2796
+ - **Optional**
2797
+ - Custom prompt for this session (can override agent prompt)
2798
+
2799
+ ### `agent_schema_uri`
2800
+ - **Type**: `str | None`
2801
+ - **Optional**
2802
+ - Agent schema used for this session
2803
+
2804
+ ### `message_count`
2805
+ - **Type**: `<class ''int''>`
2806
+ - **Optional**
2807
+ - Number of messages in this session
2808
+
2809
+ ### `total_tokens`
2810
+ - **Type**: `int | None`
2811
+ - **Optional**
2812
+ - Total tokens used in this session
2813
+
2814
+ ',
2815
+ '{"type": "object", "description": "\n Conversation session container.\n\n Groups messages together and supports different modes for normal conversations\n and evaluation/experimentation scenarios.\n\n For evaluation sessions, stores:\n - original_trace_id: Reference to the original session being evaluated\n - settings_overrides: Model, temperature, prompt overrides\n - prompt: Custom prompt being tested\n\n Default sessions are lightweight - just a session_id on messages.\n Special sessions store additional metadata for experiments.\n \n\nThis agent can search the `sessions` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "Session name/identifier", "entity_key": true, "title": "Name", "type": "string"}, "mode": {"$ref": "#/$defs/SessionMode", "default": "normal", "description": "Session mode: ''normal'' or ''evaluation''"}, "description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Optional session description", "title": "Description"}, "original_trace_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "For evaluation mode: ID of the original session/trace being evaluated", "title": "Original Trace Id"}, "settings_overrides": {"anyOf": [{"additionalProperties": true, "type": "object"}, {"type": "null"}], "default": null, "description": "Settings overrides (model, temperature, max_tokens, system_prompt)", "title": "Settings Overrides"}, "prompt": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Custom prompt for this session (can override agent prompt)", "title": "Prompt"}, "agent_schema_uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Agent schema used for this session", "title": "Agent Schema Uri"}, "message_count": {"default": 0, "description": "Number of messages in this session", "title": "Message Count", "type": "integer"}, "total_tokens": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "Total tokens used in this session", "title": "Total 
Tokens"}}, "required": ["name"], "json_schema_extra": {"table_name": "sessions", "entity_key_field": "name", "embedding_fields": ["description"], "fully_qualified_name": "rem.models.entities.session.Session", "tools": ["search_rem"], "default_search_table": "sessions", "has_embeddings": true}}'::jsonb,
2816
+ 'entity',
2817
+ '{"table_name": "sessions", "entity_key_field": "name", "embedding_fields": ["description"], "fqn": "rem.models.entities.session.Session"}'::jsonb
2818
+ )
2819
+ ON CONFLICT (id) DO UPDATE SET
2820
+ name = EXCLUDED.name,
2821
+ content = EXCLUDED.content,
2822
+ spec = EXCLUDED.spec,
2823
+ category = EXCLUDED.category,
2824
+ metadata = EXCLUDED.metadata,
2825
+ updated_at = CURRENT_TIMESTAMP;
2826
+
2827
+ -- Schema entry for SharedSession (shared_sessions)
2828
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
2829
+ VALUES (
2830
+ 'be5c5711-6c45-5fc4-9cd1-e076599261c7'::uuid,
2831
+ 'system',
2832
+ 'SharedSession',
2833
+ '# SharedSession
2834
+
2835
+
2836
+ Session sharing record between users.
2837
+
2838
+ Links a session (identified by session_id from Message records) to a
2839
+ recipient user, enabling collaborative access to conversation history.
2840
+
2841
+
2842
+ ## Overview
2843
+
2844
+ The `SharedSession` entity is stored in the `shared_sessions` table. Each record is uniquely
2845
+ identified by its `id` field for lookups and graph traversal.
2846
+
2847
+ ## Search Capabilities
2848
+
2849
+ This schema includes the `search_rem` tool which supports:
2850
+ - **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "<id>"`)
2851
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
2852
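+ - **SEARCH**: Semantic vector search (e.g., `SEARCH "concept" FROM shared_sessions LIMIT 10`). Note: no embedding fields are configured for this table (see Table Info below), so prefer LOOKUP or SQL here.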
2853
+ - **SQL**: Complex queries (e.g., `SELECT * FROM shared_sessions WHERE ...`)
2854
+
2855
+ ## Table Info
2856
+
2857
+ | Property | Value |
2858
+ |----------|-------|
2859
+ | Table | `shared_sessions` |
2860
+ | Entity Key | `id` |
2861
+ | Embedding Fields | None |
2862
+ | Tools | `search_rem` |
2863
+
2864
+ ## Fields
2865
+
2866
+ ### `id`
2867
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
2868
+ - **Optional**
2869
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
2870
+
2871
+ ### `created_at`
2872
+ - **Type**: `<class ''datetime.datetime''>`
2873
+ - **Optional**
2874
+ - Entity creation timestamp
2875
+
2876
+ ### `updated_at`
2877
+ - **Type**: `<class ''datetime.datetime''>`
2878
+ - **Optional**
2879
+ - Last update timestamp
2880
+
2881
+ ### `deleted_at`
2882
+ - **Type**: `typing.Optional[datetime.datetime]`
2883
+ - **Optional**
2884
+ - Soft deletion timestamp
2885
+
2886
+ ### `tenant_id`
2887
+ - **Type**: `typing.Optional[str]`
2888
+ - **Optional**
2889
+ - Tenant identifier for multi-tenancy isolation
2890
+
2891
+ ### `user_id`
2892
+ - **Type**: `typing.Optional[str]`
2893
+ - **Optional**
2894
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
2895
+
2896
+ ### `graph_edges`
2897
+ - **Type**: `list[dict]`
2898
+ - **Optional**
2899
+ - Knowledge graph edges stored as InlineEdge dicts
2900
+
2901
+ ### `metadata`
2902
+ - **Type**: `<class ''dict''>`
2903
+ - **Optional**
2904
+ - Flexible metadata storage
2905
+
2906
+ ### `tags`
2907
+ - **Type**: `list[str]`
2908
+ - **Optional**
2909
+ - Entity tags
2910
+
2911
+ ### `session_id`
2912
+ - **Type**: `<class ''str''>`
2913
+ - **Required**
2914
+ - The session being shared (matches Message.session_id)
2915
+
2916
+ ### `owner_user_id`
2917
+ - **Type**: `<class ''str''>`
2918
+ - **Required**
2919
+ - User ID of the session owner (the sharer)
2920
+
2921
+ ### `shared_with_user_id`
2922
+ - **Type**: `<class ''str''>`
2923
+ - **Required**
2924
+ - User ID of the recipient (who can now view the session)
2925
+
2926
+ ',
2927
+ '{"type": "object", "description": "\n Session sharing record between users.\n\n Links a session (identified by session_id from Message records) to a\n recipient user, enabling collaborative access to conversation history.\n \n\nThis agent can search the `shared_sessions` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "session_id": {"description": "The session being shared (matches Message.session_id)", "title": "Session Id", "type": "string"}, "owner_user_id": {"description": "User ID of the session owner (the sharer)", "title": "Owner User Id", "type": "string"}, "shared_with_user_id": {"description": "User ID of the recipient (who can now view the session)", "title": "Shared With User Id", "type": "string"}}, "required": ["session_id", "owner_user_id", "shared_with_user_id"], "json_schema_extra": {"table_name": "shared_sessions", "entity_key_field": "id", "embedding_fields": [], "fully_qualified_name": "rem.models.entities.shared_session.SharedSession", "tools": ["search_rem"], "default_search_table": "shared_sessions", "has_embeddings": false}}'::jsonb,
2928
+ 'entity',
2929
+ '{"table_name": "shared_sessions", "entity_key_field": "id", "embedding_fields": [], "fqn": "rem.models.entities.shared_session.SharedSession"}'::jsonb
2930
+ )
2931
+ ON CONFLICT (id) DO UPDATE SET
2932
+ name = EXCLUDED.name,
2933
+ content = EXCLUDED.content,
2934
+ spec = EXCLUDED.spec,
2935
+ category = EXCLUDED.category,
2936
+ metadata = EXCLUDED.metadata,
2937
+ updated_at = CURRENT_TIMESTAMP;
2938
+
2939
+ -- Schema entry for User (users)
2940
+ INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
2941
+ VALUES (
2942
+ '1ad3d95e-32e9-54d6-ad7d-e39b9ed5018b'::uuid,
2943
+ 'system',
2944
+ 'User',
2945
+ '# User
2946
+
2947
+
2948
+ User entity.
2949
+
2950
+ Represents people in the REM system, either as active users
2951
+ or entities extracted from content. Tenant isolation is provided
2952
+ via CoreModel.tenant_id field.
2953
+
2954
+ Enhanced by dreaming worker:
2955
+ - summary: Generated from activity analysis
2956
+ - interests: Extracted from resources and sessions
2957
+ - activity_level: Computed from recent engagement
2958
+ - preferred_topics: Extracted from moment/resource topics
2959
+
2960
+
2961
+ ## Overview
2962
+
2963
+ The `User` entity is stored in the `users` table. Each record is uniquely
2964
+ identified by its `name` field for lookups and graph traversal.
2965
+
2966
+ ## Search Capabilities
2967
+
2968
+ This schema includes the `search_rem` tool which supports:
2969
+ - **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
2970
+ - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
2971
+ - **SEARCH**: Semantic vector search on summary (e.g., `SEARCH "concept" FROM users LIMIT 10`)
2972
+ - **SQL**: Complex queries (e.g., `SELECT * FROM users WHERE ...`)
2973
+
2974
+ ## Table Info
2975
+
2976
+ | Property | Value |
2977
+ |----------|-------|
2978
+ | Table | `users` |
2979
+ | Entity Key | `name` |
2980
+ | Embedding Fields | `summary` |
2981
+ | Tools | `search_rem` |
2982
+
2983
+ ## Fields
2984
+
2985
+ ### `id`
2986
+ - **Type**: `typing.Union[uuid.UUID, str, NoneType]`
2987
+ - **Optional**
2988
+ - Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.
2989
+
2990
+ ### `created_at`
2991
+ - **Type**: `<class ''datetime.datetime''>`
2992
+ - **Optional**
2993
+ - Entity creation timestamp
2994
+
2995
+ ### `updated_at`
2996
+ - **Type**: `<class ''datetime.datetime''>`
2997
+ - **Optional**
2998
+ - Last update timestamp
2999
+
3000
+ ### `deleted_at`
3001
+ - **Type**: `typing.Optional[datetime.datetime]`
3002
+ - **Optional**
3003
+ - Soft deletion timestamp
3004
+
3005
+ ### `tenant_id`
3006
+ - **Type**: `typing.Optional[str]`
3007
+ - **Optional**
3008
+ - Tenant identifier for multi-tenancy isolation
3009
+
3010
+ ### `user_id`
3011
+ - **Type**: `typing.Optional[str]`
3012
+ - **Optional**
3013
+ - Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.
3014
+
3015
+ ### `graph_edges`
3016
+ - **Type**: `list[dict]`
3017
+ - **Optional**
3018
+ - Knowledge graph edges stored as InlineEdge dicts
3019
+
3020
+ ### `metadata`
3021
+ - **Type**: `<class ''dict''>`
3022
+ - **Optional**
3023
+ - Flexible metadata storage
3024
+
3025
+ ### `tags`
3026
+ - **Type**: `list[str]`
3027
+ - **Optional**
3028
+ - Entity tags
3029
+
3030
+ ### `name`
3031
+ - **Type**: `<class ''str''>`
3032
+ - **Required**
3033
+ - User name (human-readable, used as graph label)
3034
+
3035
+ ### `email`
3036
+ - **Type**: `typing.Optional[str]`
3037
+ - **Optional**
3038
+ - User email address
3039
+
3040
+ ### `role`
3041
+ - **Type**: `typing.Optional[str]`
3042
+ - **Optional**
3043
+ - User role (employee, contractor, external, etc.)
3044
+
3045
+ ### `tier`
3046
+ - **Type**: `<enum ''UserTier''>`
3047
+ - **Optional**
3048
+ - User subscription tier (free, basic, pro) for feature gating
3049
+
3050
+ ### `anonymous_ids`
3051
+ - **Type**: `list[str]`
3052
+ - **Optional**
3053
+ - Linked anonymous session IDs used for merging history
3054
+
3055
+ ### `sec_policy`
3056
+ - **Type**: `<class ''dict''>`
3057
+ - **Optional**
3058
+ - Security policy configuration (JSON, extensible for custom policies)
3059
+
3060
+ ### `summary`
3061
+ - **Type**: `typing.Optional[str]`
3062
+ - **Optional**
3063
+ - LLM-generated user profile summary (updated by dreaming worker)
3064
+
3065
+ ### `interests`
3066
+ - **Type**: `list[str]`
3067
+ - **Optional**
3068
+ - User interests extracted from activity
3069
+
3070
+ ### `preferred_topics`
3071
+ - **Type**: `list[str]`
3072
+ - **Optional**
3073
+ - Frequently discussed topics in kebab-case
3074
+
3075
+ ### `activity_level`
3076
+ - **Type**: `typing.Optional[str]`
3077
+ - **Optional**
3078
+ - Activity level: active, moderate, inactive
3079
+
3080
+ ### `last_active_at`
3081
+ - **Type**: `typing.Optional[datetime.datetime]`
3082
+ - **Optional**
3083
+ - Last activity timestamp
3084
+
3085
+ ',
3086
+ '{"type": "object", "description": "\n User entity.\n\n Represents people in the REM system, either as active users\n or entities extracted from content. Tenant isolation is provided\n via CoreModel.tenant_id field.\n\n Enhanced by dreaming worker:\n - summary: Generated from activity analysis\n - interests: Extracted from resources and sessions\n - activity_level: Computed from recent engagement\n - preferred_topics: Extracted from moment/resource topics\n \n\nThis agent can search the `users` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "User name (human-readable, used as graph label)", "entity_key": true, "title": "Name", "type": "string"}, "email": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "User email address", "title": "Email"}, "role": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "User role (employee, contractor, external, etc.)", "title": "Role"}, "tier": {"$ref": "#/$defs/UserTier", "default": "free", "description": "User subscription tier (free, basic, pro) for feature gating"}, "anonymous_ids": {"description": "Linked anonymous session IDs used for merging history", "items": {"type": "string"}, "title": "Anonymous Ids", "type": "array"}, "sec_policy": {"additionalProperties": true, "description": "Security policy configuration (JSON, extensible for custom policies)", "title": "Sec Policy", "type": "object"}, "summary": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "LLM-generated user profile summary (updated by dreaming worker)", "title": "Summary"}, "interests": {"description": "User interests extracted from activity", "items": {"type": "string"}, "title": "Interests", "type": "array"}, "preferred_topics": {"description": "Frequently discussed topics in kebab-case", "items": {"type": "string"}, "title": "Preferred Topics", "type": "array"}, "activity_level": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": 
null, "description": "Activity level: active, moderate, inactive", "title": "Activity Level"}, "last_active_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Last activity timestamp", "title": "Last Active At"}}, "required": ["name"], "json_schema_extra": {"table_name": "users", "entity_key_field": "name", "embedding_fields": ["summary"], "fully_qualified_name": "rem.models.entities.user.User", "tools": ["search_rem"], "default_search_table": "users", "has_embeddings": true}}'::jsonb,
3087
+ 'entity',
3088
+ '{"table_name": "users", "entity_key_field": "name", "embedding_fields": ["summary"], "fqn": "rem.models.entities.user.User"}'::jsonb
3089
+ )
3090
+ ON CONFLICT (id) DO UPDATE SET
3091
+ name = EXCLUDED.name,
3092
+ content = EXCLUDED.content,
3093
+ spec = EXCLUDED.spec,
3094
+ category = EXCLUDED.category,
3095
+ metadata = EXCLUDED.metadata,
3096
+ updated_at = CURRENT_TIMESTAMP;
3097
+
1235
3098
  -- ============================================================================
1236
3099
  -- RECORD MIGRATION
1237
3100
  -- ============================================================================