PyPI - remdb - Versions diffs - 0.3.172__py3-none-any.whl → 0.3.223__py3-none-any.whl - Mend

remdb-0.3.172-py3-none-any.whl → remdb-0.3.223-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of remdb might be problematic. Click here for more details.

Files changed (57)

rem/agentic/README.md +262 -2
rem/agentic/context.py +173 -0
rem/agentic/context_builder.py +12 -2
rem/agentic/mcp/tool_wrapper.py +39 -16
rem/agentic/providers/pydantic_ai.py +46 -43
rem/agentic/schema.py +2 -2
rem/agentic/tools/rem_tools.py +11 -0
rem/api/main.py +1 -1
rem/api/mcp_router/resources.py +64 -8
rem/api/mcp_router/server.py +31 -24
rem/api/mcp_router/tools.py +621 -166
rem/api/routers/admin.py +30 -4
rem/api/routers/auth.py +114 -15
rem/api/routers/chat/completions.py +66 -18
rem/api/routers/chat/sse_events.py +7 -3
rem/api/routers/chat/streaming.py +254 -22
rem/api/routers/common.py +18 -0
rem/api/routers/dev.py +7 -1
rem/api/routers/feedback.py +9 -1
rem/api/routers/messages.py +176 -38
rem/api/routers/models.py +9 -1
rem/api/routers/query.py +12 -1
rem/api/routers/shared_sessions.py +16 -0
rem/auth/jwt.py +19 -4
rem/auth/middleware.py +42 -28
rem/cli/README.md +62 -0
rem/cli/commands/ask.py +1 -1
rem/cli/commands/db.py +148 -70
rem/cli/commands/process.py +171 -43
rem/models/entities/ontology.py +91 -101
rem/schemas/agents/rem.yaml +1 -1
rem/services/content/service.py +18 -5
rem/services/email/service.py +11 -2
rem/services/embeddings/worker.py +26 -12
rem/services/postgres/__init__.py +28 -3
rem/services/postgres/diff_service.py +57 -5
rem/services/postgres/programmable_diff_service.py +635 -0
rem/services/postgres/pydantic_to_sqlalchemy.py +2 -2
rem/services/postgres/register_type.py +12 -11
rem/services/postgres/repository.py +46 -25
rem/services/postgres/schema_generator.py +5 -5
rem/services/postgres/sql_builder.py +6 -5
rem/services/session/__init__.py +8 -1
rem/services/session/compression.py +40 -2
rem/services/session/pydantic_messages.py +276 -0
rem/settings.py +28 -0
rem/sql/background_indexes.sql +5 -0
rem/sql/migrations/001_install.sql +157 -10
rem/sql/migrations/002_install_models.sql +160 -132
rem/sql/migrations/004_cache_system.sql +7 -275
rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
rem/utils/model_helpers.py +101 -0
rem/utils/schema_loader.py +6 -6
{remdb-0.3.172.dist-info → remdb-0.3.223.dist-info}/METADATA +1 -1
{remdb-0.3.172.dist-info → remdb-0.3.223.dist-info}/RECORD +57 -53
{remdb-0.3.172.dist-info → remdb-0.3.223.dist-info}/WHEEL +0 -0
{remdb-0.3.172.dist-info → remdb-0.3.223.dist-info}/entry_points.txt +0 -0

rem/sql/migrations/002_install_models.sql CHANGED Viewed

@@ -1,7 +1,7 @@
 -- REM Model Schema (install_models.sql)
 -- Generated from Pydantic models
 -- Source: model registry
--- Generated at: 2025-11-29T18:45:11.372432
+-- Generated at: 2025-12-22T17:34:54.187339
 --
 -- DO NOT EDIT MANUALLY - Regenerate with: rem db schema generate
 --
@@ -36,7 +36,7 @@ END $$;
 CREATE TABLE IF NOT EXISTS feedbacks (
     id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
-    tenant_id VARCHAR(100) NOT NULL,
+    tenant_id VARCHAR(100),
     user_id VARCHAR(256),
     session_id VARCHAR(256) NOT NULL,
     message_id VARCHAR(256),
@@ -74,6 +74,7 @@ BEGIN
         RETURN OLD;
     ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
         -- Upsert to KV_STORE (O(1) lookup by entity_key)
+        -- tenant_id can be NULL (meaning public/shared data)
         INSERT INTO kv_store (
             entity_key,
             entity_type,
@@ -84,7 +85,7 @@ BEGIN
             graph_edges,
             updated_at
         ) VALUES (
-            NEW.id::VARCHAR,
+            normalize_key(NEW.id::VARCHAR),
             'feedbacks',
             NEW.id,
             NEW.tenant_id,
@@ -93,7 +94,7 @@ BEGIN
             COALESCE(NEW.graph_edges, '[]'::jsonb),
             CURRENT_TIMESTAMP
         )
-        ON CONFLICT (tenant_id, entity_key)
+        ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
         DO UPDATE SET
             entity_id = EXCLUDED.entity_id,
             user_id = EXCLUDED.user_id,
@@ -118,7 +119,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_feedbacks_kv_store_upsert();
 CREATE TABLE IF NOT EXISTS files (
     id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
-    tenant_id VARCHAR(100) NOT NULL,
+    tenant_id VARCHAR(100),
     user_id VARCHAR(256),
     name VARCHAR(256) NOT NULL,
     uri VARCHAR(256) NOT NULL,
@@ -164,7 +165,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_files_field_provider ON embeddings_fil
 -- HNSW index for vector similarity search (created in background)
 -- Note: This will be created by background thread after data load
--- CREATE INDEX IF NOT EXISTS idx_embeddings_files_vector_hnsw ON embeddings_files
+-- CREATE INDEX idx_embeddings_files_vector_hnsw ON embeddings_files
 -- USING hnsw (embedding vector_cosine_ops);
 -- KV_STORE trigger for files
@@ -179,6 +180,7 @@ BEGIN
         RETURN OLD;
     ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
         -- Upsert to KV_STORE (O(1) lookup by entity_key)
+        -- tenant_id can be NULL (meaning public/shared data)
         INSERT INTO kv_store (
             entity_key,
             entity_type,
@@ -189,7 +191,7 @@ BEGIN
             graph_edges,
             updated_at
         ) VALUES (
-            NEW.id::VARCHAR,
+            normalize_key(NEW.name::VARCHAR),
             'files',
             NEW.id,
             NEW.tenant_id,
@@ -198,7 +200,7 @@ BEGIN
             COALESCE(NEW.graph_edges, '[]'::jsonb),
             CURRENT_TIMESTAMP
         )
-        ON CONFLICT (tenant_id, entity_key)
+        ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
         DO UPDATE SET
             entity_id = EXCLUDED.entity_id,
             user_id = EXCLUDED.user_id,
@@ -223,7 +225,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_files_kv_store_upsert();
 CREATE TABLE IF NOT EXISTS image_resources (
     id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
-    tenant_id VARCHAR(100) NOT NULL,
+    tenant_id VARCHAR(100),
     user_id VARCHAR(256),
     name VARCHAR(256),
     uri VARCHAR(256),
@@ -277,7 +279,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_image_resources_field_provider ON embe
 -- HNSW index for vector similarity search (created in background)
 -- Note: This will be created by background thread after data load
--- CREATE INDEX IF NOT EXISTS idx_embeddings_image_resources_vector_hnsw ON embeddings_image_resources
+-- CREATE INDEX idx_embeddings_image_resources_vector_hnsw ON embeddings_image_resources
 -- USING hnsw (embedding vector_cosine_ops);
 -- KV_STORE trigger for image_resources
@@ -292,6 +294,7 @@ BEGIN
         RETURN OLD;
     ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
         -- Upsert to KV_STORE (O(1) lookup by entity_key)
+        -- tenant_id can be NULL (meaning public/shared data)
         INSERT INTO kv_store (
             entity_key,
             entity_type,
@@ -302,7 +305,7 @@ BEGIN
             graph_edges,
             updated_at
         ) VALUES (
-            NEW.name::VARCHAR,
+            normalize_key(NEW.name::VARCHAR),
             'image_resources',
             NEW.id,
             NEW.tenant_id,
@@ -311,7 +314,7 @@ BEGIN
             COALESCE(NEW.graph_edges, '[]'::jsonb),
             CURRENT_TIMESTAMP
         )
-        ON CONFLICT (tenant_id, entity_key)
+        ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
         DO UPDATE SET
             entity_id = EXCLUDED.entity_id,
             user_id = EXCLUDED.user_id,
@@ -336,7 +339,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_image_resources_kv_store_upsert();
 CREATE TABLE IF NOT EXISTS messages (
     id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
-    tenant_id VARCHAR(100) NOT NULL,
+    tenant_id VARCHAR(100),
     user_id VARCHAR(256),
     content TEXT NOT NULL,
     message_type VARCHAR(256),
@@ -383,7 +386,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_messages_field_provider ON embeddings_
 -- HNSW index for vector similarity search (created in background)
 -- Note: This will be created by background thread after data load
--- CREATE INDEX IF NOT EXISTS idx_embeddings_messages_vector_hnsw ON embeddings_messages
+-- CREATE INDEX idx_embeddings_messages_vector_hnsw ON embeddings_messages
 -- USING hnsw (embedding vector_cosine_ops);
 -- KV_STORE trigger for messages
@@ -398,6 +401,7 @@ BEGIN
         RETURN OLD;
     ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
         -- Upsert to KV_STORE (O(1) lookup by entity_key)
+        -- tenant_id can be NULL (meaning public/shared data)
         INSERT INTO kv_store (
             entity_key,
             entity_type,
@@ -408,7 +412,7 @@ BEGIN
             graph_edges,
             updated_at
         ) VALUES (
-            NEW.id::VARCHAR,
+            normalize_key(NEW.id::VARCHAR),
             'messages',
             NEW.id,
             NEW.tenant_id,
@@ -417,7 +421,7 @@ BEGIN
             COALESCE(NEW.graph_edges, '[]'::jsonb),
             CURRENT_TIMESTAMP
         )
-        ON CONFLICT (tenant_id, entity_key)
+        ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
         DO UPDATE SET
             entity_id = EXCLUDED.entity_id,
             user_id = EXCLUDED.user_id,
@@ -442,7 +446,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_messages_kv_store_upsert();
 CREATE TABLE IF NOT EXISTS moments (
     id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
-    tenant_id VARCHAR(100) NOT NULL,
+    tenant_id VARCHAR(100),
     user_id VARCHAR(256),
     name VARCHAR(256),
     moment_type VARCHAR(256),
@@ -491,7 +495,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_moments_field_provider ON embeddings_m
 -- HNSW index for vector similarity search (created in background)
 -- Note: This will be created by background thread after data load
--- CREATE INDEX IF NOT EXISTS idx_embeddings_moments_vector_hnsw ON embeddings_moments
+-- CREATE INDEX idx_embeddings_moments_vector_hnsw ON embeddings_moments
 -- USING hnsw (embedding vector_cosine_ops);
 -- KV_STORE trigger for moments
@@ -506,6 +510,7 @@ BEGIN
         RETURN OLD;
     ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
         -- Upsert to KV_STORE (O(1) lookup by entity_key)
+        -- tenant_id can be NULL (meaning public/shared data)
         INSERT INTO kv_store (
             entity_key,
             entity_type,
@@ -516,7 +521,7 @@ BEGIN
             graph_edges,
             updated_at
         ) VALUES (
-            NEW.name::VARCHAR,
+            normalize_key(NEW.name::VARCHAR),
             'moments',
             NEW.id,
             NEW.tenant_id,
@@ -525,7 +530,7 @@ BEGIN
             COALESCE(NEW.graph_edges, '[]'::jsonb),
             CURRENT_TIMESTAMP
         )
-        ON CONFLICT (tenant_id, entity_key)
+        ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
         DO UPDATE SET
             entity_id = EXCLUDED.entity_id,
             user_id = EXCLUDED.user_id,
@@ -550,17 +555,18 @@ FOR EACH ROW EXECUTE FUNCTION fn_moments_kv_store_upsert();
 CREATE TABLE IF NOT EXISTS ontologies (
     id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
-    tenant_id VARCHAR(100) NOT NULL,
+    tenant_id VARCHAR(100),
     user_id VARCHAR(256),
     name VARCHAR(256) NOT NULL,
-    file_id UUID NOT NULL,
-    agent_schema_id VARCHAR(256) NOT NULL,
-    provider_name VARCHAR(256) NOT NULL,
-    model_name VARCHAR(256) NOT NULL,
-    extracted_data JSONB NOT NULL,
+    uri VARCHAR(256),
+    file_id UUID,
+    agent_schema_id VARCHAR(256),
+    provider_name VARCHAR(256),
+    model_name VARCHAR(256),
+    extracted_data JSONB,
     confidence_score DOUBLE PRECISION,
     extraction_timestamp VARCHAR(256),
-    embedding_text TEXT,
+    content TEXT,
     created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
     updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
     deleted_at TIMESTAMP,
@@ -575,6 +581,32 @@ CREATE INDEX IF NOT EXISTS idx_ontologies_graph_edges ON ontologies USING GIN (g
 CREATE INDEX IF NOT EXISTS idx_ontologies_metadata ON ontologies USING GIN (metadata);
 CREATE INDEX IF NOT EXISTS idx_ontologies_tags ON ontologies USING GIN (tags);
+-- Embeddings for ontologies
+CREATE TABLE IF NOT EXISTS embeddings_ontologies (
+    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+    entity_id UUID NOT NULL REFERENCES ontologies(id) ON DELETE CASCADE,
+    field_name VARCHAR(100) NOT NULL,
+    provider VARCHAR(50) NOT NULL DEFAULT 'openai',
+    model VARCHAR(100) NOT NULL DEFAULT 'text-embedding-3-small',
+    embedding vector(1536) NOT NULL,
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    -- Unique: one embedding per entity per field per provider
+    UNIQUE (entity_id, field_name, provider)
+);
+-- Index for entity lookup (get all embeddings for entity)
+CREATE INDEX IF NOT EXISTS idx_embeddings_ontologies_entity ON embeddings_ontologies (entity_id);
+-- Index for field + provider lookup
+CREATE INDEX IF NOT EXISTS idx_embeddings_ontologies_field_provider ON embeddings_ontologies (field_name, provider);
+-- HNSW index for vector similarity search (created in background)
+-- Note: This will be created by background thread after data load
+-- CREATE INDEX idx_embeddings_ontologies_vector_hnsw ON embeddings_ontologies
+-- USING hnsw (embedding vector_cosine_ops);
 -- KV_STORE trigger for ontologies
 -- Trigger function to maintain KV_STORE for ontologies
 CREATE OR REPLACE FUNCTION fn_ontologies_kv_store_upsert()
@@ -587,6 +619,7 @@ BEGIN
         RETURN OLD;
     ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
         -- Upsert to KV_STORE (O(1) lookup by entity_key)
+        -- tenant_id can be NULL (meaning public/shared data)
         INSERT INTO kv_store (
             entity_key,
             entity_type,
@@ -597,7 +630,7 @@ BEGIN
             graph_edges,
             updated_at
         ) VALUES (
-            NEW.id::VARCHAR,
+            normalize_key(NEW.name::VARCHAR),
             'ontologies',
             NEW.id,
             NEW.tenant_id,
@@ -606,7 +639,7 @@ BEGIN
             COALESCE(NEW.graph_edges, '[]'::jsonb),
             CURRENT_TIMESTAMP
         )
-        ON CONFLICT (tenant_id, entity_key)
+        ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
         DO UPDATE SET
             entity_id = EXCLUDED.entity_id,
             user_id = EXCLUDED.user_id,
@@ -631,7 +664,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_ontologies_kv_store_upsert();
 CREATE TABLE IF NOT EXISTS ontology_configs (
     id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
-    tenant_id VARCHAR(100) NOT NULL,
+    tenant_id VARCHAR(100),
     user_id VARCHAR(256),
     name VARCHAR(256) NOT NULL,
     agent_schema_id VARCHAR(256) NOT NULL,
@@ -680,7 +713,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_ontology_configs_field_provider ON emb
 -- HNSW index for vector similarity search (created in background)
 -- Note: This will be created by background thread after data load
--- CREATE INDEX IF NOT EXISTS idx_embeddings_ontology_configs_vector_hnsw ON embeddings_ontology_configs
+-- CREATE INDEX idx_embeddings_ontology_configs_vector_hnsw ON embeddings_ontology_configs
 -- USING hnsw (embedding vector_cosine_ops);
 -- KV_STORE trigger for ontology_configs
@@ -695,6 +728,7 @@ BEGIN
         RETURN OLD;
     ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
         -- Upsert to KV_STORE (O(1) lookup by entity_key)
+        -- tenant_id can be NULL (meaning public/shared data)
         INSERT INTO kv_store (
             entity_key,
             entity_type,
@@ -705,7 +739,7 @@ BEGIN
             graph_edges,
             updated_at
         ) VALUES (
-            NEW.id::VARCHAR,
+            normalize_key(NEW.name::VARCHAR),
             'ontology_configs',
             NEW.id,
             NEW.tenant_id,
@@ -714,7 +748,7 @@ BEGIN
             COALESCE(NEW.graph_edges, '[]'::jsonb),
             CURRENT_TIMESTAMP
         )
-        ON CONFLICT (tenant_id, entity_key)
+        ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
         DO UPDATE SET
             entity_id = EXCLUDED.entity_id,
             user_id = EXCLUDED.user_id,
@@ -739,7 +773,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_ontology_configs_kv_store_upsert();
 CREATE TABLE IF NOT EXISTS resources (
     id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
-    tenant_id VARCHAR(100) NOT NULL,
+    tenant_id VARCHAR(100),
     user_id VARCHAR(256),
     name VARCHAR(256),
     uri VARCHAR(256),
@@ -785,7 +819,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_resources_field_provider ON embeddings
 -- HNSW index for vector similarity search (created in background)
 -- Note: This will be created by background thread after data load
--- CREATE INDEX IF NOT EXISTS idx_embeddings_resources_vector_hnsw ON embeddings_resources
+-- CREATE INDEX idx_embeddings_resources_vector_hnsw ON embeddings_resources
 -- USING hnsw (embedding vector_cosine_ops);
 -- KV_STORE trigger for resources
@@ -800,6 +834,7 @@ BEGIN
         RETURN OLD;
     ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
         -- Upsert to KV_STORE (O(1) lookup by entity_key)
+        -- tenant_id can be NULL (meaning public/shared data)
         INSERT INTO kv_store (
             entity_key,
             entity_type,
@@ -810,7 +845,7 @@ BEGIN
             graph_edges,
             updated_at
         ) VALUES (
-            NEW.name::VARCHAR,
+            normalize_key(NEW.name::VARCHAR),
             'resources',
             NEW.id,
             NEW.tenant_id,
@@ -819,7 +854,7 @@ BEGIN
             COALESCE(NEW.graph_edges, '[]'::jsonb),
             CURRENT_TIMESTAMP
         )
-        ON CONFLICT (tenant_id, entity_key)
+        ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
         DO UPDATE SET
             entity_id = EXCLUDED.entity_id,
             user_id = EXCLUDED.user_id,
@@ -844,7 +879,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_resources_kv_store_upsert();
 CREATE TABLE IF NOT EXISTS schemas (
     id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
-    tenant_id VARCHAR(100) NOT NULL,
+    tenant_id VARCHAR(100),
     user_id VARCHAR(256),
     name VARCHAR(256) NOT NULL,
     content TEXT,
@@ -889,7 +924,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_schemas_field_provider ON embeddings_s
 -- HNSW index for vector similarity search (created in background)
 -- Note: This will be created by background thread after data load
--- CREATE INDEX IF NOT EXISTS idx_embeddings_schemas_vector_hnsw ON embeddings_schemas
+-- CREATE INDEX idx_embeddings_schemas_vector_hnsw ON embeddings_schemas
 -- USING hnsw (embedding vector_cosine_ops);
 -- KV_STORE trigger for schemas
@@ -904,6 +939,7 @@ BEGIN
         RETURN OLD;
     ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
         -- Upsert to KV_STORE (O(1) lookup by entity_key)
+        -- tenant_id can be NULL (meaning public/shared data)
         INSERT INTO kv_store (
             entity_key,
             entity_type,
@@ -914,7 +950,7 @@ BEGIN
             graph_edges,
             updated_at
         ) VALUES (
-            NEW.id::VARCHAR,
+            normalize_key(NEW.name::VARCHAR),
             'schemas',
             NEW.id,
             NEW.tenant_id,
@@ -923,7 +959,7 @@ BEGIN
             COALESCE(NEW.graph_edges, '[]'::jsonb),
             CURRENT_TIMESTAMP
         )
-        ON CONFLICT (tenant_id, entity_key)
+        ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
         DO UPDATE SET
             entity_id = EXCLUDED.entity_id,
             user_id = EXCLUDED.user_id,
@@ -948,7 +984,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_schemas_kv_store_upsert();
 CREATE TABLE IF NOT EXISTS sessions (
     id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
-    tenant_id VARCHAR(100) NOT NULL,
+    tenant_id VARCHAR(100),
     user_id VARCHAR(256),
     name VARCHAR(256) NOT NULL,
     mode TEXT,
@@ -996,7 +1032,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_sessions_field_provider ON embeddings_
 -- HNSW index for vector similarity search (created in background)
 -- Note: This will be created by background thread after data load
--- CREATE INDEX IF NOT EXISTS idx_embeddings_sessions_vector_hnsw ON embeddings_sessions
+-- CREATE INDEX idx_embeddings_sessions_vector_hnsw ON embeddings_sessions
 -- USING hnsw (embedding vector_cosine_ops);
 -- KV_STORE trigger for sessions
@@ -1011,6 +1047,7 @@ BEGIN
         RETURN OLD;
     ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
         -- Upsert to KV_STORE (O(1) lookup by entity_key)
+        -- tenant_id can be NULL (meaning public/shared data)
         INSERT INTO kv_store (
             entity_key,
             entity_type,
@@ -1021,7 +1058,7 @@ BEGIN
             graph_edges,
             updated_at
         ) VALUES (
-            NEW.name::VARCHAR,
+            normalize_key(NEW.name::VARCHAR),
             'sessions',
             NEW.id,
             NEW.tenant_id,
@@ -1030,7 +1067,7 @@ BEGIN
             COALESCE(NEW.graph_edges, '[]'::jsonb),
             CURRENT_TIMESTAMP
         )
-        ON CONFLICT (tenant_id, entity_key)
+        ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
         DO UPDATE SET
             entity_id = EXCLUDED.entity_id,
             user_id = EXCLUDED.user_id,
@@ -1055,7 +1092,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_sessions_kv_store_upsert();
 CREATE TABLE IF NOT EXISTS shared_sessions (
     id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
-    tenant_id VARCHAR(100) NOT NULL,
+    tenant_id VARCHAR(100),
     user_id VARCHAR(256),
     session_id VARCHAR(256) NOT NULL,
     owner_user_id VARCHAR(256) NOT NULL,
@@ -1086,6 +1123,7 @@ BEGIN
         RETURN OLD;
     ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
         -- Upsert to KV_STORE (O(1) lookup by entity_key)
+        -- tenant_id can be NULL (meaning public/shared data)
         INSERT INTO kv_store (
             entity_key,
             entity_type,
@@ -1096,7 +1134,7 @@ BEGIN
             graph_edges,
             updated_at
         ) VALUES (
-            NEW.id::VARCHAR,
+            normalize_key(NEW.id::VARCHAR),
             'shared_sessions',
             NEW.id,
             NEW.tenant_id,
@@ -1105,7 +1143,7 @@ BEGIN
             COALESCE(NEW.graph_edges, '[]'::jsonb),
             CURRENT_TIMESTAMP
         )
-        ON CONFLICT (tenant_id, entity_key)
+        ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
         DO UPDATE SET
             entity_id = EXCLUDED.entity_id,
             user_id = EXCLUDED.user_id,
@@ -1130,7 +1168,7 @@ FOR EACH ROW EXECUTE FUNCTION fn_shared_sessions_kv_store_upsert();
 CREATE TABLE IF NOT EXISTS users (
     id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
-    tenant_id VARCHAR(100) NOT NULL,
+    tenant_id VARCHAR(100),
     user_id VARCHAR(256),
     name VARCHAR(256) NOT NULL,
     email VARCHAR(256),
@@ -1180,7 +1218,7 @@ CREATE INDEX IF NOT EXISTS idx_embeddings_users_field_provider ON embeddings_use
 -- HNSW index for vector similarity search (created in background)
 -- Note: This will be created by background thread after data load
--- CREATE INDEX IF NOT EXISTS idx_embeddings_users_vector_hnsw ON embeddings_users
+-- CREATE INDEX idx_embeddings_users_vector_hnsw ON embeddings_users
 -- USING hnsw (embedding vector_cosine_ops);
 -- KV_STORE trigger for users
@@ -1195,6 +1233,7 @@ BEGIN
         RETURN OLD;
     ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
         -- Upsert to KV_STORE (O(1) lookup by entity_key)
+        -- tenant_id can be NULL (meaning public/shared data)
         INSERT INTO kv_store (
             entity_key,
             entity_type,
@@ -1205,7 +1244,7 @@ BEGIN
             graph_edges,
             updated_at
         ) VALUES (
-            NEW.name::VARCHAR,
+            normalize_key(NEW.name::VARCHAR),
             'users',
             NEW.id,
             NEW.tenant_id,
@@ -1214,7 +1253,7 @@ BEGIN
             COALESCE(NEW.graph_edges, '[]'::jsonb),
             CURRENT_TIMESTAMP
         )
-        ON CONFLICT (tenant_id, entity_key)
+        ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
         DO UPDATE SET
             entity_id = EXCLUDED.entity_id,
             user_id = EXCLUDED.user_id,
@@ -1411,12 +1450,12 @@ VALUES (
 ## Overview
 The `File` entity is stored in the `files` table. Each record is uniquely
-identified by its `id` field for lookups and graph traversal.
+identified by its `name` field for lookups and graph traversal.
 ## Search Capabilities
 This schema includes the `search_rem` tool which supports:
-- **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
+- **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
 - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
 - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM files LIMIT 10`)
 - **SQL**: Complex queries (e.g., `SELECT * FROM files WHERE ...`)
@@ -1426,7 +1465,7 @@ This schema includes the `search_rem` tool which supports:
 | Property | Value |
 |----------|-------|
 | Table | `files` |
-| Entity Key | `id` |
+| Entity Key | `name` |
 | Embedding Fields | `content` |
 | Tools | `search_rem` |
@@ -1513,9 +1552,9 @@ This schema includes the `search_rem` tool which supports:
 - File processing status (pending, processing, completed, failed)
 ',
-    '{"type": "object", "description": "\n    File metadata and tracking.\n\n    Represents files uploaded to or referenced by the REM system,\n    tracking their metadata and processing status. Tenant isolation\n    is provided via CoreModel.tenant_id field.\n    \n\nThis agent can search the `files` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "File name", "title": "Name", "type": "string"}, "uri": {"description": "File storage URI (S3, local path, etc.)", "title": "Uri", "type": "string"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Extracted text content (if applicable)", "title": "Content"}, "timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File creation/modification timestamp", "title": "Timestamp"}, "size_bytes": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "File size in bytes", "title": "Size Bytes"}, "mime_type": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File MIME type", "title": "Mime Type"}, "processing_status": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": "pending", "description": "File processing status (pending, processing, completed, failed)", "title": "Processing Status"}}, "required": ["name", "uri"], "json_schema_extra": {"table_name": "files", "entity_key_field": "id", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.file.File", "tools": ["search_rem"], "default_search_table": "files", "has_embeddings": true}}'::jsonb,
+    '{"type": "object", "description": "\n    File metadata and tracking.\n\n    Represents files uploaded to or referenced by the REM system,\n    tracking their metadata and processing status. Tenant isolation\n    is provided via CoreModel.tenant_id field.\n    \n\nThis agent can search the `files` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "File name", "title": "Name", "type": "string"}, "uri": {"description": "File storage URI (S3, local path, etc.)", "title": "Uri", "type": "string"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Extracted text content (if applicable)", "title": "Content"}, "timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File creation/modification timestamp", "title": "Timestamp"}, "size_bytes": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "File size in bytes", "title": "Size Bytes"}, "mime_type": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "File MIME type", "title": "Mime Type"}, "processing_status": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": "pending", "description": "File processing status (pending, processing, completed, failed)", "title": "Processing Status"}}, "required": ["name", "uri"], "json_schema_extra": {"table_name": "files", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.file.File", "tools": ["search_rem"], "default_search_table": "files", "has_embeddings": true}}'::jsonb,
     'entity',
-    '{"table_name": "files", "entity_key_field": "id", "embedding_fields": ["content"], "fqn": "rem.models.entities.file.File"}'::jsonb
+    '{"table_name": "files", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.file.File"}'::jsonb
 )
 ON CONFLICT (id) DO UPDATE SET
     name = EXCLUDED.name,
@@ -2008,18 +2047,19 @@ VALUES (
     'Ontology',
     '# Ontology
-Domain-specific knowledge extracted from files using custom agents.
+Domain-specific knowledge - either agent-extracted or direct-loaded.
     Attributes:
         name: Human-readable label for this ontology instance
-        file_id: Foreign key to File entity that was processed
-        agent_schema_id: Foreign key to Schema entity that performed extraction
-        provider_name: LLM provider used for extraction (e.g., "anthropic", "openai")
-        model_name: Specific model used (e.g., "claude-sonnet-4-5")
-        extracted_data: Structured data extracted by agent (arbitrary JSON)
+        uri: External source reference (git://, s3://, https://) for direct-loaded ontologies
+        file_id: Foreign key to File entity (optional - only for agent-extracted)
+        agent_schema_id: Schema that performed extraction (optional - only for agent-extracted)
+        provider_name: LLM provider used for extraction (optional)
+        model_name: Specific model used (optional)
+        extracted_data: Structured data - either extracted by agent or parsed from source
         confidence_score: Optional confidence score from extraction (0.0-1.0)
         extraction_timestamp: When extraction was performed
-        embedding_text: Text used for generating embedding (derived from extracted_data)
+        content: Text used for generating embedding
     Inherited from CoreModel:
         id: UUID or string identifier
@@ -2031,10 +2071,9 @@ Domain-specific knowledge extracted from files using custom agents.
         graph_edges: Relationships to other entities
         metadata: Flexible metadata storage
         tags: Classification tags
-        column: Database schema metadata
     Example Usage:
-        # CV extraction
+        # Agent-extracted: CV parsing
         cv_ontology = Ontology(
             name="john-doe-cv-2024",
             file_id="file-uuid-123",
@@ -2043,63 +2082,48 @@ Domain-specific knowledge extracted from files using custom agents.
             model_name="claude-sonnet-4-5-20250929",
             extracted_data={
                 "candidate_name": "John Doe",
-                "email": "john@example.com",
                 "skills": ["Python", "PostgreSQL", "Kubernetes"],
-                "experience": [
-                    {
-                        "company": "TechCorp",
-                        "role": "Senior Engineer",
-                        "years": 3,
-                        "achievements": ["Led migration to k8s", "Reduced costs 40%"]
-                    }
-                ],
-                "education": [
-                    {"degree": "BS Computer Science", "institution": "MIT", "year": 2018}
-                ]
             },
             confidence_score=0.95,
-            tags=["cv", "engineering", "senior-level"]
+            tags=["cv", "engineering"]
         )
-        # Contract extraction
-        contract_ontology = Ontology(
-            name="acme-supplier-agreement-2024",
-            file_id="file-uuid-456",
-            agent_schema_id="contract-parser-v2",
-            provider_name="openai",
-            model_name="gpt-4.1",
+        # Direct-loaded: Knowledge base from git
+        api_docs = Ontology(
+            name="rest-api-guide",
+            uri="git://example-org/docs/api/rest-api-guide.md",
+            content="# REST API Guide\n\nThis guide covers RESTful API design...",
+            extracted_data={
+                "type": "documentation",
+                "category": "api",
+                "version": "2.0",
+            },
+            tags=["api", "rest", "documentation"]
+        )
+        # Direct-loaded: Technical spec from git
+        config_spec = Ontology(
+            name="config-schema",
+            uri="git://example-org/docs/specs/config-schema.md",
+            content="# Configuration Schema\n\nThis document defines...",
             extracted_data={
-                "contract_type": "supplier_agreement",
-                "parties": [
-                    {"name": "ACME Corp", "role": "buyer"},
-                    {"name": "SupplyChain Inc", "role": "supplier"}
-                ],
-                "effective_date": "2024-01-01",
-                "termination_date": "2026-12-31",
-                "payment_terms": {
-                    "amount": 500000,
-                    "currency": "USD",
-                    "frequency": "quarterly"
-                },
-                "key_obligations": [
-                    "Supplier must deliver within 30 days",
-                    "Buyer must pay within 60 days of invoice"
-                ]
+                "type": "specification",
+                "format": "yaml",
+                "version": "1.0",
             },
-            confidence_score=0.92,
-            tags=["contract", "supplier", "procurement"]
+            tags=["config", "schema", "specification"]
         )
 ## Overview
 The `Ontology` entity is stored in the `ontologies` table. Each record is uniquely
-identified by its `id` field for lookups and graph traversal.
+identified by its `name` field for lookups and graph traversal.
 ## Search Capabilities
 This schema includes the `search_rem` tool which supports:
-- **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
+- **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
 - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
 - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM ontologies LIMIT 10`)
 - **SQL**: Complex queries (e.g., `SELECT * FROM ontologies WHERE ...`)
@@ -2109,8 +2133,8 @@ This schema includes the `search_rem` tool which supports:
 | Property | Value |
 |----------|-------|
 | Table | `ontologies` |
-| Entity Key | `id` |
-| Embedding Fields | None |
+| Entity Key | `name` |
+| Embedding Fields | `content` |
 | Tools | `search_rem` |
 ## Fields
@@ -2164,25 +2188,29 @@ This schema includes the `search_rem` tool which supports:
 - **Type**: `<class ''str''>`
 - **Required**
+### `uri`
+- **Type**: `typing.Optional[str]`
+- **Optional**
 ### `file_id`
-- **Type**: `uuid.UUID | str`
-- **Required**
+- **Type**: `typing.Union[uuid.UUID, str, NoneType]`
+- **Optional**
 ### `agent_schema_id`
-- **Type**: `<class ''str''>`
-- **Required**
+- **Type**: `typing.Optional[str]`
+- **Optional**
 ### `provider_name`
-- **Type**: `<class ''str''>`
-- **Required**
+- **Type**: `typing.Optional[str]`
+- **Optional**
 ### `model_name`
-- **Type**: `<class ''str''>`
-- **Required**
+- **Type**: `typing.Optional[str]`
+- **Optional**
 ### `extracted_data`
-- **Type**: `dict[str, typing.Any]`
-- **Required**
+- **Type**: `typing.Optional[dict[str, typing.Any]]`
+- **Optional**
 ### `confidence_score`
 - **Type**: `typing.Optional[float]`
@@ -2192,14 +2220,14 @@ This schema includes the `search_rem` tool which supports:
 - **Type**: `typing.Optional[str]`
 - **Optional**
-### `embedding_text`
+### `content`
 - **Type**: `typing.Optional[str]`
 - **Optional**
 ',
-    '{"type": "object", "description": "Domain-specific knowledge extracted from files using custom agents.\n\n    Attributes:\n        name: Human-readable label for this ontology instance\n        file_id: Foreign key to File entity that was processed\n        agent_schema_id: Foreign key to Schema entity that performed extraction\n        provider_name: LLM provider used for extraction (e.g., \"anthropic\", \"openai\")\n        model_name: Specific model used (e.g., \"claude-sonnet-4-5\")\n        extracted_data: Structured data extracted by agent (arbitrary JSON)\n        confidence_score: Optional confidence score from extraction (0.0-1.0)\n        extraction_timestamp: When extraction was performed\n        embedding_text: Text used for generating embedding (derived from extracted_data)\n\n    Inherited from CoreModel:\n        id: UUID or string identifier\n        created_at: Entity creation timestamp\n        updated_at: Last update timestamp\n        deleted_at: Soft deletion timestamp\n        tenant_id: Multi-tenancy isolation\n        user_id: Ownership\n        graph_edges: Relationships to other entities\n        metadata: Flexible metadata storage\n        tags: Classification tags\n        column: Database schema metadata\n\n    Example Usage:\n        # CV extraction\n        cv_ontology = Ontology(\n            name=\"john-doe-cv-2024\",\n            file_id=\"file-uuid-123\",\n            agent_schema_id=\"cv-parser-v1\",\n            provider_name=\"anthropic\",\n            model_name=\"claude-sonnet-4-5-20250929\",\n            extracted_data={\n                \"candidate_name\": \"John Doe\",\n                \"email\": \"john@example.com\",\n                \"skills\": [\"Python\", \"PostgreSQL\", \"Kubernetes\"],\n                \"experience\": [\n                    {\n                        \"company\": \"TechCorp\",\n                        \"role\": \"Senior Engineer\",\n                        \"years\": 3,\n                       
 \"achievements\": [\"Led migration to k8s\", \"Reduced costs 40%\"]\n                    }\n                ],\n                \"education\": [\n                    {\"degree\": \"BS Computer Science\", \"institution\": \"MIT\", \"year\": 2018}\n                ]\n            },\n            confidence_score=0.95,\n            tags=[\"cv\", \"engineering\", \"senior-level\"]\n        )\n\n        # Contract extraction\n        contract_ontology = Ontology(\n            name=\"acme-supplier-agreement-2024\",\n            file_id=\"file-uuid-456\",\n            agent_schema_id=\"contract-parser-v2\",\n            provider_name=\"openai\",\n            model_name=\"gpt-4.1\",\n            extracted_data={\n                \"contract_type\": \"supplier_agreement\",\n                \"parties\": [\n                    {\"name\": \"ACME Corp\", \"role\": \"buyer\"},\n                    {\"name\": \"SupplyChain Inc\", \"role\": \"supplier\"}\n                ],\n                \"effective_date\": \"2024-01-01\",\n                \"termination_date\": \"2026-12-31\",\n                \"payment_terms\": {\n                    \"amount\": 500000,\n                    \"currency\": \"USD\",\n                    \"frequency\": \"quarterly\"\n                },\n                \"key_obligations\": [\n                    \"Supplier must deliver within 30 days\",\n                    \"Buyer must pay within 60 days of invoice\"\n                ]\n            },\n            confidence_score=0.92,\n            tags=[\"contract\", \"supplier\", \"procurement\"]\n        )\n    \n\nThis agent can search the `ontologies` table using the `search_rem` tool. 
Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "file_id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}], "title": "File Id"}, "agent_schema_id": {"title": "Agent Schema Id", "type": "string"}, "provider_name": {"title": "Provider Name", "type": "string"}, "model_name": {"title": "Model Name", "type": "string"}, "extracted_data": {"additionalProperties": true, "title": "Extracted Data", "type": "object"}, "confidence_score": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Confidence Score"}, "extraction_timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Extraction Timestamp"}, "embedding_text": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Embedding Text"}}, "required": ["name", "file_id", "agent_schema_id", "provider_name", "model_name", "extracted_data"], "json_schema_extra": {"table_name": "ontologies", "entity_key_field": "id", "embedding_fields": [], "fully_qualified_name": "rem.models.entities.ontology.Ontology", "tools": ["search_rem"], "default_search_table": "ontologies", "has_embeddings": false}}'::jsonb,
+    '{"type": "object", "description": "Domain-specific knowledge - either agent-extracted or direct-loaded.\n\n    Attributes:\n        name: Human-readable label for this ontology instance\n        uri: External source reference (git://, s3://, https://) for direct-loaded ontologies\n        file_id: Foreign key to File entity (optional - only for agent-extracted)\n        agent_schema_id: Schema that performed extraction (optional - only for agent-extracted)\n        provider_name: LLM provider used for extraction (optional)\n        model_name: Specific model used (optional)\n        extracted_data: Structured data - either extracted by agent or parsed from source\n        confidence_score: Optional confidence score from extraction (0.0-1.0)\n        extraction_timestamp: When extraction was performed\n        content: Text used for generating embedding\n\n    Inherited from CoreModel:\n        id: UUID or string identifier\n        created_at: Entity creation timestamp\n        updated_at: Last update timestamp\n        deleted_at: Soft deletion timestamp\n        tenant_id: Multi-tenancy isolation\n        user_id: Ownership\n        graph_edges: Relationships to other entities\n        metadata: Flexible metadata storage\n        tags: Classification tags\n\n    Example Usage:\n        # Agent-extracted: CV parsing\n        cv_ontology = Ontology(\n            name=\"john-doe-cv-2024\",\n            file_id=\"file-uuid-123\",\n            agent_schema_id=\"cv-parser-v1\",\n            provider_name=\"anthropic\",\n            model_name=\"claude-sonnet-4-5-20250929\",\n            extracted_data={\n                \"candidate_name\": \"John Doe\",\n                \"skills\": [\"Python\", \"PostgreSQL\", \"Kubernetes\"],\n            },\n            confidence_score=0.95,\n            tags=[\"cv\", \"engineering\"]\n        )\n\n        # Direct-loaded: Knowledge base from git\n        api_docs = Ontology(\n            name=\"rest-api-guide\",\n            
uri=\"git://example-org/docs/api/rest-api-guide.md\",\n            content=\"# REST API Guide\\n\\nThis guide covers RESTful API design...\",\n            extracted_data={\n                \"type\": \"documentation\",\n                \"category\": \"api\",\n                \"version\": \"2.0\",\n            },\n            tags=[\"api\", \"rest\", \"documentation\"]\n        )\n\n        # Direct-loaded: Technical spec from git\n        config_spec = Ontology(\n            name=\"config-schema\",\n            uri=\"git://example-org/docs/specs/config-schema.md\",\n            content=\"# Configuration Schema\\n\\nThis document defines...\",\n            extracted_data={\n                \"type\": \"specification\",\n                \"format\": \"yaml\",\n                \"version\": \"1.0\",\n            },\n            tags=[\"config\", \"schema\", \"specification\"]\n        )\n    \n\nThis agent can search the `ontologies` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). 
Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri"}, "file_id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "title": "File Id"}, "agent_schema_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Agent Schema Id"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": 
"string"}, {"type": "null"}], "default": null, "title": "Model Name"}, "extracted_data": {"anyOf": [{"additionalProperties": true, "type": "object"}, {"type": "null"}], "default": null, "title": "Extracted Data"}, "confidence_score": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Confidence Score"}, "extraction_timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Extraction Timestamp"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Content"}}, "required": ["name"], "json_schema_extra": {"table_name": "ontologies", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.ontology.Ontology", "tools": ["search_rem"], "default_search_table": "ontologies", "has_embeddings": true}}'::jsonb,
     'entity',
-    '{"table_name": "ontologies", "entity_key_field": "id", "embedding_fields": [], "fqn": "rem.models.entities.ontology.Ontology"}'::jsonb
+    '{"table_name": "ontologies", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.ontology.Ontology"}'::jsonb
 )
 ON CONFLICT (id) DO UPDATE SET
     name = EXCLUDED.name,
@@ -2288,12 +2316,12 @@ User configuration for automatic ontology extraction.
 ## Overview
 The `OntologyConfig` entity is stored in the `ontology_configs` table. Each record is uniquely
-identified by its `id` field for lookups and graph traversal.
+identified by its `name` field for lookups and graph traversal.
 ## Search Capabilities
 This schema includes the `search_rem` tool which supports:
-- **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
+- **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
 - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
 - **SEARCH**: Semantic vector search on description (e.g., `SEARCH "concept" FROM ontology_configs LIMIT 10`)
 - **SQL**: Complex queries (e.g., `SELECT * FROM ontology_configs WHERE ...`)
@@ -2303,7 +2331,7 @@ This schema includes the `search_rem` tool which supports:
 | Property | Value |
 |----------|-------|
 | Table | `ontology_configs` |
-| Entity Key | `id` |
+| Entity Key | `name` |
 | Embedding Fields | `description` |
 | Tools | `search_rem` |
@@ -2395,9 +2423,9 @@ This schema includes the `search_rem` tool which supports:
 - **Optional**
 ',
-    '{"type": "object", "description": "User configuration for automatic ontology extraction.\n\n    Attributes:\n        name: Human-readable config name\n        agent_schema_id: Foreign key to Schema entity to use for extraction\n        description: Purpose and scope of this config\n\n        # File matching rules (ANY matching rule triggers extraction)\n        mime_type_pattern: Regex pattern for file MIME types (e.g., \"application/pdf\")\n        uri_pattern: Regex pattern for file URIs (e.g., \"s3://bucket/resumes/.*\")\n        tag_filter: List of tags (file must have ALL tags to match)\n\n        # Execution control\n        priority: Execution order (higher = earlier, default 100)\n        enabled: Whether this config is active (default True)\n\n        # LLM provider configuration\n        provider_name: Optional LLM provider override (defaults to settings)\n        model_name: Optional model override (defaults to settings)\n\n    Inherited from CoreModel:\n        id, created_at, updated_at, deleted_at, tenant_id, user_id,\n        graph_edges, metadata, tags, column\n\n    Example Usage:\n        # CV extraction for recruitment\n        cv_config = OntologyConfig(\n            name=\"recruitment-cv-parser\",\n            agent_schema_id=\"cv-parser-v1\",\n            description=\"Extract candidate information from resumes\",\n            mime_type_pattern=\"application/pdf\",\n            uri_pattern=\".*/resumes/.*\",\n            tag_filter=[\"cv\", \"candidate\"],\n            priority=100,\n            enabled=True,\n            tenant_id=\"acme-corp\",\n            tags=[\"recruitment\", \"hr\"]\n        )\n\n        # Contract analysis for legal team\n        contract_config = OntologyConfig(\n            name=\"legal-contract-analyzer\",\n            agent_schema_id=\"contract-parser-v2\",\n            description=\"Extract key terms from supplier contracts\",\n            
mime_type_pattern=\"application/(pdf|msword|vnd.openxmlformats.*)\",\n            tag_filter=[\"legal\", \"contract\"],\n            priority=200,  # Higher priority = runs first\n            enabled=True,\n            provider_name=\"openai\",  # Override default provider\n            model_name=\"gpt-4.1\",\n            tenant_id=\"acme-corp\",\n            tags=[\"legal\", \"procurement\"]\n        )\n\n        # Medical records for healthcare\n        medical_config = OntologyConfig(\n            name=\"medical-records-extractor\",\n            agent_schema_id=\"medical-parser-v1\",\n            description=\"Extract diagnoses and treatments from medical records\",\n            mime_type_pattern=\"application/pdf\",\n            tag_filter=[\"medical\", \"patient-record\"],\n            priority=50,\n            enabled=True,\n            tenant_id=\"healthsystem\",\n            tags=[\"medical\", \"hipaa-compliant\"]\n        )\n    \n\nThis agent can search the `ontology_configs` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). 
Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "agent_schema_id": {"title": "Agent Schema Id", "type": "string"}, "description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Description"}, "mime_type_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Mime Type Pattern"}, "uri_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri Pattern"}, "tag_filter": {"default": [], "items": {"type": "string"}, "title": "Tag Filter", 
"type": "array"}, "priority": {"default": 100, "title": "Priority", "type": "integer"}, "enabled": {"default": true, "title": "Enabled", "type": "boolean"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}}, "required": ["name", "agent_schema_id"], "json_schema_extra": {"table_name": "ontology_configs", "entity_key_field": "id", "embedding_fields": ["description"], "fully_qualified_name": "rem.models.entities.ontology_config.OntologyConfig", "tools": ["search_rem"], "default_search_table": "ontology_configs", "has_embeddings": true}}'::jsonb,
+    '{"type": "object", "description": "User configuration for automatic ontology extraction.\n\n    Attributes:\n        name: Human-readable config name\n        agent_schema_id: Foreign key to Schema entity to use for extraction\n        description: Purpose and scope of this config\n\n        # File matching rules (ANY matching rule triggers extraction)\n        mime_type_pattern: Regex pattern for file MIME types (e.g., \"application/pdf\")\n        uri_pattern: Regex pattern for file URIs (e.g., \"s3://bucket/resumes/.*\")\n        tag_filter: List of tags (file must have ALL tags to match)\n\n        # Execution control\n        priority: Execution order (higher = earlier, default 100)\n        enabled: Whether this config is active (default True)\n\n        # LLM provider configuration\n        provider_name: Optional LLM provider override (defaults to settings)\n        model_name: Optional model override (defaults to settings)\n\n    Inherited from CoreModel:\n        id, created_at, updated_at, deleted_at, tenant_id, user_id,\n        graph_edges, metadata, tags, column\n\n    Example Usage:\n        # CV extraction for recruitment\n        cv_config = OntologyConfig(\n            name=\"recruitment-cv-parser\",\n            agent_schema_id=\"cv-parser-v1\",\n            description=\"Extract candidate information from resumes\",\n            mime_type_pattern=\"application/pdf\",\n            uri_pattern=\".*/resumes/.*\",\n            tag_filter=[\"cv\", \"candidate\"],\n            priority=100,\n            enabled=True,\n            tenant_id=\"acme-corp\",\n            tags=[\"recruitment\", \"hr\"]\n        )\n\n        # Contract analysis for legal team\n        contract_config = OntologyConfig(\n            name=\"legal-contract-analyzer\",\n            agent_schema_id=\"contract-parser-v2\",\n            description=\"Extract key terms from supplier contracts\",\n            
mime_type_pattern=\"application/(pdf|msword|vnd.openxmlformats.*)\",\n            tag_filter=[\"legal\", \"contract\"],\n            priority=200,  # Higher priority = runs first\n            enabled=True,\n            provider_name=\"openai\",  # Override default provider\n            model_name=\"gpt-4.1\",\n            tenant_id=\"acme-corp\",\n            tags=[\"legal\", \"procurement\"]\n        )\n\n        # Medical records for healthcare\n        medical_config = OntologyConfig(\n            name=\"medical-records-extractor\",\n            agent_schema_id=\"medical-parser-v1\",\n            description=\"Extract diagnoses and treatments from medical records\",\n            mime_type_pattern=\"application/pdf\",\n            tag_filter=[\"medical\", \"patient-record\"],\n            priority=50,\n            enabled=True,\n            tenant_id=\"healthsystem\",\n            tags=[\"medical\", \"hipaa-compliant\"]\n        )\n    \n\nThis agent can search the `ontology_configs` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). 
Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "agent_schema_id": {"title": "Agent Schema Id", "type": "string"}, "description": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Description"}, "mime_type_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Mime Type Pattern"}, "uri_pattern": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri Pattern"}, "tag_filter": {"default": [], "items": {"type": "string"}, "title": "Tag Filter", 
"type": "array"}, "priority": {"default": 100, "title": "Priority", "type": "integer"}, "enabled": {"default": true, "title": "Enabled", "type": "boolean"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}}, "required": ["name", "agent_schema_id"], "json_schema_extra": {"table_name": "ontology_configs", "entity_key_field": "name", "embedding_fields": ["description"], "fully_qualified_name": "rem.models.entities.ontology_config.OntologyConfig", "tools": ["search_rem"], "default_search_table": "ontology_configs", "has_embeddings": true}}'::jsonb,
     'entity',
-    '{"table_name": "ontology_configs", "entity_key_field": "id", "embedding_fields": ["description"], "fqn": "rem.models.entities.ontology_config.OntologyConfig"}'::jsonb
+    '{"table_name": "ontology_configs", "entity_key_field": "name", "embedding_fields": ["description"], "fqn": "rem.models.entities.ontology_config.OntologyConfig"}'::jsonb
 )
 ON CONFLICT (id) DO UPDATE SET
     name = EXCLUDED.name,
@@ -2565,12 +2593,12 @@ VALUES (
 ## Overview
 The `Schema` entity is stored in the `schemas` table. Each record is uniquely
-identified by its `id` field for lookups and graph traversal.
+identified by its `name` field for lookups and graph traversal.
 ## Search Capabilities
 This schema includes the `search_rem` tool which supports:
-- **LOOKUP**: O(1) exact match by id (e.g., `LOOKUP "entity-name"`)
+- **LOOKUP**: O(1) exact match by name (e.g., `LOOKUP "entity-name"`)
 - **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
 - **SEARCH**: Semantic vector search on content (e.g., `SEARCH "concept" FROM schemas LIMIT 10`)
 - **SQL**: Complex queries (e.g., `SELECT * FROM schemas WHERE ...`)
@@ -2580,7 +2608,7 @@ This schema includes the `search_rem` tool which supports:
 | Property | Value |
 |----------|-------|
 | Table | `schemas` |
-| Entity Key | `id` |
+| Entity Key | `name` |
 | Embedding Fields | `content` |
 | Tools | `search_rem` |
@@ -2662,9 +2690,9 @@ This schema includes the `search_rem` tool which supports:
 - JSON paths in extracted_data to embed for semantic search. Example: [''summary'', ''candidate_name'', ''skills''] for CV extraction. Values will be concatenated and embedded using configured embedding provider.
 ',
-    '{"type": "object", "description": "\n    Agent schema definition.\n\n    Schemas define agents that can be dynamically loaded into Pydantic AI.\n    They store JsonSchema specifications with embedded metadata for tools,\n    resources, and system prompts.\n\n    For ontology extraction agents:\n    - `provider_configs` enables multi-provider support (test across Anthropic, OpenAI, etc.)\n    - `embedding_fields` specifies which output fields should be embedded for semantic search\n\n    Tenant isolation is provided via CoreModel.tenant_id field.\n    \n\nThis agent can search the `schemas` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "Human-readable schema name (used as identifier)", "title": "Name", "type": "string"}, "content": {"default": "", "description": "Markdown documentation and instructions for the schema", "title": "Content", "type": "string"}, "spec": {"additionalProperties": true, "description": "JsonSchema specification defining the agent structure and capabilities", "title": "Spec", "type": "object"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Schema category distinguishing schema types. Values: ''agent'' (AI agents), ''evaluator'' (LLM-as-a-Judge evaluators). Maps directly from json_schema_extra.kind field during ingestion.", "title": "Category"}, "provider_configs": {"description": "Optional provider configurations for multi-provider testing. Each dict has ''provider_name'' and ''model_name''. Example: [{''provider_name'': ''anthropic'', ''model_name'': ''claude-sonnet-4-5''}]", "items": {"additionalProperties": true, "type": "object"}, "title": "Provider Configs", "type": "array"}, "embedding_fields": {"description": "JSON paths in extracted_data to embed for semantic search. Example: [''summary'', ''candidate_name'', ''skills''] for CV extraction. 
Values will be concatenated and embedded using configured embedding provider.", "items": {"type": "string"}, "title": "Embedding Fields", "type": "array"}}, "required": ["name", "spec"], "json_schema_extra": {"table_name": "schemas", "entity_key_field": "id", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.schema.Schema", "tools": ["search_rem"], "default_search_table": "schemas", "has_embeddings": true}}'::jsonb,
+    '{"type": "object", "description": "\n    Agent schema definition.\n\n    Schemas define agents that can be dynamically loaded into Pydantic AI.\n    They store JsonSchema specifications with embedded metadata for tools,\n    resources, and system prompts.\n\n    For ontology extraction agents:\n    - `provider_configs` enables multi-provider support (test across Anthropic, OpenAI, etc.)\n    - `embedding_fields` specifies which output fields should be embedded for semantic search\n\n    Tenant isolation is provided via CoreModel.tenant_id field.\n    \n\nThis agent can search the `schemas` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"description": "Human-readable schema name (used as identifier)", "title": "Name", "type": "string"}, "content": {"default": "", "description": "Markdown documentation and instructions for the schema", "title": "Content", "type": "string"}, "spec": {"additionalProperties": true, "description": "JsonSchema specification defining the agent structure and capabilities", "title": "Spec", "type": "object"}, "category": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Schema category distinguishing schema types. Values: ''agent'' (AI agents), ''evaluator'' (LLM-as-a-Judge evaluators). Maps directly from json_schema_extra.kind field during ingestion.", "title": "Category"}, "provider_configs": {"description": "Optional provider configurations for multi-provider testing. Each dict has ''provider_name'' and ''model_name''. Example: [{''provider_name'': ''anthropic'', ''model_name'': ''claude-sonnet-4-5''}]", "items": {"additionalProperties": true, "type": "object"}, "title": "Provider Configs", "type": "array"}, "embedding_fields": {"description": "JSON paths in extracted_data to embed for semantic search. Example: [''summary'', ''candidate_name'', ''skills''] for CV extraction. 
Values will be concatenated and embedded using configured embedding provider.", "items": {"type": "string"}, "title": "Embedding Fields", "type": "array"}}, "required": ["name", "spec"], "json_schema_extra": {"table_name": "schemas", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.schema.Schema", "tools": ["search_rem"], "default_search_table": "schemas", "has_embeddings": true}}'::jsonb,
     'entity',
-    '{"table_name": "schemas", "entity_key_field": "id", "embedding_fields": ["content"], "fqn": "rem.models.entities.schema.Schema"}'::jsonb
+    '{"table_name": "schemas", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.schema.Schema"}'::jsonb
 )
 ON CONFLICT (id) DO UPDATE SET
     name = EXCLUDED.name,
@@ -3115,7 +3143,7 @@ BEGIN
     RAISE NOTICE '  ✓ image_resources (1 embeddable fields)';
     RAISE NOTICE '  ✓ messages (1 embeddable fields)';
     RAISE NOTICE '  ✓ moments (1 embeddable fields)';
-    RAISE NOTICE '  ✓ ontologies';
+    RAISE NOTICE '  ✓ ontologies (1 embeddable fields)';
     RAISE NOTICE '  ✓ ontology_configs (1 embeddable fields)';
     RAISE NOTICE '  ✓ resources (1 embeddable fields)';
     RAISE NOTICE '  ✓ schemas (1 embeddable fields)';

remdb 0.3.172__py3-none-any.whl → 0.3.223__py3-none-any.whl

Potentially problematic release.

remdb 0.3.172__py3-none-any.whl → 0.3.223__py3-none-any.whl