remdb 0.3.242__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +129 -0
- rem/agentic/README.md +760 -0
- rem/agentic/__init__.py +54 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +38 -0
- rem/agentic/agents/agent_manager.py +311 -0
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +425 -0
- rem/agentic/context_builder.py +360 -0
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +273 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +240 -0
- rem/agentic/providers/phoenix.py +926 -0
- rem/agentic/providers/pydantic_ai.py +854 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +737 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +242 -0
- rem/api/README.md +657 -0
- rem/api/deps.py +253 -0
- rem/api/main.py +460 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +820 -0
- rem/api/mcp_router/server.py +243 -0
- rem/api/mcp_router/tools.py +1605 -0
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +520 -0
- rem/api/routers/auth.py +898 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/child_streaming.py +394 -0
- rem/api/routers/chat/completions.py +702 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +202 -0
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +546 -0
- rem/api/routers/chat/streaming.py +950 -0
- rem/api/routers/chat/streaming_utils.py +327 -0
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +87 -0
- rem/api/routers/feedback.py +276 -0
- rem/api/routers/messages.py +620 -0
- rem/api/routers/models.py +86 -0
- rem/api/routers/query.py +362 -0
- rem/api/routers/shared_sessions.py +422 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +36 -0
- rem/auth/jwt.py +367 -0
- rem/auth/middleware.py +318 -0
- rem/auth/providers/__init__.py +16 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/email.py +215 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +517 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +299 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +549 -0
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +495 -0
- rem/cli/commands/db.py +828 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1698 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +388 -0
- rem/cli/commands/query.py +109 -0
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +230 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/commands/session.py +453 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +123 -0
- rem/config.py +244 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +70 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +672 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +246 -0
- rem/models/entities/__init__.py +68 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +64 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +181 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/session.py +84 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +93 -0
- rem/py.typed +0 -0
- rem/registry.py +373 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/agent-builder.yaml +235 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +132 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +18 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +760 -0
- rem/services/content/service.py +762 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +322 -0
- rem/services/dreaming/moment_service.py +251 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +522 -0
- rem/services/email/templates.py +360 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +127 -0
- rem/services/embeddings/worker.py +435 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +960 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +757 -0
- rem/services/postgres/__init__.py +49 -0
- rem/services/postgres/diff_service.py +599 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
- rem/services/postgres/register_type.py +353 -0
- rem/services/postgres/repository.py +481 -0
- rem/services/postgres/schema_generator.py +661 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +355 -0
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +318 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +180 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +608 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +13 -0
- rem/services/session/compression.py +488 -0
- rem/services/session/pydantic_messages.py +310 -0
- rem/services/session/reload.py +85 -0
- rem/services/user_service.py +130 -0
- rem/settings.py +1877 -0
- rem/sql/background_indexes.sql +52 -0
- rem/sql/migrations/001_install.sql +983 -0
- rem/sql/migrations/002_install_models.sql +3157 -0
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +282 -0
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +628 -0
- rem/utils/__init__.py +61 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +436 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/files.py +323 -0
- rem/utils/markdown.py +16 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +492 -0
- rem/utils/schema_loader.py +649 -0
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +350 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +325 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +7 -0
- rem/workers/db_listener.py +579 -0
- rem/workers/db_maintainer.py +74 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- rem/workers/unlogged_maintainer.py +463 -0
- remdb-0.3.242.dist-info/METADATA +1632 -0
- remdb-0.3.242.dist-info/RECORD +235 -0
- remdb-0.3.242.dist-info/WHEEL +4 -0
- remdb-0.3.242.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,983 @@
|
|
|
1
|
+
-- REM Database Installation Script
|
|
2
|
+
-- Description: Core database setup with extensions and infrastructure
|
|
3
|
+
-- Version: 1.0.0
|
|
4
|
+
-- Date: 2025-01-18
|
|
5
|
+
--
|
|
6
|
+
-- This script sets up:
|
|
7
|
+
-- 1. Required PostgreSQL extensions (pgvector, pg_trgm, uuid-ossp)
|
|
8
|
+
-- 2. Migration tracking table
|
|
9
|
+
-- 3. KV_STORE UNLOGGED cache table
|
|
10
|
+
-- 4. Helper functions
|
|
11
|
+
--
|
|
12
|
+
-- Usage:
|
|
13
|
+
-- psql -d remdb -f rem/sql/migrations/001_install.sql
|
|
14
|
+
--
|
|
15
|
+
-- Dependencies:
|
|
16
|
+
-- - PostgreSQL 16+
|
|
17
|
+
-- - pgvector extension compiled and available
|
|
18
|
+
-- - pg_trgm extension (usually included)
|
|
19
|
+
|
|
20
|
+
-- ============================================================================
|
|
21
|
+
-- EXTENSIONS
|
|
22
|
+
-- ============================================================================
|
|
23
|
+
|
|
24
|
+
-- Enable pgvector extension for vector embeddings
|
|
25
|
+
CREATE EXTENSION IF NOT EXISTS vector;
|
|
26
|
+
|
|
27
|
+
-- Enable pg_trgm extension for fuzzy text search
|
|
28
|
+
CREATE EXTENSION IF NOT EXISTS pg_trgm;
|
|
29
|
+
|
|
30
|
+
-- Enable uuid-ossp for UUID generation
|
|
31
|
+
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
|
|
32
|
+
|
|
33
|
+
-- Verify critical extensions
|
|
34
|
+
DO $$
BEGIN
    -- Fail fast if an extension that later objects depend on is missing.
    -- vector is checked first, then pg_trgm; uuid-ossp is not verified here.
    PERFORM 1 FROM pg_extension WHERE extname = 'vector';
    IF NOT FOUND THEN
        RAISE EXCEPTION 'pgvector extension failed to install. Ensure pgvector is compiled and available.';
    END IF;

    PERFORM 1 FROM pg_extension WHERE extname = 'pg_trgm';
    IF NOT FOUND THEN
        RAISE EXCEPTION 'pg_trgm extension failed to install.';
    END IF;

    RAISE NOTICE '✓ All required extensions installed successfully';
END $$;
|
|
46
|
+
|
|
47
|
+
-- ============================================================================
|
|
48
|
+
-- NORMALIZATION HELPER
|
|
49
|
+
-- ============================================================================
|
|
50
|
+
|
|
51
|
+
-- Normalize entity keys to lower-kebab-case for consistent lookups
|
|
52
|
+
-- "Mood Disorder" -> "mood-disorder"
|
|
53
|
+
-- "mood_disorder" -> "mood-disorder"
|
|
54
|
+
-- "MoodDisorder" -> "mood-disorder"
|
|
55
|
+
CREATE OR REPLACE FUNCTION normalize_key(input TEXT)
RETURNS TEXT AS $$
DECLARE
    v_key TEXT;
BEGIN
    -- Step 1: split a lower-to-upper camelCase boundary with a hyphen ("moodDisorder" -> "mood-Disorder").
    v_key := regexp_replace(input, '([a-z])([A-Z])', '\1-\2', 'g');

    -- Step 2: turn every run of underscores/whitespace into a single hyphen.
    v_key := regexp_replace(v_key, '[_\s]+', '-', 'g');

    -- Step 3: collapse any run of hyphens produced by the previous steps.
    v_key := regexp_replace(v_key, '-+', '-', 'g');

    -- Step 4: lower-case the result. A NULL input flows through as NULL.
    RETURN lower(v_key);
END;
$$ LANGUAGE plpgsql IMMUTABLE;
|
|
69
|
+
|
|
70
|
+
COMMENT ON FUNCTION normalize_key IS
|
|
71
|
+
'Normalizes entity keys to lower-kebab-case for consistent lookups.
|
|
72
|
+
Examples: "Mood Disorder" -> "mood-disorder", "mood_disorder" -> "mood-disorder"';
|
|
73
|
+
|
|
74
|
+
-- ============================================================================
|
|
75
|
+
-- MIGRATION TRACKING
|
|
76
|
+
-- ============================================================================
|
|
77
|
+
|
|
78
|
+
-- rem_migrations: bookkeeping table with one row per migration script name.
--   id                 surrogate key (SERIAL)
--   name               script identifier; UNIQUE, so a given name can be recorded only once
--   type               category tag (expected values are listed in the inline comment)
--   version, checksum  optional (nullable)
--   applied_at         defaults to CURRENT_TIMESTAMP; TIMESTAMP here is without time zone
--   applied_by         defaults to CURRENT_USER (the role executing the insert)
--   execution_time_ms  optional (nullable)
--   success            defaults to TRUE
CREATE TABLE IF NOT EXISTS rem_migrations (
|
|
79
|
+
id SERIAL PRIMARY KEY,
|
|
80
|
+
name VARCHAR(255) NOT NULL UNIQUE,
|
|
81
|
+
type VARCHAR(50) NOT NULL, -- 'install', 'models', 'data'
|
|
82
|
+
version VARCHAR(50),
|
|
83
|
+
checksum VARCHAR(64),
|
|
84
|
+
applied_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
85
|
+
applied_by VARCHAR(100) DEFAULT CURRENT_USER,
|
|
86
|
+
execution_time_ms INTEGER,
|
|
87
|
+
success BOOLEAN DEFAULT TRUE
|
|
88
|
+
);
|
|
89
|
+
|
|
90
|
+
CREATE INDEX IF NOT EXISTS idx_rem_migrations_type ON rem_migrations(type);
|
|
91
|
+
CREATE INDEX IF NOT EXISTS idx_rem_migrations_applied_at ON rem_migrations(applied_at);
|
|
92
|
+
|
|
93
|
+
COMMENT ON TABLE rem_migrations IS
|
|
94
|
+
'Tracks all applied migrations including install scripts and model schema updates';
|
|
95
|
+
|
|
96
|
+
-- ============================================================================
|
|
97
|
+
-- KV_STORE CACHE
|
|
98
|
+
-- ============================================================================
|
|
99
|
+
|
|
100
|
+
-- KV_STORE: UNLOGGED table for O(1) entity lookups in REM
|
|
101
|
+
--
|
|
102
|
+
-- Design rationale:
|
|
103
|
+
-- - UNLOGGED: Faster writes, no WAL overhead (acceptable for cache)
|
|
104
|
+
-- - Rebuilds automatically from primary tables on restart
|
|
105
|
+
-- - Supports LOOKUP queries with O(1) performance
|
|
106
|
+
-- - Supports FUZZY queries with trigram indexes
|
|
107
|
+
-- - User-scoped filtering when user_id IS NOT NULL
|
|
108
|
+
-- - Tenant isolation via tenant_id
|
|
109
|
+
--
|
|
110
|
+
-- Schema:
|
|
111
|
+
-- - entity_key: Natural language label (e.g., "sarah-chen", "project-alpha")
|
|
112
|
+
-- - entity_type: Table name (e.g., "resources", "moments")
|
|
113
|
+
-- - entity_id: UUID from primary table
|
|
114
|
+
-- - tenant_id: Tenant identifier for multi-tenancy
|
|
115
|
+
-- - user_id: Optional user scoping (NULL = system-level)
|
|
116
|
+
-- - content_summary: Denormalized text for fuzzy search
|
|
117
|
+
-- - metadata: JSONB for additional filtering
|
|
118
|
+
-- - updated_at: Timestamp for cache invalidation
|
|
119
|
+
|
|
120
|
+
-- kv_store: UNLOGGED cache table. No PRIMARY KEY is declared in this statement;
-- uniqueness is provided by the idx_kv_store_tenant_key index created right after it.
--   entity_key, entity_type, entity_id  all NOT NULL
--   tenant_id, user_id                  nullable
--   content_summary                     nullable free text
--   metadata                            defaults to an empty JSON object
--   graph_edges                         defaults to an empty JSON array
--   created_at, updated_at              default to CURRENT_TIMESTAMP; TIMESTAMP here is without time zone
CREATE UNLOGGED TABLE IF NOT EXISTS kv_store (
|
|
121
|
+
entity_key VARCHAR(255) NOT NULL,
|
|
122
|
+
entity_type VARCHAR(100) NOT NULL,
|
|
123
|
+
entity_id UUID NOT NULL,
|
|
124
|
+
tenant_id VARCHAR(100), -- NULL = public/shared data
|
|
125
|
+
user_id VARCHAR(100),
|
|
126
|
+
content_summary TEXT,
|
|
127
|
+
metadata JSONB DEFAULT '{}',
|
|
128
|
+
graph_edges JSONB DEFAULT '[]'::jsonb, -- Cached edges for fast graph traversal
|
|
129
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
130
|
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
131
|
+
);
|
|
132
|
+
|
|
133
|
+
-- Unique constraint on (tenant_id, entity_key) using COALESCE to handle NULL tenant_id
|
|
134
|
+
CREATE UNIQUE INDEX IF NOT EXISTS idx_kv_store_tenant_key ON kv_store (COALESCE(tenant_id, ''), entity_key);
|
|
135
|
+
|
|
136
|
+
-- Index for user-scoped lookups (when user_id IS NOT NULL)
|
|
137
|
+
CREATE INDEX IF NOT EXISTS idx_kv_store_user ON kv_store (tenant_id, user_id)
|
|
138
|
+
WHERE user_id IS NOT NULL;
|
|
139
|
+
|
|
140
|
+
-- Index for entity_id reverse lookup (find key by ID)
|
|
141
|
+
CREATE INDEX IF NOT EXISTS idx_kv_store_entity_id ON kv_store (entity_id);
|
|
142
|
+
|
|
143
|
+
-- Trigram index for fuzzy text search (FUZZY queries)
|
|
144
|
+
CREATE INDEX IF NOT EXISTS idx_kv_store_key_trgm ON kv_store
|
|
145
|
+
USING gin (entity_key gin_trgm_ops);
|
|
146
|
+
|
|
147
|
+
-- Trigram index for content_summary fuzzy search
|
|
148
|
+
CREATE INDEX IF NOT EXISTS idx_kv_store_content_trgm ON kv_store
|
|
149
|
+
USING gin (content_summary gin_trgm_ops);
|
|
150
|
+
|
|
151
|
+
-- GIN index for metadata JSONB queries
|
|
152
|
+
CREATE INDEX IF NOT EXISTS idx_kv_store_metadata ON kv_store
|
|
153
|
+
USING gin (metadata);
|
|
154
|
+
|
|
155
|
+
-- GIN index for graph_edges JSONB queries (graph traversal)
|
|
156
|
+
CREATE INDEX IF NOT EXISTS idx_kv_store_graph_edges ON kv_store
|
|
157
|
+
USING gin (graph_edges);
|
|
158
|
+
|
|
159
|
+
-- Index for entity_type filtering
|
|
160
|
+
CREATE INDEX IF NOT EXISTS idx_kv_store_type ON kv_store (entity_type);
|
|
161
|
+
|
|
162
|
+
-- Comments
|
|
163
|
+
COMMENT ON TABLE kv_store IS
|
|
164
|
+
'UNLOGGED cache for O(1) entity lookups. Supports REM LOOKUP and FUZZY queries. Rebuilt from primary tables on restart.';
|
|
165
|
+
|
|
166
|
+
COMMENT ON COLUMN kv_store.entity_key IS
|
|
167
|
+
'Natural language label for entity (e.g., "sarah-chen", "project-alpha")';
|
|
168
|
+
|
|
169
|
+
COMMENT ON COLUMN kv_store.entity_type IS
|
|
170
|
+
'Source table name (e.g., "resources", "moments", "users")';
|
|
171
|
+
|
|
172
|
+
COMMENT ON COLUMN kv_store.entity_id IS
|
|
173
|
+
'UUID from primary table for reverse lookup';
|
|
174
|
+
|
|
175
|
+
COMMENT ON COLUMN kv_store.tenant_id IS
|
|
176
|
+
'Tenant identifier for multi-tenancy isolation. NULL = public/shared data visible to all.';
|
|
177
|
+
|
|
178
|
+
COMMENT ON COLUMN kv_store.user_id IS
|
|
179
|
+
'Optional user scoping. NULL = system-level entity, visible to all users in tenant';
|
|
180
|
+
|
|
181
|
+
COMMENT ON COLUMN kv_store.content_summary IS
|
|
182
|
+
'Denormalized text summary for fuzzy search. Concatenated from content fields.';
|
|
183
|
+
|
|
184
|
+
-- ============================================================================
|
|
185
|
+
-- HELPER FUNCTIONS
|
|
186
|
+
-- ============================================================================
|
|
187
|
+
|
|
188
|
+
-- Function to rebuild KV_STORE from primary tables
|
|
189
|
+
--
|
|
190
|
+
-- IMPORTANT: You should NOT need to call this during normal operations!
|
|
191
|
+
-- KV store is automatically populated via triggers on INSERT/UPDATE/DELETE.
|
|
192
|
+
--
|
|
193
|
+
-- Only call this function after:
|
|
194
|
+
-- 1. Database crash/restart (UNLOGGED table lost)
|
|
195
|
+
-- 2. Backup restoration (UNLOGGED tables not backed up)
|
|
196
|
+
-- 3. Bulk imports that bypass triggers (COPY, pg_restore --disable-triggers)
|
|
197
|
+
--
|
|
198
|
+
-- Usage: SELECT * FROM rebuild_kv_store();
|
|
199
|
+
CREATE OR REPLACE FUNCTION rebuild_kv_store()
RETURNS TABLE(table_name TEXT, rows_inserted BIGINT) AS $$
-- Empties kv_store, then re-fires the *_kv_store triggers by issuing a no-op
-- UPDATE on every table in schema public that carries such a trigger.
--
-- Returns one row per processed table:
--   table_name     unqualified table name
--   rows_inserted  number of rows touched by the no-op UPDATE (ROW_COUNT),
--                  i.e. rows for which the trigger was fired
--
-- Requires each discovered table to have updated_at and deleted_at columns;
-- a table lacking them makes the UPDATE fail and aborts the whole rebuild.
DECLARE
    table_rec RECORD;
    rows_affected BIGINT;
BEGIN
    -- Clear existing cache
    DELETE FROM kv_store;
    RAISE NOTICE 'Cleared KV_STORE cache';

    -- information_schema.triggers lists one row per trigger event, hence DISTINCT.
    -- The underscores are escaped so that they match literally: in LIKE a bare
    -- "_" is a single-character wildcard and would also accept unrelated names.
    FOR table_rec IN
        SELECT DISTINCT
            t.event_object_schema AS sch,
            t.event_object_table AS tbl
        FROM information_schema.triggers t
        WHERE t.trigger_name LIKE '%!_kv!_store' ESCAPE '!'
          AND t.trigger_schema = 'public'
        ORDER BY tbl
    LOOP
        -- Force trigger execution by updating all non-deleted rows.
        -- The table is schema-qualified so the UPDATE targets exactly the table
        -- the trigger was found on, regardless of the caller's search_path.
        EXECUTE format(
            'UPDATE %I.%I SET updated_at = updated_at WHERE deleted_at IS NULL',
            table_rec.sch,
            table_rec.tbl
        );

        GET DIAGNOSTICS rows_affected = ROW_COUNT;

        table_name := table_rec.tbl;
        rows_inserted := rows_affected;
        RETURN NEXT;

        RAISE NOTICE 'Rebuilt % KV entries for %', rows_affected, table_rec.tbl;
    END LOOP;
END;
$$ LANGUAGE plpgsql;
|
|
236
|
+
|
|
237
|
+
COMMENT ON FUNCTION rebuild_kv_store() IS
|
|
238
|
+
'Rebuild KV_STORE cache from all entity tables. Call after database restart.';
|
|
239
|
+
|
|
240
|
+
-- ============================================================================
|
|
241
|
+
-- REM QUERY FUNCTIONS
|
|
242
|
+
-- ============================================================================
|
|
243
|
+
|
|
244
|
+
-- REM LOOKUP: O(1) entity lookup by natural key
|
|
245
|
+
-- Returns structured columns extracted from entity records
|
|
246
|
+
-- Parameters: entity_key, tenant_id (for backward compat), user_id (actual filter)
|
|
247
|
+
-- Note: tenant_id is kept for backward compatibility; it is used as the user filter only when user_id is NULL
|
|
248
|
+
-- Note: Includes user-owned AND public (NULL user_id) resources
|
|
249
|
+
CREATE OR REPLACE FUNCTION rem_lookup(
    p_entity_key VARCHAR(255),
    p_tenant_id VARCHAR(100),
    p_user_id VARCHAR(100)
)
RETURNS TABLE(
    entity_type VARCHAR(100),
    data JSONB
) AS $$
-- Resolves an entity key to its source table through kv_store, then returns the
-- matching rows of that table as JSONB.
--
-- Parameters:
--   p_entity_key  key to look up; normalized with normalize_key() for the kv_store probe
--   p_tenant_id   only used as the owner filter when p_user_id is NULL
--   p_user_id     owner filter; rows with a NULL user_id always qualify as well
--
-- Returns zero rows when the key is not present in kv_store.
DECLARE
    v_owner VARCHAR(100) := COALESCE(p_user_id, p_tenant_id);
    v_table VARCHAR(100);
BEGIN
    -- Step 1: which table does this key belong to?
    SELECT kv.entity_type INTO v_table
    FROM kv_store kv
    WHERE kv.entity_key = normalize_key(p_entity_key)
      AND (kv.user_id IS NULL OR kv.user_id = v_owner)
    LIMIT 1;

    IF v_table IS NOT NULL THEN
        -- Step 2: return the raw rows from that table as JSONB.
        -- NOTE(review): the kv_store probe uses the normalized key, while this
        -- filter compares t.name with the raw p_entity_key. Whether the two can
        -- diverge depends on how the kv_store triggers derive entity_key, which
        -- is not visible here - confirm.
        RETURN QUERY EXECUTE format('
            SELECT
                %L::VARCHAR(100) AS entity_type,
                row_to_json(t)::jsonb AS data
            FROM %I t
            WHERE (t.user_id = $1 OR t.user_id IS NULL)
            AND t.name = $2
            AND t.deleted_at IS NULL
        ', v_table, v_table)
        USING v_owner, p_entity_key;
        RETURN;
    END IF;

    -- SELF-HEALING: a miss on an empty cache asks for a rebuild; the current
    -- call still returns no rows.
    IF rem_kv_store_empty(v_owner) THEN
        PERFORM maybe_trigger_kv_rebuild(v_owner, 'rem_lookup');
    END IF;
END;
$$ LANGUAGE plpgsql STABLE;
|
|
298
|
+
|
|
299
|
+
COMMENT ON FUNCTION rem_lookup IS
|
|
300
|
+
'REM LOOKUP: O(1) entity lookup. Returns user-owned AND public (NULL user_id) entities.';
|
|
301
|
+
|
|
302
|
+
-- REM FETCH: Fetch full entity records from multiple tables
|
|
303
|
+
-- Takes JSONB mapping of {table_name: [entity_keys]}, fetches all records
|
|
304
|
+
-- Returns complete entity records as JSONB (not just KV store metadata)
|
|
305
|
+
-- Note: Includes user-owned AND public (NULL user_id) resources
|
|
306
|
+
CREATE OR REPLACE FUNCTION rem_fetch(
    p_entities_by_table JSONB,
    p_user_id VARCHAR(100)
)
RETURNS TABLE(
    entity_key VARCHAR(255),
    entity_type VARCHAR(100),
    entity_record JSONB
) AS $$
-- Batch fetch: for every {table_name: [names...]} pair in p_entities_by_table,
-- returns the non-deleted rows of that table whose name is in the list and
-- whose user_id equals p_user_id or is NULL.
--
-- Each value must be a JSON array of strings; each key is used as a table
-- identifier (quoted through %I).
DECLARE
    v_entry RECORD;
BEGIN
    FOR v_entry IN
        SELECT e.key AS tbl, e.value AS names
        FROM jsonb_each(p_entities_by_table) AS e
    LOOP
        -- One dynamic query per table; the table name appears once as a literal
        -- (entity_type column) and once as an identifier (FROM clause).
        RETURN QUERY EXECUTE format('
            SELECT
                t.name::VARCHAR(255) AS entity_key,
                %L::VARCHAR(100) AS entity_type,
                row_to_json(t)::jsonb AS entity_record
            FROM %I t
            WHERE t.name = ANY(SELECT jsonb_array_elements_text($1))
            AND (t.user_id = $2 OR t.user_id IS NULL)
            AND t.deleted_at IS NULL
        ', v_entry.tbl, v_entry.tbl)
        USING v_entry.names, p_user_id;
    END LOOP;
END;
$$ LANGUAGE plpgsql STABLE;
|
|
340
|
+
|
|
341
|
+
COMMENT ON FUNCTION rem_fetch IS
|
|
342
|
+
'REM FETCH: Batch fetch entities. Returns user-owned AND public (NULL user_id) entities.';
|
|
343
|
+
|
|
344
|
+
-- REM FUZZY: Fuzzy text search using pg_trgm similarity
|
|
345
|
+
-- Returns raw entity data as JSONB for LLM consumption
|
|
346
|
+
-- Note: Includes user-owned AND public (NULL user_id) resources
|
|
347
|
+
CREATE OR REPLACE FUNCTION rem_fuzzy(
    p_query TEXT,
    p_tenant_id VARCHAR(100),
    p_threshold REAL DEFAULT 0.3,
    p_limit INTEGER DEFAULT 10,
    p_user_id VARCHAR(100) DEFAULT NULL
)
RETURNS TABLE(
    entity_type VARCHAR(100),
    similarity_score REAL,
    data JSONB
) AS $$
-- Trigram search over kv_store.entity_key, followed by a batch fetch of the
-- matching records through rem_fetch().
--
-- Parameters:
--   p_query      text to compare against entity keys
--   p_tenant_id  only used as the owner filter when p_user_id is NULL
--   p_threshold  minimum similarity() score for a key to qualify
--   p_limit      maximum number of kv_store matches considered
--   p_user_id    owner filter; rows with a NULL user_id always qualify as well
--
-- NOTE(review): the % operator applies the pg_trgm.similarity_threshold
-- setting in addition to p_threshold, so a p_threshold below that setting
-- would not widen the result set - confirm this is intended.
DECLARE
    v_owner VARCHAR(100) := COALESCE(p_user_id, p_tenant_id);
    v_keys_by_table JSONB;
BEGIN
    -- Collect the best matches and group them into {table: [keys]} in one pass.
    -- All column references are qualified because entity_type is also an
    -- output parameter of this function.
    SELECT COALESCE(jsonb_object_agg(g.tbl, g.ekeys), '{}'::jsonb)
    INTO v_keys_by_table
    FROM (
        SELECT m.tbl, jsonb_agg(m.ekey) AS ekeys
        FROM (
            SELECT
                kv.entity_type AS tbl,
                kv.entity_key AS ekey,
                similarity(kv.entity_key, p_query) AS sim_score
            FROM kv_store kv
            WHERE (kv.user_id = v_owner OR kv.user_id IS NULL)
              AND kv.entity_key % p_query  -- Trigram similarity operator
              AND similarity(kv.entity_key, p_query) >= p_threshold
            ORDER BY sim_score DESC
            LIMIT p_limit
        ) m
        GROUP BY m.tbl
    ) g;

    -- SELF-HEALING: If no matches and cache is empty, trigger rebuild
    IF v_keys_by_table = '{}'::jsonb AND rem_kv_store_empty(v_owner) THEN
        PERFORM maybe_trigger_kv_rebuild(v_owner, 'rem_fuzzy');
    END IF;

    -- Fetch the full records and score them against the query again;
    -- column 2 of the select list is the similarity score.
    RETURN QUERY
    SELECT
        f.entity_type::VARCHAR(100),
        similarity(f.entity_key, p_query) AS similarity_score,
        f.entity_record AS data
    FROM rem_fetch(v_keys_by_table, v_owner) f
    ORDER BY 2 DESC;
END;
$$ LANGUAGE plpgsql STABLE;
|
|
414
|
+
|
|
415
|
+
COMMENT ON FUNCTION rem_fuzzy IS
|
|
416
|
+
'REM FUZZY: Fuzzy text search. Returns user-owned AND public (NULL user_id) entities.';
|
|
417
|
+
|
|
418
|
+
-- ============================================================================
|
|
419
|
+
-- REM TRAVERSE (Graph Traversal)
|
|
420
|
+
-- ============================================================================
|
|
421
|
+
|
|
422
|
+
-- REM TRAVERSE: Recursive graph traversal following edges
|
|
423
|
+
-- Explores graph_edges starting from entity_key up to max_depth
|
|
424
|
+
-- Uses cached kv_store.graph_edges for fast traversal (no polymorphic view!)
|
|
425
|
+
-- When keys_only=false, automatically fetches full entity records
|
|
426
|
+
-- Note: Includes user-owned AND public (NULL user_id) resources
|
|
427
|
+
CREATE OR REPLACE FUNCTION rem_traverse(
|
|
428
|
+
p_entity_key VARCHAR(255),
|
|
429
|
+
p_tenant_id VARCHAR(100), -- Backward compat parameter (not used for filtering)
|
|
430
|
+
p_user_id VARCHAR(100),
|
|
431
|
+
p_max_depth INTEGER DEFAULT 1,
|
|
432
|
+
p_rel_type VARCHAR(100) DEFAULT NULL,
|
|
433
|
+
p_keys_only BOOLEAN DEFAULT FALSE
|
|
434
|
+
)
|
|
435
|
+
RETURNS TABLE(
|
|
436
|
+
depth INTEGER,
|
|
437
|
+
entity_key VARCHAR(255),
|
|
438
|
+
entity_type VARCHAR(100),
|
|
439
|
+
entity_id UUID,
|
|
440
|
+
rel_type VARCHAR(100),
|
|
441
|
+
rel_weight REAL,
|
|
442
|
+
path TEXT[],
|
|
443
|
+
entity_record JSONB
|
|
444
|
+
) AS $$
|
|
445
|
+
DECLARE
|
|
446
|
+
graph_keys RECORD;
|
|
447
|
+
entities_by_table JSONB := '{}'::jsonb;
|
|
448
|
+
table_keys JSONB;
|
|
449
|
+
effective_user_id VARCHAR(100);
|
|
450
|
+
v_found_start BOOLEAN := FALSE;
|
|
451
|
+
BEGIN
|
|
452
|
+
effective_user_id := COALESCE(p_user_id, p_tenant_id);
|
|
453
|
+
|
|
454
|
+
-- Check if start entity exists in kv_store
|
|
455
|
+
SELECT TRUE INTO v_found_start
|
|
456
|
+
FROM kv_store kv
|
|
457
|
+
WHERE (kv.user_id = effective_user_id OR kv.user_id IS NULL)
|
|
458
|
+
AND kv.entity_key = normalize_key(p_entity_key)
|
|
459
|
+
LIMIT 1;
|
|
460
|
+
|
|
461
|
+
-- SELF-HEALING: If start not found and cache is empty, trigger rebuild
|
|
462
|
+
IF NOT COALESCE(v_found_start, FALSE) THEN
|
|
463
|
+
IF rem_kv_store_empty(effective_user_id) THEN
|
|
464
|
+
PERFORM maybe_trigger_kv_rebuild(effective_user_id, 'rem_traverse');
|
|
465
|
+
END IF;
|
|
466
|
+
RETURN;
|
|
467
|
+
END IF;
|
|
468
|
+
|
|
469
|
+
FOR graph_keys IN
|
|
470
|
+
WITH RECURSIVE graph_traversal AS (
|
|
471
|
+
-- Base case: Find starting entity (user-owned OR public)
|
|
472
|
+
-- Normalize input key for consistent matching
|
|
473
|
+
SELECT
|
|
474
|
+
0 AS depth,
|
|
475
|
+
kv.entity_key,
|
|
476
|
+
kv.entity_type,
|
|
477
|
+
kv.entity_id,
|
|
478
|
+
NULL::VARCHAR(100) AS rel_type,
|
|
479
|
+
NULL::REAL AS rel_weight,
|
|
480
|
+
ARRAY[kv.entity_key]::TEXT[] AS path
|
|
481
|
+
FROM kv_store kv
|
|
482
|
+
WHERE (kv.user_id = effective_user_id OR kv.user_id IS NULL)
|
|
483
|
+
AND kv.entity_key = normalize_key(p_entity_key)
|
|
484
|
+
|
|
485
|
+
UNION ALL
|
|
486
|
+
|
|
487
|
+
-- Recursive case: Follow outbound edges
|
|
488
|
+
SELECT
|
|
489
|
+
gt.depth + 1,
|
|
490
|
+
target_kv.entity_key,
|
|
491
|
+
target_kv.entity_type,
|
|
492
|
+
target_kv.entity_id,
|
|
493
|
+
(edge->>'rel_type')::VARCHAR(100) AS rel_type,
|
|
494
|
+
COALESCE((edge->>'weight')::REAL, 1.0) AS rel_weight,
|
|
495
|
+
gt.path || target_kv.entity_key AS path
|
|
496
|
+
FROM graph_traversal gt
|
|
497
|
+
JOIN kv_store source_kv ON source_kv.entity_key = gt.entity_key
|
|
498
|
+
AND (source_kv.user_id = effective_user_id OR source_kv.user_id IS NULL)
|
|
499
|
+
CROSS JOIN LATERAL jsonb_array_elements(COALESCE(source_kv.graph_edges, '[]'::jsonb)) AS edge
|
|
500
|
+
JOIN kv_store target_kv ON target_kv.entity_key = normalize_key((edge->>'dst')::VARCHAR(255))
|
|
501
|
+
AND (target_kv.user_id = effective_user_id OR target_kv.user_id IS NULL)
|
|
502
|
+
WHERE gt.depth < p_max_depth
|
|
503
|
+
AND (p_rel_type IS NULL OR (edge->>'rel_type')::VARCHAR(100) = p_rel_type)
|
|
504
|
+
AND NOT (target_kv.entity_key = ANY(gt.path))
|
|
505
|
+
)
|
|
506
|
+
SELECT DISTINCT ON (entity_key)
|
|
507
|
+
gt.depth,
|
|
508
|
+
gt.entity_key,
|
|
509
|
+
gt.entity_type,
|
|
510
|
+
gt.entity_id,
|
|
511
|
+
gt.rel_type,
|
|
512
|
+
gt.rel_weight,
|
|
513
|
+
gt.path
|
|
514
|
+
FROM graph_traversal gt
|
|
515
|
+
WHERE gt.depth > 0
|
|
516
|
+
ORDER BY gt.entity_key, gt.depth
|
|
517
|
+
LOOP
|
|
518
|
+
IF p_keys_only THEN
|
|
519
|
+
depth := graph_keys.depth;
|
|
520
|
+
entity_key := graph_keys.entity_key;
|
|
521
|
+
entity_type := graph_keys.entity_type;
|
|
522
|
+
entity_id := graph_keys.entity_id;
|
|
523
|
+
rel_type := graph_keys.rel_type;
|
|
524
|
+
rel_weight := graph_keys.rel_weight;
|
|
525
|
+
path := graph_keys.path;
|
|
526
|
+
entity_record := NULL;
|
|
527
|
+
RETURN NEXT;
|
|
528
|
+
ELSE
|
|
529
|
+
IF entities_by_table ? graph_keys.entity_type THEN
|
|
530
|
+
table_keys := entities_by_table->graph_keys.entity_type;
|
|
531
|
+
entities_by_table := jsonb_set(
|
|
532
|
+
entities_by_table,
|
|
533
|
+
ARRAY[graph_keys.entity_type],
|
|
534
|
+
table_keys || jsonb_build_array(graph_keys.entity_key)
|
|
535
|
+
);
|
|
536
|
+
ELSE
|
|
537
|
+
entities_by_table := jsonb_set(
|
|
538
|
+
entities_by_table,
|
|
539
|
+
ARRAY[graph_keys.entity_type],
|
|
540
|
+
jsonb_build_array(graph_keys.entity_key)
|
|
541
|
+
);
|
|
542
|
+
END IF;
|
|
543
|
+
END IF;
|
|
544
|
+
END LOOP;
|
|
545
|
+
|
|
546
|
+
IF NOT p_keys_only AND entities_by_table != '{}'::jsonb THEN
|
|
547
|
+
RETURN QUERY
|
|
548
|
+
SELECT
|
|
549
|
+
NULL::INTEGER AS depth,
|
|
550
|
+
f.entity_key::VARCHAR(255),
|
|
551
|
+
f.entity_type::VARCHAR(100),
|
|
552
|
+
NULL::UUID AS entity_id,
|
|
553
|
+
NULL::VARCHAR(100) AS rel_type,
|
|
554
|
+
NULL::REAL AS rel_weight,
|
|
555
|
+
NULL::TEXT[] AS path,
|
|
556
|
+
f.entity_record
|
|
557
|
+
FROM rem_fetch(entities_by_table, effective_user_id) f;
|
|
558
|
+
END IF;
|
|
559
|
+
END;
|
|
560
|
+
$$ LANGUAGE plpgsql STABLE;
|
|
561
|
+
|
|
562
|
+
COMMENT ON FUNCTION rem_traverse IS
|
|
563
|
+
'REM TRAVERSE: Graph traversal. Returns user-owned AND public (NULL user_id) entities.';
|
|
564
|
+
|
|
565
|
+
-- REM SEARCH: Vector similarity search using embeddings
|
|
566
|
+
-- Joins to embeddings table for semantic search
|
|
567
|
+
-- Note: Includes user-owned AND public (NULL user_id) resources
|
|
568
|
+
CREATE OR REPLACE FUNCTION rem_search(
    p_query_embedding vector,
    p_table_name VARCHAR(100),
    p_field_name VARCHAR(100),
    p_tenant_id VARCHAR(100),
    p_provider VARCHAR(50) DEFAULT 'openai',
    p_min_similarity REAL DEFAULT 0.7,
    p_limit INTEGER DEFAULT 10,
    p_user_id VARCHAR(100) DEFAULT NULL
)
RETURNS TABLE(
    entity_type VARCHAR(100),
    similarity_score REAL,
    data JSONB
) AS $$
-- Vector similarity search over one entity table and its embeddings table.
--
-- The embeddings relation is derived as 'embeddings_' || p_table_name and is
-- joined to the source table on e.entity_id = t.id. Rows are visible when
-- t.user_id equals COALESCE(p_user_id, p_tenant_id) or is NULL (public), and
-- only non-soft-deleted rows (t.deleted_at IS NULL) are returned.
--
-- Returns one row per match: the table name as entity_type, the similarity
-- score, and the whole source row serialised to JSONB. At most p_limit rows,
-- closest first.
DECLARE
    v_embeddings_rel VARCHAR(200) := 'embeddings_' || p_table_name;
    v_source_rel     VARCHAR(100) := p_table_name;
    v_owner          VARCHAR(100) := COALESCE(p_user_id, p_tenant_id);
BEGIN
    -- <=> is the cosine distance operator (0 = identical); the score reported
    -- and filtered on is 1 - distance, so larger means more similar.
    -- Relation names are injected with %I / %L; every value is bound via USING.
    RETURN QUERY EXECUTE format('
        SELECT
            %L::VARCHAR(100) AS entity_type,
            (1.0 - (e.embedding <=> $1))::REAL AS similarity_score,
            row_to_json(t)::jsonb AS data
        FROM %I t
        JOIN %I e ON e.entity_id = t.id
        WHERE (t.user_id = $2 OR t.user_id IS NULL)
          AND e.field_name = $3
          AND e.provider = $4
          AND (1.0 - (e.embedding <=> $1)) >= $5
          AND t.deleted_at IS NULL
        ORDER BY e.embedding <=> $1
        LIMIT $6
    ', v_source_rel, v_source_rel, v_embeddings_rel)
    USING p_query_embedding, v_owner, p_field_name, p_provider, p_min_similarity, p_limit;
END;
$$ LANGUAGE plpgsql STABLE;

COMMENT ON FUNCTION rem_search IS
'REM SEARCH: Vector similarity search. Returns user-owned AND public (NULL user_id) resources.';
|
|
619
|
+
|
|
620
|
+
-- Function to get migration status
|
|
621
|
+
CREATE OR REPLACE FUNCTION migration_status()
RETURNS TABLE(
    migration_type TEXT,
    count BIGINT,
    last_applied TIMESTAMP,
    total_execution_ms BIGINT
) AS $$
-- Summarise successfully applied migrations, one row per migration type.
--
-- Returns, for each rem_migrations.type with success = TRUE: the number of
-- rows, the most recent applied_at, and the summed execution_time_ms.
-- Types are ordered by their most recent application, newest first.
BEGIN
    RETURN QUERY
    SELECT
        rm.type::TEXT,
        COUNT(*)::BIGINT,
        MAX(rm.applied_at),
        SUM(rm.execution_time_ms)::BIGINT
    FROM rem_migrations rm
    WHERE rm.success = TRUE
    GROUP BY rm.type
    ORDER BY MAX(rm.applied_at) DESC;
END;
-- Declared STABLE (read-only query), matching the other query helpers in this file.
$$ LANGUAGE plpgsql STABLE;

COMMENT ON FUNCTION migration_status() IS
'Get summary of applied migrations by type';
|
|
644
|
+
|
|
645
|
+
-- ============================================================================
|
|
646
|
+
-- RATE LIMITS (UNLOGGED for performance)
|
|
647
|
+
-- ============================================================================
|
|
648
|
+
-- High-performance rate limiting table. Uses UNLOGGED for speed - counts may
|
|
649
|
+
-- be lost on database crash/restart, which is acceptable (fail-open on error).
|
|
650
|
+
|
|
651
|
+
CREATE UNLOGGED TABLE IF NOT EXISTS rate_limits (
    key        VARCHAR(512) PRIMARY KEY,                -- one row per rate-limit key
    count      INTEGER      NOT NULL DEFAULT 1,         -- counter; a new row starts at 1
    expires_at TIMESTAMP    NOT NULL,                   -- expiry instant (indexed below)
    created_at TIMESTAMP    DEFAULT CURRENT_TIMESTAMP   -- row creation time
);

-- Index on the expiry column (presumably for purging/looking up expired rows; confirm against callers).
CREATE INDEX IF NOT EXISTS idx_rate_limits_expires
    ON rate_limits (expires_at);

COMMENT ON TABLE rate_limits IS
'UNLOGGED rate limiting table. Counts may be lost on crash (acceptable for rate limiting).';
|
|
662
|
+
|
|
663
|
+
-- ============================================================================
|
|
664
|
+
-- SHARED SESSIONS HELPER FUNCTIONS
|
|
665
|
+
-- ============================================================================
|
|
666
|
+
-- Note: The shared_sessions TABLE is created by 002_install_models.sql (auto-generated)
|
|
667
|
+
-- These functions provide aggregate queries for the session sharing workflow.
|
|
668
|
+
|
|
669
|
+
-- Count distinct users sharing sessions with the current user
|
|
670
|
+
CREATE OR REPLACE FUNCTION fn_count_shared_with_me(
    p_tenant_id VARCHAR(100),
    p_user_id VARCHAR(256)
)
RETURNS BIGINT AS $$
-- Number of distinct owners (owner_user_id) that have an active share with
-- p_user_id inside tenant p_tenant_id. Soft-deleted shares are ignored.
DECLARE
    v_owner_total BIGINT;
BEGIN
    SELECT COUNT(DISTINCT ss.owner_user_id)
    INTO v_owner_total
    FROM shared_sessions ss
    WHERE ss.tenant_id = p_tenant_id
      AND ss.shared_with_user_id = p_user_id
      AND ss.deleted_at IS NULL;

    RETURN v_owner_total;
END;
$$ LANGUAGE plpgsql STABLE;

COMMENT ON FUNCTION fn_count_shared_with_me IS
'Count distinct users sharing sessions with the specified user.';
|
|
688
|
+
|
|
689
|
+
-- Get aggregated summary of users sharing sessions with current user
|
|
690
|
+
CREATE OR REPLACE FUNCTION fn_get_shared_with_me(
    p_tenant_id VARCHAR(100),
    p_user_id VARCHAR(256),
    p_limit INTEGER DEFAULT 50,
    p_offset INTEGER DEFAULT 0
)
RETURNS TABLE(
    user_id VARCHAR(256),
    name VARCHAR(256),
    email VARCHAR(256),
    session_count BIGINT,
    message_count BIGINT,
    first_message_at TIMESTAMP,
    last_message_at TIMESTAMP
) AS $$
-- One summary row per owner who has an active (not soft-deleted) share with
-- p_user_id in tenant p_tenant_id.
--
-- Per owner: display name (users.name, falling back to the owner id), email,
-- number of distinct shared sessions, and count / first / last timestamp of
-- the owner's own non-deleted messages in those sessions.
-- Paged with p_limit / p_offset, most recently active owner first; owners
-- with no messages sort last.
BEGIN
    RETURN QUERY
    SELECT
        ss.owner_user_id AS user_id,
        -- Explicit casts mirror fn_list_sessions_with_user so the result
        -- column types match the declared VARCHAR columns.
        COALESCE(u.name, ss.owner_user_id)::VARCHAR AS name,
        u.email::VARCHAR AS email,
        COUNT(DISTINCT ss.session_id)::BIGINT AS session_count,
        COALESCE(SUM(msg_counts.msg_count), 0)::BIGINT AS message_count,
        MIN(msg_counts.first_msg)::TIMESTAMP AS first_message_at,
        MAX(msg_counts.last_msg)::TIMESTAMP AS last_message_at
    FROM shared_sessions ss
    LEFT JOIN users u
           ON u.id::text = ss.owner_user_id
          AND u.tenant_id = ss.tenant_id
    LEFT JOIN (
        -- Per (session, author) message statistics for this tenant.
        SELECT
            m.session_id,
            m.user_id,
            COUNT(*)::BIGINT AS msg_count,
            MIN(m.created_at) AS first_msg,
            MAX(m.created_at) AS last_msg
        FROM messages m
        WHERE m.tenant_id = p_tenant_id
          AND m.deleted_at IS NULL
        GROUP BY m.session_id, m.user_id
    ) msg_counts
           ON msg_counts.session_id = ss.session_id
          AND msg_counts.user_id = ss.owner_user_id
    WHERE ss.tenant_id = p_tenant_id
      AND ss.shared_with_user_id = p_user_id
      AND ss.deleted_at IS NULL
    GROUP BY ss.owner_user_id, u.name, u.email
    -- owner_user_id breaks ties (e.g. all owners without messages) so that
    -- successive LIMIT/OFFSET pages neither repeat nor skip rows.
    ORDER BY MAX(msg_counts.last_msg) DESC NULLS LAST, ss.owner_user_id
    LIMIT p_limit
    OFFSET p_offset;
END;
$$ LANGUAGE plpgsql STABLE;

COMMENT ON FUNCTION fn_get_shared_with_me IS
'Get aggregated summary of users sharing sessions with the specified user.';
|
|
741
|
+
|
|
742
|
+
-- Count messages in sessions shared by a specific user
|
|
743
|
+
CREATE OR REPLACE FUNCTION fn_count_shared_messages(
    p_tenant_id VARCHAR(100),
    p_recipient_user_id VARCHAR(256),
    p_owner_user_id VARCHAR(256)
)
RETURNS BIGINT AS $$
-- Count non-deleted messages of tenant p_tenant_id whose session has an
-- active share from p_owner_user_id to p_recipient_user_id.
DECLARE
    v_message_total BIGINT;
BEGIN
    SELECT COUNT(*)
    INTO v_message_total
    FROM messages m
    WHERE m.tenant_id = p_tenant_id
      AND m.deleted_at IS NULL
      -- Semi-join: the message's session must be actively shared owner -> recipient.
      AND EXISTS (
          SELECT 1
          FROM shared_sessions ss
          WHERE ss.session_id = m.session_id
            AND ss.tenant_id = p_tenant_id
            AND ss.owner_user_id = p_owner_user_id
            AND ss.shared_with_user_id = p_recipient_user_id
            AND ss.deleted_at IS NULL
      );

    RETURN v_message_total;
END;
$$ LANGUAGE plpgsql STABLE;

COMMENT ON FUNCTION fn_count_shared_messages IS
'Count messages in sessions shared by a specific user with the recipient.';
|
|
769
|
+
|
|
770
|
+
-- Get messages from sessions shared by a specific user
|
|
771
|
+
CREATE OR REPLACE FUNCTION fn_get_shared_messages(
    p_tenant_id VARCHAR(100),
    p_recipient_user_id VARCHAR(256),
    p_owner_user_id VARCHAR(256),
    p_limit INTEGER DEFAULT 50,
    p_offset INTEGER DEFAULT 0
)
RETURNS TABLE(
    id UUID,
    content TEXT,
    message_type VARCHAR(256),
    session_id VARCHAR(256),
    model VARCHAR(256),
    token_count INTEGER,
    created_at TIMESTAMP,
    metadata JSONB
) AS $$
-- Page through the non-deleted messages of tenant p_tenant_id that belong to
-- sessions actively shared by p_owner_user_id with p_recipient_user_id.
-- Newest messages first; p_limit / p_offset select the page.
BEGIN
    RETURN QUERY
    SELECT
        m.id,
        m.content,
        m.message_type,
        m.session_id,
        m.model,
        m.token_count,
        m.created_at,
        m.metadata
    FROM messages m
    WHERE m.tenant_id = p_tenant_id
      AND m.deleted_at IS NULL
      AND m.session_id IN (
          SELECT ss.session_id
          FROM shared_sessions ss
          WHERE ss.tenant_id = p_tenant_id
            AND ss.owner_user_id = p_owner_user_id
            AND ss.shared_with_user_id = p_recipient_user_id
            AND ss.deleted_at IS NULL
      )
    -- m.id breaks ties between messages with the same created_at so that
    -- successive LIMIT/OFFSET pages neither repeat nor skip rows.
    ORDER BY m.created_at DESC, m.id DESC
    LIMIT p_limit
    OFFSET p_offset;
END;
$$ LANGUAGE plpgsql STABLE;

COMMENT ON FUNCTION fn_get_shared_messages IS
'Get messages from sessions shared by a specific user with the recipient.';
|
|
818
|
+
|
|
819
|
+
-- ============================================================================
|
|
820
|
+
-- SESSIONS WITH USER INFO
|
|
821
|
+
-- ============================================================================
|
|
822
|
+
-- Function to list sessions with user details (name, email) for admin views
|
|
823
|
+
|
|
824
|
+
-- List sessions with user info, CTE pagination
|
|
825
|
+
-- Note: messages.session_id stores the session UUID (sessions.id)
|
|
826
|
+
CREATE OR REPLACE FUNCTION fn_list_sessions_with_user(
    p_user_id VARCHAR(256) DEFAULT NULL,     -- Filter by user_id (NULL = all users, admin only)
    p_user_name VARCHAR(256) DEFAULT NULL,   -- Filter by user name (partial match, admin only)
    p_user_email VARCHAR(256) DEFAULT NULL,  -- Filter by user email (partial match, admin only)
    p_mode VARCHAR(50) DEFAULT NULL,         -- Filter by session mode
    p_page INTEGER DEFAULT 1,
    p_page_size INTEGER DEFAULT 50
)
RETURNS TABLE(
    id UUID,
    name VARCHAR(256),
    mode TEXT,
    description TEXT,
    user_id VARCHAR(256),
    user_name VARCHAR(256),
    user_email VARCHAR(256),
    message_count INTEGER,
    total_tokens INTEGER,
    created_at TIMESTAMP,
    updated_at TIMESTAMP,
    metadata JSONB,
    total_count BIGINT
) AS $$
-- List non-deleted sessions together with the owning user's name/email and a
-- message count computed from the messages table.
--
-- Every filter parameter is optional (NULL = no filter). Name and email are
-- case-insensitive substring matches. Each returned row also carries
-- total_count, the number of sessions matching the filters before paging.
-- Pages are 1-based; newest sessions first.
BEGIN
    RETURN QUERY
    WITH session_msg_counts AS (
        -- Messages per session, keyed by messages.session_id (the session UUID
        -- as text). Soft-deleted messages are excluded, as in the other
        -- message queries in this file.
        SELECT
            m.session_id,
            COUNT(*)::INTEGER AS actual_message_count
        FROM messages m
        WHERE m.deleted_at IS NULL
        GROUP BY m.session_id
    ),
    filtered_sessions AS (
        SELECT
            s.id,
            s.name,
            s.mode,
            s.description,
            s.user_id,
            COALESCE(u.name, s.user_id)::VARCHAR(256) AS user_name,
            u.email::VARCHAR(256) AS user_email,
            COALESCE(mc.actual_message_count, 0) AS message_count,
            s.total_tokens,
            s.created_at,
            s.updated_at,
            s.metadata
        FROM sessions s
        LEFT JOIN users u ON u.id::text = s.user_id
        LEFT JOIN session_msg_counts mc ON mc.session_id = s.id::text
        WHERE s.deleted_at IS NULL
          AND (p_user_id IS NULL OR s.user_id = p_user_id)
          AND (p_user_name IS NULL OR u.name ILIKE '%' || p_user_name || '%')
          AND (p_user_email IS NULL OR u.email ILIKE '%' || p_user_email || '%')
          AND (p_mode IS NULL OR s.mode = p_mode)
    ),
    counted AS (
        -- Window count gives the pre-pagination total on every row.
        SELECT *, COUNT(*) OVER () AS total_count
        FROM filtered_sessions
    )
    SELECT
        c.id,
        c.name,
        c.mode,
        c.description,
        c.user_id,
        c.user_name,
        c.user_email,
        c.message_count,
        c.total_tokens,
        c.created_at,
        c.updated_at,
        c.metadata,
        c.total_count
    FROM counted c
    -- c.id breaks ties on created_at so paging is deterministic.
    ORDER BY c.created_at DESC, c.id DESC
    LIMIT p_page_size
    -- Clamp the page to >= 1: a zero/negative page would otherwise produce a
    -- negative OFFSET, which PostgreSQL rejects with an error.
    OFFSET (GREATEST(p_page, 1) - 1) * p_page_size;
END;
$$ LANGUAGE plpgsql STABLE;

COMMENT ON FUNCTION fn_list_sessions_with_user IS
'List sessions with user details and computed message counts. Joins messages on session UUID (sessions.id).';
|
|
909
|
+
|
|
910
|
+
-- ============================================================================
|
|
911
|
+
-- RECORD INSTALLATION
|
|
912
|
+
-- ============================================================================
|
|
913
|
+
|
|
914
|
+
-- Record this script in the migration log. Re-running the script keeps the
-- existing row (matched on name) and only refreshes who applied it and when.
INSERT INTO rem_migrations (name, type, version)
VALUES ('install.sql', 'install', '1.0.0')
ON CONFLICT (name) DO UPDATE
    SET (applied_at, applied_by) = (CURRENT_TIMESTAMP, CURRENT_USER);
|
|
919
|
+
|
|
920
|
+
-- ============================================================================
|
|
921
|
+
-- GRANTS FOR APPLICATION USER
|
|
922
|
+
-- ============================================================================
|
|
923
|
+
-- Grant permissions to remuser (the application database user)
|
|
924
|
+
-- This ensures the application can run migrations and manage schema
|
|
925
|
+
-- Note: remuser is created by CNPG as the database owner in bootstrap.initdb.owner
|
|
926
|
+
|
|
927
|
+
DO $$
-- Hand ownership of the infrastructure tables and broad privileges on the
-- public schema to the application role, when that role exists.
DECLARE
    app_user TEXT := 'remuser';
    stmt_template TEXT;
BEGIN
    -- Different deployments may not have the role; skip quietly in that case.
    IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname = app_user) THEN
        RAISE NOTICE 'Application user % does not exist, skipping grants', app_user;
        RETURN;
    END IF;

    -- Statements run in the listed order; %I quotes the role name as an identifier.
    FOREACH stmt_template IN ARRAY ARRAY[
        -- Ownership of tracking/cache tables so the app can record migrations
        'ALTER TABLE rem_migrations OWNER TO %I',
        'ALTER TABLE kv_store OWNER TO %I',
        'ALTER TABLE rate_limits OWNER TO %I',
        -- Schema-level privileges
        'GRANT ALL ON SCHEMA public TO %I',
        -- Existing objects in the public schema
        'GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO %I',
        'GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO %I',
        'GRANT ALL PRIVILEGES ON ALL FUNCTIONS IN SCHEMA public TO %I',
        -- Default privileges for objects created later
        'ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO %I',
        'ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO %I',
        'ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON FUNCTIONS TO %I'
    ]
    LOOP
        EXECUTE format(stmt_template, app_user);
    END LOOP;

    RAISE NOTICE '✓ Granted permissions to application user: %', app_user;
END $$;
|
|
956
|
+
|
|
957
|
+
-- ============================================================================
|
|
958
|
+
-- COMPLETION
|
|
959
|
+
-- ============================================================================
|
|
960
|
+
|
|
961
|
+
DO $$
-- Print the end-of-installation banner, one NOTICE per line.
DECLARE
    banner_line TEXT;
BEGIN
    FOREACH banner_line IN ARRAY ARRAY[
        '============================================================',
        'REM Database Installation Complete',
        '============================================================',
        '',
        'Extensions installed:',
        ' ✓ pgvector (vector embeddings)',
        ' ✓ pg_trgm (fuzzy text search)',
        ' ✓ uuid-ossp (UUID generation)',
        '',
        'Infrastructure created:',
        ' ✓ rem_migrations (migration tracking)',
        ' ✓ kv_store (UNLOGGED entity cache)',
        ' ✓ Helper functions',
        '',
        'Next steps:',
        ' 1. Generate model schema: rem schema generate --models src/rem/models/entities',
        ' 2. Apply model schema: rem db migrate',
        '',
        'Status: SELECT * FROM migration_status();',
        '============================================================'
    ]
    LOOP
        RAISE NOTICE '%', banner_line;
    END LOOP;
END $$;
|