remdb 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rem/__init__.py +2 -0
- rem/agentic/README.md +650 -0
- rem/agentic/__init__.py +39 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +8 -0
- rem/agentic/context.py +148 -0
- rem/agentic/context_builder.py +329 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +107 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +151 -0
- rem/agentic/providers/phoenix.py +674 -0
- rem/agentic/providers/pydantic_ai.py +572 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +396 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +231 -0
- rem/api/README.md +420 -0
- rem/api/main.py +324 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +536 -0
- rem/api/mcp_router/server.py +213 -0
- rem/api/mcp_router/tools.py +584 -0
- rem/api/routers/auth.py +229 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/completions.py +281 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +124 -0
- rem/api/routers/chat/streaming.py +185 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +26 -0
- rem/auth/middleware.py +100 -0
- rem/auth/providers/__init__.py +13 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +455 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +126 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +566 -0
- rem/cli/commands/configure.py +497 -0
- rem/cli/commands/db.py +493 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1302 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +245 -0
- rem/cli/commands/schema.py +183 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +96 -0
- rem/config.py +237 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +64 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +628 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +243 -0
- rem/models/entities/__init__.py +43 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +35 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +191 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/user.py +85 -0
- rem/py.typed +0 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +128 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +16 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +801 -0
- rem/services/content/service.py +676 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +336 -0
- rem/services/dreaming/moment_service.py +264 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +120 -0
- rem/services/embeddings/worker.py +421 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +686 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +575 -0
- rem/services/postgres/__init__.py +23 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
- rem/services/postgres/register_type.py +352 -0
- rem/services/postgres/repository.py +337 -0
- rem/services/postgres/schema_generator.py +379 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +354 -0
- rem/services/rem/README.md +304 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +145 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +527 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +6 -0
- rem/services/session/compression.py +360 -0
- rem/services/session/reload.py +77 -0
- rem/settings.py +1235 -0
- rem/sql/002_install_models.sql +1068 -0
- rem/sql/background_indexes.sql +42 -0
- rem/sql/install_models.sql +1038 -0
- rem/sql/migrations/001_install.sql +503 -0
- rem/sql/migrations/002_install_models.sql +1202 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +583 -0
- rem/utils/__init__.py +43 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +423 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/markdown.py +16 -0
- rem/utils/model_helpers.py +236 -0
- rem/utils/schema_loader.py +336 -0
- rem/utils/sql_types.py +348 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +330 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +5 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- remdb-0.3.7.dist-info/METADATA +1473 -0
- remdb-0.3.7.dist-info/RECORD +187 -0
- remdb-0.3.7.dist-info/WHEEL +4 -0
- remdb-0.3.7.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,503 @@
|
|
|
1
|
+
-- REM Database Installation Script
|
|
2
|
+
-- Description: Core database setup with extensions and infrastructure
|
|
3
|
+
-- Version: 1.0.0
|
|
4
|
+
-- Date: 2025-01-18
|
|
5
|
+
--
|
|
6
|
+
-- This script sets up:
|
|
7
|
+
-- 1. Required PostgreSQL extensions (pgvector, pg_trgm, uuid-ossp)
|
|
8
|
+
-- 2. Migration tracking table
|
|
9
|
+
-- 3. KV_STORE UNLOGGED cache table
|
|
10
|
+
-- 4. Helper functions
|
|
11
|
+
--
|
|
12
|
+
-- Usage:
|
|
13
|
+
-- psql -d remdb -f sql/migrations/001_install.sql
|
|
14
|
+
--
|
|
15
|
+
-- Dependencies:
|
|
16
|
+
-- - PostgreSQL 16+
|
|
17
|
+
-- - pgvector extension compiled and available
|
|
18
|
+
-- - pg_trgm extension (usually included)
|
|
19
|
+
|
|
20
|
+
-- ============================================================================
|
|
21
|
+
-- EXTENSIONS
|
|
22
|
+
-- ============================================================================
|
|
23
|
+
|
|
24
|
+
-- Enable pgvector extension for vector embeddings
|
|
25
|
+
CREATE EXTENSION IF NOT EXISTS vector;
|
|
26
|
+
|
|
27
|
+
-- Enable pg_trgm extension for fuzzy text search
|
|
28
|
+
CREATE EXTENSION IF NOT EXISTS pg_trgm;
|
|
29
|
+
|
|
30
|
+
-- Enable uuid-ossp for UUID generation
|
|
31
|
+
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
|
|
32
|
+
|
|
33
|
+
-- Verify critical extensions
|
|
34
|
+
DO $$
|
|
35
|
+
BEGIN
|
|
36
|
+
IF NOT EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'vector') THEN
|
|
37
|
+
RAISE EXCEPTION 'pgvector extension failed to install. Ensure pgvector is compiled and available.';
|
|
38
|
+
END IF;
|
|
39
|
+
|
|
40
|
+
IF NOT EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'pg_trgm') THEN
|
|
41
|
+
RAISE EXCEPTION 'pg_trgm extension failed to install.';
|
|
42
|
+
END IF;
|
|
43
|
+
|
|
44
|
+
RAISE NOTICE '✓ All required extensions installed successfully';
|
|
45
|
+
END $$;
|
|
46
|
+
|
|
47
|
+
-- ============================================================================
|
|
48
|
+
-- MIGRATION TRACKING
|
|
49
|
+
-- ============================================================================
|
|
50
|
+
|
|
51
|
+
CREATE TABLE IF NOT EXISTS rem_migrations (
|
|
52
|
+
id SERIAL PRIMARY KEY,
|
|
53
|
+
name VARCHAR(255) NOT NULL UNIQUE,
|
|
54
|
+
type VARCHAR(50) NOT NULL, -- 'install', 'models', 'data'
|
|
55
|
+
version VARCHAR(50),
|
|
56
|
+
checksum VARCHAR(64),
|
|
57
|
+
applied_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
58
|
+
applied_by VARCHAR(100) DEFAULT CURRENT_USER,
|
|
59
|
+
execution_time_ms INTEGER,
|
|
60
|
+
success BOOLEAN DEFAULT TRUE
|
|
61
|
+
);
|
|
62
|
+
|
|
63
|
+
CREATE INDEX IF NOT EXISTS idx_rem_migrations_type ON rem_migrations(type);
|
|
64
|
+
CREATE INDEX IF NOT EXISTS idx_rem_migrations_applied_at ON rem_migrations(applied_at);
|
|
65
|
+
|
|
66
|
+
COMMENT ON TABLE rem_migrations IS
|
|
67
|
+
'Tracks all applied migrations including install scripts and model schema updates';
|
|
68
|
+
|
|
69
|
+
-- ============================================================================
|
|
70
|
+
-- KV_STORE CACHE
|
|
71
|
+
-- ============================================================================
|
|
72
|
+
|
|
73
|
+
-- KV_STORE: UNLOGGED table for O(1) entity lookups in REM
|
|
74
|
+
--
|
|
75
|
+
-- Design rationale:
|
|
76
|
+
-- - UNLOGGED: Faster writes, no WAL overhead (acceptable for cache)
|
|
77
|
+
-- - Can be repopulated from primary tables with rebuild_kv_store() after its contents are lost (not automatic)
|
|
78
|
+
-- - Supports LOOKUP queries with O(1) performance
|
|
79
|
+
-- - Supports FUZZY queries with trigram indexes
|
|
80
|
+
-- - User-scoped filtering when user_id IS NOT NULL
|
|
81
|
+
-- - Tenant isolation via tenant_id
|
|
82
|
+
--
|
|
83
|
+
-- Schema:
|
|
84
|
+
-- - entity_key: Natural language label (e.g., "sarah-chen", "project-alpha")
|
|
85
|
+
-- - entity_type: Table name (e.g., "resources", "moments")
|
|
86
|
+
-- - entity_id: UUID from primary table
|
|
87
|
+
-- - tenant_id: Tenant identifier for multi-tenancy
|
|
88
|
+
-- - user_id: Optional user scoping (NULL = system-level)
|
|
89
|
+
-- - content_summary: Denormalized text for fuzzy search
|
|
90
|
+
-- - metadata: JSONB for additional filtering
|
|
91
|
+
-- - updated_at: Timestamp for cache invalidation
|
|
92
|
+
|
|
93
|
+
CREATE UNLOGGED TABLE IF NOT EXISTS kv_store (
|
|
94
|
+
entity_key VARCHAR(255) NOT NULL,
|
|
95
|
+
entity_type VARCHAR(100) NOT NULL,
|
|
96
|
+
entity_id UUID NOT NULL,
|
|
97
|
+
tenant_id VARCHAR(100) NOT NULL,
|
|
98
|
+
user_id VARCHAR(100),
|
|
99
|
+
content_summary TEXT,
|
|
100
|
+
metadata JSONB DEFAULT '{}',
|
|
101
|
+
graph_edges JSONB DEFAULT '[]'::jsonb, -- Cached edges for fast graph traversal
|
|
102
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
103
|
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
104
|
+
|
|
105
|
+
-- Composite primary key: entity_key unique per tenant
|
|
106
|
+
PRIMARY KEY (tenant_id, entity_key)
|
|
107
|
+
);
|
|
108
|
+
|
|
109
|
+
-- Index for user-scoped lookups (when user_id IS NOT NULL)
|
|
110
|
+
CREATE INDEX IF NOT EXISTS idx_kv_store_user ON kv_store (tenant_id, user_id)
|
|
111
|
+
WHERE user_id IS NOT NULL;
|
|
112
|
+
|
|
113
|
+
-- Index for entity_id reverse lookup (find key by ID)
|
|
114
|
+
CREATE INDEX IF NOT EXISTS idx_kv_store_entity_id ON kv_store (entity_id);
|
|
115
|
+
|
|
116
|
+
-- Trigram index for fuzzy text search (FUZZY queries)
|
|
117
|
+
CREATE INDEX IF NOT EXISTS idx_kv_store_key_trgm ON kv_store
|
|
118
|
+
USING gin (entity_key gin_trgm_ops);
|
|
119
|
+
|
|
120
|
+
-- Trigram index for content_summary fuzzy search
|
|
121
|
+
CREATE INDEX IF NOT EXISTS idx_kv_store_content_trgm ON kv_store
|
|
122
|
+
USING gin (content_summary gin_trgm_ops);
|
|
123
|
+
|
|
124
|
+
-- GIN index for metadata JSONB queries
|
|
125
|
+
CREATE INDEX IF NOT EXISTS idx_kv_store_metadata ON kv_store
|
|
126
|
+
USING gin (metadata);
|
|
127
|
+
|
|
128
|
+
-- GIN index for graph_edges JSONB queries (graph traversal)
|
|
129
|
+
CREATE INDEX IF NOT EXISTS idx_kv_store_graph_edges ON kv_store
|
|
130
|
+
USING gin (graph_edges);
|
|
131
|
+
|
|
132
|
+
-- Index for entity_type filtering
|
|
133
|
+
CREATE INDEX IF NOT EXISTS idx_kv_store_type ON kv_store (entity_type);
|
|
134
|
+
|
|
135
|
+
-- Comments
|
|
136
|
+
COMMENT ON TABLE kv_store IS
|
|
137
|
+
'UNLOGGED cache for O(1) entity lookups. Supports REM LOOKUP and FUZZY queries. Rebuilt from primary tables on restart.';
|
|
138
|
+
|
|
139
|
+
COMMENT ON COLUMN kv_store.entity_key IS
|
|
140
|
+
'Natural language label for entity (e.g., "sarah-chen", "project-alpha")';
|
|
141
|
+
|
|
142
|
+
COMMENT ON COLUMN kv_store.entity_type IS
|
|
143
|
+
'Source table name (e.g., "resources", "moments", "users")';
|
|
144
|
+
|
|
145
|
+
COMMENT ON COLUMN kv_store.entity_id IS
|
|
146
|
+
'UUID from primary table for reverse lookup';
|
|
147
|
+
|
|
148
|
+
COMMENT ON COLUMN kv_store.tenant_id IS
|
|
149
|
+
'Tenant identifier for multi-tenancy isolation';
|
|
150
|
+
|
|
151
|
+
COMMENT ON COLUMN kv_store.user_id IS
|
|
152
|
+
'Optional user scoping. NULL = system-level entity, visible to all users in tenant';
|
|
153
|
+
|
|
154
|
+
COMMENT ON COLUMN kv_store.content_summary IS
|
|
155
|
+
'Denormalized text summary for fuzzy search. Concatenated from content fields.';
|
|
156
|
+
|
|
157
|
+
-- ============================================================================
|
|
158
|
+
-- HELPER FUNCTIONS
|
|
159
|
+
-- ============================================================================
|
|
160
|
+
|
|
161
|
+
-- Function to rebuild KV_STORE from primary tables
|
|
162
|
+
--
|
|
163
|
+
-- IMPORTANT: You should NOT need to call this during normal operations!
|
|
164
|
+
-- KV store is automatically populated via triggers on INSERT/UPDATE/DELETE.
|
|
165
|
+
--
|
|
166
|
+
-- Only call this function after:
|
|
167
|
+
-- 1. Database crash or unclean shutdown (UNLOGGED table contents are truncated during recovery; a clean restart keeps them)
|
|
168
|
+
-- 2. Restoring a physical backup or promoting a replica (UNLOGGED table contents are not included)
|
|
169
|
+
-- 3. Bulk imports that bypass triggers (COPY, pg_restore --disable-triggers)
|
|
170
|
+
--
|
|
171
|
+
-- Usage: SELECT * FROM rebuild_kv_store();
|
|
172
|
+
-- rebuild_kv_store(): repopulate kv_store by re-firing the *_kv_store triggers.
--
-- Returns one row per source table: the table name and the number of rows
-- that were touched (i.e. rows whose trigger was re-fired), not the number
-- of kv_store rows that exist afterwards.
--
-- NOTE(review): every table that owns a *_kv_store trigger is assumed to have
-- updated_at and deleted_at columns; a table without them makes the dynamic
-- UPDATE fail and aborts the whole rebuild.
CREATE OR REPLACE FUNCTION rebuild_kv_store()
RETURNS TABLE(table_name TEXT, rows_inserted BIGINT) AS $$
DECLARE
    table_rec RECORD;
    rows_affected BIGINT;
BEGIN
    -- Clear existing cache
    DELETE FROM kv_store;
    RAISE NOTICE 'Cleared KV_STORE cache';

    -- Find every table in schema "public" that carries a *_kv_store trigger.
    -- The schema is selected alongside the table name so the UPDATE below can
    -- be schema-qualified and does not depend on the caller's search_path.
    FOR table_rec IN
        SELECT DISTINCT
            trg.event_object_schema AS sch,
            trg.event_object_table AS tbl
        FROM information_schema.triggers trg
        WHERE trg.trigger_name LIKE '%_kv_store'
          AND trg.trigger_schema = 'public'
        ORDER BY tbl
    LOOP
        -- A no-op UPDATE on all non-deleted rows forces the row-level trigger
        -- to run again, which re-creates the kv_store entries.
        EXECUTE format('
            UPDATE %I.%I
            SET updated_at = updated_at
            WHERE deleted_at IS NULL
        ', table_rec.sch, table_rec.tbl);

        GET DIAGNOSTICS rows_affected = ROW_COUNT;

        -- Emit one result row for this table (name stays unqualified for callers).
        table_name := table_rec.tbl;
        rows_inserted := rows_affected;
        RETURN NEXT;

        RAISE NOTICE 'Rebuilt % KV entries for %', rows_affected, table_rec.tbl;
    END LOOP;
END;
$$ LANGUAGE plpgsql;
|
|
209
|
+
|
|
210
|
+
COMMENT ON FUNCTION rebuild_kv_store() IS
|
|
211
|
+
'Rebuild KV_STORE cache from all entity tables. Call after database restart.';
|
|
212
|
+
|
|
213
|
+
-- ============================================================================
|
|
214
|
+
-- REM QUERY FUNCTIONS
|
|
215
|
+
-- ============================================================================
|
|
216
|
+
|
|
217
|
+
-- REM LOOKUP: O(1) entity lookup by natural key
|
|
218
|
+
-- Returns structured columns extracted from entity records
|
|
219
|
+
-- Parameters: entity_key, tenant_id (for backward compat), user_id (actual filter)
|
|
220
|
+
-- Note: tenant_id parameter exists for backward compatibility but is ignored
|
|
221
|
+
-- rem_lookup(p_entity_key, p_tenant_id, p_user_id)
--
-- Resolves an entity key to its source table through kv_store, then returns
-- the matching row(s) of that table as JSONB.
--   p_entity_key  key to resolve (matched against kv_store.entity_key and the
--                 source table's "name" column)
--   p_tenant_id   accepted but never read by this function
--   p_user_id     owner filter applied to both kv_store and the source table
-- Returns zero rows when the key is not present in kv_store for that user.
CREATE OR REPLACE FUNCTION rem_lookup(
    p_entity_key VARCHAR(255),
    p_tenant_id VARCHAR(100),
    p_user_id VARCHAR(100)
)
RETURNS TABLE(
    entity_type VARCHAR(100),
    data JSONB
) AS $$
DECLARE
    source_table VARCHAR(100);
BEGIN
    -- Step 1: ask the cache which table holds this key for this user.
    SELECT kv.entity_type
      INTO source_table
      FROM kv_store kv
     WHERE kv.user_id = p_user_id
       AND kv.entity_key = p_entity_key;

    -- Unknown key: return an empty result set.
    IF source_table IS NULL THEN
        RETURN;
    END IF;

    -- Step 2: read the full row from the source table. The table name is
    -- injected once and reused positionally (%1$L as a literal label,
    -- %1$I as a quoted identifier); values are bound via USING.
    RETURN QUERY EXECUTE format('
        SELECT
            %1$L::VARCHAR(100) AS entity_type,
            row_to_json(t)::jsonb AS data
        FROM %1$I t
        WHERE t.user_id = $1
          AND t.name = $2
          AND t.deleted_at IS NULL
    ', source_table)
    USING p_user_id, p_entity_key;
END;
$$ LANGUAGE plpgsql STABLE;
|
|
261
|
+
|
|
262
|
+
COMMENT ON FUNCTION rem_lookup IS
|
|
263
|
+
'REM LOOKUP query: O(1) entity lookup by natural key. Returns raw entity data as JSONB for LLM consumption. tenant_id parameter exists for backward compatibility but filtering uses user_id.';
|
|
264
|
+
|
|
265
|
+
-- REM FETCH: Fetch full entity records from multiple tables
|
|
266
|
+
-- Takes JSONB mapping of {table_name: [entity_keys]}, fetches all records
|
|
267
|
+
-- Returns complete entity records as JSONB (not just KV store metadata)
|
|
268
|
+
-- rem_fetch(p_entities_by_table, p_user_id)
--
-- Hydrates full entity rows for a JSONB mapping {table_name: [key, ...]}.
-- For each table in the mapping it selects the non-deleted rows owned by
-- p_user_id whose "name" is one of the requested keys, and returns them as
-- (entity_key, entity_type, entity_record) with the row serialised to JSONB.
CREATE OR REPLACE FUNCTION rem_fetch(
    p_entities_by_table JSONB,
    p_user_id VARCHAR(100)
)
RETURNS TABLE(
    entity_key TEXT,
    entity_type TEXT,
    entity_record JSONB
) AS $$
DECLARE
    src_table TEXT;
    wanted_keys TEXT[];
BEGIN
    -- One pass per table named in the mapping.
    FOR src_table IN SELECT jsonb_object_keys(p_entities_by_table)
    LOOP
        -- Requested keys for this table, as a text array for = ANY(...).
        wanted_keys := ARRAY(
            SELECT jsonb_array_elements_text(p_entities_by_table -> src_table)
        );

        -- Append this table's rows directly to the result set. Explicit TEXT
        -- casts make the dynamic query's column types match the declared
        -- result columns.
        RETURN QUERY EXECUTE format('
            SELECT
                t.name::TEXT AS entity_key,
                %L::TEXT AS entity_type,
                row_to_json(t)::jsonb AS entity_record
            FROM %I t
            WHERE t.user_id = $1
              AND t.name = ANY($2)
              AND t.deleted_at IS NULL
        ', src_table, src_table)
        USING p_user_id, wanted_keys;
    END LOOP;
END;
$$ LANGUAGE plpgsql STABLE;
|
|
314
|
+
|
|
315
|
+
COMMENT ON FUNCTION rem_fetch IS
|
|
316
|
+
'REM FETCH: Fetch full entity records (all columns as JSONB) from multiple tables. Takes JSONB mapping {table_name: [keys]}, fetches all records, returns unified result set. Use for hydrating LOOKUP, FUZZY, SEARCH, and TRAVERSE results.';
|
|
317
|
+
|
|
318
|
+
-- REM FUZZY: Fuzzy text search using pg_trgm similarity
|
|
319
|
+
-- Returns raw entity data as JSONB for LLM consumption
|
|
320
|
+
-- rem_fuzzy(p_query, p_tenant_id, p_threshold, p_limit, p_user_id)
--
-- Trigram fuzzy search over kv_store.entity_key, hydrated through rem_fetch().
--   p_query      text to match against entity keys
--   p_tenant_id  used only as the fallback owner filter when p_user_id is NULL
--   p_threshold  minimum similarity() score to keep a match
--   p_limit      maximum number of kv_store matches considered
--   p_user_id    owner filter; NULL falls back to p_tenant_id
-- Returns (entity_type, similarity_score, data) ordered by score, best first.
--
-- NOTE: the "%" operator additionally filters by pg_trgm.similarity_threshold,
-- so a p_threshold below that setting does not widen the result.
CREATE OR REPLACE FUNCTION rem_fuzzy(
    p_query TEXT,
    p_tenant_id VARCHAR(100),
    p_threshold REAL DEFAULT 0.3,
    p_limit INTEGER DEFAULT 10,
    p_user_id VARCHAR(100) DEFAULT NULL
)
RETURNS TABLE(
    entity_type VARCHAR(100),
    similarity_score REAL,
    data JSONB
) AS $$
DECLARE
    hit RECORD;
    keys_by_table JSONB := '{}'::jsonb;
    -- Effective owner filter, shared by the cache scan and the hydration call.
    scope_user_id VARCHAR(100) := COALESCE(p_user_id, p_tenant_id);
BEGIN
    -- Phase 1: collect the best-matching keys from the cache and group them
    -- into {table: [keys]} for a batched fetch.
    FOR hit IN
        SELECT
            kv.entity_key,
            kv.entity_type,
            similarity(kv.entity_key, p_query) AS sim_score
        FROM kv_store kv
        WHERE kv.user_id = scope_user_id
          AND kv.entity_key % p_query
          AND similarity(kv.entity_key, p_query) >= p_threshold
        ORDER BY sim_score DESC
        LIMIT p_limit
    LOOP
        -- Append the key to this table's array, starting from an empty array
        -- the first time the table is seen.
        keys_by_table := jsonb_set(
            keys_by_table,
            ARRAY[hit.entity_type],
            COALESCE(keys_by_table -> hit.entity_type, '[]'::jsonb)
                || jsonb_build_array(hit.entity_key)
        );
    END LOOP;

    -- Phase 2: hydrate the full rows and re-score them for the caller.
    RETURN QUERY
    SELECT
        f.entity_type::VARCHAR(100),
        similarity(f.entity_key, p_query) AS similarity_score,
        f.entity_record AS data
    FROM rem_fetch(keys_by_table, scope_user_id) f
    ORDER BY 2 DESC;
END;
$$ LANGUAGE plpgsql STABLE;
|
|
380
|
+
|
|
381
|
+
COMMENT ON FUNCTION rem_fuzzy IS
|
|
382
|
+
'REM FUZZY query: Fuzzy text search using pg_trgm. Returns raw entity data as JSONB for LLM consumption. tenant_id parameter exists for backward compatibility but filtering uses user_id.';
|
|
383
|
+
|
|
384
|
+
-- REM TRAVERSE: Moved to 002_install_models.sql (after entity tables are created)
|
|
385
|
+
-- See 002_install_models.sql for the full rem_traverse function with keys_only parameter
|
|
386
|
+
|
|
387
|
+
-- REM SEARCH: Vector similarity search using embeddings
|
|
388
|
+
-- Joins to embeddings table for semantic search
|
|
389
|
+
-- rem_search(p_query_embedding, p_table_name, p_field_name, p_tenant_id,
--            p_provider, p_min_similarity, p_limit, p_user_id)
--
-- Vector similarity search: joins <p_table_name> with embeddings_<p_table_name>
-- and returns the most similar non-deleted rows as JSONB.
--   p_query_embedding  query vector (1536 dimensions)
--   p_table_name       source table; the embeddings table name is derived from it
--   p_field_name       which embedded field to search (embeddings.field_name)
--   p_tenant_id        used only as the fallback owner filter when p_user_id is NULL
--   p_provider         embedding provider to match (embeddings.provider)
--   p_min_similarity   minimum similarity score to keep a row
--   p_limit            maximum number of rows returned
--   p_user_id          owner filter; NULL falls back to p_tenant_id
-- Returns (entity_type, similarity_score, data), most similar first.
CREATE OR REPLACE FUNCTION rem_search(
    p_query_embedding vector(1536),
    p_table_name VARCHAR(100),
    p_field_name VARCHAR(100),
    p_tenant_id VARCHAR(100),
    p_provider VARCHAR(50) DEFAULT 'openai',
    p_min_similarity REAL DEFAULT 0.7,
    p_limit INTEGER DEFAULT 10,
    p_user_id VARCHAR(100) DEFAULT NULL
)
RETURNS TABLE(
    entity_type VARCHAR(100),
    similarity_score REAL,
    data JSONB
) AS $$
DECLARE
    embeddings_table VARCHAR(200);
    source_table VARCHAR(100);
    query_sql TEXT;
BEGIN
    -- Construct embeddings table name
    embeddings_table := 'embeddings_' || p_table_name;
    source_table := p_table_name;

    -- Similarity is the inner product of the two vectors. pgvector's <#>
    -- operator returns the NEGATIVE inner product, so it is multiplied by -1
    -- to recover the inner product itself; for unit-normalised embeddings
    -- that equals cosine similarity (1 = same direction, 0 = orthogonal).
    --
    -- Fixes relative to the previous version:
    --   * the score was computed as 1 - inner_product (a distance), so perfect
    --     matches scored 0 and were rejected by p_min_similarity;
    --   * ORDER BY used DESC on <#>, which returned the least similar rows
    --     first. Ascending <#> is "most similar first".
    query_sql := format('
        SELECT
            %L::VARCHAR(100) AS entity_type,
            ((e.embedding <#> $1) * -1.0)::REAL AS similarity_score,
            row_to_json(t)::jsonb AS data
        FROM %I t
        JOIN %I e ON e.entity_id = t.id
        WHERE t.user_id = $2
          AND e.field_name = $3
          AND e.provider = $4
          AND ((e.embedding <#> $1) * -1.0) >= $5
          AND t.deleted_at IS NULL
        ORDER BY e.embedding <#> $1
        LIMIT $6
    ', source_table, source_table, embeddings_table);

    -- The owner filter falls back to p_tenant_id when p_user_id is NULL, the
    -- same rule rem_fuzzy applies; a NULL bound to "t.user_id = $2" would
    -- otherwise match nothing.
    RETURN QUERY EXECUTE query_sql
    USING p_query_embedding,
          COALESCE(p_user_id, p_tenant_id),
          p_field_name,
          p_provider,
          p_min_similarity,
          p_limit;
END;
$$ LANGUAGE plpgsql STABLE;
|
|
438
|
+
|
|
439
|
+
COMMENT ON FUNCTION rem_search IS
|
|
440
|
+
'REM SEARCH query: Vector similarity search using inner product for OpenAI normalized embeddings. Returns raw entity data as JSONB for LLM consumption, scoped to user_id';
|
|
441
|
+
|
|
442
|
+
-- Function to get migration status
|
|
443
|
+
-- migration_status(): per-type summary of successfully applied migrations.
--
-- Returns one row per rem_migrations.type with the number of successful
-- migrations, the most recent applied_at, and the summed execution time in
-- milliseconds. Rows are ordered by most recent application first.
CREATE OR REPLACE FUNCTION migration_status()
RETURNS TABLE(
    migration_type TEXT,
    count BIGINT,
    last_applied TIMESTAMP,
    total_execution_ms BIGINT
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        m.type::TEXT,
        COUNT(*)::BIGINT,
        MAX(m.applied_at),
        SUM(m.execution_time_ms)::BIGINT
    FROM rem_migrations m
    WHERE m.success          -- only migrations recorded as successful
    GROUP BY m.type
    ORDER BY MAX(m.applied_at) DESC;
END;
$$ LANGUAGE plpgsql;
|
|
463
|
+
|
|
464
|
+
COMMENT ON FUNCTION migration_status() IS
|
|
465
|
+
'Get summary of applied migrations by type';
|
|
466
|
+
|
|
467
|
+
-- ============================================================================
|
|
468
|
+
-- RECORD INSTALLATION
|
|
469
|
+
-- ============================================================================
|
|
470
|
+
|
|
471
|
+
INSERT INTO rem_migrations (name, type, version)
|
|
472
|
+
VALUES ('install.sql', 'install', '1.0.0')
|
|
473
|
+
ON CONFLICT (name) DO UPDATE
|
|
474
|
+
SET applied_at = CURRENT_TIMESTAMP,
|
|
475
|
+
applied_by = CURRENT_USER;
|
|
476
|
+
|
|
477
|
+
-- ============================================================================
|
|
478
|
+
-- COMPLETION
|
|
479
|
+
-- ============================================================================
|
|
480
|
+
|
|
481
|
+
DO $$
|
|
482
|
+
BEGIN
|
|
483
|
+
RAISE NOTICE '============================================================';
|
|
484
|
+
RAISE NOTICE 'REM Database Installation Complete';
|
|
485
|
+
RAISE NOTICE '============================================================';
|
|
486
|
+
RAISE NOTICE '';
|
|
487
|
+
RAISE NOTICE 'Extensions installed:';
|
|
488
|
+
RAISE NOTICE ' ✓ pgvector (vector embeddings)';
|
|
489
|
+
RAISE NOTICE ' ✓ pg_trgm (fuzzy text search)';
|
|
490
|
+
RAISE NOTICE ' ✓ uuid-ossp (UUID generation)';
|
|
491
|
+
RAISE NOTICE '';
|
|
492
|
+
RAISE NOTICE 'Infrastructure created:';
|
|
493
|
+
RAISE NOTICE ' ✓ rem_migrations (migration tracking)';
|
|
494
|
+
RAISE NOTICE ' ✓ kv_store (UNLOGGED entity cache)';
|
|
495
|
+
RAISE NOTICE ' ✓ Helper functions';
|
|
496
|
+
RAISE NOTICE '';
|
|
497
|
+
RAISE NOTICE 'Next steps:';
|
|
498
|
+
RAISE NOTICE ' 1. Generate model schema: rem schema generate --models src/rem/models/entities';
|
|
499
|
+
RAISE NOTICE ' 2. Apply model schema: rem db migrate';
|
|
500
|
+
RAISE NOTICE '';
|
|
501
|
+
RAISE NOTICE 'Status: SELECT * FROM migration_status();';
|
|
502
|
+
RAISE NOTICE '============================================================';
|
|
503
|
+
END $$;
|