remdb 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (187) hide show
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +566 -0
  44. rem/cli/commands/configure.py +497 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1302 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +96 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +806 -0
  104. rem/services/content/service.py +676 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +336 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.3.0.dist-info/METADATA +1455 -0
  185. remdb-0.3.0.dist-info/RECORD +187 -0
  186. remdb-0.3.0.dist-info/WHEEL +4 -0
  187. remdb-0.3.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,503 @@
1
+ -- REM Database Installation Script
2
+ -- Description: Core database setup with extensions and infrastructure
3
+ -- Version: 1.0.0
4
+ -- Date: 2025-01-18
5
+ --
6
+ -- This script sets up:
7
+ -- 1. Required PostgreSQL extensions (pgvector, pg_trgm, uuid-ossp)
8
+ -- 2. Migration tracking table
9
+ -- 3. KV_STORE UNLOGGED cache table
10
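+ -- 4. Helper functions (rebuild_kv_store, migration_status) and REM query functions (rem_lookup, rem_fetch, rem_fuzzy, rem_search)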
11
+ --
12
+ -- Usage:
13
+ -- psql -d remdb -f sql/install.sql
14
+ --
15
+ -- Dependencies:
16
+ -- - PostgreSQL 16+
17
+ -- - pgvector extension compiled and available
18
+ -- - pg_trgm extension (usually included)
19
+
20
+ -- ============================================================================
21
+ -- EXTENSIONS
22
+ -- ============================================================================
23
+
24
+ -- Enable pgvector: provides the vector type and the <#> operator used by rem_search
25
+ CREATE EXTENSION IF NOT EXISTS vector;
26
+
27
+ -- Enable pg_trgm: provides similarity(), the % operator and gin_trgm_ops used by the kv_store indexes and rem_fuzzy
28
+ CREATE EXTENSION IF NOT EXISTS pg_trgm;
29
+
30
+ -- Enable uuid-ossp for UUID generation (no uuid-ossp function is called within this script itself)
31
+ CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
32
+
33
+ -- Verify critical extensions: abort if vector or pg_trgm is absent from pg_extension (uuid-ossp is not checked)
34
+ DO $$
35
+ BEGIN
36
+ IF NOT EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'vector') THEN
37
+ RAISE EXCEPTION 'pgvector extension failed to install. Ensure pgvector is compiled and available.';
38
+ END IF;
39
+
40
+ IF NOT EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'pg_trgm') THEN
41
+ RAISE EXCEPTION 'pg_trgm extension failed to install.';
42
+ END IF;
43
+
44
+ RAISE NOTICE '✓ All required extensions installed successfully';
45
+ END $$;
46
+
47
+ -- ============================================================================
48
+ -- MIGRATION TRACKING
49
+ -- ============================================================================
50
+
51
+ CREATE TABLE IF NOT EXISTS rem_migrations ( -- one row per migration script (name is UNIQUE)
52
+ id SERIAL PRIMARY KEY,
53
+ name VARCHAR(255) NOT NULL UNIQUE, -- script name; conflict target of the upsert at the end of this script
54
+ type VARCHAR(50) NOT NULL, -- 'install', 'models', 'data'; grouping key of migration_status()
55
+ version VARCHAR(50), -- free-form version label (this script records '1.0.0')
56
+ checksum VARCHAR(64), -- presumably a hex digest of the script; not populated by this script - confirm with the migration runner
57
+ applied_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, -- refreshed by the install upsert when this script is re-run
58
+ applied_by VARCHAR(100) DEFAULT CURRENT_USER, -- refreshed by the install upsert when this script is re-run
59
+ execution_time_ms INTEGER, -- summed per type by migration_status(); not populated by this script
60
+ success BOOLEAN DEFAULT TRUE -- migration_status() only counts rows where this is TRUE
61
+ );
62
+
63
+ CREATE INDEX IF NOT EXISTS idx_rem_migrations_type ON rem_migrations(type); -- type is the GROUP BY key in migration_status()
64
+ CREATE INDEX IF NOT EXISTS idx_rem_migrations_applied_at ON rem_migrations(applied_at); -- applied_at is aggregated with MAX() in migration_status()
65
+
66
+ COMMENT ON TABLE rem_migrations IS
67
+ 'Tracks all applied migrations including install scripts and model schema updates';
68
+
69
+ -- ============================================================================
70
+ -- KV_STORE CACHE
71
+ -- ============================================================================
72
+
73
+ -- KV_STORE: UNLOGGED table for O(1) entity lookups in REM
74
+ --
75
+ -- Design rationale:
76
+ -- - UNLOGGED: Faster writes, no WAL overhead (acceptable for cache)
77
+ -- - Not rebuilt automatically: per the notes on rebuild_kv_store() below, triggers keep it in sync and rebuild_kv_store() must be called once its contents are lost
78
+ -- - Supports LOOKUP queries (rem_lookup) by exact entity_key
79
+ -- - Supports FUZZY queries (rem_fuzzy) with trigram indexes
80
+ -- - User-scoped filtering when user_id IS NOT NULL
81
+ -- - Tenant isolation via tenant_id (note: rem_lookup and rem_fuzzy in this script filter on user_id, not tenant_id)
82
+ --
83
+ -- Schema:
84
+ -- - entity_key: Natural language label (e.g., "sarah-chen", "project-alpha")
85
+ -- - entity_type: Table name (e.g., "resources", "moments"); used as the table to query by rem_lookup / rem_fuzzy
86
+ -- - entity_id: UUID from primary table
87
+ -- - tenant_id: Tenant identifier for multi-tenancy
88
+ -- - user_id: Optional user scoping (NULL = system-level)
89
+ -- - content_summary: Denormalized text for fuzzy search
90
+ -- - metadata / graph_edges: JSONB for additional filtering / cached graph edges
91
+ -- - created_at / updated_at: Timestamps; updated_at is intended for cache invalidation
92
+
93
+ CREATE UNLOGGED TABLE IF NOT EXISTS kv_store (
94
+ entity_key VARCHAR(255) NOT NULL,
95
+ entity_type VARCHAR(100) NOT NULL,
96
+ entity_id UUID NOT NULL,
97
+ tenant_id VARCHAR(100) NOT NULL,
98
+ user_id VARCHAR(100),
99
+ content_summary TEXT,
100
+ metadata JSONB DEFAULT '{}',
101
+ graph_edges JSONB DEFAULT '[]'::jsonb, -- Cached edges for fast graph traversal
102
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
103
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
104
+
105
+ -- Composite primary key: entity_key unique per tenant. NOTE(review): rem_lookup filters on (user_id, entity_key), which this key does not lead with - confirm index coverage
106
+ PRIMARY KEY (tenant_id, entity_key)
107
+ );
108
+
109
+ -- Partial index for user-scoped lookups; rows with user_id IS NULL are not in this index
110
+ CREATE INDEX IF NOT EXISTS idx_kv_store_user ON kv_store (tenant_id, user_id)
111
+ WHERE user_id IS NOT NULL;
112
+
113
+ -- Index for entity_id reverse lookup (find key by ID)
114
+ CREATE INDEX IF NOT EXISTS idx_kv_store_entity_id ON kv_store (entity_id);
115
+
116
+ -- Trigram index on entity_key for fuzzy text search (the % / similarity() match in rem_fuzzy)
117
+ CREATE INDEX IF NOT EXISTS idx_kv_store_key_trgm ON kv_store
118
+ USING gin (entity_key gin_trgm_ops);
119
+
120
+ -- Trigram index for content_summary fuzzy search (no function in this script queries content_summary)
121
+ CREATE INDEX IF NOT EXISTS idx_kv_store_content_trgm ON kv_store
122
+ USING gin (content_summary gin_trgm_ops);
123
+
124
+ -- GIN index for metadata JSONB queries
125
+ CREATE INDEX IF NOT EXISTS idx_kv_store_metadata ON kv_store
126
+ USING gin (metadata);
127
+
128
+ -- GIN index for graph_edges JSONB queries (graph traversal)
129
+ CREATE INDEX IF NOT EXISTS idx_kv_store_graph_edges ON kv_store
130
+ USING gin (graph_edges);
131
+
132
+ -- Index for entity_type filtering
133
+ CREATE INDEX IF NOT EXISTS idx_kv_store_type ON kv_store (entity_type);
134
+
135
+ -- Catalog comments documenting the table and its columns
136
+ COMMENT ON TABLE kv_store IS
137
+ 'UNLOGGED cache for O(1) entity lookups. Supports REM LOOKUP and FUZZY queries. Rebuilt from primary tables on restart.';
138
+
139
+ COMMENT ON COLUMN kv_store.entity_key IS
140
+ 'Natural language label for entity (e.g., "sarah-chen", "project-alpha")';
141
+
142
+ COMMENT ON COLUMN kv_store.entity_type IS
143
+ 'Source table name (e.g., "resources", "moments", "users")';
144
+
145
+ COMMENT ON COLUMN kv_store.entity_id IS
146
+ 'UUID from primary table for reverse lookup';
147
+
148
+ COMMENT ON COLUMN kv_store.tenant_id IS
149
+ 'Tenant identifier for multi-tenancy isolation';
150
+
151
+ COMMENT ON COLUMN kv_store.user_id IS
152
+ 'Optional user scoping. NULL = system-level entity, visible to all users in tenant';
153
+
154
+ COMMENT ON COLUMN kv_store.content_summary IS
155
+ 'Denormalized text summary for fuzzy search. Concatenated from content fields.';
156
+
157
+ -- ============================================================================
158
+ -- HELPER FUNCTIONS
159
+ -- ============================================================================
160
+
161
+ -- Function to rebuild KV_STORE from primary tables
162
+ --
163
+ -- IMPORTANT: You should NOT need to call this during normal operations!
164
+ -- KV store is automatically populated via triggers on INSERT/UPDATE/DELETE.
165
+ --
166
+ -- Only call this function after:
167
+ -- 1. A crash or unclean shutdown (PostgreSQL truncates UNLOGGED tables during crash recovery)
168
+ -- 2. Restoring a backup, or promoting a replica, that does not carry the UNLOGGED table's rows
169
+ -- 3. Bulk imports run with triggers disabled (e.g. pg_restore --disable-triggers); a plain COPY FROM does fire row triggers
170
+ --
171
+ -- Usage: SELECT * FROM rebuild_kv_store(); -- returns one (table_name, rows_inserted) row per rebuilt table
172
+ CREATE OR REPLACE FUNCTION rebuild_kv_store()
173
+ RETURNS TABLE(table_name TEXT, rows_inserted BIGINT) AS $$
174
+ DECLARE
175
+ table_rec RECORD;
176
+ rows_affected BIGINT;
177
+ BEGIN
178
+ -- Clear existing cache (every tenant and user; there is no filter)
179
+ DELETE FROM kv_store;
180
+ RAISE NOTICE 'Cleared KV_STORE cache';
181
+
182
+ -- Rebuild from each entity table in schema public that has a KV store trigger
183
+ -- This query finds all tables with triggers named like '%_kv_store' (in LIKE, the "_" is itself a one-character wildcard)
184
+ FOR table_rec IN
185
+ SELECT DISTINCT event_object_table as tbl
186
+ FROM information_schema.triggers
187
+ WHERE trigger_name LIKE '%_kv_store'
188
+ AND trigger_schema = 'public'
189
+ ORDER BY event_object_table
190
+ LOOP
191
+ -- Force trigger execution with a no-op UPDATE of all non-deleted rows
192
+ -- Assumes each matched table has updated_at and deleted_at columns and that its trigger fires on UPDATE
193
+ EXECUTE format('
194
+ UPDATE %I
195
+ SET updated_at = updated_at
196
+ WHERE deleted_at IS NULL
197
+ ', table_rec.tbl);
198
+
199
+ GET DIAGNOSTICS rows_affected = ROW_COUNT; -- rows touched by the UPDATE above
200
+
201
+ table_name := table_rec.tbl;
202
+ rows_inserted := rows_affected; -- source rows updated, not a count taken from kv_store
203
+ RETURN NEXT;
204
+
205
+ RAISE NOTICE 'Rebuilt % KV entries for %', rows_affected, table_rec.tbl;
206
+ END LOOP;
207
+ END;
208
+ $$ LANGUAGE plpgsql;
209
+
210
+ COMMENT ON FUNCTION rebuild_kv_store() IS
211
+ 'Rebuild KV_STORE cache from all entity tables. Call after database restart.';
212
+
213
+ -- ============================================================================
214
+ -- REM QUERY FUNCTIONS
215
+ -- ============================================================================
216
+
217
+ -- REM LOOKUP: entity lookup by natural key, resolved through kv_store
218
+ -- Returns (entity_type, data) rows where data is the whole matching entity row as JSONB
219
+ -- Parameters: entity_key, tenant_id (for backward compat), user_id (actual filter)
220
+ -- Note: tenant_id parameter exists for backward compatibility but is ignored
221
+ CREATE OR REPLACE FUNCTION rem_lookup(
222
+ p_entity_key VARCHAR(255), -- matched against kv_store.entity_key and then the entity table's name column
223
+ p_tenant_id VARCHAR(100), -- unused
224
+ p_user_id VARCHAR(100) -- compared with "="; a NULL value therefore matches nothing
225
+ )
226
+ RETURNS TABLE(
227
+ entity_type VARCHAR(100),
228
+ data JSONB
229
+ ) AS $$
230
+ DECLARE
231
+ entity_table VARCHAR(100);
232
+ query_sql TEXT;
233
+ BEGIN
234
+ -- Use p_user_id for filtering (p_tenant_id ignored for backward compat)
235
+ -- First lookup in KV store to get entity_type (table name); if several rows match, SELECT INTO keeps the first one returned
236
+ SELECT kv.entity_type INTO entity_table
237
+ FROM kv_store kv
238
+ WHERE kv.user_id = p_user_id
239
+ AND kv.entity_key = p_entity_key;
240
+
241
+ -- If not found, return empty
242
+ IF entity_table IS NULL THEN
243
+ RETURN;
244
+ END IF;
245
+
246
+ -- Fetch raw record from underlying table as JSONB (assumes that table has user_id, name and deleted_at columns)
247
+ -- LLMs can handle unstructured JSON - no need for schema assumptions
248
+ query_sql := format('
249
+ SELECT
250
+ %L::VARCHAR(100) AS entity_type,
251
+ row_to_json(t)::jsonb AS data
252
+ FROM %I t
253
+ WHERE t.user_id = $1
254
+ AND t.name = $2
255
+ AND t.deleted_at IS NULL
256
+ ', entity_table, entity_table);
257
+
258
+ RETURN QUERY EXECUTE query_sql USING p_user_id, p_entity_key; -- values are bound, not interpolated
259
+ END;
260
+ $$ LANGUAGE plpgsql STABLE;
261
+
262
+ COMMENT ON FUNCTION rem_lookup IS
263
+ 'REM LOOKUP query: O(1) entity lookup by natural key. Returns raw entity data as JSONB for LLM consumption. tenant_id parameter exists for backward compatibility but filtering uses user_id.';
264
+
265
+ -- REM FETCH: Fetch full entity records from multiple tables
266
+ -- Takes JSONB mapping of {table_name: [entity_keys]}, runs one query per table, matching keys against each table's name column
267
+ -- Returns complete entity records as JSONB (not just KV store metadata); soft-deleted rows (deleted_at set) are skipped
268
+ CREATE OR REPLACE FUNCTION rem_fetch(
269
+ p_entities_by_table JSONB, -- object whose keys are table names and whose values are arrays of entity keys
270
+ p_user_id VARCHAR(100) -- only rows with this user_id are returned
271
+ )
272
+ RETURNS TABLE(
273
+ entity_key TEXT,
274
+ entity_type TEXT,
275
+ entity_record JSONB
276
+ ) AS $$
277
+ DECLARE
278
+ table_name TEXT;
279
+ entity_keys TEXT[];
280
+ query_sql TEXT;
281
+ result_record RECORD;
282
+ BEGIN
283
+ -- Iterate over each table in the JSONB object
284
+ FOR table_name IN SELECT jsonb_object_keys(p_entities_by_table)
285
+ LOOP
286
+ -- Extract array of keys for this table
287
+ entity_keys := ARRAY(
288
+ SELECT jsonb_array_elements_text(p_entities_by_table->table_name)
289
+ );
290
+
291
+ -- Build dynamic query for this table: the table name is injected as a literal (%L) and as a quoted identifier (%I)
292
+ query_sql := format('
293
+ SELECT
294
+ t.name AS entity_key,
295
+ %L AS entity_type,
296
+ row_to_json(t)::jsonb AS entity_record
297
+ FROM %I t
298
+ WHERE t.user_id = $1
299
+ AND t.name = ANY($2)
300
+ AND t.deleted_at IS NULL
301
+ ', table_name, table_name);
302
+
303
+ -- Execute and return rows for this table (user id and key array are bound via USING)
304
+ FOR result_record IN EXECUTE query_sql USING p_user_id, entity_keys
305
+ LOOP
306
+ entity_key := result_record.entity_key;
307
+ entity_type := result_record.entity_type;
308
+ entity_record := result_record.entity_record;
309
+ RETURN NEXT;
310
+ END LOOP;
311
+ END LOOP;
312
+ END;
313
+ $$ LANGUAGE plpgsql STABLE;
314
+
315
+ COMMENT ON FUNCTION rem_fetch IS
316
+ 'REM FETCH: Fetch full entity records (all columns as JSONB) from multiple tables. Takes JSONB mapping {table_name: [keys]}, fetches all records, returns unified result set. Use for hydrating LOOKUP, FUZZY, SEARCH, and TRAVERSE results.';
317
+
318
+ -- REM FUZZY: Fuzzy text search on kv_store.entity_key using pg_trgm similarity
319
+ -- Returns raw entity data as JSONB for LLM consumption, hydrated through rem_fetch and ordered by similarity (best first)
320
+ CREATE OR REPLACE FUNCTION rem_fuzzy(
321
+ p_query TEXT, -- text compared against entity keys
322
+ p_tenant_id VARCHAR(100), -- only used as the user_id filter value when p_user_id is NULL
323
+ p_threshold REAL DEFAULT 0.3, -- minimum similarity() a key must reach
324
+ p_limit INTEGER DEFAULT 10, -- caps the kv_store matches, before entity tables are read
325
+ p_user_id VARCHAR(100) DEFAULT NULL -- user scope; falls back to p_tenant_id when NULL
326
+ )
327
+ RETURNS TABLE(
328
+ entity_type VARCHAR(100),
329
+ similarity_score REAL,
330
+ data JSONB
331
+ ) AS $$
332
+ DECLARE
333
+ kv_matches RECORD;
334
+ entities_by_table JSONB := '{}'::jsonb;
335
+ table_keys JSONB;
336
+ BEGIN
337
+ -- First, find matching keys in KV store with similarity scores
338
+ -- Group by table to prepare for batch fetch
339
+ FOR kv_matches IN
340
+ SELECT
341
+ kv.entity_key,
342
+ kv.entity_type,
343
+ similarity(kv.entity_key, p_query) AS sim_score
344
+ FROM kv_store kv
345
+ WHERE kv.user_id = COALESCE(p_user_id, p_tenant_id)
346
+ AND kv.entity_key % p_query -- Trigram similarity operator: true only above pg_trgm.similarity_threshold (default 0.3), so a p_threshold lower than that setting has no extra effect
347
+ AND similarity(kv.entity_key, p_query) >= p_threshold
348
+ ORDER BY sim_score DESC
349
+ LIMIT p_limit
350
+ LOOP
351
+ -- Build JSONB mapping {table: [keys]}: append to the table's array if present, otherwise start a new one
352
+ IF entities_by_table ? kv_matches.entity_type THEN
353
+ table_keys := entities_by_table->kv_matches.entity_type;
354
+ entities_by_table := jsonb_set(
355
+ entities_by_table,
356
+ ARRAY[kv_matches.entity_type],
357
+ table_keys || jsonb_build_array(kv_matches.entity_key)
358
+ );
359
+ ELSE
360
+ entities_by_table := jsonb_set(
361
+ entities_by_table,
362
+ ARRAY[kv_matches.entity_type],
363
+ jsonb_build_array(kv_matches.entity_key)
364
+ );
365
+ END IF;
366
+ END LOOP;
367
+
368
+ -- Fetch full records using rem_fetch helper (rows it filters out, e.g. soft-deleted ones, drop from the result)
369
+ -- Return raw entity data as JSONB for LLM consumption; the score is recomputed from the fetched key
370
+ -- Scope by p_user_id, falling back to p_tenant_id when p_user_id is NULL
371
+ RETURN QUERY
372
+ SELECT
373
+ f.entity_type::VARCHAR(100),
374
+ similarity(f.entity_key, p_query) AS similarity_score,
375
+ f.entity_record AS data
376
+ FROM rem_fetch(entities_by_table, COALESCE(p_user_id, p_tenant_id)) f
377
+ ORDER BY similarity_score DESC;
378
+ END;
379
+ $$ LANGUAGE plpgsql STABLE;
380
+
381
+ COMMENT ON FUNCTION rem_fuzzy IS
382
+ 'REM FUZZY query: Fuzzy text search using pg_trgm. Returns raw entity data as JSONB for LLM consumption. tenant_id parameter exists for backward compatibility but filtering uses user_id.';
383
+
384
+ -- REM TRAVERSE: Moved to 002_install_models.sql (after entity tables are created)
385
+ -- See 002_install_models.sql for the full rem_traverse function with keys_only parameter
386
+
387
+ -- REM SEARCH: Vector similarity search over an entity table's stored embeddings
388
+ -- Joins the entity table to its embeddings_<table> table and returns the closest rows first
389
+ CREATE OR REPLACE FUNCTION rem_search(
390
+ p_query_embedding vector(1536), -- query vector (1536 dimensions)
391
+ p_table_name VARCHAR(100), -- entity table; embeddings are read from 'embeddings_' || p_table_name
392
+ p_field_name VARCHAR(100), -- matched against the embeddings table's field_name
393
+ p_tenant_id VARCHAR(100), -- accepted but not used by the query
394
+ p_provider VARCHAR(50) DEFAULT 'openai', -- matched against the embeddings table's provider
395
+ p_min_similarity REAL DEFAULT 0.7, -- rows whose inner product is below this are dropped
396
+ p_limit INTEGER DEFAULT 10, -- maximum number of rows returned
397
+ p_user_id VARCHAR(100) DEFAULT NULL -- rows must have this user_id; NULL therefore matches nothing
398
+ )
399
+ RETURNS TABLE(
400
+ entity_type VARCHAR(100), -- p_table_name echoed back
401
+ similarity_score REAL, -- inner product of stored and query embedding (higher = closer)
402
+ data JSONB -- whole entity row via row_to_json
403
+ ) AS $$
404
+ DECLARE
405
+ embeddings_table VARCHAR(200);
406
+ source_table VARCHAR(100);
407
+ query_sql TEXT;
408
+ BEGIN
409
+ -- Construct embeddings table name
410
+ embeddings_table := 'embeddings_' || p_table_name;
411
+ source_table := p_table_name;
412
+
413
+ -- Dynamic query to join source table with embeddings table (names quoted via %I, values bound via USING)
414
+ -- Returns raw entity data as JSONB for LLM consumption
415
+ -- Note: Using inner product for OpenAI embeddings (normalized vectors, so it equals cosine similarity)
416
+ -- pgvector's <#> returns the NEGATIVE inner product, so it is multiplied by -1 to get the score
417
+ -- (1 = perfect match, 0 = orthogonal) and sorted ASC so the largest inner product comes first
418
+ query_sql := format('
419
+ SELECT
420
+ %L::VARCHAR(100) AS entity_type,
421
+ ((e.embedding <#> $1) * -1.0)::REAL AS similarity_score,
422
+ row_to_json(t)::jsonb AS data
423
+ FROM %I t
424
+ JOIN %I e ON e.entity_id = t.id
425
+ WHERE t.user_id = $2
426
+ AND e.field_name = $3
427
+ AND e.provider = $4
428
+ AND ((e.embedding <#> $1) * -1.0) >= $5
429
+ AND t.deleted_at IS NULL
430
+ ORDER BY e.embedding <#> $1 ASC
431
+ LIMIT $6
432
+ ', source_table, source_table, embeddings_table);
433
+
434
+ RETURN QUERY EXECUTE query_sql
435
+ USING p_query_embedding, p_user_id, p_field_name, p_provider, p_min_similarity, p_limit;
436
+ END;
437
+ $$ LANGUAGE plpgsql STABLE;
438
+
439
+ COMMENT ON FUNCTION rem_search IS
440
+ 'REM SEARCH query: Vector similarity search using inner product for OpenAI normalized embeddings. Returns raw entity data as JSONB for LLM consumption, scoped to user_id';
441
+
442
+ -- migration_status(): per-type summary of the successfully applied rows in rem_migrations, most recently applied type first
443
+ CREATE OR REPLACE FUNCTION migration_status()
444
+ RETURNS TABLE(
445
+ migration_type TEXT, -- rem_migrations.type
446
+ count BIGINT, -- number of successful rows of that type
447
+ last_applied TIMESTAMP, -- latest applied_at among those rows
448
+ total_execution_ms BIGINT -- sum of execution_time_ms (NULL when none was recorded)
449
+ ) AS $$
450
+ BEGIN
451
+ RETURN QUERY
452
+ SELECT
453
+ m.type::TEXT,
454
+ COUNT(*)::BIGINT,
455
+ MAX(m.applied_at),
456
+ SUM(m.execution_time_ms)::BIGINT
457
+ FROM rem_migrations AS m
458
+ WHERE m.success IS TRUE
459
+ GROUP BY m.type
460
+ ORDER BY MAX(m.applied_at) DESC;
461
+ END;
462
+ $$ LANGUAGE plpgsql;
463
+
464
+ COMMENT ON FUNCTION migration_status() IS
465
+ 'Get summary of applied migrations by type';
466
+
467
+ -- ============================================================================
468
+ -- RECORD INSTALLATION
469
+ -- ============================================================================
470
+
471
+ INSERT INTO rem_migrations (name, type, version) -- record this script so re-running the install stays idempotent
472
+ VALUES ('install.sql', 'install', '1.0.0')
473
+ ON CONFLICT (name) DO UPDATE -- already recorded: keep the row and only refresh who/when (type and version are left as they were)
474
+ SET applied_at = CURRENT_TIMESTAMP,
475
+ applied_by = CURRENT_USER;
476
+
477
+ -- ============================================================================
478
+ -- COMPLETION
479
+ -- ============================================================================
480
+
481
+ DO $$
482
+ BEGIN -- informational banner only: the notices below are static text and nothing in this block inspects the database
483
+ RAISE NOTICE '============================================================';
484
+ RAISE NOTICE 'REM Database Installation Complete';
485
+ RAISE NOTICE '============================================================';
486
+ RAISE NOTICE '';
487
+ RAISE NOTICE 'Extensions installed:';
488
+ RAISE NOTICE ' ✓ pgvector (vector embeddings)';
489
+ RAISE NOTICE ' ✓ pg_trgm (fuzzy text search)';
490
+ RAISE NOTICE ' ✓ uuid-ossp (UUID generation)';
491
+ RAISE NOTICE '';
492
+ RAISE NOTICE 'Infrastructure created:';
493
+ RAISE NOTICE ' ✓ rem_migrations (migration tracking)';
494
+ RAISE NOTICE ' ✓ kv_store (UNLOGGED entity cache)';
495
+ RAISE NOTICE ' ✓ Helper functions';
496
+ RAISE NOTICE '';
497
+ RAISE NOTICE 'Next steps:';
498
+ RAISE NOTICE ' 1. Generate model schema: rem schema generate --models src/rem/models/entities';
499
+ RAISE NOTICE ' 2. Apply model schema: rem db migrate';
500
+ RAISE NOTICE '';
501
+ RAISE NOTICE 'Status: SELECT * FROM migration_status();';
502
+ RAISE NOTICE '============================================================';
503
+ END $$;