remdb 0.3.0__py3-none-any.whl → 0.3.127__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +129 -2
- rem/agentic/README.md +76 -0
- rem/agentic/__init__.py +15 -0
- rem/agentic/agents/__init__.py +16 -2
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +51 -25
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/tool_wrapper.py +29 -3
- rem/agentic/otel/setup.py +93 -4
- rem/agentic/providers/phoenix.py +32 -43
- rem/agentic/providers/pydantic_ai.py +168 -24
- rem/agentic/schema.py +358 -21
- rem/agentic/tools/rem_tools.py +3 -3
- rem/api/README.md +238 -1
- rem/api/deps.py +255 -0
- rem/api/main.py +154 -37
- rem/api/mcp_router/resources.py +1 -1
- rem/api/mcp_router/server.py +26 -5
- rem/api/mcp_router/tools.py +465 -7
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +494 -0
- rem/api/routers/auth.py +124 -0
- rem/api/routers/chat/completions.py +402 -20
- rem/api/routers/chat/models.py +88 -10
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +542 -0
- rem/api/routers/chat/streaming.py +642 -45
- rem/api/routers/dev.py +81 -0
- rem/api/routers/feedback.py +268 -0
- rem/api/routers/messages.py +473 -0
- rem/api/routers/models.py +78 -0
- rem/api/routers/query.py +360 -0
- rem/api/routers/shared_sessions.py +406 -0
- rem/auth/middleware.py +126 -27
- rem/cli/commands/README.md +237 -64
- rem/cli/commands/ask.py +13 -10
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +5 -6
- rem/cli/commands/db.py +396 -139
- rem/cli/commands/experiments.py +293 -73
- rem/cli/commands/process.py +22 -15
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +97 -50
- rem/cli/main.py +29 -6
- rem/config.py +10 -3
- rem/models/core/core_model.py +7 -1
- rem/models/core/rem_query.py +5 -2
- rem/models/entities/__init__.py +21 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/message.py +30 -1
- rem/models/entities/session.py +83 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/user.py +10 -3
- rem/registry.py +373 -0
- rem/schemas/agents/rem.yaml +7 -3
- rem/services/content/providers.py +94 -140
- rem/services/content/service.py +92 -20
- rem/services/dreaming/affinity_service.py +2 -16
- rem/services/dreaming/moment_service.py +2 -15
- rem/services/embeddings/api.py +24 -17
- rem/services/embeddings/worker.py +16 -16
- rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
- rem/services/phoenix/client.py +302 -28
- rem/services/postgres/README.md +159 -15
- rem/services/postgres/__init__.py +2 -1
- rem/services/postgres/diff_service.py +531 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
- rem/services/postgres/repository.py +132 -0
- rem/services/postgres/schema_generator.py +291 -9
- rem/services/postgres/service.py +6 -6
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +14 -0
- rem/services/rem/parser.py +44 -9
- rem/services/rem/service.py +36 -2
- rem/services/session/compression.py +24 -1
- rem/services/session/reload.py +1 -1
- rem/services/user_service.py +98 -0
- rem/settings.py +313 -29
- rem/sql/background_indexes.sql +21 -16
- rem/sql/migrations/001_install.sql +387 -54
- rem/sql/migrations/002_install_models.sql +2320 -393
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/utils/__init__.py +18 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/embeddings.py +17 -4
- rem/utils/files.py +167 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +156 -1
- rem/utils/schema_loader.py +282 -35
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +3 -1
- rem/utils/vision.py +9 -14
- rem/workers/README.md +14 -14
- rem/workers/__init__.py +3 -1
- rem/workers/db_listener.py +579 -0
- rem/workers/db_maintainer.py +74 -0
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.3.0.dist-info → remdb-0.3.127.dist-info}/METADATA +464 -289
- {remdb-0.3.0.dist-info → remdb-0.3.127.dist-info}/RECORD +104 -73
- {remdb-0.3.0.dist-info → remdb-0.3.127.dist-info}/WHEEL +1 -1
- rem/sql/002_install_models.sql +0 -1068
- rem/sql/install_models.sql +0 -1038
- {remdb-0.3.0.dist-info → remdb-0.3.127.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,548 @@
|
|
|
1
|
+
-- REM Cache System
|
|
2
|
+
-- Description: Self-healing cache for UNLOGGED tables (kv_store)
|
|
3
|
+
-- Version: 1.0.0
|
|
4
|
+
-- Date: 2025-11-29
|
|
5
|
+
--
|
|
6
|
+
-- This migration adds:
|
|
7
|
+
-- 1. cache_system_state table for debouncing and API secret storage
|
|
8
|
+
-- 2. maybe_trigger_kv_rebuild() function for async rebuild triggering
|
|
9
|
+
-- 3. Updated rem_lookup/fuzzy/traverse with self-healing on empty cache
|
|
10
|
+
--
|
|
11
|
+
-- Self-Healing Flow:
|
|
12
|
+
-- Query returns 0 results → Check if kv_store empty → Trigger async rebuild
|
|
13
|
+
-- Priority: pg_net (if installed) → dblink (if installed); if neither is installed the rebuild is skipped with a warning
|
|
14
|
+
|
|
15
|
+
-- ============================================================================
|
|
16
|
+
-- REQUIRED EXTENSION
|
|
17
|
+
-- ============================================================================
|
|
18
|
+
-- pgcrypto is needed for gen_random_bytes() to generate API secrets
|
|
19
|
+
CREATE EXTENSION IF NOT EXISTS pgcrypto;
|
|
20
|
+
|
|
21
|
+
-- ============================================================================
|
|
22
|
+
-- CACHE SYSTEM STATE TABLE
|
|
23
|
+
-- ============================================================================
|
|
24
|
+
-- Stores:
|
|
25
|
+
-- - Last rebuild trigger timestamp (for debouncing)
|
|
26
|
+
-- - API secret for internal endpoint authentication
|
|
27
|
+
-- - Rebuild statistics
|
|
28
|
+
|
|
29
|
+
CREATE TABLE IF NOT EXISTS cache_system_state (
|
|
30
|
+
id INTEGER PRIMARY KEY DEFAULT 1 CHECK (id = 1), -- Single row table
|
|
31
|
+
api_secret TEXT NOT NULL, -- Secret for internal API auth
|
|
32
|
+
last_triggered_at TIMESTAMPTZ, -- Debounce: last trigger time
|
|
33
|
+
last_rebuild_at TIMESTAMPTZ, -- Last successful rebuild
|
|
34
|
+
triggered_by TEXT, -- What triggered last rebuild
|
|
35
|
+
trigger_count INTEGER DEFAULT 0, -- Total trigger count
|
|
36
|
+
rebuild_count INTEGER DEFAULT 0, -- Total successful rebuilds
|
|
37
|
+
created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
|
|
38
|
+
updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
|
|
39
|
+
);
|
|
40
|
+
|
|
41
|
+
-- Generate initial secret if table is empty
|
|
42
|
+
INSERT INTO cache_system_state (id, api_secret)
|
|
43
|
+
SELECT 1, encode(gen_random_bytes(32), 'hex')
|
|
44
|
+
WHERE NOT EXISTS (SELECT 1 FROM cache_system_state WHERE id = 1);
|
|
45
|
+
|
|
46
|
+
COMMENT ON TABLE cache_system_state IS
|
|
47
|
+
'Single-row table storing cache system state: API secret for internal auth and debounce tracking';
|
|
48
|
+
|
|
49
|
+
-- ============================================================================
|
|
50
|
+
-- HELPER: Check if extension exists
|
|
51
|
+
-- ============================================================================
|
|
52
|
+
|
|
53
|
+
CREATE OR REPLACE FUNCTION rem_extension_exists(p_extension TEXT)
|
|
54
|
+
RETURNS BOOLEAN AS $$
|
|
55
|
+
BEGIN
|
|
56
|
+
RETURN EXISTS (SELECT 1 FROM pg_extension WHERE extname = p_extension);
|
|
57
|
+
END;
|
|
58
|
+
$$ LANGUAGE plpgsql STABLE;
|
|
59
|
+
|
|
60
|
+
-- ============================================================================
|
|
61
|
+
-- HELPER: Check if kv_store is empty for user
|
|
62
|
+
-- ============================================================================
|
|
63
|
+
|
|
64
|
+
CREATE OR REPLACE FUNCTION rem_kv_store_empty(p_user_id TEXT)
|
|
65
|
+
RETURNS BOOLEAN AS $$
|
|
66
|
+
BEGIN
|
|
67
|
+
-- Quick existence check - very fast with index
|
|
68
|
+
RETURN NOT EXISTS (
|
|
69
|
+
SELECT 1 FROM kv_store
|
|
70
|
+
WHERE user_id = p_user_id
|
|
71
|
+
LIMIT 1
|
|
72
|
+
);
|
|
73
|
+
END;
|
|
74
|
+
$$ LANGUAGE plpgsql STABLE;
|
|
75
|
+
|
|
76
|
+
-- ============================================================================
|
|
77
|
+
-- MAIN: Maybe trigger KV rebuild (async, non-blocking)
|
|
78
|
+
-- ============================================================================
|
|
79
|
+
-- Called when a query returns 0 results and kv_store appears empty.
|
|
80
|
+
-- Uses pg_net (if available) to call API, falls back to dblink.
|
|
81
|
+
-- Includes debouncing to prevent request storms.
|
|
82
|
+
|
|
83
|
+
CREATE OR REPLACE FUNCTION maybe_trigger_kv_rebuild(
|
|
84
|
+
p_user_id TEXT,
|
|
85
|
+
p_triggered_by TEXT DEFAULT 'query'
|
|
86
|
+
)
|
|
87
|
+
RETURNS VOID AS $$
|
|
88
|
+
DECLARE
|
|
89
|
+
v_has_pgnet BOOLEAN;
|
|
90
|
+
v_has_dblink BOOLEAN;
|
|
91
|
+
v_last_trigger TIMESTAMPTZ;
|
|
92
|
+
v_api_secret TEXT;
|
|
93
|
+
v_debounce_seconds CONSTANT INTEGER := 30;
|
|
94
|
+
v_api_url TEXT := 'http://rem-api.rem.svc.cluster.local:8000/api/admin/internal/rebuild-kv';
|
|
95
|
+
v_request_id BIGINT;
|
|
96
|
+
BEGIN
|
|
97
|
+
-- Quick check: is kv_store actually empty for this user?
|
|
98
|
+
IF NOT rem_kv_store_empty(p_user_id) THEN
|
|
99
|
+
RETURN; -- Cache has data, nothing to do
|
|
100
|
+
END IF;
|
|
101
|
+
|
|
102
|
+
-- Try to acquire advisory lock (non-blocking, transaction-scoped)
|
|
103
|
+
-- This prevents multiple concurrent triggers
|
|
104
|
+
IF NOT pg_try_advisory_xact_lock(2147483646) THEN
|
|
105
|
+
RETURN; -- Another session is handling it
|
|
106
|
+
END IF;
|
|
107
|
+
|
|
108
|
+
-- Check debounce: was rebuild triggered recently?
|
|
109
|
+
SELECT last_triggered_at, api_secret
|
|
110
|
+
INTO v_last_trigger, v_api_secret
|
|
111
|
+
FROM cache_system_state
|
|
112
|
+
WHERE id = 1;
|
|
113
|
+
|
|
114
|
+
IF v_last_trigger IS NOT NULL
|
|
115
|
+
AND v_last_trigger > (CURRENT_TIMESTAMP - (v_debounce_seconds || ' seconds')::INTERVAL) THEN
|
|
116
|
+
RETURN; -- Triggered recently, skip
|
|
117
|
+
END IF;
|
|
118
|
+
|
|
119
|
+
-- Update state (so concurrent callers see it)
|
|
120
|
+
UPDATE cache_system_state
|
|
121
|
+
SET last_triggered_at = CURRENT_TIMESTAMP,
|
|
122
|
+
triggered_by = p_triggered_by,
|
|
123
|
+
trigger_count = trigger_count + 1,
|
|
124
|
+
updated_at = CURRENT_TIMESTAMP
|
|
125
|
+
WHERE id = 1;
|
|
126
|
+
|
|
127
|
+
-- Check available extensions
|
|
128
|
+
v_has_pgnet := rem_extension_exists('pg_net');
|
|
129
|
+
v_has_dblink := rem_extension_exists('dblink');
|
|
130
|
+
|
|
131
|
+
-- Priority 1: pg_net (async HTTP to API - supports S3 restore)
|
|
132
|
+
IF v_has_pgnet THEN
|
|
133
|
+
BEGIN
|
|
134
|
+
SELECT net.http_post(
|
|
135
|
+
url := v_api_url,
|
|
136
|
+
headers := jsonb_build_object(
|
|
137
|
+
'Content-Type', 'application/json',
|
|
138
|
+
'X-Internal-Secret', v_api_secret
|
|
139
|
+
),
|
|
140
|
+
body := jsonb_build_object(
|
|
141
|
+
'user_id', p_user_id,
|
|
142
|
+
'triggered_by', 'pg_net_' || p_triggered_by,
|
|
143
|
+
'timestamp', CURRENT_TIMESTAMP
|
|
144
|
+
)
|
|
145
|
+
) INTO v_request_id;
|
|
146
|
+
|
|
147
|
+
RAISE DEBUG 'kv_rebuild triggered via pg_net (request_id: %)', v_request_id;
|
|
148
|
+
RETURN;
|
|
149
|
+
EXCEPTION WHEN OTHERS THEN
|
|
150
|
+
RAISE WARNING 'pg_net trigger failed: %, falling back to dblink', SQLERRM;
|
|
151
|
+
END;
|
|
152
|
+
END IF;
|
|
153
|
+
|
|
154
|
+
-- Priority 2: dblink (async SQL - direct rebuild)
|
|
155
|
+
IF v_has_dblink THEN
|
|
156
|
+
BEGIN
|
|
157
|
+
-- Connect to self (same database)
|
|
158
|
+
PERFORM dblink_connect(
|
|
159
|
+
'kv_rebuild_conn',
|
|
160
|
+
format('dbname=%s', current_database())
|
|
161
|
+
);
|
|
162
|
+
|
|
163
|
+
-- Send async query (returns immediately)
|
|
164
|
+
PERFORM dblink_send_query(
|
|
165
|
+
'kv_rebuild_conn',
|
|
166
|
+
'SELECT rebuild_kv_store()'
|
|
167
|
+
);
|
|
168
|
+
|
|
169
|
+
-- Don't disconnect - query continues in background
|
|
170
|
+
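-- Connection auto-closes when session ends. NOTE(review): until then the named connection persists, so a later dblink_connect('kv_rebuild_conn') in the same session raises a duplicate-name error and falls into the handler below, which disconnects it - confirm this is intended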
|
|
171
|
+
|
|
172
|
+
RAISE DEBUG 'kv_rebuild triggered via dblink';
|
|
173
|
+
RETURN;
|
|
174
|
+
EXCEPTION WHEN OTHERS THEN
|
|
175
|
+
-- Clean up failed connection
|
|
176
|
+
BEGIN
|
|
177
|
+
PERFORM dblink_disconnect('kv_rebuild_conn');
|
|
178
|
+
EXCEPTION WHEN OTHERS THEN
|
|
179
|
+
NULL;
|
|
180
|
+
END;
|
|
181
|
+
RAISE WARNING 'dblink trigger failed: %', SQLERRM;
|
|
182
|
+
END;
|
|
183
|
+
END IF;
|
|
184
|
+
|
|
185
|
+
-- No async method available - log warning but don't block query
|
|
186
|
+
RAISE WARNING 'No async rebuild method available (pg_net or dblink). Cache rebuild skipped.';
|
|
187
|
+
|
|
188
|
+
EXCEPTION WHEN OTHERS THEN
|
|
189
|
+
-- Never fail the calling query
|
|
190
|
+
RAISE WARNING 'maybe_trigger_kv_rebuild failed: %', SQLERRM;
|
|
191
|
+
END;
|
|
192
|
+
$$ LANGUAGE plpgsql;
|
|
193
|
+
|
|
194
|
+
COMMENT ON FUNCTION maybe_trigger_kv_rebuild IS
|
|
195
|
+
'Async trigger for kv_store rebuild. Uses pg_net (API) or dblink (SQL). Includes debouncing.';
|
|
196
|
+
|
|
197
|
+
-- ============================================================================
|
|
198
|
+
-- UPDATED: rem_lookup with self-healing
|
|
199
|
+
-- ============================================================================
|
|
200
|
+
|
|
201
|
+
CREATE OR REPLACE FUNCTION rem_lookup(
|
|
202
|
+
p_entity_key VARCHAR(255),
|
|
203
|
+
p_tenant_id VARCHAR(100),
|
|
204
|
+
p_user_id VARCHAR(100)
|
|
205
|
+
)
|
|
206
|
+
RETURNS TABLE(
|
|
207
|
+
entity_type VARCHAR(100),
|
|
208
|
+
data JSONB
|
|
209
|
+
) AS $$
|
|
210
|
+
DECLARE
|
|
211
|
+
entity_table VARCHAR(100);
|
|
212
|
+
query_sql TEXT;
|
|
213
|
+
effective_user_id VARCHAR(100);
|
|
214
|
+
v_result_count INTEGER := 0;
|
|
215
|
+
BEGIN
|
|
216
|
+
effective_user_id := COALESCE(p_user_id, p_tenant_id);
|
|
217
|
+
|
|
218
|
+
-- First lookup in KV store to get entity_type (table name)
|
|
219
|
+
SELECT kv.entity_type INTO entity_table
|
|
220
|
+
FROM kv_store kv
|
|
221
|
+
WHERE (kv.user_id = effective_user_id OR kv.user_id IS NULL)
|
|
222
|
+
AND kv.entity_key = p_entity_key
|
|
223
|
+
LIMIT 1;
|
|
224
|
+
|
|
225
|
+
-- If not found, check if cache is empty and maybe trigger rebuild
|
|
226
|
+
IF entity_table IS NULL THEN
|
|
227
|
+
-- SELF-HEALING: Check if this is because cache is empty
|
|
228
|
+
IF rem_kv_store_empty(effective_user_id) THEN
|
|
229
|
+
PERFORM maybe_trigger_kv_rebuild(effective_user_id, 'rem_lookup');
|
|
230
|
+
END IF;
|
|
231
|
+
RETURN;
|
|
232
|
+
END IF;
|
|
233
|
+
|
|
234
|
+
-- Fetch raw record from underlying table as JSONB
|
|
235
|
+
query_sql := format('
|
|
236
|
+
SELECT
|
|
237
|
+
%L::VARCHAR(100) AS entity_type,
|
|
238
|
+
row_to_json(t)::jsonb AS data
|
|
239
|
+
FROM %I t
|
|
240
|
+
WHERE (t.user_id = $1 OR t.user_id IS NULL)
|
|
241
|
+
AND t.name = $2
|
|
242
|
+
AND t.deleted_at IS NULL
|
|
243
|
+
', entity_table, entity_table);
|
|
244
|
+
|
|
245
|
+
RETURN QUERY EXECUTE query_sql USING effective_user_id, p_entity_key;
|
|
246
|
+
END;
|
|
247
|
+
$$ LANGUAGE plpgsql STABLE;
|
|
248
|
+
|
|
249
|
+
-- ============================================================================
|
|
250
|
+
-- UPDATED: rem_fuzzy with self-healing
|
|
251
|
+
-- ============================================================================
|
|
252
|
+
|
|
253
|
+
CREATE OR REPLACE FUNCTION rem_fuzzy(
|
|
254
|
+
p_query TEXT,
|
|
255
|
+
p_tenant_id VARCHAR(100),
|
|
256
|
+
p_threshold REAL DEFAULT 0.3,
|
|
257
|
+
p_limit INTEGER DEFAULT 10,
|
|
258
|
+
p_user_id VARCHAR(100) DEFAULT NULL
|
|
259
|
+
)
|
|
260
|
+
RETURNS TABLE(
|
|
261
|
+
entity_type VARCHAR(100),
|
|
262
|
+
similarity_score REAL,
|
|
263
|
+
data JSONB
|
|
264
|
+
) AS $$
|
|
265
|
+
DECLARE
|
|
266
|
+
kv_matches RECORD;
|
|
267
|
+
entities_by_table JSONB := '{}'::jsonb;
|
|
268
|
+
table_keys JSONB;
|
|
269
|
+
effective_user_id VARCHAR(100);
|
|
270
|
+
v_found_any BOOLEAN := FALSE;
|
|
271
|
+
BEGIN
|
|
272
|
+
effective_user_id := COALESCE(p_user_id, p_tenant_id);
|
|
273
|
+
|
|
274
|
+
-- Find matching keys in KV store
|
|
275
|
+
FOR kv_matches IN
|
|
276
|
+
SELECT
|
|
277
|
+
kv.entity_key,
|
|
278
|
+
kv.entity_type,
|
|
279
|
+
similarity(kv.entity_key, p_query) AS sim_score
|
|
280
|
+
FROM kv_store kv
|
|
281
|
+
WHERE (kv.user_id = effective_user_id OR kv.user_id IS NULL)
|
|
282
|
+
AND kv.entity_key % p_query
|
|
283
|
+
AND similarity(kv.entity_key, p_query) >= p_threshold
|
|
284
|
+
ORDER BY sim_score DESC
|
|
285
|
+
LIMIT p_limit
|
|
286
|
+
LOOP
|
|
287
|
+
v_found_any := TRUE;
|
|
288
|
+
-- Build JSONB mapping {table: [keys]}
|
|
289
|
+
IF entities_by_table ? kv_matches.entity_type THEN
|
|
290
|
+
table_keys := entities_by_table->kv_matches.entity_type;
|
|
291
|
+
entities_by_table := jsonb_set(
|
|
292
|
+
entities_by_table,
|
|
293
|
+
ARRAY[kv_matches.entity_type],
|
|
294
|
+
table_keys || jsonb_build_array(kv_matches.entity_key)
|
|
295
|
+
);
|
|
296
|
+
ELSE
|
|
297
|
+
entities_by_table := jsonb_set(
|
|
298
|
+
entities_by_table,
|
|
299
|
+
ARRAY[kv_matches.entity_type],
|
|
300
|
+
jsonb_build_array(kv_matches.entity_key)
|
|
301
|
+
);
|
|
302
|
+
END IF;
|
|
303
|
+
END LOOP;
|
|
304
|
+
|
|
305
|
+
-- SELF-HEALING: If no matches and cache is empty, trigger rebuild
|
|
306
|
+
IF NOT v_found_any AND rem_kv_store_empty(effective_user_id) THEN
|
|
307
|
+
PERFORM maybe_trigger_kv_rebuild(effective_user_id, 'rem_fuzzy');
|
|
308
|
+
END IF;
|
|
309
|
+
|
|
310
|
+
-- Fetch full records
|
|
311
|
+
RETURN QUERY
|
|
312
|
+
SELECT
|
|
313
|
+
f.entity_type::VARCHAR(100),
|
|
314
|
+
similarity(f.entity_key, p_query) AS similarity_score,
|
|
315
|
+
f.entity_record AS data
|
|
316
|
+
FROM rem_fetch(entities_by_table, effective_user_id) f
|
|
317
|
+
ORDER BY similarity_score DESC;
|
|
318
|
+
END;
|
|
319
|
+
$$ LANGUAGE plpgsql STABLE;
|
|
320
|
+
|
|
321
|
+
-- ============================================================================
|
|
322
|
+
-- UPDATED: rem_traverse with self-healing
|
|
323
|
+
-- ============================================================================
|
|
324
|
+
|
|
325
|
+
CREATE OR REPLACE FUNCTION rem_traverse(
|
|
326
|
+
p_entity_key VARCHAR(255),
|
|
327
|
+
p_tenant_id VARCHAR(100),
|
|
328
|
+
p_user_id VARCHAR(100),
|
|
329
|
+
p_max_depth INTEGER DEFAULT 1,
|
|
330
|
+
p_rel_type VARCHAR(100) DEFAULT NULL,
|
|
331
|
+
p_keys_only BOOLEAN DEFAULT FALSE
|
|
332
|
+
)
|
|
333
|
+
RETURNS TABLE(
|
|
334
|
+
depth INTEGER,
|
|
335
|
+
entity_key VARCHAR(255),
|
|
336
|
+
entity_type VARCHAR(100),
|
|
337
|
+
entity_id UUID,
|
|
338
|
+
rel_type VARCHAR(100),
|
|
339
|
+
rel_weight REAL,
|
|
340
|
+
path TEXT[],
|
|
341
|
+
entity_record JSONB
|
|
342
|
+
) AS $$
|
|
343
|
+
DECLARE
|
|
344
|
+
graph_keys RECORD;
|
|
345
|
+
entities_by_table JSONB := '{}'::jsonb;
|
|
346
|
+
table_keys JSONB;
|
|
347
|
+
effective_user_id VARCHAR(100);
|
|
348
|
+
v_found_start BOOLEAN := FALSE;
|
|
349
|
+
BEGIN
|
|
350
|
+
effective_user_id := COALESCE(p_user_id, p_tenant_id);
|
|
351
|
+
|
|
352
|
+
-- Check if start entity exists in kv_store
|
|
353
|
+
SELECT TRUE INTO v_found_start
|
|
354
|
+
FROM kv_store kv
|
|
355
|
+
WHERE (kv.user_id = effective_user_id OR kv.user_id IS NULL)
|
|
356
|
+
AND kv.entity_key = p_entity_key
|
|
357
|
+
LIMIT 1;
|
|
358
|
+
|
|
359
|
+
-- SELF-HEALING: If start not found and cache is empty, trigger rebuild
|
|
360
|
+
IF NOT COALESCE(v_found_start, FALSE) THEN
|
|
361
|
+
IF rem_kv_store_empty(effective_user_id) THEN
|
|
362
|
+
PERFORM maybe_trigger_kv_rebuild(effective_user_id, 'rem_traverse');
|
|
363
|
+
END IF;
|
|
364
|
+
RETURN;
|
|
365
|
+
END IF;
|
|
366
|
+
|
|
367
|
+
-- Original traverse logic
|
|
368
|
+
FOR graph_keys IN
|
|
369
|
+
WITH RECURSIVE graph_traversal AS (
|
|
370
|
+
SELECT
|
|
371
|
+
0 AS depth,
|
|
372
|
+
kv.entity_key,
|
|
373
|
+
kv.entity_type,
|
|
374
|
+
kv.entity_id,
|
|
375
|
+
NULL::VARCHAR(100) AS rel_type,
|
|
376
|
+
NULL::REAL AS rel_weight,
|
|
377
|
+
ARRAY[kv.entity_key]::TEXT[] AS path
|
|
378
|
+
FROM kv_store kv
|
|
379
|
+
WHERE (kv.user_id = effective_user_id OR kv.user_id IS NULL)
|
|
380
|
+
AND kv.entity_key = p_entity_key
|
|
381
|
+
|
|
382
|
+
UNION ALL
|
|
383
|
+
|
|
384
|
+
SELECT
|
|
385
|
+
gt.depth + 1,
|
|
386
|
+
target_kv.entity_key,
|
|
387
|
+
target_kv.entity_type,
|
|
388
|
+
target_kv.entity_id,
|
|
389
|
+
(edge->>'rel_type')::VARCHAR(100) AS rel_type,
|
|
390
|
+
COALESCE((edge->>'weight')::REAL, 1.0) AS rel_weight,
|
|
391
|
+
gt.path || target_kv.entity_key AS path
|
|
392
|
+
FROM graph_traversal gt
|
|
393
|
+
JOIN kv_store source_kv ON source_kv.entity_key = gt.entity_key
|
|
394
|
+
AND (source_kv.user_id = effective_user_id OR source_kv.user_id IS NULL)
|
|
395
|
+
CROSS JOIN LATERAL jsonb_array_elements(COALESCE(source_kv.graph_edges, '[]'::jsonb)) AS edge
|
|
396
|
+
JOIN kv_store target_kv ON target_kv.entity_key = (edge->>'dst')::VARCHAR(255)
|
|
397
|
+
AND (target_kv.user_id = effective_user_id OR target_kv.user_id IS NULL)
|
|
398
|
+
WHERE gt.depth < p_max_depth
|
|
399
|
+
AND (p_rel_type IS NULL OR (edge->>'rel_type')::VARCHAR(100) = p_rel_type)
|
|
400
|
+
AND NOT (target_kv.entity_key = ANY(gt.path))
|
|
401
|
+
)
|
|
402
|
+
SELECT DISTINCT ON (gt.entity_key)
|
|
403
|
+
gt.depth,
|
|
404
|
+
gt.entity_key,
|
|
405
|
+
gt.entity_type,
|
|
406
|
+
gt.entity_id,
|
|
407
|
+
gt.rel_type,
|
|
408
|
+
gt.rel_weight,
|
|
409
|
+
gt.path
|
|
410
|
+
FROM graph_traversal gt
|
|
411
|
+
WHERE gt.depth > 0
|
|
412
|
+
ORDER BY gt.entity_key, gt.depth
|
|
413
|
+
LOOP
|
|
414
|
+
IF p_keys_only THEN
|
|
415
|
+
depth := graph_keys.depth;
|
|
416
|
+
entity_key := graph_keys.entity_key;
|
|
417
|
+
entity_type := graph_keys.entity_type;
|
|
418
|
+
entity_id := graph_keys.entity_id;
|
|
419
|
+
rel_type := graph_keys.rel_type;
|
|
420
|
+
rel_weight := graph_keys.rel_weight;
|
|
421
|
+
path := graph_keys.path;
|
|
422
|
+
entity_record := NULL;
|
|
423
|
+
RETURN NEXT;
|
|
424
|
+
ELSE
|
|
425
|
+
IF entities_by_table ? graph_keys.entity_type THEN
|
|
426
|
+
table_keys := entities_by_table->graph_keys.entity_type;
|
|
427
|
+
entities_by_table := jsonb_set(
|
|
428
|
+
entities_by_table,
|
|
429
|
+
ARRAY[graph_keys.entity_type],
|
|
430
|
+
table_keys || jsonb_build_array(graph_keys.entity_key)
|
|
431
|
+
);
|
|
432
|
+
ELSE
|
|
433
|
+
entities_by_table := jsonb_set(
|
|
434
|
+
entities_by_table,
|
|
435
|
+
ARRAY[graph_keys.entity_type],
|
|
436
|
+
jsonb_build_array(graph_keys.entity_key)
|
|
437
|
+
);
|
|
438
|
+
END IF;
|
|
439
|
+
END IF;
|
|
440
|
+
END LOOP;
|
|
441
|
+
|
|
442
|
+
IF NOT p_keys_only THEN
|
|
443
|
+
RETURN QUERY
|
|
444
|
+
SELECT
|
|
445
|
+
g.depth,
|
|
446
|
+
g.entity_key,
|
|
447
|
+
g.entity_type,
|
|
448
|
+
g.entity_id,
|
|
449
|
+
g.rel_type,
|
|
450
|
+
g.rel_weight,
|
|
451
|
+
g.path,
|
|
452
|
+
f.entity_record
|
|
453
|
+
FROM (
|
|
454
|
+
SELECT * FROM rem_traverse(p_entity_key, p_tenant_id, effective_user_id, p_max_depth, p_rel_type, TRUE)
|
|
455
|
+
) g
|
|
456
|
+
LEFT JOIN rem_fetch(entities_by_table, effective_user_id) f
|
|
457
|
+
ON g.entity_key = f.entity_key;
|
|
458
|
+
END IF;
|
|
459
|
+
END;
|
|
460
|
+
$$ LANGUAGE plpgsql STABLE;
|
|
461
|
+
|
|
462
|
+
-- ============================================================================
|
|
463
|
+
-- HELPER: Get API secret for validation
|
|
464
|
+
-- ============================================================================
|
|
465
|
+
|
|
466
|
+
CREATE OR REPLACE FUNCTION rem_get_cache_api_secret()
|
|
467
|
+
RETURNS TEXT AS $$
|
|
468
|
+
DECLARE
|
|
469
|
+
v_secret TEXT;
|
|
470
|
+
BEGIN
|
|
471
|
+
SELECT api_secret INTO v_secret FROM cache_system_state WHERE id = 1;
|
|
472
|
+
RETURN v_secret;
|
|
473
|
+
END;
|
|
474
|
+
$$ LANGUAGE plpgsql STABLE SECURITY DEFINER;
|
|
475
|
+
|
|
476
|
+
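-- Remove the default PUBLIC execute privilege; afterwards only the function owner and superusers can call it unless EXECUTE is granted explicitly to another role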
|
|
477
|
+
REVOKE ALL ON FUNCTION rem_get_cache_api_secret() FROM PUBLIC;
|
|
478
|
+
|
|
479
|
+
-- ============================================================================
|
|
480
|
+
-- HELPER: Record successful rebuild
|
|
481
|
+
-- ============================================================================
|
|
482
|
+
|
|
483
|
+
CREATE OR REPLACE FUNCTION rem_record_cache_rebuild(p_triggered_by TEXT DEFAULT 'api')
|
|
484
|
+
RETURNS VOID AS $$
|
|
485
|
+
BEGIN
|
|
486
|
+
UPDATE cache_system_state
|
|
487
|
+
SET last_rebuild_at = CURRENT_TIMESTAMP,
|
|
488
|
+
rebuild_count = rebuild_count + 1,
|
|
489
|
+
updated_at = CURRENT_TIMESTAMP
|
|
490
|
+
WHERE id = 1;
|
|
491
|
+
END;
|
|
492
|
+
$$ LANGUAGE plpgsql;
|
|
493
|
+
|
|
494
|
+
-- ============================================================================
|
|
495
|
+
-- RECORD INSTALLATION
|
|
496
|
+
-- ============================================================================
|
|
497
|
+
|
|
498
|
+
DO $$
|
|
499
|
+
BEGIN
|
|
500
|
+
IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'rem_migrations') THEN
|
|
501
|
+
INSERT INTO rem_migrations (name, type, version)
|
|
502
|
+
VALUES ('004_cache_system.sql', 'install', '1.0.0')
|
|
503
|
+
ON CONFLICT (name) DO UPDATE
|
|
504
|
+
SET applied_at = CURRENT_TIMESTAMP,
|
|
505
|
+
applied_by = CURRENT_USER;
|
|
506
|
+
END IF;
|
|
507
|
+
END $$;
|
|
508
|
+
|
|
509
|
+
-- ============================================================================
|
|
510
|
+
-- COMPLETION
|
|
511
|
+
-- ============================================================================
|
|
512
|
+
|
|
513
|
+
DO $$
|
|
514
|
+
DECLARE
|
|
515
|
+
v_has_pgnet BOOLEAN;
|
|
516
|
+
v_has_dblink BOOLEAN;
|
|
517
|
+
BEGIN
|
|
518
|
+
v_has_pgnet := rem_extension_exists('pg_net');
|
|
519
|
+
v_has_dblink := rem_extension_exists('dblink');
|
|
520
|
+
|
|
521
|
+
RAISE NOTICE '============================================================';
|
|
522
|
+
RAISE NOTICE 'Cache System Installation Complete';
|
|
523
|
+
RAISE NOTICE '============================================================';
|
|
524
|
+
RAISE NOTICE '';
|
|
525
|
+
RAISE NOTICE 'Tables:';
|
|
526
|
+
RAISE NOTICE ' cache_system_state - Debounce tracking and API secret';
|
|
527
|
+
RAISE NOTICE '';
|
|
528
|
+
RAISE NOTICE 'Functions:';
|
|
529
|
+
RAISE NOTICE ' maybe_trigger_kv_rebuild() - Async rebuild trigger';
|
|
530
|
+
RAISE NOTICE ' rem_lookup() - Updated with self-healing';
|
|
531
|
+
RAISE NOTICE ' rem_fuzzy() - Updated with self-healing';
|
|
532
|
+
RAISE NOTICE ' rem_traverse() - Updated with self-healing';
|
|
533
|
+
RAISE NOTICE '';
|
|
534
|
+
RAISE NOTICE 'Async Methods Available:';
|
|
535
|
+
IF v_has_pgnet THEN
|
|
536
|
+
RAISE NOTICE ' [x] pg_net - HTTP POST to API (preferred)';
|
|
537
|
+
ELSE
|
|
538
|
+
RAISE NOTICE ' [ ] pg_net - Not installed';
|
|
539
|
+
END IF;
|
|
540
|
+
IF v_has_dblink THEN
|
|
541
|
+
RAISE NOTICE ' [x] dblink - Async SQL (fallback)';
|
|
542
|
+
ELSE
|
|
543
|
+
RAISE NOTICE ' [ ] dblink - Not installed';
|
|
544
|
+
END IF;
|
|
545
|
+
RAISE NOTICE '';
|
|
546
|
+
RAISE NOTICE 'Self-Healing: Queries will auto-trigger rebuild on empty cache';
|
|
547
|
+
RAISE NOTICE '============================================================';
|
|
548
|
+
END $$;
|
rem/utils/__init__.py
CHANGED
|
@@ -5,6 +5,7 @@ Utility functions and helpers for the REM system:
|
|
|
5
5
|
- sql_types: Pydantic to PostgreSQL type mapping
|
|
6
6
|
- embeddings: Vector embeddings generation using requests library
|
|
7
7
|
- user_id: Deterministic UUID generation from email addresses
|
|
8
|
+
- sql_paths: SQL file path resolution for packages and user migrations
|
|
8
9
|
"""
|
|
9
10
|
|
|
10
11
|
from .embeddings import (
|
|
@@ -24,6 +25,15 @@ from .user_id import (
|
|
|
24
25
|
is_valid_uuid,
|
|
25
26
|
user_id_to_uuid,
|
|
26
27
|
)
|
|
28
|
+
from .sql_paths import (
|
|
29
|
+
USER_SQL_DIR_CONVENTION,
|
|
30
|
+
get_package_sql_dir,
|
|
31
|
+
get_package_migrations_dir,
|
|
32
|
+
get_user_sql_dir,
|
|
33
|
+
list_package_migrations,
|
|
34
|
+
list_user_migrations,
|
|
35
|
+
list_all_migrations,
|
|
36
|
+
)
|
|
27
37
|
|
|
28
38
|
__all__ = [
|
|
29
39
|
# SQL Types
|
|
@@ -40,4 +50,12 @@ __all__ = [
|
|
|
40
50
|
"email_to_user_id",
|
|
41
51
|
"user_id_to_uuid",
|
|
42
52
|
"is_valid_uuid",
|
|
53
|
+
# SQL Paths
|
|
54
|
+
"USER_SQL_DIR_CONVENTION",
|
|
55
|
+
"get_package_sql_dir",
|
|
56
|
+
"get_package_migrations_dir",
|
|
57
|
+
"get_user_sql_dir",
|
|
58
|
+
"list_package_migrations",
|
|
59
|
+
"list_user_migrations",
|
|
60
|
+
"list_all_migrations",
|
|
43
61
|
]
|
rem/utils/constants.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Centralized constants for the REM system.
|
|
3
|
+
|
|
4
|
+
All magic numbers and commonly-used values should be defined here
|
|
5
|
+
to ensure consistency and make tuning easier.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
# =============================================================================
|
|
9
|
+
# Embedding Model Constants
|
|
10
|
+
# =============================================================================
|
|
11
|
+
|
|
12
|
+
# OpenAI embedding dimensions by model
|
|
13
|
+
OPENAI_EMBEDDING_DIMS_SMALL = 1536 # text-embedding-3-small
|
|
14
|
+
OPENAI_EMBEDDING_DIMS_LARGE = 3072 # text-embedding-3-large
|
|
15
|
+
OPENAI_EMBEDDING_DIMS_ADA = 1536 # text-embedding-ada-002
|
|
16
|
+
|
|
17
|
+
# Default embedding dimension (text-embedding-3-small)
|
|
18
|
+
DEFAULT_EMBEDDING_DIMS = 1536
|
|
19
|
+
|
|
20
|
+
# Voyage AI embedding dimensions
|
|
21
|
+
VOYAGE_EMBEDDING_DIMS = 1024 # voyage-2
|
|
22
|
+
|
|
23
|
+
# =============================================================================
|
|
24
|
+
# HTTP/API Timeouts (seconds)
|
|
25
|
+
# =============================================================================
|
|
26
|
+
|
|
27
|
+
HTTP_TIMEOUT_DEFAULT = 30.0 # Standard API calls
|
|
28
|
+
HTTP_TIMEOUT_LONG = 60.0 # Vision/embedding APIs
|
|
29
|
+
HTTP_TIMEOUT_VERY_LONG = 300.0 # Subprocess/batch operations
|
|
30
|
+
|
|
31
|
+
# Request timeout for httpx AsyncClient
|
|
32
|
+
ASYNC_CLIENT_TIMEOUT = 300.0
|
|
33
|
+
|
|
34
|
+
# =============================================================================
|
|
35
|
+
# Audio Processing Constants
|
|
36
|
+
# =============================================================================
|
|
37
|
+
|
|
38
|
+
# Minimum valid WAV file size (header only)
|
|
39
|
+
WAV_HEADER_MIN_BYTES = 44
|
|
40
|
+
|
|
41
|
+
# OpenAI Whisper API cost per minute (USD)
|
|
42
|
+
WHISPER_COST_PER_MINUTE = 0.006
|
|
43
|
+
|
|
44
|
+
# Audio chunking parameters
|
|
45
|
+
AUDIO_CHUNK_TARGET_SECONDS = 60.0 # Target chunk duration
|
|
46
|
+
AUDIO_CHUNK_WINDOW_SECONDS = 2.0 # Window for silence detection
|
|
47
|
+
SILENCE_THRESHOLD_DB = -40.0 # Silence detection threshold
|
|
48
|
+
MIN_SILENCE_MS = 500 # Minimum silence duration to split on
|
|
49
|
+
|
|
50
|
+
# =============================================================================
|
|
51
|
+
# File Processing Constants
|
|
52
|
+
# =============================================================================
|
|
53
|
+
|
|
54
|
+
# Subprocess timeout for document parsing
|
|
55
|
+
SUBPROCESS_TIMEOUT_SECONDS = 300 # 5 minutes
|
|
56
|
+
|
|
57
|
+
# Maximum file sizes
|
|
58
|
+
MAX_AUDIO_FILE_SIZE_MB = 25 # Whisper API limit
|
|
59
|
+
|
|
60
|
+
# =============================================================================
|
|
61
|
+
# Database/Query Constants
|
|
62
|
+
# =============================================================================
|
|
63
|
+
|
|
64
|
+
# Default batch sizes
|
|
65
|
+
DEFAULT_BATCH_SIZE = 100
|
|
66
|
+
EMBEDDING_BATCH_SIZE = 50
|
|
67
|
+
|
|
68
|
+
# Default pagination limits
|
|
69
|
+
DEFAULT_PAGE_SIZE = 20
|
|
70
|
+
MAX_PAGE_SIZE = 100
|
|
71
|
+
|
|
72
|
+
# =============================================================================
|
|
73
|
+
# Retry / Backoff
|
|
74
|
+
# =============================================================================
|
|
75
|
+
|
|
76
|
+
# Default retry settings
|
|
77
|
+
DEFAULT_MAX_RETRIES = 3
|
|
78
|
+
RETRY_BACKOFF_MULTIPLIER = 1
|
|
79
|
+
RETRY_BACKOFF_MIN = 1
|
|
80
|
+
RETRY_BACKOFF_MAX = 60
|
|
81
|
+
|
|
82
|
+
# =============================================================================
|
|
83
|
+
# S3/Storage Constants
|
|
84
|
+
# =============================================================================
|
|
85
|
+
|
|
86
|
+
S3_URI_PREFIX = "s3://"
|
|
87
|
+
FILE_URI_PREFIX = "file://"
|
|
88
|
+
|
|
89
|
+
# =============================================================================
|
|
90
|
+
# LLM Constants
|
|
91
|
+
# =============================================================================
|
|
92
|
+
|
|
93
|
+
# Default max tokens for vision analysis
|
|
94
|
+
VISION_MAX_TOKENS = 2048
|
|
95
|
+
|
|
96
|
+
# Default temperature
|
|
97
|
+
DEFAULT_TEMPERATURE = 0.0
|