remdb 0.3.0__py3-none-any.whl → 0.3.114__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (98) hide show
  1. rem/__init__.py +129 -2
  2. rem/agentic/README.md +76 -0
  3. rem/agentic/__init__.py +15 -0
  4. rem/agentic/agents/__init__.py +16 -2
  5. rem/agentic/agents/sse_simulator.py +500 -0
  6. rem/agentic/context.py +28 -22
  7. rem/agentic/llm_provider_models.py +301 -0
  8. rem/agentic/otel/setup.py +92 -4
  9. rem/agentic/providers/phoenix.py +32 -43
  10. rem/agentic/providers/pydantic_ai.py +142 -22
  11. rem/agentic/schema.py +358 -21
  12. rem/agentic/tools/rem_tools.py +3 -3
  13. rem/api/README.md +238 -1
  14. rem/api/deps.py +255 -0
  15. rem/api/main.py +151 -37
  16. rem/api/mcp_router/resources.py +1 -1
  17. rem/api/mcp_router/server.py +17 -2
  18. rem/api/mcp_router/tools.py +143 -7
  19. rem/api/middleware/tracking.py +172 -0
  20. rem/api/routers/admin.py +277 -0
  21. rem/api/routers/auth.py +124 -0
  22. rem/api/routers/chat/completions.py +152 -16
  23. rem/api/routers/chat/models.py +7 -3
  24. rem/api/routers/chat/sse_events.py +526 -0
  25. rem/api/routers/chat/streaming.py +608 -45
  26. rem/api/routers/dev.py +81 -0
  27. rem/api/routers/feedback.py +148 -0
  28. rem/api/routers/messages.py +473 -0
  29. rem/api/routers/models.py +78 -0
  30. rem/api/routers/query.py +357 -0
  31. rem/api/routers/shared_sessions.py +406 -0
  32. rem/auth/middleware.py +126 -27
  33. rem/cli/commands/README.md +201 -70
  34. rem/cli/commands/ask.py +13 -10
  35. rem/cli/commands/cluster.py +1359 -0
  36. rem/cli/commands/configure.py +4 -3
  37. rem/cli/commands/db.py +350 -137
  38. rem/cli/commands/experiments.py +76 -72
  39. rem/cli/commands/process.py +22 -15
  40. rem/cli/commands/scaffold.py +47 -0
  41. rem/cli/commands/schema.py +95 -49
  42. rem/cli/main.py +29 -6
  43. rem/config.py +2 -2
  44. rem/models/core/core_model.py +7 -1
  45. rem/models/core/rem_query.py +5 -2
  46. rem/models/entities/__init__.py +21 -0
  47. rem/models/entities/domain_resource.py +38 -0
  48. rem/models/entities/feedback.py +123 -0
  49. rem/models/entities/message.py +30 -1
  50. rem/models/entities/session.py +83 -0
  51. rem/models/entities/shared_session.py +180 -0
  52. rem/models/entities/user.py +10 -3
  53. rem/registry.py +373 -0
  54. rem/schemas/agents/rem.yaml +7 -3
  55. rem/services/content/providers.py +94 -140
  56. rem/services/content/service.py +92 -20
  57. rem/services/dreaming/affinity_service.py +2 -16
  58. rem/services/dreaming/moment_service.py +2 -15
  59. rem/services/embeddings/api.py +24 -17
  60. rem/services/embeddings/worker.py +16 -16
  61. rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
  62. rem/services/phoenix/client.py +252 -19
  63. rem/services/postgres/README.md +159 -15
  64. rem/services/postgres/__init__.py +2 -1
  65. rem/services/postgres/diff_service.py +426 -0
  66. rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
  67. rem/services/postgres/repository.py +132 -0
  68. rem/services/postgres/schema_generator.py +86 -5
  69. rem/services/postgres/service.py +6 -6
  70. rem/services/rate_limit.py +113 -0
  71. rem/services/rem/README.md +14 -0
  72. rem/services/rem/parser.py +44 -9
  73. rem/services/rem/service.py +36 -2
  74. rem/services/session/compression.py +17 -1
  75. rem/services/session/reload.py +1 -1
  76. rem/services/user_service.py +98 -0
  77. rem/settings.py +169 -17
  78. rem/sql/background_indexes.sql +21 -16
  79. rem/sql/migrations/001_install.sql +231 -54
  80. rem/sql/migrations/002_install_models.sql +457 -393
  81. rem/sql/migrations/003_optional_extensions.sql +326 -0
  82. rem/utils/constants.py +97 -0
  83. rem/utils/date_utils.py +228 -0
  84. rem/utils/embeddings.py +17 -4
  85. rem/utils/files.py +167 -0
  86. rem/utils/mime_types.py +158 -0
  87. rem/utils/model_helpers.py +156 -1
  88. rem/utils/schema_loader.py +191 -35
  89. rem/utils/sql_types.py +3 -1
  90. rem/utils/vision.py +9 -14
  91. rem/workers/README.md +14 -14
  92. rem/workers/db_maintainer.py +74 -0
  93. {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/METADATA +303 -164
  94. {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/RECORD +96 -70
  95. {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/WHEEL +1 -1
  96. rem/sql/002_install_models.sql +0 -1068
  97. rem/sql/install_models.sql +0 -1038
  98. {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,326 @@
1
+ -- REM Optional Extensions
2
+ -- Description: Optional PostgreSQL extensions that enhance functionality but are not required
3
+ -- Version: 1.0.0
4
+ -- Date: 2025-11-29
5
+ --
6
+ -- These extensions are installed with try/catch - failures are logged but don't break the install.
7
+ -- This allows the same migration to work on:
8
+ -- - Custom images with extensions baked in (percolationlabs/rem-pg:18)
9
+ -- - Standard PostgreSQL images (extensions will be skipped)
10
+ --
11
+ -- Extensions:
12
+ -- - pg_net: Async HTTP/HTTPS requests from triggers and functions (Supabase)
13
+
14
+ -- ============================================================================
15
+ -- pg_net: Async HTTP Extension
16
+ -- ============================================================================
17
+ -- Enables PostgreSQL to make non-blocking HTTP requests from triggers and functions.
18
+ -- Requires: Custom image with pg_net compiled, shared_preload_libraries='pg_net'
19
+ --
20
+ -- Use cases:
21
+ -- - Webhook notifications on data changes
22
+ -- - Async event publishing to external APIs
23
+ -- - Background HTTP requests from triggers
24
+
25
-- Best-effort installation of pg_net. Any failure is reported as a NOTICE and
-- swallowed so the migration also succeeds on images without the extension.
DO $install_pg_net$
DECLARE
    failure_reason TEXT;
BEGIN
    CREATE EXTENSION IF NOT EXISTS pg_net;
    RAISE NOTICE ' pg_net extension installed successfully';
EXCEPTION
    WHEN OTHERS THEN
        -- MESSAGE_TEXT carries the same text as SQLERRM for the caught error.
        GET STACKED DIAGNOSTICS failure_reason = MESSAGE_TEXT;
        RAISE NOTICE ' pg_net extension not available (this is OK if using standard PostgreSQL image)';
        RAISE NOTICE ' Error: %', failure_reason;
END
$install_pg_net$;
35
+
36
+ -- ============================================================================
37
+ -- pg_net Helper Functions (only created if extension exists)
38
+ -- ============================================================================
39
+ -- Wrapper functions for common HTTP operations with sensible defaults
40
+
41
+ DO $$
42
+ BEGIN
43
+ -- Only create helpers if pg_net is available
44
+ IF EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'pg_net') THEN
45
+
46
+ -- Helper: POST JSON to a URL with standard headers
47
EXECUTE $func$
    CREATE OR REPLACE FUNCTION rem_http_post(
        p_url TEXT,
        p_body JSONB,
        p_headers JSONB DEFAULT '{}'::jsonb
    )
    RETURNS BIGINT AS $inner$
    -- Queue an async JSON POST through pg_net and return the pg_net request id.
    --   p_url     : target URL
    --   p_body    : JSON body to send
    --   p_headers : extra headers; keys given here override the defaults
    DECLARE
        merged_headers JSONB;
        request_id BIGINT;
    BEGIN
        -- jsonb || NULL yields NULL, which would silently drop the default
        -- Content-Type header when a caller passes NULL explicitly, so treat
        -- NULL as "no extra headers".
        merged_headers := '{"Content-Type": "application/json"}'::jsonb
                          || COALESCE(p_headers, '{}'::jsonb);

        SELECT net.http_post(
            url := p_url,
            headers := merged_headers,
            body := p_body
        ) INTO request_id;

        RETURN request_id;
    END;
    $inner$ LANGUAGE plpgsql;
$func$;
71
+
72
+ RAISE NOTICE ' rem_http_post helper function created';
73
+
74
+ -- Helper: GET from a URL
75
EXECUTE $func$
    CREATE OR REPLACE FUNCTION rem_http_get(
        p_url TEXT,
        p_headers JSONB DEFAULT '{}'::jsonb
    )
    RETURNS BIGINT AS $inner$
    -- Queue an async GET through pg_net and return the pg_net request id.
    BEGIN
        RETURN net.http_get(
            url := p_url,
            headers := p_headers
        );
    END;
    $inner$ LANGUAGE plpgsql;
$func$;
93
+
94
+ RAISE NOTICE ' rem_http_get helper function created';
95
+
96
+ -- ====================================================================
97
+ -- REM Query Function
98
+ -- ====================================================================
99
+ -- Executes REM queries via the REM API using pg_net
100
+ --
101
+ -- Default API host: rem-api (works in K8s same namespace)
102
+ -- For local Docker testing: Add "host.docker.internal rem-api" to /etc/hosts
103
+ -- Or override with p_api_host parameter
104
+ --
105
+ -- Example:
106
+ -- SELECT rem_query('LOOKUP sarah-chen', 'user123');
107
+ -- SELECT rem_query('SEARCH resources ''API design'' LIMIT 5', 'user123');
108
+
109
EXECUTE $func$
    CREATE OR REPLACE FUNCTION rem_query(
        p_query TEXT,
        p_user_id TEXT,
        p_api_host TEXT DEFAULT 'rem-api',
        p_api_port INTEGER DEFAULT 8000,
        p_mode TEXT DEFAULT 'rem-dialect'
    )
    RETURNS BIGINT AS $inner$
    -- Queue a REM query against the REM API via pg_net and return the request id.
    DECLARE
        -- Endpoint, e.g. http://rem-api:8000/api/v1/query with the defaults.
        endpoint TEXT := format('http://%s:%s/api/v1/query', p_api_host, p_api_port);
        -- Request body: the query text plus the dialect/mode.
        payload JSONB := jsonb_build_object(
            'query', p_query,
            'mode', p_mode
        );
        -- The user id travels in the X-User-Id header.
        http_headers JSONB := jsonb_build_object(
            'Content-Type', 'application/json',
            'X-User-Id', p_user_id
        );
    BEGIN
        RETURN net.http_post(
            url := endpoint,
            headers := http_headers,
            body := payload
        );
    END;
    $inner$ LANGUAGE plpgsql;
$func$;
151
+
152
+ RAISE NOTICE ' rem_query() function created';
153
+
154
+ -- Helper to get query results (waits for async response)
155
+ -- NOTE: pg_net is async by design. This function polls for the response.
156
+ -- For best results, use rem_query() and check results later, or use longer timeouts.
157
EXECUTE $func$
    CREATE OR REPLACE FUNCTION rem_query_result(
        p_request_id BIGINT,
        p_timeout_ms INTEGER DEFAULT 10000
    )
    RETURNS JSONB AS $inner$
    -- Poll net._http_response for the response to p_request_id.
    -- Returns the response body as JSONB on HTTP 200, an error object for any
    -- other stored response, or a timeout error object once p_timeout_ms elapses.
    DECLARE
        v_status_code INTEGER;
        v_content TEXT;
        v_found BOOLEAN;
        -- A NULL timeout would make "elapsed_ms >= p_timeout_ms" evaluate to
        -- NULL forever and the loop would never end; fall back to the default.
        v_timeout_ms INTEGER := COALESCE(p_timeout_ms, 10000);
        -- TIMESTAMPTZ keeps the elapsed-time arithmetic independent of the
        -- session time zone (clock_timestamp() is timestamptz).
        start_time TIMESTAMPTZ;
        elapsed_ms INTEGER;
    BEGIN
        start_time := clock_timestamp();

        LOOP
            -- SELECT INTO sets the targets to NULL when no row matches,
            -- so v_found is NULL (treated as false) until a response exists.
            SELECT true, status_code, content::text
              INTO v_found, v_status_code, v_content
              FROM net._http_response
             WHERE id = p_request_id;

            IF v_found THEN
                IF v_status_code = 200 THEN
                    RETURN v_content::jsonb;
                ELSE
                    RETURN jsonb_build_object(
                        'error', true,
                        'status_code', v_status_code,
                        'content', v_content
                    );
                END IF;
            END IF;

            elapsed_ms := EXTRACT(EPOCH FROM (clock_timestamp() - start_time)) * 1000;
            IF elapsed_ms >= v_timeout_ms THEN
                RETURN jsonb_build_object(
                    'error', true,
                    'message', 'Request timeout - pg_net is async, response may arrive later',
                    'request_id', p_request_id,
                    'hint', 'Check net._http_response table or increase timeout'
                );
            END IF;

            -- Sleep up to 500ms, but never past the remaining timeout budget.
            PERFORM pg_sleep(LEAST(0.5, (v_timeout_ms - elapsed_ms) / 1000.0));
        END LOOP;
    END;
    $inner$ LANGUAGE plpgsql;
$func$;
211
+
212
+ RAISE NOTICE ' rem_query_result() function created';
213
+
214
+ -- Convenience function: execute query and wait for result
215
+ -- WARNING: this call usually times out and returns a 'pending' object. pg_net's
216
+ -- background worker can only pick up a queued request once the transaction that
217
+ -- queued it has committed, so the response cannot arrive while this function is
218
+ -- still polling. Prefer rem_query() and read net._http_response after committing.
219
EXECUTE $func$
    CREATE OR REPLACE FUNCTION rem_query_sync(
        p_query TEXT,
        p_user_id TEXT,
        p_api_host TEXT DEFAULT 'rem-api',
        p_api_port INTEGER DEFAULT 8000,
        p_mode TEXT DEFAULT 'rem-dialect',
        p_timeout_ms INTEGER DEFAULT 10000
    )
    RETURNS JSONB AS $inner$
    -- Queue a REM query via rem_query() and poll net._http_response for its
    -- response. Returns the body as JSONB on HTTP 200, an error object for any
    -- other stored response, or a 'pending' object once p_timeout_ms elapses.
    DECLARE
        request_id BIGINT;
        v_status_code INTEGER;
        v_content TEXT;
        v_found BOOLEAN := false;
        -- A NULL timeout would make the timeout comparison NULL forever and
        -- the loop would never end; fall back to the default.
        v_timeout_ms INTEGER := COALESCE(p_timeout_ms, 10000);
        -- TIMESTAMPTZ keeps the elapsed-time arithmetic independent of the
        -- session time zone (clock_timestamp() is timestamptz).
        start_time TIMESTAMPTZ;
        elapsed_ms INTEGER;
    BEGIN
        -- Queue the HTTP request; only the request id is available at this point.
        request_id := rem_query(p_query, p_user_id, p_api_host, p_api_port, p_mode);

        start_time := clock_timestamp();
        LOOP
            -- Plain re-query on every iteration (no subtransaction is involved).
            SELECT true, status_code, content::text
              INTO v_found, v_status_code, v_content
              FROM net._http_response
             WHERE id = request_id;

            IF v_found THEN
                IF v_status_code = 200 THEN
                    RETURN v_content::jsonb;
                ELSE
                    RETURN jsonb_build_object('error', true, 'status_code', v_status_code, 'content', v_content);
                END IF;
            END IF;

            elapsed_ms := EXTRACT(EPOCH FROM (clock_timestamp() - start_time)) * 1000;
            IF elapsed_ms >= v_timeout_ms THEN
                -- Hand back the request id so the caller can look the response up later.
                RETURN jsonb_build_object(
                    'pending', true,
                    'request_id', request_id,
                    'message', 'Request queued but response not yet visible due to transaction isolation',
                    'hint', 'Query net._http_response WHERE id = ' || request_id || ' after this transaction commits'
                );
            END IF;

            -- Sleep up to 300ms, but never past the remaining timeout budget.
            PERFORM pg_sleep(LEAST(0.3, (v_timeout_ms - elapsed_ms) / 1000.0));
        END LOOP;
    END;
    $inner$ LANGUAGE plpgsql;
$func$;
273
+
274
+ RAISE NOTICE ' rem_query_sync() function created (async pattern recommended)';
275
+
276
+ ELSE
277
+ RAISE NOTICE ' Skipping pg_net helper functions (extension not installed)';
278
+ END IF;
279
+ END $$;
280
+
281
+ -- ============================================================================
282
+ -- RECORD INSTALLATION
283
+ -- ============================================================================
284
+
285
-- Record this migration in rem_migrations when that table is reachable.
DO $$
BEGIN
    -- to_regclass resolves the name through search_path exactly like the
    -- unqualified INSERT below and returns NULL when nothing matches. An
    -- information_schema lookup without a schema filter could instead match a
    -- table in an unrelated schema and make the INSERT fail.
    IF to_regclass('rem_migrations') IS NOT NULL THEN
        INSERT INTO rem_migrations (name, type, version)
        VALUES ('003_optional_extensions.sql', 'install', '1.0.0')
        ON CONFLICT (name) DO UPDATE
        SET applied_at = CURRENT_TIMESTAMP,
            applied_by = CURRENT_USER;
    END IF;
END $$;
296
+
297
+ -- ============================================================================
298
+ -- COMPLETION
299
+ -- ============================================================================
300
+
301
-- Print a summary of what this migration installed or skipped.
DO $summary$
DECLARE
    has_pg_net CONSTANT BOOLEAN := EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'pg_net');
    banner CONSTANT TEXT := '============================================================';
BEGIN
    RAISE NOTICE '%', banner;
    RAISE NOTICE 'Optional Extensions Installation Complete';
    RAISE NOTICE '%', banner;
    RAISE NOTICE '';

    IF NOT has_pg_net THEN
        RAISE NOTICE 'Skipped (not available in this PostgreSQL image):';
        RAISE NOTICE ' pg_net';
        RAISE NOTICE '';
        RAISE NOTICE 'To enable pg_net, use the custom image: percolationlabs/rem-pg:18';
    ELSE
        RAISE NOTICE 'Installed:';
        RAISE NOTICE ' pg_net (async HTTP/HTTPS requests)';
        RAISE NOTICE ' rem_http_post() - POST JSON to URL';
        RAISE NOTICE ' rem_http_get() - GET from URL';
        RAISE NOTICE ' rem_query() - Execute REM query (async)';
        RAISE NOTICE ' rem_query_result() - Get async query result';
        RAISE NOTICE ' rem_query_sync() - Execute and wait for result';
    END IF;

    RAISE NOTICE '%', banner;
END
$summary$;
rem/utils/constants.py ADDED
@@ -0,0 +1,97 @@
1
"""
Shared constants for the REM system.

Tunable numbers and commonly reused literal values live in this module so
that every caller agrees on them and they can be adjusted in one place.
"""

# -----------------------------------------------------------------------------
# Embedding models
# -----------------------------------------------------------------------------

# Vector sizes of the OpenAI embedding models
OPENAI_EMBEDDING_DIMS_SMALL = 1536  # text-embedding-3-small
OPENAI_EMBEDDING_DIMS_LARGE = 3072  # text-embedding-3-large
OPENAI_EMBEDDING_DIMS_ADA = 1536  # text-embedding-ada-002

# The default model is text-embedding-3-small
DEFAULT_EMBEDDING_DIMS = OPENAI_EMBEDDING_DIMS_SMALL

# Vector size of the Voyage AI model
VOYAGE_EMBEDDING_DIMS = 1024  # voyage-2

# -----------------------------------------------------------------------------
# HTTP / API timeouts (seconds)
# -----------------------------------------------------------------------------

HTTP_TIMEOUT_DEFAULT = 30.0  # standard API calls
HTTP_TIMEOUT_LONG = 60.0  # vision / embedding APIs
HTTP_TIMEOUT_VERY_LONG = 300.0  # subprocess / batch operations

# Request timeout for httpx AsyncClient
ASYNC_CLIENT_TIMEOUT = 300.0

# -----------------------------------------------------------------------------
# Audio processing
# -----------------------------------------------------------------------------

# Smallest valid WAV file: the header alone
WAV_HEADER_MIN_BYTES = 44

# OpenAI Whisper API price per minute of audio (USD)
WHISPER_COST_PER_MINUTE = 0.006

# Chunking parameters
AUDIO_CHUNK_TARGET_SECONDS = 60.0  # target chunk duration
AUDIO_CHUNK_WINDOW_SECONDS = 2.0  # window searched for silence
SILENCE_THRESHOLD_DB = -40.0  # level treated as silence
MIN_SILENCE_MS = 500  # shortest silence worth splitting on

# -----------------------------------------------------------------------------
# File processing
# -----------------------------------------------------------------------------

# Timeout for document-parsing subprocesses (5 minutes)
SUBPROCESS_TIMEOUT_SECONDS = 300

# Upper bound on audio uploads (Whisper API limit)
MAX_AUDIO_FILE_SIZE_MB = 25

# -----------------------------------------------------------------------------
# Database / query
# -----------------------------------------------------------------------------

# Batch sizes
DEFAULT_BATCH_SIZE = 100
EMBEDDING_BATCH_SIZE = 50

# Pagination limits
DEFAULT_PAGE_SIZE = 20
MAX_PAGE_SIZE = 100

# -----------------------------------------------------------------------------
# Retry / backoff (rate-limit handling)
# -----------------------------------------------------------------------------

DEFAULT_MAX_RETRIES = 3
RETRY_BACKOFF_MULTIPLIER = 1
RETRY_BACKOFF_MIN = 1
RETRY_BACKOFF_MAX = 60

# -----------------------------------------------------------------------------
# S3 / storage
# -----------------------------------------------------------------------------

S3_URI_PREFIX = "s3://"
FILE_URI_PREFIX = "file://"

# -----------------------------------------------------------------------------
# LLM
# -----------------------------------------------------------------------------

# Max tokens for vision analysis
VISION_MAX_TOKENS = 2048

# Sampling temperature
DEFAULT_TEMPERATURE = 0.0
@@ -0,0 +1,228 @@
1
+ """
2
+ Centralized datetime utilities for consistent UTC-naive datetime handling.
3
+
4
+ IMPORTANT: REM uses UTC-naive datetimes throughout the codebase.
5
+ PostgreSQL stores TIMESTAMP WITHOUT TIME ZONE, so all Python datetime
6
+ operations should use UTC-naive datetimes to avoid comparison errors.
7
+
8
+ Convention:
9
+ - All timestamps are implicitly UTC
10
+ - Use utc_now() instead of datetime.utcnow() or datetime.now(timezone.utc)
11
+ - Use parse_iso() to parse ISO format strings (handles "Z" suffix)
12
+ - Use to_iso() to format datetimes as ISO strings
13
+
14
+ See CLAUDE.md Section 1 (Datetime Convention) for details.
15
+ """
16
+
17
+ from datetime import UTC, datetime, timedelta
18
+ from typing import Optional
19
+
20
+
21
+ def utc_now() -> datetime:
22
+ """
23
+ Get current UTC time as a naive datetime.
24
+
25
+ Returns:
26
+ UTC-naive datetime representing current time.
27
+
28
+ Example:
29
+ >>> now = utc_now()
30
+ >>> now.tzinfo is None
31
+ True
32
+ """
33
+ return datetime.now(UTC).replace(tzinfo=None)
34
+
35
+
36
+ def to_iso(dt: datetime) -> str:
37
+ """
38
+ Convert datetime to ISO 8601 format string.
39
+
40
+ Args:
41
+ dt: Datetime to format (should be UTC-naive)
42
+
43
+ Returns:
44
+ ISO format string (e.g., "2024-01-15T10:30:00")
45
+
46
+ Example:
47
+ >>> dt = datetime(2024, 1, 15, 10, 30, 0)
48
+ >>> to_iso(dt)
49
+ '2024-01-15T10:30:00'
50
+ """
51
+ return dt.isoformat()
52
+
53
+
54
+ def to_iso_with_z(dt: datetime) -> str:
55
+ """
56
+ Convert datetime to ISO 8601 format with Z suffix.
57
+
58
+ Use this when interfacing with external APIs that expect
59
+ the Z suffix to indicate UTC.
60
+
61
+ Args:
62
+ dt: Datetime to format (should be UTC-naive)
63
+
64
+ Returns:
65
+ ISO format string with Z suffix (e.g., "2024-01-15T10:30:00Z")
66
+ """
67
+ return dt.isoformat() + "Z"
68
+
69
+
70
+ def parse_iso(iso_string: str) -> datetime:
71
+ """
72
+ Parse ISO 8601 format string to UTC-naive datetime.
73
+
74
+ Handles:
75
+ - Standard ISO format: "2024-01-15T10:30:00"
76
+ - Z suffix: "2024-01-15T10:30:00Z"
77
+ - Timezone offset: "2024-01-15T10:30:00+00:00" (converts to naive)
78
+ - Microseconds: "2024-01-15T10:30:00.123456"
79
+
80
+ Args:
81
+ iso_string: ISO format datetime string
82
+
83
+ Returns:
84
+ UTC-naive datetime
85
+
86
+ Raises:
87
+ ValueError: If string cannot be parsed
88
+
89
+ Example:
90
+ >>> parse_iso("2024-01-15T10:30:00Z")
91
+ datetime.datetime(2024, 1, 15, 10, 30)
92
+ >>> parse_iso("2024-01-15T10:30:00+00:00")
93
+ datetime.datetime(2024, 1, 15, 10, 30)
94
+ """
95
+ # Handle Z suffix (replace with +00:00 for fromisoformat)
96
+ if iso_string.endswith("Z"):
97
+ iso_string = iso_string[:-1] + "+00:00"
98
+
99
+ # Parse the ISO string
100
+ dt = datetime.fromisoformat(iso_string)
101
+
102
+ # Convert to naive UTC if timezone-aware
103
+ if dt.tzinfo is not None:
104
+ # Convert to UTC and strip timezone
105
+ from datetime import timezone
106
+ dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
107
+
108
+ return dt
109
+
110
+
111
+ def parse_iso_safe(iso_string: Optional[str], default: Optional[datetime] = None) -> Optional[datetime]:
112
+ """
113
+ Safely parse ISO string, returning default on failure.
114
+
115
+ Args:
116
+ iso_string: ISO format string or None
117
+ default: Default value if parsing fails
118
+
119
+ Returns:
120
+ Parsed datetime or default value
121
+ """
122
+ if not iso_string:
123
+ return default
124
+ try:
125
+ return parse_iso(iso_string)
126
+ except (ValueError, TypeError):
127
+ return default
128
+
129
+
130
+ def format_timestamp(dt: Optional[datetime] = None) -> str:
131
+ """
132
+ Format datetime for display/logging.
133
+
134
+ Args:
135
+ dt: Datetime to format (defaults to current UTC time)
136
+
137
+ Returns:
138
+ Formatted string like "2024-01-15 10:30:00 UTC"
139
+ """
140
+ if dt is None:
141
+ dt = utc_now()
142
+ return dt.strftime("%Y-%m-%d %H:%M:%S") + " UTC"
143
+
144
+
145
+ def format_timestamp_compact(dt: Optional[datetime] = None) -> str:
146
+ """
147
+ Format datetime as compact string for filenames/IDs.
148
+
149
+ Args:
150
+ dt: Datetime to format (defaults to current UTC time)
151
+
152
+ Returns:
153
+ Formatted string like "20240115_103000"
154
+ """
155
+ if dt is None:
156
+ dt = utc_now()
157
+ return dt.strftime("%Y%m%d_%H%M%S")
158
+
159
+
160
+ def format_timestamp_for_experiment(dt: Optional[datetime] = None) -> str:
161
+ """
162
+ Format datetime for experiment names.
163
+
164
+ Args:
165
+ dt: Datetime to format (defaults to current UTC time)
166
+
167
+ Returns:
168
+ Formatted string like "20240115-103000"
169
+ """
170
+ if dt is None:
171
+ dt = utc_now()
172
+ return dt.strftime("%Y%m%d-%H%M%S")
173
+
174
+
175
def days_ago(days: int) -> datetime:
    """
    Return the moment ``days`` days before the current UTC time.

    Args:
        days: How many days to go back

    Returns:
        UTC-naive datetime
    """
    current = utc_now()
    return current - timedelta(days=days)
186
+
187
+
188
def hours_ago(hours: int) -> datetime:
    """
    Return the moment ``hours`` hours before the current UTC time.

    Args:
        hours: How many hours to go back

    Returns:
        UTC-naive datetime
    """
    current = utc_now()
    return current - timedelta(hours=hours)
199
+
200
+
201
def is_within_hours(dt: datetime, hours: int) -> bool:
    """
    Tell whether ``dt`` is no older than ``hours`` hours.

    Args:
        dt: Datetime to check (expected to be UTC-naive)
        hours: Size of the window in hours

    Returns:
        True when ``dt`` is at or after the cutoff (now minus ``hours`` hours)
    """
    return dt >= hours_ago(hours)
214
+
215
+
216
def is_within_days(dt: datetime, days: int) -> bool:
    """
    Tell whether ``dt`` is no older than ``days`` days.

    Args:
        dt: Datetime to check (expected to be UTC-naive)
        days: Size of the window in days

    Returns:
        True when ``dt`` is at or after the cutoff (now minus ``days`` days)
    """
    return dt >= days_ago(days)
rem/utils/embeddings.py CHANGED
@@ -20,7 +20,6 @@ Usage:
20
20
  embeddings = generate_embeddings("openai:text-embedding-3-small", texts)
21
21
  """
22
22
 
23
- import os
24
23
  from typing import Any, cast
25
24
 
26
25
  import requests
@@ -31,6 +30,16 @@ from tenacity import (
31
30
  wait_exponential,
32
31
  )
33
32
 
33
+ from rem.utils.constants import (
34
+ HTTP_TIMEOUT_LONG,
35
+ OPENAI_EMBEDDING_DIMS_SMALL,
36
+ OPENAI_EMBEDDING_DIMS_LARGE,
37
+ VOYAGE_EMBEDDING_DIMS,
38
+ RETRY_BACKOFF_MULTIPLIER,
39
+ RETRY_BACKOFF_MIN,
40
+ RETRY_BACKOFF_MAX,
41
+ )
42
+
34
43
 
35
44
  class EmbeddingError(Exception):
36
45
  """Base exception for embedding generation errors."""
@@ -166,7 +175,11 @@ def _create_retry_decorator(max_retries: int):
166
175
  return retry(
167
176
  retry=retry_if_exception_type(RateLimitError),
168
177
  stop=stop_after_attempt(max_retries),
169
- wait=wait_exponential(multiplier=1, min=1, max=60),
178
+ wait=wait_exponential(
179
+ multiplier=RETRY_BACKOFF_MULTIPLIER,
180
+ min=RETRY_BACKOFF_MIN,
181
+ max=RETRY_BACKOFF_MAX,
182
+ ),
170
183
  reraise=True,
171
184
  )
172
185
 
@@ -234,7 +247,7 @@ def _generate_openai_embeddings(
234
247
  }
235
248
 
236
249
  try:
237
- response = requests.post(url, json=payload, headers=headers, timeout=60)
250
+ response = requests.post(url, json=payload, headers=headers, timeout=HTTP_TIMEOUT_LONG)
238
251
 
239
252
  # Handle rate limits
240
253
  if response.status_code == 429:
@@ -334,7 +347,7 @@ def _generate_voyage_embeddings(
334
347
  }
335
348
 
336
349
  try:
337
- response = requests.post(url, json=payload, headers=headers, timeout=60)
350
+ response = requests.post(url, json=payload, headers=headers, timeout=HTTP_TIMEOUT_LONG)
338
351
 
339
352
  # Handle rate limits
340
353
  if response.status_code == 429: