remdb 0.2.6__py3-none-any.whl → 0.3.118__py3-none-any.whl
This diff shows the content of publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Potentially problematic release.
This version of remdb might be problematic.
- rem/__init__.py +129 -2
- rem/agentic/README.md +76 -0
- rem/agentic/__init__.py +15 -0
- rem/agentic/agents/__init__.py +16 -2
- rem/agentic/agents/sse_simulator.py +500 -0
- rem/agentic/context.py +28 -22
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/tool_wrapper.py +29 -3
- rem/agentic/otel/setup.py +92 -4
- rem/agentic/providers/phoenix.py +32 -43
- rem/agentic/providers/pydantic_ai.py +168 -24
- rem/agentic/schema.py +358 -21
- rem/agentic/tools/rem_tools.py +3 -3
- rem/api/README.md +238 -1
- rem/api/deps.py +255 -0
- rem/api/main.py +154 -37
- rem/api/mcp_router/resources.py +1 -1
- rem/api/mcp_router/server.py +26 -5
- rem/api/mcp_router/tools.py +454 -7
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +494 -0
- rem/api/routers/auth.py +124 -0
- rem/api/routers/chat/completions.py +152 -16
- rem/api/routers/chat/models.py +7 -3
- rem/api/routers/chat/sse_events.py +526 -0
- rem/api/routers/chat/streaming.py +608 -45
- rem/api/routers/dev.py +81 -0
- rem/api/routers/feedback.py +148 -0
- rem/api/routers/messages.py +473 -0
- rem/api/routers/models.py +78 -0
- rem/api/routers/query.py +360 -0
- rem/api/routers/shared_sessions.py +406 -0
- rem/auth/middleware.py +126 -27
- rem/cli/commands/README.md +237 -64
- rem/cli/commands/ask.py +15 -11
- rem/cli/commands/cluster.py +1300 -0
- rem/cli/commands/configure.py +170 -97
- rem/cli/commands/db.py +396 -139
- rem/cli/commands/experiments.py +278 -96
- rem/cli/commands/process.py +22 -15
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +97 -50
- rem/cli/main.py +37 -6
- rem/config.py +2 -2
- rem/models/core/core_model.py +7 -1
- rem/models/core/rem_query.py +5 -2
- rem/models/entities/__init__.py +21 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/message.py +30 -1
- rem/models/entities/session.py +83 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/user.py +10 -3
- rem/registry.py +373 -0
- rem/schemas/agents/rem.yaml +7 -3
- rem/services/content/providers.py +94 -140
- rem/services/content/service.py +115 -24
- rem/services/dreaming/affinity_service.py +2 -16
- rem/services/dreaming/moment_service.py +2 -15
- rem/services/embeddings/api.py +24 -17
- rem/services/embeddings/worker.py +16 -16
- rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
- rem/services/phoenix/client.py +252 -19
- rem/services/postgres/README.md +159 -15
- rem/services/postgres/__init__.py +2 -1
- rem/services/postgres/diff_service.py +531 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
- rem/services/postgres/repository.py +132 -0
- rem/services/postgres/schema_generator.py +291 -9
- rem/services/postgres/service.py +6 -6
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +14 -0
- rem/services/rem/parser.py +44 -9
- rem/services/rem/service.py +36 -2
- rem/services/session/compression.py +17 -1
- rem/services/session/reload.py +1 -1
- rem/services/user_service.py +98 -0
- rem/settings.py +169 -22
- rem/sql/background_indexes.sql +21 -16
- rem/sql/migrations/001_install.sql +387 -54
- rem/sql/migrations/002_install_models.sql +2320 -393
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/utils/__init__.py +18 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/embeddings.py +17 -4
- rem/utils/files.py +167 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +156 -1
- rem/utils/schema_loader.py +284 -21
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +3 -1
- rem/utils/vision.py +9 -14
- rem/workers/README.md +14 -14
- rem/workers/__init__.py +2 -1
- rem/workers/db_maintainer.py +74 -0
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/METADATA +598 -171
- {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/RECORD +102 -73
- {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/WHEEL +1 -1
- rem/sql/002_install_models.sql +0 -1068
- rem/sql/install_models.sql +0 -1038
- {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/entry_points.txt +0 -0
rem/services/dreaming/moment_service.py
CHANGED
```diff
@@ -8,13 +8,12 @@ with temporal boundaries and metadata.
 
 import json
 from datetime import datetime, timedelta
-from pathlib import Path
 from typing import Any, Optional
 from uuid import uuid4
 
-import yaml
 from loguru import logger
 
+from ...utils.schema_loader import load_agent_schema
 from ...agentic.providers.pydantic_ai import create_agent
 from ...agentic.serialization import serialize_agent_result
 from ...models.entities.moment import Moment, Person
@@ -101,19 +100,7 @@ async def construct_moments(
     }
 
     # Load MomentBuilder agent schema
-
-        Path(__file__).parent.parent.parent
-        / "schemas"
-        / "agents"
-        / "core"
-        / "moment-builder.yaml"
-    )
-
-    if not schema_path.exists():
-        raise FileNotFoundError(f"MomentBuilder schema not found: {schema_path}")
-
-    with open(schema_path) as f:
-        agent_schema = yaml.safe_load(f)
+    agent_schema = load_agent_schema("moment-builder")
 
     # Prepare input data for agent
     input_data = {
```
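The hand-rolled `Path(__file__)` / `yaml.safe_load` lookup above is replaced by the shared `load_agent_schema` helper from `rem/utils/schema_loader.py`. That helper's implementation is not part of this hunk; a minimal sketch of the kind of resolution it replaces, with an assumed `schemas/agents` layout and signature, might look like:

```python
# Hypothetical sketch only: rem.utils.schema_loader.load_agent_schema is not shown
# in this diff, so the search paths and signature below are assumptions.
from pathlib import Path
from typing import Any

import yaml

SCHEMA_ROOT = Path(__file__).resolve().parent / "schemas" / "agents"  # assumed layout


def load_agent_schema(name: str) -> dict[str, Any]:
    """Resolve a short agent name (e.g. 'moment-builder') to a parsed YAML schema."""
    for candidate in (SCHEMA_ROOT / f"{name}.yaml", SCHEMA_ROOT / "core" / f"{name}.yaml"):
        if candidate.exists():
            with open(candidate) as f:
                return yaml.safe_load(f)
    raise FileNotFoundError(f"Agent schema not found: {name}")
```

With a helper like this in place, the caller in `moment_service.py` shrinks to the single `agent_schema = load_agent_schema("moment-builder")` line shown above.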
rem/services/embeddings/api.py
CHANGED
```diff
@@ -5,13 +5,20 @@ Provides synchronous and async wrappers for embedding generation using
 raw HTTP requests (no OpenAI SDK dependency).
 """
 
-import os
 from typing import Optional, cast
 
 import httpx
 import requests
 from loguru import logger
 
+from rem.utils.constants import DEFAULT_EMBEDDING_DIMS, HTTP_TIMEOUT_DEFAULT
+
+
+def _get_openai_api_key() -> Optional[str]:
+    """Get OpenAI API key from settings."""
+    from rem.settings import settings
+    return settings.llm.openai_api_key
+
 
 def generate_embedding(
     text: str,
@@ -26,19 +33,19 @@ def generate_embedding(
         text: Text to embed
         model: Model name (default: text-embedding-3-small)
         provider: Provider name (default: openai)
-        api_key: API key (defaults to
+        api_key: API key (defaults to settings.llm.openai_api_key)
 
     Returns:
         Embedding vector (1536 dimensions for text-embedding-3-small)
     """
     if provider == "openai":
-        api_key = api_key or
+        api_key = api_key or _get_openai_api_key()
         if not api_key:
             logger.warning("No OpenAI API key - returning zero vector")
-            return [0.0] *
+            return [0.0] * DEFAULT_EMBEDDING_DIMS
 
         try:
-            logger.
+            logger.debug(f"Generating OpenAI embedding for text using {model}")
 
             response = requests.post(
                 "https://api.openai.com/v1/embeddings",
@@ -47,22 +54,22 @@ def generate_embedding(
                     "Content-Type": "application/json",
                 },
                 json={"input": [text], "model": model},
-                timeout=
+                timeout=HTTP_TIMEOUT_DEFAULT,
            )
            response.raise_for_status()
 
            data = response.json()
            embedding = data["data"][0]["embedding"]
-            logger.
+            logger.debug(f"Successfully generated embedding (dimension: {len(embedding)})")
            return cast(list[float], embedding)
 
        except Exception as e:
            logger.error(f"Failed to generate embedding from OpenAI: {e}", exc_info=True)
-            return [0.0] *
+            return [0.0] * DEFAULT_EMBEDDING_DIMS
 
    else:
        logger.warning(f"Unsupported provider '{provider}' - returning zero vector")
-        return [0.0] *
+        return [0.0] * DEFAULT_EMBEDDING_DIMS
 
 
 async def generate_embedding_async(
@@ -78,19 +85,19 @@ async def generate_embedding_async(
         text: Text to embed
         model: Model name (default: text-embedding-3-small)
         provider: Provider name (default: openai)
-        api_key: API key (defaults to
+        api_key: API key (defaults to settings.llm.openai_api_key)
 
     Returns:
         Embedding vector (1536 dimensions for text-embedding-3-small)
     """
     if provider == "openai":
-        api_key = api_key or
+        api_key = api_key or _get_openai_api_key()
         if not api_key:
             logger.warning("No OpenAI API key - returning zero vector")
-            return [0.0] *
+            return [0.0] * DEFAULT_EMBEDDING_DIMS
 
         try:
-            logger.
+            logger.debug(f"Generating OpenAI embedding for text using {model}")
 
             async with httpx.AsyncClient() as client:
                 response = await client.post(
@@ -100,21 +107,21 @@ async def generate_embedding_async(
                         "Content-Type": "application/json",
                     },
                     json={"input": [text], "model": model},
-                    timeout=
+                    timeout=HTTP_TIMEOUT_DEFAULT,
                )
                response.raise_for_status()
 
                data = response.json()
                embedding = data["data"][0]["embedding"]
-                logger.
+                logger.debug(
                    f"Successfully generated embedding (dimension: {len(embedding)})"
                )
                return cast(list[float], embedding)
 
        except Exception as e:
            logger.error(f"Failed to generate embedding from OpenAI: {e}", exc_info=True)
-            return [0.0] *
+            return [0.0] * DEFAULT_EMBEDDING_DIMS
 
    else:
        logger.warning(f"Unsupported provider '{provider}' - returning zero vector")
-        return [0.0] *
+        return [0.0] * DEFAULT_EMBEDDING_DIMS
```
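Both embedding helpers now resolve the API key from `settings.llm.openai_api_key` (the removed `import os` suggests the old code read an environment variable, but the truncated removed lines do not show it), and the magic numbers move to `rem.utils.constants`. A minimal usage sketch, assuming the package is installed and a key is configured in settings; `DEFAULT_EMBEDDING_DIMS` is presumably 1536 to match the documented text-embedding-3-small dimension, and the value of `HTTP_TIMEOUT_DEFAULT` is not visible in this diff:

```python
# Usage sketch; assumes remdb >= 0.3.x is installed and an OpenAI key is set in settings.
import asyncio

from rem.services.embeddings.api import generate_embedding, generate_embedding_async

# Synchronous call: a 1536-dimension vector for text-embedding-3-small,
# or a zero vector of length DEFAULT_EMBEDDING_DIMS when no key is configured.
vec = generate_embedding("moments with temporal boundaries and metadata")
print(len(vec))

# Async variant has the same defaults and the same zero-vector fallback.
vec_async = asyncio.run(generate_embedding_async("moments with temporal boundaries and metadata"))
print(len(vec_async))
```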
rem/services/embeddings/worker.py
CHANGED
```diff
@@ -69,7 +69,7 @@ def get_global_embedding_worker(postgres_service: Any = None) -> "EmbeddingWorke
         if postgres_service is None:
             raise RuntimeError("Must provide postgres_service on first call to get_global_embedding_worker")
         _global_worker = EmbeddingWorker(postgres_service=postgres_service)
-        logger.
+        logger.debug("Created global EmbeddingWorker singleton")
 
     return _global_worker
 
@@ -117,7 +117,7 @@ class EmbeddingWorker:
                 "No OpenAI API key provided - embeddings will use zero vectors"
             )
 
-        logger.
+        logger.debug(
             f"Initialized EmbeddingWorker: {num_workers} workers, "
             f"batch_size={batch_size}, timeout={batch_timeout}s"
         )
@@ -125,17 +125,17 @@ class EmbeddingWorker:
     async def start(self) -> None:
         """Start worker pool."""
         if self.running:
-            logger.
+            logger.debug("EmbeddingWorker already running")
             return
 
         self.running = True
-        logger.
+        logger.debug(f"Starting {self.num_workers} embedding workers")
 
         for i in range(self.num_workers):
             worker = asyncio.create_task(self._worker_loop(i))
             self.workers.append(worker)
 
-        logger.
+        logger.debug("EmbeddingWorker started")
 
     async def stop(self) -> None:
         """Stop worker pool gracefully - processes remaining queue before stopping."""
@@ -143,7 +143,7 @@ class EmbeddingWorker:
             return
 
         queue_size = self.task_queue.qsize()
-        logger.
+        logger.debug(f"Stopping EmbeddingWorker (processing {queue_size} queued tasks first)")
 
         # Wait for queue to drain (with timeout)
         max_wait = 30  # 30 seconds max
@@ -171,7 +171,7 @@ class EmbeddingWorker:
         await asyncio.gather(*self.workers, return_exceptions=True)
 
         self.workers.clear()
-        logger.
+        logger.debug("EmbeddingWorker stopped")
 
     async def queue_task(self, task: EmbeddingTask) -> None:
         """
@@ -195,7 +195,7 @@ class EmbeddingWorker:
         Args:
             worker_id: Unique worker identifier
         """
-        logger.
+        logger.debug(f"Worker {worker_id} started")
 
         while self.running:
             try:
@@ -205,7 +205,7 @@ class EmbeddingWorker:
                 if not batch:
                     continue
 
-                logger.
+                logger.debug(f"Worker {worker_id} processing batch of {len(batch)} tasks")
 
                 # Generate embeddings for batch
                 await self._process_batch(batch)
@@ -213,14 +213,14 @@ class EmbeddingWorker:
                 logger.debug(f"Worker {worker_id} completed batch")
 
             except asyncio.CancelledError:
-                logger.
+                logger.debug(f"Worker {worker_id} cancelled")
                 break
             except Exception as e:
                 logger.error(f"Worker {worker_id} error: {e}", exc_info=True)
                 # Continue processing (don't crash worker on error)
                 await asyncio.sleep(1)
 
-        logger.
+        logger.debug(f"Worker {worker_id} stopped")
 
     async def _collect_batch(self) -> list[EmbeddingTask]:
         """
@@ -284,10 +284,10 @@ class EmbeddingWorker:
         )
 
         # Upsert to database
-        logger.
+        logger.debug(f"Upserting {len(embeddings)} embeddings to database...")
         await self._upsert_embeddings(batch, embeddings)
 
-        logger.
+        logger.debug(
             f"Successfully generated and stored {len(embeddings)} embeddings "
             f"(provider={provider}, model={model})"
         )
@@ -315,7 +315,7 @@ class EmbeddingWorker:
         """
         if provider == "openai" and self.openai_api_key:
             try:
-                logger.
+                logger.debug(
                     f"Generating OpenAI embeddings for {len(texts)} texts using {model}"
                 )
 
@@ -336,7 +336,7 @@ class EmbeddingWorker:
                 data = response.json()
                 embeddings = [item["embedding"] for item in data["data"]]
 
-                logger.
+                logger.debug(
                     f"Successfully generated {len(embeddings)} embeddings from OpenAI"
                 )
                 return embeddings
@@ -409,7 +409,7 @@ class EmbeddingWorker:
             ),
         )
 
-        logger.
+        logger.debug(
             f"Upserted embedding: {task.table_name}.{task.entity_id}.{task.field_name}"
         )
 
```
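Every lifecycle and batch message in `EmbeddingWorker` is now emitted through `logger.debug`, so the worker stays quiet at the default log level. The calls visible in these hunks trace the intended lifecycle; a rough sketch of that flow follows, where the `EmbeddingTask` constructor arguments are assumptions based only on the `table_name`/`entity_id`/`field_name` attributes that appear above:

```python
# Lifecycle sketch only; EmbeddingTask's exact fields and constructor are assumptions.
from rem.services.embeddings.worker import EmbeddingTask, get_global_embedding_worker


async def embed_some_fields(postgres_service) -> None:
    # First call must supply postgres_service; later calls reuse the singleton.
    worker = get_global_embedding_worker(postgres_service=postgres_service)
    await worker.start()  # spawns num_workers batch loops

    # Queue work; workers collect batches, call the provider, then upsert vectors.
    await worker.queue_task(
        EmbeddingTask(table_name="moments", entity_id="123", field_name="summary")
    )

    # Graceful shutdown: drains the remaining queue (bounded wait) before stopping.
    await worker.stop()
```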
rem/services/phoenix/EXPERIMENT_DESIGN.md
CHANGED
````diff
@@ -164,7 +164,7 @@ cp curated-queries.csv experiments/rem-001/validation/production/
 **Option C: Curated Engrams**
 ```bash
 # Generate engrams from REM data
-rem dreaming full --user-id test-user
+rem dreaming full --user-id test-user --generate-test-cases
 
 # Review and select high-quality engrams
 rem engram list --quality high --limit 100 --output engrams.csv
@@ -357,7 +357,7 @@ Level 4 (Mature): Multiple cycles, full query capabilities
 # Generate engrams from REM data
 rem dreaming full \
   --user-id test-user \
-
+  \
   --generate-test-cases \
   --quality-level 3
 
@@ -1027,7 +1027,7 @@ rem experiments experiment run rem-lookup-ask_rem-golden \
 
 ```bash
 # 1. Generate high-quality engrams
-rem dreaming full
+rem dreaming full --generate-test-cases --quality-level 4
 
 # 2. Export engrams
 rem engram export rem-engrams-mature-mixed --output engrams.csv --format phoenix
````